From f9940eacbe4022f8200ad3f7062fbdb712c84217 Mon Sep 17 00:00:00 2001 From: Bad Manners Date: Fri, 30 Jun 2023 17:37:55 -0300 Subject: [PATCH] Refactor out story and description logic --- README.md | 3 +- parse.py => description.py | 52 +++++++++++++------------- example_config.json | 1 - main.py | 75 +++----------------------------------- story.py | 73 +++++++++++++++++++++++++++++++++++++ 5 files changed, 106 insertions(+), 98 deletions(-) rename parse.py => description.py (85%) create mode 100644 story.py diff --git a/README.md b/README.md index 2c425e5..dc8d443 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,6 @@ In order to parse descriptions, you need a configuration file (default path is ` "furaffinity": "My_Username", "inkbunny": "MyUsername", "sofurry": "My Username", - "twitter": "MyUsername", "weasyl": "MyUsername" } ``` @@ -40,7 +39,7 @@ Input descriptions should be formatted as BBCode. The following tags are accepte [url=https://github.com]URL link[/url] ``` -There are also special tags to link to yourself or other users automatically: +There are also special tags to link to yourself or other users automatically. This may include websites not available in the configuration: ```bbcode [self][/self] diff --git a/parse.py b/description.py similarity index 85% rename from parse.py rename to description.py index e822218..1e23180 100644 --- a/parse.py +++ b/description.py @@ -1,10 +1,14 @@ from collections import OrderedDict +import io import json import lark import os +import re +import subprocess import typing -SUPPORTED_USER_TAGS = ('eka', 'fa', 'weasyl', 'ib', 'sf', 'twitter') + +SUPPORTED_USER_TAGS = ['eka', 'fa', 'weasyl', 'ib', 'sf', 'twitter'] DESCRIPTION_GRAMMAR = r""" ?start: document_list @@ -34,7 +38,7 @@ DESCRIPTION_GRAMMAR += r""" USERNAME: /[a-zA-Z0-9][a-zA-Z0-9 _-]*/ URL: /(https?:\/\/)?[^\]]+/ - TEXT: /([^\[:]|[ \t\r\n]|:(?!icon))+/ + TEXT: /([^\[]|[ \t\r\n])+/ %import common.WS """ @@ -43,8 +47,8 @@ DESCRIPTION_PARSER = lark.Lark(DESCRIPTION_GRAMMAR, parser='lalr') class UserTag: - def __init__(self, default=None, **kwargs): - self.default: typing.Optional[str] = default + def __init__(self, default: typing.Optional[str]=None, **kwargs): + self.default = default self._sites: typing.OrderedDict[str, typing.Optional[str]] = OrderedDict() for (k, v) in kwargs.items(): if k in SUPPORTED_USER_TAGS: @@ -241,29 +245,25 @@ class SoFurryTransformer(BbcodeTransformer): return f'ib!{user_data["ib"]}' return super(SoFurryTransformer, self).user_tag_root(data) -class TwitterTransformer(PlaintextTransformer): - def __init__(self, this_user, *args, **kwargs): - super(TwitterTransformer, self).__init__(*args, **kwargs) - self.self_tag = lambda _: self.user_tag_root((UserTag(twitter=this_user),)) - def user_tag_root(self, data): - user_data = data[0] - if user_data['twitter']: - return f'@{user_data["twitter"]}' - return super(TwitterTransformer, self).user_tag_root(data) +def parse_description(description_path, config_path, out_dir, ignore_empty_files=False): + ps = subprocess.Popen(('libreoffice', '--cat', description_path), stdout=subprocess.PIPE) + description = '\n'.join(line.strip() for line in io.TextIOWrapper(ps.stdout, encoding='utf-8-sig')) + if not description or re.match(r'^\s+$', description): + error = f'Description processing returned empty file: libreoffice --cat {description_path}' + if ignore_empty_files: + print(f'Ignoring error ({error})') + else: + raise RuntimeError(error) -TRANSFORMATIONS = { - 'aryion': ('desc_aryion.txt', AryionTransformer), - 
'furaffinity': ('desc_furaffinity.txt', FuraffinityTransformer), - 'inkbunny': ('desc_inkbunny.txt', InkbunnyTransformer), - 'sofurry': ('desc_sofurry.txt', SoFurryTransformer), - 'twitter': ('desc_twitter.txt', TwitterTransformer), - 'weasyl': ('desc_weasyl.md', WeasylTransformer), -} - - -def parse_description(description, config_path, out_dir): parsed_description = DESCRIPTION_PARSER.parse(description) + transformations = { + 'aryion': ('desc_aryion.txt', AryionTransformer), + 'furaffinity': ('desc_furaffinity.txt', FuraffinityTransformer), + 'inkbunny': ('desc_inkbunny.txt', InkbunnyTransformer), + 'sofurry': ('desc_sofurry.txt', SoFurryTransformer), + 'weasyl': ('desc_weasyl.md', WeasylTransformer), + } with open(config_path, 'r') as f: config = json.load(f) # Validate JSON @@ -272,7 +272,7 @@ def parse_description(description, config_path, out_dir): errors.append(ValueError('Configuration must be a JSON object')) else: for (website, username) in config.items(): - if website not in TRANSFORMATIONS: + if website not in transformations: errors.append(ValueError(f'Website \'{website}\' is unsupported')) elif type(username) is not str: errors.append(ValueError(f'Website \'{website}\' has invalid username \'{json.dumps(username)}\'')) @@ -282,7 +282,7 @@ def parse_description(description, config_path, out_dir): raise ExceptionGroup('Invalid configuration for description parsing', errors) # Create descriptions for (website, username) in config.items(): - (filepath, transformer) = TRANSFORMATIONS[website] + (filepath, transformer) = transformations[website] with open(os.path.join(out_dir, filepath), 'w') as f: if description: f.write(transformer(username).transform(parsed_description)) diff --git a/example_config.json b/example_config.json index bfc26d6..226e2f3 100644 --- a/example_config.json +++ b/example_config.json @@ -3,6 +3,5 @@ "furaffinity": "My_Username", "inkbunny": "MyUsername", "sofurry": "My Username", - "twitter": "MyUsername", "weasyl": "MyUsername" } \ No newline at end of file diff --git a/main.py b/main.py index e639bf8..babb438 100644 --- a/main.py +++ b/main.py @@ -1,26 +1,14 @@ import argparse -import io import os -import re -import subprocess +from subprocess import CalledProcessError import tempfile -from parse import parse_description +from description import parse_description +from story import parse_story OUT_DIR = './out' -def get_rtf_styles(rtf_source: str): - match_list = re.findall(r'\\s(\d+)(?:\\sbasedon\d+)?\\snext\d+((?:\\[a-z0-9]+ ?)+)(?: ([A-Z][a-zA-Z ]*));', rtf_source) - if not match_list: - raise ValueError(f'Couldn\'t find valid RTF styles') - rtf_styles = {} - for (style_number, partial_rtf_style, style_name) in match_list: - rtf_style = r'\s' + style_number + partial_rtf_style - rtf_styles[int(style_number)] = rtf_style - rtf_styles[style_name] = rtf_style - return rtf_styles - def main(story_path=None, description_path=None, config_path='./config.json', keep_out_dir=False, ignore_empty_files=False): remove_out_dir = not keep_out_dir and os.path.isdir(OUT_DIR) with tempfile.TemporaryDirectory() as tdir: @@ -33,64 +21,13 @@ def main(story_path=None, description_path=None, config_path='./config.json', ke try: # Convert original file to .rtf (Aryion) and .txt (all others) if story_path: - story_filename = os.path.split(story_path)[1].rsplit('.')[0] - txt_out_path = os.path.join(OUT_DIR, f'{story_filename}.txt') - txt_tmp_path = os.path.join(tdir, f'{story_filename}.txt') - rtf_out_path = os.path.join(OUT_DIR, f'{story_filename}.rtf') - RE_EMPTY_LINE = 
re.compile('^$') - is_only_empty_lines = True - ps = subprocess.Popen(('libreoffice', '--cat', story_path), stdout=subprocess.PIPE) - with open(txt_out_path, 'w', newline='\r\n') as txt_out, open(txt_tmp_path, 'w') as txt_tmp: - needs_empty_line = False - for line in io.TextIOWrapper(ps.stdout, encoding='utf-8-sig'): - # Remove empty lines - line = line.strip() - if RE_EMPTY_LINE.search(line) and not is_only_empty_lines: - needs_empty_line = True - else: - if is_only_empty_lines: - txt_out.writelines((line,)) - txt_tmp.writelines((line,)) - is_only_empty_lines = False - else: - if needs_empty_line: - txt_out.writelines(('\n\n', line)) - needs_empty_line = False - else: - txt_out.writelines(('\n', line)) - txt_tmp.writelines(('\n', line)) - txt_out.writelines(('\n')) - if is_only_empty_lines: - error = f'Story processing returned empty file: libreoffice --cat {story_path}' - if ignore_empty_files: - print(f'Ignoring error ({error})') - else: - raise RuntimeError(error) - # Convert temporary .txt to .rtf - subprocess.run(['libreoffice', '--convert-to', 'rtf:Rich Text Format', '--outdir', OUT_DIR, txt_tmp_path], check=True, capture_output=True) - # Convert monospace font ('Preformatted Text') to serif ('Normal') - with open(rtf_out_path, 'r+') as f: - rtf = f.read() - rtf_styles = get_rtf_styles(rtf) - monospace_style = rtf_styles['Preformatted Text'] # rtf_styles[20] - serif_style = rtf_styles['Normal'] # rtf_styles[0] - f.seek(0) - f.write(rtf.replace(monospace_style, serif_style)) - f.truncate() + parse_story(story_path, config_path, OUT_DIR, tdir, ignore_empty_files) # Parse FA description and convert for each website if description_path: - ps = subprocess.Popen(('libreoffice', '--cat', description_path), stdout=subprocess.PIPE) - desc = '\n'.join(line.strip() for line in io.TextIOWrapper(ps.stdout, encoding='utf-8-sig')) - if not desc or re.match(r'^\s+$', desc): - error = f'Description processing returned empty file: libreoffice --cat {description_path}' - if ignore_empty_files: - print(f'Ignoring error ({error})') - else: - raise RuntimeError(error) - parse_description(desc, config_path, OUT_DIR) + parse_description(description_path, config_path, OUT_DIR, ignore_empty_files) - except subprocess.CalledProcessError as e: + except CalledProcessError as e: if remove_out_dir: # Revert directory removal on error os.rename(OUT_DIR, os.path.join(tdir, 'get_rid_of_this')) diff --git a/story.py b/story.py new file mode 100644 index 0000000..247c332 --- /dev/null +++ b/story.py @@ -0,0 +1,73 @@ +import io +import json +import os +import re +import subprocess + + +def get_rtf_styles(rtf_source: str): + match_list = re.findall(r'\\s(\d+)(?:\\sbasedon\d+)?\\snext\d+((?:\\[a-z0-9]+ ?)+)(?: ([A-Z][a-zA-Z ]*));', rtf_source) + if not match_list: + raise ValueError(f'Couldn\'t find valid RTF styles') + rtf_styles = {} + for (style_number, partial_rtf_style, style_name) in match_list: + rtf_style = r'\s' + style_number + partial_rtf_style + rtf_styles[int(style_number)] = rtf_style + rtf_styles[style_name] = rtf_style + return rtf_styles + +def parse_story(story_path, config_path, out_dir, temp_dir, ignore_empty_files=False): + with open(config_path, 'r') as f: + config = json.load(f) + if type(config) is not dict: + raise ValueError('Configuration must be a JSON object') + should_create_txt_story = any(ws in config for ws in ('furaffinity', 'weasyl', 'inkbunny', 'sofurry')) + should_create_rtf_story = any(ws in config for ws in ('aryion',)) + if not should_create_txt_story and not 
should_create_rtf_story:
+        raise ValueError('Configuration must include at least one supported website')
+
+    story_filename = os.path.split(story_path)[1].rsplit('.')[0]
+    txt_out_path = os.path.join(out_dir, f'{story_filename}.txt') if should_create_txt_story else os.devnull
+    txt_tmp_path = os.path.join(temp_dir, f'{story_filename}.txt') if should_create_rtf_story else os.devnull
+    RE_EMPTY_LINE = re.compile('^$')
+    is_only_empty_lines = True
+    ps = subprocess.Popen(('libreoffice', '--cat', story_path), stdout=subprocess.PIPE)
+    with open(txt_out_path, 'w', newline='\r\n') as txt_out, open(txt_tmp_path, 'w') as txt_tmp:
+        needs_empty_line = False
+        for line in io.TextIOWrapper(ps.stdout, encoding='utf-8-sig'):
+            # Remove empty lines
+            line = line.strip()
+            if RE_EMPTY_LINE.search(line) and not is_only_empty_lines:
+                needs_empty_line = True
+            else:
+                if is_only_empty_lines:
+                    txt_out.writelines((line,))
+                    txt_tmp.writelines((line,))
+                    is_only_empty_lines = False
+                else:
+                    if needs_empty_line:
+                        txt_out.writelines(('\n\n', line))
+                        needs_empty_line = False
+                    else:
+                        txt_out.writelines(('\n', line))
+                    txt_tmp.writelines(('\n', line))
+        txt_out.writelines(('\n',))
+    if is_only_empty_lines:
+        error = f'Story processing returned empty file: libreoffice --cat {story_path}'
+        if ignore_empty_files:
+            print(f'Ignoring error ({error})')
+        else:
+            raise RuntimeError(error)
+    if should_create_rtf_story:
+        rtf_out_path = os.path.join(out_dir, f'{story_filename}.rtf')
+        # Convert temporary .txt to .rtf
+        subprocess.run(['libreoffice', '--convert-to', 'rtf:Rich Text Format', '--outdir', out_dir, txt_tmp_path], check=True, capture_output=True)
+        # Convert monospace font ('Preformatted Text') to serif ('Normal')
+        with open(rtf_out_path, 'r+') as f:
+            rtf = f.read()
+            rtf_styles = get_rtf_styles(rtf)
+            monospace_style = rtf_styles['Preformatted Text'] # rtf_styles[20]
+            serif_style = rtf_styles['Normal'] # rtf_styles[0]
+            f.seek(0)
+            f.write(rtf.replace(monospace_style, serif_style))
+            f.truncate()
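
For reference, a minimal usage sketch of the two modules this patch factors out, mirroring the calls the refactored `main.py` makes. The input file names and the `./config.json` path are placeholders; `OUT_DIR` and both function signatures are taken from the patch itself.

```python
# Minimal driver sketch (not part of the patch): wires story.parse_story and
# description.parse_description together the same way the refactored main.py does.
# Requires LibreOffice on PATH, since both helpers shell out to `libreoffice --cat`.
import os
import tempfile

from description import parse_description
from story import parse_story

OUT_DIR = './out'  # same default output directory as main.py

os.makedirs(OUT_DIR, exist_ok=True)
with tempfile.TemporaryDirectory() as tdir:
    # Story conversion writes <name>.txt (and <name>.rtf if 'aryion' is configured),
    # using the temporary directory for the intermediate .txt file.
    parse_story('my_story.odt', './config.json', OUT_DIR, tdir, ignore_empty_files=False)
    # Description parsing writes one desc_<website>.* file per configured website.
    parse_description('my_description.odt', './config.json', OUT_DIR, ignore_empty_files=False)
```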