From f9940eacbe4022f8200ad3f7062fbdb712c84217 Mon Sep 17 00:00:00 2001 From: Bad Manners Date: Fri, 30 Jun 2023 17:37:55 -0300 Subject: [PATCH] Refactor out story and description logic --- README.md | 3 +- parse.py => description.py | 52 +++++++++++++------------- example_config.json | 1 - main.py | 75 +++----------------------------------- story.py | 73 +++++++++++++++++++++++++++++++++++++ 5 files changed, 106 insertions(+), 98 deletions(-) rename parse.py => description.py (85%) create mode 100644 story.py diff --git a/README.md b/README.md index 2c425e5..dc8d443 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,6 @@ In order to parse descriptions, you need a configuration file (default path is ` "furaffinity": "My_Username", "inkbunny": "MyUsername", "sofurry": "My Username", - "twitter": "MyUsername", "weasyl": "MyUsername" } ``` @@ -40,7 +39,7 @@ Input descriptions should be formatted as BBCode. The following tags are accepte [url=https://github.com]URL link[/url] ``` -There are also special tags to link to yourself or other users automatically: +There are also special tags to link to yourself or other users automatically. This may include websites not available in the configuration: ```bbcode [self][/self] diff --git a/parse.py b/description.py similarity index 85% rename from parse.py rename to description.py index e822218..1e23180 100644 --- a/parse.py +++ b/description.py @@ -1,10 +1,14 @@ from collections import OrderedDict +import io import json import lark import os +import re +import subprocess import typing -SUPPORTED_USER_TAGS = ('eka', 'fa', 'weasyl', 'ib', 'sf', 'twitter') + +SUPPORTED_USER_TAGS = ['eka', 'fa', 'weasyl', 'ib', 'sf', 'twitter'] DESCRIPTION_GRAMMAR = r""" ?start: document_list @@ -34,7 +38,7 @@ DESCRIPTION_GRAMMAR += r""" USERNAME: /[a-zA-Z0-9][a-zA-Z0-9 _-]*/ URL: /(https?:\/\/)?[^\]]+/ - TEXT: /([^\[:]|[ \t\r\n]|:(?!icon))+/ + TEXT: /([^\[]|[ \t\r\n])+/ %import common.WS """ @@ -43,8 +47,8 @@ DESCRIPTION_PARSER = lark.Lark(DESCRIPTION_GRAMMAR, parser='lalr') class UserTag: - def __init__(self, default=None, **kwargs): - self.default: typing.Optional[str] = default + def __init__(self, default: typing.Optional[str]=None, **kwargs): + self.default = default self._sites: typing.OrderedDict[str, typing.Optional[str]] = OrderedDict() for (k, v) in kwargs.items(): if k in SUPPORTED_USER_TAGS: @@ -241,29 +245,25 @@ class SoFurryTransformer(BbcodeTransformer): return f'ib!{user_data["ib"]}' return super(SoFurryTransformer, self).user_tag_root(data) -class TwitterTransformer(PlaintextTransformer): - def __init__(self, this_user, *args, **kwargs): - super(TwitterTransformer, self).__init__(*args, **kwargs) - self.self_tag = lambda _: self.user_tag_root((UserTag(twitter=this_user),)) - def user_tag_root(self, data): - user_data = data[0] - if user_data['twitter']: - return f'@{user_data["twitter"]}' - return super(TwitterTransformer, self).user_tag_root(data) +def parse_description(description_path, config_path, out_dir, ignore_empty_files=False): + ps = subprocess.Popen(('libreoffice', '--cat', description_path), stdout=subprocess.PIPE) + description = '\n'.join(line.strip() for line in io.TextIOWrapper(ps.stdout, encoding='utf-8-sig')) + if not description or re.match(r'^\s+$', description): + error = f'Description processing returned empty file: libreoffice --cat {description_path}' + if ignore_empty_files: + print(f'Ignoring error ({error})') + else: + raise RuntimeError(error) -TRANSFORMATIONS = { - 'aryion': ('desc_aryion.txt', AryionTransformer), - 
'furaffinity': ('desc_furaffinity.txt', FuraffinityTransformer), - 'inkbunny': ('desc_inkbunny.txt', InkbunnyTransformer), - 'sofurry': ('desc_sofurry.txt', SoFurryTransformer), - 'twitter': ('desc_twitter.txt', TwitterTransformer), - 'weasyl': ('desc_weasyl.md', WeasylTransformer), -} - - -def parse_description(description, config_path, out_dir): parsed_description = DESCRIPTION_PARSER.parse(description) + transformations = { + 'aryion': ('desc_aryion.txt', AryionTransformer), + 'furaffinity': ('desc_furaffinity.txt', FuraffinityTransformer), + 'inkbunny': ('desc_inkbunny.txt', InkbunnyTransformer), + 'sofurry': ('desc_sofurry.txt', SoFurryTransformer), + 'weasyl': ('desc_weasyl.md', WeasylTransformer), + } with open(config_path, 'r') as f: config = json.load(f) # Validate JSON @@ -272,7 +272,7 @@ def parse_description(description, config_path, out_dir): errors.append(ValueError('Configuration must be a JSON object')) else: for (website, username) in config.items(): - if website not in TRANSFORMATIONS: + if website not in transformations: errors.append(ValueError(f'Website \'{website}\' is unsupported')) elif type(username) is not str: errors.append(ValueError(f'Website \'{website}\' has invalid username \'{json.dumps(username)}\'')) @@ -282,7 +282,7 @@ def parse_description(description, config_path, out_dir): raise ExceptionGroup('Invalid configuration for description parsing', errors) # Create descriptions for (website, username) in config.items(): - (filepath, transformer) = TRANSFORMATIONS[website] + (filepath, transformer) = transformations[website] with open(os.path.join(out_dir, filepath), 'w') as f: if description: f.write(transformer(username).transform(parsed_description)) diff --git a/example_config.json b/example_config.json index bfc26d6..226e2f3 100644 --- a/example_config.json +++ b/example_config.json @@ -3,6 +3,5 @@ "furaffinity": "My_Username", "inkbunny": "MyUsername", "sofurry": "My Username", - "twitter": "MyUsername", "weasyl": "MyUsername" } \ No newline at end of file diff --git a/main.py b/main.py index e639bf8..babb438 100644 --- a/main.py +++ b/main.py @@ -1,26 +1,14 @@ import argparse -import io import os -import re -import subprocess +from subprocess import CalledProcessError import tempfile -from parse import parse_description +from description import parse_description +from story import parse_story OUT_DIR = './out' -def get_rtf_styles(rtf_source: str): - match_list = re.findall(r'\\s(\d+)(?:\\sbasedon\d+)?\\snext\d+((?:\\[a-z0-9]+ ?)+)(?: ([A-Z][a-zA-Z ]*));', rtf_source) - if not match_list: - raise ValueError(f'Couldn\'t find valid RTF styles') - rtf_styles = {} - for (style_number, partial_rtf_style, style_name) in match_list: - rtf_style = r'\s' + style_number + partial_rtf_style - rtf_styles[int(style_number)] = rtf_style - rtf_styles[style_name] = rtf_style - return rtf_styles - def main(story_path=None, description_path=None, config_path='./config.json', keep_out_dir=False, ignore_empty_files=False): remove_out_dir = not keep_out_dir and os.path.isdir(OUT_DIR) with tempfile.TemporaryDirectory() as tdir: @@ -33,64 +21,13 @@ def main(story_path=None, description_path=None, config_path='./config.json', ke try: # Convert original file to .rtf (Aryion) and .txt (all others) if story_path: - story_filename = os.path.split(story_path)[1].rsplit('.')[0] - txt_out_path = os.path.join(OUT_DIR, f'{story_filename}.txt') - txt_tmp_path = os.path.join(tdir, f'{story_filename}.txt') - rtf_out_path = os.path.join(OUT_DIR, f'{story_filename}.rtf') - RE_EMPTY_LINE = 
re.compile('^$') - is_only_empty_lines = True - ps = subprocess.Popen(('libreoffice', '--cat', story_path), stdout=subprocess.PIPE) - with open(txt_out_path, 'w', newline='\r\n') as txt_out, open(txt_tmp_path, 'w') as txt_tmp: - needs_empty_line = False - for line in io.TextIOWrapper(ps.stdout, encoding='utf-8-sig'): - # Remove empty lines - line = line.strip() - if RE_EMPTY_LINE.search(line) and not is_only_empty_lines: - needs_empty_line = True - else: - if is_only_empty_lines: - txt_out.writelines((line,)) - txt_tmp.writelines((line,)) - is_only_empty_lines = False - else: - if needs_empty_line: - txt_out.writelines(('\n\n', line)) - needs_empty_line = False - else: - txt_out.writelines(('\n', line)) - txt_tmp.writelines(('\n', line)) - txt_out.writelines(('\n')) - if is_only_empty_lines: - error = f'Story processing returned empty file: libreoffice --cat {story_path}' - if ignore_empty_files: - print(f'Ignoring error ({error})') - else: - raise RuntimeError(error) - # Convert temporary .txt to .rtf - subprocess.run(['libreoffice', '--convert-to', 'rtf:Rich Text Format', '--outdir', OUT_DIR, txt_tmp_path], check=True, capture_output=True) - # Convert monospace font ('Preformatted Text') to serif ('Normal') - with open(rtf_out_path, 'r+') as f: - rtf = f.read() - rtf_styles = get_rtf_styles(rtf) - monospace_style = rtf_styles['Preformatted Text'] # rtf_styles[20] - serif_style = rtf_styles['Normal'] # rtf_styles[0] - f.seek(0) - f.write(rtf.replace(monospace_style, serif_style)) - f.truncate() + parse_story(story_path, config_path, OUT_DIR, tdir, ignore_empty_files) # Parse FA description and convert for each website if description_path: - ps = subprocess.Popen(('libreoffice', '--cat', description_path), stdout=subprocess.PIPE) - desc = '\n'.join(line.strip() for line in io.TextIOWrapper(ps.stdout, encoding='utf-8-sig')) - if not desc or re.match(r'^\s+$', desc): - error = f'Description processing returned empty file: libreoffice --cat {description_path}' - if ignore_empty_files: - print(f'Ignoring error ({error})') - else: - raise RuntimeError(error) - parse_description(desc, config_path, OUT_DIR) + parse_description(description_path, config_path, OUT_DIR, ignore_empty_files) - except subprocess.CalledProcessError as e: + except CalledProcessError as e: if remove_out_dir: # Revert directory removal on error os.rename(OUT_DIR, os.path.join(tdir, 'get_rid_of_this')) diff --git a/story.py b/story.py new file mode 100644 index 0000000..247c332 --- /dev/null +++ b/story.py @@ -0,0 +1,73 @@ +import io +import json +import os +import re +import subprocess + + +def get_rtf_styles(rtf_source: str): + match_list = re.findall(r'\\s(\d+)(?:\\sbasedon\d+)?\\snext\d+((?:\\[a-z0-9]+ ?)+)(?: ([A-Z][a-zA-Z ]*));', rtf_source) + if not match_list: + raise ValueError(f'Couldn\'t find valid RTF styles') + rtf_styles = {} + for (style_number, partial_rtf_style, style_name) in match_list: + rtf_style = r'\s' + style_number + partial_rtf_style + rtf_styles[int(style_number)] = rtf_style + rtf_styles[style_name] = rtf_style + return rtf_styles + +def parse_story(story_path, config_path, out_dir, temp_dir, ignore_empty_files=False): + with open(config_path, 'r') as f: + config = json.load(f) + if type(config) is not dict: + raise ValueError('Configuration must be a JSON object') + should_create_txt_story = any(ws in config for ws in ('furaffinity', 'weasyl', 'inkbunny', 'sofurry')) + should_create_rtf_story = any(ws in config for ws in ('aryion',)) + if not should_create_txt_story and not 
should_create_rtf_story:
+        raise ValueError('Configuration must include at least one supported website')
+
+    story_filename = os.path.split(story_path)[1].rsplit('.')[0]
+    txt_out_path = os.path.join(out_dir, f'{story_filename}.txt') if should_create_txt_story else os.devnull
+    txt_tmp_path = os.path.join(temp_dir, f'{story_filename}.txt') if should_create_rtf_story else os.devnull
+    RE_EMPTY_LINE = re.compile('^$')
+    is_only_empty_lines = True
+    ps = subprocess.Popen(('libreoffice', '--cat', story_path), stdout=subprocess.PIPE)
+    with open(txt_out_path, 'w', newline='\r\n') as txt_out, open(txt_tmp_path, 'w') as txt_tmp:
+        needs_empty_line = False
+        for line in io.TextIOWrapper(ps.stdout, encoding='utf-8-sig'):
+            # Remove empty lines
+            line = line.strip()
+            if RE_EMPTY_LINE.search(line) and not is_only_empty_lines:
+                needs_empty_line = True
+            else:
+                if is_only_empty_lines:
+                    txt_out.writelines((line,))
+                    txt_tmp.writelines((line,))
+                    is_only_empty_lines = False
+                else:
+                    if needs_empty_line:
+                        txt_out.writelines(('\n\n', line))
+                        needs_empty_line = False
+                    else:
+                        txt_out.writelines(('\n', line))
+                    txt_tmp.writelines(('\n', line))
+        txt_out.writelines(('\n',))
+    if is_only_empty_lines:
+        error = f'Story processing returned empty file: libreoffice --cat {story_path}'
+        if ignore_empty_files:
+            print(f'Ignoring error ({error})')
+        else:
+            raise RuntimeError(error)
+    if should_create_rtf_story:
+        rtf_out_path = os.path.join(out_dir, f'{story_filename}.rtf')
+        # Convert temporary .txt to .rtf
+        subprocess.run(['libreoffice', '--convert-to', 'rtf:Rich Text Format', '--outdir', out_dir, txt_tmp_path], check=True, capture_output=True)
+        # Convert monospace font ('Preformatted Text') to serif ('Normal')
+        with open(rtf_out_path, 'r+') as f:
+            rtf = f.read()
+            rtf_styles = get_rtf_styles(rtf)
+            monospace_style = rtf_styles['Preformatted Text'] # rtf_styles[20]
+            serif_style = rtf_styles['Normal'] # rtf_styles[0]
+            f.seek(0)
+            f.write(rtf.replace(monospace_style, serif_style))
+            f.truncate()
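
For reference, a minimal usage sketch of the two modules this patch factors out, mirroring the calls the refactored `main.py` makes. The input file names and the `./config.json` path are placeholders; `OUT_DIR` and both function signatures are taken from the patch itself.

```python
# Minimal driver sketch (not part of the patch): wires story.parse_story and
# description.parse_description together the same way the refactored main.py does.
# Requires LibreOffice on PATH, since both helpers shell out to `libreoffice --cat`.
import os
import tempfile

from description import parse_description
from story import parse_story

OUT_DIR = './out'  # same default output directory as main.py

os.makedirs(OUT_DIR, exist_ok=True)
with tempfile.TemporaryDirectory() as tdir:
    # Story conversion writes <name>.txt (and <name>.rtf if 'aryion' is configured),
    # using the temporary directory for the intermediate .txt file.
    parse_story('my_story.odt', './config.json', OUT_DIR, tdir, ignore_empty_files=False)
    # Description parsing writes one desc_<website>.* file per configured website.
    parse_description('my_description.odt', './config.json', OUT_DIR, ignore_empty_files=False)
```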