Improvements to text generation and error handling

- Warn about a running LibreOffice Writer instance
- Better handling of leading/trailing whitespace for descriptions
- Create .md file for Weasyl
2023-11-20 14:32:19 -03:00 · 2023-11-20 14:32:19 -03:00 · 68603a93d6
commit 68603a93d6
parent 468e219ca8
3 changed files with 60 additions and 26 deletions
--- a/description.py
+++ b/description.py
@@ -3,6 +3,7 @@ import io
 import json
 import lark
 import os
 import psutil
 import re
 import subprocess
 import typing
@@ -76,7 +77,7 @@ class UserTag:
 class UploadTransformer(lark.Transformer):
  def __init__(self, *args, **kwargs):
-    super(UploadTransformer, self).__init__(*args, **kwargs)
+    super().__init__(*args, **kwargs)
    def _user_tag_factory(tag):
      # Create a new UserTag if innermost node, or append to list in order
      def user_tag(data):
@@ -245,48 +246,51 @@ class PlaintextTransformer(UploadTransformer):
        return f'@{mastodon_user} on {mastodon_instance}'
      else:
        print(f'Unknown site "{site}" found in user tag; ignoring...')
-    return super(PlaintextTransformer, self).user_tag_root(data)
+    return super().user_tag_root(data)
 class AryionTransformer(BbcodeTransformer):
  def __init__(self, self_user, *args, **kwargs):
-    super(AryionTransformer, self).__init__(*args, **kwargs)
+    super().__init__(*args, **kwargs)
    def self_tag(data):
      return self.user_tag_root((UserTag(eka=self_user),))
    self.self_tag = self_tag
-  def transformer_matches_site(self, site: str) -> bool:
+  @staticmethod
  def transformer_matches_site(site: str) -> bool:
    return site in ('eka', 'aryion')
  def user_tag_root(self, data):
    user_data = data[0]
    if user_data['eka']:
      return f':icon{user_data["eka"]}:'
-    return super(AryionTransformer, self).user_tag_root(data)
+    return super().user_tag_root(data)
 class FuraffinityTransformer(BbcodeTransformer):
  def __init__(self, self_user, *args, **kwargs):
-    super(FuraffinityTransformer, self).__init__(*args, **kwargs)
+    super().__init__(*args, **kwargs)
    def self_tag(data):
      return self.user_tag_root((UserTag(fa=self_user),))
    self.self_tag = self_tag
-  def transformer_matches_site(self, site: str) -> bool:
+  @staticmethod
  def transformer_matches_site(site: str) -> bool:
    return site in ('fa', 'furaffinity')
  def user_tag_root(self, data):
    user_data = data[0]
    if user_data['fa']:
      return f':icon{user_data["fa"]}:'
-    return super(FuraffinityTransformer, self).user_tag_root(data)
+    return super().user_tag_root(data)
 class WeasylTransformer(MarkdownTransformer):
  def __init__(self, self_user, *args, **kwargs):
-    super(WeasylTransformer, self).__init__(*args, **kwargs)
+    super().__init__(*args, **kwargs)
    def self_tag(data):
      return self.user_tag_root((UserTag(weasyl=self_user),))
    self.self_tag = self_tag
-  def transformer_matches_site(self, site: str) -> bool:
+  @staticmethod
  def transformer_matches_site(site: str) -> bool:
    return site == 'weasyl'
  def user_tag_root(self, data):
@@ -301,16 +305,17 @@ class WeasylTransformer(MarkdownTransformer):
          return f'<ib:{user_data["ib"]}>'
        if site == 'sf':
          return f'<sf:{user_data["sf"]}>'
-    return super(WeasylTransformer, self).user_tag_root(data)
+    return super().user_tag_root(data)
 class InkbunnyTransformer(BbcodeTransformer):
  def __init__(self, self_user, *args, **kwargs):
-    super(InkbunnyTransformer, self).__init__(*args, **kwargs)
+    super().__init__(*args, **kwargs)
    def self_tag(data):
      return self.user_tag_root((UserTag(ib=self_user),))
    self.self_tag = self_tag
-  def transformer_matches_site(self, site: str) -> bool:
+  @staticmethod
  def transformer_matches_site(site: str) -> bool:
    return site in ('ib', 'inkbunny')
  def user_tag_root(self, data):
@@ -325,16 +330,17 @@ class InkbunnyTransformer(BbcodeTransformer):
          return f'[sf]{user_data["sf"]}[/sf]'
        if site == 'weasyl':
          return f'[weasyl]{user_data["weasyl"].replace(" ", "").lower()}[/weasyl]'
-    return super(InkbunnyTransformer, self).user_tag_root(data)
+    return super().user_tag_root(data)
 class SoFurryTransformer(BbcodeTransformer):
  def __init__(self, self_user, *args, **kwargs):
-    super(SoFurryTransformer, self).__init__(*args, **kwargs)
+    super().__init__(*args, **kwargs)
    def self_tag(data):
      return self.user_tag_root((UserTag(sf=self_user),))
    self.self_tag = self_tag
-  def transformer_matches_site(self, site: str) -> bool:
+  @staticmethod
  def transformer_matches_site(site: str) -> bool:
    return site in ('sf', 'sofurry')
  def user_tag_root(self, data):
@@ -347,10 +353,18 @@ class SoFurryTransformer(BbcodeTransformer):
          return f'fa!{user_data["fa"]}'
        if site == 'ib':
          return f'ib!{user_data["ib"]}'
-    return super(SoFurryTransformer, self).user_tag_root(data)
+    return super().user_tag_root(data)
 def parse_description(description_path, config_path, out_dir, ignore_empty_files=False):
  for proc in psutil.process_iter(['cmdline']):
    if proc.info['cmdline'] and 'libreoffice' in proc.info['cmdline'][0] and '--writer' in proc.info['cmdline'][1:]:
      if ignore_empty_files:
        print('WARN: LibreOffice Writer appears to be running. This command may output empty files until it is closed.')
        break
      print('WARN: LibreOffice Writer appears to be running. This command may raise an error until it is closed.')
      break
  ps = subprocess.Popen(('libreoffice', '--cat', description_path), stdout=subprocess.PIPE)
  description = '\n'.join(line.strip() for line in io.TextIOWrapper(ps.stdout, encoding='utf-8-sig'))
  if not description or re.match(r'^\s+$', description):
@@ -382,17 +396,17 @@ def parse_description(description_path, config_path, out_dir, ignore_empty_files
        errors.append(ValueError(f'Website \'{website}\' has invalid username \'{json.dumps(username)}\''))
      elif username.strip() == '':
        errors.append(ValueError(f'Website \'{website}\' has empty username'))
-    if not any(ws in config for ws in ('aryion', 'furaffinity', 'weasyl', 'inkbunny', 'sofurry')):
+    if not any(ws in config for ws in transformations):
      errors.append(ValueError('No valid websites found'))
  if errors:
    raise ExceptionGroup('Invalid configuration for description parsing', errors)
  # Create descriptions
-  re_multiple_empty_lines = re.compile(r'\n\n+')
+  RE_MULTIPLE_EMPTY_LINES = re.compile(r'\n\n+')
  for (website, username) in config.items():
    (filepath, transformer) = transformations[website]
    with open(os.path.join(out_dir, filepath), 'w') as f:
      if description.strip():
        transformed_description = transformer(username).transform(parsed_description)
-        f.write(re_multiple_empty_lines.sub('\n\n', transformed_description))
+        f.write(RE_MULTIPLE_EMPTY_LINES.sub('\n\n', transformed_description).strip() + '\n')
      else:
        f.write('')
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1,2 @@
-lark==1.1.5
+lark==1.1.8
 psutil==5.9.6
--- a/story.py
+++ b/story.py
@@ -1,6 +1,7 @@
 import io
 import json
 import os
 import psutil
 import re
 import subprocess
@@ -21,38 +22,56 @@ def parse_story(story_path, config_path, out_dir, temp_dir, ignore_empty_files=F
    config = json.load(f)
  if type(config) is not dict:
    raise ValueError('Invalid configuration for story parsing: Configuration must be a JSON object')
-  should_create_txt_story = any(ws in config for ws in ('furaffinity', 'weasyl', 'inkbunny', 'sofurry'))
+  should_create_txt_story = any(ws in config for ws in ('furaffinity', 'inkbunny', 'sofurry'))
  should_create_md_story = any(ws in config for ws in ('weasyl',))
  should_create_rtf_story = any(ws in config for ws in ('aryion',))
-  if not should_create_txt_story and not should_create_rtf_story:
+  if not any((should_create_txt_story, should_create_md_story, should_create_rtf_story)):
    raise ValueError('Invalid configuration for story parsing: No valid websites found')
  for proc in psutil.process_iter(['cmdline']):
    if proc.info['cmdline'] and 'libreoffice' in proc.info['cmdline'][0] and '--writer' in proc.info['cmdline'][1:]:
      if ignore_empty_files:
        print('WARN: LibreOffice Writer appears to be running. This command may output empty files until it is closed.')
        break
      print('WARN: LibreOffice Writer appears to be running. This command may raise an error until it is closed.')
      break
  story_filename = os.path.split(story_path)[1].rsplit('.')[0]
  txt_out_path = os.path.join(out_dir, f'{story_filename}.txt') if should_create_txt_story else os.devnull
  md_out_path = os.path.join(out_dir, f'{story_filename}.md') if should_create_md_story else os.devnull
  txt_tmp_path = os.path.join(temp_dir, f'{story_filename}.txt') if should_create_rtf_story else os.devnull
-  RE_EMPTY_LINE = re.compile('^$')
+  RE_EMPTY_LINE = re.compile(r'^$')
  RE_SEQUENTIAL_EQUAL_SIGNS = re.compile(r'=(?==)')
  is_only_empty_lines = True
  ps = subprocess.Popen(('libreoffice', '--cat', story_path), stdout=subprocess.PIPE)
-  # Mangle output files so that .RTF will always have a single LF between lines, and .TXT can have one or two CRLF
+  # Mangle output files so that .RTF will always have a single LF between lines, and .TXT/.MD can have one or two CRLF
-  with open(txt_out_path, 'w', newline='\r\n') as txt_out, open(txt_tmp_path, 'w') as txt_tmp:
+  with open(txt_out_path, 'w', newline='\r\n') as txt_out, open(md_out_path, 'w', newline='\r\n') as md_out, open(txt_tmp_path, 'w') as txt_tmp:
    needs_empty_line = False
    for line in io.TextIOWrapper(ps.stdout, encoding='utf-8-sig'):
      # Remove empty lines
      line = line.strip()
      md_line = line
      if RE_EMPTY_LINE.search(line) and not is_only_empty_lines:
        needs_empty_line = True
      else:
        if should_create_md_story:
          md_line = RE_SEQUENTIAL_EQUAL_SIGNS.sub('= ', line.replace(r'*', r'\*'))
        if is_only_empty_lines:
          txt_out.writelines((line,))
          md_out.writelines((md_line,))
          txt_tmp.writelines((line,))
          is_only_empty_lines = False
        else:
          if needs_empty_line:
            txt_out.writelines(('\n\n', line))
            md_out.writelines(('\n\n', md_line))
            needs_empty_line = False
          else:
            txt_out.writelines(('\n', line))
            md_out.writelines(('\n', md_line))
          txt_tmp.writelines(('\n', line))
    txt_out.writelines(('\n'))
    md_out.writelines(('\n'))
  if is_only_empty_lines:
    error = f'Story processing returned empty file: libreoffice --cat {story_path}'
    if ignore_empty_files: