Update description parsing and add extra flags

2023-08-17 14:08:52 -03:00 · 2023-08-17 14:08:52 -03:00 · 0afcc2fbdc
commit 0afcc2fbdc
parent 46a2400231
6 changed files with 123 additions and 53 deletions
--- a/README.md
+++ b/README.md
@ -9,7 +9,7 @@ Script to generate multi-gallery upload-ready files.

 ## Usage

-Run with `python main.py -h` for options. Generated files are output to `./out/`.
+Run with `python main.py -h` for options. Generated files are output to `./out` by default.

 ### Story files

@ -17,7 +17,7 @@ When generating an .RTF file from the source text, the script expects that Libre

 ### Description files

-In order to parse descriptions, you need a configuration file (default path is `./config.json`) with the websites you wish to upload to and your username there. For example:
+In order to parse descriptions, you need a configuration file (default path is `./config.json`) with the websites you wish to upload to, and your username there. For example:

 ```json
 {
@ -49,10 +49,11 @@ There are also special tags to link to yourself or other users automatically. Th
 [weasyl]WeasylUser[/weasyl]
 [ib]InkbunnyUser[/ib]
 [sf]SoFurryUser[/sf]
-[twitter]TwitterUser[/twitter]
+[twitter]@TwitterUser[/twitter] - Leading '@' is optional
+[mastodon]@MastodonUser@mastodoninstance.com[/mastodon] - Leading '@' is optional
 ```

-`[self]` tags must always be empty. The other tags are nestable and flexible, allowing attributes to display information differently on each supported website. Some examples:
+`[self][/self]` tags must always be empty. The other tags are nestable and flexible, allowing attributes to display information differently on each supported website. Some examples:

 ```bbcode
 [eka=Lorem][/eka] is equivalent to [eka]Lorem[/eka].
@ -64,5 +65,5 @@ There are also special tags to link to yourself or other users automatically. Th

 [ib=Amet][weasyl=Sit]Consectetur[/weasyl][/ib] is the same as above, but Consectetur is displayed as the username for websites other than Inkbunny and Weasyl. The Weasyl gallery is linked to in those websites.

-[generic=https://github.com/BadMannersXYZ]Bad Manners[/generic] can be used as the innermost tag with a mandatory URL attribute and default username, and is similar to the URL tag, but it can be nested within other profile links.
+[generic=https://github.com/BadMannersXYZ]Bad Manners[/generic] can be used as the innermost tag, with a mandatory URL attribute and a default username. It is similar to the URL tag, but it can be nested within other profile links, which are then used for intra-linking only.
 ```
--- a/config.example.json
+++ b/config.example.json
@ -0,0 +1,7 @@
+{
+  "aryion": "MyUsername",
+  "furaffinity": "My_Username",
+  "inkbunny": "MyUsername",
+  "sofurry": "My Username",
+  "weasyl": "MyUsername"
+}
--- a/description.py
+++ b/description.py
@ -8,7 +8,7 @@ import subprocess
 import typing


-SUPPORTED_USER_TAGS = ['eka', 'fa', 'weasyl', 'ib', 'sf', 'twitter']
+SUPPORTED_USER_TAGS = ['eka', 'fa', 'weasyl', 'ib', 'sf', 'twitter', 'mastodon']

 DESCRIPTION_GRAMMAR = r"""
  ?start: document_list
@ -17,6 +17,7 @@ DESCRIPTION_GRAMMAR = r"""

  document: b_tag
          | i_tag
+          | u_tag
          | url_tag
          | self_tag
          | user_tag_root
@ -24,14 +25,14 @@ DESCRIPTION_GRAMMAR = r"""

  b_tag: "[b]" [document_list] "[/b]"
  i_tag: "[i]" [document_list] "[/i]"
+  u_tag: "[u]" [document_list] "[/u]"
  url_tag: "[url" ["=" [URL]] "]" [document_list] "[/url]"

-  self_tag: "[self]" [WS] "[/self]"
+  self_tag: "[self][/self]"
  user_tag_root: user_tag
  user_tag: generic_tag | """

 DESCRIPTION_GRAMMAR += ' | '.join(f'{tag}_tag' for tag in SUPPORTED_USER_TAGS)
-
 DESCRIPTION_GRAMMAR += ''.join(f'\n  {tag}_tag: "[{tag}" ["=" USERNAME] "]" USERNAME "[/{tag}]" | "[{tag}" "=" USERNAME  "]" [user_tag] "[/{tag}]"' for tag in SUPPORTED_USER_TAGS)

 DESCRIPTION_GRAMMAR += r"""
@ -40,12 +41,8 @@ DESCRIPTION_GRAMMAR += r"""
  USERNAME: /[a-zA-Z0-9][a-zA-Z0-9 _-]*/
  URL: /(https?:\/\/)?[^\]]+/
  TEXT: /([^\[]|[ \t\r\n])+/
-
-  %import common.WS
 """

-print(DESCRIPTION_GRAMMAR)
-
 DESCRIPTION_PARSER = lark.Lark(DESCRIPTION_GRAMMAR, parser='lalr')


@ -77,6 +74,7 @@ class UploadTransformer(lark.Transformer):
  def __init__(self, *args, **kwargs):
    super(UploadTransformer, self).__init__(*args, **kwargs)
    def _user_tag_factory(tag):
+      # Create a new UserTag if innermost node, or append to list in order
      def user_tag(data):
        attribute, inner = data[0], data[1]
        if attribute and attribute.strip():
@ -105,6 +103,9 @@ class UploadTransformer(lark.Transformer):
  def i_tag(self, _):
    raise NotImplementedError('UploadTransformer.i_tag is abstract')

+  def u_tag(self, _):
+    raise NotImplementedError('UploadTransformer.u_tag is abstract')
+
  def url_tag(self, _):
    raise NotImplementedError('UploadTransformer.url_tag is abstract')

@ -127,7 +128,10 @@ class UploadTransformer(lark.Transformer):
      elif site == 'sf':
        return self.url_tag((f'https://{user_data["sf"].replace(" ", "-").lower()}.sofurry.com', user_data.default or user_data['sf']))
      elif site == 'twitter':
-        return self.url_tag((f'https://twitter.com/{user_data["twitter"]}', user_data.default or user_data['twitter']))
+        return self.url_tag((f'https://twitter.com/{user_data["twitter"].rsplit("@", 1)[-1]}', user_data.default or user_data['twitter']))
+      elif site == 'mastodon':
+        *_, mastodon_user, mastodon_instance = user_data["mastodon"].rsplit('@', 2)
+        return self.url_tag((f'https://{mastodon_instance}/@{mastodon_user}', user_data.default or user_data['mastodon']))
      else:
        print(f'Unknown site "{site}" found in user tag; ignoring...')
    raise TypeError('Invalid UserTag data')
@ -152,6 +156,11 @@ class BbcodeTransformer(UploadTransformer):
      return ''
    return f'[i]{data[0]}[/i]'

+  def u_tag(self, data):
+    if data[0] is None or not data[0].strip():
+      return ''
+    return f'[u]{data[0]}[/u]'
+
  def url_tag(self, data):
    return f'[url={data[0] or ""}]{data[1] or ""}[/url]'

@ -166,25 +175,59 @@ class MarkdownTransformer(UploadTransformer):
      return ''
    return f'*{data[0]}*'

+  def u_tag(self, data):
+    if data[0] is None or not data[0].strip():
+      return ''
+    return f'<u>{data[0]}</u>'  # Markdown should support simple HTML tags
+
  def url_tag(self, data):
    return f'[{data[1] or ""}]({data[0] or ""})'

 class PlaintextTransformer(UploadTransformer):
  def b_tag(self, data):
-    return f'{data[0] or ""}'
+    return str(data[0]) if data[0] else ''

  def i_tag(self, data):
-    return f'{data[0] or ""}'
+    return str(data[0]) if data[0] else ''
+
+  def u_tag(self, data):
+    return str(data[0]) if data[0] else ''

  def url_tag(self, data):
    if data[1] is None or not data[1].strip():
-      return f'{data[0] or ""}'
+      return str(data[0]) if data[0] else ''
    return f'{data[1].strip()}: {data[0] or ""}'

+  def user_tag_root(self, data):
+    user_data = data[0]
+    for site in user_data.sites:
+      if site == 'generic':
+        break
+      elif site == 'eka':
+        return f'{user_data["eka"]} on Eka\'s Portal'
+      elif site == 'fa':
+        return f'{user_data["fa"]} on Fur Affinity'
+      elif site == 'weasyl':
+        return f'{user_data["weasyl"]} on Weasyl'
+      elif site == 'ib':
+        return f'{user_data["ib"]} on Inkbunny'
+      elif site == 'sf':
+        return f'{user_data["sf"]} on SoFurry'
+      elif site == 'twitter':
+        return f'@{user_data["twitter"].rsplit("@", 1)[-1]} on Twitter'
+      elif site == 'mastodon':
+        *_, mastodon_user, mastodon_instance = user_data["mastodon"].rsplit('@', 2)
+        return f'@{mastodon_user} on {mastodon_instance}'
+      else:
+        print(f'Unknown site "{site}" found in user tag; ignoring...')
+    return super(PlaintextTransformer, self).user_tag_root(data)
+
 class AryionTransformer(BbcodeTransformer):
-  def __init__(self, this_user, *args, **kwargs):
+  def __init__(self, self_user, *args, **kwargs):
    super(AryionTransformer, self).__init__(*args, **kwargs)
-    self.self_tag = lambda _: self.user_tag_root((UserTag(eka=this_user),))
+    def self_tag(data):
+      return self.user_tag_root((UserTag(eka=self_user),))
+    self.self_tag = self_tag

  def user_tag_root(self, data):
    user_data = data[0]
@ -193,9 +236,11 @@ class AryionTransformer(BbcodeTransformer):
    return super(AryionTransformer, self).user_tag_root(data)

 class FuraffinityTransformer(BbcodeTransformer):
-  def __init__(self, this_user, *args, **kwargs):
+  def __init__(self, self_user, *args, **kwargs):
    super(FuraffinityTransformer, self).__init__(*args, **kwargs)
-    self.self_tag = lambda _: self.user_tag_root((UserTag(fa=this_user),))
+    def self_tag(data):
+      return self.user_tag_root((UserTag(fa=self_user),))
+    self.self_tag = self_tag

  def user_tag_root(self, data):
    user_data = data[0]
@ -204,9 +249,11 @@ class FuraffinityTransformer(BbcodeTransformer):
    return super(FuraffinityTransformer, self).user_tag_root(data)

 class WeasylTransformer(MarkdownTransformer):
-  def __init__(self, this_user, *args, **kwargs):
+  def __init__(self, self_user, *args, **kwargs):
    super(WeasylTransformer, self).__init__(*args, **kwargs)
-    self.self_tag = lambda _: self.user_tag_root((UserTag(weasyl=this_user),))
+    def self_tag(data):
+      return self.user_tag_root((UserTag(weasyl=self_user),))
+    self.self_tag = self_tag

  def user_tag_root(self, data):
    user_data = data[0]
@ -223,9 +270,11 @@ class WeasylTransformer(MarkdownTransformer):
    return super(WeasylTransformer, self).user_tag_root(data)

 class InkbunnyTransformer(BbcodeTransformer):
-  def __init__(self, this_user, *args, **kwargs):
+  def __init__(self, self_user, *args, **kwargs):
    super(InkbunnyTransformer, self).__init__(*args, **kwargs)
-    self.self_tag = lambda _: self.user_tag_root((UserTag(ib=this_user),))
+    def self_tag(data):
+      return self.user_tag_root((UserTag(ib=self_user),))
+    self.self_tag = self_tag

  def user_tag_root(self, data):
    user_data = data[0]
@ -242,9 +291,11 @@ class InkbunnyTransformer(BbcodeTransformer):
    return super(InkbunnyTransformer, self).user_tag_root(data)

 class SoFurryTransformer(BbcodeTransformer):
-  def __init__(self, this_user, *args, **kwargs):
+  def __init__(self, self_user, *args, **kwargs):
    super(SoFurryTransformer, self).__init__(*args, **kwargs)
-    self.self_tag = lambda _: self.user_tag_root((UserTag(sf=this_user),))
+    def self_tag(data):
+      return self.user_tag_root((UserTag(sf=self_user),))
+    self.self_tag = self_tag

  def user_tag_root(self, data):
    user_data = data[0]
@ -291,6 +342,8 @@ def parse_description(description_path, config_path, out_dir, ignore_empty_files
        errors.append(ValueError(f'Website \'{website}\' has invalid username \'{json.dumps(username)}\''))
      elif username.strip() == '':
        errors.append(ValueError(f'Website \'{website}\' has empty username'))
+    if not any(ws in config for ws in ('aryion', 'furaffinity', 'weasyl', 'inkbunny', 'sofurry')):
+      errors.append(ValueError('No valid websites found'))
  if errors:
    raise ExceptionGroup('Invalid configuration for description parsing', errors)
  # Create descriptions
--- a/example_config.json
+++ b/example_config.json
@ -1,7 +0,0 @@
-{
-    "aryion": "MyUsername",
-    "furaffinity": "My_Username",
-    "inkbunny": "MyUsername",
-    "sofurry": "My Username",
-    "weasyl": "MyUsername"
-}
--- a/main.py
+++ b/main.py
@ -1,67 +1,82 @@
 import argparse
 import os
 from subprocess import CalledProcessError
+import shutil
 import tempfile

 from description import parse_description
 from story import parse_story

-OUT_DIR = './out'

-
-def main(story_path=None, description_path=None, config_path='./config.json', keep_out_dir=False, ignore_empty_files=False):
-  remove_out_dir = not keep_out_dir and os.path.isdir(OUT_DIR)
+def main(out_dir_path=None, story_path=None, description_path=None, file_path=None, config_path=None, keep_out_dir=False, ignore_empty_files=False):
+  if not out_dir_path:
+    raise ValueError('Missing out_dir_path')
+  if not config_path:
+    raise ValueError('Missing config_path')
+  remove_out_dir = not keep_out_dir and os.path.isdir(out_dir_path)
  with tempfile.TemporaryDirectory() as tdir:
-    # Clear OUT_DIR if it exists and shouldn't be kept
+    # Clear output dir if it exists and shouldn't be kept
    if remove_out_dir:
-      os.rename(OUT_DIR, os.path.join(tdir, 'old_out'))
-    if not os.path.isdir(OUT_DIR):
-      os.mkdir(OUT_DIR)
+      os.rename(out_dir_path, os.path.join(tdir, 'old_out'))
+    if not os.path.isdir(out_dir_path):
+      os.mkdir(out_dir_path)

    try:
      # Convert original file to .rtf (Aryion) and .txt (all others)
      if story_path:
-        parse_story(story_path, config_path, OUT_DIR, tdir, ignore_empty_files)
+        parse_story(story_path, config_path, out_dir_path, tdir, ignore_empty_files)

      # Parse FA description and convert for each website
      if description_path:
-        parse_description(description_path, config_path, OUT_DIR, ignore_empty_files)
+        parse_description(description_path, config_path, out_dir_path, ignore_empty_files)
+
+      # Copy generic file over to output
+      if file_path:
+        shutil.copy(file_path, out_dir_path)

    except CalledProcessError as e:
      if remove_out_dir:
        # Revert directory removal on error
-        os.rename(OUT_DIR, os.path.join(tdir, 'get_rid_of_this'))
-        os.rename(os.path.join(tdir, 'old_out'), OUT_DIR)
+        os.rename(out_dir_path, os.path.join(tdir, 'get_rid_of_this'))
+        os.rename(os.path.join(tdir, 'old_out'), out_dir_path)
      print(f'Command exited with code {e.returncode}: {e.stderr.decode("utf-8-sig")}')
      exit(1)
    except Exception as e:
      if remove_out_dir:
        # Revert directory removal on error
-        os.rename(OUT_DIR, os.path.join(tdir, 'get_rid_of_this'))
-        os.rename(os.path.join(tdir, 'old_out'), OUT_DIR)
+        os.rename(out_dir_path, os.path.join(tdir, 'get_rid_of_this'))
+        os.rename(os.path.join(tdir, 'old_out'), out_dir_path)
      raise e


 if __name__ == '__main__':
  parser = argparse.ArgumentParser(description='generate multi-gallery upload-ready files')
+  parser.add_argument('-o', '--output-dir', dest='out_dir_path', default='./out',
+                      help='path of output directory')
+  parser.add_argument('-c', '--config', dest='config_path', default='./config.json',
+                      help='path of JSON configuration file')
  parser.add_argument('-s', '--story', dest='story_path',
                      help='path of LibreOffice-readable story file')
  parser.add_argument('-d', '--description', dest='description_path',
                      help='path of BBCode-formatted description file')
-  parser.add_argument('-c', '--config', dest='config_path', default='./config.json',
-                      help='path of JSON configuration file')
+  parser.add_argument('-f', '--file', dest='file_path',
+                      help='path of generic file to include in output (i.e. an image or thumbnail)')
  parser.add_argument('-k', '--keep-out-dir', dest='keep_out_dir', action='store_true',
-                      help='whether output directory contents should be kept')
-  parser.add_argument('-i', '--ignore-empty-files', dest='ignore_empty_files', action='store_true',
+                      help='whether output directory contents should be kept.\nif set, a script error may leave partial files behind')
+  parser.add_argument('-I', '--ignore-empty-files', dest='ignore_empty_files', action='store_true',
                      help='do not raise an error if any input file is empty or whitespace-only')
  args = parser.parse_args()

  if not any([args.story_path, args.description_path]):
    parser.error('at least one of ( --story | --description ) must be set')
+  if args.out_dir_path and os.path.exists(args.out_dir_path) and not os.path.isdir(args.out_dir_path):
+    parser.error('--output-dir must be an existing directory or inexistent')
  if args.story_path and not os.path.isfile(args.story_path):
    parser.error('--story must be a valid file')
  if args.description_path and not os.path.isfile(args.description_path):
    parser.error('--description must be a valid file')
+  if args.file_path and not os.path.isfile(args.file_path):
+    parser.error('--file must be a valid file')
  if args.config_path and not os.path.isfile(args.config_path):
    parser.error('--config must be a valid file')

--- a/story.py
+++ b/story.py
@ -20,11 +20,11 @@ def parse_story(story_path, config_path, out_dir, temp_dir, ignore_empty_files=F
  with open(config_path, 'r') as f:
    config = json.load(f)
  if type(config) is not dict:
-    raise ValueError('Configuration must be a JSON object')
+    raise ValueError('Invalid configuration for story parsing: Configuration must be a JSON object')
  should_create_txt_story = any(ws in config for ws in ('furaffinity', 'weasyl', 'inkbunny', 'sofurry'))
  should_create_rtf_story = any(ws in config for ws in ('aryion',))
  if not should_create_txt_story and not should_create_rtf_story:
-    raise ValueError('')
+    raise ValueError('Invalid configuration for story parsing: No valid websites found')

  story_filename = os.path.split(story_path)[1].rsplit('.')[0]
  txt_out_path = os.path.join(out_dir, f'{story_filename}.txt') if should_create_txt_story else os.devnull
@ -32,6 +32,7 @@ def parse_story(story_path, config_path, out_dir, temp_dir, ignore_empty_files=F
  RE_EMPTY_LINE = re.compile('^$')
  is_only_empty_lines = True
  ps = subprocess.Popen(('libreoffice', '--cat', story_path), stdout=subprocess.PIPE)
+  # Mangle output files so that .RTF will always have a single LF between lines, and .TXT can have one or two CRLF
  with open(txt_out_path, 'w', newline='\r\n') as txt_out, open(txt_tmp_path, 'w') as txt_tmp:
    needs_empty_line = False
    for line in io.TextIOWrapper(ps.stdout, encoding='utf-8-sig'):