Refactor out story and description logic

Bad Manners 2023-06-30 17:37:55 -03:00
parent 0ae3e97186
commit f9940eacbe
5 changed files with 106 additions and 98 deletions

View file

@@ -25,7 +25,6 @@ In order to parse descriptions, you need a configuration file (default path is `
   "furaffinity": "My_Username",
   "inkbunny": "MyUsername",
   "sofurry": "My Username",
-  "twitter": "MyUsername",
   "weasyl": "MyUsername"
 }
 ```
@@ -40,7 +39,7 @@ Input descriptions should be formatted as BBCode. The following tags are accepted
 [url=https://github.com]URL link[/url]
 ```
-There are also special tags to link to yourself or other users automatically:
+There are also special tags to link to yourself or other users automatically. This may include websites not available in the configuration:
 ```bbcode
 [self][/self]

View file

@@ -1,10 +1,14 @@
 from collections import OrderedDict
+import io
 import json
 import lark
 import os
+import re
+import subprocess
 import typing
-SUPPORTED_USER_TAGS = ('eka', 'fa', 'weasyl', 'ib', 'sf', 'twitter')
+SUPPORTED_USER_TAGS = ['eka', 'fa', 'weasyl', 'ib', 'sf', 'twitter']
 DESCRIPTION_GRAMMAR = r"""
 ?start: document_list
@@ -34,7 +38,7 @@ DESCRIPTION_GRAMMAR += r"""
 USERNAME: /[a-zA-Z0-9][a-zA-Z0-9 _-]*/
 URL: /(https?:\/\/)?[^\]]+/
-TEXT: /([^\[:]|[ \t\r\n]|:(?!icon))+/
+TEXT: /([^\[]|[ \t\r\n])+/
 %import common.WS
 """
@@ -43,8 +47,8 @@ DESCRIPTION_PARSER = lark.Lark(DESCRIPTION_GRAMMAR, parser='lalr')
 class UserTag:
-    def __init__(self, default=None, **kwargs):
-        self.default: typing.Optional[str] = default
+    def __init__(self, default: typing.Optional[str]=None, **kwargs):
+        self.default = default
         self._sites: typing.OrderedDict[str, typing.Optional[str]] = OrderedDict()
         for (k, v) in kwargs.items():
             if k in SUPPORTED_USER_TAGS:
@@ -241,29 +245,25 @@ class SoFurryTransformer(BbcodeTransformer):
             return f'ib!{user_data["ib"]}'
         return super(SoFurryTransformer, self).user_tag_root(data)
-class TwitterTransformer(PlaintextTransformer):
-    def __init__(self, this_user, *args, **kwargs):
-        super(TwitterTransformer, self).__init__(*args, **kwargs)
-        self.self_tag = lambda _: self.user_tag_root((UserTag(twitter=this_user),))
-    def user_tag_root(self, data):
-        user_data = data[0]
-        if user_data['twitter']:
-            return f'@{user_data["twitter"]}'
-        return super(TwitterTransformer, self).user_tag_root(data)
-TRANSFORMATIONS = {
-    'aryion': ('desc_aryion.txt', AryionTransformer),
-    'furaffinity': ('desc_furaffinity.txt', FuraffinityTransformer),
-    'inkbunny': ('desc_inkbunny.txt', InkbunnyTransformer),
-    'sofurry': ('desc_sofurry.txt', SoFurryTransformer),
-    'twitter': ('desc_twitter.txt', TwitterTransformer),
-    'weasyl': ('desc_weasyl.md', WeasylTransformer),
-}
-def parse_description(description, config_path, out_dir):
+def parse_description(description_path, config_path, out_dir, ignore_empty_files=False):
+    ps = subprocess.Popen(('libreoffice', '--cat', description_path), stdout=subprocess.PIPE)
+    description = '\n'.join(line.strip() for line in io.TextIOWrapper(ps.stdout, encoding='utf-8-sig'))
+    if not description or re.match(r'^\s+$', description):
+        error = f'Description processing returned empty file: libreoffice --cat {description_path}'
+        if ignore_empty_files:
+            print(f'Ignoring error ({error})')
+        else:
+            raise RuntimeError(error)
     parsed_description = DESCRIPTION_PARSER.parse(description)
+    transformations = {
+        'aryion': ('desc_aryion.txt', AryionTransformer),
+        'furaffinity': ('desc_furaffinity.txt', FuraffinityTransformer),
+        'inkbunny': ('desc_inkbunny.txt', InkbunnyTransformer),
+        'sofurry': ('desc_sofurry.txt', SoFurryTransformer),
+        'weasyl': ('desc_weasyl.md', WeasylTransformer),
+    }
     with open(config_path, 'r') as f:
         config = json.load(f)
     # Validate JSON
@@ -272,7 +272,7 @@ def parse_description(description, config_path, out_dir):
         errors.append(ValueError('Configuration must be a JSON object'))
     else:
         for (website, username) in config.items():
-            if website not in TRANSFORMATIONS:
+            if website not in transformations:
                 errors.append(ValueError(f'Website \'{website}\' is unsupported'))
             elif type(username) is not str:
                 errors.append(ValueError(f'Website \'{website}\' has invalid username \'{json.dumps(username)}\''))
@@ -282,7 +282,7 @@ def parse_description(description, config_path, out_dir):
         raise ExceptionGroup('Invalid configuration for description parsing', errors)
     # Create descriptions
     for (website, username) in config.items():
-        (filepath, transformer) = TRANSFORMATIONS[website]
+        (filepath, transformer) = transformations[website]
         with open(os.path.join(out_dir, filepath), 'w') as f:
            if description:
                f.write(transformer(username).transform(parsed_description))

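For reference, a minimal sketch of how the refactored `parse_description` is now meant to be called. The module name matches the new `from description import parse_description` import in `main.py`; the input file names below are made up for illustration:

```python
import os
from description import parse_description

OUT_DIR = './out'
os.makedirs(OUT_DIR, exist_ok=True)

# parse_description now runs `libreoffice --cat` on the source document itself
# and writes one desc_<website> file per configured site into OUT_DIR.
parse_description(
    'my_description.odt',     # hypothetical input; any format LibreOffice can read
    './config.json',          # per-website usernames, as shown in the README
    OUT_DIR,
    ignore_empty_files=True,  # print the error instead of raising RuntimeError
)
```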
View file

@@ -3,6 +3,5 @@
   "furaffinity": "My_Username",
   "inkbunny": "MyUsername",
   "sofurry": "My Username",
-  "twitter": "MyUsername",
   "weasyl": "MyUsername"
 }

main.py
View file

@@ -1,26 +1,14 @@
 import argparse
-import io
 import os
-import re
-import subprocess
+from subprocess import CalledProcessError
 import tempfile
-from parse import parse_description
+from description import parse_description
+from story import parse_story
 OUT_DIR = './out'
-def get_rtf_styles(rtf_source: str):
-    match_list = re.findall(r'\\s(\d+)(?:\\sbasedon\d+)?\\snext\d+((?:\\[a-z0-9]+ ?)+)(?: ([A-Z][a-zA-Z ]*));', rtf_source)
-    if not match_list:
-        raise ValueError(f'Couldn\'t find valid RTF styles')
-    rtf_styles = {}
-    for (style_number, partial_rtf_style, style_name) in match_list:
-        rtf_style = r'\s' + style_number + partial_rtf_style
-        rtf_styles[int(style_number)] = rtf_style
-        rtf_styles[style_name] = rtf_style
-    return rtf_styles
 def main(story_path=None, description_path=None, config_path='./config.json', keep_out_dir=False, ignore_empty_files=False):
     remove_out_dir = not keep_out_dir and os.path.isdir(OUT_DIR)
     with tempfile.TemporaryDirectory() as tdir:
@@ -33,64 +21,13 @@ def main(story_path=None, description_path=None, config_path='./config.json', keep_out_dir=False, ignore_empty_files=False):
         try:
             # Convert original file to .rtf (Aryion) and .txt (all others)
             if story_path:
-                story_filename = os.path.split(story_path)[1].rsplit('.')[0]
-                txt_out_path = os.path.join(OUT_DIR, f'{story_filename}.txt')
-                txt_tmp_path = os.path.join(tdir, f'{story_filename}.txt')
-                rtf_out_path = os.path.join(OUT_DIR, f'{story_filename}.rtf')
-                RE_EMPTY_LINE = re.compile('^$')
-                is_only_empty_lines = True
-                ps = subprocess.Popen(('libreoffice', '--cat', story_path), stdout=subprocess.PIPE)
-                with open(txt_out_path, 'w', newline='\r\n') as txt_out, open(txt_tmp_path, 'w') as txt_tmp:
-                    needs_empty_line = False
-                    for line in io.TextIOWrapper(ps.stdout, encoding='utf-8-sig'):
-                        # Remove empty lines
-                        line = line.strip()
-                        if RE_EMPTY_LINE.search(line) and not is_only_empty_lines:
-                            needs_empty_line = True
-                        else:
-                            if is_only_empty_lines:
-                                txt_out.writelines((line,))
-                                txt_tmp.writelines((line,))
-                                is_only_empty_lines = False
-                            else:
-                                if needs_empty_line:
-                                    txt_out.writelines(('\n\n', line))
-                                    needs_empty_line = False
-                                else:
-                                    txt_out.writelines(('\n', line))
-                                txt_tmp.writelines(('\n', line))
-                    txt_out.writelines(('\n'))
-                if is_only_empty_lines:
-                    error = f'Story processing returned empty file: libreoffice --cat {story_path}'
-                    if ignore_empty_files:
-                        print(f'Ignoring error ({error})')
-                    else:
-                        raise RuntimeError(error)
-                # Convert temporary .txt to .rtf
-                subprocess.run(['libreoffice', '--convert-to', 'rtf:Rich Text Format', '--outdir', OUT_DIR, txt_tmp_path], check=True, capture_output=True)
-                # Convert monospace font ('Preformatted Text') to serif ('Normal')
-                with open(rtf_out_path, 'r+') as f:
-                    rtf = f.read()
-                    rtf_styles = get_rtf_styles(rtf)
-                    monospace_style = rtf_styles['Preformatted Text'] # rtf_styles[20]
-                    serif_style = rtf_styles['Normal'] # rtf_styles[0]
-                    f.seek(0)
-                    f.write(rtf.replace(monospace_style, serif_style))
-                    f.truncate()
+                parse_story(story_path, config_path, OUT_DIR, tdir, ignore_empty_files)
             # Parse FA description and convert for each website
             if description_path:
-                ps = subprocess.Popen(('libreoffice', '--cat', description_path), stdout=subprocess.PIPE)
-                desc = '\n'.join(line.strip() for line in io.TextIOWrapper(ps.stdout, encoding='utf-8-sig'))
-                if not desc or re.match(r'^\s+$', desc):
-                    error = f'Description processing returned empty file: libreoffice --cat {description_path}'
-                    if ignore_empty_files:
-                        print(f'Ignoring error ({error})')
-                    else:
-                        raise RuntimeError(error)
-                parse_description(desc, config_path, OUT_DIR)
-        except subprocess.CalledProcessError as e:
+                parse_description(description_path, config_path, OUT_DIR, ignore_empty_files)
+        except CalledProcessError as e:
             if remove_out_dir:
                 # Revert directory removal on error
                 os.rename(OUT_DIR, os.path.join(tdir, 'get_rid_of_this'))

story.py Normal file
View file

@@ -0,0 +1,73 @@
+import io
+import json
+import os
+import re
+import subprocess
+
+def get_rtf_styles(rtf_source: str):
+    match_list = re.findall(r'\\s(\d+)(?:\\sbasedon\d+)?\\snext\d+((?:\\[a-z0-9]+ ?)+)(?: ([A-Z][a-zA-Z ]*));', rtf_source)
+    if not match_list:
+        raise ValueError(f'Couldn\'t find valid RTF styles')
+    rtf_styles = {}
+    for (style_number, partial_rtf_style, style_name) in match_list:
+        rtf_style = r'\s' + style_number + partial_rtf_style
+        rtf_styles[int(style_number)] = rtf_style
+        rtf_styles[style_name] = rtf_style
+    return rtf_styles
+
+def parse_story(story_path, config_path, out_dir, temp_dir, ignore_empty_files=False):
+    with open(config_path, 'r') as f:
+        config = json.load(f)
+    if type(config) is not dict:
+        raise ValueError('Configuration must be a JSON object')
+    should_create_txt_story = any(ws in config for ws in ('furaffinity', 'weasyl', 'inkbunny', 'sofurry'))
+    should_create_rtf_story = any(ws in config for ws in ('aryion',))
+    if not should_create_txt_story and not should_create_rtf_story:
+        raise ValueError('')
+    story_filename = os.path.split(story_path)[1].rsplit('.')[0]
+    txt_out_path = os.path.join(out_dir, f'{story_filename}.txt') if should_create_txt_story else os.devnull
+    txt_tmp_path = os.path.join(temp_dir, f'{story_filename}.txt') if should_create_rtf_story else os.devnull
+    RE_EMPTY_LINE = re.compile('^$')
+    is_only_empty_lines = True
+    ps = subprocess.Popen(('libreoffice', '--cat', story_path), stdout=subprocess.PIPE)
+    with open(txt_out_path, 'w', newline='\r\n') as txt_out, open(txt_tmp_path, 'w') as txt_tmp:
+        needs_empty_line = False
+        for line in io.TextIOWrapper(ps.stdout, encoding='utf-8-sig'):
+            # Remove empty lines
+            line = line.strip()
+            if RE_EMPTY_LINE.search(line) and not is_only_empty_lines:
+                needs_empty_line = True
+            else:
+                if is_only_empty_lines:
+                    txt_out.writelines((line,))
+                    txt_tmp.writelines((line,))
+                    is_only_empty_lines = False
+                else:
+                    if needs_empty_line:
+                        txt_out.writelines(('\n\n', line))
+                        needs_empty_line = False
+                    else:
+                        txt_out.writelines(('\n', line))
+                    txt_tmp.writelines(('\n', line))
+        txt_out.writelines(('\n'))
+    if is_only_empty_lines:
+        error = f'Story processing returned empty file: libreoffice --cat {story_path}'
+        if ignore_empty_files:
+            print(f'Ignoring error ({error})')
+        else:
+            raise RuntimeError(error)
+    if should_create_rtf_story:
+        rtf_out_path = os.path.join(out_dir, f'{story_filename}.rtf')
+        # Convert temporary .txt to .rtf
+        subprocess.run(['libreoffice', '--convert-to', 'rtf:Rich Text Format', '--outdir', out_dir, txt_tmp_path], check=True, capture_output=True)
+        # Convert monospace font ('Preformatted Text') to serif ('Normal')
+        with open(rtf_out_path, 'r+') as f:
+            rtf = f.read()
+            rtf_styles = get_rtf_styles(rtf)
+            monospace_style = rtf_styles['Preformatted Text'] # rtf_styles[20]
+            serif_style = rtf_styles['Normal'] # rtf_styles[0]
+            f.seek(0)
+            f.write(rtf.replace(monospace_style, serif_style))
+            f.truncate()
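
A small illustration of what `get_rtf_styles` extracts from the converted RTF and how `parse_story` uses it. The RTF fragment below is invented for this sketch and only approximates the style table LibreOffice actually writes:

```python
from story import get_rtf_styles

# Made-up style table in the \sN ... \snextN ... Name; shape the regex expects.
sample_rtf = (
    r'{\stylesheet'
    r'{\s0\snext0\f3\fs24 Normal;}'
    r'{\s20\sbasedon0\snext20\f5\fs20 Preformatted Text;}'
    r'}'
)

styles = get_rtf_styles(sample_rtf)
assert styles['Normal'] == r'\s0\f3\fs24'              # also keyed as styles[0]
assert styles['Preformatted Text'] == r'\s20\f5\fs20'  # also keyed as styles[20]

# parse_story then swaps the monospace style string for the serif one in the
# generated .rtf: rtf.replace(styles['Preformatted Text'], styles['Normal'])
```

Each style ends up keyed both by its numeric id and by its name, which is why the lookup by 'Preformatted Text' and 'Normal' works without knowing the numbers in advance.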