Refactor out story and description logic
parent 0ae3e97186
commit f9940eacbe

5 changed files with 106 additions and 98 deletions
@@ -25,7 +25,6 @@ In order to parse descriptions, you need a configuration file (default path is `
     "furaffinity": "My_Username",
     "inkbunny": "MyUsername",
     "sofurry": "My Username",
-    "twitter": "MyUsername",
     "weasyl": "MyUsername"
 }
 ```
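As a rough sketch of how this configuration is consumed (mirroring the validation in parse_description further down; the config.json path, the SUPPORTED_WEBSITES name, and the exact error text here are illustrative assumptions, not code from this commit):

```python
import json

# Illustrative only: load the per-site username mapping and sanity-check it.
SUPPORTED_WEBSITES = {'aryion', 'furaffinity', 'inkbunny', 'sofurry', 'weasyl'}  # assumed set

with open('config.json', 'r') as f:  # assumed default path
    config = json.load(f)

if not isinstance(config, dict):
    raise ValueError('Configuration must be a JSON object')
for website, username in config.items():
    if website not in SUPPORTED_WEBSITES:
        raise ValueError(f"Website '{website}' is unsupported")
    if not isinstance(username, str):
        raise ValueError(f"Website '{website}' has an invalid username")
```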
@@ -40,7 +39,7 @@ Input descriptions should be formatted as BBCode. The following tags are accepte
 [url=https://github.com]URL link[/url]
 ```
 
-There are also special tags to link to yourself or other users automatically:
+There are also special tags to link to yourself or other users automatically. This may include websites not available in the configuration:
 
 ```bbcode
 [self][/self]
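Roughly, the `[self]` tag expands to the current site's own username from the configuration; a hypothetical sketch (the helper and templates below are not code from this repository):

```python
# Hypothetical sketch of per-site [self] expansion.
config = {'furaffinity': 'My_Username', 'weasyl': 'MyUsername'}

def expand_self_tag(site, config, template='{username}'):
    # Substitute this site's own username; a real transformer would pick a
    # site-specific template (e.g. an icon tag where the site supports one).
    username = config.get(site)
    return template.format(username=username) if username else ''

print(expand_self_tag('furaffinity', config))                      # My_Username
print(expand_self_tag('furaffinity', config, ':icon{username}:'))  # :iconMy_Username:
```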
@@ -1,10 +1,14 @@
 from collections import OrderedDict
+import io
 import json
 import lark
 import os
+import re
+import subprocess
 import typing
 
-SUPPORTED_USER_TAGS = ('eka', 'fa', 'weasyl', 'ib', 'sf', 'twitter')
+SUPPORTED_USER_TAGS = ['eka', 'fa', 'weasyl', 'ib', 'sf', 'twitter']
 
 DESCRIPTION_GRAMMAR = r"""
 ?start: document_list
@@ -34,7 +38,7 @@ DESCRIPTION_GRAMMAR += r"""
 
 USERNAME: /[a-zA-Z0-9][a-zA-Z0-9 _-]*/
 URL: /(https?:\/\/)?[^\]]+/
-TEXT: /([^\[:]|[ \t\r\n]|:(?!icon))+/
+TEXT: /([^\[]|[ \t\r\n])+/
 
 %import common.WS
 """
@@ -43,8 +47,8 @@ DESCRIPTION_PARSER = lark.Lark(DESCRIPTION_GRAMMAR, parser='lalr')
 
 
 class UserTag:
-    def __init__(self, default=None, **kwargs):
-        self.default: typing.Optional[str] = default
+    def __init__(self, default: typing.Optional[str]=None, **kwargs):
+        self.default = default
         self._sites: typing.OrderedDict[str, typing.Optional[str]] = OrderedDict()
         for (k, v) in kwargs.items():
             if k in SUPPORTED_USER_TAGS:
@@ -241,29 +245,25 @@ class SoFurryTransformer(BbcodeTransformer):
             return f'ib!{user_data["ib"]}'
         return super(SoFurryTransformer, self).user_tag_root(data)
 
-class TwitterTransformer(PlaintextTransformer):
-    def __init__(self, this_user, *args, **kwargs):
-        super(TwitterTransformer, self).__init__(*args, **kwargs)
-        self.self_tag = lambda _: self.user_tag_root((UserTag(twitter=this_user),))
 
-    def user_tag_root(self, data):
-        user_data = data[0]
-        if user_data['twitter']:
-            return f'@{user_data["twitter"]}'
-        return super(TwitterTransformer, self).user_tag_root(data)
+def parse_description(description_path, config_path, out_dir, ignore_empty_files=False):
+    ps = subprocess.Popen(('libreoffice', '--cat', description_path), stdout=subprocess.PIPE)
+    description = '\n'.join(line.strip() for line in io.TextIOWrapper(ps.stdout, encoding='utf-8-sig'))
+    if not description or re.match(r'^\s+$', description):
+        error = f'Description processing returned empty file: libreoffice --cat {description_path}'
+        if ignore_empty_files:
+            print(f'Ignoring error ({error})')
+        else:
+            raise RuntimeError(error)
 
-TRANSFORMATIONS = {
-    'aryion': ('desc_aryion.txt', AryionTransformer),
-    'furaffinity': ('desc_furaffinity.txt', FuraffinityTransformer),
-    'inkbunny': ('desc_inkbunny.txt', InkbunnyTransformer),
-    'sofurry': ('desc_sofurry.txt', SoFurryTransformer),
-    'twitter': ('desc_twitter.txt', TwitterTransformer),
-    'weasyl': ('desc_weasyl.md', WeasylTransformer),
-}
-
-
-def parse_description(description, config_path, out_dir):
     parsed_description = DESCRIPTION_PARSER.parse(description)
+    transformations = {
+        'aryion': ('desc_aryion.txt', AryionTransformer),
+        'furaffinity': ('desc_furaffinity.txt', FuraffinityTransformer),
+        'inkbunny': ('desc_inkbunny.txt', InkbunnyTransformer),
+        'sofurry': ('desc_sofurry.txt', SoFurryTransformer),
+        'weasyl': ('desc_weasyl.md', WeasylTransformer),
+    }
     with open(config_path, 'r') as f:
        config = json.load(f)
     # Validate JSON
@@ -272,7 +272,7 @@ def parse_description(description, config_path, out_dir):
         errors.append(ValueError('Configuration must be a JSON object'))
     else:
         for (website, username) in config.items():
-            if website not in TRANSFORMATIONS:
+            if website not in transformations:
                 errors.append(ValueError(f'Website \'{website}\' is unsupported'))
             elif type(username) is not str:
                 errors.append(ValueError(f'Website \'{website}\' has invalid username \'{json.dumps(username)}\''))
@@ -282,7 +282,7 @@ def parse_description(description, config_path, out_dir):
         raise ExceptionGroup('Invalid configuration for description parsing', errors)
     # Create descriptions
     for (website, username) in config.items():
-        (filepath, transformer) = TRANSFORMATIONS[website]
+        (filepath, transformer) = transformations[website]
         with open(os.path.join(out_dir, filepath), 'w') as f:
             if description:
                 f.write(transformer(username).transform(parsed_description))
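A hedged usage sketch of the new parse_description signature (the file path below is a placeholder, and LibreOffice must be installed since the function shells out to `libreoffice --cat`):

```python
from description import parse_description

# Placeholder paths; one desc_<site> file is written to the output directory
# for every website present in the configuration.
parse_description(
    'my_description.odt',
    './config.json',
    './out',
    ignore_empty_files=True,  # print a warning instead of raising on empty output
)
```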
@@ -3,6 +3,5 @@
     "furaffinity": "My_Username",
     "inkbunny": "MyUsername",
     "sofurry": "My Username",
-    "twitter": "MyUsername",
     "weasyl": "MyUsername"
 }
main.py | 75
@@ -1,26 +1,14 @@
 import argparse
-import io
 import os
-import re
-import subprocess
+from subprocess import CalledProcessError
 import tempfile
 
-from parse import parse_description
+from description import parse_description
+from story import parse_story
 
 OUT_DIR = './out'
 
 
-def get_rtf_styles(rtf_source: str):
-    match_list = re.findall(r'\\s(\d+)(?:\\sbasedon\d+)?\\snext\d+((?:\\[a-z0-9]+ ?)+)(?: ([A-Z][a-zA-Z ]*));', rtf_source)
-    if not match_list:
-        raise ValueError(f'Couldn\'t find valid RTF styles')
-    rtf_styles = {}
-    for (style_number, partial_rtf_style, style_name) in match_list:
-        rtf_style = r'\s' + style_number + partial_rtf_style
-        rtf_styles[int(style_number)] = rtf_style
-        rtf_styles[style_name] = rtf_style
-    return rtf_styles
-
 def main(story_path=None, description_path=None, config_path='./config.json', keep_out_dir=False, ignore_empty_files=False):
     remove_out_dir = not keep_out_dir and os.path.isdir(OUT_DIR)
     with tempfile.TemporaryDirectory() as tdir:
@@ -33,64 +21,13 @@ def main(story_path=None, description_path=None, config_path='./config.json', ke
         try:
             # Convert original file to .rtf (Aryion) and .txt (all others)
             if story_path:
-                story_filename = os.path.split(story_path)[1].rsplit('.')[0]
-                txt_out_path = os.path.join(OUT_DIR, f'{story_filename}.txt')
-                txt_tmp_path = os.path.join(tdir, f'{story_filename}.txt')
-                rtf_out_path = os.path.join(OUT_DIR, f'{story_filename}.rtf')
-                RE_EMPTY_LINE = re.compile('^$')
-                is_only_empty_lines = True
-                ps = subprocess.Popen(('libreoffice', '--cat', story_path), stdout=subprocess.PIPE)
-                with open(txt_out_path, 'w', newline='\r\n') as txt_out, open(txt_tmp_path, 'w') as txt_tmp:
-                    needs_empty_line = False
-                    for line in io.TextIOWrapper(ps.stdout, encoding='utf-8-sig'):
-                        # Remove empty lines
-                        line = line.strip()
-                        if RE_EMPTY_LINE.search(line) and not is_only_empty_lines:
-                            needs_empty_line = True
-                        else:
-                            if is_only_empty_lines:
-                                txt_out.writelines((line,))
-                                txt_tmp.writelines((line,))
-                                is_only_empty_lines = False
-                            else:
-                                if needs_empty_line:
-                                    txt_out.writelines(('\n\n', line))
-                                    needs_empty_line = False
-                                else:
-                                    txt_out.writelines(('\n', line))
-                                txt_tmp.writelines(('\n', line))
-                    txt_out.writelines(('\n'))
-                if is_only_empty_lines:
-                    error = f'Story processing returned empty file: libreoffice --cat {story_path}'
-                    if ignore_empty_files:
-                        print(f'Ignoring error ({error})')
-                    else:
-                        raise RuntimeError(error)
-                # Convert temporary .txt to .rtf
-                subprocess.run(['libreoffice', '--convert-to', 'rtf:Rich Text Format', '--outdir', OUT_DIR, txt_tmp_path], check=True, capture_output=True)
-                # Convert monospace font ('Preformatted Text') to serif ('Normal')
-                with open(rtf_out_path, 'r+') as f:
-                    rtf = f.read()
-                    rtf_styles = get_rtf_styles(rtf)
-                    monospace_style = rtf_styles['Preformatted Text'] # rtf_styles[20]
-                    serif_style = rtf_styles['Normal'] # rtf_styles[0]
-                    f.seek(0)
-                    f.write(rtf.replace(monospace_style, serif_style))
-                    f.truncate()
+                parse_story(story_path, config_path, OUT_DIR, tdir, ignore_empty_files)
 
             # Parse FA description and convert for each website
             if description_path:
-                ps = subprocess.Popen(('libreoffice', '--cat', description_path), stdout=subprocess.PIPE)
-                desc = '\n'.join(line.strip() for line in io.TextIOWrapper(ps.stdout, encoding='utf-8-sig'))
-                if not desc or re.match(r'^\s+$', desc):
-                    error = f'Description processing returned empty file: libreoffice --cat {description_path}'
-                    if ignore_empty_files:
-                        print(f'Ignoring error ({error})')
-                    else:
-                        raise RuntimeError(error)
-                parse_description(desc, config_path, OUT_DIR)
+                parse_description(description_path, config_path, OUT_DIR, ignore_empty_files)
 
-        except subprocess.CalledProcessError as e:
+        except CalledProcessError as e:
             if remove_out_dir:
                 # Revert directory removal on error
                 os.rename(OUT_DIR, os.path.join(tdir, 'get_rid_of_this'))
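With the extraction above, main() is reduced to orchestration; calling it directly would look roughly like this (the argument values are placeholders, not part of this commit):

```python
from main import main

main(
    story_path='my_story.odt',              # converted by parse_story (txt and/or rtf)
    description_path='my_description.odt',  # converted by parse_description per site
    config_path='./config.json',
    keep_out_dir=False,
    ignore_empty_files=False,
)
```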
story.py | 73 (new file)
@@ -0,0 +1,73 @@
+import io
+import json
+import os
+import re
+import subprocess
+
+
+def get_rtf_styles(rtf_source: str):
+    match_list = re.findall(r'\\s(\d+)(?:\\sbasedon\d+)?\\snext\d+((?:\\[a-z0-9]+ ?)+)(?: ([A-Z][a-zA-Z ]*));', rtf_source)
+    if not match_list:
+        raise ValueError(f'Couldn\'t find valid RTF styles')
+    rtf_styles = {}
+    for (style_number, partial_rtf_style, style_name) in match_list:
+        rtf_style = r'\s' + style_number + partial_rtf_style
+        rtf_styles[int(style_number)] = rtf_style
+        rtf_styles[style_name] = rtf_style
+    return rtf_styles
+
+def parse_story(story_path, config_path, out_dir, temp_dir, ignore_empty_files=False):
+    with open(config_path, 'r') as f:
+        config = json.load(f)
+    if type(config) is not dict:
+        raise ValueError('Configuration must be a JSON object')
+    should_create_txt_story = any(ws in config for ws in ('furaffinity', 'weasyl', 'inkbunny', 'sofurry'))
+    should_create_rtf_story = any(ws in config for ws in ('aryion',))
+    if not should_create_txt_story and not should_create_rtf_story:
+        raise ValueError('')
+
+    story_filename = os.path.split(story_path)[1].rsplit('.')[0]
+    txt_out_path = os.path.join(out_dir, f'{story_filename}.txt') if should_create_txt_story else os.devnull
+    txt_tmp_path = os.path.join(temp_dir, f'{story_filename}.txt') if should_create_rtf_story else os.devnull
+    RE_EMPTY_LINE = re.compile('^$')
+    is_only_empty_lines = True
+    ps = subprocess.Popen(('libreoffice', '--cat', story_path), stdout=subprocess.PIPE)
+    with open(txt_out_path, 'w', newline='\r\n') as txt_out, open(txt_tmp_path, 'w') as txt_tmp:
+        needs_empty_line = False
+        for line in io.TextIOWrapper(ps.stdout, encoding='utf-8-sig'):
+            # Remove empty lines
+            line = line.strip()
+            if RE_EMPTY_LINE.search(line) and not is_only_empty_lines:
+                needs_empty_line = True
+            else:
+                if is_only_empty_lines:
+                    txt_out.writelines((line,))
+                    txt_tmp.writelines((line,))
+                    is_only_empty_lines = False
+                else:
+                    if needs_empty_line:
+                        txt_out.writelines(('\n\n', line))
+                        needs_empty_line = False
+                    else:
+                        txt_out.writelines(('\n', line))
+                    txt_tmp.writelines(('\n', line))
+        txt_out.writelines(('\n'))
+    if is_only_empty_lines:
+        error = f'Story processing returned empty file: libreoffice --cat {story_path}'
+        if ignore_empty_files:
+            print(f'Ignoring error ({error})')
+        else:
+            raise RuntimeError(error)
+    if should_create_rtf_story:
+        rtf_out_path = os.path.join(out_dir, f'{story_filename}.rtf')
+        # Convert temporary .txt to .rtf
+        subprocess.run(['libreoffice', '--convert-to', 'rtf:Rich Text Format', '--outdir', out_dir, txt_tmp_path], check=True, capture_output=True)
+        # Convert monospace font ('Preformatted Text') to serif ('Normal')
+        with open(rtf_out_path, 'r+') as f:
+            rtf = f.read()
+            rtf_styles = get_rtf_styles(rtf)
+            monospace_style = rtf_styles['Preformatted Text'] # rtf_styles[20]
+            serif_style = rtf_styles['Normal'] # rtf_styles[0]
+            f.seek(0)
+            f.write(rtf.replace(monospace_style, serif_style))
+            f.truncate()
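For reference, a small sketch of what get_rtf_styles returns; the stylesheet fragment below is contrived for illustration, not real LibreOffice output:

```python
from story import get_rtf_styles

# Contrived RTF stylesheet fragment with a single named style.
rtf = r'{\stylesheet{\s20\sbasedon0\snext20\f2\fs20 Preformatted Text;}}'
styles = get_rtf_styles(rtf)
print(styles[20])                   # \s20\f2\fs20
print(styles['Preformatted Text'])  # \s20\f2\fs20 (same style string, keyed by name)
```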