Update description parsing and add extra flags

This commit is contained in:
Bad Manners 2023-08-17 14:08:52 -03:00
parent 46a2400231
commit 0afcc2fbdc
6 changed files with 123 additions and 53 deletions

View file

@ -9,7 +9,7 @@ Script to generate multi-gallery upload-ready files.
## Usage
Run with `python main.py -h` for options. Generated files are output to `./out/`.
Run with `python main.py -h` for options. Generated files are output to `./out` by default.
### Story files
@ -17,7 +17,7 @@ When generating an .RTF file from the source text, the script expects that Libre
### Description files
In order to parse descriptions, you need a configuration file (default path is `./config.json`) with the websites you wish to upload to and your username there. For example:
In order to parse descriptions, you need a configuration file (default path is `./config.json`) with the websites you wish to upload to, and your username there. For example:
```json
{
@ -49,10 +49,11 @@ There are also special tags to link to yourself or other users automatically. Th
[weasyl]WeasylUser[/weasyl]
[ib]InkbunnyUser[/ib]
[sf]SoFurryUser[/sf]
[twitter]TwitterUser[/twitter]
[twitter]@TwitterUser[/twitter] - Leading '@' is optional
[mastodon]@MastodonUser@mastodoninstance.com[/mastodon] - Leading '@' is optional
```
`[self]` tags must always be empty. The other tags are nestable and flexible, allowing attributes to display information differently on each supported website. Some examples:
`[self][/self]` tags must always be empty. The other tags are nestable and flexible, allowing attributes to display information differently on each supported website. Some examples:
```bbcode
[eka=Lorem][/eka] is equivalent to [eka]Lorem[/eka].
@ -64,5 +65,5 @@ There are also special tags to link to yourself or other users automatically. Th
[ib=Amet][weasyl=Sit]Consectetur[/weasyl][/ib] is the same as above, but Consectetur is displayed as the username for websites other than Inkbunny and Weasyl. The Weasyl gallery is linked to in those websites.
[generic=https://github.com/BadMannersXYZ]Bad Manners[/generic] can be used as the innermost tag with a mandatory URL attribute and default username, and is similar to the URL tag, but it can be nested within other profile links.
[generic=https://github.com/BadMannersXYZ]Bad Manners[/generic] can be used as the innermost tag, with a mandatory URL attribute and a default username. It is similar to the URL tag, but it can be nested within other profile links, which are then used for intra-linking only.
```

7
config.example.json Normal file
View file

@ -0,0 +1,7 @@
{
"aryion": "MyUsername",
"furaffinity": "My_Username",
"inkbunny": "MyUsername",
"sofurry": "My Username",
"weasyl": "MyUsername"
}

View file

@ -8,7 +8,7 @@ import subprocess
import typing
SUPPORTED_USER_TAGS = ['eka', 'fa', 'weasyl', 'ib', 'sf', 'twitter']
SUPPORTED_USER_TAGS = ['eka', 'fa', 'weasyl', 'ib', 'sf', 'twitter', 'mastodon']
DESCRIPTION_GRAMMAR = r"""
?start: document_list
@ -17,6 +17,7 @@ DESCRIPTION_GRAMMAR = r"""
document: b_tag
| i_tag
| u_tag
| url_tag
| self_tag
| user_tag_root
@ -24,14 +25,14 @@ DESCRIPTION_GRAMMAR = r"""
b_tag: "[b]" [document_list] "[/b]"
i_tag: "[i]" [document_list] "[/i]"
u_tag: "[u]" [document_list] "[/u]"
url_tag: "[url" ["=" [URL]] "]" [document_list] "[/url]"
self_tag: "[self]" [WS] "[/self]"
self_tag: "[self][/self]"
user_tag_root: user_tag
user_tag: generic_tag | """
DESCRIPTION_GRAMMAR += ' | '.join(f'{tag}_tag' for tag in SUPPORTED_USER_TAGS)
DESCRIPTION_GRAMMAR += ''.join(f'\n {tag}_tag: "[{tag}" ["=" USERNAME] "]" USERNAME "[/{tag}]" | "[{tag}" "=" USERNAME "]" [user_tag] "[/{tag}]"' for tag in SUPPORTED_USER_TAGS)
DESCRIPTION_GRAMMAR += r"""
@ -40,12 +41,8 @@ DESCRIPTION_GRAMMAR += r"""
USERNAME: /[a-zA-Z0-9][a-zA-Z0-9 _-]*/
URL: /(https?:\/\/)?[^\]]+/
TEXT: /([^\[]|[ \t\r\n])+/
%import common.WS
"""
print(DESCRIPTION_GRAMMAR)
DESCRIPTION_PARSER = lark.Lark(DESCRIPTION_GRAMMAR, parser='lalr')
@ -77,6 +74,7 @@ class UploadTransformer(lark.Transformer):
def __init__(self, *args, **kwargs):
super(UploadTransformer, self).__init__(*args, **kwargs)
def _user_tag_factory(tag):
# Create a new UserTag if innermost node, or append to list in order
def user_tag(data):
attribute, inner = data[0], data[1]
if attribute and attribute.strip():
@ -105,6 +103,9 @@ class UploadTransformer(lark.Transformer):
def i_tag(self, _):
raise NotImplementedError('UploadTransformer.i_tag is abstract')
def u_tag(self, _):
raise NotImplementedError('UploadTransformer.u_tag is abstract')
def url_tag(self, _):
raise NotImplementedError('UploadTransformer.url_tag is abstract')
@ -127,7 +128,10 @@ class UploadTransformer(lark.Transformer):
elif site == 'sf':
return self.url_tag((f'https://{user_data["sf"].replace(" ", "-").lower()}.sofurry.com', user_data.default or user_data['sf']))
elif site == 'twitter':
return self.url_tag((f'https://twitter.com/{user_data["twitter"]}', user_data.default or user_data['twitter']))
return self.url_tag((f'https://twitter.com/{user_data["twitter"].rsplit("@", 1)[-1]}', user_data.default or user_data['twitter']))
elif site == 'mastodon':
*_, mastodon_user, mastodon_instance = user_data["mastodon"].rsplit('@', 2)
return self.url_tag((f'https://{mastodon_instance}/@{mastodon_user}', user_data.default or user_data['mastodon']))
else:
print(f'Unknown site "{site}" found in user tag; ignoring...')
raise TypeError('Invalid UserTag data')
@ -152,6 +156,11 @@ class BbcodeTransformer(UploadTransformer):
return ''
return f'[i]{data[0]}[/i]'
def u_tag(self, data):
if data[0] is None or not data[0].strip():
return ''
return f'[u]{data[0]}[/u]'
def url_tag(self, data):
return f'[url={data[0] or ""}]{data[1] or ""}[/url]'
@ -166,25 +175,59 @@ class MarkdownTransformer(UploadTransformer):
return ''
return f'*{data[0]}*'
def u_tag(self, data):
if data[0] is None or not data[0].strip():
return ''
return f'<u>{data[0]}</u>' # Markdown should support simple HTML tags
def url_tag(self, data):
return f'[{data[1] or ""}]({data[0] or ""})'
class PlaintextTransformer(UploadTransformer):
def b_tag(self, data):
return f'{data[0] or ""}'
return str(data[0]) if data[0] else ''
def i_tag(self, data):
return f'{data[0] or ""}'
return str(data[0]) if data[0] else ''
def u_tag(self, data):
return str(data[0]) if data[0] else ''
def url_tag(self, data):
if data[1] is None or not data[1].strip():
return f'{data[0] or ""}'
return str(data[0]) if data[0] else ''
return f'{data[1].strip()}: {data[0] or ""}'
def user_tag_root(self, data):
user_data = data[0]
for site in user_data.sites:
if site == 'generic':
break
elif site == 'eka':
return f'{user_data["eka"]} on Eka\'s Portal'
elif site == 'fa':
return f'{user_data["fa"]} on Fur Affinity'
elif site == 'weasyl':
return f'{user_data["weasyl"]} on Weasyl'
elif site == 'ib':
return f'{user_data["ib"]} on Inkbunny'
elif site == 'sf':
return f'{user_data["sf"]} on SoFurry'
elif site == 'twitter':
return f'@{user_data["twitter"].rsplit("@", 1)[-1]} on Twitter'
elif site == 'mastodon':
*_, mastodon_user, mastodon_instance = user_data["mastodon"].rsplit('@', 2)
return f'@{mastodon_user} on {mastodon_instance}'
else:
print(f'Unknown site "{site}" found in user tag; ignoring...')
return super(PlaintextTransformer, self).user_tag_root(data)
class AryionTransformer(BbcodeTransformer):
def __init__(self, this_user, *args, **kwargs):
def __init__(self, self_user, *args, **kwargs):
super(AryionTransformer, self).__init__(*args, **kwargs)
self.self_tag = lambda _: self.user_tag_root((UserTag(eka=this_user),))
def self_tag(data):
return self.user_tag_root((UserTag(eka=self_user),))
self.self_tag = self_tag
def user_tag_root(self, data):
user_data = data[0]
@ -193,9 +236,11 @@ class AryionTransformer(BbcodeTransformer):
return super(AryionTransformer, self).user_tag_root(data)
class FuraffinityTransformer(BbcodeTransformer):
def __init__(self, this_user, *args, **kwargs):
def __init__(self, self_user, *args, **kwargs):
super(FuraffinityTransformer, self).__init__(*args, **kwargs)
self.self_tag = lambda _: self.user_tag_root((UserTag(fa=this_user),))
def self_tag(data):
return self.user_tag_root((UserTag(fa=self_user),))
self.self_tag = self_tag
def user_tag_root(self, data):
user_data = data[0]
@ -204,9 +249,11 @@ class FuraffinityTransformer(BbcodeTransformer):
return super(FuraffinityTransformer, self).user_tag_root(data)
class WeasylTransformer(MarkdownTransformer):
def __init__(self, this_user, *args, **kwargs):
def __init__(self, self_user, *args, **kwargs):
super(WeasylTransformer, self).__init__(*args, **kwargs)
self.self_tag = lambda _: self.user_tag_root((UserTag(weasyl=this_user),))
def self_tag(data):
return self.user_tag_root((UserTag(weasyl=self_user),))
self.self_tag = self_tag
def user_tag_root(self, data):
user_data = data[0]
@ -223,9 +270,11 @@ class WeasylTransformer(MarkdownTransformer):
return super(WeasylTransformer, self).user_tag_root(data)
class InkbunnyTransformer(BbcodeTransformer):
def __init__(self, this_user, *args, **kwargs):
def __init__(self, self_user, *args, **kwargs):
super(InkbunnyTransformer, self).__init__(*args, **kwargs)
self.self_tag = lambda _: self.user_tag_root((UserTag(ib=this_user),))
def self_tag(data):
return self.user_tag_root((UserTag(ib=self_user),))
self.self_tag = self_tag
def user_tag_root(self, data):
user_data = data[0]
@ -242,9 +291,11 @@ class InkbunnyTransformer(BbcodeTransformer):
return super(InkbunnyTransformer, self).user_tag_root(data)
class SoFurryTransformer(BbcodeTransformer):
def __init__(self, this_user, *args, **kwargs):
def __init__(self, self_user, *args, **kwargs):
super(SoFurryTransformer, self).__init__(*args, **kwargs)
self.self_tag = lambda _: self.user_tag_root((UserTag(sf=this_user),))
def self_tag(data):
return self.user_tag_root((UserTag(sf=self_user),))
self.self_tag = self_tag
def user_tag_root(self, data):
user_data = data[0]
@ -291,6 +342,8 @@ def parse_description(description_path, config_path, out_dir, ignore_empty_files
errors.append(ValueError(f'Website \'{website}\' has invalid username \'{json.dumps(username)}\''))
elif username.strip() == '':
errors.append(ValueError(f'Website \'{website}\' has empty username'))
if not any(ws in config for ws in ('aryion', 'furaffinity', 'weasyl', 'inkbunny', 'sofurry')):
errors.append(ValueError('No valid websites found'))
if errors:
raise ExceptionGroup('Invalid configuration for description parsing', errors)
# Create descriptions

View file

@ -1,7 +0,0 @@
{
"aryion": "MyUsername",
"furaffinity": "My_Username",
"inkbunny": "MyUsername",
"sofurry": "My Username",
"weasyl": "MyUsername"
}

51
main.py
View file

@ -1,67 +1,82 @@
import argparse
import os
from subprocess import CalledProcessError
import shutil
import tempfile
from description import parse_description
from story import parse_story
OUT_DIR = './out'
def main(story_path=None, description_path=None, config_path='./config.json', keep_out_dir=False, ignore_empty_files=False):
remove_out_dir = not keep_out_dir and os.path.isdir(OUT_DIR)
def main(out_dir_path=None, story_path=None, description_path=None, file_path=None, config_path=None, keep_out_dir=False, ignore_empty_files=False):
if not out_dir_path:
raise ValueError('Missing out_dir_path')
if not config_path:
raise ValueError('Missing config_path')
remove_out_dir = not keep_out_dir and os.path.isdir(out_dir_path)
with tempfile.TemporaryDirectory() as tdir:
# Clear OUT_DIR if it exists and shouldn't be kept
# Clear output dir if it exists and shouldn't be kept
if remove_out_dir:
os.rename(OUT_DIR, os.path.join(tdir, 'old_out'))
if not os.path.isdir(OUT_DIR):
os.mkdir(OUT_DIR)
os.rename(out_dir_path, os.path.join(tdir, 'old_out'))
if not os.path.isdir(out_dir_path):
os.mkdir(out_dir_path)
try:
# Convert original file to .rtf (Aryion) and .txt (all others)
if story_path:
parse_story(story_path, config_path, OUT_DIR, tdir, ignore_empty_files)
parse_story(story_path, config_path, out_dir_path, tdir, ignore_empty_files)
# Parse FA description and convert for each website
if description_path:
parse_description(description_path, config_path, OUT_DIR, ignore_empty_files)
parse_description(description_path, config_path, out_dir_path, ignore_empty_files)
# Copy generic file over to output
if file_path:
shutil.copy(file_path, out_dir_path)
except CalledProcessError as e:
if remove_out_dir:
# Revert directory removal on error
os.rename(OUT_DIR, os.path.join(tdir, 'get_rid_of_this'))
os.rename(os.path.join(tdir, 'old_out'), OUT_DIR)
os.rename(out_dir_path, os.path.join(tdir, 'get_rid_of_this'))
os.rename(os.path.join(tdir, 'old_out'), out_dir_path)
print(f'Command exited with code {e.returncode}: {e.stderr.decode("utf-8-sig")}')
exit(1)
except Exception as e:
if remove_out_dir:
# Revert directory removal on error
os.rename(OUT_DIR, os.path.join(tdir, 'get_rid_of_this'))
os.rename(os.path.join(tdir, 'old_out'), OUT_DIR)
os.rename(out_dir_path, os.path.join(tdir, 'get_rid_of_this'))
os.rename(os.path.join(tdir, 'old_out'), out_dir_path)
raise e
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='generate multi-gallery upload-ready files')
parser.add_argument('-o', '--output-dir', dest='out_dir_path', default='./out',
help='path of output directory')
parser.add_argument('-c', '--config', dest='config_path', default='./config.json',
help='path of JSON configuration file')
parser.add_argument('-s', '--story', dest='story_path',
help='path of LibreOffice-readable story file')
parser.add_argument('-d', '--description', dest='description_path',
help='path of BBCode-formatted description file')
parser.add_argument('-c', '--config', dest='config_path', default='./config.json',
help='path of JSON configuration file')
parser.add_argument('-f', '--file', dest='file_path',
help='path of generic file to include in output (i.e. an image or thumbnail)')
parser.add_argument('-k', '--keep-out-dir', dest='keep_out_dir', action='store_true',
help='whether output directory contents should be kept')
parser.add_argument('-i', '--ignore-empty-files', dest='ignore_empty_files', action='store_true',
help='whether output directory contents should be kept.\nif set, a script error may leave partial files behind')
parser.add_argument('-I', '--ignore-empty-files', dest='ignore_empty_files', action='store_true',
help='do not raise an error if any input file is empty or whitespace-only')
args = parser.parse_args()
if not any([args.story_path, args.description_path]):
parser.error('at least one of ( --story | --description ) must be set')
if args.out_dir_path and os.path.exists(args.out_dir_path) and not os.path.isdir(args.out_dir_path):
parser.error('--output-dir must be an existing directory or inexistent')
if args.story_path and not os.path.isfile(args.story_path):
parser.error('--story must be a valid file')
if args.description_path and not os.path.isfile(args.description_path):
parser.error('--description must be a valid file')
if args.file_path and not os.path.isfile(args.file_path):
parser.error('--file must be a valid file')
if args.config_path and not os.path.isfile(args.config_path):
parser.error('--config must be a valid file')

View file

@ -20,11 +20,11 @@ def parse_story(story_path, config_path, out_dir, temp_dir, ignore_empty_files=F
with open(config_path, 'r') as f:
config = json.load(f)
if type(config) is not dict:
raise ValueError('Configuration must be a JSON object')
raise ValueError('Invalid configuration for story parsing: Configuration must be a JSON object')
should_create_txt_story = any(ws in config for ws in ('furaffinity', 'weasyl', 'inkbunny', 'sofurry'))
should_create_rtf_story = any(ws in config for ws in ('aryion',))
if not should_create_txt_story and not should_create_rtf_story:
raise ValueError('')
raise ValueError('Invalid configuration for story parsing: No valid websites found')
story_filename = os.path.split(story_path)[1].rsplit('.')[0]
txt_out_path = os.path.join(out_dir, f'{story_filename}.txt') if should_create_txt_story else os.devnull
@ -32,6 +32,7 @@ def parse_story(story_path, config_path, out_dir, temp_dir, ignore_empty_files=F
RE_EMPTY_LINE = re.compile('^$')
is_only_empty_lines = True
ps = subprocess.Popen(('libreoffice', '--cat', story_path), stdout=subprocess.PIPE)
# Mangle output files so that .RTF will always have a single LF between lines, and .TXT can have one or two CRLF
with open(txt_out_path, 'w', newline='\r\n') as txt_out, open(txt_tmp_path, 'w') as txt_tmp:
needs_empty_line = False
for line in io.TextIOWrapper(ps.stdout, encoding='utf-8-sig'):