568 lines
23 KiB
568 lines
23 KiB
from collections import OrderedDict
import io
import json
import lark
import os
import psutil
import re
import subprocess
import typing
from sites import SUPPORTED_SITE_TAGS
# Tags accepted inside [user]...[/user], mapped to their accepted spellings.
# The site transformers below index this mapping by upload-site name
# ('aryion', 'furaffinity', ...), so the site tags are merged in here.
# NOTE(review): assumes user tags share the alternatives of SUPPORTED_SITE_TAGS - confirm.
SUPPORTED_USER_TAGS: typing.Mapping[str, typing.Set[str]] = {
    **SUPPORTED_SITE_TAGS,
    'twitter': {'twitter'},
    'mastodon': {'mastodon'},
}

# The grammar is assembled in pieces because the per-site user/siteurl rules
# are generated from the supported tag tables.
DESCRIPTION_GRAMMAR = r"""
  ?start: document_list
  document_list: document+
  document: b_tag
      | i_tag
      | u_tag
      | center_tag
      | url_tag
      | self_tag
      | if_tag
      | user_tag_root
      | siteurl_tag_root
      | TEXT
  b_tag: "[b]" [document_list] "[/b]"
  i_tag: "[i]" [document_list] "[/i]"
  u_tag: "[u]" [document_list] "[/u]"
  center_tag: "[center]" [document_list] "[/center]"
  url_tag: "[url" ["=" [URL]] "]" [document_list] "[/url]"
  self_tag: "[self][/self]"
  if_tag: "[if=" CONDITION "]" [document_list] "[/if]" [ "[else]" [document_list] "[/else]" ]
  user_tag_root: "[user]" user_tag "[/user]"
  user_tag: user_tag_generic | """
# One alternative per supported user tag, followed by the rule for each tag.
DESCRIPTION_GRAMMAR += ' | '.join(f'user_tag_{tag}' for tag in SUPPORTED_USER_TAGS)
for tag, alts in SUPPORTED_USER_TAGS.items():
    DESCRIPTION_GRAMMAR += f'\n user_tag_{tag}: '
    DESCRIPTION_GRAMMAR += ' | '.join(f'"[{alt}" ["=" USERNAME] "]" USERNAME "[/{alt}]" | "[{alt}" "=" USERNAME "]" [user_tag] "[/{alt}]"' for alt in alts)
DESCRIPTION_GRAMMAR += r"""
  user_tag_generic: "[generic=" URL "]" USERNAME "[/generic]"
  siteurl_tag_root: "[siteurl]" siteurl_tag "[/siteurl]"
  siteurl_tag: siteurl_tag_generic | """
# Same construction for the site URL tags.
DESCRIPTION_GRAMMAR += ' | '.join(f'siteurl_tag_{tag}' for tag in SUPPORTED_SITE_TAGS)
for tag, alts in SUPPORTED_SITE_TAGS.items():
    DESCRIPTION_GRAMMAR += f'\n siteurl_tag_{tag}: '
    DESCRIPTION_GRAMMAR += ' | '.join(f'"[{alt}" "=" URL "]" ( siteurl_tag | TEXT ) "[/{alt}]"' for alt in alts)
DESCRIPTION_GRAMMAR += r"""
  siteurl_tag_generic: "[generic=" URL "]" TEXT "[/generic]"
  USERNAME: / *@?[a-zA-Z0-9][a-zA-Z0-9 @._-]*/
  URL: / *(https?:\/\/)?[^\]]+ */
  TEXT: /([^\[]|[ \t\r\n])+/
  CONDITION: / *[a-z]+ *(==|!=) *[a-zA-Z0-9_-]+ *| *[a-z]+ +in +([a-zA-Z0-9_-]+ *, *)*[a-zA-Z0-9_-]+ */
"""

# Parser shared by every description.
# NOTE(review): the parser options are not visible in this part of the file;
# `lalr` is an assumption - confirm against the project history.
DESCRIPTION_PARSER = lark.Lark(DESCRIPTION_GRAMMAR, parser='lalr')
class DescriptionParsingError(ValueError):
    """Raised when a description cannot be parsed or contains invalid nesting."""
class SiteSwitchTag:
    """Insertion-ordered mapping of site name to value, plus an optional default text.

    Used as the intermediate result of nested ``[user]`` / ``[siteurl]`` tags:
    each nesting level adds its own site entry, and ``default`` holds the
    innermost text (if any).
    """

    def __init__(self, default: typing.Optional[str] = None, **kwargs):
        """Create a tag with an optional default text and initial ``site=value`` entries.

        Entries whose value is ``None`` are not stored.
        """
        self.default = default
        self._sites: typing.OrderedDict[str, typing.Optional[str]] = OrderedDict()
        for name, value in kwargs.items():
            self[name] = value

    def __setitem__(self, name: str, value: typing.Optional[str]) -> None:
        """Set the value for ``name``; assigning ``None`` removes the entry.

        Overwriting an existing entry keeps its original position in the order.
        """
        if value is None:
            # Never store None: `name in tag` and `tag[name]` must agree.
            self._sites.pop(name, None)
        else:
            self._sites[name] = value

    def __getitem__(self, name: str) -> typing.Optional[str]:
        """Return the value stored for ``name``, or ``None`` when absent."""
        return self._sites.get(name)

    def __contains__(self, name: str) -> bool:
        """Return whether a value is stored for ``name``."""
        return name in self._sites

    @property
    def sites(self):
        """Iterate over the stored site names in insertion order.

        Exposed as a property because callers iterate ``tag.sites`` directly.
        """
        yield from self._sites
class UploadTransformer(lark.Transformer):
    """Base transformer that renders a parsed description tree as text.

    The formatting callbacks (``b_tag``, ``i_tag``, ``u_tag``, ``center_tag``,
    ``url_tag``, ``self_tag``) and ``transformer_matches_site`` are abstract
    here and raise ``NotImplementedError`` until a subclass provides them.
    """

    def __init__(self, define_options=None, *args, **kwargs):
        """Set up the transformer and register the per-tag callbacks.

        Args:
            define_options: Collection of option names tested by
                ``transformer_matches_define``. Defaults to an empty set.
            *args, **kwargs: Forwarded to ``lark.Transformer``.
        """
        super().__init__(*args, **kwargs)
        # A fresh set per instance instead of a shared mutable default.
        self.define_options = set() if define_options is None else define_options
        # Init user_tag_xxxx methods
        for tag in SUPPORTED_USER_TAGS:
            setattr(self, f'user_tag_{tag}', self._switch_tag_factory(tag))
        # Init siteurl_tag_xxxx methods
        for tag in SUPPORTED_SITE_TAGS:
            setattr(self, f'siteurl_tag_{tag}', self._switch_tag_factory(tag))

    @staticmethod
    def _switch_tag_factory(tag):
        """Build the callback for one ``user_tag_<tag>`` / ``siteurl_tag_<tag>`` rule."""
        def switch_tag(data):
            attribute, inner = data[0], data[1]
            if attribute and attribute.strip():
                if isinstance(inner, SiteSwitchTag):
                    # Outer level of a nested tag: add this site to the inner result.
                    inner[tag] = attribute.strip()
                    return inner
                # Innermost node with an attribute: the inner text is the default label.
                result = SiteSwitchTag(default=inner and inner.strip())
                result[tag] = attribute.strip()
                return result
            # No attribute: the inner text itself is the value for this site.
            result = SiteSwitchTag()
            result[tag] = inner.strip()
            return result
        return switch_tag

    def document_list(self, data):
        """Concatenate the rendered child documents."""
        return ''.join(data)

    def document(self, data):
        """Unwrap the single child of a ``document`` node."""
        return data[0]

    def b_tag(self, _):
        raise NotImplementedError('UploadTransformer.b_tag is abstract')

    def i_tag(self, _):
        raise NotImplementedError('UploadTransformer.i_tag is abstract')

    def u_tag(self, _):
        raise NotImplementedError('UploadTransformer.u_tag is abstract')

    def center_tag(self, _):
        raise NotImplementedError('UploadTransformer.center_tag is abstract')

    def url_tag(self, _):
        raise NotImplementedError('UploadTransformer.url_tag is abstract')

    def self_tag(self, _):
        raise NotImplementedError('UploadTransformer.self_tag is abstract')

    def transformer_matches_site(self, site: str) -> bool:
        raise NotImplementedError('UploadTransformer.transformer_matches_site is abstract')

    def transformer_matches_define(self, option: str) -> bool:
        """Return whether ``option`` is one of the configured define options."""
        return option in self.define_options

    def if_tag(self, data: typing.Tuple[str, str, str]):
        """Render the truthy or falsy branch of an ``[if=...]`` tag.

        Supported conditions are ``name==value``, ``name!=value`` and
        ``name in value1,value2``. ``name`` selects the method
        ``transformer_matches_<name>`` on this transformer.

        Raises:
            ValueError: If the condition has no supported form, or no
                matching ``transformer_matches_<name>`` method exists.
        """
        condition, truthy_document, falsy_document = data[0], data[1], data[2]
        # Checked in this order: equality, inequality, inclusion.
        tests = (
            ('==', lambda matches, value: matches(value.strip())),
            ('!=', lambda matches, value: not matches(value.strip())),
            (' in ', lambda matches, value: any(matches(item.strip()) for item in value.split(','))),
        )
        for separator, evaluate in tests:
            subject, found, value = condition.partition(separator)
            if not found or not value.strip():
                continue
            matches = getattr(self, f'transformer_matches_{subject.strip()}', None)
            if matches is None:
                continue
            chosen = truthy_document if evaluate(matches, value) else falsy_document
            return chosen or ''
        raise ValueError(f'Invalid [if][/if] tag condition: {condition}')

    @staticmethod
    def _user_profile_url(site, username):
        """Return the profile URL of ``username`` on ``site``, or ``None`` for an unknown site.

        Raises:
            ValueError: If a Mastodon handle does not contain an ``@instance`` part.
        """
        if site == 'aryion':
            return f'https://aryion.com/g4/user/{username}'
        if site == 'furaffinity':
            return f'https://furaffinity.net/user/{username.replace("_", "")}'
        if site == 'weasyl':
            return f'https://www.weasyl.com/~{username.replace(" ", "").lower()}'
        if site == 'inkbunny':
            return f'https://inkbunny.net/{username}'
        if site == 'sofurry':
            return f'https://{username.replace(" ", "-").lower()}.sofurry.com'
        if site == 'twitter':
            return f'https://twitter.com/{username.rsplit("@", 1)[-1]}'
        if site == 'mastodon':
            parts = username.rsplit('@', 2)
            if len(parts) < 2:
                raise ValueError(f'Invalid Mastodon user "{username}" - expected user@instance')
            mastodon_user, mastodon_instance = parts[-2], parts[-1]
            return f'https://{mastodon_instance.strip()}/@{mastodon_user.strip()}'
        return None

    def user_tag_root(self, data):
        """Render a ``[user]`` tag as a link to the first recognised site.

        Raises:
            TypeError: If none of the stored sites is recognised.
        """
        user_data: SiteSwitchTag = data[0]
        for site in user_data.sites:
            if site == 'generic':
                return self.url_tag((user_data['generic'], user_data.default))
            profile_url = self._user_profile_url(site, user_data[site])
            if profile_url is not None:
                return self.url_tag((profile_url, user_data.default or user_data[site]))
            print(f'Unknown site "{site}" found in user tag; ignoring...')
        raise TypeError('Invalid user SiteSwitchTag data - no matches found')

    def user_tag(self, data):
        """Unwrap the single child of a ``user_tag`` node."""
        return data[0]

    def user_tag_generic(self, data):
        """Build the switch tag for ``[generic=URL]name[/generic]`` inside ``[user]``."""
        attribute, inner = data[0], data[1]
        user = SiteSwitchTag(default=inner.strip())
        user['generic'] = attribute.strip()
        return user

    def siteurl_tag_root(self, data):
        """Render a ``[siteurl]`` tag via its generic URL, or as empty text if it has none."""
        siteurl_data: SiteSwitchTag = data[0]
        if 'generic' in siteurl_data:
            return self.url_tag((siteurl_data['generic'], siteurl_data.default))
        return ''

    def siteurl_tag(self, data):
        """Unwrap the single child of a ``siteurl_tag`` node."""
        return data[0]

    def siteurl_tag_generic(self, data):
        """Build the switch tag for ``[generic=URL]text[/generic]`` inside ``[siteurl]``."""
        attribute, inner = data[0], data[1]
        siteurl = SiteSwitchTag(default=inner.strip())
        siteurl['generic'] = attribute.strip()
        return siteurl
class BbcodeTransformer(UploadTransformer):
    """Renders formatting tags as BBCode. Empty or blank content yields an empty string."""

    def b_tag(self, data):
        content = data[0]
        if content is not None and content.strip():
            return f'[b]{content}[/b]'
        return ''

    def i_tag(self, data):
        content = data[0]
        if content is not None and content.strip():
            return f'[i]{content}[/i]'
        return ''

    def u_tag(self, data):
        content = data[0]
        if content is not None and content.strip():
            return f'[u]{content}[/u]'
        return ''

    def center_tag(self, data):
        content = data[0]
        if content is not None and content.strip():
            return f'[center]{content}[/center]'
        return ''

    def url_tag(self, data):
        """Render ``(url, label)``; a missing URL yields the bare label, a blank label the URL."""
        url, label = data[0], data[1]
        if url is None or not url.strip():
            return label.strip() if label else ''
        target = url.strip()
        shown = label if label and label.strip() else target
        return f'[url={target}]{shown}[/url]'
class MarkdownTransformer(UploadTransformer):
    """Renders formatting tags as Markdown. Empty or blank content yields an empty string."""

    def b_tag(self, data):
        content = data[0]
        if content is not None and content.strip():
            return f'**{content}**'
        return ''

    def i_tag(self, data):
        content = data[0]
        if content is not None and content.strip():
            return f'*{content}*'
        return ''

    def u_tag(self, data):
        content = data[0]
        if content is not None and content.strip():
            # Markdown should support simple HTML tags
            return f'<u>{content}</u>'
        return ''

    def url_tag(self, data):
        """Render ``(url, label)``; a missing URL yields the bare label, a blank label the URL."""
        url, label = data[0], data[1]
        if url is None or not url.strip():
            return label.strip() if label else ''
        target = url.strip()
        shown = label if label and label.strip() else target
        return f'[{shown}]({target})'
class PlaintextTransformer(UploadTransformer):
    """Renders the description as plain text, dropping all formatting tags."""

    def b_tag(self, data):
        return str(data[0]) if data[0] else ''

    def i_tag(self, data):
        return str(data[0]) if data[0] else ''

    def u_tag(self, data):
        return str(data[0]) if data[0] else ''

    def center_tag(self, data):
        return str(data[0]) if data[0] else ''

    def url_tag(self, data):
        """Render ``(url, label)`` as ``label: url``, or whichever of the two is present."""
        if data[0] is None or not data[0].strip():
            return data[1] if data[1] and data[1].strip() else ''
        if data[1] is None or not data[1].strip():
            return data[0].strip()
        return f'{data[1]}: {data[0].strip()}'

    def user_tag_root(self, data):
        """Render a ``[user]`` tag as ``<name> on <site>`` for the first recognised site.

        A ``generic`` entry is delegated to the base implementation.

        Raises:
            ValueError: If a Mastodon handle does not contain an ``@instance`` part.
        """
        user_data = data[0]
        for site in user_data.sites:
            if site == 'generic':
                # Generic links carry a URL rather than a site name; let the base class render them.
                break
            elif site == 'aryion':
                return f'{user_data["aryion"]} on Eka\'s Portal'
            elif site == 'furaffinity':
                return f'{user_data["furaffinity"]} on Fur Affinity'
            elif site == 'weasyl':
                return f'{user_data["weasyl"]} on Weasyl'
            elif site == 'inkbunny':
                return f'{user_data["inkbunny"]} on Inkbunny'
            elif site == 'sofurry':
                return f'{user_data["sofurry"]} on SoFurry'
            elif site == 'twitter':
                return f'@{user_data["twitter"].rsplit("@", 1)[-1]} on Twitter'
            elif site == 'mastodon':
                parts = user_data["mastodon"].rsplit('@', 2)
                if len(parts) < 2:
                    raise ValueError(f'Invalid Mastodon user "{user_data["mastodon"]}" - expected user@instance')
                mastodon_user, mastodon_instance = parts[-2], parts[-1]
                return f'@{mastodon_user.strip()} on {mastodon_instance.strip()}'
            print(f'Unknown site "{site}" found in user tag; ignoring...')
        return super().user_tag_root(data)
class AryionTransformer(BbcodeTransformer):
    """BBCode transformer that prefers the ``aryion`` entries of switch tags."""

    def __init__(self, self_user=None, *args, **kwargs):
        """
        Args:
            self_user: User name rendered for ``[self][/self]``; the tag raises
                ``ValueError`` when this is not provided.
            *args, **kwargs: Forwarded to the parent transformer.
        """
        super().__init__(*args, **kwargs)

        def self_tag(data):
            if self_user:
                return self.user_tag_root((SiteSwitchTag(aryion=self_user),))
            raise ValueError('self_tag is unavailable for AryionTransformer - no user provided')
        # Installed per instance so the callback can close over self_user.
        self.self_tag = self_tag

    @staticmethod
    def transformer_matches_site(site: str) -> bool:
        """Return whether ``site`` is one of the accepted names for this site."""
        return site in SUPPORTED_USER_TAGS['aryion']

    def user_tag_root(self, data):
        """Render an ``aryion`` user as an icon tag; otherwise defer to the parent."""
        user_data: SiteSwitchTag = data[0]
        if user_data['aryion']:
            return f':icon{user_data["aryion"]}:'
        return super().user_tag_root(data)

    def siteurl_tag_root(self, data):
        """Link to the ``aryion`` URL when present; otherwise defer to the parent."""
        siteurl_data: SiteSwitchTag = data[0]
        if 'aryion' in siteurl_data:
            return self.url_tag((siteurl_data['aryion'], siteurl_data.default))
        return super().siteurl_tag_root(data)
class FuraffinityTransformer(BbcodeTransformer):
    """BBCode transformer that prefers the ``furaffinity`` entries of switch tags."""

    def __init__(self, self_user=None, *args, **kwargs):
        """
        Args:
            self_user: User name rendered for ``[self][/self]``; the tag raises
                ``ValueError`` when this is not provided.
            *args, **kwargs: Forwarded to the parent transformer.
        """
        super().__init__(*args, **kwargs)

        def self_tag(data):
            if self_user:
                return self.user_tag_root((SiteSwitchTag(furaffinity=self_user),))
            raise ValueError('self_tag is unavailable for FuraffinityTransformer - no user provided')
        # Installed per instance so the callback can close over self_user.
        self.self_tag = self_tag

    @staticmethod
    def transformer_matches_site(site: str) -> bool:
        """Return whether ``site`` is one of the accepted names for this site."""
        return site in SUPPORTED_USER_TAGS['furaffinity']

    def user_tag_root(self, data):
        """Render a ``furaffinity`` user as an icon tag; otherwise defer to the parent."""
        user_data: SiteSwitchTag = data[0]
        if user_data['furaffinity']:
            return f':icon{user_data["furaffinity"]}:'
        return super().user_tag_root(data)

    def siteurl_tag_root(self, data):
        """Link to the ``furaffinity`` URL when present; otherwise defer to the parent."""
        siteurl_data: SiteSwitchTag = data[0]
        if 'furaffinity' in siteurl_data:
            return self.url_tag((siteurl_data['furaffinity'], siteurl_data.default))
        return super().siteurl_tag_root(data)
class WeasylTransformer(MarkdownTransformer):
    """Markdown transformer that prefers the ``weasyl`` entries of switch tags."""

    def __init__(self, self_user=None, *args, **kwargs):
        """
        Args:
            self_user: User name rendered for ``[self][/self]``; the tag raises
                ``ValueError`` when this is not provided.
            *args, **kwargs: Forwarded to the parent transformer.
        """
        super().__init__(*args, **kwargs)

        def self_tag(data):
            if self_user:
                return self.user_tag_root((SiteSwitchTag(weasyl=self_user),))
            raise ValueError('self_tag is unavailable for WeasylTransformer - no user provided')
        # Installed per instance so the callback can close over self_user.
        self.self_tag = self_tag

    @staticmethod
    def transformer_matches_site(site: str) -> bool:
        """Return whether ``site`` names this site."""
        return site == 'weasyl'

    def center_tag(self, data):
        """Render centered content as an HTML ``div``; blank content yields an empty string."""
        if data[0] is None or not data[0].strip():
            return ''
        return f'<div class="align-center">{data[0]}</div>'

    def user_tag_root(self, data):
        """Render a user with the ``weasyl`` entry first, then the first cross-site shortcut.

        Falls back to the parent implementation when no entry applies.
        """
        user_data: SiteSwitchTag = data[0]
        if user_data['weasyl']:
            return f'<!~{user_data["weasyl"].replace(" ", "")}>'
        for site in user_data.sites:
            if site == 'furaffinity':
                return f'<fa:{user_data["furaffinity"]}>'
            if site == 'inkbunny':
                return f'<ib:{user_data["inkbunny"]}>'
            if site == 'sofurry':
                return f'<sf:{user_data["sofurry"]}>'
        return super().user_tag_root(data)

    def siteurl_tag_root(self, data):
        """Link to the ``weasyl`` URL when present; otherwise defer to the parent."""
        siteurl_data: SiteSwitchTag = data[0]
        if 'weasyl' in siteurl_data:
            return self.url_tag((siteurl_data['weasyl'], siteurl_data.default))
        return super().siteurl_tag_root(data)
class InkbunnyTransformer(BbcodeTransformer):
    """BBCode transformer that prefers the ``inkbunny`` entries of switch tags."""

    def __init__(self, self_user=None, *args, **kwargs):
        """
        Args:
            self_user: User name rendered for ``[self][/self]``; the tag raises
                ``ValueError`` when this is not provided.
            *args, **kwargs: Forwarded to the parent transformer.
        """
        super().__init__(*args, **kwargs)

        def self_tag(data):
            if self_user:
                return self.user_tag_root((SiteSwitchTag(inkbunny=self_user),))
            raise ValueError('self_tag is unavailable for InkbunnyTransformer - no user provided')
        # Installed per instance so the callback can close over self_user.
        self.self_tag = self_tag

    @staticmethod
    def transformer_matches_site(site: str) -> bool:
        """Return whether ``site`` is one of the accepted names for this site."""
        return site in SUPPORTED_USER_TAGS['inkbunny']

    def user_tag_root(self, data):
        """Render a user with the ``inkbunny`` entry first, then the first cross-site shortcut.

        Falls back to the parent implementation when no entry applies.
        """
        user_data: SiteSwitchTag = data[0]
        if user_data['inkbunny']:
            return f'[iconname]{user_data["inkbunny"]}[/iconname]'
        for site in user_data.sites:
            if site == 'furaffinity':
                return f'[fa]{user_data["furaffinity"]}[/fa]'
            if site == 'sofurry':
                return f'[sf]{user_data["sofurry"]}[/sf]'
            if site == 'weasyl':
                return f'[weasyl]{user_data["weasyl"].replace(" ", "").lower()}[/weasyl]'
        return super().user_tag_root(data)

    def siteurl_tag_root(self, data):
        """Link to the ``inkbunny`` URL when present; otherwise defer to the parent."""
        siteurl_data: SiteSwitchTag = data[0]
        if 'inkbunny' in siteurl_data:
            return self.url_tag((siteurl_data['inkbunny'], siteurl_data.default))
        return super().siteurl_tag_root(data)
class SoFurryTransformer(BbcodeTransformer):
    """BBCode transformer that prefers the ``sofurry`` entries of switch tags."""

    def __init__(self, self_user=None, *args, **kwargs):
        """
        Args:
            self_user: User name rendered for ``[self][/self]``; the tag raises
                ``ValueError`` when this is not provided.
            *args, **kwargs: Forwarded to the parent transformer.
        """
        super().__init__(*args, **kwargs)

        def self_tag(data):
            if self_user:
                return self.user_tag_root((SiteSwitchTag(sofurry=self_user),))
            raise ValueError('self_tag is unavailable for SoFurryTransformer - no user provided')
        # Installed per instance so the callback can close over self_user.
        self.self_tag = self_tag

    @staticmethod
    def transformer_matches_site(site: str) -> bool:
        """Return whether ``site`` is one of the accepted names for this site."""
        return site in SUPPORTED_USER_TAGS['sofurry']

    def user_tag_root(self, data):
        """Render a user with the ``sofurry`` entry first, then the first cross-site shortcut.

        Falls back to the parent implementation when no entry applies.
        """
        user_data: SiteSwitchTag = data[0]
        if user_data['sofurry']:
            return f':icon{user_data["sofurry"]}:'
        for site in user_data.sites:
            if site == 'furaffinity':
                return f'fa!{user_data["furaffinity"]}'
            if site == 'inkbunny':
                return f'ib!{user_data["inkbunny"]}'
        return super().user_tag_root(data)

    def siteurl_tag_root(self, data):
        """Link to the ``sofurry`` URL when present; otherwise defer to the parent."""
        siteurl_data: SiteSwitchTag = data[0]
        if 'sofurry' in siteurl_data:
            return self.url_tag((siteurl_data['sofurry'], siteurl_data.default))
        return super().siteurl_tag_root(data)
def validate_parsed_tree(parsed_tree):
    """Reject b/i/u/url tags that contain another tag of the same kind.

    Walks every subtree top-down; for each node of a non-nestable kind it
    searches that node's subtrees for a different node with the same rule name.

    Raises:
        DescriptionParsingError: On the first same-kind nested tag found. The
            reported line and column are read from the nested node's ``data``.
    """
    non_nestable = {'b_tag', 'i_tag', 'u_tag', 'url_tag'}
    for outer in parsed_tree.iter_subtrees_topdown():
        if outer.data not in non_nestable:
            continue
        rule_name = str(outer.data)
        for inner in outer.find_data(rule_name):
            # find_data also yields the outer node itself; skip it.
            if outer != inner:
                raise DescriptionParsingError(f'Invalid nested {rule_name} on line {inner.data.line} column {inner.data.column}')
def parse_description(description_path, config, out_dir, ignore_empty_files=False, define_options=set()):
for proc in psutil.process_iter(['cmdline']):
if proc.info['cmdline'] and 'libreoffice' in proc.info['cmdline'][0] and '--writer' in proc.info['cmdline'][1:]:
if ignore_empty_files:
print('WARN: LibreOffice Writer appears to be running. This command may output empty files until it is closed.')
print('WARN: LibreOffice Writer appears to be running. This command may raise an error until it is closed.')
description = ''
with subprocess.Popen(('libreoffice', '--cat', description_path), stdout=subprocess.PIPE) as ps:
description = '\n'.join(line.strip() for line in io.TextIOWrapper(ps.stdout, encoding='utf-8-sig'))
if not description or re.match(r'^\s+$', description):
error = f'Description processing returned empty file: libreoffice --cat {description_path}'
if ignore_empty_files:
print(f'Ignoring error ({error})')
raise RuntimeError(error)
parsed_description = DESCRIPTION_PARSER.parse(description)
except lark.UnexpectedInput as e:
input_error = e.match_examples(DESCRIPTION_PARSER.parse, {
'Unclosed tag': ['[b]text', '[i]text', '[u]text', '[url]text'],
'Unopened tag': ['text[/b]', 'text[/i]', 'text[/u]', 'text[/url]'],
'Unknown tag': ['[invalid]text[/invalid]'],
'Missing tag brackets': ['b]text[/b]', '[btext[/b]', '[b]text/b]', '[b]text[/b', 'i]text[/i]', '[itext[/i]', '[i]text/i]', '[i]text[/i', 'u]text[/u]', '[utext[/u]', '[u]text/u]', '[u]text[/u'],
'Missing tag slash': ['[b]text[b]', '[i]text[i]', '[u]text[u]'],
'Empty switch tag': ['[user][/user]', '[siteurl][/siteurl]'],
'Empty user tag': ['[user][aryion][/aryion][/user]', '[user][furaffinity][/furaffinity][/user]', '[user][inkbunny][/inkbunny][/user]', '[user][sofurry][/sofurry][/user]', '[user][weasyl][/weasyl][/user]', '[user][twitter][/twitter][/user]', '[user][mastodon][/mastodon][/user]', '[user][aryion=][/aryion][/user]', '[user][furaffinity=][/furaffinity][/user]', '[user][inkbunny=][/inkbunny][/user]', '[user][sofurry=][/sofurry][/user]', '[user][weasyl=][/weasyl][/user]', '[user][twitter=][/twitter][/user]', '[user][mastodon=][/mastodon][/user]'],
'Empty siteurl tag': ['[siteurl][aryion][/aryion][/siteurl]', '[siteurl][furaffinity][/furaffinity][/siteurl]', '[siteurl][inkbunny][/inkbunny][/siteurl]', '[siteurl][sofurry][/sofurry][/siteurl]', '[siteurl][weasyl][/weasyl][/siteurl]' '[siteurl][aryion=][/aryion][/siteurl]', '[siteurl][furaffinity=][/furaffinity][/siteurl]', '[siteurl][inkbunny=][/inkbunny][/siteurl]', '[siteurl][sofurry=][/sofurry][/siteurl]', '[siteurl][weasyl=][/weasyl][/siteurl]'],
raise DescriptionParsingError(f'Unable to parse description. {input_error or "Unknown grammar error"} in line {e.line} column {e.column}:\n{e.get_context(description)}') from e
transformations = {
'aryion': ('desc_aryion.txt', AryionTransformer),
'furaffinity': ('desc_furaffinity.txt', FuraffinityTransformer),
'inkbunny': ('desc_inkbunny.txt', InkbunnyTransformer),
'sofurry': ('desc_sofurry.txt', SoFurryTransformer),
'weasyl': ('desc_weasyl.md', WeasylTransformer),
# assert all(k in SUPPORTED_SITE_TAGS for k in transformations)
# Validate JSON
errors = []
for (website, username) in config.items():
if website not in transformations:
errors.append(ValueError(f'Website \'{website}\' is unsupported'))
elif type(username) is not str:
errors.append(ValueError(f'Website \'{website}\' has invalid username \'{json.dumps(username)}\''))
elif username.strip() == '':
errors.append(ValueError(f'Website \'{website}\' has empty username'))
if not any(ws in config for ws in transformations):
errors.append(ValueError('No valid websites found'))
if errors:
raise ExceptionGroup('Invalid configuration for description parsing', errors)
# Create descriptions
RE_MULTIPLE_EMPTY_LINES = re.compile(r'\n\n+')
for (website, username) in config.items():
(filepath, transformer) = transformations[website]
with open(os.path.join(out_dir, filepath), 'w') as f:
if description.strip():
transformed_description = transformer(self_user=username, define_options=define_options).transform(parsed_description)
cleaned_description = RE_MULTIPLE_EMPTY_LINES.sub('\n\n', transformed_description).strip()
if cleaned_description: