This repository has been archived on 2024-03-28. You can view files and clone it, but cannot push or open issues or pull requests.
upload-generator/description.py
2024-02-28 09:47:57 -03:00

568 lines
23 KiB
Python

from collections import OrderedDict
import io
import json
import lark
import os
import psutil
import re
import subprocess
import typing
from sites import SUPPORTED_SITE_TAGS
# Tags accepted inside [user]...[/user]: every supported site tag (with its
# aliases) plus Twitter and Mastodon.
SUPPORTED_USER_TAGS: typing.Mapping[str, typing.Set[str]] = dict(
    SUPPORTED_SITE_TAGS,
    twitter={'twitter'},
    mastodon={'mastodon'},
)
# Lark grammar for the BBCode-like description markup. The fixed rules are
# written out literally; the per-tag user/siteurl rules are appended below,
# generated from SUPPORTED_USER_TAGS and SUPPORTED_SITE_TAGS.
DESCRIPTION_GRAMMAR = r"""
?start: document_list
document_list: document+
document: b_tag
| i_tag
| u_tag
| center_tag
| url_tag
| self_tag
| if_tag
| user_tag_root
| siteurl_tag_root
| TEXT
b_tag: "[b]" [document_list] "[/b]"
i_tag: "[i]" [document_list] "[/i]"
u_tag: "[u]" [document_list] "[/u]"
center_tag: "[center]" [document_list] "[/center]"
url_tag: "[url" ["=" [URL]] "]" [document_list] "[/url]"
self_tag: "[self][/self]"
if_tag: "[if=" CONDITION "]" [document_list] "[/if]" [ "[else]" [document_list] "[/else]" ]
user_tag_root: "[user]" user_tag "[/user]"
user_tag: user_tag_generic | """
# Complete the `user_tag` rule with one alternative per supported user tag.
DESCRIPTION_GRAMMAR += ' | '.join(f'user_tag_{tag}' for tag in SUPPORTED_USER_TAGS)
# One `user_tag_<tag>` rule per tag. Each alias contributes two alternatives:
# a leaf form wrapping a USERNAME (with an optional "=USERNAME" attribute) and
# a nesting form "=USERNAME" wrapping an optional inner user_tag.
for tag, alts in SUPPORTED_USER_TAGS.items():
    DESCRIPTION_GRAMMAR += f'\n user_tag_{tag}: '
    DESCRIPTION_GRAMMAR += ' | '.join(f'"[{alt}" ["=" USERNAME] "]" USERNAME "[/{alt}]" | "[{alt}" "=" USERNAME "]" [user_tag] "[/{alt}]"' for alt in alts)
DESCRIPTION_GRAMMAR += r"""
user_tag_generic: "[generic=" URL "]" USERNAME "[/generic]"
siteurl_tag_root: "[siteurl]" siteurl_tag "[/siteurl]"
siteurl_tag: siteurl_tag_generic | """
# Complete the `siteurl_tag` rule with one alternative per supported site tag.
DESCRIPTION_GRAMMAR += ' | '.join(f'siteurl_tag_{tag}' for tag in SUPPORTED_SITE_TAGS)
# One `siteurl_tag_<tag>` rule per site. Each alias takes a mandatory "=URL"
# attribute and wraps either a nested siteurl_tag or plain TEXT.
for tag, alts in SUPPORTED_SITE_TAGS.items():
    DESCRIPTION_GRAMMAR += f'\n siteurl_tag_{tag}: '
    DESCRIPTION_GRAMMAR += ' | '.join(f'"[{alt}" "=" URL "]" ( siteurl_tag | TEXT ) "[/{alt}]"' for alt in alts)
# Remaining fixed rule plus the terminal definitions.
DESCRIPTION_GRAMMAR += r"""
siteurl_tag_generic: "[generic=" URL "]" TEXT "[/generic]"
USERNAME: / *@?[a-zA-Z0-9][a-zA-Z0-9 @._-]*/
URL: / *(https?:\/\/)?[^\]]+ */
TEXT: /([^\[]|[ \t\r\n])+/
CONDITION: / *[a-z]+ *(==|!=) *[a-zA-Z0-9_-]+ *| *[a-z]+ +in +([a-zA-Z0-9_-]+ *, *)*[a-zA-Z0-9_-]+ */
"""
# Module-level parser shared by every call; built once with the LALR algorithm.
DESCRIPTION_PARSER = lark.Lark(DESCRIPTION_GRAMMAR, parser='lalr')
class DescriptionParsingError(ValueError):
    """Raised when a description cannot be parsed or fails tree validation."""
class SiteSwitchTag:
    """Insertion-ordered mapping of site name to value, with an optional default.

    Assigning ``None`` to a site removes it; looking up a missing site yields
    ``None`` instead of raising.
    """

    def __init__(self, default: typing.Optional[str]=None, **kwargs):
        """Store ``default`` and any keyword whose name is a supported user tag.

        Keywords that are not keys of SUPPORTED_USER_TAGS are silently dropped.
        """
        self.default = default
        self._sites: typing.OrderedDict[str, typing.Optional[str]] = OrderedDict()
        for site_name in kwargs:
            if site_name not in SUPPORTED_USER_TAGS:
                continue
            self[site_name] = kwargs[site_name]

    def __setitem__(self, name: str, value: typing.Optional[str]) -> None:
        """Set the value for ``name``; a ``None`` value deletes the entry if present."""
        if value is None:
            self._sites.pop(name, None)
            return
        # Re-assigning an existing key keeps its original position.
        self._sites[name] = value

    def __getitem__(self, name: str) -> typing.Optional[str]:
        """Return the value stored for ``name``, or ``None`` when absent."""
        return self._sites.get(name)

    def __contains__(self, name: str) -> bool:
        """Tell whether a value is stored for ``name``."""
        return name in self._sites

    @property
    def sites(self):
        """Yield the stored site names in insertion order."""
        for site_name in self._sites:
            yield site_name
class UploadTransformer(lark.Transformer):
    """Base transformer that renders a parsed description tree into text.

    The formatting callbacks (``b_tag``, ``i_tag``, ``u_tag``, ``center_tag``,
    ``url_tag``, ``self_tag``) and ``transformer_matches_site`` raise
    NotImplementedError here; subclasses supply the output-specific versions.
    Conditionals, user tags and site URL tags are resolved in this class.
    """

    def __init__(self, define_options=None, *args, **kwargs):
        """Create the transformer.

        Args:
            define_options: Collection of option names for which
                ``[if=define==...]`` conditions hold. Defaults to an empty set.
            *args, **kwargs: Forwarded to ``lark.Transformer``.
        """
        super().__init__(*args, **kwargs)
        # A fresh set per instance: a mutable default in the signature would be
        # shared by every transformer created without an explicit value.
        self.define_options = set() if define_options is None else define_options

        def _switch_tag_factory(tag):
            # Build the callback for one `user_tag_<tag>` / `siteurl_tag_<tag>`
            # rule: start a new SiteSwitchTag at the innermost node, or record
            # this tag on the SiteSwitchTag produced by the nested node.
            def switch_tag(data):
                attribute, inner = data[0], data[1]
                if attribute and attribute.strip():
                    if isinstance(inner, SiteSwitchTag):
                        inner[tag] = attribute.strip()
                        return inner
                    switch = SiteSwitchTag(default=inner and inner.strip())
                    switch[tag] = attribute.strip()
                    return switch
                # No attribute: the tag body itself is the value.
                switch = SiteSwitchTag()
                switch[tag] = inner.strip()
                return switch
            return switch_tag

        # The grammar generates one rule per tag, so the matching callbacks
        # are attached to the instance under the same generated names.
        for tag in SUPPORTED_USER_TAGS:
            setattr(self, f'user_tag_{tag}', _switch_tag_factory(tag))
        for tag in SUPPORTED_SITE_TAGS:
            setattr(self, f'siteurl_tag_{tag}', _switch_tag_factory(tag))

    def document_list(self, data):
        """Concatenate the rendered child documents."""
        return ''.join(data)

    def document(self, data):
        """Unwrap the single rendered child of a document node."""
        return data[0]

    def b_tag(self, _):
        raise NotImplementedError('UploadTransformer.b_tag is abstract')

    def i_tag(self, _):
        raise NotImplementedError('UploadTransformer.i_tag is abstract')

    def u_tag(self, _):
        raise NotImplementedError('UploadTransformer.u_tag is abstract')

    def center_tag(self, _):
        raise NotImplementedError('UploadTransformer.center_tag is abstract')

    def url_tag(self, _):
        raise NotImplementedError('UploadTransformer.url_tag is abstract')

    def self_tag(self, _):
        raise NotImplementedError('UploadTransformer.self_tag is abstract')

    def transformer_matches_site(self, site: str) -> bool:
        raise NotImplementedError('UploadTransformer.transformer_matches_site is abstract')

    def transformer_matches_define(self, option: str) -> bool:
        """Tell whether ``option`` is one of the configured define options."""
        return option in self.define_options

    def if_tag(self, data: typing.Tuple[str, str, str]):
        """Render an ``[if=...]`` tag, picking the truthy or the ``[else]`` branch.

        The condition subject selects a ``transformer_matches_<subject>``
        method on this transformer; supported forms are ``subject==value``,
        ``subject!=value`` and ``subject in value1,value2``.

        Raises:
            ValueError: If no form matches or the subject has no such method.
        """
        condition, truthy_document, falsy_document = data[0], data[1], data[2]

        def branch(passed):
            return (truthy_document if passed else falsy_document) or ''

        # Equality first, then inequality, i.e. `site==foo` / `site!=foo`.
        for operator, negate in (('==', False), ('!=', True)):
            subject, found, operand = condition.partition(operator)
            if found and operand.strip():
                conditional_test = f'transformer_matches_{subject.strip()}'
                if hasattr(self, conditional_test):
                    matched = bool(getattr(self, conditional_test)(operand.strip()))
                    return branch(matched != negate)
        # Inclusion, i.e. `site in foo,bar`.
        subject, found, operand = condition.partition(' in ')
        if found and operand.strip():
            conditional_test = f'transformer_matches_{subject.strip()}'
            if hasattr(self, conditional_test):
                matches = (parameter.strip() for parameter in operand.split(','))
                return branch(any(getattr(self, conditional_test)(match) for match in matches))
        raise ValueError(f'Invalid [if][/if] tag condition: {condition}')

    def user_tag_root(self, data):
        """Render a ``[user]`` tag as a link to the first recognised site.

        Sites are tried in the order stored in the SiteSwitchTag; unknown ones
        are reported on stdout and skipped.

        Raises:
            TypeError: If none of the stored sites is recognised.
        """
        user_data: SiteSwitchTag = data[0]
        for site in user_data.sites:
            if site == 'generic':
                return self.url_tag((user_data['generic'], user_data.default))
            elif site == 'aryion':
                return self.url_tag((f'https://aryion.com/g4/user/{user_data["aryion"]}', user_data.default or user_data["aryion"]))
            elif site == 'furaffinity':
                return self.url_tag((f'https://furaffinity.net/user/{user_data["furaffinity"].replace("_", "")}', user_data.default or user_data['furaffinity']))
            elif site == 'weasyl':
                return self.url_tag((f'https://www.weasyl.com/~{user_data["weasyl"].replace(" ", "").lower()}', user_data.default or user_data['weasyl']))
            elif site == 'inkbunny':
                return self.url_tag((f'https://inkbunny.net/{user_data["inkbunny"]}', user_data.default or user_data['inkbunny']))
            elif site == 'sofurry':
                return self.url_tag((f'https://{user_data["sofurry"].replace(" ", "-").lower()}.sofurry.com', user_data.default or user_data['sofurry']))
            elif site == 'twitter':
                # Only the part after the last "@" is used as the handle.
                return self.url_tag((f'https://twitter.com/{user_data["twitter"].rsplit("@", 1)[-1]}', user_data.default or user_data['twitter']))
            elif site == 'mastodon':
                # Expects "user@instance" (optionally "@user@instance").
                *_, mastodon_user, mastodon_instance = user_data["mastodon"].rsplit('@', 2)
                return self.url_tag((f'https://{mastodon_instance.strip()}/@{mastodon_user.strip()}', user_data.default or user_data['mastodon']))
            else:
                print(f'Unknown site "{site}" found in user tag; ignoring...')
        raise TypeError('Invalid user SiteSwitchTag data - no matches found')

    def user_tag(self, data):
        """Unwrap the SiteSwitchTag built by the per-tag callback."""
        return data[0]

    def user_tag_generic(self, data):
        """Build a SiteSwitchTag for ``[generic=URL]name[/generic]`` inside ``[user]``."""
        attribute, inner = data[0], data[1]
        user = SiteSwitchTag(default=inner.strip())
        user['generic'] = attribute.strip()
        return user

    def siteurl_tag_root(self, data):
        """Render a ``[siteurl]`` tag using its generic URL, or nothing if it has none."""
        siteurl_data: SiteSwitchTag = data[0]
        if 'generic' in siteurl_data:
            return self.url_tag((siteurl_data['generic'], siteurl_data.default))
        return ''

    def siteurl_tag(self, data):
        """Unwrap the SiteSwitchTag built by the per-site callback."""
        return data[0]

    def siteurl_tag_generic(self, data):
        """Build a SiteSwitchTag for ``[generic=URL]text[/generic]`` inside ``[siteurl]``."""
        attribute, inner = data[0], data[1]
        siteurl = SiteSwitchTag(default=inner.strip())
        siteurl['generic'] = attribute.strip()
        return siteurl
class BbcodeTransformer(UploadTransformer):
    """Transformer that re-emits the formatting tags as BBCode."""

    @staticmethod
    def _enclose(tag, children):
        # Wrap the first child in [tag]...[/tag]; missing or blank content yields ''.
        content = children[0]
        if content is not None and content.strip():
            return f'[{tag}]{content}[/{tag}]'
        return ''

    def b_tag(self, data):
        return self._enclose('b', data)

    def i_tag(self, data):
        return self._enclose('i', data)

    def u_tag(self, data):
        return self._enclose('u', data)

    def center_tag(self, data):
        return self._enclose('center', data)

    def url_tag(self, data):
        """Render ``(url, label)`` as ``[url=...]label[/url]``.

        Without a URL only the stripped label is returned; without a label the
        URL doubles as the link text.
        """
        target, label = data[0], data[1]
        if target is None or not target.strip():
            return label.strip() if label else ''
        target = target.strip()
        shown = label if label and label.strip() else target
        return f'[url={target}]{shown}[/url]'
class MarkdownTransformer(UploadTransformer):
    """Transformer that emits Markdown; ``center_tag`` is not overridden here."""

    @staticmethod
    def _surround(children, opening, closing):
        # Put the first child between the two markers; blank content yields ''.
        content = children[0]
        if content is not None and content.strip():
            return f'{opening}{content}{closing}'
        return ''

    def b_tag(self, data):
        return self._surround(data, '**', '**')

    def i_tag(self, data):
        return self._surround(data, '*', '*')

    def u_tag(self, data):
        # Markdown should support simple HTML tags
        return self._surround(data, '<u>', '</u>')

    def url_tag(self, data):
        """Render ``(url, label)`` as ``[label](url)``.

        Without a URL only the stripped label is returned; without a label the
        URL doubles as the link text.
        """
        target, label = data[0], data[1]
        if target is None or not target.strip():
            return label.strip() if label else ''
        target = target.strip()
        shown = label if label and label.strip() else target
        return f'[{shown}]({target})'
class PlaintextTransformer(UploadTransformer):
    """Transformer that drops formatting and spells links and users out as text."""

    @staticmethod
    def _plain(children):
        # Formatting tags contribute only their content.
        content = children[0]
        return str(content) if content else ''

    def b_tag(self, data):
        return self._plain(data)

    def i_tag(self, data):
        return self._plain(data)

    def u_tag(self, data):
        return self._plain(data)

    def center_tag(self, data):
        return self._plain(data)

    def url_tag(self, data):
        """Render ``(url, label)`` as ``label: url``, or whichever part is present."""
        target, label = data[0], data[1]
        has_label = bool(label and label.strip())
        if target is None or not target.strip():
            return label if has_label else ''
        if not has_label:
            return target.strip()
        return f'{label}: {target.strip()}'

    def user_tag_root(self, data):
        """Render a ``[user]`` tag as "<name> on <site>" for the first known site.

        A ``generic`` entry stops the search and defers to the base class, as
        does running out of sites; unknown sites are reported and skipped.
        """
        user_data = data[0]
        site_titles = {
            'aryion': "Eka's Portal",
            'furaffinity': 'Fur Affinity',
            'weasyl': 'Weasyl',
            'inkbunny': 'Inkbunny',
            'sofurry': 'SoFurry',
        }
        for site in user_data.sites:
            if site == 'generic':
                break
            if site in site_titles:
                return f'{user_data[site]} on {site_titles[site]}'
            if site == 'twitter':
                handle = user_data["twitter"].rsplit("@", 1)[-1]
                return f'@{handle} on Twitter'
            if site == 'mastodon':
                *_, mastodon_user, mastodon_instance = user_data["mastodon"].rsplit('@', 2)
                return f'@{mastodon_user.strip()} on {mastodon_instance.strip()}'
            print(f'Unknown site "{site}" found in user tag; ignoring...')
        return super().user_tag_root(data)
class AryionTransformer(BbcodeTransformer):
    """BBCode output using the ``aryion`` site key for user and site URL tags."""

    def __init__(self, self_user=None, *args, **kwargs):
        """Bind ``[self][/self]`` to ``self_user``; other arguments go to the base class."""
        super().__init__(*args, **kwargs)

        def render_self(_children):
            if not self_user:
                raise ValueError('self_tag is unavailable for AryionTransformer - no user provided')
            return self.user_tag_root((SiteSwitchTag(aryion=self_user),))

        self.self_tag = render_self

    @staticmethod
    def transformer_matches_site(site: str) -> bool:
        """Tell whether ``site`` is one of the aliases listed for ``aryion``."""
        return site in SUPPORTED_USER_TAGS['aryion']

    def user_tag_root(self, data):
        """Use the ``:icon<name>:`` shortcut when an aryion user is given."""
        user_data: SiteSwitchTag = data[0]
        aryion_user = user_data['aryion']
        if not aryion_user:
            return super().user_tag_root(data)
        return f':icon{aryion_user}:'

    def siteurl_tag_root(self, data):
        """Link to the aryion URL when present, else defer to the base class."""
        siteurl_data: SiteSwitchTag = data[0]
        if 'aryion' not in siteurl_data:
            return super().siteurl_tag_root(data)
        return self.url_tag((siteurl_data['aryion'], siteurl_data.default))
class FuraffinityTransformer(BbcodeTransformer):
    """BBCode output using the ``furaffinity`` site key for user and site URL tags."""

    def __init__(self, self_user=None, *args, **kwargs):
        """Bind ``[self][/self]`` to ``self_user``; other arguments go to the base class."""
        super().__init__(*args, **kwargs)

        def render_self(_children):
            if not self_user:
                raise ValueError('self_tag is unavailable for FuraffinityTransformer - no user provided')
            return self.user_tag_root((SiteSwitchTag(furaffinity=self_user),))

        self.self_tag = render_self

    @staticmethod
    def transformer_matches_site(site: str) -> bool:
        """Tell whether ``site`` is one of the aliases listed for ``furaffinity``."""
        return site in SUPPORTED_USER_TAGS['furaffinity']

    def user_tag_root(self, data):
        """Use the ``:icon<name>:`` shortcut when a furaffinity user is given."""
        user_data: SiteSwitchTag = data[0]
        fa_user = user_data['furaffinity']
        if not fa_user:
            return super().user_tag_root(data)
        return f':icon{fa_user}:'

    def siteurl_tag_root(self, data):
        """Link to the furaffinity URL when present, else defer to the base class."""
        siteurl_data: SiteSwitchTag = data[0]
        if 'furaffinity' not in siteurl_data:
            return super().siteurl_tag_root(data)
        return self.url_tag((siteurl_data['furaffinity'], siteurl_data.default))
class WeasylTransformer(MarkdownTransformer):
    """Markdown output using the ``weasyl`` site key for user and site URL tags."""

    def __init__(self, self_user=None, *args, **kwargs):
        """Bind ``[self][/self]`` to ``self_user``; other arguments go to the base class."""
        super().__init__(*args, **kwargs)

        def self_tag(data):
            if self_user:
                return self.user_tag_root((SiteSwitchTag(weasyl=self_user),))
            raise ValueError('self_tag is unavailable for WeasylTransformer - no user provided')

        self.self_tag = self_tag

    @staticmethod
    def transformer_matches_site(site: str) -> bool:
        """Tell whether ``site`` is one of the aliases listed for ``weasyl``."""
        # Consult the alias table like the sibling transformers do, so that any
        # alias accepted by the grammar also satisfies [if=site==...].
        return site in SUPPORTED_USER_TAGS['weasyl']

    def center_tag(self, data):
        """Center content with an ``align-center`` div; blank content yields ''."""
        if data[0] is None or not data[0].strip():
            return ''
        return f'<div class="align-center">{data[0]}</div>'

    def user_tag_root(self, data):
        """Prefer the ``<!~name>`` form, then the fa/ib/sf cross-site forms.

        Falls back to the base class when none of those sites is present.
        """
        user_data: SiteSwitchTag = data[0]
        if user_data['weasyl']:
            return f'<!~{user_data["weasyl"].replace(" ", "")}>'
        for site in user_data.sites:
            if site == 'furaffinity':
                return f'<fa:{user_data["furaffinity"]}>'
            if site == 'inkbunny':
                return f'<ib:{user_data["inkbunny"]}>'
            if site == 'sofurry':
                return f'<sf:{user_data["sofurry"]}>'
        return super().user_tag_root(data)

    def siteurl_tag_root(self, data):
        """Link to the weasyl URL when present, else defer to the base class."""
        siteurl_data: SiteSwitchTag = data[0]
        if 'weasyl' in siteurl_data:
            return self.url_tag((siteurl_data['weasyl'], siteurl_data.default))
        return super().siteurl_tag_root(data)
class InkbunnyTransformer(BbcodeTransformer):
    """BBCode output using the ``inkbunny`` site key for user and site URL tags."""

    def __init__(self, self_user=None, *args, **kwargs):
        """Bind ``[self][/self]`` to ``self_user``; other arguments go to the base class."""
        super().__init__(*args, **kwargs)

        def render_self(_children):
            if not self_user:
                raise ValueError('self_tag is unavailable for InkbunnyTransformer - no user provided')
            return self.user_tag_root((SiteSwitchTag(inkbunny=self_user),))

        self.self_tag = render_self

    @staticmethod
    def transformer_matches_site(site: str) -> bool:
        """Tell whether ``site`` is one of the aliases listed for ``inkbunny``."""
        return site in SUPPORTED_USER_TAGS['inkbunny']

    def user_tag_root(self, data):
        """Prefer ``[iconname]``, then the fa/sf/weasyl cross-site tags.

        Falls back to the base class when none of those sites is present.
        """
        user_data: SiteSwitchTag = data[0]
        ib_user = user_data['inkbunny']
        if ib_user:
            return f'[iconname]{ib_user}[/iconname]'
        for site in user_data.sites:
            if site == 'furaffinity':
                return f'[fa]{user_data["furaffinity"]}[/fa]'
            elif site == 'sofurry':
                return f'[sf]{user_data["sofurry"]}[/sf]'
            elif site == 'weasyl':
                weasyl_name = user_data["weasyl"].replace(" ", "").lower()
                return f'[weasyl]{weasyl_name}[/weasyl]'
        return super().user_tag_root(data)

    def siteurl_tag_root(self, data):
        """Link to the inkbunny URL when present, else defer to the base class."""
        siteurl_data: SiteSwitchTag = data[0]
        if 'inkbunny' not in siteurl_data:
            return super().siteurl_tag_root(data)
        return self.url_tag((siteurl_data['inkbunny'], siteurl_data.default))
class SoFurryTransformer(BbcodeTransformer):
    """BBCode output using the ``sofurry`` site key for user and site URL tags."""

    def __init__(self, self_user=None, *args, **kwargs):
        """Bind ``[self][/self]`` to ``self_user``; other arguments go to the base class."""
        super().__init__(*args, **kwargs)

        def render_self(_children):
            if not self_user:
                raise ValueError('self_tag is unavailable for SoFurryTransformer - no user provided')
            return self.user_tag_root((SiteSwitchTag(sofurry=self_user),))

        self.self_tag = render_self

    @staticmethod
    def transformer_matches_site(site: str) -> bool:
        """Tell whether ``site`` is one of the aliases listed for ``sofurry``."""
        return site in SUPPORTED_USER_TAGS['sofurry']

    def user_tag_root(self, data):
        """Prefer ``:icon<name>:``, then the ``fa!`` / ``ib!`` cross-site forms.

        Falls back to the base class when none of those sites is present.
        """
        user_data: SiteSwitchTag = data[0]
        sf_user = user_data['sofurry']
        if sf_user:
            return f':icon{sf_user}:'
        for site in user_data.sites:
            if site == 'furaffinity':
                return f'fa!{user_data["furaffinity"]}'
            elif site == 'inkbunny':
                return f'ib!{user_data["inkbunny"]}'
        return super().user_tag_root(data)

    def siteurl_tag_root(self, data):
        """Link to the sofurry URL when present, else defer to the base class."""
        siteurl_data: SiteSwitchTag = data[0]
        if 'sofurry' not in siteurl_data:
            return super().siteurl_tag_root(data)
        return self.url_tag((siteurl_data['sofurry'], siteurl_data.default))
def validate_parsed_tree(parsed_tree):
    """Reject trees in which a b/i/u/url tag contains another tag of the same kind.

    Raises:
        DescriptionParsingError: On the first such nested tag found.
    """
    non_nestable = {'b_tag', 'i_tag', 'u_tag', 'url_tag'}
    for outer in parsed_tree.iter_subtrees_topdown():
        if outer.data not in non_nestable:
            continue
        rule_name = str(outer.data)
        # find_data also yields `outer` itself, hence the inequality check.
        for inner in outer.find_data(rule_name):
            if outer != inner:
                # NOTE(review): the position is read from the node's rule name
                # token; confirm it points at the input and not the grammar.
                raise DescriptionParsingError(f'Invalid nested {rule_name} on line {inner.data.line} column {inner.data.column}')
def parse_description(description_path, config, out_dir, ignore_empty_files=False, define_options=None):
    """Convert a description document into one output file per configured website.

    The document is read through ``libreoffice --cat``, parsed with
    DESCRIPTION_PARSER, validated, and rendered with each website's transformer
    into ``out_dir``.

    Args:
        description_path: Path of the document handed to LibreOffice.
        config: Mapping of website name to the username to use for ``[self]``.
        out_dir: Directory receiving the ``desc_*`` files.
        ignore_empty_files: When true, an empty description is reported and
            results in empty output files instead of an error.
        define_options: Option names made available to ``[if=define...]``
            conditions. Defaults to an empty set.

    Raises:
        RuntimeError: The description is empty and ``ignore_empty_files`` is false.
        DescriptionParsingError: The description is malformed.
        ExceptionGroup: ``config`` has unsupported websites or invalid usernames.
    """
    if define_options is None:
        define_options = set()

    # A running Writer instance is only warned about, never treated as fatal.
    for proc in psutil.process_iter(['cmdline']):
        cmdline = proc.info['cmdline']
        if cmdline and 'libreoffice' in cmdline[0] and '--writer' in cmdline[1:]:
            if ignore_empty_files:
                print('WARN: LibreOffice Writer appears to be running. This command may output empty files until it is closed.')
            else:
                print('WARN: LibreOffice Writer appears to be running. This command may raise an error until it is closed.')
            break

    with subprocess.Popen(('libreoffice', '--cat', description_path), stdout=subprocess.PIPE) as ps:
        description = '\n'.join(line.strip() for line in io.TextIOWrapper(ps.stdout, encoding='utf-8-sig'))
    if not description or re.match(r'^\s+$', description):
        error = f'Description processing returned empty file: libreoffice --cat {description_path}'
        if ignore_empty_files:
            print(f'Ignoring error ({error})')
        else:
            raise RuntimeError(error)

    # The grammar requires at least one document, so an empty description must
    # bypass the parser; otherwise the error "ignored" above would resurface
    # as a parsing failure.
    parsed_description = None
    if description.strip():
        try:
            parsed_description = DESCRIPTION_PARSER.parse(description)
        except lark.UnexpectedInput as e:
            input_error = e.match_examples(DESCRIPTION_PARSER.parse, {
                'Unclosed tag': ['[b]text', '[i]text', '[u]text', '[url]text'],
                'Unopened tag': ['text[/b]', 'text[/i]', 'text[/u]', 'text[/url]'],
                'Unknown tag': ['[invalid]text[/invalid]'],
                'Missing tag brackets': [
                    'b]text[/b]', '[btext[/b]', '[b]text/b]', '[b]text[/b',
                    'i]text[/i]', '[itext[/i]', '[i]text/i]', '[i]text[/i',
                    'u]text[/u]', '[utext[/u]', '[u]text/u]', '[u]text[/u',
                ],
                'Missing tag slash': ['[b]text[b]', '[i]text[i]', '[u]text[u]'],
                'Empty switch tag': ['[user][/user]', '[siteurl][/siteurl]'],
                'Empty user tag': [
                    '[user][aryion][/aryion][/user]',
                    '[user][furaffinity][/furaffinity][/user]',
                    '[user][inkbunny][/inkbunny][/user]',
                    '[user][sofurry][/sofurry][/user]',
                    '[user][weasyl][/weasyl][/user]',
                    '[user][twitter][/twitter][/user]',
                    '[user][mastodon][/mastodon][/user]',
                    '[user][aryion=][/aryion][/user]',
                    '[user][furaffinity=][/furaffinity][/user]',
                    '[user][inkbunny=][/inkbunny][/user]',
                    '[user][sofurry=][/sofurry][/user]',
                    '[user][weasyl=][/weasyl][/user]',
                    '[user][twitter=][/twitter][/user]',
                    '[user][mastodon=][/mastodon][/user]',
                ],
                'Empty siteurl tag': [
                    '[siteurl][aryion][/aryion][/siteurl]',
                    '[siteurl][furaffinity][/furaffinity][/siteurl]',
                    '[siteurl][inkbunny][/inkbunny][/siteurl]',
                    '[siteurl][sofurry][/sofurry][/siteurl]',
                    # The comma after this entry was missing, which merged it
                    # with the next example into one string.
                    '[siteurl][weasyl][/weasyl][/siteurl]',
                    '[siteurl][aryion=][/aryion][/siteurl]',
                    '[siteurl][furaffinity=][/furaffinity][/siteurl]',
                    '[siteurl][inkbunny=][/inkbunny][/siteurl]',
                    '[siteurl][sofurry=][/sofurry][/siteurl]',
                    '[siteurl][weasyl=][/weasyl][/siteurl]',
                ],
            })
            raise DescriptionParsingError(f'Unable to parse description. {input_error or "Unknown grammar error"} in line {e.line} column {e.column}:\n{e.get_context(description)}') from e
        validate_parsed_tree(parsed_description)

    transformations = {
        'aryion': ('desc_aryion.txt', AryionTransformer),
        'furaffinity': ('desc_furaffinity.txt', FuraffinityTransformer),
        'inkbunny': ('desc_inkbunny.txt', InkbunnyTransformer),
        'sofurry': ('desc_sofurry.txt', SoFurryTransformer),
        'weasyl': ('desc_weasyl.md', WeasylTransformer),
    }

    # Validate the configuration, collecting every problem before raising.
    errors = []
    for (website, username) in config.items():
        if website not in transformations:
            errors.append(ValueError(f'Website \'{website}\' is unsupported'))
        elif not isinstance(username, str):
            errors.append(ValueError(f'Website \'{website}\' has invalid username \'{json.dumps(username)}\''))
        elif username.strip() == '':
            errors.append(ValueError(f'Website \'{website}\' has empty username'))
    if not any(ws in config for ws in transformations):
        errors.append(ValueError('No valid websites found'))
    if errors:
        raise ExceptionGroup('Invalid configuration for description parsing', errors)

    # Create descriptions; opening in 'w' mode already leaves an empty file
    # behind when there is nothing to write.
    RE_MULTIPLE_EMPTY_LINES = re.compile(r'\n\n+')
    for (website, username) in config.items():
        (filepath, transformer) = transformations[website]
        with open(os.path.join(out_dir, filepath), 'w') as f:
            if parsed_description is not None:
                transformed_description = transformer(self_user=username, define_options=define_options).transform(parsed_description)
                cleaned_description = RE_MULTIPLE_EMPTY_LINES.sub('\n\n', transformed_description).strip()
                if cleaned_description:
                    f.write(cleaned_description)
                    f.write('\n')