Improve error raising and add initial tests

This commit is contained in:
Bad Manners 2024-01-25 23:59:29 -03:00
parent f3fabf2d8a
commit dbd93e4956
22 changed files with 268 additions and 55 deletions

View file

@ -57,6 +57,7 @@ Input descriptions should be formatted as BBCode. The following tags are accepte
```bbcode
[b]Bold text[/b]
[i]Italic text[/i]
[u]Underline text[/u]
[url=https://github.com/BadMannersXYZ]URL link[/url]
```

View file

@ -37,7 +37,7 @@ DESCRIPTION_GRAMMAR = r"""
url_tag: "[url" ["=" [URL]] "]" [document_list] "[/url]"
self_tag: "[self][/self]"
if_tag: "[if=" CONDITION "]" [document_list] "[/if]" [ "[else]" document_list "[/else]" ]
if_tag: "[if=" CONDITION "]" [document_list] "[/if]" [ "[else]" [document_list] "[/else]" ]
user_tag_root: "[user]" user_tag "[/user]"
user_tag: user_tag_generic | """
@ -61,12 +61,17 @@ for tag, alts in SUPPORTED_SITE_TAGS.items():
DESCRIPTION_GRAMMAR += r"""
siteurl_tag_generic: "[generic=" URL "]" TEXT "[/generic]"
USERNAME: / *[a-zA-Z0-9][a-zA-Z0-9 _-]*/
USERNAME: / *@?[a-zA-Z0-9][a-zA-Z0-9 @._-]*/
URL: / *(https?:\/\/)?[^\]]+ */
TEXT: /([^\[]|[ \t\r\n])+/
CONDITION: / *[a-z]+ *(==|!=) *[a-zA-Z0-9_-]+ *| *[a-z]+ +in +([a-zA-Z0-9_-]+ *, *)*[a-zA-Z0-9_-]+ */
"""
DESCRIPTION_PARSER = lark.Lark(DESCRIPTION_GRAMMAR, parser='lalr')
class DescriptionParsingError(ValueError):
    """Signal that a description could not be parsed or failed validation.

    Subclasses ``ValueError`` so callers that already handle bad input
    values keep working, while allowing description-specific handling.
    """
class SiteSwitchTag:
def __init__(self, default: typing.Optional[str]=None, **kwargs):
@ -186,7 +191,7 @@ class UploadTransformer(lark.Transformer):
if len(inclusion_condition) == 2 and inclusion_condition[1].strip():
conditional_test = f'transformer_matches_{inclusion_condition[0].strip()}'
if hasattr(self, conditional_test):
matches = (parameter.strip() for parameter in equality_condition[1].split(','))
matches = (parameter.strip() for parameter in inclusion_condition[1].split(','))
if any(getattr(self, conditional_test)(match) for match in matches):
return truthy_document or ''
return falsy_document or ''
@ -390,14 +395,13 @@ class WeasylTransformer(MarkdownTransformer):
user_data: SiteSwitchTag = data[0]
if user_data['weasyl']:
return f'<!~{user_data["weasyl"].replace(" ", "")}>'
if user_data.default is None:
for site in user_data.sites:
if site == 'furaffinity':
return f'<fa:{user_data["furaffinity"]}>'
if site == 'inkbunny':
return f'<ib:{user_data["inkbunny"]}>'
if site == 'sofurry':
return f'<sf:{user_data["sofurry"]}>'
for site in user_data.sites:
if site == 'furaffinity':
return f'<fa:{user_data["furaffinity"]}>'
if site == 'inkbunny':
return f'<ib:{user_data["inkbunny"]}>'
if site == 'sofurry':
return f'<sf:{user_data["sofurry"]}>'
return super().user_tag_root(data)
def siteurl_tag_root(self, data):
@ -423,14 +427,13 @@ class InkbunnyTransformer(BbcodeTransformer):
user_data: SiteSwitchTag = data[0]
if user_data['inkbunny']:
return f'[iconname]{user_data["inkbunny"]}[/iconname]'
if user_data.default is None:
for site in user_data.sites:
if site == 'furaffinity':
return f'[fa]{user_data["furaffinity"]}[/fa]'
if site == 'sofurry':
return f'[sf]{user_data["sofurry"]}[/sf]'
if site == 'weasyl':
return f'[weasyl]{user_data["weasyl"].replace(" ", "").lower()}[/weasyl]'
for site in user_data.sites:
if site == 'furaffinity':
return f'[fa]{user_data["furaffinity"]}[/fa]'
if site == 'sofurry':
return f'[sf]{user_data["sofurry"]}[/sf]'
if site == 'weasyl':
return f'[weasyl]{user_data["weasyl"].replace(" ", "").lower()}[/weasyl]'
return super().user_tag_root(data)
def siteurl_tag_root(self, data):
@ -456,12 +459,11 @@ class SoFurryTransformer(BbcodeTransformer):
user_data: SiteSwitchTag = data[0]
if user_data['sofurry']:
return f':icon{user_data["sofurry"]}:'
if user_data.default is None:
for site in user_data.sites:
if site == 'furaffinity':
return f'fa!{user_data["furaffinity"]}'
if site == 'inkbunny':
return f'ib!{user_data["inkbunny"]}'
for site in user_data.sites:
if site == 'furaffinity':
return f'fa!{user_data["furaffinity"]}'
if site == 'inkbunny':
return f'ib!{user_data["inkbunny"]}'
return super().user_tag_root(data)
def siteurl_tag_root(self, data):
@ -471,6 +473,14 @@ class SoFurryTransformer(BbcodeTransformer):
return super().siteurl_tag_root(data)
def _node_position(node):
    """Return a ``(line, column)`` pair locating ``node`` in the parsed input.

    ``Tree.data`` only holds the rule name, so it cannot be used to locate
    the node in the description. Positions recorded on ``node.meta`` are
    preferred (they are only populated when the parser was built with
    ``propagate_positions=True``); otherwise the earliest token contained in
    the subtree is used as an approximation. ``'?'`` is returned for a
    coordinate that cannot be determined.
    """
    meta = node.meta
    line = getattr(meta, 'line', None)
    column = getattr(meta, 'column', None)
    if line is None or column is None:
        # scan_values() does not guarantee document order, so pick the
        # smallest (line, column) among the positioned tokens.
        positions = (
            (token.line, token.column)
            for token in node.scan_values(lambda value: isinstance(value, lark.Token))
            if token.line is not None and token.column is not None
        )
        line, column = min(positions, default=(None, None))
    return ('?' if line is None else line, '?' if column is None else column)


def validate_parsed_tree(parsed_tree):
    """Reject parse trees where a formatting tag is nested inside itself.

    Walks every subtree top-down; for each ``b_tag``, ``i_tag``, ``u_tag`` or
    ``url_tag`` node, any descendant of the same rule is considered invalid.

    Raises:
        DescriptionParsingError: on the first nested tag of the same type,
            with the line and column of the offending inner tag.
    """
    non_nestable_tags = {'b_tag', 'i_tag', 'u_tag', 'url_tag'}
    for node in parsed_tree.iter_subtrees_topdown():
        if node.data not in non_nestable_tags:
            continue
        node_type = str(node.data)
        for nested in node.find_data(node_type):
            # find_data() also yields the node itself; only true descendants
            # count as nesting.
            if nested is node:
                continue
            line, column = _node_position(nested)
            raise DescriptionParsingError(f'Invalid nested {node_type} on line {line} column {column}')
def parse_description(description_path, config, out_dir, ignore_empty_files=False, define_options=set()):
for proc in psutil.process_iter(['cmdline']):
if proc.info['cmdline'] and 'libreoffice' in proc.info['cmdline'][0] and '--writer' in proc.info['cmdline'][1:]:
@ -480,8 +490,9 @@ def parse_description(description_path, config, out_dir, ignore_empty_files=Fals
print('WARN: LibreOffice Writer appears to be running. This command may raise an error until it is closed.')
break
ps = subprocess.Popen(('libreoffice', '--cat', description_path), stdout=subprocess.PIPE)
description = '\n'.join(line.strip() for line in io.TextIOWrapper(ps.stdout, encoding='utf-8-sig'))
description = ''
with subprocess.Popen(('libreoffice', '--cat', description_path), stdout=subprocess.PIPE) as ps:
description = '\n'.join(line.strip() for line in io.TextIOWrapper(ps.stdout, encoding='utf-8-sig'))
if not description or re.match(r'^\s+$', description):
error = f'Description processing returned empty file: libreoffice --cat {description_path}'
if ignore_empty_files:
@ -489,7 +500,21 @@ def parse_description(description_path, config, out_dir, ignore_empty_files=Fals
else:
raise RuntimeError(error)
parsed_description = lark.Lark(DESCRIPTION_GRAMMAR, parser='lalr').parse(description)
try:
    parsed_description = DESCRIPTION_PARSER.parse(description)
except lark.UnexpectedInput as e:
    # Classify the failure by replaying small known-bad snippets through the
    # same parser and comparing the resulting error state with this one.
    input_error = e.match_examples(DESCRIPTION_PARSER.parse, {
        'Unclosed tag': ['[b]text', '[i]text', '[u]text', '[url]text'],
        'Unopened tag': ['text[/b]', 'text[/i]', 'text[/u]', 'text[/url]'],
        'Unknown tag': ['[invalid]text[/invalid]'],
        'Missing tag brackets': [
            'b]text[/b]', '[btext[/b]', '[b]text/b]', '[b]text[/b',
            'i]text[/i]', '[itext[/i]', '[i]text/i]', '[i]text[/i',
            'u]text[/u]', '[utext[/u]', '[u]text/u]', '[u]text[/u',
        ],
        'Missing tag slash': ['[b]text[b]', '[i]text[i]', '[u]text[u]'],
        'Empty switch tag': ['[user][/user]', '[siteurl][/siteurl]'],
        'Empty user tag': [
            '[user][aryion][/aryion][/user]',
            '[user][furaffinity][/furaffinity][/user]',
            '[user][inkbunny][/inkbunny][/user]',
            '[user][sofurry][/sofurry][/user]',
            '[user][weasyl][/weasyl][/user]',
            '[user][twitter][/twitter][/user]',
            '[user][mastodon][/mastodon][/user]',
            '[user][aryion=][/aryion][/user]',
            '[user][furaffinity=][/furaffinity][/user]',
            '[user][inkbunny=][/inkbunny][/user]',
            '[user][sofurry=][/sofurry][/user]',
            '[user][weasyl=][/weasyl][/user]',
            '[user][twitter=][/twitter][/user]',
            '[user][mastodon=][/mastodon][/user]',
        ],
        'Empty siteurl tag': [
            '[siteurl][aryion][/aryion][/siteurl]',
            '[siteurl][furaffinity][/furaffinity][/siteurl]',
            '[siteurl][inkbunny][/inkbunny][/siteurl]',
            '[siteurl][sofurry][/sofurry][/siteurl]',
            # The comma after the next entry was missing, which silently
            # concatenated it with the following string into one example.
            '[siteurl][weasyl][/weasyl][/siteurl]',
            '[siteurl][aryion=][/aryion][/siteurl]',
            '[siteurl][furaffinity=][/furaffinity][/siteurl]',
            '[siteurl][inkbunny=][/inkbunny][/siteurl]',
            '[siteurl][sofurry=][/sofurry][/siteurl]',
            '[siteurl][weasyl=][/weasyl][/siteurl]',
        ],
    })
    # Chain the original lark error so the full parser state stays available.
    raise DescriptionParsingError(f'Unable to parse description. {input_error or "Unknown grammar error"} in line {e.line} column {e.column}:\n{e.get_context(description)}') from e
validate_parsed_tree(parsed_description)
transformations = {
'aryion': ('desc_aryion.txt', AryionTransformer),
'furaffinity': ('desc_furaffinity.txt', FuraffinityTransformer),

View file

@ -1,3 +1,4 @@
argcomplete==3.2.1
lark==1.1.8
parameterized==0.9.0
psutil==5.9.6

View file

@ -39,35 +39,35 @@ def parse_story(story_path, config, out_dir, temp_dir, ignore_empty_files=False)
RE_EMPTY_LINE = re.compile(r'^$')
RE_SEQUENTIAL_EQUAL_SIGNS = re.compile(r'=(?==)')
is_only_empty_lines = True
ps = subprocess.Popen(('libreoffice', '--cat', story_path), stdout=subprocess.PIPE)
# Mangle output files so that .RTF will always have a single LF between lines, and .TXT/.MD can have one or two CRLF
with open(txt_out_path, 'w', newline='\r\n') as txt_out, open(md_out_path, 'w', newline='\r\n') as md_out, open(txt_tmp_path, 'w') as txt_tmp:
needs_empty_line = False
for line in io.TextIOWrapper(ps.stdout, encoding='utf-8-sig'):
# Remove empty lines
line = line.strip()
md_line = line
if RE_EMPTY_LINE.search(line) and not is_only_empty_lines:
needs_empty_line = True
else:
if should_create_md_story:
md_line = RE_SEQUENTIAL_EQUAL_SIGNS.sub('= ', line.replace(r'*', r'\*'))
if is_only_empty_lines:
txt_out.writelines((line,))
md_out.writelines((md_line,))
txt_tmp.writelines((line,))
is_only_empty_lines = False
with subprocess.Popen(('libreoffice', '--cat', story_path), stdout=subprocess.PIPE) as ps:
# Mangle output files so that .RTF will always have a single LF between lines, and .TXT/.MD can have one or two CRLF
with open(txt_out_path, 'w', newline='\r\n') as txt_out, open(md_out_path, 'w', newline='\r\n') as md_out, open(txt_tmp_path, 'w') as txt_tmp:
needs_empty_line = False
for line in io.TextIOWrapper(ps.stdout, encoding='utf-8-sig'):
# Remove empty lines
line = line.strip()
md_line = line
if RE_EMPTY_LINE.search(line) and not is_only_empty_lines:
needs_empty_line = True
else:
if needs_empty_line:
txt_out.writelines(('\n\n', line))
md_out.writelines(('\n\n', md_line))
needs_empty_line = False
if should_create_md_story:
md_line = RE_SEQUENTIAL_EQUAL_SIGNS.sub('= ', line.replace(r'*', r'\*'))
if is_only_empty_lines:
txt_out.writelines((line,))
md_out.writelines((md_line,))
txt_tmp.writelines((line,))
is_only_empty_lines = False
else:
txt_out.writelines(('\n', line))
md_out.writelines(('\n', md_line))
txt_tmp.writelines(('\n', line))
txt_out.writelines(('\n'))
md_out.writelines(('\n'))
if needs_empty_line:
txt_out.writelines(('\n\n', line))
md_out.writelines(('\n\n', md_line))
needs_empty_line = False
else:
txt_out.writelines(('\n', line))
md_out.writelines(('\n', md_line))
txt_tmp.writelines(('\n', line))
txt_out.writelines(('\n'))
md_out.writelines(('\n'))
if is_only_empty_lines:
error = f'Story processing returned empty file: libreoffice --cat {story_path}'
if ignore_empty_files:

55
test.py Normal file
View file

@ -0,0 +1,55 @@
#!/usr/bin/env python
import glob
import os.path
from parameterized import parameterized
import re
import tempfile
import unittest
import warnings
from description import parse_description, DescriptionParsingError
class TestParseDescription(unittest.TestCase):
    """Fixture-driven tests for ``parse_description``.

    Fixtures are discovered under ``./test/description`` (relative to the
    working directory) when the class body is evaluated:

    * ``input_<N>.txt`` files must parse, and every file in the matching
      ``output_<N>/`` directory must be reproduced byte-for-byte.
    * ``error_*.txt`` files must raise ``DescriptionParsingError`` without
      leaving any file in the output directory.
    """

    # Site name -> username handed to parse_description as its config.
    config = {
        'aryion': 'UserAryion',
        'furaffinity': 'UserFuraffinity',
        'inkbunny': 'UserInkbunny',
        'sofurry': 'UserSoFurry',
        'weasyl': 'UserWeasyl',
    }
    # Options forwarded to parse_description as define_options.
    define_options = {'test_parse_description'}

    def setUp(self):
        """Create a fresh output directory and silence ResourceWarning."""
        self.tmpdir = tempfile.TemporaryDirectory(ignore_cleanup_errors=True)
        warnings.simplefilter('ignore', ResourceWarning)

    def tearDown(self):
        """Remove the output directory and re-enable ResourceWarning."""
        self.tmpdir.cleanup()
        warnings.simplefilter('default', ResourceWarning)

    @parameterized.expand([
        (re.match(r'.*(input_\d+)\.txt', v)[1], v) for v in sorted(glob.iglob('./test/description/input_*.txt'))
    ])
    def test_parse_success(self, name, test_description):
        """Parse a valid input and compare each generated file to its fixture."""
        out_dir = self.tmpdir.name
        parse_description(test_description, self.config, out_dir, define_options=self.define_options)
        # name is 'input_<N>'; dropping the 'input_' prefix gives the fixture id.
        expected_output_files = sorted(glob.glob(f'./test/description/output_{name[6:]}/*'))
        # Without this check a missing fixture directory would pass vacuously.
        self.assertTrue(expected_output_files, f'No expected output fixtures found for {name}')
        for expected_output_file in expected_output_files:
            received_output_file = os.path.join(out_dir, os.path.basename(expected_output_file))
            self.assertTrue(os.path.isfile(received_output_file), f'Missing output file: {received_output_file}')
            with open(received_output_file, 'r') as f:
                received_description = f.read()
            with open(expected_output_file, 'r') as f:
                expected_description = f.read()
            self.assertEqual(received_description, expected_description)

    @parameterized.expand([
        (re.match(r'.*(error_.+)\.txt', v)[1], v) for v in sorted(glob.iglob('./test/description/error_*.txt'))
    ])
    def test_parse_errors(self, _, test_description):
        """Invalid inputs must raise and must not produce any output file."""
        with self.assertRaises(DescriptionParsingError):
            parse_description(test_description, self.config, self.tmpdir.name, define_options=self.define_options)
        self.assertListEqual(glob.glob(os.path.join(self.tmpdir.name, '*')), [])


if __name__ == '__main__':
    unittest.main()

View file

@ -0,0 +1 @@
[url=https://example.com]Nested [url=https://example.net]URLs[/url][/url]

View file

@ -0,0 +1 @@
ZERO[b]ONE[i]TWO[u]THREE[b]FOUR[url=https://example.com]FIVE[/url]FOUR[/b]THREE[/u]TWO[/i]ONE[/b]ZERO

View file

@ -0,0 +1 @@
[i]Hello world!

View file

@ -0,0 +1 @@
Hello world![/u]

View file

@ -0,0 +1 @@
[user][unknown=Foo]Bar[/unknown][/user]

View file

@ -0,0 +1,9 @@
[b]Hello world![/b]
This is just a [u]simple[/u] test to show that basic functionality of [url=https://github.com/BadMannersXYZ/upload-generator]upload-generator[/url] [i]works[/i]. [if=define==test_parse_description]And this is running in a unit test.[/if][else]Why did you parse this outside of a unit test?![/else]
Reminder that I am [self][/self]!
My friend: [user][sofurry=FriendSoFurry][fa=FriendFa][mastodon=@FriendMastodon@example.org]Friend123[/mastodon][/fa][/sofurry][/user][if=site in ib,aryion,weasyl] (I dunno his account here...)[/if]
[siteurl][eka=https://example.com/eka][inkbunny=https://example.com/ib][generic=https://example.com/generic]Check this page![/generic][/inkbunny][/eka][/siteurl]

View file

@ -0,0 +1,12 @@
[self][/self]
[if=site==eka] -> [/if][user][eka=EkaPerson]EkaName[/eka][/user] [user][eka]EkaPerson[/eka][/user]
[if=site==fa] -> [/if][user][fa=FaPerson]FaName[/fa][/user] [user][fa]FaPerson[/fa][/user]
[if=site==ib] -> [/if][user][ib=IbPerson]IbName[/ib][/user] [user][ib]IbPerson[/ib][/user]
[if=site==sofurry] -> [/if][user][sf=SfPerson]SfName[/sf][/user] [user][sf]SfPerson[/sf][/user]
[if=site==weasyl] -> [/if][user][weasyl=WeasylPerson]WeasylName[/weasyl][/user] [user][weasyl]WeasylPerson[/weasyl][/user]
[user][twitter=XPerson]XName[/twitter][/user] [user][twitter]XPerson[/twitter][/user]
[user][mastodon=MastodonPerson@example.com]MastodonName[/mastodon][/user] [user][mastodon]MastodonPerson@example.com[/mastodon][/user]
[user][twitter=Ignored][generic=https://example.net/GenericPerson]GenericName[/generic][/twitter][/user]
[siteurl][aryion=https://example.com/aryion][furaffinity=https://example.com/furaffinity][inkbunny=https://example.com/inkbunny][sofurry=https://example.com/sofurry][generic=https://example.com/generic]Link[/generic][/sofurry][/inkbunny][/furaffinity][/aryion][/siteurl]

View file

@ -0,0 +1,9 @@
[b]Hello world![/b]
This is just a [u]simple[/u] test to show that basic functionality of [url=https://github.com/BadMannersXYZ/upload-generator]upload-generator[/url] [i]works[/i]. And this is running in a unit test.
Reminder that I am :iconUserAryion:!
My friend: [url=https://example.org/@FriendMastodon]Friend123[/url] (I dunno his account here...)
[url=https://example.com/eka]Check this page![/url]

View file

@ -0,0 +1,9 @@
[b]Hello world![/b]
This is just a [u]simple[/u] test to show that basic functionality of [url=https://github.com/BadMannersXYZ/upload-generator]upload-generator[/url] [i]works[/i]. And this is running in a unit test.
Reminder that I am :iconUserFuraffinity:!
My friend: :iconFriendFa:
[url=https://example.com/generic]Check this page![/url]

View file

@ -0,0 +1,9 @@
[b]Hello world![/b]
This is just a [u]simple[/u] test to show that basic functionality of [url=https://github.com/BadMannersXYZ/upload-generator]upload-generator[/url] [i]works[/i]. And this is running in a unit test.
Reminder that I am [iconname]UserInkbunny[/iconname]!
My friend: [fa]FriendFa[/fa] (I dunno his account here...)
[url=https://example.com/ib]Check this page![/url]

View file

@ -0,0 +1,9 @@
[b]Hello world![/b]
This is just a [u]simple[/u] test to show that basic functionality of [url=https://github.com/BadMannersXYZ/upload-generator]upload-generator[/url] [i]works[/i]. And this is running in a unit test.
Reminder that I am :iconUserSoFurry:!
My friend: :iconFriendSoFurry:
[url=https://example.com/generic]Check this page![/url]

View file

@ -0,0 +1,9 @@
**Hello world!**
This is just a <u>simple</u> test to show that basic functionality of [upload-generator](https://github.com/BadMannersXYZ/upload-generator) *works*. And this is running in a unit test.
Reminder that I am <!~UserWeasyl>!
My friend: <fa:FriendFa> (I dunno his account here...)
[Check this page!](https://example.com/generic)

View file

@ -0,0 +1,12 @@
:iconUserAryion:
-> :iconEkaPerson: :iconEkaPerson:
[url=https://furaffinity.net/user/FaPerson]FaName[/url] [url=https://furaffinity.net/user/FaPerson]FaPerson[/url]
[url=https://inkbunny.net/IbPerson]IbName[/url] [url=https://inkbunny.net/IbPerson]IbPerson[/url]
[url=https://sfperson.sofurry.com]SfName[/url] [url=https://sfperson.sofurry.com]SfPerson[/url]
[url=https://www.weasyl.com/~weasylperson]WeasylName[/url] [url=https://www.weasyl.com/~weasylperson]WeasylPerson[/url]
[url=https://twitter.com/XPerson]XName[/url] [url=https://twitter.com/XPerson]XPerson[/url]
[url=https://example.com/@MastodonPerson]MastodonName[/url] [url=https://example.com/@MastodonPerson]MastodonPerson@example.com[/url]
[url=https://example.net/GenericPerson]GenericName[/url]
[url=https://example.com/aryion]Link[/url]

View file

@ -0,0 +1,12 @@
:iconUserFuraffinity:
[url=https://aryion.com/g4/user/EkaPerson]EkaName[/url] [url=https://aryion.com/g4/user/EkaPerson]EkaPerson[/url]
-> :iconFaPerson: :iconFaPerson:
[url=https://inkbunny.net/IbPerson]IbName[/url] [url=https://inkbunny.net/IbPerson]IbPerson[/url]
[url=https://sfperson.sofurry.com]SfName[/url] [url=https://sfperson.sofurry.com]SfPerson[/url]
[url=https://www.weasyl.com/~weasylperson]WeasylName[/url] [url=https://www.weasyl.com/~weasylperson]WeasylPerson[/url]
[url=https://twitter.com/XPerson]XName[/url] [url=https://twitter.com/XPerson]XPerson[/url]
[url=https://example.com/@MastodonPerson]MastodonName[/url] [url=https://example.com/@MastodonPerson]MastodonPerson@example.com[/url]
[url=https://example.net/GenericPerson]GenericName[/url]
[url=https://example.com/furaffinity]Link[/url]

View file

@ -0,0 +1,12 @@
[iconname]UserInkbunny[/iconname]
[url=https://aryion.com/g4/user/EkaPerson]EkaName[/url] [url=https://aryion.com/g4/user/EkaPerson]EkaPerson[/url]
[fa]FaPerson[/fa] [fa]FaPerson[/fa]
-> [iconname]IbPerson[/iconname] [iconname]IbPerson[/iconname]
[sf]SfPerson[/sf] [sf]SfPerson[/sf]
[weasyl]weasylperson[/weasyl] [weasyl]weasylperson[/weasyl]
[url=https://twitter.com/XPerson]XName[/url] [url=https://twitter.com/XPerson]XPerson[/url]
[url=https://example.com/@MastodonPerson]MastodonName[/url] [url=https://example.com/@MastodonPerson]MastodonPerson@example.com[/url]
[url=https://example.net/GenericPerson]GenericName[/url]
[url=https://example.com/inkbunny]Link[/url]

View file

@ -0,0 +1,12 @@
:iconUserSoFurry:
[url=https://aryion.com/g4/user/EkaPerson]EkaName[/url] [url=https://aryion.com/g4/user/EkaPerson]EkaPerson[/url]
fa!FaPerson fa!FaPerson
ib!IbPerson ib!IbPerson
-> :iconSfPerson: :iconSfPerson:
[url=https://www.weasyl.com/~weasylperson]WeasylName[/url] [url=https://www.weasyl.com/~weasylperson]WeasylPerson[/url]
[url=https://twitter.com/XPerson]XName[/url] [url=https://twitter.com/XPerson]XPerson[/url]
[url=https://example.com/@MastodonPerson]MastodonName[/url] [url=https://example.com/@MastodonPerson]MastodonPerson@example.com[/url]
[url=https://example.net/GenericPerson]GenericName[/url]
[url=https://example.com/sofurry]Link[/url]

View file

@ -0,0 +1,12 @@
<!~UserWeasyl>
[EkaName](https://aryion.com/g4/user/EkaPerson) [EkaPerson](https://aryion.com/g4/user/EkaPerson)
<fa:FaPerson> <fa:FaPerson>
<ib:IbPerson> <ib:IbPerson>
<sf:SfPerson> <sf:SfPerson>
-> <!~WeasylPerson> <!~WeasylPerson>
[XName](https://twitter.com/XPerson) [XPerson](https://twitter.com/XPerson)
[MastodonName](https://example.com/@MastodonPerson) [MastodonPerson@example.com](https://example.com/@MastodonPerson)
[GenericName](https://example.net/GenericPerson)
[Link](https://example.com/generic)