Improvements to text generation and error handling

- Warn when a LibreOffice Writer instance is already running
- Better handling of leading/trailing whitespace for descriptions
- Create .md file for Weasyl
This commit is contained in:
Bad Manners 2023-11-20 14:32:19 -03:00
parent 468e219ca8
commit 68603a93d6
3 changed files with 60 additions and 26 deletions

View file

@ -3,6 +3,7 @@ import io
import json import json
import lark import lark
import os import os
import psutil
import re import re
import subprocess import subprocess
import typing import typing
@ -76,7 +77,7 @@ class UserTag:
class UploadTransformer(lark.Transformer): class UploadTransformer(lark.Transformer):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super(UploadTransformer, self).__init__(*args, **kwargs) super().__init__(*args, **kwargs)
def _user_tag_factory(tag): def _user_tag_factory(tag):
# Create a new UserTag if innermost node, or append to list in order # Create a new UserTag if innermost node, or append to list in order
def user_tag(data): def user_tag(data):
@ -245,48 +246,51 @@ class PlaintextTransformer(UploadTransformer):
return f'@{mastodon_user} on {mastodon_instance}' return f'@{mastodon_user} on {mastodon_instance}'
else: else:
print(f'Unknown site "{site}" found in user tag; ignoring...') print(f'Unknown site "{site}" found in user tag; ignoring...')
return super(PlaintextTransformer, self).user_tag_root(data) return super().user_tag_root(data)
class AryionTransformer(BbcodeTransformer): class AryionTransformer(BbcodeTransformer):
def __init__(self, self_user, *args, **kwargs): def __init__(self, self_user, *args, **kwargs):
super(AryionTransformer, self).__init__(*args, **kwargs) super().__init__(*args, **kwargs)
def self_tag(data): def self_tag(data):
return self.user_tag_root((UserTag(eka=self_user),)) return self.user_tag_root((UserTag(eka=self_user),))
self.self_tag = self_tag self.self_tag = self_tag
def transformer_matches_site(self, site: str) -> bool: @staticmethod
def transformer_matches_site(site: str) -> bool:
return site in ('eka', 'aryion') return site in ('eka', 'aryion')
def user_tag_root(self, data): def user_tag_root(self, data):
user_data = data[0] user_data = data[0]
if user_data['eka']: if user_data['eka']:
return f':icon{user_data["eka"]}:' return f':icon{user_data["eka"]}:'
return super(AryionTransformer, self).user_tag_root(data) return super().user_tag_root(data)
class FuraffinityTransformer(BbcodeTransformer): class FuraffinityTransformer(BbcodeTransformer):
def __init__(self, self_user, *args, **kwargs): def __init__(self, self_user, *args, **kwargs):
super(FuraffinityTransformer, self).__init__(*args, **kwargs) super().__init__(*args, **kwargs)
def self_tag(data): def self_tag(data):
return self.user_tag_root((UserTag(fa=self_user),)) return self.user_tag_root((UserTag(fa=self_user),))
self.self_tag = self_tag self.self_tag = self_tag
def transformer_matches_site(self, site: str) -> bool: @staticmethod
def transformer_matches_site(site: str) -> bool:
return site in ('fa', 'furaffinity') return site in ('fa', 'furaffinity')
def user_tag_root(self, data): def user_tag_root(self, data):
user_data = data[0] user_data = data[0]
if user_data['fa']: if user_data['fa']:
return f':icon{user_data["fa"]}:' return f':icon{user_data["fa"]}:'
return super(FuraffinityTransformer, self).user_tag_root(data) return super().user_tag_root(data)
class WeasylTransformer(MarkdownTransformer): class WeasylTransformer(MarkdownTransformer):
def __init__(self, self_user, *args, **kwargs): def __init__(self, self_user, *args, **kwargs):
super(WeasylTransformer, self).__init__(*args, **kwargs) super().__init__(*args, **kwargs)
def self_tag(data): def self_tag(data):
return self.user_tag_root((UserTag(weasyl=self_user),)) return self.user_tag_root((UserTag(weasyl=self_user),))
self.self_tag = self_tag self.self_tag = self_tag
def transformer_matches_site(self, site: str) -> bool: @staticmethod
def transformer_matches_site(site: str) -> bool:
return site == 'weasyl' return site == 'weasyl'
def user_tag_root(self, data): def user_tag_root(self, data):
@ -301,16 +305,17 @@ class WeasylTransformer(MarkdownTransformer):
return f'<ib:{user_data["ib"]}>' return f'<ib:{user_data["ib"]}>'
if site == 'sf': if site == 'sf':
return f'<sf:{user_data["sf"]}>' return f'<sf:{user_data["sf"]}>'
return super(WeasylTransformer, self).user_tag_root(data) return super().user_tag_root(data)
class InkbunnyTransformer(BbcodeTransformer): class InkbunnyTransformer(BbcodeTransformer):
def __init__(self, self_user, *args, **kwargs): def __init__(self, self_user, *args, **kwargs):
super(InkbunnyTransformer, self).__init__(*args, **kwargs) super().__init__(*args, **kwargs)
def self_tag(data): def self_tag(data):
return self.user_tag_root((UserTag(ib=self_user),)) return self.user_tag_root((UserTag(ib=self_user),))
self.self_tag = self_tag self.self_tag = self_tag
def transformer_matches_site(self, site: str) -> bool: @staticmethod
def transformer_matches_site(site: str) -> bool:
return site in ('ib', 'inkbunny') return site in ('ib', 'inkbunny')
def user_tag_root(self, data): def user_tag_root(self, data):
@ -325,16 +330,17 @@ class InkbunnyTransformer(BbcodeTransformer):
return f'[sf]{user_data["sf"]}[/sf]' return f'[sf]{user_data["sf"]}[/sf]'
if site == 'weasyl': if site == 'weasyl':
return f'[weasyl]{user_data["weasyl"].replace(" ", "").lower()}[/weasyl]' return f'[weasyl]{user_data["weasyl"].replace(" ", "").lower()}[/weasyl]'
return super(InkbunnyTransformer, self).user_tag_root(data) return super().user_tag_root(data)
class SoFurryTransformer(BbcodeTransformer): class SoFurryTransformer(BbcodeTransformer):
def __init__(self, self_user, *args, **kwargs): def __init__(self, self_user, *args, **kwargs):
super(SoFurryTransformer, self).__init__(*args, **kwargs) super().__init__(*args, **kwargs)
def self_tag(data): def self_tag(data):
return self.user_tag_root((UserTag(sf=self_user),)) return self.user_tag_root((UserTag(sf=self_user),))
self.self_tag = self_tag self.self_tag = self_tag
def transformer_matches_site(self, site: str) -> bool: @staticmethod
def transformer_matches_site(site: str) -> bool:
return site in ('sf', 'sofurry') return site in ('sf', 'sofurry')
def user_tag_root(self, data): def user_tag_root(self, data):
@ -347,10 +353,18 @@ class SoFurryTransformer(BbcodeTransformer):
return f'fa!{user_data["fa"]}' return f'fa!{user_data["fa"]}'
if site == 'ib': if site == 'ib':
return f'ib!{user_data["ib"]}' return f'ib!{user_data["ib"]}'
return super(SoFurryTransformer, self).user_tag_root(data) return super().user_tag_root(data)
def parse_description(description_path, config_path, out_dir, ignore_empty_files=False): def parse_description(description_path, config_path, out_dir, ignore_empty_files=False):
for proc in psutil.process_iter(['cmdline']):
if proc.info['cmdline'] and 'libreoffice' in proc.info['cmdline'][0] and '--writer' in proc.info['cmdline'][1:]:
if ignore_empty_files:
print('WARN: LibreOffice Writer appears to be running. This command may output empty files until it is closed.')
break
print('WARN: LibreOffice Writer appears to be running. This command may raise an error until it is closed.')
break
ps = subprocess.Popen(('libreoffice', '--cat', description_path), stdout=subprocess.PIPE) ps = subprocess.Popen(('libreoffice', '--cat', description_path), stdout=subprocess.PIPE)
description = '\n'.join(line.strip() for line in io.TextIOWrapper(ps.stdout, encoding='utf-8-sig')) description = '\n'.join(line.strip() for line in io.TextIOWrapper(ps.stdout, encoding='utf-8-sig'))
if not description or re.match(r'^\s+$', description): if not description or re.match(r'^\s+$', description):
@ -382,17 +396,17 @@ def parse_description(description_path, config_path, out_dir, ignore_empty_files
errors.append(ValueError(f'Website \'{website}\' has invalid username \'{json.dumps(username)}\'')) errors.append(ValueError(f'Website \'{website}\' has invalid username \'{json.dumps(username)}\''))
elif username.strip() == '': elif username.strip() == '':
errors.append(ValueError(f'Website \'{website}\' has empty username')) errors.append(ValueError(f'Website \'{website}\' has empty username'))
if not any(ws in config for ws in ('aryion', 'furaffinity', 'weasyl', 'inkbunny', 'sofurry')): if not any(ws in config for ws in transformations):
errors.append(ValueError('No valid websites found')) errors.append(ValueError('No valid websites found'))
if errors: if errors:
raise ExceptionGroup('Invalid configuration for description parsing', errors) raise ExceptionGroup('Invalid configuration for description parsing', errors)
# Create descriptions # Create descriptions
re_multiple_empty_lines = re.compile(r'\n\n+') RE_MULTIPLE_EMPTY_LINES = re.compile(r'\n\n+')
for (website, username) in config.items(): for (website, username) in config.items():
(filepath, transformer) = transformations[website] (filepath, transformer) = transformations[website]
with open(os.path.join(out_dir, filepath), 'w') as f: with open(os.path.join(out_dir, filepath), 'w') as f:
if description.strip(): if description.strip():
transformed_description = transformer(username).transform(parsed_description) transformed_description = transformer(username).transform(parsed_description)
f.write(re_multiple_empty_lines.sub('\n\n', transformed_description)) f.write(RE_MULTIPLE_EMPTY_LINES.sub('\n\n', transformed_description).strip() + '\n')
else: else:
f.write('') f.write('')

View file

@ -1 +1,2 @@
lark==1.1.5 lark==1.1.8
psutil==5.9.6

View file

@ -1,6 +1,7 @@
import io import io
import json import json
import os import os
import psutil
import re import re
import subprocess import subprocess
@ -21,38 +22,56 @@ def parse_story(story_path, config_path, out_dir, temp_dir, ignore_empty_files=F
config = json.load(f) config = json.load(f)
if type(config) is not dict: if type(config) is not dict:
raise ValueError('Invalid configuration for story parsing: Configuration must be a JSON object') raise ValueError('Invalid configuration for story parsing: Configuration must be a JSON object')
should_create_txt_story = any(ws in config for ws in ('furaffinity', 'weasyl', 'inkbunny', 'sofurry')) should_create_txt_story = any(ws in config for ws in ('furaffinity', 'inkbunny', 'sofurry'))
should_create_md_story = any(ws in config for ws in ('weasyl',))
should_create_rtf_story = any(ws in config for ws in ('aryion',)) should_create_rtf_story = any(ws in config for ws in ('aryion',))
if not should_create_txt_story and not should_create_rtf_story: if not any((should_create_txt_story, should_create_md_story, should_create_rtf_story)):
raise ValueError('Invalid configuration for story parsing: No valid websites found') raise ValueError('Invalid configuration for story parsing: No valid websites found')
for proc in psutil.process_iter(['cmdline']):
if proc.info['cmdline'] and 'libreoffice' in proc.info['cmdline'][0] and '--writer' in proc.info['cmdline'][1:]:
if ignore_empty_files:
print('WARN: LibreOffice Writer appears to be running. This command may output empty files until it is closed.')
break
print('WARN: LibreOffice Writer appears to be running. This command may raise an error until it is closed.')
break
story_filename = os.path.split(story_path)[1].rsplit('.')[0] story_filename = os.path.split(story_path)[1].rsplit('.')[0]
txt_out_path = os.path.join(out_dir, f'{story_filename}.txt') if should_create_txt_story else os.devnull txt_out_path = os.path.join(out_dir, f'{story_filename}.txt') if should_create_txt_story else os.devnull
md_out_path = os.path.join(out_dir, f'{story_filename}.md') if should_create_md_story else os.devnull
txt_tmp_path = os.path.join(temp_dir, f'{story_filename}.txt') if should_create_rtf_story else os.devnull txt_tmp_path = os.path.join(temp_dir, f'{story_filename}.txt') if should_create_rtf_story else os.devnull
RE_EMPTY_LINE = re.compile('^$') RE_EMPTY_LINE = re.compile(r'^$')
RE_SEQUENTIAL_EQUAL_SIGNS = re.compile(r'=(?==)')
is_only_empty_lines = True is_only_empty_lines = True
ps = subprocess.Popen(('libreoffice', '--cat', story_path), stdout=subprocess.PIPE) ps = subprocess.Popen(('libreoffice', '--cat', story_path), stdout=subprocess.PIPE)
# Mangle output files so that .RTF will always have a single LF between lines, and .TXT can have one or two CRLF # Mangle output files so that .RTF will always have a single LF between lines, and .TXT/.MD can have one or two CRLF
with open(txt_out_path, 'w', newline='\r\n') as txt_out, open(txt_tmp_path, 'w') as txt_tmp: with open(txt_out_path, 'w', newline='\r\n') as txt_out, open(md_out_path, 'w', newline='\r\n') as md_out, open(txt_tmp_path, 'w') as txt_tmp:
needs_empty_line = False needs_empty_line = False
for line in io.TextIOWrapper(ps.stdout, encoding='utf-8-sig'): for line in io.TextIOWrapper(ps.stdout, encoding='utf-8-sig'):
# Remove empty lines # Remove empty lines
line = line.strip() line = line.strip()
md_line = line
if RE_EMPTY_LINE.search(line) and not is_only_empty_lines: if RE_EMPTY_LINE.search(line) and not is_only_empty_lines:
needs_empty_line = True needs_empty_line = True
else: else:
if should_create_md_story:
md_line = RE_SEQUENTIAL_EQUAL_SIGNS.sub('= ', line.replace(r'*', r'\*'))
if is_only_empty_lines: if is_only_empty_lines:
txt_out.writelines((line,)) txt_out.writelines((line,))
md_out.writelines((md_line,))
txt_tmp.writelines((line,)) txt_tmp.writelines((line,))
is_only_empty_lines = False is_only_empty_lines = False
else: else:
if needs_empty_line: if needs_empty_line:
txt_out.writelines(('\n\n', line)) txt_out.writelines(('\n\n', line))
md_out.writelines(('\n\n', md_line))
needs_empty_line = False needs_empty_line = False
else: else:
txt_out.writelines(('\n', line)) txt_out.writelines(('\n', line))
md_out.writelines(('\n', md_line))
txt_tmp.writelines(('\n', line)) txt_tmp.writelines(('\n', line))
txt_out.writelines(('\n')) txt_out.writelines(('\n'))
md_out.writelines(('\n'))
if is_only_empty_lines: if is_only_empty_lines:
error = f'Story processing returned empty file: libreoffice --cat {story_path}' error = f'Story processing returned empty file: libreoffice --cat {story_path}'
if ignore_empty_files: if ignore_empty_files: