Improvements to text generation and error handling
- Warn about a running LibreOffice Writer instance
- Better handling of leading/trailing whitespace for descriptions
- Create .md file for Weasyl
This commit is contained in:
parent
468e219ca8
commit
68603a93d6
3 changed files with 60 additions and 26 deletions
|
|
@ -3,6 +3,7 @@ import io
|
||||||
import json
|
import json
|
||||||
import lark
|
import lark
|
||||||
import os
|
import os
|
||||||
|
import psutil
|
||||||
import re
|
import re
|
||||||
import subprocess
|
import subprocess
|
||||||
import typing
|
import typing
|
||||||
|
|
@ -76,7 +77,7 @@ class UserTag:
|
||||||
|
|
||||||
class UploadTransformer(lark.Transformer):
|
class UploadTransformer(lark.Transformer):
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
super(UploadTransformer, self).__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
def _user_tag_factory(tag):
|
def _user_tag_factory(tag):
|
||||||
# Create a new UserTag if innermost node, or append to list in order
|
# Create a new UserTag if innermost node, or append to list in order
|
||||||
def user_tag(data):
|
def user_tag(data):
|
||||||
|
|
@ -245,48 +246,51 @@ class PlaintextTransformer(UploadTransformer):
|
||||||
return f'@{mastodon_user} on {mastodon_instance}'
|
return f'@{mastodon_user} on {mastodon_instance}'
|
||||||
else:
|
else:
|
||||||
print(f'Unknown site "{site}" found in user tag; ignoring...')
|
print(f'Unknown site "{site}" found in user tag; ignoring...')
|
||||||
return super(PlaintextTransformer, self).user_tag_root(data)
|
return super().user_tag_root(data)
|
||||||
|
|
||||||
class AryionTransformer(BbcodeTransformer):
|
class AryionTransformer(BbcodeTransformer):
|
||||||
def __init__(self, self_user, *args, **kwargs):
|
def __init__(self, self_user, *args, **kwargs):
|
||||||
super(AryionTransformer, self).__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
def self_tag(data):
|
def self_tag(data):
|
||||||
return self.user_tag_root((UserTag(eka=self_user),))
|
return self.user_tag_root((UserTag(eka=self_user),))
|
||||||
self.self_tag = self_tag
|
self.self_tag = self_tag
|
||||||
|
|
||||||
def transformer_matches_site(self, site: str) -> bool:
|
@staticmethod
|
||||||
|
def transformer_matches_site(site: str) -> bool:
|
||||||
return site in ('eka', 'aryion')
|
return site in ('eka', 'aryion')
|
||||||
|
|
||||||
def user_tag_root(self, data):
|
def user_tag_root(self, data):
|
||||||
user_data = data[0]
|
user_data = data[0]
|
||||||
if user_data['eka']:
|
if user_data['eka']:
|
||||||
return f':icon{user_data["eka"]}:'
|
return f':icon{user_data["eka"]}:'
|
||||||
return super(AryionTransformer, self).user_tag_root(data)
|
return super().user_tag_root(data)
|
||||||
|
|
||||||
class FuraffinityTransformer(BbcodeTransformer):
|
class FuraffinityTransformer(BbcodeTransformer):
|
||||||
def __init__(self, self_user, *args, **kwargs):
|
def __init__(self, self_user, *args, **kwargs):
|
||||||
super(FuraffinityTransformer, self).__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
def self_tag(data):
|
def self_tag(data):
|
||||||
return self.user_tag_root((UserTag(fa=self_user),))
|
return self.user_tag_root((UserTag(fa=self_user),))
|
||||||
self.self_tag = self_tag
|
self.self_tag = self_tag
|
||||||
|
|
||||||
def transformer_matches_site(self, site: str) -> bool:
|
@staticmethod
|
||||||
|
def transformer_matches_site(site: str) -> bool:
|
||||||
return site in ('fa', 'furaffinity')
|
return site in ('fa', 'furaffinity')
|
||||||
|
|
||||||
def user_tag_root(self, data):
|
def user_tag_root(self, data):
|
||||||
user_data = data[0]
|
user_data = data[0]
|
||||||
if user_data['fa']:
|
if user_data['fa']:
|
||||||
return f':icon{user_data["fa"]}:'
|
return f':icon{user_data["fa"]}:'
|
||||||
return super(FuraffinityTransformer, self).user_tag_root(data)
|
return super().user_tag_root(data)
|
||||||
|
|
||||||
class WeasylTransformer(MarkdownTransformer):
|
class WeasylTransformer(MarkdownTransformer):
|
||||||
def __init__(self, self_user, *args, **kwargs):
|
def __init__(self, self_user, *args, **kwargs):
|
||||||
super(WeasylTransformer, self).__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
def self_tag(data):
|
def self_tag(data):
|
||||||
return self.user_tag_root((UserTag(weasyl=self_user),))
|
return self.user_tag_root((UserTag(weasyl=self_user),))
|
||||||
self.self_tag = self_tag
|
self.self_tag = self_tag
|
||||||
|
|
||||||
def transformer_matches_site(self, site: str) -> bool:
|
@staticmethod
|
||||||
|
def transformer_matches_site(site: str) -> bool:
|
||||||
return site == 'weasyl'
|
return site == 'weasyl'
|
||||||
|
|
||||||
def user_tag_root(self, data):
|
def user_tag_root(self, data):
|
||||||
|
|
@ -301,16 +305,17 @@ class WeasylTransformer(MarkdownTransformer):
|
||||||
return f'<ib:{user_data["ib"]}>'
|
return f'<ib:{user_data["ib"]}>'
|
||||||
if site == 'sf':
|
if site == 'sf':
|
||||||
return f'<sf:{user_data["sf"]}>'
|
return f'<sf:{user_data["sf"]}>'
|
||||||
return super(WeasylTransformer, self).user_tag_root(data)
|
return super().user_tag_root(data)
|
||||||
|
|
||||||
class InkbunnyTransformer(BbcodeTransformer):
|
class InkbunnyTransformer(BbcodeTransformer):
|
||||||
def __init__(self, self_user, *args, **kwargs):
|
def __init__(self, self_user, *args, **kwargs):
|
||||||
super(InkbunnyTransformer, self).__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
def self_tag(data):
|
def self_tag(data):
|
||||||
return self.user_tag_root((UserTag(ib=self_user),))
|
return self.user_tag_root((UserTag(ib=self_user),))
|
||||||
self.self_tag = self_tag
|
self.self_tag = self_tag
|
||||||
|
|
||||||
def transformer_matches_site(self, site: str) -> bool:
|
@staticmethod
|
||||||
|
def transformer_matches_site(site: str) -> bool:
|
||||||
return site in ('ib', 'inkbunny')
|
return site in ('ib', 'inkbunny')
|
||||||
|
|
||||||
def user_tag_root(self, data):
|
def user_tag_root(self, data):
|
||||||
|
|
@ -325,16 +330,17 @@ class InkbunnyTransformer(BbcodeTransformer):
|
||||||
return f'[sf]{user_data["sf"]}[/sf]'
|
return f'[sf]{user_data["sf"]}[/sf]'
|
||||||
if site == 'weasyl':
|
if site == 'weasyl':
|
||||||
return f'[weasyl]{user_data["weasyl"].replace(" ", "").lower()}[/weasyl]'
|
return f'[weasyl]{user_data["weasyl"].replace(" ", "").lower()}[/weasyl]'
|
||||||
return super(InkbunnyTransformer, self).user_tag_root(data)
|
return super().user_tag_root(data)
|
||||||
|
|
||||||
class SoFurryTransformer(BbcodeTransformer):
|
class SoFurryTransformer(BbcodeTransformer):
|
||||||
def __init__(self, self_user, *args, **kwargs):
|
def __init__(self, self_user, *args, **kwargs):
|
||||||
super(SoFurryTransformer, self).__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
def self_tag(data):
|
def self_tag(data):
|
||||||
return self.user_tag_root((UserTag(sf=self_user),))
|
return self.user_tag_root((UserTag(sf=self_user),))
|
||||||
self.self_tag = self_tag
|
self.self_tag = self_tag
|
||||||
|
|
||||||
def transformer_matches_site(self, site: str) -> bool:
|
@staticmethod
|
||||||
|
def transformer_matches_site(site: str) -> bool:
|
||||||
return site in ('sf', 'sofurry')
|
return site in ('sf', 'sofurry')
|
||||||
|
|
||||||
def user_tag_root(self, data):
|
def user_tag_root(self, data):
|
||||||
|
|
@ -347,10 +353,18 @@ class SoFurryTransformer(BbcodeTransformer):
|
||||||
return f'fa!{user_data["fa"]}'
|
return f'fa!{user_data["fa"]}'
|
||||||
if site == 'ib':
|
if site == 'ib':
|
||||||
return f'ib!{user_data["ib"]}'
|
return f'ib!{user_data["ib"]}'
|
||||||
return super(SoFurryTransformer, self).user_tag_root(data)
|
return super().user_tag_root(data)
|
||||||
|
|
||||||
|
|
||||||
def parse_description(description_path, config_path, out_dir, ignore_empty_files=False):
|
def parse_description(description_path, config_path, out_dir, ignore_empty_files=False):
|
||||||
|
for proc in psutil.process_iter(['cmdline']):
|
||||||
|
if proc.info['cmdline'] and 'libreoffice' in proc.info['cmdline'][0] and '--writer' in proc.info['cmdline'][1:]:
|
||||||
|
if ignore_empty_files:
|
||||||
|
print('WARN: LibreOffice Writer appears to be running. This command may output empty files until it is closed.')
|
||||||
|
break
|
||||||
|
print('WARN: LibreOffice Writer appears to be running. This command may raise an error until it is closed.')
|
||||||
|
break
|
||||||
|
|
||||||
ps = subprocess.Popen(('libreoffice', '--cat', description_path), stdout=subprocess.PIPE)
|
ps = subprocess.Popen(('libreoffice', '--cat', description_path), stdout=subprocess.PIPE)
|
||||||
description = '\n'.join(line.strip() for line in io.TextIOWrapper(ps.stdout, encoding='utf-8-sig'))
|
description = '\n'.join(line.strip() for line in io.TextIOWrapper(ps.stdout, encoding='utf-8-sig'))
|
||||||
if not description or re.match(r'^\s+$', description):
|
if not description or re.match(r'^\s+$', description):
|
||||||
|
|
@ -382,17 +396,17 @@ def parse_description(description_path, config_path, out_dir, ignore_empty_files
|
||||||
errors.append(ValueError(f'Website \'{website}\' has invalid username \'{json.dumps(username)}\''))
|
errors.append(ValueError(f'Website \'{website}\' has invalid username \'{json.dumps(username)}\''))
|
||||||
elif username.strip() == '':
|
elif username.strip() == '':
|
||||||
errors.append(ValueError(f'Website \'{website}\' has empty username'))
|
errors.append(ValueError(f'Website \'{website}\' has empty username'))
|
||||||
if not any(ws in config for ws in ('aryion', 'furaffinity', 'weasyl', 'inkbunny', 'sofurry')):
|
if not any(ws in config for ws in transformations):
|
||||||
errors.append(ValueError('No valid websites found'))
|
errors.append(ValueError('No valid websites found'))
|
||||||
if errors:
|
if errors:
|
||||||
raise ExceptionGroup('Invalid configuration for description parsing', errors)
|
raise ExceptionGroup('Invalid configuration for description parsing', errors)
|
||||||
# Create descriptions
|
# Create descriptions
|
||||||
re_multiple_empty_lines = re.compile(r'\n\n+')
|
RE_MULTIPLE_EMPTY_LINES = re.compile(r'\n\n+')
|
||||||
for (website, username) in config.items():
|
for (website, username) in config.items():
|
||||||
(filepath, transformer) = transformations[website]
|
(filepath, transformer) = transformations[website]
|
||||||
with open(os.path.join(out_dir, filepath), 'w') as f:
|
with open(os.path.join(out_dir, filepath), 'w') as f:
|
||||||
if description.strip():
|
if description.strip():
|
||||||
transformed_description = transformer(username).transform(parsed_description)
|
transformed_description = transformer(username).transform(parsed_description)
|
||||||
f.write(re_multiple_empty_lines.sub('\n\n', transformed_description))
|
f.write(RE_MULTIPLE_EMPTY_LINES.sub('\n\n', transformed_description).strip() + '\n')
|
||||||
else:
|
else:
|
||||||
f.write('')
|
f.write('')
|
||||||
|
|
|
||||||
|
|
@ -1 +1,2 @@
|
||||||
lark==1.1.5
|
lark==1.1.8
|
||||||
|
psutil==5.9.6
|
||||||
|
|
|
||||||
29
story.py
29
story.py
|
|
@ -1,6 +1,7 @@
|
||||||
import io
|
import io
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
import psutil
|
||||||
import re
|
import re
|
||||||
import subprocess
|
import subprocess
|
||||||
|
|
||||||
|
|
@ -21,38 +22,56 @@ def parse_story(story_path, config_path, out_dir, temp_dir, ignore_empty_files=F
|
||||||
config = json.load(f)
|
config = json.load(f)
|
||||||
if type(config) is not dict:
|
if type(config) is not dict:
|
||||||
raise ValueError('Invalid configuration for story parsing: Configuration must be a JSON object')
|
raise ValueError('Invalid configuration for story parsing: Configuration must be a JSON object')
|
||||||
should_create_txt_story = any(ws in config for ws in ('furaffinity', 'weasyl', 'inkbunny', 'sofurry'))
|
should_create_txt_story = any(ws in config for ws in ('furaffinity', 'inkbunny', 'sofurry'))
|
||||||
|
should_create_md_story = any(ws in config for ws in ('weasyl',))
|
||||||
should_create_rtf_story = any(ws in config for ws in ('aryion',))
|
should_create_rtf_story = any(ws in config for ws in ('aryion',))
|
||||||
if not should_create_txt_story and not should_create_rtf_story:
|
if not any((should_create_txt_story, should_create_md_story, should_create_rtf_story)):
|
||||||
raise ValueError('Invalid configuration for story parsing: No valid websites found')
|
raise ValueError('Invalid configuration for story parsing: No valid websites found')
|
||||||
|
|
||||||
|
for proc in psutil.process_iter(['cmdline']):
|
||||||
|
if proc.info['cmdline'] and 'libreoffice' in proc.info['cmdline'][0] and '--writer' in proc.info['cmdline'][1:]:
|
||||||
|
if ignore_empty_files:
|
||||||
|
print('WARN: LibreOffice Writer appears to be running. This command may output empty files until it is closed.')
|
||||||
|
break
|
||||||
|
print('WARN: LibreOffice Writer appears to be running. This command may raise an error until it is closed.')
|
||||||
|
break
|
||||||
|
|
||||||
story_filename = os.path.split(story_path)[1].rsplit('.')[0]
|
story_filename = os.path.split(story_path)[1].rsplit('.')[0]
|
||||||
txt_out_path = os.path.join(out_dir, f'{story_filename}.txt') if should_create_txt_story else os.devnull
|
txt_out_path = os.path.join(out_dir, f'{story_filename}.txt') if should_create_txt_story else os.devnull
|
||||||
|
md_out_path = os.path.join(out_dir, f'{story_filename}.md') if should_create_md_story else os.devnull
|
||||||
txt_tmp_path = os.path.join(temp_dir, f'{story_filename}.txt') if should_create_rtf_story else os.devnull
|
txt_tmp_path = os.path.join(temp_dir, f'{story_filename}.txt') if should_create_rtf_story else os.devnull
|
||||||
RE_EMPTY_LINE = re.compile('^$')
|
RE_EMPTY_LINE = re.compile(r'^$')
|
||||||
|
RE_SEQUENTIAL_EQUAL_SIGNS = re.compile(r'=(?==)')
|
||||||
is_only_empty_lines = True
|
is_only_empty_lines = True
|
||||||
ps = subprocess.Popen(('libreoffice', '--cat', story_path), stdout=subprocess.PIPE)
|
ps = subprocess.Popen(('libreoffice', '--cat', story_path), stdout=subprocess.PIPE)
|
||||||
# Mangle output files so that .RTF will always have a single LF between lines, and .TXT can have one or two CRLF
|
# Mangle output files so that .RTF will always have a single LF between lines, and .TXT/.MD can have one or two CRLF
|
||||||
with open(txt_out_path, 'w', newline='\r\n') as txt_out, open(txt_tmp_path, 'w') as txt_tmp:
|
with open(txt_out_path, 'w', newline='\r\n') as txt_out, open(md_out_path, 'w', newline='\r\n') as md_out, open(txt_tmp_path, 'w') as txt_tmp:
|
||||||
needs_empty_line = False
|
needs_empty_line = False
|
||||||
for line in io.TextIOWrapper(ps.stdout, encoding='utf-8-sig'):
|
for line in io.TextIOWrapper(ps.stdout, encoding='utf-8-sig'):
|
||||||
# Remove empty lines
|
# Remove empty lines
|
||||||
line = line.strip()
|
line = line.strip()
|
||||||
|
md_line = line
|
||||||
if RE_EMPTY_LINE.search(line) and not is_only_empty_lines:
|
if RE_EMPTY_LINE.search(line) and not is_only_empty_lines:
|
||||||
needs_empty_line = True
|
needs_empty_line = True
|
||||||
else:
|
else:
|
||||||
|
if should_create_md_story:
|
||||||
|
md_line = RE_SEQUENTIAL_EQUAL_SIGNS.sub('= ', line.replace(r'*', r'\*'))
|
||||||
if is_only_empty_lines:
|
if is_only_empty_lines:
|
||||||
txt_out.writelines((line,))
|
txt_out.writelines((line,))
|
||||||
|
md_out.writelines((md_line,))
|
||||||
txt_tmp.writelines((line,))
|
txt_tmp.writelines((line,))
|
||||||
is_only_empty_lines = False
|
is_only_empty_lines = False
|
||||||
else:
|
else:
|
||||||
if needs_empty_line:
|
if needs_empty_line:
|
||||||
txt_out.writelines(('\n\n', line))
|
txt_out.writelines(('\n\n', line))
|
||||||
|
md_out.writelines(('\n\n', md_line))
|
||||||
needs_empty_line = False
|
needs_empty_line = False
|
||||||
else:
|
else:
|
||||||
txt_out.writelines(('\n', line))
|
txt_out.writelines(('\n', line))
|
||||||
|
md_out.writelines(('\n', md_line))
|
||||||
txt_tmp.writelines(('\n', line))
|
txt_tmp.writelines(('\n', line))
|
||||||
txt_out.writelines(('\n'))
|
txt_out.writelines(('\n'))
|
||||||
|
md_out.writelines(('\n'))
|
||||||
if is_only_empty_lines:
|
if is_only_empty_lines:
|
||||||
error = f'Story processing returned empty file: libreoffice --cat {story_path}'
|
error = f'Story processing returned empty file: libreoffice --cat {story_path}'
|
||||||
if ignore_empty_files:
|
if ignore_empty_files:
|
||||||
|
|
|
||||||
Reference in a new issue