Improvements to text generation and error handling

- Warn when a LibreOffice Writer instance is running
- Better handling of leading/trailing whitespace for descriptions
- Create .md file for Weasyl
This commit is contained in:
Bad Manners 2023-11-20 14:32:19 -03:00
parent 468e219ca8
commit 68603a93d6
3 changed files with 60 additions and 26 deletions

View file

@ -1,6 +1,7 @@
import io
import json
import os
import psutil
import re
import subprocess
@ -21,38 +22,56 @@ def parse_story(story_path, config_path, out_dir, temp_dir, ignore_empty_files=F
config = json.load(f)
if type(config) is not dict:
raise ValueError('Invalid configuration for story parsing: Configuration must be a JSON object')
should_create_txt_story = any(ws in config for ws in ('furaffinity', 'weasyl', 'inkbunny', 'sofurry'))
should_create_txt_story = any(ws in config for ws in ('furaffinity', 'inkbunny', 'sofurry'))
should_create_md_story = any(ws in config for ws in ('weasyl',))
should_create_rtf_story = any(ws in config for ws in ('aryion',))
if not should_create_txt_story and not should_create_rtf_story:
if not any((should_create_txt_story, should_create_md_story, should_create_rtf_story)):
raise ValueError('Invalid configuration for story parsing: No valid websites found')
for proc in psutil.process_iter(['cmdline']):
if proc.info['cmdline'] and 'libreoffice' in proc.info['cmdline'][0] and '--writer' in proc.info['cmdline'][1:]:
if ignore_empty_files:
print('WARN: LibreOffice Writer appears to be running. This command may output empty files until it is closed.')
break
print('WARN: LibreOffice Writer appears to be running. This command may raise an error until it is closed.')
break
story_filename = os.path.split(story_path)[1].rsplit('.')[0]
txt_out_path = os.path.join(out_dir, f'{story_filename}.txt') if should_create_txt_story else os.devnull
md_out_path = os.path.join(out_dir, f'{story_filename}.md') if should_create_md_story else os.devnull
txt_tmp_path = os.path.join(temp_dir, f'{story_filename}.txt') if should_create_rtf_story else os.devnull
RE_EMPTY_LINE = re.compile('^$')
RE_EMPTY_LINE = re.compile(r'^$')
RE_SEQUENTIAL_EQUAL_SIGNS = re.compile(r'=(?==)')
is_only_empty_lines = True
ps = subprocess.Popen(('libreoffice', '--cat', story_path), stdout=subprocess.PIPE)
# Mangle output files so that .RTF will always have a single LF between lines, and .TXT can have one or two CRLF
with open(txt_out_path, 'w', newline='\r\n') as txt_out, open(txt_tmp_path, 'w') as txt_tmp:
# Mangle output files so that .RTF will always have a single LF between lines, and .TXT/.MD can have one or two CRLF
with open(txt_out_path, 'w', newline='\r\n') as txt_out, open(md_out_path, 'w', newline='\r\n') as md_out, open(txt_tmp_path, 'w') as txt_tmp:
needs_empty_line = False
for line in io.TextIOWrapper(ps.stdout, encoding='utf-8-sig'):
# Remove empty lines
line = line.strip()
md_line = line
if RE_EMPTY_LINE.search(line) and not is_only_empty_lines:
needs_empty_line = True
else:
if should_create_md_story:
md_line = RE_SEQUENTIAL_EQUAL_SIGNS.sub('= ', line.replace(r'*', r'\*'))
if is_only_empty_lines:
txt_out.writelines((line,))
md_out.writelines((md_line,))
txt_tmp.writelines((line,))
is_only_empty_lines = False
else:
if needs_empty_line:
txt_out.writelines(('\n\n', line))
md_out.writelines(('\n\n', md_line))
needs_empty_line = False
else:
txt_out.writelines(('\n', line))
md_out.writelines(('\n', md_line))
txt_tmp.writelines(('\n', line))
txt_out.writelines(('\n'))
md_out.writelines(('\n'))
if is_only_empty_lines:
error = f'Story processing returned empty file: libreoffice --cat {story_path}'
if ignore_empty_files: