Various changes to cron uploader:

Removed count limit; the cron job now checks the max PHP execution time and auto-stops itself at 80% of that value.

Now skips os-specific image cache files like thumbs.db and the __macosx folder.

Changed failed folder re-deployment to allow re-deploying to a populated queue, making it easier to re-process lots of failed batches all at once.

Changed page to output as a stream, allowing a long-running process to provide output as it runs rather than just at the very end.

Changed import loop to use the yield convention, allowing faster consumption of found files and lower memory use overall.
This commit is contained in:
Matthew Barbour 2020-05-28 13:34:55 -05:00 committed by Shish
parent 63b2601e67
commit 007e07e507
4 changed files with 142 additions and 76 deletions

View file

@ -393,6 +393,68 @@ function get_dir_contents(string $dir): array
);
}
/**
 * Recursively removes a directory tree that contains no files.
 *
 * Every sub-directory that turns out to be empty is removed, even when a
 * sibling entry prevents $dir itself from being removed. $dir is removed
 * only when nothing remains inside it.
 *
 * @param string $dir Directory to clean up; must not be an empty string.
 * @return bool True when $dir and everything below it was removed; false when
 *              $dir is not a directory, could not be read, still contains
 *              something other than removable directories, or rmdir() failed.
 */
function remove_empty_dirs(string $dir): bool
{
    assert(!empty($dir));

    if (!is_dir($dir)) {
        return false;
    }

    $entries = scandir($dir);
    if ($entries === false) {
        // Unreadable directory: it cannot be shown to be empty, so keep it.
        return false;
    }

    $result = true;
    foreach (array_diff($entries, ['..', '.']) as $item) {
        $path = join_path($dir, $item);
        if (is_dir($path)) {
            // Recurse first and combine afterwards, so an earlier failure
            // does not short-circuit the clean-up of later sub-directories.
            $result = remove_empty_dirs($path) && $result;
        } else {
            // Something that is not a directory lives here; $dir must stay.
            $result = false;
        }
    }

    // Only attempt to remove $dir itself when everything below it is gone.
    return $result && rmdir($dir);
}
/**
 * Lists every file below a directory, descending into sub-directories.
 *
 * Entries are visited in the order scandir() returns them; the files of a
 * sub-directory appear at the position of that sub-directory.
 *
 * @param string $dir Directory to walk; must not be an empty string.
 * @return string[] File paths built with join_path() starting from $dir.
 *                  Empty when $dir is not a directory or cannot be read.
 */
function get_files_recursively(string $dir): array
{
    assert(!empty($dir));

    if (!is_dir($dir)) {
        return [];
    }

    $things = scandir($dir);
    if ($things === false) {
        // scandir() failed (e.g. unreadable directory); treat as "no files"
        // instead of handing false to array_diff().
        return [];
    }

    $output = [];
    foreach (array_diff($things, ['..', '.']) as $thing) {
        $path = join_path($dir, $thing);
        if (is_file($path)) {
            $output[] = $path;
        } else {
            // Append in place rather than array_merge(), which would copy the
            // whole accumulated list for every sub-directory. Non-directories
            // that are not regular files yield [] from the recursive call.
            foreach (get_files_recursively($path) as $file) {
                $output[] = $file;
            }
        }
    }

    return $output;
}
/**
* Returns amount of files & total size of dir.
*/
@ -598,6 +660,7 @@ function _fatal_error(Exception $e): void
<p><b>Message:</b> '.html_escape($message).'
'.$q.'
<p><b>Version:</b> '.$version.' (on '.$phpver.')
<p><b>Stack Trace:</b></p><pre>'.$e->getTraceAsString().'</pre>
</body>
</html>
';

View file

@ -6,14 +6,12 @@ abstract class CronUploaderConfig
const DEFAULT_PATH = "cron_uploader";
const KEY = "cron_uploader_key";
const COUNT = "cron_uploader_count";
const DIR = "cron_uploader_dir";
const USER = "cron_uploader_user";
public static function set_defaults(): void
{
global $config;
$config->set_default_int(self::COUNT, 1);
$config->set_default_string(self::DIR, data_path(self::DEFAULT_PATH));
$upload_key = $config->get_string(self::KEY, "");
@ -48,18 +46,6 @@ abstract class CronUploaderConfig
$config->set_string(self::KEY, $value);
}
public static function get_count(): int
{
global $config;
return $config->get_int(self::COUNT);
}
public static function set_count(int $value): void
{
global $config;
$config->set_int(self::COUNT, $value);
}
public static function get_dir(): string
{
global $config;

View file

@ -15,8 +15,6 @@ class CronUploader extends Extension
const UPLOADED_DIR = "uploaded";
const FAILED_DIR = "failed_to_upload";
public $output_buffer = [];
public function onInitExt(InitExtEvent $event)
{
// Set default values
@ -57,7 +55,6 @@ class CronUploader extends Extension
$sb = new SetupBlock("Cron Uploader");
$sb->start_table();
$sb->add_int_option(CronUploaderConfig::COUNT, "Upload per run", true);
$sb->add_text_option(CronUploaderConfig::DIR, "Root dir", true);
$sb->add_text_option(CronUploaderConfig::KEY, "Key", true);
$sb->add_choice_option(CronUploaderConfig::USER, $users, "User", true);
@ -123,33 +120,46 @@ class CronUploader extends Extension
$this->prep_root_dir();
$results = get_dir_contents($queue_dir);
if (count($results) > 0) {
$page->flash("Queue folder must be empty to re-stage");
return;
}
$results = get_dir_contents($stage_dir);
$results = get_files_recursively($stage_dir);
if (count($results) == 0) {
if (rmdir($stage_dir)===false) {
if (remove_empty_dirs($stage_dir)===false) {
$page->flash("Nothing to stage from $folder, cannot remove folder");
} else {
$page->flash("Nothing to stage from $folder, removing folder");
}
return;
}
foreach ($results as $result) {
$original_path = join_path($stage_dir, $result);
$new_path = join_path($queue_dir, $result);
$new_path = join_path($queue_dir, substr($result,strlen($stage_dir)));
rename($original_path, $new_path);
if(file_exists($new_path)) {
$page->flash("File already exists in queue folder: " .$result);
return;
}
}
$page->flash("Re-staged $folder to queue");
rmdir($stage_dir);
$success = true;
foreach ($results as $result) {
$new_path = join_path($queue_dir, substr($result,strlen($stage_dir)));
$dir = dirname($new_path);
if(!is_dir($dir)) {
mkdir($dir, 0775, true);
}
if(rename($result, $new_path)===false){
$page->flash("Could not move file: " .$result);
$success = false;
}
}
if($success===true) {
$page->flash("Re-staged $folder to queue");
if(remove_empty_dirs($stage_dir)===false) {
$page->flash("Could not remove $folder");
}
}
}
private function clear_folder($folder)
@ -265,7 +275,11 @@ class CronUploader extends Extension
*/
public function process_upload(string $key, ?int $upload_count = null): bool
{
global $database;
global $database, $_shm_load_start;
$max_time = intval(ini_get('max_execution_time'))*.8;
$this->set_headers();
if ($key!=CronUploaderConfig::get_key()) {
throw new SCoreException("Cron upload key incorrect");
@ -290,21 +304,8 @@ class CronUploader extends Extension
try {
//set_time_limit(0);
// Gets amount of imgs to upload
if ($upload_count == null) {
$upload_count = CronUploaderConfig::get_count();
}
$output_subdir = date('Ymd-His', time());
$image_queue = $this->generate_image_queue(CronUploaderConfig::get_dir(), $upload_count);
// Throw exception if there's nothing in the queue
if (count($image_queue) == 0) {
$this->log_message(SCORE_LOG_WARNING, "Your queue is empty so nothing could be uploaded.");
$this->handle_log();
return false;
}
$image_queue = $this->generate_image_queue(CronUploaderConfig::get_dir());
// Randomize Images
//shuffle($this->image_queue);
@ -314,9 +315,11 @@ class CronUploader extends Extension
$failed = 0;
// Upload the file(s)
for ($i = 0; $i < $upload_count && sizeof($image_queue) > 0; $i++) {
$img = array_pop($image_queue);
foreach ($image_queue as $img) {
$execution_time = microtime(true) - $_shm_load_start;
if($execution_time>$max_time) {
break;
}
try {
$database->beginTransaction();
$this->log_message(SCORE_LOG_INFO, "Adding file: {$img[0]} - tags: {$img[2]}");
@ -341,15 +344,17 @@ class CronUploader extends Extension
}
}
// Throw exception if there's nothing in the queue
if ($merged+$failed+$added === 0) {
$this->log_message(SCORE_LOG_WARNING, "Your queue is empty so nothing could be uploaded.");
return false;
}
$this->log_message(SCORE_LOG_INFO, "Items added: $added");
$this->log_message(SCORE_LOG_INFO, "Items merged: $merged");
$this->log_message(SCORE_LOG_INFO, "Items failed: $failed");
// Display upload log
$this->handle_log();
return true;
} finally {
flock($lockfile, LOCK_UN);
@ -428,26 +433,42 @@ class CronUploader extends Extension
}
private const PARTIAL_DOWNLOAD_EXTENSIONS = ['crdownload','part'];
private const SKIPPABLE_FILES = ['.ds_store','thumbs.db'];
private const SKIPPABLE_DIRECTORIES = ['__macosx'];
private function is_skippable_file(string $path)
private function is_skippable_dir(string $path)
{
$info = pathinfo($path);
if (in_array(strtolower($info['extension']), self::PARTIAL_DOWNLOAD_EXTENSIONS)) {
if (array_key_exists("basename", $info) && in_array(strtolower($info['basename']), self::SKIPPABLE_DIRECTORIES)) {
return true;
}
return false;
}
private function generate_image_queue(string $root_dir, ?int $limit = null): array
/**
 * Tells whether a path names a file that should be skipped.
 *
 * A path is skippable when its lower-cased base name is listed in
 * SKIPPABLE_FILES, or when its lower-cased extension is listed in
 * PARTIAL_DOWNLOAD_EXTENSIONS.
 *
 * @param string $path Path of the candidate file.
 * @return bool True when the file should be skipped, false otherwise.
 */
private function is_skippable_file(string $path)
{
    $parts = pathinfo($path);

    $name_matches = array_key_exists("basename", $parts)
        && in_array(strtolower($parts['basename']), self::SKIPPABLE_FILES);
    if ($name_matches) {
        return true;
    }

    // pathinfo() leaves out "extension" when the name has none, hence the key check.
    return array_key_exists("extension", $parts)
        && in_array(strtolower($parts['extension']), self::PARTIAL_DOWNLOAD_EXTENSIONS);
}
private function generate_image_queue(string $root_dir, ?int $limit = null): Generator
{
$base = $this->get_queue_dir();
$output = [];
if (!is_dir($base)) {
$this->log_message(SCORE_LOG_WARNING, "Image Queue Directory could not be found at \"$base\".");
return [];
return;
}
$ite = new RecursiveDirectoryIterator($base, FilesystemIterator::SKIP_DOTS);
@ -458,18 +479,13 @@ class CronUploader extends Extension
$relativePath = substr($fullpath, strlen($base));
$tags = path_to_tags($relativePath);
$img = [
yield [
0 => $fullpath,
1 => $pathinfo ["basename"],
2 => $tags
];
$output[] = $img;
if (!empty($limit) && count($output) >= $limit) {
break;
}
}
}
return $output;
}
@ -478,10 +494,11 @@ class CronUploader extends Extension
log_msg(self::NAME, $severity, $message);
$time = "[" . date('Y-m-d H:i:s') . "]";
$this->output_buffer[] = $time . " " . $message;
echo $time . " " . $message."\r\n";
flush_output();
$log_path = $this->get_log_file();
file_put_contents($log_path, $time . " " . $message);
}
@ -490,16 +507,13 @@ class CronUploader extends Extension
return join_path(CronUploaderConfig::get_dir(), "uploads.log");
}
/**
* This is run at the end to display & save the log.
*/
private function handle_log()
private function set_headers(): void
{
global $page;
// Display message
$page->set_mode(PageMode::DATA);
$page->set_type("text/plain");
$page->set_data(implode("\r\n", $this->output_buffer));
$page->set_mode(PageMode::MANUAL);
$page->set_type(MIME_TYPE_TEXT);
$page->send_headers();
}
}

View file

@ -57,6 +57,9 @@ class CronUploaderTheme extends Themelet
<br />When you create the cron job, you choose when to upload new images.</li>
</ol>";
$max_time = intval(ini_get('max_execution_time'))*.8;
$usage_html = "Upload your images you want to be uploaded to the queue directory using your FTP client or other means.
<br />(<b>{$queue_dirinfo['path']}</b>)
<ol>
@ -71,7 +74,7 @@ class CronUploaderTheme extends Themelet
<ul>
<li>If an import is already running, another cannot start until it is done.</li>
<li>Each time it runs it will import up to ".CronUploaderConfig::get_count()." file(s). This is controlled from <a href='".make_link("setup")."'>Board Config</a>.</li>
<li>Each time it runs it will import for up to ".number_format ($max_time)." seconds. This is controlled by the PHP max execution time.</li>
<li>Uploaded images will be moved to the 'uploaded' directory into a subfolder named after the time the import started. It's recommended that you remove everything out of this directory from time to time. If you have admin controls enabled, this can be done from <a href='".make_link("admin")."'>Board Admin</a>.</li>
<li>If you enable the db logging extension, you can view the log output on this screen. Otherwise the log will be written to a file at ".CronUploaderConfig::get_dir().DIRECTORY_SEPARATOR."uploads.log</li>
</ul>
@ -107,7 +110,7 @@ class CronUploaderTheme extends Themelet
$html .= make_form(make_link("admin/cron_uploader_restage"));
$html .= "<table class='form'>";
$html .= "<tr><th>Failed dir</th><td><select name='failed_dir' required='required'><option></option>";
$html .= "<tr><th>Failed dir</th><td><select name='failed_dir' required='required'>";
foreach ($failed_dirs as $dir) {
$html .= "<option value='$dir'>$dir</option>";