fix more search edge-cases

This commit is contained in:
Shish 2023-12-14 02:08:50 +00:00 committed by Shish
parent e114057dfe
commit 4c8274161f
2 changed files with 75 additions and 35 deletions

View file

@ -159,7 +159,8 @@ class Image
} }
} }
$querylet = Image::build_search_querylet($tags, $limit, $start); [$tag_conditions, $img_conditions, $order] = self::terms_to_conditions($tags);
$querylet = self::build_search_querylet($tag_conditions, $img_conditions, $order, $limit, $start);
return $database->get_all_iterable($querylet->sql, $querylet->variables); return $database->get_all_iterable($querylet->sql, $querylet->variables);
} }
@ -246,7 +247,8 @@ class Image
if (Extension::is_enabled(RatingsInfo::KEY)) { if (Extension::is_enabled(RatingsInfo::KEY)) {
$tags[] = "rating:*"; $tags[] = "rating:*";
} }
$querylet = Image::build_search_querylet($tags); [$tag_conditions, $img_conditions, $order] = self::terms_to_conditions($tags);
$querylet = self::build_search_querylet($tag_conditions, $img_conditions, $order);
$total = (int)$database->get_one("SELECT COUNT(*) AS cnt FROM ($querylet->sql) AS tbl", $querylet->variables); $total = (int)$database->get_one("SELECT COUNT(*) AS cnt FROM ($querylet->sql) AS tbl", $querylet->variables);
if (SPEED_HAX && $total > 5000) { if (SPEED_HAX && $total > 5000) {
// when we have a ton of images, the count // when we have a ton of images, the count
@ -307,7 +309,8 @@ class Image
} else { } else {
$tags[] = 'id'. $gtlt . $this->id; $tags[] = 'id'. $gtlt . $this->id;
$tags[] = 'order:id_'. strtolower($dir); $tags[] = 'order:id_'. strtolower($dir);
$querylet = Image::build_search_querylet($tags); [$tag_conditions, $img_conditions, $order] = self::terms_to_conditions($tags);
$querylet = self::build_search_querylet($tag_conditions, $img_conditions, $order);
$querylet->append_sql(' LIMIT 1'); $querylet->append_sql(' LIMIT 1');
$row = $database->get_row($querylet->sql, $querylet->variables); $row = $database->get_row($querylet->sql, $querylet->variables);
} }
@ -749,13 +752,13 @@ class Image
} }
/** /**
* Turn a human input string into a an abstract search query
*
* @param string[] $terms * @param string[] $terms
* @return array{0: TagCondition[], 1: ImgCondition[], 2: string}
*/ */
private static function build_search_querylet( private static function terms_to_conditions(array $terms): array
array $terms, {
?int $limit = null,
?int $offset = null
): Querylet {
global $config; global $config;
$tag_conditions = []; $tag_conditions = [];
@ -776,8 +779,11 @@ class Image
$order = ($order ?: "images.".$config->get_string(IndexConfig::ORDER)); $order = ($order ?: "images.".$config->get_string(IndexConfig::ORDER));
/* return [$tag_conditions, $img_conditions, $order];
* Turn a bunch of Querylet objects into a base query }
/**
* Turn an abstract search query into an SQL Querylet
* *
* Must follow the format * Must follow the format
* *
@ -786,8 +792,17 @@ class Image
* WHERE (...) * WHERE (...)
* *
* ie, return a set of images.* columns, and end with a WHERE * ie, return a set of images.* columns, and end with a WHERE
*
* @param TagCondition[] $tag_conditions
* @param ImgCondition[] $img_conditions
*/ */
private static function build_search_querylet(
array $tag_conditions,
array $img_conditions,
string $order,
?int $limit = null,
?int $offset = null
): Querylet {
// no tags, do a simple search // no tags, do a simple search
if (count($tag_conditions) === 0) { if (count($tag_conditions) === 0) {
$query = new Querylet("SELECT images.* FROM images WHERE 1=1"); $query = new Querylet("SELECT images.* FROM images WHERE 1=1");
@ -796,15 +811,28 @@ class Image
// one tag sorted by ID - we can fetch this from the image_tags table, // one tag sorted by ID - we can fetch this from the image_tags table,
// and do the offset / limit there, which is 10x faster than fetching // and do the offset / limit there, which is 10x faster than fetching
// all the image_tags and doing the offset / limit on the result. // all the image_tags and doing the offset / limit on the result.
//
// NOTE: this is currently impossible to test, because the test suite
// loads all extensions, some of whom add generic img_conditions onto
// the search, which prevents this optimisation from being used.
elseif ( elseif (
count($tag_conditions) === 1 count($tag_conditions) === 1
&& $tag_conditions[0]->positive
// We can only do this if img_conditions is empty, because
// we're going to apply the offset / limit to the image_tags
// subquery, and applying extra conditions to the top-level
// query might reduce the total results below the target limit
&& empty($img_conditions) && empty($img_conditions)
// We can only do this if we're sorting by ID, because
// we're going to be using the image_tags table, which
// only has image_id and tag_id, not any other columns
&& ($order == "id DESC" || $order == "images.id DESC") && ($order == "id DESC" || $order == "images.id DESC")
&& !is_null($offset) // This is only an optimisation if we are applying limit
// and offset
&& !is_null($limit) && !is_null($limit)
&& !is_null($offset)
) { ) {
$tc = $tag_conditions[0]; $tc = $tag_conditions[0];
$in = $tc->positive ? "IN" : "NOT IN";
// IN (SELECT id FROM tags) is 100x slower than doing a separate // IN (SELECT id FROM tags) is 100x slower than doing a separate
// query and then a second query for IN(first_query_results)?? // query and then a second query for IN(first_query_results)??
$tag_array = self::tag_or_wildcard_to_ids($tc->tag); $tag_array = self::tag_or_wildcard_to_ids($tc->tag);
@ -820,17 +848,16 @@ class Image
$query = new Querylet(" $query = new Querylet("
SELECT images.* SELECT images.*
FROM images INNER JOIN ( FROM images INNER JOIN (
SELECT it.image_id SELECT DISTINCT it.image_id
FROM image_tags it FROM image_tags it
WHERE it.tag_id $in ($set) WHERE it.tag_id IN ($set)
ORDER BY it.image_id DESC ORDER BY it.image_id DESC
LIMIT :limit OFFSET :offset LIMIT :limit OFFSET :offset
) a on a.image_id = images.id ) a on a.image_id = images.id
ORDER BY images.id DESC WHERE 1=1
", ["limit" => $limit, "offset" => $offset]); ", ["limit" => $limit, "offset" => $offset]);
// don't offset at the image level because // don't offset at the image level because
// we already offset at the image_tags level // we already offset at the image_tags level
$order = null;
$limit = null; $limit = null;
$offset = null; $offset = null;
} }
@ -890,7 +917,7 @@ class Image
} }
$first = array_shift($inner_joins); $first = array_shift($inner_joins);
$sub_query = "SELECT it.image_id FROM image_tags it "; $sub_query = "SELECT DISTINCT it.image_id FROM image_tags it ";
$i = 0; $i = 0;
foreach ($inner_joins as $inner_join) { foreach ($inner_joins as $inner_join) {
$i++; $i++;
@ -946,9 +973,8 @@ class Image
$query->append(new Querylet($img_sql, $img_vars)); $query->append(new Querylet($img_sql, $img_vars));
} }
if (!is_null($order)) {
$query->append(new Querylet(" ORDER BY ".$order)); $query->append(new Querylet(" ORDER BY ".$order));
}
if (!is_null($limit)) { if (!is_null($limit)) {
$query->append(new Querylet(" LIMIT :limit ", ["limit" => $limit])); $query->append(new Querylet(" LIMIT :limit ", ["limit" => $limit]));
$query->append(new Querylet(" OFFSET :offset ", ["offset" => $offset])); $query->append(new Querylet(" OFFSET :offset ", ["offset" => $offset]));

View file

@ -178,14 +178,21 @@ class IndexTest extends ShimmiePHPUnitTestCase
} }
#[Depends('testUpload')] #[Depends('testUpload')]
public function testWildSearchManyResults($image_ids) public function testWildSearchManyResultsSimple($image_ids)
{ {
$image_ids = $this->testUpload(); $image_ids = $this->testUpload();
// two images match comp* - one matches it once, // two images match comp* - one matches it once, one matches it twice
// one matches it twice
$this->assert_search_results(["comp*"], [$image_ids[1], $image_ids[0]]); $this->assert_search_results(["comp*"], [$image_ids[1], $image_ids[0]]);
} }
#[Depends('testUpload')]
public function testWildSearchManyResultsComplex($image_ids)
{
$image_ids = $this->testUpload();
// same thing, but with the complex branch
$this->assert_search_results(["comp*", "-asdf"], [$image_ids[1], $image_ids[0]]);
}
/* * * * * * * * * * * /* * * * * * * * * * *
* Mixed * * Mixed *
* * * * * * * * * * */ * * * * * * * * * * */
@ -196,20 +203,27 @@ class IndexTest extends ShimmiePHPUnitTestCase
// multiple tags, many results // multiple tags, many results
$this->assert_search_results(["computer", "size=640x480"], [$image_ids[1], $image_ids[0]]); $this->assert_search_results(["computer", "size=640x480"], [$image_ids[1], $image_ids[0]]);
} }
// tag + negative
// wildcards + ???
/* * * * * * * * * * * /* * * * * * * * * * *
* Negative * * Negative *
* * * * * * * * * * */ * * * * * * * * * * */
#[Depends('testUpload')] #[Depends('testUpload')]
public function testNegative($image_ids) public function testSubtractFromSearch($image_ids)
{ {
$image_ids = $this->testUpload(); $image_ids = $this->testUpload();
// negative tag, should have one result // negative tag, should have one result
$this->assert_search_results(["computer", "-pbx"], [$image_ids[1]]); $this->assert_search_results(["computer", "-pbx"], [$image_ids[1]]);
// removing something that doesn't exist should have no effect
$this->assert_search_results(["computer", "-not_a_tag"], [$image_ids[1], $image_ids[0]]);
}
#[Depends('testUpload')]
public function testSubtractFromDefault($image_ids)
{
$image_ids = $this->testUpload();
// negative tag alone, should work // negative tag alone, should work
$this->assert_search_results(["-pbx"], [$image_ids[1]]); $this->assert_search_results(["-pbx"], [$image_ids[1]]);