fix more search edge-cases

This commit is contained in:
Shish 2023-12-14 02:08:50 +00:00 committed by Shish
parent e114057dfe
commit 4c8274161f
2 changed files with 75 additions and 35 deletions

View file

@ -159,7 +159,8 @@ class Image
}
}
$querylet = Image::build_search_querylet($tags, $limit, $start);
[$tag_conditions, $img_conditions, $order] = self::terms_to_conditions($tags);
$querylet = self::build_search_querylet($tag_conditions, $img_conditions, $order, $limit, $start);
return $database->get_all_iterable($querylet->sql, $querylet->variables);
}
@ -246,7 +247,8 @@ class Image
if (Extension::is_enabled(RatingsInfo::KEY)) {
$tags[] = "rating:*";
}
$querylet = Image::build_search_querylet($tags);
[$tag_conditions, $img_conditions, $order] = self::terms_to_conditions($tags);
$querylet = self::build_search_querylet($tag_conditions, $img_conditions, $order);
$total = (int)$database->get_one("SELECT COUNT(*) AS cnt FROM ($querylet->sql) AS tbl", $querylet->variables);
if (SPEED_HAX && $total > 5000) {
// when we have a ton of images, the count
@ -307,7 +309,8 @@ class Image
} else {
$tags[] = 'id'. $gtlt . $this->id;
$tags[] = 'order:id_'. strtolower($dir);
$querylet = Image::build_search_querylet($tags);
[$tag_conditions, $img_conditions, $order] = self::terms_to_conditions($tags);
$querylet = self::build_search_querylet($tag_conditions, $img_conditions, $order);
$querylet->append_sql(' LIMIT 1');
$row = $database->get_row($querylet->sql, $querylet->variables);
}
@ -749,13 +752,13 @@ class Image
}
/**
* Turn a human input string into a an abstract search query
*
* @param string[] $terms
* @return array{0: TagCondition[], 1: ImgCondition[], 2: string}
*/
private static function build_search_querylet(
array $terms,
?int $limit = null,
?int $offset = null
): Querylet {
private static function terms_to_conditions(array $terms): array
{
global $config;
$tag_conditions = [];
@ -776,18 +779,30 @@ class Image
$order = ($order ?: "images.".$config->get_string(IndexConfig::ORDER));
/*
* Turn a bunch of Querylet objects into a base query
*
* Must follow the format
*
* SELECT images.*
* FROM (...) AS images
* WHERE (...)
*
* ie, return a set of images.* columns, and end with a WHERE
*/
return [$tag_conditions, $img_conditions, $order];
}
/**
* Turn an abstract search query into an SQL Querylet
*
* Must follow the format
*
* SELECT images.*
* FROM (...) AS images
* WHERE (...)
*
* ie, return a set of images.* columns, and end with a WHERE
*
* @param TagCondition[] $tag_conditions
* @param ImgCondition[] $img_conditions
*/
private static function build_search_querylet(
array $tag_conditions,
array $img_conditions,
string $order,
?int $limit = null,
?int $offset = null
): Querylet {
// no tags, do a simple search
if (count($tag_conditions) === 0) {
$query = new Querylet("SELECT images.* FROM images WHERE 1=1");
@ -796,15 +811,28 @@ class Image
// one tag sorted by ID - we can fetch this from the image_tags table,
// and do the offset / limit there, which is 10x faster than fetching
// all the image_tags and doing the offset / limit on the result.
//
// NOTE: this is currently impossible to test, because the test suite
// loads all extensions, some of whom add generic img_conditions onto
// the search, which prevents this optimisation from being used.
elseif (
count($tag_conditions) === 1
&& $tag_conditions[0]->positive
// We can only do this if img_conditions is empty, because
// we're going to apply the offset / limit to the image_tags
// subquery, and applying extra conditions to the top-level
// query might reduce the total results below the target limit
&& empty($img_conditions)
// We can only do this if we're sorting by ID, because
// we're going to be using the image_tags table, which
// only has image_id and tag_id, not any other columns
&& ($order == "id DESC" || $order == "images.id DESC")
&& !is_null($offset)
// This is only an optimisation if we are applying limit
// and offset
&& !is_null($limit)
&& !is_null($offset)
) {
$tc = $tag_conditions[0];
$in = $tc->positive ? "IN" : "NOT IN";
// IN (SELECT id FROM tags) is 100x slower than doing a separate
// query and then a second query for IN(first_query_results)??
$tag_array = self::tag_or_wildcard_to_ids($tc->tag);
@ -820,17 +848,16 @@ class Image
$query = new Querylet("
SELECT images.*
FROM images INNER JOIN (
SELECT it.image_id
SELECT DISTINCT it.image_id
FROM image_tags it
WHERE it.tag_id $in ($set)
WHERE it.tag_id IN ($set)
ORDER BY it.image_id DESC
LIMIT :limit OFFSET :offset
) a on a.image_id = images.id
ORDER BY images.id DESC
WHERE 1=1
", ["limit" => $limit, "offset" => $offset]);
// don't offset at the image level because
// we already offset at the image_tags level
$order = null;
$limit = null;
$offset = null;
}
@ -890,7 +917,7 @@ class Image
}
$first = array_shift($inner_joins);
$sub_query = "SELECT it.image_id FROM image_tags it ";
$sub_query = "SELECT DISTINCT it.image_id FROM image_tags it ";
$i = 0;
foreach ($inner_joins as $inner_join) {
$i++;
@ -946,9 +973,8 @@ class Image
$query->append(new Querylet($img_sql, $img_vars));
}
if (!is_null($order)) {
$query->append(new Querylet(" ORDER BY ".$order));
}
$query->append(new Querylet(" ORDER BY ".$order));
if (!is_null($limit)) {
$query->append(new Querylet(" LIMIT :limit ", ["limit" => $limit]));
$query->append(new Querylet(" OFFSET :offset ", ["offset" => $offset]));

View file

@ -178,14 +178,21 @@ class IndexTest extends ShimmiePHPUnitTestCase
}
#[Depends('testUpload')]
public function testWildSearchManyResults($image_ids)
public function testWildSearchManyResultsSimple($image_ids)
{
$image_ids = $this->testUpload();
// two images match comp* - one matches it once,
// one matches it twice
// two images match comp* - one matches it once, one matches it twice
$this->assert_search_results(["comp*"], [$image_ids[1], $image_ids[0]]);
}
#[Depends('testUpload')]
public function testWildSearchManyResultsComplex($image_ids)
{
$image_ids = $this->testUpload();
// same thing, but with the complex branch
$this->assert_search_results(["comp*", "-asdf"], [$image_ids[1], $image_ids[0]]);
}
/* * * * * * * * * * *
* Mixed *
* * * * * * * * * * */
@ -196,20 +203,27 @@ class IndexTest extends ShimmiePHPUnitTestCase
// multiple tags, many results
$this->assert_search_results(["computer", "size=640x480"], [$image_ids[1], $image_ids[0]]);
}
// tag + negative
// wildcards + ???
/* * * * * * * * * * *
* Negative *
* * * * * * * * * * */
#[Depends('testUpload')]
public function testNegative($image_ids)
public function testSubtractFromSearch($image_ids)
{
$image_ids = $this->testUpload();
// negative tag, should have one result
$this->assert_search_results(["computer", "-pbx"], [$image_ids[1]]);
// removing something that doesn't exist should have no effect
$this->assert_search_results(["computer", "-not_a_tag"], [$image_ids[1], $image_ids[0]]);
}
#[Depends('testUpload')]
public function testSubtractFromDefault($image_ids)
{
$image_ids = $this->testUpload();
// negative tag alone, should work
$this->assert_search_results(["-pbx"], [$image_ids[1]]);