fix more search edge-cases
This commit is contained in:
parent
e114057dfe
commit
4c8274161f
2 changed files with 75 additions and 35 deletions
|
@ -159,7 +159,8 @@ class Image
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
$querylet = Image::build_search_querylet($tags, $limit, $start);
|
[$tag_conditions, $img_conditions, $order] = self::terms_to_conditions($tags);
|
||||||
|
$querylet = self::build_search_querylet($tag_conditions, $img_conditions, $order, $limit, $start);
|
||||||
return $database->get_all_iterable($querylet->sql, $querylet->variables);
|
return $database->get_all_iterable($querylet->sql, $querylet->variables);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -246,7 +247,8 @@ class Image
|
||||||
if (Extension::is_enabled(RatingsInfo::KEY)) {
|
if (Extension::is_enabled(RatingsInfo::KEY)) {
|
||||||
$tags[] = "rating:*";
|
$tags[] = "rating:*";
|
||||||
}
|
}
|
||||||
$querylet = Image::build_search_querylet($tags);
|
[$tag_conditions, $img_conditions, $order] = self::terms_to_conditions($tags);
|
||||||
|
$querylet = self::build_search_querylet($tag_conditions, $img_conditions, $order);
|
||||||
$total = (int)$database->get_one("SELECT COUNT(*) AS cnt FROM ($querylet->sql) AS tbl", $querylet->variables);
|
$total = (int)$database->get_one("SELECT COUNT(*) AS cnt FROM ($querylet->sql) AS tbl", $querylet->variables);
|
||||||
if (SPEED_HAX && $total > 5000) {
|
if (SPEED_HAX && $total > 5000) {
|
||||||
// when we have a ton of images, the count
|
// when we have a ton of images, the count
|
||||||
|
@ -307,7 +309,8 @@ class Image
|
||||||
} else {
|
} else {
|
||||||
$tags[] = 'id'. $gtlt . $this->id;
|
$tags[] = 'id'. $gtlt . $this->id;
|
||||||
$tags[] = 'order:id_'. strtolower($dir);
|
$tags[] = 'order:id_'. strtolower($dir);
|
||||||
$querylet = Image::build_search_querylet($tags);
|
[$tag_conditions, $img_conditions, $order] = self::terms_to_conditions($tags);
|
||||||
|
$querylet = self::build_search_querylet($tag_conditions, $img_conditions, $order);
|
||||||
$querylet->append_sql(' LIMIT 1');
|
$querylet->append_sql(' LIMIT 1');
|
||||||
$row = $database->get_row($querylet->sql, $querylet->variables);
|
$row = $database->get_row($querylet->sql, $querylet->variables);
|
||||||
}
|
}
|
||||||
|
@ -749,13 +752,13 @@ class Image
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
* Turn a human input string into a an abstract search query
|
||||||
|
*
|
||||||
* @param string[] $terms
|
* @param string[] $terms
|
||||||
|
* @return array{0: TagCondition[], 1: ImgCondition[], 2: string}
|
||||||
*/
|
*/
|
||||||
private static function build_search_querylet(
|
private static function terms_to_conditions(array $terms): array
|
||||||
array $terms,
|
{
|
||||||
?int $limit = null,
|
|
||||||
?int $offset = null
|
|
||||||
): Querylet {
|
|
||||||
global $config;
|
global $config;
|
||||||
|
|
||||||
$tag_conditions = [];
|
$tag_conditions = [];
|
||||||
|
@ -776,8 +779,11 @@ class Image
|
||||||
|
|
||||||
$order = ($order ?: "images.".$config->get_string(IndexConfig::ORDER));
|
$order = ($order ?: "images.".$config->get_string(IndexConfig::ORDER));
|
||||||
|
|
||||||
/*
|
return [$tag_conditions, $img_conditions, $order];
|
||||||
* Turn a bunch of Querylet objects into a base query
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Turn an abstract search query into an SQL Querylet
|
||||||
*
|
*
|
||||||
* Must follow the format
|
* Must follow the format
|
||||||
*
|
*
|
||||||
|
@ -786,8 +792,17 @@ class Image
|
||||||
* WHERE (...)
|
* WHERE (...)
|
||||||
*
|
*
|
||||||
* ie, return a set of images.* columns, and end with a WHERE
|
* ie, return a set of images.* columns, and end with a WHERE
|
||||||
|
*
|
||||||
|
* @param TagCondition[] $tag_conditions
|
||||||
|
* @param ImgCondition[] $img_conditions
|
||||||
*/
|
*/
|
||||||
|
private static function build_search_querylet(
|
||||||
|
array $tag_conditions,
|
||||||
|
array $img_conditions,
|
||||||
|
string $order,
|
||||||
|
?int $limit = null,
|
||||||
|
?int $offset = null
|
||||||
|
): Querylet {
|
||||||
// no tags, do a simple search
|
// no tags, do a simple search
|
||||||
if (count($tag_conditions) === 0) {
|
if (count($tag_conditions) === 0) {
|
||||||
$query = new Querylet("SELECT images.* FROM images WHERE 1=1");
|
$query = new Querylet("SELECT images.* FROM images WHERE 1=1");
|
||||||
|
@ -796,15 +811,28 @@ class Image
|
||||||
// one tag sorted by ID - we can fetch this from the image_tags table,
|
// one tag sorted by ID - we can fetch this from the image_tags table,
|
||||||
// and do the offset / limit there, which is 10x faster than fetching
|
// and do the offset / limit there, which is 10x faster than fetching
|
||||||
// all the image_tags and doing the offset / limit on the result.
|
// all the image_tags and doing the offset / limit on the result.
|
||||||
|
//
|
||||||
|
// NOTE: this is currently impossible to test, because the test suite
|
||||||
|
// loads all extensions, some of whom add generic img_conditions onto
|
||||||
|
// the search, which prevents this optimisation from being used.
|
||||||
elseif (
|
elseif (
|
||||||
count($tag_conditions) === 1
|
count($tag_conditions) === 1
|
||||||
|
&& $tag_conditions[0]->positive
|
||||||
|
// We can only do this if img_conditions is empty, because
|
||||||
|
// we're going to apply the offset / limit to the image_tags
|
||||||
|
// subquery, and applying extra conditions to the top-level
|
||||||
|
// query might reduce the total results below the target limit
|
||||||
&& empty($img_conditions)
|
&& empty($img_conditions)
|
||||||
|
// We can only do this if we're sorting by ID, because
|
||||||
|
// we're going to be using the image_tags table, which
|
||||||
|
// only has image_id and tag_id, not any other columns
|
||||||
&& ($order == "id DESC" || $order == "images.id DESC")
|
&& ($order == "id DESC" || $order == "images.id DESC")
|
||||||
&& !is_null($offset)
|
// This is only an optimisation if we are applying limit
|
||||||
|
// and offset
|
||||||
&& !is_null($limit)
|
&& !is_null($limit)
|
||||||
|
&& !is_null($offset)
|
||||||
) {
|
) {
|
||||||
$tc = $tag_conditions[0];
|
$tc = $tag_conditions[0];
|
||||||
$in = $tc->positive ? "IN" : "NOT IN";
|
|
||||||
// IN (SELECT id FROM tags) is 100x slower than doing a separate
|
// IN (SELECT id FROM tags) is 100x slower than doing a separate
|
||||||
// query and then a second query for IN(first_query_results)??
|
// query and then a second query for IN(first_query_results)??
|
||||||
$tag_array = self::tag_or_wildcard_to_ids($tc->tag);
|
$tag_array = self::tag_or_wildcard_to_ids($tc->tag);
|
||||||
|
@ -820,17 +848,16 @@ class Image
|
||||||
$query = new Querylet("
|
$query = new Querylet("
|
||||||
SELECT images.*
|
SELECT images.*
|
||||||
FROM images INNER JOIN (
|
FROM images INNER JOIN (
|
||||||
SELECT it.image_id
|
SELECT DISTINCT it.image_id
|
||||||
FROM image_tags it
|
FROM image_tags it
|
||||||
WHERE it.tag_id $in ($set)
|
WHERE it.tag_id IN ($set)
|
||||||
ORDER BY it.image_id DESC
|
ORDER BY it.image_id DESC
|
||||||
LIMIT :limit OFFSET :offset
|
LIMIT :limit OFFSET :offset
|
||||||
) a on a.image_id = images.id
|
) a on a.image_id = images.id
|
||||||
ORDER BY images.id DESC
|
WHERE 1=1
|
||||||
", ["limit" => $limit, "offset" => $offset]);
|
", ["limit" => $limit, "offset" => $offset]);
|
||||||
// don't offset at the image level because
|
// don't offset at the image level because
|
||||||
// we already offset at the image_tags level
|
// we already offset at the image_tags level
|
||||||
$order = null;
|
|
||||||
$limit = null;
|
$limit = null;
|
||||||
$offset = null;
|
$offset = null;
|
||||||
}
|
}
|
||||||
|
@ -890,7 +917,7 @@ class Image
|
||||||
}
|
}
|
||||||
|
|
||||||
$first = array_shift($inner_joins);
|
$first = array_shift($inner_joins);
|
||||||
$sub_query = "SELECT it.image_id FROM image_tags it ";
|
$sub_query = "SELECT DISTINCT it.image_id FROM image_tags it ";
|
||||||
$i = 0;
|
$i = 0;
|
||||||
foreach ($inner_joins as $inner_join) {
|
foreach ($inner_joins as $inner_join) {
|
||||||
$i++;
|
$i++;
|
||||||
|
@ -946,9 +973,8 @@ class Image
|
||||||
$query->append(new Querylet($img_sql, $img_vars));
|
$query->append(new Querylet($img_sql, $img_vars));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!is_null($order)) {
|
|
||||||
$query->append(new Querylet(" ORDER BY ".$order));
|
$query->append(new Querylet(" ORDER BY ".$order));
|
||||||
}
|
|
||||||
if (!is_null($limit)) {
|
if (!is_null($limit)) {
|
||||||
$query->append(new Querylet(" LIMIT :limit ", ["limit" => $limit]));
|
$query->append(new Querylet(" LIMIT :limit ", ["limit" => $limit]));
|
||||||
$query->append(new Querylet(" OFFSET :offset ", ["offset" => $offset]));
|
$query->append(new Querylet(" OFFSET :offset ", ["offset" => $offset]));
|
||||||
|
|
|
@ -178,14 +178,21 @@ class IndexTest extends ShimmiePHPUnitTestCase
|
||||||
}
|
}
|
||||||
|
|
||||||
#[Depends('testUpload')]
|
#[Depends('testUpload')]
|
||||||
public function testWildSearchManyResults($image_ids)
|
public function testWildSearchManyResultsSimple($image_ids)
|
||||||
{
|
{
|
||||||
$image_ids = $this->testUpload();
|
$image_ids = $this->testUpload();
|
||||||
// two images match comp* - one matches it once,
|
// two images match comp* - one matches it once, one matches it twice
|
||||||
// one matches it twice
|
|
||||||
$this->assert_search_results(["comp*"], [$image_ids[1], $image_ids[0]]);
|
$this->assert_search_results(["comp*"], [$image_ids[1], $image_ids[0]]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[Depends('testUpload')]
|
||||||
|
public function testWildSearchManyResultsComplex($image_ids)
|
||||||
|
{
|
||||||
|
$image_ids = $this->testUpload();
|
||||||
|
// same thing, but with the complex branch
|
||||||
|
$this->assert_search_results(["comp*", "-asdf"], [$image_ids[1], $image_ids[0]]);
|
||||||
|
}
|
||||||
|
|
||||||
/* * * * * * * * * * *
|
/* * * * * * * * * * *
|
||||||
* Mixed *
|
* Mixed *
|
||||||
* * * * * * * * * * */
|
* * * * * * * * * * */
|
||||||
|
@ -196,20 +203,27 @@ class IndexTest extends ShimmiePHPUnitTestCase
|
||||||
// multiple tags, many results
|
// multiple tags, many results
|
||||||
$this->assert_search_results(["computer", "size=640x480"], [$image_ids[1], $image_ids[0]]);
|
$this->assert_search_results(["computer", "size=640x480"], [$image_ids[1], $image_ids[0]]);
|
||||||
}
|
}
|
||||||
// tag + negative
|
|
||||||
// wildcards + ???
|
|
||||||
|
|
||||||
/* * * * * * * * * * *
|
/* * * * * * * * * * *
|
||||||
* Negative *
|
* Negative *
|
||||||
* * * * * * * * * * */
|
* * * * * * * * * * */
|
||||||
#[Depends('testUpload')]
|
#[Depends('testUpload')]
|
||||||
public function testNegative($image_ids)
|
public function testSubtractFromSearch($image_ids)
|
||||||
{
|
{
|
||||||
$image_ids = $this->testUpload();
|
$image_ids = $this->testUpload();
|
||||||
|
|
||||||
// negative tag, should have one result
|
// negative tag, should have one result
|
||||||
$this->assert_search_results(["computer", "-pbx"], [$image_ids[1]]);
|
$this->assert_search_results(["computer", "-pbx"], [$image_ids[1]]);
|
||||||
|
|
||||||
|
// removing something that doesn't exist should have no effect
|
||||||
|
$this->assert_search_results(["computer", "-not_a_tag"], [$image_ids[1], $image_ids[0]]);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[Depends('testUpload')]
|
||||||
|
public function testSubtractFromDefault($image_ids)
|
||||||
|
{
|
||||||
|
$image_ids = $this->testUpload();
|
||||||
|
|
||||||
// negative tag alone, should work
|
// negative tag alone, should work
|
||||||
$this->assert_search_results(["-pbx"], [$image_ids[1]]);
|
$this->assert_search_results(["-pbx"], [$image_ids[1]]);
|
||||||
|
|
||||||
|
|
Reference in a new issue