Merge branch 'develop' of https://github.com/shish/shimmie2 into develop

This commit is contained in:
Shish 2016-06-18 01:18:29 +01:00
commit 79648b04d2
6 changed files with 377 additions and 346 deletions

View file

@ -756,18 +756,16 @@ class Image {
private static function build_search_querylet($terms) {
    // Entry point for building a search query from a list of terms: it only
    // validates the input and hands off to the builder matching the active
    // database driver.
    //
    // The assertion is written as an expression rather than a string: string
    // assertions are deprecated since PHP 7.2 and are no longer evaluated in
    // PHP 8, which would silently turn this check into a no-op.
    assert(is_array($terms));

    global $database;

    // MySQL gets its own builder; every other driver uses the accurate one.
    if ($database->get_driver_name() === "mysql") {
        return self::build_ugly_search_querylet($terms);
    }
    return self::build_accurate_search_querylet($terms);
}
/**
* @param string[] $terms
* @return ImgQuerylet[]
*/
private static function parse_meta_terms($terms) {
$tag_querylets = array();
$img_querylets = array();
$positive_tag_count = 0;
$negative_tag_count = 0;
/*
* Turn a bunch of strings into a bunch of TagQuerylet
* and ImgQuerylet objects
*/
$stpe = new SearchTermParseEvent(null, $terms);
send_event($stpe);
if ($stpe->is_querylet_set()) {
@ -775,39 +773,105 @@ class Image {
$img_querylets[] = new ImgQuerylet($querylet, true);
}
}
return $img_querylets;
$terms = Tag::resolve_aliases($terms);
foreach ($terms as $term) {
$positive = true;
if (is_string($term) && !empty($term) && ($term[0] == '-')) {
$positive = false;
$term = substr($term, 1);
}
if (strlen($term) === 0) {
continue;
}
/**
* @param ImgQuerylet[] $img_querylets
* @return Querylet
$stpe = new SearchTermParseEvent($term, $terms);
send_event($stpe);
if ($stpe->is_querylet_set()) {
foreach ($stpe->get_querylets() as $querylet) {
$img_querylets[] = new ImgQuerylet($querylet, $positive);
}
} else {
$expansions = Tag::resolve_wildcard($term);
if ($expansions) {
if ($positive) $positive_tag_count++;
else $negative_tag_count++;
}
foreach ($expansions as $expanded_term) {
$tag_querylets[] = new TagQuerylet($expanded_term, $positive);
}
}
}
/*
* Turn a bunch of Querylet objects into a base query
*
* Must follow the format
*
* SELECT images.*
* FROM (...) AS images
* WHERE (...)
*
* ie, return a set of images.* columns, and end with a WHERE
*/
private static function build_img_search($img_querylets) {
// merge all the image metadata searches into one generic querylet
// no tags, do a simple search
if($positive_tag_count + $negative_tag_count == 0) {
$query = new Querylet("
SELECT images.*
FROM images
WHERE 1=1
");
}
// one positive tag (a common case), do an optimised search
else if($positive_tag_count === 1 && $negative_tag_count === 0) {
$query = new Querylet($database->scoreql_to_sql("
SELECT *
FROM (
SELECT images.*
FROM images
JOIN image_tags ON images.id=image_tags.image_id
JOIN tags ON image_tags.tag_id=tags.id
WHERE SCORE_STRNORM(tag) LIKE SCORE_STRNORM(:tag)
GROUP BY images.id
) AS images
WHERE 1=1
"), array("tag"=>$tag_querylets[0]->tag));
}
// more than one positive tag, or more than zero negative tags
else {
if($database->get_driver_name() === "mysql")
$query = Image::build_ugly_search_querylet(
$tag_querylets,
$positive_tag_count
);
else
$query = Image::build_accurate_search_querylet(
$tag_querylets,
$positive_tag_count
);
}
/*
* Merge all the image metadata searches into one generic querylet
* and append to the base querylet with "AND blah"
*/
if($img_querylets) {
$n = 0;
$sql = "";
$terms = array();
$img_sql = "";
$img_vars = array();
foreach ($img_querylets as $iq) {
if ($n++ > 0) $sql .= " AND";
if (!$iq->positive) $sql .= " NOT";
$sql .= " (" . $iq->qlet->sql . ")";
$terms = array_merge($terms, $iq->qlet->variables);
if ($n++ > 0) $img_sql .= " AND";
if (!$iq->positive) $img_sql .= " NOT";
$img_sql .= " (" . $iq->qlet->sql . ")";
$img_vars = array_merge($img_vars, $iq->qlet->variables);
}
return new Querylet($sql, $terms);
$query->append_sql(" AND ");
$query->append(new Querylet($img_sql, $img_vars));
}
/**
 * Wrap an image-metadata filter in a plain "all images" query.
 *
 * When the filter carries no SQL, the bare SELECT is returned untouched;
 * otherwise the filter is appended after a WHERE keyword.
 *
 * @param Querylet $img_search
 * @return Querylet
 */
private static function build_simple_query($img_search) {
    $base = new Querylet("SELECT images.* FROM images ");

    // Nothing to filter on: hand back the unrestricted query.
    if (empty($img_search->sql)) {
        return $base;
    }

    $base->append_sql(" WHERE ");
    $base->append($img_search);
    return $base;
}
@ -832,100 +896,49 @@ class Image {
* All the subqueries are executed every time for every row in the
* images table. Yes, MySQL does suck this much.
*
* @param string[] $terms
* @return \Querylet
* @param array $tag_querylets
* @param int $positive_tag_count
* @return Querylet
*/
private static function build_accurate_search_querylet($terms) {
private static function build_accurate_search_querylet(
$tag_querylets,
$positive_tag_count
) {
global $database;
$tag_querylets = array();
$img_querylets = self::parse_meta_terms($terms);
$positive_tag_count = 0;
// parse the words that are searched for into
// various types of querylet
$terms = Tag::resolve_aliases($terms);
foreach($terms as $term) {
$positive = true;
if(is_string($term) && !empty($term) && ($term[0] == '-')) {
$positive = false;
$term = substr($term, 1);
}
if(strlen($term) === 0) {
continue;
}
$stpe = new SearchTermParseEvent($term, $terms);
send_event($stpe);
if($stpe->is_querylet_set()) {
foreach($stpe->get_querylets() as $querylet) {
$img_querylets[] = new ImgQuerylet($querylet, $positive);
}
}
else {
$expansions = Tag::resolve_wildcard($term);
if($expansions && $positive) $positive_tag_count++;
foreach($expansions as $expanded_term) {
$tag_querylets[] = new TagQuerylet($expanded_term, $positive);
}
}
}
$img_search = self::build_img_search($img_querylets);
// How many tag querylets are there?
$count_tag_querylets = count($tag_querylets);
// no tags, do a simple search (+image metadata if we have any)
if($count_tag_querylets === 0) {
$query = self::build_simple_query($img_search);
}
// one positive tag (a common case), do an optimised search
else if($count_tag_querylets === 1 && $tag_querylets[0]->positive) {
$query = new Querylet($database->scoreql_to_sql("
SELECT images.*
FROM images
JOIN image_tags ON images.id=image_tags.image_id
JOIN tags ON image_tags.tag_id=tags.id
WHERE SCORE_STRNORM(tag) = SCORE_STRNORM(:tag)
"), array("tag"=>$tag_querylets[0]->tag));
if(!empty($img_search->sql)) {
$query->append_sql(" AND ");
$query->append($img_search);
}
}
// more than one positive tag, or more than zero negative tags
else {
$positive_tag_id_array = array();
$negative_tag_id_array = array();
$tags_ok = true;
foreach($tag_querylets as $tq) {
foreach ($tag_querylets as $tq) {
$tag_ids = $database->get_col(
$database->scoreql_to_sql("
SELECT id
FROM tags
WHERE SCORE_STRNORM(tag) = SCORE_STRNORM(:tag)
"), array("tag"=>$tq->tag)
"),
array("tag" => $tq->tag)
);
if($tq->positive) {
if ($tq->positive) {
$positive_tag_id_array = array_merge($positive_tag_id_array, $tag_ids);
$tags_ok = count($tag_ids) > 0;
if(!$tags_ok) break;
if (count($tag_ids) == 0) {
# one of the positive tags had zero results, therefore there
# can be no results; "where 1=0" should shortcut things
return new Querylet("
SELECT images.*
FROM images
WHERE 1=0
");
}
else {
} else {
$negative_tag_id_array = array_merge($negative_tag_id_array, $tag_ids);
}
}
if($tags_ok) {
$have_pos = count($positive_tag_id_array) > 0;
$have_neg = count($negative_tag_id_array) > 0;
$sql = "";
if($have_pos) {
if ($have_pos) {
$positive_tag_id_list = join(', ', $positive_tag_id_array);
$sql .= "
SELECT image_id
@ -935,10 +948,10 @@ class Image {
HAVING COUNT(image_id)>=$positive_tag_count
";
}
if($have_pos && $have_neg) {
if ($have_pos && $have_neg) {
$sql .= " EXCEPT ";
}
if($have_neg) {
if ($have_neg) {
$negative_tag_id_list = join(', ', $negative_tag_id_array);
$sql .= "
SELECT image_id
@ -946,72 +959,36 @@ class Image {
WHERE tag_id IN ($negative_tag_id_list)
";
}
$query = new Querylet("
return new Querylet("
SELECT images.*
FROM images
WHERE images.id IN ($sql)
");
if(strlen($img_search->sql) > 0) {
$query->append_sql(" AND ");
$query->append($img_search);
}
}
else {
# one of the positive tags had zero results, therefore there
# can be no results; "where 1=0" should shortcut things
$query = new Querylet("
SELECT images.*
FROM images
WHERE 1=0
");
}
}
return $query;
}
/**
* this function exists because mysql is a turd, see the docs for
* build_accurate_search_querylet() for a full explanation
*
* @param array $terms
* @param array $tag_querylets
* @param int $positive_tag_count
* @return Querylet
*/
private static function build_ugly_search_querylet($terms) {
private static function build_ugly_search_querylet(
$tag_querylets,
$positive_tag_count
) {
global $database;
$tag_querylets = array();
$img_querylets = self::parse_meta_terms($terms);
$positive_tag_count = 0;
$negative_tag_count = 0;
$terms = Tag::resolve_aliases($terms);
reset($terms); // rewind to first element in array.
// turn each term into a specific type of querylet
foreach($terms as $term) {
$negative = false;
if( !empty($term) && ($term[0] == '-')) {
$negative = true;
$term = substr($term, 1);
}
$stpe = new SearchTermParseEvent($term, $terms);
send_event($stpe);
if($stpe->is_querylet_set()) {
foreach($stpe->get_querylets() as $querylet) {
$img_querylets[] = new ImgQuerylet($querylet, !$negative);
}
}
else {
$term = str_replace("*", "%", $term);
$term = str_replace("?", "_", $term);
if(!preg_match("/^[%_]+$/", $term)) {
$tag_querylets[] = new TagQuerylet($term, !$negative);
}
}
// only negative tags - shortcut to fail
if($positive_tag_count == 0) {
// TODO: This isn't currently implemented.
// SEE: https://github.com/shish/shimmie2/issues/66
return new Querylet("
SELECT images.*
FROM images
WHERE 1=0
");
}
// merge all the tag querylets into one generic one
@ -1019,104 +996,56 @@ class Image {
$terms = array();
foreach($tag_querylets as $tq) {
$sign = $tq->positive ? "+" : "-";
$sql .= ' '.$sign.' (tag LIKE :tag'.Image::$tag_n.')';
$sql .= ' '.$sign.' IF(tag LIKE :tag'.Image::$tag_n.', 1, 0)';
$terms['tag'.Image::$tag_n] = $tq->tag;
Image::$tag_n++;
if($sign === "+") $positive_tag_count++;
else $negative_tag_count++;
}
$tag_search = new Querylet($sql, $terms);
$img_search = self::build_img_search($img_querylets);
// no tags, do a simple search (+image metadata if we have any)
if($positive_tag_count + $negative_tag_count == 0) {
$query = self::build_simple_query($img_search);
}
// one positive tag (a common case), do an optimised search
else if($positive_tag_count === 1 && $negative_tag_count === 0) {
// MySQL is braindead, and does a full table scan on images, running the subquery once for each row -_-
// "{$this->get_images} WHERE images.id IN (SELECT image_id FROM tags WHERE tag LIKE ?) ",
$query = new Querylet("
SELECT images.*
FROM images
JOIN image_tags ON images.id=image_tags.image_id
JOIN tags ON image_tags.tag_id=tags.id
WHERE tag LIKE :tag0
", $tag_search->variables);
if(!empty($img_search->sql)) {
$query->append_sql(" AND ");
$query->append($img_search);
}
}
// more than one positive tag, and zero or more negative tags
else if($positive_tag_count >= 1) {
$tag_id_array = array();
$tags_ok = true;
$x = 0;
foreach($tag_search->variables as $tag) {
$tag_ids = $database->get_col(
"SELECT id FROM tags WHERE tag LIKE :tag",
array("tag"=>$tag)
$database->scoreql_to_sql("
SELECT id
FROM tags
WHERE SCORE_STRNORM(tag) = SCORE_STRNORM(:tag)
"),
array("tag" => $tag)
);
$tag_id_array = array_merge($tag_id_array, $tag_ids);
$tags_ok = count($tag_ids) > 0 || !$tag_querylets[$x]->positive;
if(!$tags_ok) break;
if($tag_querylets[$x]->positive && count($tag_ids) == 0) {
# one of the positive tags had zero results, therefore there
# can be no results; "where 1=0" should shortcut things
return new Querylet("
SELECT images.*
FROM images
WHERE 1=0
");
}
$x++;
}
if($tags_ok) {
$tag_id_list = join(', ', $tag_id_array);
$subquery = new Querylet('
Image::$tag_n = 0;
return new Querylet('
SELECT *
FROM (
SELECT images.*, SUM('.$tag_search->sql.') AS score
FROM images
LEFT JOIN image_tags ON image_tags.image_id = images.id
JOIN tags ON image_tags.tag_id = tags.id
WHERE tags.id IN ('.$tag_id_list.')
WHERE tags.id IN (' . join(', ', $tag_id_array) . ')
GROUP BY images.id
HAVING score = :score',
array_merge(
HAVING score = :score
) AS images
WHERE 1=1
', array_merge(
$tag_search->variables,
array("score"=>$positive_tag_count)
)
);
$query = new Querylet('
SELECT *
FROM ('.$subquery->sql.') AS images ', $subquery->variables);
if(!empty($img_search->sql)) {
$query->append_sql(" WHERE ");
$query->append($img_search);
}
}
else {
# there are no results, "where 1=0" should shortcut things
$query = new Querylet("
SELECT images.*
FROM images
WHERE 1=0
");
}
}
//zero positive tags and one or more negative tags
//TODO: This isn't currently implemented. SEE: https://github.com/shish/shimmie2/issues/66
else{
$query = new Querylet("
SELECT images.*
FROM images
WHERE 1=0
");
}
Image::$tag_n = 0;
return $query;
));
}
}

View file

@ -85,12 +85,12 @@ class VideoFileHandler extends DataHandlerExtension {
if ($config->get_bool("video_thumb_ignore_aspect_ratio") == true)
{
$cmd = escapeshellcmd("{$ffmpeg} -i {$inname} -ss 00:00:00.0 -f image2 -vframes 1 {$outname}");
$cmd = escapeshellcmd("{$ffmpeg} -y -i {$inname} -ss 00:00:00.0 -f image2 -vframes 1 {$outname}");
}
else
{
$scale = 'scale="' . escapeshellarg("if(gt(a,{$w}/{$h}),{$w},-1)") . ':' . escapeshellarg("if(gt(a,{$w}/{$h}),-1,{$h})") . '"';
$cmd = "{$ffmpeg} -i {$inname} -vf {$scale} -ss 00:00:00.0 -f image2 -vframes 1 {$outname}";
$cmd = "{$ffmpeg} -y -i {$inname} -vf {$scale} -ss 00:00:00.0 -f image2 -vframes 1 {$outname}";
}
exec($cmd, $output, $returnValue);

View file

@ -1,5 +1,5 @@
<?php
class ImageTest extends ShimmiePHPUnitTestCase {
class ImageIOTest extends ShimmiePHPUnitTestCase {
public function testUserStats() {
$this->log_in_as_user();
$image_id = $this->post_image("tests/pbx_screenshot.jpg", "test");

View file

@ -1,5 +1,18 @@
<?php
class IndexTest extends ShimmiePHPUnitTestCase {
/**
 * Log in as a regular user, post the two fixture images, log out again
 * and check that each post produced a positive image ID.
 *
 * @return array the two image IDs: pbx screenshot first, bedroom workshop second
 */
private function upload() {
    $this->log_in_as_user();
    $posted = array(
        $this->post_image("tests/pbx_screenshot.jpg", "thing computer screenshot pbx phone"),
        $this->post_image("tests/bedroom_workshop.jpg", "thing computer computing bedroom workshop"),
    );
    $this->log_out();

    # make sure both uploads were ok
    foreach ($posted as $image_id) {
        $this->assertTrue($image_id > 0);
    }

    return $posted;
}
public function testIndexPage() {
$this->get_page('post/list');
$this->assert_title("Welcome to Shimmie ".VERSION);
@ -24,85 +37,162 @@ class IndexTest extends ShimmiePHPUnitTestCase {
$this->assert_title("Shimmie");
$this->get_page('post/list/99999');
$this->assert_title("No Images Found");
# FIXME: test search box
$this->assert_response(404);
}
public function testSearches() {
$this->log_in_as_user();
$image_id_1 = $this->post_image("tests/pbx_screenshot.jpg", "pbx computer screenshot");
$image_id_2 = $this->post_image("tests/bedroom_workshop.jpg", "computer bedroom workshop");
$this->log_out();
/* * * * * * * * * * *
* Tag Search *
* * * * * * * * * * */
/**
 * Searching for a tag that neither fixture image carries must give the
 * "No Images Found" page with a 404 response.
 */
public function testTagSearchNoResults() {
    // upload() verifies both posts itself, so no per-ID checks are needed
    // here (the ID variables are local to upload() and not visible in this
    // method).
    $this->upload();

    # regular tag, no results
    $this->get_page('post/list/maumaumau/1');
    $this->assert_title("No Images Found");
    $this->assert_response(404);
}
/**
 * "pbx" is only on the first fixture image; the search is expected to
 * answer with a 302 rather than a listing.
 */
public function testTagSearchOneResult() {
    $this->upload();

    $single_hit_query = "post/list/pbx/1";
    $this->get_page($single_hit_query);
    $this->assert_response(302);
}
/**
 * "computer" is on both fixture images: expect a normal 200 listing whose
 * title mentions the tag and which does not show the empty-result text.
 */
public function testTagSearchManyResults() {
    $this->upload();

    # regular tag, many results
    $shared_tag_query = 'post/list/computer/1';
    $this->get_page($shared_tag_query);

    $this->assert_response(200);
    $this->assert_title("computer");
    $this->assert_no_text("No Images Found");
}
# meta tag, many results
$this->get_page('post/list/size=640x480/1');
$this->assert_title("size=640x480");
$this->assert_no_text("No Images Found");
# meta tag, one result
$this->get_page("post/list/hash=feb01bab5698a11dd87416724c7a89e3/1");
//$this->assert_title(new PatternExpectation("/^Image $image_id_1: /"));
$this->assert_no_text("No Images Found");
# meta tag, one result
$this->get_page("post/list/md5=feb01bab5698a11dd87416724c7a89e3/1");
//$this->assert_title(new PatternExpectation("/^Image $image_id_1: /"));
$this->assert_no_text("No Images Found");
$this->markTestIncomplete();
# multiple tags, many results
$this->get_page('post/list/computer%20size=640x480/1');
$this->assert_title("computer size=640x480");
$this->assert_no_text("No Images Found");
/* * * * * * * * * * *
* Multi-Tag Search *
* * * * * * * * * * */
/**
 * Two tags where the second does not exist: exercises the
 * "one tag doesn't exist = no hits" path, once with a %20 separator and
 * once with a literal space.
 */
public function testMultiTagSearchNoResults() {
    $this->upload();

    # multiple tags, one of which doesn't exist
    $encoded_query = 'post/list/computer%20asdfasdfwaffle/1';
    $this->get_page($encoded_query);
    $this->assert_text("No Images Found");

    $spaced_query = 'post/list/computer asdfasdfwaffle/1';
    $this->get_page($spaced_query);
    $this->assert_response(404);
}
# multiple tags, single result; search with one result = direct to image
$this->get_page('post/list/screenshot%20computer/1');
//$this->assert_title(new PatternExpectation("/^Image $image_id_1: /"));
/**
 * Only the first fixture image has both "computer" and "screenshot";
 * the search is expected to answer with a 302.
 */
public function testMultiTagSearchOneResult() {
    $this->upload();

    $two_tag_query = 'post/list/computer screenshot/1';
    $this->get_page($two_tag_query);
    $this->assert_response(302);
}
/**
 * Both fixture images carry "computer" and "thing", so the two-tag search
 * should render a regular listing (200).
 */
public function testMultiTagSearchManyResults() {
    $this->upload();

    $two_tag_query = 'post/list/computer thing/1';
    $this->get_page($two_tag_query);
    $this->assert_response(200);
}
/* * * * * * * * * * *
* Meta Search *
* * * * * * * * * * */
/**
 * A hash= search is issued without uploading anything first and is
 * expected to end in a 404.
 */
public function testMetaSearchNoResults() {
    $unknown_hash_query = 'post/list/hash=1234567890/1';
    $this->get_page($unknown_hash_query);
    $this->assert_response(404);
}
/**
 * Meta searches (hash, md5, id, filename) that are each expected to answer
 * with a 302 once the fixture images are uploaded.
 */
public function testMetaSearchOneResult() {
    $uploaded = $this->upload();

    $single_hit_queries = array(
        "post/list/hash=feb01bab5698a11dd87416724c7a89e3/1",
        "post/list/md5=feb01bab5698a11dd87416724c7a89e3/1",
        "post/list/id={$uploaded[1]}/1",
        "post/list/filename=screenshot/1",
    );

    // Same request/assert pair for every query, in the listed order.
    foreach ($single_hit_queries as $query) {
        $this->get_page($query);
        $this->assert_response(302);
    }
}
/**
 * Meta searches (size, tag count, extension) that are each expected to
 * render a regular listing (200) once the fixture images are uploaded.
 */
public function testMetaSearchManyResults() {
    $this->upload();

    $listing_queries = array(
        'post/list/size=640x480/1',
        "post/list/tags=5/1",
        "post/list/ext=jpg/1",
    );

    // Same request/assert pair for every query, in the listed order.
    foreach ($listing_queries as $query) {
        $this->get_page($query);
        $this->assert_response(200);
    }
}
/* * * * * * * * * * *
* Wildcards *
* * * * * * * * * * */
/**
 * A wildcard whose prefix matches none of the fixture tags must end in a 404.
 */
public function testWildSearchNoResults() {
    $this->upload();

    $unmatched_wildcard = "post/list/asdfasdf*/1";
    $this->get_page($unmatched_wildcard);
    $this->assert_response(404);
}
/**
 * Wildcard combined with a plain tag; regression check for
 * https://github.com/shish/shimmie2/issues/547
 */
public function testWildSearchOneResult() {
    $this->upload();

    // Only the first image matches both the wildcard and the tag.
    // The bug: comp* was expanded to "computer computing", and the search
    // then looked for images matching two or more of the tags in
    // "computer computing screenshot".
    $wildcard_plus_tag = "post/list/comp* screenshot/1";
    $this->get_page($wildcard_plus_tag);
    $this->assert_response(302);
}
/**
 * A wildcard on its own that both fixture images satisfy should render a
 * regular listing (200).
 */
public function testWildSearchManyResults() {
    $this->upload();

    // two images match comp* - one matches it once,
    // one matches it twice
    $wildcard_query = "post/list/comp*/1";
    $this->get_page($wildcard_query);
    $this->assert_response(200);
}
/* * * * * * * * * * *
* Mixed *
* * * * * * * * * * */
/**
 * A regular tag combined with a size= meta term should render a regular
 * listing (200).
 */
public function testMixedSearchTagMeta() {
    $this->upload();

    # multiple tags, many results
    $tag_and_meta_query = 'post/list/computer size=640x480/1';
    $this->get_page($tag_and_meta_query);
    $this->assert_response(200);
}
// tag + negative
// wildcards + ???
/* * * * * * * * * * *
* Other *
* - negative tags *
* - wildcards *
* * * * * * * * * * */
/**
 * Catch-all for search cases that are not covered properly yet
 * (negative tags, assorted wildcard/meta lookups).
 *
 * The test flags itself as incomplete as its very first statement.
 * NOTE(review): PHPUnit's markTestIncomplete() is expected to abort the
 * test at that point, leaving the rest unexecuted - confirm.
 * NOTE(review): this method never calls upload() and never defines
 * $image_id_1, which the id= lookup below interpolates; that needs fixing
 * before the incomplete marker is removed.
 */
public function testOther() {
$this->markTestIncomplete();
# negative tag, should have one result
$this->get_page('post/list/computer%20-pbx/1');
//$this->assert_title(new PatternExpectation("/^Image $image_id_2: /"));
$this->get_page('post/list/computer -pbx/1');
$this->assert_response(302);
# negative tag alone, should work
# FIXME: known broken in mysql
//$this->get_page('post/list/-pbx/1');
//$this->assert_title(new PatternExpectation("/^Image $image_id_2: /"));
//$this->assert_response(302);
# test various search methods
$this->get_page("post/list/bedroo*/1");
//$this->assert_title(new PatternExpectation("/^Image $image_id_2: /"));
// NOTE(review): $image_id_1 is not defined anywhere in this method.
$this->get_page("post/list/id=$image_id_1/1");
//$this->assert_title(new PatternExpectation("/^Image $image_id_1: /"));
$this->assert_no_text("No Images Found");
$this->get_page("post/list/filename=screenshot/1");
//$this->assert_title(new PatternExpectation("/^Image $image_id_1: /"));
$this->assert_no_text("No Images Found");
$this->get_page("post/list/tags=3/1");
$this->assert_title("tags=3");
$this->assert_no_text("No Images Found");
$this->get_page("post/list/ext=jpg/1");
$this->assert_title("ext=jpg");
$this->assert_no_text("No Images Found");
$this->assert_response(302);
}
}

View file

@ -22,7 +22,10 @@ abstract class ShimmiePHPUnitTestCase extends PHPUnit_Framework_TestCase {
public function setUp() {
$class = str_replace("Test", "", get_class($this));
if(!method_exists($class, "is_live") || !ext_is_live($class)) {
if(!class_exists($class)) {
$this->markTestSkipped("$class not loaded");
}
elseif(!ext_is_live($class)) {
$this->markTestSkipped("$class not supported with this database");
}
@ -44,12 +47,20 @@ abstract class ShimmiePHPUnitTestCase extends PHPUnit_Framework_TestCase {
$_GET = $args;
$page = class_exists("CustomPage") ? new CustomPage() : new Page();
send_event(new PageRequestEvent($page_name));
if($page->mode == "redirect") {
$page->code = 302;
}
}
// page things
/**
 * Assert that the current page's title contains the given text.
 *
 * This is a substring check, mirroring assert_no_title(); an additional
 * exact-equality assertion here would make the substring check dead code
 * and leave the two helpers asymmetric.
 *
 * @param string $title text expected to appear in the page title
 */
protected function assert_title($title) {
    global $page;
    $this->assertContains($title, $page->title);
}
/**
 * Assert that the current page's title does not contain the given text.
 *
 * @param string $title text that must be absent from the page title
 */
protected function assert_no_title($title) {
    global $page;

    $current_title = $page->title;
    $this->assertNotContains($title, $current_title);
}
protected function assert_response($code) {
@ -59,7 +70,7 @@ abstract class ShimmiePHPUnitTestCase extends PHPUnit_Framework_TestCase {
protected function page_to_text($section=null) {
global $page;
$text = "";
$text = $page->title . "\n";
foreach($page->blocks as $block) {
if(is_null($section) || $section == $block->section) {
$text .= $block->header . "\n";

View file

@ -18,5 +18,6 @@ if(preg_match('/\.(?:png|jpg|jpeg|gif|css|js|php)(\?.*)?$/', $_SERVER["REQUEST_U
// all other requests (use shimmie routing based on URL)
$_SERVER["PHP_SELF"] = '/';

// Route on the path only: everything from the first "?" onwards is dropped
// before the value is used as the "q" parameter. (The earlier raw
// REQUEST_URI assignment was a dead store, overwritten straight away.)
$_GET['q'] = explode("?", $_SERVER["REQUEST_URI"])[0];

// Writes the routed path to the PHP error log on every request.
error_log($_GET['q']);

require_once "index.php";