[sitemap] add tests, and big rewrite to pass tests, fixes #1016

This commit is contained in:
Shish 2024-01-18 19:47:48 +00:00
parent cc9cd396ad
commit 2a561392d0
2 changed files with 94 additions and 137 deletions

View file

@ -4,173 +4,131 @@ declare(strict_types=1);
namespace Shimmie2;
/**
 * One entry of the XML sitemap.
 *
 * Plain value holder; all four fields are strings that the sitemap writer
 * interpolates into a single <url> element.
 */
class XMLSitemapURL
{
    /** Page path handed to make_link() when the sitemap is rendered, e.g. "post/view/123". */
    public string $url;

    /** Text for the <changefreq> element, e.g. "weekly" or "monthly". */
    public string $changefreq;

    /** Text for the <priority> element, e.g. "0.8". */
    public string $priority;

    /** Text for the <lastmod> element; callers in this file pass date("Y-m-d") output. */
    public string $date;

    public function __construct(string $url, string $changefreq, string $priority, string $date)
    {
        $this->url = $url;
        $this->changefreq = $changefreq;
        $this->priority = $priority;
        $this->date = $date;
    }
}
class XMLSitemap extends Extension
{
// Buffer of "<url>" XML fragments; add_sitemap_queue() appends to it and
// generate_display_sitemap() wraps it in the "<urlset>" document.
private string $sitemap_queue = "";
// Path of the cached sitemap file; empty until onPageRequest assigns it.
private string $sitemap_filepath = ""; // set onPageRequest
/**
 * Handles requests for the "sitemap.xml" page.
 *
 * NOTE(review): this span is rendered diff output. Lines removed by the commit
 * and lines added by it appear together with no +/- markers, so the braces do
 * not balance as shown. The comments below mark which flow each group of
 * lines appears to belong to; confirm against the actual file.
 */
public function onPageRequest(PageRequestEvent $event): void
{
if ($event->page_matches("sitemap.xml")) {
global $config;
global $config, $page;
// Older flow (presumably the removed side): store the cache path on the
// instance, then either regenerate (full or 50-post sitemap, selected by the
// "sitemap_generatefull" setting) or replay the cached file.
$this->sitemap_filepath = data_path("cache/sitemap.xml");
// determine if new sitemap needs to be generated
if ($this->new_sitemap_needed()) {
// determine which type of sitemap to generate
if ($config->get_bool("sitemap_generatefull", false)) {
$this->handle_full_sitemap(); // default false until cache fixed
} else {
$this->handle_smaller_sitemap();
}
} else {
$this->display_existing_sitemap();
// Newer flow (presumably the added side): rewrite the cache file when
// new_sitemap_needed() says so, then always serve the file's contents.
$cache_path = data_path("cache/sitemap.xml");
if ($this->new_sitemap_needed($cache_path)) {
$xml = $this->handle_full_sitemap();
// NOTE(review): the return value of file_put_contents() is not checked, and
// the freshly built $xml is discarded in favour of re-reading the file below.
// If the write fails, the read yields false or stale content — confirm that
// this is acceptable.
file_put_contents($cache_path, $xml);
}
$xml = file_get_contents($cache_path);
// Hand the XML to the page object as DATA-mode output with the XML MIME type.
$page->set_mode(PageMode::DATA);
$page->set_mime(MimeType::XML_APPLICATION);
$page->set_data($xml);
}
}
/**
 * Adds a "Sitemap" block to the setup panel with one boolean option
 * ("sitemap_generatefull") and two explanatory labels.
 */
public function onSetupBuilding(SetupBuildingEvent $event): void
{
    $block = $event->panel->create_new_block("Sitemap");
    $block->add_bool_option("sitemap_generatefull", "Generate full sitemap");

    // Help text describing what each state of the option does.
    $hints = [
        "<br>(Enabled: every image and tag in sitemap, generation takes longer)",
        "<br>(Disabled: only display the last 50 uploads in the sitemap)",
    ];
    foreach ($hints as $hint) {
        $block->add_label($hint);
    }
}
// Reduced sitemap: queues only the latest 50 images, then writes and
// displays the result. Does nothing at all when no images are returned.
private function handle_smaller_sitemap()
{
    $recent = Search::find_images(limit: 50);
    if (!$recent) {
        return;
    }

    // Map each image to its view path (keys kept as returned) and remember
    // whichever image the iteration visits last.
    $paths = [];
    $final_image = null;
    foreach ($recent as $key => $post) {
        $paths[$key] = "post/view/{$post->id}";
        $final_image = $post;
    }

    // Every queued entry shares one lastmod: the posted date of that last image.
    $lastmod = date("Y-m-d", strtotime($final_image->posted));
    $this->add_sitemap_queue($paths, "monthly", "0.8", $lastmod);

    // Wrap the queue into a document, cache it and send it.
    $this->generate_display_sitemap();
}
// Full sitemap
//
// Collects entries for the front page (priority 1), the 20 most-used tags
// (0.9), the first 50 images from Search::find_images (0.8), further tags
// (0.7) and further images (0.6).
//
// NOTE(review): this span is rendered diff output. Removed lines (the
// add_sitemap_queue()-based version) and added lines (the XMLSitemapURL-based
// version that returns generate_sitemap()'s string) are interleaved with no
// +/- markers, so loops appear to open twice and the braces do not balance as
// shown. Confirm against the actual file.
private function handle_full_sitemap()
{
global $database, $config;
// Entries for the newer version; passed to generate_sitemap() at the end.
$urls = [];
// add index
$index = [];
$index[0] = $config->get_string(SetupConfig::FRONT_PAGE);
$this->add_sitemap_queue($index, "weekly", "1");
$urls[] = new XMLSitemapURL(
$config->get_string(SetupConfig::FRONT_PAGE),
"weekly",
"1",
date("Y-m-d")
);
/* --- Add 20 most used tags --- */
$popular_tags = $database->get_all("SELECT tag, count FROM tags ORDER BY `count` DESC LIMIT 0,20");
foreach ($popular_tags as $arrayid => $tag) {
$tag = $tag['tag'];
$popular_tags[$arrayid] = "post/list/$tag/";
// NOTE(review): $tag is interpolated into the path as-is; a tag containing
// "/", "?" or "&" would alter the resulting URL — confirm whether tags can
// contain such characters or whether make_link() encodes them.
foreach ($database->get_col("SELECT tag FROM tags ORDER BY count DESC LIMIT 20") as $tag) {
$urls[] = new XMLSitemapURL(
"post/list/$tag/1",
"weekly",
"0.9",
date("Y-m-d")
);
}
$this->add_sitemap_queue($popular_tags, "monthly", "0.9" /* not sure how to deal with date here */);
/* --- Add latest images to sitemap with higher priority --- */
$latestimages = Search::find_images(limit: 50);
$latestimages_urllist = [];
$latest_image = null;
foreach ($latestimages as $arrayid => $image) {
// create url from image id's
$latestimages_urllist[$arrayid] = "post/view/$image->id";
$latest_image = $image;
// Newer version: each image carries its own lastmod taken from $image->posted.
foreach(Search::find_images(limit: 50) as $image) {
$urls[] = new XMLSitemapURL(
"post/view/$image->id",
"weekly",
"0.8",
date("Y-m-d", strtotime($image->posted))
);
}
$this->add_sitemap_queue($latestimages_urllist, "monthly", "0.8", date("Y-m-d", strtotime($latest_image->posted)));
/* --- Add other tags --- */
$other_tags = $database->get_all("SELECT tag, count FROM tags ORDER BY `count` DESC LIMIT 21,10000000");
foreach ($other_tags as $arrayid => $tag) {
$tag = $tag['tag'];
// create url from tags (tagme ignored)
if ($tag != "tagme") {
$other_tags[$arrayid] = "post/list/$tag/";
}
// NOTE(review): the popular-tag query above takes 20 rows (LIMIT 20), and
// OFFSET 21 skips 21 rows, so the 21st most-used tag appears in neither list.
// Looks like an off-by-one; confirm whether OFFSET 20 was intended. The older
// "LIMIT 21,10000000" query has the same gap.
foreach ($database->get_col("SELECT tag FROM tags ORDER BY count DESC LIMIT 10000 OFFSET 21") as $tag) {
$urls[] = new XMLSitemapURL(
"post/list/$tag/1",
"weekly",
"0.7",
date("Y-m-d")
);
}
$this->add_sitemap_queue($other_tags, "monthly", "0.7" /* not sure how to deal with date here */);
/* --- Add all other images to sitemap with lower priority --- */
$otherimages = Search::find_images(offset: 51, limit: 10000000);
$image = null;
foreach ($otherimages as $arrayid => $image) {
// create url from image id's
$otherimages[$arrayid] = "post/view/$image->id";
// NOTE(review): the block above lists 50 images (limit: 50). If "offset"
// counts rows to skip, offset: 51 leaves the 51st image out of both lists —
// confirm whether offset: 50 was intended.
foreach(Search::find_images(offset: 51, limit: 10000) as $image) {
$urls[] = new XMLSitemapURL(
"post/view/$image->id",
"monthly",
"0.6",
date("Y-m-d", strtotime($image->posted))
);
}
// Older version: relies on the loop having run at least once so that $image
// is non-null before its posted date is read.
assert(!is_null($image));
$this->add_sitemap_queue($otherimages, "monthly", "0.6", date("Y-m-d", strtotime($image->posted)));
/* --- Display page --- */
// when sitemap is ok, display it from the file
$this->generate_display_sitemap();
// Newer version: return the rendered XML string instead of writing/sending it here.
return $this->generate_sitemap($urls);
}
/**
* Adds an array of urls to the sitemap with the given information.
* @param XMLSitemapURL[] $urls
*/
private function add_sitemap_queue(
    array $urls,
    string $changefreq = "monthly",
    string $priority = "0.5",
    string $date = "2013-02-01"
) {
    // Every entry queued by one call shares the same change frequency,
    // priority and last-modified date; only <loc> differs per URL.
    foreach ($urls as $url) {
        // The sitemap protocol requires URLs inside <loc> to be entity-escaped.
        // Without this, a link containing "&", "<", ">" or a quote (for
        // example a query-string style link or an unusual tag name) would
        // produce a malformed XML document.
        $link = htmlspecialchars(
            make_http(make_link("$url")),
            ENT_XML1 | ENT_QUOTES,
            "UTF-8"
        );
        // The literal below is kept flush-left on purpose: its line breaks are
        // part of the emitted XML text.
        $this->sitemap_queue .= "
<url>
<loc>$link</loc>
<lastmod>$date</lastmod>
<changefreq>$changefreq</changefreq>
<priority>$priority</priority>
</url>";
    }
}
// sets sitemap with entries in sitemap_queue
//
// NOTE(review): this span is rendered diff output and shows two signatures in
// a row. generate_display_sitemap() (older, presumably removed) wraps
// $this->sitemap_queue in a <urlset>, writes it to $this->sitemap_filepath and
// sends it via $page. generate_sitemap() (newer, presumably added) renders the
// given XMLSitemapURL list and only returns the XML string. Their bodies are
// interleaved below; confirm against the actual file.
private function generate_display_sitemap()
private function generate_sitemap(array $urls): string
{
global $page;
// Newer version: start the document. The declaration is assembled from
// pieces ("<" . "?xml ...") — presumably so the source never contains a
// literal PHP open/close tag sequence; confirm.
$xml = "<" . "?xml version=\"1.0\" encoding=\"utf-8\"?" . ">\n" .
"<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">\n";
// One <url> element per entry; the entry's path goes through make_link() and
// then make_http() to produce the <loc> value.
foreach($urls as $url) {
// NOTE(review): $link and the entry fields are interpolated without XML
// escaping. If make_link() can return "&", "<" or quotes (e.g. from a tag
// name), the document would be malformed — confirm and escape if so.
$link = make_http(make_link($url->url));
$xml .= "
<url>
<loc>$link</loc>
<lastmod>$url->date</lastmod>
<changefreq>$url->changefreq</changefreq>
<priority>$url->priority</priority>
</url>
";
}
$xml .= "</urlset>\n";
// Older version: build the whole document around the queued fragments.
$xml = "<" . "?xml version=\"1.0\" encoding=\"utf-8\"?" . ">
<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">
$this->sitemap_queue
</urlset>";
// Generate new sitemap
file_put_contents($this->sitemap_filepath, $xml);
$page->set_mode(PageMode::DATA);
$page->set_mime(MimeType::XML_APPLICATION);
$page->set_data($xml);
// Newer version: the caller decides what to do with the XML.
return $xml;
}
/**
* Returns true if a new sitemap is needed: the cache file does not exist, or
* its modification time is more than 86400 seconds (one day) in the past.
*
* NOTE(review): this span is rendered diff output. The older signature reads
* $this->sitemap_filepath, the newer one takes the path as $cache_path; both
* appear below, along with the older display_existing_sitemap() helper. A hunk
* boundary in the middle hides part of the body (what happens when filemtime()
* fails is not visible here). Confirm against the actual file.
*/
private function new_sitemap_needed(): bool
private function new_sitemap_needed($cache_path): bool
{
if (!file_exists($this->sitemap_filepath)) {
if (!file_exists($cache_path)) {
return true;
}
$sitemap_generation_interval = 86400; // allow new site map every day
$last_generated_time = filemtime($this->sitemap_filepath);
$last_generated_time = filemtime($cache_path);
// if file doesn't exist, return true
// (filemtime() yields false on failure; the comparison below is loose, so a
// timestamp of 0 would be treated the same way)
if ($last_generated_time == false) {
@ -178,21 +136,6 @@ class XMLSitemap extends Extension
}
// if it's been a day since last sitemap creation, return true
if ($last_generated_time + $sitemap_generation_interval < time()) {
return true;
} else {
return false;
}
}
// Older helper (presumably removed): sends the cached file's contents as
// DATA-mode output with the XML MIME type.
private function display_existing_sitemap()
{
global $page;
$xml = file_get_contents($this->sitemap_filepath);
$page->set_mode(PageMode::DATA);
$page->set_mime(MimeType::XML_APPLICATION);
$page->set_data($xml);
// Newer form of the age check in new_sitemap_needed(): the boolean expression
// is returned directly instead of going through if/else.
return ($last_generated_time + $sitemap_generation_interval < time());
}
}

View file

@ -8,6 +8,20 @@ class XMLSitemapTest extends ShimmiePHPUnitTestCase
{
/**
 * Requests sitemap.xml three times — with no posts, with one post, and once
 * more without clearing the cache file — and expects HTTP 200 each time.
 */
public function testBasic(): void
{
    $cache_file = "cache/sitemap.xml";

    // Empty database: remove any cached sitemap first so none is left over
    // from an earlier run (unlink errors are suppressed if it is absent).
    @unlink(data_path($cache_file));
    $this->assertEquals(200, $this->get_page('sitemap.xml')->code);

    // Database with a single post, again starting without a cache file.
    $this->log_in_as_user();
    $this->post_image("tests/pbx_screenshot.jpg", "pbx computer screenshot");
    @unlink(data_path($cache_file));
    $this->assertEquals(200, $this->get_page('sitemap.xml')->code);

    // Cache file left in place by the previous request this time.
    $this->assertEquals(200, $this->get_page('sitemap.xml')->code);
}