[static] allow extensions to customise robots.txt

This commit is contained in:
Shish 2023-03-30 20:36:58 +01:00
parent eea4866e27
commit 46f75f7b34
5 changed files with 45 additions and 11 deletions

View file

@ -216,6 +216,13 @@ class CommentList extends Extension
} }
} }
/**
 * Contribute this extension's rule while robots.txt is being built.
 *
 * Adds a single disallow entry for the "comment" path.
 */
public function onRobotsBuilding(RobotsBuildingEvent $event)
{
    // The comment listings are in constant flux; crawlers are better off
    // indexing the comments shown on each individual image instead.
    $listing_path = "comment";
    $event->add_disallow($listing_path);
}
private function onPageRequest_add() private function onPageRequest_add()
{ {
global $user, $page; global $user, $page;

View file

@ -101,6 +101,14 @@ class Rule34 extends Extension
} }
} }
/**
 * Keep crawlers on the canonical host only.
 *
 * When the request's Host header is anything other than
 * "rule34.paheal.net", a blanket disallow rule is added so that
 * mirrors are not indexed.
 */
public function onRobotsBuilding(RobotsBuildingEvent $event)
{
    // HTTP_HOST is not set when the client sends no Host header (or when
    // running from the CLI); fall back to an empty string so the lookup
    // does not raise an "Undefined array key" warning. An absent host is
    // still treated as "not canonical", exactly as before.
    $host = $_SERVER['HTTP_HOST'] ?? '';

    // robots should only check the canonical site, not mirrors
    if ($host !== "rule34.paheal.net") {
        // an empty path disallows the whole site
        $event->add_disallow("");
    }
}
public function onPageRequest(PageRequestEvent $event) public function onPageRequest(PageRequestEvent $event)
{ {
global $database, $page, $user; global $database, $page, $user;

View file

@ -4,11 +4,33 @@ declare(strict_types=1);
namespace Shimmie2; namespace Shimmie2;
/**
 * Event sent while the robots.txt response is being assembled.
 *
 * Handlers append their own directives; the collected lines are
 * joined with newlines to form the response body.
 */
class RobotsBuildingEvent extends Event
{
    /**
     * Lines of the robots.txt body, in output order.
     *
     * @var string[]
     */
    public array $parts = [
        "User-agent: *",
        // Site is rate limited to 1 request / sec,
        // returns 503 for more than that
        "Crawl-delay: 3",
    ];

    /**
     * Append a "Disallow" directive for a site-relative path.
     *
     * @param string $path Path without its leading slash; an empty
     *                     string yields "Disallow: /".
     */
    public function add_disallow(string $path): void
    {
        $rule = "Disallow: /" . $path;
        array_push($this->parts, $rule);
    }
}
class StaticFiles extends Extension class StaticFiles extends Extension
{ {
public function onPageRequest(PageRequestEvent $event) public function onPageRequest(PageRequestEvent $event)
{ {
global $config, $page; global $config, $page;
if ($event->page_matches("robots.txt")) {
$rbe = send_event(new RobotsBuildingEvent());
$page->set_mode(PageMode::DATA);
$page->set_data(join("\n", $rbe->parts));
}
// hax. // hax.
if ($page->mode == PageMode::PAGE && (!isset($page->blocks) || $this->count_main($page->blocks) == 0)) { if ($page->mode == PageMode::PAGE && (!isset($page->blocks) || $this->count_main($page->blocks) == 0)) {
$h_pagename = html_escape(implode('/', $event->args)); $h_pagename = html_escape(implode('/', $event->args));

View file

@ -1,11 +0,0 @@
User-agent: *
# comment lists change all the time, crawlers should
# index individual image's comments
Disallow: /comment/
# next and prev are just CPU-heavier ways of getting
# to the same images that the index shows
Disallow: /post/next/
Disallow: /post/prev/
# Site is rate limited to 1 request / sec,
# returns 503 for more than that
Crawl-delay: 3

View file

@ -94,6 +94,14 @@ class ViewImage extends Extension
} }
} }
/**
 * Contribute this extension's rules while robots.txt is being built.
 *
 * Adds disallow entries for the "post/next" and "post/prev" paths.
 */
public function onRobotsBuilding(RobotsBuildingEvent $event)
{
    // next and prev are just CPU-heavier ways of reaching the very same
    // images that the index already shows
    foreach (["post/next", "post/prev"] as $nav_path) {
        $event->add_disallow($nav_path);
    }
}
public function onDisplayingImage(DisplayingImageEvent $event) public function onDisplayingImage(DisplayingImageEvent $event)
{ {
global $page, $user; global $page, $user;