[static] allow extensions to customise robots.txt
parent eea4866e27
commit 46f75f7b34
5 changed files with 45 additions and 11 deletions
@@ -216,6 +216,13 @@ class CommentList extends Extension
         }
     }
 
+    public function onRobotsBuilding(RobotsBuildingEvent $event)
+    {
+        // comment lists change all the time, crawlers should
+        // index individual image's comments
+        $event->add_disallow("comment");
+    }
+
     private function onPageRequest_add()
     {
         global $user, $page;
@@ -101,6 +101,14 @@ class Rule34 extends Extension
         }
     }
 
+    public function onRobotsBuilding(RobotsBuildingEvent $event)
+    {
+        // robots should only check the canonical site, not mirrors
+        if ($_SERVER['HTTP_HOST'] != "rule34.paheal.net") {
+            $event->add_disallow("");
+        }
+    }
+
     public function onPageRequest(PageRequestEvent $event)
     {
         global $database, $page, $user;
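In the Rule34 hunk above, the empty-string argument matters: with the add_disallow() helper introduced in the next hunk, an empty path yields a blanket rule that tells well-behaved crawlers to skip the mirror entirely. A quick illustration of the two cases, derived from the helper's "Disallow: /$path" template:

    $event->add_disallow("");         // appends "Disallow: /"        -> block the whole (mirror) site
    $event->add_disallow("comment");  // appends "Disallow: /comment" -> block one path prefix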
@@ -4,11 +4,33 @@ declare(strict_types=1);
 
 namespace Shimmie2;
 
+class RobotsBuildingEvent extends Event
+{
+    public array $parts = [
+        "User-agent: *",
+        // Site is rate limited to 1 request / sec,
+        // returns 503 for more than that
+        "Crawl-delay: 3",
+    ];
+
+    public function add_disallow(string $path): void
+    {
+        $this->parts[] = "Disallow: /$path";
+    }
+}
+
 class StaticFiles extends Extension
 {
     public function onPageRequest(PageRequestEvent $event)
     {
         global $config, $page;
+
+        if ($event->page_matches("robots.txt")) {
+            $rbe = send_event(new RobotsBuildingEvent());
+            $page->set_mode(PageMode::DATA);
+            $page->set_data(join("\n", $rbe->parts));
+        }
+
         // hax.
         if ($page->mode == PageMode::PAGE && (!isset($page->blocks) || $this->count_main($page->blocks) == 0)) {
             $h_pagename = html_escape(implode('/', $event->args));
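The hunk above is the core of the change: /robots.txt is now assembled by firing a RobotsBuildingEvent and joining whatever the loaded extensions contribute, instead of serving a hand-maintained static file. For illustration only, a sketch of how any other extension could hook in, following the same pattern as the handlers in this commit (the class name and the "wiki" path are hypothetical, not part of this diff):

    class SomeOtherExtension extends Extension
    {
        public function onRobotsBuilding(RobotsBuildingEvent $event)
        {
            // hypothetical rule: keep crawlers out of /wiki
            $event->add_disallow("wiki");
        }
    }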
@@ -1,11 +0,0 @@
-User-agent: *
-# comment lists change all the time, crawlers should
-# index individual image's comments
-Disallow: /comment/
-# next and prev are just CPU-heavier ways of getting
-# to the same images that the index shows
-Disallow: /post/next/
-Disallow: /post/prev/
-# Site is rate limited to 1 request / sec,
-# returns 503 for more than that
-Crawl-delay: 3
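The deleted static file above is superseded by the generated response. Assuming only the handlers added in this commit contribute, the canonical site would now serve something along these lines (the order of the Disallow lines depends on extension load order, and the explanatory comments now live in the PHP source rather than in the served file):

    User-agent: *
    Crawl-delay: 3
    Disallow: /comment
    Disallow: /post/next
    Disallow: /post/prev

Note that the generated paths no longer carry trailing slashes, so "Disallow: /comment" is marginally broader than the old "/comment/" rule.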
@@ -94,6 +94,14 @@ class ViewImage extends Extension
         }
     }
 
+    public function onRobotsBuilding(RobotsBuildingEvent $event)
+    {
+        // next and prev are just CPU-heavier ways of getting
+        // to the same images that the index shows
+        $event->add_disallow("post/next");
+        $event->add_disallow("post/prev");
+    }
+
     public function onDisplayingImage(DisplayingImageEvent $event)
     {
         global $page, $user;