[static] allow extensions to customise robots.txt
parent eea4866e27
commit 46f75f7b34
5 changed files with 45 additions and 11 deletions
@@ -216,6 +216,13 @@ class CommentList extends Extension
         }
     }
 
+    public function onRobotsBuilding(RobotsBuildingEvent $event)
+    {
+        // comment lists change all the time, crawlers should
+        // index individual image's comments
+        $event->add_disallow("comment");
+    }
+
     private function onPageRequest_add()
     {
         global $user, $page;
@@ -101,6 +101,14 @@ class Rule34 extends Extension
         }
     }
 
+    public function onRobotsBuilding(RobotsBuildingEvent $event)
+    {
+        // robots should only check the canonical site, not mirrors
+        if ($_SERVER['HTTP_HOST'] != "rule34.paheal.net") {
+            $event->add_disallow("");
+        }
+    }
+
     public function onPageRequest(PageRequestEvent $event)
     {
         global $database, $page, $user;
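Note: the add_disallow() helper introduced in static_files below builds each rule as "Disallow: /$path", so the empty string here emits a blanket rule that tells crawlers to skip a mirror host entirely:

Disallow: /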
@@ -4,11 +4,33 @@ declare(strict_types=1);
 
 namespace Shimmie2;
 
+class RobotsBuildingEvent extends Event
+{
+    public array $parts = [
+        "User-agent: *",
+        // Site is rate limited to 1 request / sec,
+        // returns 503 for more than that
+        "Crawl-delay: 3",
+    ];
+
+    public function add_disallow(string $path): void
+    {
+        $this->parts[] = "Disallow: /$path";
+    }
+}
+
 class StaticFiles extends Extension
 {
     public function onPageRequest(PageRequestEvent $event)
     {
         global $config, $page;
+
+        if ($event->page_matches("robots.txt")) {
+            $rbe = send_event(new RobotsBuildingEvent());
+            $page->set_mode(PageMode::DATA);
+            $page->set_data(join("\n", $rbe->parts));
+        }
+
         // hax.
         if ($page->mode == PageMode::PAGE && (!isset($page->blocks) || $this->count_main($page->blocks) == 0)) {
             $h_pagename = html_escape(implode('/', $event->args));
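For reference, a rough sketch of the robots.txt this would now generate on the canonical host (assuming the handlers added in this commit fire in the order the files appear here; the exact ordering depends on extension load order):

User-agent: *
Crawl-delay: 3
Disallow: /comment
Disallow: /post/next
Disallow: /post/prev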
@@ -1,11 +0,0 @@
-User-agent: *
-# comment lists change all the time, crawlers should
-# index individual image's comments
-Disallow: /comment/
-# next and prev are just CPU-heavier ways of getting
-# to the same images that the index shows
-Disallow: /post/next/
-Disallow: /post/prev/
-# Site is rate limited to 1 request / sec,
-# returns 503 for more than that
-Crawl-delay: 3
@@ -94,6 +94,14 @@ class ViewImage extends Extension
         }
     }
 
+    public function onRobotsBuilding(RobotsBuildingEvent $event)
+    {
+        // next and prev are just CPU-heavier ways of getting
+        // to the same images that the index shows
+        $event->add_disallow("post/next");
+        $event->add_disallow("post/prev");
+    }
+
     public function onDisplayingImage(DisplayingImageEvent $event)
     {
         global $page, $user;
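Any other extension can opt in the same way; a minimal sketch, using a hypothetical extension and path that are not part of this commit:

class MyExtension extends Extension
{
    public function onRobotsBuilding(RobotsBuildingEvent $event)
    {
        // hypothetical path, shown only to illustrate the hook
        $event->add_disallow("my_heavy_page");
    }
}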