Block bots

This commit is contained in:
Bad Manners 2024-09-04 18:10:14 -03:00
parent 90fc60e871
commit 6cdd20eedc
Signed by: badmanners
GPG key ID: 8C88292CCB075609
6 changed files with 110 additions and 2 deletions

View file

@ -30,6 +30,8 @@
"[typescript]": {
"editor.defaultFormatter": "esbenp.prettier-vscode"
},
"prettier.requireConfig": true,
"prettier.configPath": ".prettierrc.mjs",
"editor.defaultFormatter": "esbenp.prettier-vscode"
"editor.defaultFormatter": "esbenp.prettier-vscode",
"editor.formatOnSave": true
}

View file

@ -3,6 +3,7 @@ import tailwindIntegration from "@astrojs/tailwind";
import markdownIntegration from "@astropub/md";
import htaccessIntegration from "astro-htaccess";
import pagefindIntegration from "astro-pagefind";
import { AI_BOTS } from "./src/data/ai_bots";
// https://astro.build/config
export default defineConfig({
@ -14,6 +15,15 @@ export default defineConfig({
markdownIntegration(),
htaccessIntegration({
generateHtaccessFile: import.meta.env.APACHE_CONFIG === "true",
customRules: [
  // Block AI bots: reply 403 Forbidden to any request whose User-Agent header
  // contains one of the AI_BOTS tokens (compared case-insensitively via [NC]).
  "<IfModule mod_rewrite.c>",
  " RewriteEngine on",
  " RewriteBase /",
  // The alternation is deliberately unanchored: crawlers send the token inside a
  // longer User-Agent string (product/version, "Mozilla/5.0 (compatible; ...)"),
  // so a `^token$` pattern would only match a header that is exactly the bare name.
  ` RewriteCond %{HTTP_USER_AGENT} (${AI_BOTS.join("|")}) [NC]`,
  // "-" is the "no substitution" placeholder. Without it, Apache parses "[F]"
  // as the substitution string instead of as the flag list.
  " RewriteRule ^ - [F]",
  "</IfModule>",
],
redirects: [
{ match: "/story/", url: "/stories/" },
{ match: "/game/", url: "/games/" },

View file

@ -1,3 +1,41 @@
User-agent: AI2Bot
User-agent: Ai2Bot-Dolma
User-agent: Amazonbot
User-agent: Applebot
User-agent: Applebot-Extended
User-agent: Bytespider
User-agent: CCBot
User-agent: ChatGPT-User
User-agent: Claude-Web
User-agent: ClaudeBot
User-agent: Diffbot
User-agent: FacebookBot
User-agent: FriendlyCrawler
User-agent: GPTBot
User-agent: Google-Extended
User-agent: GoogleOther
User-agent: GoogleOther-Image
User-agent: GoogleOther-Video
User-agent: ICC-Crawler
User-agent: ImagesiftBot
User-agent: Meta-ExternalAgent
User-agent: Meta-ExternalFetcher
User-agent: OAI-SearchBot
User-agent: PerplexityBot
User-agent: PetalBot
User-agent: Scrapy
User-agent: Timpibot
User-agent: VelenPublicWebCrawler
User-agent: Webzio-Extended
User-agent: YouBot
User-agent: anthropic-ai
User-agent: cohere-ai
User-agent: facebookexternalhit
User-agent: img2dataset
User-agent: omgili
User-agent: omgilibot
Disallow: /
User-agent: *
Disallow: .htaccess
Disallow: /stories/drafts/

38
src/data/ai_bots.ts Normal file
View file

@ -0,0 +1,38 @@
/**
 * User-agent tokens of the AI crawlers this site blocks.
 *
 * The same list feeds both the generated `robots.txt` group and the Apache
 * `RewriteCond` rule, so entries must be the bare product token only.
 * Entries are kept in ASCII order (uppercase before lowercase).
 */
export const AI_BOTS: string[] = [
  // A - C
  "AI2Bot", "Ai2Bot-Dolma", "Amazonbot", "Applebot", "Applebot-Extended",
  "Bytespider", "CCBot", "ChatGPT-User", "Claude-Web", "ClaudeBot",
  // D - I
  "Diffbot", "FacebookBot", "FriendlyCrawler", "GPTBot", "Google-Extended",
  "GoogleOther", "GoogleOther-Image", "GoogleOther-Video", "ICC-Crawler", "ImagesiftBot",
  // M - Y
  "Meta-ExternalAgent", "Meta-ExternalFetcher", "OAI-SearchBot", "PerplexityBot", "PetalBot",
  "Scrapy", "Timpibot", "VelenPublicWebCrawler", "Webzio-Extended", "YouBot",
  // lowercase tokens
  "anthropic-ai", "cohere-ai", "facebookexternalhit", "img2dataset", "omgili", "omgilibot",
];

View file

@ -152,7 +152,9 @@ const thumbnail =
>
<IconSun width="1.25rem" height="1.25rem" class="hidden dark:block" />
<IconMoon width="1.25rem" height="1.25rem" class="block dark:hidden" />
<span class="sr-only select-none" id="label-toggle-dark-mode">{t(props.lang, "published_content/toggle_dark_mode")}</span>
<span class="sr-only select-none" id="label-toggle-dark-mode"
>{t(props.lang, "published_content/toggle_dark_mode")}</span
>
</button>
</div>
</div>

18
src/pages/robots.txt.ts Normal file
View file

@ -0,0 +1,18 @@
import type { APIRoute } from "astro";
import { AI_BOTS } from "../data/ai_bots";
/**
 * Serves the site's `robots.txt`.
 *
 * The body contains two groups: every crawler listed in `AI_BOTS` is disallowed
 * from the whole site, and all remaining crawlers are only kept away from
 * `.htaccess` and the drafts directories.
 *
 * @returns A `text/plain` (UTF-8) response holding the generated rules.
 */
export const GET: APIRoute = async () => {
  const robots = [
    // One shared group: consecutive User-agent lines followed by a single rule.
    ...AI_BOTS.map((bot) => `User-agent: ${bot}`),
    "Disallow: /",
    "",
    "User-agent: *",
    // Rule paths must begin with "/"; a bare ".htaccess" is not a valid path pattern.
    "Disallow: /.htaccess",
    "Disallow: /stories/drafts/",
    "Disallow: /games/drafts/",
  ].join("\n");
  return new Response(robots, { headers: { "Content-Type": "text/plain; charset=utf-8" } });
};