-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathhtaccess-minimal-comments.txt
More file actions
57 lines (42 loc) · 5.67 KB
/
htaccess-minimal-comments.txt
File metadata and controls
57 lines (42 loc) · 5.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# BEGIN CODEWORDCREATIVE BOT BLOCK https://github.com/codewordcreative/bot-block-list
<IfModule mod_rewrite.c>
# Switch the rewrite engine on; every RewriteCond/RewriteRule in this block depends on it.
RewriteEngine On
# Optional: force HTTPS. Uncomment the two lines below to fold the standard
# http -> https permanent (301) redirect into this block.
# RewriteCond %{HTTPS} !=on
# RewriteRule ^ https://%{HTTP_HOST}%{REQUEST_URI} [L,R=301]
# Optional: let every user agent fetch /robots.txt, including agents that are
# blocked further down. A bot that is both blocked here and disallowed in
# robots.txt may stop retrying once it has read (and obeys) the disallow rule.
# Do not enable this with WordPress or any CMS/plugin that generates robots.txt
# dynamically: the [L] flag ends rule processing, so a rewrite rule placed later
# in this file would never see the request. You can also use it to take control
# of robots.txt yourself - just remember to add a static robots.txt manually.
# RewriteCond %{REQUEST_URI} ^/robots\.txt$ [NC]
# RewriteRule .* - [L]
# Allow list: link-preview and messaging bots that the user-agent block lists
# below would otherwise catch (the lists use broad substrings - "nbot", for
# example, also matches LinkedInBot).
# [S=4] skips only the four user-agent blocking rules that follow (SEO tools,
# outdated agents, generic bot keywords, specific bots). Unlike [L] it does not
# end rule processing, so the request-pattern checks further down still apply
# to clients that merely claim one of these names, and any rewrite rules placed
# later in this .htaccess (e.g. a CMS front controller) still run for them.
# Keep the number in step if you add or remove a user-agent blocking rule.
RewriteCond %{HTTP_USER_AGENT} "(facebookexternalhit|Facebot|WhatsApp|Twitterbot|LinkedInBot|Slackbot|Discordbot|TeamsBot|Outlook|Applebot|TelegramBot|Pinterestbot|Redditbot)" [NC]
RewriteRule ^ - [S=4]
# Block SEO tools you don't use. Remove an entry to let that tool crawl the site.
# Matching is a case-insensitive substring test on the User-Agent header;
# a match is answered with 403 Forbidden.
RewriteCond %{HTTP_USER_AGENT} (Ahrefs|Barkrowler|BLEXBot|Cocolyzebot|DataForSeoBot|DomainStatsBot|dotbot|hypestat|Keyword\ Density/0\.9|linkdexbot|MegaIndex|MJ12bot|online-webceo-bot|Openfind|Nutch|Rogerbot|Semrush|SenutoBot|SEOkicks|SEOlizer|serpstatbot|scrapy|SiteCheckerBot|SISTRIX|SnapPreviewBot|Sogou|spbot|Zeus|ZoomBot) [NC]
RewriteRule ^ - [F,L]
# Block suspicious or outdated user agents.
# Case-insensitive substring match on the User-Agent header: obsolete platform
# and browser versions (Windows NT 5.1, MSIE 10.0, Firefox 11.0/39.0,
# Chrome 45.0), the "Not-A.Brand" token, and a Mozilla/5.0 string that claims
# only "Windows NT 15.0". A match is answered with 403 Forbidden.
# NOTE(review): presumably these appear mostly in spoofed bot traffic, but a
# genuine visitor on one of these browsers is refused as well - confirm intended.
RewriteCond %{HTTP_USER_AGENT} (Windows\ NT\ 5\.1|Firefox/11\.0|Chrome/45\.0|Not-A\.Brand|Mozilla/5\.0\ \(Windows\ NT\ 15\.0\)|MSIE\ 10\.0|Firefox/39\.0) [NC]
RewriteRule ^ - [F,L]
# Block suspicious or malicious bots.
# These are case-insensitive substring matches on the User-Agent header, and
# php, python, crawl, spider, curl, wget etc. are pretty broad: any client whose
# User-Agent contains one of them is answered with 403 Forbidden.
# You may need to unblock or whitelist some APIs or scripts manually as needed,
# or just remove the keyword from the list.
RewriteCond %{HTTP_USER_AGENT} (Missigua\ Locator|larbin|libwww|lwp-trivial|Mata\ Hari|tencent|php|crawl|scrape|spider|spyder|harvest|collector|python|curl|nutch|stripper|wget) [NC]
RewriteRule .* - [F,L]
# Block specific bots, especially potential scrapers/copiers and intel
# gatherers - now also archive.org, because AI scrapers use it.
# The list is split over several conditions joined with [OR] purely for
# readability: a case-insensitive substring match of the User-Agent header
# against any one of them is answered with 403 Forbidden.
# Each pattern must stay on a single line - a line break inside a pattern
# makes the directive invalid.
RewriteCond %{HTTP_USER_AGENT} (008|abonti|adbeat_bot|addsearchbot|ai2bot|aihitbot|amazonbot|amazon-kendra|anderspinkbot|andibot|anthropic|archiver|awario|acapbot|acoonbot|alexibot|archive\.org|asterias|attackbot) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (backdor|base64_decode|bash|bdcbot|bedrockbot|bigsur|bitsightbot|blackboard|brandverity|brightbot|builtbottough|bytedance|becomebot|binlar|blackwidow|blekkobot|blex|blowfish|bullseye|bunnys|butterfly) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (c99|careerbot|casper|censysinspect|ccbot|chatgpt|chatglm|checkpriv|cherrypick|chinaclaw|choppy|cincraw|clshttp|cmsworld|copernic|copier|copyrightcheck|cosmos|cotoyogi|crescent|cheesebot|cherrypicker|claude|cohere) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (devin|dataprovider|diffbot|download\ ninja|datacha|DeepSeek|diavol|discobot|dotnetdotcom|dumbot|disconnect|echobot|econtext|email|eolasbot|eval|eventures|extract|eyenetie) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (facebookbot|factset|feedbooster|foobot|feedfinder|flaming|flashget|flicky|fuck|genieo|gptbot|grub-client|g00g1e|getright|gigabot|go-ahead-got|Google-CloudVertex|GoogleOther|gozilla|grabnet|grafula) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (heritrix|hloader|httrack|icarus6j|imagesiftbot|img2dataset|internetseer|isscyber|jetbot|jetcar|kangaroo|klibweb|kmccrew|TheKnowledgeAI|leechftp|libweb|libwww-perl|liebaofast|linerbot|linkwalker|linkextractor|linkfluence|linko|lwp-download) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (magpie|majestic|mauibot|mediapartners-google|mediatoolkitbot|mechanize|meta-externalagent|metainspector|miner|mippin|mister\ pix|mistral|moget|muckrack|morfeus|moveoverbot|mozlila|nbot|netEstate|netmechanic|nicerspro|nikto|ninja|nominet|netants|netcraftsurvey|netvibes|news-please|newspaper|novaact) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (oa-searchbot|OAI-SearchBot|offline\ explorer|omgili|octopus|operator|oppo|pagegrabber|pandalytics|pangubot|panscient|peer39|perplexity|petalbot|phantom|phindbot|planetwork|poseidon|postrank|propowerbot|proximic|prowebwalker|purebot) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (quillbot|quora|queryn|queryseeker|radian6|radiation|realdownload|remoteview|rogerbot|repomonkey|sbintuition|scan|scoop|scooter|seekerspid|seekportbot|semalt|sider|smtbot|spinn3r|shell|siclab|sidetrade|sindice|sistrix|sitebot|siteexplorer|sitesnagger|skygrid|smartdownload|snoopy|spankbot|spbot|sqlmap|stackrambler|sucker|surftbot|sux0r|suzukacz|suzuran) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (takeout|teleport|telesoft|textrazor|thinkbot|true_robots|turingos|turnit|timpibot|trendiction|trendsmap|turnitin|tweetmemebot|twingly|um-ln|unserializ|vampire|velenpublic|virustotal|voideye) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (w4mwnpbXf3MFAbxOkJRw|WARDBot|web\ downloader|web\ image|webauto|webbandit|webenhancer|webmasterworld|webleacher|webreaper|websauger|website\ quester|webster|webstripper|webvac|webviewer|webwhacker|webzio|webzip|winhttp|wwwoffle|woxbot|wp-bot|www-collector) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (xenu|xaldon|xxxyy|yak|yandexadditional|yamanalab|yioopbot|youda|youbot|zoominfobot|zeus|zmeu|zune|zyborg) [NC]
RewriteRule ^ - [F,L]
# Block malicious request patterns (8G Firewall).
# Answers with 403 Forbidden when the query string contains, case-insensitively,
# any of the listed code-injection, SQL-injection, file-disclosure or
# script/iframe-tag markers. The two conditions are joined with [OR], so a
# match on either one is enough.
# NOTE(review): wget, curl and python are matched as plain substrings, so a
# legitimate query such as a site search for "python" is refused too.
RewriteCond %{QUERY_STRING} (eval\(|UNION.*SELECT|base64_decode\(|/etc/passwd|self/environ|input_file|input_file_name|mosConfig) [NC,OR]
RewriteCond %{QUERY_STRING} (GLOBALS|_REQUEST|proc/self|wget|curl|python|<script|<iframe) [NC]
RewriteRule .* - [F,L]
# Block bad request methods: only GET, POST, HEAD and OPTIONS are allowed;
# anything else is answered with 403 Forbidden.
# The pattern is anchored at both ends so that a method which merely starts
# with an allowed verb (e.g. "GETX") is rejected as well.
# Note that PUT, PATCH and DELETE are rejected too - add them to the list if
# an API served from this site needs them.
RewriteCond %{REQUEST_METHOD} !^(GET|POST|HEAD|OPTIONS)$ [NC]
RewriteRule .* - [F,L]
# Block URL encoding attacks (8G Firewall).
# Inspects the request line as sent by the client (%{THE_REQUEST}) and answers
# with 403 Forbidden when it contains an encoded space followed by "--"
# (%20--), an encoded line feed (%0A) or an encoded carriage return (%0D).
# The conditions are joined with [OR]; [NC] makes the hex digits match in
# either case (%0a as well as %0A).
RewriteCond %{THE_REQUEST} "%20--" [NC,OR]
RewriteCond %{THE_REQUEST} "%0A" [NC,OR]
RewriteCond %{THE_REQUEST} "%0D" [NC]
RewriteRule .* - [F,L]
# Block dangerous query strings (8G Firewall).
# First condition: the "../" and "..\" directory-traversal sequences.
# Second condition: angle brackets, literal or encoded as %3C / %3E, and
# single or double quote characters.
# The conditions are joined with [OR]; a match on either one is answered with
# 403 Forbidden.
# NOTE(review): a literal apostrophe in a query string (e.g. ?q=don't) is
# refused as well - confirm that is acceptable for this site.
RewriteCond %{QUERY_STRING} (\.\.\/|\.\.\\) [NC,OR]
RewriteCond %{QUERY_STRING} (<|\"|'|>|\%3C|\%3E) [NC]
RewriteRule ^.*$ - [F,L]
</IfModule>
# END CODEWORDCREATIVE BOT BLOCK