# Default group: applies to any crawler that is not matched by a more
# specific User-agent group elsewhere in this file.
# robots.txt is line-oriented, so every directive sits on its own line.
User-agent: *
# Crawler test fixtures
Disallow: /test/robots/noindex/
Disallow: /test/robots/disal/
# No trailing slash: this is a prefix match on every path starting with it.
Disallow: /test/robots/partial
Disallow: /background/
Disallow: /blog/
Disallow: /.kde/
Disallow: /cgi-bin/
Disallow: /images/
Disallow: /info/articles/
Disallow: /info/conferences-past.html
Disallow: /info/meetings/examples/
Disallow: /info/meetings/thunderlizard/examples/
Disallow: /info/robots/
Disallow: /info/slides/
Disallow: /lists/
Disallow: /related/
Disallow: /reviews/
# Paths of the form <dir>/http://... (an absolute URL embedded in the path)
Disallow: /test/relativelinks/2ndlevel/http://
Disallow: /test/relativelinks/rtestprob/http://searchtools/about/
Disallow: /test/relativelinks/rtestprob/http://searchtools/analysis/
Disallow: /test/relativelinks/rtestprob/http://searchtools/guide/
Disallow: /test/relativelinks/rtestprob/http://searchtools/info/
Disallow: /test/relativelinks/rtestprob/http://searchtools/pub/
Disallow: /test/relativelinks/rtestprob/http://searchtools/robots/
Disallow: /test/relativelinks/rtestprob/http://searchtools/search/
Disallow: /test/relativelinks/rtestprob/http://searchtools/site/
Disallow: /test/relativelinks/rtestprob/http://searchtools/slides/
Disallow: /test/relativelinks/rtestprob/http://searchtools/surveys/
Disallow: /test/relativelinks/rtestprob/http://searchtools/tools/
Disallow: /searchtools/
Disallow: /slides/examples/
# Path matching is case-sensitive, so each capitalisation is listed separately.
Disallow: /ST/
Disallow: /st/
Disallow: /St/
Disallow: /wr/

# don't let search engines see the RSS feed, it's just confusing.
# Group for the crawlers named below: the only path withheld from them is the
# RSS feed.
# NOTE(review): under the Robots Exclusion Protocol a crawler that matches one
# of these names obeys only this group, so the Disallow rules of the
# "User-agent: *" group do not apply to it -- confirm that is intended.
User-agent: Googlebot
User-agent: InfoNaviRobot
User-agent: Ask
User-agent: TV33_Mercator
User-agent: AVSearch
User-agent: Mercator
User-agent: Scooter
User-agent: Slurp
User-agent: SearchengineLicenceSheep
User-agent: shadow
User-agent: MultiText
User-agent: FAST-WebCrawler
User-agent: Lycos_Spider
User-agent: Atomz
User-agent: htdig
User-agent: spider00.logika.net
User-agent: NetMechanic
User-agent: libwww-perl
User-agent: Teleport Pro
User-agent: BizBot04 kirk.overleaf.com
User-agent: HappyBot (gserver.kw.net)
User-agent: CaliforniaBrownSpider
User-agent: EI*Net/0.1 libwww/0.1
User-agent: Ibot/1.0 libwww-perl/0.40
User-agent: Merritt/1.0
User-agent: StatFetcher/1.0
User-agent: TeacherSoft/1.0 libwww/2.17
User-agent: WWW Collector
User-agent: processor/0.0ALPHA libwww-perl/0.20
User-agent: wobot/1.0 from 206.214.202.45
User-agent: Libertech-Rover
User-agent: WhoWhere Robot
User-agent: ITI Spider
User-agent: w3index
User-agent: MyCNNSpider
User-agent: SummyCrawler
User-agent: OGspider
User-agent: linklooker
User-agent: CyberSpyder
User-agent: SlowBot
User-agent: heraSpider
User-agent: Surfbot
User-agent: Bizbot003
User-agent: WebWalker
User-agent: SandBot
User-agent: EnigmaBot
User-agent: spyder3.microsys.com
User-agent: www.freeloader.com.
User-agent: 'Ahoy! The Homepage Finder'
User-agent: Arachnophilia
User-agent: ArchitextSpider
User-agent: explorersearch
User-agent: Freecrawl
User-agent: Gromit/1.0
User-agent: HTMLgobble v2.2
User-agent: WebCrawler/3.0 Robot libwww/5.0a
User-agent: WebFetcher/0.8
User-agent: METAGOPHER
User-agent: MSNBOT/0.1
User-agent: Yahoo-MMCrawler/3.x
Disallow: /searchtools-rss.xml

# User-Agents with no privileges (mostly spambots/spybots/offline downloaders that ignore robots.txt)
#
# NOTE(review): everything from here to the RewriteRule is Apache mod_rewrite
# configuration, not robots.txt syntax. Crawlers reading this file as
# robots.txt do not act on these lines; they only take effect in an Apache
# config / .htaccess context with mod_rewrite loaded and "RewriteEngine On"
# in scope -- confirm where this section is meant to live.
#
# How the chain works: every RewriteCond except the last carries [OR], so the
# RewriteRule fires when ANY condition matches. [NC] makes a match
# case-insensitive. The rule's [F] flag answers with 403 Forbidden.
#
# Formatting rules this section follows:
#   - comments are on their own line (Apache does not allow a comment on the
#     same line as a directive);
#   - patterns containing spaces use plain ASCII double quotes;
#   - literal dots and question marks are backslash-escaped, and digits are
#     written [0-9] to match the style of the address patterns.
#
# Legend (presumably): OD = offline downloader.

# Cyveillance spybot (63.148.99.224 - 63.148.99.255)
RewriteCond %{REMOTE_ADDR} "^63\.148\.99\.2(2[4-9]|[3-4][0-9]|5[0-5])$" [OR]
# NameProtect spybot (12.148.196.128 - 12.148.196.255)
RewriteCond %{REMOTE_ADDR} ^12\.148\.196\.(12[8-9]|1[3-9][0-9]|2[0-4][0-9]|25[0-5])$ [OR]
# NameProtect spybot (12.148.209.192 - 12.148.209.255)
RewriteCond %{REMOTE_ADDR} ^12\.148\.209\.(19[2-9]|2[0-4][0-9]|25[0-5])$ [OR]
# Turnitin spybot (64.140.49.66 - 64.140.49.69)
RewriteCond %{REMOTE_ADDR} ^64\.140\.49\.6([6-9])$ [OR]
# spambot
RewriteCond %{HTTP_REFERER} iaea\.org [OR]
# spambot (user agent made only of upper-case letters)
RewriteCond %{HTTP_USER_AGENT} ^[A-Z]+$ [OR]
# OD
RewriteCond %{HTTP_USER_AGENT} anarchie [NC,OR]
# rude bot
RewriteCond %{HTTP_USER_AGENT} Atomz [OR]
# spambot
RewriteCond %{HTTP_USER_AGENT} cherry.?picker [NC,OR]
# spambot (note extra space before semicolon)
RewriteCond %{HTTP_USER_AGENT} "compatible ; MSIE 6.0" [OR]
# OD
RewriteCond %{HTTP_USER_AGENT} crescent [NC,OR]
# OD ("DA " followed by a version number such as 5.0)
RewriteCond %{HTTP_USER_AGENT} "^DA [0-9]\.[0-9]+" [OR]
# OD
RewriteCond %{HTTP_USER_AGENT} "DTS Agent" [OR]
# OD
RewriteCond %{HTTP_USER_AGENT} "^Download" [OR]
# OD
RewriteCond %{HTTP_USER_AGENT} EasyDL/[0-9]\.[0-9]+ [OR]
# spambot
RewriteCond %{HTTP_USER_AGENT} e?mail.?(collector|magnet|reaper|siphon|sweeper|harvest|collect|wolf) [NC,OR]
# OD
RewriteCond %{HTTP_USER_AGENT} express [NC,OR]
# OD
RewriteCond %{HTTP_USER_AGENT} extractor [NC,OR]
# OD
RewriteCond %{HTTP_USER_AGENT} "Fetch API Request" [OR]
# OD
RewriteCond %{HTTP_USER_AGENT} flashget [NC,OR]
# rude bot
RewriteCond %{HTTP_USER_AGENT} FlickBot [OR]
# stupid user trying to edit my site
RewriteCond %{HTTP_USER_AGENT} FrontPage [OR]
# OD
RewriteCond %{HTTP_USER_AGENT} getright [NC,OR]
# OD
RewriteCond %{HTTP_USER_AGENT} go.?zilla [NC,OR]
# rude bot
RewriteCond %{HTTP_USER_AGENT} "efp@gmx\.net" [OR]
# OD
RewriteCond %{HTTP_USER_AGENT} grabber [NC,OR]
# rude bot
RewriteCond %{HTTP_USER_AGENT} imagefetch [OR]
# OD
RewriteCond %{HTTP_USER_AGENT} httrack [NC,OR]
# spambot
RewriteCond %{HTTP_USER_AGENT} "Indy Library" [OR]
# spambot
RewriteCond %{HTTP_USER_AGENT} "^Internet Explore" [OR]
# spambot (quoted because the pattern contains spaces)
RewriteCond %{HTTP_USER_AGENT} "^IE [0-9]\.[0-9] Compatible.*Browser$" [OR]
# rude bot
RewriteCond %{HTTP_USER_AGENT} "LINKS ARoMATIZED" [OR]
# spambot
RewriteCond %{HTTP_USER_AGENT} "Microsoft URL Control" [OR]
# rude bot
RewriteCond %{HTTP_USER_AGENT} "mister pix" [NC,OR]
# dumb bot (user agent is exactly "Mozilla/4.0")
RewriteCond %{HTTP_USER_AGENT} "^Mozilla/4\.0$" [OR]
# formmail attacker (user agent is literally "Mozilla/??")
RewriteCond %{HTTP_USER_AGENT} "^Mozilla/\?\?$" [OR]
# IE's "make available offline" mode
RewriteCond %{HTTP_USER_AGENT} MSIECrawler [OR]
# unknown bot
RewriteCond %{HTTP_USER_AGENT} ^NG [OR]
# OD
RewriteCond %{HTTP_USER_AGENT} offline [NC,OR]
# OD
RewriteCond %{HTTP_USER_AGENT} net.?(ants|mechanic|spider|vampire|zip) [NC,OR]
# spambot
RewriteCond %{HTTP_USER_AGENT} nicerspro [NC,OR]
# Download Ninja OD
RewriteCond %{HTTP_USER_AGENT} ninja [NC,OR]
# NameProtect spybot
RewriteCond %{HTTP_USER_AGENT} NPBot [OR]
# rude bot
RewriteCond %{HTTP_USER_AGENT} PersonaPilot [OR]
# OD
RewriteCond %{HTTP_USER_AGENT} snagger [NC,OR]
# rude bot
RewriteCond %{HTTP_USER_AGENT} Sqworm [OR]
# rude bot
RewriteCond %{HTTP_USER_AGENT} SurveyBot [OR]
# OD
RewriteCond %{HTTP_USER_AGENT} tele(port|soft) [NC,OR]
# Turnitin spybot
RewriteCond %{HTTP_USER_AGENT} TurnitinBot [OR]
# ODs
RewriteCond %{HTTP_USER_AGENT} web.?(auto|bandit|collector|copier|devil|downloader|fetch|hook|mole|miner|mirror|reaper|sauger|sucker|site|snake|stripper|weasel|zip) [NC,OR]
# dumb bot, doesn't know how to follow links, generates lots of 404s
RewriteCond %{HTTP_USER_AGENT} vayala [OR]
# Last condition in the chain: deliberately no [OR].
RewriteCond %{HTTP_USER_AGENT} zeus [NC]
# Refuse every URL ("-" = no substitution) for any request matched above.
RewriteRule .* - [F,L]
#
# updated 2002-03-22 (disallow rtestprob links)
# updated 2002-06-25 (disallow info/slides links, info/robots/)
# updated 2002-07-25 (disallow /searchtools/ which is an alias