# robots.txt
# $Id: robots.txt 5918 2024-03-26 15:21:36Z koelndeweb $
# (repeat all generic directives in the groups for specific user agents)
# On updates: test with Google Webmaster Tools and adjust the sitemap generator
# For the generic "User-agent: *" group see the end of this file

Sitemap: https://www.koeln.de/sitemap.xml

# --- Rules for Googlebot and Googlebot-Mobile ---
User-agent: Googlebot
User-agent: Googlebot-Mobile
# Google-specific: uses wildcards
# http://www.google.com/support/webmasters/bin/answer.py?hl=en&answer=156449
# Most specific rules first, i.e. allow page=... first, then disallow the rest
# wikiID GOOROBTXT:
Allow: /*?page=
Allow: /jobs/jobsuche.html?
# Static assets under /misc and /sites stay fetchable even though /misc is
# disallowed further down in this group.
Allow: /misc/*.js
Allow: /misc/*.css
Allow: /misc/*.png
Allow: /sites/*.js
Allow: /sites/*.css
# The blanket query-string block is switched off for this group:
# Disallow: /*?
Disallow: /aframe
Disallow: /apps/kinokalender
Disallow: /apps/wahlomat
Disallow: /archiv
Disallow: /artikel
# The normal-size and thumbnail picture paths are allowed before the rest of
# /bilder/data is disallowed.
Allow: /bilder/data/pictures/*/normal
Allow: /bilder/data/pictures/*/thumb
Disallow: /bilder/data
Disallow: /bilder/picture
Disallow: /bilder/vorschau
Disallow: /branchenfuehrer
Disallow: /branchen/api
Disallow: /cgi-bin
Disallow: /cms
Disallow: /content
Disallow: /event
Disallow: /events
Disallow: /eventreihe
Disallow: /veranstaltungsort
Disallow: /veranstaltungsorte
# NOTE(review): /filter/ here overlaps with /filter in the Drupal section
# below - confirm whether both entries are needed.
Disallow: /filter/
Disallow: /forum
Disallow: /hotspot
Disallow: /index.php
Disallow: /intern/
Disallow: /_includes
Disallow: /koeln/nachrichten/24hticker
Disallow: /koeln/nachrichten/nrwticker
Disallow: /koeln/nachrichten/wetter
Disallow: /koeln/was_ist_los/kino/
Disallow: /markt
Disallow: /mein_koeln
Disallow: /mobil
Disallow: /nachrichten
Disallow: /service/mail/support
Disallow: /tourismus/stadtfuehrungen/rent-a-guide/booking-step*
Disallow: /tourismus/stadtfuehrungen/rent-a-guide/tour/*/2*
# Drupal URLs:
Disallow: /admin
Disallow: /category
Disallow: /comment
Disallow: /contact
Disallow: /database
Disallow: /filter
Disallow: /flag_content
Disallow: /includes
Disallow: /login
Disallow: /logout
Disallow: /misc
Disallow: /modules
Disallow: /node
Disallow: /profiles
Disallow: /scripts
Disallow: /search
Disallow: /suche
Disallow: /algolia
Disallow: /taxonomy
Disallow: /updates
Disallow: /user
# Until further notice: everything under /user is blocked, so the individual
# entries below are disabled.
#Disallow: /user/login
#Disallow: /user/password
#Disallow: /user/register
# To be on the safe side :-)
# we allow everything that is not forbidden
Allow: /

# --- Rules for the AdSense crawler ---
# NOTE(review): the trailing "*" in the user-agent token is not part of the
# standard product-token syntax - confirm the AdSense crawler still matches it.
User-agent: Mediapartners-Google*
# Allow specific URLs for AdSense via wildcard
Allow: /*?page=
Disallow: /*?
Disallow: /admin
Disallow: /markt
Disallow: /mein_koeln
Disallow: /forum
# Drupal URLs:
# NOTE(review): /admin is listed a second time here (already present above).
Disallow: /admin
Disallow: /apps/kinokalender
Disallow: /apps/wahlomat
Disallow: /archiv
Disallow: /category
Disallow: /comment
Disallow: /contact
Disallow: /database
Disallow: /flag_content
Disallow: /filter
Disallow: /hotspot
Disallow: /includes
Disallow: /intern/
Disallow: /login
Disallow: /logout
Disallow: /misc
Disallow: /modules
Disallow: /node
Disallow: /profiles
Disallow: /scripts
Disallow: /search
Disallow: /tourismus/stadtfuehrungen/rent-a-guide/booking-step*
Disallow: /tourismus/stadtfuehrungen/rent-a-guide/tour/*/*
Disallow: /taxonomy
Disallow: /updates
Disallow: /user

# --- Rules for bingbot / msnbot ---
# bingbot and its numerous siblings (which are, in part, not documented :-(( )
# require specific (and slower) crawl rate settings because they simply ignore
# the crawl delay settings. So if you actually want a crawl delay of, say, 2,
# then there must be a crawl delay setting of 20 to 30 - according to Bing support.
User-agent: bingbot
User-agent: msnbot
User-agent: msnbot-NewsBlogs
User-agent: msnbot-media
User-agent: msnbot-UDiscovery
Crawl-delay: 20
Allow: /*?page=
Allow: /jobs/jobsuche.html?
Disallow: /aframe
Disallow: /artikel
Disallow: /apps/kinokalender
Disallow: /apps/strassen/strassensuche
Disallow: /apps/wahlomat
Disallow: /archiv
Disallow: /bilder/data
Disallow: /bilder/picture
Disallow: /bilder/vorschau
Disallow: /branchen/api
Disallow: /branchenfuehrer
Disallow: /cgi-bin
Disallow: /cms
Disallow: /content
Disallow: /forum
Disallow: /hotspot
Disallow: /index.php
Disallow: /intern/
Disallow: /_includes
Disallow: /markt
Disallow: /mein_koeln
Disallow: /koeln/nachrichten/24hticker
Disallow: /koeln/nachrichten/nrwticker
Disallow: /koeln/nachrichten/wetter
Disallow: /mobil
Disallow: /nachrichten
Disallow: /service/mail/support
# Drupal URLs:
Disallow: /admin
Disallow: /category
Disallow: /comment
Disallow: /contact
Disallow: /database
Disallow: /filter
Disallow: /flag_content
Disallow: /includes
Disallow: /login
Disallow: /logout
Disallow: /misc
Disallow: /modules
Disallow: /node
Disallow: /profiles
Disallow: /scripts
Disallow: /search
Disallow: /sites
Disallow: /suche
Disallow: /algolia
Disallow: /taxonomy
Disallow: /tourismus/stadtfuehrungen/rent-a-guide/booking-step1
Disallow: /updates
Disallow: /user
# According to the Bing docs, wildcards are accepted ... let's see:
Disallow: /*?

# Bot operators: if you are listed here, please contact us.
# And yes, we know that multiple User-agent lines with
# identical rules could be folded. But honestly: who would trust
# a rogue bot operator to implement parsing correctly?
# --- Bots that are blocked from the whole site ---
User-agent: Yandex
Disallow: /

User-agent: MJ12bot
# Test 202208-202209: will MJ12bot accept the Allow syntax, and will it behave?
# Only /branchen is opened up; everything else stays blocked.
Allow: /branchen
Disallow: /

User-agent: proximic
Disallow: /

User-agent: UnisterBot
Disallow: /

# Next ones in line for getting blocked: nachtschatten, neofonie

# --- Final fallback instructions for all remaining bots ---
User-agent: *
Crawl-delay: 2
Disallow: /_includes
Disallow: /aframe
Disallow: /archiv
Disallow: /apps/kinokalender
Disallow: /apps/strassen/strassensuche
Disallow: /apps/wahlomat
Disallow: /artikel
# The normal-size and thumbnail picture directories are allowed before the
# rest of /bilder/data is disallowed.
Allow: /bilder/data/pictures/*/normal/
Allow: /bilder/data/pictures/*/thumb/
Disallow: /bilder/data
Disallow: /bilder/picture
Disallow: /bilder/vorschau
Disallow: /branchen/api
Disallow: /branchenfuehrer
Disallow: /category
Disallow: /cgi-bin
Disallow: /cms
Disallow: /comment
Disallow: /contact
Disallow: /database
Disallow: /filter
Disallow: /five-star
Disallow: /flag_content
Disallow: /forum
Disallow: /hotspot
Disallow: /index.php
Disallow: /includes
Disallow: /intern/
Disallow: /koeln/nachrichten/24hticker
Disallow: /koeln/nachrichten/nrwticker
Disallow: /koeln/nachrichten/wetter
Disallow: /login
Disallow: /logout
Disallow: /markt
Disallow: /mein_koeln
Disallow: /misc
Disallow: /modules
Disallow: /nachrichten
Disallow: /node
Disallow: /profiles
Disallow: /scripts
Disallow: /search
Disallow: /sites
Disallow: /suche
Disallow: /algolia
Disallow: /taxonomy
Disallow: /tourismus/stadtfuehrungen/rent-a-guide/booking-step1
Disallow: /updates
Disallow: /user
# At least some bots might understand wildcards and crawl delay ...
Disallow: /*?