# $Id: robots.txt,v 1.9.2.2 2010/09/06 10:37:16 goba Exp $
#
# robots.txt
#
# This file is to prevent the crawling and indexing of certain parts
# of your site by web crawlers and spiders run by sites like Yahoo!
# and Google. By telling these "robots" where not to go on your site,
# you save bandwidth and server resources.
#
# This file will be ignored unless it is at the root of your host:
# Used: http://example.com/robots.txt
# Ignored: http://example.com/site/robots.txt
#
# For more information about the robots.txt standard, see:
# http://www.robotstxt.org/wc/robots.html
#
# For syntax checking, see:
# http://www.sxw.org.uk/computing/robots/check.html
#
# DueDil robots.txt - Complete no-index configuration
# Prevent all search engines from crawling and indexing the entire site
#
# NOTE: "Disallow" only stops compliant crawlers from fetching a URL; it is
# not an indexing directive. URLs that must stay out of search results need a
# robots meta tag or an X-Robots-Tag response header as well.
#
# Layout rules followed below:
#   - one directive per line (the format is line-oriented);
#   - exactly one group per user-agent, with no blank lines inside a group,
#     so parsers that merge groups and parsers that stop at the first match
#     both see the same rule set;
#   - User-agent values are bare product tokens (letters, "-" and "_" only;
#     no version suffix and no wildcard).

User-agent: Seekbot
Disallow: /

# Default group for every crawler not named elsewhere in this file.
# "Disallow: /" already blocks every path; the more specific rules below are
# kept so the intended per-path policy survives if the blanket rule is lifted.
User-agent: *
Disallow: /
Crawl-delay: 10
# Directories
Disallow: /includes/
Disallow: /misc/
Disallow: /modules/
Disallow: /profiles/
Disallow: /scripts/
Disallow: /themes/
# Files
Disallow: /CHANGELOG.txt
Disallow: /cron.php
Disallow: /INSTALL.mysql.txt
Disallow: /INSTALL.pgsql.txt
Disallow: /install.php
Disallow: /INSTALL.txt
Disallow: /LICENSE.txt
Disallow: /MAINTAINERS.txt
Disallow: /update.php
Disallow: /UPGRADE.txt
# Paths (clean URLs)
Disallow: /admin/
Disallow: /comment/reply/
Disallow: /contact/
Disallow: /logout/
Disallow: /node/add/
Disallow: /user/register/
Disallow: /user/password
Disallow: /signup_link_share
Disallow: /signup/
Disallow: /verify_signup/
Disallow: /user
Disallow: /user/
Disallow: /ajax_fast_offsite/
Disallow: /ajax_slow_offsite/
Disallow: /linkedin/authenticate
Disallow: /login/
Disallow: /*edit$
# NOTE(review): the patterns in this file that begin with "*" instead of "/"
# fall outside the path grammar of RFC 9309; parser support varies. They are
# left unchanged because rewriting them as "/*/..." would stop matching the
# top-level form (e.g. "/contact") - confirm the intended scope before
# normalising.
Disallow: */contact$
Disallow: */lists/introduction/*
Disallow: */map$
Disallow: */company/*/*/*/contact$
Disallow: */company/*/*/*/group$
Disallow: */company/*/*/*/documents$
Disallow: */company/*/*/*/risk$
Disallow: */company/*/*/*/activity$
Disallow: */company/*/*/*/news$
Disallow: */company/*/*/*/financials$
Disallow: */company/*/*/*/ownership$
Disallow: */ownership/parent-companies$
# Paths (no clean URLs)
Disallow: /?q=admin/
Disallow: /?q=comment/reply/
Disallow: /?q=contact/
Disallow: /?q=logout/
Disallow: /?q=node/add/
Disallow: /?q=search/
Disallow: /?q=user/password/
Disallow: /?q=user/register/
Disallow: /?q=user/login/
Disallow: *?status=*
Disallow: *?view=*
Disallow: *?to=*
# NoIndex
# WARNING: "Noindex:" is not part of the Robots Exclusion Protocol and Google
# stopped honouring it in robots.txt on 2019-09-01. These lines are retained
# as a record of intent only; enforce them with a robots meta tag or an
# X-Robots-Tag: noindex response header.
Noindex: *?ref=*
Noindex: *?source=*
Noindex: *?t-curated_collections=*
Noindex: *?flagged=*
Noindex: *?replytocom=*
Noindex: *?s=*
Noindex: *?_method=*
Noindex: */company/*/*/*/people$
Noindex: *?utm_source=*
Noindex: *?sort=*
Noindex: *utm_medium=*
# International
Disallow: */company/fr/*
Disallow: */company/de/*
Disallow: */company/be/*
Disallow: */company/nl/*
Disallow: */company/lu/*
Disallow: */company/no/*
Disallow: */company/se/*
Disallow: */company/fi/*
Disallow: */company/it/*
Disallow: */company/pt/*
Disallow: */company/hu/*
Disallow: */company/pl/*
Disallow: */company/lt/*
Disallow: */company/sk/*
Disallow: */company/mt/*
Disallow: */company/is/*
# Directors
Noindex: */director/*/*

# Misc
User-agent: Companybook-Crawler
Disallow: /
# Already covered by "Disallow: /" above for this group; kept as written.
Disallow: /legal/enterprise/april2018terms