# robots.txt for www.dbalears.cat
#
# Format reminder: one directive per line. A group is one or more
# "User-agent" lines followed by the rules for those agents. "#" starts a
# comment that runs to the end of the line, so directives must never share
# a line with a preceding comment.

# Default rules for every crawler that has no dedicated group below.
User-agent: *
Disallow: /sfCounter/*
Disallow: /sfRating/*
Disallow: /sfRecommend/*
Disallow: /sfComment/score/id/*
Disallow: /sfReadLater/*

# AI crawlers

# Google (Bard / Gemini).
# Allowed on an experimental basis (an empty Disallow permits everything).
User-agent: Google-Extended
Disallow:

# Facebook agent (not AI)
User-agent: facebookexternalhit
Allow: /

# Amazon (AI related to Alexa and models)
User-agent: AmazonBot
Disallow: /

# We allow Amazon's advertising bot
User-agent: AmazonAdBot
Disallow:

# Recommendation: https://knownagents.com/agents/
# "AI Data Scrapers" section (complete)
# "Undocumented AI Agents" section (partial)

# AI data scrapers for training LLMs
User-agent: anthropic-ai
Disallow: /

User-agent: AI2Bot
Disallow: /

User-agent: Ai2Bot-Dolma
Disallow: /

User-agent: ApifyBot
Disallow: /

User-agent: ApifyWebsiteContentCrawler
Disallow: /

User-agent: Applebot-Extended
Disallow: /

User-agent: Bytespider
Disallow: /

User-agent: CCBot
Disallow: /

User-agent: ChatGLM-Spider
Disallow: /

User-agent: ClaudeBot
Disallow: /

User-agent: cohere-ai
Disallow: /

User-agent: cohere-training-data-crawler
Disallow: /

User-agent: Cotoyogi
Disallow: /

User-agent: Crawl4AI
Disallow: /

User-agent: Datenbank Crawler
Disallow: /

User-agent: DeepSeekBot
Disallow: /

User-agent: Diffbot
Disallow: /

# FacebookBot is explicitly allowed, unlike the rest of this list.
User-agent: FacebookBot
Allow: /

User-agent: FirecrawlAgent
Disallow: /

User-agent: GPTBot
Disallow: /

User-agent: iAskBot
Disallow: /

User-agent: iaskspider
Disallow: /

User-agent: ICC-Crawler
Disallow: /

User-agent: Kangaroo Bot
Disallow: /

User-agent: KunatoCrawler
Disallow: /

User-agent: laion-huggingface-processor
Disallow: /

User-agent: LCC
Disallow: /

User-agent: meta-externalagent
Disallow: /

User-agent: netEstate Imprint Crawler
Disallow: /

User-agent: omgili
Disallow: /

User-agent: omgilibot
Disallow: /

User-agent: PanguBot
Disallow: /

User-agent: SBIntuitionsBot
Disallow: /

User-agent: SemrushBot-OCOB
Disallow: /

User-agent: Spider
Disallow: /

User-agent: TavilyBot
Disallow: /

User-agent: Timpibot
Disallow: /

User-agent: VelenPublicWebCrawler
Disallow: /

User-agent: webzio-extended
Disallow: /

User-agent: WRTNBot
Disallow: /

# NOTE(review): a crawler that matches its own group generally does not also
# apply the "*" group, so the two groups below carry no Disallow rules of
# their own. Confirm that is intended, or repeat the "*" Disallow lines here.

# Yahoo's Slurp Robot - Please wait 15 seconds in between visits
User-agent: slurp
Crawl-delay: 15

# MSN Robot - Please wait 15 seconds in between visits
User-agent: msnbot
Crawl-delay: 15

# General
Sitemap: https://www.dbalears.cat/sitemap.xml
Sitemap: https://www.dbalears.cat/googlenews.xml
Sitemap: https://www.dbalears.cat/image-sitemap.xml

# Authors
Sitemap: https://www.dbalears.cat/autors.xml

# Archive
Sitemap: https://www.dbalears.cat/noticies/sitemapIndex.xml

# Elections
Sitemap: https://www.dbalears.cat/elecciones.xml

# Media
Sitemap: https://www.dbalears.cat/fotogalerias.xml
Sitemap: https://www.dbalears.cat/videos.xml