# Default policy for any crawler that does not match a named group further down.
# A crawler that matches a named group follows only that group's rules, so
# nothing in this section is inherited there.
User-agent: *
Allow: /
Allow: /rome/travel-guides/
Allow: /rome/tours/
Allow: /rome/places-to-stay/
Allow: /rome/neighborhoods/
Allow: /llms.txt
Allow: /facts.json

# Admin and development paths
Disallow: /api/
Disallow: /dashboard/
Disallow: /admin/
Disallow: /admin-dashboard/

# Affiliate redirector - not indexable, pollutes click analytics
Disallow: /go/

# Analytics proxy endpoints
Disallow: /p/

# Invalid client-hydration path segments
Disallow: /null/
Disallow: /undefined/

# Next.js internal not-found route. Framework leaks this into RSC/prefetch
# payload; bots that URL-extract from it (esp. OAI-SearchBot) request it and
# get a hard 404 - wasted crawl budget. Same class as the /_next/ block below.
Disallow: /_not-found

# Search results - never indexable. The second pattern covers "search" when it
# is not the first query parameter (e.g. ?page=2&search=rome).
Disallow: /*?search=
Disallow: /*&search=

# Legacy routes
Disallow: /tour_categories/
Disallow: /location/
Disallow: /wp-json/

# Multi-country filtered paths
Disallow: /cities/*/places-to-stay?*
Disallow: /cities/*/things-to-do?*
Disallow: /cities/*/places-to-eat?*

# Next.js static assets - allow rendering. The longer, more specific Disallow
# patterns below win over "Allow: /_next/static/" under longest-match rules.
Allow: /_next/static/
Allow: /_next/image
Disallow: /_next/data/
Disallow: /_next/static/chunks/*.js.map
Disallow: /_next/static/*.js.map
Disallow: /_next/static/buildManifest.js
# Next.js emits the manifest as /_next/static/<buildId>/_buildManifest.js, so
# the literal path above never matches it; this pattern does.
Disallow: /_next/static/*/_buildManifest.js

# Block temporary and system files
Disallow: /tmp/

# Search engines we want to prioritize
# Named-bot sections do NOT inherit Disallow from "User-agent: *" per robots spec,
# so each bot section must repeat the admin/api disallows explicitly.
# Each group is self-contained: besides the admin/api paths it also repeats the
# affiliate redirector (/go/) and search-result rules, because none of the
# "User-agent: *" rules apply once a crawler matches its own group.
User-agent: Googlebot
Allow: /
Disallow: /api/
Disallow: /dashboard/
Disallow: /admin/
Disallow: /admin-dashboard/
Disallow: /go/
Disallow: /p/
Disallow: /_not-found
Disallow: /*?search=
Disallow: /*&search=

User-agent: Bingbot
Allow: /
Disallow: /api/
Disallow: /dashboard/
Disallow: /admin/
Disallow: /admin-dashboard/
Disallow: /go/
Disallow: /p/
Disallow: /_not-found
Disallow: /*?search=
Disallow: /*&search=

User-agent: Slurp
Allow: /
Disallow: /api/
Disallow: /dashboard/
Disallow: /admin/
Disallow: /admin-dashboard/
Disallow: /go/
Disallow: /p/
Disallow: /_not-found
Disallow: /*?search=
Disallow: /*&search=

# AI Crawlers - Allow all public content for AI search visibility (GEO);
# only private/utility paths (admin, API, analytics proxy) stay disallowed.
# Content is already public and indexed by Google - no reason to restrict AI crawlers
#
# Disallow /_next/ for training/indexing bots: they don't execute JS, but some
# naively URL-extract from bundle source and hit non-URLs like
# /_next/static/chunks/'.concat(e.imageUrl,... causing noise 500s.
# OpenAI crawlers. /go/ (affiliate redirector) is repeated in each group because
# rules from "User-agent: *" are not inherited by named groups.
User-agent: GPTBot
Allow: /
Disallow: /api/
Disallow: /dashboard/
Disallow: /admin/
Disallow: /admin-dashboard/
Disallow: /go/
Disallow: /p/
Disallow: /_not-found
Disallow: /_next/

User-agent: OAI-SearchBot
Allow: /
Disallow: /api/
Disallow: /dashboard/
Disallow: /admin/
Disallow: /admin-dashboard/
Disallow: /go/
Disallow: /p/
Disallow: /_not-found
Disallow: /_next/

# User-triggered fetcher: no /_next/ block here, unlike the two groups above.
User-agent: ChatGPT-User
Allow: /
Disallow: /api/
Disallow: /dashboard/
Disallow: /admin/
Disallow: /admin-dashboard/
Disallow: /go/
Disallow: /p/
Disallow: /_not-found

# ChatGPT-Live is ChatGPT's real-time browsing fetcher used during answer
# generation. Each successful fetch is a live citation opportunity; explicit
# Allow signals intent vs. relying on the wildcard. 2026-05-25 crawler
# report flagged 125 errors on / and 12 on Borghese Gallery - those are
# failed citations in front of users about to convert.
User-agent: ChatGPT-Live
Allow: /
Disallow: /api/
Disallow: /dashboard/
Disallow: /admin/
Disallow: /admin-dashboard/
# Affiliate redirector: not inherited from "User-agent: *", so repeated here.
Disallow: /go/
Disallow: /p/
Disallow: /_not-found

# /go/ (affiliate redirector) is repeated in each group because rules from
# "User-agent: *" are not inherited by named groups.
User-agent: ClaudeBot
Allow: /
Disallow: /api/
Disallow: /dashboard/
Disallow: /admin/
Disallow: /admin-dashboard/
Disallow: /go/
Disallow: /p/
Disallow: /_not-found
Disallow: /_next/

User-agent: PerplexityBot
Allow: /
Disallow: /api/
Disallow: /dashboard/
Disallow: /admin/
Disallow: /admin-dashboard/
Disallow: /go/
Disallow: /p/
Disallow: /_not-found
Disallow: /_next/

# Common Crawl - allow. Common Crawl is the upstream training corpus
# for many open-source LLMs (LLaMA, Mistral, etc.). Blocking it while
# allowing GPTBot/ClaudeBot/PerplexityBot is inconsistent and costs
# us AI citation reach with no upside (the content is already public).
User-agent: CCBot
Allow: /
Disallow: /api/
Disallow: /dashboard/
Disallow: /admin/
Disallow: /admin-dashboard/
# Affiliate redirector: not inherited from "User-agent: *", so repeated here.
Disallow: /go/
Disallow: /p/
Disallow: /_not-found
# NOTE(review): unlike GPTBot/ClaudeBot/PerplexityBot this group has no
# "Disallow: /_next/" - confirm whether CCBot should get it as well.

# Google-Extended controls Gemini training. Allow - same reasoning as
# above; the content is public, training inclusion only helps Gemini
# produce better answers about the destinations we cover.
# Private/utility paths stay off-limits; everything else is allowed.
# Rule order is not significant: the longest matching path decides.
User-agent: Google-Extended
Disallow: /_not-found
Disallow: /admin-dashboard/
Disallow: /admin/
Disallow: /api/
Disallow: /dashboard/
Disallow: /p/
Allow: /

# Meta's AI-platform crawlers (Llama training + AI assistant live fetcher).
# 2026-05-25 crawler report: Meta is the single largest bot at 83,508 hits
# but error rate doubled (8.5%→14.6%). Explicit Allow clarifies intent and
# leaves room for product-specific Disallows later. visitrome 410 noise +
# null URLs (fixed elsewhere in this PR) should bring the error rate down.
# /go/ (affiliate redirector) is repeated in each group because rules from
# "User-agent: *" are not inherited by named groups.
User-agent: Meta-ExternalAgent
Allow: /
Disallow: /api/
Disallow: /dashboard/
Disallow: /admin/
Disallow: /admin-dashboard/
Disallow: /go/
Disallow: /p/
Disallow: /_not-found
Disallow: /_next/

# Live fetcher: no /_next/ block here, unlike Meta-ExternalAgent above.
User-agent: Meta-ExternalFetcher
Allow: /
Disallow: /api/
Disallow: /dashboard/
Disallow: /admin/
Disallow: /admin-dashboard/
Disallow: /go/
Disallow: /p/
Disallow: /_not-found

# Social media crawlers
# NOTE(review): neither social group disallows /api/ or /dashboard/, unlike the
# other named groups - presumably so link-preview fetches keep working; confirm
# this is intentional.
User-agent: facebookexternalhit
Allow: /
Disallow: /admin/
Disallow: /admin-dashboard/
Disallow: /p/
Disallow: /_not-found

# Kept in step with facebookexternalhit: analytics proxy and the internal
# not-found route are useless to a link-preview crawler.
User-agent: Twitterbot
Allow: /
Disallow: /admin/
Disallow: /admin-dashboard/
Disallow: /p/
Disallow: /_not-found

# ByteDance / TikTok crawler. 2026-05-25 crawler report: 1,145 weekly hits,
# 100% 403 rate (caught by CF country/IP rules already), volume +22.1%
# week-over-week. Formalising the block in robots.txt so the bot stops
# wasting request capacity on a known-blocked path. If we ever want to
# allow TikTok discovery we'd revisit at the CF WAF level too.
# Full block: every request from this bot is already rejected with 403 upstream.
User-agent: Bytespider
Disallow: /

# Explicit allowances for additional AI, search and ads crawlers
# One shared group: consecutive User-agent lines all receive the rules that
# follow. Named groups do not inherit anything from "User-agent: *", so a bare
# "Allow: /" would open /api/, /admin/, /dashboard/ etc. to these bots; the
# private/utility disallows are therefore repeated here.
# Keep the User-agent lines contiguous (no blank lines between them) so that
# parsers treat them as a single group.
User-agent: Claude-SearchBot
User-agent: AdsBot-Google
User-agent: AdsBot-Google-Mobile
User-agent: Applebot-Extended
User-agent: Claude-User
User-agent: Claude-Web
User-agent: Perplexity-User
User-agent: YouBot
User-agent: XAI-Bot
User-agent: GrokCrawler
User-agent: GrokSearchBot
User-agent: DuckAssistBot
User-agent: BraveBot
User-agent: GoogleOther
User-agent: FacebookBot
User-agent: Amazonbot
User-agent: anthropic-ai
User-agent: AI2Bot
User-agent: MistralAI-User
User-agent: cohere-ai
Allow: /
Disallow: /api/
Disallow: /dashboard/
Disallow: /admin/
Disallow: /admin-dashboard/
Disallow: /go/
Disallow: /p/
Disallow: /_not-found

# Sitemap location (absolute URL). This record is independent of the
# User-agent groups above.
Sitemap: https://visitrome.com/sitemap.xml