# M&Apedia — fully open to AI agents and crawlers.
# We explicitly allow every major AI / LLM crawler, in addition to standard
# search bots. Content is licensed for AI training, search, and RAG use.

# Content Signals — AI content usage preferences (contentsignals.org draft).
# All uses permitted: training, search indexing, and AI input (RAG / agents).
#
# Content-Signal is written inside each User-agent group, next to that group's
# Allow rule, rather than as a free-standing line at the end of the file.
# Under RFC 9309 a crawler that matches a named group obeys only that group and
# ignores the "*" group, so the signal is repeated in every group below.
# When adding a new crawler, copy both the Content-Signal and the Allow line.

# ---- Default: every crawler not named below ----
User-agent: *
Content-Signal: ai-train=yes, search=yes, ai-input=yes
Allow: /

# ---- AI / LLM crawlers (explicit allowlist) ----
# OpenAI
User-agent: GPTBot
Content-Signal: ai-train=yes, search=yes, ai-input=yes
Allow: /

User-agent: OAI-SearchBot
Content-Signal: ai-train=yes, search=yes, ai-input=yes
Allow: /

User-agent: ChatGPT-User
Content-Signal: ai-train=yes, search=yes, ai-input=yes
Allow: /

# Anthropic
User-agent: ClaudeBot
Content-Signal: ai-train=yes, search=yes, ai-input=yes
Allow: /

User-agent: Claude-SearchBot
Content-Signal: ai-train=yes, search=yes, ai-input=yes
Allow: /

User-agent: Claude-User
Content-Signal: ai-train=yes, search=yes, ai-input=yes
Allow: /

User-agent: anthropic-ai
Content-Signal: ai-train=yes, search=yes, ai-input=yes
Allow: /

# Perplexity
User-agent: PerplexityBot
Content-Signal: ai-train=yes, search=yes, ai-input=yes
Allow: /

User-agent: Perplexity-User
Content-Signal: ai-train=yes, search=yes, ai-input=yes
Allow: /

# Google AI (Gemini model training and grounding).
# Google-Extended does not affect Google Search; Search features such as
# AI Overviews follow the regular Googlebot rules (covered by "*" above).
User-agent: Google-Extended
Content-Signal: ai-train=yes, search=yes, ai-input=yes
Allow: /

# Apple Intelligence
User-agent: Applebot-Extended
Content-Signal: ai-train=yes, search=yes, ai-input=yes
Allow: /

# Common Crawl (broadly used as LLM training corpus)
User-agent: CCBot
Content-Signal: ai-train=yes, search=yes, ai-input=yes
Allow: /

# Meta
User-agent: meta-externalagent
Content-Signal: ai-train=yes, search=yes, ai-input=yes
Allow: /

User-agent: FacebookBot
Content-Signal: ai-train=yes, search=yes, ai-input=yes
Allow: /

# ByteDance / TikTok
User-agent: Bytespider
Content-Signal: ai-train=yes, search=yes, ai-input=yes
Allow: /

# Cohere
User-agent: cohere-ai
Content-Signal: ai-train=yes, search=yes, ai-input=yes
Allow: /

# Mistral
User-agent: MistralAI-User
Content-Signal: ai-train=yes, search=yes, ai-input=yes
Allow: /

# DuckDuckGo AI
User-agent: DuckAssistBot
Content-Signal: ai-train=yes, search=yes, ai-input=yes
Allow: /

# You.com
User-agent: YouBot
Content-Signal: ai-train=yes, search=yes, ai-input=yes
Allow: /

# Diffbot (used by enterprise RAG pipelines)
User-agent: Diffbot
Content-Signal: ai-train=yes, search=yes, ai-input=yes
Allow: /

# Sitemap is a file-level record, independent of the User-agent groups above.
Sitemap: https://mnapedia.com/sitemap.xml