#
# robots.txt for https://www.statemirror.com/
#
# Format: one "field: value" record per line; "#" starts a comment that runs
# to the end of the line. Each group begins with one or more User-agent lines
# followed by the Allow/Disallow rules that apply to those crawlers.
#

# ---------------------------------------------------------------------------
# Default rules for every crawler that has no dedicated group below.
# ---------------------------------------------------------------------------
User-agent: *

# Back-office, internal search, and AJAX/XHR endpoints.
Disallow: /admin/*
Disallow: /search/*
Disallow: /breaking-ticker-articles/*
Disallow: /search?*
Disallow: /xhr/*
Disallow: /xhr/getNewsMixin*
Disallow: /h-ajax-request/*

# Unpublished story previews and the staging area.
Disallow: /preview/story-*
Disallow: /amp/preview/story-*
Disallow: /staging/*

# Individual sections and pages kept out of crawling.
Disallow: /alfoo
Disallow: /sildoo
Disallow: /dutas
Disallow: /metsmall
Disallow: /bulletin/*
Disallow: /cartoons/*
# NOTE(review): "?" is a literal character in robots.txt, not a wildcard, so
# this rule only matches URLs that literally start with "/tags/???".
# Confirm that is the intent.
Disallow: /tags/???
Disallow: /weekly-items
Disallow: /daily-items
Disallow: /bulletin
Disallow: /the-news-state
Disallow: /ashwani
Disallow: /reema-roy
Disallow: /abhishek
Disallow: /ddff
Disallow: /ashwani-kumar-mishra-from-uttar-pradesh

# Author pages kept out of crawling.
Disallow: /author/tech-seo-product
Disallow: /author/editor-1
Disallow: /author/editor-2
Disallow: /author/editor-4

# A single uploaded PDF kept out of crawling.
Disallow: /pdf_upload/1640358657509219522021-406708.pdf

# Explicitly allowed paths.
Allow: /content/servlet/RDESController?*
Allow: /ads.txt

# Catch-all allow, deliberately placed LAST in the group: crawlers that apply
# the first matching rule would otherwise stop here and never reach the
# Disallow rules above. Crawlers that apply the longest matching rule are
# unaffected by the position.
Allow: /

# ---------------------------------------------------------------------------
# Sitemaps (these records apply to the whole file, not to a single group).
# ---------------------------------------------------------------------------
Sitemap: https://www.statemirror.com/sitemap/sitemap-index.xml
Sitemap: https://www.statemirror.com/news-sitemap-daily.xml
Sitemap: https://www.statemirror.com/sitemap-daily.xml

# ---------------------------------------------------------------------------
# Disallow bots that are harmful, heavy, or low-value:
# ---------------------------------------------------------------------------

# SEO backlink crawler (Ahrefs)
User-agent: AhrefsBot
Disallow: /

# SEO crawler (Semrush)
User-agent: SemrushBot
Disallow: /

# SEO crawler (Majestic)
User-agent: MJ12bot
Disallow: /

# SEO crawler (Moz)
User-agent: DotBot
Disallow: /

# SEO crawler (WebMeUp)
User-agent: BLEXBot
Disallow: /

# LLM dataset crawler (Timpi)
User-agent: Timpibot
Disallow: /

# AI data scraper (Diffbot)
User-agent: Diffbot
Disallow: /

# Common Crawl bot (if you choose to block it)
User-agent: CCBot
Disallow: /

# Plagiarism checker bot
User-agent: TurnitinBot
Disallow: /

# Data broker / people search bot
User-agent: PiplBot
Disallow: /

User-agent: Amazonbot
Disallow: /