# General crawl rules for all bots
User-agent: *
Disallow: /admin/  # Block admin area
Disallow: /login/  # Block login page
Disallow: /register/  # Block registration page
Disallow: /checkout/  # Block checkout process
Disallow: /cart/  # Block cart page
Disallow: /user/  # Block user dashboard
Disallow: /settings/  # Block settings page
Disallow: /private/  # Block private or restricted content
Disallow: /search/  # Block search results pages (avoid index duplication)
Disallow: /wishlist/  # Block wishlist (if irrelevant for SEO)

# Allow essential resources for better crawling
Allow: /*.css$  # Allow CSS files for proper layout rendering
Allow: /*.js$  # Allow JS files for functionality
Allow: /images/  # Allow images to be indexed (important for SEO)
Allow: /static/  # Allow static files such as fonts or icons

# Block specific crawlers to reduce server load or avoid bad bots
User-agent: MJ12bot
Disallow: /

User-agent: AhrefsBot
Disallow: /

User-agent: SemrushBot
Disallow: /

User-agent: YandexBot
Disallow: /  # Optional, if you don't need Yandex to crawl the site

User-agent: Baiduspider
Disallow: /  # Optional, if you don't need Baidu's bot
# Crawl delay (to prevent aggressive crawlers from overloading the server)
# NOTE(review): Crawl-delay applies per user-agent group; placed here it binds to
# Baiduspider only, and Googlebot ignores it entirely — move it into the group(s)
# it is meant for if broader throttling was intended.
Crawl-delay: 10  # Pause for 10 seconds between requests (if needed)

# Specific directives for Googlebot
User-agent: Googlebot
Disallow: /staging/  # Block staging or development environment
# Prevent Googlebot from crawling dynamic URLs that might cause issues
Disallow: /*?session_id=  # Block session ID URLs
Disallow: /*?sort=  # Block URLs with sorting parameters (avoid duplicate content)
Disallow: /*?filter=  # Block URLs with filtering parameters

# Allow Googlebot to access mobile-friendly content
User-agent: Googlebot-Mobile
Allow: /mobile/  # Allow crawling of the mobile version if it's separate

# Sitemap URLs for better crawlability (Sitemap lines are independent of any group)
Sitemap: https://www.studyonloan.com/sitemap.xml
Sitemap: https://www.studyonloan.com/sitemap-news.xml  # If you have a news section
Sitemap: https://www.studyonloan.com/sitemap-products.xml  # If you have a products section

# Blocking access to sensitive files (robots.txt itself is public)
# NOTE(review): these Disallow lines follow the Googlebot-Mobile group and so bind
# to it, not to all bots — move them under "User-agent: *" if global blocking
# was intended.
Disallow: /robots.txt  # Prevent robots from crawling the robots.txt file itself (although it's usually public)
Disallow: /humans.txt  # Block any humans.txt if not intended for indexing
Disallow: /wp-config.php  # Block WordPress config if applicable

# Block PDF or other document types (if unnecessary for SEO indexing)
Disallow: /*.pdf$  # Block PDFs from being indexed by search engines

# Host (only used by Yandex search engine)
# NOTE(review): Yandex deprecated Host in 2018 and historically expected a bare
# hostname (www.studyonloan.com), not a full URL — confirm before relying on it.
Host: https://www.studyonloan.com