From 8f01db016d31a2a37eef5f39fccc321324eba651 Mon Sep 17 00:00:00 2001 From: harlan Date: Sun, 3 Mar 2024 16:57:27 +1100 Subject: [PATCH] fix: `--sitemaps` option will override robots.txt sitemaps Fixes #169 --- packages/cli/src/createCli.ts | 2 +- packages/core/src/discovery/robotsTxt.ts | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/packages/cli/src/createCli.ts b/packages/cli/src/createCli.ts index 6d050f71..06769959 100644 --- a/packages/cli/src/createCli.ts +++ b/packages/cli/src/createCli.ts @@ -21,7 +21,7 @@ export default function createCli() { cli.option('--site ', 'Host URL to scan.') cli.option('--router-prefix ', 'The URL path prefix for the client and API to run from.') - cli.option('--sitemaps ', 'Comma separated list of sitemaps to use for scanning.') + cli.option('--sitemaps ', 'Comma separated list of sitemaps to use for scanning. Providing these will override any in robots.txt.') cli.option('--samples ', 'Specify the amount of samples to run.') cli.option('--throttle', 'Enable the throttling') cli.option('--enable-javascript', 'When inspecting the HTML wait for the javascript to execute. Useful for SPAs.') diff --git a/packages/core/src/discovery/robotsTxt.ts b/packages/core/src/discovery/robotsTxt.ts index 6f8c53cb..80b76cf3 100644 --- a/packages/core/src/discovery/robotsTxt.ts +++ b/packages/core/src/discovery/robotsTxt.ts @@ -80,6 +80,9 @@ export function mergeRobotsTxtConfig(config: ResolvedUserConfig, { groups, sitem ...normalisedGroups.flatMap(group => group.allow), ])].filter(isValidRegex) } - if (config.scanner.sitemap !== false && sitemaps.length) - config.scanner.sitemap = [...new Set([...(Array.isArray(config.scanner.sitemap) ? config.scanner.sitemap : []), ...sitemaps])] + if (config.scanner.sitemap !== false && sitemaps.length) { + // allow overriding the robots.txt sitemaps with your own + if (!Array.isArray(config.scanner.sitemap) || !config.scanner.sitemap.length) + config.scanner.sitemap = [...new Set([...(Array.isArray(config.scanner.sitemap) ? config.scanner.sitemap : []), ...sitemaps])] + } }