diff --git a/src/lib/server/websearch/scrape/playwright.ts b/src/lib/server/websearch/scrape/playwright.ts index 4e4f7ddac45..b4b84dce450 100644 --- a/src/lib/server/websearch/scrape/playwright.ts +++ b/src/lib/server/websearch/scrape/playwright.ts @@ -68,7 +68,18 @@ export async function withPage( try { const page = await ctx.newPage(); - env.PLAYWRIGHT_ADBLOCKER === "true" && (await blocker.enableBlockingInPage(page)); + if (env.PLAYWRIGHT_ADBLOCKER === "true") { + await blocker.enableBlockingInPage(page); + } + + await page.route("**", (route, request) => { + const requestUrl = request.url(); + if (!requestUrl.startsWith("https://")) { + logger.warn(`Blocked request to: ${requestUrl}`); + return route.abort(); + } + return route.continue(); + }); const res = await page .goto(url, { waitUntil: "load", timeout: parseInt(env.WEBSEARCH_TIMEOUT) }) @@ -78,9 +89,8 @@ export async function withPage( ); }); - // await needed here so that we don't close the context before the callback is done return await callback(page, res ?? undefined); } finally { - ctx.close(); + await ctx.close(); } }