Skip to content

Commit

Permalink
fix(websearch): block protocols on playwright (#1579)
Browse files: browse the repository at this point in the history
* fix(websearch): block protocols on playwright

* fix: only allow https
  • Loading branch information
nsarrazin authored Nov 20, 2024
1 parent c683bb7 commit 5f256c3
Showing 1 changed file with 13 additions and 3 deletions.
16 changes: 13 additions & 3 deletions src/lib/server/websearch/scrape/playwright.ts
Original file line number | Diff line number | Diff line change
Expand Up @@ -68,7 +68,18 @@ export async function withPage<T>(

try {
const page = await ctx.newPage();
env.PLAYWRIGHT_ADBLOCKER === "true" && (await blocker.enableBlockingInPage(page));
if (env.PLAYWRIGHT_ADBLOCKER === "true") {
await blocker.enableBlockingInPage(page);
}

await page.route("**", (route, request) => {
const requestUrl = request.url();
if (!requestUrl.startsWith("https://")) {
logger.warn(`Blocked request to: ${requestUrl}`);
return route.abort();
}
return route.continue();
});

const res = await page
.goto(url, { waitUntil: "load", timeout: parseInt(env.WEBSEARCH_TIMEOUT) })
Expand All @@ -78,9 +89,8 @@ export async function withPage<T>(
);
});

// await needed here so that we don't close the context before the callback is done
return await callback(page, res ?? undefined);
} finally {
ctx.close();
await ctx.close();
}
}

0 comments on commit 5f256c3

Please sign in to comment.