Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bring up to date with puppeteer-extra-plugin-stealth #6

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ stealth(driver,
webgl_vendor="Intel Inc.",
renderer="Intel Iris OpenGL Engine",
fix_hairline=True,
hardware_concurrency=4,
)

url = "https://bot.sannysoft.com/"
Expand All @@ -71,6 +72,7 @@ stealth(
renderer: str = "Intel Iris OpenGL Engine",
fix_hairline: bool = False,
run_on_insecure_origins: bool = False,
hardware_concurrency: int = 4,
)
```

Expand Down
5 changes: 4 additions & 1 deletion selenium_stealth/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from .webgl_vendor import webgl_vendor_override
from .window_outerdimensions import window_outerdimensions
from .hairline_fix import hairline_fix
from .navigator_hardware_concurrency import navigator_hardware_concurrency

"""
If user_agent = None then selenium-stealth only removes 'headless' from the userAgent
Expand All @@ -32,10 +33,11 @@
def stealth(driver: Driver, user_agent: str = None,
languages: [str] = ["en-US", "en"],
vendor: str = "Google Inc.",
platform: str = None,
platform: str = "Win32",
webgl_vendor: str = "Intel Inc.",
renderer: str = "Intel Iris OpenGL Engine",
fix_hairline: bool = False,
hardware_concurrency: int = 4,
run_on_insecure_origins: bool = False, **kwargs) -> None:
if not isinstance(driver, Driver):
raise ValueError("driver must is selenium.webdriver.Chrome, currently this lib only support Chrome")
Expand All @@ -47,6 +49,7 @@ def stealth(driver: Driver, user_agent: str = None,
chrome_runtime(driver, run_on_insecure_origins, **kwargs)
iframe_content_window(driver, **kwargs)
media_codecs(driver, **kwargs)
navigator_hardware_concurrency(driver, hardware_concurrency, **kwargs)
navigator_languages(driver, languages, **kwargs)
navigator_permissions(driver, **kwargs)
navigator_plugins(driver, **kwargs)
Expand Down
9 changes: 5 additions & 4 deletions selenium_stealth/js/iframe.contentWindow.js
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@
// Adds a hook to intercept iframe creation events
const addIframeCreationSniffer = () => {
/* global document */
const createElement = {
const createElementHandler = {
// Make toString() native
get(target, key) {
return Reflect.get(target, key)
Expand All @@ -86,9 +86,10 @@
}
}
// All this just due to iframes with srcdoc bug
document.createElement = new Proxy(
document.createElement,
createElement
utils.replaceWithProxy(
document,
'createElement',
createElementHandler
)
}

Expand Down
11 changes: 11 additions & 0 deletions selenium_stealth/js/navigator.hardwareConcurrency.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// https://github.com/berstend/puppeteer-extra/blob/3ea4ebca4237bb45ce402ba6a44d852e3499cb5f/packages/puppeteer-extra-plugin-stealth/evasions/navigator.hardwareConcurrency/index.js

// Evasion script body: takes the value to report as `hardwareConcurrency` and
// asks `utils.replaceProperty` to install a getter for it on navigator's prototype.
// NOTE(review): `utils` is not defined in this file — presumably injected into
// the page alongside this script; confirm against the caller.
(hardwareConcurrency) => {
// Helper: hand `utils.replaceProperty` the prototype of `navigator`, the
// property `name`, and a descriptor whose getter always returns `value`.
const patchNavigator = (name, value) =>
utils.replaceProperty(Object.getPrototypeOf(navigator), name, {
get() {
return value
}
})
// Fall back to 4 when the supplied argument is falsy (undefined, null, 0, ...).
patchNavigator('hardwareConcurrency', hardwareConcurrency || 4)
}
58 changes: 52 additions & 6 deletions selenium_stealth/js/navigator.plugins.js
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@
value,
writable: false,
enumerable: false, // Important for mimeTypes & plugins: `JSON.stringify(navigator.mimeTypes)`
configurable: false
configurable: true
})

// Loop over our fake data and construct items
Expand All @@ -78,8 +78,42 @@
}
defineProp(item, prop, data[prop])
}
return patchItem(item, data)
}

const patchItem = (item, data) => {
let descriptor = Object.getOwnPropertyDescriptors(item)

// Special case: Plugins have a magic length property which is not enumerable
// e.g. `navigator.plugins[i].length` should always be the length of the assigned mimeTypes
if (itemProto === Plugin.prototype) {
descriptor = {
...descriptor,
length: {
value: data.__mimeTypes.length,
writable: false,
enumerable: false,
configurable: true // Important to be able to use the ownKeys trap in a Proxy to strip `length`
}
}
}

// We need to spoof a specific `MimeType` or `Plugin` object
return Object.create(itemProto, Object.getOwnPropertyDescriptors(item))
const obj = Object.create(itemProto, descriptor)

// Virtually all property keys are not enumerable in vanilla
const blacklist = [...Object.keys(data), 'length', 'enabledPlugin']
return new Proxy(obj, {
ownKeys(target) {
return Reflect.ownKeys(target).filter(k => !blacklist.includes(k))
},
getOwnPropertyDescriptor(target, prop) {
if (blacklist.includes(prop)) {
return undefined
}
return Reflect.getOwnPropertyDescriptor(target, prop)
}
})
}

const magicArray = []
Expand Down Expand Up @@ -144,6 +178,12 @@
typeProps.forEach((_, i) => keys.push(`${i}`))
typeProps.forEach(propName => keys.push(propName))
return keys
},
getOwnPropertyDescriptor(target, prop) {
if (prop === 'length') {
return undefined
}
return Reflect.getOwnPropertyDescriptor(target, prop)
}
})

Expand Down Expand Up @@ -221,12 +261,18 @@
for (const pluginData of data.plugins) {
pluginData.__mimeTypes.forEach((type, index) => {
plugins[pluginData.name][index] = mimeTypes[type]
plugins[type] = mimeTypes[type]
Object.defineProperty(mimeTypes[type], 'enabledPlugins', {
value: JSON.parse(JSON.stringify(plugins[pluginData.name])),

Object.defineProperty(plugins[pluginData.name], type, {
value: mimeTypes[type],
writable: false,
enumerable: false, // Not enumerable
configurable: true
})
Object.defineProperty(mimeTypes[type], 'enabledPlugin', {
value: new Proxy(plugins[pluginData.name], {}), // Prevent circular references
writable: false,
enumerable: false, // Important: `JSON.stringify(navigator.plugins)`
configurable: false
configurable: true
})
})
}
Expand Down
9 changes: 9 additions & 0 deletions selenium_stealth/navigator_hardware_concurrency.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from pathlib import Path
from .wrapper import evaluateOnNewDocument
from selenium.webdriver import Chrome as Driver


def navigator_hardware_concurrency(driver: Driver, hardware_concurrency: int, **kwargs) -> None:
    """Register the navigator.hardwareConcurrency script for new documents.

    Reads ``js/navigator.hardwareConcurrency.js`` from the directory that
    contains this module and hands its text, together with
    ``hardware_concurrency``, to ``evaluateOnNewDocument``.

    Args:
        driver: The Chrome driver passed through to ``evaluateOnNewDocument``.
        hardware_concurrency: Value forwarded as the script's argument.
        **kwargs: Accepted for signature compatibility; not used here.
    """
    script_path = Path(__file__).parent / "js/navigator.hardwareConcurrency.js"
    script_source = script_path.read_text()
    evaluateOnNewDocument(driver, script_source, hardware_concurrency)