Skip to content

Commit

Permalink
Don't use embedded package bundle when external package loading is en…
Browse files Browse the repository at this point in the history
…abled.
  • Loading branch information
dom96 committed Nov 22, 2024
1 parent 7efa08a commit ce98a0a
Show file tree
Hide file tree
Showing 7 changed files with 46 additions and 22 deletions.
1 change: 1 addition & 0 deletions src/pyodide/internal/jaeger.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { default as internalJaeger } from 'pyodide-internal:internalJaeger';
* Used for tracing via Jaeger.
*/
export function enterJaegerSpan<T>(span: string, callback: () => T): T {
console.log("Trace start: ", span);
if (!internalJaeger.traceId) {
// Jaeger tracing not enabled or traceId is not present in request.
return callback();
Expand Down
16 changes: 16 additions & 0 deletions src/pyodide/internal/python.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { enterJaegerSpan } from 'pyodide-internal:jaeger';
import {
SITE_PACKAGES,
TRANSITIVE_REQUIREMENTS,
adjustSysPath,
mountSitePackages,
mountWorkerFiles,
Expand Down Expand Up @@ -50,6 +51,7 @@ import {
setUnsafeEval,
setGetRandomValues,
} from 'pyodide-internal:generated/emscriptenSetup';
import { loadPackages } from 'pyodide-internal:loadPackage';

/**
* After running `instantiateEmscriptenModule` but before calling into any C
Expand All @@ -62,6 +64,11 @@ async function prepareWasmLinearMemory(Module: Module): Promise<void> {
mountSitePackages(Module, SITE_PACKAGES.rootInfo);
entropyMountFiles(Module);
Module.noInitialRun = !SHOULD_RESTORE_SNAPSHOT;

// NB. loadPackages adds the packages to the `SITE_PACKAGES` global which then gets used in
// preloadDynamicLibs.
await loadPackages(Module, TRANSITIVE_REQUIREMENTS);

enterJaegerSpan('preload_dynamic_libs', () => preloadDynamicLibs(Module));
enterJaegerSpan('remove_run_dependency', () =>
Module.removeRunDependency('dynlibs')
Expand Down Expand Up @@ -100,9 +107,17 @@ export async function loadPyodide(
}
setUnsafeEval(UnsafeEval);
setGetRandomValues(getRandomValues);

await enterJaegerSpan('prepare_wasm_linear_memory', () =>
prepareWasmLinearMemory(Module)
);


// TODO: setupPackages needs to be called when we have not loaded a snapshot and are not generating one.
// It may also need to be called when a snapshot was loaded.
// await setupPackages(pyodide);
// await loadPackages(Module, TRANSITIVE_REQUIREMENTS);

maybeSetupSnapshotUpload(Module);
// Mount worker files after doing snapshot upload so we ensure that data from the files is never
// present in snapshot memory.
Expand All @@ -111,6 +126,7 @@ export async function loadPyodide(
// Finish setting up Pyodide's ffi so we can use the nice Python interface
await enterJaegerSpan('finalize_bootstrap', Module.API.finalizeBootstrap);
const pyodide = Module.API.public_api;

finishSnapshotSetup(pyodide);
return pyodide;
}
23 changes: 22 additions & 1 deletion src/pyodide/internal/setupPackages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ import {
LOAD_WHEELS_FROM_ARTIFACT_BUNDLER,
} from 'pyodide-internal:metadata';
import { simpleRunPython } from 'pyodide-internal:util';
import { default as EmbeddedPackagesTarReader } from 'pyodide-internal:packages_tar_reader';
import { loadPackages } from 'pyodide-internal:loadPackage';
import { enterJaegerSpan } from 'pyodide-internal:jaeger';

const canonicalizeNameRegex = /[-_.]+/g;

Expand Down Expand Up @@ -44,6 +47,7 @@ class SitePackagesDir {
path: '',
name: '',
parts: [],
// reader: null,
};
this.soFiles = [];
this.loadedRequirements = new Set();
Expand Down Expand Up @@ -125,9 +129,11 @@ class SitePackagesDir {
*
* This also returns the list of soFiles in the resulting site-packages
* directory so we can preload them.
*
* TODO(later): This needs to be removed when external package loading is enabled.
*/
export function buildSitePackages(requirements: Set<string>): SitePackagesDir {
const [bigTarInfo, bigTarSoFiles] = parseTarInfo();
const [bigTarInfo, bigTarSoFiles] = parseTarInfo(EmbeddedPackagesTarReader);

let requirementsInBigBundle = new Set([...STDLIB_PACKAGES]);

Expand Down Expand Up @@ -171,6 +177,7 @@ function disabledLoadPackage(): never {
/**
 * Compute the canonicalized names of every package reachable from `REQUIREMENTS`.
 *
 * The requirement names are canonicalized first, then expanded through
 * `recursiveDependencies` against the global `LOCKFILE`, and the names of the
 * resulting entries are canonicalized again and de-duplicated.
 *
 * @returns The set of canonical package names.
 */
function getTransitiveRequirements(): Set<string> {
  const canonicalRequirements = REQUIREMENTS.map(canonicalizePackageName);
  // Walk the lockfile to pull in the transitive dependencies of the requirements.
  // TODO(later): use current package's LOCKFILE instead of the global.
  const resolvedPackages = recursiveDependencies(LOCKFILE, canonicalRequirements);
  const transitiveNames = new Set<string>();
  for (const { name } of resolvedPackages) {
    transitiveNames.add(canonicalizePackageName(name));
  }
  return transitiveNames;
}
Expand Down Expand Up @@ -265,6 +272,20 @@ function addPackageToLoad(
}
}

/**
 * Prepare the Python packages for a Pyodide instance.
 *
 * Runs inside the `setup_packages` Jaeger span and performs two steps in order:
 * - applies `patchLoadPackage` to the instance
 * - awaits `loadPackages` for `TRANSITIVE_REQUIREMENTS` against `pyodide._module`
 *
 * @param pyodide The Pyodide instance to patch and load packages into.
 * @returns A promise that settles once package loading has finished.
 */
export async function setupPackages(pyodide: Pyodide): Promise<void> {
  const patchAndLoad = async (): Promise<void> => {
    patchLoadPackage(pyodide);
    await loadPackages(pyodide._module, TRANSITIVE_REQUIREMENTS);
  };
  await enterJaegerSpan('setup_packages', patchAndLoad);
}

// Re-exported so importers of this module can read the requirements list from here.
export { REQUIREMENTS };
// Canonical names of the requirements and their resolved dependencies; computed once when this
// module is evaluated.
export const TRANSITIVE_REQUIREMENTS = getTransitiveRequirements();
// Site-packages directory built from the transitive requirements; also computed at module
// evaluation time.
export const SITE_PACKAGES = buildSitePackages(TRANSITIVE_REQUIREMENTS);
4 changes: 2 additions & 2 deletions src/pyodide/internal/snapshot.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import {
SITE_PACKAGES,
getSitePackagesPath,
} from 'pyodide-internal:setupPackages';
import { default as TarReader } from 'pyodide-internal:packages_tar_reader';
import { default as EmbeddedPackagesTarReader } from 'pyodide-internal:packages_tar_reader';
import {
SHOULD_SNAPSHOT_TO_DISK,
IS_CREATING_BASELINE_SNAPSHOT,
Expand Down Expand Up @@ -136,7 +136,7 @@ export function preloadDynamicLibs(Module: Module): void {
throw Error('contentsOffset not defined for ' + soFile);
}
const wasmModuleData = new Uint8Array(size);
TarReader.read(contentsOffset, wasmModuleData);
(node.reader ?? EmbeddedPackagesTarReader).read(contentsOffset, wasmModuleData);
const path = sitePackages + '/' + soFile.join('/');
PRELOADED_SO_FILES.push(path);
loadDynlib(Module, path, wasmModuleData);
Expand Down
4 changes: 1 addition & 3 deletions src/pyodide/internal/tar.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import { default as TarReader } from 'pyodide-internal:packages_tar_reader';

// This is based on the info about the tar file format on wikipedia
// And some trial and error with real tar files.
// https://en.wikipedia.org/wiki/Tar_(computing)#File_format
Expand Down Expand Up @@ -44,7 +42,7 @@ function decodeHeader(buf: Uint8Array, reader: Reader): TarFSInfo {
};
}

export function parseTarInfo(reader = TarReader): [TarFSInfo, string[]] {
export function parseTarInfo(reader: Reader): [TarFSInfo, string[]] {
const directories: TarFSInfo[] = [];
const soFiles = [];
const root: TarFSInfo = {
Expand Down
19 changes: 3 additions & 16 deletions src/pyodide/python-entrypoint-helper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -74,21 +74,8 @@ async function applyPatch(pyodide: Pyodide, patchName: string): Promise<void> {
pyodide.pyimport(patchName + '_patch');
}

/**
* Set up Python packages:
* - patch loadPackage to ignore integrity
* - get requirements
* - Use tar file + requirements to mount site packages directory
* - if in workerd use loadPackage to load packages
* - install patches to make various requests packages work
*
* TODO: move this into setupPackages.js. Can't now because the patch imports
* fail from there for some reason.
*/
export async function setupPackages(pyodide: Pyodide): Promise<void> {
return await enterJaegerSpan('setup_packages', async () => {
patchLoadPackage(pyodide);
await loadPackages(pyodide._module, TRANSITIVE_REQUIREMENTS);
async function setupPatches(pyodide: Pyodide): Promise<void> {
return await enterJaegerSpan('setup_patches', async () => {
// install any extra packages into the site-packages directory, so calculate where that is.
const pymajor = pyodide._module._py_version_major();
const pyminor = pyodide._module._py_version_minor();
Expand Down Expand Up @@ -119,7 +106,7 @@ function getMainModule(): Promise<PyModule> {
}
mainModulePromise = (async function () {
const pyodide = await getPyodide();
await setupPackages(pyodide);
await setupPatches(pyodide);
Limiter.beginStartup();
try {
return enterJaegerSpan('pyimport_main_module', () =>
Expand Down
1 change: 1 addition & 0 deletions src/workerd/server/workerd-api.c++
Original file line number Diff line number Diff line change
Expand Up @@ -558,6 +558,7 @@ void WorkerdApi::compileModules(jsg::Lock& lockParam,
makePyodideMetadataReader(conf, impl->pythonConfig), jsg::ModuleRegistry::Type::INTERNAL);

// Inject packages tar file
// TODO(later): This shouldn't exist once featureFlags.getPythonExternalPackages() is true.
modules->addBuiltinModule("pyodide-internal:packages_tar_reader",
jsg::alloc<ReadOnlyBuffer>(PYODIDE_PACKAGES_TAR.get()),
workerd::jsg::ModuleRegistry::Type::INTERNAL);
Expand Down

0 comments on commit ce98a0a

Please sign in to comment.