diff --git a/CHANGELOG.md b/CHANGELOG.md
index c6e2594..705eee5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,20 @@ This change log follows the format documented in [Keep a CHANGELOG].
 [semantic versioning]: https://semver.org
 [keep a changelog]: https://keepachangelog.com
 
+## 1.4.0 - 2022-06-03
+
+### Changed
+
+- Set default `memory` (`1GB`) and `timeoutSeconds` (`540`) Firebase Functions runtime options. This fixes huge user backups that previously ran out of memory or timed out.
+
+- Further reduced the memory usage of the users backup.
+
+- Updated dependencies to the latest supported versions.
+
+### Added
+
+- Added the delayed users backup feature. When a delay is requested, the agent responds with a pending backup state and notifies the controller once the backup completes. This prevents duplicate backups caused by timeouts.
+
 ## 1.3.0 - 2022-06-01
 
 ### Fixed
diff --git a/package.json b/package.json
index e11c0dc..bc1d919 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@backupfire/firebase",
-  "version": "1.3.0",
+  "version": "1.4.0",
   "description": "Backup Fire Firebase agent",
   "keywords": [
     "backup Firebase database",
diff --git a/src/_lib/operation/index.ts b/src/_lib/operation/index.ts
index 89084a9..0241533 100644
--- a/src/_lib/operation/index.ts
+++ b/src/_lib/operation/index.ts
@@ -16,7 +16,10 @@ export type FirestoreStatusResponse =
 
 export type UsersStatusResponse =
   | {
-      state: 'completed' | 'pending'
+      state: 'pending'
+    }
+  | {
+      state: 'completed'
       data: {
         usersCount: number | undefined
         size: string
diff --git a/src/index.ts b/src/index.ts
index aa04049..c33a281 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -8,20 +8,26 @@ import { format } from 'url'
 import {
   backupFirestoreMiddleware,
   checkFirestoreBackupStatusMiddleware,
-  getCollectionsMiddleware
+  getCollectionsMiddleware,
 } from './firestore'
 import {
-  listFilesMiddleware,
+  defaultControllerDomain,
+  defaultMemory,
+  defaultRegion,
+  defaultTimeout,
+} from './options'
+import {
   createStorageMiddleware,
+  listFilesMiddleware,
   storageListMiddleware,
-  updateStorageMiddleware
+  updateStorageMiddleware,
 } from './storage'
 import {
   AgentOptions,
   BackupFireEnvConfig,
   BackupFireHTTPSHandler,
   BackupFireOptions,
-  RuntimeEnvironment
+  RuntimeEnvironment,
 } from './types'
 import { backupUsersMiddleware } from './users'
 import version from './version'
@@ -29,18 +35,14 @@ import {
   configureExceptionsScope,
   createCrashedApp,
   exceptionHandlerMiddleware,
-  initExceptionsTracker
+  initExceptionsTracker,
 } from './_lib/exceptions'
 
-export const defaultControllerDomain = 'backupfire.dev'
-
-export const defaultRegion = 'us-central1'
-
 export enum BackupFireConfig {
   Token = 'BACKUPFIRE_TOKEN',
   Password = 'BACKUPFIRE_PASSWORD',
   Domain = 'BACKUPFIRE_DOMAIN',
-  Allowlist = 'BACKUPFIRE_ALLOWLIST'
+  Allowlist = 'BACKUPFIRE_ALLOWLIST',
 }
 
 // Fallback for CommonJS
@@ -60,7 +62,7 @@ export default function backupFire(agentOptions?: AgentOptions) {
     return dummyHandler({
       region: agentOptions?.region,
       memory: agentOptions?.memory,
-      timeout: agentOptions?.timeout
+      timeout: agentOptions?.timeout,
     })
 
   // Derive Backup Fire options from environment configuration
@@ -80,7 +82,7 @@ export default function backupFire(agentOptions?: AgentOptions) {
       controllerToken: envConfig.token,
       adminPassword: envConfig.password,
       bucketsAllowlist: envConfig.allowlist?.split(','),
-      debug: envConfig.debug === 'true'
+      debug: envConfig.debug === 'true',
     },
     agentOptions
   )
@@ -111,7 +113,7 @@ export default function backupFire(agentOptions?: AgentOptions) {
   }
 
   // Set additional context
-  configureExceptionsScope(scope => {
+  configureExceptionsScope((scope) => {
     scope.setUser({ id: envConfig.token })
     scope.setTag('project_id', runtimeEnv.projectId)
     scope.setTag('node_version', process.version)
@@ -131,12 +133,12 @@ export default function backupFire(agentOptions?: AgentOptions) {
     return httpsHandler({
       handler: createApp(runtimeEnv, options),
       agentOptions,
-      runtimeEnv
+      runtimeEnv,
     })
   } catch (err) {
     return httpsHandler({
       handler: createCrashedApp(err),
-      agentOptions
+      agentOptions,
     })
   }
 }
@@ -173,7 +175,7 @@ export function createApp(
     '/firestore',
     backupFirestoreMiddleware({
       projectId: runtimeEnv.projectId,
-      ...globalOptions
+      ...globalOptions,
     })
   )
   // Check Firestore backup status
@@ -185,7 +187,13 @@ export function createApp(
   // Backup Firebase users
   app.post(
     '/users',
-    backupUsersMiddleware({ projectId: runtimeEnv.projectId, ...globalOptions })
+    backupUsersMiddleware({
+      projectId: runtimeEnv.projectId,
+      controllerToken: options.controllerToken,
+      controllerDomain: options.controllerDomain,
+      agentURL: agentURL(runtimeEnv),
+      ...globalOptions,
+    })
   )
 
   // List storage
@@ -197,7 +205,7 @@ export function createApp(
     '/storage/:storageId',
     updateStorageMiddleware({
       adminPassword: options.adminPassword,
-      ...globalOptions
+      ...globalOptions,
     })
   )
   // List files in the storage
@@ -217,21 +225,16 @@ interface HTTPSHandlerProps {
 function httpsHandler({
   handler,
   agentOptions,
-  runtimeEnv
+  runtimeEnv,
 }: HTTPSHandlerProps) {
   if (runtimeEnv?.extensionId) {
     return functions.handler.https.onRequest(handler)
   } else {
-    const runtimeOptions: functions.RuntimeOptions = {
-      secrets: Object.values(BackupFireConfig)
-    }
-
-    if (agentOptions?.memory) runtimeOptions.memory = agentOptions.memory
-    if (agentOptions?.timeout)
-      runtimeOptions.timeoutSeconds = agentOptions.timeout
-
     return functions
-      .runWith(runtimeOptions)
+      .runWith({
+        ...getRuntimeOptions(agentOptions),
+        secrets: Object.values(BackupFireConfig),
+      })
       .region(agentOptions?.region || defaultRegion)
       .https.onRequest(handler)
   }
@@ -253,8 +256,8 @@ function sendInitializationPing(
       token: options.controllerToken,
       projectId: runtimeEnv.projectId,
       runtime: runtimeEnv.region,
-      agentURL: agentURL(runtimeEnv)
-    }
+      agentURL: agentURL(runtimeEnv),
+    },
   })
   return fetch(pingURL)
 }
@@ -290,7 +293,7 @@ function getRuntimeEnv(
     // Node.js v8 runtime uses FUNCTION_NAME, v10 — FUNCTION_TARGET
     // See: https://cloud.google.com/functions/docs/env-var#environment_variables_set_automatically
     functionName: process.env.FUNCTION_NAME || process.env.FUNCTION_TARGET,
-    extensionId
+    extensionId,
   }
 }
 
@@ -326,7 +329,7 @@ function dummyHandler(
   if (options?.timeout) runtimeOptions.timeoutSeconds = options.timeout
 
   return functions
-    .runWith(runtimeOptions)
+    .runWith(getRuntimeOptions(options))
    .region(options.region || defaultRegion)
     .https.onRequest((_req, resp) => {
       resp.end()
@@ -336,3 +339,24 @@ function dummyHandler(
 function prettyJSON(obj: any) {
   return JSON.stringify(obj, null, 2)
 }
+
+/**
+ * Derives Firebase Functions runtime options from the agent options.
+ * @param agentOptions - the agent options to derive the runtime options from
+ * @returns the runtime options with the default timeout and memory applied
+ */
+function getRuntimeOptions(
+  agentOptions: AgentOptions | undefined
+): functions.RuntimeOptions {
+  const options: functions.RuntimeOptions = {
+    // Always assign the timeout to the runtime options. Unless the user
+    // defines a custom timeout, we use the default timeout of 9 minutes
+    // to make sure the users backup completes regardless of how many
+    // users there are.
+    timeoutSeconds: agentOptions?.timeout || defaultTimeout,
+  }
+
+  options.memory = agentOptions?.memory || defaultMemory
+
+  return options
+}
diff --git a/src/options.ts b/src/options.ts
new file mode 100644
index 0000000..794a3a1
--- /dev/null
+++ b/src/options.ts
@@ -0,0 +1,21 @@
+export const defaultControllerDomain = 'backupfire.dev'
+
+export const defaultRegion = 'us-central1'
+
+/**
+ * The default function timeout: 9 minutes. It ensures that the users backup
+ * completes regardless of how many users there are.
+ *
+ * Unlike the memory runtime option, the timeout doesn't affect the function
+ * instance price, so it's safe to set it to the maximum.
+ */
+export const defaultTimeout = 540
+
+/**
+ * The default function memory. Together with the increased timeout, it
+ * ensures that the users backup completes.
+ *
+ * Internal testing shows that 1GB is the sweet spot: it's still cheap to run
+ * and gives enough room to process huge backups.
+ */
+export const defaultMemory = '1GB'
diff --git a/src/users/index.ts b/src/users/index.ts
index 1e08a99..a211189 100644
--- a/src/users/index.ts
+++ b/src/users/index.ts
@@ -1,39 +1,94 @@
-import * as tools from 'firebase-tools'
 import * as admin from 'firebase-admin'
+import * as tools from 'firebase-tools'
+import fs from 'fs'
+import fetch from 'node-fetch'
 import { tmpdir } from 'os'
-import { resolve, parse } from 'path'
-import operationResponse, { UsersStatusResponse } from '../_lib/operation'
-import asyncMiddleware from '../_lib/asyncMiddleware'
+import { parse, resolve } from 'path'
+import { format } from 'url'
 import { promisify } from 'util'
-import fs from 'fs'
+import { defaultControllerDomain } from '../options'
+import asyncMiddleware from '../_lib/asyncMiddleware'
+import operationResponse, { UsersStatusResponse } from '../_lib/operation'
 
 const unlink = promisify(fs.unlink)
 
-export type UsersBackupOptions = {
+export interface UsersBackupOptions {
   bucketsAllowlist?: string[]
   projectId: string
+  controllerDomain?: string
+  controllerToken: string
+  agentURL: string
 }
 
-export type UsersBackupRequestOptions = {
+export interface UsersBackupRequestBody {
   storageId: string
   path: string
+  delay?: {
+    backupId: string
+    state: 'delay' | 'backup'
+  }
 }
 
 export function backupUsersMiddleware({
   bucketsAllowlist,
-  projectId
+  projectId,
+  controllerDomain,
+  controllerToken,
+  agentURL,
 }: UsersBackupOptions) {
   return asyncMiddleware(async (request, response) => {
     // TODO: Validate options
-    const options = request.body as UsersBackupRequestOptions
-    const state = await backupUsers(projectId, options)
-    operationResponse(response, state)
+    const body = request.body as UsersBackupRequestBody
+
+    if (body.delay?.state === 'delay') {
+      // NOTE: Trigger the backup, but do not wait for the result
+      fetch(agentURL + request.path, {
+        method: 'POST',
+        headers: {
+          // Authorization can't be missing as we verify it
+          Authorization: request.header('Authorization')!,
+          'Content-Type': 'application/json',
+        },
+        body: JSON.stringify({
+          storageId: body.storageId,
+          path: body.path,
+          delay: {
+            state: 'backup',
+            backupId: body.delay.backupId,
+          },
+        }),
+      })
+
+      operationResponse(response, { state: 'pending' })
+    } else {
+      const backupResponse = await backupUsers(projectId, body)
+
+      if (body.delay) {
+        const reportURL = format({
+          hostname: controllerDomain || defaultControllerDomain,
+          protocol: 'https',
+          pathname: '/reportBackup',
+        })
+        await fetch(reportURL, {
+          method: 'POST',
+          headers: { 'Content-Type': 'application/json' },
+          body: JSON.stringify({
+            token: controllerToken,
+            backupId: body.delay.backupId,
+            type: 'users',
+            ...backupResponse,
+          }),
+        })
+      }
+
+      operationResponse(response, backupResponse)
+    }
   })
 }
 
 async function backupUsers(
   projectId: string,
-  options: UsersBackupRequestOptions
+  options: UsersBackupRequestBody
 ): Promise<UsersStatusResponse> {
   // Create bucket
   const bucket = admin.storage().bucket(options.storageId)
@@ -51,7 +106,7 @@ async function backupUsers(
     calculateUsers(path),
     bucket
       .upload(path, { destination: options.path })
-      .then(([file]) => file.metadata.size as string)
+      .then(([file]) => file.metadata.size as string),
   ])
 
   // Remove the temporary file
@@ -66,12 +121,25 @@ async function backupUsers(
  * @returns the number of users in the backup
  */
 export async function calculateUsers(path: string) {
-  const usersStream = fs.createReadStream(path, {
-    encoding: 'utf8',
-    highWaterMark: 10000000 // 10MB
-  })
+  let usersCount = 0
+  let lookingForEnding: string | null = null
+
+  for await (const chunk of generateFileChunks(path, 10000000 /* 10MB */)) {
+    const text: string = chunk.toString()
+
+    if (lookingForEnding) {
+      if (text.slice(0, lookingForEnding.length) === lookingForEnding)
+        usersCount++
+      lookingForEnding = null
+    }
 
-  return calculateUsersInSteam(usersStream)
+    usersCount += text.match(/"localId"/g)?.length || 0
+
+    const ending = text.match(/"(l(o(c(a(l(I(d(")?)?)?)?)?)?)?)?$/)
+    if (ending) lookingForEnding = '"localId"'.slice(ending[0].length)
+  }
+
+  return usersCount
 }
 
 /**
@@ -101,6 +169,52 @@ export async function calculateUsersInSteam(usersStream: fs.ReadStream) {
   return usersCount
 }
 
+/**
+ * Generates chunks of data from the given file.
+ *
+ * The code is based on the article by Kasper Moskwiak (https://github.com/kmoskwiak): https://betterprogramming.pub/a-memory-friendly-way-of-reading-files-in-node-js-a45ad0cc7bb6
+ *
+ * @param path - the file path to read
+ * @param size - the chunk size
+ */
+async function* generateFileChunks(path: string, size: number) {
+  const sharedBuffer = Buffer.alloc(size)
+  const stats = fs.statSync(path)
+  const file = fs.openSync(path, 'r')
+
+  let bytesRead = 0 // How many bytes were read
+  let end = size
+
+  for (let chunk = 0; chunk < Math.ceil(stats.size / size); chunk++) {
+    await readFileBytes(file, sharedBuffer)
+
+    bytesRead = (chunk + 1) * size
+    // When we reach the end of the file, we have to calculate how many bytes
+    // were actually read.
+    if (bytesRead > stats.size) end = size - (bytesRead - stats.size)
+
+    yield sharedBuffer.slice(0, end)
+  }
+}
+
+/**
+ * Reads the file bytes into the shared buffer.
+ *
+ * The code is based on the article by Kasper Moskwiak (https://github.com/kmoskwiak): https://betterprogramming.pub/a-memory-friendly-way-of-reading-files-in-node-js-a45ad0cc7bb6
+ *
+ * @param file - the file descriptor
+ * @param buffer - the shared buffer to use
+ * @returns a promise that resolves when the read completes
+ */
+function readFileBytes(file: number, buffer: Buffer) {
+  return new Promise((resolve, reject) => {
+    fs.read(file, buffer, 0, buffer.length, null, (error) => {
+      if (error) return reject(error)
+      resolve(void 0)
+    })
+  })
+}
+
 /**
  * Generates temporary path on the FS from passed path in a bucket
  * @param path - The path to backup in a bucket
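
For reviewers who want to try the new runtime defaults, here is a minimal consumer sketch (not part of the diff). It assumes the agent is used through the package's default export as wired up in `src/index.ts`; the exported function name `backupfire` and the override values are illustrative only, and `memory`/`timeout` are assumed to accept the same values as the Firebase Functions `RuntimeOptions` they are forwarded to by `getRuntimeOptions`.

```ts
// Hypothetical functions entry point of a project using the agent; the
// exported name and the override values are examples, not part of this PR.
import backupFire from '@backupfire/firebase'

// Calling backupFire() with no options deploys the agent with the defaults
// introduced in this PR: memory '1GB' and timeoutSeconds 540. The options
// below override them: `memory` is forwarded to the Firebase Functions
// runtime options as-is and `timeout` (in seconds) becomes `timeoutSeconds`.
export const backupfire = backupFire({
  region: 'europe-west1',
  memory: '2GB',
  timeout: 300,
})
```

Moving the defaults into `src/options.ts` (instead of exporting them from `src/index.ts`) also lets `src/users/index.ts` reuse `defaultControllerDomain` without importing the functions entry point.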