From 5b6e3439a25f75d7ddafdf1a7cd9ab32d4491510 Mon Sep 17 00:00:00 2001
From: philmcmahon
Date: Wed, 2 Oct 2024 19:13:29 +0100
Subject: [PATCH] After uploading the file to s3, send an SQS message requesting a transcription

---
 packages/media-downloader/src/index.ts  | 53 ++++++++++++++++++++++++-
 packages/media-downloader/src/yt-dlp.ts |  2 +-
 2 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/packages/media-downloader/src/index.ts b/packages/media-downloader/src/index.ts
index b0fd135..f09996e 100644
--- a/packages/media-downloader/src/index.ts
+++ b/packages/media-downloader/src/index.ts
@@ -1,19 +1,27 @@
 import {
+	generateOutputSignedUrlAndSendMessage,
 	getConfig,
+	getSignedDownloadUrl,
 	getSQSClient,
+	isSqsFailure,
+	TranscriptionConfig,
 } from '@guardian/transcription-service-backend-common';
 import { Upload } from '@aws-sdk/lib-storage';
 import { S3Client } from '@aws-sdk/client-s3';
 import { createReadStream } from 'node:fs';
 import { getNextJob } from './sqs';
 import { downloadMedia, MediaMetadata } from './yt-dlp';
+import { SQSClient } from '@aws-sdk/client-sqs';
+import { MediaDownloadJob } from '@guardian/transcription-service-common';
 
 const uploadToS3 = async (
 	s3Client: S3Client,
 	metadata: MediaMetadata,
 	bucket: string,
+	id: string,
 ) => {
 	const fileStream = createReadStream(`${metadata.mediaPath}`);
+	const key = `downloaded-media/${id}.${metadata.extension}`;
 	try {
 		const upload = new Upload({
 			client: s3Client,
@@ -24,9 +32,45 @@ const uploadToS3 = async (
 			},
 		});
 
+		upload.on('httpUploadProgress', (progress) => {
+			console.log(`Uploaded ${progress.loaded} of ${progress.total} bytes`);
+		});
+
 		await upload.done();
+		return key;
 	} catch (e) {
 		console.error(e);
+		throw e;
+	}
+};
+
+const requestTranscription = async (
+	config: TranscriptionConfig,
+	s3Key: string,
+	sqsClient: SQSClient,
+	job: MediaDownloadJob,
+	metadata: MediaMetadata,
+) => {
+	const signedUrl = await getSignedDownloadUrl(
+		config.aws.region,
+		config.app.sourceMediaBucket,
+		s3Key,
+		604800, // one week in seconds
+	);
+	const sendResult = await generateOutputSignedUrlAndSendMessage(
+		s3Key,
+		sqsClient,
+		config.app.taskQueueUrl,
+		config.app.transcriptionOutputBucket,
+		config.aws.region,
+		job.userEmail,
+		metadata.title,
+		signedUrl,
+		job.languageCode,
+		job.translationRequested,
+	);
+	if (isSqsFailure(sendResult)) {
+		throw new Error('Failed to send transcription job');
 	}
 };
 
@@ -45,8 +89,15 @@ const main = async () => {
 	);
 	if (job) {
 		const metadata = await downloadMedia(job.url, '/tmp', job.id);
-		await uploadToS3(s3Client, metadata, config.app.sourceMediaBucket);
+		const key = await uploadToS3(
+			s3Client,
+			metadata,
+			config.app.sourceMediaBucket,
+			job.id,
+		);
+		await requestTranscription(config, key, sqsClient, job, metadata);
 	}
+	setTimeout(main, 1000);
 };
 
 main();
diff --git a/packages/media-downloader/src/yt-dlp.ts b/packages/media-downloader/src/yt-dlp.ts
index 45b53b9..fb9d15e 100644
--- a/packages/media-downloader/src/yt-dlp.ts
+++ b/packages/media-downloader/src/yt-dlp.ts
@@ -25,7 +25,7 @@ export const downloadMedia = async (
 	id: string,
 ) => {
 	const output =
-		await $`yt-dlp --write-info-json --no-clean-info-json -o "${destinationDirectoryPath}/${id}" ${url}`;
+		await $`yt-dlp --write-info-json --no-clean-info-json --newline -o "${destinationDirectoryPath}/${id}" ${url}`;
 	console.log(output);
 	const metadata = extractInfoJson(
 		`${destinationDirectoryPath}/${id}.info.json`,