Skip to content

Commit

Permalink
After uploading the file to S3, send an SQS message requesting a transcription
Browse files Browse the repository at this point in the history
  • Loading branch information
philmcmahon committed Oct 2, 2024
1 parent 24212da commit 5b6e343
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 2 deletions.
53 changes: 52 additions & 1 deletion packages/media-downloader/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,19 +1,27 @@
import {
generateOutputSignedUrlAndSendMessage,
getConfig,
getSignedDownloadUrl,
getSQSClient,
isSqsFailure,
TranscriptionConfig,
} from '@guardian/transcription-service-backend-common';
import { Upload } from '@aws-sdk/lib-storage';
import { S3Client } from '@aws-sdk/client-s3';
import { createReadStream } from 'node:fs';
import { getNextJob } from './sqs';
import { downloadMedia, MediaMetadata } from './yt-dlp';
import { SQSClient } from '@aws-sdk/client-sqs';
import { MediaDownloadJob } from '@guardian/transcription-service-common';

const uploadToS3 = async (
s3Client: S3Client,
metadata: MediaMetadata,
bucket: string,
id: string,
) => {
const fileStream = createReadStream(`${metadata.mediaPath}`);
const key = `downloaded-media/${id}.${metadata.extension}`;
try {
const upload = new Upload({
client: s3Client,
Expand All @@ -24,9 +32,45 @@ const uploadToS3 = async (
},
});

upload.on('httpUploadProgress', (progress) => {
console.log(`Uploaded ${progress.loaded} of ${progress.total} bytes`);
});

await upload.done();
return key;
} catch (e) {
console.error(e);
throw e;
}
};

/**
 * Requests a transcription for media that has already been uploaded to S3.
 *
 * First obtains a signed download URL for `s3Key` in the source media bucket,
 * then hands that URL, together with the job and media details, to
 * `generateOutputSignedUrlAndSendMessage`, targeting the configured task queue.
 *
 * @param config - Service configuration; supplies the AWS region, the source
 *   media bucket, the task queue URL and the transcription output bucket.
 * @param s3Key - Key of the uploaded media object in the source media bucket.
 * @param sqsClient - SQS client passed through to the message-sending helper.
 * @param job - The download job; its user email, language code and
 *   translation flag are forwarded with the request.
 * @param metadata - Metadata of the downloaded media; only `title` is used.
 * @throws Error when the send result is classified as a failure by
 *   `isSqsFailure`.
 */
const requestTranscription = async (
  config: TranscriptionConfig,
  s3Key: string,
  sqsClient: SQSClient,
  job: MediaDownloadJob,
  metadata: MediaMetadata,
) => {
  const { aws, app } = config;

  // Lifetime requested for the signed download URL: one week, in seconds.
  const oneWeekInSeconds = 7 * 24 * 60 * 60;

  const downloadUrl = await getSignedDownloadUrl(
    aws.region,
    app.sourceMediaBucket,
    s3Key,
    oneWeekInSeconds,
  );

  const outcome = await generateOutputSignedUrlAndSendMessage(
    s3Key,
    sqsClient,
    app.taskQueueUrl,
    app.transcriptionOutputBucket,
    aws.region,
    job.userEmail,
    metadata.title,
    downloadUrl,
    job.languageCode,
    job.translationRequested,
  );

  // Nothing more to do once the message has been accepted.
  if (!isSqsFailure(outcome)) {
    return;
  }
  throw new Error('Failed to send transcription job');
};

Expand All @@ -45,8 +89,15 @@ const main = async () => {
);
if (job) {
const metadata = await downloadMedia(job.url, '/tmp', job.id);
await uploadToS3(s3Client, metadata, config.app.sourceMediaBucket);
const key = await uploadToS3(
s3Client,
metadata,
config.app.sourceMediaBucket,
job.id,
);
await requestTranscription(config, key, sqsClient, job, metadata);
}
setTimeout(main, 1000);
};

main();
2 changes: 1 addition & 1 deletion packages/media-downloader/src/yt-dlp.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ export const downloadMedia = async (
id: string,
) => {
const output =
await $`yt-dlp --write-info-json --no-clean-info-json -o "${destinationDirectoryPath}/${id}" ${url}`;
await $`yt-dlp --write-info-json --no-clean-info-json --newline -o "${destinationDirectoryPath}/${id}" ${url}`;
console.log(output);
const metadata = extractInfoJson(
`${destinationDirectoryPath}/${id}.info.json`,
Expand Down

0 comments on commit 5b6e343

Please sign in to comment.