kaiban-ai · elkernel128 · Nov 14, 2024 · Nov 14, 2024 · Nov 14, 2024 · Nov 19, 2024
diff --git a/packages/tools/README.md b/packages/tools/README.md
@@ -33,6 +33,73 @@ Tavily Search is a tool that provides AI-optimized search capabilities, deliveri
 
 Learn more: https://tavily.com/
 
+### 3. YouTube Captions Scraper
+
+The YouTube Captions Scraper tool allows agents to extract captions from YouTube videos. It requires a valid YouTube API access token to function.
+
+Learn more: https://developers.google.com/youtube/v3
+
+#### Guide: Obtain YouTube API Authorization Token Without Custom Credentials
+
+This guide will help you get a YouTube API authorization token for testing purposes using Google’s OAuth 2.0 Playground without requiring a Client ID and Client Secret.
+
+##### **Step 1: Access OAuth 2.0 Playground**
+
+- Go to the [OAuth 2.0 Playground](https://developers.google.com/oauthplayground/).
+- This tool allows you to interact with Google APIs and generate authorization tokens.
+
+##### **Step 2: Select YouTube API Scopes**
+
+- Look for the **YouTube Data API scopes** in Step 1 of the Playground.
+- Choose the appropriate scopes depending on the API actions you want to test:
+  - **`https://www.googleapis.com/auth/youtube`**: Full access to manage YouTube account.
+  - **`https://www.googleapis.com/auth/youtube.readonly`**: Read-only access to YouTube account.
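+  - **`https://www.googleapis.com/auth/youtube.force-ssl`**: Allows viewing, managing, and deleting your YouTube videos and captions over a secure (SSL) connection. Select this scope for the YouTube Captions Scraper, because the captions endpoint it calls requires it.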
+
+##### **Step 3: Authorize the APIs**
+
+- Click the **"Authorize APIs"** button.
+- A Google sign-in page will appear:
+  - Sign in with your Google account.
+  - Grant the requested permissions.
+
+##### **Step 4: Exchange Authorization Code for Tokens**
+
+- After successful authorization, go to Step 2 of the Playground.
+- Click the **"Exchange authorization code for tokens"** button.
+- The Playground will generate:
+  - **Access Token:** Use this token to make API requests.
+  - (Optional) **Refresh Token:** If available, use this to get a new access token when the current one expires.
+
+##### **Step 5: Test API Requests**
+
+- Use Step 3 of the Playground to test API requests directly:
+  1. Enter the YouTube API endpoint (e.g., `https://www.googleapis.com/youtube/v3/videos`).
+  2. Add required parameters and headers.
+  3. Use the generated access token for authentication.
+
+##### **Limitations**
+
+- **Short-lived Access Token:** The token typically expires in 1 hour.
+- **Default Credentials Restrictions:** Some advanced API features may not be accessible.
+- **No Persistent Refresh Token:** A new authorization process might be needed for extended testing.
+
+##### **Recommendation for Advanced Testing**
+
+For more comprehensive testing, create your own project in the [Google Cloud Console](https://console.cloud.google.com/) and obtain a **Client ID** and **Client Secret** to fully unlock the YouTube API features.
+
+#### Getting YouTube API Access Token
+
+To use the YouTube Captions Scraper tool, you need to obtain a YouTube API access token. Follow these steps:
+
+- Go to the Google Cloud Console.
+- Create a new project or select an existing project.
+- Enable the YouTube Data API v3 for your project.
+- Go to the "Credentials" section and create an OAuth 2.0 Client ID.
+- Download the OAuth 2.0 Client ID JSON file.
+- Use the OAuth 2.0 Client ID to obtain an access token. You can use the OAuth 2.0 Playground to generate the token.
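+- Copy the access token and add it to your `.env` file as `VITE_YOUTUBE_LONG_LIVE_TOKEN`. Despite the variable name, access tokens generated through the OAuth 2.0 Playground typically expire after about one hour, so replace the value with a fresh token when requests start failing.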
+
 ### 3. Serper
 
 Serper is a tool that integrates with Google Search API service, providing access to various types of search results including web search, news, images, and more. It's particularly useful for retrieving real-time information from Google's search engine.
@@ -111,6 +178,7 @@ Create a `.env` file in the root directory with your API keys:
 ```env
 VITE_FIRECRAWL_API_KEY=your_firecrawl_api_key
 VITE_TAVILY_API_KEY=your_tavily_api_key
+VITE_YOUTUBE_LONG_LIVE_TOKEN=your_youtube_long_lived_token
 VITE_SERPER_API_KEY=your_serper_api_key
 VITE_EXA_API_KEY=your_exa_api_key
 VITE_WOLFRAM_APP_ID=your_wolfram_app_id

diff --git a/packages/tools/package-lock.json b/packages/tools/package-lock.json
diff --git a/packages/tools/package.json b/packages/tools/package.json
@@ -18,6 +18,10 @@
       "import": "./dist/tavily/index.esm.js",
       "require": "./dist/tavily/index.cjs.js"
     },
+    "./youtube-captions-scraper": {
+      "import": "./dist/youtube-captions-scraper/index.esm.js",
+      "require": "./dist/youtube-captions-scraper/index.cjs.js"
+    },
     "./serper": {
       "import": "./dist/serper/index.esm.js",
       "require": "./dist/serper/index.cjs.js"
@@ -74,6 +78,7 @@
   "dependencies": {
     "@langchain/core": "0.2.16",
     "ky": "^1.7.2",
+    "youtube-captions-scraper": "^2.0.3",
     "zod": "^3.23.8"
   },
   "devDependencies": {

diff --git a/packages/tools/rollup.config.mjs b/packages/tools/rollup.config.mjs
@@ -12,6 +12,7 @@ const toolFolders = [
   'serper',
   'exa',
   'wolfram-alpha',
+  'youtube-captions-scraper',
   'github-issues',
 ]; // Add more folder names as needed
 

diff --git a/packages/tools/src/index.js b/packages/tools/src/index.js
@@ -1,5 +1,6 @@
 export * from './firecrawl/index.js';
 export * from './tavily/index.js';
+export * from './youtube-captions-scraper/index.js';
 export * from './github-issues/index.js';
 export * from './serper/index.js';
 export * from './wolfram-alpha/index.js';

diff --git a/packages/tools/src/youtube-captions-scraper/index.js b/packages/tools/src/youtube-captions-scraper/index.js
@@ -0,0 +1,98 @@
+import { Tool } from '@langchain/core/tools';
+import { z } from 'zod';
+import ky from 'ky';
+import { HTTPError } from 'ky';
+import { getSubtitles } from 'youtube-captions-scraper';
+
+export class YouTubeCaptionsScraper extends Tool {
+  constructor(fields) {
+    super(fields);
+    this.token = fields.token;
+    this.name = 'youtube-captions-scraper';
+    this.description = 'Tools to extract video caption from youtube';
+    this.httpClient = ky;
+
+    // Define the input schema using Zod
+    this.schema = z.object({
+      videoUrl: z
+        .string()
+        .url()
+        .describe('The URL of the YouTube video to extract captions from.'),
+    });
+  }
+
+  async _call(input) {
+    try {
+      const videoId = this.extractVideoId(input.videoUrl);
+      if (!videoId) {
+        return 'Invalid video URL: Unable to extract video ID';
+      }
+
+      const headers = {
+        Authorization: `Bearer ${this.token}`,
+      };
+      // Make an API request to the YouTube Data API
+      const jsonData = await this.httpClient
+        .get(
+          `https://www.googleapis.com/youtube/v3/captions?videoId=${videoId}&part=id,snippet`,
+          { headers }
+        )
+        .json();
+      // Extract and validate the results from the response
+      const captions = jsonData?.items[0]?.snippet;
+      if (!captions) {
+        return 'No captions found';
+      }
+      const captionContent = await downloadAutoGeneratedCaptions(
+        videoId,
+        captions.language
+      );
+
+      return captionContent;
+    } catch (error) {
+      if (error instanceof HTTPError) {
+        const statusCode = error.response.status;
+        let errorType = 'Unknown';
+        if (statusCode >= 400 && statusCode < 500) {
+          errorType = 'Client Error';
+        } else if (statusCode >= 500) {
+          errorType = 'Server Error';
+        }
+        return `API request failed: ${errorType} (${statusCode})`;
+      } else {
+        return `An unexpected error occurred: ${error.message}`;
+      }
+    }
+  }
+
+  extractVideoId(videoUrl) {
+    try {
+      const url = new URL(videoUrl);
+      const urlParams = new URLSearchParams(url.search);
+      return urlParams.get('v');
+    } catch {
+      throw new Error('Invalid URL');
+    }
+  }
+}
+
+const downloadAutoGeneratedCaptions = async (videoId, lang = 'en') => {
+  try {
+    const captions = await getSubtitles({
+      videoID: videoId, // YouTube video ID
+      lang, // Language code for captions, change as needed
+    });
+
+    // Process and display the captions
+    let captionContent = `Video Id: ${videoId}\n\n`;
+
+    for (let index = 0; index < captions.length; index++) {
+      const caption = captions[index];
+      captionContent += `[${caption.start}]: ${caption.text}\n`;
+    }
+
+    return captionContent;
+  } catch (error) {
+    console.error('Error downloading captions:', error);
+  }
+};