From 6bc983b404320624aac76eb5a567302015f7dd0e Mon Sep 17 00:00:00 2001
From: n4ze3m
- Click or drag PDF, Docx, CSV , TXT, MP3, MP4, Zip files to - this + Click or drag PDF, Docx, CSV, TXT, MP3, MP4, Zip, or JSON + files to this area
{`Support is available for a single or bulk upload of up to ${botConfig?.fileUploadSizeLimit}
diff --git a/server/src/queue/controllers/json.controller.ts b/server/src/queue/controllers/json.controller.ts
new file mode 100644
index 00000000..50ac3a79
--- /dev/null
+++ b/server/src/queue/controllers/json.controller.ts
@@ -0,0 +1,78 @@
+import { QSource } from "../type";
+import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";
+import { DialoqbaseVectorStore } from "../../utils/store";
+import { embeddings } from "../../utils/embeddings";
+import { PrismaClient } from "@prisma/client";
+import { getModelInfo } from "../../utils/get-model-info";
+import * as fs from "fs/promises";
+
+/**
+ * Queue handler for "json" sources.
+ *
+ * Reads the file at `source.location` as UTF-8 text, splits that raw text
+ * (the JSON is not parsed) into chunks, and passes the chunks to
+ * `DialoqbaseVectorStore.fromDocuments` with the model from `source.embedding`.
+ *
+ * @param source - Queue item carrying the file location, chunk settings,
+ *   embedding id, bot id and source id.
+ * @param prisma - Prisma client handed to `getModelInfo` for the lookup.
+ * @throws Error when the source has no location, the embedding model is not
+ *   found, or the model has no provider. File-read errors propagate.
+ */
+export const jsonQueueController = async (
+  source: QSource,
+  prisma: PrismaClient
+) => {
+  console.log("loading json");
+
+  // Fail with a clear message instead of asserting the path is present.
+  const location = source.location;
+  if (!location) {
+    throw new Error("JSON source has no file location");
+  }
+
+  const json = await fs.readFile(location, "utf-8");
+
+  const textSplitter = new RecursiveCharacterTextSplitter({
+    chunkSize: source.chunkSize,
+    chunkOverlap: source.chunkOverlap,
+  });
+
+  const chunks = await textSplitter.splitDocuments([
+    {
+      pageContent: json,
+      metadata: {
+        source: location,
+      },
+    },
+  ]);
+
+  const embeddingInfo = await getModelInfo({
+    model: source.embedding,
+    prisma,
+    type: "embedding",
+  });
+
+  if (!embeddingInfo) {
+    throw new Error("Embedding not found. Please verify the embedding id");
+  }
+
+  // Check the provider explicitly rather than using a non-null assertion.
+  const provider = embeddingInfo.model_provider;
+  if (!provider) {
+    throw new Error("Embedding model has no provider configured");
+  }
+
+  await DialoqbaseVectorStore.fromDocuments(
+    chunks,
+    embeddings(
+      provider.toLowerCase(),
+      embeddingInfo.model_id,
+      embeddingInfo.config
+    ),
+    {
+      botId: source.botId,
+      sourceId: source.id,
+    }
+  );
+};
diff --git a/server/src/queue/controllers/zip.controller.ts b/server/src/queue/controllers/zip.controller.ts
index 04f3ef84..0befd2b8 100644
--- a/server/src/queue/controllers/zip.controller.ts
+++ b/server/src/queue/controllers/zip.controller.ts
@@ -26,6 +26,8 @@ function getMimeType(filename: string): string {
       return "audio/mpeg";
     case ".zip":
       return "application/zip";
+    case ".json":
+      return "application/json";
     default:
       return "none";
   }
diff --git a/server/src/queue/index.ts b/server/src/queue/index.ts
index 2ac8119e..2df98cc7 100644
--- a/server/src/queue/index.ts
+++ b/server/src/queue/index.ts
@@ -16,6 +16,7 @@ import { sitemapQueueController } from "./controllers/sitemap.controller";
 import { SandboxedJob } from "bullmq";
 import { getRagSettings } from "../utils/rag-settings";
 import { zipQueueController } from "./controllers/zip.controller";
+import { jsonQueueController } from "./controllers/json.controller";
 
 const prisma = new PrismaClient();
 
@@ -84,6 +85,9 @@ export default async function queueHandler(job: SandboxedJob) {
       case "zip":
         await zipQueueController(source, prisma);
         break;
+      case "json":
+        await jsonQueueController(source, prisma);
+        break;
       default:
         break;
     }
diff --git a/server/src/utils/fileType.ts b/server/src/utils/fileType.ts
index 9622c5d6..b46c2f40 100644
--- a/server/src/utils/fileType.ts
+++ b/server/src/utils/fileType.ts
@@ -20,6 +20,8 @@ export const fileTypeFinder = (mimeType: string) => {
       return "zip";
     case "application/x-zip-compressed":
       return "zip";
+    case "application/json":
+      return "json";
     default:
       return "none";
   }