diff --git a/core/src/node/api/HttpServer.ts b/core/src/node/api/HttpServer.ts deleted file mode 100644 index 32d5977175..0000000000 --- a/core/src/node/api/HttpServer.ts +++ /dev/null @@ -1,8 +0,0 @@ -export interface HttpServer { - post: (route: string, handler: (req: any, res: any) => Promise) => void - get: (route: string, handler: (req: any, res: any) => Promise) => void - patch: (route: string, handler: (req: any, res: any) => Promise) => void - put: (route: string, handler: (req: any, res: any) => Promise) => void - delete: (route: string, handler: (req: any, res: any) => Promise) => void - register: (router: any, opts?: any) => void -} diff --git a/core/src/node/api/index.test.ts b/core/src/node/api/index.test.ts deleted file mode 100644 index c35d6e792a..0000000000 --- a/core/src/node/api/index.test.ts +++ /dev/null @@ -1,7 +0,0 @@ - -import * as restfulV1 from './restful/v1'; - -it('should re-export from restful/v1', () => { - const restfulV1Exports = require('./restful/v1'); - expect(restfulV1Exports).toBeDefined(); -}) diff --git a/core/src/node/api/index.ts b/core/src/node/api/index.ts index ab0c516569..56becd0548 100644 --- a/core/src/node/api/index.ts +++ b/core/src/node/api/index.ts @@ -1,3 +1 @@ -export * from './HttpServer' -export * from './restful/v1' export * from './common/handler' diff --git a/core/src/node/api/processors/app.test.ts b/core/src/node/api/processors/app.test.ts index 5c4daef29d..f09c6cb6b3 100644 --- a/core/src/node/api/processors/app.test.ts +++ b/core/src/node/api/processors/app.test.ts @@ -2,7 +2,6 @@ jest.mock('../../helper', () => ({ ...jest.requireActual('../../helper'), getJanDataFolderPath: () => './app', })) -import { dirname } from 'path' import { App } from './app' it('should call stopServer', () => { diff --git a/core/src/node/api/processors/app.ts b/core/src/node/api/processors/app.ts index a0808c5ac6..96af8468ea 100644 --- a/core/src/node/api/processors/app.ts +++ b/core/src/node/api/processors/app.ts @@ -3,7 +3,6 
@@ import { basename, dirname, isAbsolute, join, relative } from 'path' import { Processor } from './Processor' import { log as writeLog, - appResourcePath, getAppConfigurations as appConfiguration, updateAppConfiguration, normalizeFilePath, @@ -91,8 +90,6 @@ export class App implements Processor { port: args?.port, isCorsEnabled: args?.isCorsEnabled, isVerboseEnabled: args?.isVerboseEnabled, - schemaPath: join(appResourcePath(), 'docs', 'openapi', 'jan.yaml'), - baseDir: join(appResourcePath(), 'docs', 'openapi'), prefix: args?.prefix, }) } diff --git a/core/src/node/api/restful/app/download.test.ts b/core/src/node/api/restful/app/download.test.ts deleted file mode 100644 index b2af1bb0d9..0000000000 --- a/core/src/node/api/restful/app/download.test.ts +++ /dev/null @@ -1,62 +0,0 @@ -import { HttpServer } from '../../HttpServer' -import { DownloadManager } from '../../../helper/download' - -describe('downloadRouter', () => { - let app: HttpServer - - beforeEach(() => { - app = { - register: jest.fn(), - post: jest.fn(), - get: jest.fn(), - patch: jest.fn(), - put: jest.fn(), - delete: jest.fn(), - } - }) - - it('should return download progress for a given modelId', async () => { - const modelId = '123' - const downloadProgress = { progress: 50 } - - DownloadManager.instance.downloadProgressMap[modelId] = downloadProgress as any - - const req = { params: { modelId } } - const res = { - status: jest.fn(), - send: jest.fn(), - } - - jest.spyOn(app, 'get').mockImplementation((path, handler) => { - if (path === `/download/getDownloadProgress/${modelId}`) { - res.status(200) - res.send(downloadProgress) - } - }) - - app.get(`/download/getDownloadProgress/${modelId}`, req as any) - expect(res.status).toHaveBeenCalledWith(200) - expect(res.send).toHaveBeenCalledWith(downloadProgress) - }) - - it('should return 404 if download progress is not found', async () => { - const modelId = '123' - - const req = { params: { modelId } } - const res = { - status: jest.fn(), - send: 
jest.fn(), - } - - - jest.spyOn(app, 'get').mockImplementation((path, handler) => { - if (path === `/download/getDownloadProgress/${modelId}`) { - res.status(404) - res.send({ message: 'Download progress not found' }) - } - }) - app.get(`/download/getDownloadProgress/${modelId}`, req as any) - expect(res.status).toHaveBeenCalledWith(404) - expect(res.send).toHaveBeenCalledWith({ message: 'Download progress not found' }) - }) -}) diff --git a/core/src/node/api/restful/app/download.ts b/core/src/node/api/restful/app/download.ts deleted file mode 100644 index 5e0c83d01a..0000000000 --- a/core/src/node/api/restful/app/download.ts +++ /dev/null @@ -1,23 +0,0 @@ -import { DownloadRoute } from '../../../../types/api' -import { DownloadManager } from '../../../helper/download' -import { HttpServer } from '../../HttpServer' - -export const downloadRouter = async (app: HttpServer) => { - app.get(`/download/${DownloadRoute.getDownloadProgress}/:modelId`, async (req, res) => { - const modelId = req.params.modelId - - console.debug(`Getting download progress for model ${modelId}`) - console.debug( - `All Download progress: ${JSON.stringify(DownloadManager.instance.downloadProgressMap)}` - ) - - // check if null DownloadManager.instance.downloadProgressMap - if (!DownloadManager.instance.downloadProgressMap[modelId]) { - return res.status(404).send({ - message: 'Download progress not found', - }) - } else { - return res.status(200).send(DownloadManager.instance.downloadProgressMap[modelId]) - } - }) -} diff --git a/core/src/node/api/restful/app/handlers.test.ts b/core/src/node/api/restful/app/handlers.test.ts deleted file mode 100644 index 680623d869..0000000000 --- a/core/src/node/api/restful/app/handlers.test.ts +++ /dev/null @@ -1,16 +0,0 @@ -// -import { jest } from '@jest/globals'; - -import { HttpServer } from '../../HttpServer'; -import { handleRequests } from './handlers'; -import { Handler, RequestHandler } from '../../common/handler'; - -it('should initialize 
RequestHandler and call handle', () => { - const mockHandle = jest.fn(); - jest.spyOn(RequestHandler.prototype, 'handle').mockImplementation(mockHandle); - - const mockApp = { post: jest.fn() }; - handleRequests(mockApp as unknown as HttpServer); - - expect(mockHandle).toHaveBeenCalled(); -}); diff --git a/core/src/node/api/restful/app/handlers.ts b/core/src/node/api/restful/app/handlers.ts deleted file mode 100644 index 43c3f7add9..0000000000 --- a/core/src/node/api/restful/app/handlers.ts +++ /dev/null @@ -1,13 +0,0 @@ -import { HttpServer } from '../../HttpServer' -import { Handler, RequestHandler } from '../../common/handler' - -export function handleRequests(app: HttpServer) { - const restWrapper: Handler = (route: string, listener: (...args: any[]) => any) => { - app.post(`/app/${route}`, async (request: any, reply: any) => { - const args = JSON.parse(request.body) as any[] - reply.send(JSON.stringify(await listener(...args))) - }) - } - const handler = new RequestHandler(restWrapper) - handler.handle() -} diff --git a/core/src/node/api/restful/common.test.ts b/core/src/node/api/restful/common.test.ts deleted file mode 100644 index b40f6606ff..0000000000 --- a/core/src/node/api/restful/common.test.ts +++ /dev/null @@ -1,21 +0,0 @@ - -import { commonRouter } from './common'; -import { JanApiRouteConfiguration } from './helper/configuration'; - -test('commonRouter sets up routes for each key in JanApiRouteConfiguration', async () => { - const mockHttpServer = { - get: jest.fn(), - post: jest.fn(), - patch: jest.fn(), - put: jest.fn(), - delete: jest.fn(), - }; - await commonRouter(mockHttpServer as any); - - const expectedRoutes = Object.keys(JanApiRouteConfiguration); - expectedRoutes.forEach((key) => { - expect(mockHttpServer.get).toHaveBeenCalledWith(`/${key}`, expect.any(Function)); - expect(mockHttpServer.get).toHaveBeenCalledWith(`/${key}/:id`, expect.any(Function)); - expect(mockHttpServer.delete).toHaveBeenCalledWith(`/${key}/:id`, 
expect.any(Function)); - }); -}); diff --git a/core/src/node/api/restful/common.ts b/core/src/node/api/restful/common.ts deleted file mode 100644 index 989104e033..0000000000 --- a/core/src/node/api/restful/common.ts +++ /dev/null @@ -1,82 +0,0 @@ -import { HttpServer } from '../HttpServer' -import { - chatCompletions, - downloadModel, - getBuilder, - retrieveBuilder, - createMessage, - createThread, - getMessages, - retrieveMessage, - updateThread, - models, -} from './helper/builder' - -import { JanApiRouteConfiguration } from './helper/configuration' - -export const commonRouter = async (app: HttpServer) => { - const normalizeData = (data: any) => { - return { - object: 'list', - data, - } - } - // Common Routes - // Read & Delete :: Threads | Models | Assistants - Object.keys(JanApiRouteConfiguration).forEach((key) => { - app.get(`/${key}`, async (_req, _res) => { - if (key.includes('models')) { - return models(_req, _res) - } - return getBuilder(JanApiRouteConfiguration[key]).then(normalizeData) - }) - - app.get(`/${key}/:id`, async (_req: any, _res: any) => { - if (key.includes('models')) { - return models(_req, _res) - } - return retrieveBuilder(JanApiRouteConfiguration[key], _req.params.id) - }) - - app.delete(`/${key}/:id`, async (_req: any, _res: any) => { - if (key.includes('models')) { - return models(_req, _res) - } - return retrieveBuilder(JanApiRouteConfiguration[key], _req.params.id) - }) - }) - - // Threads - app.post(`/threads`, async (req, res) => createThread(req.body)) - - app.get(`/threads/:threadId/messages`, async (req, res) => - getMessages(req.params.threadId).then(normalizeData) - ) - - app.get(`/threads/:threadId/messages/:messageId`, async (req, res) => - retrieveMessage(req.params.threadId, req.params.messageId) - ) - - app.post(`/threads/:threadId/messages`, async (req, res) => - createMessage(req.params.threadId as any, req.body as any) - ) - - app.patch(`/threads/:threadId`, async (request: any) => - 
updateThread(request.params.threadId, request.body) - ) - - // Models - app.get(`/models/download/:modelId`, async (request: any) => - downloadModel(request.params.modelId, { - ignoreSSL: request.query.ignoreSSL === 'true', - proxy: request.query.proxy, - }) - ) - - app.post(`/models/start`, async (request: any, reply: any) => models(request, reply)) - - app.post(`/models/stop`, async (request: any, reply: any) => models(request, reply)) - - // Chat Completion - app.post(`/chat/completions`, async (request: any, reply: any) => chatCompletions(request, reply)) -} diff --git a/core/src/node/api/restful/helper/builder.test.ts b/core/src/node/api/restful/helper/builder.test.ts deleted file mode 100644 index cfaee60076..0000000000 --- a/core/src/node/api/restful/helper/builder.test.ts +++ /dev/null @@ -1,251 +0,0 @@ -import { existsSync, readdirSync, readFileSync, writeFileSync, mkdirSync, appendFileSync } from 'fs' -import { - getBuilder, - retrieveBuilder, - getMessages, - retrieveMessage, - createThread, - updateThread, - createMessage, - downloadModel, - chatCompletions, -} from './builder' -import { RouteConfiguration } from './configuration' - -jest.mock('fs') -jest.mock('path') -jest.mock('../../../helper', () => ({ - getEngineConfiguration: jest.fn(), - getJanDataFolderPath: jest.fn().mockReturnValue('/mock/path'), -})) -jest.mock('request') -jest.mock('request-progress') -jest.mock('node-fetch') - -describe('builder helper functions', () => { - const mockConfiguration: RouteConfiguration = { - dirName: 'mockDir', - metadataFileName: 'metadata.json', - delete: { - object: 'mockObject', - }, - } - - beforeEach(() => { - jest.clearAllMocks() - }) - - describe('getBuilder', () => { - it('should return an empty array if directory does not exist', async () => { - ;(existsSync as jest.Mock).mockReturnValue(false) - const result = await getBuilder(mockConfiguration) - expect(result).toEqual([]) - }) - - it('should return model data if directory exists', async () => { - 
;(existsSync as jest.Mock).mockReturnValue(true) - ;(readdirSync as jest.Mock).mockReturnValue(['file1']) - ;(readFileSync as jest.Mock).mockReturnValue(JSON.stringify({ id: 'model1' })) - - const result = await getBuilder(mockConfiguration) - expect(result).toEqual([{ id: 'model1' }]) - }) - }) - - describe('retrieveBuilder', () => { - it('should return undefined if no data matches the id', async () => { - ;(existsSync as jest.Mock).mockReturnValue(true) - ;(readdirSync as jest.Mock).mockReturnValue(['file1']) - ;(readFileSync as jest.Mock).mockReturnValue(JSON.stringify({ id: 'model1' })) - - const result = await retrieveBuilder(mockConfiguration, 'nonexistentId') - expect(result).toBeUndefined() - }) - - it('should return the matching data', async () => { - ;(existsSync as jest.Mock).mockReturnValue(true) - ;(readdirSync as jest.Mock).mockReturnValue(['file1']) - ;(readFileSync as jest.Mock).mockReturnValue(JSON.stringify({ id: 'model1' })) - - const result = await retrieveBuilder(mockConfiguration, 'model1') - expect(result).toEqual({ id: 'model1' }) - }) - }) - - describe('getMessages', () => { - it('should return an empty array if message file does not exist', async () => { - ;(existsSync as jest.Mock).mockReturnValue(false) - - const result = await getMessages('thread1') - expect(result).toEqual([]) - }) - - it('should return messages if message file exists', async () => { - ;(existsSync as jest.Mock).mockReturnValue(true) - ;(readdirSync as jest.Mock).mockReturnValue(['messages.jsonl']) - ;(readFileSync as jest.Mock).mockReturnValue('{"id":"msg1"}\n{"id":"msg2"}\n') - - const result = await getMessages('thread1') - expect(result).toEqual([{ id: 'msg1' }, { id: 'msg2' }]) - }) - }) - - describe('retrieveMessage', () => { - it('should return a message if no messages match the id', async () => { - ;(existsSync as jest.Mock).mockReturnValue(true) - ;(readdirSync as jest.Mock).mockReturnValue(['messages.jsonl']) - ;(readFileSync as 
jest.Mock).mockReturnValue('{"id":"msg1"}\n') - - const result = await retrieveMessage('thread1', 'nonexistentId') - expect(result).toEqual({ message: 'Not found' }) - }) - - it('should return the matching message', async () => { - ;(existsSync as jest.Mock).mockReturnValue(true) - ;(readdirSync as jest.Mock).mockReturnValue(['messages.jsonl']) - ;(readFileSync as jest.Mock).mockReturnValue('{"id":"msg1"}\n') - - const result = await retrieveMessage('thread1', 'msg1') - expect(result).toEqual({ id: 'msg1' }) - }) - }) - - describe('createThread', () => { - it('should return a message if thread has no assistants', async () => { - const result = await createThread({}) - expect(result).toEqual({ message: 'Thread must have at least one assistant' }) - }) - - it('should create a thread and return the updated thread', async () => { - ;(existsSync as jest.Mock).mockReturnValue(false) - - const thread = { assistants: [{ assistant_id: 'assistant1' }] } - const result = await createThread(thread) - expect(mkdirSync).toHaveBeenCalled() - expect(writeFileSync).toHaveBeenCalled() - expect(result.id).toBeDefined() - }) - }) - - describe('updateThread', () => { - it('should return a message if thread is not found', async () => { - ;(existsSync as jest.Mock).mockReturnValue(true) - ;(readdirSync as jest.Mock).mockReturnValue(['file1']) - ;(readFileSync as jest.Mock).mockReturnValue(JSON.stringify({ id: 'model1' })) - - const result = await updateThread('nonexistentId', {}) - expect(result).toEqual({ message: 'Thread not found' }) - }) - - it('should update the thread and return the updated thread', async () => { - ;(existsSync as jest.Mock).mockReturnValue(true) - ;(readdirSync as jest.Mock).mockReturnValue(['file1']) - ;(readFileSync as jest.Mock).mockReturnValue(JSON.stringify({ id: 'model1' })) - - const result = await updateThread('model1', { name: 'updatedName' }) - expect(writeFileSync).toHaveBeenCalled() - expect(result.name).toEqual('updatedName') - }) - }) - - 
describe('createMessage', () => { - it('should create a message and return the created message', async () => { - ;(existsSync as jest.Mock).mockReturnValue(false) - const message = { role: 'user', content: 'Hello' } - - const result = (await createMessage('thread1', message)) as any - expect(mkdirSync).toHaveBeenCalled() - expect(appendFileSync).toHaveBeenCalled() - expect(result.id).toBeDefined() - }) - }) - - describe('downloadModel', () => { - it('should return a message if model is not found', async () => { - ;(existsSync as jest.Mock).mockReturnValue(true) - ;(readdirSync as jest.Mock).mockReturnValue(['file1']) - ;(readFileSync as jest.Mock).mockReturnValue(JSON.stringify({ id: 'model1' })) - - const result = await downloadModel('nonexistentId') - expect(result).toEqual({ message: 'Model not found' }) - }) - - it('should start downloading the model', async () => { - ;(existsSync as jest.Mock).mockReturnValue(true) - ;(readdirSync as jest.Mock).mockReturnValue(['file1']) - ;(readFileSync as jest.Mock).mockReturnValue( - JSON.stringify({ id: 'model1', object: 'model', sources: ['http://example.com'] }) - ) - const result = await downloadModel('model1') - expect(result).toEqual({ message: 'Starting download model1' }) - }) - }) - - describe('chatCompletions', () => { - it('should return the error on status not ok', async () => { - const request = { body: { model: 'model1' } } - const mockSend = jest.fn() - const reply = { - code: jest.fn().mockReturnThis(), - send: jest.fn(), - headers: jest.fn().mockReturnValue({ - send: mockSend, - }), - raw: { - writeHead: jest.fn(), - pipe: jest.fn(), - }, - } - - ;(existsSync as jest.Mock).mockReturnValue(true) - ;(readdirSync as jest.Mock).mockReturnValue(['file1']) - ;(readFileSync as jest.Mock).mockReturnValue( - JSON.stringify({ id: 'model1', engine: 'openai' }) - ) - - // Mock fetch - const fetch = require('node-fetch') - fetch.mockResolvedValue({ - status: 400, - headers: new Map([ - ['content-type', 
'application/json'], - ['x-request-id', '123456'], - ]), - body: { pipe: jest.fn() }, - text: jest.fn().mockResolvedValue({ error: 'Mock error response' }), - }) - await chatCompletions(request, reply) - expect(reply.code).toHaveBeenCalledWith(400) - expect(mockSend).toHaveBeenCalledWith( - expect.objectContaining({ - error: 'Mock error response', - }) - ) - }) - - it('should return the chat completions', async () => { - const request = { body: { model: 'model1' } } - const reply = { - code: jest.fn().mockReturnThis(), - send: jest.fn(), - raw: { writeHead: jest.fn(), pipe: jest.fn() }, - } - - ;(existsSync as jest.Mock).mockReturnValue(true) - ;(readdirSync as jest.Mock).mockReturnValue(['file1']) - ;(readFileSync as jest.Mock).mockReturnValue( - JSON.stringify({ id: 'model1', engine: 'openai' }) - ) - - // Mock fetch - const fetch = require('node-fetch') - fetch.mockResolvedValue({ - status: 200, - body: { pipe: jest.fn() }, - json: jest.fn().mockResolvedValue({ completions: ['completion1'] }), - }) - await chatCompletions(request, reply) - expect(reply.raw.writeHead).toHaveBeenCalledWith(200, expect.any(Object)) - }) - }) -}) diff --git a/core/src/node/api/restful/helper/builder.ts b/core/src/node/api/restful/helper/builder.ts deleted file mode 100644 index 31eb649a3c..0000000000 --- a/core/src/node/api/restful/helper/builder.ts +++ /dev/null @@ -1,339 +0,0 @@ -import { - existsSync, - readdirSync, - readFileSync, - writeFileSync, - mkdirSync, - appendFileSync, - createWriteStream, -} from 'fs' -import { JanApiRouteConfiguration, RouteConfiguration } from './configuration' -import { join } from 'path' -import { ContentType, InferenceEngine, MessageStatus, ThreadMessage } from '../../../../types' -import { getJanDataFolderPath } from '../../../helper' -import { CORTEX_API_URL } from './consts' - -// TODO: Refactor these -export const getBuilder = async (configuration: RouteConfiguration) => { - const directoryPath = join(getJanDataFolderPath(), 
configuration.dirName) - try { - if (!existsSync(directoryPath)) { - console.debug('model folder not found') - return [] - } - - const files: string[] = readdirSync(directoryPath) - - const allDirectories: string[] = [] - for (const file of files) { - if (file === '.DS_Store') continue - allDirectories.push(file) - } - - const results = allDirectories - .map((dirName) => { - const jsonPath = join(directoryPath, dirName, configuration.metadataFileName) - return readModelMetadata(jsonPath) - }) - .filter((data) => !!data) - const modelData = results - .map((result: any) => { - try { - return JSON.parse(result) - } catch (err) { - console.error(err) - } - }) - .filter((e: any) => !!e) - - return modelData - } catch (err) { - console.error(err) - return [] - } -} - -const readModelMetadata = (path: string): string | undefined => { - if (existsSync(path)) { - return readFileSync(path, 'utf-8') - } else { - return undefined - } -} - -export const retrieveBuilder = async (configuration: RouteConfiguration, id: string) => { - const data = await getBuilder(configuration) - const filteredData = data.filter((d: any) => d.id === id)[0] - - if (!filteredData) { - return undefined - } - - return filteredData -} - -export const getMessages = async (threadId: string): Promise => { - const threadDirPath = join(getJanDataFolderPath(), 'threads', threadId) - const messageFile = 'messages.jsonl' - try { - const files: string[] = readdirSync(threadDirPath) - if (!files.includes(messageFile)) { - console.error(`${threadDirPath} not contains message file`) - return [] - } - - const messageFilePath = join(threadDirPath, messageFile) - if (!existsSync(messageFilePath)) { - console.debug('message file not found') - return [] - } - - const lines = readFileSync(messageFilePath, 'utf-8') - .toString() - .split('\n') - .filter((line: any) => line !== '') - - const messages: ThreadMessage[] = [] - lines.forEach((line: string) => { - messages.push(JSON.parse(line) as ThreadMessage) - }) - return 
messages - } catch (err) { - console.error(err) - return [] - } -} - -export const retrieveMessage = async (threadId: string, messageId: string) => { - const messages = await getMessages(threadId) - const filteredMessages = messages.filter((m) => m.id === messageId) - if (!filteredMessages || filteredMessages.length === 0) { - return { - message: 'Not found', - } - } - - return filteredMessages[0] -} - -export const createThread = async (thread: any) => { - const threadMetadataFileName = 'thread.json' - // TODO: add validation - if (!thread.assistants || thread.assistants.length === 0) { - return { - message: 'Thread must have at least one assistant', - } - } - - const threadId = generateThreadId(thread.assistants[0]?.assistant_id) - try { - const updatedThread = { - ...thread, - id: threadId, - created: Date.now(), - updated: Date.now(), - } - const threadDirPath = join(getJanDataFolderPath(), 'threads', updatedThread.id) - const threadJsonPath = join(threadDirPath, threadMetadataFileName) - - if (!existsSync(threadDirPath)) { - mkdirSync(threadDirPath) - } - - await writeFileSync(threadJsonPath, JSON.stringify(updatedThread, null, 2)) - return updatedThread - } catch (err) { - return { - error: err, - } - } -} - -export const updateThread = async (threadId: string, thread: any) => { - const threadMetadataFileName = 'thread.json' - const currentThreadData = await retrieveBuilder(JanApiRouteConfiguration.threads, threadId) - if (!currentThreadData) { - return { - message: 'Thread not found', - } - } - // we don't want to update the id and object - delete thread.id - delete thread.object - - const updatedThread = { - ...currentThreadData, - ...thread, - updated: Date.now(), - } - try { - const threadDirPath = join(getJanDataFolderPath(), 'threads', updatedThread.id) - const threadJsonPath = join(threadDirPath, threadMetadataFileName) - - await writeFileSync(threadJsonPath, JSON.stringify(updatedThread, null, 2)) - return updatedThread - } catch (err) { - return { - 
message: err, - } - } -} - -const generateThreadId = (assistantId: string) => { - return `${assistantId}_${(Date.now() / 1000).toFixed(0)}` -} - -export const createMessage = async (threadId: string, message: any) => { - const threadMessagesFileName = 'messages.jsonl' - - try { - const { ulid } = require('ulidx') - const msgId = ulid() - const createdAt = Date.now() - const threadMessage: ThreadMessage = { - id: msgId, - thread_id: threadId, - status: MessageStatus.Ready, - created_at: createdAt, - completed_at: createdAt, - object: 'thread.message', - role: message.role, - content: [ - { - type: ContentType.Text, - text: { - value: message.content, - annotations: [], - }, - }, - ], - } - - const threadDirPath = join(getJanDataFolderPath(), 'threads', threadId) - const threadMessagePath = join(threadDirPath, threadMessagesFileName) - - if (!existsSync(threadDirPath)) { - mkdirSync(threadDirPath) - } - appendFileSync(threadMessagePath, JSON.stringify(threadMessage) + '\n') - return threadMessage - } catch (err) { - return { - message: err, - } - } -} - -export const downloadModel = async ( - modelId: string, - network?: { proxy?: string; ignoreSSL?: boolean } -) => { - const strictSSL = !network?.ignoreSSL - const proxy = network?.proxy?.startsWith('http') ? 
network.proxy : undefined - const model = await retrieveBuilder(JanApiRouteConfiguration.models, modelId) - if (!model || model.object !== 'model') { - return { - message: 'Model not found', - } - } - - const directoryPath = join(getJanDataFolderPath(), 'models', modelId) - if (!existsSync(directoryPath)) { - mkdirSync(directoryPath) - } - - // path to model binary - const modelBinaryPath = join(directoryPath, modelId) - - const request = require('request') - const progress = require('request-progress') - - for (const source of model.sources) { - const rq = request({ url: source, strictSSL, proxy }) - progress(rq, {}) - ?.on('progress', function (state: any) { - console.debug('progress', JSON.stringify(state, null, 2)) - }) - ?.on('error', function (err: Error) { - console.error('error', err) - }) - ?.on('end', function () { - console.debug('end') - }) - .pipe(createWriteStream(modelBinaryPath)) - } - - return { - message: `Starting download ${modelId}`, - } -} - -/** - * Proxy /models to cortex - * @param request - * @param reply - */ -export const models = async (request: any, reply: any) => { - const fetch = require('node-fetch') - const headers: Record = { - 'Content-Type': 'application/json', - } - - const response = await fetch(`${CORTEX_API_URL}/models${request.url.split('/models')[1] ?? 
''}`, { - method: request.method, - headers: headers, - body: JSON.stringify(request.body), - }) - - if (response.status !== 200) { - // Forward the error response to client via reply - const responseBody = await response.text() - const responseHeaders = Object.fromEntries(response.headers) - reply.code(response.status).headers(responseHeaders).send(responseBody) - } else { - reply.raw.writeHead(200, { - 'Content-Type': 'application/json', - 'Cache-Control': 'no-cache', - 'Connection': 'keep-alive', - 'Access-Control-Allow-Origin': '*', - }) - response.body.pipe(reply.raw) - } -} - -/** - * Proxy chat completions - * @param request - * @param reply - */ -export const chatCompletions = async (request: any, reply: any) => { - const headers: Record = { - 'Content-Type': 'application/json', - } - - // add engine for new cortex cpp engine - if (request.body.engine === InferenceEngine.nitro) { - request.body.engine = InferenceEngine.cortex_llamacpp - } - - const fetch = require('node-fetch') - const response = await fetch(`${CORTEX_API_URL}/chat/completions`, { - method: 'POST', - headers: headers, - body: JSON.stringify(request.body), - }) - if (response.status !== 200) { - // Forward the error response to client via reply - const responseBody = await response.text() - const responseHeaders = Object.fromEntries(response.headers) - reply.code(response.status).headers(responseHeaders).send(responseBody) - } else { - reply.raw.writeHead(200, { - 'Content-Type': request.body.stream === true ? 
'text/event-stream' : 'application/json', - 'Cache-Control': 'no-cache', - 'Connection': 'keep-alive', - 'Access-Control-Allow-Origin': '*', - }) - response.body.pipe(reply.raw) - } -} diff --git a/core/src/node/api/restful/helper/configuration.test.ts b/core/src/node/api/restful/helper/configuration.test.ts deleted file mode 100644 index ae002312ac..0000000000 --- a/core/src/node/api/restful/helper/configuration.test.ts +++ /dev/null @@ -1,24 +0,0 @@ -import { JanApiRouteConfiguration } from './configuration' - -describe('JanApiRouteConfiguration', () => { - it('should have the correct models configuration', () => { - const modelsConfig = JanApiRouteConfiguration.models; - expect(modelsConfig.dirName).toBe('models'); - expect(modelsConfig.metadataFileName).toBe('model.json'); - expect(modelsConfig.delete.object).toBe('model'); - }); - - it('should have the correct assistants configuration', () => { - const assistantsConfig = JanApiRouteConfiguration.assistants; - expect(assistantsConfig.dirName).toBe('assistants'); - expect(assistantsConfig.metadataFileName).toBe('assistant.json'); - expect(assistantsConfig.delete.object).toBe('assistant'); - }); - - it('should have the correct threads configuration', () => { - const threadsConfig = JanApiRouteConfiguration.threads; - expect(threadsConfig.dirName).toBe('threads'); - expect(threadsConfig.metadataFileName).toBe('thread.json'); - expect(threadsConfig.delete.object).toBe('thread'); - }); -}); \ No newline at end of file diff --git a/core/src/node/api/restful/helper/configuration.ts b/core/src/node/api/restful/helper/configuration.ts deleted file mode 100644 index 88e5ffb61e..0000000000 --- a/core/src/node/api/restful/helper/configuration.ts +++ /dev/null @@ -1,31 +0,0 @@ -export const JanApiRouteConfiguration: Record = { - models: { - dirName: 'models', - metadataFileName: 'model.json', - delete: { - object: 'model', - }, - }, - assistants: { - dirName: 'assistants', - metadataFileName: 'assistant.json', - delete: { - 
object: 'assistant', - }, - }, - threads: { - dirName: 'threads', - metadataFileName: 'thread.json', - delete: { - object: 'thread', - }, - }, -} - -export type RouteConfiguration = { - dirName: string - metadataFileName: string - delete: { - object: string - } -} diff --git a/core/src/node/api/restful/helper/consts.test.ts b/core/src/node/api/restful/helper/consts.test.ts deleted file mode 100644 index 524f0cbeb9..0000000000 --- a/core/src/node/api/restful/helper/consts.test.ts +++ /dev/null @@ -1,5 +0,0 @@ -import { CORTEX_DEFAULT_PORT } from './consts' - -it('should test CORTEX_DEFAULT_PORT', () => { - expect(CORTEX_DEFAULT_PORT).toBe(39291) -}) diff --git a/core/src/node/api/restful/helper/consts.ts b/core/src/node/api/restful/helper/consts.ts deleted file mode 100644 index 412d304eef..0000000000 --- a/core/src/node/api/restful/helper/consts.ts +++ /dev/null @@ -1,7 +0,0 @@ -export const CORTEX_DEFAULT_PORT = 39291 - -export const LOCAL_HOST = '127.0.0.1' - -export const SUPPORTED_MODEL_FORMAT = '.gguf' - -export const CORTEX_API_URL = `http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1` diff --git a/core/src/node/api/restful/v1.test.ts b/core/src/node/api/restful/v1.test.ts deleted file mode 100644 index 8e22496e9b..0000000000 --- a/core/src/node/api/restful/v1.test.ts +++ /dev/null @@ -1,16 +0,0 @@ - -import { v1Router } from './v1'; -import { commonRouter } from './common'; - -test('should define v1Router function', () => { - expect(v1Router).toBeDefined(); -}); - -test('should register commonRouter', () => { - const mockApp = { - register: jest.fn(), - }; - v1Router(mockApp as any); - expect(mockApp.register).toHaveBeenCalledWith(commonRouter); -}); - diff --git a/core/src/node/api/restful/v1.ts b/core/src/node/api/restful/v1.ts deleted file mode 100644 index 9d57de8414..0000000000 --- a/core/src/node/api/restful/v1.ts +++ /dev/null @@ -1,16 +0,0 @@ -import { HttpServer } from '../HttpServer' -import { commonRouter } from './common' - -export const v1Router = 
async (app: HttpServer) => { - // MARK: Public API Routes - app.register(commonRouter) - - // MARK: Internal Application Routes - // DEPRECATED: Vulnerability possible issues - // handleRequests(app) - - // Expanded route for tracking download progress - // TODO: Replace by Observer Wrapper (ZeroMQ / Vanilla Websocket) - // DEPRECATED: Jan FE Docker deploy is deprecated - // app.register(downloadRouter) -} diff --git a/core/src/node/helper/path.ts b/core/src/node/helper/path.ts index 4efbea463d..5f63866401 100644 --- a/core/src/node/helper/path.ts +++ b/core/src/node/helper/path.ts @@ -1,5 +1,4 @@ -import { join, resolve } from 'path' -import { getJanDataFolderPath } from './config' +import { join } from 'path' /** * Normalize file path @@ -34,4 +33,5 @@ export function appResourcePath() { // server return join(global.core.appPath(), '../../..') -} \ No newline at end of file +} + diff --git a/core/tsconfig.json b/core/tsconfig.json index b30d65851f..bf79a87205 100644 --- a/core/tsconfig.json +++ b/core/tsconfig.json @@ -13,7 +13,8 @@ "declarationDir": "dist/types", "outDir": "dist/lib", "importHelpers": true, - "types": ["@types/jest"] + "types": ["@types/jest"], + "resolveJsonModule": true }, "include": ["src"], "exclude": ["**/*.test.ts"] diff --git a/docs/openapi/.gitkeep b/docs/openapi/.gitkeep deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/docs/openapi/jan.json b/docs/openapi/jan.json deleted file mode 100644 index 844a8f7ce8..0000000000 --- a/docs/openapi/jan.json +++ /dev/null @@ -1,2397 +0,0 @@ -{ - "openapi": "3.0.0", - "info": { - "title": "API Reference", - "description": "# Introduction\nJan API is compatible with the [OpenAI API](https://platform.openai.com/docs/api-reference).\n", - "version": "0.1.8", - "contact": { - "name": "Jan Discord", - "url": "https://discord.gg/7EcEz7MrvA" - }, - "license": { - "name": "AGPLv3", - "url": "https://github.com/janhq/nitro/blob/main/LICENSE" - } - }, - "servers": [ - { - "url": 
"http://localhost:1337/v1" - } - ], - "tags": [ - { - "name": "Models", - "description": "List and describe the various models available in the API." - }, - { - "name": "Chat", - "description": "Given a list of messages comprising a conversation, the model will return a response.\n" - }, - { - "name": "Messages", - "description": "Messages capture a conversation's content. This can include the content from LLM responses and other metadata from [chat completions](/specs/chats).\n" - }, - { - "name": "Threads" - }, - { - "name": "Assistants", - "description": "Configures and utilizes different AI assistants for varied tasks" - } - ], - "x-tagGroups": [ - { - "name": "Endpoints", - "tags": ["Models", "Chat"] - }, - { - "name": "Chat", - "tags": ["Assistants", "Messages", "Threads"] - } - ], - "paths": { - "/chat/completions": { - "post": { - "operationId": "createChatCompletion", - "tags": ["Chat"], - "summary": "Create chat completion\n", - "description": "Creates a model response for the given chat conversation. Equivalent to OpenAI's create chat completion. \n", - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ChatCompletionRequest" - } - } - } - }, - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ChatCompletionResponse" - } - } - } - } - } - } - }, - "/models": { - "get": { - "operationId": "listModels", - "tags": ["Models"], - "summary": "List models", - "description": "Lists the currently available models, and provides basic information about each one such as the owner and availability. Equivalent to OpenAI's list model. 
\n", - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ListModelsResponse" - } - } - } - } - } - } - }, - "/models/download/{model_id}": { - "get": { - "operationId": "downloadModel", - "tags": ["Models"], - "summary": "Download a specific model.", - "description": "Download a model.\n", - "parameters": [ - { - "in": "path", - "name": "model_id", - "required": true, - "schema": { - "type": "string", - "example": "mistral-ins-7b-q4" - }, - "description": "The ID of the model to use for this request.\n" - } - ], - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/DownloadModelResponse" - } - } - } - } - } - } - }, - "/models/{model_id}": { - "get": { - "operationId": "retrieveModel", - "tags": ["Models"], - "summary": "Retrieve model", - "description": "Get a model instance, providing basic information about the model such as the owner and permissioning. Equivalent to OpenAI's retrieve model. \n", - "parameters": [ - { - "in": "path", - "name": "model_id", - "required": true, - "schema": { - "type": "string", - "example": "mistral-ins-7b-q4" - }, - "description": "The ID of the model to use for this request.\n" - } - ], - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/GetModelResponse" - } - } - } - } - } - }, - "delete": { - "operationId": "deleteModel", - "tags": ["Models"], - "summary": "Delete model", - "description": "Delete a model. Equivalent to OpenAI's delete model. 
\n", - "parameters": [ - { - "in": "path", - "name": "model_id", - "required": true, - "schema": { - "type": "string", - "example": "mistral-ins-7b-q4" - }, - "description": "The model id to delete\n" - } - ], - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/DeleteModelResponse" - } - } - } - } - } - } - }, - "/threads": { - "post": { - "operationId": "createThread", - "tags": ["Threads"], - "summary": "Create thread", - "description": "Create a thread. Equivalent to OpenAI's create thread. \n", - "requestBody": { - "required": false, - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/CreateThreadObject" - } - } - } - }, - "responses": { - "200": { - "description": "Thread created successfully", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/CreateThreadResponse" - } - } - } - } - } - }, - "get": { - "operationId": "listThreads", - "tags": ["Threads"], - "summary": "List threads", - "description": "Retrieves a list of all threads available in the system.\n", - "responses": { - "200": { - "description": "List of threads retrieved successfully", - "content": { - "application/json": { - "schema": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ThreadObject" - }, - "example": [ - { - "id": "thread_abc123", - "object": "thread", - "created_at": 1699014083, - "assistants": ["assistant-001"], - "metadata": {}, - "messages": [] - }, - { - "id": "thread_abc456", - "object": "thread", - "created_at": 1699014083, - "assistants": ["assistant-002", "assistant-003"], - "metadata": {} - } - ] - } - } - } - } - } - } - }, - "/threads/{thread_id}": { - "get": { - "operationId": "getThread", - "tags": ["Threads"], - "summary": "Retrieve thread", - "description": "Retrieves detailed information about a specific thread using its thread_id. Equivalent to OpenAI's retrieve thread. 
\n", - "parameters": [ - { - "in": "path", - "name": "thread_id", - "required": true, - "schema": { - "type": "string" - }, - "description": "The ID of the thread to retrieve.\n" - } - ], - "responses": { - "200": { - "description": "Thread details retrieved successfully", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/GetThreadResponse" - } - } - } - } - } - }, - "patch": { - "operationId": "modifyThread", - "tags": ["Threads"], - "summary": "Modify thread", - "description": "Modifies a thread. Equivalent to OpenAI's modify thread. \n", - "parameters": [ - { - "in": "path", - "name": "thread_id", - "required": true, - "schema": { - "type": "string" - }, - "description": "The ID of the thread to be modified.\n" - } - ], - "requestBody": { - "required": true, - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "title": { - "type": "string", - "description": "Set the title of the thread", - "items": { - "$ref": "#/components/schemas/ThreadMessageObject" - } - } - } - } - } - } - }, - "responses": { - "200": { - "description": "Thread modified successfully", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ModifyThreadResponse" - } - } - } - } - } - }, - "delete": { - "operationId": "deleteThread", - "tags": ["Threads"], - "summary": "Delete thread", - "description": "Delete a thread. Equivalent to OpenAI's delete thread. 
\n", - "parameters": [ - { - "in": "path", - "name": "thread_id", - "required": true, - "schema": { - "type": "string" - }, - "description": "The ID of the thread to be deleted.\n" - } - ], - "responses": { - "200": { - "description": "Thread deleted successfully", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/DeleteThreadResponse" - } - } - } - } - } - } - }, - "/assistants": { - "get": { - "operationId": "listAssistants", - "tags": ["Assistants"], - "summary": "List assistants", - "description": "Return a list of assistants. Equivalent to OpenAI's list assistants. \n", - "responses": { - "200": { - "description": "List of assistants retrieved successfully", - "content": { - "application/json": { - "schema": { - "type": "array", - "items": { - "type": "object", - "properties": { - "id": { - "type": "string" - }, - "object": { - "type": "string" - }, - "version": { - "type": "integer" - }, - "created_at": { - "type": "integer" - }, - "name": { - "type": "string" - }, - "description": { - "type": "string" - }, - "avatar": { - "type": "string", - "format": "uri" - }, - "models": { - "type": "array", - "items": { - "type": "object", - "properties": { - "model_id": { - "type": "string" - } - } - } - }, - "instructions": { - "type": "string" - }, - "events": { - "type": "object", - "properties": { - "in": { - "type": "array", - "items": {} - }, - "out": { - "type": "array", - "items": {} - } - } - }, - "metadata": { - "type": "object" - }, - "x-codeSamples": { - "type": "object", - "properties": { - "cURL": { - "type": "object", - "properties": { - "lang": { - "type": "string", - "example": "cURL" - }, - "source": { - "type": "string", - "example": "curl http://localhost:1337/v1/assistants \\\n -H \"Content-Type: application/json\"\n" - } - } - }, - "JavaScript": { - "type": "object", - "properties": { - "lang": { - "type": "string", - "example": "JavaScript" - }, - "source": { - "type": "string", - "example": 
"fetch('http://localhost:1337/v1/assistants', {\n method: 'GET',\n headers: {\n 'Content-Type': 'application/json'\n }\n})\n" - } - } - }, - "Node.js": { - "type": "object", - "properties": { - "lang": { - "type": "string", - "example": "Node.js" - }, - "source": { - "type": "string", - "example": "const fetch = require('node-fetch');\n\nfetch('http://localhost:1337/v1/assistants', {\n method: 'GET',\n headers: {\n 'Content-Type': 'application/json'\n }\n})\n" - } - } - }, - "Python": { - "type": "object", - "properties": { - "lang": { - "type": "string", - "example": "Python" - }, - "source": { - "type": "string", - "example": "import requests\n\nurl = 'http://localhost:1337/v1/assistants'\nheaders = {'Content-Type': 'application/json'}\n\nresponse = requests.get(url, headers=headers)\n" - } - } - } - } - } - } - } - } - } - } - } - } - } - }, - "/assistants/{assistant_id}": { - "get": { - "operationId": "getAssistant", - "tags": ["Assistants"], - "summary": "Retrieve assistant", - "description": "Retrieves an assistant. Equivalent to OpenAI's retrieve assistants. \n", - "parameters": [ - { - "in": "path", - "name": "assistant_id", - "required": true, - "schema": { - "type": "string", - "example": "jan" - }, - "description": "The ID of the assistant to retrieve.\n" - } - ], - "responses": { - "200": { - "description": "string", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/RetrieveAssistantResponse" - } - } - } - } - } - } - }, - "/threads/{thread_id}/messages": { - "get": { - "operationId": "listMessages", - "tags": ["Messages"], - "summary": "List messages", - "description": "Retrieves all messages from the given thread. Equivalent to OpenAI's list messages. 
\n", - "parameters": [ - { - "in": "path", - "name": "thread_id", - "required": true, - "schema": { - "type": "string" - }, - "description": "The ID of the thread from which to retrieve messages.\n" - } - ], - "responses": { - "200": { - "description": "List of messages retrieved successfully", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ListMessagesResponse" - } - } - } - } - } - }, - "post": { - "operationId": "createMessage", - "tags": ["Messages"], - "summary": "Create message", - "description": "Create a message. Equivalent to OpenAI's list messages. \n", - "parameters": [ - { - "in": "path", - "name": "thread_id", - "required": true, - "schema": { - "type": "string" - }, - "description": "The ID of the thread to which the message will be posted.\n" - } - ], - "requestBody": { - "required": true, - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "role": { - "type": "string", - "description": "Role of the sender, either 'user' or 'assistant'.\n", - "example": "user", - "enum": ["user", "assistant"] - }, - "content": { - "type": "string", - "description": "Text content of the message.\n", - "example": "How does AI work? Explain it in simple terms." - } - }, - "required": ["role", "content"] - } - } - } - }, - "responses": { - "200": { - "description": "Message created successfully", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/CreateMessageResponse" - } - } - } - } - } - } - }, - "/threads/{thread_id}/messages/{message_id}": { - "get": { - "operationId": "retrieveMessage", - "tags": ["Messages"], - "summary": "Retrieve message", - "description": "Retrieve a specific message from a thread using its thread_id and message_id. Equivalent to OpenAI's retrieve messages. 
\n", - "parameters": [ - { - "in": "path", - "name": "thread_id", - "required": true, - "schema": { - "type": "string" - }, - "description": "The ID of the thread containing the message.\n" - }, - { - "in": "path", - "name": "message_id", - "required": true, - "schema": { - "type": "string" - }, - "description": "The ID of the message to retrieve.\n" - } - ], - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/GetMessageResponse" - } - } - } - } - } - } - } - }, - "x-webhooks": { - "ModelObject": { - "post": { - "summary": "The model object", - "description": "Describe a model offering that can be used with the API. Equivalent to OpenAI's model object. \n", - "operationId": "ModelObject", - "tags": ["Models"], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ModelObject" - } - } - } - } - } - }, - "AssistantObject": { - "post": { - "summary": "The assistant object", - "description": "Build assistants that can call models and use tools to perform tasks. Equivalent to OpenAI's assistants object. \n", - "operationId": "AssistantObjects", - "tags": ["Assistants"], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/AssistantObject" - } - } - } - } - } - }, - "MessageObject": { - "post": { - "summary": "The message object", - "description": "Information about a message in the thread. Equivalent to OpenAI's message object. \n", - "operationId": "MessageObject", - "tags": ["Messages"], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": null - } - } - } - } - } - }, - "ThreadObject": { - "post": { - "summary": "The thread object", - "description": "Represents a thread that contains messages. Equivalent to OpenAI's thread object. 
", - "operationId": "ThreadObject", - "tags": ["Threads"], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": null - } - } - } - } - } - } - }, - "components": { - "schemas": { - "ThreadObject": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The identifier of the thread, defaults to foldername.\n", - "example": "thread_...." - }, - "object": { - "type": "string", - "description": "Type of the object, defaults to thread.\n", - "example": "thread" - }, - "title": { - "type": "string", - "description": "A brief summary or description of the thread, defaults to an empty string.\n", - "example": "funny physics joke" - }, - "assistants": { - "type": "array", - "description": "", - "items": { - "properties": { - "assistant_id": { - "type": "string", - "description": "The identifier of assistant, defaults to \"jan\"\n", - "example": "jan" - }, - "model": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "", - "example": "..." 
- }, - "settings": { - "type": "object", - "description": "Defaults to and overrides assistant.json's \"settings\" (and if none, then model.json \"settings\")\n" - }, - "parameters": { - "type": "object", - "description": "Defaults to and overrides assistant.json's \"parameters\" (and if none, then model.json \"parameters\")\n" - } - } - } - } - } - }, - "created": { - "type": "integer", - "format": "int64", - "description": "Unix timestamp representing the creation time of the thread, defaults to file creation time.\n", - "example": 1231231 - }, - "metadata": { - "type": "object", - "description": "Metadata associated with the thread, defaults to an empty object.\n", - "example": {} - } - } - }, - "GetThreadResponse": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The identifier of the thread.", - "example": "thread_abc123" - }, - "object": { - "type": "string", - "description": "Type of the object", - "example": "thread" - }, - "created_at": { - "type": "integer", - "format": "int64", - "description": "Unix timestamp representing the creation time of the thread.", - "example": 1699014083 - }, - "assistants": { - "type": "array", - "items": { - "type": "string" - }, - "description": "List of assistants involved in the thread.", - "example": ["assistant-001"] - }, - "metadata": { - "type": "object", - "description": "Metadata associated with the thread.", - "example": {} - }, - "messages": { - "type": "array", - "items": { - "type": "string" - }, - "description": "List of messages within the thread.", - "example": [] - } - } - }, - "CreateThreadResponse": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The identifier of the newly created thread.", - "example": "thread_abc123" - }, - "object": { - "type": "string", - "description": "Type of the object, indicating it's a thread.", - "example": "thread" - }, - "created_at": { - "type": "integer", - "format": "int64", - "description": 
"Unix timestamp representing the creation time of the thread.", - "example": 1699014083 - }, - "metadata": { - "type": "object", - "description": "Metadata associated with the newly created thread.", - "example": {} - } - } - }, - "CreateThreadObject": { - "type": "object", - "properties": { - "object": { - "type": "string", - "description": "Type of the object, indicating it's a thread.", - "example": "thread" - }, - "title": { - "type": "string", - "description": "A brief summary or description of the thread, defaults to an empty string.\n", - "example": "funny physics joke" - }, - "assistants": { - "type": "array", - "description": "assistant involved in the thread", - "items": { - "properties": { - "assistant_id": { - "type": "string", - "description": "The identifier of assistant, defaults to \"jan\"\n", - "example": "jan" - }, - "assistant_name": { - "type": "string", - "description": "The name of assistant, defaults to \"Jan\"\n", - "example": "Jan" - }, - "instructions": { - "type": "string", - "description": "The instruction of assistant, defaults to \"Be my grammar corrector\"\n" - }, - "model": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "Model id", - "example": "mistral-ins-7b-q4" - }, - "settings": { - "type": "object", - "description": "Defaults to and overrides assistant.json's \"settings\" (and if none, then model.json \"settings\")\n" - }, - "parameters": { - "type": "object", - "description": "Defaults to and overrides assistant.json's \"parameters\" (and if none, then model.json \"parameters\")\n" - }, - "engine": { - "type": "string", - "description": "Engine id", - "example": "nitro" - } - } - } - } - } - }, - "metadata": { - "type": "object", - "description": "Metadata associated with the thread, defaults to an empty object.\n" - } - } - }, - "ThreadMessageObject": { - "type": "object", - "properties": { - "role": { - "type": "string", - "description": "\"Role of the sender, either 'user' or 
'assistant'.\"\n", - "enum": ["user", "assistant"] - }, - "content": { - "type": "string", - "description": "\"Text content of the message.\"\n" - }, - "file_ids": { - "type": "array", - "items": { - "type": "string" - }, - "description": "\"Array of file IDs associated with the message, if any.\"\n" - } - } - }, - "ModifyThreadResponse": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "\"The identifier of the modified thread.\"\n", - "example": "thread_abc123" - }, - "object": { - "type": "string", - "description": "Type of the object, indicating it's a thread.", - "example": "thread" - }, - "created_at": { - "type": "integer", - "format": "int64", - "description": "Unix timestamp representing the creation time of the thread.", - "example": 1699014083 - }, - "metadata": { - "type": "object", - "description": "Metadata associated with the modified thread.", - "example": {} - } - } - }, - "DeleteThreadResponse": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The identifier of the deleted thread.", - "example": "thread_abc123" - }, - "object": { - "type": "string", - "description": "Type of the object, indicating the thread has been deleted.", - "example": "thread.deleted" - }, - "deleted": { - "type": "boolean", - "description": "Indicates whether the thread was successfully deleted.", - "example": true - } - } - }, - "ListModelsResponse": { - "type": "object", - "properties": { - "object": { - "type": "string", - "enum": ["list"] - }, - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Model" - } - } - }, - "required": ["object", "data"] - }, - "Model": { - "type": "object", - "properties": { - "source_url": { - "type": "string", - "format": "uri", - "description": "URL to the source of the model.", - "example": "https://huggingface.co/janhq/trinity-v1.2-GGUF/resolve/main/trinity-v1.2.Q4_K_M.gguf" - }, - "id": { - "type": "string", - "description": "Unique 
identifier used in chat-completions model_name, matches folder name.", - "example": "trinity-v1.2-7b" - }, - "object": { - "type": "string", - "example": "model" - }, - "name": { - "type": "string", - "description": "Name of the model.", - "example": "Trinity-v1.2 7B Q4" - }, - "version": { - "type": "string", - "default": "1.0", - "description": "The version number of the model." - }, - "description": { - "type": "string", - "description": "Description of the model.", - "example": "Trinity is an experimental model merge using the Slerp method. Recommended for daily assistance purposes." - }, - "format": { - "type": "string", - "description": "State format of the model, distinct from the engine.", - "example": "gguf" - }, - "settings": { - "type": "object", - "properties": { - "ctx_len": { - "type": "integer", - "description": "Context length.", - "example": 4096 - }, - "prompt_template": { - "type": "string", - "example": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant" - } - }, - "additionalProperties": false - }, - "parameters": { - "type": "object", - "properties": { - "temperature": { - "example": 0.7 - }, - "top_p": { - "example": 0.95 - }, - "stream": { - "example": true - }, - "max_tokens": { - "example": 4096 - }, - "stop": { - "example": [] - }, - "frequency_penalty": { - "example": 0 - }, - "presence_penalty": { - "example": 0 - } - }, - "additionalProperties": false - }, - "metadata": { - "type": "object", - "properties": { - "author": { - "type": "string", - "example": "Jan" - }, - "tags": { - "type": "array", - "items": { - "type": "string" - }, - "example": ["7B", "Merged", "Featured"] - }, - "size": { - "type": "integer", - "example": 4370000000 - }, - "cover": { - "type": "string", - "format": "uri", - "example": "https://raw.githubusercontent.com/janhq/jan/main/models/trinity-v1.2-7b/cover.png" - } - }, - "additionalProperties": false - }, - "engine": { - "example": "nitro" - } - } - }, 
- "ModelObject": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The identifier of the model.\n", - "example": "trinity-v1.2-7b" - }, - "object": { - "type": "string", - "description": "The type of the object, indicating it's a model.\n", - "default": "model" - }, - "created": { - "type": "integer", - "format": "int64", - "description": "Unix timestamp representing the creation time of the model.\n", - "example": 1253935178 - }, - "owned_by": { - "type": "string", - "description": "The entity that owns the model.\n", - "example": "_" - } - } - }, - "GetModelResponse": { - "type": "object", - "properties": { - "source_url": { - "type": "string", - "format": "uri", - "description": "URL to the source of the model.", - "example": "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf" - }, - "id": { - "type": "string", - "description": "Unique identifier used in chat-completions model_name, matches folder name.", - "example": "mistral-ins-7b-q4" - }, - "object": { - "type": "string", - "example": "model" - }, - "name": { - "type": "string", - "description": "Name of the model.", - "example": "Mistral Instruct 7B Q4" - }, - "version": { - "type": "string", - "default": "1.0", - "description": "The version number of the model." - }, - "description": { - "type": "string", - "description": "Description of the model.", - "example": "Trinity is an experimental model merge using the Slerp method. Recommended for daily assistance purposes." 
- }, - "format": { - "type": "string", - "description": "State format of the model, distinct from the engine.", - "example": "gguf" - }, - "settings": { - "type": "object", - "properties": { - "ctx_len": { - "type": "integer", - "description": "Context length.", - "example": 4096 - }, - "prompt_template": { - "type": "string", - "example": "[INST] {prompt} [/INST]" - } - }, - "additionalProperties": false - }, - "parameters": { - "type": "object", - "properties": { - "temperature": { - "example": 0.7 - }, - "top_p": { - "example": 0.95 - }, - "stream": { - "example": true - }, - "max_tokens": { - "example": 4096 - }, - "stop": { - "example": [] - }, - "frequency_penalty": { - "example": 0 - }, - "presence_penalty": { - "example": 0 - } - }, - "additionalProperties": false - }, - "metadata": { - "type": "object", - "properties": { - "author": { - "type": "string", - "example": "MistralAI" - }, - "tags": { - "type": "array", - "items": { - "type": "string" - }, - "example": ["7B", "Featured", "Foundation Model"] - }, - "size": { - "example": 4370000000, - "type": "integer" - }, - "cover": { - "example": "https://raw.githubusercontent.com/janhq/jan/main/models/mistral-ins-7b-q4/cover.png", - "type": "string" - } - }, - "additionalProperties": false - }, - "engine": { - "example": "nitro" - } - } - }, - "DeleteModelResponse": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The identifier of the model that was deleted.", - "example": "mistral-ins-7b-q4" - }, - "object": { - "type": "string", - "description": "Type of the object, indicating it's a model.", - "default": "model" - }, - "deleted": { - "type": "boolean", - "description": "Indicates whether the model was successfully deleted.", - "example": true - } - } - }, - "StartModelResponse": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The identifier of the model that was started.", - "example": "model-zephyr-7B" - }, - "object": { - 
"type": "string", - "description": "Type of the object, indicating it's a model.", - "default": "model" - }, - "state": { - "type": "string", - "description": "The current state of the model after the start operation.", - "example": "running" - } - }, - "required": ["id", "object", "state"] - }, - "StopModelResponse": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The identifier of the model that was started.", - "example": "model-zephyr-7B" - }, - "object": { - "type": "string", - "description": "Type of the object, indicating it's a model.", - "default": "model" - }, - "state": { - "type": "string", - "description": "The current state of the model after the start operation.", - "example": "stopped" - } - }, - "required": ["id", "object", "state"] - }, - "DownloadModelResponse": { - "type": "object", - "properties": { - "message": { - "type": "string", - "description": "Message indicates Jan starting download corresponding model.", - "example": "Starting download mistral-ins-7b-q4" - } - } - }, - "MessageObject": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "Sequential or UUID identifier of the message.\n", - "example": 0 - }, - "object": { - "type": "string", - "description": "Type of the object, defaults to 'thread.message'.\n", - "example": "thread.message" - }, - "created_at": { - "type": "integer", - "format": "int64", - "description": "Unix timestamp representing the creation time of the message.\n" - }, - "thread_id": { - "type": "string", - "description": "Identifier of the thread to which this message belongs. Defaults to parent thread.\n", - "example": "thread_asdf" - }, - "assistant_id": { - "type": "string", - "description": "Identifier of the assistant involved in the message. 
Defaults to parent thread.\n", - "example": "jan" - }, - "role": { - "type": "string", - "enum": ["user", "assistant"], - "description": "Role of the sender, either 'user' or 'assistant'.\n" - }, - "content": { - "type": "array", - "items": { - "type": "object", - "properties": { - "type": { - "type": "string", - "description": "Type of content, e.g., 'text'.\n" - }, - "text": { - "type": "object", - "properties": { - "value": { - "type": "string", - "description": "Text content of the message.\n", - "example": "Hi!?" - }, - "annotations": { - "type": "array", - "items": { - "type": "string" - }, - "description": "Annotations for the text content, if any.\n", - "example": [] - } - } - } - } - } - }, - "metadata": { - "type": "object", - "description": "Metadata associated with the message, defaults to an empty object.\n", - "example": {} - } - } - }, - "GetMessageResponse": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The identifier of the message.", - "example": "msg_abc123" - }, - "object": { - "type": "string", - "description": "Type of the object, indicating it's a thread message.", - "default": "thread.message" - }, - "created_at": { - "type": "integer", - "format": "int64", - "description": "Unix timestamp representing the creation time of the message.", - "example": 1699017614 - }, - "thread_id": { - "type": "string", - "description": "Identifier of the thread to which this message belongs.", - "example": "thread_abc123" - }, - "role": { - "type": "string", - "description": "Role of the sender, either 'user' or 'assistant'.", - "example": "user" - }, - "content": { - "type": "array", - "items": { - "type": "object", - "properties": { - "type": { - "type": "string", - "description": "Type of content, e.g., 'text'.", - "example": "text" - }, - "text": { - "type": "object", - "properties": { - "value": { - "type": "string", - "description": "Text content of the message.", - "example": "How does AI work? 
Explain it in simple terms." - }, - "annotations": { - "type": "array", - "items": { - "type": "string" - }, - "description": "Annotations for the text content, if any.", - "example": [] - } - } - } - } - } - }, - "file_ids": { - "type": "array", - "items": { - "type": "string" - }, - "description": "Array of file IDs associated with the message, if any.", - "example": [] - }, - "assistant_id": { - "type": "string", - "description": "Identifier of the assistant involved in the message, if applicable.", - "example": null - }, - "run_id": { - "type": "string", - "description": "Run ID associated with the message, if applicable.", - "example": null - }, - "metadata": { - "type": "object", - "description": "Metadata associated with the message.", - "example": {} - } - } - }, - "CreateMessageResponse": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The identifier of the created message.", - "example": "msg_abc123" - }, - "object": { - "type": "string", - "description": "Type of the object, indicating it's a thread message.", - "example": "thread.message" - }, - "created_at": { - "type": "integer", - "format": "int64", - "description": "Unix timestamp representing the creation time of the message.", - "example": 1699017614 - }, - "thread_id": { - "type": "string", - "description": "Identifier of the thread to which this message belongs.", - "example": "thread_abc123" - }, - "role": { - "type": "string", - "description": "Role of the sender, either 'user' or 'assistant'.", - "example": "user" - }, - "content": { - "type": "array", - "items": { - "type": "object", - "properties": { - "type": { - "type": "string", - "description": "Type of content, e.g., 'text'.", - "example": "text" - }, - "text": { - "type": "object", - "properties": { - "value": { - "type": "string", - "description": "Text content of the message.", - "example": "How does AI work? Explain it in simple terms." 
- }, - "annotations": { - "type": "array", - "items": { - "type": "string" - }, - "description": "Annotations for the text content, if any.", - "example": [] - } - } - } - } - } - }, - "file_ids": { - "type": "array", - "items": { - "type": "string" - }, - "description": "Array of file IDs associated with the message, if any.", - "example": [] - }, - "assistant_id": { - "type": "string", - "description": "Identifier of the assistant involved in the message, if applicable.", - "example": null - }, - "run_id": { - "type": "string", - "description": "Run ID associated with the message, if applicable.", - "example": null - }, - "metadata": { - "type": "object", - "description": "Metadata associated with the message.", - "example": {} - } - } - }, - "ListMessagesResponse": { - "type": "object", - "properties": { - "object": { - "type": "string", - "description": "Type of the object, indicating it's a list.", - "default": "list" - }, - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ListMessageObject" - } - }, - "first_id": { - "type": "string", - "description": "Identifier of the first message in the list.", - "example": "msg_abc123" - }, - "last_id": { - "type": "string", - "description": "Identifier of the last message in the list.", - "example": "msg_abc456" - }, - "has_more": { - "type": "boolean", - "description": "Indicates whether there are more messages to retrieve.", - "example": false - } - } - }, - "ListMessageObject": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The identifier of the message.", - "example": "msg_abc123" - }, - "object": { - "type": "string", - "description": "Type of the object, indicating it's a thread message.", - "example": "thread.message" - }, - "created_at": { - "type": "integer", - "format": "int64", - "description": "Unix timestamp representing the creation time of the message.", - "example": 1699017614 - }, - "thread_id": { - "type": "string", - "description": 
"Identifier of the thread to which this message belongs.", - "example": "thread_abc123" - }, - "role": { - "type": "string", - "description": "Role of the sender, either 'user' or 'assistant'.", - "example": "user" - }, - "content": { - "type": "array", - "items": { - "type": "object", - "properties": { - "type": { - "type": "string", - "description": "Type of content, e.g., 'text'." - }, - "text": { - "type": "object", - "properties": { - "value": { - "type": "string", - "description": "Text content of the message.", - "example": "How does AI work? Explain it in simple terms." - }, - "annotations": { - "type": "array", - "items": { - "type": "string" - }, - "description": "Annotations for the text content, if any." - } - } - } - } - } - }, - "file_ids": { - "type": "array", - "items": { - "type": "string" - }, - "description": "Array of file IDs associated with the message, if any.", - "example": [] - }, - "assistant_id": { - "type": "string", - "description": "Identifier of the assistant involved in the message, if applicable.", - "example": null - }, - "run_id": { - "type": "string", - "description": "Run ID associated with the message, if applicable.", - "example": null - }, - "metadata": { - "type": "object", - "description": "Metadata associated with the message.", - "example": {} - } - } - }, - "MessageFileObject": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The identifier of the file.", - "example": "file-abc123" - }, - "object": { - "type": "string", - "description": "Type of the object, indicating it's a thread message file.", - "example": "thread.message.file" - }, - "created_at": { - "type": "integer", - "format": "int64", - "description": "Unix timestamp representing the creation time of the file.", - "example": 1699061776 - }, - "message_id": { - "type": "string", - "description": "Identifier of the message to which this file is associated.", - "example": "msg_abc123" - } - } - }, - 
"ListMessageFilesResponse": { - "type": "object", - "properties": { - "object": { - "type": "string", - "description": "Type of the object, indicating it's a list.", - "default": "list" - }, - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/MessageFileObject" - } - } - } - }, - "ChatObject": { - "type": "object", - "properties": { - "messages": { - "type": "array", - "description": "Contains input data or prompts for the model to process.\n", - "items": { - "type": "object", - "properties": { - "content": { - "type": "string" - }, - "role": { - "type": "string" - } - } - }, - "example": [ - { - "content": "Hello there :wave:", - "role": "assistant" - }, - { - "content": "Can you write a long story", - "role": "user" - } - ] - }, - "stream": { - "type": "boolean", - "default": true, - "description": "Enables continuous output generation, allowing for streaming of model responses." - }, - "model": { - "type": "string", - "example": "gpt-3.5-turbo", - "description": "Specifies the model being used for inference or processing tasks." - }, - "max_tokens": { - "type": "number", - "default": 2048, - "description": "The maximum number of tokens the model will generate in a single response." - }, - "stop": { - "type": "array", - "items": { - "type": "string" - }, - "description": "Defines specific tokens or phrases at which the model will stop generating further output.", - "example": ["hello"] - }, - "frequency_penalty": { - "type": "number", - "default": 0, - "description": "Adjusts the likelihood of the model repeating words or phrases in its output." - }, - "presence_penalty": { - "type": "number", - "default": 0, - "description": "Influences the generation of new and varied concepts in the model's output." - }, - "temperature": { - "type": "number", - "default": 0.7, - "description": "Controls the randomness of the model's output." 
- }, - "top_p": { - "type": "number", - "default": 0.95, - "description": "Set probability threshold for more relevant outputs." - }, - "cache_prompt": { - "type": "boolean", - "default": true, - "description": "Optimize performance in repeated or similar requests." - } - } - }, - "ChatCompletionRequest": { - "type": "object", - "properties": { - "messages": { - "type": "array", - "description": "Contains input data or prompts for the model to process.\n", - "items": { - "type": "object", - "properties": { - "content": { - "type": "string" - }, - "role": { - "type": "string" - } - } - }, - "example": [ - { - "content": "You are a helpful assistant.", - "role": "system" - }, - { - "content": "Hello!", - "role": "user" - } - ] - }, - "model": { - "type": "string", - "example": "tinyllama-1.1b", - "description": "Specifies the model being used for inference or processing tasks.\n" - }, - "stream": { - "type": "boolean", - "default": true, - "description": "Enables continuous output generation, allowing for streaming of model responses.\n" - }, - "max_tokens": { - "type": "number", - "default": 2048, - "description": "The maximum number of tokens the model will generate in a single response.\n" - }, - "stop": { - "type": "array", - "items": { - "type": "string" - }, - "description": "Defines specific tokens or phrases at which the model will stop generating further output.\n", - "example": ["hello"] - }, - "frequency_penalty": { - "type": "number", - "default": 0, - "description": "Adjusts the likelihood of the model repeating words or phrases in its output.\n" - }, - "presence_penalty": { - "type": "number", - "default": 0, - "description": "Influences the generation of new and varied concepts in the model's output.\n" - }, - "temperature": { - "type": "number", - "default": 0.7, - "description": "Controls the randomness of the model's output.\n" - }, - "top_p": { - "type": "number", - "default": 0.95, - "description": "Set probability threshold for more relevant 
outputs.\n" - } - } - }, - "ChatCompletionResponse": { - "type": "object", - "description": "Description of the response structure", - "properties": { - "choices": { - "type": "array", - "description": "Array of choice objects", - "items": { - "type": "object", - "properties": { - "finish_reason": { - "type": "string", - "nullable": true, - "example": null, - "description": "Reason for finishing the response, if applicable" - }, - "index": { - "type": "integer", - "example": 0, - "description": "Index of the choice" - }, - "message": { - "type": "object", - "properties": { - "content": { - "type": "string", - "example": "Hello user. What can I help you with?", - "description": "Content of the message" - }, - "role": { - "type": "string", - "example": "assistant", - "description": "Role of the sender" - } - } - } - } - } - }, - "created": { - "type": "integer", - "example": 1700193928, - "description": "Timestamp of when the response was created" - }, - "id": { - "type": "string", - "example": "ebwd2niJvJB1Q2Whyvkz", - "description": "Unique identifier of the response" - }, - "model": { - "type": "string", - "nullable": true, - "example": "_", - "description": "Model used for generating the response" - }, - "object": { - "type": "string", - "example": "chat.completion", - "description": "Type of the response object" - }, - "system_fingerprint": { - "type": "string", - "nullable": true, - "example": "_", - "description": "System fingerprint" - }, - "usage": { - "type": "object", - "description": "Information about the usage of tokens", - "properties": { - "completion_tokens": { - "type": "integer", - "example": 500, - "description": "Number of tokens used for completion" - }, - "prompt_tokens": { - "type": "integer", - "example": 33, - "description": "Number of tokens used in the prompt" - }, - "total_tokens": { - "type": "integer", - "example": 533, - "description": "Total number of tokens used" - } - } - } - } - }, - "AssistantObject": { - "type": "object", - 
"properties": { - "id": { - "type": "string", - "description": "The identifier of the assistant.", - "example": "asst_abc123" - }, - "object": { - "type": "string", - "description": "Type of the object, indicating it's an assistant.", - "default": "assistant" - }, - "version": { - "type": "integer", - "description": "Version number of the assistant.", - "example": 1 - }, - "created_at": { - "type": "integer", - "format": "int64", - "description": "Unix timestamp representing the creation time of the assistant.", - "example": 1698984975 - }, - "name": { - "type": "string", - "description": "Name of the assistant.", - "example": "Math Tutor" - }, - "description": { - "type": "string", - "description": "Description of the assistant. Can be null.", - "example": null - }, - "avatar": { - "type": "string", - "description": "URL of the assistant's avatar. Jan-specific property.", - "example": "https://pic.png" - }, - "models": { - "type": "array", - "description": "List of models associated with the assistant. Jan-specific property.", - "items": { - "type": "object", - "properties": { - "model_id": { - "type": "string", - "example": "model_0" - } - } - } - }, - "instructions": { - "type": "string", - "description": "A system prompt for the assistant.", - "example": "Be concise" - }, - "events": { - "type": "object", - "description": "Event subscription settings for the assistant.", - "properties": { - "in": { - "type": "array", - "items": { - "type": "string" - } - }, - "out": { - "type": "array", - "items": { - "type": "string" - } - } - } - }, - "metadata": { - "type": "object", - "description": "Metadata associated with the assistant." 
- } - } - }, - "ListAssistantsResponse": { - "type": "object" - }, - "CreateAssistantResponse": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The identifier of the assistant.", - "example": "asst_abc123" - }, - "object": { - "type": "string", - "description": "Type of the object, indicating it's an assistant.", - "default": "assistant" - }, - "version": { - "type": "integer", - "description": "Version number of the assistant.", - "example": 1 - }, - "created_at": { - "type": "integer", - "format": "int64", - "description": "Unix timestamp representing the creation time of the assistant.", - "example": 1698984975 - }, - "name": { - "type": "string", - "description": "Name of the assistant.", - "example": "Math Tutor" - }, - "description": { - "type": "string", - "description": "Description of the assistant. Can be null.", - "example": null - }, - "avatar": { - "type": "string", - "description": "URL of the assistant's avatar. Jan-specific property.", - "example": "https://pic.png" - }, - "models": { - "type": "array", - "description": "List of models associated with the assistant. Jan-specific property.", - "items": { - "type": "object", - "properties": { - "model_id": { - "type": "string", - "example": "model_0" - } - } - } - }, - "instructions": { - "type": "string", - "description": "A system prompt for the assistant.", - "example": "Be concise" - }, - "events": { - "type": "object", - "description": "Event subscription settings for the assistant.", - "properties": { - "in": { - "type": "array", - "items": { - "type": "string" - } - }, - "out": { - "type": "array", - "items": { - "type": "string" - } - } - } - }, - "metadata": { - "type": "object", - "description": "Metadata associated with the assistant." 
- } - } - }, - "RetrieveAssistantResponse": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The identifier of the assistant.", - "example": "asst_abc123" - }, - "object": { - "type": "string", - "description": "Type of the object, indicating it's an assistant.", - "default": "assistant" - }, - "version": { - "type": "integer", - "description": "Version number of the assistant.", - "example": 1 - }, - "created_at": { - "type": "integer", - "format": "int64", - "description": "Unix timestamp representing the creation time of the assistant.", - "example": 1698984975 - }, - "name": { - "type": "string", - "description": "Name of the assistant.", - "example": "Math Tutor" - }, - "description": { - "type": "string", - "description": "Description of the assistant. Can be null.", - "example": null - }, - "avatar": { - "type": "string", - "description": "URL of the assistant's avatar. Jan-specific property.", - "example": "https://pic.png" - }, - "models": { - "type": "array", - "description": "List of models associated with the assistant. Jan-specific property.", - "items": { - "type": "object", - "properties": { - "model_id": { - "type": "string", - "example": "model_0" - } - } - } - }, - "instructions": { - "type": "string", - "description": "A system prompt for the assistant.", - "example": "Be concise" - }, - "events": { - "type": "object", - "description": "Event subscription settings for the assistant.", - "properties": { - "in": { - "type": "array", - "items": { - "type": "string" - } - }, - "out": { - "type": "array", - "items": { - "type": "string" - } - } - } - }, - "metadata": { - "type": "object", - "description": "Metadata associated with the assistant." 
- } - } - }, - "ModifyAssistantObject": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The identifier of the assistant.", - "example": "asst_abc123" - }, - "object": { - "type": "string", - "description": "Type of the object, indicating it's an assistant.", - "default": "assistant" - }, - "version": { - "type": "integer", - "description": "Version number of the assistant.", - "example": 1 - }, - "created_at": { - "type": "integer", - "format": "int64", - "description": "Unix timestamp representing the creation time of the assistant.", - "example": 1698984975 - }, - "name": { - "type": "string", - "description": "Name of the assistant.", - "example": "Math Tutor" - }, - "description": { - "type": "string", - "description": "Description of the assistant. Can be null.", - "example": null - }, - "avatar": { - "type": "string", - "description": "URL of the assistant's avatar. Jan-specific property.", - "example": "https://pic.png" - }, - "models": { - "type": "array", - "description": "List of models associated with the assistant. Jan-specific property.", - "items": { - "type": "object", - "properties": { - "model_id": { - "type": "string", - "example": "model_0" - } - } - } - }, - "instructions": { - "type": "string", - "description": "A system prompt for the assistant.", - "example": "Be concise" - }, - "events": { - "type": "object", - "description": "Event subscription settings for the assistant.", - "properties": { - "in": { - "type": "array", - "items": { - "type": "string" - } - }, - "out": { - "type": "array", - "items": { - "type": "string" - } - } - } - }, - "metadata": { - "type": "object", - "description": "Metadata associated with the assistant." 
- } - } - }, - "ModifyAssistantResponse": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The identifier of the assistant.", - "example": "asst_abc123" - }, - "object": { - "type": "string", - "description": "Type of the object, indicating it's an assistant.", - "default": "assistant" - }, - "version": { - "type": "integer", - "description": "Version number of the assistant.", - "example": 1 - }, - "created_at": { - "type": "integer", - "format": "int64", - "description": "Unix timestamp representing the creation time of the assistant.", - "example": 1698984975 - }, - "name": { - "type": "string", - "description": "Name of the assistant.", - "example": "Physics Tutor" - }, - "description": { - "type": "string", - "description": "Description of the assistant. Can be null.", - "example": null - }, - "avatar": { - "type": "string", - "description": "URL of the assistant's avatar. Jan-specific property.", - "example": "https://pic.png" - }, - "models": { - "type": "array", - "description": "List of models associated with the assistant. Jan-specific property.", - "items": { - "type": "object", - "properties": { - "model_id": { - "type": "string", - "example": "model_0" - } - } - } - }, - "instructions": { - "type": "string", - "description": "A system prompt for the assistant.", - "example": "Be concise!" - }, - "events": { - "type": "object", - "description": "Event subscription settings for the assistant.", - "properties": { - "in": { - "type": "array", - "items": { - "type": "string" - } - }, - "out": { - "type": "array", - "items": { - "type": "string" - } - } - } - }, - "metadata": { - "type": "object", - "description": "Metadata associated with the assistant." 
- } - } - }, - "DeleteAssistantResponse": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The identifier of the deleted assistant.", - "example": "asst_abc123" - }, - "object": { - "type": "string", - "description": "Type of the object, indicating the assistant has been deleted.", - "example": "assistant.deleted" - }, - "deleted": { - "type": "boolean", - "description": "Indicates whether the assistant was successfully deleted.", - "example": true - } - } - } - } - } -} diff --git a/docs/openapi/jan.yaml b/docs/openapi/jan.yaml deleted file mode 100644 index 35fd431753..0000000000 --- a/docs/openapi/jan.yaml +++ /dev/null @@ -1,1043 +0,0 @@ ---- -openapi: 3.0.0 -info: - title: API Reference - description: > - # Introduction - - Jan API is compatible with the [OpenAI API](https://platform.openai.com/docs/api-reference). -version: 0.1.8 -contact: - name: Jan Discord - url: https://discord.gg/7EcEz7MrvA -license: - name: AGPLv3 - url: https://github.com/janhq/nitro/blob/main/LICENSE -servers: - - url: /v1 -tags: - - name: Models - description: List and describe the various models available in the API. - - name: Chat - description: > - Given a list of messages comprising a conversation, the model will - return a response. - - name: Messages - description: > - Messages capture a conversation's content. This can include the - content from LLM responses and other metadata from [chat - completions](/specs/chats). - - name: Threads - - name: Assistants - description: Configures and utilizes different AI assistants for varied tasks -x-tagGroups: - - name: Endpoints - tags: - - Models - - Chat - - name: Chat - tags: - - Assistants - - Messages - - Threads -paths: - /chat/completions: - post: - operationId: createChatCompletion - tags: - - Chat - summary: | - Create chat completion - description: > - Creates a model response for the given chat conversation. - Equivalent to OpenAI's create chat completion. 
- requestBody: - content: - application/json: - schema: - $ref: specs/chat.yaml#/components/schemas/ChatCompletionRequest - responses: - '200': - description: OK - content: - application/json: - schema: - $ref: specs/chat.yaml#/components/schemas/ChatCompletionResponse - x-codeSamples: - - lang: cURL - source: | - curl -X 'POST' \ - 'http://localhost:1337/v1/chat/completions' \ - -H 'accept: application/json' \ - -H 'Content-Type: application/json' \ - -d '{ - "messages": [ - { - "content": "You are a helpful assistant.", - "role": "system" - }, - { - "content": "Hello!", - "role": "user" - } - ], - "model": "tinyllama-1.1b", - "stream": true, - "max_tokens": 2048, - "stop": [ - "hello" - ], - "frequency_penalty": 0, - "presence_penalty": 0, - "temperature": 0.7, - "top_p": 0.95 - }' - - lang: JavaScript - source: |- - const data = { - messages: [ - { - content: 'You are a helpful assistant.', - role: 'system' - }, - { - content: 'Hello!', - role: 'user' - } - ], - model: 'tinyllama-1.1b', - stream: true, - max_tokens: 2048, - stop: ['hello'], - frequency_penalty: 0, - presence_penalty: 0, - temperature: 0.7, - top_p: 0.95 - }; - - fetch('http://localhost:1337/v1/chat/completions', { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - 'Accept': 'application/json' - }, - body: JSON.stringify(data) - }) - .then(response => response.json()) - .then(data => console.log(data)); - - lang: Node.js - source: |- - const fetch = require('node-fetch'); - - const data = { - messages: [ - { - content: 'You are a helpful assistant.', - role: 'system' - }, - { - content: 'Hello!', - role: 'user' - } - ], - model: 'tinyllama-1.1b', - stream: true, - max_tokens: 2048, - stop: ['hello'], - frequency_penalty: 0, - presence_penalty: 0, - temperature: 0.7, - top_p: 0.95 - }; - - fetch('http://localhost:1337/v1/chat/completions', { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - 'Accept': 'application/json' - }, - body: JSON.stringify(data) - }) 
- .then(response => response.json()) - .then(data => console.log(data)); - - lang: Python - source: >- - import requests - - import json - - - data = { - "messages": [ - { - "content": "You are a helpful assistant.", - "role": "system" - }, - { - "content": "Hello!", - "role": "user" - } - ], - "model": "tinyllama-1.1b", - "stream": true, - "max_tokens": 2048, - "stop": [ - "hello" - ], - "frequency_penalty": 0, - "presence_penalty": 0, - "temperature": 0.7, - "top_p": 0.95 - } - - - response = requests.post('http://localhost:1337/v1/chat/completions', json=data) - - print(response.json()) - /models: - get: - operationId: listModels - tags: - - Models - summary: List models - description: > - Lists the currently available models, and provides basic - information about each one such as the owner and availability. - Equivalent to OpenAI's list model. - responses: - '200': - description: OK - content: - application/json: - schema: - $ref: specs/models.yaml#/components/schemas/ListModelsResponse - x-codeSamples: - - lang: cURL - source: |- - curl -X 'GET' \ - 'http://localhost:1337/v1/models' \ - -H 'accept: application/json' - - lang: JavaScript - source: |- - const response = await fetch('http://localhost:1337/v1/models', { - method: 'GET', - headers: {Accept: 'application/json'} - }); - const data = await response.json(); - - lang: Node.js - source: |- - const fetch = require('node-fetch'); - - const url = 'http://localhost:1337/v1/models'; - const options = { - method: 'GET', - headers: { Accept: 'application/json' } - }; - - fetch(url, options) - .then(res => res.json()) - .then(json => console.log(json)); - - lang: Python - source: |- - import requests - - url = 'http://localhost:1337/v1/models' - headers = {'Accept': 'application/json'} - response = requests.get(url, headers=headers) - data = response.json() - '/models/download/{model_id}': - get: - operationId: downloadModel - tags: - - Models - summary: Download a specific model. 
- description: | - Download a model. - parameters: - - in: path - name: model_id - required: true - schema: - type: string - example: mistral-ins-7b-q4 - description: | - The ID of the model to use for this request. - responses: - '200': - description: OK - content: - application/json: - schema: - $ref: specs/models.yaml#/components/schemas/DownloadModelResponse - x-codeSamples: - - lang: cURL - source: |- - curl -X 'GET' \ - 'http://localhost:1337/v1/models/download/{model_id}' \ - -H 'accept: application/json' - - lang: JavaScript - source: >- - const response = await - fetch('http://localhost:1337/v1/models/download/{model_id}', { - method: 'GET', - headers: {accept: 'application/json'} - }); - - - const data = await response.json(); - - lang: Node.js - source: |- - const fetch = require('node-fetch'); - - fetch('http://localhost:1337/v1/models/download/{model_id}', { - method: 'GET', - headers: {accept: 'application/json'} - }) - .then(res => res.json()) - .then(data => console.log(data)); - - lang: Python - source: >- - import requests - - - response = requests.get('http://localhost:1337/v1/models/download/{model_id}', headers={'accept': 'application/json'}) - - data = response.json() - '/models/{model_id}': - get: - operationId: retrieveModel - tags: - - Models - summary: Retrieve model - description: > - Get a model instance, providing basic information about the model - such as the owner and permissioning. - Equivalent to OpenAI's retrieve model. - parameters: - - in: path - name: model_id - required: true - schema: - type: string - example: mistral-ins-7b-q4 - description: | - The ID of the model to use for this request. 
- responses: - '200': - description: OK - content: - application/json: - schema: - $ref: specs/models.yaml#/components/schemas/GetModelResponse - x-codeSamples: - - lang: cURL - source: |- - curl -X 'GET' \ - 'http://localhost:1337/v1/models/{model_id}' \ - -H 'accept: application/json' - - lang: JavaScript - source: |- - const fetch = require('node-fetch'); - - const modelId = 'mistral-ins-7b-q4'; - - fetch(`http://localhost:1337/v1/models/${modelId}`, { - method: 'GET', - headers: {'accept': 'application/json'} - }) - .then(res => res.json()) - .then(json => console.log(json)); - - lang: Node.js - source: |- - const fetch = require('node-fetch'); - - const modelId = 'mistral-ins-7b-q4'; - - fetch(`http://localhost:1337/v1/models/${modelId}`, { - method: 'GET', - headers: {'accept': 'application/json'} - }) - .then(res => res.json()) - .then(json => console.log(json)); - - lang: Python - source: >- - import requests - - - model_id = 'mistral-ins-7b-q4' - - - response = requests.get(f'http://localhost:1337/v1/models/{model_id}', headers={'accept': 'application/json'}) - - print(response.json()) - delete: - operationId: deleteModel - tags: - - Models - summary: Delete model - description: > - Delete a model. - Equivalent to OpenAI's delete model. 
- parameters: - - in: path - name: model_id - required: true - schema: - type: string - example: mistral-ins-7b-q4 - description: | - The model id to delete - responses: - '200': - description: OK - content: - application/json: - schema: - $ref: specs/models.yaml#/components/schemas/DeleteModelResponse - x-codeSamples: - - lang: cURL - source: |- - curl -X 'DELETE' \ - 'http://localhost:1337/v1/models/{model_id}' \ - -H 'accept: application/json' - - lang: JavaScript - source: |- - const fetch = require('node-fetch'); - - const modelId = 'mistral-ins-7b-q4'; - - fetch(`http://localhost:1337/v1/models/${modelId}`, { - method: 'DELETE', - headers: { 'accept': 'application/json' } - }) - .then(res => res.json()) - .then(json => console.log(json)); - - lang: Node.js - source: |- - const fetch = require('node-fetch'); - - const modelId = 'mistral-ins-7b-q4'; - - fetch(`http://localhost:1337/v1/models/${modelId}`, { - method: 'DELETE', - headers: { 'accept': 'application/json' } - }) - .then(res => res.json()) - .then(json => console.log(json)); - - lang: Python - source: >- - import requests - - - model_id = 'mistral-ins-7b-q4' - - - response = requests.delete(f'http://localhost:1337/v1/models/{model_id}', headers={'accept': 'application/json'}) - /threads: - post: - operationId: createThread - tags: - - Threads - summary: Create thread - description: > - Create a thread. - Equivalent to OpenAI's create thread. 
- requestBody: - required: false - content: - application/json: - schema: - $ref: specs/threads.yaml#/components/schemas/CreateThreadObject - responses: - '200': - description: Thread created successfully - content: - application/json: - schema: - $ref: specs/threads.yaml#/components/schemas/CreateThreadResponse - x-codeSamples: - - lang: cURL - source: | - curl -X POST http://localhost:1337/v1/threads \ - -H "Content-Type: application/json" \ - -d '{ - "messages": [{ - "role": "user", - "content": "Hello, what is AI?", - "file_ids": ["file-abc123"] - }, { - "role": "user", - "content": "How does AI work? Explain it in simple terms." - }] - }' - - lang: JavaScript - source: |- - const fetch = require('node-fetch'); - - fetch('http://localhost:1337/v1/threads', { - method: 'POST', - headers: { - 'Content-Type': 'application/json' - }, - body: JSON.stringify({ - messages: [ - { - role: 'user', - content: 'Hello, what is AI?', - file_ids: ['file-abc123'] - }, - { - role: 'user', - content: 'How does AI work? Explain it in simple terms.' - } - ] - }) - }); - - lang: Node.js - source: |- - const fetch = require('node-fetch'); - - fetch('http://localhost:1337/v1/threads', { - method: 'POST', - headers: { - 'Content-Type': 'application/json' - }, - body: JSON.stringify({ - messages: [ - { - role: 'user', - content: 'Hello, what is AI?', - file_ids: ['file-abc123'] - }, - { - role: 'user', - content: 'How does AI work? Explain it in simple terms.' - } - ] - }) - }); - - lang: Python - source: |- - import requests - - url = 'http://localhost:1337/v1/threads' - payload = { - 'messages': [ - { - 'role': 'user', - 'content': 'Hello, what is AI?', - 'file_ids': ['file-abc123'] - }, - { - 'role': 'user', - 'content': 'How does AI work? Explain it in simple terms.' 
- } - ] - } - - response = requests.post(url, json=payload) - print(response.text) - get: - operationId: listThreads - tags: - - Threads - summary: List threads - description: | - Retrieves a list of all threads available in the system. - responses: - '200': - description: List of threads retrieved successfully - content: - application/json: - schema: - type: array - items: - $ref: specs/threads.yaml#/components/schemas/ThreadObject - example: - - id: thread_abc123 - object: thread - created_at: 1699014083 - assistants: - - assistant-001 - metadata: {} - messages: [] - - id: thread_abc456 - object: thread - created_at: 1699014083 - assistants: - - assistant-002 - - assistant-003 - metadata: {} - x-codeSamples: - - lang: cURL - source: |- - curl http://localhost:1337/v1/threads \ - -H "Content-Type: application/json" - - lang: JavaScript - source: |- - const fetch = require('node-fetch'); - - fetch('http://localhost:1337/v1/threads', { - method: 'GET', - headers: {'Content-Type': 'application/json'} - }).then(res => res.json()) - .then(json => console.log(json)); - - lang: Node.js - source: |- - const fetch = require('node-fetch'); - - fetch('http://localhost:1337/v1/threads', { - method: 'GET', - headers: {'Content-Type': 'application/json'} - }).then(res => res.json()) - .then(json => console.log(json)); - - lang: Python - source: |- - import requests - - url = 'http://localhost:1337/v1/threads' - headers = {'Content-Type': 'application/json'} - - response = requests.get(url, headers=headers) - print(response.json()) - '/threads/{thread_id}': - get: - operationId: getThread - tags: - - Threads - summary: Retrieve thread - description: > - Retrieves detailed information about a specific thread using its - thread_id. - Equivalent to OpenAI's retrieve thread. - parameters: - - in: path - name: thread_id - required: true - schema: - type: string - description: | - The ID of the thread to retrieve. 
- responses: - '200': - description: Thread details retrieved successfully - content: - application/json: - schema: - $ref: specs/threads.yaml#/components/schemas/GetThreadResponse - x-codeSamples: - - lang: cURL - source: | - curl http://localhost:1337/v1/threads/{thread_id} - patch: - operationId: modifyThread - tags: - - Threads - summary: Modify thread - description: > - Modifies a thread. - Equivalent to OpenAI's modify thread. - parameters: - - in: path - name: thread_id - required: true - schema: - type: string - description: | - The ID of the thread to be modified. - requestBody: - required: true - content: - application/json: - schema: - type: object - properties: - title: - type: string - description: Set the title of the thread - items: - $ref: specs/threads.yaml#/components/schemas/ThreadMessageObject - responses: - '200': - description: Thread modified successfully - content: - application/json: - schema: - $ref: specs/threads.yaml#/components/schemas/ModifyThreadResponse - x-codeSamples: - - lang: cURL - source: | - curl -X POST http://localhost:1337/v1/threads/{thread_id} \ - -H "Content-Type: application/json" \ - -d '{ - "messages": [{ - "role": "user", - "content": "Hello, what is AI?", - "file_ids": ["file-abc123"] - }, { - "role": "user", - "content": "How does AI work? Explain it in simple terms." - }] - }' - delete: - operationId: deleteThread - tags: - - Threads - summary: Delete thread - description: > - Delete a thread. - Equivalent to OpenAI's delete thread. - parameters: - - in: path - name: thread_id - required: true - schema: - type: string - description: | - The ID of the thread to be deleted. 
- responses: - '200': - description: Thread deleted successfully - content: - application/json: - schema: - $ref: specs/threads.yaml#/components/schemas/DeleteThreadResponse - x-codeSamples: - - lang: cURL - source: | - curl -X DELETE http://localhost:1337/v1/threads/{thread_id} - /assistants: - get: - operationId: listAssistants - tags: - - Assistants - summary: List assistants - description: > - Return a list of assistants. - Equivalent to OpenAI's list assistants. - responses: - '200': - description: List of assistants retrieved successfully - content: - application/json: - schema: - type: array - example: - - id: asst_abc123 - object: assistant - version: 1 - created_at: 1698984975 - name: Math Tutor - description: null - avatar: https://pic.png - models: - - model_id: model_0 - instructions: Be concise - events: - in: [] - out: [] - metadata: {} - - id: asst_abc456 - object: assistant - version: 1 - created_at: 1698984975 - name: Physics Tutor - description: null - avatar: https://pic.png - models: - - model_id: model_1 - instructions: Be concise! - events: - in: [] - out: [] - metadata: {} - x-codeSamples: - - lang: cURL - source: |- - curl http://localhost:1337/v1/assistants \ - -H "Content-Type: application/json" - - lang: JavaScript - source: |- - fetch('http://localhost:1337/v1/assistants', { - method: 'GET', - headers: { - 'Content-Type': 'application/json' - } - }) - - lang: Node.js - source: |- - const fetch = require('node-fetch'); - - fetch('http://localhost:1337/v1/assistants', { - method: 'GET', - headers: { - 'Content-Type': 'application/json' - } - }) - - lang: Python - source: |- - import requests - - url = 'http://localhost:1337/v1/assistants' - headers = {'Content-Type': 'application/json'} - - response = requests.get(url, headers=headers) - '/assistants/{assistant_id}': - get: - operationId: getAssistant - tags: - - Assistants - summary: Retrieve assistant - description: > - Retrieves an assistant. 
- Equivalent to OpenAI's retrieve assistants. - parameters: - - in: path - name: assistant_id - required: true - schema: - type: string - example: jan - description: | - The ID of the assistant to retrieve. - responses: - '200': - description: null - content: - application/json: - schema: - $ref: specs/assistants.yaml#/components/schemas/RetrieveAssistantResponse - x-codeSamples: - - lang: cURL - source: |- - curl http://localhost:1337/v1/assistants/{assistant_id} \ - -H "Content-Type: application/json" - - lang: JavaScript - source: |- - const fetch = require('node-fetch'); - - let assistantId = 'abc123'; - - fetch(`http://localhost:1337/v1/assistants/${assistantId}`, { - method: 'GET', - headers: { - 'Content-Type': 'application/json' - } - }) - - lang: Node.js - source: |- - const fetch = require('node-fetch'); - - let assistantId = 'abc123'; - - fetch(`http://localhost:1337/v1/assistants/${assistantId}`, { - method: 'GET', - headers: { - 'Content-Type': 'application/json' - } - }) - - lang: Python - source: >- - import requests - - - assistant_id = 'abc123' - - - response = requests.get(f'http://localhost:1337/v1/assistants/{assistant_id}', headers={'Content-Type': 'application/json'}) - '/threads/{thread_id}/messages': - get: - operationId: listMessages - tags: - - Messages - summary: List messages - description: > - Retrieves all messages from the given thread. - Equivalent to OpenAI's list messages. - parameters: - - in: path - name: thread_id - required: true - schema: - type: string - description: | - The ID of the thread from which to retrieve messages. 
- responses: - '200': - description: List of messages retrieved successfully - content: - application/json: - schema: - $ref: specs/messages.yaml#/components/schemas/ListMessagesResponse - x-codeSamples: - - lang: cURL - source: | - curl http://localhost:1337/v1/threads/{thread_id}/messages \ - -H "Content-Type: application/json" - post: - operationId: createMessage - tags: - - Messages - summary: Create message - description: > - Create a message. - Equivalent to OpenAI's list messages. - parameters: - - in: path - name: thread_id - required: true - schema: - type: string - description: | - The ID of the thread to which the message will be posted. - requestBody: - required: true - content: - application/json: - schema: - type: object - properties: - role: - type: string - description: | - Role of the sender, either 'user' or 'assistant'. - example: user - enum: - - user - - assistant - content: - type: string - description: | - Text content of the message. - example: How does AI work? Explain it in simple terms. - required: - - role - - content - responses: - '200': - description: Message created successfully - content: - application/json: - schema: - $ref: specs/messages.yaml#/components/schemas/CreateMessageResponse - x-codeSamples: - - lang: cURL - source: | - curl -X POST http://localhost:1337/v1/threads/{thread_id}/messages \ - -H "Content-Type: application/json" \ - -d '{ - "role": "user", - "content": "How does AI work? Explain it in simple terms." - }' - '/threads/{thread_id}/messages/{message_id}': - get: - operationId: retrieveMessage - tags: - - Messages - summary: Retrieve message - description: > - Retrieve a specific message from a thread using its thread_id and - message_id. - Equivalent to OpenAI's retrieve messages. - parameters: - - in: path - name: thread_id - required: true - schema: - type: string - description: | - The ID of the thread containing the message. 
- - in: path - name: message_id - required: true - schema: - type: string - description: | - The ID of the message to retrieve. - responses: - '200': - description: OK - content: - application/json: - schema: - $ref: specs/messages.yaml#/components/schemas/GetMessageResponse - x-codeSamples: - - lang: cURL - source: > - curl http://localhost:1337/v1/threads/{thread_id}/messages/{message_id} - \ - -H "Content-Type: application/json" -x-webhooks: - ModelObject: - post: - summary: The model object - description: > - Describe a model offering that can be used with the API. - Equivalent to OpenAI's model object. - operationId: ModelObject - tags: - - Models - requestBody: - content: - application/json: - schema: - $ref: specs/models.yaml#/components/schemas/ModelObject - AssistantObject: - post: - summary: The assistant object - description: > - Build assistants that can call models and use tools to perform - tasks. Equivalent - to OpenAI's assistants object. - operationId: AssistantObjects - tags: - - Assistants - requestBody: - content: - application/json: - schema: - $ref: specs/assistants.yaml#/components/schemas/AssistantObject - MessageObject: - post: - summary: The message object - description: > - Information about a message in the thread. - Equivalent to OpenAI's message object. - operationId: MessageObject - tags: - - Messages - requestBody: - content: - application/json: - schema: - $ref: specs/messages.yaml#/components/schemas/MessageObject - ThreadObject: - post: - summary: The thread object - description: Represents a thread that contains messages. - Equivalent to OpenAI's thread object. 
- operationId: ThreadObject - tags: - - Threads - requestBody: - content: - application/json: - schema: - $ref: specs/threads.yaml#/components/schemas/ThreadObject diff --git a/docs/openapi/specs/assistants.yaml b/docs/openapi/specs/assistants.yaml deleted file mode 100644 index 5db1f6a976..0000000000 --- a/docs/openapi/specs/assistants.yaml +++ /dev/null @@ -1,319 +0,0 @@ ---- -components: - schemas: - AssistantObject: - type: object - properties: - id: - type: string - description: The identifier of the assistant. - example: asst_abc123 - object: - type: string - description: Type of the object, indicating it's an assistant. - default: assistant - version: - type: integer - description: Version number of the assistant. - example: 1 - created_at: - type: integer - format: int64 - description: Unix timestamp representing the creation time of the assistant. - example: 1698984975 - name: - type: string - description: Name of the assistant. - example: Math Tutor - description: - type: string - description: Description of the assistant. Can be null. - example: null - avatar: - type: string - description: URL of the assistant's avatar. Jan-specific property. - example: https://pic.png - models: - type: array - description: List of models associated with the assistant. Jan-specific property. - items: - type: object - properties: - model_id: - type: string - example: model_0 - instructions: - type: string - description: A system prompt for the assistant. - example: Be concise - events: - type: object - description: Event subscription settings for the assistant. - properties: - in: - type: array - items: - type: string - out: - type: array - items: - type: string - metadata: - type: object - description: Metadata associated with the assistant. - ListAssistantsResponse: null - CreateAssistantResponse: - type: object - properties: - id: - type: string - description: The identifier of the assistant. 
- example: asst_abc123 - object: - type: string - description: Type of the object, indicating it's an assistant. - default: assistant - version: - type: integer - description: Version number of the assistant. - example: 1 - created_at: - type: integer - format: int64 - description: Unix timestamp representing the creation time of the assistant. - example: 1698984975 - name: - type: string - description: Name of the assistant. - example: Math Tutor - description: - type: string - description: Description of the assistant. Can be null. - example: null - avatar: - type: string - description: URL of the assistant's avatar. Jan-specific property. - example: https://pic.png - models: - type: array - description: List of models associated with the assistant. Jan-specific property. - items: - type: object - properties: - model_id: - type: string - example: model_0 - instructions: - type: string - description: A system prompt for the assistant. - example: Be concise - events: - type: object - description: Event subscription settings for the assistant. - properties: - in: - type: array - items: - type: string - out: - type: array - items: - type: string - metadata: - type: object - description: Metadata associated with the assistant. - RetrieveAssistantResponse: - type: object - properties: - id: - type: string - description: The identifier of the assistant. - example: asst_abc123 - object: - type: string - description: Type of the object, indicating it's an assistant. - default: assistant - version: - type: integer - description: Version number of the assistant. - example: 1 - created_at: - type: integer - format: int64 - description: Unix timestamp representing the creation time of the assistant. - example: 1698984975 - name: - type: string - description: Name of the assistant. - example: Math Tutor - description: - type: string - description: Description of the assistant. Can be null. - example: null - avatar: - type: string - description: URL of the assistant's avatar. 
Jan-specific property. - example: https://pic.png - models: - type: array - description: List of models associated with the assistant. Jan-specific property. - items: - type: object - properties: - model_id: - type: string - example: model_0 - instructions: - type: string - description: A system prompt for the assistant. - example: Be concise - events: - type: object - description: Event subscription settings for the assistant. - properties: - in: - type: array - items: - type: string - out: - type: array - items: - type: string - metadata: - type: object - description: Metadata associated with the assistant. - ModifyAssistantObject: - type: object - properties: - id: - type: string - description: The identifier of the assistant. - example: asst_abc123 - object: - type: string - description: Type of the object, indicating it's an assistant. - default: assistant - version: - type: integer - description: Version number of the assistant. - example: 1 - created_at: - type: integer - format: int64 - description: Unix timestamp representing the creation time of the assistant. - example: 1698984975 - name: - type: string - description: Name of the assistant. - example: Math Tutor - description: - type: string - description: Description of the assistant. Can be null. - example: null - avatar: - type: string - description: URL of the assistant's avatar. Jan-specific property. - example: https://pic.png - models: - type: array - description: List of models associated with the assistant. Jan-specific property. - items: - type: object - properties: - model_id: - type: string - example: model_0 - instructions: - type: string - description: A system prompt for the assistant. - example: Be concise - events: - type: object - description: Event subscription settings for the assistant. - properties: - in: - type: array - items: - type: string - out: - type: array - items: - type: string - metadata: - type: object - description: Metadata associated with the assistant. 
- ModifyAssistantResponse: - type: object - properties: - id: - type: string - description: The identifier of the assistant. - example: asst_abc123 - object: - type: string - description: Type of the object, indicating it's an assistant. - default: assistant - version: - type: integer - description: Version number of the assistant. - example: 1 - created_at: - type: integer - format: int64 - description: Unix timestamp representing the creation time of the assistant. - example: 1698984975 - name: - type: string - description: Name of the assistant. - example: Physics Tutor - description: - type: string - description: Description of the assistant. Can be null. - example: null - avatar: - type: string - description: URL of the assistant's avatar. Jan-specific property. - example: https://pic.png - models: - type: array - description: List of models associated with the assistant. Jan-specific property. - items: - type: object - properties: - model_id: - type: string - example: model_0 - instructions: - type: string - description: A system prompt for the assistant. - example: Be concise! - events: - type: object - description: Event subscription settings for the assistant. - properties: - in: - type: array - items: - type: string - out: - type: array - items: - type: string - metadata: - type: object - description: Metadata associated with the assistant. - DeleteAssistantResponse: - type: object - properties: - id: - type: string - description: The identifier of the deleted assistant. - example: asst_abc123 - object: - type: string - description: Type of the object, indicating the assistant has been deleted. - example: assistant.deleted - deleted: - type: boolean - description: Indicates whether the assistant was successfully deleted. 
- example: true diff --git a/docs/openapi/specs/chat.yaml b/docs/openapi/specs/chat.yaml deleted file mode 100644 index c9358d7962..0000000000 --- a/docs/openapi/specs/chat.yaml +++ /dev/null @@ -1,196 +0,0 @@ ---- -components: - schemas: - ChatObject: - type: object - properties: - messages: - type: arrays - description: | - Contains input data or prompts for the model to process. - example: - - content: 'Hello there :wave:' - role: assistant - - content: Can you write a long story - role: user - stream: - type: boolean - default: true - description: - Enables continuous output generation, allowing for streaming of - model responses. - model: - type: string - example: gpt-3.5-turbo - description: Specifies the model being used for inference or processing tasks. - max_tokens: - type: number - default: 2048 - description: - The maximum number of tokens the model will generate in a single - response. - stop: - type: arrays - example: - - hello - description: - Defines specific tokens or phrases at which the model will stop - generating further output/ - frequency_penalty: - type: number - default: 0 - description: - Adjusts the likelihood of the model repeating words or phrases in - its output. - presence_penalty: - type: number - default: 0 - description: - Influences the generation of new and varied concepts in the model's - output. - temperature: - type: number - default: 0.7 - min: 0 - max: 1 - description: Controls the randomness of the model's output. - top_p: - type: number - default: 0.95 - min: 0 - max: 1 - description: Set probability threshold for more relevant outputs. - cache_prompt: - type: boolean - default: true - description: Optimize performance in repeated or similar requests. - ChatCompletionRequest: - type: object - properties: - messages: - type: arrays - description: | - Contains input data or prompts for the model to process. - example: - - content: You are a helpful assistant. - role: system - - content: Hello! 
- role: user - model: - type: string - example: tinyllama-1.1b - description: | - Specifies the model being used for inference or processing tasks. - stream: - type: boolean - default: true - description: > - Enables continuous output generation, allowing for streaming of - model responses. - max_tokens: - type: number - default: 2048 - description: > - The maximum number of tokens the model will generate in a single - response. - stop: - type: arrays - example: - - hello - description: > - Defines specific tokens or phrases at which the model will stop - generating further output. - frequency_penalty: - type: number - default: 0 - description: > - Adjusts the likelihood of the model repeating words or phrases in - its output. - presence_penalty: - type: number - default: 0 - description: > - Influences the generation of new and varied concepts in the model's - output. - temperature: - type: number - default: 0.7 - min: 0 - max: 1 - description: | - Controls the randomness of the model's output. - top_p: - type: number - default: 0.95 - min: 0 - max: 1 - description: | - Set probability threshold for more relevant outputs. - ChatCompletionResponse: - type: object - description: Description of the response structure - properties: - choices: - type: array - description: Array of choice objects - items: - type: object - properties: - finish_reason: - type: string - nullable: true - example: null - description: Reason for finishing the response, if applicable - index: - type: integer - example: 0 - description: Index of the choice - message: - type: object - properties: - content: - type: string - example: Hello user. What can I help you with? 
- description: Content of the message - role: - type: string - example: assistant - description: Role of the sender - created: - type: integer - example: 1700193928 - description: Timestamp of when the response was created - id: - type: string - example: ebwd2niJvJB1Q2Whyvkz - description: Unique identifier of the response - model: - type: string - nullable: true - example: _ - description: Model used for generating the response - object: - type: string - example: chat.completion - description: Type of the response object - system_fingerprint: - type: string - nullable: true - example: _ - description: System fingerprint - usage: - type: object - description: Information about the usage of tokens - properties: - completion_tokens: - type: integer - example: 500 - description: Number of tokens used for completion - prompt_tokens: - type: integer - example: 33 - description: Number of tokens used in the prompt - total_tokens: - type: integer - example: 533 - description: Total number of tokens used diff --git a/docs/openapi/specs/messages.yaml b/docs/openapi/specs/messages.yaml deleted file mode 100644 index 22d82b787b..0000000000 --- a/docs/openapi/specs/messages.yaml +++ /dev/null @@ -1,313 +0,0 @@ ---- -components: - schemas: - MessageObject: - type: object - properties: - id: - type: string - description: | - Sequential or UUID identifier of the message. - example: 0 - object: - type: string - description: | - Type of the object, defaults to 'thread.message'. - example: thread.message - created_at: - type: integer - format: int64 - description: | - Unix timestamp representing the creation time of the message. - thread_id: - type: string - description: > - Identifier of the thread to which this message belongs. Defaults to - parent thread. - example: thread_asdf - assistant_id: - type: string - description: > - Identifier of the assistant involved in the message. Defaults to - parent thread. 
- example: jan - role: - type: string - enum: - - user - - assistant - description: | - Role of the sender, either 'user' or 'assistant'. - content: - type: array - items: - type: object - properties: - type: - type: string - description: | - Type of content, e.g., 'text'. - text: - type: object - properties: - value: - type: string - description: | - Text content of the message. - example: Hi!? - annotations: - type: array - items: - type: string - description: | - Annotations for the text content, if any. - example: [] - metadata: - type: object - description: | - Metadata associated with the message, defaults to an empty object. - example: {} - GetMessageResponse: - type: object - properties: - id: - type: string - description: The identifier of the message. - example: msg_abc123 - object: - type: string - description: Type of the object, indicating it's a thread message. - default: thread.message - created_at: - type: integer - format: int64 - description: Unix timestamp representing the creation time of the message. - example: 1699017614 - thread_id: - type: string - description: Identifier of the thread to which this message belongs. - example: thread_abc123 - role: - type: string - description: Role of the sender, either 'user' or 'assistant'. - example: user - content: - type: array - items: - type: object - properties: - type: - type: string - description: Type of content, e.g., 'text'. - example: text - text: - type: object - properties: - value: - type: string - description: Text content of the message. - example: How does AI work? Explain it in simple terms. - annotations: - type: array - items: - type: string - description: Annotations for the text content, if any. - example: [] - file_ids: - type: array - items: - type: string - description: Array of file IDs associated with the message, if any. - example: [] - assistant_id: - type: string - description: Identifier of the assistant involved in the message, if applicable. 
- example: null - run_id: - type: string - description: Run ID associated with the message, if applicable. - example: null - metadata: - type: object - description: Metadata associated with the message. - example: {} - CreateMessageResponse: - type: object - properties: - id: - type: string - description: The identifier of the created message. - example: msg_abc123 - object: - type: string - description: Type of the object, indicating it's a thread message. - example: thread.message - created_at: - type: integer - format: int64 - description: Unix timestamp representing the creation time of the message. - example: 1699017614 - thread_id: - type: string - description: Identifier of the thread to which this message belongs. - example: thread_abc123 - role: - type: string - description: Role of the sender, either 'user' or 'assistant'. - example: user - content: - type: array - items: - type: object - properties: - type: - type: string - description: Type of content, e.g., 'text'. - example: text - text: - type: object - properties: - value: - type: string - description: Text content of the message. - example: How does AI work? Explain it in simple terms. - annotations: - type: array - items: - type: string - description: Annotations for the text content, if any. - example: [] - file_ids: - type: array - items: - type: string - description: Array of file IDs associated with the message, if any. - example: [] - assistant_id: - type: string - description: Identifier of the assistant involved in the message, if applicable. - example: null - run_id: - type: string - description: Run ID associated with the message, if applicable. - example: null - metadata: - type: object - description: Metadata associated with the message. - example: {} - ListMessagesResponse: - type: object - properties: - object: - type: string - description: Type of the object, indicating it's a list. 
- default: list - data: - type: array - items: - $ref: '#/components/schemas/ListMessageObject' - first_id: - type: string - description: Identifier of the first message in the list. - example: msg_abc123 - last_id: - type: string - description: Identifier of the last message in the list. - example: msg_abc456 - has_more: - type: boolean - description: Indicates whether there are more messages to retrieve. - example: false - ListMessageObject: - type: object - properties: - id: - type: string - description: The identifier of the message. - example: msg_abc123 - object: - type: string - description: Type of the object, indicating it's a thread message. - example: thread.message - created_at: - type: integer - format: int64 - description: Unix timestamp representing the creation time of the message. - example: 1699017614 - thread_id: - type: string - description: Identifier of the thread to which this message belongs. - example: thread_abc123 - role: - type: string - description: Role of the sender, either 'user' or 'assistant'. - example: user - content: - type: array - items: - type: object - properties: - type: - type: string - description: Type of content, e.g., 'text'. - text: - type: object - properties: - value: - type: string - description: Text content of the message. - example: How does AI work? Explain it in simple terms. - annotations: - type: array - items: - type: string - description: Annotations for the text content, if any. - file_ids: - type: array - items: - type: string - description: Array of file IDs associated with the message, if any. - example: [] - assistant_id: - type: string - description: Identifier of the assistant involved in the message, if applicable. - example: null - run_id: - type: string - description: Run ID associated with the message, if applicable. - example: null - metadata: - type: object - description: Metadata associated with the message. 
- example: {} - MessageFileObject: - type: object - properties: - id: - type: string - description: The identifier of the file. - example: file-abc123 - object: - type: string - description: Type of the object, indicating it's a thread message file. - example: thread.message.file - created_at: - type: integer - format: int64 - description: Unix timestamp representing the creation time of the file. - example: 1699061776 - message_id: - type: string - description: Identifier of the message to which this file is associated. - example: msg_abc123 - ListMessageFilesResponse: - type: object - properties: - object: - type: string - description: Type of the object, indicating it's a list. - default: list - data: - type: array - items: - $ref: '#/components/schemas/MessageFileObject' diff --git a/docs/openapi/specs/models.yaml b/docs/openapi/specs/models.yaml deleted file mode 100644 index ff2040bb59..0000000000 --- a/docs/openapi/specs/models.yaml +++ /dev/null @@ -1,259 +0,0 @@ ---- -components: - schemas: - ListModelsResponse: - type: object - properties: - object: - type: string - enum: - - list - data: - type: array - items: - $ref: '#/components/schemas/Model' - required: - - object - - data - Model: - type: object - properties: - source_url: - type: string - format: uri - description: URL to the source of the model. - example: https://huggingface.co/janhq/trinity-v1.2-GGUF/resolve/main/trinity-v1.2.Q4_K_M.gguf - id: - type: string - description: - Unique identifier used in chat-completions model_name, matches - folder name. - example: trinity-v1.2-7b - object: - type: string - example: model - name: - type: string - description: Name of the model. - example: Trinity-v1.2 7B Q4 - version: - type: string - default: '1.0' - description: The version number of the model. - description: - type: string - description: Description of the model. - example: - Trinity is an experimental model merge using the Slerp method. - Recommended for daily assistance purposes. 
- format: - type: string - description: State format of the model, distinct from the engine. - example: gguf - settings: - type: object - properties: - ctx_len: - type: integer - description: Context length. - example: 4096 - prompt_template: - type: string - example: "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant" - additionalProperties: false - parameters: - type: object - properties: - temperature: - example: 0.7 - top_p: - example: 0.95 - stream: - example: true - max_tokens: - example: 4096 - stop: - example: [] - frequency_penalty: - example: 0 - presence_penalty: - example: 0 - additionalProperties: false - metadata: - author: - type: string - example: Jan - tags: - example: - - 7B - - Merged - - Featured - size: - example: 4370000000, - cover: - example: https://raw.githubusercontent.com/janhq/jan/main/models/trinity-v1.2-7b/cover.png - engine: - example: nitro - ModelObject: - type: object - properties: - id: - type: string - description: | - The identifier of the model. - example: trinity-v1.2-7b - object: - type: string - description: | - The type of the object, indicating it's a model. - default: model - created: - type: integer - format: int64 - description: | - Unix timestamp representing the creation time of the model. - example: 1253935178 - owned_by: - type: string - description: | - The entity that owns the model. - example: _ - GetModelResponse: - type: object - properties: - source_url: - type: string - format: uri - description: URL to the source of the model. - example: https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf - id: - type: string - description: - Unique identifier used in chat-completions model_name, matches - folder name. - example: mistral-ins-7b-q4 - object: - type: string - example: model - name: - type: string - description: Name of the model. 
- example: Mistral Instruct 7B Q4 - version: - type: string - default: '1.0' - description: The version number of the model. - description: - type: string - description: Description of the model. - example: - Trinity is an experimental model merge using the Slerp method. - Recommended for daily assistance purposes. - format: - type: string - description: State format of the model, distinct from the engine. - example: gguf - settings: - type: object - properties: - ctx_len: - type: integer - description: Context length. - example: 4096 - prompt_template: - type: string - example: '[INST] {prompt} [/INST]' - additionalProperties: false - parameters: - type: object - properties: - temperature: - example: 0.7 - top_p: - example: 0.95 - stream: - example: true - max_tokens: - example: 4096 - stop: - example: [] - frequency_penalty: - example: 0 - presence_penalty: - example: 0 - additionalProperties: false - metadata: - author: - type: string - example: MistralAI - tags: - example: - - 7B - - Featured - - Foundation Model - size: - example: 4370000000, - cover: - example: https://raw.githubusercontent.com/janhq/jan/main/models/mistral-ins-7b-q4/cover.png - engine: - example: nitro - DeleteModelResponse: - type: object - properties: - id: - type: string - description: The identifier of the model that was deleted. - example: mistral-ins-7b-q4 - object: - type: string - description: Type of the object, indicating it's a model. - default: model - deleted: - type: boolean - description: Indicates whether the model was successfully deleted. - example: true - StartModelResponse: - type: object - properties: - id: - type: string - description: The identifier of the model that was started. - example: model-zephyr-7B - object: - type: string - description: Type of the object, indicating it's a model. - default: model - state: - type: string - description: The current state of the model after the start operation. 
- example: running - required: - - id - - object - - state - StopModelResponse: - type: object - properties: - id: - type: string - description: The identifier of the model that was started. - example: model-zephyr-7B - object: - type: string - description: Type of the object, indicating it's a model. - default: model - state: - type: string - description: The current state of the model after the start operation. - example: stopped - required: - - id - - object - - state - DownloadModelResponse: - type: object - properties: - message: - type: string - description: Message indicates Jan starting download corresponding model. - example: Starting download mistral-ins-7b-q4 diff --git a/docs/openapi/specs/threads.yaml b/docs/openapi/specs/threads.yaml deleted file mode 100644 index 285fcc82d7..0000000000 --- a/docs/openapi/specs/threads.yaml +++ /dev/null @@ -1,227 +0,0 @@ ---- -components: - schemas: - ThreadObject: - type: object - properties: - id: - type: string - description: | - The identifier of the thread, defaults to foldername. - example: thread_.... - object: - type: string - description: | - Type of the object, defaults to thread. - example: thread - title: - type: string - description: > - A brief summary or description of the thread, defaults to an empty - string. - example: funny physics joke - assistants: - type: array - description: '' - items: - properties: - assistant_id: - type: string - description: | - The identifier of assistant, defaults to "jan" - example: jan - model: - type: object - properties: - id: - type: string - description: '' - example: ... 
- settings: - type: object - description: > - Defaults to and overrides assistant.json's "settings" (and if none, - then model.json "settings") - parameters: - type: object - description: > - Defaults to and overrides assistant.json's "parameters" (and if - none, then model.json "parameters") - created: - type: integer - format: int64 - description: > - Unix timestamp representing the creation time of the thread, - defaults to file creation time. - example: 1231231 - metadata: - type: object - description: | - Metadata associated with the thread, defaults to an empty object. - example: {} - GetThreadResponse: - type: object - properties: - id: - type: string - description: The identifier of the thread. - example: thread_abc123 - object: - type: string - description: Type of the object - example: thread - created_at: - type: integer - format: int64 - description: Unix timestamp representing the creation time of the thread. - example: 1699014083 - assistants: - type: array - items: - type: string - description: List of assistants involved in the thread. - example: - - assistant-001 - metadata: - type: object - description: Metadata associated with the thread. - example: {} - messages: - type: array - items: - type: string - description: List of messages within the thread. - example: [] - CreateThreadResponse: - type: object - properties: - id: - type: string - description: The identifier of the newly created thread. - example: thread_abc123 - object: - type: string - description: Type of the object, indicating it's a thread. - example: thread - created_at: - type: integer - format: int64 - description: Unix timestamp representing the creation time of the thread. - example: 1699014083 - metadata: - type: object - description: Metadata associated with the newly created thread. - example: {} - CreateThreadObject: - type: object - properties: - object: - type: string - description: Type of the object, indicating it's a thread. 
- example: thread - title: - type: string - description: > - A brief summary or description of the thread, defaults to an empty - string. - example: funny physics joke - assistants: - type: array - description: assistant involved in the thread - items: - properties: - assistant_id: - type: string - description: | - The identifier of assistant, defaults to "jan" - example: jan - assistant_name: - type: string - description: | - The name of assistant, defaults to "Jan" - example: Jan - instructions: - type: string - description: > - The instruction of assistant, defaults to "Be my grammar corrector" - model: - type: object - properties: - id: - type: string - description: Model id - example: mistral-ins-7b-q4 - settings: - type: object - description: > - Defaults to and overrides assistant.json's "settings" (and if none, - then model.json "settings") - parameters: - type: object - description: > - Defaults to and overrides assistant.json's "parameters" (and if - none, then model.json "parameters") - engine: - type: string - description: Engine id - example: nitro - metadata: - type: object - description: | - Metadata associated with the thread, defaults to an empty object. - ThreadMessageObject: - type: object - properties: - role: - type: string - description: | - "Role of the sender, either 'user' or 'assistant'." - enum: - - user - - assistant - content: - type: string - description: | - "Text content of the message." - file_ids: - type: array - items: - type: string - description: | - "Array of file IDs associated with the message, if any." - ModifyThreadResponse: - type: object - properties: - id: - type: string - description: | - "The identifier of the modified thread." - example: thread_abc123 - object: - type: string - description: Type of the object, indicating it's a thread. - example: thread - created_at: - type: integer - format: int64 - description: Unix timestamp representing the creation time of the thread. 
- example: 1699014083 - metadata: - type: object - description: Metadata associated with the modified thread. - example: {} - DeleteThreadResponse: - type: object - properties: - id: - type: string - description: The identifier of the deleted thread. - example: thread_abc123 - object: - type: string - description: Type of the object, indicating the thread has been deleted. - example: thread.deleted - deleted: - type: boolean - description: Indicates whether the thread was successfully deleted. - example: true diff --git a/docs/openapi/version.txt b/docs/openapi/version.txt deleted file mode 100644 index 5656be6241..0000000000 --- a/docs/openapi/version.txt +++ /dev/null @@ -1 +0,0 @@ -v1.23.2 \ No newline at end of file diff --git a/electron/docs/openapi/.gitkeep b/electron/docs/openapi/.gitkeep deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/electron/package.json b/electron/package.json index c4609e7af1..f8611734a5 100644 --- a/electron/package.json +++ b/electron/package.json @@ -15,7 +15,6 @@ "build/**/*.{js,map}", "pre-install", "themes", - "docs/**/*", "scripts/**/*", "icons/**/*", "themes", diff --git a/package.json b/package.json index 255dda6c71..8b24f14ba6 100644 --- a/package.json +++ b/package.json @@ -27,12 +27,12 @@ "pre-install:linux": "find extensions -type f -path \"**/*.tgz\" -exec cp {} pre-install \\;", "pre-install:win32": "powershell -Command \"Get-ChildItem -Path \"extensions\" -Recurse -File -Filter \"*.tgz\" | ForEach-Object { Copy-Item -Path $_.FullName -Destination \"pre-install\" }\"", "pre-install": "run-script-os", - "copy:assets": "cpx \"pre-install/*.tgz\" \"electron/pre-install/\" && cpx \"themes/**\" \"electron/themes\" && cpx \"docs/openapi/**\" \"electron/docs/openapi\"", + "copy:assets": "cpx \"pre-install/*.tgz\" \"electron/pre-install/\" && cpx \"themes/**\" \"electron/themes\"", "dev:electron": "yarn copy:assets && yarn workspace jan dev", "dev:web": "yarn workspace @janhq/web dev", - "dev:server": "yarn 
copy:assets && yarn workspace @janhq/server dev", + "dev:server": "yarn workspace @janhq/server dev", "dev": "turbo run dev --parallel --filter=!@janhq/server", - "build:server": "yarn copy:assets && cd server && yarn install && yarn run build", + "build:server": "cd server && yarn install && yarn run build", "build:core": "cd core && yarn install && yarn run build", "build:web": "yarn workspace @janhq/web build && cpx \"web/out/**\" \"electron/renderer/\"", "build:electron": "yarn copy:assets && yarn workspace jan build", diff --git a/server/cortex.json b/server/cortex.json new file mode 100644 index 0000000000..917cff3548 --- /dev/null +++ b/server/cortex.json @@ -0,0 +1,6308 @@ +{ + "openapi": "3.0.0", + "paths": { + "/assistants": { + "post": { + "operationId": "AssistantsController_create", + "summary": "Create assistant", + "description": "Creates a new assistant.", + "parameters": [], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CreateAssistantDto" + } + } + } + }, + "responses": { + "201": { + "description": "The assistant has been successfully created." + } + }, + "tags": ["Assistants"] + }, + "get": { + "operationId": "AssistantsController_findAll", + "summary": "List assistants", + "description": "Returns a list of assistants.", + "parameters": [ + { + "name": "limit", + "required": false, + "in": "query", + "description": "A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20.", + "schema": { + "type": "number" + } + }, + { + "name": "order", + "required": false, + "in": "query", + "description": "Sort order by the created_at timestamp of the objects. asc for ascending order and desc for descending order.", + "schema": { + "type": "string" + } + }, + { + "name": "after", + "required": false, + "in": "query", + "description": "A cursor for use in pagination. after is an object ID that defines your place in the list. 
For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include after=obj_foo in order to fetch the next page of the list.", + "schema": { + "type": "string" + } + }, + { + "name": "before", + "required": false, + "in": "query", + "description": "A cursor for use in pagination. before is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include before=obj_foo in order to fetch the previous page of the list.", + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "Ok", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/AssistantEntity" + } + } + } + } + } + }, + "tags": ["Assistants"] + } + }, + "/assistants/{id}": { + "get": { + "operationId": "AssistantsController_findOne", + "summary": "Get assistant", + "description": "Retrieves a specific assistant defined by an assistant's `id`.", + "parameters": [ + { + "name": "id", + "required": true, + "in": "path", + "description": "The unique identifier of the assistant.", + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "Ok", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/AssistantEntity" + } + } + } + } + }, + "tags": ["Assistants"] + }, + "delete": { + "operationId": "AssistantsController_remove", + "summary": "Delete assistant", + "description": "Deletes a specific assistant defined by an assistant's `id`.", + "parameters": [ + { + "name": "id", + "required": true, + "in": "path", + "description": "The unique identifier of the assistant.", + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "The assistant has been successfully deleted.", + "content": { + "application/json": { + "schema": { + "$ref": 
"#/components/schemas/DeleteAssistantResponseDto" + } + } + } + } + }, + "tags": ["Assistants"] + } + }, + "/healthz": { + "get": { + "operationId": "HealthController_check", + "summary": "Check health", + "description": "Performs a comprehensive check of the application's health status.", + "parameters": [], + "responses": { + "200": { + "description": "Ok", + "content": { + "application/json": {} + } + } + }, + "tags": ["Server"] + } + }, + "/processManager/destroy": { + "delete": { + "operationId": "Terminate server process", + "summary": "Terminate server", + "description": "Initiates the shutdown process for the server, ensuring that all active connections are gracefully closed and any ongoing processes are properly terminated.", + "parameters": [], + "responses": { + "200": { + "description": "Ok", + "content": { + "application/json": {} + } + } + }, + "tags": ["Server"] + } + }, + "/embeddings": { + "post": { + "summary": "Create embeddings", + "description": "Creates an embedding vector representing the input text.", + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "input": { + "oneOf": [ + { + "type": "string", + "description": "The string that will be turned into an embedding." + }, + { + "type": "array", + "description": "The array of strings that will be turned into an embedding.", + "items": { + "type": "string" + } + }, + { + "type": "array", + "description": "The array of integers that will be turned into an embedding.", + "items": { + "type": "integer" + } + }, + { + "type": "array", + "description": "The array of arrays containing integers that will be turned into an embedding.", + "items": { + "type": "array", + "items": { + "type": "integer" + } + } + } + ], + "description": "Input text to embed, encoded as a string or array of tokens. Cannot be empty." 
+ }, + "model": { + "type": "string", + "description": "ID of the model to use.", + "example": "text-embedding-ada-002" + }, + "encoding_format": { + "type": "string", + "description": "The format to return the embeddings in.", + "enum": ["float", "base64"], + "default": "float" + } + }, + "required": ["input", "model"] + } + } + } + }, + "responses": { + "200": { + "description": "A list of embedding vectors", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "type": "object", + "properties": { + "index": { + "type": "integer", + "description": "The index of the embedding in the list of embeddings." + }, + "embedding": { + "type": "array", + "items": { + "type": "number" + }, + "description": "The embedding vector, which is a list of floats." + }, + "object": { + "type": "string", + "description": "The object type, which is always 'embedding'.", + "example": "embedding" + } + } + } + } + } + } + } + } + } + }, + "tags": ["Embeddings"] + } + }, + "/chat/completions": { + "post": { + "operationId": "ChatController_create", + "summary": "Create chat completion", + "description": "Creates a model response for the given conversation. 
The following parameters are not working for the `TensorRT-LLM` engine:\n- `frequency_penalty`\n- `presence_penalty`\n- `top_p`", + "parameters": [], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CreateChatCompletionDto" + } + } + } + }, + "responses": { + "200": { + "description": "Ok", + "content": { + "application/json": { + "schema": { + "oneOf": [ + { + "title": "Chat Completion Response", + "$ref": "#/components/schemas/ChatCompletionResponseDto" + }, + { + "title": "Chat Completion Chunk Response", + "$ref": "#/components/schemas/ChatCompletionChunkResponseDto" + } + ] + } + } + } + } + }, + "tags": ["Chat"] + } + }, + "/models/pull": { + "post": { + "operationId": "ModelsController_pullModel", + "summary": "Pull a model", + "description": "Pull a model from a remote source.", + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/PullModelRequest" + } + } + } + }, + "responses": { + "200": { + "description": "Successful response", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + }, + "task": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "items": { + "type": "array", + "items": { + "type": "object", + "properties": { + "bytes": { + "type": "integer" + }, + "checksum": { + "type": "string" + }, + "downloadUrl": { + "type": "string" + }, + "downloadedBytes": { + "type": "integer" + }, + "id": { + "type": "string" + }, + "localPath": { + "type": "string" + } + } + } + }, + "type": { + "type": "string" + } + } + } + } + }, + "example": { + "message": "Model start downloading!", + "task": { + "id": "TheBloke:Mistral-7B-Instruct-v0.1-GGUF:mistral-7b-instruct-v0.1.Q3_K_L.gguf", + "items": [ + { + "bytes": 3822024352, + "checksum": "N/A", + "downloadUrl": 
"https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q3_K_L.gguf", + "downloadedBytes": 0, + "id": "TheBloke:Mistral-7B-Instruct-v0.1-GGUF:mistral-7b-instruct-v0.1.Q3_K_L.gguf", + "localPath": "/Users/user_name/cortexcpp/models/huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/mistral-7b-instruct-v0.1.Q3_K_L.gguf" + } + ], + "type": "Model" + } + } + } + } + }, + "400": { + "description": "Bad request", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SimpleErrorResponse" + } + } + } + } + }, + "tags": ["Pulling Models"] + }, + "delete": { + "tags": ["Pulling Models"], + "summary": "Stop model download", + "description": "Stops the download of a model with the corresponding taskId provided in the request body", + "operationId": "ModelsController_stopModelDownload", + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "taskId": { + "type": "string", + "description": "The unique identifier of the download task to be stopped" + } + }, + "required": ["taskId"] + } + } + } + }, + "responses": { + "200": { + "description": "Download stopped successfully", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string", + "example": "Download stopped successfully" + }, + "taskId": { + "type": "string", + "example": "task-123456" + } + } + } + } + } + }, + "400": { + "description": "Bad request", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "string", + "example": "Invalid taskId" + } + } + } + } + } + }, + "404": { + "description": "Task not found", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "string", + "example": "Download task not found" + } + } + } + } + } + }, + "500": { + "description": "Internal server 
error", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "string", + "example": "An unexpected error occurred" + } + } + } + } + } + } + } + } + }, + "/models/add": { + "post": { + "operationId": "ModelsController_addModel", + "summary": "Add a remote model", + "description": "Add a new remote model configuration to the system.", + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/AddModelRequest" + } + } + } + }, + "responses": { + "200": { + "description": "Successful response", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + }, + "model": { + "type": "object", + "properties": { + "model": { + "type": "string" + }, + "engine": { + "type": "string" + }, + "version": { + "type": "string" + } + } + } + } + }, + "example": { + "message": "Model added successfully!", + "model": { + "model": "claude-3-5-sonnet-20241022", + "engine": "anthropic", + "version": "2023-06-01" + } + } + } + } + }, + "400": { + "description": "Bad request", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SimpleErrorResponse" + } + } + } + } + }, + "tags": ["Pulling Models"] + } + }, + "/models": { + "get": { + "operationId": "ModelsController_findAll", + "summary": "List models", + "description": "Lists the currently available models, and provides basic information about each one such as the owner and availability. 
[Equivalent to OpenAI's list model](https://platform.openai.com/docs/api-reference/models/list).", + "parameters": [], + "responses": { + "200": { + "description": "Ok", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ListModelsResponseDto" + } + } + } + } + }, + "tags": ["Running Models"] + } + }, + "/models/start": { + "post": { + "operationId": "ModelsController_startModel", + "summary": "Start model", + "description": "Load a model into memory. Note: Request body parameters will override those loaded from model.yml", + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ModelStartDto" + }, + "example": { + "model": "llama3:8b-gguf-q6-k" + } + } + } + }, + "responses": { + "200": { + "description": "The model has been successfully started.", + "content": { + "application/json": { + "example": { + "message": "Started successfully!" + } + } + } + } + }, + "tags": ["Running Models"] + } + }, + "/models/stop": { + "post": { + "operationId": "ModelsController_stopModel", + "summary": "Stop model", + "description": "Unload model from memory", + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ModelStopDto" + }, + "example": { + "model": "llama3:8b-gguf-q6-k" + } + } + } + }, + "responses": { + "200": { + "description": "The model has been successfully stopped.", + "content": { + "application/json": { + "example": { + "message": "Stopped successfully!" + } + } + } + } + }, + "tags": ["Running Models"] + } + }, + "/models/{id}": { + "get": { + "operationId": "ModelsController_findOne", + "summary": "Get model", + "description": "Retrieves a model instance, providing basic information about the model such as the owner and permissions. 
[Equivalent to OpenAI's list model](https://platform.openai.com/docs/api-reference/models/retrieve).", + "parameters": [ + { + "name": "id", + "required": true, + "in": "path", + "description": "The unique identifier of the model.", + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "Ok", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ModelDto" + } + } + } + } + }, + "tags": ["Running Models"] + }, + "delete": { + "operationId": "ModelsController_remove", + "summary": "Delete model", + "description": "Deletes a model. [Equivalent to OpenAI's delete model](https://platform.openai.com/docs/api-reference/models/delete).", + "parameters": [ + { + "name": "id", + "required": true, + "in": "path", + "description": "The unique identifier of the model.", + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "The model has been successfully deleted.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DeleteModelResponseDto" + } + } + } + } + }, + "tags": ["Running Models"] + } + }, + "/models/{model}": { + "patch": { + "operationId": "ModelsController_update", + "summary": "Update model", + "description": "Updates a model instance defined by a model's `id`.", + "parameters": [ + { + "name": "model", + "required": true, + "in": "path", + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/UpdateModelDto" + } + } + } + }, + "responses": { + "200": { + "description": "The model has been successfully updated.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/UpdateModelDto" + } + } + } + } + }, + "tags": ["Running Models"] + } + }, + "/models/import": { + "post": { + "operationId": "ModelsController_importModel", + "summary": "Import model", + "description": "Imports a model from 
a specified path.", + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ImportModelRequest" + }, + "example": { + "model": "model-id", + "modelPath": "/path/to/gguf", + "name": "model display name", + "option": "symlink" + } + } + } + }, + "responses": { + "200": { + "description": "Model is imported successfully!", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ImportModelResponse" + }, + "example": { + "message": "Model is imported successfully!", + "modelHandle": "model-id", + "result": "OK" + } + } + } + } + }, + "tags": ["Pulling Models"] + } + }, + "/models/sources": { + "post": { + "summary": "Add a model source", + "description": "User can add a Huggingface Organization or Repository", + "requestBody": { + "required": false, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "source": { + "type": "string", + "description": "The url of model source to add", + "example": "https://huggingface.co/cortexso/tinyllama" + } + } + } + } + } + }, + "responses": { + "200": { + "description": "Successful installation", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string", + "example": "Added model source" + } + } + } + } + } + } + }, + "tags": ["Pulling Models"] + }, + "delete": { + "summary": "Remove a model source", + "description": "User can remove a Huggingface Organization or Repository", + "requestBody": { + "required": false, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "source": { + "type": "string", + "description": "The url of model source to remove", + "example": "https://huggingface.co/cortexso/tinyllama" + } + } + } + } + } + }, + "responses": { + "200": { + "description": "Successful uninstallation", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": 
{ + "message": { + "type": "string", + "description": "Removed model source successfully!", + "example": "Removed model source successfully!" + } + } + } + } + } + }, + "400": { + "description": "Bad request", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "string", + "description": "Error message describing the issue with the request" + } + } + } + } + } + } + }, + "tags": ["Pulling Models"] + } + }, + "/threads": { + "post": { + "summary": "Create Thread", + "description": "Creates a new thread with optional metadata.", + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "metadata": { + "type": "object", + "properties": { + "title": { + "type": "string", + "description": "Title of the thread" + } + }, + "description": "Optional metadata for the thread" + } + } + }, + "example": { + "metadata": { + "title": "New Thread" + } + } + } + }, + "required": false + }, + "responses": { + "200": { + "description": "Thread created successfully", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "created_at": { + "type": "integer", + "description": "Unix timestamp of when the thread was created" + }, + "id": { + "type": "string", + "description": "Unique identifier for the thread" + }, + "metadata": { + "type": "object", + "properties": { + "title": { + "type": "string", + "description": "Title of the thread" + } + }, + "description": "Metadata associated with the thread" + }, + "object": { + "type": "string", + "description": "Type of object, always 'thread'" + } + }, + "required": ["created_at", "id", "object"] + }, + "example": { + "created_at": 1734020845, + "id": "0001KNP3QDX314435VAEGW1Z2X", + "metadata": { + "title": "New Thread" + }, + "object": "thread" + } + } + } + } + }, + "tags": ["Threads"] + }, + "get": { + "summary": "List Threads", + "description": "Returns a list of threads with their 
metadata.", + "responses": { + "200": { + "description": "List of threads retrieved successfully", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "object": { + "type": "string", + "description": "Type of the list response, always 'list'" + }, + "data": { + "type": "array", + "description": "Array of thread objects", + "items": { + "type": "object", + "properties": { + "created_at": { + "type": "integer", + "description": "Unix timestamp of when the thread was created" + }, + "id": { + "type": "string", + "description": "Unique identifier for the thread" + }, + "metadata": { + "type": "object", + "properties": { + "title": { + "type": "string", + "description": "Title of the thread" + }, + "lastMessage": { + "type": "string", + "description": "Content of the last message in the thread" + } + }, + "description": "Metadata associated with the thread" + }, + "object": { + "type": "string", + "description": "Type of object, always 'thread'" + } + }, + "required": ["created_at", "id", "object"] + } + } + }, + "required": ["object", "data"] + }, + "example": { + "data": [ + { + "created_at": 1734020845, + "id": "0001KNP3QDX314435VAEGW1Z2X", + "metadata": { + "title": "New Thread" + }, + "object": "thread" + }, + { + "created_at": 1734020803, + "id": "0001KNP3P3DAQSDVEQGRBTCTNJ", + "metadata": { + "title": "" + }, + "object": "thread" + } + ], + "object": "list" + } + } + } + } + }, + "tags": ["Threads"] + } + }, + "/threads/{id}": { + "get": { + "summary": "Retrieve Thread", + "description": "Retrieves a specific thread by its ID.", + "parameters": [ + { + "name": "id", + "in": "path", + "required": true, + "description": "The ID of the thread to retrieve", + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "Thread retrieved successfully", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "created_at": { + "type": "integer", + "description": 
"Unix timestamp of when the thread was created" + }, + "id": { + "type": "string", + "description": "Unique identifier for the thread" + }, + "metadata": { + "type": "object", + "properties": { + "lastMessage": { + "type": "string", + "description": "Content of the last message in the thread" + }, + "title": { + "type": "string", + "description": "Title of the thread" + } + }, + "description": "Metadata associated with the thread" + }, + "object": { + "type": "string", + "description": "Type of object, always 'thread'" + } + }, + "required": ["created_at", "id", "object"] + }, + "example": { + "created_at": 1732370026, + "id": "jan_1732370027", + "metadata": { + "lastMessage": "Based on the context, I'm not sure how to build a unique experience quickly and easily. The text mentions that there are some concerns about Android apps providing consistent experiences for different users, which makes me skeptical about building one.\n\nSpecifically, it says:\n\n* \"Might not pass CTS\" (Computer Science Technology standards)\n* \"Might not comply with CDD\" (Consumer Development Division standards)\n\nThis suggests that building a unique experience for all users could be challenging or impossible. 
Therefore, I don't know how to build a unique experience quickly and easily.\n\nWould you like me to try again?", + "title": "hello" + }, + "object": "thread" + } + } + } + } + }, + "tags": ["Threads"] + }, + "patch": { + "summary": "Modify Thread", + "description": "Updates a specific thread's metadata.", + "parameters": [ + { + "name": "id", + "in": "path", + "required": true, + "description": "The ID of the thread to modify", + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "metadata": { + "type": "object", + "properties": { + "title": { + "type": "string", + "description": "New title for the thread" + } + }, + "description": "Metadata to update" + } + } + }, + "example": { + "metadata": { + "title": "my title" + } + } + } + } + }, + "responses": { + "200": { + "description": "Thread modified successfully", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "created_at": { + "type": "integer", + "description": "Unix timestamp of when the thread was created" + }, + "id": { + "type": "string", + "description": "Unique identifier for the thread" + }, + "metadata": { + "type": "object", + "properties": { + "title": { + "type": "string", + "description": "Updated title of the thread" + } + }, + "description": "Updated metadata for the thread" + }, + "object": { + "type": "string", + "description": "Type of object, always 'thread'" + } + }, + "required": ["created_at", "id", "object"] + }, + "example": { + "created_at": 1733301054, + "id": "0001KN04SY7D75K0MPTXMXCH39", + "metadata": { + "title": "my title" + }, + "object": "thread" + } + } + } + } + }, + "tags": ["Threads"] + }, + "delete": { + "summary": "Delete Thread", + "description": "Deletes a specific thread by its ID.", + "parameters": [ + { + "name": "id", + "in": "path", + "required": true, + "description": "The ID of the thread to delete", 
+ "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "Thread deleted successfully", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "deleted": { + "type": "boolean", + "description": "Indicates if the thread was successfully deleted" + }, + "id": { + "type": "string", + "description": "ID of the deleted thread" + }, + "object": { + "type": "string", + "description": "Type of object, always 'thread.deleted'" + } + }, + "required": ["deleted", "id", "object"] + }, + "example": { + "deleted": true, + "id": "jan_1732370027", + "object": "thread.deleted" + } + } + } + } + }, + "tags": ["Threads"] + } + }, + "/threads/{thread_id}/messages": { + "post": { + "summary": "Create Message", + "description": "Creates a new message in a thread.", + "parameters": [ + { + "name": "thread_id", + "in": "path", + "required": true, + "description": "The ID of the thread to create the message in", + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "role": { + "type": "string", + "description": "Role of the message sender", + "enum": ["user", "assistant"] + }, + "content": { + "type": "string", + "description": "The content of the message" + } + }, + "required": ["role", "content"] + }, + "example": { + "role": "user", + "content": "Hello, world!" 
+ } + } + } + }, + "responses": { + "200": { + "description": "Message created successfully", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "Unique identifier for the message" + }, + "object": { + "type": "string", + "description": "Type of object, always 'thread.message'" + }, + "created_at": { + "type": "integer", + "description": "Unix timestamp of when the message was created" + }, + "completed_at": { + "type": "integer", + "description": "Unix timestamp of when the message was completed" + }, + "thread_id": { + "type": "string", + "description": "ID of the thread this message belongs to" + }, + "role": { + "type": "string", + "description": "Role of the message sender", + "enum": ["user", "assistant"] + }, + "status": { + "type": "string", + "description": "Status of the message", + "enum": ["completed"] + }, + "content": { + "type": "array", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string", + "description": "Type of content", + "enum": ["text"] + }, + "text": { + "type": "object", + "properties": { + "value": { + "type": "string", + "description": "The message text" + }, + "annotations": { + "type": "array", + "description": "Array of annotations for the text" + } + } + } + } + } + }, + "metadata": { + "type": "object", + "description": "Additional metadata for the message" + } + }, + "required": [ + "id", + "object", + "created_at", + "completed_at", + "thread_id", + "role", + "status", + "content" + ] + }, + "example": { + "completed_at": 1734023130, + "content": [ + { + "text": { + "annotations": [], + "value": "Hello, world!" 
+ }, + "type": "text" + } + ], + "created_at": 1734023130, + "id": "0001KNP5YT00GW0X476W5TVBFE", + "metadata": {}, + "object": "thread.message", + "role": "user", + "status": "completed", + "thread_id": "jan_1732370027" + } + } + } + } + }, + "tags": ["Messages"] + }, + "get": { + "summary": "List Messages", + "description": "Retrieves a list of messages in a thread with optional pagination and filtering.", + "parameters": [ + { + "name": "thread_id", + "in": "path", + "required": true, + "description": "The ID of the thread to list messages from", + "schema": { + "type": "string" + } + }, + { + "name": "limit", + "in": "query", + "required": false, + "description": "Maximum number of messages to return", + "schema": { + "type": "integer" + } + }, + { + "name": "order", + "in": "query", + "required": false, + "description": "Sort order of messages", + "schema": { + "type": "string", + "enum": ["asc", "desc"] + } + }, + { + "name": "after", + "in": "query", + "required": false, + "description": "Cursor for fetching messages after this message ID", + "schema": { + "type": "string" + } + }, + { + "name": "before", + "in": "query", + "required": false, + "description": "Cursor for fetching messages before this message ID", + "schema": { + "type": "string" + } + }, + { + "name": "run_id", + "in": "query", + "required": false, + "description": "Filter messages by run ID", + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "Messages retrieved successfully", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "object": { + "type": "string", + "description": "Type of the list response, always 'list'" + }, + "data": { + "type": "array", + "description": "Array of message objects", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "Unique identifier for the message" + }, + "object": { + "type": "string", + "description": "Type of object, always 
'thread.message'" + }, + "created_at": { + "type": "integer", + "description": "Unix timestamp of when the message was created" + }, + "thread_id": { + "type": "string", + "description": "ID of the thread this message belongs to" + }, + "role": { + "type": "string", + "description": "Role of the message sender", + "enum": ["assistant", "user"] + }, + "status": { + "type": "string", + "description": "Status of the message", + "enum": ["completed"] + }, + "content": { + "type": "array", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string", + "description": "Type of content", + "enum": ["text"] + }, + "text": { + "type": "object", + "properties": { + "value": { + "type": "string", + "description": "The message text" + }, + "annotations": { + "type": "array", + "description": "Array of annotations for the text" + } + } + } + } + } + }, + "metadata": { + "type": "object", + "description": "Additional metadata for the message" + }, + "attachments": { + "type": "array", + "items": { + "type": "object", + "properties": { + "file_id": { + "type": "string", + "description": "ID of the attached file" + }, + "tools": { + "type": "array", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string", + "description": "Type of tool used" + } + } + } + } + } + } + } + }, + "required": [ + "id", + "object", + "created_at", + "thread_id", + "role", + "content" + ] + } + } + }, + "required": ["object", "data"] + }, + "example": { + "data": [ + { + "content": [ + { + "text": { + "annotations": [], + "value": "Based on the context, I'm not sure how to build a unique experience quickly and easily..." 
+ }, + "type": "text" + } + ], + "created_at": 1732633637, + "id": "01JDMG6CG6DD4B3RQN82QD8Q7P", + "metadata": {}, + "object": "thread.message", + "role": "assistant", + "status": "completed", + "thread_id": "jan_1732370027" + } + ], + "object": "list" + } + } + } + } + }, + "tags": ["Messages"] + } + }, + "/threads/{thread_id}/messages/{message_id}": { + "get": { + "summary": "Retrieve Message", + "description": "Retrieves a specific message from a thread by its ID.", + "parameters": [ + { + "name": "thread_id", + "in": "path", + "required": true, + "description": "The ID of the thread containing the message", + "schema": { + "type": "string" + } + }, + { + "name": "message_id", + "in": "path", + "required": true, + "description": "The ID of the message to retrieve", + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "Message retrieved successfully", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "Unique identifier for the message" + }, + "object": { + "type": "string", + "description": "Type of object, always 'thread.message'" + }, + "created_at": { + "type": "integer", + "description": "Unix timestamp of when the message was created" + }, + "thread_id": { + "type": "string", + "description": "ID of the thread this message belongs to" + }, + "role": { + "type": "string", + "description": "Role of the message sender", + "enum": ["assistant", "user"] + }, + "status": { + "type": "string", + "description": "Status of the message", + "enum": ["completed"] + }, + "content": { + "type": "array", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string", + "description": "Type of content", + "enum": ["text"] + }, + "text": { + "type": "object", + "properties": { + "value": { + "type": "string", + "description": "The message text" + }, + "annotations": { + "type": "array", + "description": "Array of annotations for the 
text" + } + } + } + } + } + }, + "metadata": { + "type": "object", + "description": "Additional metadata for the message" + }, + "attachments": { + "type": "array", + "items": { + "type": "object", + "properties": { + "file_id": { + "type": "string", + "description": "ID of the attached file" + }, + "tools": { + "type": "array", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string", + "description": "Type of tool used" + } + } + } + } + } + } + } + }, + "required": [ + "id", + "object", + "created_at", + "thread_id", + "role", + "content" + ] + }, + "example": { + "attachments": [ + { + "file_id": "01JDMG617BHMPW859VE18BPQ7Y", + "tools": [ + { + "type": "file_search" + } + ] + } + ], + "content": [ + { + "text": { + "annotations": [], + "value": "summary this" + }, + "type": "text" + } + ], + "created_at": 1732633625, + "id": "01JDMG617BHMPW859VE18BPQ7Y", + "metadata": {}, + "object": "thread.message", + "role": "user", + "status": "completed", + "thread_id": "jan_1732370027" + } + } + } + } + }, + "tags": ["Messages"] + }, + "patch": { + "summary": "Modify Message", + "description": "Modifies a specific message's content or metadata in a thread.", + "parameters": [ + { + "name": "thread_id", + "in": "path", + "required": true, + "description": "The ID of the thread containing the message", + "schema": { + "type": "string" + } + }, + { + "name": "message_id", + "in": "path", + "required": true, + "description": "The ID of the message to modify", + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "content": { + "type": "object", + "description": "New content for the message" + }, + "metadata": { + "type": "object", + "description": "Updated metadata for the message", + "additionalProperties": true + } + } + }, + "example": { + "content": {}, + "metadata": { + "test": 1 + } + } + } + } + }, + "responses": { + 
"200": { + "description": "Message modified successfully", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "Unique identifier for the message" + }, + "object": { + "type": "string", + "description": "Type of object, always 'thread.message'" + }, + "created_at": { + "type": "integer", + "description": "Unix timestamp of when the message was created" + }, + "completed_at": { + "type": "integer", + "description": "Unix timestamp of when the message was completed" + }, + "thread_id": { + "type": "string", + "description": "ID of the thread this message belongs to" + }, + "role": { + "type": "string", + "description": "Role of the message sender", + "enum": ["user", "assistant"] + }, + "status": { + "type": "string", + "description": "Status of the message", + "enum": ["completed"] + }, + "content": { + "type": "array", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string", + "description": "Type of content", + "enum": ["text"] + }, + "text": { + "type": "object", + "properties": { + "value": { + "type": "string", + "description": "The message text" + }, + "annotations": { + "type": "array", + "description": "Array of annotations for the text" + } + } + } + } + } + }, + "metadata": { + "type": "object", + "description": "Additional metadata for the message", + "additionalProperties": true + } + }, + "required": [ + "id", + "object", + "created_at", + "completed_at", + "thread_id", + "role", + "status", + "content" + ] + }, + "example": { + "completed_at": 1734023130, + "content": [ + { + "text": { + "annotations": [], + "value": "Hello, world!" 
+ }, + "type": "text" + } + ], + "created_at": 1734023130, + "id": "0001KNP5YT00GW0X476W5TVBFE", + "metadata": { + "test": 1 + }, + "object": "thread.message", + "role": "user", + "status": "completed", + "thread_id": "jan_1732370027" + } + } + } + } + }, + "tags": ["Messages"] + }, + "delete": { + "summary": "Delete Message", + "description": "Deletes a specific message from a thread.", + "parameters": [ + { + "name": "thread_id", + "in": "path", + "required": true, + "description": "The ID of the thread containing the message", + "schema": { + "type": "string" + } + }, + { + "name": "message_id", + "in": "path", + "required": true, + "description": "The ID of the message to delete", + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "Message deleted successfully", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "deleted": { + "type": "boolean", + "description": "Indicates if the message was successfully deleted" + }, + "id": { + "type": "string", + "description": "ID of the deleted message" + }, + "object": { + "type": "string", + "description": "Type of object, always 'thread.message.deleted'" + } + }, + "required": ["deleted", "id", "object"] + }, + "example": { + "deleted": true, + "id": "01JDCMZPBGDP276D6Z2QN2MJMX", + "object": "thread.message.deleted" + } + } + } + } + }, + "tags": ["Messages"] + } + }, + "/system": { + "delete": { + "operationId": "SystemController_delete", + "summary": "Stop api server", + "description": "Stops the Cortex API endpoint server for the detached mode.", + "parameters": [], + "responses": { + "200": { + "description": "" + } + }, + "tags": ["System"] + }, + "get": { + "operationId": "SystemController_get", + "summary": "Get health status", + "description": "Retrieves the health status of your Cortex's system.", + "parameters": [], + "responses": { + "200": { + "description": "Ok" + } + }, + "tags": ["System"] + } + }, + 
"/system/events/download": { + "get": { + "operationId": "SystemController_downloadEvent", + "summary": "Get download status", + "description": "Retrieves the model's download status.", + "parameters": [], + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "type": "object" + } + } + } + } + }, + "tags": ["System"] + } + }, + "/system/events/model": { + "get": { + "operationId": "SystemController_modelEvent", + "summary": "Get model status", + "description": "Retrieves all the available model statuses within Cortex.", + "parameters": [], + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "type": "object" + } + } + } + } + }, + "tags": ["System"] + } + }, + "/system/events/resources": { + "get": { + "operationId": "SystemController_resourcesEvent", + "summary": "Get resources status", + "description": "Retrieves the resources status of the system.", + "parameters": [], + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "type": "object" + } + } + } + } + }, + "tags": ["System"] + } + }, + "/engines/{name}": { + "get": { + "operationId": "EnginesController_listInstalledEngines", + "summary": "List installed engines", + "description": "List installed engines for a particular engine type.", + "parameters": [ + { + "name": "name", + "in": "path", + "required": true, + "schema": { + "type": "string", + "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], + "default": "llama-cpp" + }, + "description": "The type of engine" + } + ], + "responses": { + "200": { + "description": "Successful response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "type": "object", + "properties": { + "engine": { + "type": "string", + "example": "llama-cpp" + }, + "name": { + "type": "string", + "example": "mac-arm64" + }, + "version": { + "type": "string", + "example": "0.1.35-28.10.24" + } + } + 
} + }, + "example": [ + { + "engine": "llama-cpp", + "name": "mac-arm64", + "version": "0.1.35-28.10.24" + }, + { + "engine": "llama-cpp", + "name": "linux-amd64-avx", + "version": "0.1.35-27.10.24" + } + ] + } + } + } + }, + "tags": ["Engines"] + } + }, + "/engines/{name}/releases": { + "get": { + "summary": "List released engines", + "description": "List released engines of a specific engine type.", + "parameters": [ + { + "name": "name", + "in": "path", + "required": true, + "schema": { + "type": "string", + "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], + "default": "llama-cpp" + }, + "description": "The type of engine" + } + ], + "responses": { + "200": { + "description": "Successful installation", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "type": "object", + "properties": { + "draft": { + "type": "boolean", + "example": false + }, + "name": { + "type": "string", + "example": "v0.1.39-20.11.24" + }, + "prerelease": { + "type": "boolean", + "example": true + }, + "published_at": { + "type": "string", + "format": "date-time", + "example": "2024-11-20T17:39:40Z" + }, + "url": { + "type": "string", + "example": "https://api.github.com/repos/janhq/cortex.llamacpp/releases/186479804" + } + } + } + } + } + } + } + }, + "tags": ["Engines"] + } + }, + "/engines/{name}/releases/latest": { + "get": { + "summary": "Get latest release", + "description": "Return variants for the latest engine release of a specific engine type.", + "parameters": [ + { + "name": "name", + "in": "path", + "required": true, + "schema": { + "type": "string", + "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], + "default": "llama-cpp" + }, + "description": "The type of engine" + } + ], + "responses": { + "200": { + "description": "Successful installation", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "type": "object", + "properties": { + "created_at": { + "type": "string", + "format": "date-time", + 
"example": "2024-11-15T10:39:39Z" + }, + "download_count": { + "type": "integer", + "example": 76 + }, + "name": { + "type": "string", + "example": "0.1.39-linux-amd64-avx-cuda-11-7" + }, + "size": { + "type": "integer", + "example": 151215080 + } + } + } + } + } + } + } + }, + "tags": ["Engines"] + } + }, + "/engines/{name}/install": { + "post": { + "summary": "Install an engine", + "description": "Install an engine of a specific type, with optional version and variant. If none are provided, the latest version and most suitable variant will be installed.", + "parameters": [ + { + "name": "name", + "in": "path", + "required": true, + "schema": { + "type": "string", + "enum": [ + "llama-cpp", + "onnxruntime", + "tensorrt-llm", + "openai", + "anthropic" + ], + "default": "llama-cpp" + }, + "description": "The type of engine" + } + ], + "requestBody": { + "required": false, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "version": { + "type": "string", + "description": "The version of the engine to install (optional)", + "example": "v0.1.39" + }, + "variant": { + "type": "string", + "description": "The variant of the engine to install (optional)", + "example": "mac-arm64" + }, + "type": { + "type": "string", + "description": "The type of connection, remote or local", + "example": "remote" + }, + "url": { + "type": "string", + "description": "The URL for the API endpoint for remote engine", + "example": "https://api.openai.com" + }, + "api_key": { + "type": "string", + "description": "The API key for authentication for remote engine", + "example": "" + }, + "metadata": { + "type": "object", + "properties": { + "get_models_url": { + "type": "string", + "description": "The URL to get models", + "example": "https://api.openai.com/v1/models" + } + } + } + } + } + } + } + }, + "responses": { + "200": { + "description": "Successful installation", + "content": { + "application/json": { + "schema": { + "type": "object", + 
"properties": { + "message": { + "type": "string", + "example": "Engine starts installing!" + } + } + } + } + } + } + }, + "tags": ["Engines"] + }, + "delete": { + "summary": "Uninstall an engine", + "description": "Uninstall an engine based on engine, version, and variant. If version and variant are not provided, all versions and variants of the engine will be uninstalled.", + "parameters": [ + { + "name": "name", + "in": "path", + "required": true, + "schema": { + "type": "string", + "enum": [ + "llama-cpp", + "onnxruntime", + "tensorrt-llm", + "openai", + "anthropic" + ], + "default": "llama-cpp" + }, + "description": "The type of engine" + } + ], + "requestBody": { + "required": false, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "version": { + "type": "string", + "description": "The version of the engine to uninstall (optional)", + "example": "v0.1.39" + }, + "variant": { + "type": "string", + "description": "The variant of the engine to uninstall (optional)", + "example": "mac-arm64" + } + } + } + } + } + }, + "responses": { + "200": { + "description": "Successful uninstallation", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string", + "description": "Engine llama-cpp uninstalled successfully!", + "example": "Engine llama-cpp uninstalled successfully!" 
+ } + } + } + } + } + }, + "400": { + "description": "Bad request", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "string", + "description": "Error message describing the issue with the request" + } + } + } + } + } + } + }, + "tags": ["Engines"] + } + }, + "/engines/{name}/update": { + "post": { + "summary": "Update engine", + "description": "Updates the specified engine type using the engine variant currently set as default.", + "parameters": [ + { + "name": "name", + "in": "path", + "required": true, + "schema": { + "type": "string", + "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], + "default": "llama-cpp" + }, + "description": "The name of the engine to update" + } + ], + "responses": { + "200": { + "description": "Engine updated successfully", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string", + "example": "Engine updated successfully" + } + } + } + } + } + } + }, + "tags": ["Engines"] + } + }, + "/engines/{name}/default": { + "get": { + "summary": "Get default engine variant", + "description": "Retrieves the default engine variant for the specified engine type.", + "parameters": [ + { + "name": "name", + "in": "path", + "required": true, + "schema": { + "type": "string", + "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], + "default": "llama-cpp" + }, + "description": "The type of engine" + } + ], + "responses": { + "200": { + "description": "Successful response", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "engine": { + "type": "string", + "example": "llama-cpp" + }, + "name": { + "type": "string", + "example": "mac-arm64" + }, + "version": { + "type": "string", + "example": "0.1.35-28.10.24" + } + } + } + } + } + } + }, + "tags": ["Engines"] + }, + "post": { + "summary": "Set default engine variant", + "description": "Sets the default engine variant for the 
specified engine type.", + "parameters": [ + { + "name": "name", + "in": "path", + "required": true, + "schema": { + "type": "string", + "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], + "default": "llama-cpp" + }, + "description": "The type of engine" + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "object", + "required": ["version", "variant"], + "properties": { + "version": { + "type": "string", + "description": "The version of the engine variant", + "example": "0.1.34" + }, + "variant": { + "type": "string", + "description": "The variant of the engine", + "example": "mac-arm64" + } + } + } + } + } + }, + "responses": { + "200": { + "description": "Successful response", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string", + "example": "Default engine variant set successfully" + } + } + } + } + } + } + }, + "tags": ["Engines"] + } + }, + "/engines/{name}/load": { + "post": { + "summary": "Load engine", + "description": "Loads the specified engine type.", + "parameters": [ + { + "name": "name", + "in": "path", + "required": true, + "schema": { + "type": "string", + "enum": [ + "llama-cpp", + "onnxruntime", + "tensorrt-llm", + "openai", + "anthropic" + ], + "default": "llama-cpp" + }, + "description": "The name of the engine to update" + } + ], + "responses": { + "200": { + "description": "Engine loaded successfully", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string", + "example": "Engine loaded successfully" + } + } + } + } + } + } + }, + "tags": ["Engines"] + }, + "delete": { + "summary": "Unload engine", + "description": "Unloads the specified engine type.", + "parameters": [ + { + "name": "name", + "in": "path", + "required": true, + "schema": { + "type": "string", + "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], + "default": "llama-cpp" + 
}, + "description": "The name of the engine to update" + } + ], + "responses": { + "200": { + "description": "Engine unloaded successfully", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string", + "example": "Engine unloaded successfully" + } + } + } + } + } + } + }, + "tags": ["Engines"] + } + }, + "/hardware": { + "get": { + "summary": "Get hardware information", + "description": "Retrieves detailed information about the system's hardware configuration, including CPU, GPU(s), operating system, power status, RAM, and storage.", + "responses": { + "200": { + "description": "Hardware information retrieved successfully", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "cpu": { + "$ref": "#/components/schemas/CPUDto" + }, + "gpus": { + "type": "array", + "items": { + "$ref": "#/components/schemas/GPUDto" + } + }, + "os": { + "$ref": "#/components/schemas/OperatingSystemDto" + }, + "power": { + "$ref": "#/components/schemas/PowerDto" + }, + "ram": { + "$ref": "#/components/schemas/RAMDto" + }, + "storage": { + "$ref": "#/components/schemas/StorageDto" + } + } + } + } + } + } + }, + "tags": ["Hardware"] + } + }, + "/hardware/activate": { + "post": { + "summary": "Activate GPUs", + "description": "Activates the specified GPUs based on their indices provided in the request body.", + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "gpus": { + "type": "array", + "items": { + "type": "integer" + }, + "example": [0, 1, 2], + "description": "An array of GPU indices to activate." 
+ } + }, + "required": ["gpus"] + } + } + } + }, + "responses": { + "200": { + "description": "The hardware configuration has been activated.", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string", + "example": "The hardware configuration has been activated.", + "description": "Confirmation message indicating successful activation." + }, + "activated_gpus": { + "type": "array", + "items": { + "type": "integer" + }, + "example": [0, 1, 2], + "description": "List of GPU indices that were activated." + } + } + } + } + } + }, + "400": { + "description": "Bad Request", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string", + "example": "Invalid GPU index provided", + "description": "Error message indicating what went wrong." + } + } + } + } + } + } + }, + "tags": ["Hardware"] + } + }, + "/files": { + "post": { + "summary": "Upload a File", + "description": "Uploads a file to the Cortex server.", + "requestBody": { + "required": true, + "content": { + "multipart/form-data": { + "schema": { + "type": "object", + "properties": { + "file": { + "type": "string", + "format": "binary" + }, + "purpose": { + "type": "string", + "enum": ["assistants"], + "description": "The intended purpose of the uploaded file" + } + }, + "required": ["file", "purpose"] + } + } + } + }, + "responses": { + "200": { + "description": "Successful response", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "bytes": { + "type": "integer", + "example": 3211109 + }, + "created_at": { + "type": "integer", + "example": 1733942093 + }, + "filename": { + "type": "string", + "example": "Enterprise_Application_Infrastructure_v2_20140903_toCTC_v1.0.pdf" + }, + "id": { + "type": "string", + "example": "file-0001KNKPTDDAQSDVEQGRBTCTNJ" + }, + "object": { + "type": "string", + "example": "file" + }, + "purpose": { + "type": 
"string", + "example": "assistants" + } + } + } + } + } + } + }, + "tags": ["Files"] + }, + "get": { + "summary": "List files", + "description": "Lists all the files in the current directory.", + "responses": { + "200": { + "description": "Successful response", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "type": "object", + "properties": { + "bytes": { + "type": "integer", + "example": 3211109 + }, + "created_at": { + "type": "integer", + "example": 1733942093 + }, + "filename": { + "type": "string", + "example": "Enterprise_Application_Infrastructure_v2_20140903_toCTC_v1.0.pdf" + }, + "id": { + "type": "string", + "example": "file-0001KNKPTDDAQSDVEQGRBTCTNJ" + }, + "object": { + "type": "string", + "example": "file" + }, + "purpose": { + "type": "string", + "example": "assistants" + } + } + } + }, + "object": { + "type": "string", + "example": "list" + } + } + } + } + } + } + }, + "tags": ["Files"] + } + }, + "/files/{id}": { + "get": { + "summary": "Retrieve File", + "description": "Retrieves a file by its ID.", + "parameters": [ + { + "name": "id", + "in": "path", + "required": true, + "description": "The ID of the file to retrieve", + "schema": { + "type": "string" + } + }, + { + "name": "thread", + "in": "query", + "required": false, + "description": "Optional thread identifier", + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "Successfully retrieved file", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "bytes": { + "type": "integer", + "example": 3211109 + }, + "created_at": { + "type": "integer", + "example": 1733942093 + }, + "filename": { + "type": "string", + "example": "Enterprise_Application_Infrastructure_v2_20140903_toCTC_v1.0.pdf" + }, + "id": { + "type": "string", + "example": "file-0001KNKPTDDAQSDVEQGRBTCTNJ" + }, + "object": { + "type": "string", + "example": "file" + 
}, + "purpose": { + "type": "string", + "example": "assistants" + } + } + } + } + } + } + }, + "tags": ["Files"] + }, + "delete": { + "summary": "Delete File", + "description": "Deletes a file by its ID.", + "parameters": [ + { + "name": "id", + "in": "path", + "required": true, + "description": "The ID of the file to delete", + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "File successfully deleted", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "deleted": { + "type": "boolean", + "description": "Indicates if the file was successfully deleted" + }, + "id": { + "type": "string", + "description": "The ID of the deleted file" + }, + "object": { + "type": "string", + "description": "Type of object, always 'file'" + } + }, + "required": ["deleted", "id", "object"] + }, + "example": { + "deleted": true, + "id": "file-0001KNP26FC62D620DGYNG2R8H", + "object": "file" + } + } + } + }, + "400": { + "description": "File not found or invalid request", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string", + "description": "Error message describing the issue" + } + }, + "required": ["message"] + }, + "example": { + "message": "File not found: file-0001KNP26FC62D620DGYNG2R8H" + } + } + } + } + }, + "tags": ["Files"] + } + }, + "/files/{id}/content": { + "get": { + "summary": "Get File Content", + "description": "Retrieves the content of a file by its ID.", + "parameters": [ + { + "name": "id", + "in": "path", + "required": true, + "description": "The ID of the file to retrieve content from", + "schema": { + "type": "string" + } + }, + { + "name": "thread", + "in": "query", + "required": false, + "description": "Optional thread identifier", + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "File content retrieved successfully", + "content": { + "*/*": { + "schema": { + "type": 
"string", + "format": "binary", + "description": "The raw content of the file" + } + } + } + }, + "400": { + "description": "File not found or invalid request", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string", + "description": "Error message describing the issue" + } + }, + "required": ["message"] + } + } + } + } + }, + "tags": ["Files"] + } + }, + "/configs": { + "get": { + "summary": "Get Configurations", + "description": "Retrieves the current configuration settings of the Cortex server.", + "responses": { + "200": { + "description": "Successful response", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "allowed_origins": { + "type": "array", + "items": { + "type": "string" + }, + "example": ["http://127.0.0.1:39281", "https://cortex.so"] + }, + "cors": { + "type": "boolean", + "example": false + }, + "proxy_username": { + "type": "string", + "example": "username" + }, + "proxy_password": { + "type": "string", + "example": "password" + }, + "proxy_url": { + "type": "string", + "example": "http://proxy.example.com:8080" + }, + "verify_proxy_ssl": { + "type": "boolean", + "description": "Indicates whether to verify the SSL certificate of the proxy server.", + "example": false + }, + "verify_proxy_host_ssl": { + "type": "boolean", + "example": false + }, + "verify_peer_ssl": { + "type": "boolean", + "example": false + }, + "verify_host_ssl": { + "type": "boolean", + "example": false + }, + "no_proxy": { + "type": "string", + "example": "localhost" + }, + "huggingface_token": { + "type": "string", + "example": "your_token" + } + } + }, + "example": { + "allowed_origins": [ + "http://127.0.0.1:39281", + "https://cortex.so" + ], + "cors": false, + "proxy_username": "username", + "proxy_password": "password", + "proxy_url": "http://proxy.example.com:8080", + "verify_proxy_ssl": false, + "verify_proxy_host_ssl": false, + "verify_peer_ssl": false, + "verify_host_ssl": false, + "no_proxy": "localhost", + 
"huggingface_token": "your_token" + } + } + } + } + }, + "tags": ["Configurations"] + }, + "patch": { + "tags": ["Configurations"], + "summary": "Update configuration settings", + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "cors": { + "type": "boolean", + "description": "Indicates whether CORS is enabled.", + "example": false + }, + "allowed_origins": { + "type": "array", + "items": { + "type": "string" + }, + "description": "List of allowed origins.", + "example": ["http://127.0.0.1:39281", "https://cortex.so"] + }, + "proxy_username": { + "type": "string", + "description": "Username for the proxy server.", + "example": "username" + }, + "proxy_password": { + "type": "string", + "description": "Password for the proxy server.", + "example": "password" + }, + "proxy_url": { + "type": "string", + "description": "URL for the proxy server.", + "example": "http://proxy.example.com:8080" + }, + "verify_proxy_ssl": { + "type": "boolean", + "description": "Indicates whether to verify the SSL certificate of the proxy server.", + "example": false + }, + "verify_proxy_host_ssl": { + "type": "boolean", + "description": "Indicates whether to verify the SSL certificate of the proxy server host.", + "example": false + }, + "verify_peer_ssl": { + "type": "boolean", + "description": "Indicates whether to verify the SSL certificate of the peer.", + "example": false + }, + "verify_host_ssl": { + "type": "boolean", + "description": "Indicates whether to verify the SSL certificate of the host.", + "example": false + }, + "no_proxy": { + "type": "string", + "description": "List of hosts that should not be proxied.", + "example": "localhost" + }, + "huggingface_token": { + "type": "string", + "description": "HuggingFace token to pull models.", + "example": "your_token" + } + } + } + } + } + }, + "responses": { + "200": { + "description": "Configuration updated successfully", + "content": { + 
"application/json": { + "schema": { + "type": "object", + "properties": { + "config": { + "type": "object", + "properties": { + "allowed_origins": { + "type": "array", + "items": { + "type": "string" + }, + "example": [ + "http://127.0.0.1:39281", + "https://cortex.so" + ] + }, + "cors": { + "type": "boolean", + "example": false + }, + "proxy_username": { + "type": "string", + "example": "username" + }, + "proxy_password": { + "type": "string", + "example": "password" + }, + "proxy_url": { + "type": "string", + "example": "http://proxy.example.com:8080" + }, + "verify_proxy_ssl": { + "type": "boolean", + "example": false + }, + "verify_proxy_host_ssl": { + "type": "boolean", + "example": false + }, + "verify_peer_ssl": { + "type": "boolean", + "example": false + }, + "verify_host_ssl": { + "type": "boolean", + "example": false + }, + "no_proxy": { + "type": "string", + "example": "localhost" + }, + "huggingface_token": { + "type": "string", + "example": "your_token" + } + } + }, + "message": { + "type": "string", + "example": "Configuration updated successfully" + } + } + } + } + } + } + } + } + } + }, + "info": { + "title": "Cortex API", + "description": "Cortex API enables API commands for seamless interaction with LLMs.", + "version": "1.0", + "contact": {} + }, + "tags": [ + { + "name": "Chat", + "description": "This endpoint initiates interaction with a Large Language Models (LLM)." + }, + { + "name": "Embeddings", + "description": "This endpoint create embeddings for a given input text or tokens." + }, + { + "name": "Assistants", + "description": "These endpoints manage the lifecycle of an Assistant within a conversation thread." + }, + { + "name": "Pulling Models", + "description": "These endpoints handle downloading and importing models." 
+ }, + { + "name": "Running Models", + "description": "These endpoints support a range of operations that allow users to effectively control and interact with their models" + }, + { + "name": "Server", + "description": "These endpoints manage the lifecycle of Server, including health check and shutdown." + }, + { + "name": "Configurations", + "description": "These endpoints manage the configuration of the Cortex server." + }, + { + "name": "Messages", + "description": "These endpoints manage the retrieval and storage of conversation content, including responses from LLMs and other metadata related to chat interactions." + }, + { + "name": "Threads", + "description": "These endpoints handle the creation, retrieval, updating, and deletion of conversation threads." + }, + { + "name": "Engines", + "description": "Endpoints for managing the available engines within Cortex." + }, + { + "name": "Files", + "description": "Endpoints for managing the files within Cortex." + }, + { + "name": "Hardware", + "description": "Endpoints for managing the available hardware within Cortex." + }, + { + "name": "System", + "description": "Endpoints for stopping the Cortex API server, checking its status, and fetching system events." 
+ } + ], + "x-tagGroups": [ + { + "name": "CORTEX", + "tags": [ + "Chat", + "Embeddings", + "Engines", + "Files", + "Hardware", + "Events", + "Threads", + "Messages", + "Pulling Models", + "Running Models", + "Processes", + "Status", + "Server", + "Configurations" + ] + } + ], + "servers": [ + { + "url": "/v1" + } + ], + "components": { + "schemas": { + "CreateAssistantDto": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "The unique identifier of the assistant.", + "example": "jan", + "default": "jan" + }, + "avatar": { + "type": "string", + "description": "The avatar of the assistant.", + "example": "", + "default": "" + }, + "name": { + "type": "string", + "description": "The name of the assistant.", + "example": "Jan", + "default": "Jan" + }, + "description": { + "type": "string", + "description": "The description of the assistant.", + "example": "A default assistant that can use all downloaded models", + "default": "A default assistant that can use all downloaded models" + }, + "model": { + "type": "string", + "description": "The model of the assistant." + }, + "instructions": { + "type": "string", + "description": "The instructions for the assistant.", + "example": "", + "default": "" + }, + "tools": { + "description": "The tools associated with the assistant.", + "example": [], + "default": [], + "type": "array", + "items": { + "type": "array", + "properties": { + "type": { + "type": "string", + "enum": ["function"] + }, + "function": { + "$ref": "#/components/schemas/Function" + } + }, + "required": ["type", "function"] + } + }, + "metadata": { + "type": "object", + "nullable": true, + "description": "The metadata of the assistant." 
+ }, + "top_p": { + "type": "number", + "description": "Top p.", + "example": "0.7", + "default": "0.7" + }, + "temperature": { + "type": "number", + "description": "Temperature.", + "example": "0.7", + "default": "0.7" + } + }, + "required": [ + "id", + "name", + "description", + "model", + "instructions", + "tools", + "metadata" + ] + }, + "AssistantEntity": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "avatar": { + "type": "string" + }, + "object": { + "type": "string" + }, + "created_at": { + "type": "number" + }, + "name": { + "type": "string", + "nullable": true + }, + "description": { + "type": "string", + "nullable": true + }, + "model": { + "type": "string" + }, + "instructions": { + "type": "string", + "nullable": true + }, + "tools": { + "type": "array" + }, + "metadata": { + "type": "object", + "nullable": true + }, + "top_p": { + "type": "number", + "nullable": true + }, + "temperature": { + "type": "number", + "nullable": true + }, + "response_format": { + "type": "object", + "nullable": true + }, + "tool_resources": { + "type": "object", + "nullable": true + } + }, + "required": [ + "id", + "object", + "created_at", + "name", + "description", + "model", + "instructions", + "tools", + "metadata", + "top_p", + "temperature", + "response_format", + "tool_resources" + ] + }, + "DeleteAssistantResponseDto": { + "type": "object", + "properties": { + "id": { + "type": "string", + "example": "assistant_123", + "description": "The identifier of the assistant that was deleted." + }, + "object": { + "type": "string", + "example": "assistant", + "description": "Type of the object, indicating it's a assistant.", + "default": "assistant" + }, + "deleted": { + "type": "boolean", + "example": true, + "description": "Indicates whether the assistant was successfully deleted." 
+ } + }, + "required": ["id", "object", "deleted"] + }, + "Message": { + "type": "object", + "discriminator": { + "propertyName": "role", + "mapping": { + "system": "SystemMessage", + "user": "UserMessage", + "assistant": "AssistantMessage", + "tool": "ToolMessage", + "function": "FunctionMessage" + } + }, + "properties": { + "role": { + "type": "string", + "enum": ["system", "user", "assistant", "tool"] + }, + "name": { + "type": "string", + "description": "An optional name for the participant. Provides the model information to differentiate between participants of the same role." + } + }, + "required": ["role"] + }, + "SystemMessage": { + "allOf": [ + { + "type": "object", + "properties": { + "role": { + "type": "string", + "description": "The role of the messages author, in this case `system`." + }, + "content": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "$ref": "#/components/schemas/TextContentPart" + } + } + ] + }, + "name": { + "type": "string", + "description": "An optional name for the participant. Provides the model information to differentiate between participants of the same role." + } + }, + "required": ["content", "role"] + } + ] + }, + "UserMessage": { + "allOf": [ + { + "type": "object", + "properties": { + "role": { + "type": "string", + "description": "The role of the messages author, in this case `user`." 
+ }, + "content": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "object", + "anyOf": [ + { + "type": "object", + "title": "Text Content Part", + "description": "Text Content", + "$ref": "#/components/schemas/TextContentPart" + }, + { + "type": "object", + "title": "Image Content Part", + "description": "Image Content", + "$ref": "#/components/schemas/ImageContentPart" + }, + { + "type": "object", + "title": "Audio Content Part", + "description": "Audio Content", + "$ref": "#/components/schemas/AudioContentPart" + } + ] + } + } + ] + }, + "name": { + "type": "string", + "description": "An optional name for the participant. Provides the model information to differentiate between participants of the same role." + } + }, + "required": ["content", "role"] + } + ] + }, + "AssistantMessage": { + "allOf": [ + { + "type": "object", + "properties": { + "role": { + "type": "string", + "description": "The role of the messages author, in this case `assistant`." + }, + "content": { + "description": "The contents of the assistant message. Required unless `tool_calls` or `function_call` is specified.", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "object", + "anyOf": [ + { + "$ref": "#/components/schemas/TextContentPart" + }, + { + "$ref": "#/components/schemas/RefusalContentPart" + } + ] + } + } + ] + }, + "name": { + "type": "string", + "description": "An optional name for the participant. Provides the model information to differentiate between participants of the same role." 
+ }, + "refusal": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] + }, + "audio": { + "type": "object", + "anyOf": [ + { + "$ref": "#/components/schemas/Audio" + }, + { + "type": "null" + } + ] + }, + "tool_calls": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ToolCall" + } + }, + "function_call": { + "deprecated": true, + "anyOf": [ + { + "$ref": "#/components/schemas/FunctionCall" + }, + { + "type": "null" + } + ] + } + } + } + ] + }, + "ToolMessage": { + "allOf": [ + { + "type": "object", + "properties": { + "role": { + "type": "string", + "description": "The role of the messages author, in this case `tool`." + }, + "content": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "object", + "anyOf": [ + { + "$ref": "#/components/schemas/TextContentPart" + } + ] + } + } + ] + }, + "tool_call_id": { + "type": "string" + } + }, + "required": ["content", "tool_call_id"] + } + ] + }, + "FunctionMessage": { + "allOf": [ + { + "$ref": "#/components/schemas/Message" + } + ], + "deprecated": true + }, + "TextContentPart": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["text"] + }, + "text": { + "type": "string" + } + }, + "required": ["type", "text"] + }, + "ImageContentPart": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["image_url"] + }, + "image_url": { + "$ref": "#/components/schemas/ImageUrl" + } + }, + "required": ["type", "image_url"] + }, + "AudioContentPart": { + "type": "object", + "properties": { + "type": { + "type": "string", + "description": "The type of the content part. Always `input_audio`." 
+ }, + "input_audio": { + "$ref": "#/components/schemas/InputAudio" + } + }, + "required": ["type", "input_audio"] + }, + "RefusalContentPart": { + "type": "object", + "properties": { + "type": { + "type": "string" + }, + "refusal": { + "type": "string" + } + }, + "required": ["type", "refusal"] + }, + "ImageUrl": { + "type": "object", + "properties": { + "url": { + "type": "string", + "description": "Either a URL of the image or the base64 encoded image data." + }, + "detail": { + "type": "string", + "default": "auto", + "description": "Specifies the detail level of the image. Defaults to `auto`." + } + }, + "required": ["url"] + }, + "InputAudio": { + "type": "object", + "properties": { + "data": { + "type": "string", + "description": "Base64 encoded audio data." + }, + "format": { + "type": "string", + "enum": ["wav", "mp3"], + "description": "The format of the encoded audio data. Currently supports `wav` and `mp3`." + } + }, + "required": ["data", "format"] + }, + "Audio": { + "type": "object", + "description": "Data about a previous audio response from the model.", + "properties": { + "id": { + "type": "string", + "description": "Unique identifier for a previous audio response from the model." + } + }, + "required": ["id"] + }, + "ToolCall": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "type": { + "type": "string" + }, + "function": { + "$ref": "#/components/schemas/FunctionCall" + } + }, + "required": ["id", "type", "function"] + }, + "FunctionCall": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "arguments": { + "type": "string" + } + }, + "required": ["name", "arguments"] + }, + "CreateChatCompletionDto": { + "type": "object", + "properties": { + "messages": { + "description": "Array of chat messages to be used for generating the chat completion. Depending on the model you use, different message types (modalities) are supported, like text, images, and audio. 
Currently, cortex only support text modalities.", + "type": "array", + "items": { + "anyOf": [ + { + "title": "System Message", + "description": "System Message", + "$ref": "#/components/schemas/SystemMessage" + }, + { + "title": "User Message", + "description": "User Message", + "$ref": "#/components/schemas/UserMessage" + }, + { + "title": "Assistant Message", + "description": "Assistant Message", + "$ref": "#/components/schemas/AssistantMessage" + }, + { + "title": "Tool Message", + "description": "Tool Message", + "$ref": "#/components/schemas/ToolMessage" + } + ] + } + }, + "model": { + "type": "string", + "description": "The unique identifier of the model.", + "example": "mistral" + }, + "stream": { + "type": "boolean", + "description": "Determines the format for output generation. If set to `true`, the output is generated continuously, allowing for real-time streaming of responses. If set to `false`, the output is delivered in a single JSON file.", + "example": true + }, + "max_tokens": { + "type": "number", + "description": "Sets the upper limit on the number of tokens the model can generate in a single output. This value is now deprecated in favor of `max_completion_tokens`.", + "example": 4096 + }, + "max_completion_tokens": { + "type": "number", + "description": "Sets the upper limit on the number of tokens the model can generate in a single output." 
+ }, + "stop": { + "description": "Defines specific tokens or phrases that signal the model to stop producing further output.", + "example": ["End"], + "type": "array", + "items": { + "type": "string" + } + }, + "frequency_penalty": { + "type": "number", + "description": "Modifies the likelihood of the model repeating the same words or phrases within a single output.", + "example": 0.2 + }, + "presence_penalty": { + "type": "number", + "description": "Reduces the likelihood of repeating tokens, promoting novelty in the output.", + "example": 0.6 + }, + "temperature": { + "type": "number", + "description": "Influences the randomness of the model's output.", + "example": 0.8 + }, + "top_p": { + "type": "number", + "description": "Sets probability threshold for more relevant outputs.", + "example": 0.95 + }, + "modalities": { + "type": "array", + "items": { + "type": "string", + "enum": ["text", "audio"] + }, + "description": "Specifies the modalities (types of input) supported by the model. Currently, cortex only support text modalities. We are actively working on this feature to bring cortex as fully OpenAI compatible platform. Planning and roadmap for this feature can be found [**here**](https://github.com/janhq/cortex.cpp/issues/1582).", + "example": ["text"] + }, + "audio": { + "description": "Parameters for audio output. Required when audio output is requested with `modalities: ['audio']`. We are actively working on this feature to bring cortex as fully OpenAI compatible platform. Planning and roadmap for this feature can be found [**here**](https://github.com/janhq/cortex.cpp/issues/1582).", + "type": "object", + "properties": { + "voice": { + "type": "string", + "description": "The voice of the generated audio." + }, + "format": { + "type": "string", + "description": "Specifies the output audio format. 
Must be one of `wav`, `mp3`, `flac`, `opus`, or `pcm16`.", + "enum": ["mp3", "wav", "flac", "opus", "pcm16"] + } + }, + "required": ["voice", "format"] + }, + "store": { + "type": "boolean", + "description": "Whether or not to store the output of this chat completion request for use in our model distillation or evals products. We are actively working on this feature to bring cortex as fully OpenAI compatible platform. Planning and roadmap for this feature can be found [**here**](https://github.com/janhq/cortex.cpp/issues/1582).", + "default": false, + "example": false + }, + "metadata": { + "type": "object", + "description": "Developer-defined tags and values used for filtering completions in the dashboard. We are actively working on this feature to bring cortex as fully OpenAI compatible platform. Planning and roadmap for this feature can be found [**here**](https://github.com/janhq/cortex.cpp/issues/1582).", + "example": { + "type": "conversation" + } + }, + "logit_bias": { + "type": "object", + "description": "Modify the likelihood of specified tokens appearing in the completion. \n\nAccepts a JSON object that maps tokens (specified by their token ID in the tokenizer) to an associated bias value from -100 to 100. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token.", + "example": { + "15496": -100, + "51561": -100 + }, + "default": null + }, + "logprobs": { + "type": "boolean", + "description": "Whether to return log probabilities of the output tokens or not. 
If true, returns the log probabilities of each output token returned in the content of message.", + "example": false, + "default": false + }, + "top_logprobs": { + "type": "number", + "description": "An integer between 0 and 20 specifying the number of most likely tokens to return at each token position, each with an associated log probability. `logprobs` must be set to `true` if this parameter is used." + }, + "n": { + "type": "number", + "description": "How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the choices. Keep n as 1 to minimize costs.", + "example": 1, + "default": 1 + }, + "response_format": { + "type": "object", + "description": "An object specifying the format that the model must output. Setting to { \"type\": \"json_object\" } enables JSON mode, which guarantees the message the model generates is valid JSON. We are actively working on this feature to bring cortex as fully OpenAI compatible platform. Planning and roadmap for this feature can be found [**here**](https://github.com/janhq/cortex.cpp/issues/1582).", + "properties": { + "type": { + "type": "string", + "description": "The format of the generated output. Must be one of `text`, `json_schema` or `json_object`.", + "enum": ["text", "json_object", "json_schema"] + } + }, + "required": ["type"] + }, + "seed": { + "type": "number", + "description": "This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result. Determinism is not guaranteed, and you should refer to the system_fingerprint response parameter to monitor changes in the backend.", + "example": 123, + "default": null + }, + "service_tier": { + "type": "string", + "description": "Specifies the latency tier to use for processing the request. 
This parameter is relevant for customers subscribed to the scale tier service:\n\n - If set to 'auto', and the Project is Scale tier enabled, the system will utilize scale tier credits until they are exhausted.\n- If set to 'auto', and the Project is not Scale tier enabled, the request will be processed using the default service tier with a lower uptime SLA and no latency guarantee.\n- If set to 'default', the request will be processed using the default service tier with a lower uptime SLA and no latency guarantee.\nWhen not set, the default behavior is 'auto'.\nWhen this parameter is set, the response body will include the service_tier utilized.\n\n We are actively working on this feature to bring cortex as fully OpenAI compatible platform. Planning and roadmap for this feature can be found [**here**](https://github.com/janhq/cortex.cpp/issues/1582)." + }, + "stream_options": { + "type": "object", + "default": null, + "description": "Options for streaming response. Only set this when you set `stream: true`.", + "properties": { + "include_usage": { + "type": "boolean", + "description": "If set, an additional chunk will be streamed before the data: `[DONE]` message. The `usage` field on this chunk shows the token usage statistics for the entire request, and the `choices` field will always be an empty array. 
All other chunks will also include a `usage` field, but with a null value.", + "example": false, + "default": false + } + } + }, + "tools": { + "type": "array", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["function"] + }, + "function": { + "$ref": "#/components/schemas/Function" + } + }, + "required": ["type", "function"] + } + }, + "tool_choice": { + "anyOf": [ + { + "type": "string", + "enum": ["none", "auto", "required"] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["function"] + }, + "function": { + "type": "object", + "properties": { + "name": { + "type": "string" + } + }, + "required": ["name"] + } + }, + "required": ["type", "function"] + } + ] + }, + "parallel_tool_calls": { + "type": "boolean", + "description": "Whether to enable parallel function calling during tool use. Cortex support parallel tool calls by default", + "example": true, + "default": true + }, + "user": { + "type": "string", + "description": "A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. We are actively working on this feature to bring cortex as fully OpenAI compatible platform. Planning and roadmap for this feature can be found [**here**](https://github.com/janhq/cortex.cpp/issues/1582)." + }, + "dynatemp_range": { + "type": "number", + "description": "Dynamic temperature range. This parameter only supported by `llama-cpp` engine." + }, + "dynatemp_exponent": { + "type": "number", + "description": "Dynamic temperature exponent. This parameter only supported by `llama-cpp` engine." + }, + "top_k": { + "type": "integer", + "description": "The number of most likely tokens to consider at each step. This parameter only supported by `llama-cpp` engine." + }, + "min_p": { + "type": "number", + "description": "Minimum probability threshold for token sampling. This parameter only supported by `llama-cpp` engine." 
+ }, + "tfs_z": { + "type": "number", + "description": "The z-score used for Typical token sampling. This parameter only supported by `llama-cpp` engine." + }, + "typ_p": { + "type": "number", + "description": "The cumulative probability threshold used for Typical token sampling. This parameter only supported by `llama-cpp` engine." + }, + "repeat_last_n": { + "type": "integer", + "description": "Number of previous tokens to penalize for repeating. This parameter only supported by `llama-cpp` engine." + }, + "repeat_penalty": { + "type": "number", + "description": "Penalty for repeating tokens. This parameter only supported by `llama-cpp` engine." + }, + "mirostat": { + "type": "boolean", + "description": "Enables or disables Mirostat sampling (true or false). This parameter only supported by `llama-cpp` engine." + }, + "mirostat_tau": { + "type": "number", + "description": "Target entropy value for Mirostat sampling. This parameter only supported by `llama-cpp` engine." + }, + "mirostat_eta": { + "type": "number", + "description": "Learning rate for Mirostat sampling. This parameter only supported by `llama-cpp` engine." + }, + "penalize_nl": { + "type": "boolean", + "description": "Penalizes newline tokens (true or false). This parameter only supported by `llama-cpp` engine." + }, + "ignore_eos": { + "type": "boolean", + "description": "Ignores the end-of-sequence token (true or false). This parameter only supported by `llama-cpp` engine." + }, + "n_probs": { + "type": "integer", + "description": "Number of probabilities to return. This parameter only supported by `llama-cpp` engine." + }, + "min_keep": { + "type": "integer", + "description": "Minimum number of tokens to keep. This parameter only supported by `llama-cpp` engine." 
+ } + }, + "required": ["messages", "model"] + }, + "Function": { + "type": "object", + "properties": { + "description": { + "type": "string" + }, + "name": { + "type": "string", + "pattern": "^[a-zA-Z0-9_-]{1,64}$" + }, + "parameters": { + "type": "object" + }, + "strict": { + "type": "boolean", + "default": false + } + }, + "required": ["name"] + }, + "MessageDto": { + "type": "object", + "properties": { + "content": { + "type": "string", + "description": "The textual content of the chat message or completion generated by the model." + }, + "role": { + "type": "string", + "description": "The role of the participant in the chat, such as 'user' or 'system', indicating who is the sender of the message." + } + }, + "required": ["content", "role"] + }, + "ChoiceDto": { + "type": "object", + "properties": { + "finish_reason": { + "type": "string", + "description": "The reason the chat completion ended, typically indicating whether the model completed the text naturally or was cut off." + }, + "index": { + "type": "number", + "description": "The index of the completion relative to other generated completions, useful for identifying its order in a batch request." + }, + "message": { + "description": "An object representing the message details involved in the chat completion, encapsulated within a MessageDto.", + "allOf": [ + { + "$ref": "#/components/schemas/MessageDto" + } + ] + } + }, + "required": ["finish_reason", "index", "message"] + }, + "UsageDto": { + "type": "object", + "properties": { + "completion_tokens": { + "type": "number", + "description": "The number of tokens used in the completion part of the response generated by the model." + }, + "prompt_tokens": { + "type": "number", + "description": "The number of tokens used in the prompt part of the chat input, which is provided to the model." 
+ }, + "total_tokens": { + "type": "number", + "description": "The total number of tokens used in both the prompt and the completion, summarizing the entire token count of the chat operation." + } + }, + "required": ["completion_tokens", "prompt_tokens", "total_tokens"] + }, + "ChatCompletionResponseDto": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "A unique identifier for the chat completion." + }, + "choices": { + "type": "array", + "description": "A list of chat completion choices. Can be more than one if n is greater than 1.", + "items": { + "type": "object", + "properties": { + "finish_reason": { + "type": "string", + "description": "The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, tool_calls if the model called a tool, or function_call (deprecated) if the model called a function." + }, + "index": { + "type": "integer", + "description": "The index of the choice in the list of choices." + }, + "message": { + "type": "object", + "properties": { + "content": { + "type": ["string", "null"], + "description": "The contents of the message." + }, + "refusal": { + "type": ["string", "null"], + "description": "The refusal message generated by the model." + }, + "tool_calls": { + "type": "array", + "description": "The tool calls generated by the model, such as function calls.", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "The ID of the tool call." + }, + "type": { + "type": "string", + "description": "The type of the tool. Currently, only function is supported." + }, + "function": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "The name of the function to call." 
+ }, + "arguments": { + "type": "string", + "description": "The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function." + } + }, + "required": ["name", "arguments"] + } + }, + "required": ["id", "type", "function"] + } + }, + "role": { + "type": "string", + "description": "The role of the author of this message." + }, + "function_call": { + "type": "object", + "deprecated": true, + "description": "Deprecated and replaced by tool_calls. The name and arguments of a function that should be called, as generated by the model.", + "properties": { + "arguments": { + "type": "string", + "description": "The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function." + }, + "name": { + "type": "string", + "description": "The name of the function to call." + } + }, + "required": ["arguments", "name"] + }, + "audio": { + "type": "object", + "description": "If the audio output modality is requested, this object contains data about the audio response from the model.", + "properties": { + "id": { + "type": "string", + "description": "Unique identifier for this audio response." + }, + "expires_at": { + "type": "integer", + "description": "The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations." + }, + "data": { + "type": "string", + "description": "Base64 encoded audio bytes generated by the model, in the format specified in the request." + }, + "transcript": { + "type": "string", + "description": "Transcript of the audio generated by the model." 
+ } + }, + "required": ["id", "expires_at", "data", "transcript"] + } + }, + "required": ["role"] + }, + "logprobs": { + "type": "object", + "description": "Log probability information for the choice.", + "properties": { + "content": { + "type": ["array", "null"], + "description": "A list of message content tokens with log probability information.", + "items": { + "type": "object", + "properties": { + "token": { + "type": "string", + "description": "The token." + }, + "logprob": { + "type": "number", + "description": "The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely." + }, + "bytes": { + "type": ["array", "null"], + "description": "A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token." + } + }, + "required": ["token", "logprob"] + } + }, + "top_logprobs": { + "type": "array", + "description": "List of the most likely tokens and their log probability, at this token position. In rare cases, there may be fewer than the number of requested top_logprobs returned.", + "items": { + "type": "object", + "properties": { + "token": { + "type": "string", + "description": "The token." + }, + "logprob": { + "type": "number", + "description": "The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely." + }, + "bytes": { + "type": ["array", "null"], + "description": "A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. 
Can be null if there is no bytes representation for the token." + } + }, + "required": ["token", "logprob"] + } + }, + "refusal": { + "type": ["array", "null"], + "description": "A list of message refusal tokens with log probability information.", + "items": { + "type": "object", + "properties": { + "token": { + "type": "string", + "description": "The token." + }, + "logprob": { + "type": "number", + "description": "The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely." + }, + "bytes": { + "type": ["array", "null"], + "description": "A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token." + } + }, + "required": ["token", "logprob"] + } + } + } + } + }, + "required": ["finish_reason", "index", "message"] + } + }, + "created": { + "type": "integer", + "description": "The Unix timestamp (in seconds) of when the chat completion was created." + }, + "model": { + "type": "string", + "description": "The model used for the chat completion." + }, + "service_tier": { + "type": ["string", "null"], + "description": "The service tier used for processing the request. This field is only included if the service_tier parameter is specified in the request." + }, + "system_fingerprint": { + "type": "string", + "description": "This fingerprint represents the backend configuration that the model runs with. Can be used in conjunction with the seed request parameter to understand when backend changes have been made that might impact determinism." + }, + "object": { + "type": "string", + "description": "The object type, which is always chat.completion." 
+ }, + "usage": { + "type": "object", + "description": "Usage statistics for the completion request.", + "properties": { + "completion_tokens": { + "type": "integer", + "description": "Number of tokens in the generated completion." + }, + "prompt_tokens": { + "type": "integer", + "description": "Number of tokens in the prompt." + }, + "total_tokens": { + "type": "integer", + "description": "Total number of tokens used in the request (prompt + completion)." + }, + "completion_tokens_details": { + "type": "object", + "description": "Breakdown of tokens used in a completion.", + "properties": { + "audio_tokens": { + "type": "integer", + "description": "Audio input tokens generated by the model." + }, + "reasoning_tokens": { + "type": "integer", + "description": "Tokens generated by the model for reasoning." + } + }, + "required": ["audio_tokens", "reasoning_tokens"] + }, + "prompt_tokens_details": { + "type": "object", + "description": "Breakdown of tokens used in the prompt.", + "properties": { + "audio_tokens": { + "type": "integer", + "description": "Audio input tokens present in the prompt." + }, + "cached_tokens": { + "type": "integer", + "description": "Cached tokens present in the prompt." + } + }, + "required": ["audio_tokens", "cached_tokens"] + } + }, + "required": [ + "completion_tokens", + "prompt_tokens", + "total_tokens", + "completion_tokens_details", + "prompt_tokens_details" + ] + } + }, + "required": [ + "id", + "choices", + "created", + "model", + "system_fingerprint", + "object", + "usage" + ] + }, + "ChatCompletionChunkResponseDto": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "A unique identifier for the chat completion. Each chunk has the same ID." + }, + "choices": { + "type": "array", + "description": "A list of chat completion choices. Can contain more than one element if n is greater than 1. 
Can also be empty for the last chunk if you set stream_options: {\"include_usage\": true}.", + "items": { + "type": "object", + "properties": { + "delta": { + "type": "object", + "description": "A chat completion delta generated by streamed model responses.", + "properties": { + "content": { + "type": ["string", "null"], + "description": "The contents of the chunk message." + }, + "function_call": { + "type": "object", + "description": "Deprecated and replaced by tool_calls. The name and arguments of a function that should be called, as generated by the model.", + "deprecated": true + }, + "tool_calls": { + "type": "array", + "description": "The tool calls generated by the model.", + "items": { + "type": "object", + "properties": { + "index": { + "type": "integer", + "description": "The index of the tool call in the list of tool calls." + }, + "id": { + "type": "string", + "description": "The ID of the tool call." + }, + "type": { + "type": "string", + "description": "The type of the tool. Currently, only function is supported." + }, + "function": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "The name of the function to call." + }, + "arguments": { + "type": "string", + "description": "The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function." + } + }, + "required": ["name", "arguments"] + } + }, + "required": ["index", "id", "type", "function"] + } + }, + "role": { + "type": "string", + "description": "The role of the author of this message." + }, + "refusal": { + "type": ["string", "null"], + "description": "The refusal message generated by the model." 
+ } + } + }, + "logprobs": { + "type": "object", + "description": "Log probability information for the choice.", + "properties": { + "content": { + "type": ["array", "null"], + "description": "A list of message content tokens with log probability information.", + "items": { + "type": "object", + "properties": { + "token": { + "type": "string", + "description": "The token." + }, + "logprob": { + "type": "number", + "description": "The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely." + }, + "bytes": { + "type": ["array", "null"], + "description": "A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token." + } + }, + "required": ["token", "logprob"] + } + }, + "top_logprobs": { + "type": "array", + "description": "List of the most likely tokens and their log probability, at this token position. In rare cases, there may be fewer than the number of requested top_logprobs returned.", + "items": { + "type": "object", + "properties": { + "token": { + "type": "string", + "description": "The token." + }, + "logprob": { + "type": "number", + "description": "The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely." + }, + "bytes": { + "type": ["array", "null"], + "description": "A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token." 
+ } + }, + "required": ["token", "logprob"] + } + }, + "refusal": { + "type": ["array", "null"], + "description": "A list of message refusal tokens with log probability information.", + "items": { + "type": "object", + "properties": { + "token": { + "type": "string", + "description": "The token." + }, + "logprob": { + "type": "number", + "description": "The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely." + }, + "bytes": { + "type": ["array", "null"], + "description": "A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token." + } + }, + "required": ["token", "logprob"] + } + } + } + }, + "finish_reason": { + "type": ["string", "null"], + "description": "The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, tool_calls if the model called a tool, or function_call (deprecated) if the model called a function." + }, + "index": { + "type": "integer", + "description": "The index of the choice in the list of choices." + } + }, + "required": ["delta", "index"] + } + }, + "created": { + "type": "integer", + "description": "The Unix timestamp (in seconds) of when the chat completion was created. Each chunk has the same timestamp." + }, + "model": { + "type": "string", + "description": "The model used to generate the completion." + }, + "service_tier": { + "type": ["string", "null"], + "description": "The service tier used for processing the request. 
This field is only included if the service_tier parameter is specified in the request." + }, + "system_fingerprint": { + "type": "string", + "description": "This fingerprint represents the backend configuration that the model runs with. Can be used in conjunction with the seed request parameter to understand when backend changes have been made that might impact determinism." + }, + "object": { + "type": "string", + "description": "The object type, which is always chat.completion.chunk." + }, + "usage": { + "type": "object", + "description": "An optional field that will only be present when you set stream_options: {\"include_usage\": true} in your request. When present, it contains a null value except for the last chunk which contains the token usage statistics for the entire request.", + "properties": { + "completion_tokens": { + "type": "integer", + "description": "Number of tokens in the generated completion." + }, + "prompt_tokens": { + "type": "integer", + "description": "Number of tokens in the prompt." + }, + "total_tokens": { + "type": "integer", + "description": "Total number of tokens used in the request (prompt + completion)." + } + }, + "required": ["completion_tokens", "prompt_tokens", "total_tokens"] + } + }, + "required": [ + "id", + "choices", + "created", + "model", + "system_fingerprint", + "object" + ] + }, + "CreateEmbeddingsDto": { + "type": "object", + "properties": { + "model": { + "type": "string", + "example": "mistral", + "description": "The name of the embedding model to be used." + }, + "input": { + "example": ["Hello World"], + "description": "The text or token array(s) to be embedded. This can be a single string, an array of strings, or an array of token arrays to embed multiple inputs in one request.", + "type": "array", + "items": { + "type": "string" + } + }, + "encoding_format": { + "type": "string", + "example": "float", + "description": "Specifies the format for the embeddings. Supported formats include `float` and `int`. 
This field is optional." + }, + "dimensions": { + "type": "number", + "example": 3, + "description": "Defines the number of dimensions for the output embeddings. This feature is supported by certain models only. This field is optional." + } + }, + "required": ["model", "input"] + }, + "EmbeddingsResponseDto": { + "type": "object", + "properties": { + "object": { + "type": "string", + "description": "Type of the result object." + }, + "model": { + "type": "string", + "description": "Identifier of the model utilized for generating embeddings." + }, + "embedding": { + "description": "The embedding vector represented as an array of floating-point numbers. ", + "type": "array", + "items": { + "type": "number" + } + }, + "usage": { + "description": "Details of token usage, including prompt_tokens and total_tokens.", + "allOf": [ + { + "$ref": "#/components/schemas/UsageDto" + } + ] + } + }, + "required": ["object", "model", "embedding", "usage"] + }, + "PullModelRequest": { + "type": "object", + "required": ["model"], + "properties": { + "model": { + "type": "string", + "description": "The identifier or URL of the model to use. It can be a model ID on Cortexso (https://huggingface.co/cortexso) or a HuggingFace URL pointing to the model file. For example: 'gpt2' or 'https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/blob/main/mistral-7b-instruct-v0.1.Q2_K.gguf'", + "examples": [ + "tinyllama:gguf", + "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/blob/main/mistral-7b-instruct-v0.1.Q2_K.gguf" + ] + }, + "id": { + "type": "string", + "description": "The id which will be used to register the model.", + "examples": "my-custom-model-id" + }, + "name": { + "type": "string", + "description": "The name which will be used to overwrite the model name.", + "examples": "my-custom-model-name" + } + } + }, + "PullModelResponse": { + "type": "object", + "properties": { + "message": { + "type": "string", + "example": "Model start downloading!" 
+ } + } + }, + "AddModelRequest": { + "type": "object", + "required": [ + "model", + "engine", + "version", + "inference_params", + "TransformReq", + "TransformResp", + "metadata" + ], + "properties": { + "model": { + "type": "string", + "description": "The identifier of the model." + }, + "api_key_template": { + "type": "string", + "description": "Template for the API key header." + }, + "engine": { + "type": "string", + "description": "The engine used for the model." + }, + "version": { + "type": "string", + "description": "The version of the model." + }, + "inference_params": { + "type": "object", + "properties": { + "temperature": { + "type": "number" + }, + "top_p": { + "type": "number" + }, + "frequency_penalty": { + "type": "number" + }, + "presence_penalty": { + "type": "number" + }, + "max_tokens": { + "type": "integer" + }, + "stream": { + "type": "boolean" + } + } + }, + "TransformReq": { + "type": "object", + "properties": { + "get_models": { + "type": "object" + }, + "chat_completions": { + "type": "object", + "properties": { + "url": { + "type": "string" + }, + "template": { + "type": "string" + } + } + }, + "embeddings": { + "type": "object" + } + } + }, + "TransformResp": { + "type": "object", + "properties": { + "chat_completions": { + "type": "object", + "properties": { + "template": { + "type": "string" + } + } + }, + "embeddings": { + "type": "object" + } + } + }, + "metadata": { + "type": "object", + "properties": { + "author": { + "type": "string" + }, + "description": { + "type": "string" + }, + "end_point": { + "type": "string" + }, + "logo": { + "type": "string" + }, + "api_key_url": { + "type": "string" + } + } + } + } + }, + "CreateModelDto": { + "type": "object", + "properties": { + "model": { + "type": "string", + "description": "The unique identifier of the model.", + "example": "mistral" + }, + "name": { + "type": "string", + "description": "The name of the model.", + "example": "mistral" + }, + "files": { + "description": "The URL 
sources from which the model is downloaded or accessed.", + "example": ["https://huggingface.co/cortexso/mistral/tree/gguf"], + "oneOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "$ref": "#/components/schemas/ModelArtifactDto" + } + ] + }, + "prompt_template": { + "type": "string", + "description": "A predefined text or framework that guides the AI model's response generation.", + "example": "\n You are an expert in {subject}. Provide a detailed and thorough explanation on the topic of {topic}." + }, + "stop": { + "description": "Defines specific tokens or phrases that signal the model to stop producing further output.", + "example": ["End"], + "type": "array", + "items": { + "type": "string" + } + }, + "max_tokens": { + "type": "number", + "description": "Sets the upper limit on the number of tokens the model can generate in a single output.", + "example": 4096 + }, + "top_p": { + "type": "number", + "description": "Sets probability threshold for more relevant outputs.", + "example": 0.9 + }, + "temperature": { + "type": "number", + "description": "Influences the randomness of the model's output.", + "example": 0.7 + }, + "frequency_penalty": { + "type": "number", + "description": "Modifies the likelihood of the model repeating the same words or phrases within a single output.", + "example": 0.5 + }, + "presence_penalty": { + "type": "number", + "description": "Reduces the likelihood of repeating tokens, promoting novelty in the output.", + "example": 0.6 + }, + "stream": { + "type": "boolean", + "description": "Determines the format for output generation. If set to `true`, the output is generated continuously, allowing for real-time streaming of responses. 
If set to `false`, the output is delivered in a single JSON file.", + "example": true + }, + "ctx_len": { + "type": "number", + "description": "Sets the maximum input the model can use to generate a response, it varies with the model used.", + "example": 4096 + }, + "ngl": { + "type": "number", + "description": "Determines GPU layer usage.", + "example": 32 + }, + "n_parallel": { + "type": "number", + "minimum": 1, + "description": "Number of parallel processing units to use.", + "example": 1 + }, + "cpu_threads": { + "type": "number", + "minimum": 1, + "description": "Determines CPU inference threads, limited by hardware and OS. ", + "example": 10 + }, + "engine": { + "type": "string", + "description": "The engine used to run the model.", + "example": "llamacpp" + }, + "owned_by": { + "type": "string", + "description": "The owner of the model.", + "example": "", + "default": "" + } + }, + "required": ["model", "files"] + }, + "StartModelSuccessDto": { + "type": "object", + "properties": { + "message": { + "type": "string", + "description": "The success or error message displayed when a model is successfully loaded or fails to load." + }, + "modelId": { + "type": "string", + "description": "The unique identifier of the model." + } + }, + "required": ["message", "modelId"] + }, + "ModelStartDto": { + "type": "object", + "properties": { + "model": { + "type": "string", + "example": "llama3:8b-gguf-q6-k", + "description": "A downloaded model name." 
+ }, + "ctx_len": { + "type": "number", + "description": "The context length for model operations varies; the maximum depends on the specific model used.", + "example": 4096 + }, + "ngl": { + "type": "number", + "description": "Determines GPU layer usage.", + "example": 32 + }, + "n_parallel": { + "type": "number", + "minimum": 1, + "description": "Number of parallel processing units to use.", + "example": 1 + }, + "cache_type": { + "type": "string", + "description": "KV cache type: f16, q8_0, q4_0, default is f16", + "example": "f16" + }, + "caching_enabled": { + "type": "boolean", + "description": "To enable prompt caching or not", + "example": true + }, + "model_path": { + "type": "string", + "description": "Local path to LLM model file", + "example": "/tmp/model.gguf" + }, + "mmproj": { + "type": "string", + "description": "Local path to mmproj model file", + "example": "/tmp/model.gguf" + } + }, + "required": ["model"] + }, + "ModelStopDto": { + "type": "object", + "properties": { + "model": { + "type": "string", + "example": "llama3:8b-gguf-q6-k", + "description": "A downloaded model name." + } + }, + "required": ["model"] + }, + "ImportModelRequest": { + "type": "object", + "properties": { + "model": { + "type": "string", + "description": "The unique identifier of the model." + }, + "modelPath": { + "type": "string", + "description": "The file path to the model." + }, + "name": { + "type": "string", + "description": "The display name of the model." + }, + "option": { + "type": "string", + "description": "Import options such as symlink or copy.", + "enum": ["symlink", "copy"] + } + }, + "required": ["model", "modelPath"] + }, + "ImportModelResponse": { + "type": "object", + "properties": { + "message": { + "type": "string", + "description": "Success message." + }, + "modelHandle": { + "type": "string", + "description": "The unique identifier of the imported model." 
+ }, + "result": { + "type": "string", + "description": "Result status.", + "example": "OK" + } + }, + "required": ["message", "modelHandle", "result"] + }, + "CommonResponseDto": { + "type": "object", + "properties": { + "message": { + "type": "string", + "description": "The response success or error message." + } + }, + "required": ["message"] + }, + "EngineUninstallationResponseDto": { + "type": "object", + "properties": { + "message": { + "type": "string", + "example": "Engine my_engine uninstalled successfully!", + "enum": [ + "Engine onnxruntime uninstalled successfully!", + "Engine llama-cpp uninstalled successfully!", + "Engine tensorrt-llm uninstalled successfully!" + ] + } + } + }, + "SimpleErrorResponse": { + "type": "object", + "properties": { + "message": { + "type": "string" + } + } + }, + "EngineInstallationResponseDto": { + "type": "object", + "properties": { + "message": { + "type": "string", + "example": "Engine my_engine installed successfully!", + "enum": [ + "Engine onnxruntime installed successfully!", + "Engine llama-cpp installed successfully!", + "Engine tensorrt-llm installed successfully!" 
+ ] + } + }, + "EngineList": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Engine" + } + }, + "object": { + "type": "string", + "example": "list" + }, + "result": { + "type": "string", + "example": "OK" + } + }, + "required": ["data", "object", "result"] + }, + "Engine": { + "type": "object", + "properties": { + "description": { + "type": "string", + "example": "This extension enables chat completion API calls using the Onnx engine" + }, + "name": { + "type": "string", + "example": "onnxruntime" + }, + "productName": { + "type": "string", + "example": "onnxruntime" + }, + "status": { + "type": "string", + "example": "Incompatible" + }, + "variant": { + "type": "string", + "example": "mac-arm64" + }, + "version": { + "type": "string", + "example": "0.1.34" + } + }, + "required": ["description", "name", "productName", "status"] + }, + "CpuModeDto": { + "type": "object", + "properties": { + "ram": { + "type": "number", + "example": 1024 + } + } + }, + "GpuModeDto": { + "type": "object", + "properties": { + "ram": { + "type": "number", + "example": 1024 + }, + "vram": { + "type": "number", + "example": 1024 + }, + "ngl": { + "type": "number", + "example": 30 + }, + "context_length": { + "type": "number", + "example": 4096 + }, + "recommend_ngl": { + "type": "number", + "example": 33 + } + } + }, + "RecommendDto": { + "type": "object", + "properties": { + "cpu_mode": { + "type": "object", + "$ref": "#/components/schemas/CpuModeDto" + }, + "gpu_mode": { + "type": "array", + "items": { + "$ref": "#/components/schemas/GpuModeDto" + } + } + } + }, + "ModelDto": { + "type": "object", + "properties": { + "id": { + "type": "string", + "example": "mistral", + "description": "The model identifier, which can be referenced in the API endpoints." + }, + "prompt_template": { + "type": "string", + "example": "You are an expert in {subject}. 
Provide a detailed and thorough explanation on the topic of {topic}.", + "description": "A predefined text or framework that guides the AI model's response generation." + }, + "stop": { + "example": ["End"], + "description": "Defines specific tokens or phrases that signal the model to stop producing further output.", + "type": "array", + "items": { + "type": "string" + } + }, + "max_tokens": { + "type": "number", + "example": 4096, + "description": "Sets the upper limit on the number of tokens the model can generate in a single output." + }, + "temperature": { + "type": "number", + "example": 0.7, + "description": "Influences the randomness of the model's output." + }, + "top_p": { + "type": "number", + "example": 0.95, + "description": "Sets probability threshold for more relevant outputs" + }, + "stream": { + "type": "boolean", + "example": true, + "description": "Determines the format for output generation. If set to `true`, the output is generated continuously, allowing for real-time streaming of responses. If set to `false`, the output is delivered in a single JSON file." + }, + "frequency_penalty": { + "type": "number", + "example": 0, + "description": "Modifies the likelihood of the model repeating the same words or phrases within a single output." + }, + "presence_penalty": { + "type": "number", + "example": 0, + "description": "Reduces the likelihood of repeating tokens, promoting novelty in the output." 
+ }, + "ngl": { + "type": "number", + "description": "Determines GPU layer usage.", + "example": 32 + }, + "ctx_len": { + "type": "number", + "description": "The context length for model operations varies; the maximum depends on the specific model used.", + "example": 4096 + }, + "cpu_threads": { + "type": "number", + "description": "Determines CPU inference threads, limited by hardware and OS.", + "example": 10 + }, + "pre_prompt": { + "type": "string", + "description": "The prompt to use for internal configuration", + "example": "You are an assistant with expert knowledge in {subject}. Please provide a detailed and accurate response to the following query: {query}. Ensure that your response is clear, concise, and informative." + }, + "n_batch": { + "type": "number", + "description": "The batch size for prompt eval step", + "example": 512 + }, + "caching_enabled": { + "type": "boolean", + "description": "To enable prompt caching or not", + "example": true + }, + "grp_attn_n": { + "type": "number", + "description": "Group attention factor in self-extend", + "example": 1 + }, + "grp_attn_w": { + "type": "number", + "description": "Group attention width in self-extend", + "example": 512 + }, + "mlock": { + "type": "boolean", + "description": "Prevent system swapping of the model to disk in macOS", + "example": false + }, + "grammar_file": { + "type": "string", + "description": "You can constrain the sampling using GBNF grammars by providing path to a grammar file" + }, + "flash_attn": { + "type": "boolean", + "description": "To enable Flash Attention, default is true", + "example": true + }, + "cache_type": { + "type": "string", + "description": "KV cache type: f16, q8_0, q4_0, default is f16", + "example": "f16" + }, + "use_mmap": { + "type": "boolean", + "description": "To enable mmap, default is true", + "example": true + }, + "size": { + "type": "number", + "description": "The model file size in bytes", + "example": 1073741824 + }, + "engine": { + "type": 
"string", + "description": "The engine to use.", + "example": "llamacpp" + }, + "recommendation": { + "type": "object", + "$ref": "#/components/schemas/RecommendDto" + } + }, + "required": ["id"] + }, + "ListModelsResponseDto": { + "type": "object", + "properties": { + "object": { + "type": "string", + "example": "list", + "enum": ["list"] + }, + "data": { + "description": "List of models", + "type": "array", + "items": { + "$ref": "#/components/schemas/ModelDto" + } + } + }, + "required": ["object", "data"] + }, + "UpdateModelDto": { + "type": "object", + "properties": { + "files": { + "type": "array", + "description": "List of file paths associated with the model. Can be relative or absolute.", + "items": { + "type": "string", + "example": "models\\cortex.so\\tinyllama\\1b-gguf\\model.gguf" + } + }, + "stop": { + "type": "array", + "description": "Tokens that signal the end of generation.", + "items": { + "type": "string" + }, + "example": [""] + }, + "stream": { + "type": "boolean", + "description": "Whether to stream the output as it is generated.", + "example": true + }, + "top_p": { + "type": "number", + "description": "Controls nucleus sampling; the model considers the results of the tokens with top_p probability mass.", + "example": 0.95 + }, + "temperature": { + "type": "number", + "description": "Controls randomness in token selection; lower values make the output more deterministic.", + "example": 0.7 + }, + "frequency_penalty": { + "type": "number", + "description": "Penalizes repeated tokens based on their frequency.", + "example": 0 + }, + "presence_penalty": { + "type": "number", + "description": "Penalizes tokens that have already appeared in the output.", + "example": 0 + }, + "max_tokens": { + "type": "integer", + "description": "Maximum number of tokens to generate.", + "example": 4096 + }, + "seed": { + "type": "integer", + "description": "Seed for random number generation to ensure reproducibility; -1 for random seed.", + "example": -1 + }, + 
"dynatemp_range": { + "type": "number", + "description": "Range for dynamic temperature adjustment.", + "example": 0 + }, + "dynatemp_exponent": { + "type": "number", + "description": "Exponent for dynamic temperature adjustment.", + "example": 1 + }, + "top_k": { + "type": "integer", + "description": "Limits the sampling pool to the top_k most probable tokens.", + "example": 40 + }, + "min_p": { + "type": "number", + "description": "Minimum probability threshold for token selection.", + "example": 0.05 + }, + "tfs_z": { + "type": "number", + "description": "Threshold for token frequency sampling.", + "example": 1 + }, + "typ_p": { + "type": "number", + "description": "Controls typical sampling; similar to top_p but focuses on local token distribution.", + "example": 1 + }, + "repeat_last_n": { + "type": "integer", + "description": "Number of recent tokens to consider for repetition penalty.", + "example": 64 + }, + "repeat_penalty": { + "type": "number", + "description": "Penalty applied to repeated tokens.", + "example": 1 + }, + "mirostat": { + "type": "boolean", + "description": "Enables or disables Mirostat sampling.", + "example": false + }, + "mirostat_tau": { + "type": "number", + "description": "Target entropy for Mirostat sampling.", + "example": 5 + }, + "mirostat_eta": { + "type": "number", + "description": "Learning rate for Mirostat sampling.", + "example": 0.1 + }, + "penalize_nl": { + "type": "boolean", + "description": "Whether to penalize newline tokens.", + "example": false + }, + "ignore_eos": { + "type": "boolean", + "description": "Whether to ignore end-of-sequence tokens during generation.", + "example": false + }, + "n_probs": { + "type": "integer", + "description": "Number of probabilities to consider for each token.", + "example": 0 + }, + "min_keep": { + "type": "integer", + "description": "Minimum number of tokens to keep in the buffer.", + "example": 0 + }, + "engine": { + "type": "string", + "description": "The engine used to run the 
model.", + "example": "llama-cpp" + }, + "prompt_template": { + "type": "string", + "description": "Template used for formatting prompts.", + "example": "\n\n<|system|>\n{system_message}\n\n\n\n\n<|user|>\n{prompt}\n\n\n<|assistant|>\n\n" + }, + "ctx_len": { + "type": "integer", + "description": "Context length for the model.", + "example": 4096 + }, + "n_parallel": { + "type": "integer", + "description": "Number of parallel threads for execution.", + "example": 1 + }, + "ngl": { + "type": "integer", + "description": "Number of GPU layers.", + "example": 33 + }, + "api_key_template": { + "type": "string", + "description": "Template for the API key header." + }, + "version": { + "type": "string", + "description": "The version of the model." + }, + "inference_params": { + "type": "object", + "properties": { + "temperature": { + "type": "number" + }, + "top_p": { + "type": "number" + }, + "frequency_penalty": { + "type": "number" + }, + "presence_penalty": { + "type": "number" + }, + "max_tokens": { + "type": "integer" + }, + "stream": { + "type": "boolean" + } + } + } + } + }, + "DeleteModelResponseDto": { + "type": "object", + "properties": { + "id": { + "type": "string", + "example": "mistral-ins-7b-q4", + "description": "The identifier of the model that was deleted." + }, + "object": { + "type": "string", + "example": "model", + "description": "Type of the object, indicating it's a model.", + "default": "model" + }, + "deleted": { + "type": "boolean", + "example": true, + "description": "Indicates whether the model was successfully deleted." + } + }, + "required": ["id", "object", "deleted"] + }, + "CreateThreadAssistantDto": { + "type": "object", + "properties": { + "id": { + "type": "string", + "example": "thread_123", + "description": "The unique identifier of the assistant." + }, + "avatar": { + "type": "string", + "example": "https://example.com/avatar.png", + "description": "URL of the assistant's avatar image." 
+ }, + "name": { + "type": "string", + "example": "Virtual Helper", + "description": "The name of the assistant." + }, + "model": { + "type": "string", + "example": "mistral", + "description": "The model's unique identifier and settings." + }, + "instructions": { + "type": "string", + "example": "Assist with customer queries and provide information based on the company database.", + "description": "The assistant's specific instructions." + }, + "tools": { + "type": "array", + "example": [ + { + "name": "Knowledge Retrieval", + "settings": { + "source": "internal", + "endpoint": "https://api.example.com/knowledge" + } + } + ], + "description": "The thread's tool(Knowledge Retrieval) configurations." + }, + "description": { + "type": "string", + "nullable": true, + "example": "This assistant helps with customer support by retrieving relevant information.", + "description": "The description of the assistant." + }, + "metadata": { + "type": "object", + "nullable": true, + "example": { + "department": "support", + "version": "1.0" + }, + "description": "Additional metadata for the assistant." + }, + "object": { + "type": "string", + "example": "assistant", + "description": "The object type, always \"assistant\"." + }, + "temperature": { + "type": "number", + "nullable": true, + "example": 0.7, + "description": "Sampling temperature for the assistant." + }, + "top_p": { + "type": "number", + "nullable": true, + "example": 0.9, + "description": "Top-p sampling value for the assistant." + }, + "created_at": { + "type": "number", + "example": 1622470423, + "description": "Timestamp of when the assistant was created." + }, + "response_format": { + "type": "object", + "example": { + "format": "json" + }, + "description": "The response format option for the assistant." + }, + "tool_resources": { + "type": "object", + "example": { + "resources": ["database1", "database2"] + }, + "description": "Tool resources for the assistant." 
+ } + }, + "required": [ + "id", + "name", + "model", + "instructions", + "tools", + "description", + "metadata", + "object", + "created_at" + ] + }, + "CreateThreadDto": { + "type": "object", + "properties": { + "assistants": { + "description": "The details of the thread's settings.", + "type": "array", + "items": { + "$ref": "#/components/schemas/CreateThreadAssistantDto" + } + } + }, + "required": ["assistants"] + }, + "ContentDto": { + "type": "object", + "properties": { + "type": { + "type": "string", + "example": "text", + "description": "Type of content, e.g., \"text\"." + }, + "text": { + "type": "object", + "example": { + "value": "How does AI work? Explain it in simple terms.", + "annotations": [] + }, + "description": "Text content of the message along with any annotations." + } + }, + "required": ["type", "text"] + }, + "GetMessageResponseDto": { + "type": "object", + "properties": { + "id": { + "type": "string", + "example": "msg_abc123", + "description": "The identifier of the message." + }, + "object": { + "type": "string", + "example": "thread.message", + "description": "Type of the object, indicating it's a thread message.", + "default": "thread.message" + }, + "created_at": { + "type": "integer", + "example": 1699017614, + "description": "Unix timestamp representing the creation time of the message." + }, + "thread_id": { + "type": "string", + "example": "thread_abc123", + "description": "Identifier of the thread to which this message belongs." + }, + "role": { + "type": "string", + "example": "user", + "description": "Role of the sender, either 'user' or 'assistant'." 
+ }, + "content": { + "description": "Array of content objects detailing the message content.", + "type": "array", + "items": { + "$ref": "#/components/schemas/ContentDto" + } + }, + "file_ids": { + "example": [], + "description": "Array of file IDs associated with the message, if any.", + "type": "array", + "items": { + "type": "string" + } + }, + "assistant_id": { + "type": "string", + "nullable": true, + "example": null, + "description": "Identifier of the assistant involved in the message, if applicable." + }, + "run_id": { + "type": "string", + "nullable": true, + "example": null, + "description": "Run ID associated with the message, if applicable." + }, + "metadata": { + "type": "object", + "example": {}, + "description": "Metadata associated with the message." + } + }, + "required": [ + "id", + "object", + "created_at", + "thread_id", + "role", + "content", + "file_ids", + "assistant_id", + "run_id", + "metadata" + ] + }, + "ListMessageObjectDto": { + "type": "object", + "properties": { + "id": { + "type": "string", + "example": "msg_abc123", + "description": "The identifier of the message." + }, + "object": { + "type": "string", + "example": "thread.message", + "description": "Type of the object, indicating it's a thread message." + }, + "created_at": { + "type": "integer", + "example": 1699017614, + "description": "Unix timestamp representing the creation time of the message." + }, + "thread_id": { + "type": "string", + "example": "thread_abc123", + "description": "Identifier of the thread to which this message belongs." + }, + "role": { + "type": "string", + "example": "user", + "description": "Role of the sender, either 'user' or 'assistant'." 
+ }, + "file_ids": { + "description": "Array of file IDs associated with the message, if any.", + "example": [], + "type": "array", + "items": { + "type": "string" + } + }, + "assistant_id": { + "type": "string", + "nullable": true, + "description": "Identifier of the assistant involved in the message, if applicable.", + "example": null + }, + "run_id": { + "type": "string", + "nullable": true, + "description": "Run ID associated with the message, if applicable.", + "example": null + }, + "metadata": { + "type": "object", + "example": {}, + "description": "Metadata associated with the message." + } + }, + "required": [ + "id", + "object", + "created_at", + "thread_id", + "role", + "file_ids", + "assistant_id", + "run_id", + "metadata" + ] + }, + "ListMessagesResponseDto": { + "type": "object", + "properties": { + "object": { + "type": "string", + "example": "list", + "description": "Type of the object, indicating it's a list." + }, + "data": { + "description": "Array of message objects.", + "type": "array", + "items": { + "$ref": "#/components/schemas/ListMessageObjectDto" + } + }, + "first_id": { + "type": "string", + "example": "msg_abc123", + "description": "Identifier of the first message in the list." + }, + "last_id": { + "type": "string", + "example": "msg_abc456", + "description": "Identifier of the last message in the list." + }, + "has_more": { + "type": "boolean", + "example": false, + "description": "Indicates whether there are more messages to retrieve." + } + }, + "required": ["object", "data", "first_id", "last_id", "has_more"] + }, + "CreateMessageDto": { + "type": "object", + "properties": { + "role": { + "type": "object", + "example": "user", + "description": "The role of the entity that is creating the message. Allowed values include:\n - user: Indicates the message is sent by an actual user and should be used in most cases to represent user-generated messages.\n - assistant: Indicates the message is generated by the assistant. 
Use this value to insert messages from the assistant into the conversation." + }, + "content": { + "type": "string", + "example": "Tell me a joke", + "description": "The text contents of the message." + } + }, + "required": ["role", "content"] + }, + "UpdateMessageDto": { + "type": "object", + "properties": {} + }, + "DeleteMessageDto": { + "type": "object", + "properties": { + "id": { + "type": "string", + "example": "message_123", + "description": "The identifier of the message that was deleted." + }, + "object": { + "type": "string", + "example": "message", + "description": "Type of the object, indicating it's a message.", + "default": "message" + }, + "deleted": { + "type": "boolean", + "example": true, + "description": "Indicates whether the message was successfully deleted." + } + }, + "required": ["id", "object", "deleted"] + }, + "GetThreadResponseDto": { + "type": "object", + "properties": { + "id": { + "type": "string", + "example": "thread_abc123", + "description": "The identifier of the thread." + }, + "object": { + "type": "string", + "example": "thread", + "description": "Type of the object" + }, + "created_at": { + "type": "integer", + "example": 1699014083, + "description": "Unix timestamp representing the creation time of the thread." + }, + "assistants": { + "example": ["assistant-001"], + "description": "List of assistants involved in the thread.", + "type": "array", + "items": { + "type": "string" + } + }, + "metadata": { + "type": "object", + "example": {}, + "description": "Metadata associated with the thread." 
+ }, + "messages": { + "example": [], + "description": "List of messages within the thread.", + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "id", + "object", + "created_at", + "assistants", + "metadata", + "messages" + ] + }, + "UpdateThreadDto": { + "type": "object", + "properties": {} + }, + "DeleteThreadResponseDto": { + "type": "object", + "properties": { + "id": { + "type": "string", + "example": "thread_123", + "description": "The identifier of the thread that was deleted." + }, + "object": { + "type": "string", + "example": "thread", + "description": "Type of the object, indicating it's a thread.", + "default": "thread" + }, + "deleted": { + "type": "boolean", + "example": true, + "description": "Indicates whether the thread was successfully deleted." + } + }, + "required": ["id", "object", "deleted"] + }, + "CPUDto": { + "type": "object", + "properties": { + "arch": { + "type": "string", + "example": "amd64", + "description": "The architecture of the CPU." + }, + "cores": { + "type": "integer", + "example": 8, + "description": "The number of CPU cores available." + }, + "instructions": { + "type": "array", + "items": { + "type": "string" + }, + "example": [ + "fpu", + "mmx", + "sse", + "sse2", + "sse3", + "ssse3", + "sse4_1", + "sse4_2", + "pclmulqdq", + "avx", + "avx2", + "aes", + "f16c" + ], + "description": "A list of supported CPU instruction sets." + }, + "model": { + "type": "string", + "example": "AMD Ryzen Threadripper PRO 5955WX 16-Cores", + "description": "The model name of the CPU." + } + }, + "required": ["arch", "cores", "instructions", "model"] + }, + "GPUDto": { + "type": "object", + "properties": { + "activated": { + "type": "boolean", + "example": true, + "description": "Indicates if the GPU is currently activated." + }, + "additional_information": { + "type": "object", + "properties": { + "compute_cap": { + "type": "string", + "example": "8.6", + "description": "The compute capability of the GPU." 
+ }, + "driver_version": { + "type": "string", + "example": "535.183", + "description": "The version of the installed driver." + } + }, + "required": ["compute_cap", "driver_version"] + }, + "free_vram": { + "type": "integer", + "example": 23983, + "description": "The amount of free VRAM in MB." + }, + "id": { + "type": "string", + "example": "0", + "description": "Unique identifier for the GPU." + }, + "name": { + "type": "string", + "example": "NVIDIA GeForce RTX 3090", + "description": "The name of the GPU model." + }, + "total_vram": { + "type": "integer", + "example": 24576, + "description": "The total VRAM available in MB." + }, + "uuid": { + "type": "string", + "example": "GPU-5206045b-2a1c-1e7d-6c60-d7c367d02376", + "description": "The universally unique identifier for the GPU." + }, + "version": { + "type": "string", + "example": "12.2", + "description": "The version of the GPU." + } + }, + "required": [ + "activated", + "additional_information", + "free_vram", + "id", + "name", + "total_vram", + "uuid", + "version" + ] + }, + "OperatingSystemDto": { + "type": "object", + "properties": { + "name": { + "type": "string", + "example": "Ubuntu 24.04.1 LTS", + "description": "The name of the operating system." + }, + "version": { + "type": "string", + "example": "24.04.1 LTS (Noble Numbat)", + "description": "The version of the operating system." + } + }, + "required": ["name", "version"] + }, + "PowerDto": { + "type": "object", + "properties": { + "battery_life": { + "type": "integer", + "example": 0, + "description": "The percentage of battery life remaining." + }, + "charging_status": { + "type": "string", + "example": "", + "description": "The charging status of the device." + }, + "is_power_saving": { + "type": "boolean", + "example": false, + "description": "Indicates if the power-saving mode is enabled." 
+ } + }, + "required": ["battery_life", "charging_status", "is_power_saving"] + }, + "RAMDto": { + "type": "object", + "properties": { + "available": { + "type": "integer", + "example": 11100, + "description": "The amount of available RAM in MB." + }, + "total": { + "type": "integer", + "example": 15991, + "description": "The total RAM in MB." + }, + "type": { + "type": "string", + "example": "", + "description": "The type of RAM." + } + }, + "required": ["available", "total", "type"] + }, + "StorageDto": { + "type": "object", + "properties": { + "available": { + "type": "integer", + "example": 0, + "description": "The amount of available storage in MB." + }, + "total": { + "type": "integer", + "example": 0, + "description": "The total storage in MB." + }, + "type": { + "type": "string", + "example": "", + "description": "The type of storage." + } + }, + "required": ["available", "total", "type"] + } + } + } +} diff --git a/server/index.ts b/server/index.ts index e8a6eea78e..4008d70081 100644 --- a/server/index.ts +++ b/server/index.ts @@ -1,9 +1,9 @@ import fastify from 'fastify' import dotenv from 'dotenv' -import { v1Router, log, getJanExtensionsPath } from '@janhq/core/node' -import { join } from 'path' +import { log } from '@janhq/core/node' import tcpPortUsed from 'tcp-port-used' import { Logger } from './helpers/logger' +import CORTEX_SCHEMA from './cortex.json' // Load environment variables dotenv.config() @@ -66,34 +66,29 @@ export const startServer = async (configs?: ServerConfig): Promise => { // Initialize Fastify server with logging server = fastify({ - logger: new Logger(), + loggerInstance: new Logger(), // Set body limit to 100MB - Default is 1MB // According to OpenAI - a batch input file can be up to 100 MB in size // Whisper endpoints accept up to 25MB // Vision endpoints accept up to 4MB - bodyLimit: 104_857_600 + bodyLimit: 104_857_600, }) // Register CORS if enabled if (corsEnabled) await server.register(require('@fastify/cors'), {}) + 
CORTEX_SCHEMA.servers[0].url = configs?.prefix ?? '/v1' // Register Swagger for API documentation await server.register(require('@fastify/swagger'), { mode: 'static', specification: { - path: configs?.schemaPath ?? './../docs/openapi/jan.yaml', - baseDir: configs?.baseDir ?? './../docs/openapi', - postProcessor: function (swaggerObject: any) { - swaggerObject.servers[0].url = configs?.prefix ?? '/v1' - return swaggerObject - }, + document: CORTEX_SCHEMA, }, }) // Register Swagger UI await server.register(require('@fastify/swagger-ui'), { routePrefix: '/', - baseDir: configs?.baseDir ?? join(__dirname, '../..', './docs/openapi'), uiConfig: { docExpansion: 'full', deepLinking: false, @@ -102,26 +97,12 @@ export const startServer = async (configs?: ServerConfig): Promise => { transformSpecificationClone: true, }) - // Register static file serving for extensions - // TODO: Watch extension files changes and reload - await server.register( - (childContext: any, _: any, done: any) => { - childContext.register(require('@fastify/static'), { - root: getJanExtensionsPath(), - wildcard: false, - }) - - done() - }, - { prefix: 'extensions' } - ) - - // Register proxy middleware - if (configs?.storageAdataper) - server.addHook('preHandler', configs.storageAdataper) + server.register(require('@fastify/http-proxy'), { + upstream: 'http://127.0.0.1:39291/v1', + prefix: configs?.prefix ?? '/v1', + http2: false, + }) - // Register API routes - await server.register(v1Router, { prefix: configs?.prefix ?? 
'/v1' }) // Start listening for requests await server .listen({ diff --git a/server/main.ts b/server/main.ts index 71fb111062..b69197d157 100644 --- a/server/main.ts +++ b/server/main.ts @@ -1,7 +1,6 @@ -import { s3 } from './middleware/s3' import { setup } from './helpers/setup' import { startServer as start } from './index' /** * Setup extensions and start the server */ -setup().then(() => start({ storageAdataper: s3 })) +setup().then(() => start()) diff --git a/server/middleware/s3.ts b/server/middleware/s3.ts deleted file mode 100644 index 3024285a3d..0000000000 --- a/server/middleware/s3.ts +++ /dev/null @@ -1,70 +0,0 @@ -import { join } from 'path' - -// Middleware to intercept requests and proxy if certain conditions are met -const config = { - endpoint: process.env.AWS_ENDPOINT, - region: process.env.AWS_REGION, - credentials: { - accessKeyId: process.env.AWS_ACCESS_KEY_ID, - secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY, - }, -} - -const S3_BUCKET_NAME = process.env.S3_BUCKET_NAME - -const fs = require('@cyclic.sh/s3fs')(S3_BUCKET_NAME, config) -const PROXY_PREFIX = '/v1/fs' -const PROXY_ROUTES = ['/threads', '/messages'] - -export const s3 = (req: any, reply: any, done: any) => { - // Proxy FS requests to S3 using S3FS - if (req.url.startsWith(PROXY_PREFIX)) { - const route = req.url.split('/').pop() - const args = parseRequestArgs(req) - - // Proxy matched requests to the s3fs module - if (args.length && PROXY_ROUTES.some((route) => args[0].includes(route))) { - try { - // Handle customized route - // S3FS does not handle appendFileSync - if (route === 'appendFileSync') { - let result = handAppendFileSync(args) - - reply.status(200).send(result) - return - } - // Reroute the other requests to the s3fs module - const result = fs[route](...args) - reply.status(200).send(result) - return - } catch (ex) { - console.error(ex) - } - } - } - // Let other requests go through - done() -} - -const parseRequestArgs = (req: Request) => { - const { - 
getJanDataFolderPath, - normalizeFilePath, - } = require('@janhq/core/node') - - return JSON.parse(req.body as any).map((arg: any) => - typeof arg === 'string' && - (arg.startsWith(`file:/`) || arg.startsWith(`file:\\`)) - ? join(getJanDataFolderPath(), normalizeFilePath(arg)) - : arg - ) -} - -const handAppendFileSync = (args: any[]) => { - if (fs.existsSync(args[0])) { - const data = fs.readFileSync(args[0], 'utf-8') - return fs.writeFileSync(args[0], data + args[1]) - } else { - return fs.writeFileSync(args[0], args[1]) - } -} diff --git a/server/package.json b/server/package.json index b2c237c615..cd60e8ea75 100644 --- a/server/package.json +++ b/server/package.json @@ -8,7 +8,8 @@ "homepage": "https://jan.ai", "description": "Use offline LLMs with your own data. Run open source models like Llama2 or Falcon on your internal computers/servers.", "files": [ - "build/**" + "build/**", + "cortex.json" ], "scripts": { "lint": "eslint . --ext \".js,.jsx,.ts,.tsx\"", @@ -19,14 +20,15 @@ "dependencies": { "@alumna/reflect": "^1.1.3", "@cyclic.sh/s3fs": "^1.2.9", - "@fastify/cors": "^8.4.2", + "@fastify/cors": "^10.0.1", + "@fastify/http-proxy": "^11.0.0", "@fastify/static": "^6.12.0", - "@fastify/swagger": "^8.13.0", - "@fastify/swagger-ui": "2.0.1", + "@fastify/swagger": "^9.4.0", + "@fastify/swagger-ui": "5.2.0", "@janhq/core": "link:./core", "@npmcli/arborist": "^7.3.1", "dotenv": "^16.3.1", - "fastify": "^4.24.3", + "fastify": "^5.2.0", "fetch-retry": "^5.0.6", "node-fetch": "2", "request": "^2.88.2", diff --git a/server/tsconfig.json b/server/tsconfig.json index dd27b89323..d707984030 100644 --- a/server/tsconfig.json +++ b/server/tsconfig.json @@ -15,7 +15,8 @@ "paths": { "*": ["node_modules/*"] }, "typeRoots": ["node_modules/@types"], "ignoreDeprecations": "5.0", - "declaration": true + "declaration": true, + "resolveJsonModule": true }, // "sourceMap": true, diff --git a/web/hooks/useActiveModel.ts b/web/hooks/useActiveModel.ts index e436d116e0..ed704dd612 
100644 --- a/web/hooks/useActiveModel.ts +++ b/web/hooks/useActiveModel.ts @@ -10,7 +10,6 @@ import { LAST_USED_MODEL_ID } from './useRecommendedModel' import { vulkanEnabledAtom } from '@/helpers/atoms/AppConfig.atom' import { activeAssistantAtom } from '@/helpers/atoms/Assistant.atom' import { downloadedModelsAtom } from '@/helpers/atoms/Model.atom' -import { activeThreadAtom } from '@/helpers/atoms/Thread.atom' export const activeModelAtom = atom(undefined) export const loadModelErrorAtom = atom(undefined) @@ -29,7 +28,6 @@ export const stateModelAtom = atom({ export function useActiveModel() { const [activeModel, setActiveModel] = useAtom(activeModelAtom) - const activeThread = useAtomValue(activeThreadAtom) const [stateModel, setStateModel] = useAtom(stateModelAtom) const downloadedModels = useAtomValue(downloadedModelsAtom) const setLoadModelError = useSetAtom(loadModelErrorAtom)