### [Version 1.19.0](https://github.com/lobehub/lobe-chat/compare/v1.18.1...v1.19.0)
Released on **2024-09-18**
diff --git a/Dockerfile b/Dockerfile
index bcbfc466633e..9611ae4a175a 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -105,6 +105,8 @@ ENV ACCESS_CODE="" \
# Model Variables
ENV \
+ # AI21
+ AI21_API_KEY="" \
# Ai360
AI360_API_KEY="" \
# Anthropic
diff --git a/Dockerfile.database b/Dockerfile.database
index 8d844b813340..1df23248e9c3 100644
--- a/Dockerfile.database
+++ b/Dockerfile.database
@@ -137,6 +137,8 @@ ENV NEXT_PUBLIC_S3_DOMAIN="" \
# Model Variables
ENV \
+ # AI21
+ AI21_API_KEY="" \
# Ai360
AI360_API_KEY="" \
# Anthropic
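
Both images now declare an `AI21_API_KEY` placeholder alongside the existing provider keys, so the new AI21 provider can be configured at container start. As a rough sketch of how such a key is resolved at runtime (the helper below is hypothetical; only the `AI21_API_KEY` variable comes from the diff):

```tsx
// Hypothetical helper: read the new provider key from the container environment.
// The empty default ("") set in the Dockerfile is treated as "unset".
export const getAI21ApiKey = (): string | undefined => {
  const key = process.env.AI21_API_KEY;
  return key && key.length > 0 ? key : undefined;
};
```
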
diff --git a/locales/ar/chat.json b/locales/ar/chat.json
index 5e5be6aed220..dbce8c9b0905 100644
--- a/locales/ar/chat.json
+++ b/locales/ar/chat.json
@@ -6,6 +6,12 @@
"agentDefaultMessageWithSystemRole": "مرحبًا، أنا **{{name}}**، {{systemRole}}، دعنا نبدأ الدردشة!",
"agentDefaultMessageWithoutEdit": "مرحبًا، أنا **{{name}}**، دعنا نبدأ المحادثة!",
"agentsAndConversations": "الوكلاء والمحادثات",
+ "artifact": {
+ "generating": "جاري الإنشاء",
+ "thinking": "جاري التفكير",
+ "thought": "عملية التفكير",
+ "unknownTitle": "عمل غير مسمى"
+ },
"backToBottom": "العودة إلى الأسفل",
"chatList": {
"longMessageDetail": "عرض التفاصيل"
diff --git a/locales/ar/error.json b/locales/ar/error.json
index ff75c5b1abcf..0dc4f287bec9 100644
--- a/locales/ar/error.json
+++ b/locales/ar/error.json
@@ -59,6 +59,7 @@
"InvalidAccessCode": "كلمة المرور غير صحيحة أو فارغة، يرجى إدخال كلمة مرور الوصول الصحيحة أو إضافة مفتاح API مخصص",
"InvalidBedrockCredentials": "فشلت مصادقة Bedrock، يرجى التحقق من AccessKeyId/SecretAccessKey وإعادة المحاولة",
"InvalidClerkUser": "عذرًا، لم تقم بتسجيل الدخول بعد، يرجى تسجيل الدخول أو التسجيل للمتابعة",
+ "InvalidGithubToken": "رمز وصول شخصية GitHub غير صحيح أو فارغ، يرجى التحقق من رمز وصول GitHub الشخصي والمحاولة مرة أخرى",
"InvalidOllamaArgs": "تكوين Ollama غير صحيح، يرجى التحقق من تكوين Ollama وإعادة المحاولة",
"InvalidProviderAPIKey": "{{provider}} مفتاح API غير صحيح أو فارغ، يرجى التحقق من مفتاح API {{provider}} الخاص بك وحاول مرة أخرى",
"LocationNotSupportError": "عذرًا، لا يدعم موقعك الحالي خدمة هذا النموذج، قد يكون ذلك بسبب قيود المنطقة أو عدم توفر الخدمة. يرجى التحقق مما إذا كان الموقع الحالي يدعم استخدام هذه الخدمة، أو محاولة استخدام معلومات الموقع الأخرى.",
diff --git a/locales/ar/modelProvider.json b/locales/ar/modelProvider.json
index b25fd23ecfe5..c15f1f359702 100644
--- a/locales/ar/modelProvider.json
+++ b/locales/ar/modelProvider.json
@@ -51,6 +51,13 @@
"title": "استخدام معلومات المصادقة الخاصة بـ Bedrock المخصصة"
}
},
+ "github": {
+ "personalAccessToken": {
+ "desc": "أدخل رمز الوصول الشخصي الخاص بك على Github، انقر [هنا](https://github.com/settings/tokens) لإنشاء واحد",
+ "placeholder": "ghp_xxxxxx",
+ "title": "GitHub PAT"
+ }
+ },
"ollama": {
"checker": {
"desc": "اختبر ما إذا تم إدخال عنوان الوكيل بشكل صحيح",
diff --git a/locales/ar/portal.json b/locales/ar/portal.json
index 0ab243843915..3280f1773927 100644
--- a/locales/ar/portal.json
+++ b/locales/ar/portal.json
@@ -6,11 +6,27 @@
"file": "ملف"
}
},
+ "Plugins": "ملحقات",
"actions": {
"genAiMessage": "إنشاء رسالة مساعد ذكاء اصطناعي",
"summary": "ملخص",
"summaryTooltip": "ملخص للمحتوى الحالي"
},
+ "artifacts": {
+ "display": {
+ "code": "رمز",
+ "preview": "معاينة"
+ },
+ "svg": {
+ "copyAsImage": "نسخ كصورة",
+ "copyFail": "فشل النسخ، سبب الخطأ: {{error}}",
+ "copySuccess": "تم نسخ الصورة بنجاح",
+ "download": {
+ "png": "تحميل كـ PNG",
+ "svg": "تحميل كـ SVG"
+ }
+ }
+ },
"emptyArtifactList": "قائمة القطع الأثرية الحالية فارغة، يرجى استخدام الإضافات في الجلسة ومن ثم التحقق مرة أخرى",
"emptyKnowledgeList": "قائمة المعرفة الحالية فارغة، يرجى فتح قاعدة المعرفة حسب الحاجة في المحادثة قبل العرض",
"files": "ملفات",
diff --git a/locales/bg-BG/chat.json b/locales/bg-BG/chat.json
index 971d4e3dc2c1..6168aacdfe31 100644
--- a/locales/bg-BG/chat.json
+++ b/locales/bg-BG/chat.json
@@ -6,6 +6,12 @@
"agentDefaultMessageWithSystemRole": "Здравей, аз съм **{{name}}**, {{systemRole}}. Нека започнем да чатим!",
"agentDefaultMessageWithoutEdit": "Здравей, аз съм **{{name}}** и нека започнем разговора!",
"agentsAndConversations": "агенти и разговори",
+ "artifact": {
+ "generating": "Генериране",
+ "thinking": "В процес на мислене",
+ "thought": "Процес на мислене",
+ "unknownTitle": "Неназован артефакт"
+ },
"backToBottom": "Върни се в началото",
"chatList": {
"longMessageDetail": "Вижте детайлите"
diff --git a/locales/bg-BG/error.json b/locales/bg-BG/error.json
index 4052d3d11e4f..252b5395ea5d 100644
--- a/locales/bg-BG/error.json
+++ b/locales/bg-BG/error.json
@@ -59,6 +59,7 @@
"InvalidAccessCode": "Невалиден или празен код за достъп. Моля, въведете правилния код за достъп или добавете персонализиран API ключ.",
"InvalidBedrockCredentials": "Удостоверяването на Bedrock е неуспешно. Моля, проверете AccessKeyId/SecretAccessKey и опитайте отново.",
"InvalidClerkUser": "很抱歉,你当前尚未登录,请先登录或注册账号后继续操作",
+ "InvalidGithubToken": "GitHub Личният Достъпен Токен е неправилен или е празен. Моля, проверете Личния Достъпен Токен на GitHub и опитайте отново.",
"InvalidOllamaArgs": "Невалидна конфигурация на Ollama, моля, проверете конфигурацията на Ollama и опитайте отново",
"InvalidProviderAPIKey": "{{provider}} API ключ е невалиден или липсва, моля проверете {{provider}} API ключа и опитайте отново",
"LocationNotSupportError": "Съжаляваме, вашето текущо местоположение не поддържа тази услуга на модела. Това може да се дължи на регионални ограничения или на недостъпност на услугата. Моля, потвърдете дали текущото местоположение поддържа използването на тази услуга или опитайте да използвате друго местоположение.",
diff --git a/locales/bg-BG/modelProvider.json b/locales/bg-BG/modelProvider.json
index 9ca9aababefe..a3f7446cdb0e 100644
--- a/locales/bg-BG/modelProvider.json
+++ b/locales/bg-BG/modelProvider.json
@@ -51,6 +51,13 @@
"title": "Използване на персонализирана информация за удостоверяване на Bedrock"
}
},
+ "github": {
+ "personalAccessToken": {
+ "desc": "Въведете вашия GitHub PAT, кликнете [тук](https://github.com/settings/tokens), за да създадете",
+ "placeholder": "ghp_xxxxxx",
+ "title": "GitHub PAT"
+ }
+ },
"ollama": {
"checker": {
"desc": "Тестване дали адресът на прокси е попълнен правилно",
diff --git a/locales/bg-BG/portal.json b/locales/bg-BG/portal.json
index f259a1e2ea9b..da254d05e40c 100644
--- a/locales/bg-BG/portal.json
+++ b/locales/bg-BG/portal.json
@@ -6,11 +6,27 @@
"file": "Файл"
}
},
+ "Plugins": "Плъгини",
"actions": {
"genAiMessage": "Създаване на съобщение на помощника",
"summary": "Обобщение",
"summaryTooltip": "Обобщение на текущото съдържание"
},
+ "artifacts": {
+ "display": {
+ "code": "Код",
+ "preview": "Преглед"
+ },
+ "svg": {
+ "copyAsImage": "Копирай като изображение",
+ "copyFail": "Копирането не успя, причина за грешката: {{error}}",
+ "copySuccess": "Изображението е копирано успешно",
+ "download": {
+ "png": "Изтегли като PNG",
+ "svg": "Изтегли като SVG"
+ }
+ }
+ },
"emptyArtifactList": "Списъкът с текущите артефакти е празен. Моля, използвайте добавки в разговора и след това проверете отново.",
"emptyKnowledgeList": "Текущият списък с познания е празен. Моля, активирайте базата данни на познанията по време на сесията, за да я прегледате.",
"files": "файлове",
diff --git a/locales/de-DE/chat.json b/locales/de-DE/chat.json
index b18cd94703ac..ccb00f0d4986 100644
--- a/locales/de-DE/chat.json
+++ b/locales/de-DE/chat.json
@@ -6,6 +6,12 @@
"agentDefaultMessageWithSystemRole": "Hallo, ich bin **{{name}}**, {{systemRole}}. Lass uns chatten!",
"agentDefaultMessageWithoutEdit": "Hallo, ich bin **{{name}}**. Lassen Sie uns ins Gespräch kommen!",
"agentsAndConversations": "Agenten und Unterhaltungen",
+ "artifact": {
+ "generating": "Wird generiert",
+ "thinking": "Denken",
+ "thought": "Denkenprozess",
+ "unknownTitle": "Unbenanntes Werk"
+ },
"backToBottom": "Zurück zum Ende",
"chatList": {
"longMessageDetail": "Details anzeigen"
diff --git a/locales/de-DE/error.json b/locales/de-DE/error.json
index 1090c3c58f69..8b6a66bc3596 100644
--- a/locales/de-DE/error.json
+++ b/locales/de-DE/error.json
@@ -59,6 +59,7 @@
"InvalidAccessCode": "Das Passwort ist ungültig oder leer. Bitte geben Sie das richtige Zugangspasswort ein oder fügen Sie einen benutzerdefinierten API-Schlüssel hinzu.",
"InvalidBedrockCredentials": "Die Bedrock-Authentifizierung ist fehlgeschlagen. Bitte überprüfen Sie AccessKeyId/SecretAccessKey und versuchen Sie es erneut.",
"InvalidClerkUser": "Entschuldigung, du bist derzeit nicht angemeldet. Bitte melde dich an oder registriere ein Konto, um fortzufahren.",
+ "InvalidGithubToken": "Der persönliche Zugriffstoken für Github ist ungültig oder leer. Bitte überprüfen Sie den persönlichen Zugriffstoken für Github und versuchen Sie es erneut.",
"InvalidOllamaArgs": "Ollama-Konfiguration ist ungültig. Bitte überprüfen Sie die Ollama-Konfiguration und versuchen Sie es erneut.",
"InvalidProviderAPIKey": "{{provider}} API-Schlüssel ist ungültig oder leer. Bitte überprüfen Sie den {{provider}} API-Schlüssel und versuchen Sie es erneut.",
"LocationNotSupportError": "Entschuldigung, Ihr Standort unterstützt diesen Modellservice möglicherweise aufgrund von regionalen Einschränkungen oder nicht aktivierten Diensten nicht. Bitte überprüfen Sie, ob der aktuelle Standort die Verwendung dieses Dienstes unterstützt, oder versuchen Sie, andere Standortinformationen zu verwenden.",
diff --git a/locales/de-DE/modelProvider.json b/locales/de-DE/modelProvider.json
index 9d118f9efc9f..99df752ea8f2 100644
--- a/locales/de-DE/modelProvider.json
+++ b/locales/de-DE/modelProvider.json
@@ -51,6 +51,13 @@
"title": "Verwenden Sie benutzerdefinierte Bedrock-Authentifizierungsinformationen"
}
},
+ "github": {
+ "personalAccessToken": {
+ "desc": "Geben Sie Ihr GitHub-PAT ein und klicken Sie [hier](https://github.com/settings/tokens), um eines zu erstellen.",
+ "placeholder": "ghp_xxxxxx",
+ "title": "GitHub PAT"
+ }
+ },
"ollama": {
"checker": {
"desc": "Testen Sie, ob die Proxy-Adresse korrekt eingetragen wurde",
diff --git a/locales/de-DE/portal.json b/locales/de-DE/portal.json
index 8490e0dc3cb8..a34ae2da5de1 100644
--- a/locales/de-DE/portal.json
+++ b/locales/de-DE/portal.json
@@ -6,11 +6,27 @@
"file": "Datei"
}
},
+ "Plugins": "Plugins",
"actions": {
"genAiMessage": "Assistenten-Nachricht erstellen",
"summary": "Zusammenfassung",
"summaryTooltip": "Zusammenfassung des aktuellen Inhalts"
},
+ "artifacts": {
+ "display": {
+ "code": "Code",
+ "preview": "Vorschau"
+ },
+ "svg": {
+ "copyAsImage": "Als Bild kopieren",
+ "copyFail": "Kopieren fehlgeschlagen, Fehlerursache: {{error}}",
+ "copySuccess": "Bild erfolgreich kopiert",
+ "download": {
+ "png": "Als PNG herunterladen",
+ "svg": "Als SVG herunterladen"
+ }
+ }
+ },
"emptyArtifactList": "Die Liste der Artefakte ist derzeit leer. Bitte verwenden Sie Plugins in der Sitzung und überprüfen Sie sie erneut.",
"emptyKnowledgeList": "Die aktuelle Wissensliste ist leer. Bitte aktivieren Sie die Wissensdatenbank nach Bedarf in der Sitzung, um sie anzuzeigen.",
"files": "Dateien",
diff --git a/locales/en-US/chat.json b/locales/en-US/chat.json
index 9db04515a095..d6ba15d7e8a3 100644
--- a/locales/en-US/chat.json
+++ b/locales/en-US/chat.json
@@ -6,6 +6,12 @@
"agentDefaultMessageWithSystemRole": "Hello, I'm **{{name}}**, {{systemRole}}. Let's start chatting!",
"agentDefaultMessageWithoutEdit": "Hello, I'm **{{name}}**, let's start chatting!",
"agentsAndConversations": "Assistants and Conversations",
+ "artifact": {
+ "generating": "Generating",
+ "thinking": "Thinking",
+ "thought": "Thought Process",
+ "unknownTitle": "Untitled Work"
+ },
"backToBottom": "Back to bottom",
"chatList": {
"longMessageDetail": "View Details"
diff --git a/locales/en-US/error.json b/locales/en-US/error.json
index 8ff86d28ed0d..02c3ff38115c 100644
--- a/locales/en-US/error.json
+++ b/locales/en-US/error.json
@@ -59,6 +59,7 @@
"InvalidAccessCode": "Invalid access code or empty. Please enter the correct access code or add a custom API Key.",
"InvalidBedrockCredentials": "Bedrock authentication failed. Please check the AccessKeyId/SecretAccessKey and retry.",
"InvalidClerkUser": "Sorry, you are not currently logged in. Please log in or register an account to continue.",
+ "InvalidGithubToken": "The GitHub Personal Access Token is incorrect or empty. Please check your GitHub Personal Access Token and try again.",
"InvalidOllamaArgs": "Invalid Ollama configuration, please check Ollama configuration and try again",
"InvalidProviderAPIKey": "{{provider}} API Key is incorrect or empty, please check your {{provider}} API Key and try again",
"LocationNotSupportError": "We're sorry, your current location does not support this model service. This may be due to regional restrictions or the service not being available. Please confirm if the current location supports using this service, or try using a different location.",
diff --git a/locales/en-US/modelProvider.json b/locales/en-US/modelProvider.json
index c96a74c0314a..e48d8bfa3ed7 100644
--- a/locales/en-US/modelProvider.json
+++ b/locales/en-US/modelProvider.json
@@ -51,6 +51,13 @@
"title": "Use Custom Bedrock Authentication Information"
}
},
+ "github": {
+ "personalAccessToken": {
+ "desc": "Enter your GitHub PAT. Click [here](https://github.com/settings/tokens) to create one.",
+ "placeholder": "ghp_xxxxxx",
+ "title": "GitHub PAT"
+ }
+ },
"ollama": {
"checker": {
"desc": "Test if the proxy address is correctly filled in",
diff --git a/locales/en-US/portal.json b/locales/en-US/portal.json
index 852545ed610c..a02b51c6b7a3 100644
--- a/locales/en-US/portal.json
+++ b/locales/en-US/portal.json
@@ -6,11 +6,27 @@
"file": "File"
}
},
+ "Plugins": "Plugins",
"actions": {
"genAiMessage": "Generate Assistant Message",
"summary": "Summary",
"summaryTooltip": "Summarize current content"
},
+ "artifacts": {
+ "display": {
+ "code": "Code",
+ "preview": "Preview"
+ },
+ "svg": {
+ "copyAsImage": "Copy as Image",
+ "copyFail": "Copy failed, reason: {{error}}",
+ "copySuccess": "Image copied successfully",
+ "download": {
+ "png": "Download as PNG",
+ "svg": "Download as SVG"
+ }
+ }
+ },
"emptyArtifactList": "The current Artifacts list is empty. Please use plugins in the session as needed before viewing.",
"emptyKnowledgeList": "The current knowledge list is empty. Please enable the knowledge base as needed during the conversation before viewing.",
"files": "Files",
diff --git a/locales/es-ES/chat.json b/locales/es-ES/chat.json
index fe76585270d8..5aadf260a72c 100644
--- a/locales/es-ES/chat.json
+++ b/locales/es-ES/chat.json
@@ -6,6 +6,12 @@
"agentDefaultMessageWithSystemRole": "Hola, soy **{{name}}**, {{systemRole}}, ¡comencemos a chatear!",
"agentDefaultMessageWithoutEdit": "¡Hola, soy **{{name}}**! Comencemos nuestra conversación.",
"agentsAndConversations": "agentesYConversaciones",
+ "artifact": {
+ "generating": "Generando",
+ "thinking": "Pensando",
+ "thought": "Proceso de pensamiento",
+ "unknownTitle": "Obra sin título"
+ },
"backToBottom": "Volver al fondo",
"chatList": {
"longMessageDetail": "Ver detalles"
diff --git a/locales/es-ES/error.json b/locales/es-ES/error.json
index 4e0844c57e9d..9aa7424c6371 100644
--- a/locales/es-ES/error.json
+++ b/locales/es-ES/error.json
@@ -59,6 +59,7 @@
"InvalidAccessCode": "La contraseña no es válida o está vacía. Por favor, introduce una contraseña de acceso válida o añade una clave API personalizada",
"InvalidBedrockCredentials": "La autenticación de Bedrock no se ha completado con éxito, por favor, verifica AccessKeyId/SecretAccessKey e inténtalo de nuevo",
"InvalidClerkUser": "Lo siento mucho, actualmente no has iniciado sesión. Por favor, inicia sesión o regístrate antes de continuar.",
+ "InvalidGithubToken": "El token de acceso personal de Github es incorrecto o está vacío. Por favor, verifica el token de acceso personal de Github y vuelve a intentarlo.",
"InvalidOllamaArgs": "La configuración de Ollama no es válida, por favor revisa la configuración de Ollama e inténtalo de nuevo",
"InvalidProviderAPIKey": "{{provider}} API Key incorrecta o vacía, por favor revisa tu {{provider}} API Key e intenta de nuevo",
"LocationNotSupportError": "Lo sentimos, tu ubicación actual no es compatible con este servicio de modelo, puede ser debido a restricciones geográficas o a que el servicio no está disponible. Por favor, verifica si tu ubicación actual es compatible con este servicio o intenta usar otra información de ubicación.",
diff --git a/locales/es-ES/modelProvider.json b/locales/es-ES/modelProvider.json
index ce77c262d696..cadb4e280ab7 100644
--- a/locales/es-ES/modelProvider.json
+++ b/locales/es-ES/modelProvider.json
@@ -51,6 +51,13 @@
"title": "Usar información de autenticación de Bedrock personalizada"
}
},
+ "github": {
+ "personalAccessToken": {
+ "desc": "Introduce tu PAT de Github, haz clic [aquí](https://github.com/settings/tokens) para crear uno",
+ "placeholder": "ghp_xxxxxx",
+ "title": "GitHub PAT"
+ }
+ },
"ollama": {
"checker": {
"desc": "Prueba si la dirección del proxy de la interfaz se ha introducido correctamente",
diff --git a/locales/es-ES/portal.json b/locales/es-ES/portal.json
index 4b62721e0a7d..4dec581564a8 100644
--- a/locales/es-ES/portal.json
+++ b/locales/es-ES/portal.json
@@ -6,11 +6,27 @@
"file": "Archivo"
}
},
+ "Plugins": "Complementos",
"actions": {
"genAiMessage": "Crear mensaje de IA",
"summary": "Resumen",
"summaryTooltip": "Resumir el contenido actual"
},
+ "artifacts": {
+ "display": {
+ "code": "Código",
+ "preview": "Vista previa"
+ },
+ "svg": {
+ "copyAsImage": "Copiar como imagen",
+ "copyFail": "Error al copiar, motivo del error: {{error}}",
+ "copySuccess": "Imagen copiada con éxito",
+ "download": {
+ "png": "Descargar como PNG",
+ "svg": "Descargar como SVG"
+ }
+ }
+ },
"emptyArtifactList": "La lista de Artefactos actual está vacía. Por favor, utilice los complementos en la conversación y vuelva a intentarlo.",
"emptyKnowledgeList": "La lista de conocimientos actual está vacía. Por favor, activa la base de conocimientos según sea necesario en la conversación antes de volver a revisar.",
"files": "archivos",
diff --git a/locales/fr-FR/chat.json b/locales/fr-FR/chat.json
index 4bfd91d7356c..33498bfeef84 100644
--- a/locales/fr-FR/chat.json
+++ b/locales/fr-FR/chat.json
@@ -6,6 +6,12 @@
"agentDefaultMessageWithSystemRole": "Bonjour, je suis **{{name}}**, {{systemRole}}. Commençons la conversation !",
"agentDefaultMessageWithoutEdit": "Bonjour, je suis **{{name}}**. Commençons notre conversation !",
"agentsAndConversations": "Agents et conversations",
+ "artifact": {
+ "generating": "Génération en cours",
+ "thinking": "En réflexion",
+ "thought": "Processus de pensée",
+ "unknownTitle": "Œuvre sans nom"
+ },
"backToBottom": "Retour en bas",
"chatList": {
"longMessageDetail": "Voir les détails"
diff --git a/locales/fr-FR/error.json b/locales/fr-FR/error.json
index 6b37b0b3af13..5602d68c1508 100644
--- a/locales/fr-FR/error.json
+++ b/locales/fr-FR/error.json
@@ -59,6 +59,7 @@
"InvalidAccessCode": "Le mot de passe est incorrect ou vide. Veuillez saisir le mot de passe d'accès correct ou ajouter une clé API personnalisée.",
"InvalidBedrockCredentials": "L'authentification Bedrock a échoué, veuillez vérifier AccessKeyId/SecretAccessKey et réessayer",
"InvalidClerkUser": "Désolé, vous n'êtes pas actuellement connecté. Veuillez vous connecter ou vous inscrire avant de continuer.",
+ "InvalidGithubToken": "Le jeton d'accès personnel GitHub est incorrect ou vide. Veuillez vérifier le jeton d'accès personnel GitHub et réessayer.",
"InvalidOllamaArgs": "La configuration d'Ollama n'est pas valide, veuillez vérifier la configuration d'Ollama et réessayer",
"InvalidProviderAPIKey": "{{provider}} API Key incorrect or missing, please check {{provider}} API Key and try again",
"LocationNotSupportError": "Désolé, votre emplacement actuel ne prend pas en charge ce service de modèle, peut-être en raison de restrictions géographiques ou de services non disponibles. Veuillez vérifier si votre emplacement actuel prend en charge ce service ou essayer avec une autre localisation.",
diff --git a/locales/fr-FR/modelProvider.json b/locales/fr-FR/modelProvider.json
index 02950fe4c131..87e10457ef40 100644
--- a/locales/fr-FR/modelProvider.json
+++ b/locales/fr-FR/modelProvider.json
@@ -51,6 +51,13 @@
"title": "Utiliser des informations d'authentification Bedrock personnalisées"
}
},
+ "github": {
+ "personalAccessToken": {
+ "desc": "Entrez votre PAT GitHub, cliquez [ici](https://github.com/settings/tokens) pour en créer un.",
+ "placeholder": "ghp_xxxxxx",
+ "title": "GitHub PAT"
+ }
+ },
"ollama": {
"checker": {
"desc": "Vérifiez si l'adresse du proxy est correctement saisie",
diff --git a/locales/fr-FR/portal.json b/locales/fr-FR/portal.json
index dfe9bf550766..9c925134570c 100644
--- a/locales/fr-FR/portal.json
+++ b/locales/fr-FR/portal.json
@@ -6,11 +6,27 @@
"file": "Fichier"
}
},
+ "Plugins": "Plugins",
"actions": {
"genAiMessage": "Créer un message d'assistant",
"summary": "Résumé",
"summaryTooltip": "Résumé du contenu actuel"
},
+ "artifacts": {
+ "display": {
+ "code": "Code",
+ "preview": "Aperçu"
+ },
+ "svg": {
+ "copyAsImage": "Copier en tant qu'image",
+ "copyFail": "Échec de la copie, raison de l'erreur : {{error}}",
+ "copySuccess": "Image copiée avec succès",
+ "download": {
+ "png": "Télécharger en tant que PNG",
+ "svg": "Télécharger en tant que SVG"
+ }
+ }
+ },
"emptyArtifactList": "La liste des Artifacts est actuellement vide. Veuillez utiliser les plugins dans la conversation avant de consulter à nouveau.",
"emptyKnowledgeList": "La liste des connaissances est actuellement vide. Veuillez activer la base de connaissances selon vos besoins dans la conversation avant de consulter.",
"files": "Fichiers",
diff --git a/locales/it-IT/chat.json b/locales/it-IT/chat.json
index 6f5a043f2b91..525ac6d9c011 100644
--- a/locales/it-IT/chat.json
+++ b/locales/it-IT/chat.json
@@ -6,6 +6,12 @@
"agentDefaultMessageWithSystemRole": "Ciao, sono **{{name}}**, {{systemRole}}, iniziamo a chattare!",
"agentDefaultMessageWithoutEdit": "Ciao, sono **{{name}}**. Cominciamo a chiacchierare!",
"agentsAndConversations": "Assistenti e Conversazioni",
+ "artifact": {
+ "generating": "Generazione in corso",
+ "thinking": "In fase di riflessione",
+ "thought": "Processo di pensiero",
+ "unknownTitle": "Opera non nominata"
+ },
"backToBottom": "Torna in fondo",
"chatList": {
"longMessageDetail": "Visualizza dettagli"
diff --git a/locales/it-IT/error.json b/locales/it-IT/error.json
index 08b50046468b..add5dbd2f8c6 100644
--- a/locales/it-IT/error.json
+++ b/locales/it-IT/error.json
@@ -59,6 +59,7 @@
"InvalidAccessCode": "Password incorrect or empty, please enter the correct access password, or add a custom API Key",
"InvalidBedrockCredentials": "Autenticazione Bedrock non riuscita, controlla AccessKeyId/SecretAccessKey e riprova",
"InvalidClerkUser": "Spiacenti, al momento non hai effettuato l'accesso. Per favore, effettua l'accesso o registrati prima di continuare.",
+ "InvalidGithubToken": "Il token di accesso personale di Github non è corretto o è vuoto. Controlla il token di accesso personale di Github e riprova.",
"InvalidOllamaArgs": "Configurazione Ollama non valida, controllare la configurazione di Ollama e riprovare",
"InvalidProviderAPIKey": "{{provider}} Chiave API non valida o vuota, controlla la Chiave API di {{provider}} e riprova",
"LocationNotSupportError": "Spiacenti, la tua posizione attuale non supporta questo servizio modello, potrebbe essere a causa di restrizioni geografiche o servizi non attivati. Verifica se la posizione attuale supporta l'uso di questo servizio o prova a utilizzare un'altra posizione.",
diff --git a/locales/it-IT/modelProvider.json b/locales/it-IT/modelProvider.json
index 8fb0dedf4493..c4a52d7e7fc4 100644
--- a/locales/it-IT/modelProvider.json
+++ b/locales/it-IT/modelProvider.json
@@ -51,6 +51,13 @@
"title": "Usa le informazioni di autenticazione Bedrock personalizzate"
}
},
+ "github": {
+ "personalAccessToken": {
+ "desc": "Inserisci il tuo PAT di Github, clicca [qui](https://github.com/settings/tokens) per crearne uno",
+ "placeholder": "ghp_xxxxxx",
+ "title": "GitHub PAT"
+ }
+ },
"ollama": {
"checker": {
"desc": "Verifica se l'indirizzo del proxy è stato compilato correttamente",
diff --git a/locales/it-IT/portal.json b/locales/it-IT/portal.json
index daba88af1c19..151d67fef659 100644
--- a/locales/it-IT/portal.json
+++ b/locales/it-IT/portal.json
@@ -6,11 +6,27 @@
"file": "File"
}
},
+ "Plugins": "Plugin",
"actions": {
"genAiMessage": "Genera messaggio AI",
"summary": "Sommario",
"summaryTooltip": "Sommario del contenuto attuale"
},
+ "artifacts": {
+ "display": {
+ "code": "Codice",
+ "preview": "Anteprima"
+ },
+ "svg": {
+ "copyAsImage": "Copia come immagine",
+ "copyFail": "Copia fallita, motivo dell'errore: {{error}}",
+ "copySuccess": "Immagine copiata con successo",
+ "download": {
+ "png": "Scarica come PNG",
+ "svg": "Scarica come SVG"
+ }
+ }
+ },
"emptyArtifactList": "La lista degli Artefatti attuale è vuota, si prega di utilizzare i plugin necessari durante la sessione e poi controllare di nuovo",
"emptyKnowledgeList": "L'elenco delle conoscenze attuale è vuoto. Si prega di attivare il database delle conoscenze durante la conversazione per visualizzarlo.",
"files": "File",
diff --git a/locales/ja-JP/chat.json b/locales/ja-JP/chat.json
index 82720c3756f5..34eb81b7c2bf 100644
--- a/locales/ja-JP/chat.json
+++ b/locales/ja-JP/chat.json
@@ -6,6 +6,12 @@
"agentDefaultMessageWithSystemRole": "こんにちは、私は **{{name}}** です、{{systemRole}}、さあ、チャットを始めましょう!",
"agentDefaultMessageWithoutEdit": "こんにちは、私は**{{name}}**です。会話しましょう!",
"agentsAndConversations": "エージェントと会話",
+ "artifact": {
+ "generating": "生成中",
+ "thinking": "思考中",
+ "thought": "思考過程",
+ "unknownTitle": "未命名の作品"
+ },
"backToBottom": "現在に戻る",
"chatList": {
"longMessageDetail": "詳細を見る"
diff --git a/locales/ja-JP/error.json b/locales/ja-JP/error.json
index e330e5f04762..f2b1627a437e 100644
--- a/locales/ja-JP/error.json
+++ b/locales/ja-JP/error.json
@@ -59,6 +59,7 @@
"InvalidAccessCode": "パスワードが正しくないか空です。正しいアクセスパスワードを入力するか、カスタムAPIキーを追加してください",
"InvalidBedrockCredentials": "Bedrockの認証に失敗しました。AccessKeyId/SecretAccessKeyを確認してから再試行してください。",
"InvalidClerkUser": "申し訳ありませんが、現在ログインしていません。続行するにはログインまたはアカウント登録を行ってください",
+ "InvalidGithubToken": "Githubのパーソナルアクセストークンが無効または空です。Githubのパーソナルアクセストークンを確認してから、再試行してください。",
"InvalidOllamaArgs": "Ollamaの設定が正しくありません。Ollamaの設定を確認してからもう一度お試しください",
"InvalidProviderAPIKey": "{{provider}} APIキーが正しくないか空です。{{provider}} APIキーを確認して再試行してください。",
"LocationNotSupportError": "申し訳ありませんが、お住まいの地域ではこのモデルサービスをサポートしていません。地域制限またはサービスが利用できない可能性があります。現在の位置がこのサービスをサポートしているかどうかを確認するか、他の位置情報を使用してみてください。",
diff --git a/locales/ja-JP/modelProvider.json b/locales/ja-JP/modelProvider.json
index 8875b339ae07..6ca08bc6b465 100644
--- a/locales/ja-JP/modelProvider.json
+++ b/locales/ja-JP/modelProvider.json
@@ -51,6 +51,13 @@
"title": "使用カスタム Bedrock 認証情報"
}
},
+ "github": {
+ "personalAccessToken": {
+ "desc": "あなたのGithub PATを入力してください。[こちら](https://github.com/settings/tokens)をクリックして作成します",
+ "placeholder": "ghp_xxxxxx",
+ "title": "GitHub PAT"
+ }
+ },
"ollama": {
"checker": {
"desc": "プロキシアドレスが正しく入力されているかをテストします",
diff --git a/locales/ja-JP/portal.json b/locales/ja-JP/portal.json
index 2d3b05cad761..ba3b4b3c8a3a 100644
--- a/locales/ja-JP/portal.json
+++ b/locales/ja-JP/portal.json
@@ -6,11 +6,27 @@
"file": "ファイル"
}
},
+ "Plugins": "プラグイン",
"actions": {
"genAiMessage": "AIメッセージを生成",
"summary": "サマリー",
"summaryTooltip": "現在の内容を要約"
},
+ "artifacts": {
+ "display": {
+ "code": "コード",
+ "preview": "プレビュー"
+ },
+ "svg": {
+ "copyAsImage": "画像としてコピー",
+ "copyFail": "コピーに失敗しました。エラーの理由: {{error}}",
+ "copySuccess": "画像のコピーに成功しました",
+ "download": {
+ "png": "PNGとしてダウンロード",
+ "svg": "SVGとしてダウンロード"
+ }
+ }
+ },
"emptyArtifactList": "現在、アーティファクトリストは空です。プラグインを使用してセッション中に追加してください。",
"emptyKnowledgeList": "現在の知識リストは空です。会話中に必要に応じて知識ベースを開いてからご覧ください。",
"files": "ファイル",
diff --git a/locales/ko-KR/chat.json b/locales/ko-KR/chat.json
index 6583725ba2f3..19c7a9c3db2b 100644
--- a/locales/ko-KR/chat.json
+++ b/locales/ko-KR/chat.json
@@ -6,6 +6,12 @@
"agentDefaultMessageWithSystemRole": "안녕하세요, 저는 **{{name}}**입니다. {{systemRole}}입니다. 대화를 시작해 봅시다!",
"agentDefaultMessageWithoutEdit": "안녕하세요, 저는 **{{name}}**입니다. 대화를 시작해보세요!",
"agentsAndConversations": "에이전트 및 대화",
+ "artifact": {
+ "generating": "생성 중",
+ "thinking": "생각 중",
+ "thought": "사고 과정",
+ "unknownTitle": "제목 없음"
+ },
"backToBottom": "하단으로 이동",
"chatList": {
"longMessageDetail": "자세히 보기"
diff --git a/locales/ko-KR/error.json b/locales/ko-KR/error.json
index 7b750df59708..dfb815faa7f2 100644
--- a/locales/ko-KR/error.json
+++ b/locales/ko-KR/error.json
@@ -59,6 +59,7 @@
"InvalidAccessCode": "액세스 코드가 잘못되었거나 비어 있습니다. 올바른 액세스 코드를 입력하거나 사용자 지정 API 키를 추가하십시오.",
"InvalidBedrockCredentials": "Bedrock 인증에 실패했습니다. AccessKeyId/SecretAccessKey를 확인한 후 다시 시도하십시오.",
"InvalidClerkUser": "죄송합니다. 현재 로그인되어 있지 않습니다. 계속하려면 먼저 로그인하거나 계정을 등록해주세요.",
+ "InvalidGithubToken": "Github 개인 액세스 토큰이 올바르지 않거나 비어 있습니다. Github 개인 액세스 토큰을 확인한 후 다시 시도해 주십시오.",
"InvalidOllamaArgs": "Ollama 구성이 잘못되었습니다. Ollama 구성을 확인한 후 다시 시도하십시오.",
"InvalidProviderAPIKey": "{{provider}} API 키가 잘못되었거나 비어 있습니다. {{provider}} API 키를 확인하고 다시 시도하십시오.",
"LocationNotSupportError": "죄송합니다. 귀하의 현재 위치는 해당 모델 서비스를 지원하지 않습니다. 지역 제한 또는 서비스 미개통으로 인한 것일 수 있습니다. 현재 위치가 해당 서비스를 지원하는지 확인하거나 다른 위치 정보를 사용해 보십시오.",
diff --git a/locales/ko-KR/modelProvider.json b/locales/ko-KR/modelProvider.json
index 8d9e4ab0a1f8..39ffd897a5d3 100644
--- a/locales/ko-KR/modelProvider.json
+++ b/locales/ko-KR/modelProvider.json
@@ -51,6 +51,13 @@
"title": "사용자 정의 Bedrock 인증 정보 사용"
}
},
+ "github": {
+ "personalAccessToken": {
+ "desc": "당신의 Github PAT를 입력하세요. [여기](https://github.com/settings/tokens)를 클릭하여 생성하세요.",
+ "placeholder": "ghp_xxxxxx",
+ "title": "GitHub PAT"
+ }
+ },
"ollama": {
"checker": {
"desc": "프록시 주소가 올바르게 입력되었는지 테스트합니다",
diff --git a/locales/ko-KR/portal.json b/locales/ko-KR/portal.json
index 4320256d87b9..bb9c1a777690 100644
--- a/locales/ko-KR/portal.json
+++ b/locales/ko-KR/portal.json
@@ -6,11 +6,27 @@
"file": "파일"
}
},
+ "Plugins": "플러그인",
"actions": {
"genAiMessage": "AI 메시지 생성",
"summary": "요약",
"summaryTooltip": "현재 콘텐츠를 요약합니다"
},
+ "artifacts": {
+ "display": {
+ "code": "코드",
+ "preview": "미리보기"
+ },
+ "svg": {
+ "copyAsImage": "이미지로 복사",
+ "copyFail": "복사 실패, 오류 원인: {{error}}",
+ "copySuccess": "이미지 복사 성공",
+ "download": {
+ "png": "PNG로 다운로드",
+ "svg": "SVG로 다운로드"
+ }
+ }
+ },
"emptyArtifactList": "현재 아티팩트 목록이 비어 있습니다. 플러그인을 사용한 후에 다시 확인해주세요.",
"emptyKnowledgeList": "현재 지식 목록이 비어 있습니다. 대화 중에 필요에 따라 지식 베이스를 활성화한 후 다시 확인해 주세요.",
"files": "파일",
diff --git a/locales/nl-NL/chat.json b/locales/nl-NL/chat.json
index e5f407ec66f3..5fd75ec8db6f 100644
--- a/locales/nl-NL/chat.json
+++ b/locales/nl-NL/chat.json
@@ -6,6 +6,12 @@
"agentDefaultMessageWithSystemRole": "Hallo, ik ben **{{name}}**, {{systemRole}}, laten we beginnen met praten!",
"agentDefaultMessageWithoutEdit": "Hallo, ik ben **{{name}}**. Laten we beginnen met een gesprek!",
"agentsAndConversations": "agenten en gesprekken",
+ "artifact": {
+ "generating": "Genereren",
+ "thinking": "Denken",
+ "thought": "Denken proces",
+ "unknownTitle": "Onbenoemd werk"
+ },
"backToBottom": "Terug naar onderen",
"chatList": {
"longMessageDetail": "Bekijk details"
diff --git a/locales/nl-NL/error.json b/locales/nl-NL/error.json
index 693fe03ed262..bdbc95004705 100644
--- a/locales/nl-NL/error.json
+++ b/locales/nl-NL/error.json
@@ -59,6 +59,7 @@
"InvalidAccessCode": "Ongeldige toegangscode: het wachtwoord is onjuist of leeg. Voer de juiste toegangscode in of voeg een aangepaste API-sleutel toe.",
"InvalidBedrockCredentials": "Bedrock authentication failed, please check AccessKeyId/SecretAccessKey and retry",
"InvalidClerkUser": "Sorry, you are not currently logged in. Please log in or register an account to continue.",
+ "InvalidGithubToken": "Github Persoonlijke Toegangstoken is ongeldig of leeg, controleer de Github Persoonlijke Toegangstoken en probeer het opnieuw.",
"InvalidOllamaArgs": "Ollama-configuratie is onjuist, controleer de Ollama-configuratie en probeer het opnieuw",
"InvalidProviderAPIKey": "{{provider}} API-sleutel is onjuist of leeg. Controleer de {{provider}} API-sleutel en probeer het opnieuw.",
"LocationNotSupportError": "Sorry, your current location does not support this model service, possibly due to regional restrictions or service not being available. Please confirm if the current location supports using this service, or try using other location information.",
diff --git a/locales/nl-NL/modelProvider.json b/locales/nl-NL/modelProvider.json
index 10e2d82d0c2c..92f21fcd356e 100644
--- a/locales/nl-NL/modelProvider.json
+++ b/locales/nl-NL/modelProvider.json
@@ -51,6 +51,13 @@
"title": "Gebruik aangepaste Bedrock-verificatiegegevens"
}
},
+ "github": {
+ "personalAccessToken": {
+ "desc": "Vul je Github PAT in, klik [hier](https://github.com/settings/tokens) om er een te maken",
+ "placeholder": "ghp_xxxxxx",
+ "title": "GitHub PAT"
+ }
+ },
"ollama": {
"checker": {
"desc": "Test of het proxyadres correct is ingevuld",
diff --git a/locales/nl-NL/portal.json b/locales/nl-NL/portal.json
index cf1dd5f6fa04..3ffe58c88d1b 100644
--- a/locales/nl-NL/portal.json
+++ b/locales/nl-NL/portal.json
@@ -6,11 +6,27 @@
"file": "Bestand"
}
},
+ "Plugins": "Plugins",
"actions": {
"genAiMessage": "Creëer assistentbericht",
"summary": "Samenvatting",
"summaryTooltip": "Samenvatting van de huidige inhoud"
},
+ "artifacts": {
+ "display": {
+ "code": "Code",
+ "preview": "Voorbeeld"
+ },
+ "svg": {
+ "copyAsImage": "Kopieer als afbeelding",
+ "copyFail": "Kopiëren mislukt, foutmelding: {{error}}",
+ "copySuccess": "Afbeelding succesvol gekopieerd",
+ "download": {
+ "png": "Download als PNG",
+ "svg": "Download als SVG"
+ }
+ }
+ },
"emptyArtifactList": "De huidige lijst met Artifacts is leeg. Gebruik plugins in de sessie en bekijk deze later opnieuw.",
"emptyKnowledgeList": "De huidige kennislijst is leeg. Gelieve de kennisbank in de sessie te openen voordat u deze bekijkt.",
"files": "Bestanden",
diff --git a/locales/pl-PL/chat.json b/locales/pl-PL/chat.json
index 36f89c816196..891ce02e31ed 100644
--- a/locales/pl-PL/chat.json
+++ b/locales/pl-PL/chat.json
@@ -6,6 +6,12 @@
"agentDefaultMessageWithSystemRole": "Cześć, jestem **{{name}}**, {{systemRole}}, zacznijmy rozmowę!",
"agentDefaultMessageWithoutEdit": "Cześć, jestem **{{name}}**. Zacznijmy rozmowę!",
"agentsAndConversations": "Agenci i rozmowy",
+ "artifact": {
+ "generating": "Generowanie",
+ "thinking": "Myślenie",
+ "thought": "Proces myślenia",
+ "unknownTitle": "Nienazwane dzieło"
+ },
"backToBottom": "Przewiń na dół",
"chatList": {
"longMessageDetail": "Zobacz szczegóły"
diff --git a/locales/pl-PL/error.json b/locales/pl-PL/error.json
index e805d0fdd291..a2be5ffc840c 100644
--- a/locales/pl-PL/error.json
+++ b/locales/pl-PL/error.json
@@ -59,6 +59,7 @@
"InvalidAccessCode": "Nieprawidłowy kod dostępu: Hasło jest nieprawidłowe lub puste. Proszę wprowadzić poprawne hasło dostępu lub dodać niestandardowy klucz API.",
"InvalidBedrockCredentials": "Uwierzytelnienie Bedrock nie powiodło się, prosimy sprawdzić AccessKeyId/SecretAccessKey i spróbować ponownie.",
"InvalidClerkUser": "Przepraszamy, nie jesteś obecnie zalogowany. Proszę najpierw zalogować się lub zarejestrować, aby kontynuować.",
+ "InvalidGithubToken": "Token dostępu osobistego do GitHub jest niewłaściwy lub pusty. Proszę sprawdzić Token dostępu osobistego do GitHub i spróbować ponownie.",
"InvalidOllamaArgs": "Nieprawidłowa konfiguracja Ollama, sprawdź konfigurację Ollama i spróbuj ponownie",
"InvalidProviderAPIKey": "{{provider}} Klucz API jest nieprawidłowy lub pusty. Sprawdź Klucz API {{provider}} i spróbuj ponownie.",
"LocationNotSupportError": "Przepraszamy, Twoja lokalizacja nie obsługuje tego usługi modelu, być może ze względu na ograniczenia regionalne lub brak dostępności usługi. Proszę sprawdź, czy bieżąca lokalizacja obsługuje tę usługę, lub spróbuj użyć innych informacji o lokalizacji.",
diff --git a/locales/pl-PL/modelProvider.json b/locales/pl-PL/modelProvider.json
index 7074b5b85118..75f611bfdc32 100644
--- a/locales/pl-PL/modelProvider.json
+++ b/locales/pl-PL/modelProvider.json
@@ -51,6 +51,13 @@
"title": "Użyj niestandardowych informacji uwierzytelniających Bedrock"
}
},
+ "github": {
+ "personalAccessToken": {
+ "desc": "Wprowadź swój osobisty token dostępu GitHub (PAT), kliknij [tutaj](https://github.com/settings/tokens), aby go utworzyć",
+ "placeholder": "ghp_xxxxxx",
+ "title": "GitHub PAT"
+ }
+ },
"ollama": {
"checker": {
"desc": "Test czy adres proxy jest poprawnie wypełniony",
diff --git a/locales/pl-PL/portal.json b/locales/pl-PL/portal.json
index 7b22cd5c99c0..1dbd5fec267e 100644
--- a/locales/pl-PL/portal.json
+++ b/locales/pl-PL/portal.json
@@ -6,11 +6,27 @@
"file": "Plik"
}
},
+ "Plugins": "Wtyczki",
"actions": {
"genAiMessage": "Tworzenie wiadomości AI",
"summary": "Podsumowanie",
"summaryTooltip": "Podsumowanie bieżącej zawartości"
},
+ "artifacts": {
+ "display": {
+ "code": "Kod",
+ "preview": "Podgląd"
+ },
+ "svg": {
+ "copyAsImage": "Skopiuj jako obraz",
+ "copyFail": "Kopiowanie nie powiodło się, powód błędu: {{error}}",
+ "copySuccess": "Obraz skopiowany pomyślnie",
+ "download": {
+ "png": "Pobierz jako PNG",
+ "svg": "Pobierz jako SVG"
+ }
+ }
+ },
"emptyArtifactList": "Obecna lista Artefaktów jest pusta. Proszę użyć wtyczek w trakcie sesji, a następnie sprawdzić ponownie.",
"emptyKnowledgeList": "Aktualna lista wiedzy jest pusta. Proszę otworzyć bazę wiedzy w trakcie rozmowy, aby ją przeglądać.",
"files": "Pliki",
diff --git a/locales/pt-BR/chat.json b/locales/pt-BR/chat.json
index ffd257383428..b7425c6f5a3b 100644
--- a/locales/pt-BR/chat.json
+++ b/locales/pt-BR/chat.json
@@ -6,6 +6,12 @@
"agentDefaultMessageWithSystemRole": "Olá, eu sou **{{name}}**, {{systemRole}}, vamos conversar!",
"agentDefaultMessageWithoutEdit": "Olá, sou o **{{name}}**, vamos começar a conversa!",
"agentsAndConversations": "Agentes e Conversas",
+ "artifact": {
+ "generating": "Gerando",
+ "thinking": "Pensando",
+ "thought": "Processo de pensamento",
+ "unknownTitle": "Obra sem título"
+ },
"backToBottom": "Voltar para o início",
"chatList": {
"longMessageDetail": "Ver detalhes"
diff --git a/locales/pt-BR/error.json b/locales/pt-BR/error.json
index c4eee5ae377a..dc9479daa9f1 100644
--- a/locales/pt-BR/error.json
+++ b/locales/pt-BR/error.json
@@ -59,6 +59,7 @@
"InvalidAccessCode": "Senha de acesso inválida ou em branco. Por favor, insira a senha de acesso correta ou adicione uma Chave de API personalizada.",
"InvalidBedrockCredentials": "Credenciais Bedrock inválidas, por favor, verifique AccessKeyId/SecretAccessKey e tente novamente",
"InvalidClerkUser": "Desculpe, você ainda não fez login. Por favor, faça login ou registre uma conta antes de continuar.",
+ "InvalidGithubToken": "O Token de Acesso Pessoal do Github está incorreto ou vazio. Por favor, verifique o Token de Acesso Pessoal do Github e tente novamente.",
"InvalidOllamaArgs": "Configuração Ollama inválida, verifique a configuração do Ollama e tente novamente",
"InvalidProviderAPIKey": "{{provider}} API Key inválido ou em branco, por favor, verifique o {{provider}} API Key e tente novamente",
"LocationNotSupportError": "Desculpe, sua localização atual não suporta este serviço de modelo, pode ser devido a restrições geográficas ou serviço não disponível. Por favor, verifique se a localização atual suporta o uso deste serviço ou tente usar outras informações de localização.",
diff --git a/locales/pt-BR/modelProvider.json b/locales/pt-BR/modelProvider.json
index a01fcc3de95e..2fda52a1e796 100644
--- a/locales/pt-BR/modelProvider.json
+++ b/locales/pt-BR/modelProvider.json
@@ -51,6 +51,13 @@
"title": "Usar informações de autenticação Bedrock personalizadas"
}
},
+ "github": {
+ "personalAccessToken": {
+ "desc": "Insira seu PAT do Github, clique [aqui](https://github.com/settings/tokens) para criar",
+ "placeholder": "ghp_xxxxxx",
+ "title": "GitHub PAT"
+ }
+ },
"ollama": {
"checker": {
"desc": "Teste se o endereço do proxy está corretamente preenchido",
diff --git a/locales/pt-BR/portal.json b/locales/pt-BR/portal.json
index 6ed5e8fc4763..6c890f7c2fa3 100644
--- a/locales/pt-BR/portal.json
+++ b/locales/pt-BR/portal.json
@@ -6,11 +6,27 @@
"file": "Arquivo"
}
},
+ "Plugins": "Plugins",
"actions": {
"genAiMessage": "Gerar mensagem de IA",
"summary": "Resumo",
"summaryTooltip": "Resumir o conteúdo atual"
},
+ "artifacts": {
+ "display": {
+ "code": "Código",
+ "preview": "Prévia"
+ },
+ "svg": {
+ "copyAsImage": "Copiar como imagem",
+ "copyFail": "Falha ao copiar, motivo do erro: {{error}}",
+ "copySuccess": "Imagem copiada com sucesso",
+ "download": {
+ "png": "Baixar como PNG",
+ "svg": "Baixar como SVG"
+ }
+ }
+ },
"emptyArtifactList": "A lista de Artefatos atual está vazia. Por favor, use os plugins conforme necessário durante a sessão e depois verifique novamente.",
"emptyKnowledgeList": "A lista de conhecimentos atual está vazia. Por favor, ative o repositório de conhecimentos conforme necessário durante a conversa antes de visualizar.",
"files": "Arquivos",
diff --git a/locales/ru-RU/chat.json b/locales/ru-RU/chat.json
index 8ab984276ef9..6af41335644b 100644
--- a/locales/ru-RU/chat.json
+++ b/locales/ru-RU/chat.json
@@ -6,6 +6,12 @@
"agentDefaultMessageWithSystemRole": "Привет, я **{{name}}**, {{systemRole}}. Давай начнем разговор!",
"agentDefaultMessageWithoutEdit": "Привет, я **{{name}}**, давай начнём разговор!",
"agentsAndConversations": "Агенты и беседы",
+ "artifact": {
+ "generating": "Генерация",
+ "thinking": "В процессе размышлений",
+ "thought": "Процесс мышления",
+ "unknownTitle": "Безымянное произведение"
+ },
"backToBottom": "Вернуться вниз",
"chatList": {
"longMessageDetail": "Посмотреть детали"
diff --git a/locales/ru-RU/error.json b/locales/ru-RU/error.json
index fdc9e6b3aaa2..0c8cb0ba41f7 100644
--- a/locales/ru-RU/error.json
+++ b/locales/ru-RU/error.json
@@ -59,6 +59,7 @@
"InvalidAccessCode": "Неверный код доступа: введите правильный код доступа или добавьте пользовательский ключ API",
"InvalidBedrockCredentials": "Аутентификация Bedrock не прошла, пожалуйста, проверьте AccessKeyId/SecretAccessKey и повторите попытку",
"InvalidClerkUser": "Извините, вы еще не вошли в систему. Пожалуйста, войдите или зарегистрируйтесь, прежде чем продолжить",
+ "InvalidGithubToken": "Личный токен доступа Github некорректен или пуст, пожалуйста, проверьте личный токен доступа Github и повторите попытку",
"InvalidOllamaArgs": "Неверная конфигурация Ollama, пожалуйста, проверьте конфигурацию Ollama и повторите попытку",
"InvalidProviderAPIKey": "{{provider}} API ключ недействителен или отсутствует. Пожалуйста, проверьте ключ API {{provider}} и повторите попытку",
"LocationNotSupportError": "Извините, ваше текущее местоположение не поддерживает эту службу модели, возможно из-за ограничений региона или недоступности службы. Пожалуйста, убедитесь, что текущее местоположение поддерживает использование этой службы, или попробуйте использовать другую информацию о местоположении.",
diff --git a/locales/ru-RU/modelProvider.json b/locales/ru-RU/modelProvider.json
index 66009ef7fa12..0569ef1b1829 100644
--- a/locales/ru-RU/modelProvider.json
+++ b/locales/ru-RU/modelProvider.json
@@ -51,6 +51,13 @@
"title": "Использовать пользовательскую информацию аутентификации Bedrock"
}
},
+ "github": {
+ "personalAccessToken": {
+ "desc": "Введите ваш персональный токен доступа GitHub (PAT), нажмите [здесь](https://github.com/settings/tokens), чтобы создать его",
+ "placeholder": "ghp_xxxxxx",
+ "title": "GitHub PAT"
+ }
+ },
"ollama": {
"checker": {
"desc": "Проверить правильность адреса прокси",
diff --git a/locales/ru-RU/portal.json b/locales/ru-RU/portal.json
index 157700d26696..5ffeb145b040 100644
--- a/locales/ru-RU/portal.json
+++ b/locales/ru-RU/portal.json
@@ -6,11 +6,27 @@
"file": "Файл"
}
},
+ "Plugins": "Плагины",
"actions": {
"genAiMessage": "Создать сообщение помощника",
"summary": "Сводка",
"summaryTooltip": "Сводка текущего содержимого"
},
+ "artifacts": {
+ "display": {
+ "code": "Код",
+ "preview": "Предварительный просмотр"
+ },
+ "svg": {
+ "copyAsImage": "Скопировать как изображение",
+ "copyFail": "Не удалось скопировать, причина ошибки: {{error}}",
+ "copySuccess": "Изображение успешно скопировано",
+ "download": {
+ "png": "Скачать как PNG",
+ "svg": "Скачать как SVG"
+ }
+ }
+ },
"emptyArtifactList": "Список текущих артефактов пуст. Пожалуйста, используйте плагины во время сеанса и затем просмотрите.",
"emptyKnowledgeList": "Текущий список знаний пуст. Пожалуйста, откройте базу знаний по мере необходимости в разговоре, прежде чем просматривать.",
"files": "файлы",
diff --git a/locales/tr-TR/chat.json b/locales/tr-TR/chat.json
index a42566fd9b00..3128f4a671b5 100644
--- a/locales/tr-TR/chat.json
+++ b/locales/tr-TR/chat.json
@@ -6,6 +6,12 @@
"agentDefaultMessageWithSystemRole": "Merhaba, Ben **{{name}}**, {{systemRole}}. Hemen sohbet etmeye başlayalım!",
"agentDefaultMessageWithoutEdit": "Merhaba, ben **{{name}}**. Konuşmaya başlayalım!",
"agentsAndConversations": "Ajanlar ve Konuşmalar",
+ "artifact": {
+ "generating": "Üretiliyor",
+ "thinking": "Düşünülüyor",
+ "thought": "Düşünce Süreci",
+ "unknownTitle": "İsimsiz Eser"
+ },
"backToBottom": "En alta git",
"chatList": {
"longMessageDetail": "Detayları görüntüle"
diff --git a/locales/tr-TR/error.json b/locales/tr-TR/error.json
index a74d679e5c44..6f3f94cc5dd5 100644
--- a/locales/tr-TR/error.json
+++ b/locales/tr-TR/error.json
@@ -59,6 +59,7 @@
"InvalidAccessCode": "Geçersiz Erişim Kodu: Geçersiz veya boş bir şifre girdiniz. Lütfen doğru erişim şifresini girin veya özel API Anahtarı ekleyin.",
"InvalidBedrockCredentials": "Bedrock kimlik doğrulaması geçersiz, lütfen AccessKeyId/SecretAccessKey bilgilerinizi kontrol edip tekrar deneyin",
"InvalidClerkUser": "Üzgünüz, şu anda giriş yapmadınız. Lütfen işlemlere devam etmeden önce giriş yapın veya hesap oluşturun",
+ "InvalidGithubToken": "Github Kişisel Erişim Token'ı hatalı veya boş. Lütfen Github Kişisel Erişim Token'ınızı kontrol edin ve tekrar deneyin.",
"InvalidOllamaArgs": "Ollama yapılandırması yanlış, lütfen Ollama yapılandırmasını kontrol edip tekrar deneyin",
"InvalidProviderAPIKey": "{{provider}} API Anahtarı geçersiz veya boş, lütfen {{provider}} API Anahtarını kontrol edip tekrar deneyin",
"LocationNotSupportError": "Üzgünüz, bulunduğunuz konum bu model hizmetini desteklemiyor, muhtemelen bölge kısıtlamaları veya hizmetin henüz açılmamış olması nedeniyle. Lütfen mevcut konumun bu hizmeti kullanmaya uygun olup olmadığını doğrulayın veya başka bir konum bilgisi kullanmayı deneyin.",
diff --git a/locales/tr-TR/modelProvider.json b/locales/tr-TR/modelProvider.json
index 8c5e3301e0b0..26aeb8d24e47 100644
--- a/locales/tr-TR/modelProvider.json
+++ b/locales/tr-TR/modelProvider.json
@@ -51,6 +51,13 @@
"title": "Özel Bedrock Kimlik Bilgilerini Kullan"
}
},
+ "github": {
+ "personalAccessToken": {
+ "desc": "Github PAT'nizi girin, [buraya](https://github.com/settings/tokens) tıklayarak oluşturun",
+ "placeholder": "ghp_xxxxxx",
+ "title": "GitHub PAT"
+ }
+ },
"ollama": {
"checker": {
"desc": "Proxy adresinin doğru girilip girilmediğini test edin",
diff --git a/locales/tr-TR/portal.json b/locales/tr-TR/portal.json
index 59921ce8f29d..11bcdca71053 100644
--- a/locales/tr-TR/portal.json
+++ b/locales/tr-TR/portal.json
@@ -6,11 +6,27 @@
"file": "Dosya"
}
},
+ "Plugins": "Eklentiler",
"actions": {
"genAiMessage": "Yapay Zeka Mesajı Oluştur",
"summary": "Özet",
"summaryTooltip": "Mevcut içeriği özetle"
},
+ "artifacts": {
+ "display": {
+ "code": "Kod",
+ "preview": "Önizleme"
+ },
+ "svg": {
+ "copyAsImage": "Resmi Kopyala",
+ "copyFail": "Kopyalama başarısız, hata nedeni: {{error}}",
+ "copySuccess": "Resim başarıyla kopyalandı",
+ "download": {
+ "png": "PNG olarak indir",
+ "svg": "SVG olarak indir"
+ }
+ }
+ },
"emptyArtifactList": "Mevcut Artefakt listesi boş, lütfen eklentileri kullanarak oturumda gerektiğinde göz atın",
"emptyKnowledgeList": "Mevcut bilgi listesi boş. Lütfen sohbet sırasında ihtiyaç duyduğunuz bilgi havuzunu açtıktan sonra tekrar kontrol edin.",
"files": "Dosyalar",
diff --git a/locales/vi-VN/chat.json b/locales/vi-VN/chat.json
index 95be73622682..d6dfe611ec3c 100644
--- a/locales/vi-VN/chat.json
+++ b/locales/vi-VN/chat.json
@@ -6,6 +6,12 @@
"agentDefaultMessageWithSystemRole": "Xin chào, tôi là **{{name}}**, {{systemRole}}. Hãy bắt đầu trò chuyện ngay!",
"agentDefaultMessageWithoutEdit": "Xin chào, tôi là **{{name}}**, chúng ta hãy bắt đầu trò chuyện nào!",
"agentsAndConversations": "Người hỗ trợ và cuộc trò chuyện",
+ "artifact": {
+ "generating": "Đang tạo",
+ "thinking": "Đang suy nghĩ",
+ "thought": "Quá trình suy nghĩ",
+ "unknownTitle": "Tác phẩm chưa được đặt tên"
+ },
"backToBottom": "Quay về dưới cùng",
"chatList": {
"longMessageDetail": "Xem chi tiết"
diff --git a/locales/vi-VN/error.json b/locales/vi-VN/error.json
index 469702f93b14..329e4d63b09d 100644
--- a/locales/vi-VN/error.json
+++ b/locales/vi-VN/error.json
@@ -59,6 +59,7 @@
"InvalidAccessCode": "Mật khẩu truy cập không hợp lệ hoặc trống, vui lòng nhập mật khẩu truy cập đúng hoặc thêm Khóa API tùy chỉnh",
"InvalidBedrockCredentials": "Xác thực Bedrock không thành công, vui lòng kiểm tra AccessKeyId/SecretAccessKey và thử lại",
"InvalidClerkUser": "Xin lỗi, bạn chưa đăng nhập. Vui lòng đăng nhập hoặc đăng ký tài khoản trước khi tiếp tục.",
+ "InvalidGithubToken": "Mã truy cập cá nhân Github không chính xác hoặc để trống, vui lòng kiểm tra lại Mã truy cập cá nhân Github và thử lại",
"InvalidOllamaArgs": "Cấu hình Ollama không hợp lệ, vui lòng kiểm tra lại cấu hình Ollama và thử lại",
"InvalidProviderAPIKey": "{{provider}} API Key không hợp lệ hoặc trống, vui lòng kiểm tra và thử lại",
"LocationNotSupportError": "Xin lỗi, vị trí của bạn không hỗ trợ dịch vụ mô hình này, có thể do hạn chế vùng miền hoặc dịch vụ chưa được mở. Vui lòng xác nhận xem vị trí hiện tại có hỗ trợ sử dụng dịch vụ này không, hoặc thử sử dụng thông tin vị trí khác.",
diff --git a/locales/vi-VN/modelProvider.json b/locales/vi-VN/modelProvider.json
index 481b1e2b76d0..a643f9f2a83d 100644
--- a/locales/vi-VN/modelProvider.json
+++ b/locales/vi-VN/modelProvider.json
@@ -51,6 +51,13 @@
"title": "Sử dụng Thông tin Xác thực Bedrock tùy chỉnh"
}
},
+ "github": {
+ "personalAccessToken": {
+ "desc": "Nhập mã truy cập cá nhân Github của bạn, nhấp vào [đây](https://github.com/settings/tokens) để tạo",
+ "placeholder": "ghp_xxxxxx",
+ "title": "GitHub PAT"
+ }
+ },
"ollama": {
"checker": {
"desc": "Kiểm tra địa chỉ proxy có được nhập chính xác không",
diff --git a/locales/vi-VN/portal.json b/locales/vi-VN/portal.json
index 346713226170..bfb400c87498 100644
--- a/locales/vi-VN/portal.json
+++ b/locales/vi-VN/portal.json
@@ -6,11 +6,27 @@
"file": "Tập tin"
}
},
+ "Plugins": "Tiện ích",
"actions": {
"genAiMessage": "Tạo tin nhắn trợ giúp",
"summary": "Tóm tắt",
"summaryTooltip": "Tóm tắt nội dung hiện tại"
},
+ "artifacts": {
+ "display": {
+ "code": "Mã",
+ "preview": "Xem trước"
+ },
+ "svg": {
+ "copyAsImage": "Sao chép dưới dạng hình ảnh",
+ "copyFail": "Sao chép thất bại, lý do lỗi: {{error}}",
+ "copySuccess": "Sao chép hình ảnh thành công",
+ "download": {
+ "png": "Tải xuống dưới dạng PNG",
+ "svg": "Tải xuống dưới dạng SVG"
+ }
+ }
+ },
"emptyArtifactList": "Danh sách Tác Phẩm hiện tại đang trống, vui lòng sử dụng các plugin trong cuộc trò chuyện trước khi xem lại",
"emptyKnowledgeList": "Danh sách kiến thức hiện tại trống, vui lòng mở kho kiến thức khi cần trong cuộc trò chuyện trước khi xem",
"files": "Tập tin",
diff --git a/locales/zh-CN/chat.json b/locales/zh-CN/chat.json
index b94fb5f3e101..1381ddf29319 100644
--- a/locales/zh-CN/chat.json
+++ b/locales/zh-CN/chat.json
@@ -6,6 +6,12 @@
"agentDefaultMessageWithSystemRole": "你好,我是 **{{name}}**,{{systemRole}},让我们开始对话吧!",
"agentDefaultMessageWithoutEdit": "你好,我是 **{{name}}**,让我们开始对话吧!",
"agentsAndConversations": "助手与会话",
+ "artifact": {
+ "generating": "生成中",
+ "thinking": "思考中",
+ "thought": "思考过程",
+ "unknownTitle": "未命名作品"
+ },
"backToBottom": "跳转至当前",
"chatList": {
"longMessageDetail": "查看详情"
diff --git a/locales/zh-CN/error.json b/locales/zh-CN/error.json
index 547597157ad6..14fb419e362f 100644
--- a/locales/zh-CN/error.json
+++ b/locales/zh-CN/error.json
@@ -82,7 +82,8 @@
"OllamaServiceUnavailable": "Ollama 服务连接失败,请检查 Ollama 是否运行正常,或是否正确设置 Ollama 的跨域配置",
"AgentRuntimeError": "Lobe AI Runtime 执行出错,请根据以下信息排查或重试",
"FreePlanLimit": "当前为免费用户,无法使用该功能,请升级到付费计划后继续使用",
- "SubscriptionPlanLimit": "您的订阅额度已用尽,无法使用该功能,请升级到更高计划,或购买资源包后继续使用"
+ "SubscriptionPlanLimit": "您的订阅额度已用尽,无法使用该功能,请升级到更高计划,或购买资源包后继续使用",
+ "InvalidGithubToken": "Github PAT 不正确或为空,请检查 Github PAT 后重试"
},
"stt": {
"responseError": "服务请求失败,请检查配置或重试"
diff --git a/locales/zh-CN/modelProvider.json b/locales/zh-CN/modelProvider.json
index aec88fcf9cf0..760c2109c667 100644
--- a/locales/zh-CN/modelProvider.json
+++ b/locales/zh-CN/modelProvider.json
@@ -51,6 +51,13 @@
"title": "使用自定义 Bedrock 鉴权信息"
}
},
+ "github": {
+ "personalAccessToken": {
+ "desc": "填入你的 Github PAT,点击[这里](https://github.com/settings/tokens) 创建",
+ "placeholder": "ghp_xxxxxx",
+ "title": "Github PAT"
+ }
+ },
"ollama": {
"checker": {
"desc": "测试代理地址是否正确填写",
diff --git a/locales/zh-CN/portal.json b/locales/zh-CN/portal.json
index 278aa2d4254d..666b4c3fc540 100644
--- a/locales/zh-CN/portal.json
+++ b/locales/zh-CN/portal.json
@@ -6,13 +6,29 @@
"file": "文件"
}
},
+ "Plugins": "插件",
"actions": {
"genAiMessage": "创建助手消息",
"summary": "总结",
"summaryTooltip": "总结当前内容"
},
+ "artifacts": {
+ "display": {
+ "code": "代码",
+ "preview": "预览"
+ },
+ "svg": {
+ "copyAsImage": "复制为图片",
+ "copyFail": "复制失败,错误原因:{{error}}",
+ "copySuccess": "图片复制成功",
+ "download": {
+ "png": "下载为 PNG",
+ "svg": "下载为 SVG"
+ }
+ }
+ },
"emptyArtifactList": "当前 Artifacts 列表为空,请在会话中按需使用插件后再查看",
- "emptyKnowledgeList": "当前知识列表为空,请在会话中按需开启知识库后再查看",
+ "emptyKnowledgeList": "当前知识列表为空",
"files": "文件",
"messageDetail": "消息详情",
"title": "工作区"
diff --git a/locales/zh-TW/chat.json b/locales/zh-TW/chat.json
index 0988acb9de24..1825a870a54b 100644
--- a/locales/zh-TW/chat.json
+++ b/locales/zh-TW/chat.json
@@ -6,6 +6,12 @@
"agentDefaultMessageWithSystemRole": "你好,我是 **{{name}}**,{{systemRole}},讓我們開始對話吧!",
"agentDefaultMessageWithoutEdit": "你好,我是 **{{name}}**,讓我們開始對話吧!",
"agentsAndConversations": "助理與對話",
+ "artifact": {
+ "generating": "生成中",
+ "thinking": "思考中",
+ "thought": "思考過程",
+ "unknownTitle": "未命名作品"
+ },
"backToBottom": "返回底部",
"chatList": {
"longMessageDetail": "查看詳情"
diff --git a/locales/zh-TW/error.json b/locales/zh-TW/error.json
index d4a47f724fab..938947f33037 100644
--- a/locales/zh-TW/error.json
+++ b/locales/zh-TW/error.json
@@ -59,6 +59,7 @@
"InvalidAccessCode": "密碼不正確或為空,請輸入正確的訪問密碼,或添加自定義 API 金鑰",
"InvalidBedrockCredentials": "Bedrock 驗證未通過,請檢查 AccessKeyId/SecretAccessKey 後重試",
"InvalidClerkUser": "很抱歉,你當前尚未登錄,請先登錄或註冊帳號後繼續操作",
+ "InvalidGithubToken": "Github 個人存取權杖不正確或為空,請檢查 Github 個人存取權杖後再試一次",
"InvalidOllamaArgs": "Ollama 配置不正確,請檢查 Ollama 配置後重試",
"InvalidProviderAPIKey": "{{provider}} API 金鑰不正確或為空,請檢查 {{provider}} API 金鑰後重試",
"LocationNotSupportError": "很抱歉,你的所在位置不支持此模型服務,可能是由於地區限制或服務未開通。請確認當前位置是否支持使用此服務,或嘗試使用其他位置信息。",
diff --git a/locales/zh-TW/modelProvider.json b/locales/zh-TW/modelProvider.json
index e7424f3263b0..f453e73908cc 100644
--- a/locales/zh-TW/modelProvider.json
+++ b/locales/zh-TW/modelProvider.json
@@ -51,6 +51,13 @@
"title": "使用自定義 Bedrock 驗證資訊"
}
},
+ "github": {
+ "personalAccessToken": {
+ "desc": "填入你的 Github 個人存取權杖,點擊[這裡](https://github.com/settings/tokens) 創建",
+ "placeholder": "ghp_xxxxxx",
+ "title": "GitHub PAT"
+ }
+ },
"ollama": {
"checker": {
"desc": "測試代理地址是否正確填寫",
diff --git a/locales/zh-TW/portal.json b/locales/zh-TW/portal.json
index ed1697dfb7ce..75079c26d979 100644
--- a/locales/zh-TW/portal.json
+++ b/locales/zh-TW/portal.json
@@ -6,11 +6,27 @@
"file": "檔案"
}
},
+ "Plugins": "外掛",
"actions": {
"genAiMessage": "生成助手訊息",
"summary": "摘要",
"summaryTooltip": "總結目前內容"
},
+ "artifacts": {
+ "display": {
+ "code": "程式碼",
+ "preview": "預覽"
+ },
+ "svg": {
+ "copyAsImage": "複製為圖片",
+ "copyFail": "複製失敗,錯誤原因:{{error}}",
+ "copySuccess": "圖片複製成功",
+ "download": {
+ "png": "下載為 PNG",
+ "svg": "下載為 SVG"
+ }
+ }
+ },
"emptyArtifactList": "當前文物列表為空,請在會話中按需使用插件後再查看",
"emptyKnowledgeList": "當前知識列表為空,請在會話中按需開啟知識庫後再查看",
"files": "檔案",
diff --git a/package.json b/package.json
index 33f3b841d9db..39cf1d0b7203 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
{
"name": "@lobehub/chat",
- "version": "1.18.1",
+ "version": "1.19.0",
"description": "Lobe Chat - an open-source, high-performance chatbot framework that supports speech synthesis, multimodal, and extensible Function Call plugin system. Supports one-click free deployment of your private ChatGPT/LLM web application.",
"keywords": [
"framework",
@@ -112,6 +112,7 @@
"@clerk/localizations": "2.0.0",
"@clerk/nextjs": "^5.3.3",
"@clerk/themes": "^2.1.27",
+ "@codesandbox/sandpack-react": "^2.19.8",
"@cyntler/react-doc-viewer": "^1.16.6",
"@google/generative-ai": "^0.16.0",
"@icons-pack/react-simple-icons": "9.6.0",
@@ -172,6 +173,7 @@
"next-auth": "beta",
"next-mdx-remote": "^4.4.1",
"next-sitemap": "^4.2.3",
+ "nextjs-toploader": "^3.6.15",
"numeral": "^2.0.6",
"nuqs": "^1.17.8",
"officeparser": "^4.1.1",
@@ -255,6 +257,7 @@
"@types/semver": "^7.5.8",
"@types/systemjs": "^6.13.5",
"@types/ua-parser-js": "^0.7.39",
+ "@types/unist": "^3.0.3",
"@types/uuid": "^10.0.0",
"@types/ws": "^8.5.12",
"@vitest/coverage-v8": "~1.2.2",
diff --git a/src/app/(main)/chat/(workspace)/@portal/Artifacts/Body/Renderer/HTML.tsx b/src/app/(main)/chat/(workspace)/@portal/Artifacts/Body/Renderer/HTML.tsx
new file mode 100644
index 000000000000..8b4455ec25e1
--- /dev/null
+++ b/src/app/(main)/chat/(workspace)/@portal/Artifacts/Body/Renderer/HTML.tsx
@@ -0,0 +1,25 @@
+import { memo, useEffect, useRef } from 'react';
+
+interface HTMLRendererProps {
+ height?: string;
+ htmlContent: string;
+ width?: string;
+}
+const HTMLRenderer = memo<HTMLRendererProps>(({ htmlContent, width = '100%', height = '100%' }) => {
+ const iframeRef = useRef<HTMLIFrameElement>(null);
+
+ useEffect(() => {
+ if (!iframeRef.current) return;
+
+ const doc = iframeRef.current.contentDocument;
+ if (!doc) return;
+
+ doc.open();
+ doc.write(htmlContent);
+ doc.close();
+ }, [htmlContent]);
+
+ // reconstructed markup (attribute details are assumed): render the written document in a plain iframe
+ return <iframe height={height} ref={iframeRef} style={{ border: 'none' }} title={'html-renderer'} width={width} />;
+});
+
+export default HTMLRenderer;
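
For reference, a minimal usage sketch of the new renderer; the file name and HTML payload below are hypothetical examples, not taken from the repository:

```tsx
import HTMLRenderer from './HTML';

// Hypothetical payload; any self-contained HTML document works here.
const html = '<!DOCTYPE html><html><body><h1>Hello artifact</h1></body></html>';

// The component writes the document into the iframe via document.open/write/close on mount.
export const Demo = () => <HTMLRenderer height={'480px'} htmlContent={html} width={'100%'} />;
```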
diff --git a/src/app/(main)/chat/(workspace)/@portal/Artifacts/Body/Renderer/React.tsx b/src/app/(main)/chat/(workspace)/@portal/Artifacts/Body/Renderer/React.tsx
new file mode 100644
index 000000000000..728c237c7eee
--- /dev/null
+++ b/src/app/(main)/chat/(workspace)/@portal/Artifacts/Body/Renderer/React.tsx
@@ -0,0 +1,30 @@
+import { SandpackLayout, SandpackPreview, SandpackProvider } from '@codesandbox/sandpack-react';
+import { memo } from 'react';
+
+interface ReactRendererProps {
+ code: string;
+}
+const ReactRenderer = memo<ReactRendererProps>(({ code }) => {
+ // reconstructed markup (exact props are assumed): mount the code as the App file of a react Sandpack and show its preview
+ return (
+ <SandpackProvider files={{ 'App.js': code }} template={'react'}>
+ <SandpackLayout>
+ <SandpackPreview style={{ height: '100%', width: '100%' }} />
+ </SandpackLayout>
+ </SandpackProvider>
+ );
+});
+
+export default ReactRenderer;
diff --git a/src/app/(main)/chat/(workspace)/@portal/Artifacts/Body/Renderer/SVG.tsx b/src/app/(main)/chat/(workspace)/@portal/Artifacts/Body/Renderer/SVG.tsx
new file mode 100644
index 000000000000..8f64eccc7dab
--- /dev/null
+++ b/src/app/(main)/chat/(workspace)/@portal/Artifacts/Body/Renderer/SVG.tsx
@@ -0,0 +1,114 @@
+import { Icon, Tooltip } from '@lobehub/ui';
+import { App, Button, Dropdown, Space } from 'antd';
+import { css, cx } from 'antd-style';
+import { CopyIcon, DownloadIcon } from 'lucide-react';
+import { domToPng } from 'modern-screenshot';
+import { useTranslation } from 'react-i18next';
+import { Center, Flexbox } from 'react-layout-kit';
+
+import { BRANDING_NAME } from '@/const/branding';
+import { useChatStore } from '@/store/chat';
+import { chatPortalSelectors } from '@/store/chat/selectors';
+import { copyImageToClipboard } from '@/utils/clipboard';
+
+const svgContainer = css`
+ width: 100%;
+ height: 100%;
+
+ > svg {
+ width: 100%;
+ height: 100%;
+ }
+`;
+
+const actions = css`
+ position: absolute;
+ inset-block-end: 8px;
+ inset-inline-end: 8px;
+`;
+
+const DOM_ID = 'artfact-svg';
+interface SVGRendererProps {
+ content: string;
+}
+
+const SVGRenderer = ({ content }: SVGRendererProps) => {
+ const { t } = useTranslation('portal');
+ const { message } = App.useApp();
+
+ const generatePng = async () => {
+ return domToPng(document.querySelector(`#${DOM_ID}`) as HTMLDivElement, {
+ features: {
+ // do not enable control-character removal, otherwise Safari emoji rendering breaks
+ removeControlCharacter: false,
+ },
+ scale: 2,
+ });
+ };
+
+ const downloadImage = async (type: string) => {
+ let dataUrl = '';
+ if (type === 'png') dataUrl = await generatePng();
+ else if (type === 'svg') {
+ const blob = new Blob([content], { type: 'image/svg+xml' });
+
+ dataUrl = URL.createObjectURL(blob);
+ }
+
+ const title = chatPortalSelectors.artifactTitle(useChatStore.getState());
+
+ const link = document.createElement('a');
+ link.download = `${BRANDING_NAME}_${title}.${type}`;
+ link.href = dataUrl;
+ link.click();
+ link.remove();
+ };
+
+ // NOTE: the rest of this 114-line file (the copy / download actions toolbar built from Button, Dropdown and Tooltip)
+ // is truncated in this excerpt; the container markup below is a reconstruction
+ return (
+ <Center height={'100%'} style={{ position: 'relative' }} width={'100%'}>
+ <div className={cx(svgContainer)} dangerouslySetInnerHTML={{ __html: content }} id={DOM_ID} />
+ </Center>
+ );
+};
+
+export default SVGRenderer;
diff --git a/src/features/Conversation/components/MarkdownElements/LobeArtifact/index.ts b/src/features/Conversation/components/MarkdownElements/LobeArtifact/index.ts
new file mode 100644
index 000000000000..2e7f0f0c6443
--- /dev/null
+++ b/src/features/Conversation/components/MarkdownElements/LobeArtifact/index.ts
@@ -0,0 +1,10 @@
+import Component from './Render';
+import rehypePlugin from './rehypePlugin';
+
+const AntArtifactElement = {
+ Component,
+ rehypePlugin,
+ tag: 'lobeArtifact',
+};
+
+export default AntArtifactElement;
diff --git a/src/features/Conversation/components/MarkdownElements/LobeArtifact/rehypePlugin.ts b/src/features/Conversation/components/MarkdownElements/LobeArtifact/rehypePlugin.ts
new file mode 100644
index 000000000000..b5e53251e196
--- /dev/null
+++ b/src/features/Conversation/components/MarkdownElements/LobeArtifact/rehypePlugin.ts
@@ -0,0 +1,74 @@
+import { SKIP, visit } from 'unist-util-visit';
+
+import { ARTIFACT_TAG } from '@/const/plugin';
+
+function rehypeAntArtifact() {
+ return (tree: any) => {
+ visit(tree, (node, index, parent) => {
+ if (node.type === 'element' && node.tagName === 'p' && node.children.length > 0) {
+ const firstChild = node.children[0];
+ if (firstChild.type === 'raw' && firstChild.value.startsWith(`<${ARTIFACT_TAG}`)) {
+ // extract the attributes of the lobeArtifact tag
+ const attributes: Record<string, string> = {};
+ const attributeRegex = /(\w+)="([^"]*)"/g;
+ let match;
+ while ((match = attributeRegex.exec(firstChild.value)) !== null) {
+ attributes[match[1]] = match[2];
+ }
+
+ // create a new lobeArtifact node
+ const newNode = {
+ children: [
+ {
+ type: 'text',
+ value: node.children
+ .slice(1, -1)
+ .map((child: any) => {
+ if (child.type === 'raw') {
+ return child.value;
+ } else if (child.type === 'text') {
+ return child.value;
+ } else if (child.type === 'element' && child.tagName === 'a') {
+ return child.children[0].value;
+ }
+ return '';
+ })
+ .join('')
+ .trim(),
+ },
+ ],
+ properties: attributes,
+ tagName: ARTIFACT_TAG,
+ type: 'element',
+ };
+
+ // replace the original p node
+ parent.children.splice(index, 1, newNode);
+ return [SKIP, index];
+ }
+ }
+ // If the string is a standalone `<lobeArtifact ...>` tag, the resulting node is:
+ // {
+ //   type: 'raw',
+ //   value: '<lobeArtifact ...>',
+ // }
+ else if (node.type === 'raw' && node.value.startsWith(`<${ARTIFACT_TAG}`)) {
+ // create a new lobeArtifact node
+ const newNode = {
+ children: [],
+ properties: {},
+ tagName: ARTIFACT_TAG,
+ type: 'element',
+ };
+
+ // replace the original p node
+ parent.children.splice(index, 1, newNode);
+ return [SKIP, index];
+ }
+ });
+ };
+}
+
+export default rehypeAntArtifact;
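
To make the attribute extraction above concrete, here is a small sketch of the `attributeRegex` loop in isolation; the `identifier`/`title`/`type` attribute names are illustrative assumptions rather than a confirmed schema:

```ts
const attributeRegex = /(\w+)="([^"]*)"/g;

// Hypothetical opening tag; the attribute names are illustrative.
const raw = '<lobeArtifact identifier="demo" title="Demo" type="text/html">';

const attributes: Record<string, string> = {};
let match: RegExpExecArray | null;
while ((match = attributeRegex.exec(raw)) !== null) {
  attributes[match[1]] = match[2];
}

console.log(attributes); // { identifier: 'demo', title: 'Demo', type: 'text/html' }
```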
diff --git a/src/features/Conversation/components/MarkdownElements/LobeThinking/Render.tsx b/src/features/Conversation/components/MarkdownElements/LobeThinking/Render.tsx
new file mode 100644
index 000000000000..5935b8326a2e
--- /dev/null
+++ b/src/features/Conversation/components/MarkdownElements/LobeThinking/Render.tsx
@@ -0,0 +1,86 @@
+import { Icon } from '@lobehub/ui';
+import { createStyles } from 'antd-style';
+import { BringToFrontIcon, ChevronDown, ChevronRight, Loader2Icon } from 'lucide-react';
+import { memo, useState } from 'react';
+import { useTranslation } from 'react-i18next';
+import { Flexbox } from 'react-layout-kit';
+
+import { ARTIFACT_THINKING_TAG } from '@/const/plugin';
+import { useChatStore } from '@/store/chat';
+import { chatSelectors } from '@/store/chat/selectors';
+import { dotLoading } from '@/styles/loading';
+
+import { MarkdownElementProps } from '../type';
+
+/**
+ * Check whether the matched `lobeThinking` tag contains both an opening and a closing tag
+ */
+export const isLobeThinkingClosed = (input: string = '') => {
+ const openTag = `<${ARTIFACT_THINKING_TAG}>`;
+ const closeTag = `</${ARTIFACT_THINKING_TAG}>`;
+
+ return input.includes(openTag) && input.includes(closeTag);
+};
+
+const useStyles = createStyles(({ css, token }) => ({
+ container: css`
+ cursor: pointer;
+
+ padding-block: 8px;
+ padding-inline: 12px;
+ padding-inline-end: 12px;
+
+ color: ${token.colorText};
+
+ background: ${token.colorFillQuaternary};
+ border-radius: 8px;
+ `,
+ title: css`
+ overflow: hidden;
+ display: -webkit-box;
+ -webkit-box-orient: vertical;
+ -webkit-line-clamp: 1;
+
+ font-size: 12px;
+ text-overflow: ellipsis;
+ `,
+}));
+
+const Render = memo<MarkdownElementProps>(({ children, id }) => {
+ const { t } = useTranslation('chat');
+ const { styles, cx } = useStyles();
+
+ const [isGenerating] = useChatStore((s) => {
+ const message = chatSelectors.getMessageById(id)(s);
+ return [!isLobeThinkingClosed(message?.content)];
+ });
+
+ const [showDetail, setShowDetail] = useState(false);
+
+ const expand = showDetail || isGenerating;
+ // reconstructed markup (exact layout props are assumed): a collapsible container with a status header
+ return (
+ <Flexbox
+ className={styles.container}
+ gap={8}
+ onClick={() => {
+ setShowDetail(!showDetail);
+ }}
+ width={'100%'}
+ >
+ <Flexbox distribution={'space-between'} flex={1} horizontal>
+ <Flexbox gap={8} horizontal>
+ <Icon icon={isGenerating ? Loader2Icon : BringToFrontIcon} spin={isGenerating} />
+ {isGenerating ? (
+ <span className={cx(styles.title, dotLoading)}>{t('artifact.thinking')}</span>
+ ) : (
+ t('artifact.thought')
+ )}
+ </Flexbox>
+ <Icon icon={expand ? ChevronDown : ChevronRight} />
+ </Flexbox>
+ {expand && children}
+ </Flexbox>
+ );
+});
+
+export default Render;
diff --git a/src/features/Conversation/components/MarkdownElements/LobeThinking/index.ts b/src/features/Conversation/components/MarkdownElements/LobeThinking/index.ts
new file mode 100644
index 000000000000..b56f87a02ea2
--- /dev/null
+++ b/src/features/Conversation/components/MarkdownElements/LobeThinking/index.ts
@@ -0,0 +1,12 @@
+import { ARTIFACT_THINKING_TAG } from '@/const/plugin';
+
+import Component from './Render';
+import rehypePlugin from './rehypePlugin';
+
+const AntThinkingElement = {
+ Component,
+ rehypePlugin,
+ tag: ARTIFACT_THINKING_TAG,
+};
+
+export default AntThinkingElement;
diff --git a/src/features/Conversation/components/MarkdownElements/LobeThinking/rehypePlugin.test.ts b/src/features/Conversation/components/MarkdownElements/LobeThinking/rehypePlugin.test.ts
new file mode 100644
index 000000000000..e1c71f94bdb2
--- /dev/null
+++ b/src/features/Conversation/components/MarkdownElements/LobeThinking/rehypePlugin.test.ts
@@ -0,0 +1,124 @@
+import { describe, expect, it } from 'vitest';
+
+import rehypePlugin from './rehypePlugin';
+
+describe('rehypePlugin', () => {
+ it('should transform <lobeThinking> tags within paragraphs', () => {
+ const tree = {
+ type: 'root',
+ children: [
+ {
+ type: 'element',
+ tagName: 'p',
+ children: [
+ { type: 'text', value: 'Before ' },
+ { type: 'raw', value: '<lobeThinking>' },
+ { type: 'text', value: 'Thinking content' },
+ { type: 'raw', value: '</lobeThinking>' },
+ { type: 'text', value: ' After' },
+ ],
+ },
+ ],
+ };
+
+ const expectedTree = {
+ type: 'root',
+ children: [
+ {
+ type: 'element',
+ tagName: 'lobeThinking',
+ properties: {},
+ children: [{ type: 'text', value: 'Thinking content' }],
+ },
+ ],
+ };
+
+ const plugin = rehypePlugin();
+ plugin(tree);
+
+ expect(tree).toEqual(expectedTree);
+ });
+
+ it('should not transform when only closing tag is present', () => {
+ const tree = {
+ type: 'root',
+ children: [
+ {
+ type: 'element',
+ tagName: 'p',
+ children: [
+ { type: 'text', value: 'Thinking content' },
+ { type: 'raw', value: '</lobeThinking>' },
+ { type: 'text', value: ' After' },
+ ],
+ },
+ ],
+ };
+
+ const originalTree = JSON.parse(JSON.stringify(tree));
+
+ const plugin = rehypePlugin();
+ plugin(tree);
+
+ expect(tree).toEqual(originalTree);
+ });
+
+ it('should handle multiple paragraphs and transformations', () => {
+ const tree = {
+ type: 'root',
+ children: [
+ {
+ type: 'element',
+ tagName: 'p',
+ children: [{ type: 'text', value: 'Normal paragraph' }],
+ },
+ {
+ type: 'element',
+ tagName: 'p',
+ children: [
+ { type: 'raw', value: '<lobeThinking>' },
+ { type: 'text', value: 'First thinking' },
+ { type: 'raw', value: '</lobeThinking>' },
+ ],
+ },
+ {
+ type: 'element',
+ tagName: 'p',
+ children: [
+ { type: 'raw', value: '<lobeThinking>' },
+ { type: 'text', value: 'Second thinking' },
+ { type: 'raw', value: '</lobeThinking>' },
+ ],
+ },
+ ],
+ };
+
+ const expectedTree = {
+ type: 'root',
+ children: [
+ {
+ type: 'element',
+ tagName: 'p',
+ children: [{ type: 'text', value: 'Normal paragraph' }],
+ },
+ {
+ type: 'element',
+ tagName: 'lobeThinking',
+ properties: {},
+ children: [{ type: 'text', value: 'First thinking' }],
+ },
+ {
+ type: 'element',
+ tagName: 'lobeThinking',
+ properties: {},
+ children: [{ type: 'text', value: 'Second thinking' }],
+ },
+ ],
+ };
+
+ const plugin = rehypePlugin();
+ plugin(tree);
+
+ expect(tree).toEqual(expectedTree);
+ });
+});
diff --git a/src/features/Conversation/components/MarkdownElements/LobeThinking/rehypePlugin.ts b/src/features/Conversation/components/MarkdownElements/LobeThinking/rehypePlugin.ts
new file mode 100644
index 000000000000..94c965a5fc89
--- /dev/null
+++ b/src/features/Conversation/components/MarkdownElements/LobeThinking/rehypePlugin.ts
@@ -0,0 +1,51 @@
+import type { Node } from 'unist';
+import { visit } from 'unist-util-visit';
+
+import { ARTIFACT_THINKING_TAG } from '@/const/plugin';
+
+// eslint-disable-next-line unicorn/consistent-function-scoping
+const rehypePlugin = () => (tree: Node) => {
+ visit(tree, 'element', (node: any, index, parent) => {
+ if (node.type === 'element' && node.tagName === 'p') {
+ const children = node.children || [];
+ const openTagIndex = children.findIndex(
+ (child: any) => child.type === 'raw' && child.value === `<${ARTIFACT_THINKING_TAG}>`,
+ );
+ const closeTagIndex = children.findIndex(
+ (child: any) => child.type === 'raw' && child.value === `</${ARTIFACT_THINKING_TAG}>`,
+ );
+
+ if (openTagIndex !== -1) {
+ // case: a closing tag is present
+ if (closeTagIndex !== -1 && closeTagIndex > openTagIndex) {
+ const content = children.slice(openTagIndex + 1, closeTagIndex);
+ const lobeThinkingNode = {
+ children: content,
+ properties: {},
+ tagName: ARTIFACT_THINKING_TAG,
+ type: 'element',
+ };
+
+ // Replace the entire paragraph with our new lobeThinking node
+ parent.children.splice(index, 1, lobeThinkingNode);
+ return index; // Skip processing the newly inserted node
+ } else {
+ // case: no closing tag
+ const content = children.slice(openTagIndex + 1);
+ const lobeThinkingNode = {
+ children: content,
+ properties: {},
+ tagName: ARTIFACT_THINKING_TAG,
+ type: 'element',
+ };
+
+ // Replace the entire paragraph with our new lobeThinking node
+ parent.children.splice(index, 1, lobeThinkingNode);
+ return index; // Skip processing the newly inserted node
+ }
+ }
+ }
+ });
+};
+
+export default rehypePlugin;
diff --git a/src/features/Conversation/components/MarkdownElements/index.ts b/src/features/Conversation/components/MarkdownElements/index.ts
new file mode 100644
index 000000000000..f573eb85ecc2
--- /dev/null
+++ b/src/features/Conversation/components/MarkdownElements/index.ts
@@ -0,0 +1,4 @@
+import LobeArtifact from './LobeArtifact';
+import LobeThinking from './LobeThinking';
+
+export const markdownElements = [LobeArtifact, LobeThinking];
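
Each entry pairs a custom tag with the rehype plugin that produces it and the React component that renders it. A hedged sketch (not the repository's actual wiring) of how a markdown renderer could consume the list:

```ts
import { markdownElements } from './MarkdownElements';

// Collect the tree transforms and build a tag -> component lookup,
// the two pieces a markdown renderer needs for these custom elements.
const rehypePlugins = markdownElements.map((element) => element.rehypePlugin);

const componentByTag = Object.fromEntries(
  markdownElements.map(({ Component, tag }) => [tag, Component]),
);

console.log(rehypePlugins.length, Object.keys(componentByTag)); // 2 [ 'lobeArtifact', 'lobeThinking' ]
```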
diff --git a/src/features/Conversation/components/MarkdownElements/type.ts b/src/features/Conversation/components/MarkdownElements/type.ts
new file mode 100644
index 000000000000..13ee3ef0377e
--- /dev/null
+++ b/src/features/Conversation/components/MarkdownElements/type.ts
@@ -0,0 +1,7 @@
+import { ReactNode } from 'react';
+
+export interface MarkdownElementProps {
+ children: ReactNode;
+ id: string;
+ type: string;
+}
diff --git a/src/hooks/useInterceptingRoutes.ts b/src/hooks/useInterceptingRoutes.ts
index 47995f27c81b..8baf426019b6 100644
--- a/src/hooks/useInterceptingRoutes.ts
+++ b/src/hooks/useInterceptingRoutes.ts
@@ -47,20 +47,5 @@ export const useOpenChatSettings = (tab: ChatSettingsTabs = ChatSettingsTabs.Met
};
export const useInterceptingRoutes = () => {
- const router = useQueryRoute();
- const mobile = useIsMobile();
- const isIntercepted = useContext(InterceptContext);
- return useMemo(
- () => ({
- isIntercepted,
- push: (url: string, disableIntercepting?: boolean) => {
- if (disableIntercepting || mobile) {
- router.push(`/redirect`, { query: { url } });
- return;
- }
- router.push(url);
- },
- }),
- [mobile, router, isIntercepted],
- );
+ return useContext(InterceptContext);
};
diff --git a/src/libs/agent-runtime/AgentRuntime.ts b/src/libs/agent-runtime/AgentRuntime.ts
index ebc514bec892..05ce1a7ef525 100644
--- a/src/libs/agent-runtime/AgentRuntime.ts
+++ b/src/libs/agent-runtime/AgentRuntime.ts
@@ -3,6 +3,7 @@ import { ClientOptions } from 'openai';
import type { TracePayload } from '@/const/trace';
import { LobeRuntimeAI } from './BaseAI';
+import { LobeAi21AI } from './ai21';
import { LobeAi360AI } from './ai360';
import { LobeAnthropicAI } from './anthropic';
import { LobeAzureOpenAI } from './azureOpenai';
@@ -10,6 +11,7 @@ import { LobeBaichuanAI } from './baichuan';
import { LobeBedrockAI, LobeBedrockAIParams } from './bedrock';
import { LobeDeepSeekAI } from './deepseek';
import { LobeFireworksAI } from './fireworksai';
+import { LobeGithubAI } from './github';
import { LobeGoogleAI } from './google';
import { LobeGroq } from './groq';
import { LobeMinimaxAI } from './minimax';
@@ -116,6 +118,7 @@ class AgentRuntime {
static async initializeWithProviderOptions(
provider: string,
params: Partial<{
+ ai21: Partial<ClientOptions>;
ai360: Partial<ClientOptions>;
anthropic: Partial<ClientOptions>;
azure: { apiVersion?: string; apikey?: string; endpoint?: string };
@@ -123,6 +126,7 @@ class AgentRuntime {
bedrock: Partial<LobeBedrockAIParams>;
deepseek: Partial<ClientOptions>;
fireworksai: Partial<ClientOptions>;
+ github: Partial<ClientOptions>;
google: { apiKey?: string; baseURL?: string };
groq: Partial<ClientOptions>;
minimax: Partial<ClientOptions>;
@@ -218,6 +222,11 @@ class AgentRuntime {
break;
}
+ case ModelProvider.Github: {
+ runtimeModel = new LobeGithubAI(params.github);
+ break;
+ }
+
case ModelProvider.OpenRouter: {
runtimeModel = new LobeOpenRouterAI(params.openrouter);
break;
@@ -282,6 +291,11 @@ class AgentRuntime {
runtimeModel = new LobeSparkAI(params.spark);
break;
}
+
+ case ModelProvider.Ai21: {
+ runtimeModel = new LobeAi21AI(params.ai21);
+ break;
+ }
}
return new AgentRuntime(runtimeModel);
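
A hedged sketch of initializing the runtime through the new provider branches, assuming the `ModelProvider` enum values are the lowercase provider ids and with an illustrative token value:

```ts
import AgentRuntime from './AgentRuntime';

// Assumed provider id and token for illustration only.
const runtime = await AgentRuntime.initializeWithProviderOptions('github', {
  github: { apiKey: 'ghp_xxxxxx' },
});
```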
diff --git a/src/libs/agent-runtime/ai21/index.test.ts b/src/libs/agent-runtime/ai21/index.test.ts
new file mode 100644
index 000000000000..9b229ddd7c97
--- /dev/null
+++ b/src/libs/agent-runtime/ai21/index.test.ts
@@ -0,0 +1,255 @@
+// @vitest-environment node
+import OpenAI from 'openai';
+import { Mock, afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
+
+import {
+ ChatStreamCallbacks,
+ LobeOpenAICompatibleRuntime,
+ ModelProvider,
+} from '@/libs/agent-runtime';
+
+import * as debugStreamModule from '../utils/debugStream';
+import { LobeAi21AI } from './index';
+
+const provider = ModelProvider.Ai21;
+const defaultBaseURL = 'https://api.ai21.com/studio/v1';
+
+const bizErrorType = 'ProviderBizError';
+const invalidErrorType = 'InvalidProviderAPIKey';
+
+// Mock the console.error to avoid polluting test output
+vi.spyOn(console, 'error').mockImplementation(() => {});
+
+let instance: LobeOpenAICompatibleRuntime;
+
+beforeEach(() => {
+ instance = new LobeAi21AI({ apiKey: 'test' });
+
+ // use vi.spyOn to mock the chat.completions.create method
+ vi.spyOn(instance['client'].chat.completions, 'create').mockResolvedValue(
+ new ReadableStream() as any,
+ );
+});
+
+afterEach(() => {
+ vi.clearAllMocks();
+});
+
+describe('LobeAi21AI', () => {
+ describe('init', () => {
+ it('should correctly initialize with an API key', async () => {
+ const instance = new LobeAi21AI({ apiKey: 'test_api_key' });
+ expect(instance).toBeInstanceOf(LobeAi21AI);
+ expect(instance.baseURL).toEqual(defaultBaseURL);
+ });
+ });
+
+ describe('chat', () => {
+ describe('Error', () => {
+ it('should return OpenAIBizError with an openai error response when OpenAI.APIError is thrown', async () => {
+ // Arrange
+ const apiError = new OpenAI.APIError(
+ 400,
+ {
+ status: 400,
+ error: {
+ message: 'Bad Request',
+ },
+ },
+ 'Error message',
+ {},
+ );
+
+ vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(apiError);
+
+ // Act
+ try {
+ await instance.chat({
+ messages: [{ content: 'Hello', role: 'user' }],
+ model: 'jamba-1.5-mini',
+ temperature: 0,
+ });
+ } catch (e) {
+ expect(e).toEqual({
+ endpoint: defaultBaseURL,
+ error: {
+ error: { message: 'Bad Request' },
+ status: 400,
+ },
+ errorType: bizErrorType,
+ provider,
+ });
+ }
+ });
+
+ it('should throw AgentRuntimeError with NoOpenAIAPIKey if no apiKey is provided', async () => {
+ try {
+ new LobeAi21AI({});
+ } catch (e) {
+ expect(e).toEqual({ errorType: invalidErrorType });
+ }
+ });
+
+ it('should return OpenAIBizError with the cause when OpenAI.APIError is thrown with cause', async () => {
+ // Arrange
+ const errorInfo = {
+ stack: 'abc',
+ cause: {
+ message: 'api is undefined',
+ },
+ };
+ const apiError = new OpenAI.APIError(400, errorInfo, 'module error', {});
+
+ vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(apiError);
+
+ // Act
+ try {
+ await instance.chat({
+ messages: [{ content: 'Hello', role: 'user' }],
+ model: 'jamba-1.5-mini',
+ temperature: 0,
+ });
+ } catch (e) {
+ expect(e).toEqual({
+ endpoint: defaultBaseURL,
+ error: {
+ cause: { message: 'api is undefined' },
+ stack: 'abc',
+ },
+ errorType: bizErrorType,
+ provider,
+ });
+ }
+ });
+
+ it('should return OpenAIBizError with a cause response with a desensitized URL', async () => {
+ // Arrange
+ const errorInfo = {
+ stack: 'abc',
+ cause: { message: 'api is undefined' },
+ };
+ const apiError = new OpenAI.APIError(400, errorInfo, 'module error', {});
+
+ instance = new LobeAi21AI({
+ apiKey: 'test',
+
+ baseURL: 'https://api.abc.com/v1',
+ });
+
+ vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(apiError);
+
+ // Act
+ try {
+ await instance.chat({
+ messages: [{ content: 'Hello', role: 'user' }],
+ model: 'jamba-1.5-mini',
+ temperature: 0,
+ });
+ } catch (e) {
+ expect(e).toEqual({
+ endpoint: 'https://api.***.com/v1',
+ error: {
+ cause: { message: 'api is undefined' },
+ stack: 'abc',
+ },
+ errorType: bizErrorType,
+ provider,
+ });
+ }
+ });
+
+ it('should throw an InvalidAi21APIKey error type on 401 status code', async () => {
+ // Mock the API call to simulate a 401 error
+ const error = new Error('Unauthorized') as any;
+ error.status = 401;
+ vi.mocked(instance['client'].chat.completions.create).mockRejectedValue(error);
+
+ try {
+ await instance.chat({
+ messages: [{ content: 'Hello', role: 'user' }],
+ model: 'jamba-1.5-mini',
+ temperature: 0,
+ });
+ } catch (e) {
+ // Expect the chat method to throw an error with InvalidAi21APIKey
+ expect(e).toEqual({
+ endpoint: defaultBaseURL,
+ error: new Error('Unauthorized'),
+ errorType: invalidErrorType,
+ provider,
+ });
+ }
+ });
+
+ it('should return AgentRuntimeError for non-OpenAI errors', async () => {
+ // Arrange
+ const genericError = new Error('Generic Error');
+
+ vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(genericError);
+
+ // Act
+ try {
+ await instance.chat({
+ messages: [{ content: 'Hello', role: 'user' }],
+ model: 'jamba-1.5-mini',
+ temperature: 0,
+ });
+ } catch (e) {
+ expect(e).toEqual({
+ endpoint: defaultBaseURL,
+ errorType: 'AgentRuntimeError',
+ provider,
+ error: {
+ name: genericError.name,
+ cause: genericError.cause,
+ message: genericError.message,
+ stack: genericError.stack,
+ },
+ });
+ }
+ });
+ });
+
+ describe('DEBUG', () => {
+ it('should call debugStream and return StreamingTextResponse when DEBUG_AI21_CHAT_COMPLETION is 1', async () => {
+ // Arrange
+ const mockProdStream = new ReadableStream() as any; // mocked production stream
+ const mockDebugStream = new ReadableStream({
+ start(controller) {
+ controller.enqueue('Debug stream content');
+ controller.close();
+ },
+ }) as any;
+ mockDebugStream.toReadableStream = () => mockDebugStream; // attach the toReadableStream method
+
+ // mock the chat.completions.create return value, including a mocked tee method
+ (instance['client'].chat.completions.create as Mock).mockResolvedValue({
+ tee: () => [mockProdStream, { toReadableStream: () => mockDebugStream }],
+ });
+
+ // save the original environment variable value
+ const originalDebugValue = process.env.DEBUG_AI21_CHAT_COMPLETION;
+
+ // mock the environment variable
+ process.env.DEBUG_AI21_CHAT_COMPLETION = '1';
+ vi.spyOn(debugStreamModule, 'debugStream').mockImplementation(() => Promise.resolve());
+
+ // run the test
+ // run your function under test and make sure it calls debugStream when the condition is met
+ // hypothetical invocation; adjust to your actual setup
+ await instance.chat({
+ messages: [{ content: 'Hello', role: 'user' }],
+ model: 'jamba-1.5-mini',
+ stream: true,
+ temperature: 0,
+ });
+
+ // verify that debugStream was called
+ expect(debugStreamModule.debugStream).toHaveBeenCalled();
+
+ // restore the original environment variable value
+ process.env.DEBUG_AI21_CHAT_COMPLETION = originalDebugValue;
+ });
+ });
+ });
+});
diff --git a/src/libs/agent-runtime/ai21/index.ts b/src/libs/agent-runtime/ai21/index.ts
new file mode 100644
index 000000000000..67089341b7c0
--- /dev/null
+++ b/src/libs/agent-runtime/ai21/index.ts
@@ -0,0 +1,18 @@
+import { ModelProvider } from '../types';
+import { LobeOpenAICompatibleFactory } from '../utils/openaiCompatibleFactory';
+
+export const LobeAi21AI = LobeOpenAICompatibleFactory({
+ baseURL: 'https://api.ai21.com/studio/v1',
+ chatCompletion: {
+ handlePayload: (payload) => {
+ return {
+ ...payload,
+ stream: !payload.tools,
+ } as any;
+ },
+ },
+ debug: {
+ chatCompletion: () => process.env.DEBUG_AI21_CHAT_COMPLETION === '1',
+ },
+ provider: ModelProvider.Ai21,
+});
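
A hedged usage sketch of the new AI21 runtime, mirroring the calls exercised in the test file above; the key lookup and top-level wiring are assumptions. Note that `handlePayload` sets `stream: !payload.tools`, so passing `tools` switches the request to a non-streaming completion:

```ts
import { LobeAi21AI } from './ai21';

const main = async () => {
  // Reading the key from AI21_API_KEY is an assumption for this sketch.
  const runtime = new LobeAi21AI({ apiKey: process.env.AI21_API_KEY });

  const response = await runtime.chat({
    messages: [{ content: 'Hello', role: 'user' }],
    model: 'jamba-1.5-mini',
    temperature: 0,
  });

  console.log(response);
};

main();
```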
diff --git a/src/libs/agent-runtime/error.ts b/src/libs/agent-runtime/error.ts
index e364bba1cba5..11e94efd9648 100644
--- a/src/libs/agent-runtime/error.ts
+++ b/src/libs/agent-runtime/error.ts
@@ -13,6 +13,8 @@ export const AgentRuntimeErrorType = {
InvalidBedrockCredentials: 'InvalidBedrockCredentials',
StreamChunkError: 'StreamChunkError',
+ InvalidGithubToken: 'InvalidGithubToken',
+
/**
* @deprecated
*/
diff --git a/src/libs/agent-runtime/github/index.test.ts b/src/libs/agent-runtime/github/index.test.ts
new file mode 100644
index 000000000000..e466ac155389
--- /dev/null
+++ b/src/libs/agent-runtime/github/index.test.ts
@@ -0,0 +1,246 @@
+// @vitest-environment node
+import OpenAI from 'openai';
+import { Mock, afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
+
+import { LobeOpenAICompatibleRuntime } from '@/libs/agent-runtime';
+import { ModelProvider } from '@/libs/agent-runtime';
+import { AgentRuntimeErrorType } from '@/libs/agent-runtime';
+
+import * as debugStreamModule from '../utils/debugStream';
+import { LobeGithubAI } from './index';
+
+const provider = ModelProvider.Github;
+const defaultBaseURL = 'https://models.inference.ai.azure.com';
+const bizErrorType = AgentRuntimeErrorType.ProviderBizError;
+const invalidErrorType = AgentRuntimeErrorType.InvalidGithubToken;
+
+// Mock the console.error to avoid polluting test output
+vi.spyOn(console, 'error').mockImplementation(() => {});
+
+let instance: LobeOpenAICompatibleRuntime;
+
+beforeEach(() => {
+ instance = new LobeGithubAI({ apiKey: 'test' });
+
+ // Use vi.spyOn to mock the chat.completions.create method
+ vi.spyOn(instance['client'].chat.completions, 'create').mockResolvedValue(
+ new ReadableStream() as any,
+ );
+});
+
+afterEach(() => {
+ vi.clearAllMocks();
+});
+
+describe('LobeGithubAI', () => {
+ describe('init', () => {
+ it('should correctly initialize with an API key', async () => {
+ const instance = new LobeGithubAI({ apiKey: 'test_api_key' });
+ expect(instance).toBeInstanceOf(LobeGithubAI);
+ expect(instance.baseURL).toEqual(defaultBaseURL);
+ });
+ });
+
+ describe('chat', () => {
+ describe('Error', () => {
+ it('should return GithubBizError with an openai error response when OpenAI.APIError is thrown', async () => {
+ // Arrange
+ const apiError = new OpenAI.APIError(
+ 400,
+ {
+ status: 400,
+ error: {
+ message: 'Bad Request',
+ },
+ },
+ 'Error message',
+ {},
+ );
+
+ vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(apiError);
+
+ // Act
+ try {
+ await instance.chat({
+ messages: [{ content: 'Hello', role: 'user' }],
+ model: 'meta-llama-3-70b-instruct',
+ temperature: 0.7,
+ });
+ } catch (e) {
+ expect(e).toEqual({
+ endpoint: defaultBaseURL,
+ error: {
+ error: { message: 'Bad Request' },
+ status: 400,
+ },
+ errorType: bizErrorType,
+ provider,
+ });
+ }
+ });
+
+ it('should throw AgentRuntimeError with InvalidGithubToken if no apiKey is provided', async () => {
+ try {
+ new LobeGithubAI({});
+ } catch (e) {
+ expect(e).toEqual({ errorType: invalidErrorType });
+ }
+ });
+
+ it('should return GithubBizError with the cause when OpenAI.APIError is thrown with cause', async () => {
+ // Arrange
+ const errorInfo = {
+ stack: 'abc',
+ cause: {
+ message: 'api is undefined',
+ },
+ };
+ const apiError = new OpenAI.APIError(400, errorInfo, 'module error', {});
+
+ vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(apiError);
+
+ // Act
+ try {
+ await instance.chat({
+ messages: [{ content: 'Hello', role: 'user' }],
+ model: 'meta-llama-3-70b-instruct',
+ temperature: 0.7,
+ });
+ } catch (e) {
+ expect(e).toEqual({
+ endpoint: defaultBaseURL,
+ error: {
+ cause: { message: 'api is undefined' },
+ stack: 'abc',
+ },
+ errorType: bizErrorType,
+ provider,
+ });
+ }
+ });
+
+ it('should return GithubBizError with a cause response with a desensitized URL', async () => {
+ // Arrange
+ const errorInfo = {
+ stack: 'abc',
+ cause: { message: 'api is undefined' },
+ };
+ const apiError = new OpenAI.APIError(400, errorInfo, 'module error', {});
+
+ instance = new LobeGithubAI({
+ apiKey: 'test',
+ baseURL: 'https://api.abc.com/v1',
+ });
+
+ vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(apiError);
+
+ // Act
+ try {
+ await instance.chat({
+ messages: [{ content: 'Hello', role: 'user' }],
+ model: 'meta-llama-3-70b-instruct',
+ temperature: 0.7,
+ });
+ } catch (e) {
+ expect(e).toEqual({
+ endpoint: 'https://api.***.com/v1',
+ error: {
+ cause: { message: 'api is undefined' },
+ stack: 'abc',
+ },
+ errorType: bizErrorType,
+ provider,
+ });
+ }
+ });
+
+ it('should throw an InvalidGithubToken error type on 401 status code', async () => {
+ // Mock the API call to simulate a 401 error
+ const error = new Error('InvalidApiKey') as any;
+ error.status = 401;
+ vi.mocked(instance['client'].chat.completions.create).mockRejectedValue(error);
+
+ try {
+ await instance.chat({
+ messages: [{ content: 'Hello', role: 'user' }],
+ model: 'meta-llama-3-70b-instruct',
+ temperature: 0.7,
+ });
+ } catch (e) {
+ expect(e).toEqual({
+ endpoint: defaultBaseURL,
+ error: new Error('InvalidApiKey'),
+ errorType: invalidErrorType,
+ provider,
+ });
+ }
+ });
+
+ it('should return AgentRuntimeError for non-OpenAI errors', async () => {
+ // Arrange
+ const genericError = new Error('Generic Error');
+
+ vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(genericError);
+
+ // Act
+ try {
+ await instance.chat({
+ messages: [{ content: 'Hello', role: 'user' }],
+ model: 'meta-llama-3-70b-instruct',
+ temperature: 0.7,
+ });
+ } catch (e) {
+ expect(e).toEqual({
+ endpoint: defaultBaseURL,
+ errorType: 'AgentRuntimeError',
+ provider,
+ error: {
+ name: genericError.name,
+ cause: genericError.cause,
+ message: genericError.message,
+ stack: genericError.stack,
+ },
+ });
+ }
+ });
+ });
+
+ describe('DEBUG', () => {
+ it('should call debugStream and return StreamingTextResponse when DEBUG_GITHUB_CHAT_COMPLETION is 1', async () => {
+ // Arrange
+ const mockProdStream = new ReadableStream() as any;
+ const mockDebugStream = new ReadableStream({
+ start(controller) {
+ controller.enqueue('Debug stream content');
+ controller.close();
+ },
+ }) as any;
+ mockDebugStream.toReadableStream = () => mockDebugStream;
+
+ // mock
+ (instance['client'].chat.completions.create as Mock).mockResolvedValue({
+ tee: () => [mockProdStream, { toReadableStream: () => mockDebugStream }],
+ });
+
+ const originalDebugValue = process.env.DEBUG_GITHUB_CHAT_COMPLETION;
+
+ process.env.DEBUG_GITHUB_CHAT_COMPLETION = '1';
+ vi.spyOn(debugStreamModule, 'debugStream').mockImplementation(() => Promise.resolve());
+
+ // run the test
+ await instance.chat({
+ messages: [{ content: 'Hello', role: 'user' }],
+ model: 'meta-llama-3-70b-instruct',
+ stream: true,
+ temperature: 0.7,
+ });
+
+ // verify that debugStream was called
+ expect(debugStreamModule.debugStream).toHaveBeenCalled();
+
+ // restore the original environment variable value
+ process.env.DEBUG_GITHUB_CHAT_COMPLETION = originalDebugValue;
+ });
+ });
+ });
+});
diff --git a/src/libs/agent-runtime/github/index.ts b/src/libs/agent-runtime/github/index.ts
new file mode 100644
index 000000000000..63e326fd4f1c
--- /dev/null
+++ b/src/libs/agent-runtime/github/index.ts
@@ -0,0 +1,15 @@
+import { AgentRuntimeErrorType } from '../error';
+import { ModelProvider } from '../types';
+import { LobeOpenAICompatibleFactory } from '../utils/openaiCompatibleFactory';
+
+export const LobeGithubAI = LobeOpenAICompatibleFactory({
+ baseURL: 'https://models.inference.ai.azure.com',
+ debug: {
+ chatCompletion: () => process.env.DEBUG_GITHUB_CHAT_COMPLETION === '1',
+ },
+ errorType: {
+ bizError: AgentRuntimeErrorType.ProviderBizError,
+ invalidAPIKey: AgentRuntimeErrorType.InvalidGithubToken,
+ },
+ provider: ModelProvider.Github,
+});
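
Mirroring the constructor test above, a short sketch of the dedicated error type in action; the relative import paths are illustrative:

```ts
import { AgentRuntimeErrorType } from '../error';
import { LobeGithubAI } from './github';

// Constructing the runtime without a token raises the new InvalidGithubToken
// error type instead of the generic invalid-API-key error.
try {
  new LobeGithubAI({});
} catch (e) {
  console.log((e as any).errorType === AgentRuntimeErrorType.InvalidGithubToken); // true
}
```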
diff --git a/src/libs/agent-runtime/openrouter/__snapshots__/index.test.ts.snap b/src/libs/agent-runtime/openrouter/__snapshots__/index.test.ts.snap
index c9727a8360f4..674b4cc609fa 100644
--- a/src/libs/agent-runtime/openrouter/__snapshots__/index.test.ts.snap
+++ b/src/libs/agent-runtime/openrouter/__snapshots__/index.test.ts.snap
@@ -3,73 +3,2182 @@
exports[`LobeOpenRouterAI > models > should get models 1`] = `
[
{
- "description": "LLaVA is a large multimodal model that combines a vision encoder and Vicuna for general-purpose visual and language understanding, achieving impressive chat capabilities mimicking [GPT-4](/models/openai/gpt-4-vision-preview) and setting a new state-of-the-art accuracy on Science QA
+ "description": "Command-R is a 35B parameter model that performs conversational language tasks at a higher quality, more reliably, and with a longer context than previous models. It can be used for complex workflows like code generation, retrieval augmented generation (RAG), tool use, and agents.
+
+Read the launch post [here](https://txt.cohere.com/command-r/).
+
+Use of this model is subject to Cohere's [Acceptable Use Policy](https://docs.cohere.com/docs/c4ai-acceptable-use-policy).",
+ "displayName": "Cohere: Command R (03-2024)",
+ "enabled": false,
+ "functionCall": false,
+ "id": "cohere/command-r-03-2024",
+ "maxTokens": 4000,
+ "tokens": 128000,
+ "vision": false,
+ },
+ {
+ "description": "Command R+ is a new, 104B-parameter LLM from Cohere. It's useful for roleplay, general consumer usecases, and Retrieval Augmented Generation (RAG).
+
+It offers multilingual support for ten key languages to facilitate global business operations. See benchmarks and the launch post [here](https://txt.cohere.com/command-r-plus-microsoft-azure/).
+
+Use of this model is subject to Cohere's [Acceptable Use Policy](https://docs.cohere.com/docs/c4ai-acceptable-use-policy).",
+ "displayName": "Cohere: Command R+ (04-2024)",
+ "enabled": false,
+ "functionCall": false,
+ "id": "cohere/command-r-plus-04-2024",
+ "maxTokens": 4000,
+ "tokens": 128000,
+ "vision": false,
+ },
+ {
+ "description": "command-r-plus-08-2024 is an update of the [Command R+](/models/cohere/command-r-plus) with roughly 50% higher throughput and 25% lower latencies as compared to the previous Command R+ version, while keeping the hardware footprint the same.
+
+Read the launch post [here](https://docs.cohere.com/changelog/command-gets-refreshed).
+
+Use of this model is subject to Cohere's [Acceptable Use Policy](https://docs.cohere.com/docs/c4ai-acceptable-use-policy).",
+ "displayName": "Cohere: Command R+ (08-2024)",
+ "enabled": false,
+ "functionCall": false,
+ "id": "cohere/command-r-plus-08-2024",
+ "maxTokens": 4000,
+ "tokens": 128000,
+ "vision": false,
+ },
+ {
+ "description": "command-r-08-2024 is an update of the [Command R](/models/cohere/command-r) with improved performance for multilingual retrieval-augmented generation (RAG) and tool use. More broadly, it is better at math, code and reasoning and is competitive with the previous version of the larger Command R+ model.
+
+Read the launch post [here](https://docs.cohere.com/changelog/command-gets-refreshed).
+
+Use of this model is subject to Cohere's [Acceptable Use Policy](https://docs.cohere.com/docs/c4ai-acceptable-use-policy).",
+ "displayName": "Cohere: Command R (08-2024)",
+ "enabled": false,
+ "functionCall": false,
+ "id": "cohere/command-r-08-2024",
+ "maxTokens": 4000,
+ "tokens": 128000,
+ "vision": false,
+ },
+ {
+ "description": "Gemini 1.5 Flash 8B Experimental is an experimental, 8B parameter version of the [Gemini 1.5 Flash](/models/google/gemini-flash-1.5) model.
+
+Usage of Gemini is subject to Google's [Gemini Terms of Use](https://ai.google.dev/terms).
+
+#multimodal
+
+Note: This model is experimental and not suited for production use-cases. It may be removed or redirected to another model in the future.",
+ "displayName": "Google: Gemini Flash 8B 1.5 Experimental",
+ "enabled": false,
+ "functionCall": false,
+ "id": "google/gemini-flash-8b-1.5-exp",
+ "maxTokens": 32768,
+ "tokens": 4000000,
+ "vision": true,
+ },
+ {
+ "description": "Gemini 1.5 Flash Experimental is an experimental version of the [Gemini 1.5 Flash](/models/google/gemini-flash-1.5) model.
+
+Usage of Gemini is subject to Google's [Gemini Terms of Use](https://ai.google.dev/terms).
+
+#multimodal
+
+Note: This model is experimental and not suited for production use-cases. It may be removed or redirected to another model in the future.",
+ "displayName": "Google: Gemini Flash 1.5 Experimental",
+ "enabled": true,
+ "functionCall": false,
+ "id": "google/gemini-flash-1.5-exp",
+ "maxTokens": 32768,
+ "tokens": 4000000,
+ "vision": true,
+ },
+ {
+ "description": "Euryale L3.1 70B v2.2 is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k). It is the successor of [Euryale L3 70B v2.1](/models/sao10k/l3-euryale-70b).",
+ "displayName": "Llama 3.1 Euryale 70B v2.2",
+ "enabled": false,
+ "functionCall": false,
+ "id": "sao10k/l3.1-euryale-70b",
+ "maxTokens": undefined,
+ "tokens": 8192,
+ "vision": false,
+ },
+ {
+ "description": "Jamba 1.5 Large is part of AI21's new family of open models, offering superior speed, efficiency, and quality.
+
+It features a 256K effective context window, the longest among open models, enabling improved performance on tasks like document summarization and analysis.
+
+Built on a novel SSM-Transformer architecture, it outperforms larger models like Llama 3.1 70B on benchmarks while maintaining resource efficiency.
+
+Read their [announcement](https://www.ai21.com/blog/announcing-jamba-model-family) to learn more.",
+ "displayName": "AI21: Jamba 1.5 Large",
+ "enabled": false,
+ "functionCall": false,
+ "id": "ai21/jamba-1-5-large",
+ "maxTokens": 4096,
+ "tokens": 256000,
+ "vision": false,
+ },
+ {
+ "description": "Jamba 1.5 Mini is the world's first production-grade Mamba-based model, combining SSM and Transformer architectures for a 256K context window and high efficiency.
+
+It works with 9 languages and can handle various writing and analysis tasks as well as or better than similar small models.
+
+This model uses less computer memory and works faster with longer texts than previous designs.
+
+Read their [announcement](https://www.ai21.com/blog/announcing-jamba-model-family) to learn more.",
+ "displayName": "AI21: Jamba 1.5 Mini",
+ "enabled": false,
+ "functionCall": false,
+ "id": "ai21/jamba-1-5-mini",
+ "maxTokens": 4096,
+ "tokens": 256000,
+ "vision": false,
+ },
+ {
+ "description": "Phi-3.5 models are lightweight, state-of-the-art open models. These models were trained with Phi-3 datasets that include both synthetic data and the filtered, publicly available websites data, with a focus on high quality and reasoning-dense properties. Phi-3.5 Mini uses 3.8B parameters, and is a dense decoder-only transformer model using the same tokenizer as [Phi-3 Mini](/models/microsoft/phi-3-mini-128k-instruct).
+
+The models underwent a rigorous enhancement process, incorporating both supervised fine-tuning, proximal policy optimization, and direct preference optimization to ensure precise instruction adherence and robust safety measures. When assessed against benchmarks that test common sense, language understanding, math, code, long context and logical reasoning, Phi-3.5 models showcased robust and state-of-the-art performance among models with less than 13 billion parameters.",
+ "displayName": "Phi-3.5 Mini 128K Instruct",
+ "enabled": false,
+ "functionCall": false,
+ "id": "microsoft/phi-3.5-mini-128k-instruct",
+ "maxTokens": undefined,
+ "tokens": 128000,
+ "vision": false,
+ },
+ {
+ "description": "Hermes 3 is a generalist language model with many improvements over [Hermes 2](/models/nousresearch/nous-hermes-2-mistral-7b-dpo), including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the board.
+
+Hermes 3 70B is a competitive, if not superior finetune of the [Llama-3.1 70B foundation model](/models/meta-llama/llama-3.1-70b-instruct), focused on aligning LLMs to the user, with powerful steering capabilities and control given to the end user.
+
+The Hermes 3 series builds and expands on the Hermes 2 set of capabilities, including more powerful and reliable function calling and structured output capabilities, generalist assistant capabilities, and improved code generation skills.",
+ "displayName": "Nous: Hermes 3 70B Instruct",
+ "enabled": false,
+ "functionCall": true,
+ "id": "nousresearch/hermes-3-llama-3.1-70b",
+ "maxTokens": undefined,
+ "tokens": 131072,
+ "vision": false,
+ },
+ {
+ "description": "Hermes 3 is a generalist language model with many improvements over Hermes 2, including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the board.
+
+Hermes 3 405B is a frontier-level, full-parameter finetune of the Llama-3.1 405B foundation model, focused on aligning LLMs to the user, with powerful steering capabilities and control given to the end user.
+
+The Hermes 3 series builds and expands on the Hermes 2 set of capabilities, including more powerful and reliable function calling and structured output capabilities, generalist assistant capabilities, and improved code generation skills.
+
+Hermes 3 is competitive, if not superior, to Llama-3.1 Instruct models at general capabilities, with varying strengths and weaknesses attributable between the two.",
+ "displayName": "Nous: Hermes 3 405B Instruct",
+ "enabled": false,
+ "functionCall": true,
+ "id": "nousresearch/hermes-3-llama-3.1-405b",
+ "maxTokens": undefined,
+ "tokens": 131072,
+ "vision": false,
+ },
+ {
+ "description": "Hermes 3 is a generalist language model with many improvements over Hermes 2, including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the board.
+
+Hermes 3 405B is a frontier-level, full-parameter finetune of the Llama-3.1 405B foundation model, focused on aligning LLMs to the user, with powerful steering capabilities and control given to the end user.
+
+The Hermes 3 series builds and expands on the Hermes 2 set of capabilities, including more powerful and reliable function calling and structured output capabilities, generalist assistant capabilities, and improved code generation skills.
+
+Hermes 3 is competitive, if not superior, to Llama-3.1 Instruct models at general capabilities, with varying strengths and weaknesses attributable between the two.
+
+_These are extended-context endpoints for [Hermes 3 405B Instruct](/models/nousresearch/hermes-3-llama-3.1-405b). They may have higher prices._",
+ "displayName": "Nous: Hermes 3 405B Instruct (extended)",
+ "enabled": false,
+ "functionCall": true,
+ "id": "nousresearch/hermes-3-llama-3.1-405b:extended",
+ "maxTokens": undefined,
+ "tokens": 128000,
+ "vision": false,
+ },
+ {
+ "description": "Llama 3.1 Sonar is Perplexity's latest model family. It surpasses their earlier Sonar models in cost-efficiency, speed, and performance. The model is built upon the Llama 3.1 405B and has internet access.",
+ "displayName": "Perplexity: Llama 3.1 Sonar 405B Online",
+ "enabled": true,
+ "functionCall": false,
+ "id": "perplexity/llama-3.1-sonar-huge-128k-online",
+ "maxTokens": undefined,
+ "tokens": 127072,
+ "vision": false,
+ },
+ {
+ "description": "Dynamic model continuously updated to the current version of [GPT-4o](/models/openai/gpt-4o) in ChatGPT. Intended for research and evaluation.
+
+Note: This model is experimental and not suited for production use-cases. It may be removed or redirected to another model in the future.",
+ "displayName": "OpenAI: ChatGPT-4o",
+ "enabled": true,
+ "functionCall": false,
+ "id": "openai/chatgpt-4o-latest",
+ "maxTokens": 16384,
+ "tokens": 128000,
+ "vision": false,
+ },
+ {
+ "description": "Lunaris 8B is a versatile generalist and roleplaying model based on Llama 3. It's a strategic merge of multiple models, designed to balance creativity with improved logic and general knowledge.
+
+Created by [Sao10k](https://huggingface.co/Sao10k), this model aims to offer an improved experience over Stheno v3.2, with enhanced creativity and logical reasoning.
+
+For best results, use with Llama 3 Instruct context template, temperature 1.4, and min_p 0.1.",
+ "displayName": "Llama 3 8B Lunaris",
+ "enabled": false,
+ "functionCall": false,
+ "id": "sao10k/l3-lunaris-8b",
+ "maxTokens": undefined,
+ "tokens": 8192,
+ "vision": false,
+ },
+ {
+ "description": "Starcannon 12B is a creative roleplay and story writing model, using [nothingiisreal/mn-celeste-12b](https://openrouter.ai/models/nothingiisreal/mn-celeste-12b) as a base and [intervitens/mini-magnum-12b-v1.1](https://huggingface.co/intervitens/mini-magnum-12b-v1.1) merged in using the [TIES](https://arxiv.org/abs/2306.01708) method.
+
+Although more similar to Magnum overall, the model remains very creative, with a pleasant writing style. It is recommended for people wanting more variety than Magnum, and yet more verbose prose than Celeste.",
+ "displayName": "Mistral Nemo 12B Starcannon",
+ "enabled": false,
+ "functionCall": false,
+ "id": "aetherwiing/mn-starcannon-12b",
+ "maxTokens": undefined,
+ "tokens": 12000,
+ "vision": false,
+ },
+ {
+ "description": "The 2024-08-06 version of GPT-4o offers improved performance in structured outputs, with the ability to supply a JSON schema in the respone_format. Read more [here](https://openai.com/index/introducing-structured-outputs-in-the-api/).
+
+GPT-4o ("o" for "omni") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as fast and 50% more cost-effective. GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.
+
+For benchmarking against other models, it was briefly called ["im-also-a-good-gpt2-chatbot"](https://twitter.com/LiamFedus/status/1790064963966370209)",
+ "displayName": "OpenAI: GPT-4o (2024-08-06)",
+ "enabled": true,
+ "functionCall": false,
+ "id": "openai/gpt-4o-2024-08-06",
+ "maxTokens": 16384,
+ "tokens": 128000,
+ "vision": false,
+ },
+ {
+ "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This is the base 405B pre-trained version.
+
+It has demonstrated strong performance compared to leading closed-source models in human evaluations.
+
+To read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).",
+ "displayName": "Meta: Llama 3.1 405B (base)",
+ "enabled": false,
+ "functionCall": false,
+ "id": "meta-llama/llama-3.1-405b",
+ "maxTokens": undefined,
+ "tokens": 131072,
+ "vision": false,
+ },
+ {
+ "description": "A specialized story writing and roleplaying model based on Mistral's NeMo 12B Instruct. Fine-tuned on curated datasets including Reddit Writing Prompts and Opus Instruct 25K.
+
+This model excels at creative writing, offering improved NSFW capabilities, with smarter and more active narration. It demonstrates remarkable versatility in both SFW and NSFW scenarios, with strong Out of Character (OOC) steering capabilities, allowing fine-tuned control over narrative direction and character behavior.
+
+Check out the model's [HuggingFace page](https://huggingface.co/nothingiisreal/MN-12B-Celeste-V1.9) for details on what parameters and prompts work best!",
+ "displayName": "Mistral Nemo 12B Celeste",
+ "enabled": false,
+ "functionCall": false,
+ "id": "nothingiisreal/mn-celeste-12b",
+ "maxTokens": undefined,
+ "tokens": 32000,
+ "vision": false,
+ },
+ {
+ "description": "Gemini 1.5 Pro (0827) is an experimental version of the [Gemini 1.5 Pro](/models/google/gemini-pro-1.5) model.
+
+Usage of Gemini is subject to Google's [Gemini Terms of Use](https://ai.google.dev/terms).
+
+#multimodal
+
+Note: This model is experimental and not suited for production use-cases. It may be removed or redirected to another model in the future.",
+ "displayName": "Google: Gemini Pro 1.5 Experimental",
+ "enabled": true,
+ "functionCall": false,
+ "id": "google/gemini-pro-1.5-exp",
+ "maxTokens": 32768,
+ "tokens": 4000000,
+ "vision": true,
+ },
+ {
+ "description": "Llama 3.1 Sonar is Perplexity's latest model family. It surpasses their earlier Sonar models in cost-efficiency, speed, and performance.
+
+This is the online version of the [offline chat model](/models/perplexity/llama-3.1-sonar-large-128k-chat). It is focused on delivering helpful, up-to-date, and factual responses. #online",
+ "displayName": "Perplexity: Llama 3.1 Sonar 70B Online",
+ "enabled": true,
+ "functionCall": false,
+ "id": "perplexity/llama-3.1-sonar-large-128k-online",
+ "maxTokens": undefined,
+ "tokens": 127072,
+ "vision": false,
+ },
+ {
+ "description": "Llama 3.1 Sonar is Perplexity's latest model family. It surpasses their earlier Sonar models in cost-efficiency, speed, and performance.
+
+This is a normal offline LLM, but the [online version](/models/perplexity/llama-3.1-sonar-large-128k-online) of this model has Internet access.",
+ "displayName": "Perplexity: Llama 3.1 Sonar 70B",
+ "enabled": true,
+ "functionCall": false,
+ "id": "perplexity/llama-3.1-sonar-large-128k-chat",
+ "maxTokens": undefined,
+ "tokens": 131072,
+ "vision": false,
+ },
+ {
+ "description": "Llama 3.1 Sonar is Perplexity's latest model family. It surpasses their earlier Sonar models in cost-efficiency, speed, and performance.
+
+This is the online version of the [offline chat model](/models/perplexity/llama-3.1-sonar-small-128k-chat). It is focused on delivering helpful, up-to-date, and factual responses. #online",
+ "displayName": "Perplexity: Llama 3.1 Sonar 8B Online",
+ "enabled": true,
+ "functionCall": false,
+ "id": "perplexity/llama-3.1-sonar-small-128k-online",
+ "maxTokens": undefined,
+ "tokens": 127072,
+ "vision": false,
+ },
+ {
+ "description": "Llama 3.1 Sonar is Perplexity's latest model family. It surpasses their earlier Sonar models in cost-efficiency, speed, and performance.
+
+This is a normal offline LLM, but the [online version](/models/perplexity/llama-3.1-sonar-small-128k-online) of this model has Internet access.",
+ "displayName": "Perplexity: Llama 3.1 Sonar 8B",
+ "enabled": true,
+ "functionCall": false,
+ "id": "perplexity/llama-3.1-sonar-small-128k-chat",
+ "maxTokens": undefined,
+ "tokens": 131072,
+ "vision": false,
+ },
+ {
+ "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 70B instruct-tuned version is optimized for high quality dialogue usecases.
+
+It has demonstrated strong performance compared to leading closed-source models in human evaluations.
+
+To read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).",
+ "displayName": "Meta: Llama 3.1 70B Instruct",
+ "enabled": false,
+ "functionCall": false,
+ "id": "meta-llama/llama-3.1-70b-instruct",
+ "maxTokens": undefined,
+ "tokens": 131072,
+ "vision": false,
+ },
+ {
+ "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 8B instruct-tuned version is fast and efficient.
+
+It has demonstrated strong performance compared to leading closed-source models in human evaluations.
+
+To read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).
+
+_These are free, rate-limited endpoints for [Llama 3.1 8B Instruct](/models/meta-llama/llama-3.1-8b-instruct). Outputs may be cached. Read about rate limits [here](/docs/limits)._",
+ "displayName": "Meta: Llama 3.1 8B Instruct (free)",
+ "enabled": true,
+ "functionCall": false,
+ "id": "meta-llama/llama-3.1-8b-instruct:free",
+ "maxTokens": 4096,
+ "tokens": 131072,
+ "vision": false,
+ },
+ {
+ "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 8B instruct-tuned version is fast and efficient.
+
+It has demonstrated strong performance compared to leading closed-source models in human evaluations.
+
+To read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).",
+ "displayName": "Meta: Llama 3.1 8B Instruct",
+ "enabled": false,
+ "functionCall": false,
+ "id": "meta-llama/llama-3.1-8b-instruct",
+ "maxTokens": undefined,
+ "tokens": 131072,
+ "vision": false,
+ },
+ {
+ "description": "The highly anticipated 400B class of Llama3 is here! Clocking in at 128k context with impressive eval scores, the Meta AI team continues to push the frontier of open-source LLMs.
+
+Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 405B instruct-tuned version is optimized for high quality dialogue usecases.
+
+It has demonstrated strong performance compared to leading closed-source models in human evaluations.
+
+To read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).",
+ "displayName": "Meta: Llama 3.1 405B Instruct",
+ "enabled": true,
+ "functionCall": false,
+ "id": "meta-llama/llama-3.1-405b-instruct",
+ "maxTokens": undefined,
+ "tokens": 131072,
+ "vision": false,
+ },
+ {
+ "description": "Dolphin 2.9 is designed for instruction following, conversational, and coding. This model is a fine-tune of [Llama 3 70B](/models/meta-llama/llama-3-70b-instruct). It demonstrates improvements in instruction, conversation, coding, and function calling abilities, when compared to the original.
+
+Uncensored and is stripped of alignment and bias, it requires an external alignment layer for ethical use. Users are cautioned to use this highly compliant model responsibly, as detailed in a blog post about uncensored models at [erichartford.com/uncensored-models](https://erichartford.com/uncensored-models).
+
+Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).",
+ "displayName": "Dolphin Llama 3 70B 🐬",
+ "enabled": false,
+ "functionCall": true,
+ "id": "cognitivecomputations/dolphin-llama-3-70b",
+ "maxTokens": undefined,
+ "tokens": 8192,
+ "vision": false,
+ },
+ {
+ "description": "A 7.3B parameter Mamba-based model designed for code and reasoning tasks.
+
+- Linear time inference, allowing for theoretically infinite sequence lengths
+- 256k token context window
+- Optimized for quick responses, especially beneficial for code productivity
+- Performs comparably to state-of-the-art transformer models in code and reasoning tasks
+- Available under the Apache 2.0 license for free use, modification, and distribution",
+ "displayName": "Mistral: Codestral Mamba",
+ "enabled": false,
+ "functionCall": false,
+ "id": "mistralai/codestral-mamba",
+ "maxTokens": undefined,
+ "tokens": 256000,
+ "vision": false,
+ },
+ {
+ "description": "A 12B parameter model with a 128k token context length built by Mistral in collaboration with NVIDIA.
+
+The model is multilingual, supporting English, French, German, Spanish, Italian, Portuguese, Chinese, Japanese, Korean, Arabic, and Hindi.
+
+It supports function calling and is released under the Apache 2.0 license.",
+ "displayName": "Mistral: Mistral Nemo",
+ "enabled": false,
+ "functionCall": true,
+ "id": "mistralai/mistral-nemo",
+ "maxTokens": undefined,
+ "tokens": 128000,
+ "vision": false,
+ },
+ {
+ "description": "GPT-4o mini is OpenAI's newest model after [GPT-4 Omni](/models/openai/gpt-4o), supporting both text and image inputs with text outputs.
+
+As their most advanced small model, it is many multiples more affordable than other recent frontier models, and more than 60% cheaper than [GPT-3.5 Turbo](/models/openai/gpt-3.5-turbo). It maintains SOTA intelligence, while being significantly more cost-effective.
+
+GPT-4o mini achieves an 82% score on MMLU and presently ranks higher than GPT-4 on chat preferences on [common leaderboards](https://arena.lmsys.org/).
+
+Check out the [launch announcement](https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/) to learn more.",
+ "displayName": "OpenAI: GPT-4o-mini (2024-07-18)",
+ "enabled": false,
+ "functionCall": false,
+ "id": "openai/gpt-4o-mini-2024-07-18",
+ "maxTokens": 16384,
+ "tokens": 128000,
+ "vision": false,
+ },
+ {
+ "description": "GPT-4o mini is OpenAI's newest model after [GPT-4 Omni](/models/openai/gpt-4o), supporting both text and image inputs with text outputs.
+
+As their most advanced small model, it is many multiples more affordable than other recent frontier models, and more than 60% cheaper than [GPT-3.5 Turbo](/models/openai/gpt-3.5-turbo). It maintains SOTA intelligence, while being significantly more cost-effective.
+
+GPT-4o mini achieves an 82% score on MMLU and presently ranks higher than GPT-4 on chat preferences on [common leaderboards](https://arena.lmsys.org/).
+
+Check out the [launch announcement](https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/) to learn more.",
+ "displayName": "OpenAI: GPT-4o-mini",
+ "enabled": true,
+ "functionCall": false,
+ "id": "openai/gpt-4o-mini",
+ "maxTokens": 16384,
+ "tokens": 128000,
+ "vision": false,
+ },
+ {
+ "description": "Qwen2 7B is a transformer-based model that excels in language understanding, multilingual capabilities, coding, mathematics, and reasoning.
+
+It features SwiGLU activation, attention QKV bias, and group query attention. It was pretrained on extensive data, then aligned with supervised finetuning and direct preference optimization.
+
+For more details, see this [blog post](https://qwenlm.github.io/blog/qwen2/) and [GitHub repo](https://github.com/QwenLM/Qwen2).
+
+Usage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).
+
+_These are free, rate-limited endpoints for [Qwen 2 7B Instruct](/models/qwen/qwen-2-7b-instruct). Outputs may be cached. Read about rate limits [here](/docs/limits)._",
+ "displayName": "Qwen 2 7B Instruct (free)",
+ "enabled": true,
+ "functionCall": false,
+ "id": "qwen/qwen-2-7b-instruct:free",
+ "maxTokens": 4096,
+ "tokens": 32768,
+ "vision": false,
+ },
+ {
+ "description": "Qwen2 7B is a transformer-based model that excels in language understanding, multilingual capabilities, coding, mathematics, and reasoning.
+
+It features SwiGLU activation, attention QKV bias, and group query attention. It was pretrained on extensive data, then aligned with supervised finetuning and direct preference optimization.
+
+For more details, see this [blog post](https://qwenlm.github.io/blog/qwen2/) and [GitHub repo](https://github.com/QwenLM/Qwen2).
+
+Usage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).",
+ "displayName": "Qwen 2 7B Instruct",
+ "enabled": false,
+ "functionCall": false,
+ "id": "qwen/qwen-2-7b-instruct",
+ "maxTokens": undefined,
+ "tokens": 32768,
+ "vision": false,
+ },
+ {
+ "description": "Gemma 2 27B by Google is an open model built from the same research and technology used to create the [Gemini models](/models?q=gemini).
+
+Gemma models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning.
+
+See the [launch announcement](https://blog.google/technology/developers/google-gemma-2/) for more details. Usage of Gemma is subject to Google's [Gemma Terms of Use](https://ai.google.dev/gemma/terms).",
+ "displayName": "Google: Gemma 2 27B",
+ "enabled": true,
+ "functionCall": false,
+ "id": "google/gemma-2-27b-it",
+ "maxTokens": undefined,
+ "tokens": 8192,
+ "vision": false,
+ },
+ {
+ "description": "From the maker of [Goliath](https://openrouter.ai/models/alpindale/goliath-120b), Magnum 72B is the first in a new family of models designed to achieve the prose quality of the Claude 3 models, notably Opus & Sonnet.
+
+The model is based on [Qwen2 72B](https://openrouter.ai/models/qwen/qwen-2-72b-instruct) and trained with 55 million tokens of highly curated roleplay (RP) data.",
+ "displayName": "Magnum 72B",
+ "enabled": false,
+ "functionCall": false,
+ "id": "alpindale/magnum-72b",
+ "maxTokens": 1024,
+ "tokens": 16384,
+ "vision": false,
+ },
+ {
+ "description": "An experimental merge model based on Llama 3, exhibiting a very distinctive style of writing. It combines the the best of [Meta's Llama 3 8B](https://openrouter.ai/models/meta-llama/llama-3-8b-instruct) and Nous Research's [Hermes 2 Pro](https://openrouter.ai/models/nousresearch/hermes-2-pro-llama-3-8b).
+
+Hermes-2 Θ (theta) was specifically designed with a few capabilities in mind: executing function calls, generating JSON output, and most remarkably, demonstrating metacognitive abilities (contemplating the nature of thought and recognizing the diversity of cognitive processes among individuals).",
+ "displayName": "Nous: Hermes 2 Theta 8B",
+ "enabled": false,
+ "functionCall": false,
+ "id": "nousresearch/hermes-2-theta-llama-3-8b",
+ "maxTokens": 2048,
+ "tokens": 16384,
+ "vision": false,
+ },
+ {
+ "description": "Gemma 2 9B by Google is an advanced, open-source language model that sets a new standard for efficiency and performance in its size class.
+
+Designed for a wide variety of tasks, it empowers developers and researchers to build innovative applications, while maintaining accessibility, safety, and cost-effectiveness.
+
+See the [launch announcement](https://blog.google/technology/developers/google-gemma-2/) for more details. Usage of Gemma is subject to Google's [Gemma Terms of Use](https://ai.google.dev/gemma/terms).
+
+_These are free, rate-limited endpoints for [Gemma 2 9B](/models/google/gemma-2-9b-it). Outputs may be cached. Read about rate limits [here](/docs/limits)._",
+ "displayName": "Google: Gemma 2 9B (free)",
+ "enabled": true,
+ "functionCall": false,
+ "id": "google/gemma-2-9b-it:free",
+ "maxTokens": 2048,
+ "tokens": 8192,
+ "vision": false,
+ },
+ {
+ "description": "Gemma 2 9B by Google is an advanced, open-source language model that sets a new standard for efficiency and performance in its size class.
+
+Designed for a wide variety of tasks, it empowers developers and researchers to build innovative applications, while maintaining accessibility, safety, and cost-effectiveness.
+
+See the [launch announcement](https://blog.google/technology/developers/google-gemma-2/) for more details. Usage of Gemma is subject to Google's [Gemma Terms of Use](https://ai.google.dev/gemma/terms).",
+ "displayName": "Google: Gemma 2 9B",
+ "enabled": true,
+ "functionCall": false,
+ "id": "google/gemma-2-9b-it",
+ "maxTokens": undefined,
+ "tokens": 8192,
+ "vision": false,
+ },
+ {
+ "description": "Stheno 8B 32K is a creative writing/roleplay model from [Sao10k](https://ko-fi.com/sao10k). It was trained at 8K context, then expanded to 32K context.
+
+Compared to the older Stheno version, this model is trained on:
+- 2x the amount of creative writing samples
+- Cleaned up roleplaying samples
+- Fewer low-quality samples",
+ "displayName": "Llama 3 Stheno 8B v3.3 32K",
+ "enabled": false,
+ "functionCall": false,
+ "id": "sao10k/l3-stheno-8b",
+ "maxTokens": undefined,
+ "tokens": 32000,
+ "vision": false,
+ },
+ {
+ "description": "The Jamba-Instruct model, introduced by AI21 Labs, is an instruction-tuned variant of their hybrid SSM-Transformer Jamba model, specifically optimized for enterprise applications.
+
+- 256K Context Window: It can process extensive information, equivalent to a 400-page novel, which is beneficial for tasks involving large documents such as financial reports or legal documents
+- Safety and Accuracy: Jamba-Instruct is designed with enhanced safety features to ensure secure deployment in enterprise environments, reducing the risk and cost of implementation
+
+Read their [announcement](https://www.ai21.com/blog/announcing-jamba) to learn more.
+
+Jamba has a knowledge cutoff of February 2024.",
+ "displayName": "AI21: Jamba Instruct",
+ "enabled": false,
+ "functionCall": false,
+ "id": "ai21/jamba-instruct",
+ "maxTokens": 4096,
+ "tokens": 256000,
+ "vision": false,
+ },
+ {
+ "description": "Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at:
+
+- Coding: Autonomously writes, edits, and runs code with reasoning and troubleshooting
+- Data science: Augments human data science expertise; navigates unstructured data while using multiple tools for insights
+- Visual processing: Excels at interpreting charts, graphs, and images, accurately transcribing text to derive insights beyond the text alone
+- Agentic tasks: Exceptional tool use, making it great at agentic tasks (i.e. complex, multi-step problem-solving tasks that require engaging with other systems)
+
+#multimodal",
+ "displayName": "Anthropic: Claude 3.5 Sonnet",
+ "enabled": true,
+ "functionCall": true,
+ "id": "anthropic/claude-3.5-sonnet",
+ "maxTokens": 8192,
+ "tokens": 200000,
+ "vision": true,
+ },
+ {
+ "description": "Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at:
+
+- Coding: Autonomously writes, edits, and runs code with reasoning and troubleshooting
+- Data science: Augments human data science expertise; navigates unstructured data while using multiple tools for insights
+- Visual processing: Excels at interpreting charts, graphs, and images, accurately transcribing text to derive insights beyond the text alone
+- Agentic tasks: Exceptional tool use, making it great at agentic tasks (i.e. complex, multi-step problem-solving tasks that require engaging with other systems)
+
+#multimodal
+
+_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/models/anthropic/claude-3.5-sonnet) variant._",
+ "displayName": "Anthropic: Claude 3.5 Sonnet (self-moderated)",
+ "enabled": false,
+ "functionCall": true,
+ "id": "anthropic/claude-3.5-sonnet:beta",
+ "maxTokens": 8192,
+ "tokens": 200000,
+ "vision": true,
+ },
+ {
+ "description": "Euryale 70B v2.1 is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k).
+
+- Better prompt adherence.
+- Better anatomy / spatial awareness.
+- Adapts much better to unique and custom formatting / reply formats.
+- Very creative, lots of unique swipes.
+- Is not restrictive during roleplays.",
+ "displayName": "Llama 3 Euryale 70B v2.1",
+ "enabled": false,
+ "functionCall": false,
+ "id": "sao10k/l3-euryale-70b",
+ "maxTokens": undefined,
+ "tokens": 8192,
+ "vision": false,
+ },
+ {
+ "description": "Phi-3 4K Medium is a powerful 14-billion parameter model designed for advanced language understanding, reasoning, and instruction following. Optimized through supervised fine-tuning and preference adjustments, it excels in tasks involving common sense, mathematics, logical reasoning, and code processing.
+
+At time of release, Phi-3 Medium demonstrated state-of-the-art performance among lightweight models. In the MMLU-Pro eval, the model even comes close to a Llama3 70B level of performance.
+
+For 128k context length, try [Phi-3 Medium 128K](/models/microsoft/phi-3-medium-128k-instruct).",
+ "displayName": "Phi-3 Medium 4K Instruct",
+ "enabled": false,
+ "functionCall": false,
+ "id": "microsoft/phi-3-medium-4k-instruct",
+ "maxTokens": undefined,
+ "tokens": 4000,
+ "vision": false,
+ },
+ {
+ "description": "Dolphin 2.9 is designed for instruction following, conversational, and coding. This model is a finetune of [Mixtral 8x22B Instruct](/models/mistralai/mixtral-8x22b-instruct). It features a 64k context length and was fine-tuned with a 16k sequence length using ChatML templates.
+
+This model is a successor to [Dolphin Mixtral 8x7B](/models/cognitivecomputations/dolphin-mixtral-8x7b).
+
+The model is uncensored and is stripped of alignment and bias. It requires an external alignment layer for ethical use. Users are cautioned to use this highly compliant model responsibly, as detailed in a blog post about uncensored models at [erichartford.com/uncensored-models](https://erichartford.com/uncensored-models).
+
+#moe #uncensored",
+ "displayName": "Dolphin 2.9.2 Mixtral 8x22B 🐬",
+ "enabled": false,
+ "functionCall": false,
+ "id": "cognitivecomputations/dolphin-mixtral-8x22b",
+ "maxTokens": undefined,
+ "tokens": 65536,
+ "vision": false,
+ },
+ {
+ "description": "Qwen2 72B is a transformer-based model that excels in language understanding, multilingual capabilities, coding, mathematics, and reasoning.
+
+It features SwiGLU activation, attention QKV bias, and group query attention. It was pretrained on extensive data, then aligned with supervised finetuning and direct preference optimization.
+
+For more details, see this [blog post](https://qwenlm.github.io/blog/qwen2/) and [GitHub repo](https://github.com/QwenLM/Qwen2).
+
+Usage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).",
+ "displayName": "Qwen 2 72B Instruct",
+ "enabled": false,
+ "functionCall": false,
+ "id": "qwen/qwen-2-72b-instruct",
+ "maxTokens": undefined,
+ "tokens": 32768,
+ "vision": false,
+ },
+ {
+ "description": "OpenChat 8B is a library of open-source language models, fine-tuned with "C-RLFT (Conditioned Reinforcement Learning Fine-Tuning)" - a strategy inspired by offline reinforcement learning. It has been trained on mixed-quality data without preference labels.
+
+It outperforms many similarly sized models including [Llama 3 8B Instruct](/models/meta-llama/llama-3-8b-instruct) and various fine-tuned models. It excels in general conversation, coding assistance, and mathematical reasoning.
+
+- For OpenChat fine-tuned on Mistral 7B, check out [OpenChat 7B](/models/openchat/openchat-7b).
+- For OpenChat fine-tuned on Llama 8B, check out [OpenChat 8B](/models/openchat/openchat-8b).
+
+#open-source",
+ "displayName": "OpenChat 3.6 8B",
+ "enabled": false,
+ "functionCall": false,
+ "id": "openchat/openchat-8b",
+ "maxTokens": undefined,
+ "tokens": 8192,
+ "vision": false,
+ },
+ {
+ "description": "Hermes 2 Pro is an upgraded, retrained version of Nous Hermes 2, consisting of an updated and cleaned version of the OpenHermes 2.5 Dataset, as well as a newly introduced Function Calling and JSON Mode dataset developed in-house.",
+ "displayName": "NousResearch: Hermes 2 Pro - Llama-3 8B",
+ "enabled": false,
+ "functionCall": false,
+ "id": "nousresearch/hermes-2-pro-llama-3-8b",
+ "maxTokens": undefined,
+ "tokens": 8192,
+ "vision": false,
+ },
+ {
+ "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.
+
+An improved version of [Mistral 7B Instruct v0.2](/models/mistralai/mistral-7b-instruct-v0.2), with the following changes:
+
+- Extended vocabulary to 32768
+- Supports v3 Tokenizer
+- Supports function calling
+
+NOTE: Support for function calling depends on the provider.",
+ "displayName": "Mistral: Mistral 7B Instruct v0.3",
+ "enabled": false,
+ "functionCall": true,
+ "id": "mistralai/mistral-7b-instruct-v0.3",
+ "maxTokens": undefined,
+ "tokens": 32768,
+ "vision": false,
+ },
+ {
+ "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.
+
+*Mistral 7B Instruct has multiple version variants, and this is intended to be the latest version.*
+
+_These are free, rate-limited endpoints for [Mistral 7B Instruct](/models/mistralai/mistral-7b-instruct). Outputs may be cached. Read about rate limits [here](/docs/limits)._",
+ "displayName": "Mistral: Mistral 7B Instruct (free)",
+ "enabled": false,
+ "functionCall": false,
+ "id": "mistralai/mistral-7b-instruct:free",
+ "maxTokens": 4096,
+ "tokens": 32768,
+ "vision": false,
+ },
+ {
+ "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.
+
+*Mistral 7B Instruct has multiple version variants, and this is intended to be the latest version.*",
+ "displayName": "Mistral: Mistral 7B Instruct",
+ "enabled": true,
+ "functionCall": false,
+ "id": "mistralai/mistral-7b-instruct",
+ "maxTokens": undefined,
+ "tokens": 32768,
+ "vision": false,
+ },
+ {
+ "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.
+
+*Mistral 7B Instruct has multiple version variants, and this is intended to be the latest version.*
+
+_These are higher-throughput endpoints for [Mistral 7B Instruct](/models/mistralai/mistral-7b-instruct). They may have higher prices._",
+ "displayName": "Mistral: Mistral 7B Instruct (nitro)",
+ "enabled": false,
+ "functionCall": false,
+ "id": "mistralai/mistral-7b-instruct:nitro",
+ "maxTokens": undefined,
+ "tokens": 32768,
+ "vision": false,
+ },
+ {
+ "description": "Phi-3 Mini is a powerful 3.8B parameter model designed for advanced language understanding, reasoning, and instruction following. Optimized through supervised fine-tuning and preference adjustments, it excels in tasks involving common sense, mathematics, logical reasoning, and code processing.
+
+At time of release, Phi-3 Mini demonstrated state-of-the-art performance among lightweight models. This model is static, trained on an offline dataset with an October 2023 cutoff date.
+
+_These are free, rate-limited endpoints for [Phi-3 Mini 128K Instruct](/models/microsoft/phi-3-mini-128k-instruct). Outputs may be cached. Read about rate limits [here](/docs/limits)._",
+ "displayName": "Phi-3 Mini 128K Instruct (free)",
+ "enabled": false,
+ "functionCall": false,
+ "id": "microsoft/phi-3-mini-128k-instruct:free",
+ "maxTokens": 4096,
+ "tokens": 128000,
+ "vision": false,
+ },
+ {
+ "description": "Phi-3 Mini is a powerful 3.8B parameter model designed for advanced language understanding, reasoning, and instruction following. Optimized through supervised fine-tuning and preference adjustments, it excels in tasks involving common sense, mathematics, logical reasoning, and code processing.
+
+At time of release, Phi-3 Mini demonstrated state-of-the-art performance among lightweight models. This model is static, trained on an offline dataset with an October 2023 cutoff date.",
+ "displayName": "Phi-3 Mini 128K Instruct",
+ "enabled": false,
+ "functionCall": false,
+ "id": "microsoft/phi-3-mini-128k-instruct",
+ "maxTokens": undefined,
+ "tokens": 128000,
+ "vision": false,
+ },
+ {
+ "description": "Phi-3 128K Medium is a powerful 14-billion parameter model designed for advanced language understanding, reasoning, and instruction following. Optimized through supervised fine-tuning and preference adjustments, it excels in tasks involving common sense, mathematics, logical reasoning, and code processing.
+
+At time of release, Phi-3 Medium demonstrated state-of-the-art performance among lightweight models. In the MMLU-Pro eval, the model even comes close to a Llama3 70B level of performance.
+
+For 4k context length, try [Phi-3 Medium 4K](/models/microsoft/phi-3-medium-4k-instruct).
+
+_These are free, rate-limited endpoints for [Phi-3 Medium 128K Instruct](/models/microsoft/phi-3-medium-128k-instruct). Outputs may be cached. Read about rate limits [here](/docs/limits)._",
+ "displayName": "Phi-3 Medium 128K Instruct (free)",
+ "enabled": false,
+ "functionCall": false,
+ "id": "microsoft/phi-3-medium-128k-instruct:free",
+ "maxTokens": 4096,
+ "tokens": 128000,
+ "vision": false,
+ },
+ {
+ "description": "Phi-3 128K Medium is a powerful 14-billion parameter model designed for advanced language understanding, reasoning, and instruction following. Optimized through supervised fine-tuning and preference adjustments, it excels in tasks involving common sense, mathematics, logical reasoning, and code processing.
+
+At time of release, Phi-3 Medium demonstrated state-of-the-art performance among lightweight models. In the MMLU-Pro eval, the model even comes close to a Llama3 70B level of performance.
+
+For 4k context length, try [Phi-3 Medium 4K](/models/microsoft/phi-3-medium-4k-instruct).",
+ "displayName": "Phi-3 Medium 128K Instruct",
+ "enabled": false,
+ "functionCall": false,
+ "id": "microsoft/phi-3-medium-128k-instruct",
+ "maxTokens": undefined,
+ "tokens": 128000,
+ "vision": false,
+ },
+ {
+ "description": "The NeverSleep team is back, with a Llama 3 70B finetune trained on their curated roleplay data. Striking a balance between eRP and RP, Lumimaid was designed to be serious, yet uncensored when necessary.
+
+To enhance its overall intelligence and chat capability, roughly 40% of the training data was not roleplay. This provides a breadth of knowledge to access, while still keeping roleplay as the primary strength.
+
+Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).",
+ "displayName": "Llama 3 Lumimaid 70B",
+ "enabled": false,
+ "functionCall": false,
+ "id": "neversleep/llama-3-lumimaid-70b",
+ "maxTokens": 2048,
+ "tokens": 8192,
+ "vision": false,
+ },
+ {
+ "description": "Gemini 1.5 Flash is a foundation model that performs well at a variety of multimodal tasks such as visual understanding, classification, summarization, and creating content from image, audio and video. It's adept at processing visual and text inputs such as photographs, documents, infographics, and screenshots.
+
+Gemini 1.5 Flash is designed for high-volume, high-frequency tasks where cost and latency matter. On most common tasks, Flash achieves comparable quality to other Gemini Pro models at a significantly reduced cost. Flash is well-suited for applications like chat assistants and on-demand content generation where speed and scale matter.
+
+Usage of Gemini is subject to Google's [Gemini Terms of Use](https://ai.google.dev/terms).
+
+#multimodal",
+ "displayName": "Google: Gemini Flash 1.5",
+ "enabled": false,
+ "functionCall": false,
+ "id": "google/gemini-flash-1.5",
+ "maxTokens": 32768,
+ "tokens": 4000000,
+ "vision": true,
+ },
+ {
+ "description": "DeepSeek-Coder-V2, an open-source Mixture-of-Experts (MoE) code language model. It is further pre-trained from an intermediate checkpoint of DeepSeek-V2 with additional 6 trillion tokens.
+
+The original V1 model was trained from scratch on 2T tokens, with a composition of 87% code and 13% natural language in both English and Chinese. It was pre-trained on a project-level code corpus by employing an extra fill-in-the-blank task.",
+ "displayName": "DeepSeek-Coder-V2",
+ "enabled": false,
+ "functionCall": false,
+ "id": "deepseek/deepseek-coder",
+ "maxTokens": 4096,
+ "tokens": 128000,
+ "vision": false,
+ },
+ {
+ "description": "DeepSeek-V2 Chat is a conversational finetune of DeepSeek-V2, a Mixture-of-Experts (MoE) language model. It comprises 236B total parameters, of which 21B are activated for each token.
+
+Compared with DeepSeek 67B, DeepSeek-V2 achieves stronger performance while saving 42.5% in training costs, reducing the KV cache by 93.3%, and boosting maximum generation throughput to 5.76 times.
+
+DeepSeek-V2 achieves remarkable performance on both standard benchmarks and open-ended generation evaluations.",
+ "displayName": "DeepSeek-V2 Chat",
+ "enabled": true,
+ "functionCall": false,
+ "id": "deepseek/deepseek-chat",
+ "maxTokens": 4096,
+ "tokens": 128000,
+ "vision": false,
+ },
+ {
+ "description": "Llama3 Sonar is Perplexity's latest model family. It surpasses their earlier Sonar models in cost-efficiency, speed, and performance.
+
+This is the online version of the [offline chat model](/models/perplexity/llama-3-sonar-large-32k-chat). It is focused on delivering helpful, up-to-date, and factual responses. #online",
+ "displayName": "Perplexity: Llama3 Sonar 70B Online",
+ "enabled": false,
+ "functionCall": false,
+ "id": "perplexity/llama-3-sonar-large-32k-online",
+ "maxTokens": undefined,
+ "tokens": 28000,
+ "vision": false,
+ },
+ {
+ "description": "Llama3 Sonar is Perplexity's latest model family. It surpasses their earlier Sonar models in cost-efficiency, speed, and performance.
+
+This is a normal offline LLM, but the [online version](/models/perplexity/llama-3-sonar-large-32k-online) of this model has Internet access.",
+ "displayName": "Perplexity: Llama3 Sonar 70B",
+ "enabled": false,
+ "functionCall": false,
+ "id": "perplexity/llama-3-sonar-large-32k-chat",
+ "maxTokens": undefined,
+ "tokens": 32768,
+ "vision": false,
+ },
+ {
+ "description": "Llama3 Sonar is Perplexity's latest model family. It surpasses their earlier Sonar models in cost-efficiency, speed, and performance.
+
+This is the online version of the [offline chat model](/models/perplexity/llama-3-sonar-small-32k-chat). It is focused on delivering helpful, up-to-date, and factual responses. #online",
+ "displayName": "Perplexity: Llama3 Sonar 8B Online",
+ "enabled": false,
+ "functionCall": false,
+ "id": "perplexity/llama-3-sonar-small-32k-online",
+ "maxTokens": undefined,
+ "tokens": 28000,
+ "vision": false,
+ },
+ {
+ "description": "Llama3 Sonar is Perplexity's latest model family. It surpasses their earlier Sonar models in cost-efficiency, speed, and performance.
+
+This is a normal offline LLM, but the [online version](/models/perplexity/llama-3-sonar-small-32k-online) of this model has Internet access.",
+ "displayName": "Perplexity: Llama3 Sonar 8B",
+ "enabled": false,
+ "functionCall": false,
+ "id": "perplexity/llama-3-sonar-small-32k-chat",
+ "maxTokens": undefined,
+ "tokens": 32768,
+ "vision": false,
+ },
+ {
+ "description": "This safeguard model has 8B parameters and is based on the Llama 3 family. Just like is predecessor, [LlamaGuard 1](https://huggingface.co/meta-llama/LlamaGuard-7b), it can do both prompt and response classification.
+
+LlamaGuard 2 acts as a normal LLM would, generating text that indicates whether the given input/output is safe/unsafe. If deemed unsafe, it will also share the content categories violated.
+
+For best results, please use raw prompt input or the \`/completions\` endpoint, instead of the chat API.
+
+It has demonstrated strong performance compared to leading closed-source models in human evaluations.
+
+To read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).",
+ "displayName": "Meta: LlamaGuard 2 8B",
+ "enabled": false,
+ "functionCall": false,
+ "id": "meta-llama/llama-guard-2-8b",
+ "maxTokens": undefined,
+ "tokens": 8192,
+ "vision": false,
+ },
+ {
+ "description": "GPT-4o ("o" for "omni") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as fast and 50% more cost-effective. GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.
+
+For benchmarking against other models, it was briefly called ["im-also-a-good-gpt2-chatbot"](https://twitter.com/LiamFedus/status/1790064963966370209).",
+ "displayName": "OpenAI: GPT-4o (2024-05-13)",
+ "enabled": false,
+ "functionCall": false,
+ "id": "openai/gpt-4o-2024-05-13",
+ "maxTokens": 4096,
+ "tokens": 128000,
+ "vision": false,
+ },
+ {
+ "description": "GPT-4o ("o" for "omni") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as fast and 50% more cost-effective. GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.
+
+For benchmarking against other models, it was briefly called ["im-also-a-good-gpt2-chatbot"](https://twitter.com/LiamFedus/status/1790064963966370209).",
+ "displayName": "OpenAI: GPT-4o",
+ "enabled": true,
+ "functionCall": false,
+ "id": "openai/gpt-4o",
+ "maxTokens": 4096,
+ "tokens": 128000,
+ "vision": false,
+ },
+ {
+ "description": "GPT-4o Extended is an experimental variant of GPT-4o with an extended max output tokens. This model supports only text input to text output.
+
+_These are extended-context endpoints for [GPT-4o](/models/openai/gpt-4o). They may have higher prices._",
+ "displayName": "OpenAI: GPT-4o (extended)",
+ "enabled": false,
+ "functionCall": false,
+ "id": "openai/gpt-4o:extended",
+ "maxTokens": 64000,
+ "tokens": 128000,
+ "vision": false,
+ },
+ {
+ "description": "Qwen1.5 72B is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen, the improvements include:
+
+- Significant performance improvement in human preference for chat models
+- Multilingual support of both base and chat models
+- Stable support of 32K context length for models of all sizes
+
+For more details, see this [blog post](https://qwenlm.github.io/blog/qwen1.5/) and [GitHub repo](https://github.com/QwenLM/Qwen1.5).
+
+Usage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).",
+ "displayName": "Qwen 1.5 72B Chat",
+ "enabled": false,
+ "functionCall": false,
+ "id": "qwen/qwen-72b-chat",
+ "maxTokens": undefined,
+ "tokens": 32768,
+ "vision": false,
+ },
+ {
+ "description": "Qwen1.5 110B is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen, the improvements include:
+
+- Significant performance improvement in human preference for chat models
+- Multilingual support of both base and chat models
+- Stable support of 32K context length for models of all sizes
+
+For more details, see this [blog post](https://qwenlm.github.io/blog/qwen1.5/) and [GitHub repo](https://github.com/QwenLM/Qwen1.5).
+
+Usage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).",
+ "displayName": "Qwen 1.5 110B Chat",
+ "enabled": false,
+ "functionCall": false,
+ "id": "qwen/qwen-110b-chat",
+ "maxTokens": undefined,
+ "tokens": 32768,
+ "vision": false,
+ },
+ {
+ "description": "The NeverSleep team is back, with a Llama 3 8B finetune trained on their curated roleplay data. Striking a balance between eRP and RP, Lumimaid was designed to be serious, yet uncensored when necessary.
+
+To enhance its overall intelligence and chat capability, roughly 40% of the training data was not roleplay. This provides a breadth of knowledge to access, while still keeping roleplay as the primary strength.
+
+Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).",
+ "displayName": "Llama 3 Lumimaid 8B",
+ "enabled": false,
+ "functionCall": false,
+ "id": "neversleep/llama-3-lumimaid-8b",
+ "maxTokens": undefined,
+ "tokens": 24576,
+ "vision": false,
+ },
+ {
+ "description": "The NeverSleep team is back, with a Llama 3 8B finetune trained on their curated roleplay data. Striking a balance between eRP and RP, Lumimaid was designed to be serious, yet uncensored when necessary.
+
+To enhance its overall intelligence and chat capability, roughly 40% of the training data was not roleplay. This provides a breadth of knowledge to access, while still keeping roleplay as the primary strength.
+
+Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).
+
+_These are extended-context endpoints for [Llama 3 Lumimaid 8B](/models/neversleep/llama-3-lumimaid-8b). They may have higher prices._",
+ "displayName": "Llama 3 Lumimaid 8B (extended)",
+ "enabled": false,
+ "functionCall": false,
+ "id": "neversleep/llama-3-lumimaid-8b:extended",
+ "maxTokens": 2048,
+ "tokens": 24576,
+ "vision": false,
+ },
+ {
+ "description": "Creative writing model, routed with permission. It's fast, it keeps the conversation going, and it stays in character.
+
+If you submit a raw prompt, you can use Alpaca or Vicuna formats.",
+ "displayName": "Fimbulvetr 11B v2",
+ "enabled": false,
+ "functionCall": false,
+ "id": "sao10k/fimbulvetr-11b-v2",
+ "maxTokens": 2048,
+ "tokens": 8192,
+ "vision": false,
+ },
+ {
+ "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 70B instruct-tuned version was optimized for high quality dialogue usecases.
+
+It has demonstrated strong performance compared to leading closed-source models in human evaluations.
+
+To read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).",
+ "displayName": "Meta: Llama 3 70B Instruct",
+ "enabled": false,
+ "functionCall": false,
+ "id": "meta-llama/llama-3-70b-instruct",
+ "maxTokens": undefined,
+ "tokens": 8192,
+ "vision": false,
+ },
+ {
+ "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 70B instruct-tuned version was optimized for high quality dialogue usecases.
+
+It has demonstrated strong performance compared to leading closed-source models in human evaluations.
+
+To read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).
+
+_These are higher-throughput endpoints for [Llama 3 70B Instruct](/models/meta-llama/llama-3-70b-instruct). They may have higher prices._",
+ "displayName": "Meta: Llama 3 70B Instruct (nitro)",
+ "enabled": false,
+ "functionCall": false,
+ "id": "meta-llama/llama-3-70b-instruct:nitro",
+ "maxTokens": undefined,
+ "tokens": 8192,
+ "vision": false,
+ },
+ {
+ "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 8B instruct-tuned version was optimized for high quality dialogue usecases.
+
+It has demonstrated strong performance compared to leading closed-source models in human evaluations.
+
+To read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).
+
+_These are free, rate-limited endpoints for [Llama 3 8B Instruct](/models/meta-llama/llama-3-8b-instruct). Outputs may be cached. Read about rate limits [here](/docs/limits)._",
+ "displayName": "Meta: Llama 3 8B Instruct (free)",
+ "enabled": false,
+ "functionCall": false,
+ "id": "meta-llama/llama-3-8b-instruct:free",
+ "maxTokens": 4096,
+ "tokens": 8192,
+ "vision": false,
+ },
+ {
+ "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 8B instruct-tuned version was optimized for high quality dialogue usecases.
+
+It has demonstrated strong performance compared to leading closed-source models in human evaluations.
+
+To read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).",
+ "displayName": "Meta: Llama 3 8B Instruct",
+ "enabled": false,
+ "functionCall": false,
+ "id": "meta-llama/llama-3-8b-instruct",
+ "maxTokens": undefined,
+ "tokens": 8192,
+ "vision": false,
+ },
+ {
+ "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 8B instruct-tuned version was optimized for high quality dialogue usecases.
+
+It has demonstrated strong performance compared to leading closed-source models in human evaluations.
+
+To read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).
+
+_These are higher-throughput endpoints for [Llama 3 8B Instruct](/models/meta-llama/llama-3-8b-instruct). They may have higher prices._",
+ "displayName": "Meta: Llama 3 8B Instruct (nitro)",
+ "enabled": false,
+ "functionCall": false,
+ "id": "meta-llama/llama-3-8b-instruct:nitro",
+ "maxTokens": undefined,
+ "tokens": 8192,
+ "vision": false,
+ },
+ {
+ "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 8B instruct-tuned version was optimized for high quality dialogue usecases.
+
+It has demonstrated strong performance compared to leading closed-source models in human evaluations.
+
+To read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).
+
+_These are extended-context endpoints for [Llama 3 8B Instruct](/models/meta-llama/llama-3-8b-instruct). They may have higher prices._",
+ "displayName": "Meta: Llama 3 8B Instruct (extended)",
+ "enabled": false,
+ "functionCall": false,
+ "id": "meta-llama/llama-3-8b-instruct:extended",
+ "maxTokens": 2048,
+ "tokens": 16384,
+ "vision": false,
+ },
+ {
+ "description": "Mistral's official instruct fine-tuned version of [Mixtral 8x22B](/models/mistralai/mixtral-8x22b). It uses 39B active parameters out of 141B, offering unparalleled cost efficiency for its size. Its strengths include:
+- strong math, coding, and reasoning
+- large context length (64k)
+- fluency in English, French, Italian, German, and Spanish
+
+See benchmarks on the launch announcement [here](https://mistral.ai/news/mixtral-8x22b/).
+#moe",
+ "displayName": "Mistral: Mixtral 8x22B Instruct",
+ "enabled": false,
+ "functionCall": false,
+ "id": "mistralai/mixtral-8x22b-instruct",
+ "maxTokens": undefined,
+ "tokens": 65536,
+ "vision": false,
+ },
+ {
+ "description": "WizardLM-2 7B is the smaller variant of Microsoft AI's latest Wizard model. It is the fastest and achieves comparable performance with existing 10x larger opensource leading models
+
+It is a finetune of [Mistral 7B Instruct](/models/mistralai/mistral-7b-instruct), using the same technique as [WizardLM-2 8x22B](/models/microsoft/wizardlm-2-8x22b).
+
+To read more about the model release, [click here](https://wizardlm.github.io/WizardLM2/).
+
+#moe",
+ "displayName": "WizardLM-2 7B",
+ "enabled": false,
+ "functionCall": false,
+ "id": "microsoft/wizardlm-2-7b",
+ "maxTokens": undefined,
+ "tokens": 32000,
+ "vision": false,
+ },
+ {
+ "description": "WizardLM-2 8x22B is Microsoft AI's most advanced Wizard model. It demonstrates highly competitive performance compared to leading proprietary models, and it consistently outperforms all existing state-of-the-art opensource models.
+
+It is an instruct finetune of [Mixtral 8x22B](/models/mistralai/mixtral-8x22b).
+
+To read more about the model release, [click here](https://wizardlm.github.io/WizardLM2/).
+
+#moe",
+ "displayName": "WizardLM-2 8x22B",
+ "enabled": true,
+ "functionCall": false,
+ "id": "microsoft/wizardlm-2-8x22b",
+ "maxTokens": undefined,
+ "tokens": 65536,
+ "vision": false,
+ },
+ {
+ "description": "Google's latest multimodal model, supporting image and video in text or chat prompts.
+
+Optimized for language tasks including:
+
+- Code generation
+- Text generation
+- Text editing
+- Problem solving
+- Recommendations
+- Information extraction
+- Data extraction or generation
+- AI agents
+
+Usage of Gemini is subject to Google's [Gemini Terms of Use](https://ai.google.dev/terms).
+
+#multimodal",
+ "displayName": "Google: Gemini Pro 1.5",
+ "enabled": false,
+ "functionCall": false,
+ "id": "google/gemini-pro-1.5",
+ "maxTokens": 32768,
+ "tokens": 4000000,
+ "vision": true,
+ },
+ {
+ "description": "The latest GPT-4 Turbo model with vision capabilities. Vision requests can now use JSON mode and function calling.
+
+Training data: up to December 2023.",
+ "displayName": "OpenAI: GPT-4 Turbo",
+ "enabled": false,
+ "functionCall": true,
+ "id": "openai/gpt-4-turbo",
+ "maxTokens": 4096,
+ "tokens": 128000,
+ "vision": true,
+ },
+ {
+ "description": "Command R+ is a new, 104B-parameter LLM from Cohere. It's useful for roleplay, general consumer usecases, and Retrieval Augmented Generation (RAG).
+
+It offers multilingual support for ten key languages to facilitate global business operations. See benchmarks and the launch post [here](https://txt.cohere.com/command-r-plus-microsoft-azure/).
+
+Use of this model is subject to Cohere's [Acceptable Use Policy](https://docs.cohere.com/docs/c4ai-acceptable-use-policy).",
+ "displayName": "Cohere: Command R+",
+ "enabled": true,
+ "functionCall": false,
+ "id": "cohere/command-r-plus",
+ "maxTokens": 4000,
+ "tokens": 128000,
+ "vision": false,
+ },
+ {
+ "description": "DBRX is a new open source large language model developed by Databricks. At 132B, it outperforms existing open source LLMs like Llama 2 70B and [Mixtral-8x7b](/models/mistralai/mixtral-8x7b) on standard industry benchmarks for language understanding, programming, math, and logic.
+
+It uses a fine-grained mixture-of-experts (MoE) architecture. 36B parameters are active on any input. It was pre-trained on 12T tokens of text and code data. Compared to other open MoE models like Mixtral-8x7B and Grok-1, DBRX is fine-grained, meaning it uses a larger number of smaller experts.
+
+See the launch announcement and benchmark results [here](https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm).
+
+#moe",
+ "displayName": "Databricks: DBRX 132B Instruct",
+ "enabled": false,
+ "functionCall": false,
+ "id": "databricks/dbrx-instruct",
+ "maxTokens": undefined,
+ "tokens": 32768,
+ "vision": false,
+ },
+ {
+ "description": "A merge with a complex family tree, this model was crafted for roleplaying and storytelling. Midnight Rose is a successor to Rogue Rose and Aurora Nights and improves upon them both. It wants to produce lengthy output by default and is the best creative writing merge produced so far by sophosympatheia.
+
+Descending from earlier versions of Midnight Rose and [Wizard Tulu Dolphin 70B](https://huggingface.co/sophosympatheia/Wizard-Tulu-Dolphin-70B-v1.0), it inherits the best qualities of each.",
+ "displayName": "Midnight Rose 70B",
+ "enabled": false,
+ "functionCall": false,
+ "id": "sophosympatheia/midnight-rose-70b",
+ "maxTokens": undefined,
+ "tokens": 4096,
+ "vision": false,
+ },
+ {
+ "description": "Command-R is a 35B parameter model that performs conversational language tasks at a higher quality, more reliably, and with a longer context than previous models. It can be used for complex workflows like code generation, retrieval augmented generation (RAG), tool use, and agents.
+
+Read the launch post [here](https://txt.cohere.com/command-r/).
+
+Use of this model is subject to Cohere's [Acceptable Use Policy](https://docs.cohere.com/docs/c4ai-acceptable-use-policy).",
+ "displayName": "Cohere: Command R",
+ "enabled": true,
+ "functionCall": false,
+ "id": "cohere/command-r",
+ "maxTokens": 4000,
+ "tokens": 128000,
+ "vision": false,
+ },
+ {
+ "description": "Command is an instruction-following conversational model that performs language tasks with high quality, more reliably and with a longer context than our base generative models.
+
+Use of this model is subject to Cohere's [Acceptable Use Policy](https://docs.cohere.com/docs/c4ai-acceptable-use-policy).",
+ "displayName": "Cohere: Command",
+ "enabled": false,
+ "functionCall": false,
+ "id": "cohere/command",
+ "maxTokens": 4000,
+ "tokens": 4096,
+ "vision": false,
+ },
+ {
+ "description": "Claude 3 Haiku is Anthropic's fastest and most compact model for
+near-instant responsiveness. Quick and accurate targeted performance.
+
+See the launch announcement and benchmark results [here](https://www.anthropic.com/news/claude-3-haiku)
+
+#multimodal",
+ "displayName": "Anthropic: Claude 3 Haiku",
+ "enabled": true,
+ "functionCall": false,
+ "id": "anthropic/claude-3-haiku",
+ "maxTokens": 4096,
+ "tokens": 200000,
+ "vision": true,
+ },
+ {
+ "description": "Claude 3 Haiku is Anthropic's fastest and most compact model for
+near-instant responsiveness. Quick and accurate targeted performance.
+
+See the launch announcement and benchmark results [here](https://www.anthropic.com/news/claude-3-haiku)
+
+#multimodal
+
+_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/models/anthropic/claude-3-haiku) variant._",
+ "displayName": "Anthropic: Claude 3 Haiku (self-moderated)",
+ "enabled": false,
+ "functionCall": false,
+ "id": "anthropic/claude-3-haiku:beta",
+ "maxTokens": 4096,
+ "tokens": 200000,
+ "vision": true,
+ },
+ {
+ "description": "Claude 3 Sonnet is an ideal balance of intelligence and speed for enterprise workloads. Maximum utility at a lower price, dependable, balanced for scaled deployments.
+
+See the launch announcement and benchmark results [here](https://www.anthropic.com/news/claude-3-family)
+
+#multimodal",
+ "displayName": "Anthropic: Claude 3 Sonnet",
+ "enabled": false,
+ "functionCall": false,
+ "id": "anthropic/claude-3-sonnet",
+ "maxTokens": 4096,
+ "tokens": 200000,
+ "vision": true,
+ },
+ {
+ "description": "Claude 3 Sonnet is an ideal balance of intelligence and speed for enterprise workloads. Maximum utility at a lower price, dependable, balanced for scaled deployments.
+
+See the launch announcement and benchmark results [here](https://www.anthropic.com/news/claude-3-family)
+
+#multimodal
+
+_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/models/anthropic/claude-3-sonnet) variant._",
+ "displayName": "Anthropic: Claude 3 Sonnet (self-moderated)",
+ "enabled": false,
+ "functionCall": false,
+ "id": "anthropic/claude-3-sonnet:beta",
+ "maxTokens": 4096,
+ "tokens": 200000,
+ "vision": true,
+ },
+ {
+ "description": "Claude 3 Opus is Anthropic's most powerful model for highly complex tasks. It boasts top-level performance, intelligence, fluency, and understanding.
+
+See the launch announcement and benchmark results [here](https://www.anthropic.com/news/claude-3-family)
+
+#multimodal",
+ "displayName": "Anthropic: Claude 3 Opus",
+ "enabled": true,
+ "functionCall": false,
+ "id": "anthropic/claude-3-opus",
+ "maxTokens": 4096,
+ "tokens": 200000,
+ "vision": true,
+ },
+ {
+ "description": "Claude 3 Opus is Anthropic's most powerful model for highly complex tasks. It boasts top-level performance, intelligence, fluency, and understanding.
+
+See the launch announcement and benchmark results [here](https://www.anthropic.com/news/claude-3-family)
+
+#multimodal
+
+_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/models/anthropic/claude-3-opus) variant._",
+ "displayName": "Anthropic: Claude 3 Opus (self-moderated)",
+ "enabled": false,
+ "functionCall": false,
+ "id": "anthropic/claude-3-opus:beta",
+ "maxTokens": 4096,
+ "tokens": 200000,
+ "vision": true,
+ },
+ {
+ "description": "This is Mistral AI's flagship model, Mistral Large 2 (version \`mistral-large-2407\`). It's a proprietary weights-available model and excels at reasoning, code, JSON, chat, and more. Read the launch announcement [here](https://mistral.ai/news/mistral-large-2407/).
+
+It is fluent in English, French, Spanish, German, and Italian, with high grammatical accuracy, and its long context window allows precise information recall from large documents.",
+ "displayName": "Mistral Large",
+ "enabled": false,
+ "functionCall": false,
+ "id": "mistralai/mistral-large",
+ "maxTokens": undefined,
+ "tokens": 128000,
+ "vision": false,
+ },
+ {
+ "description": "The preview GPT-4 model with improved instruction following, JSON mode, reproducible outputs, parallel function calling, and more. Training data: up to Dec 2023.
+
+**Note:** heavily rate limited by OpenAI while in preview.",
+ "displayName": "OpenAI: GPT-4 Turbo Preview",
+ "enabled": false,
+ "functionCall": true,
+ "id": "openai/gpt-4-turbo-preview",
+ "maxTokens": 4096,
+ "tokens": 128000,
+ "vision": false,
+ },
+ {
+ "description": "GPT-3.5 Turbo is OpenAI's fastest model. It can understand and generate natural language or code, and is optimized for chat and traditional completion tasks.
+
+Training data up to Sep 2021.",
+ "displayName": "OpenAI: GPT-3.5 Turbo (older v0613)",
+ "enabled": false,
+ "functionCall": false,
+ "id": "openai/gpt-3.5-turbo-0613",
+ "maxTokens": 4096,
+ "tokens": 4095,
+ "vision": false,
+ },
+ {
+ "description": "Nous Hermes 2 Mixtral 8x7B DPO is the new flagship Nous Research model trained over the [Mixtral 8x7B MoE LLM](/models/mistralai/mixtral-8x7b).
+
+The model was trained on over 1,000,000 entries of primarily [GPT-4](/models/openai/gpt-4) generated data, as well as other high-quality data from open datasets across the AI landscape, achieving state-of-the-art performance on a variety of tasks.
+
+#moe",
+ "displayName": "Nous: Hermes 2 Mixtral 8x7B DPO",
+ "enabled": false,
+ "functionCall": false,
+ "id": "nousresearch/nous-hermes-2-mixtral-8x7b-dpo",
+ "maxTokens": undefined,
+ "tokens": 32768,
+ "vision": false,
+ },
+ {
+ "description": "This is Mistral AI's closed-source, medium-sided model. It's powered by a closed-source prototype and excels at reasoning, code, JSON, chat, and more. In benchmarks, it compares with many of the flagship models of other companies.",
+ "displayName": "Mistral Medium",
+ "enabled": false,
+ "functionCall": false,
+ "id": "mistralai/mistral-medium",
+ "maxTokens": undefined,
+ "tokens": 32000,
+ "vision": false,
+ },
+ {
+ "description": "This model is currently powered by Mixtral-8X7B-v0.1, a sparse mixture of experts model with 12B active parameters. It has better reasoning, exhibits more capabilities, can produce and reason about code, and is multiligual, supporting English, French, German, Italian, and Spanish.
+#moe",
+ "displayName": "Mistral Small",
+ "enabled": false,
+ "functionCall": false,
+ "id": "mistralai/mistral-small",
+ "maxTokens": undefined,
+ "tokens": 32000,
+ "vision": false,
+ },
+ {
+ "description": "This model is currently powered by Mistral-7B-v0.2, and incorporates a "better" fine-tuning than [Mistral 7B](/models/mistralai/mistral-7b-instruct-v0.1), inspired by community work. It's best used for large batch processing tasks where cost is a significant factor but reasoning capabilities are not crucial.",
+ "displayName": "Mistral Tiny",
+ "enabled": false,
+ "functionCall": false,
+ "id": "mistralai/mistral-tiny",
+ "maxTokens": undefined,
+ "tokens": 32000,
+ "vision": false,
+ },
+ {
+ "description": "A 75/25 merge of [Chronos 13b v2](https://huggingface.co/elinas/chronos-13b-v2) and [Nous Hermes Llama2 13b](/models/nousresearch/nous-hermes-llama2-13b). This offers the imaginative writing style of Chronos while retaining coherency. Outputs are long and use exceptional prose. #merge",
+ "displayName": "Chronos Hermes 13B v2",
+ "enabled": false,
+ "functionCall": false,
+ "id": "austism/chronos-hermes-13b",
+ "maxTokens": undefined,
+ "tokens": 4096,
+ "vision": false,
+ },
+ {
+ "description": "Nous Hermes 2 Yi 34B was trained on 1,000,000 entries of primarily GPT-4 generated data, as well as other high quality data from open datasets across the AI landscape.
+
+Nous-Hermes 2 on Yi 34B outperforms all Nous-Hermes & Open-Hermes models of the past, achieving new heights in all benchmarks for a Nous Research LLM as well as surpassing many popular finetunes.",
+ "displayName": "Nous: Hermes 2 Yi 34B",
+ "enabled": false,
+ "functionCall": false,
+ "id": "nousresearch/nous-hermes-yi-34b",
+ "maxTokens": undefined,
+ "tokens": 4096,
+ "vision": false,
+ },
+ {
+ "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.
+
+An improved version of [Mistral 7B Instruct](/models/mistralai/mistral-7b-instruct-v0.1), with the following changes:
+
+- 32k context window (vs 8k context in v0.1)
+- Rope-theta = 1e6
+- No Sliding-Window Attention",
+ "displayName": "Mistral: Mistral 7B Instruct v0.2",
+ "enabled": false,
+ "functionCall": false,
+ "id": "mistralai/mistral-7b-instruct-v0.2",
+ "maxTokens": undefined,
+ "tokens": 32768,
+ "vision": false,
+ },
+ {
+ "description": "This is a 16k context fine-tune of [Mixtral-8x7b](/models/mistralai/mixtral-8x7b). It excels in coding tasks due to extensive training with coding data and is known for its obedience, although it lacks DPO tuning.
+
+The model is uncensored and is stripped of alignment and bias. It requires an external alignment layer for ethical use. Users are cautioned to use this highly compliant model responsibly, as detailed in a blog post about uncensored models at [erichartford.com/uncensored-models](https://erichartford.com/uncensored-models).
+
+#moe #uncensored",
+ "displayName": "Dolphin 2.6 Mixtral 8x7B 🐬",
+ "enabled": false,
+ "functionCall": false,
+ "id": "cognitivecomputations/dolphin-mixtral-8x7b",
+ "maxTokens": undefined,
+ "tokens": 32768,
+ "vision": false,
+ },
+ {
+ "description": "Google's flagship multimodal model, supporting image and video in text or chat prompts for a text or code response.
+
+See the benchmarks and prompting guidelines from [Deepmind](https://deepmind.google/technologies/gemini/).
+
+Usage of Gemini is subject to Google's [Gemini Terms of Use](https://ai.google.dev/terms).
+
+#multimodal",
+ "displayName": "Google: Gemini Pro Vision 1.0",
+ "enabled": false,
+ "functionCall": false,
+ "id": "google/gemini-pro-vision",
+ "maxTokens": 8192,
+ "tokens": 65536,
+ "vision": true,
+ },
+ {
+ "description": "Google's flagship text generation model. Designed to handle natural language tasks, multiturn text and code chat, and code generation.
+
+See the benchmarks and prompting guidelines from [Deepmind](https://deepmind.google/technologies/gemini/).
+
+Usage of Gemini is subject to Google's [Gemini Terms of Use](https://ai.google.dev/terms).",
+ "displayName": "Google: Gemini Pro 1.0",
+ "enabled": false,
+ "functionCall": false,
+ "id": "google/gemini-pro",
+ "maxTokens": 32768,
+ "tokens": 131040,
+ "vision": false,
+ },
+ {
+ "description": "A pretrained generative Sparse Mixture of Experts, by Mistral AI, for chat and instruction use. Incorporates 8 experts (feed-forward networks) for a total of 47 billion parameters.
+
+Instruct model fine-tuned by Mistral. #moe",
+ "displayName": "Mixtral 8x7B Instruct",
+ "enabled": false,
+ "functionCall": false,
+ "id": "mistralai/mixtral-8x7b-instruct",
+ "maxTokens": undefined,
+ "tokens": 32768,
+ "vision": false,
+ },
+ {
+ "description": "A pretrained generative Sparse Mixture of Experts, by Mistral AI, for chat and instruction use. Incorporates 8 experts (feed-forward networks) for a total of 47 billion parameters.
+
+Instruct model fine-tuned by Mistral. #moe
+
+_These are higher-throughput endpoints for [Mixtral 8x7B Instruct](/models/mistralai/mixtral-8x7b-instruct). They may have higher prices._",
+ "displayName": "Mixtral 8x7B Instruct (nitro)",
+ "enabled": false,
+ "functionCall": false,
+ "id": "mistralai/mixtral-8x7b-instruct:nitro",
+ "maxTokens": undefined,
+ "tokens": 32768,
+ "vision": false,
+ },
+ {
+ "description": "A pretrained generative Sparse Mixture of Experts, by Mistral AI. Incorporates 8 experts (feed-forward networks) for a total of 47B parameters. Base model (not fine-tuned for instructions) - see [Mixtral 8x7B Instruct](/models/mistralai/mixtral-8x7b-instruct) for an instruct-tuned model.
+
+#moe",
+ "displayName": "Mixtral 8x7B (base)",
+ "enabled": false,
+ "functionCall": false,
+ "id": "mistralai/mixtral-8x7b",
+ "maxTokens": undefined,
+ "tokens": 32768,
+ "vision": false,
+ },
+ {
+ "description": "This is the chat model variant of the [StripedHyena series](/models?q=stripedhyena) developed by Together in collaboration with Nous Research.
+
+StripedHyena uses a new architecture that competes with traditional Transformers, particularly in long-context data processing. It combines attention mechanisms with gated convolutions for improved speed, efficiency, and scaling. This model marks a significant advancement in AI architecture for sequence modeling tasks.",
+ "displayName": "StripedHyena Nous 7B",
+ "enabled": false,
+ "functionCall": false,
+ "id": "togethercomputer/stripedhyena-nous-7b",
+ "maxTokens": undefined,
+ "tokens": 32768,
+ "vision": false,
+ },
+ {
+ "description": "From the creator of [MythoMax](/models/gryphe/mythomax-l2-13b), merges a suite of models to reduce word anticipation, ministrations, and other undesirable words in ChatGPT roleplaying data.
+
+It combines [Neural Chat 7B](/models/intel/neural-chat-7b), Airoboros 7b, [Toppy M 7B](/models/undi95/toppy-m-7b), [Zephyr 7B beta](/models/huggingfaceh4/zephyr-7b-beta), [Nous Capybara 34B](/models/nousresearch/nous-capybara-34b), [OpenHermes 2.5](/models/teknium/openhermes-2.5-mistral-7b), and many others.
+
+#merge
+
+_These are free, rate-limited endpoints for [MythoMist 7B](/models/gryphe/mythomist-7b). Outputs may be cached. Read about rate limits [here](/docs/limits)._",
+ "displayName": "MythoMist 7B (free)",
+ "enabled": false,
+ "functionCall": false,
+ "id": "gryphe/mythomist-7b:free",
+ "maxTokens": 4096,
+ "tokens": 32768,
+ "vision": false,
+ },
+ {
+ "description": "From the creator of [MythoMax](/models/gryphe/mythomax-l2-13b), merges a suite of models to reduce word anticipation, ministrations, and other undesirable words in ChatGPT roleplaying data.
+
+It combines [Neural Chat 7B](/models/intel/neural-chat-7b), Airoboros 7b, [Toppy M 7B](/models/undi95/toppy-m-7b), [Zephyr 7B beta](/models/huggingfaceh4/zephyr-7b-beta), [Nous Capybara 34B](/models/nousresearch/nous-capybara-34b), [OpenHermes 2.5](/models/teknium/openhermes-2.5-mistral-7b), and many others.
+
+#merge",
+ "displayName": "MythoMist 7B",
+ "enabled": false,
+ "functionCall": false,
+ "id": "gryphe/mythomist-7b",
+ "maxTokens": 2048,
+ "tokens": 32768,
+ "vision": false,
+ },
+ {
+ "description": "OpenChat 7B is a library of open-source language models, fine-tuned with "C-RLFT (Conditioned Reinforcement Learning Fine-Tuning)" - a strategy inspired by offline reinforcement learning. It has been trained on mixed-quality data without preference labels.
+
+- For OpenChat fine-tuned on Mistral 7B, check out [OpenChat 7B](/models/openchat/openchat-7b).
+- For OpenChat fine-tuned on Llama 8B, check out [OpenChat 8B](/models/openchat/openchat-8b).
+
+#open-source
+
+_These are free, rate-limited endpoints for [OpenChat 3.5 7B](/models/openchat/openchat-7b). Outputs may be cached. Read about rate limits [here](/docs/limits)._",
+ "displayName": "OpenChat 3.5 7B (free)",
+ "enabled": false,
+ "functionCall": false,
+ "id": "openchat/openchat-7b:free",
+ "maxTokens": 4096,
+ "tokens": 8192,
+ "vision": false,
+ },
+ {
+ "description": "OpenChat 7B is a library of open-source language models, fine-tuned with "C-RLFT (Conditioned Reinforcement Learning Fine-Tuning)" - a strategy inspired by offline reinforcement learning. It has been trained on mixed-quality data without preference labels.
+
+- For OpenChat fine-tuned on Mistral 7B, check out [OpenChat 7B](/models/openchat/openchat-7b).
+- For OpenChat fine-tuned on Llama 8B, check out [OpenChat 8B](/models/openchat/openchat-8b).
+
+#open-source",
+ "displayName": "OpenChat 3.5 7B",
+ "enabled": false,
+ "functionCall": false,
+ "id": "openchat/openchat-7b",
+ "maxTokens": undefined,
+ "tokens": 8192,
+ "vision": false,
+ },
+ {
+ "description": "A collab between IkariDev and Undi. This merge is suitable for RP, ERP, and general knowledge.
+
+#merge #uncensored",
+ "displayName": "Noromaid 20B",
+ "enabled": false,
+ "functionCall": false,
+ "id": "neversleep/noromaid-20b",
+ "maxTokens": 2048,
+ "tokens": 8192,
+ "vision": false,
+ },
+ {
+ "description": "Anthropic's model for low-latency, high throughput text generation. Supports hundreds of pages of text.",
+ "displayName": "Anthropic: Claude Instant v1.1",
+ "enabled": false,
+ "functionCall": false,
+ "id": "anthropic/claude-instant-1.1",
+ "maxTokens": 2048,
+ "tokens": 100000,
+ "vision": false,
+ },
+ {
+ "description": "Claude 2 delivers advancements in key capabilities for enterprises—including an industry-leading 200K token context window, significant reductions in rates of model hallucination, system prompts and a new beta feature: tool use.",
+ "displayName": "Anthropic: Claude v2.1",
+ "enabled": false,
+ "functionCall": false,
+ "id": "anthropic/claude-2.1",
+ "maxTokens": 4096,
+ "tokens": 200000,
+ "vision": false,
+ },
+ {
+ "description": "Claude 2 delivers advancements in key capabilities for enterprises—including an industry-leading 200K token context window, significant reductions in rates of model hallucination, system prompts and a new beta feature: tool use.
+
+_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/models/anthropic/claude-2.1) variant._",
+ "displayName": "Anthropic: Claude v2.1 (self-moderated)",
+ "enabled": false,
+ "functionCall": false,
+ "id": "anthropic/claude-2.1:beta",
+ "maxTokens": 4096,
+ "tokens": 200000,
+ "vision": false,
+ },
+ {
+ "description": "Claude 2 delivers advancements in key capabilities for enterprises—including an industry-leading 200K token context window, significant reductions in rates of model hallucination, system prompts and a new beta feature: tool use.",
+ "displayName": "Anthropic: Claude v2",
+ "enabled": false,
+ "functionCall": false,
+ "id": "anthropic/claude-2",
+ "maxTokens": 4096,
+ "tokens": 200000,
+ "vision": false,
+ },
+ {
+ "description": "Claude 2 delivers advancements in key capabilities for enterprises—including an industry-leading 200K token context window, significant reductions in rates of model hallucination, system prompts and a new beta feature: tool use.
+
+_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/models/anthropic/claude-2) variant._",
+ "displayName": "Anthropic: Claude v2 (self-moderated)",
+ "enabled": false,
+ "functionCall": false,
+ "id": "anthropic/claude-2:beta",
+ "maxTokens": 4096,
+ "tokens": 200000,
+ "vision": false,
+ },
+ {
+ "description": "A continuation of [OpenHermes 2 model](/models/teknium/openhermes-2-mistral-7b), trained on additional code datasets.
+Potentially the most interesting finding from training on a good ratio (est. around 7-14% of the total dataset) of code instruction was that it boosted several non-code benchmarks, including TruthfulQA, AGIEval, and the GPT4All suite. It did, however, reduce the BigBench benchmark score, but the net gain overall is significant.
+ "displayName": "OpenHermes 2.5 Mistral 7B",
+ "enabled": false,
+ "functionCall": false,
+ "id": "teknium/openhermes-2.5-mistral-7b",
+ "maxTokens": undefined,
+ "tokens": 4096,
+ "vision": false,
+ },
+ {
+ "description": "Ability to understand images, in addition to all other [GPT-4 Turbo capabilties](/models/openai/gpt-4-turbo). Training data: up to Apr 2023.
+
+**Note:** heavily rate limited by OpenAI while in preview.
#multimodal",
- "displayName": "Llava 13B",
+ "displayName": "OpenAI: GPT-4 Vision",
+ "enabled": false,
+ "functionCall": false,
+ "id": "openai/gpt-4-vision-preview",
+ "maxTokens": 4096,
+ "tokens": 128000,
+ "vision": true,
+ },
+ {
+ "description": "A Mythomax/MLewd_13B-style merge of selected 70B models.
+A multi-model merge of several LLaMA2 70B finetunes for roleplaying and creative work. The goal was to create a model that combines creativity with intelligence for an enhanced experience.
+
+#merge #uncensored",
+ "displayName": "lzlv 70B",
"enabled": false,
"functionCall": false,
- "id": "haotian-liu/llava-13b",
+ "id": "lizpreciatior/lzlv-70b-fp16-hf",
"maxTokens": undefined,
- "tokens": 2048,
- "vision": true,
+ "tokens": 4096,
+ "vision": false,
},
{
- "description": "This vision-language model builds on innovations from the popular [OpenHermes-2.5](/models/teknium/openhermes-2.5-mistral-7b) model, by Teknium. It adds vision support, and is trained on a custom dataset enriched with function calling
+ "description": "A large LLM created by combining two fine-tuned Llama 70B models into one 120B model. Combines Xwin and Euryale.
-This project is led by [qnguyen3](https://twitter.com/stablequan) and [teknium](https://twitter.com/Teknium1).
+Credits to
+- [@chargoddard](https://huggingface.co/chargoddard) for developing the framework used to merge the model - [mergekit](https://github.com/cg123/mergekit).
+- [@Undi95](https://huggingface.co/Undi95) for helping with the merge ratios.
-#multimodal",
- "displayName": "Nous: Hermes 2 Vision 7B (alpha)",
+#merge",
+ "displayName": "Goliath 120B",
"enabled": false,
- "functionCall": true,
- "id": "nousresearch/nous-hermes-2-vision-7b",
+ "functionCall": false,
+ "id": "alpindale/goliath-120b",
+ "maxTokens": 400,
+ "tokens": 6144,
+ "vision": false,
+ },
+ {
+ "description": "A wild 7B parameter model that merges several models using the new task_arithmetic merge method from mergekit.
+List of merged models:
+- NousResearch/Nous-Capybara-7B-V1.9
+- [HuggingFaceH4/zephyr-7b-beta](/models/huggingfaceh4/zephyr-7b-beta)
+- lemonilia/AshhLimaRP-Mistral-7B
+- Vulkane/120-Days-of-Sodom-LoRA-Mistral-7b
+- Undi95/Mistral-pippa-sharegpt-7b-qlora
+
+#merge #uncensored
+
+_These are free, rate-limited endpoints for [Toppy M 7B](/models/undi95/toppy-m-7b). Outputs may be cached. Read about rate limits [here](/docs/limits)._",
+ "displayName": "Toppy M 7B (free)",
+ "enabled": false,
+ "functionCall": false,
+ "id": "undi95/toppy-m-7b:free",
+ "maxTokens": 2048,
+ "tokens": 4096,
+ "vision": false,
+ },
+ {
+ "description": "A wild 7B parameter model that merges several models using the new task_arithmetic merge method from mergekit.
+List of merged models:
+- NousResearch/Nous-Capybara-7B-V1.9
+- [HuggingFaceH4/zephyr-7b-beta](/models/huggingfaceh4/zephyr-7b-beta)
+- lemonilia/AshhLimaRP-Mistral-7B
+- Vulkane/120-Days-of-Sodom-LoRA-Mistral-7b
+- Undi95/Mistral-pippa-sharegpt-7b-qlora
+
+#merge #uncensored",
+ "displayName": "Toppy M 7B",
+ "enabled": false,
+ "functionCall": false,
+ "id": "undi95/toppy-m-7b",
+ "maxTokens": undefined,
+ "tokens": 4096,
+ "vision": false,
+ },
+ {
+ "description": "A wild 7B parameter model that merges several models using the new task_arithmetic merge method from mergekit.
+List of merged models:
+- NousResearch/Nous-Capybara-7B-V1.9
+- [HuggingFaceH4/zephyr-7b-beta](/models/huggingfaceh4/zephyr-7b-beta)
+- lemonilia/AshhLimaRP-Mistral-7B
+- Vulkane/120-Days-of-Sodom-LoRA-Mistral-7b
+- Undi95/Mistral-pippa-sharegpt-7b-qlora
+
+#merge #uncensored
+
+_These are higher-throughput endpoints for [Toppy M 7B](/models/undi95/toppy-m-7b). They may have higher prices._",
+ "displayName": "Toppy M 7B (nitro)",
+ "enabled": false,
+ "functionCall": false,
+ "id": "undi95/toppy-m-7b:nitro",
"maxTokens": undefined,
"tokens": 4096,
+ "vision": false,
+ },
+ {
+ "description": "Depending on their size, subject, and complexity, your prompts will be sent to [Llama 3 70B Instruct](/models/meta-llama/llama-3-70b-instruct), [Claude 3.5 Sonnet (self-moderated)](/models/anthropic/claude-3.5-sonnet:beta) or [GPT-4o](/models/openai/gpt-4o). To see which model was used, visit [Activity](/activity).
+
+A major redesign of this router is coming soon. Stay tuned on [Discord](https://discord.gg/fVyRaUDgxW) for updates.",
+ "displayName": "Auto (best for prompt)",
+ "enabled": true,
+ "functionCall": false,
+ "id": "openrouter/auto",
+ "maxTokens": undefined,
+ "tokens": 200000,
+ "vision": false,
+ },
+ {
+ "description": "The latest GPT-4 Turbo model with vision capabilities. Vision requests can now use JSON mode and function calling.
+
+Training data: up to April 2023.",
+ "displayName": "OpenAI: GPT-4 Turbo (older v1106)",
+ "enabled": false,
+ "functionCall": true,
+ "id": "openai/gpt-4-1106-preview",
+ "maxTokens": 4096,
+ "tokens": 128000,
"vision": true,
},
{
- "description": "GPT-3.5 Turbo is OpenAI's fastest model. It can understand and generate natural language or code, and is optimized for chat and traditional completion tasks.
+ "description": "An older GPT-3.5 Turbo model with improved instruction following, JSON mode, reproducible outputs, parallel function calling, and more. Training data: up to Sep 2021.",
+ "displayName": "OpenAI: GPT-3.5 Turbo 16k (older v1106)",
+ "enabled": false,
+ "functionCall": true,
+ "id": "openai/gpt-3.5-turbo-1106",
+ "maxTokens": 4096,
+ "tokens": 16385,
+ "vision": false,
+ },
+ {
+ "description": "PaLM 2 fine-tuned for chatbot conversations that help with code-related questions.",
+ "displayName": "Google: PaLM 2 Code Chat 32k",
+ "enabled": false,
+ "functionCall": false,
+ "id": "google/palm-2-codechat-bison-32k",
+ "maxTokens": 32768,
+ "tokens": 131040,
+ "vision": false,
+ },
+ {
+ "description": "PaLM 2 is a language model by Google with improved multilingual, reasoning and coding capabilities.",
+ "displayName": "Google: PaLM 2 Chat 32k",
+ "enabled": false,
+ "functionCall": false,
+ "id": "google/palm-2-chat-bison-32k",
+ "maxTokens": 32768,
+ "tokens": 131040,
+ "vision": false,
+ },
+ {
+ "description": "A Llama 2 70B fine-tune using synthetic data (the Airoboros dataset).
-Updated by OpenAI to point to the [latest version of GPT-3.5](/models?q=openai/gpt-3.5). Training data up to Sep 2021.",
- "displayName": "OpenAI: GPT-3.5 Turbo",
+Currently based on [jondurbin/airoboros-l2-70b](https://huggingface.co/jondurbin/airoboros-l2-70b-2.2.1), but might get updated in the future.",
+ "displayName": "Airoboros 70B",
"enabled": false,
"functionCall": false,
- "id": "openai/gpt-3.5-turbo",
+ "id": "jondurbin/airoboros-l2-70b",
+ "maxTokens": undefined,
+ "tokens": 4096,
+ "vision": false,
+ },
+ {
+ "description": "Xwin-LM aims to develop and open-source alignment tech for LLMs. Our first release, built-upon on the [Llama2](/models/\${Model.Llama_2_13B_Chat}) base models, ranked TOP-1 on AlpacaEval. Notably, it's the first to surpass [GPT-4](/models/\${Model.GPT_4}) on this benchmark. The project will be continuously updated.",
+ "displayName": "Xwin 70B",
+ "enabled": false,
+ "functionCall": false,
+ "id": "xwin-lm/xwin-lm-70b",
+ "maxTokens": 400,
+ "tokens": 8192,
+ "vision": false,
+ },
+ {
+ "description": "A 7.3B parameter model that outperforms Llama 2 13B on all benchmarks, with optimizations for speed and context length.",
+ "displayName": "Mistral: Mistral 7B Instruct v0.1",
+ "enabled": false,
+ "functionCall": false,
+ "id": "mistralai/mistral-7b-instruct-v0.1",
+ "maxTokens": undefined,
+ "tokens": 4096,
+ "vision": false,
+ },
+ {
+ "description": "This model is a variant of GPT-3.5 Turbo tuned for instructional prompts and omitting chat-related optimizations. Training data: up to Sep 2021.",
+ "displayName": "OpenAI: GPT-3.5 Turbo Instruct",
+ "enabled": false,
+ "functionCall": false,
+ "id": "openai/gpt-3.5-turbo-instruct",
+ "maxTokens": 4096,
+ "tokens": 4095,
+ "vision": false,
+ },
+ {
+ "description": "A blend of the new Pygmalion-13b and MythoMax. #merge",
+ "displayName": "Pygmalion: Mythalion 13B",
+ "enabled": false,
+ "functionCall": false,
+ "id": "pygmalionai/mythalion-13b",
+ "maxTokens": 400,
+ "tokens": 8192,
+ "vision": false,
+ },
+ {
+ "description": "GPT-4-32k is an extended version of GPT-4, with the same capabilities but quadrupled context length, allowing for processing up to 40 pages of text in a single pass. This is particularly beneficial for handling longer content like interacting with PDFs without an external vector database. Training data: up to Sep 2021.",
+ "displayName": "OpenAI: GPT-4 32k (older v0314)",
+ "enabled": false,
+ "functionCall": false,
+ "id": "openai/gpt-4-32k-0314",
+ "maxTokens": 4096,
+ "tokens": 32767,
+ "vision": false,
+ },
+ {
+ "description": "GPT-4-32k is an extended version of GPT-4, with the same capabilities but quadrupled context length, allowing for processing up to 40 pages of text in a single pass. This is particularly beneficial for handling longer content like interacting with PDFs without an external vector database. Training data: up to Sep 2021.",
+ "displayName": "OpenAI: GPT-4 32k",
+ "enabled": false,
+ "functionCall": false,
+ "id": "openai/gpt-4-32k",
+ "maxTokens": 4096,
+ "tokens": 32767,
+ "vision": false,
+ },
+ {
+ "description": "This model offers four times the context length of gpt-3.5-turbo, allowing it to support approximately 20 pages of text in a single request at a higher cost. Training data: up to Sep 2021.",
+ "displayName": "OpenAI: GPT-3.5 Turbo 16k",
+ "enabled": false,
+ "functionCall": false,
+ "id": "openai/gpt-3.5-turbo-16k",
"maxTokens": 4096,
"tokens": 16385,
"vision": false,
},
{
- "description": "Ability to understand images, in addition to all other [GPT-4 Turbo capabilties](/models/openai/gpt-4-turbo). Training data: up to Apr 2023.
+ "description": "A state-of-the-art language model fine-tuned on over 300k instructions by Nous Research, with Teknium and Emozilla leading the fine tuning process.",
+ "displayName": "Nous: Hermes 13B",
+ "enabled": false,
+ "functionCall": false,
+ "id": "nousresearch/nous-hermes-llama2-13b",
+ "maxTokens": undefined,
+ "tokens": 4096,
+ "vision": false,
+ },
+ {
+ "description": "Zephyr is a series of language models that are trained to act as helpful assistants. Zephyr-7B-β is the second model in the series, and is a fine-tuned version of [mistralai/Mistral-7B-v0.1](/models/mistralai/mistral-7b-instruct-v0.1) that was trained on a mix of publicly available, synthetic datasets using Direct Preference Optimization (DPO).
-**Note:** heavily rate limited by OpenAI while in preview.
+_These are free, rate-limited endpoints for [Zephyr 7B](/models/huggingfaceh4/zephyr-7b-beta). Outputs may be cached. Read about rate limits [here](/docs/limits)._",
+ "displayName": "Hugging Face: Zephyr 7B (free)",
+ "enabled": false,
+ "functionCall": false,
+ "id": "huggingfaceh4/zephyr-7b-beta:free",
+ "maxTokens": 2048,
+ "tokens": 4096,
+ "vision": false,
+ },
+ {
+ "description": "An attempt to recreate Claude-style verbosity, but don't expect the same level of coherence or memory. Meant for use in roleplay/narrative situations.",
+ "displayName": "Mancer: Weaver (alpha)",
+ "enabled": false,
+ "functionCall": false,
+ "id": "mancer/weaver",
+ "maxTokens": 1000,
+ "tokens": 8000,
+ "vision": false,
+ },
+ {
+ "description": "Anthropic's model for low-latency, high throughput text generation. Supports hundreds of pages of text.",
+ "displayName": "Anthropic: Claude Instant v1.0",
+ "enabled": false,
+ "functionCall": false,
+ "id": "anthropic/claude-instant-1.0",
+ "maxTokens": 4096,
+ "tokens": 100000,
+ "vision": false,
+ },
+ {
+ "description": "Anthropic's model for low-latency, high throughput text generation. Supports hundreds of pages of text.",
+ "displayName": "Anthropic: Claude v1.2",
+ "enabled": false,
+ "functionCall": false,
+ "id": "anthropic/claude-1.2",
+ "maxTokens": 4096,
+ "tokens": 100000,
+ "vision": false,
+ },
+ {
+ "description": "Anthropic's model for low-latency, high throughput text generation. Supports hundreds of pages of text.",
+ "displayName": "Anthropic: Claude v1",
+ "enabled": false,
+ "functionCall": false,
+ "id": "anthropic/claude-1",
+ "maxTokens": 4096,
+ "tokens": 100000,
+ "vision": false,
+ },
+ {
+ "description": "Anthropic's model for low-latency, high throughput text generation. Supports hundreds of pages of text.",
+ "displayName": "Anthropic: Claude Instant v1",
+ "enabled": false,
+ "functionCall": false,
+ "id": "anthropic/claude-instant-1",
+ "maxTokens": 4096,
+ "tokens": 100000,
+ "vision": false,
+ },
+ {
+ "description": "Anthropic's model for low-latency, high throughput text generation. Supports hundreds of pages of text.
-#multimodal",
- "displayName": "OpenAI: GPT-4 Vision",
+_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/models/anthropic/claude-instant-1) variant._",
+ "displayName": "Anthropic: Claude Instant v1 (self-moderated)",
"enabled": false,
"functionCall": false,
- "id": "openai/gpt-4-vision-preview",
+ "id": "anthropic/claude-instant-1:beta",
"maxTokens": 4096,
- "tokens": 128000,
- "vision": true,
+ "tokens": 100000,
+ "vision": false,
+ },
+ {
+ "description": "Anthropic's flagship model. Superior performance on tasks that require complex reasoning. Supports hundreds of pages of text.",
+ "displayName": "Anthropic: Claude v2.0",
+ "enabled": false,
+ "functionCall": false,
+ "id": "anthropic/claude-2.0",
+ "maxTokens": 4096,
+ "tokens": 100000,
+ "vision": false,
+ },
+ {
+ "description": "Anthropic's flagship model. Superior performance on tasks that require complex reasoning. Supports hundreds of pages of text.
+
+_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/models/anthropic/claude-2.0) variant._",
+ "displayName": "Anthropic: Claude v2.0 (self-moderated)",
+ "enabled": false,
+ "functionCall": false,
+ "id": "anthropic/claude-2.0:beta",
+ "maxTokens": 4096,
+ "tokens": 100000,
+ "vision": false,
+ },
+ {
+ "description": "A recreation trial of the original MythoMax-L2-B13 but with updated models. #merge",
+ "displayName": "ReMM SLERP 13B",
+ "enabled": false,
+ "functionCall": false,
+ "id": "undi95/remm-slerp-l2-13b",
+ "maxTokens": 400,
+ "tokens": 4096,
+ "vision": false,
},
{
- "description": "Gemma by Google is an advanced, open-source language model family, leveraging the latest in decoder-only, text-to-text technology. It offers English language capabilities across text generation tasks like question answering, summarization, and reasoning. The Gemma 7B variant is comparable in performance to leading open source models.
+ "description": "A recreation trial of the original MythoMax-L2-B13 but with updated models. #merge
-Usage of Gemma is subject to Google's [Gemma Terms of Use](https://ai.google.dev/gemma/terms).",
- "displayName": "Google: Gemma 7B",
+_These are extended-context endpoints for [ReMM SLERP 13B](/models/undi95/remm-slerp-l2-13b). They may have higher prices._",
+ "displayName": "ReMM SLERP 13B (extended)",
+ "enabled": false,
+ "functionCall": false,
+ "id": "undi95/remm-slerp-l2-13b:extended",
+ "maxTokens": 400,
+ "tokens": 6144,
+ "vision": false,
+ },
+ {
+ "description": "PaLM 2 fine-tuned for chatbot conversations that help with code-related questions.",
+ "displayName": "Google: PaLM 2 Code Chat",
+ "enabled": false,
+ "functionCall": false,
+ "id": "google/palm-2-codechat-bison",
+ "maxTokens": 4096,
+ "tokens": 28672,
+ "vision": false,
+ },
+ {
+ "description": "PaLM 2 is a language model by Google with improved multilingual, reasoning and coding capabilities.",
+ "displayName": "Google: PaLM 2 Chat",
+ "enabled": false,
+ "functionCall": false,
+ "id": "google/palm-2-chat-bison",
+ "maxTokens": 4096,
+ "tokens": 36864,
+ "vision": false,
+ },
+ {
+ "description": "One of the highest performing and most popular fine-tunes of Llama 2 13B, with rich descriptions and roleplay. #merge",
+ "displayName": "MythoMax 13B",
"enabled": false,
"functionCall": false,
- "id": "google/gemma-7b-it",
+ "id": "gryphe/mythomax-l2-13b",
"maxTokens": undefined,
- "tokens": 8192,
+ "tokens": 4096,
"vision": false,
},
{
"description": "One of the highest performing and most popular fine-tunes of Llama 2 13B, with rich descriptions and roleplay. #merge
-Note: this is a higher-throughput version of [this model](/models/gryphe/mythomax-l2-13b), and may have higher prices and slightly different outputs.",
+_These are higher-throughput endpoints for [MythoMax 13B](/models/gryphe/mythomax-l2-13b). They may have higher prices._",
"displayName": "MythoMax 13B (nitro)",
"enabled": false,
"functionCall": false,
@@ -78,5 +2187,83 @@ Note: this is a higher-throughput version of [this model](/models/gryphe/mythoma
"tokens": 4096,
"vision": false,
},
+ {
+ "description": "One of the highest performing and most popular fine-tunes of Llama 2 13B, with rich descriptions and roleplay. #merge
+
+_These are extended-context endpoints for [MythoMax 13B](/models/gryphe/mythomax-l2-13b). They may have higher prices._",
+ "displayName": "MythoMax 13B (extended)",
+ "enabled": false,
+ "functionCall": false,
+ "id": "gryphe/mythomax-l2-13b:extended",
+ "maxTokens": 400,
+ "tokens": 8192,
+ "vision": false,
+ },
+ {
+ "description": "A 13 billion parameter language model from Meta, fine tuned for chat completions",
+ "displayName": "Meta: Llama v2 13B Chat",
+ "enabled": false,
+ "functionCall": false,
+ "id": "meta-llama/llama-2-13b-chat",
+ "maxTokens": undefined,
+ "tokens": 4096,
+ "vision": false,
+ },
+ {
+ "description": "GPT-4-0314 is the first version of GPT-4 released, with a context length of 8,192 tokens, and was supported until June 14. Training data: up to Sep 2021.",
+ "displayName": "OpenAI: GPT-4 (older v0314)",
+ "enabled": false,
+ "functionCall": false,
+ "id": "openai/gpt-4-0314",
+ "maxTokens": 4096,
+ "tokens": 8191,
+ "vision": false,
+ },
+ {
+ "description": "OpenAI's flagship model, GPT-4 is a large-scale multimodal language model capable of solving difficult problems with greater accuracy than previous models due to its broader general knowledge and advanced reasoning capabilities. Training data: up to Sep 2021.",
+ "displayName": "OpenAI: GPT-4",
+ "enabled": false,
+ "functionCall": false,
+ "id": "openai/gpt-4",
+ "maxTokens": 4096,
+ "tokens": 8191,
+ "vision": true,
+ },
+ {
+ "description": "GPT-3.5 Turbo is OpenAI's fastest model. It can understand and generate natural language or code, and is optimized for chat and traditional completion tasks.
+
+Training data up to Sep 2021.",
+ "displayName": "OpenAI: GPT-3.5 Turbo (older v0301)",
+ "enabled": false,
+ "functionCall": false,
+ "id": "openai/gpt-3.5-turbo-0301",
+ "maxTokens": 4096,
+ "tokens": 4095,
+ "vision": false,
+ },
+ {
+ "description": "The latest GPT-3.5 Turbo model with improved instruction following, JSON mode, reproducible outputs, parallel function calling, and more. Training data: up to Sep 2021.
+
+This version has a higher accuracy at responding in requested formats and a fix for a bug which caused a text encoding issue for non-English language function calls.",
+ "displayName": "OpenAI: GPT-3.5 Turbo 16k",
+ "enabled": false,
+ "functionCall": true,
+ "id": "openai/gpt-3.5-turbo-0125",
+ "maxTokens": 4096,
+ "tokens": 16385,
+ "vision": false,
+ },
+ {
+ "description": "GPT-3.5 Turbo is OpenAI's fastest model. It can understand and generate natural language or code, and is optimized for chat and traditional completion tasks.
+
+Training data up to Sep 2021.",
+ "displayName": "OpenAI: GPT-3.5 Turbo",
+ "enabled": false,
+ "functionCall": false,
+ "id": "openai/gpt-3.5-turbo",
+ "maxTokens": 4096,
+ "tokens": 16385,
+ "vision": false,
+ },
]
`;
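
The snapshot above is derived from the raw OpenRouter `/api/v1/models` payload captured in the `models.json` fixture below. As a rough guide to how the two relate, here is a minimal TypeScript sketch of the field mapping implied by the fixture pairs. It is not the project's actual transformer: `OpenRouterModel`, `ModelCard`, and `transformModel` are illustrative names, and the `enabled`/`functionCall` rules are guesses from the visible data.

```ts
// Shape of one entry in the raw OpenRouter /api/v1/models response (fields
// trimmed to those the snapshot appears to use).
interface OpenRouterModel {
  architecture: { instruct_type: string | null; modality: string; tokenizer: string };
  context_length: number;
  description: string;
  id: string;
  name: string;
  top_provider: { is_moderated: boolean; max_completion_tokens: number | null };
}

// Shape of one model card in the snapshot above.
interface ModelCard {
  description: string;
  displayName: string;
  enabled: boolean;
  functionCall: boolean;
  id: string;
  maxTokens?: number;
  tokens: number;
  vision: boolean;
}

const transformModel = (m: OpenRouterModel): ModelCard => ({
  description: m.description,
  displayName: m.name,
  // Only a small default set (e.g. `openrouter/auto`) is enabled in the
  // snapshot; the real selection rule is not visible in this diff.
  enabled: m.id === 'openrouter/auto',
  // Heuristic guess: the models flagged in the snapshot (e.g.
  // `openai/gpt-4-1106-preview`) all mention function calling in their
  // descriptions; the actual rule may differ.
  functionCall: m.description.toLowerCase().includes('function call'),
  id: m.id,
  // `max_completion_tokens: null` in the API shows up as `undefined` here.
  maxTokens: m.top_provider.max_completion_tokens ?? undefined,
  tokens: m.context_length,
  // "text+image->text" (or the older "multimodal") marks vision models.
  vision: m.architecture.modality.includes('image') || m.architecture.modality === 'multimodal',
});
```
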
diff --git a/src/libs/agent-runtime/openrouter/fixtures/models.json b/src/libs/agent-runtime/openrouter/fixtures/models.json
index 1fe6a23fef1a..371c1d75ccf2 100644
--- a/src/libs/agent-runtime/openrouter/fixtures/models.json
+++ b/src/libs/agent-runtime/openrouter/fixtures/models.json
@@ -1,62 +1,3370 @@
[
{
- "id": "haotian-liu/llava-13b",
- "name": "Llava 13B",
- "description": "LLaVA is a large multimodal model that combines a vision encoder and Vicuna for general-purpose visual and language understanding, achieving impressive chat capabilities mimicking [GPT-4](/models/openai/gpt-4-vision-preview) and setting a new state-of-the-art accuracy on Science QA\n\n#multimodal",
- "pricing": { "prompt": "0.000005", "completion": "0.000005", "image": "0", "request": "0" },
- "context_length": 2048,
- "architecture": { "modality": "multimodal", "tokenizer": "Llama2", "instruct_type": null },
- "top_provider": { "max_completion_tokens": null, "is_moderated": false },
- "per_request_limits": { "prompt_tokens": "891204", "completion_tokens": "891204" }
+ "id": "mattshumer/reflection-70b:free",
+ "name": "Reflection 70B (free)",
+ "created": 1725580800,
+ "description": "Reflection Llama-3.1 70B is trained with a new technique called Reflection-Tuning that teaches a LLM to detect mistakes in its reasoning and correct course.\n\nThe model was trained on synthetic data.\n\n_These are free, rate-limited endpoints for [Reflection 70B](/models/mattshumer/reflection-70b). Outputs may be cached. Read about rate limits [here](/docs/limits)._",
+ "context_length": 131072,
+ "architecture": { "modality": "text->text", "tokenizer": "Llama3", "instruct_type": null },
+ "pricing": { "prompt": "0", "completion": "0", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 8192,
+ "max_completion_tokens": 4096,
+ "is_moderated": false
+ },
+ "per_request_limits": null
},
{
- "id": "nousresearch/nous-hermes-2-vision-7b",
- "name": "Nous: Hermes 2 Vision 7B (alpha)",
- "description": "This vision-language model builds on innovations from the popular [OpenHermes-2.5](/models/teknium/openhermes-2.5-mistral-7b) model, by Teknium. It adds vision support, and is trained on a custom dataset enriched with function calling\n\nThis project is led by [qnguyen3](https://twitter.com/stablequan) and [teknium](https://twitter.com/Teknium1).\n\n#multimodal",
- "pricing": { "prompt": "0.000005", "completion": "0.000005", "image": "0", "request": "0" },
+ "id": "mattshumer/reflection-70b",
+ "name": "Reflection 70B",
+ "created": 1725580800,
+ "description": "Reflection Llama-3.1 70B is trained with a new technique called Reflection-Tuning that teaches a LLM to detect mistakes in its reasoning and correct course.\n\nThe model was trained on synthetic data.",
+ "context_length": 131072,
+ "architecture": { "modality": "text->text", "tokenizer": "Llama3", "instruct_type": null },
+ "pricing": {
+ "prompt": "0.00000035",
+ "completion": "0.0000004",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 8192,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "cohere/command-r-03-2024",
+ "name": "Cohere: Command R (03-2024)",
+ "created": 1725062400,
+ "description": "Command-R is a 35B parameter model that performs conversational language tasks at a higher quality, more reliably, and with a longer context than previous models. It can be used for complex workflows like code generation, retrieval augmented generation (RAG), tool use, and agents.\n\nRead the launch post [here](https://txt.cohere.com/command-r/).\n\nUse of this model is subject to Cohere's [Acceptable Use Policy](https://docs.cohere.com/docs/c4ai-acceptable-use-policy).",
+ "context_length": 128000,
+ "architecture": { "modality": "text->text", "tokenizer": "Cohere", "instruct_type": null },
+ "pricing": { "prompt": "0.0000005", "completion": "0.0000015", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 128000,
+ "max_completion_tokens": 4000,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "cohere/command-r-plus-04-2024",
+ "name": "Cohere: Command R+ (04-2024)",
+ "created": 1725062400,
+ "description": "Command R+ is a new, 104B-parameter LLM from Cohere. It's useful for roleplay, general consumer usecases, and Retrieval Augmented Generation (RAG).\n\nIt offers multilingual support for ten key languages to facilitate global business operations. See benchmarks and the launch post [here](https://txt.cohere.com/command-r-plus-microsoft-azure/).\n\nUse of this model is subject to Cohere's [Acceptable Use Policy](https://docs.cohere.com/docs/c4ai-acceptable-use-policy).",
+ "context_length": 128000,
+ "architecture": { "modality": "text->text", "tokenizer": "Cohere", "instruct_type": null },
+ "pricing": { "prompt": "0.000003", "completion": "0.000015", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 128000,
+ "max_completion_tokens": 4000,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "cohere/command-r-plus-08-2024",
+ "name": "Cohere: Command R+ (08-2024)",
+ "created": 1724976000,
+ "description": "command-r-plus-08-2024 is an update of the [Command R+](/models/cohere/command-r-plus) with roughly 50% higher throughput and 25% lower latencies as compared to the previous Command R+ version, while keeping the hardware footprint the same.\n\nRead the launch post [here](https://docs.cohere.com/changelog/command-gets-refreshed).\n\nUse of this model is subject to Cohere's [Acceptable Use Policy](https://docs.cohere.com/docs/c4ai-acceptable-use-policy).",
+ "context_length": 128000,
+ "architecture": { "modality": "text->text", "tokenizer": "Cohere", "instruct_type": null },
+ "pricing": { "prompt": "0.0000025", "completion": "0.00001", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 128000,
+ "max_completion_tokens": 4000,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "cohere/command-r-08-2024",
+ "name": "Cohere: Command R (08-2024)",
+ "created": 1724976000,
+ "description": "command-r-08-2024 is an update of the [Command R](/models/cohere/command-r) with improved performance for multilingual retrieval-augmented generation (RAG) and tool use. More broadly, it is better at math, code and reasoning and is competitive with the previous version of the larger Command R+ model.\n\nRead the launch post [here](https://docs.cohere.com/changelog/command-gets-refreshed).\n\nUse of this model is subject to Cohere's [Acceptable Use Policy](https://docs.cohere.com/docs/c4ai-acceptable-use-policy).",
+ "context_length": 128000,
+ "architecture": { "modality": "text->text", "tokenizer": "Cohere", "instruct_type": null },
+ "pricing": {
+ "prompt": "0.00000015",
+ "completion": "0.0000006",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 128000,
+ "max_completion_tokens": 4000,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "google/gemini-flash-8b-1.5-exp",
+ "name": "Google: Gemini Flash 8B 1.5 Experimental",
+ "created": 1724803200,
+ "description": "Gemini 1.5 Flash 8B Experimental is an experimental, 8B parameter version of the [Gemini 1.5 Flash](/models/google/gemini-flash-1.5) model.\n\nUsage of Gemini is subject to Google's [Gemini Terms of Use](https://ai.google.dev/terms).\n\n#multimodal\n\nNote: This model is experimental and not suited for production use-cases. It may be removed or redirected to another model in the future.",
+ "context_length": 4000000,
+ "architecture": {
+ "modality": "text+image->text",
+ "tokenizer": "Gemini",
+ "instruct_type": null
+ },
+ "pricing": { "prompt": "0", "completion": "0", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 4000000,
+ "max_completion_tokens": 32768,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "google/gemini-flash-1.5-exp",
+ "name": "Google: Gemini Flash 1.5 Experimental",
+ "created": 1724803200,
+ "description": "Gemini 1.5 Flash Experimental is an experimental version of the [Gemini 1.5 Flash](/models/google/gemini-flash-1.5) model.\n\nUsage of Gemini is subject to Google's [Gemini Terms of Use](https://ai.google.dev/terms).\n\n#multimodal\n\nNote: This model is experimental and not suited for production use-cases. It may be removed or redirected to another model in the future.",
+ "context_length": 4000000,
+ "architecture": {
+ "modality": "text+image->text",
+ "tokenizer": "Gemini",
+ "instruct_type": null
+ },
+ "pricing": { "prompt": "0", "completion": "0", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 4000000,
+ "max_completion_tokens": 32768,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "sao10k/l3.1-euryale-70b",
+ "name": "Llama 3.1 Euryale 70B v2.2",
+ "created": 1724803200,
+ "description": "Euryale L3.1 70B v2.2 is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k). It is the successor of [Euryale L3 70B v2.1](/models/sao10k/l3-euryale-70b).",
+ "context_length": 8192,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Llama3",
+ "instruct_type": "llama3"
+ },
+ "pricing": { "prompt": "0.0000015", "completion": "0.0000015", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 16000,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "ai21/jamba-1-5-large",
+ "name": "AI21: Jamba 1.5 Large",
+ "created": 1724371200,
+ "description": "Jamba 1.5 Large is part of AI21's new family of open models, offering superior speed, efficiency, and quality.\n\nIt features a 256K effective context window, the longest among open models, enabling improved performance on tasks like document summarization and analysis.\n\nBuilt on a novel SSM-Transformer architecture, it outperforms larger models like Llama 3.1 70B on benchmarks while maintaining resource efficiency.\n\nRead their [announcement](https://www.ai21.com/blog/announcing-jamba-model-family) to learn more.",
+ "context_length": 256000,
+ "architecture": { "modality": "text->text", "tokenizer": "Other", "instruct_type": null },
+ "pricing": { "prompt": "0.000002", "completion": "0.000008", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 256000,
+ "max_completion_tokens": 4096,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "ai21/jamba-1-5-mini",
+ "name": "AI21: Jamba 1.5 Mini",
+ "created": 1724371200,
+ "description": "Jamba 1.5 Mini is the world's first production-grade Mamba-based model, combining SSM and Transformer architectures for a 256K context window and high efficiency.\n\nIt works with 9 languages and can handle various writing and analysis tasks as well as or better than similar small models.\n\nThis model uses less computer memory and works faster with longer texts than previous designs.\n\nRead their [announcement](https://www.ai21.com/blog/announcing-jamba-model-family) to learn more.",
+ "context_length": 256000,
+ "architecture": { "modality": "text->text", "tokenizer": "Other", "instruct_type": null },
+ "pricing": { "prompt": "0.0000002", "completion": "0.0000004", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 256000,
+ "max_completion_tokens": 4096,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "microsoft/phi-3.5-mini-128k-instruct",
+ "name": "Phi-3.5 Mini 128K Instruct",
+ "created": 1724198400,
+ "description": "Phi-3.5 models are lightweight, state-of-the-art open models. These models were trained with Phi-3 datasets that include both synthetic data and the filtered, publicly available websites data, with a focus on high quality and reasoning-dense properties. Phi-3.5 Mini uses 3.8B parameters, and is a dense decoder-only transformer model using the same tokenizer as [Phi-3 Mini](/models/microsoft/phi-3-mini-128k-instruct).\n\nThe models underwent a rigorous enhancement process, incorporating both supervised fine-tuning, proximal policy optimization, and direct preference optimization to ensure precise instruction adherence and robust safety measures. When assessed against benchmarks that test common sense, language understanding, math, code, long context and logical reasoning, Phi-3.5 models showcased robust and state-of-the-art performance among models with less than 13 billion parameters.",
+ "context_length": 128000,
+ "architecture": { "modality": "text->text", "tokenizer": "Other", "instruct_type": "phi3" },
+ "pricing": { "prompt": "0.0000001", "completion": "0.0000001", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 128000,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "nousresearch/hermes-3-llama-3.1-70b",
+ "name": "Nous: Hermes 3 70B Instruct",
+ "created": 1723939200,
+ "description": "Hermes 3 is a generalist language model with many improvements over [Hermes 2](/models/nousresearch/nous-hermes-2-mistral-7b-dpo), including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the board.\n\nHermes 3 70B is a competitive, if not superior finetune of the [Llama-3.1 70B foundation model](/models/meta-llama/llama-3.1-70b-instruct), focused on aligning LLMs to the user, with powerful steering capabilities and control given to the end user.\n\nThe Hermes 3 series builds and expands on the Hermes 2 set of capabilities, including more powerful and reliable function calling and structured output capabilities, generalist assistant capabilities, and improved code generation skills.",
+ "context_length": 131072,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Llama3",
+ "instruct_type": "chatml"
+ },
+ "pricing": { "prompt": "0.0000004", "completion": "0.0000004", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 12288,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "nousresearch/hermes-3-llama-3.1-405b",
+ "name": "Nous: Hermes 3 405B Instruct",
+ "created": 1723766400,
+ "description": "Hermes 3 is a generalist language model with many improvements over Hermes 2, including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the board.\n\nHermes 3 405B is a frontier-level, full-parameter finetune of the Llama-3.1 405B foundation model, focused on aligning LLMs to the user, with powerful steering capabilities and control given to the end user.\n\nThe Hermes 3 series builds and expands on the Hermes 2 set of capabilities, including more powerful and reliable function calling and structured output capabilities, generalist assistant capabilities, and improved code generation skills.\n\nHermes 3 is competitive, if not superior, to Llama-3.1 Instruct models at general capabilities, with varying strengths and weaknesses attributable between the two.",
+ "context_length": 131072,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Llama3",
+ "instruct_type": "chatml"
+ },
+ "pricing": { "prompt": "0", "completion": "0", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 18000,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "nousresearch/hermes-3-llama-3.1-405b:extended",
+ "name": "Nous: Hermes 3 405B Instruct (extended)",
+ "created": 1723766400,
+ "description": "Hermes 3 is a generalist language model with many improvements over Hermes 2, including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the board.\n\nHermes 3 405B is a frontier-level, full-parameter finetune of the Llama-3.1 405B foundation model, focused on aligning LLMs to the user, with powerful steering capabilities and control given to the end user.\n\nThe Hermes 3 series builds and expands on the Hermes 2 set of capabilities, including more powerful and reliable function calling and structured output capabilities, generalist assistant capabilities, and improved code generation skills.\n\nHermes 3 is competitive, if not superior, to Llama-3.1 Instruct models at general capabilities, with varying strengths and weaknesses attributable between the two.\n\n_These are extended-context endpoints for [Hermes 3 405B Instruct](/models/nousresearch/hermes-3-llama-3.1-405b). They may have higher prices._",
+ "context_length": 128000,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Llama3",
+ "instruct_type": "chatml"
+ },
+ "pricing": { "prompt": "0", "completion": "0", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 128000,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "perplexity/llama-3.1-sonar-huge-128k-online",
+ "name": "Perplexity: Llama 3.1 Sonar 405B Online",
+ "created": 1723593600,
+ "description": "Llama 3.1 Sonar is Perplexity's latest model family. It surpasses their earlier Sonar models in cost-efficiency, speed, and performance. The model is built upon the Llama 3.1 405B and has internet access.",
+ "context_length": 127072,
+ "architecture": { "modality": "text->text", "tokenizer": "Llama3", "instruct_type": null },
+ "pricing": {
+ "prompt": "0.000005",
+ "completion": "0.000005",
+ "image": "0",
+ "request": "0.005"
+ },
+ "top_provider": {
+ "context_length": 127072,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "openai/chatgpt-4o-latest",
+ "name": "OpenAI: ChatGPT-4o",
+ "created": 1723593600,
+ "description": "Dynamic model continuously updated to the current version of [GPT-4o](/models/openai/gpt-4o) in ChatGPT. Intended for research and evaluation.\n\nNote: This model is experimental and not suited for production use-cases. It may be removed or redirected to another model in the future.",
+ "context_length": 128000,
+ "architecture": { "modality": "text+image->text", "tokenizer": "GPT", "instruct_type": null },
+ "pricing": {
+ "prompt": "0.000005",
+ "completion": "0.000015",
+ "image": "0.007225",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 128000,
+ "max_completion_tokens": 16384,
+ "is_moderated": true
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "sao10k/l3-lunaris-8b",
+ "name": "Llama 3 8B Lunaris",
+ "created": 1723507200,
+ "description": "Lunaris 8B is a versatile generalist and roleplaying model based on Llama 3. It's a strategic merge of multiple models, designed to balance creativity with improved logic and general knowledge.\n\nCreated by [Sao10k](https://huggingface.co/Sao10k), this model aims to offer an improved experience over Stheno v3.2, with enhanced creativity and logical reasoning.\n\nFor best results, use with Llama 3 Instruct context template, temperature 1.4, and min_p 0.1.",
+ "context_length": 8192,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Llama3",
+ "instruct_type": "llama3"
+ },
+ "pricing": { "prompt": "0.000002", "completion": "0.000002", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 8192,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "aetherwiing/mn-starcannon-12b",
+ "name": "Mistral Nemo 12B Starcannon",
+ "created": 1723507200,
+ "description": "Starcannon 12B is a creative roleplay and story writing model, using [nothingiisreal/mn-celeste-12b](https://openrouter.ai/models/nothingiisreal/mn-celeste-12b) as a base and [intervitens/mini-magnum-12b-v1.1](https://huggingface.co/intervitens/mini-magnum-12b-v1.1) merged in using the [TIES](https://arxiv.org/abs/2306.01708) method.\n\nAlthough more similar to Magnum overall, the model remains very creative, with a pleasant writing style. It is recommended for people wanting more variety than Magnum, and yet more verbose prose than Celeste.",
+ "context_length": 12000,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Mistral",
+ "instruct_type": "chatml"
+ },
+ "pricing": { "prompt": "0.000002", "completion": "0.000002", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 12000,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "openai/gpt-4o-2024-08-06",
+ "name": "OpenAI: GPT-4o (2024-08-06)",
+ "created": 1722902400,
+ "description": "The 2024-08-06 version of GPT-4o offers improved performance in structured outputs, with the ability to supply a JSON schema in the respone_format. Read more [here](https://openai.com/index/introducing-structured-outputs-in-the-api/).\n\nGPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as fast and 50% more cost-effective. GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.\n\nFor benchmarking against other models, it was briefly called [\"im-also-a-good-gpt2-chatbot\"](https://twitter.com/LiamFedus/status/1790064963966370209)",
+ "context_length": 128000,
+ "architecture": { "modality": "text+image->text", "tokenizer": "GPT", "instruct_type": null },
+ "pricing": {
+ "prompt": "0.0000025",
+ "completion": "0.00001",
+ "image": "0.0036125",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 128000,
+ "max_completion_tokens": 16384,
+ "is_moderated": true
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "meta-llama/llama-3.1-405b",
+ "name": "Meta: Llama 3.1 405B (base)",
+ "created": 1722556800,
+ "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This is the base 405B pre-trained version.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).",
+ "context_length": 131072,
+ "architecture": { "modality": "text->text", "tokenizer": "Llama3", "instruct_type": "none" },
+ "pricing": { "prompt": "0.000002", "completion": "0.000002", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 32768,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "nothingiisreal/mn-celeste-12b",
+ "name": "Mistral Nemo 12B Celeste",
+ "created": 1722556800,
+ "description": "A specialized story writing and roleplaying model based on Mistral's NeMo 12B Instruct. Fine-tuned on curated datasets including Reddit Writing Prompts and Opus Instruct 25K.\n\nThis model excels at creative writing, offering improved NSFW capabilities, with smarter and more active narration. It demonstrates remarkable versatility in both SFW and NSFW scenarios, with strong Out of Character (OOC) steering capabilities, allowing fine-tuned control over narrative direction and character behavior.\n\nCheck out the model's [HuggingFace page](https://huggingface.co/nothingiisreal/MN-12B-Celeste-V1.9) for details on what parameters and prompts work best!",
+ "context_length": 32000,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Mistral",
+ "instruct_type": "chatml"
+ },
+ "pricing": { "prompt": "0.0000015", "completion": "0.0000015", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 32000,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "google/gemini-pro-1.5-exp",
+ "name": "Google: Gemini Pro 1.5 Experimental",
+ "created": 1722470400,
+ "description": "Gemini 1.5 Pro (0827) is an experimental version of the [Gemini 1.5 Pro](/models/google/gemini-pro-1.5) model.\n\nUsage of Gemini is subject to Google's [Gemini Terms of Use](https://ai.google.dev/terms).\n\n#multimodal\n\nNote: This model is experimental and not suited for production use-cases. It may be removed or redirected to another model in the future.",
+ "context_length": 4000000,
+ "architecture": {
+ "modality": "text+image->text",
+ "tokenizer": "Gemini",
+ "instruct_type": null
+ },
+ "pricing": { "prompt": "0", "completion": "0", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 4000000,
+ "max_completion_tokens": 32768,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "perplexity/llama-3.1-sonar-large-128k-online",
+ "name": "Perplexity: Llama 3.1 Sonar 70B Online",
+ "created": 1722470400,
+ "description": "Llama 3.1 Sonar is Perplexity's latest model family. It surpasses their earlier Sonar models in cost-efficiency, speed, and performance.\n\nThis is the online version of the [offline chat model](/models/perplexity/llama-3.1-sonar-large-128k-chat). It is focused on delivering helpful, up-to-date, and factual responses. #online",
+ "context_length": 127072,
+ "architecture": { "modality": "text->text", "tokenizer": "Llama3", "instruct_type": null },
+ "pricing": {
+ "prompt": "0.000001",
+ "completion": "0.000001",
+ "image": "0",
+ "request": "0.005"
+ },
+ "top_provider": {
+ "context_length": 127072,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "perplexity/llama-3.1-sonar-large-128k-chat",
+ "name": "Perplexity: Llama 3.1 Sonar 70B",
+ "created": 1722470400,
+ "description": "Llama 3.1 Sonar is Perplexity's latest model family. It surpasses their earlier Sonar models in cost-efficiency, speed, and performance.\n\nThis is a normal offline LLM, but the [online version](/models/perplexity/llama-3.1-sonar-large-128k-online) of this model has Internet access.",
+ "context_length": 131072,
+ "architecture": { "modality": "text->text", "tokenizer": "Llama3", "instruct_type": null },
+ "pricing": { "prompt": "0.000001", "completion": "0.000001", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 131072,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "perplexity/llama-3.1-sonar-small-128k-online",
+ "name": "Perplexity: Llama 3.1 Sonar 8B Online",
+ "created": 1722470400,
+ "description": "Llama 3.1 Sonar is Perplexity's latest model family. It surpasses their earlier Sonar models in cost-efficiency, speed, and performance.\n\nThis is the online version of the [offline chat model](/models/perplexity/llama-3.1-sonar-small-128k-chat). It is focused on delivering helpful, up-to-date, and factual responses. #online",
+ "context_length": 127072,
+ "architecture": { "modality": "text->text", "tokenizer": "Llama3", "instruct_type": null },
+ "pricing": {
+ "prompt": "0.0000002",
+ "completion": "0.0000002",
+ "image": "0",
+ "request": "0.005"
+ },
+ "top_provider": {
+ "context_length": 127072,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "perplexity/llama-3.1-sonar-small-128k-chat",
+ "name": "Perplexity: Llama 3.1 Sonar 8B",
+ "created": 1722470400,
+ "description": "Llama 3.1 Sonar is Perplexity's latest model family. It surpasses their earlier Sonar models in cost-efficiency, speed, and performance.\n\nThis is a normal offline LLM, but the [online version](/models/perplexity/llama-3.1-sonar-small-128k-online) of this model has Internet access.",
+ "context_length": 131072,
+ "architecture": { "modality": "text->text", "tokenizer": "Llama3", "instruct_type": null },
+ "pricing": { "prompt": "0.0000002", "completion": "0.0000002", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 131072,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "meta-llama/llama-3.1-70b-instruct",
+ "name": "Meta: Llama 3.1 70B Instruct",
+ "created": 1721692800,
+ "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 70B instruct-tuned version is optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).",
+ "context_length": 131072,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Llama3",
+ "instruct_type": "llama3"
+ },
+ "pricing": { "prompt": "0.0000003", "completion": "0.0000003", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 131072,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "meta-llama/llama-3.1-8b-instruct:free",
+ "name": "Meta: Llama 3.1 8B Instruct (free)",
+ "created": 1721692800,
+ "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 8B instruct-tuned version is fast and efficient.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).\n\n_These are free, rate-limited endpoints for [Llama 3.1 8B Instruct](/models/meta-llama/llama-3.1-8b-instruct). Outputs may be cached. Read about rate limits [here](/docs/limits)._",
+ "context_length": 131072,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Llama3",
+ "instruct_type": "llama3"
+ },
+ "pricing": { "prompt": "0", "completion": "0", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 8192,
+ "max_completion_tokens": 4096,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "meta-llama/llama-3.1-8b-instruct",
+ "name": "Meta: Llama 3.1 8B Instruct",
+ "created": 1721692800,
+ "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 8B instruct-tuned version is fast and efficient.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).",
+ "context_length": 131072,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Llama3",
+ "instruct_type": "llama3"
+ },
+ "pricing": {
+ "prompt": "0.000000055",
+ "completion": "0.000000055",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 100000,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "meta-llama/llama-3.1-405b-instruct",
+ "name": "Meta: Llama 3.1 405B Instruct",
+ "created": 1721692800,
+ "description": "The highly anticipated 400B class of Llama3 is here! Clocking in at 128k context with impressive eval scores, the Meta AI team continues to push the frontier of open-source LLMs.\n\nMeta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 405B instruct-tuned version is optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).",
+ "context_length": 131072,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Llama3",
+ "instruct_type": "llama3"
+ },
+ "pricing": {
+ "prompt": "0.00000179",
+ "completion": "0.00000179",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 32000,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "cognitivecomputations/dolphin-llama-3-70b",
+ "name": "Dolphin Llama 3 70B 🐬",
+ "created": 1721347200,
+ "description": "Dolphin 2.9 is designed for instruction following, conversational, and coding. This model is a fine-tune of [Llama 3 70B](/models/meta-llama/llama-3-70b-instruct). It demonstrates improvements in instruction, conversation, coding, and function calling abilities, when compared to the original.\n\nUncensored and is stripped of alignment and bias, it requires an external alignment layer for ethical use. Users are cautioned to use this highly compliant model responsibly, as detailed in a blog post about uncensored models at [erichartford.com/uncensored-models](https://erichartford.com/uncensored-models).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).",
+ "context_length": 8192,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Llama3",
+ "instruct_type": "chatml"
+ },
+ "pricing": {
+ "prompt": "0.00000035",
+ "completion": "0.0000004",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 8192,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "mistralai/codestral-mamba",
+ "name": "Mistral: Codestral Mamba",
+ "created": 1721347200,
+ "description": "A 7.3B parameter Mamba-based model designed for code and reasoning tasks.\n\n- Linear time inference, allowing for theoretically infinite sequence lengths\n- 256k token context window\n- Optimized for quick responses, especially beneficial for code productivity\n- Performs comparably to state-of-the-art transformer models in code and reasoning tasks\n- Available under the Apache 2.0 license for free use, modification, and distribution",
+ "context_length": 256000,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Mistral",
+ "instruct_type": "mistral"
+ },
+ "pricing": {
+ "prompt": "0.00000025",
+ "completion": "0.00000025",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 256000,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "mistralai/mistral-nemo",
+ "name": "Mistral: Mistral Nemo",
+ "created": 1721347200,
+ "description": "A 12B parameter model with a 128k token context length built by Mistral in collaboration with NVIDIA.\n\nThe model is multilingual, supporting English, French, German, Spanish, Italian, Portuguese, Chinese, Japanese, Korean, Arabic, and Hindi.\n\nIt supports function calling and is released under the Apache 2.0 license.",
+ "context_length": 128000,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Mistral",
+ "instruct_type": "mistral"
+ },
+ "pricing": {
+ "prompt": "0.00000013",
+ "completion": "0.00000013",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 128000,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "openai/gpt-4o-mini-2024-07-18",
+ "name": "OpenAI: GPT-4o-mini (2024-07-18)",
+ "created": 1721260800,
+ "description": "GPT-4o mini is OpenAI's newest model after [GPT-4 Omni](/models/openai/gpt-4o), supporting both text and image inputs with text outputs.\n\nAs their most advanced small model, it is many multiples more affordable than other recent frontier models, and more than 60% cheaper than [GPT-3.5 Turbo](/models/openai/gpt-3.5-turbo). It maintains SOTA intelligence, while being significantly more cost-effective.\n\nGPT-4o mini achieves an 82% score on MMLU and presently ranks higher than GPT-4 on chat preferences [common leaderboards](https://arena.lmsys.org/).\n\nCheck out the [launch announcement](https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/) to learn more.",
+ "context_length": 128000,
+ "architecture": { "modality": "text+image->text", "tokenizer": "GPT", "instruct_type": null },
+ "pricing": {
+ "prompt": "0.00000015",
+ "completion": "0.0000006",
+ "image": "0.007225",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 128000,
+ "max_completion_tokens": 16384,
+ "is_moderated": true
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "openai/gpt-4o-mini",
+ "name": "OpenAI: GPT-4o-mini",
+ "created": 1721260800,
+ "description": "GPT-4o mini is OpenAI's newest model after [GPT-4 Omni](/models/openai/gpt-4o), supporting both text and image inputs with text outputs.\n\nAs their most advanced small model, it is many multiples more affordable than other recent frontier models, and more than 60% cheaper than [GPT-3.5 Turbo](/models/openai/gpt-3.5-turbo). It maintains SOTA intelligence, while being significantly more cost-effective.\n\nGPT-4o mini achieves an 82% score on MMLU and presently ranks higher than GPT-4 on chat preferences [common leaderboards](https://arena.lmsys.org/).\n\nCheck out the [launch announcement](https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/) to learn more.",
+ "context_length": 128000,
+ "architecture": { "modality": "text+image->text", "tokenizer": "GPT", "instruct_type": null },
+ "pricing": {
+ "prompt": "0.00000015",
+ "completion": "0.0000006",
+ "image": "0.007225",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 128000,
+ "max_completion_tokens": 16384,
+ "is_moderated": true
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "qwen/qwen-2-7b-instruct:free",
+ "name": "Qwen 2 7B Instruct (free)",
+ "created": 1721088000,
+ "description": "Qwen2 7B is a transformer-based model that excels in language understanding, multilingual capabilities, coding, mathematics, and reasoning.\n\nIt features SwiGLU activation, attention QKV bias, and group query attention. It is pretrained on extensive data with supervised finetuning and direct preference optimization.\n\nFor more details, see this [blog post](https://qwenlm.github.io/blog/qwen2/) and [GitHub repo](https://github.com/QwenLM/Qwen2).\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).\n\n_These are free, rate-limited endpoints for [Qwen 2 7B Instruct](/models/qwen/qwen-2-7b-instruct). Outputs may be cached. Read about rate limits [here](/docs/limits)._",
+ "context_length": 32768,
+ "architecture": { "modality": "text->text", "tokenizer": "Qwen", "instruct_type": "chatml" },
+ "pricing": { "prompt": "0", "completion": "0", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 8192,
+ "max_completion_tokens": 4096,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "qwen/qwen-2-7b-instruct",
+ "name": "Qwen 2 7B Instruct",
+ "created": 1721088000,
+ "description": "Qwen2 7B is a transformer-based model that excels in language understanding, multilingual capabilities, coding, mathematics, and reasoning.\n\nIt features SwiGLU activation, attention QKV bias, and group query attention. It is pretrained on extensive data with supervised finetuning and direct preference optimization.\n\nFor more details, see this [blog post](https://qwenlm.github.io/blog/qwen2/) and [GitHub repo](https://github.com/QwenLM/Qwen2).\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).",
+ "context_length": 32768,
+ "architecture": { "modality": "text->text", "tokenizer": "Qwen", "instruct_type": "chatml" },
+ "pricing": {
+ "prompt": "0.000000055",
+ "completion": "0.000000055",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 32768,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "google/gemma-2-27b-it",
+ "name": "Google: Gemma 2 27B",
+ "created": 1720828800,
+ "description": "Gemma 2 27B by Google is an open model built from the same research and technology used to create the [Gemini models](/models?q=gemini).\n\nGemma models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning.\n\nSee the [launch announcement](https://blog.google/technology/developers/google-gemma-2/) for more details. Usage of Gemma is subject to Google's [Gemma Terms of Use](https://ai.google.dev/gemma/terms).",
+ "context_length": 8192,
+ "architecture": { "modality": "text->text", "tokenizer": "Gemini", "instruct_type": "gemma" },
+ "pricing": {
+ "prompt": "0.00000027",
+ "completion": "0.00000027",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 4096,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "alpindale/magnum-72b",
+ "name": "Magnum 72B",
+ "created": 1720656000,
+ "description": "From the maker of [Goliath](https://openrouter.ai/models/alpindale/goliath-120b), Magnum 72B is the first in a new family of models designed to achieve the prose quality of the Claude 3 models, notably Opus & Sonnet.\n\nThe model is based on [Qwen2 72B](https://openrouter.ai/models/qwen/qwen-2-72b-instruct) and trained with 55 million tokens of highly curated roleplay (RP) data.",
+ "context_length": 16384,
+ "architecture": { "modality": "text->text", "tokenizer": "Qwen", "instruct_type": "chatml" },
+ "pricing": {
+ "prompt": "0.00000375",
+ "completion": "0.0000045",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 16384,
+ "max_completion_tokens": 1024,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "nousresearch/hermes-2-theta-llama-3-8b",
+ "name": "Nous: Hermes 2 Theta 8B",
+ "created": 1720656000,
+ "description": "An experimental merge model based on Llama 3, exhibiting a very distinctive style of writing. It combines the the best of [Meta's Llama 3 8B](https://openrouter.ai/models/meta-llama/llama-3-8b-instruct) and Nous Research's [Hermes 2 Pro](https://openrouter.ai/models/nousresearch/hermes-2-pro-llama-3-8b).\n\nHermes-2 Θ (theta) was specifically designed with a few capabilities in mind: executing function calls, generating JSON output, and most remarkably, demonstrating metacognitive abilities (contemplating the nature of thought and recognizing the diversity of cognitive processes among individuals).",
+ "context_length": 16384,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Llama3",
+ "instruct_type": "chatml"
+ },
+ "pricing": {
+ "prompt": "0.0000001875",
+ "completion": "0.000001125",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 16384,
+ "max_completion_tokens": 2048,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "google/gemma-2-9b-it:free",
+ "name": "Google: Gemma 2 9B (free)",
+ "created": 1719532800,
+ "description": "Gemma 2 9B by Google is an advanced, open-source language model that sets a new standard for efficiency and performance in its size class.\n\nDesigned for a wide variety of tasks, it empowers developers and researchers to build innovative applications, while maintaining accessibility, safety, and cost-effectiveness.\n\nSee the [launch announcement](https://blog.google/technology/developers/google-gemma-2/) for more details. Usage of Gemma is subject to Google's [Gemma Terms of Use](https://ai.google.dev/gemma/terms).\n\n_These are free, rate-limited endpoints for [Gemma 2 9B](/models/google/gemma-2-9b-it). Outputs may be cached. Read about rate limits [here](/docs/limits)._",
+ "context_length": 8192,
+ "architecture": { "modality": "text->text", "tokenizer": "Gemini", "instruct_type": "gemma" },
+ "pricing": { "prompt": "0", "completion": "0", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 4096,
+ "max_completion_tokens": 2048,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "google/gemma-2-9b-it",
+ "name": "Google: Gemma 2 9B",
+ "created": 1719532800,
+ "description": "Gemma 2 9B by Google is an advanced, open-source language model that sets a new standard for efficiency and performance in its size class.\n\nDesigned for a wide variety of tasks, it empowers developers and researchers to build innovative applications, while maintaining accessibility, safety, and cost-effectiveness.\n\nSee the [launch announcement](https://blog.google/technology/developers/google-gemma-2/) for more details. Usage of Gemma is subject to Google's [Gemma Terms of Use](https://ai.google.dev/gemma/terms).",
+ "context_length": 8192,
+ "architecture": { "modality": "text->text", "tokenizer": "Gemini", "instruct_type": "gemma" },
+ "pricing": {
+ "prompt": "0.00000006",
+ "completion": "0.00000006",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 4096,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "sao10k/l3-stheno-8b",
+ "name": "Llama 3 Stheno 8B v3.3 32K",
+ "created": 1719446400,
+ "description": "Stheno 8B 32K is a creative writing/roleplay model from [Sao10k](https://ko-fi.com/sao10k). It was trained at 8K context, then expanded to 32K context.\n\nCompared to older Stheno version, this model is trained on:\n- 2x the amount of creative writing samples\n- Cleaned up roleplaying samples\n- Fewer low quality samples",
+ "context_length": 32000,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Llama3",
+ "instruct_type": "llama3"
+ },
+ "pricing": {
+ "prompt": "0.00000025",
+ "completion": "0.0000015",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 32000,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "ai21/jamba-instruct",
+ "name": "AI21: Jamba Instruct",
+ "created": 1719273600,
+ "description": "The Jamba-Instruct model, introduced by AI21 Labs, is an instruction-tuned variant of their hybrid SSM-Transformer Jamba model, specifically optimized for enterprise applications.\n\n- 256K Context Window: It can process extensive information, equivalent to a 400-page novel, which is beneficial for tasks involving large documents such as financial reports or legal documents\n- Safety and Accuracy: Jamba-Instruct is designed with enhanced safety features to ensure secure deployment in enterprise environments, reducing the risk and cost of implementation\n\nRead their [announcement](https://www.ai21.com/blog/announcing-jamba) to learn more.\n\nJamba has a knowledge cutoff of February 2024.",
+ "context_length": 256000,
+ "architecture": { "modality": "text->text", "tokenizer": "Other", "instruct_type": null },
+ "pricing": { "prompt": "0.0000005", "completion": "0.0000007", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 256000,
+ "max_completion_tokens": 4096,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "anthropic/claude-3.5-sonnet",
+ "name": "Anthropic: Claude 3.5 Sonnet",
+ "created": 1718841600,
+ "description": "Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at:\n\n- Coding: Autonomously writes, edits, and runs code with reasoning and troubleshooting\n- Data science: Augments human data science expertise; navigates unstructured data while using multiple tools for insights\n- Visual processing: excelling at interpreting charts, graphs, and images, accurately transcribing text to derive insights beyond just the text alone\n- Agentic tasks: exceptional tool use, making it great at agentic tasks (i.e. complex, multi-step problem solving tasks that require engaging with other systems)\n\n#multimodal",
+ "context_length": 200000,
+ "architecture": {
+ "modality": "text+image->text",
+ "tokenizer": "Claude",
+ "instruct_type": null
+ },
+ "pricing": {
+ "prompt": "0.000003",
+ "completion": "0.000015",
+ "image": "0.0048",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 200000,
+ "max_completion_tokens": 8192,
+ "is_moderated": true
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "anthropic/claude-3.5-sonnet:beta",
+ "name": "Anthropic: Claude 3.5 Sonnet (self-moderated)",
+ "created": 1718841600,
+ "description": "Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at:\n\n- Coding: Autonomously writes, edits, and runs code with reasoning and troubleshooting\n- Data science: Augments human data science expertise; navigates unstructured data while using multiple tools for insights\n- Visual processing: excelling at interpreting charts, graphs, and images, accurately transcribing text to derive insights beyond just the text alone\n- Agentic tasks: exceptional tool use, making it great at agentic tasks (i.e. complex, multi-step problem solving tasks that require engaging with other systems)\n\n#multimodal\n\n_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/models/anthropic/claude-3.5-sonnet) variant._",
+ "context_length": 200000,
+ "architecture": {
+ "modality": "text+image->text",
+ "tokenizer": "Claude",
+ "instruct_type": null
+ },
+ "pricing": {
+ "prompt": "0.000003",
+ "completion": "0.000015",
+ "image": "0.0048",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 200000,
+ "max_completion_tokens": 8192,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "sao10k/l3-euryale-70b",
+ "name": "Llama 3 Euryale 70B v2.1",
+ "created": 1718668800,
+ "description": "Euryale 70B v2.1 is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k).\n\n- Better prompt adherence.\n- Better anatomy / spatial awareness.\n- Adapts much better to unique and custom formatting / reply formats.\n- Very creative, lots of unique swipes.\n- Is not restrictive during roleplays.",
+ "context_length": 8192,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Llama3",
+ "instruct_type": "llama3"
+ },
+ "pricing": {
+ "prompt": "0.00000035",
+ "completion": "0.0000004",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 8192,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "microsoft/phi-3-medium-4k-instruct",
+ "name": "Phi-3 Medium 4K Instruct",
+ "created": 1718409600,
+ "description": "Phi-3 4K Medium is a powerful 14-billion parameter model designed for advanced language understanding, reasoning, and instruction following. Optimized through supervised fine-tuning and preference adjustments, it excels in tasks involving common sense, mathematics, logical reasoning, and code processing.\n\nAt time of release, Phi-3 Medium demonstrated state-of-the-art performance among lightweight models. In the MMLU-Pro eval, the model even comes close to a Llama3 70B level of performance.\n\nFor 128k context length, try [Phi-3 Medium 128K](/models/microsoft/phi-3-medium-128k-instruct).",
+ "context_length": 4000,
+ "architecture": { "modality": "text->text", "tokenizer": "Other", "instruct_type": "phi3" },
+ "pricing": {
+ "prompt": "0.00000014",
+ "completion": "0.00000014",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 4000,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "cognitivecomputations/dolphin-mixtral-8x22b",
+ "name": "Dolphin 2.9.2 Mixtral 8x22B 🐬",
+ "created": 1717804800,
+ "description": "Dolphin 2.9 is designed for instruction following, conversational, and coding. This model is a finetune of [Mixtral 8x22B Instruct](/models/mistralai/mixtral-8x22b-instruct). It features a 64k context length and was fine-tuned with a 16k sequence length using ChatML templates.\n\nThis model is a successor to [Dolphin Mixtral 8x7B](/models/cognitivecomputations/dolphin-mixtral-8x7b).\n\nThe model is uncensored and is stripped of alignment and bias. It requires an external alignment layer for ethical use. Users are cautioned to use this highly compliant model responsibly, as detailed in a blog post about uncensored models at [erichartford.com/uncensored-models](https://erichartford.com/uncensored-models).\n\n#moe #uncensored",
+ "context_length": 65536,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Mistral",
+ "instruct_type": "chatml"
+ },
+ "pricing": { "prompt": "0.0000009", "completion": "0.0000009", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 16000,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "qwen/qwen-2-72b-instruct",
+ "name": "Qwen 2 72B Instruct",
+ "created": 1717718400,
+ "description": "Qwen2 72B is a transformer-based model that excels in language understanding, multilingual capabilities, coding, mathematics, and reasoning.\n\nIt features SwiGLU activation, attention QKV bias, and group query attention. It is pretrained on extensive data with supervised finetuning and direct preference optimization.\n\nFor more details, see this [blog post](https://qwenlm.github.io/blog/qwen2/) and [GitHub repo](https://github.com/QwenLM/Qwen2).\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).",
+ "context_length": 32768,
+ "architecture": { "modality": "text->text", "tokenizer": "Qwen", "instruct_type": "chatml" },
+ "pricing": {
+ "prompt": "0.00000035",
+ "completion": "0.0000004",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 32768,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "openchat/openchat-8b",
+ "name": "OpenChat 3.6 8B",
+ "created": 1717200000,
+ "description": "OpenChat 8B is a library of open-source language models, fine-tuned with \"C-RLFT (Conditioned Reinforcement Learning Fine-Tuning)\" - a strategy inspired by offline reinforcement learning. It has been trained on mixed-quality data without preference labels.\n\nIt outperforms many similarly sized models including [Llama 3 8B Instruct](/models/meta-llama/llama-3-8b-instruct) and various fine-tuned models. It excels in general conversation, coding assistance, and mathematical reasoning.\n\n- For OpenChat fine-tuned on Mistral 7B, check out [OpenChat 7B](/models/openchat/openchat-7b).\n- For OpenChat fine-tuned on Llama 8B, check out [OpenChat 8B](/models/openchat/openchat-8b).\n\n#open-source",
+ "context_length": 8192,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Llama3",
+ "instruct_type": "openchat"
+ },
+ "pricing": {
+ "prompt": "0.000000055",
+ "completion": "0.000000055",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 8192,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "nousresearch/hermes-2-pro-llama-3-8b",
+ "name": "NousResearch: Hermes 2 Pro - Llama-3 8B",
+ "created": 1716768000,
+ "description": "Hermes 2 Pro is an upgraded, retrained version of Nous Hermes 2, consisting of an updated and cleaned version of the OpenHermes 2.5 Dataset, as well as a newly introduced Function Calling and JSON Mode dataset developed in-house.",
+ "context_length": 8192,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Llama3",
+ "instruct_type": "chatml"
+ },
+ "pricing": {
+ "prompt": "0.00000014",
+ "completion": "0.00000014",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 8192,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "mistralai/mistral-7b-instruct-v0.3",
+ "name": "Mistral: Mistral 7B Instruct v0.3",
+ "created": 1716768000,
+ "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\nAn improved version of [Mistral 7B Instruct v0.2](/models/mistralai/mistral-7b-instruct-v0.2), with the following changes:\n\n- Extended vocabulary to 32768\n- Supports v3 Tokenizer\n- Supports function calling\n\nNOTE: Support for function calling depends on the provider.",
+ "context_length": 32768,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Mistral",
+ "instruct_type": "mistral"
+ },
+ "pricing": {
+ "prompt": "0.000000055",
+ "completion": "0.000000055",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 32768,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "mistralai/mistral-7b-instruct:free",
+ "name": "Mistral: Mistral 7B Instruct (free)",
+ "created": 1716768000,
+ "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\n*Mistral 7B Instruct has multiple version variants, and this is intended to be the latest version.*\n\n_These are free, rate-limited endpoints for [Mistral 7B Instruct](/models/mistralai/mistral-7b-instruct). Outputs may be cached. Read about rate limits [here](/docs/limits)._",
+ "context_length": 32768,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Mistral",
+ "instruct_type": "mistral"
+ },
+ "pricing": { "prompt": "0", "completion": "0", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 8192,
+ "max_completion_tokens": 4096,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "mistralai/mistral-7b-instruct",
+ "name": "Mistral: Mistral 7B Instruct",
+ "created": 1716768000,
+ "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\n*Mistral 7B Instruct has multiple version variants, and this is intended to be the latest version.*",
+ "context_length": 32768,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Mistral",
+ "instruct_type": "mistral"
+ },
+ "pricing": {
+ "prompt": "0.000000055",
+ "completion": "0.000000055",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 32768,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "mistralai/mistral-7b-instruct:nitro",
+ "name": "Mistral: Mistral 7B Instruct (nitro)",
+ "created": 1716768000,
+ "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\n*Mistral 7B Instruct has multiple version variants, and this is intended to be the latest version.*\n\n_These are higher-throughput endpoints for [Mistral 7B Instruct](/models/mistralai/mistral-7b-instruct). They may have higher prices._",
+ "context_length": 32768,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Mistral",
+ "instruct_type": "mistral"
+ },
+ "pricing": {
+ "prompt": "0.00000007",
+ "completion": "0.00000007",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 32768,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "microsoft/phi-3-mini-128k-instruct:free",
+ "name": "Phi-3 Mini 128K Instruct (free)",
+ "created": 1716681600,
+ "description": "Phi-3 Mini is a powerful 3.8B parameter model designed for advanced language understanding, reasoning, and instruction following. Optimized through supervised fine-tuning and preference adjustments, it excels in tasks involving common sense, mathematics, logical reasoning, and code processing.\n\nAt time of release, Phi-3 Medium demonstrated state-of-the-art performance among lightweight models. This model is static, trained on an offline dataset with an October 2023 cutoff date.\n\n_These are free, rate-limited endpoints for [Phi-3 Mini 128K Instruct](/models/microsoft/phi-3-mini-128k-instruct). Outputs may be cached. Read about rate limits [here](/docs/limits)._",
+ "context_length": 128000,
+ "architecture": { "modality": "text->text", "tokenizer": "Other", "instruct_type": "phi3" },
+ "pricing": { "prompt": "0", "completion": "0", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 8192,
+ "max_completion_tokens": 4096,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "microsoft/phi-3-mini-128k-instruct",
+ "name": "Phi-3 Mini 128K Instruct",
+ "created": 1716681600,
+ "description": "Phi-3 Mini is a powerful 3.8B parameter model designed for advanced language understanding, reasoning, and instruction following. Optimized through supervised fine-tuning and preference adjustments, it excels in tasks involving common sense, mathematics, logical reasoning, and code processing.\n\nAt time of release, Phi-3 Medium demonstrated state-of-the-art performance among lightweight models. This model is static, trained on an offline dataset with an October 2023 cutoff date.",
+ "context_length": 128000,
+ "architecture": { "modality": "text->text", "tokenizer": "Other", "instruct_type": "phi3" },
+ "pricing": { "prompt": "0.0000001", "completion": "0.0000001", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 128000,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "microsoft/phi-3-medium-128k-instruct:free",
+ "name": "Phi-3 Medium 128K Instruct (free)",
+ "created": 1716508800,
+ "description": "Phi-3 128K Medium is a powerful 14-billion parameter model designed for advanced language understanding, reasoning, and instruction following. Optimized through supervised fine-tuning and preference adjustments, it excels in tasks involving common sense, mathematics, logical reasoning, and code processing.\n\nAt time of release, Phi-3 Medium demonstrated state-of-the-art performance among lightweight models. In the MMLU-Pro eval, the model even comes close to a Llama3 70B level of performance.\n\nFor 4k context length, try [Phi-3 Medium 4K](/models/microsoft/phi-3-medium-4k-instruct).\n\n_These are free, rate-limited endpoints for [Phi-3 Medium 128K Instruct](/models/microsoft/phi-3-medium-128k-instruct). Outputs may be cached. Read about rate limits [here](/docs/limits)._",
+ "context_length": 128000,
+ "architecture": { "modality": "text->text", "tokenizer": "Other", "instruct_type": "phi3" },
+ "pricing": { "prompt": "0", "completion": "0", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 8192,
+ "max_completion_tokens": 4096,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "microsoft/phi-3-medium-128k-instruct",
+ "name": "Phi-3 Medium 128K Instruct",
+ "created": 1716508800,
+ "description": "Phi-3 128K Medium is a powerful 14-billion parameter model designed for advanced language understanding, reasoning, and instruction following. Optimized through supervised fine-tuning and preference adjustments, it excels in tasks involving common sense, mathematics, logical reasoning, and code processing.\n\nAt time of release, Phi-3 Medium demonstrated state-of-the-art performance among lightweight models. In the MMLU-Pro eval, the model even comes close to a Llama3 70B level of performance.\n\nFor 4k context length, try [Phi-3 Medium 4K](/models/microsoft/phi-3-medium-4k-instruct).",
+ "context_length": 128000,
+ "architecture": { "modality": "text->text", "tokenizer": "Other", "instruct_type": "phi3" },
+ "pricing": { "prompt": "0.000001", "completion": "0.000001", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 128000,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "neversleep/llama-3-lumimaid-70b",
+ "name": "Llama 3 Lumimaid 70B",
+ "created": 1715817600,
+ "description": "The NeverSleep team is back, with a Llama 3 70B finetune trained on their curated roleplay data. Striking a balance between eRP and RP, Lumimaid was designed to be serious, yet uncensored when necessary.\n\nTo enhance it's overall intelligence and chat capability, roughly 40% of the training data was not roleplay. This provides a breadth of knowledge to access, while still keeping roleplay as the primary strength.\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).",
+ "context_length": 8192,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Llama3",
+ "instruct_type": "llama3"
+ },
+ "pricing": {
+ "prompt": "0.000003375",
+ "completion": "0.0000045",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 8192,
+ "max_completion_tokens": 2048,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "google/gemini-flash-1.5",
+ "name": "Google: Gemini Flash 1.5",
+ "created": 1715644800,
+ "description": "Gemini 1.5 Flash is a foundation model that performs well at a variety of multimodal tasks such as visual understanding, classification, summarization, and creating content from image, audio and video. It's adept at processing visual and text inputs such as photographs, documents, infographics, and screenshots.\n\nGemini 1.5 Flash is designed for high-volume, high-frequency tasks where cost and latency matter. On most common tasks, Flash achieves comparable quality to other Gemini Pro models at a significantly reduced cost. Flash is well-suited for applications like chat assistants and on-demand content generation where speed and scale matter.\n\nUsage of Gemini is subject to Google's [Gemini Terms of Use](https://ai.google.dev/terms).\n\n#multimodal",
+ "context_length": 4000000,
+ "architecture": {
+ "modality": "text+image->text",
+ "tokenizer": "Gemini",
+ "instruct_type": null
+ },
+ "pricing": {
+ "prompt": "0.0000000375",
+ "completion": "0.00000015",
+ "image": "0.00004",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 4000000,
+ "max_completion_tokens": 32768,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "deepseek/deepseek-coder",
+ "name": "DeepSeek-Coder-V2",
+ "created": 1715644800,
+ "description": "DeepSeek-Coder-V2, an open-source Mixture-of-Experts (MoE) code language model. It is further pre-trained from an intermediate checkpoint of DeepSeek-V2 with additional 6 trillion tokens.\n\nThe original V1 model was trained from scratch on 2T tokens, with a composition of 87% code and 13% natural language in both English and Chinese. It was pre-trained on project-level code corpus by employing a extra fill-in-the-blank task.",
+ "context_length": 128000,
+ "architecture": { "modality": "text->text", "tokenizer": "Other", "instruct_type": null },
+ "pricing": {
+ "prompt": "0.00000014",
+ "completion": "0.00000028",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 128000,
+ "max_completion_tokens": 4096,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "deepseek/deepseek-chat",
+ "name": "DeepSeek-V2 Chat",
+ "created": 1715644800,
+ "description": "DeepSeek-V2 Chat is a conversational finetune of DeepSeek-V2, a Mixture-of-Experts (MoE) language model. It comprises 236B total parameters, of which 21B are activated for each token.\n\nCompared with DeepSeek 67B, DeepSeek-V2 achieves stronger performance, and meanwhile saves 42.5% of training costs, reduces the KV cache by 93.3%, and boosts the maximum generation throughput to 5.76 times.\n\nDeepSeek-V2 achieves remarkable performance on both standard benchmarks and open-ended generation evaluations.",
+ "context_length": 128000,
+ "architecture": { "modality": "text->text", "tokenizer": "Other", "instruct_type": null },
+ "pricing": {
+ "prompt": "0.00000014",
+ "completion": "0.00000028",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 128000,
+ "max_completion_tokens": 4096,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "perplexity/llama-3-sonar-large-32k-online",
+ "name": "Perplexity: Llama3 Sonar 70B Online",
+ "created": 1715644800,
+ "description": "Llama3 Sonar is Perplexity's latest model family. It surpasses their earlier Sonar models in cost-efficiency, speed, and performance.\n\nThis is the online version of the [offline chat model](/models/perplexity/llama-3-sonar-large-32k-chat). It is focused on delivering helpful, up-to-date, and factual responses. #online",
+ "context_length": 28000,
+ "architecture": { "modality": "text->text", "tokenizer": "Llama3", "instruct_type": null },
+ "pricing": {
+ "prompt": "0.000001",
+ "completion": "0.000001",
+ "image": "0",
+ "request": "0.005"
+ },
+ "top_provider": {
+ "context_length": 28000,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "perplexity/llama-3-sonar-large-32k-chat",
+ "name": "Perplexity: Llama3 Sonar 70B",
+ "created": 1715644800,
+ "description": "Llama3 Sonar is Perplexity's latest model family. It surpasses their earlier Sonar models in cost-efficiency, speed, and performance.\n\nThis is a normal offline LLM, but the [online version](/models/perplexity/llama-3-sonar-large-32k-online) of this model has Internet access.",
+ "context_length": 32768,
+ "architecture": { "modality": "text->text", "tokenizer": "Llama3", "instruct_type": null },
+ "pricing": { "prompt": "0.000001", "completion": "0.000001", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 32768,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "perplexity/llama-3-sonar-small-32k-online",
+ "name": "Perplexity: Llama3 Sonar 8B Online",
+ "created": 1715644800,
+ "description": "Llama3 Sonar is Perplexity's latest model family. It surpasses their earlier Sonar models in cost-efficiency, speed, and performance.\n\nThis is the online version of the [offline chat model](/models/perplexity/llama-3-sonar-small-32k-chat). It is focused on delivering helpful, up-to-date, and factual responses. #online",
+ "context_length": 28000,
+ "architecture": { "modality": "text->text", "tokenizer": "Llama3", "instruct_type": null },
+ "pricing": {
+ "prompt": "0.0000002",
+ "completion": "0.0000002",
+ "image": "0",
+ "request": "0.005"
+ },
+ "top_provider": {
+ "context_length": 28000,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "perplexity/llama-3-sonar-small-32k-chat",
+ "name": "Perplexity: Llama3 Sonar 8B",
+ "created": 1715644800,
+ "description": "Llama3 Sonar is Perplexity's latest model family. It surpasses their earlier Sonar models in cost-efficiency, speed, and performance.\n\nThis is a normal offline LLM, but the [online version](/models/perplexity/llama-3-sonar-small-32k-online) of this model has Internet access.",
+ "context_length": 32768,
+ "architecture": { "modality": "text->text", "tokenizer": "Llama3", "instruct_type": null },
+ "pricing": { "prompt": "0.0000002", "completion": "0.0000002", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 32768,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "meta-llama/llama-guard-2-8b",
+ "name": "Meta: LlamaGuard 2 8B",
+ "created": 1715558400,
+ "description": "This safeguard model has 8B parameters and is based on the Llama 3 family. Just like is predecessor, [LlamaGuard 1](https://huggingface.co/meta-llama/LlamaGuard-7b), it can do both prompt and response classification.\n\nLlamaGuard 2 acts as a normal LLM would, generating text that indicates whether the given input/output is safe/unsafe. If deemed unsafe, it will also share the content categories violated.\n\nFor best results, please use raw prompt input or the `/completions` endpoint, instead of the chat API.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).",
+ "context_length": 8192,
+ "architecture": { "modality": "text->text", "tokenizer": "Llama3", "instruct_type": "none" },
+ "pricing": {
+ "prompt": "0.00000018",
+ "completion": "0.00000018",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 8192,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "openai/gpt-4o-2024-05-13",
+ "name": "OpenAI: GPT-4o (2024-05-13)",
+ "created": 1715558400,
+ "description": "GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as fast and 50% more cost-effective. GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.\n\nFor benchmarking against other models, it was briefly called [\"im-also-a-good-gpt2-chatbot\"](https://twitter.com/LiamFedus/status/1790064963966370209)",
+ "context_length": 128000,
+ "architecture": { "modality": "text+image->text", "tokenizer": "GPT", "instruct_type": null },
+ "pricing": {
+ "prompt": "0.000005",
+ "completion": "0.000015",
+ "image": "0.007225",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 128000,
+ "max_completion_tokens": 4096,
+ "is_moderated": true
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "openai/gpt-4o",
+ "name": "OpenAI: GPT-4o",
+ "created": 1715558400,
+ "description": "GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as fast and 50% more cost-effective. GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.\n\nFor benchmarking against other models, it was briefly called [\"im-also-a-good-gpt2-chatbot\"](https://twitter.com/LiamFedus/status/1790064963966370209)",
+ "context_length": 128000,
+ "architecture": { "modality": "text+image->text", "tokenizer": "GPT", "instruct_type": null },
+ "pricing": {
+ "prompt": "0.000005",
+ "completion": "0.000015",
+ "image": "0.007225",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 128000,
+ "max_completion_tokens": 4096,
+ "is_moderated": true
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "openai/gpt-4o:extended",
+ "name": "OpenAI: GPT-4o (extended)",
+ "created": 1715558400,
+ "description": "GPT-4o Extended is an experimental variant of GPT-4o with an extended max output tokens. This model supports only text input to text output.\n\n_These are extended-context endpoints for [GPT-4o](/models/openai/gpt-4o). They may have higher prices._",
+ "context_length": 128000,
+ "architecture": { "modality": "text->text", "tokenizer": "GPT", "instruct_type": null },
+ "pricing": {
+ "prompt": "0.000006",
+ "completion": "0.000018",
+ "image": "0.007225",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 128000,
+ "max_completion_tokens": 64000,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "qwen/qwen-72b-chat",
+ "name": "Qwen 1.5 72B Chat",
+ "created": 1715212800,
+ "description": "Qwen1.5 72B is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen, the improvements include:\n\n- Significant performance improvement in human preference for chat models\n- Multilingual support of both base and chat models\n- Stable support of 32K context length for models of all sizes\n\nFor more details, see this [blog post](https://qwenlm.github.io/blog/qwen1.5/) and [GitHub repo](https://github.com/QwenLM/Qwen1.5).\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).",
+ "context_length": 32768,
+ "architecture": { "modality": "text->text", "tokenizer": "Qwen", "instruct_type": "chatml" },
+ "pricing": {
+ "prompt": "0.00000081",
+ "completion": "0.00000081",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 32768,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "qwen/qwen-110b-chat",
+ "name": "Qwen 1.5 110B Chat",
+ "created": 1715212800,
+ "description": "Qwen1.5 110B is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen, the improvements include:\n\n- Significant performance improvement in human preference for chat models\n- Multilingual support of both base and chat models\n- Stable support of 32K context length for models of all sizes\n\nFor more details, see this [blog post](https://qwenlm.github.io/blog/qwen1.5/) and [GitHub repo](https://github.com/QwenLM/Qwen1.5).\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).",
+ "context_length": 32768,
+ "architecture": { "modality": "text->text", "tokenizer": "Qwen", "instruct_type": "chatml" },
+ "pricing": {
+ "prompt": "0.00000162",
+ "completion": "0.00000162",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 32768,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "neversleep/llama-3-lumimaid-8b",
+ "name": "Llama 3 Lumimaid 8B",
+ "created": 1714780800,
+ "description": "The NeverSleep team is back, with a Llama 3 8B finetune trained on their curated roleplay data. Striking a balance between eRP and RP, Lumimaid was designed to be serious, yet uncensored when necessary.\n\nTo enhance it's overall intelligence and chat capability, roughly 40% of the training data was not roleplay. This provides a breadth of knowledge to access, while still keeping roleplay as the primary strength.\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).",
+ "context_length": 24576,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Llama3",
+ "instruct_type": "llama3"
+ },
+ "pricing": {
+ "prompt": "0.0000001875",
+ "completion": "0.000001125",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 8192,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "neversleep/llama-3-lumimaid-8b:extended",
+ "name": "Llama 3 Lumimaid 8B (extended)",
+ "created": 1714780800,
+ "description": "The NeverSleep team is back, with a Llama 3 8B finetune trained on their curated roleplay data. Striking a balance between eRP and RP, Lumimaid was designed to be serious, yet uncensored when necessary.\n\nTo enhance it's overall intelligence and chat capability, roughly 40% of the training data was not roleplay. This provides a breadth of knowledge to access, while still keeping roleplay as the primary strength.\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).\n\n_These are extended-context endpoints for [Llama 3 Lumimaid 8B](/models/neversleep/llama-3-lumimaid-8b). They may have higher prices._",
+ "context_length": 24576,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Llama3",
+ "instruct_type": "llama3"
+ },
+ "pricing": {
+ "prompt": "0.0000001875",
+ "completion": "0.000001125",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 24576,
+ "max_completion_tokens": 2048,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "sao10k/fimbulvetr-11b-v2",
+ "name": "Fimbulvetr 11B v2",
+ "created": 1713657600,
+ "description": "Creative writing model, routed with permission. It's fast, it keeps the conversation going, and it stays in character.\n\nIf you submit a raw prompt, you can use Alpaca or Vicuna formats.",
+ "context_length": 8192,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Llama2",
+ "instruct_type": "alpaca"
+ },
+ "pricing": {
+ "prompt": "0.000000375",
+ "completion": "0.0000015",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 8192,
+ "max_completion_tokens": 2048,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "meta-llama/llama-3-70b-instruct",
+ "name": "Meta: Llama 3 70B Instruct",
+ "created": 1713398400,
+ "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 70B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).",
+ "context_length": 8192,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Llama3",
+ "instruct_type": "llama3"
+ },
+ "pricing": {
+ "prompt": "0.00000035",
+ "completion": "0.0000004",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 8192,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "meta-llama/llama-3-70b-instruct:nitro",
+ "name": "Meta: Llama 3 70B Instruct (nitro)",
+ "created": 1713398400,
+ "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 70B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).\n\n_These are higher-throughput endpoints for [Llama 3 70B Instruct](/models/meta-llama/llama-3-70b-instruct). They may have higher prices._",
+ "context_length": 8192,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Llama3",
+ "instruct_type": "llama3"
+ },
+ "pricing": {
+ "prompt": "0.000000792",
+ "completion": "0.000000792",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 8192,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "meta-llama/llama-3-8b-instruct:free",
+ "name": "Meta: Llama 3 8B Instruct (free)",
+ "created": 1713398400,
+ "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 8B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).\n\n_These are free, rate-limited endpoints for [Llama 3 8B Instruct](/models/meta-llama/llama-3-8b-instruct). Outputs may be cached. Read about rate limits [here](/docs/limits)._",
+ "context_length": 8192,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Llama3",
+ "instruct_type": "llama3"
+ },
+ "pricing": { "prompt": "0", "completion": "0", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 8192,
+ "max_completion_tokens": 4096,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "meta-llama/llama-3-8b-instruct",
+ "name": "Meta: Llama 3 8B Instruct",
+ "created": 1713398400,
+ "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 8B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).",
+ "context_length": 8192,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Llama3",
+ "instruct_type": "llama3"
+ },
+ "pricing": {
+ "prompt": "0.000000055",
+ "completion": "0.000000055",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 8192,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "meta-llama/llama-3-8b-instruct:nitro",
+ "name": "Meta: Llama 3 8B Instruct (nitro)",
+ "created": 1713398400,
+ "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 8B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).\n\n_These are higher-throughput endpoints for [Llama 3 8B Instruct](/models/meta-llama/llama-3-8b-instruct). They may have higher prices._",
+ "context_length": 8192,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Llama3",
+ "instruct_type": "llama3"
+ },
+ "pricing": {
+ "prompt": "0.000000162",
+ "completion": "0.000000162",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 8192,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "meta-llama/llama-3-8b-instruct:extended",
+ "name": "Meta: Llama 3 8B Instruct (extended)",
+ "created": 1713398400,
+ "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 8B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).\n\n_These are extended-context endpoints for [Llama 3 8B Instruct](/models/meta-llama/llama-3-8b-instruct). They may have higher prices._",
+ "context_length": 16384,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Llama3",
+ "instruct_type": "llama3"
+ },
+ "pricing": {
+ "prompt": "0.0000001875",
+ "completion": "0.000001125",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 16384,
+ "max_completion_tokens": 2048,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "mistralai/mixtral-8x22b-instruct",
+ "name": "Mistral: Mixtral 8x22B Instruct",
+ "created": 1713312000,
+ "description": "Mistral's official instruct fine-tuned version of [Mixtral 8x22B](/models/mistralai/mixtral-8x22b). It uses 39B active parameters out of 141B, offering unparalleled cost efficiency for its size. Its strengths include:\n- strong math, coding, and reasoning\n- large context length (64k)\n- fluency in English, French, Italian, German, and Spanish\n\nSee benchmarks on the launch announcement [here](https://mistral.ai/news/mixtral-8x22b/).\n#moe",
+ "context_length": 65536,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Mistral",
+ "instruct_type": "mistral"
+ },
+ "pricing": {
+ "prompt": "0.00000065",
+ "completion": "0.00000065",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 65536,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "microsoft/wizardlm-2-7b",
+ "name": "WizardLM-2 7B",
+ "created": 1713225600,
+ "description": "WizardLM-2 7B is the smaller variant of Microsoft AI's latest Wizard model. It is the fastest and achieves comparable performance with existing 10x larger opensource leading models\n\nIt is a finetune of [Mistral 7B Instruct](/models/mistralai/mistral-7b-instruct), using the same technique as [WizardLM-2 8x22B](/models/microsoft/wizardlm-2-8x22b).\n\nTo read more about the model release, [click here](https://wizardlm.github.io/WizardLM2/).\n\n#moe",
+ "context_length": 32000,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Mistral",
+ "instruct_type": "vicuna"
+ },
+ "pricing": {
+ "prompt": "0.000000055",
+ "completion": "0.000000055",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 32000,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "microsoft/wizardlm-2-8x22b",
+ "name": "WizardLM-2 8x22B",
+ "created": 1713225600,
+ "description": "WizardLM-2 8x22B is Microsoft AI's most advanced Wizard model. It demonstrates highly competitive performance compared to leading proprietary models, and it consistently outperforms all existing state-of-the-art opensource models.\n\nIt is an instruct finetune of [Mixtral 8x22B](/models/mistralai/mixtral-8x22b).\n\nTo read more about the model release, [click here](https://wizardlm.github.io/WizardLM2/).\n\n#moe",
+ "context_length": 65536,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Mistral",
+ "instruct_type": "vicuna"
+ },
+ "pricing": { "prompt": "0.0000005", "completion": "0.0000005", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 65536,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "google/gemini-pro-1.5",
+ "name": "Google: Gemini Pro 1.5",
+ "created": 1712620800,
+ "description": "Google's latest multimodal model, supporting image and video in text or chat prompts.\n\nOptimized for language tasks including:\n\n- Code generation\n- Text generation\n- Text editing\n- Problem solving\n- Recommendations\n- Information extraction\n- Data extraction or generation\n- AI agents\n\nUsage of Gemini is subject to Google's [Gemini Terms of Use](https://ai.google.dev/terms).\n\n#multimodal",
+ "context_length": 4000000,
+ "architecture": {
+ "modality": "text+image->text",
+ "tokenizer": "Gemini",
+ "instruct_type": null
+ },
+ "pricing": {
+ "prompt": "0.0000025",
+ "completion": "0.0000075",
+ "image": "0.00263",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 4000000,
+ "max_completion_tokens": 32768,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "openai/gpt-4-turbo",
+ "name": "OpenAI: GPT-4 Turbo",
+ "created": 1712620800,
+ "description": "The latest GPT-4 Turbo model with vision capabilities. Vision requests can now use JSON mode and function calling.\n\nTraining data: up to December 2023.",
+ "context_length": 128000,
+ "architecture": { "modality": "text+image->text", "tokenizer": "GPT", "instruct_type": null },
+ "pricing": {
+ "prompt": "0.00001",
+ "completion": "0.00003",
+ "image": "0.01445",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 128000,
+ "max_completion_tokens": 4096,
+ "is_moderated": true
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "cohere/command-r-plus",
+ "name": "Cohere: Command R+",
+ "created": 1712188800,
+ "description": "Command R+ is a new, 104B-parameter LLM from Cohere. It's useful for roleplay, general consumer usecases, and Retrieval Augmented Generation (RAG).\n\nIt offers multilingual support for ten key languages to facilitate global business operations. See benchmarks and the launch post [here](https://txt.cohere.com/command-r-plus-microsoft-azure/).\n\nUse of this model is subject to Cohere's [Acceptable Use Policy](https://docs.cohere.com/docs/c4ai-acceptable-use-policy).",
+ "context_length": 128000,
+ "architecture": { "modality": "text->text", "tokenizer": "Cohere", "instruct_type": null },
+ "pricing": { "prompt": "0.000003", "completion": "0.000015", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 128000,
+ "max_completion_tokens": 4000,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "databricks/dbrx-instruct",
+ "name": "Databricks: DBRX 132B Instruct",
+ "created": 1711670400,
+ "description": "DBRX is a new open source large language model developed by Databricks. At 132B, it outperforms existing open source LLMs like Llama 2 70B and [Mixtral-8x7b](/models/mistralai/mixtral-8x7b) on standard industry benchmarks for language understanding, programming, math, and logic.\n\nIt uses a fine-grained mixture-of-experts (MoE) architecture. 36B parameters are active on any input. It was pre-trained on 12T tokens of text and code data. Compared to other open MoE models like Mixtral-8x7B and Grok-1, DBRX is fine-grained, meaning it uses a larger number of smaller experts.\n\nSee the launch announcement and benchmark results [here](https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm).\n\n#moe",
+ "context_length": 32768,
+ "architecture": { "modality": "text->text", "tokenizer": "Other", "instruct_type": "chatml" },
+ "pricing": {
+ "prompt": "0.00000108",
+ "completion": "0.00000108",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 32768,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "sophosympatheia/midnight-rose-70b",
+ "name": "Midnight Rose 70B",
+ "created": 1711065600,
+ "description": "A merge with a complex family tree, this model was crafted for roleplaying and storytelling. Midnight Rose is a successor to Rogue Rose and Aurora Nights and improves upon them both. It wants to produce lengthy output by default and is the best creative writing merge produced so far by sophosympatheia.\n\nDescending from earlier versions of Midnight Rose and [Wizard Tulu Dolphin 70B](https://huggingface.co/sophosympatheia/Wizard-Tulu-Dolphin-70B-v1.0), it inherits the best qualities of each.",
"context_length": 4096,
- "architecture": { "modality": "multimodal", "tokenizer": "Mistral", "instruct_type": null },
- "top_provider": { "max_completion_tokens": null, "is_moderated": false },
- "per_request_limits": { "prompt_tokens": "891204", "completion_tokens": "891204" }
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Llama2",
+ "instruct_type": "airoboros"
+ },
+ "pricing": { "prompt": "0.0000008", "completion": "0.0000008", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 4096,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
},
{
- "id": "openai/gpt-3.5-turbo",
- "name": "OpenAI: GPT-3.5 Turbo",
- "description": "GPT-3.5 Turbo is OpenAI's fastest model. It can understand and generate natural language or code, and is optimized for chat and traditional completion tasks.\n\nUpdated by OpenAI to point to the [latest version of GPT-3.5](/models?q=openai/gpt-3.5). Training data up to Sep 2021.",
+ "id": "cohere/command-r",
+ "name": "Cohere: Command R",
+ "created": 1710374400,
+ "description": "Command-R is a 35B parameter model that performs conversational language tasks at a higher quality, more reliably, and with a longer context than previous models. It can be used for complex workflows like code generation, retrieval augmented generation (RAG), tool use, and agents.\n\nRead the launch post [here](https://txt.cohere.com/command-r/).\n\nUse of this model is subject to Cohere's [Acceptable Use Policy](https://docs.cohere.com/docs/c4ai-acceptable-use-policy).",
+ "context_length": 128000,
+ "architecture": { "modality": "text->text", "tokenizer": "Cohere", "instruct_type": null },
"pricing": { "prompt": "0.0000005", "completion": "0.0000015", "image": "0", "request": "0" },
- "context_length": 16385,
- "architecture": { "modality": "text", "tokenizer": "GPT", "instruct_type": null },
- "top_provider": { "max_completion_tokens": 4096, "is_moderated": true },
- "per_request_limits": { "prompt_tokens": "8912044", "completion_tokens": "2970681" }
+ "top_provider": {
+ "context_length": 128000,
+ "max_completion_tokens": 4000,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "cohere/command",
+ "name": "Cohere: Command",
+ "created": 1710374400,
+ "description": "Command is an instruction-following conversational model that performs language tasks with high quality, more reliably and with a longer context than our base generative models.\n\nUse of this model is subject to Cohere's [Acceptable Use Policy](https://docs.cohere.com/docs/c4ai-acceptable-use-policy).",
+ "context_length": 4096,
+ "architecture": { "modality": "text->text", "tokenizer": "Cohere", "instruct_type": null },
+ "pricing": { "prompt": "0.000001", "completion": "0.000002", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 4096,
+ "max_completion_tokens": 4000,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "anthropic/claude-3-haiku",
+ "name": "Anthropic: Claude 3 Haiku",
+ "created": 1710288000,
+ "description": "Claude 3 Haiku is Anthropic's fastest and most compact model for\nnear-instant responsiveness. Quick and accurate targeted performance.\n\nSee the launch announcement and benchmark results [here](https://www.anthropic.com/news/claude-3-haiku)\n\n#multimodal",
+ "context_length": 200000,
+ "architecture": {
+ "modality": "text+image->text",
+ "tokenizer": "Claude",
+ "instruct_type": null
+ },
+ "pricing": {
+ "prompt": "0.00000025",
+ "completion": "0.00000125",
+ "image": "0.0004",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 200000,
+ "max_completion_tokens": 4096,
+ "is_moderated": true
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "anthropic/claude-3-haiku:beta",
+ "name": "Anthropic: Claude 3 Haiku (self-moderated)",
+ "created": 1710288000,
+ "description": "Claude 3 Haiku is Anthropic's fastest and most compact model for\nnear-instant responsiveness. Quick and accurate targeted performance.\n\nSee the launch announcement and benchmark results [here](https://www.anthropic.com/news/claude-3-haiku)\n\n#multimodal\n\n_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/models/anthropic/claude-3-haiku) variant._",
+ "context_length": 200000,
+ "architecture": {
+ "modality": "text+image->text",
+ "tokenizer": "Claude",
+ "instruct_type": null
+ },
+ "pricing": {
+ "prompt": "0.00000025",
+ "completion": "0.00000125",
+ "image": "0.0004",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 200000,
+ "max_completion_tokens": 4096,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "anthropic/claude-3-sonnet",
+ "name": "Anthropic: Claude 3 Sonnet",
+ "created": 1709596800,
+ "description": "Claude 3 Sonnet is an ideal balance of intelligence and speed for enterprise workloads. Maximum utility at a lower price, dependable, balanced for scaled deployments.\n\nSee the launch announcement and benchmark results [here](https://www.anthropic.com/news/claude-3-family)\n\n#multimodal",
+ "context_length": 200000,
+ "architecture": {
+ "modality": "text+image->text",
+ "tokenizer": "Claude",
+ "instruct_type": null
+ },
+ "pricing": {
+ "prompt": "0.000003",
+ "completion": "0.000015",
+ "image": "0.0048",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 200000,
+ "max_completion_tokens": 4096,
+ "is_moderated": true
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "anthropic/claude-3-sonnet:beta",
+ "name": "Anthropic: Claude 3 Sonnet (self-moderated)",
+ "created": 1709596800,
+ "description": "Claude 3 Sonnet is an ideal balance of intelligence and speed for enterprise workloads. Maximum utility at a lower price, dependable, balanced for scaled deployments.\n\nSee the launch announcement and benchmark results [here](https://www.anthropic.com/news/claude-3-family)\n\n#multimodal\n\n_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/models/anthropic/claude-3-sonnet) variant._",
+ "context_length": 200000,
+ "architecture": {
+ "modality": "text+image->text",
+ "tokenizer": "Claude",
+ "instruct_type": null
+ },
+ "pricing": {
+ "prompt": "0.000003",
+ "completion": "0.000015",
+ "image": "0.0048",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 200000,
+ "max_completion_tokens": 4096,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "anthropic/claude-3-opus",
+ "name": "Anthropic: Claude 3 Opus",
+ "created": 1709596800,
+ "description": "Claude 3 Opus is Anthropic's most powerful model for highly complex tasks. It boasts top-level performance, intelligence, fluency, and understanding.\n\nSee the launch announcement and benchmark results [here](https://www.anthropic.com/news/claude-3-family)\n\n#multimodal",
+ "context_length": 200000,
+ "architecture": {
+ "modality": "text+image->text",
+ "tokenizer": "Claude",
+ "instruct_type": null
+ },
+ "pricing": {
+ "prompt": "0.000015",
+ "completion": "0.000075",
+ "image": "0.024",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 200000,
+ "max_completion_tokens": 4096,
+ "is_moderated": true
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "anthropic/claude-3-opus:beta",
+ "name": "Anthropic: Claude 3 Opus (self-moderated)",
+ "created": 1709596800,
+ "description": "Claude 3 Opus is Anthropic's most powerful model for highly complex tasks. It boasts top-level performance, intelligence, fluency, and understanding.\n\nSee the launch announcement and benchmark results [here](https://www.anthropic.com/news/claude-3-family)\n\n#multimodal\n\n_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/models/anthropic/claude-3-opus) variant._",
+ "context_length": 200000,
+ "architecture": {
+ "modality": "text+image->text",
+ "tokenizer": "Claude",
+ "instruct_type": null
+ },
+ "pricing": {
+ "prompt": "0.000015",
+ "completion": "0.000075",
+ "image": "0.024",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 200000,
+ "max_completion_tokens": 4096,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "mistralai/mistral-large",
+ "name": "Mistral Large",
+ "created": 1708905600,
+ "description": "This is Mistral AI's flagship model, Mistral Large 2 (version `mistral-large-2407`). It's a proprietary weights-available model and excels at reasoning, code, JSON, chat, and more. Read the launch announcement [here](https://mistral.ai/news/mistral-large-2407/).\n\nIt is fluent in English, French, Spanish, German, and Italian, with high grammatical accuracy, and its long context window allows precise information recall from large documents.",
+ "context_length": 128000,
+ "architecture": { "modality": "text->text", "tokenizer": "Mistral", "instruct_type": null },
+ "pricing": { "prompt": "0.000003", "completion": "0.000009", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 128000,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "openai/gpt-4-turbo-preview",
+ "name": "OpenAI: GPT-4 Turbo Preview",
+ "created": 1706140800,
+ "description": "The preview GPT-4 model with improved instruction following, JSON mode, reproducible outputs, parallel function calling, and more. Training data: up to Dec 2023.\n\n**Note:** heavily rate limited by OpenAI while in preview.",
+ "context_length": 128000,
+ "architecture": { "modality": "text->text", "tokenizer": "GPT", "instruct_type": null },
+ "pricing": { "prompt": "0.00001", "completion": "0.00003", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 128000,
+ "max_completion_tokens": 4096,
+ "is_moderated": true
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "openai/gpt-3.5-turbo-0613",
+ "name": "OpenAI: GPT-3.5 Turbo (older v0613)",
+ "created": 1706140800,
+ "description": "GPT-3.5 Turbo is OpenAI's fastest model. It can understand and generate natural language or code, and is optimized for chat and traditional completion tasks.\n\nTraining data up to Sep 2021.",
+ "context_length": 4095,
+ "architecture": { "modality": "text->text", "tokenizer": "GPT", "instruct_type": null },
+ "pricing": { "prompt": "0.000001", "completion": "0.000002", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 4095,
+ "max_completion_tokens": 4096,
+ "is_moderated": true
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "nousresearch/nous-hermes-2-mixtral-8x7b-dpo",
+ "name": "Nous: Hermes 2 Mixtral 8x7B DPO",
+ "created": 1705363200,
+ "description": "Nous Hermes 2 Mixtral 8x7B DPO is the new flagship Nous Research model trained over the [Mixtral 8x7B MoE LLM](/models/mistralai/mixtral-8x7b).\n\nThe model was trained on over 1,000,000 entries of primarily [GPT-4](/models/openai/gpt-4) generated data, as well as other high quality data from open datasets across the AI landscape, achieving state of the art performance on a variety of tasks.\n\n#moe",
+ "context_length": 32768,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Mistral",
+ "instruct_type": "chatml"
+ },
+ "pricing": {
+ "prompt": "0.00000045",
+ "completion": "0.00000045",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 32768,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "mistralai/mistral-medium",
+ "name": "Mistral Medium",
+ "created": 1704844800,
+ "description": "This is Mistral AI's closed-source, medium-sided model. It's powered by a closed-source prototype and excels at reasoning, code, JSON, chat, and more. In benchmarks, it compares with many of the flagship models of other companies.",
+ "context_length": 32000,
+ "architecture": { "modality": "text->text", "tokenizer": "Mistral", "instruct_type": null },
+ "pricing": { "prompt": "0.0000027", "completion": "0.0000081", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 32000,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "mistralai/mistral-small",
+ "name": "Mistral Small",
+ "created": 1704844800,
+ "description": "This model is currently powered by Mixtral-8X7B-v0.1, a sparse mixture of experts model with 12B active parameters. It has better reasoning, exhibits more capabilities, can produce and reason about code, and is multiligual, supporting English, French, German, Italian, and Spanish.\n#moe",
+ "context_length": 32000,
+ "architecture": { "modality": "text->text", "tokenizer": "Mistral", "instruct_type": null },
+ "pricing": { "prompt": "0.000002", "completion": "0.000006", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 32000,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "mistralai/mistral-tiny",
+ "name": "Mistral Tiny",
+ "created": 1704844800,
+ "description": "This model is currently powered by Mistral-7B-v0.2, and incorporates a \"better\" fine-tuning than [Mistral 7B](/models/mistralai/mistral-7b-instruct-v0.1), inspired by community work. It's best used for large batch processing tasks where cost is a significant factor but reasoning capabilities are not crucial.",
+ "context_length": 32000,
+ "architecture": { "modality": "text->text", "tokenizer": "Mistral", "instruct_type": null },
+ "pricing": {
+ "prompt": "0.00000025",
+ "completion": "0.00000025",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 32000,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "austism/chronos-hermes-13b",
+ "name": "Chronos Hermes 13B v2",
+ "created": 1704412800,
+ "description": "A 75/25 merge of [Chronos 13b v2](https://huggingface.co/elinas/chronos-13b-v2) and [Nous Hermes Llama2 13b](/models/nousresearch/nous-hermes-llama2-13b). This offers the imaginative writing style of Chronos while retaining coherency. Outputs are long and use exceptional prose. #merge",
+ "context_length": 4096,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Llama2",
+ "instruct_type": "alpaca"
+ },
+ "pricing": {
+ "prompt": "0.00000013",
+ "completion": "0.00000013",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 4096,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "nousresearch/nous-hermes-yi-34b",
+ "name": "Nous: Hermes 2 Yi 34B",
+ "created": 1704153600,
+ "description": "Nous Hermes 2 Yi 34B was trained on 1,000,000 entries of primarily GPT-4 generated data, as well as other high quality data from open datasets across the AI landscape.\n\nNous-Hermes 2 on Yi 34B outperforms all Nous-Hermes & Open-Hermes models of the past, achieving new heights in all benchmarks for a Nous Research LLM as well as surpassing many popular finetunes.",
+ "context_length": 4096,
+ "architecture": { "modality": "text->text", "tokenizer": "Yi", "instruct_type": "chatml" },
+ "pricing": {
+ "prompt": "0.00000072",
+ "completion": "0.00000072",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 4096,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "mistralai/mistral-7b-instruct-v0.2",
+ "name": "Mistral: Mistral 7B Instruct v0.2",
+ "created": 1703721600,
+ "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\nAn improved version of [Mistral 7B Instruct](/modelsmistralai/mistral-7b-instruct-v0.1), with the following changes:\n\n- 32k context window (vs 8k context in v0.1)\n- Rope-theta = 1e6\n- No Sliding-Window Attention",
+ "context_length": 32768,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Mistral",
+ "instruct_type": "mistral"
+ },
+ "pricing": {
+ "prompt": "0.000000055",
+ "completion": "0.000000055",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 32768,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "cognitivecomputations/dolphin-mixtral-8x7b",
+ "name": "Dolphin 2.6 Mixtral 8x7B 🐬",
+ "created": 1703116800,
+ "description": "This is a 16k context fine-tune of [Mixtral-8x7b](/models/mistralai/mixtral-8x7b). It excels in coding tasks due to extensive training with coding data and is known for its obedience, although it lacks DPO tuning.\n\nThe model is uncensored and is stripped of alignment and bias. It requires an external alignment layer for ethical use. Users are cautioned to use this highly compliant model responsibly, as detailed in a blog post about uncensored models at [erichartford.com/uncensored-models](https://erichartford.com/uncensored-models).\n\n#moe #uncensored",
+ "context_length": 32768,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Mistral",
+ "instruct_type": "chatml"
+ },
+ "pricing": { "prompt": "0.0000005", "completion": "0.0000005", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 32768,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "google/gemini-pro-vision",
+ "name": "Google: Gemini Pro Vision 1.0",
+ "created": 1702425600,
+ "description": "Google's flagship multimodal model, supporting image and video in text or chat prompts for a text or code response.\n\nSee the benchmarks and prompting guidelines from [Deepmind](https://deepmind.google/technologies/gemini/).\n\nUsage of Gemini is subject to Google's [Gemini Terms of Use](https://ai.google.dev/terms).\n\n#multimodal",
+ "context_length": 65536,
+ "architecture": {
+ "modality": "text+image->text",
+ "tokenizer": "Gemini",
+ "instruct_type": null
+ },
+ "pricing": {
+ "prompt": "0.000000125",
+ "completion": "0.000000375",
+ "image": "0.0025",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 65536,
+ "max_completion_tokens": 8192,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "google/gemini-pro",
+ "name": "Google: Gemini Pro 1.0",
+ "created": 1702425600,
+ "description": "Google's flagship text generation model. Designed to handle natural language tasks, multiturn text and code chat, and code generation.\n\nSee the benchmarks and prompting guidelines from [Deepmind](https://deepmind.google/technologies/gemini/).\n\nUsage of Gemini is subject to Google's [Gemini Terms of Use](https://ai.google.dev/terms).",
+ "context_length": 131040,
+ "architecture": { "modality": "text->text", "tokenizer": "Gemini", "instruct_type": null },
+ "pricing": {
+ "prompt": "0.000000125",
+ "completion": "0.000000375",
+ "image": "0.0025",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 131040,
+ "max_completion_tokens": 32768,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "mistralai/mixtral-8x7b-instruct",
+ "name": "Mixtral 8x7B Instruct",
+ "created": 1702166400,
+ "description": "A pretrained generative Sparse Mixture of Experts, by Mistral AI, for chat and instruction use. Incorporates 8 experts (feed-forward networks) for a total of 47 billion parameters.\n\nInstruct model fine-tuned by Mistral. #moe",
+ "context_length": 32768,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Mistral",
+ "instruct_type": "mistral"
+ },
+ "pricing": {
+ "prompt": "0.00000024",
+ "completion": "0.00000024",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 32768,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "mistralai/mixtral-8x7b-instruct:nitro",
+ "name": "Mixtral 8x7B Instruct (nitro)",
+ "created": 1702166400,
+ "description": "A pretrained generative Sparse Mixture of Experts, by Mistral AI, for chat and instruction use. Incorporates 8 experts (feed-forward networks) for a total of 47 billion parameters.\n\nInstruct model fine-tuned by Mistral. #moe\n\n_These are higher-throughput endpoints for [Mixtral 8x7B Instruct](/models/mistralai/mixtral-8x7b-instruct). They may have higher prices._",
+ "context_length": 32768,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Mistral",
+ "instruct_type": "mistral"
+ },
+ "pricing": {
+ "prompt": "0.00000054",
+ "completion": "0.00000054",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 32768,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "mistralai/mixtral-8x7b",
+ "name": "Mixtral 8x7B (base)",
+ "created": 1702166400,
+ "description": "A pretrained generative Sparse Mixture of Experts, by Mistral AI. Incorporates 8 experts (feed-forward networks) for a total of 47B parameters. Base model (not fine-tuned for instructions) - see [Mixtral 8x7B Instruct](/models/mistralai/mixtral-8x7b-instruct) for an instruct-tuned model.\n\n#moe",
+ "context_length": 32768,
+ "architecture": { "modality": "text->text", "tokenizer": "Mistral", "instruct_type": "none" },
+ "pricing": {
+ "prompt": "0.00000054",
+ "completion": "0.00000054",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 32768,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "togethercomputer/stripedhyena-nous-7b",
+ "name": "StripedHyena Nous 7B",
+ "created": 1702080000,
+ "description": "This is the chat model variant of the [StripedHyena series](/models?q=stripedhyena) developed by Together in collaboration with Nous Research.\n\nStripedHyena uses a new architecture that competes with traditional Transformers, particularly in long-context data processing. It combines attention mechanisms with gated convolutions for improved speed, efficiency, and scaling. This model marks a significant advancement in AI architecture for sequence modeling tasks.",
+ "context_length": 32768,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Mistral",
+ "instruct_type": "alpaca"
+ },
+ "pricing": {
+ "prompt": "0.00000018",
+ "completion": "0.00000018",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 32768,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "gryphe/mythomist-7b:free",
+ "name": "MythoMist 7B (free)",
+ "created": 1701907200,
+ "description": "From the creator of [MythoMax](/models/gryphe/mythomax-l2-13b), merges a suite of models to reduce word anticipation, ministrations, and other undesirable words in ChatGPT roleplaying data.\n\nIt combines [Neural Chat 7B](/models/intel/neural-chat-7b), Airoboros 7b, [Toppy M 7B](/models/undi95/toppy-m-7b), [Zepher 7b beta](/models/huggingfaceh4/zephyr-7b-beta), [Nous Capybara 34B](/models/nousresearch/nous-capybara-34b), [OpenHeremes 2.5](/models/teknium/openhermes-2.5-mistral-7b), and many others.\n\n#merge\n\n_These are free, rate-limited endpoints for [MythoMist 7B](/models/gryphe/mythomist-7b). Outputs may be cached. Read about rate limits [here](/docs/limits)._",
+ "context_length": 32768,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Mistral",
+ "instruct_type": "alpaca"
+ },
+ "pricing": { "prompt": "0", "completion": "0", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 8192,
+ "max_completion_tokens": 4096,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "gryphe/mythomist-7b",
+ "name": "MythoMist 7B",
+ "created": 1701907200,
+ "description": "From the creator of [MythoMax](/models/gryphe/mythomax-l2-13b), merges a suite of models to reduce word anticipation, ministrations, and other undesirable words in ChatGPT roleplaying data.\n\nIt combines [Neural Chat 7B](/models/intel/neural-chat-7b), Airoboros 7b, [Toppy M 7B](/models/undi95/toppy-m-7b), [Zepher 7b beta](/models/huggingfaceh4/zephyr-7b-beta), [Nous Capybara 34B](/models/nousresearch/nous-capybara-34b), [OpenHeremes 2.5](/models/teknium/openhermes-2.5-mistral-7b), and many others.\n\n#merge",
+ "context_length": 32768,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Mistral",
+ "instruct_type": "alpaca"
+ },
+ "pricing": {
+ "prompt": "0.000000375",
+ "completion": "0.000000375",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 32768,
+ "max_completion_tokens": 2048,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "openchat/openchat-7b:free",
+ "name": "OpenChat 3.5 7B (free)",
+ "created": 1701129600,
+ "description": "OpenChat 7B is a library of open-source language models, fine-tuned with \"C-RLFT (Conditioned Reinforcement Learning Fine-Tuning)\" - a strategy inspired by offline reinforcement learning. It has been trained on mixed-quality data without preference labels.\n\n- For OpenChat fine-tuned on Mistral 7B, check out [OpenChat 7B](/models/openchat/openchat-7b).\n- For OpenChat fine-tuned on Llama 8B, check out [OpenChat 8B](/models/openchat/openchat-8b).\n\n#open-source\n\n_These are free, rate-limited endpoints for [OpenChat 3.5 7B](/models/openchat/openchat-7b). Outputs may be cached. Read about rate limits [here](/docs/limits)._",
+ "context_length": 8192,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Mistral",
+ "instruct_type": "openchat"
+ },
+ "pricing": { "prompt": "0", "completion": "0", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 8192,
+ "max_completion_tokens": 4096,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "openchat/openchat-7b",
+ "name": "OpenChat 3.5 7B",
+ "created": 1701129600,
+ "description": "OpenChat 7B is a library of open-source language models, fine-tuned with \"C-RLFT (Conditioned Reinforcement Learning Fine-Tuning)\" - a strategy inspired by offline reinforcement learning. It has been trained on mixed-quality data without preference labels.\n\n- For OpenChat fine-tuned on Mistral 7B, check out [OpenChat 7B](/models/openchat/openchat-7b).\n- For OpenChat fine-tuned on Llama 8B, check out [OpenChat 8B](/models/openchat/openchat-8b).\n\n#open-source",
+ "context_length": 8192,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Mistral",
+ "instruct_type": "openchat"
+ },
+ "pricing": {
+ "prompt": "0.000000055",
+ "completion": "0.000000055",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 8192,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "neversleep/noromaid-20b",
+ "name": "Noromaid 20B",
+ "created": 1700956800,
+ "description": "A collab between IkariDev and Undi. This merge is suitable for RP, ERP, and general knowledge.\n\n#merge #uncensored",
+ "context_length": 8192,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Llama2",
+ "instruct_type": "alpaca"
+ },
+ "pricing": {
+ "prompt": "0.0000015",
+ "completion": "0.00000225",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 8192,
+ "max_completion_tokens": 2048,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "anthropic/claude-instant-1.1",
+ "name": "Anthropic: Claude Instant v1.1",
+ "created": 1700611200,
+ "description": "Anthropic's model for low-latency, high throughput text generation. Supports hundreds of pages of text.",
+ "context_length": 100000,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Claude",
+ "instruct_type": "claude"
+ },
+ "pricing": { "prompt": "0.0000008", "completion": "0.0000024", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 100000,
+ "max_completion_tokens": 2048,
+ "is_moderated": true
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "anthropic/claude-2.1",
+ "name": "Anthropic: Claude v2.1",
+ "created": 1700611200,
+ "description": "Claude 2 delivers advancements in key capabilities for enterprises—including an industry-leading 200K token context window, significant reductions in rates of model hallucination, system prompts and a new beta feature: tool use.",
+ "context_length": 200000,
+ "architecture": { "modality": "text->text", "tokenizer": "Claude", "instruct_type": null },
+ "pricing": { "prompt": "0.000008", "completion": "0.000024", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 200000,
+ "max_completion_tokens": 4096,
+ "is_moderated": true
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "anthropic/claude-2.1:beta",
+ "name": "Anthropic: Claude v2.1 (self-moderated)",
+ "created": 1700611200,
+ "description": "Claude 2 delivers advancements in key capabilities for enterprises—including an industry-leading 200K token context window, significant reductions in rates of model hallucination, system prompts and a new beta feature: tool use.\n\n_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/models/anthropic/claude-2.1) variant._",
+ "context_length": 200000,
+ "architecture": { "modality": "text->text", "tokenizer": "Claude", "instruct_type": null },
+ "pricing": { "prompt": "0.000008", "completion": "0.000024", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 200000,
+ "max_completion_tokens": 4096,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "anthropic/claude-2",
+ "name": "Anthropic: Claude v2",
+ "created": 1700611200,
+ "description": "Claude 2 delivers advancements in key capabilities for enterprises—including an industry-leading 200K token context window, significant reductions in rates of model hallucination, system prompts and a new beta feature: tool use.",
+ "context_length": 200000,
+ "architecture": { "modality": "text->text", "tokenizer": "Claude", "instruct_type": null },
+ "pricing": { "prompt": "0.000008", "completion": "0.000024", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 200000,
+ "max_completion_tokens": 4096,
+ "is_moderated": true
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "anthropic/claude-2:beta",
+ "name": "Anthropic: Claude v2 (self-moderated)",
+ "created": 1700611200,
+ "description": "Claude 2 delivers advancements in key capabilities for enterprises—including an industry-leading 200K token context window, significant reductions in rates of model hallucination, system prompts and a new beta feature: tool use.\n\n_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/models/anthropic/claude-2) variant._",
+ "context_length": 200000,
+ "architecture": { "modality": "text->text", "tokenizer": "Claude", "instruct_type": null },
+ "pricing": { "prompt": "0.000008", "completion": "0.000024", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 200000,
+ "max_completion_tokens": 4096,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "teknium/openhermes-2.5-mistral-7b",
+ "name": "OpenHermes 2.5 Mistral 7B",
+ "created": 1700438400,
+ "description": "A continuation of [OpenHermes 2 model](/models/teknium/openhermes-2-mistral-7b), trained on additional code datasets.\nPotentially the most interesting finding from training on a good ratio (est. of around 7-14% of the total dataset) of code instruction was that it has boosted several non-code benchmarks, including TruthfulQA, AGIEval, and GPT4All suite. It did however reduce BigBench benchmark score, but the net gain overall is significant.",
+ "context_length": 4096,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Mistral",
+ "instruct_type": "chatml"
+ },
+ "pricing": {
+ "prompt": "0.00000017",
+ "completion": "0.00000017",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 4096,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
},
{
"id": "openai/gpt-4-vision-preview",
"name": "OpenAI: GPT-4 Vision",
+ "created": 1699833600,
"description": "Ability to understand images, in addition to all other [GPT-4 Turbo capabilties](/models/openai/gpt-4-turbo). Training data: up to Apr 2023.\n\n**Note:** heavily rate limited by OpenAI while in preview.\n\n#multimodal",
- "pricing": { "prompt": "0.00001", "completion": "0.00003", "image": "0.01445", "request": "0" },
"context_length": 128000,
- "architecture": { "modality": "multimodal", "tokenizer": "GPT", "instruct_type": null },
- "top_provider": { "max_completion_tokens": 4096, "is_moderated": true },
- "per_request_limits": { "prompt_tokens": "445602", "completion_tokens": "148534" }
+ "architecture": { "modality": "text+image->text", "tokenizer": "GPT", "instruct_type": null },
+ "pricing": {
+ "prompt": "0.00001",
+ "completion": "0.00003",
+ "image": "0.01445",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 128000,
+ "max_completion_tokens": 4096,
+ "is_moderated": true
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "lizpreciatior/lzlv-70b-fp16-hf",
+ "name": "lzlv 70B",
+ "created": 1699747200,
+ "description": "A Mythomax/MLewd_13B-style merge of selected 70B models.\nA multi-model merge of several LLaMA2 70B finetunes for roleplaying and creative work. The goal was to create a model that combines creativity with intelligence for an enhanced experience.\n\n#merge #uncensored",
+ "context_length": 4096,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Llama2",
+ "instruct_type": "airoboros"
+ },
+ "pricing": {
+ "prompt": "0.00000035",
+ "completion": "0.0000004",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 4096,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "alpindale/goliath-120b",
+ "name": "Goliath 120B",
+ "created": 1699574400,
+ "description": "A large LLM created by combining two fine-tuned Llama 70B models into one 120B model. Combines Xwin and Euryale.\n\nCredits to\n- [@chargoddard](https://huggingface.co/chargoddard) for developing the framework used to merge the model - [mergekit](https://github.com/cg123/mergekit).\n- [@Undi95](https://huggingface.co/Undi95) for helping with the merge ratios.\n\n#merge",
+ "context_length": 6144,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Llama2",
+ "instruct_type": "airoboros"
+ },
+ "pricing": {
+ "prompt": "0.000009375",
+ "completion": "0.000009375",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 6144,
+ "max_completion_tokens": 400,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "undi95/toppy-m-7b:free",
+ "name": "Toppy M 7B (free)",
+ "created": 1699574400,
+ "description": "A wild 7B parameter model that merges several models using the new task_arithmetic merge method from mergekit.\nList of merged models:\n- NousResearch/Nous-Capybara-7B-V1.9\n- [HuggingFaceH4/zephyr-7b-beta](/models/huggingfaceh4/zephyr-7b-beta)\n- lemonilia/AshhLimaRP-Mistral-7B\n- Vulkane/120-Days-of-Sodom-LoRA-Mistral-7b\n- Undi95/Mistral-pippa-sharegpt-7b-qlora\n\n#merge #uncensored\n\n_These are free, rate-limited endpoints for [Toppy M 7B](/models/undi95/toppy-m-7b). Outputs may be cached. Read about rate limits [here](/docs/limits)._",
+ "context_length": 4096,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Mistral",
+ "instruct_type": "alpaca"
+ },
+ "pricing": { "prompt": "0", "completion": "0", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 4096,
+ "max_completion_tokens": 2048,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "undi95/toppy-m-7b",
+ "name": "Toppy M 7B",
+ "created": 1699574400,
+ "description": "A wild 7B parameter model that merges several models using the new task_arithmetic merge method from mergekit.\nList of merged models:\n- NousResearch/Nous-Capybara-7B-V1.9\n- [HuggingFaceH4/zephyr-7b-beta](/models/huggingfaceh4/zephyr-7b-beta)\n- lemonilia/AshhLimaRP-Mistral-7B\n- Vulkane/120-Days-of-Sodom-LoRA-Mistral-7b\n- Undi95/Mistral-pippa-sharegpt-7b-qlora\n\n#merge #uncensored",
+ "context_length": 4096,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Mistral",
+ "instruct_type": "alpaca"
+ },
+ "pricing": {
+ "prompt": "0.00000007",
+ "completion": "0.00000007",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 4096,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "undi95/toppy-m-7b:nitro",
+ "name": "Toppy M 7B (nitro)",
+ "created": 1699574400,
+ "description": "A wild 7B parameter model that merges several models using the new task_arithmetic merge method from mergekit.\nList of merged models:\n- NousResearch/Nous-Capybara-7B-V1.9\n- [HuggingFaceH4/zephyr-7b-beta](/models/huggingfaceh4/zephyr-7b-beta)\n- lemonilia/AshhLimaRP-Mistral-7B\n- Vulkane/120-Days-of-Sodom-LoRA-Mistral-7b\n- Undi95/Mistral-pippa-sharegpt-7b-qlora\n\n#merge #uncensored\n\n_These are higher-throughput endpoints for [Toppy M 7B](/models/undi95/toppy-m-7b). They may have higher prices._",
+ "context_length": 4096,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Mistral",
+ "instruct_type": "alpaca"
+ },
+ "pricing": {
+ "prompt": "0.00000007",
+ "completion": "0.00000007",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 4096,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "openrouter/auto",
+ "name": "Auto (best for prompt)",
+ "created": 1699401600,
+ "description": "Depending on their size, subject, and complexity, your prompts will be sent to [Llama 3 70B Instruct](/models/meta-llama/llama-3-70b-instruct), [Claude 3.5 Sonnet (self-moderated)](/models/anthropic/claude-3.5-sonnet:beta) or [GPT-4o](/models/openai/gpt-4o). To see which model was used, visit [Activity](/activity).\n\nA major redesign of this router is coming soon. Stay tuned on [Discord](https://discord.gg/fVyRaUDgxW) for updates.",
+ "context_length": 200000,
+ "architecture": { "modality": "text->text", "tokenizer": "Router", "instruct_type": null },
+ "pricing": { "prompt": "-1", "completion": "-1", "request": "-1", "image": "-1" },
+ "top_provider": {
+ "context_length": null,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "openai/gpt-4-1106-preview",
+ "name": "OpenAI: GPT-4 Turbo (older v1106)",
+ "created": 1699228800,
+ "description": "The latest GPT-4 Turbo model with vision capabilities. Vision requests can now use JSON mode and function calling.\n\nTraining data: up to April 2023.",
+ "context_length": 128000,
+ "architecture": { "modality": "text->text", "tokenizer": "GPT", "instruct_type": null },
+ "pricing": { "prompt": "0.00001", "completion": "0.00003", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 128000,
+ "max_completion_tokens": 4096,
+ "is_moderated": true
+ },
+ "per_request_limits": null
},
{
- "id": "google/gemma-7b-it",
- "name": "Google: Gemma 7B",
- "description": "Gemma by Google is an advanced, open-source language model family, leveraging the latest in decoder-only, text-to-text technology. It offers English language capabilities across text generation tasks like question answering, summarization, and reasoning. The Gemma 7B variant is comparable in performance to leading open source models.\n\nUsage of Gemma is subject to Google's [Gemma Terms of Use](https://ai.google.dev/gemma/terms).",
- "pricing": { "prompt": "0.00000013", "completion": "0.00000013", "image": "0", "request": "0" },
+ "id": "openai/gpt-3.5-turbo-1106",
+ "name": "OpenAI: GPT-3.5 Turbo 16k (older v1106)",
+ "created": 1699228800,
+ "description": "An older GPT-3.5 Turbo model with improved instruction following, JSON mode, reproducible outputs, parallel function calling, and more. Training data: up to Sep 2021.",
+ "context_length": 16385,
+ "architecture": { "modality": "text->text", "tokenizer": "GPT", "instruct_type": null },
+ "pricing": { "prompt": "0.000001", "completion": "0.000002", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 16385,
+ "max_completion_tokens": 4096,
+ "is_moderated": true
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "google/palm-2-codechat-bison-32k",
+ "name": "Google: PaLM 2 Code Chat 32k",
+ "created": 1698969600,
+ "description": "PaLM 2 fine-tuned for chatbot conversations that help with code-related questions.",
+ "context_length": 131040,
+ "architecture": { "modality": "text->text", "tokenizer": "PaLM", "instruct_type": null },
+ "pricing": {
+ "prompt": "0.00000025",
+ "completion": "0.0000005",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 131072,
+ "max_completion_tokens": 32768,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "google/palm-2-chat-bison-32k",
+ "name": "Google: PaLM 2 Chat 32k",
+ "created": 1698969600,
+ "description": "PaLM 2 is a language model by Google with improved multilingual, reasoning and coding capabilities.",
+ "context_length": 131040,
+ "architecture": { "modality": "text->text", "tokenizer": "PaLM", "instruct_type": null },
+ "pricing": {
+ "prompt": "0.00000025",
+ "completion": "0.0000005",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 131072,
+ "max_completion_tokens": 32768,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "jondurbin/airoboros-l2-70b",
+ "name": "Airoboros 70B",
+ "created": 1698537600,
+ "description": "A Llama 2 70B fine-tune using synthetic data (the Airoboros dataset).\n\nCurrently based on [jondurbin/airoboros-l2-70b](https://huggingface.co/jondurbin/airoboros-l2-70b-2.2.1), but might get updated in the future.",
+ "context_length": 4096,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Llama2",
+ "instruct_type": "airoboros"
+ },
+ "pricing": { "prompt": "0.0000005", "completion": "0.0000005", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 4096,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "xwin-lm/xwin-lm-70b",
+ "name": "Xwin 70B",
+ "created": 1697328000,
+ "description": "Xwin-LM aims to develop and open-source alignment tech for LLMs. Our first release, built-upon on the [Llama2](/models/${Model.Llama_2_13B_Chat}) base models, ranked TOP-1 on AlpacaEval. Notably, it's the first to surpass [GPT-4](/models/${Model.GPT_4}) on this benchmark. The project will be continuously updated.",
"context_length": 8192,
- "architecture": { "modality": "text", "tokenizer": "Llama2", "instruct_type": "gemma" },
- "top_provider": { "max_completion_tokens": null, "is_moderated": false },
- "per_request_limits": { "prompt_tokens": "34277093", "completion_tokens": "34277093" }
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Llama2",
+ "instruct_type": "airoboros"
+ },
+ "pricing": {
+ "prompt": "0.00000375",
+ "completion": "0.00000375",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 8192,
+ "max_completion_tokens": 400,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "mistralai/mistral-7b-instruct-v0.1",
+ "name": "Mistral: Mistral 7B Instruct v0.1",
+ "created": 1695859200,
+ "description": "A 7.3B parameter model that outperforms Llama 2 13B on all benchmarks, with optimizations for speed and context length.",
+ "context_length": 4096,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Mistral",
+ "instruct_type": "mistral"
+ },
+ "pricing": {
+ "prompt": "0.000000055",
+ "completion": "0.000000055",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 4096,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "openai/gpt-3.5-turbo-instruct",
+ "name": "OpenAI: GPT-3.5 Turbo Instruct",
+ "created": 1695859200,
+ "description": "This model is a variant of GPT-3.5 Turbo tuned for instructional prompts and omitting chat-related optimizations. Training data: up to Sep 2021.",
+ "context_length": 4095,
+ "architecture": { "modality": "text->text", "tokenizer": "GPT", "instruct_type": "chatml" },
+ "pricing": { "prompt": "0.0000015", "completion": "0.000002", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 4095,
+ "max_completion_tokens": 4096,
+ "is_moderated": true
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "pygmalionai/mythalion-13b",
+ "name": "Pygmalion: Mythalion 13B",
+ "created": 1693612800,
+ "description": "A blend of the new Pygmalion-13b and MythoMax. #merge",
+ "context_length": 8192,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Llama2",
+ "instruct_type": "alpaca"
+ },
+ "pricing": {
+ "prompt": "0.000001125",
+ "completion": "0.000001125",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 8192,
+ "max_completion_tokens": 400,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "openai/gpt-4-32k-0314",
+ "name": "OpenAI: GPT-4 32k (older v0314)",
+ "created": 1693180800,
+ "description": "GPT-4-32k is an extended version of GPT-4, with the same capabilities but quadrupled context length, allowing for processing up to 40 pages of text in a single pass. This is particularly beneficial for handling longer content like interacting with PDFs without an external vector database. Training data: up to Sep 2021.",
+ "context_length": 32767,
+ "architecture": { "modality": "text->text", "tokenizer": "GPT", "instruct_type": null },
+ "pricing": { "prompt": "0.00006", "completion": "0.00012", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 32767,
+ "max_completion_tokens": 4096,
+ "is_moderated": true
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "openai/gpt-4-32k",
+ "name": "OpenAI: GPT-4 32k",
+ "created": 1693180800,
+ "description": "GPT-4-32k is an extended version of GPT-4, with the same capabilities but quadrupled context length, allowing for processing up to 40 pages of text in a single pass. This is particularly beneficial for handling longer content like interacting with PDFs without an external vector database. Training data: up to Sep 2021.",
+ "context_length": 32767,
+ "architecture": { "modality": "text->text", "tokenizer": "GPT", "instruct_type": null },
+ "pricing": { "prompt": "0.00006", "completion": "0.00012", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 32767,
+ "max_completion_tokens": 4096,
+ "is_moderated": true
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "openai/gpt-3.5-turbo-16k",
+ "name": "OpenAI: GPT-3.5 Turbo 16k",
+ "created": 1693180800,
+ "description": "This model offers four times the context length of gpt-3.5-turbo, allowing it to support approximately 20 pages of text in a single request at a higher cost. Training data: up to Sep 2021.",
+ "context_length": 16385,
+ "architecture": { "modality": "text->text", "tokenizer": "GPT", "instruct_type": null },
+ "pricing": { "prompt": "0.000003", "completion": "0.000004", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 16385,
+ "max_completion_tokens": 4096,
+ "is_moderated": true
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "nousresearch/nous-hermes-llama2-13b",
+ "name": "Nous: Hermes 13B",
+ "created": 1692489600,
+ "description": "A state-of-the-art language model fine-tuned on over 300k instructions by Nous Research, with Teknium and Emozilla leading the fine tuning process.",
+ "context_length": 4096,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Llama2",
+ "instruct_type": "alpaca"
+ },
+ "pricing": {
+ "prompt": "0.00000017",
+ "completion": "0.00000017",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 4096,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "huggingfaceh4/zephyr-7b-beta:free",
+ "name": "Hugging Face: Zephyr 7B (free)",
+ "created": 1690934400,
+ "description": "Zephyr is a series of language models that are trained to act as helpful assistants. Zephyr-7B-β is the second model in the series, and is a fine-tuned version of [mistralai/Mistral-7B-v0.1](/models/mistralai/mistral-7b-instruct-v0.1) that was trained on a mix of publicly available, synthetic datasets using Direct Preference Optimization (DPO).\n\n_These are free, rate-limited endpoints for [Zephyr 7B](/models/huggingfaceh4/zephyr-7b-beta). Outputs may be cached. Read about rate limits [here](/docs/limits)._",
+ "context_length": 4096,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Mistral",
+ "instruct_type": "zephyr"
+ },
+ "pricing": { "prompt": "0", "completion": "0", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 4096,
+ "max_completion_tokens": 2048,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "mancer/weaver",
+ "name": "Mancer: Weaver (alpha)",
+ "created": 1690934400,
+ "description": "An attempt to recreate Claude-style verbosity, but don't expect the same level of coherence or memory. Meant for use in roleplay/narrative situations.",
+ "context_length": 8000,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Llama2",
+ "instruct_type": "alpaca"
+ },
+ "pricing": {
+ "prompt": "0.000001875",
+ "completion": "0.00000225",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 8000,
+ "max_completion_tokens": 1000,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "anthropic/claude-instant-1.0",
+ "name": "Anthropic: Claude Instant v1.0",
+ "created": 1690502400,
+ "description": "Anthropic's model for low-latency, high throughput text generation. Supports hundreds of pages of text.",
+ "context_length": 100000,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Claude",
+ "instruct_type": "claude"
+ },
+ "pricing": { "prompt": "0.0000008", "completion": "0.0000024", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 100000,
+ "max_completion_tokens": 4096,
+ "is_moderated": true
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "anthropic/claude-1.2",
+ "name": "Anthropic: Claude v1.2",
+ "created": 1690502400,
+ "description": "Anthropic's model for low-latency, high throughput text generation. Supports hundreds of pages of text.",
+ "context_length": 100000,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Claude",
+ "instruct_type": "claude"
+ },
+ "pricing": { "prompt": "0.000008", "completion": "0.000024", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 100000,
+ "max_completion_tokens": 4096,
+ "is_moderated": true
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "anthropic/claude-1",
+ "name": "Anthropic: Claude v1",
+ "created": 1690502400,
+ "description": "Anthropic's model for low-latency, high throughput text generation. Supports hundreds of pages of text.",
+ "context_length": 100000,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Claude",
+ "instruct_type": "claude"
+ },
+ "pricing": { "prompt": "0.000008", "completion": "0.000024", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 100000,
+ "max_completion_tokens": 4096,
+ "is_moderated": true
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "anthropic/claude-instant-1",
+ "name": "Anthropic: Claude Instant v1",
+ "created": 1690502400,
+ "description": "Anthropic's model for low-latency, high throughput text generation. Supports hundreds of pages of text.",
+ "context_length": 100000,
+ "architecture": { "modality": "text->text", "tokenizer": "Claude", "instruct_type": null },
+ "pricing": { "prompt": "0.0000008", "completion": "0.0000024", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 100000,
+ "max_completion_tokens": 4096,
+ "is_moderated": true
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "anthropic/claude-instant-1:beta",
+ "name": "Anthropic: Claude Instant v1 (self-moderated)",
+ "created": 1690502400,
+ "description": "Anthropic's model for low-latency, high throughput text generation. Supports hundreds of pages of text.\n\n_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/models/anthropic/claude-instant-1) variant._",
+ "context_length": 100000,
+ "architecture": { "modality": "text->text", "tokenizer": "Claude", "instruct_type": null },
+ "pricing": { "prompt": "0.0000008", "completion": "0.0000024", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 100000,
+ "max_completion_tokens": 4096,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "anthropic/claude-2.0",
+ "name": "Anthropic: Claude v2.0",
+ "created": 1690502400,
+ "description": "Anthropic's flagship model. Superior performance on tasks that require complex reasoning. Supports hundreds of pages of text.",
+ "context_length": 100000,
+ "architecture": { "modality": "text->text", "tokenizer": "Claude", "instruct_type": null },
+ "pricing": { "prompt": "0.000008", "completion": "0.000024", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 100000,
+ "max_completion_tokens": 4096,
+ "is_moderated": true
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "anthropic/claude-2.0:beta",
+ "name": "Anthropic: Claude v2.0 (self-moderated)",
+ "created": 1690502400,
+ "description": "Anthropic's flagship model. Superior performance on tasks that require complex reasoning. Supports hundreds of pages of text.\n\n_This is a faster endpoint, made available in collaboration with Anthropic, that is self-moderated: response moderation happens on the provider's side instead of OpenRouter's. For requests that pass moderation, it's identical to the [Standard](/models/anthropic/claude-2.0) variant._",
+ "context_length": 100000,
+ "architecture": { "modality": "text->text", "tokenizer": "Claude", "instruct_type": null },
+ "pricing": { "prompt": "0.000008", "completion": "0.000024", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 100000,
+ "max_completion_tokens": 4096,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "undi95/remm-slerp-l2-13b",
+ "name": "ReMM SLERP 13B",
+ "created": 1689984000,
+ "description": "A recreation trial of the original MythoMax-L2-B13 but with updated models. #merge",
+ "context_length": 4096,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Llama2",
+ "instruct_type": "alpaca"
+ },
+ "pricing": {
+ "prompt": "0.000001125",
+ "completion": "0.000001125",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 4096,
+ "max_completion_tokens": 400,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "undi95/remm-slerp-l2-13b:extended",
+ "name": "ReMM SLERP 13B (extended)",
+ "created": 1689984000,
+ "description": "A recreation trial of the original MythoMax-L2-B13 but with updated models. #merge\n\n_These are extended-context endpoints for [ReMM SLERP 13B](/models/undi95/remm-slerp-l2-13b). They may have higher prices._",
+ "context_length": 6144,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Llama2",
+ "instruct_type": "alpaca"
+ },
+ "pricing": {
+ "prompt": "0.000001125",
+ "completion": "0.000001125",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 6144,
+ "max_completion_tokens": 400,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "google/palm-2-codechat-bison",
+ "name": "Google: PaLM 2 Code Chat",
+ "created": 1689811200,
+ "description": "PaLM 2 fine-tuned for chatbot conversations that help with code-related questions.",
+ "context_length": 28672,
+ "architecture": { "modality": "text->text", "tokenizer": "PaLM", "instruct_type": null },
+ "pricing": {
+ "prompt": "0.00000025",
+ "completion": "0.0000005",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 28672,
+ "max_completion_tokens": 4096,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "google/palm-2-chat-bison",
+ "name": "Google: PaLM 2 Chat",
+ "created": 1689811200,
+ "description": "PaLM 2 is a language model by Google with improved multilingual, reasoning and coding capabilities.",
+ "context_length": 36864,
+ "architecture": { "modality": "text->text", "tokenizer": "PaLM", "instruct_type": null },
+ "pricing": {
+ "prompt": "0.00000025",
+ "completion": "0.0000005",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 36864,
+ "max_completion_tokens": 4096,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "gryphe/mythomax-l2-13b",
+ "name": "MythoMax 13B",
+ "created": 1688256000,
+ "description": "One of the highest performing and most popular fine-tunes of Llama 2 13B, with rich descriptions and roleplay. #merge",
+ "context_length": 4096,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Llama2",
+ "instruct_type": "alpaca"
+ },
+ "pricing": { "prompt": "0.0000001", "completion": "0.0000001", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 4096,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
},
{
"id": "gryphe/mythomax-l2-13b:nitro",
"name": "MythoMax 13B (nitro)",
- "description": "One of the highest performing and most popular fine-tunes of Llama 2 13B, with rich descriptions and roleplay. #merge\n\nNote: this is a higher-throughput version of [this model](/models/gryphe/mythomax-l2-13b), and may have higher prices and slightly different outputs.",
+ "created": 1688256000,
+ "description": "One of the highest performing and most popular fine-tunes of Llama 2 13B, with rich descriptions and roleplay. #merge\n\n_These are higher-throughput endpoints for [MythoMax 13B](/models/gryphe/mythomax-l2-13b). They may have higher prices._",
+ "context_length": 4096,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Llama2",
+ "instruct_type": "alpaca"
+ },
"pricing": { "prompt": "0.0000002", "completion": "0.0000002", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 4096,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "gryphe/mythomax-l2-13b:extended",
+ "name": "MythoMax 13B (extended)",
+ "created": 1688256000,
+ "description": "One of the highest performing and most popular fine-tunes of Llama 2 13B, with rich descriptions and roleplay. #merge\n\n_These are extended-context endpoints for [MythoMax 13B](/models/gryphe/mythomax-l2-13b). They may have higher prices._",
+ "context_length": 8192,
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Llama2",
+ "instruct_type": "alpaca"
+ },
+ "pricing": {
+ "prompt": "0.000001125",
+ "completion": "0.000001125",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 8192,
+ "max_completion_tokens": 400,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "meta-llama/llama-2-13b-chat",
+ "name": "Meta: Llama v2 13B Chat",
+ "created": 1687219200,
+ "description": "A 13 billion parameter language model from Meta, fine tuned for chat completions",
"context_length": 4096,
- "architecture": { "modality": "text", "tokenizer": "Llama2", "instruct_type": "alpaca" },
- "top_provider": { "max_completion_tokens": null, "is_moderated": false },
- "per_request_limits": { "prompt_tokens": "22280110", "completion_tokens": "22280110" }
+ "architecture": {
+ "modality": "text->text",
+ "tokenizer": "Llama2",
+ "instruct_type": "llama2"
+ },
+ "pricing": {
+ "prompt": "0.00000027",
+ "completion": "0.00000027",
+ "image": "0",
+ "request": "0"
+ },
+ "top_provider": {
+ "context_length": 4096,
+ "max_completion_tokens": null,
+ "is_moderated": false
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "openai/gpt-4-0314",
+ "name": "OpenAI: GPT-4 (older v0314)",
+ "created": 1685232000,
+ "description": "GPT-4-0314 is the first version of GPT-4 released, with a context length of 8,192 tokens, and was supported until June 14. Training data: up to Sep 2021.",
+ "context_length": 8191,
+ "architecture": { "modality": "text->text", "tokenizer": "GPT", "instruct_type": null },
+ "pricing": { "prompt": "0.00003", "completion": "0.00006", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 8191,
+ "max_completion_tokens": 4096,
+ "is_moderated": true
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "openai/gpt-4",
+ "name": "OpenAI: GPT-4",
+ "created": 1685232000,
+ "description": "OpenAI's flagship model, GPT-4 is a large-scale multimodal language model capable of solving difficult problems with greater accuracy than previous models due to its broader general knowledge and advanced reasoning capabilities. Training data: up to Sep 2021.",
+ "context_length": 8191,
+ "architecture": { "modality": "text->text", "tokenizer": "GPT", "instruct_type": null },
+ "pricing": { "prompt": "0.00003", "completion": "0.00006", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 8191,
+ "max_completion_tokens": 4096,
+ "is_moderated": true
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "openai/gpt-3.5-turbo-0301",
+ "name": "OpenAI: GPT-3.5 Turbo (older v0301)",
+ "created": 1685232000,
+ "description": "GPT-3.5 Turbo is OpenAI's fastest model. It can understand and generate natural language or code, and is optimized for chat and traditional completion tasks.\n\nTraining data up to Sep 2021.",
+ "context_length": 4095,
+ "architecture": { "modality": "text->text", "tokenizer": "GPT", "instruct_type": null },
+ "pricing": { "prompt": "0.000001", "completion": "0.000002", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 4095,
+ "max_completion_tokens": 4096,
+ "is_moderated": true
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "openai/gpt-3.5-turbo-0125",
+ "name": "OpenAI: GPT-3.5 Turbo 16k",
+ "created": 1685232000,
+ "description": "The latest GPT-3.5 Turbo model with improved instruction following, JSON mode, reproducible outputs, parallel function calling, and more. Training data: up to Sep 2021.\n\nThis version has a higher accuracy at responding in requested formats and a fix for a bug which caused a text encoding issue for non-English language function calls.",
+ "context_length": 16385,
+ "architecture": { "modality": "text->text", "tokenizer": "GPT", "instruct_type": null },
+ "pricing": { "prompt": "0.0000005", "completion": "0.0000015", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 16385,
+ "max_completion_tokens": 4096,
+ "is_moderated": true
+ },
+ "per_request_limits": null
+ },
+ {
+ "id": "openai/gpt-3.5-turbo",
+ "name": "OpenAI: GPT-3.5 Turbo",
+ "created": 1685232000,
+ "description": "GPT-3.5 Turbo is OpenAI's fastest model. It can understand and generate natural language or code, and is optimized for chat and traditional completion tasks.\n\nTraining data up to Sep 2021.",
+ "context_length": 16385,
+ "architecture": { "modality": "text->text", "tokenizer": "GPT", "instruct_type": null },
+ "pricing": { "prompt": "0.0000005", "completion": "0.0000015", "image": "0", "request": "0" },
+ "top_provider": {
+ "context_length": 16385,
+ "max_completion_tokens": 4096,
+ "is_moderated": true
+ },
+ "per_request_limits": null
}
]
diff --git a/src/libs/agent-runtime/openrouter/index.ts b/src/libs/agent-runtime/openrouter/index.ts
index 250be93a5c40..109e17bfbbee 100644
--- a/src/libs/agent-runtime/openrouter/index.ts
+++ b/src/libs/agent-runtime/openrouter/index.ts
@@ -23,7 +23,8 @@ export const LobeOpenRouterAI = LobeOpenAICompatibleFactory({
description: model.description,
displayName: model.name,
enabled: LOBE_DEFAULT_MODEL_LIST.find((m) => model.id.endsWith(m.id))?.enabled || false,
- functionCall: model.description.includes('function calling'),
+ functionCall:
+ model.description.includes('function calling') || model.description.includes('tools'),
id: model.id,
maxTokens:
typeof model.top_provider.max_completion_tokens === 'number'
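The widened `functionCall` check above is a pure heuristic: OpenRouter's `/models` payload (see the fixture above) carries no structured capability flag, so the runtime greps the free-text `description`. A minimal sketch of the predicate, with a hypothetical `OpenRouterModelCard` shape standing in for the runtime's real type:

```ts
// Sketch only; the real card type lives in the OpenRouter runtime's ./type.
interface OpenRouterModelCard {
  description: string;
  id: string;
}

// A model is treated as tool-capable when its description mentions either
// phrase. Models that support tools but mention neither will be missed, and
// a description that merely discusses tools will false-positive; that is the
// usual trade-off of description-based detection.
const supportsFunctionCall = (model: OpenRouterModelCard): boolean =>
  model.description.includes('function calling') || model.description.includes('tools');
```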
diff --git a/src/libs/agent-runtime/types/type.ts b/src/libs/agent-runtime/types/type.ts
index 6d074670e0da..ac8e9b2e46bf 100644
--- a/src/libs/agent-runtime/types/type.ts
+++ b/src/libs/agent-runtime/types/type.ts
@@ -22,6 +22,7 @@ export interface CreateChatCompletionOptions {
}
export enum ModelProvider {
+ Ai21 = 'ai21',
Ai360 = 'ai360',
Anthropic = 'anthropic',
Azure = 'azure',
@@ -29,6 +30,7 @@ export enum ModelProvider {
Bedrock = 'bedrock',
DeepSeek = 'deepseek',
FireworksAI = 'fireworksai',
+ Github = 'github',
Google = 'google',
Groq = 'groq',
Minimax = 'minimax',
diff --git a/src/locales/default/chat.ts b/src/locales/default/chat.ts
index 27668fba2896..a68330b94099 100644
--- a/src/locales/default/chat.ts
+++ b/src/locales/default/chat.ts
@@ -7,6 +7,12 @@ export default {
agentDefaultMessageWithSystemRole: '你好,我是 **{{name}}**,{{systemRole}},让我们开始对话吧!',
agentDefaultMessageWithoutEdit: '你好,我是 **{{name}}**,让我们开始对话吧!',
agentsAndConversations: '助手与会话',
+ artifact: {
+ generating: '生成中',
+ thinking: '思考中',
+ thought: '思考过程',
+ unknownTitle: '未命名作品',
+ },
backToBottom: '跳转至当前',
chatList: {
longMessageDetail: '查看详情',
@@ -160,6 +166,7 @@ export default {
clear: '删除语音',
},
updateAgent: '更新助理信息',
+
upload: {
action: {
fileUpload: '上传文件',
diff --git a/src/locales/default/error.ts b/src/locales/default/error.ts
index 556b7a4750be..78b5eae59380 100644
--- a/src/locales/default/error.ts
+++ b/src/locales/default/error.ts
@@ -107,6 +107,9 @@ export default {
SubscriptionPlanLimit:
'您的订阅额度已用尽,无法使用该功能,请升级到更高计划,或购买资源包后继续使用',
+ // Github Token
+ InvalidGithubToken: 'Github PAT 不正确或为空,请检查 Github PAT 后重试',
+
/* eslint-enable */
},
stt: {
diff --git a/src/locales/default/modelProvider.ts b/src/locales/default/modelProvider.ts
index bc28461ab04d..2bb3c7ee5c66 100644
--- a/src/locales/default/modelProvider.ts
+++ b/src/locales/default/modelProvider.ts
@@ -52,6 +52,13 @@ export default {
title: '使用自定义 Bedrock 鉴权信息',
},
},
+ github: {
+ personalAccessToken: {
+ desc: '填入你的 Github PAT,点击[这里](https://github.com/settings/tokens) 创建',
+ placeholder: 'ghp_xxxxxx',
+ title: 'Github PAT',
+ },
+ },
ollama: {
checker: {
desc: '测试代理地址是否正确填写',
diff --git a/src/locales/default/portal.ts b/src/locales/default/portal.ts
index c906e12a3ea9..79affdedc0cd 100644
--- a/src/locales/default/portal.ts
+++ b/src/locales/default/portal.ts
@@ -6,13 +6,29 @@ export default {
file: '文件',
},
},
+ Plugins: '插件',
actions: {
genAiMessage: '创建助手消息',
summary: '总结',
summaryTooltip: '总结当前内容',
},
+ artifacts: {
+ display: {
+ code: '代码',
+ preview: '预览',
+ },
+ svg: {
+ copyAsImage: '复制为图片',
+ copyFail: '复制失败,错误原因:{{error}}',
+ copySuccess: '图片复制成功',
+ download: {
+ png: '下载为 PNG',
+ svg: '下载为 SVG',
+ },
+ },
+ },
emptyArtifactList: '当前 Artifacts 列表为空,请在会话中按需使用插件后再查看',
- emptyKnowledgeList: '当前知识列表为空,请在会话中按需开启知识库后再查看',
+ emptyKnowledgeList: '当前知识列表为空',
files: '文件',
messageDetail: '消息详情',
title: '工作区',
diff --git a/src/server/globalConfig/index.ts b/src/server/globalConfig/index.ts
index 004eaead6666..794ea6b75e1c 100644
--- a/src/server/globalConfig/index.ts
+++ b/src/server/globalConfig/index.ts
@@ -6,6 +6,7 @@ import { getLLMConfig } from '@/config/llm';
import {
BedrockProviderCard,
FireworksAIProviderCard,
+ GithubProviderCard,
GoogleProviderCard,
GroqProviderCard,
NovitaProviderCard,
@@ -45,6 +46,9 @@ export const getServerGlobalConfig = () => {
ENABLED_GROQ,
GROQ_MODEL_LIST,
+ ENABLED_GITHUB,
+ GITHUB_MODEL_LIST,
+
ENABLED_DEEPSEEK,
ENABLED_PERPLEXITY,
ENABLED_ANTHROPIC,
@@ -60,6 +64,7 @@ export const getServerGlobalConfig = () => {
ENABLED_STEPFUN,
ENABLED_BAICHUAN,
ENABLED_TAICHU,
+ ENABLED_AI21,
ENABLED_AI360,
ENABLED_SILICONCLOUD,
@@ -97,6 +102,7 @@ export const getServerGlobalConfig = () => {
enabledAccessCode: ACCESS_CODES?.length > 0,
enabledOAuthSSO: enableNextAuth,
languageModel: {
+ ai21: { enabled: ENABLED_AI21 },
ai360: { enabled: ENABLED_AI360 },
anthropic: {
enabled: ENABLED_ANTHROPIC,
@@ -130,6 +136,14 @@ export const getServerGlobalConfig = () => {
}),
},
+ github: {
+ enabled: ENABLED_GITHUB,
+ enabledModels: extractEnabledModels(GITHUB_MODEL_LIST),
+ serverModelCards: transformToChatModelCards({
+ defaultChatModels: GithubProviderCard.chatModels,
+ modelString: GITHUB_MODEL_LIST,
+ }),
+ },
google: {
enabled: ENABLED_GOOGLE,
enabledModels: extractEnabledModels(GOOGLE_MODEL_LIST),
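The new `github` block follows the same recipe as the surrounding providers: `ENABLED_GITHUB` gates the provider, while `GITHUB_MODEL_LIST` is fed through `extractEnabledModels` and `transformToChatModelCards`. A rough sketch of the enable-list extraction, assuming the comma-separated `+model` / `-model` convention the other `*_MODEL_LIST` variables use (the real helper also handles display names and capability suffixes):

```ts
// Simplified stand-in for extractEnabledModels. Returns undefined when the
// env string yields no explicit entries, so provider defaults still apply.
const extractEnabled = (modelString: string = ''): string[] | undefined => {
  const ids = modelString
    .split(',')
    .map((entry) => entry.trim())
    // drop disabled entries like `-gpt-4` (and `-all`)
    .filter((entry) => entry && !entry.startsWith('-'))
    // strip the enable prefix and any `model=Display Name` suffix
    .map((entry) => (entry.startsWith('+') ? entry.slice(1) : entry).split('=')[0]);

  return ids.length > 0 ? ids : undefined;
};
```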
diff --git a/src/store/chat/slices/message/selectors.ts b/src/store/chat/slices/message/selectors.ts
index d260273147f9..7c896b20ead4 100644
--- a/src/store/chat/slices/message/selectors.ts
+++ b/src/store/chat/slices/message/selectors.ts
@@ -16,7 +16,7 @@ import { MetaData } from '@/types/meta';
import { merge } from '@/utils/merge';
import { chatHelpers } from '../../helpers';
-import type { ChatStore } from '../../store';
+import type { ChatStoreState } from '../../initialState';
const getMeta = (message: ChatMessage) => {
switch (message.role) {
@@ -36,10 +36,10 @@ const getMeta = (message: ChatMessage) => {
}
};
-const currentChatKey = (s: ChatStore) => messageMapKey(s.activeId, s.activeTopicId);
+const currentChatKey = (s: ChatStoreState) => messageMapKey(s.activeId, s.activeTopicId);
// 当前激活的消息列表
-const currentChats = (s: ChatStore): ChatMessage[] => {
+const currentChats = (s: ChatStoreState): ChatMessage[] => {
if (!s.activeId) return [];
const messages = s.messagesMap[currentChatKey(s)] || [];
@@ -47,19 +47,19 @@ const currentChats = (s: ChatStore): ChatMessage[] => {
return messages.map((i) => ({ ...i, meta: getMeta(i) }));
};
-const currentToolMessages = (s: ChatStore) => {
+const currentToolMessages = (s: ChatStoreState) => {
const messages = currentChats(s);
return messages.filter((m) => m.role === 'tool');
};
-const currentUserMessages = (s: ChatStore) => {
+const currentUserMessages = (s: ChatStoreState) => {
const messages = currentChats(s);
return messages.filter((m) => m.role === 'user');
};
-const currentUserFiles = (s: ChatStore) => {
+const currentUserFiles = (s: ChatStoreState) => {
const userMessages = currentUserMessages(s);
return userMessages
@@ -70,7 +70,7 @@ const currentUserFiles = (s: ChatStore) => {
const initTime = Date.now();
-const showInboxWelcome = (s: ChatStore): boolean => {
+const showInboxWelcome = (s: ChatStoreState): boolean => {
const isInbox = s.activeId === INBOX_SESSION_ID;
if (!isInbox) return false;
@@ -83,7 +83,7 @@ const showInboxWelcome = (s: ChatStore): boolean => {
// Custom message for new assistant initialization
const currentChatsWithGuideMessage =
(meta: MetaData) =>
- (s: ChatStore): ChatMessage[] => {
+ (s: ChatStoreState): ChatMessage[] => {
// skip tool message
const data = currentChats(s).filter((m) => m.role !== 'tool');
@@ -120,47 +120,49 @@ const currentChatsWithGuideMessage =
return [emptyInboxGuideMessage];
};
-const currentChatIDsWithGuideMessage = (s: ChatStore) => {
+const currentChatIDsWithGuideMessage = (s: ChatStoreState) => {
const meta = sessionMetaSelectors.currentAgentMeta(useSessionStore.getState());
return currentChatsWithGuideMessage(meta)(s).map((s) => s.id);
};
-const currentChatsWithHistoryConfig = (s: ChatStore): ChatMessage[] => {
+const currentChatsWithHistoryConfig = (s: ChatStoreState): ChatMessage[] => {
const chats = currentChats(s);
const config = agentSelectors.currentAgentChatConfig(useAgentStore.getState());
return chatHelpers.getSlicedMessagesWithConfig(chats, config);
};
-const chatsMessageString = (s: ChatStore): string => {
+const chatsMessageString = (s: ChatStoreState): string => {
const chats = currentChatsWithHistoryConfig(s);
return chats.map((m) => m.content).join('');
};
-const getMessageById = (id: string) => (s: ChatStore) =>
+const getMessageById = (id: string) => (s: ChatStoreState) =>
chatHelpers.getMessageById(currentChats(s), id);
-const getMessageByToolCallId = (id: string) => (s: ChatStore) => {
+const getMessageByToolCallId = (id: string) => (s: ChatStoreState) => {
const messages = currentChats(s);
return messages.find((m) => m.tool_call_id === id);
};
-const getTraceIdByMessageId = (id: string) => (s: ChatStore) => getMessageById(id)(s)?.traceId;
+const getTraceIdByMessageId = (id: string) => (s: ChatStoreState) => getMessageById(id)(s)?.traceId;
-const latestMessage = (s: ChatStore) => currentChats(s).at(-1);
+const latestMessage = (s: ChatStoreState) => currentChats(s).at(-1);
-const currentChatLoadingState = (s: ChatStore) => !s.messagesInit;
+const currentChatLoadingState = (s: ChatStoreState) => !s.messagesInit;
-const isCurrentChatLoaded = (s: ChatStore) => !!s.messagesMap[currentChatKey(s)];
+const isCurrentChatLoaded = (s: ChatStoreState) => !!s.messagesMap[currentChatKey(s)];
-const isMessageEditing = (id: string) => (s: ChatStore) => s.messageEditingIds.includes(id);
-const isMessageLoading = (id: string) => (s: ChatStore) => s.messageLoadingIds.includes(id);
+const isMessageEditing = (id: string) => (s: ChatStoreState) => s.messageEditingIds.includes(id);
+const isMessageLoading = (id: string) => (s: ChatStoreState) => s.messageLoadingIds.includes(id);
-const isMessageGenerating = (id: string) => (s: ChatStore) => s.chatLoadingIds.includes(id);
-const isMessageInRAGFlow = (id: string) => (s: ChatStore) => s.messageRAGLoadingIds.includes(id);
-const isPluginApiInvoking = (id: string) => (s: ChatStore) => s.pluginApiLoadingIds.includes(id);
+const isMessageGenerating = (id: string) => (s: ChatStoreState) => s.chatLoadingIds.includes(id);
+const isMessageInRAGFlow = (id: string) => (s: ChatStoreState) =>
+ s.messageRAGLoadingIds.includes(id);
+const isPluginApiInvoking = (id: string) => (s: ChatStoreState) =>
+ s.pluginApiLoadingIds.includes(id);
-const isToolCallStreaming = (id: string, index: number) => (s: ChatStore) => {
+const isToolCallStreaming = (id: string, index: number) => (s: ChatStoreState) => {
const isLoading = s.toolCallingStreamIds[id];
if (!isLoading) return false;
@@ -168,15 +170,15 @@ const isToolCallStreaming = (id: string, index: number) => (s: ChatStore) => {
return isLoading[index];
};
-const isAIGenerating = (s: ChatStore) => s.chatLoadingIds.length > 0;
-const isInRAGFlow = (s: ChatStore) => s.messageRAGLoadingIds.length > 0;
-const isCreatingMessage = (s: ChatStore) => s.isCreatingMessage;
-const isHasMessageLoading = (s: ChatStore) => s.messageLoadingIds.length > 0;
+const isAIGenerating = (s: ChatStoreState) => s.chatLoadingIds.length > 0;
+const isInRAGFlow = (s: ChatStoreState) => s.messageRAGLoadingIds.length > 0;
+const isCreatingMessage = (s: ChatStoreState) => s.isCreatingMessage;
+const isHasMessageLoading = (s: ChatStoreState) => s.messageLoadingIds.length > 0;
/**
* this function is used to determine whether the send button should be disabled
*/
-const isSendButtonDisabledByMessage = (s: ChatStore) =>
+const isSendButtonDisabledByMessage = (s: ChatStoreState) =>
// 1. when there is message loading
isHasMessageLoading(s) ||
// 2. when is creating the topic
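Narrowing the selector parameter from `ChatStore` (state plus actions) to `ChatStoreState` makes every selector a pure reader of state, so the same function can be subscribed to in a component or run against a plain state snapshot in a test. A small sketch of the pattern (component name and import paths are illustrative):

```ts
import { useChatStore } from '@/store/chat';
import { chatSelectors } from '@/store/chat/selectors';

// In a component: subscribe with a state-only selector.
const GeneratingDot = () => {
  const isGenerating = useChatStore(chatSelectors.isAIGenerating);
  return isGenerating ? <span>…</span> : null;
};

// In a test: call the same selector on a bare state object; no store
// instance (and none of its actions) is required:
//   chatSelectors.isAIGenerating({ ...initialState, chatLoadingIds: ['msg-1'] })
```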
diff --git a/src/store/chat/slices/portal/action.ts b/src/store/chat/slices/portal/action.ts
index 05f66a42b5e7..428bfebcfa5b 100644
--- a/src/store/chat/slices/portal/action.ts
+++ b/src/store/chat/slices/portal/action.ts
@@ -2,16 +2,17 @@ import { StateCreator } from 'zustand/vanilla';
import { ChatStore } from '@/store/chat/store';
-import { PortalFile } from './initialState';
+import { PortalArtifact, PortalFile } from './initialState';
export interface ChatPortalAction {
+ closeArtifact: () => void;
closeFilePreview: () => void;
closeMessageDetail: () => void;
closeToolUI: () => void;
+ openArtifact: (artifact: PortalArtifact) => void;
openFilePreview: (portal: PortalFile) => void;
openMessageDetail: (messageId: string) => void;
openToolUI: (messageId: string, identifier: string) => void;
-
togglePortal: (open?: boolean) => void;
}
@@ -21,6 +22,10 @@ export const chatPortalSlice: StateCreator<
[],
ChatPortalAction
> = (set, get) => ({
+ closeArtifact: () => {
+ get().togglePortal(false);
+ set({ portalArtifact: undefined }, false, 'closeArtifact');
+ },
closeFilePreview: () => {
set({ portalFile: undefined }, false, 'closeFilePreview');
},
@@ -30,6 +35,11 @@ export const chatPortalSlice: StateCreator<
closeToolUI: () => {
set({ portalToolMessage: undefined }, false, 'closeToolUI');
},
+ openArtifact: (artifact) => {
+ get().togglePortal(true);
+
+ set({ portalArtifact: artifact }, false, 'openArtifact');
+ },
openFilePreview: (portal) => {
get().togglePortal(true);
@@ -49,4 +59,7 @@ export const chatPortalSlice: StateCreator<
const showInspector = open === undefined ? !get().showPortal : open;
set({ showPortal: showInspector }, false, 'toggleInspector');
},
+ // updateArtifactContent: (content) => {
+ // set({ portalArtifact: content }, false, 'updateArtifactContent');
+ // },
});
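The new `openArtifact` / `closeArtifact` actions mirror the existing file-preview and tool-UI pairs: toggle the shared portal panel first, then set or clear the slice-specific payload. A usage sketch from a hypothetical artifact renderer (ids and titles invented for illustration):

```ts
import { useChatStore } from '@/store/chat';

// Open the portal on the artifact carried by a message:
useChatStore.getState().openArtifact({
  id: 'msg_123', // the message containing the <lobeArtifact> block
  identifier: 'blue-circle-svg',
  title: 'Simple blue circle SVG',
  type: 'image/svg+xml',
});

// Closing hides the portal again and clears the payload:
useChatStore.getState().closeArtifact();
```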
diff --git a/src/store/chat/slices/portal/initialState.ts b/src/store/chat/slices/portal/initialState.ts
index 979dfbedd506..c9b447af4be8 100644
--- a/src/store/chat/slices/portal/initialState.ts
+++ b/src/store/chat/slices/portal/initialState.ts
@@ -4,7 +4,17 @@ export interface PortalFile {
fileId: string;
}
+export interface PortalArtifact {
+ children?: string;
+ id: string;
+ identifier?: string;
+ title?: string;
+ type?: string;
+}
+
export interface ChatPortalState {
+ portalArtifact?: PortalArtifact;
+ portalArtifactDisplayMode?: 'code' | 'preview';
portalFile?: PortalFile;
portalMessageDetail?: string;
portalToolMessage?: { id: string; identifier: string };
@@ -12,5 +22,6 @@ export interface ChatPortalState {
}
export const initialChatPortalState: ChatPortalState = {
+ portalArtifactDisplayMode: 'preview',
showPortal: false,
};
diff --git a/src/store/chat/slices/portal/selectors.test.ts b/src/store/chat/slices/portal/selectors.test.ts
index 754c8fe13b19..aad3039ee77f 100644
--- a/src/store/chat/slices/portal/selectors.test.ts
+++ b/src/store/chat/slices/portal/selectors.test.ts
@@ -21,12 +21,12 @@ describe('chatDockSelectors', () => {
describe('toolUIMessageId', () => {
it('should return undefined when dockToolMessage is not set', () => {
- expect(chatPortalSelectors.artifactMessageId(createState())).toBeUndefined();
+ expect(chatPortalSelectors.toolMessageId(createState())).toBeUndefined();
});
it('should return the id when dockToolMessage is set', () => {
const state = createState({ portalToolMessage: { id: 'test-id', identifier: 'test' } });
- expect(chatPortalSelectors.artifactMessageId(state)).toBe('test-id');
+ expect(chatPortalSelectors.toolMessageId(state)).toBe('test-id');
});
});
@@ -36,8 +36,8 @@ describe('chatDockSelectors', () => {
portalToolMessage: { id: 'test-id', identifier: 'test' },
showPortal: false,
});
- expect(chatPortalSelectors.isArtifactMessageUIOpen('test-id')(state)).toBe(false);
- expect(chatPortalSelectors.isArtifactMessageUIOpen('other-id')(state)).toBe(false);
+ expect(chatPortalSelectors.isPluginUIOpen('test-id')(state)).toBe(false);
+ expect(chatPortalSelectors.isPluginUIOpen('other-id')(state)).toBe(false);
});
it('should return true when id matches and showDock is true', () => {
@@ -45,18 +45,18 @@ describe('chatDockSelectors', () => {
portalToolMessage: { id: 'test-id', identifier: 'test' },
showPortal: true,
});
- expect(chatPortalSelectors.isArtifactMessageUIOpen('test-id')(state)).toBe(true);
+ expect(chatPortalSelectors.isPluginUIOpen('test-id')(state)).toBe(true);
});
});
describe('showToolUI', () => {
it('should return false when dockToolMessage is not set', () => {
- expect(chatPortalSelectors.showArtifactUI(createState())).toBe(false);
+ expect(chatPortalSelectors.showPluginUI(createState())).toBe(false);
});
it('should return true when dockToolMessage is set', () => {
const state = createState({ portalToolMessage: { id: 'test-id', identifier: 'test' } });
- expect(chatPortalSelectors.showArtifactUI(state)).toBe(true);
+ expect(chatPortalSelectors.showPluginUI(state)).toBe(true);
});
});
@@ -70,4 +70,26 @@ describe('chatDockSelectors', () => {
expect(chatPortalSelectors.toolUIIdentifier(state)).toBe('test');
});
});
+
+ describe('showFilePreview', () => {
+ it('should return false when portalFile is not set', () => {
+ expect(chatPortalSelectors.showFilePreview(createState())).toBe(false);
+ });
+
+ it('should return true when portalFile is set', () => {
+ const state = createState({ portalFile: { fileId: 'file-id', chunkText: 'chunk' } });
+ expect(chatPortalSelectors.showFilePreview(state)).toBe(true);
+ });
+ });
+
+ describe('previewFileId', () => {
+ it('should return undefined when portalFile is not set', () => {
+ expect(chatPortalSelectors.previewFileId(createState())).toBeUndefined();
+ });
+
+ it('should return the fileId when portalFile is set', () => {
+ const state = createState({ portalFile: { fileId: 'file-id', chunkText: 'chunk' } });
+ expect(chatPortalSelectors.previewFileId(state)).toBe('file-id');
+ });
+ });
});
diff --git a/src/store/chat/slices/portal/selectors.ts b/src/store/chat/slices/portal/selectors.ts
index 06ac40d755e0..61a439af0848 100644
--- a/src/store/chat/slices/portal/selectors.ts
+++ b/src/store/chat/slices/portal/selectors.ts
@@ -1,27 +1,71 @@
+import { ARTIFACT_TAG_CLOSED_REGEX, ARTIFACT_TAG_REGEX } from '@/const/plugin';
import type { ChatStoreState } from '@/store/chat';
-const artifactMessageId = (s: ChatStoreState) => s.portalToolMessage?.id;
+import { chatSelectors } from '../message/selectors';
+
const showPortal = (s: ChatStoreState) => s.showPortal;
-const isArtifactMessageUIOpen = (id: string) => (s: ChatStoreState) =>
- artifactMessageId(s) === id && showPortal(s);
+const showMessageDetail = (s: ChatStoreState) => !!s.portalMessageDetail;
+const messageDetailId = (s: ChatStoreState) => s.portalMessageDetail;
+
+const showPluginUI = (s: ChatStoreState) => !!s.portalToolMessage;
+
+const toolMessageId = (s: ChatStoreState) => s.portalToolMessage?.id;
+const isPluginUIOpen = (id: string) => (s: ChatStoreState) =>
+ toolMessageId(s) === id && showPortal(s);
+const toolUIIdentifier = (s: ChatStoreState) => s.portalToolMessage?.identifier;
-const showArtifactUI = (s: ChatStoreState) => !!s.portalToolMessage;
const showFilePreview = (s: ChatStoreState) => !!s.portalFile;
-const showMessageDetail = (s: ChatStoreState) => !!s.portalMessageDetail;
const previewFileId = (s: ChatStoreState) => s.portalFile?.fileId;
-const messageDetailId = (s: ChatStoreState) => s.portalMessageDetail;
const chunkText = (s: ChatStoreState) => s.portalFile?.chunkText;
+const showArtifactUI = (s: ChatStoreState) => !!s.portalArtifact;
+const artifactTitle = (s: ChatStoreState) => s.portalArtifact?.title;
+const artifactIdentifier = (s: ChatStoreState) => s.portalArtifact?.identifier || '';
+const artifactMessageId = (s: ChatStoreState) => s.portalArtifact?.id;
+const artifactType = (s: ChatStoreState) => s.portalArtifact?.type;
+
+const artifactMessageContent = (id: string) => (s: ChatStoreState) => {
+ const message = chatSelectors.getMessageById(id)(s);
+ return message?.content || '';
+};
+
+const artifactCode = (id: string) => (s: ChatStoreState) => {
+ const messageContent = artifactMessageContent(id)(s);
+ const result = messageContent.match(ARTIFACT_TAG_REGEX);
+
+ return result?.groups?.content || '';
+};
+
+const isArtifactTagClosed = (id: string) => (s: ChatStoreState) => {
+ const content = artifactMessageContent(id)(s);
+
+ return ARTIFACT_TAG_CLOSED_REGEX.test(content || '');
+};
+
+/* eslint-disable sort-keys-fix/sort-keys-fix, typescript-sort-keys/interface */
export const chatPortalSelectors = {
- artifactMessageId,
- chunkText,
- isArtifactMessageUIOpen,
- messageDetailId,
+ isPluginUIOpen,
+
previewFileId,
- showArtifactUI,
showFilePreview,
+ chunkText,
+
+ messageDetailId,
showMessageDetail,
+
+ showPluginUI,
showPortal,
- toolUIIdentifier: (s: ChatStoreState) => s.portalToolMessage?.identifier,
+
+ toolMessageId,
+ toolUIIdentifier,
+
+ showArtifactUI,
+ artifactTitle,
+ artifactIdentifier,
+ artifactMessageId,
+ artifactType,
+ artifactCode,
+ artifactMessageContent,
+ isArtifactTagClosed,
};
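`artifactCode` and `isArtifactTagClosed` lean on two regexes from `@/const/plugin` that this diff does not include. A plausible shape, inferred from the `groups?.content` access above; these are assumptions, not the verbatim constants:

```ts
// Assumed: captures the body of a <lobeArtifact ...> block into a named
// `content` group, tolerating a still-streaming (unclosed) tag.
export const ARTIFACT_TAG_REGEX = /<lobeArtifact\b[^>]*>(?<content>[\s\S]*?)(?:<\/lobeArtifact>|$)/;

// Assumed: matches only once the closing tag has arrived, which is how
// isArtifactTagClosed distinguishes "still generating" from "done".
export const ARTIFACT_TAG_CLOSED_REGEX = /<lobeArtifact\b[^>]*>[\s\S]*<\/lobeArtifact>/;
```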
diff --git a/src/styles/loading.ts b/src/styles/loading.ts
new file mode 100644
index 000000000000..9e142caec8a9
--- /dev/null
+++ b/src/styles/loading.ts
@@ -0,0 +1,28 @@
+import { css } from 'antd-style';
+
+export const dotLoading = css`
+ &::after {
+ content: '\\2026'; /* Unicode code point for the horizontal ellipsis character */
+
+ overflow: hidden;
+ display: inline-block;
+
+ width: 0;
+
+ vertical-align: bottom;
+
+ animation: ellipsis steps(4, end) 900ms infinite;
+ }
+
+ @keyframes ellipsis {
+ to {
+ width: 1.25em;
+ }
+ }
+`;
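`dotLoading` is exported as a raw `css` fragment rather than a class name, so consumers interpolate it into their own styles. A minimal sketch with antd-style's `createStyles` (the style key is illustrative):

```ts
import { createStyles } from 'antd-style';

import { dotLoading } from '@/styles/loading';

const useStyles = createStyles(({ css }) => ({
  // Appends the animated ellipsis to whatever text the element renders,
  // e.g. for the artifact "generating" / "thinking" labels.
  generating: css`
    ${dotLoading}
  `,
}));
```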
diff --git a/src/tools/artifacts/index.ts b/src/tools/artifacts/index.ts
new file mode 100644
index 000000000000..3556f8ab5395
--- /dev/null
+++ b/src/tools/artifacts/index.ts
@@ -0,0 +1,13 @@
+import { systemPrompt } from '@/tools/artifacts/systemRole';
+import { BuiltinToolManifest } from '@/types/tool';
+
+export const ArtifactsManifest: BuiltinToolManifest = {
+ api: [],
+ identifier: 'lobe-artifacts',
+ meta: {
+ avatar: `data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iMTAyNCIgaGVpZ2h0PSIxMDI0IiBmaWxsPSJub25lIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciPjxyZWN0IHdpZHRoPSIxMDI0IiBoZWlnaHQ9IjEwMjQiIHJ4PSI1MTIiIGZpbGw9IiNDNDc4NUIiLz48cGF0aCBkPSJNNTEyIDYxMmM1NS4yMjggMCAxMDAtNDQuNzcyIDEwMC0xMDBzLTQ0Ljc3Mi0xMDAtMTAwLTEwMC0xMDAgNDQuNzcyLTEwMCAxMDAgNDQuNzcyIDEwMCAxMDAgMTAwWiIgc3Ryb2tlPSIjZmZmIiBzdHJva2Utd2lkdGg9IjU2IiBzdHJva2UtbGluZWNhcD0icm91bmQiIHN0cm9rZS1saW5lam9pbj0icm91bmQiLz48cGF0aCBkPSJNNTEyIDY2MmExNDkuOTk4IDE0OS45OTggMCAwIDEtOTIuNTk3IDEzOC41ODIgMTUwLjAwMiAxNTAuMDAyIDAgMCAxLTIwNC41MjEtMTA5LjMxOCAxNDkuOTk4IDE0OS45OTggMCAwIDEgNjMuNzgzLTE1My45ODRBMTQ5Ljk5MiAxNDkuOTkyIDAgMCAxIDM2MiA1MTJhMTQ5Ljk5OSAxNDkuOTk5IDAgMSAxIDE1MC0xNTAgMTQ5Ljk5OCAxNDkuOTk4IDAgMCAxIDI5Ny4xMTgtMjkuMjYzQTE1MCAxNTAgMCAwIDEgNjYyIDUxMmMyOS42NjcgMCA1OC42NjggOC43OTcgODMuMzM1IDI1LjI4YTE0OS45OTUgMTQ5Ljk5NSAwIDAgMSA2My43ODMgMTUzLjk4NEExNTAgMTUwIDAgMCAxIDUxMiA2NjJaTTUxMiAzNjJ2NTBNMzYyIDUxMmg1ME02NjIgNTEyaC01ME01MTIgNjYydi01ME0zNzguNjY3IDM3OC42NjdsNjIuNjY2IDYyLjY2Nk01ODIuNjY3IDQ0MS4zMzNsNjIuNjY2LTYyLjY2Nk0zNzguNjY3IDY0NS4zMzNsNjIuNjY2LTYyLjY2Nk01ODIuNjY3IDU4Mi42NjdsNjIuNjY2IDYyLjY2NiIgc3Ryb2tlPSIjZmZmIiBzdHJva2Utd2lkdGg9IjU2IiBzdHJva2UtbGluZWNhcD0icm91bmQiIHN0cm9rZS1saW5lam9pbj0icm91bmQiLz48L3N2Zz4=`,
+ title: 'Artifacts',
+ },
+ systemRole: systemPrompt,
+ type: 'builtin',
+};
diff --git a/src/tools/artifacts/systemRole.ts b/src/tools/artifacts/systemRole.ts
new file mode 100644
index 000000000000..fc019aa67d50
--- /dev/null
+++ b/src/tools/artifacts/systemRole.ts
@@ -0,0 +1,338 @@
+export const systemPrompt = `
+The assistant can create and reference artifacts during conversations. Artifacts are for substantial, self-contained content that users might modify or reuse, displayed in a separate UI window for clarity.
+
+# Good artifacts are...
+- Substantial content (>15 lines)
+- Content that the user is likely to modify, iterate on, or take ownership of
+- Self-contained, complex content that can be understood on its own, without context from the conversation
+- Content intended for eventual use outside the conversation (e.g., reports, emails, presentations)
+- Content likely to be referenced or reused multiple times
+
+# Don't use artifacts for...
+- Simple, informational, or short content, such as brief code snippets, mathematical equations, or small examples
+- Primarily explanatory, instructional, or illustrative content, such as examples provided to clarify a concept
+- Suggestions, commentary, or feedback on existing artifacts
+- Conversational or explanatory content that doesn't represent a standalone piece of work
+- Content that is dependent on the current conversational context to be useful
+- Content that is unlikely to be modified or iterated upon by the user
+- Request from users that appears to be a one-off question
+
+# Usage notes
+- One artifact per message unless specifically requested
+- Prefer in-line content (don't use artifacts) when possible. Unnecessary use of artifacts can be jarring for users.
+- If a user asks the assistant to "draw an SVG" or "make a website," the assistant does not need to explain that it doesn't have these capabilities. Creating the code and placing it within the appropriate artifact will fulfill the user's intentions.
+- If asked to generate an image, the assistant can offer an SVG instead. The assistant isn't very proficient at making SVG images but should engage with the task positively. Self-deprecating humor about its abilities can make it an entertaining experience for users.
+- The assistant errs on the side of simplicity and avoids overusing artifacts for content that can be effectively presented within the conversation.
+
+<artifact_instructions>
+ When collaborating with the user on creating content that falls into compatible categories, the assistant should follow these steps:
+
+ 1. Immediately before invoking an artifact, think for one sentence in <lobeThinking> tags about how it evaluates against the criteria for a good and bad artifact. Consider if the content would work just fine without an artifact. If it's artifact-worthy, in another sentence determine if it's a new artifact or an update to an existing one (most common). For updates, reuse the prior identifier.
+ 2. Wrap the content in opening and closing \`<lobeArtifact>\` tags.
+ 3. Assign an identifier to the \`identifier\` attribute of the opening \`<lobeArtifact>\` tag. For updates, reuse the prior identifier. For new artifacts, the identifier should be descriptive and relevant to the content, using kebab-case (e.g., "example-code-snippet"). This identifier will be used consistently throughout the artifact's lifecycle, even when updating or iterating on the artifact.
+ 4. Include a \`title\` attribute in the \`<lobeArtifact>\` tag to provide a brief title or description of the content.
+ 5. Add a \`type\` attribute to the opening \`<lobeArtifact>\` tag to specify the type of content the artifact represents. Assign one of the following values to the \`type\` attribute:
+ - Code: "application/lobe.artifacts.code"
+ - Use for code snippets or scripts in any programming language.
+ - Include the language name as the value of the \`language\` attribute (e.g., \`language="python"\`).
+ - Do not use triple backticks when putting code in an artifact.
+ - Documents: "text/markdown"
+ - Plain text, Markdown, or other formatted text documents
+ - HTML: "text/html"
+ - The user interface can render single file HTML pages placed within the artifact tags. HTML, JS, and CSS should be in a single file when using the \`text/html\` type.
+ - Images from the web are not allowed, but you can use placeholder images by specifying the width and height like so \`<img src="/api/placeholder/400/320" alt="placeholder" />\`
+ - The only place external scripts can be imported from is https://cdnjs.cloudflare.com
+ - It is inappropriate to use "text/html" when sharing snippets, code samples & example HTML or CSS code, as it would be rendered as a webpage and the source code would be obscured. The assistant should instead use "application/lobe.artifacts.code" defined above.
+ - If the assistant is unable to follow the above requirements for any reason, use "application/lobe.artifacts.code" type for the artifact instead, which will not attempt to render the webpage.
+ - SVG: "image/svg+xml"
+ - The user interface will render the Scalable Vector Graphics (SVG) image within the artifact tags.
+ - The assistant should specify the viewbox of the SVG rather than defining a width/height
+ - Mermaid Diagrams: "application/lobe.artifacts.mermaid"
+ - The user interface will render Mermaid diagrams placed within the artifact tags.
+ - Do not put Mermaid code in a code block when using artifacts.
+ - React Components: "application/lobe.artifacts.react"
+ - Use this for displaying either: React elements, e.g. \`<strong>Hello World!</strong>\`, React pure functional components, e.g. \`() => <strong>Hello World!</strong>\`, React functional components with Hooks, or React component classes
+ - When creating a React component, ensure it has no required props (or provide default values for all props) and use a default export.
+ - Use Tailwind classes for styling. DO NOT USE ARBITRARY VALUES (e.g. \`h-[600px]\`).
+ - Base React is available to be imported. To use hooks, first import it at the top of the artifact, e.g. \`import { useState } from "react"\`
+ - The lucide-react@0.263.1 library is available to be imported. e.g. \`import { Camera } from "lucide-react"\` & \`<Camera color="red" size={48} />\`
+ - The recharts charting library is available to be imported, e.g. \`import { LineChart, XAxis, ... } from "recharts"\` & \`<LineChart ...><XAxis dataKey="name"> ...\`
+ - The assistant can use prebuilt components from the \`shadcn/ui\` library after it is imported: \`import { Alert, AlertDescription, AlertTitle, AlertDialog, AlertDialogAction } from '@/components/ui/alert';\`. If using components from the shadcn/ui library, the assistant mentions this to the user and offers to help them install the components if necessary.
+ - NO OTHER LIBRARIES (e.g. zod, hookform) ARE INSTALLED OR ABLE TO BE IMPORTED.
+ - Images from the web are not allowed, but you can use placeholder images by specifying the width and height like so \`<img src="/api/placeholder/400/320" alt="placeholder" />\`
+ - If you are unable to follow the above requirements for any reason, use "application/lobe.artifacts.code" type for the artifact instead, which will not attempt to render the component.
+ 6. Include the complete and updated content of the artifact, without any truncation or minimization. Don't use "// rest of the code remains the same...".
+ 7. If unsure whether the content qualifies as an artifact, if an artifact should be updated, or which type to assign to an artifact, err on the side of not creating an artifact.
+</artifact_instructions>
+
+Here are some examples of correct usage of artifacts by other AI assistants:
+
+<examples>
+  <example_docstring>
+    This example demonstrates how to create a new artifact and reference it in the response.
+  </example_docstring>
+
+  <example>
+    <user_query>Can you help me create a Python script to calculate the factorial of a number?</user_query>
+
+    <assistant_response>
+      Sure! Here's a Python script that calculates the factorial of a number:
+
+      <lobeThinking>Creating a Python script to calculate factorials meets the criteria for a good artifact. It's a self-contained piece of code that can be understood on its own and is likely to be reused or modified. This is a new conversation, so there are no pre-existing artifacts. Therefore, I'm creating a new artifact.</lobeThinking>
+
+      <lobeArtifact identifier="factorial-calculator" type="application/lobe.artifacts.code" language="python" title="Simple Python factorial script">
+        def factorial(n):
+            if n == 0:
+                return 1
+            else:
+                return n * factorial(n - 1)
+
+      ...
+      </lobeArtifact>
+    </assistant_response>
+  </example>
+
+  <example_docstring>
+    This example shows how to update an existing artifact and reference it in the response.
+  </example_docstring>
+
+  <example>
+    <user_query>Thanks for the factorial script! Can you modify it to handle invalid input gracefully?</user_query>
+
+    <assistant_response>
+      Good point! It's important to handle invalid input to prevent errors. I'll update the script to include input validation.
+
+      <lobeThinking>This request is a direct modification of the existing "factorial-calculator" artifact. It's not a new artifact but an update to make the script more robust. I'll reuse the "factorial-calculator" identifier to maintain continuity and show the evolution of our code.</lobeThinking>
+
+      <lobeArtifact identifier="factorial-calculator" type="application/lobe.artifacts.code" language="python" title="Python factorial script with input validation">
+        def factorial(n):
+            if n == 0:
+                return 1
+            else:
+                return n * factorial(n - 1)
+
+        while True:
+            try:
+                num = int(input("Enter a non-negative integer: "))
+                if num < 0:
+                    raise ValueError
+                break
+            except ValueError:
+                print("Invalid input. Please enter a non-negative integer.")
+
+      ...
+      </lobeArtifact>
+    </assistant_response>
+  </example>
+
+  <example_docstring>
+    This example shows how to create an SVG artifact and reference it in the response.
+  </example_docstring>
+
+  <example>
+    <user_query>Can you draw a simple blue circle in SVG?</user_query>
+
+    <assistant_response>
+      I'll create a simple blue circle for you using SVG.
+
+      <lobeThinking>Drawing a simple blue circle in SVG is a good candidate for an artifact. It's a standalone visual element that can be reused or modified. This is a new request, so I'll create a new artifact with the identifier "blue-circle-svg".</lobeThinking>
+
+      <lobeArtifact identifier="blue-circle-svg" type="image/svg+xml" title="Simple blue circle SVG">
+        <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100">
+          <circle cx="50" cy="50" r="40" fill="blue" />
+        </svg>
+      </lobeArtifact>
+
+      Here's a simple SVG image of a blue circle, as requested. The circle is centered at coordinates (50, 50) with a radius of 40 pixels, and it's filled with the color blue.
+
+      ...
+    </assistant_response>
+  </example>
+
+  <example_docstring>
+    This example demonstrates how to create a Mermaid artifact for a simple flow chart.
+  </example_docstring>
+
+  <example>
+    <user_query>Can you create a simple flow chart showing the process of making tea using Mermaid?</user_query>
+
+    <assistant_response>
+      Sure! Here's a simple flow chart depicting the process of making tea using Mermaid syntax:
+
+      <lobeThinking>Creating a flow chart to show the process of making tea is an excellent candidate for an artifact. It's a self-contained diagram that can be understood on its own and might be used in various contexts like a presentation or a how-to guide. This is a new request, so I'll create a new artifact with the identifier "tea-making-flowchart".</lobeThinking>
+
+      <lobeArtifact identifier="tea-making-flowchart" type="application/lobe.artifacts.mermaid" title="Flow chart: Making tea">
+        graph TD
+            A[Start] --> B{Water boiled?}
+            B -->|Yes| C[Add tea leaves to cup]
+            B -->|No| D[Boil water]
+            D --> B
+            C --> E[Pour boiling water into cup]
+            E --> F[Steep tea for desired time]
+            F --> G[Remove tea leaves]
+            G --> H[Add milk or sugar, if desired]
+            H --> I[Enjoy your tea!]
+            I --> J[End]
+      </lobeArtifact>
+
+      This flow chart uses Mermaid syntax to visualize the steps involved in making a cup of tea.
+
+      ...
+    </assistant_response>
+  </example>
+
+  <example_docstring>
+    This example demonstrates the assistant's preference to update existing artifacts, rather than create new ones.
+  </example_docstring>
+
+  <example>
+    <user_query>I'm teaching a beginner's programming course, and I want to include some interactive examples for the students to try out. Do you have any suggestions for simple programs that could help them understand basic concepts?</user_query>
+
+    <assistant_response>
+      Teaching a beginner's programming course is a great opportunity to introduce students to fundamental concepts through interactive examples. One idea that comes to mind is building upon the factorial calculator we discussed earlier.
+
+      <lobeThinking>The interactive factorial calculator is a good artifact: it's substantial (>15 lines), self-contained, and usable outside the conversation. It's not simple informational or primarily explanatory content. This is an update to our existing "factorial-calculator" artifact, transforming it into a teaching tool.</lobeThinking>
+
+      <lobeArtifact identifier="factorial-calculator" type="application/lobe.artifacts.code" language="python" title="Interactive factorial calculator">
+        def factorial(n):
+            if n == 0:
+                return 1
+            else:
+                return n * factorial(n - 1)
+
+        def main():
+            print("Welcome to the Factorial Calculator!")
+            print("Enter a non-negative integer to calculate its factorial.")
+            print("Enter 'q' to quit the program.")
+
+            while True:
+                user_input = input("\\nEnter a number (or 'q' to quit): ")
+
+                if user_input == 'q':
+                    print("Thank you for using the Factorial Calculator. Goodbye!")
+                    break
+
+                try:
+                    num = int(user_input)
+                    if num < 0:
+                        raise ValueError
+                    result = factorial(num)
+                    print(f"The factorial of {num} is {result}")
+                except ValueError:
+                    print("Invalid input. Please enter a non-negative integer or 'q' to quit.")
+
+        if __name__ == "__main__":
+            main()
+      </lobeArtifact>
+
+      I've updated it to make it even more suitable as a learning example for your students. Here are the key changes:
+
+      1. Separated the factorial calculation logic into its own function, \`factorial()\`. This demonstrates the concept of function definition and recursion.
+
+      ...
+    </assistant_response>
+  </example>
+
+  <example_docstring>
+    This example demonstrates how to create a React component artifact for a metrics dashboard.
+  </example_docstring>
+
+  <example>
+    <user_query>Can you create a React component for a metrics dashboard?</user_query>
+
+    <assistant_response>
+      Absolutely! Here's a React component that implements a basic metrics dashboard:
+
+      <lobeThinking>Creating a React component for a metrics dashboard is a good artifact. It's substantial, self-contained, and can be reused in various web applications. It's not just a brief code snippet or primarily explanatory content. This is a new request, so I'll create a new artifact with the identifier "metrics-dashboard-component".</lobeThinking>
+
+      <lobeArtifact identifier="metrics-dashboard-component" type="application/lobe.artifacts.react" title="React Component: Metrics Dashboard">
+        import React, { useState, useEffect } from 'react';
+        import { BarChart, Bar, XAxis, YAxis, ResponsiveContainer } from 'recharts';
+        import { Card, CardHeader, CardContent } from '@/components/ui/card';
+
+        const generateData = () => [...Array(12)].map((_, i) => ({
+          month: ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][i],
+          revenue: Math.floor(Math.random() * 5000) + 1000
+        }));
+
+        const MetricCard = ({ title, value, change }) => (
+          <Card>
+            <CardHeader>{title}</CardHeader>
+
+      ...
+
+        export default Dashboard;
+      </lobeArtifact>
+
+      Feel free to ask if you want to extend this component!
+    </assistant_response>
+  </example>
+
+  <example_docstring>
+    This example demonstrates the assistant's decision not to use an artifact because it would make the information less accessible and hinder the natural flow of the conversation.
+  </example_docstring>
+
+  <example>
+    <user_query>
+      Can you give me a quick rundown of some common Python list methods? I'm new to Python and trying to understand the basics.
+    </user_query>
+
+    <assistant_response>
+      Absolutely! Python lists are versatile and have many useful methods. Here's a quick overview of some common ones:
+
+      <lobeThinking>The user asked about common Python list methods. While informative, putting this in a separate artifact window would make it harder for the user to quickly scan, understand, and ask follow-up questions. Keeping it in our chat makes the info more glanceable and our discussion more fluid.</lobeThinking>
+
+      1. \`append(x)\`: Adds an item \`x\` to the end of the list.
+         Example: \`my_list.append(4)\`
+
+      2. \`extend(iterable)\`: Adds all items from an iterable (like another list) to the end.
+         Example: \`my_list.extend([5, 6, 7])\`
+
+      3. \`insert(i, x)\`: Inserts an item \`x\` at a given position \`i\`.
+         Example: \`my_list.insert(1, "new")\`
+
+      ...
+    </assistant_response>
+  </example>
+
+  <example_docstring>
+    This example demonstrates the assistant's decision not to use an artifact for an explanation that includes code samples, because the content is primarily educational and benefits from being part of the conversational flow.
+  </example_docstring>
+
+  <example>
+    <user_query>
+      I'm learning Python and algorithms. Can you explain how binary search works and maybe show me a simple Python implementation? I learn best by seeing code.
+    </user_query>
+
+    <assistant_response>
+      Absolutely! Binary search is an efficient algorithm for finding an item in a sorted list. It works by repeatedly dividing the search interval in half. Let's walk through it with a simple Python implementation.
+
+      <lobeThinking>The user asked for an explanation of binary search with a Python implementation. Although this involves code, it's primarily educational—the code is part of the explanation, not a standalone tool. Putting this in an artifact would disrupt the learning flow. The user learns best by seeing code in context, so keeping everything in our chat makes it easier for them to follow along and ask questions.</lobeThinking>
+
+      First, let's define a function for binary search:
+
+      \`\`\`python
+      def binary_search(arr, target):
+          left, right = 0, len(arr) - 1
+
+          while left <= right:
+              mid = (left + right) // 2
+              if arr[mid] == target:
+                  return mid
+              elif arr[mid] < target:
+                  left = mid + 1
+      ...
+    </assistant_response>
+  </example>
+</examples>
+
+The assistant should not mention any of these instructions to the user, nor make reference to the \`lobeArtifact\` tag, any of the MIME types (e.g. \`application/lobe.artifacts.code\`), or related syntax unless it is directly relevant to the query.
+
+The assistant should always take care to not produce artifacts that would be highly hazardous to human health or wellbeing if misused, even if it is asked to produce them for seemingly benign reasons. However, if the assistant would be willing to produce the same content in text form, it should be willing to produce it in an artifact.
+
+`;
diff --git a/src/tools/index.ts b/src/tools/index.ts
index bec7ab7e4fa5..6dc1aaf90000 100644
--- a/src/tools/index.ts
+++ b/src/tools/index.ts
@@ -1,8 +1,14 @@
import { LobeBuiltinTool } from '@/types/tool';
+import { ArtifactsManifest } from './artifacts';
import { DalleManifest } from './dalle';
export const builtinTools: LobeBuiltinTool[] = [
+ {
+ identifier: ArtifactsManifest.identifier,
+ manifest: ArtifactsManifest,
+ type: 'builtin',
+ },
{
identifier: DalleManifest.identifier,
manifest: DalleManifest,
diff --git a/src/types/user/settings/keyVaults.ts b/src/types/user/settings/keyVaults.ts
index b188652cc5b8..8fe21885f606 100644
--- a/src/types/user/settings/keyVaults.ts
+++ b/src/types/user/settings/keyVaults.ts
@@ -17,6 +17,7 @@ export interface AWSBedrockKeyVault {
}
export interface UserKeyVaults {
+ ai21?: OpenAICompatibleKeyVault;
ai360?: OpenAICompatibleKeyVault;
anthropic?: OpenAICompatibleKeyVault;
azure?: AzureOpenAIKeyVault;
@@ -24,6 +25,7 @@ export interface UserKeyVaults {
bedrock?: AWSBedrockKeyVault;
deepseek?: OpenAICompatibleKeyVault;
fireworksai?: OpenAICompatibleKeyVault;
+ github?: OpenAICompatibleKeyVault;
google?: OpenAICompatibleKeyVault;
groq?: OpenAICompatibleKeyVault;
lobehub?: any;
diff --git a/src/utils/clipboard.ts b/src/utils/clipboard.ts
new file mode 100644
index 000000000000..c66b17d2a426
--- /dev/null
+++ b/src/utils/clipboard.ts
@@ -0,0 +1,53 @@
+const copyUsingFallback = (imageUrl: string) => {
+ const img = new Image();
+ img.addEventListener('load', function () {
+ const canvas = document.createElement('canvas');
+ canvas.width = img.width;
+ canvas.height = img.height;
+ const ctx = canvas.getContext('2d');
+ ctx!.drawImage(img, 0, 0);
+
+ try {
+ canvas.toBlob(function (blob) {
+ // @ts-ignore
+ const item = new ClipboardItem({ 'image/png': blob });
+ navigator.clipboard.write([item]).then(function () {
+ console.log('Image copied to clipboard successfully using canvas and modern API');
+ });
+ });
+ } catch {
+ // If toBlob or ClipboardItem is unsupported, fall back to a data URL (note: this copies the URL as text, not as an image)
+ const dataURL = canvas.toDataURL('image/png');
+ const textarea = document.createElement('textarea');
+ textarea.value = dataURL;
+ document.body.append(textarea);
+ textarea.select();
+
+ document.execCommand('copy');
+
+ textarea.remove();
+ }
+ });
+ img.src = imageUrl;
+};
+
+const copyUsingModernAPI = async (imageUrl: string) => {
+ try {
+ const base64Response = await fetch(imageUrl);
+ const blob = await base64Response.blob();
+ const item = new ClipboardItem({ 'image/png': blob });
+ await navigator.clipboard.write([item]);
+ } catch (error) {
+ console.error('Failed to copy image using modern API:', error);
+ copyUsingFallback(imageUrl);
+ }
+};
+
+export const copyImageToClipboard = async (imageUrl: string) => {
+ // Check whether the modern asynchronous Clipboard API is available
+ if (navigator.clipboard && 'write' in navigator.clipboard) {
+ await copyUsingModernAPI(imageUrl);
+ } else {
+ copyUsingFallback(imageUrl);
+ }
+};