diff --git a/README.md b/README.md
index 57a88695..c84e3fc6 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
## Supported Models
diff --git a/readme_md.tpl b/readme_md.tpl
index 52ac97aa..3694ab8d 100644
--- a/readme_md.tpl
+++ b/readme_md.tpl
@@ -1,5 +1,5 @@
## Supported Models
diff --git a/source/llamacpp-chat/bentofile.yaml b/source/llamacpp-chat/bentofile.yaml
index 4556d4d0..68ff8733 100644
--- a/source/llamacpp-chat/bentofile.yaml
+++ b/source/llamacpp-chat/bentofile.yaml
@@ -1,8 +1,7 @@
service: "service:LlamaCppChat"
labels:
- owner: bentoml-team
- platforms: macos
source: https://github.com/bentoml/openllm-models-feed/tree/main/source/llamacpp-chat
+ platforms: macos
include:
- "*.py"
- "ui/*"
diff --git a/source/recipe.yaml b/source/recipe.yaml
index 81012fa4..f28cc6cd 100644
--- a/source/recipe.yaml
+++ b/source/recipe.yaml
@@ -14,7 +14,7 @@
chat_template: phi-3
extra_labels:
openllm_alias: 3.8b,3.8b-mini,3.8b-mini-instruct-4k-fp16
- openllm_hf_model_id: microsoft/Phi-3-mini-4k-instruct
+ model_name: microsoft/Phi-3-mini-4k-instruct
"llama2:7b-chat-fp16":
project: vllm-chat
service_config:
@@ -31,7 +31,7 @@
chat_template: llama-2-chat
extra_labels:
openllm_alias: 7b,7b-chat
- openllm_hf_model_id: meta-llama/Llama-2-7b-chat-hf
+ model_name: meta-llama/Llama-2-7b-chat-hf
"llama2:13b-chat-fp16":
project: vllm-chat
service_config:
@@ -48,7 +48,7 @@
chat_template: llama-2-chat
extra_labels:
openllm_alias: 13b,13b-chat
- openllm_hf_model_id: meta-llama/Llama-2-13b-chat-hf
+ model_name: meta-llama/Llama-2-13b-chat-hf
"llama2:70b-chat-fp16":
project: vllm-chat
service_config:
@@ -65,7 +65,7 @@
chat_template: llama-2-chat
extra_labels:
openllm_alias: 70b,70b-chat
- openllm_hf_model_id: meta-llama/Llama-2-70b-chat-hf
+ model_name: meta-llama/Llama-2-70b-chat-hf
"llama2:7b-chat-awq-4bit":
project: vllm-chat
service_config:
@@ -83,7 +83,7 @@
chat_template: llama-2-chat
extra_labels:
openllm_alias: 7b-4bit,7b-chat-4bit
- openllm_hf_model_id: TheBloke/Llama-2-7B-Chat-AWQ
+ model_name: TheBloke/Llama-2-7B-Chat-AWQ
"mistral:7b-instruct-awq-4bit":
project: vllm-chat
service_config:
@@ -102,7 +102,7 @@
chat_template: mistral-instruct
extra_labels:
openllm_alias: 7b-4bit,7b-instruct-4bit
- openllm_hf_model_id: TheBloke/Mistral-7B-Instruct-v0.1-AWQ
+ model_name: TheBloke/Mistral-7B-Instruct-v0.1-AWQ
"mistral:7b-instruct-fp16":
project: vllm-chat
service_config:
@@ -120,7 +120,7 @@
chat_template: mistral-instruct
extra_labels:
openllm_alias: 7b,7b-instruct
- openllm_hf_model_id: mistralai/Mistral-7B-Instruct-v0.1
+ model_name: mistralai/Mistral-7B-Instruct-v0.1
"llama3:8b-instruct-awq-4bit":
project: vllm-chat
service_config:
@@ -136,7 +136,7 @@
quantization: awq
extra_labels:
openllm_alias: 8b-4bit,8b-instruct-4bit
- openllm_hf_model_id: casperhansen/llama-3-8b-instruct-awq
+ model_name: casperhansen/llama-3-8b-instruct-awq
"llama3:70b-instruct-awq-4bit":
project: vllm-chat
service_config:
@@ -152,7 +152,7 @@
quantization: awq
extra_labels:
openllm_alias: 70b-4bit,70b-instruct-4bit
- openllm_hf_model_id: casperhansen/llama-3-70b-instruct-awq
+ model_name: casperhansen/llama-3-70b-instruct-awq
"llama3:8b-instruct-fp16":
project: vllm-chat
service_config:
@@ -168,7 +168,7 @@
dtype: half
extra_labels:
openllm_alias: 8b,8b-instruct
- openllm_hf_model_id: meta-llama/Meta-Llama-3-8B-Instruct
+ model_name: meta-llama/Meta-Llama-3-8B-Instruct
"llama3:70b-instruct-fp16":
project: vllm-chat
service_config:
@@ -183,7 +183,7 @@
max_model_len: 2048
extra_labels:
openllm_alias: 70b,70b-instruct
- openllm_hf_model_id: meta-llama/Meta-Llama-3-70B-Instruct
+ model_name: meta-llama/Meta-Llama-3-70B-Instruct
"gemma:2b-instruct-fp16":
project: vllm-chat
service_config:
@@ -199,7 +199,7 @@
dtype: half
extra_labels:
openllm_alias: 2b,2b-instruct
- openllm_hf_model_id: google/gemma-2b-it
+ model_name: google/gemma-2b-it
"gemma:7b-instruct-fp16":
project: vllm-chat
service_config:
@@ -215,7 +215,7 @@
dtype: half
extra_labels:
openllm_alias: 7b,7b-instruct
- openllm_hf_model_id: google/gemma-7b-it
+ model_name: google/gemma-7b-it
"gemma:7b-instruct-awq-4bit":
project: vllm-chat
service_config:
@@ -232,7 +232,7 @@
chat_template: gemma-it
extra_labels:
openllm_alias: 7b-4bit,7b-instruct-4bit
- openllm_hf_model_id: casperhansen/gemma-7b-it-awq
+ model_name: casperhansen/gemma-7b-it-awq
"mixtral:8x7b-instruct-v0.1-fp16":
project: vllm-chat
service_config:
@@ -248,7 +248,7 @@
chat_template: mistral-instruct
extra_labels:
openllm_alias: 8x7b,8x7b-instruct
- openllm_hf_model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
+ model_name: mistralai/Mixtral-8x7B-Instruct-v0.1
"mixtral:8x7b-instruct-v0.1-awq-4bit":
project: vllm-chat
service_config:
@@ -266,7 +266,7 @@
chat_template: mistral-instruct
extra_labels:
openllm_alias: 8x7b-4bit
- openllm_hf_model_id: casperhansen/mixtral-instruct-awq
+ model_name: casperhansen/mixtral-instruct-awq
"qwen2:0.5b-instruct-fp16":
project: vllm-chat
service_config:
@@ -282,7 +282,7 @@
dtype: half
extra_labels:
openllm_alias: 0.5b,0.5b-instruct
- openllm_hf_model_id: Qwen/Qwen2-0.5B-Instruct
+ model_name: Qwen/Qwen2-0.5B-Instruct
"qwen2:1.5b-instruct-fp16":
project: vllm-chat
service_config:
@@ -298,7 +298,7 @@
dtype: half
extra_labels:
openllm_alias: 1.5b,1.5b-instruct
- openllm_hf_model_id: Qwen/Qwen2-1.5B-Instruct
+ model_name: Qwen/Qwen2-1.5B-Instruct
"qwen2:7b-instruct-awq-4bit":
project: vllm-chat
service_config:
@@ -314,7 +314,7 @@
quantization: awq
extra_labels:
openllm_alias: 7b-4bit,7b-instruct-4bit
- openllm_hf_model_id: Qwen/Qwen2-7B-Instruct-AWQ
+ model_name: Qwen/Qwen2-7B-Instruct-AWQ
"qwen2:7b-instruct-fp16":
project: vllm-chat
service_config:
@@ -330,7 +330,7 @@
dtype: half
extra_labels:
openllm_alias: 7b,7b-instruct
- openllm_hf_model_id: Qwen/Qwen2-7B-Instruct
+ model_name: Qwen/Qwen2-7B-Instruct
"qwen2:72b-instruct-awq-4bit":
project: vllm-chat
service_config:
@@ -346,7 +346,7 @@
quantization: awq
extra_labels:
openllm_alias: 72b-4bit,72b-instruct-4bit
- openllm_hf_model_id: Qwen/Qwen2-72B-Instruct-AWQ
+ model_name: Qwen/Qwen2-72B-Instruct-AWQ
"qwen2:57b-a14b-instruct-fp16":
project: vllm-chat
service_config:
@@ -362,7 +362,7 @@
dtype: half
extra_labels:
openllm_alias: 57b-a14b,57b-a14b-instruct
- openllm_hf_model_id: Qwen/Qwen2-57B-A14B-Instruct
+ model_name: Qwen/Qwen2-57B-A14B-Instruct
"qwen2:72b-instruct-fp16":
project: vllm-chat
service_config:
@@ -378,7 +378,7 @@
dtype: half
extra_labels:
openllm_alias: 72b,72b-instruct
- openllm_hf_model_id: Qwen/Qwen2-72B-Instruct
+ model_name: Qwen/Qwen2-72B-Instruct
"phi3:3.8b-instruct-ggml-q4":
project: llamacpp-chat
service_config:
@@ -393,4 +393,4 @@
chat_template: phi-3
extra_labels:
openllm_alias: 3.8b-q4,3.8b-mini-q4,3.8b-mini-instruct-4k-ggml-q4
- openllm_hf_model_id: microsoft/Phi-3-mini-4k-instruct-gguf
+ model_name: microsoft/Phi-3-mini-4k-instruct-gguf
diff --git a/source/vllm-chat/bentofile.yaml b/source/vllm-chat/bentofile.yaml
index 037db2b5..1077bbe4 100644
--- a/source/vllm-chat/bentofile.yaml
+++ b/source/vllm-chat/bentofile.yaml
@@ -1,8 +1,7 @@
service: "service:VLLM"
labels:
- owner: bentoml-team
- platforms: linux
source: https://github.com/bentoml/openllm-models-feed/tree/main/source/vllm-chat
+ platforms: linux
include:
- "*.py"
- "ui/*"