Update README.md
github-actions[bot] committed Jul 24, 2024
1 parent 456df08 commit 5920649
Showing 1 changed file (README.md) with 36 additions and 23 deletions.
@@ -21,6 +21,7 @@ openllm repo add nightly https://github.com/bentoml/openllm-models@nightly
- [Gemma](#gemma)
- [Llama-2](#llama2)
- [Mixtral](#mixtral)
+- [Llama-3.1](#llama3.1)

---

@@ -29,10 +30,10 @@ openllm repo add nightly https://github.com/bentoml/openllm-models@nightly

| Model | Version | Huggingface Link |
| --- | --- | --- |
-| llama3 | 70b-instruct-awq-4bit-e968 | [HF Link](https://huggingface.co/casperhansen/llama-3-70b-instruct-awq) |
-| llama3 | 70b-instruct-fp16-6aed | [HF Link](https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct) |
-| llama3 | 8b-instruct-awq-4bit-f9de | [HF Link](https://huggingface.co/casperhansen/llama-3-8b-instruct-awq) |
-| llama3 | 8b-instruct-fp16-f703 | [HF Link](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) |
+| llama3 | 70b-instruct-awq-4bit-9204 | [HF Link](https://huggingface.co/casperhansen/llama-3-70b-instruct-awq) |
+| llama3 | 70b-instruct-fp16-7936 | [HF Link](https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct) |
+| llama3 | 8b-instruct-awq-4bit-985b | [HF Link](https://huggingface.co/casperhansen/llama-3-8b-instruct-awq) |
+| llama3 | 8b-instruct-fp16-8638 | [HF Link](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) |

---

@@ -41,7 +42,7 @@ openllm repo add nightly https://github.com/bentoml/openllm-models@nightly

| Model | Version | Huggingface Link |
| --- | --- | --- |
-| phi3 | 3.8b-instruct-fp16-30b8 | [HF Link](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct) |
+| phi3 | 3.8b-instruct-fp16-c4d8 | [HF Link](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct) |
| phi3 | 3.8b-instruct-ggml-q4-f5db | [HF Link](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf) |

---
@@ -51,8 +52,8 @@ openllm repo add nightly https://github.com/bentoml/openllm-models@nightly

| Model | Version | Huggingface Link |
| --- | --- | --- |
-| mistral | 7b-instruct-awq-4bit-0850 | [HF Link](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-AWQ) |
-| mistral | 7b-instruct-fp16-ac2b | [HF Link](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1) |
+| mistral | 7b-instruct-awq-4bit-332d | [HF Link](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-AWQ) |
+| mistral | 7b-instruct-fp16-c489 | [HF Link](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1) |

---

@@ -61,13 +62,13 @@ openllm repo add nightly https://github.com/bentoml/openllm-models@nightly

| Model | Version | Huggingface Link |
| --- | --- | --- |
-| qwen2 | 0.5b-instruct-fp16-fcc6 | [HF Link](https://huggingface.co/Qwen/Qwen2-0.5B-Instruct) |
-| qwen2 | 1.5b-instruct-fp16-50d8 | [HF Link](https://huggingface.co/Qwen/Qwen2-1.5B-Instruct) |
-| qwen2 | 57b-a14b-instruct-fp16-3f06 | [HF Link](https://huggingface.co/Qwen/Qwen2-57B-A14B-Instruct) |
-| qwen2 | 72b-instruct-awq-4bit-15fd | [HF Link](https://huggingface.co/Qwen/Qwen2-72B-Instruct-AWQ) |
-| qwen2 | 72b-instruct-fp16-7b44 | [HF Link](https://huggingface.co/Qwen/Qwen2-72B-Instruct) |
-| qwen2 | 7b-instruct-awq-4bit-ce1b | [HF Link](https://huggingface.co/Qwen/Qwen2-7B-Instruct-AWQ) |
-| qwen2 | 7b-instruct-fp16-844c | [HF Link](https://huggingface.co/Qwen/Qwen2-7B-Instruct) |
+| qwen2 | 0.5b-instruct-fp16-0bca | [HF Link](https://huggingface.co/Qwen/Qwen2-0.5B-Instruct) |
+| qwen2 | 1.5b-instruct-fp16-f784 | [HF Link](https://huggingface.co/Qwen/Qwen2-1.5B-Instruct) |
+| qwen2 | 57b-a14b-instruct-fp16-c9b8 | [HF Link](https://huggingface.co/Qwen/Qwen2-57B-A14B-Instruct) |
+| qwen2 | 72b-instruct-awq-4bit-13bf | [HF Link](https://huggingface.co/Qwen/Qwen2-72B-Instruct-AWQ) |
+| qwen2 | 72b-instruct-fp16-8c5b | [HF Link](https://huggingface.co/Qwen/Qwen2-72B-Instruct) |
+| qwen2 | 7b-instruct-awq-4bit-3150 | [HF Link](https://huggingface.co/Qwen/Qwen2-7B-Instruct-AWQ) |
+| qwen2 | 7b-instruct-fp16-0016 | [HF Link](https://huggingface.co/Qwen/Qwen2-7B-Instruct) |

---

@@ -76,9 +77,9 @@ openllm repo add nightly https://github.com/bentoml/openllm-models@nightly

| Model | Version | Huggingface Link |
| --- | --- | --- |
-| gemma | 2b-instruct-fp16-0856 | [HF Link](https://huggingface.co/google/gemma-2b-it) |
-| gemma | 7b-instruct-awq-4bit-d11b | [HF Link](https://huggingface.co/casperhansen/gemma-7b-it-awq) |
-| gemma | 7b-instruct-fp16-3e1c | [HF Link](https://huggingface.co/google/gemma-7b-it) |
+| gemma | 2b-instruct-fp16-f020 | [HF Link](https://huggingface.co/google/gemma-2b-it) |
+| gemma | 7b-instruct-awq-4bit-2eed | [HF Link](https://huggingface.co/casperhansen/gemma-7b-it-awq) |
+| gemma | 7b-instruct-fp16-1e96 | [HF Link](https://huggingface.co/google/gemma-7b-it) |

---

@@ -87,10 +88,10 @@ openllm repo add nightly https://github.com/bentoml/openllm-models@nightly

| Model | Version | Huggingface Link |
| --- | --- | --- |
-| llama2 | 13b-chat-fp16-921b | [HF Link](https://huggingface.co/meta-llama/Llama-2-13b-chat-hf) |
-| llama2 | 70b-chat-fp16-258c | [HF Link](https://huggingface.co/meta-llama/Llama-2-70b-chat-hf) |
-| llama2 | 7b-chat-awq-4bit-8df2 | [HF Link](https://huggingface.co/TheBloke/Llama-2-7B-Chat-AWQ) |
-| llama2 | 7b-chat-fp16-2e3a | [HF Link](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf) |
+| llama2 | 13b-chat-fp16-603a | [HF Link](https://huggingface.co/meta-llama/Llama-2-13b-chat-hf) |
+| llama2 | 70b-chat-fp16-95c5 | [HF Link](https://huggingface.co/meta-llama/Llama-2-70b-chat-hf) |
+| llama2 | 7b-chat-awq-4bit-c733 | [HF Link](https://huggingface.co/TheBloke/Llama-2-7B-Chat-AWQ) |
+| llama2 | 7b-chat-fp16-b8c6 | [HF Link](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf) |

---

@@ -99,8 +100,20 @@ openllm repo add nightly https://github.com/bentoml/openllm-models@nightly

| Model | Version | Huggingface Link |
| --- | --- | --- |
-| mixtral | 8x7b-instruct-v0.1-awq-4bit-2953 | [HF Link](https://huggingface.co/casperhansen/mixtral-instruct-awq) |
-| mixtral | 8x7b-instruct-v0.1-fp16-71c6 | [HF Link](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1) |
+| mixtral | 8x7b-instruct-v0.1-awq-4bit-7682 | [HF Link](https://huggingface.co/casperhansen/mixtral-instruct-awq) |
+| mixtral | 8x7b-instruct-v0.1-fp16-39ff | [HF Link](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1) |

---


+### <a id="llama3.1"></a> Llama-3.1

+| Model | Version | Huggingface Link |
+| --- | --- | --- |
+| llama3.1 | 70b-instruct-awq-4bit-f55b | [HF Link](https://huggingface.co/hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4) |
+| llama3.1 | 70b-instruct-fp16-2eb2 | [HF Link](https://huggingface.co/meta-llama/Meta-Llama-3.1-70B-Instruct) |
+| llama3.1 | 8b-instruct-awq-4bit-f737 | [HF Link](https://huggingface.co/hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4) |
+| llama3.1 | 8b-instruct-fp16-6d7b | [HF Link](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct) |

---
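For reference, a minimal usage sketch showing how one of the versions added in this commit could be served once the model repo is registered. It assumes the `openllm model list` and `openllm serve` subcommands of the OpenLLM CLI and the `model:version` tag format, neither of which appears in this diff, so treat the exact tag spelling as an assumption and confirm it locally first.

```bash
# Register the model repo referenced throughout this README (from the hunk context above).
openllm repo add nightly https://github.com/bentoml/openllm-models@nightly

# List locally known model tags to confirm the exact version string
# (the hash suffixes in the tables above are regenerated over time).
openllm model list

# Serve one of the newly added Llama-3.1 entries; the tag shown here is assumed.
openllm serve llama3.1:8b-instruct-fp16
```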
