Commit 0712e5e: Update README.md

github-actions[bot] committed Aug 12, 2024 · 1 parent bc3eb46
Showing 1 changed file (README.md) with 42 additions and 33 deletions.
Add the nightly model repo:

openllm repo add nightly https://github.com/bentoml/openllm-models@nightly

Supported models include:
- [Gemma](#gemma)
- [Llama-2](#llama2)
- [Mixtral](#mixtral)
- [Mistral-Large](#mistral-large)

---
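The model versions in the tables below are meant to be consumed through the OpenLLM CLI. The snippet below is a minimal usage sketch, assuming the 0.6-style commands `openllm model list` and `openllm serve <model>:<tag>`; the serve tag shown is illustrative and may differ from the hash-suffixed version strings listed here.

```bash
# List the model versions available from the configured repos,
# including the nightly repo added above.
openllm model list

# Serve one of the listed models locally; replace the tag with one
# reported by `openllm model list`.
openllm serve llama3.1:8b-instruct-fp16
```

As a rule of thumb, the quantized `awq-4bit` variants trade a little accuracy for a much smaller GPU-memory footprint than the corresponding `fp16` builds.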

### Llama3.1

| Model | Version | Huggingface Link |
| --- | --- | --- |
| llama3.1 | 405b-instruct-awq-4bit-675e | [HF Link](https://huggingface.co/hugging-quants/Meta-Llama-3.1-405B-Instruct-AWQ-INT4) |
| llama3.1 | 70b-instruct-awq-4bit-28ed | [HF Link](https://huggingface.co/hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4) |
| llama3.1 | 70b-instruct-fp16-b66b | [HF Link](https://huggingface.co/meta-llama/Meta-Llama-3.1-70B-Instruct) |
| llama3.1 | 8b-instruct-awq-4bit-5cb2 | [HF Link](https://huggingface.co/hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4) |
| llama3.1 | 8b-instruct-fp16-1c1c | [HF Link](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct) |

---

### Llama3

| Model | Version | Huggingface Link |
| --- | --- | --- |
| llama3 | 70b-instruct-awq-4bit-9ceb | [HF Link](https://huggingface.co/casperhansen/llama-3-70b-instruct-awq) |
| llama3 | 70b-instruct-fp16-c3e4 | [HF Link](https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct) |
| llama3 | 8b-instruct-awq-4bit-1c94 | [HF Link](https://huggingface.co/casperhansen/llama-3-8b-instruct-awq) |
| llama3 | 8b-instruct-fp16-ba7c | [HF Link](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) |

---

### Phi3

| Model | Version | Huggingface Link |
| --- | --- | --- |
| phi3 | 3.8b-instruct-fp16-37b9 | [HF Link](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct) |
| phi3 | 3.8b-instruct-ggml-q4-cf55 | [HF Link](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf) |

---

### Mistral

| Model | Version | Huggingface Link |
| --- | --- | --- |
| mistral | 7b-instruct-awq-4bit-4406 | [HF Link](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-AWQ) |
| mistral | 7b-instruct-fp16-e3bd | [HF Link](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1) |

---

### Gemma2

| Model | Version | Huggingface Link |
| --- | --- | --- |
| gemma2 | 27b-instruct-fp16-9799 | [HF Link](https://huggingface.co/google/gemma-2-27b-it) |
| gemma2 | 9b-instruct-fp16-cb2b | [HF Link](https://huggingface.co/google/gemma-2-9b-it) |

---

### Qwen2

| Model | Version | Huggingface Link |
| --- | --- | --- |
| qwen2 | 0.5b-instruct-fp16-bca0 | [HF Link](https://huggingface.co/Qwen/Qwen2-0.5B-Instruct) |
| qwen2 | 1.5b-instruct-fp16-df66 | [HF Link](https://huggingface.co/Qwen/Qwen2-1.5B-Instruct) |
| qwen2 | 57b-a14b-instruct-fp16-b847 | [HF Link](https://huggingface.co/Qwen/Qwen2-57B-A14B-Instruct) |
| qwen2 | 72b-instruct-awq-4bit-60b1 | [HF Link](https://huggingface.co/Qwen/Qwen2-72B-Instruct-AWQ) |
| qwen2 | 72b-instruct-fp16-ee8e | [HF Link](https://huggingface.co/Qwen/Qwen2-72B-Instruct) |
| qwen2 | 7b-instruct-awq-4bit-02f4 | [HF Link](https://huggingface.co/Qwen/Qwen2-7B-Instruct-AWQ) |
| qwen2 | 7b-instruct-fp16-761c | [HF Link](https://huggingface.co/Qwen/Qwen2-7B-Instruct) |

---

### Gemma

| Model | Version | Huggingface Link |
| --- | --- | --- |
| gemma | 2b-instruct-fp16-f6ee | [HF Link](https://huggingface.co/google/gemma-2b-it) |
| gemma | 7b-instruct-awq-4bit-bdb5 | [HF Link](https://huggingface.co/casperhansen/gemma-7b-it-awq) |
| gemma | 7b-instruct-fp16-35e0 | [HF Link](https://huggingface.co/google/gemma-7b-it) |

---

### Llama-2

| Model | Version | Huggingface Link |
| --- | --- | --- |
| llama2 | 13b-chat-fp16-a846 | [HF Link](https://huggingface.co/meta-llama/Llama-2-13b-chat-hf) |
| llama2 | 70b-chat-fp16-fcef | [HF Link](https://huggingface.co/meta-llama/Llama-2-70b-chat-hf) |
| llama2 | 7b-chat-awq-4bit-753b | [HF Link](https://huggingface.co/TheBloke/Llama-2-7B-Chat-AWQ) |
| llama2 | 7b-chat-fp16-dc53 | [HF Link](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf) |

---

### Mixtral

| Model | Version | Huggingface Link |
| --- | --- | --- |
| mixtral | 8x7b-instruct-v0.1-awq-4bit-7bae | [HF Link](https://huggingface.co/casperhansen/mixtral-instruct-awq) |
| mixtral | 8x7b-instruct-v0.1-fp16-1c82 | [HF Link](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1) |

---


### <a id="mistral-large"></a>

| Model | Version | Huggingface Link |
| --- | --- | --- |
| mistral-large | 123b-instruct-awq-4bit-c380 | [HF Link](https://huggingface.co/casperhansen/mistral-large-instruct-2407-awq) |
| mistral-large | 123b-instruct-fp16-a203 | [HF Link](https://huggingface.co/mistralai/Mistral-Large-Instruct-2407) |

---
