From 592064982bd7c102d1427de394598f4ee99fd6d6 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
Date: Wed, 24 Jul 2024 01:12:55 +0000
Subject: [PATCH] Update README.md

---
 README.md | 59 ++++++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 36 insertions(+), 23 deletions(-)

diff --git a/README.md b/README.md
index 9c36b07f..8c83f566 100644
--- a/README.md
+++ b/README.md
@@ -21,6 +21,7 @@ openllm repo add nightly https://github.com/bentoml/openllm-models@nightly
 - [Gemma](#gemma)
 - [Llama-2](#llama2)
 - [Mixtral](#mixtral)
+- [Llama-3.1](#llama3.1)
 
 ---
 
@@ -29,10 +30,10 @@ openllm repo add nightly https://github.com/bentoml/openllm-models@nightly
 
 | Model | Version | Huggingface Link |
 | --- | --- | --- |
-| llama3 | 70b-instruct-awq-4bit-e968 | [HF Link](https://huggingface.co/casperhansen/llama-3-70b-instruct-awq) |
-| llama3 | 70b-instruct-fp16-6aed | [HF Link](https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct) |
-| llama3 | 8b-instruct-awq-4bit-f9de | [HF Link](https://huggingface.co/casperhansen/llama-3-8b-instruct-awq) |
-| llama3 | 8b-instruct-fp16-f703 | [HF Link](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) |
+| llama3 | 70b-instruct-awq-4bit-9204 | [HF Link](https://huggingface.co/casperhansen/llama-3-70b-instruct-awq) |
+| llama3 | 70b-instruct-fp16-7936 | [HF Link](https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct) |
+| llama3 | 8b-instruct-awq-4bit-985b | [HF Link](https://huggingface.co/casperhansen/llama-3-8b-instruct-awq) |
+| llama3 | 8b-instruct-fp16-8638 | [HF Link](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) |
 
 ---
 
@@ -41,7 +42,7 @@ openllm repo add nightly https://github.com/bentoml/openllm-models@nightly
 
 | Model | Version | Huggingface Link |
 | --- | --- | --- |
-| phi3 | 3.8b-instruct-fp16-30b8 | [HF Link](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct) |
+| phi3 | 3.8b-instruct-fp16-c4d8 | [HF Link](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct) |
 | phi3 | 3.8b-instruct-ggml-q4-f5db | [HF Link](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf) |
 
 ---
 
@@ -51,8 +52,8 @@ openllm repo add nightly https://github.com/bentoml/openllm-models@nightly
 
 | Model | Version | Huggingface Link |
 | --- | --- | --- |
-| mistral | 7b-instruct-awq-4bit-0850 | [HF Link](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-AWQ) |
-| mistral | 7b-instruct-fp16-ac2b | [HF Link](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1) |
+| mistral | 7b-instruct-awq-4bit-332d | [HF Link](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-AWQ) |
+| mistral | 7b-instruct-fp16-c489 | [HF Link](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1) |
 
 ---
 
@@ -61,13 +62,13 @@ openllm repo add nightly https://github.com/bentoml/openllm-models@nightly
 
 | Model | Version | Huggingface Link |
 | --- | --- | --- |
-| qwen2 | 0.5b-instruct-fp16-fcc6 | [HF Link](https://huggingface.co/Qwen/Qwen2-0.5B-Instruct) |
-| qwen2 | 1.5b-instruct-fp16-50d8 | [HF Link](https://huggingface.co/Qwen/Qwen2-1.5B-Instruct) |
-| qwen2 | 57b-a14b-instruct-fp16-3f06 | [HF Link](https://huggingface.co/Qwen/Qwen2-57B-A14B-Instruct) |
-| qwen2 | 72b-instruct-awq-4bit-15fd | [HF Link](https://huggingface.co/Qwen/Qwen2-72B-Instruct-AWQ) |
-| qwen2 | 72b-instruct-fp16-7b44 | [HF Link](https://huggingface.co/Qwen/Qwen2-72B-Instruct) |
-| qwen2 | 7b-instruct-awq-4bit-ce1b | [HF Link](https://huggingface.co/Qwen/Qwen2-7B-Instruct-AWQ) |
-| qwen2 | 7b-instruct-fp16-844c | [HF Link](https://huggingface.co/Qwen/Qwen2-7B-Instruct) |
+| qwen2 | 0.5b-instruct-fp16-0bca | [HF Link](https://huggingface.co/Qwen/Qwen2-0.5B-Instruct) |
+| qwen2 | 1.5b-instruct-fp16-f784 | [HF Link](https://huggingface.co/Qwen/Qwen2-1.5B-Instruct) |
+| qwen2 | 57b-a14b-instruct-fp16-c9b8 | [HF Link](https://huggingface.co/Qwen/Qwen2-57B-A14B-Instruct) |
+| qwen2 | 72b-instruct-awq-4bit-13bf | [HF Link](https://huggingface.co/Qwen/Qwen2-72B-Instruct-AWQ) |
+| qwen2 | 72b-instruct-fp16-8c5b | [HF Link](https://huggingface.co/Qwen/Qwen2-72B-Instruct) |
+| qwen2 | 7b-instruct-awq-4bit-3150 | [HF Link](https://huggingface.co/Qwen/Qwen2-7B-Instruct-AWQ) |
+| qwen2 | 7b-instruct-fp16-0016 | [HF Link](https://huggingface.co/Qwen/Qwen2-7B-Instruct) |
 
 ---
 
@@ -76,9 +77,9 @@ openllm repo add nightly https://github.com/bentoml/openllm-models@nightly
 
 | Model | Version | Huggingface Link |
 | --- | --- | --- |
-| gemma | 2b-instruct-fp16-0856 | [HF Link](https://huggingface.co/google/gemma-2b-it) |
-| gemma | 7b-instruct-awq-4bit-d11b | [HF Link](https://huggingface.co/casperhansen/gemma-7b-it-awq) |
-| gemma | 7b-instruct-fp16-3e1c | [HF Link](https://huggingface.co/google/gemma-7b-it) |
+| gemma | 2b-instruct-fp16-f020 | [HF Link](https://huggingface.co/google/gemma-2b-it) |
+| gemma | 7b-instruct-awq-4bit-2eed | [HF Link](https://huggingface.co/casperhansen/gemma-7b-it-awq) |
+| gemma | 7b-instruct-fp16-1e96 | [HF Link](https://huggingface.co/google/gemma-7b-it) |
 
 ---
 
@@ -87,10 +88,10 @@ openllm repo add nightly https://github.com/bentoml/openllm-models@nightly
 
 | Model | Version | Huggingface Link |
 | --- | --- | --- |
-| llama2 | 13b-chat-fp16-921b | [HF Link](https://huggingface.co/meta-llama/Llama-2-13b-chat-hf) |
-| llama2 | 70b-chat-fp16-258c | [HF Link](https://huggingface.co/meta-llama/Llama-2-70b-chat-hf) |
-| llama2 | 7b-chat-awq-4bit-8df2 | [HF Link](https://huggingface.co/TheBloke/Llama-2-7B-Chat-AWQ) |
-| llama2 | 7b-chat-fp16-2e3a | [HF Link](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf) |
+| llama2 | 13b-chat-fp16-603a | [HF Link](https://huggingface.co/meta-llama/Llama-2-13b-chat-hf) |
+| llama2 | 70b-chat-fp16-95c5 | [HF Link](https://huggingface.co/meta-llama/Llama-2-70b-chat-hf) |
+| llama2 | 7b-chat-awq-4bit-c733 | [HF Link](https://huggingface.co/TheBloke/Llama-2-7B-Chat-AWQ) |
+| llama2 | 7b-chat-fp16-b8c6 | [HF Link](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf) |
 
 ---
 
@@ -99,8 +100,20 @@ openllm repo add nightly https://github.com/bentoml/openllm-models@nightly
 
 | Model | Version | Huggingface Link |
 | --- | --- | --- |
-| mixtral | 8x7b-instruct-v0.1-awq-4bit-2953 | [HF Link](https://huggingface.co/casperhansen/mixtral-instruct-awq) |
-| mixtral | 8x7b-instruct-v0.1-fp16-71c6 | [HF Link](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1) |
+| mixtral | 8x7b-instruct-v0.1-awq-4bit-7682 | [HF Link](https://huggingface.co/casperhansen/mixtral-instruct-awq) |
+| mixtral | 8x7b-instruct-v0.1-fp16-39ff | [HF Link](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1) |
+
+---
+
+
+### Llama-3.1
+
+| Model | Version | Huggingface Link |
+| --- | --- | --- |
+| llama3.1 | 70b-instruct-awq-4bit-f55b | [HF Link](https://huggingface.co/hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4) |
+| llama3.1 | 70b-instruct-fp16-2eb2 | [HF Link](https://huggingface.co/meta-llama/Meta-Llama-3.1-70B-Instruct) |
+| llama3.1 | 8b-instruct-awq-4bit-f737 | [HF Link](https://huggingface.co/hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4) |
+| llama3.1 | 8b-instruct-fp16-6d7b | [HF Link](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct) |
 
 ---