Skip to content

Commit

Permalink
feat: Support llama-3.1 405B awq (#6)
Browse files Browse the repository at this point in the history
  • Loading branch information
rickzx authored Jul 24, 2024
1 parent 5777664 commit c478576
Show file tree
Hide file tree
Showing 604 changed files with 2,879 additions and 40 deletions.
2 changes: 1 addition & 1 deletion bentoml/bentos/gemma/2b-instruct-fp16-f020/bento.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ service: service:VLLM
name: gemma
version: 2b-instruct-fp16-f020
bentoml_version: 1.3.0
creation_time: '2024-07-24T01:41:41.538750+00:00'
creation_time: '2024-07-24T08:19:28.675802+00:00'
labels:
model_name: google/gemma-2b-it
openllm_alias: 2b,2b-instruct
Expand Down
2 changes: 1 addition & 1 deletion bentoml/bentos/gemma/7b-instruct-awq-4bit-2eed/bento.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ service: service:VLLM
name: gemma
version: 7b-instruct-awq-4bit-2eed
bentoml_version: 1.3.0
creation_time: '2024-07-24T01:41:55.950169+00:00'
creation_time: '2024-07-24T08:19:43.100369+00:00'
labels:
model_name: casperhansen/gemma-7b-it-awq
openllm_alias: 7b-4bit,7b-instruct-4bit
Expand Down
2 changes: 1 addition & 1 deletion bentoml/bentos/gemma/7b-instruct-fp16-1e96/bento.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ service: service:VLLM
name: gemma
version: 7b-instruct-fp16-1e96
bentoml_version: 1.3.0
creation_time: '2024-07-24T01:41:48.722559+00:00'
creation_time: '2024-07-24T08:19:35.937782+00:00'
labels:
model_name: google/gemma-7b-it
openllm_alias: 7b,7b-instruct
Expand Down
2 changes: 1 addition & 1 deletion bentoml/bentos/llama2/13b-chat-fp16-603a/bento.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ service: service:VLLM
name: llama2
version: 13b-chat-fp16-603a
bentoml_version: 1.3.0
creation_time: '2024-07-24T01:40:08.002412+00:00'
creation_time: '2024-07-24T08:17:48.252205+00:00'
labels:
model_name: meta-llama/Llama-2-13b-chat-hf
openllm_alias: 13b,13b-chat
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# llama2:70b-chat-fp16-95c5
# llama2:70b-chat-fp16-11af

[![pypi_status](https://img.shields.io/badge/BentoML-1.3.0-informational)](https://pypi.org/project/BentoML)
[![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
service: service:VLLM
name: llama2
version: 70b-chat-fp16-95c5
version: 70b-chat-fp16-11af
bentoml_version: 1.3.0
creation_time: '2024-07-24T01:40:15.203387+00:00'
creation_time: '2024-07-24T08:17:55.444721+00:00'
labels:
model_name: meta-llama/Llama-2-70b-chat-hf
openllm_alias: 70b,70b-chat
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
dtype: half
max_model_len: 1024
model: meta-llama/Llama-2-70b-chat-hf
tensor_parallel_size: 2
extra_labels:
model_name: meta-llama/Llama-2-70b-chat-hf
openllm_alias: 70b,70b-chat
Expand Down
2 changes: 1 addition & 1 deletion bentoml/bentos/llama2/7b-chat-awq-4bit-c733/bento.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ service: service:VLLM
name: llama2
version: 7b-chat-awq-4bit-c733
bentoml_version: 1.3.0
creation_time: '2024-07-24T01:40:22.459480+00:00'
creation_time: '2024-07-24T08:18:02.586833+00:00'
labels:
model_name: TheBloke/Llama-2-7B-Chat-AWQ
openllm_alias: 7b-4bit,7b-chat-4bit
Expand Down
2 changes: 1 addition & 1 deletion bentoml/bentos/llama2/7b-chat-fp16-b8c6/bento.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ service: service:VLLM
name: llama2
version: 7b-chat-fp16-b8c6
bentoml_version: 1.3.0
creation_time: '2024-07-24T01:40:00.852316+00:00'
creation_time: '2024-07-24T08:17:41.042045+00:00'
labels:
model_name: meta-llama/Llama-2-7b-chat-hf
openllm_alias: 7b,7b-chat
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# mixtral:8x7b-instruct-v0.1-fp16-39ff
# llama3.1:405b-instruct-awq-4bit-a733

[![pypi_status](https://img.shields.io/badge/BentoML-1.3.0-informational)](https://pypi.org/project/BentoML)
[![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/)
Expand Down
Loading

0 comments on commit c478576

Please sign in to comment.