diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/README.md b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/README.md similarity index 97% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/README.md rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/README.md index a3e072c2..b94fda3d 100644 --- a/bentoml/bentos/gemma/2b-instruct-fp16-da36/README.md +++ b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/README.md @@ -1,4 +1,4 @@ -# gemma:2b-instruct-fp16-da36 +# gemma:2b-instruct-fp16-26b3 [![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML) [![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/) diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/apis/openapi.yaml b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/apis/openapi.yaml similarity index 99% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/apis/openapi.yaml rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/apis/openapi.yaml index 66d16abb..c023cee4 100644 --- a/bentoml/bentos/gemma/2b-instruct-fp16-da36/apis/openapi.yaml +++ b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/apis/openapi.yaml @@ -1036,6 +1036,39 @@ paths: tags: - Service APIs x-bentoml-name: generate + /chat/: + get: + operationId: serve_chat_html__get + responses: + '200': + content: + application/json: + schema: {} + description: Successful Response + summary: Serve Chat Html + /chat/{full_path}: + get: + operationId: catch_all__full_path__get + parameters: + - in: path + name: full_path + required: true + schema: + title: Full Path + type: string + responses: + '200': + content: + application/json: + schema: {} + description: Successful Response + '422': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + description: Validation Error + summary: Catch All /healthz: get: description: Health check endpoint. Expecting an empty response with status @@ -1076,39 +1109,6 @@ paths: description: Successful Response tags: - Infrastructure - /ui/: - get: - operationId: serve_chat_html__get - responses: - '200': - content: - application/json: - schema: {} - description: Successful Response - summary: Serve Chat Html - /ui/{full_path}: - get: - operationId: catch_all__full_path__get - parameters: - - in: path - name: full_path - required: true - schema: - title: Full Path - type: string - responses: - '200': - content: - application/json: - schema: {} - description: Successful Response - '422': - content: - application/json: - schema: - $ref: '#/components/schemas/HTTPValidationError' - description: Validation Error - summary: Catch All /v1/chat/completions: post: operationId: create_chat_completion_chat_completions_post diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/apis/schema.json b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/apis/schema.json similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/apis/schema.json rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/apis/schema.json diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/bento.yaml b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/bento.yaml similarity index 97% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/bento.yaml rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/bento.yaml index ddf7b4f5..31cfc636 100644 --- a/bentoml/bentos/gemma/2b-instruct-fp16-da36/bento.yaml +++ b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/bento.yaml @@ -1,8 +1,8 @@ service: service:VLLM name: gemma -version: 2b-instruct-fp16-da36 +version: 2b-instruct-fp16-26b3 bentoml_version: 1.2.19 -creation_time: '2024-07-05T07:31:05.934035+00:00' +creation_time: '2024-07-08T07:57:26.100524+00:00' labels: openllm_alias: 2b,2b-instruct openllm_hf_model_id: google/gemma-2b-it diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/env/docker/Dockerfile b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/env/docker/Dockerfile similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/env/docker/Dockerfile rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/env/docker/Dockerfile diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/env/docker/entrypoint.sh b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/env/docker/entrypoint.sh similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/env/docker/entrypoint.sh rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/env/docker/entrypoint.sh diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/env/python/install.sh b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/env/python/install.sh similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/env/python/install.sh rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/env/python/install.sh diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/env/python/requirements.lock.txt b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/env/python/requirements.lock.txt similarity index 99% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/env/python/requirements.lock.txt rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/env/python/requirements.lock.txt index 9118ea44..925ce193 100644 --- a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/env/python/requirements.lock.txt +++ b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/env/python/requirements.lock.txt @@ -143,5 +143,5 @@ yarl==1.9.4 zipp==3.19.2 # The following packages are considered to be unsafe in a requirements file: -pip==24.1.1 +pip==24.1.2 setuptools==70.2.0 diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/env/python/requirements.txt b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/env/python/requirements.txt similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/env/python/requirements.txt rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/env/python/requirements.txt diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/env/python/version.txt b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/env/python/version.txt similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/env/python/version.txt rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/env/python/version.txt diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/bento_constants.py b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/bento_constants.py similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/bento_constants.py rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/bento_constants.py diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/bentofile.yaml b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/bentofile.yaml similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/bentofile.yaml rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/bentofile.yaml diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/chat_templates/alpaca.jinja b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/chat_templates/alpaca.jinja similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/chat_templates/alpaca.jinja rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/chat_templates/alpaca.jinja diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/chat_templates/amberchat.jinja b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/chat_templates/amberchat.jinja similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/chat_templates/amberchat.jinja rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/chat_templates/amberchat.jinja diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/chat_templates/chatml.jinja b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/chat_templates/chatml.jinja similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/chat_templates/chatml.jinja rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/chat_templates/chatml.jinja diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/chat_templates/chatqa.jinja b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/chat_templates/chatqa.jinja similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/chat_templates/chatqa.jinja rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/chat_templates/chatqa.jinja diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/chat_templates/falcon-instruct.jinja b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/chat_templates/falcon-instruct.jinja similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/chat_templates/falcon-instruct.jinja rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/chat_templates/falcon-instruct.jinja diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/chat_templates/gemma-it.jinja b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/chat_templates/gemma-it.jinja similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/chat_templates/gemma-it.jinja rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/chat_templates/gemma-it.jinja diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/chat_templates/llama-2-chat.jinja b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/chat_templates/llama-2-chat.jinja similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/chat_templates/llama-2-chat.jinja rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/chat_templates/llama-2-chat.jinja diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/chat_templates/llama-3-chat.jinja b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/chat_templates/llama-3-chat.jinja similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/chat_templates/llama-3-chat.jinja rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/chat_templates/llama-3-chat.jinja diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/chat_templates/mistral-instruct.jinja b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/chat_templates/mistral-instruct.jinja similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/chat_templates/mistral-instruct.jinja rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/chat_templates/mistral-instruct.jinja diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/chat_templates/openchat.jinja b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/chat_templates/openchat.jinja similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/chat_templates/openchat.jinja rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/chat_templates/openchat.jinja diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/chat_templates/phi-3.jinja b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/chat_templates/phi-3.jinja similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/chat_templates/phi-3.jinja rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/chat_templates/phi-3.jinja diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/chat_templates/saiga.jinja b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/chat_templates/saiga.jinja similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/chat_templates/saiga.jinja rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/chat_templates/saiga.jinja diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/chat_templates/solar-instruct.jinja b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/chat_templates/solar-instruct.jinja similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/chat_templates/solar-instruct.jinja rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/chat_templates/solar-instruct.jinja diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/chat_templates/vicuna.jinja b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/chat_templates/vicuna.jinja similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/chat_templates/vicuna.jinja rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/chat_templates/vicuna.jinja diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/chat_templates/zephyr.jinja b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/chat_templates/zephyr.jinja similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/chat_templates/zephyr.jinja rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/chat_templates/zephyr.jinja diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/generation_configs/alpaca.json b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/generation_configs/alpaca.json similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/generation_configs/alpaca.json rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/generation_configs/alpaca.json diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/generation_configs/amberchat.json b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/generation_configs/amberchat.json similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/generation_configs/amberchat.json rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/generation_configs/amberchat.json diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/generation_configs/chatqa.json b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/generation_configs/chatqa.json similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/generation_configs/chatqa.json rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/generation_configs/chatqa.json diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/generation_configs/gemma-it.json b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/generation_configs/gemma-it.json similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/generation_configs/gemma-it.json rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/generation_configs/gemma-it.json diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/generation_configs/llama-2-chat.json b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/generation_configs/llama-2-chat.json similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/generation_configs/llama-2-chat.json rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/generation_configs/llama-2-chat.json diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/generation_configs/llama-3-chat.json b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/generation_configs/llama-3-chat.json similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/generation_configs/llama-3-chat.json rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/generation_configs/llama-3-chat.json diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/generation_configs/mistral-instruct.json b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/generation_configs/mistral-instruct.json similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/generation_configs/mistral-instruct.json rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/generation_configs/mistral-instruct.json diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/generation_configs/openchat.json b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/generation_configs/openchat.json similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/generation_configs/openchat.json rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/generation_configs/openchat.json diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/generation_configs/orca-2.json b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/generation_configs/orca-2.json similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/generation_configs/orca-2.json rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/generation_configs/orca-2.json diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/generation_configs/phi-3.json b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/generation_configs/phi-3.json similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/generation_configs/phi-3.json rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/generation_configs/phi-3.json diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/generation_configs/qwen2-chat.json b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/generation_configs/qwen2-chat.json similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/generation_configs/qwen2-chat.json rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/generation_configs/qwen2-chat.json diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/generation_configs/saiga.json b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/generation_configs/saiga.json similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/generation_configs/saiga.json rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/generation_configs/saiga.json diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/generation_configs/solar-instruct.json b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/generation_configs/solar-instruct.json similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/generation_configs/solar-instruct.json rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/generation_configs/solar-instruct.json diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/generation_configs/vicuna.json b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/generation_configs/vicuna.json similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/generation_configs/vicuna.json rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/generation_configs/vicuna.json diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/generation_configs/yi-chat.json b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/generation_configs/yi-chat.json similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/generation_configs/yi-chat.json rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/generation_configs/yi-chat.json diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/generation_configs/zephyr.json b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/generation_configs/zephyr.json similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/chat_templates/generation_configs/zephyr.json rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/chat_templates/generation_configs/zephyr.json diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/service.py b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/service.py similarity index 99% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/service.py rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/service.py index 94af0b0d..42152243 100644 --- a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/service.py +++ b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/service.py @@ -85,7 +85,7 @@ async def catch_all(full_path: str): @bentoml.mount_asgi_app(openai_api_app, path="/v1") -@bentoml.mount_asgi_app(ui_app, path="/ui") +@bentoml.mount_asgi_app(ui_app, path="/chat") @bentoml.service(**SERVICE_CONFIG) class VLLM: def __init__(self) -> None: diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/404.html b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/404.html similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/404.html rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/404.html diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/700-532b1fe2415e5859.js b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/700-532b1fe2415e5859.js similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/700-532b1fe2415e5859.js rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/700-532b1fe2415e5859.js diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/995-34374f39bb210839.js b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/995-34374f39bb210839.js similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/995-34374f39bb210839.js rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/995-34374f39bb210839.js diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/css/429544bd3cd8ce3a.css b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/css/429544bd3cd8ce3a.css similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/css/429544bd3cd8ce3a.css rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/css/429544bd3cd8ce3a.css diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/css/5b67f082b31cfc7b.css b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/css/5b67f082b31cfc7b.css similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/css/5b67f082b31cfc7b.css rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/css/5b67f082b31cfc7b.css diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/css/9e63023b20ddb15e.css b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/css/9e63023b20ddb15e.css similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/_next/static/css/9e63023b20ddb15e.css rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/_next/static/css/9e63023b20ddb15e.css diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/apple-touch-icon.png b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/apple-touch-icon.png similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/apple-touch-icon.png rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/apple-touch-icon.png diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/chat.html b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/chat.html similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/chat.html rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/chat.html diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/chat.txt b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/chat.txt similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/chat.txt rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/chat.txt diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/favicon-16x16.png b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/favicon-16x16.png similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/favicon-16x16.png rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/favicon-16x16.png diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/favicon.ico b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/favicon.ico similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/favicon.ico rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/favicon.ico diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/index.html b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/index.html similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/index.html rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/index.html diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/index.txt b/bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/index.txt similarity index 100% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/src/ui/index.txt rename to bentoml/bentos/gemma/2b-instruct-fp16-26b3/src/ui/index.txt diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/README.md b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/README.md similarity index 96% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/README.md rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/README.md index b5b98d6a..141fc9be 100644 --- a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/README.md +++ b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/README.md @@ -1,4 +1,4 @@ -# gemma:7b-instruct-awq-4bit-6a74 +# gemma:7b-instruct-awq-4bit-d670 [![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML) [![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/) diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/apis/openapi.yaml b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/apis/openapi.yaml similarity index 99% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/apis/openapi.yaml rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/apis/openapi.yaml index 8a2e4925..94ad2388 100644 --- a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/apis/openapi.yaml +++ b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/apis/openapi.yaml @@ -1036,6 +1036,39 @@ paths: tags: - Service APIs x-bentoml-name: generate + /chat/: + get: + operationId: serve_chat_html__get + responses: + '200': + content: + application/json: + schema: {} + description: Successful Response + summary: Serve Chat Html + /chat/{full_path}: + get: + operationId: catch_all__full_path__get + parameters: + - in: path + name: full_path + required: true + schema: + title: Full Path + type: string + responses: + '200': + content: + application/json: + schema: {} + description: Successful Response + '422': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + description: Validation Error + summary: Catch All /healthz: get: description: Health check endpoint. Expecting an empty response with status @@ -1076,39 +1109,6 @@ paths: description: Successful Response tags: - Infrastructure - /ui/: - get: - operationId: serve_chat_html__get - responses: - '200': - content: - application/json: - schema: {} - description: Successful Response - summary: Serve Chat Html - /ui/{full_path}: - get: - operationId: catch_all__full_path__get - parameters: - - in: path - name: full_path - required: true - schema: - title: Full Path - type: string - responses: - '200': - content: - application/json: - schema: {} - description: Successful Response - '422': - content: - application/json: - schema: - $ref: '#/components/schemas/HTTPValidationError' - description: Validation Error - summary: Catch All /v1/chat/completions: post: operationId: create_chat_completion_chat_completions_post diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/apis/schema.json b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/apis/schema.json similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/apis/schema.json rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/apis/schema.json diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/bento.yaml b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/bento.yaml similarity index 97% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/bento.yaml rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/bento.yaml index 6d824568..42840219 100644 --- a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/bento.yaml +++ b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/bento.yaml @@ -1,8 +1,8 @@ service: service:VLLM name: gemma -version: 7b-instruct-awq-4bit-6a74 +version: 7b-instruct-awq-4bit-d670 bentoml_version: 1.2.19 -creation_time: '2024-07-05T07:32:14.773611+00:00' +creation_time: '2024-07-08T07:59:18.619602+00:00' labels: openllm_alias: 7b-4bit,7b-instruct-4bit openllm_hf_model_id: casperhansen/gemma-7b-it-awq diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/env/docker/Dockerfile b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/env/docker/Dockerfile similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/env/docker/Dockerfile rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/env/docker/Dockerfile diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/env/docker/entrypoint.sh b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/env/docker/entrypoint.sh similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/env/docker/entrypoint.sh rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/env/docker/entrypoint.sh diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/env/python/install.sh b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/env/python/install.sh similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/env/python/install.sh rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/env/python/install.sh diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/env/python/requirements.lock.txt b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/env/python/requirements.lock.txt similarity index 99% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/env/python/requirements.lock.txt rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/env/python/requirements.lock.txt index 9118ea44..925ce193 100644 --- a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/env/python/requirements.lock.txt +++ b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/env/python/requirements.lock.txt @@ -143,5 +143,5 @@ yarl==1.9.4 zipp==3.19.2 # The following packages are considered to be unsafe in a requirements file: -pip==24.1.1 +pip==24.1.2 setuptools==70.2.0 diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/env/python/requirements.txt b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/env/python/requirements.txt similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/env/python/requirements.txt rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/env/python/requirements.txt diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/env/python/version.txt b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/env/python/version.txt similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/env/python/version.txt rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/env/python/version.txt diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/bento_constants.py b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/bento_constants.py similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/bento_constants.py rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/bento_constants.py diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/bentofile.yaml b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/bentofile.yaml similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/bentofile.yaml rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/bentofile.yaml diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/chat_templates/alpaca.jinja b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/chat_templates/alpaca.jinja similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/chat_templates/alpaca.jinja rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/chat_templates/alpaca.jinja diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/chat_templates/amberchat.jinja b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/chat_templates/amberchat.jinja similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/chat_templates/amberchat.jinja rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/chat_templates/amberchat.jinja diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/chat_templates/chatml.jinja b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/chat_templates/chatml.jinja similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/chat_templates/chatml.jinja rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/chat_templates/chatml.jinja diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/chat_templates/chatqa.jinja b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/chat_templates/chatqa.jinja similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/chat_templates/chatqa.jinja rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/chat_templates/chatqa.jinja diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/chat_templates/falcon-instruct.jinja b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/chat_templates/falcon-instruct.jinja similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/chat_templates/falcon-instruct.jinja rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/chat_templates/falcon-instruct.jinja diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/chat_templates/gemma-it.jinja b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/chat_templates/gemma-it.jinja similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/chat_templates/gemma-it.jinja rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/chat_templates/gemma-it.jinja diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/chat_templates/llama-2-chat.jinja b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/chat_templates/llama-2-chat.jinja similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/chat_templates/llama-2-chat.jinja rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/chat_templates/llama-2-chat.jinja diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/chat_templates/llama-3-chat.jinja b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/chat_templates/llama-3-chat.jinja similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/chat_templates/llama-3-chat.jinja rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/chat_templates/llama-3-chat.jinja diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/chat_templates/mistral-instruct.jinja b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/chat_templates/mistral-instruct.jinja similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/chat_templates/mistral-instruct.jinja rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/chat_templates/mistral-instruct.jinja diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/chat_templates/openchat.jinja b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/chat_templates/openchat.jinja similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/chat_templates/openchat.jinja rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/chat_templates/openchat.jinja diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/chat_templates/phi-3.jinja b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/chat_templates/phi-3.jinja similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/chat_templates/phi-3.jinja rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/chat_templates/phi-3.jinja diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/chat_templates/saiga.jinja b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/chat_templates/saiga.jinja similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/chat_templates/saiga.jinja rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/chat_templates/saiga.jinja diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/chat_templates/solar-instruct.jinja b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/chat_templates/solar-instruct.jinja similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/chat_templates/solar-instruct.jinja rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/chat_templates/solar-instruct.jinja diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/chat_templates/vicuna.jinja b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/chat_templates/vicuna.jinja similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/chat_templates/vicuna.jinja rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/chat_templates/vicuna.jinja diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/chat_templates/zephyr.jinja b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/chat_templates/zephyr.jinja similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/chat_templates/zephyr.jinja rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/chat_templates/zephyr.jinja diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/generation_configs/alpaca.json b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/generation_configs/alpaca.json similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/generation_configs/alpaca.json rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/generation_configs/alpaca.json diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/generation_configs/amberchat.json b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/generation_configs/amberchat.json similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/generation_configs/amberchat.json rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/generation_configs/amberchat.json diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/generation_configs/chatqa.json b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/generation_configs/chatqa.json similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/generation_configs/chatqa.json rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/generation_configs/chatqa.json diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/generation_configs/gemma-it.json b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/generation_configs/gemma-it.json similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/generation_configs/gemma-it.json rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/generation_configs/gemma-it.json diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/generation_configs/llama-2-chat.json b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/generation_configs/llama-2-chat.json similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/generation_configs/llama-2-chat.json rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/generation_configs/llama-2-chat.json diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/generation_configs/llama-3-chat.json b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/generation_configs/llama-3-chat.json similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/generation_configs/llama-3-chat.json rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/generation_configs/llama-3-chat.json diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/generation_configs/mistral-instruct.json b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/generation_configs/mistral-instruct.json similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/generation_configs/mistral-instruct.json rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/generation_configs/mistral-instruct.json diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/generation_configs/openchat.json b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/generation_configs/openchat.json similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/generation_configs/openchat.json rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/generation_configs/openchat.json diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/generation_configs/orca-2.json b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/generation_configs/orca-2.json similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/generation_configs/orca-2.json rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/generation_configs/orca-2.json diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/generation_configs/phi-3.json b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/generation_configs/phi-3.json similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/generation_configs/phi-3.json rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/generation_configs/phi-3.json diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/generation_configs/qwen2-chat.json b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/generation_configs/qwen2-chat.json similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/generation_configs/qwen2-chat.json rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/generation_configs/qwen2-chat.json diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/generation_configs/saiga.json b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/generation_configs/saiga.json similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/generation_configs/saiga.json rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/generation_configs/saiga.json diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/generation_configs/solar-instruct.json b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/generation_configs/solar-instruct.json similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/generation_configs/solar-instruct.json rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/generation_configs/solar-instruct.json diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/generation_configs/vicuna.json b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/generation_configs/vicuna.json similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/generation_configs/vicuna.json rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/generation_configs/vicuna.json diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/generation_configs/yi-chat.json b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/generation_configs/yi-chat.json similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/generation_configs/yi-chat.json rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/generation_configs/yi-chat.json diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/generation_configs/zephyr.json b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/generation_configs/zephyr.json similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/chat_templates/generation_configs/zephyr.json rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/chat_templates/generation_configs/zephyr.json diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/service.py b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/service.py similarity index 99% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/service.py rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/service.py index 94af0b0d..42152243 100644 --- a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/service.py +++ b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/service.py @@ -85,7 +85,7 @@ async def catch_all(full_path: str): @bentoml.mount_asgi_app(openai_api_app, path="/v1") -@bentoml.mount_asgi_app(ui_app, path="/ui") +@bentoml.mount_asgi_app(ui_app, path="/chat") @bentoml.service(**SERVICE_CONFIG) class VLLM: def __init__(self) -> None: diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/404.html b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/404.html similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/404.html rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/404.html diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/700-532b1fe2415e5859.js b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/700-532b1fe2415e5859.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/700-532b1fe2415e5859.js rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/700-532b1fe2415e5859.js diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/995-34374f39bb210839.js b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/995-34374f39bb210839.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/995-34374f39bb210839.js rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/995-34374f39bb210839.js diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/css/429544bd3cd8ce3a.css b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/css/429544bd3cd8ce3a.css similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/css/429544bd3cd8ce3a.css rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/css/429544bd3cd8ce3a.css diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/css/5b67f082b31cfc7b.css b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/css/5b67f082b31cfc7b.css similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/css/5b67f082b31cfc7b.css rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/css/5b67f082b31cfc7b.css diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/css/9e63023b20ddb15e.css b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/css/9e63023b20ddb15e.css similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/_next/static/css/9e63023b20ddb15e.css rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/_next/static/css/9e63023b20ddb15e.css diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/apple-touch-icon.png b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/apple-touch-icon.png similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/apple-touch-icon.png rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/apple-touch-icon.png diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/chat.html b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/chat.html similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/chat.html rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/chat.html diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/chat.txt b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/chat.txt similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/chat.txt rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/chat.txt diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/favicon-16x16.png b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/favicon-16x16.png similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/favicon-16x16.png rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/favicon-16x16.png diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/favicon.ico b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/favicon.ico similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/favicon.ico rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/favicon.ico diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/index.html b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/index.html similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/index.html rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/index.html diff --git a/bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/index.txt b/bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/index.txt similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-awq-4bit-6a74/src/ui/index.txt rename to bentoml/bentos/gemma/7b-instruct-awq-4bit-d670/src/ui/index.txt diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/README.md b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/README.md similarity index 97% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/README.md rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/README.md index eb51bd75..8b67b935 100644 --- a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/README.md +++ b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/README.md @@ -1,4 +1,4 @@ -# gemma:7b-instruct-fp16-dafc +# gemma:7b-instruct-fp16-ae3e [![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML) [![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/) diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/apis/openapi.yaml b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/apis/openapi.yaml similarity index 99% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/apis/openapi.yaml rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/apis/openapi.yaml index 7e2b81c6..6bdaf9cf 100644 --- a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/apis/openapi.yaml +++ b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/apis/openapi.yaml @@ -1036,6 +1036,39 @@ paths: tags: - Service APIs x-bentoml-name: generate + /chat/: + get: + operationId: serve_chat_html__get + responses: + '200': + content: + application/json: + schema: {} + description: Successful Response + summary: Serve Chat Html + /chat/{full_path}: + get: + operationId: catch_all__full_path__get + parameters: + - in: path + name: full_path + required: true + schema: + title: Full Path + type: string + responses: + '200': + content: + application/json: + schema: {} + description: Successful Response + '422': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + description: Validation Error + summary: Catch All /healthz: get: description: Health check endpoint. Expecting an empty response with status @@ -1076,39 +1109,6 @@ paths: description: Successful Response tags: - Infrastructure - /ui/: - get: - operationId: serve_chat_html__get - responses: - '200': - content: - application/json: - schema: {} - description: Successful Response - summary: Serve Chat Html - /ui/{full_path}: - get: - operationId: catch_all__full_path__get - parameters: - - in: path - name: full_path - required: true - schema: - title: Full Path - type: string - responses: - '200': - content: - application/json: - schema: {} - description: Successful Response - '422': - content: - application/json: - schema: - $ref: '#/components/schemas/HTTPValidationError' - description: Validation Error - summary: Catch All /v1/chat/completions: post: operationId: create_chat_completion_chat_completions_post diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/apis/schema.json b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/apis/schema.json similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/apis/schema.json rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/apis/schema.json diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/bento.yaml b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/bento.yaml similarity index 97% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/bento.yaml rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/bento.yaml index 4d5c44e3..d34ccd66 100644 --- a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/bento.yaml +++ b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/bento.yaml @@ -1,8 +1,8 @@ service: service:VLLM name: gemma -version: 7b-instruct-fp16-dafc +version: 7b-instruct-fp16-ae3e bentoml_version: 1.2.19 -creation_time: '2024-07-05T07:31:38.255529+00:00' +creation_time: '2024-07-08T07:58:22.853090+00:00' labels: openllm_alias: 7b,7b-instruct openllm_hf_model_id: google/gemma-7b-it diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/env/docker/Dockerfile b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/env/docker/Dockerfile similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/env/docker/Dockerfile rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/env/docker/Dockerfile diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/env/docker/entrypoint.sh b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/env/docker/entrypoint.sh similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/env/docker/entrypoint.sh rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/env/docker/entrypoint.sh diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/env/python/install.sh b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/env/python/install.sh similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/env/python/install.sh rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/env/python/install.sh diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/env/python/requirements.lock.txt b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/env/python/requirements.lock.txt similarity index 99% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/env/python/requirements.lock.txt rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/env/python/requirements.lock.txt index 9118ea44..925ce193 100644 --- a/bentoml/bentos/llama2/13b-chat-fp16-0b04/env/python/requirements.lock.txt +++ b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/env/python/requirements.lock.txt @@ -143,5 +143,5 @@ yarl==1.9.4 zipp==3.19.2 # The following packages are considered to be unsafe in a requirements file: -pip==24.1.1 +pip==24.1.2 setuptools==70.2.0 diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/env/python/requirements.txt b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/env/python/requirements.txt similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/env/python/requirements.txt rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/env/python/requirements.txt diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/env/python/version.txt b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/env/python/version.txt similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/env/python/version.txt rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/env/python/version.txt diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/bento_constants.py b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/bento_constants.py similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/bento_constants.py rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/bento_constants.py diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/bentofile.yaml b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/bentofile.yaml similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/bentofile.yaml rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/bentofile.yaml diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/chat_templates/alpaca.jinja b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/chat_templates/alpaca.jinja similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/chat_templates/alpaca.jinja rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/chat_templates/alpaca.jinja diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/chat_templates/amberchat.jinja b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/chat_templates/amberchat.jinja similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/chat_templates/amberchat.jinja rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/chat_templates/amberchat.jinja diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/chat_templates/chatml.jinja b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/chat_templates/chatml.jinja similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/chat_templates/chatml.jinja rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/chat_templates/chatml.jinja diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/chat_templates/chatqa.jinja b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/chat_templates/chatqa.jinja similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/chat_templates/chatqa.jinja rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/chat_templates/chatqa.jinja diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/chat_templates/falcon-instruct.jinja b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/chat_templates/falcon-instruct.jinja similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/chat_templates/falcon-instruct.jinja rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/chat_templates/falcon-instruct.jinja diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/chat_templates/gemma-it.jinja b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/chat_templates/gemma-it.jinja similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/chat_templates/gemma-it.jinja rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/chat_templates/gemma-it.jinja diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/chat_templates/llama-2-chat.jinja b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/chat_templates/llama-2-chat.jinja similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/chat_templates/llama-2-chat.jinja rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/chat_templates/llama-2-chat.jinja diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/chat_templates/llama-3-chat.jinja b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/chat_templates/llama-3-chat.jinja similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/chat_templates/llama-3-chat.jinja rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/chat_templates/llama-3-chat.jinja diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/chat_templates/mistral-instruct.jinja b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/chat_templates/mistral-instruct.jinja similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/chat_templates/mistral-instruct.jinja rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/chat_templates/mistral-instruct.jinja diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/chat_templates/openchat.jinja b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/chat_templates/openchat.jinja similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/chat_templates/openchat.jinja rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/chat_templates/openchat.jinja diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/chat_templates/phi-3.jinja b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/chat_templates/phi-3.jinja similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/chat_templates/phi-3.jinja rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/chat_templates/phi-3.jinja diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/chat_templates/saiga.jinja b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/chat_templates/saiga.jinja similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/chat_templates/saiga.jinja rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/chat_templates/saiga.jinja diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/chat_templates/solar-instruct.jinja b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/chat_templates/solar-instruct.jinja similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/chat_templates/solar-instruct.jinja rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/chat_templates/solar-instruct.jinja diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/chat_templates/vicuna.jinja b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/chat_templates/vicuna.jinja similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/chat_templates/vicuna.jinja rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/chat_templates/vicuna.jinja diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/chat_templates/zephyr.jinja b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/chat_templates/zephyr.jinja similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/chat_templates/zephyr.jinja rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/chat_templates/zephyr.jinja diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/generation_configs/alpaca.json b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/generation_configs/alpaca.json similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/generation_configs/alpaca.json rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/generation_configs/alpaca.json diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/generation_configs/amberchat.json b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/generation_configs/amberchat.json similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/generation_configs/amberchat.json rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/generation_configs/amberchat.json diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/generation_configs/chatqa.json b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/generation_configs/chatqa.json similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/generation_configs/chatqa.json rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/generation_configs/chatqa.json diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/generation_configs/gemma-it.json b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/generation_configs/gemma-it.json similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/generation_configs/gemma-it.json rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/generation_configs/gemma-it.json diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/generation_configs/llama-2-chat.json b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/generation_configs/llama-2-chat.json similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/generation_configs/llama-2-chat.json rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/generation_configs/llama-2-chat.json diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/generation_configs/llama-3-chat.json b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/generation_configs/llama-3-chat.json similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/generation_configs/llama-3-chat.json rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/generation_configs/llama-3-chat.json diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/generation_configs/mistral-instruct.json b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/generation_configs/mistral-instruct.json similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/generation_configs/mistral-instruct.json rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/generation_configs/mistral-instruct.json diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/generation_configs/openchat.json b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/generation_configs/openchat.json similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/generation_configs/openchat.json rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/generation_configs/openchat.json diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/generation_configs/orca-2.json b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/generation_configs/orca-2.json similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/generation_configs/orca-2.json rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/generation_configs/orca-2.json diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/generation_configs/phi-3.json b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/generation_configs/phi-3.json similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/generation_configs/phi-3.json rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/generation_configs/phi-3.json diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/generation_configs/qwen2-chat.json b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/generation_configs/qwen2-chat.json similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/generation_configs/qwen2-chat.json rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/generation_configs/qwen2-chat.json diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/generation_configs/saiga.json b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/generation_configs/saiga.json similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/generation_configs/saiga.json rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/generation_configs/saiga.json diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/generation_configs/solar-instruct.json b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/generation_configs/solar-instruct.json similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/generation_configs/solar-instruct.json rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/generation_configs/solar-instruct.json diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/generation_configs/vicuna.json b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/generation_configs/vicuna.json similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/generation_configs/vicuna.json rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/generation_configs/vicuna.json diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/generation_configs/yi-chat.json b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/generation_configs/yi-chat.json similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/generation_configs/yi-chat.json rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/generation_configs/yi-chat.json diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/generation_configs/zephyr.json b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/generation_configs/zephyr.json similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/chat_templates/generation_configs/zephyr.json rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/chat_templates/generation_configs/zephyr.json diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/service.py b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/service.py similarity index 99% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/service.py rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/service.py index 94af0b0d..42152243 100644 --- a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/service.py +++ b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/service.py @@ -85,7 +85,7 @@ async def catch_all(full_path: str): @bentoml.mount_asgi_app(openai_api_app, path="/v1") -@bentoml.mount_asgi_app(ui_app, path="/ui") +@bentoml.mount_asgi_app(ui_app, path="/chat") @bentoml.service(**SERVICE_CONFIG) class VLLM: def __init__(self) -> None: diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/404.html b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/404.html similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/404.html rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/404.html diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/700-532b1fe2415e5859.js b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/700-532b1fe2415e5859.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/700-532b1fe2415e5859.js rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/700-532b1fe2415e5859.js diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/995-34374f39bb210839.js b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/995-34374f39bb210839.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/995-34374f39bb210839.js rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/995-34374f39bb210839.js diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/css/429544bd3cd8ce3a.css b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/css/429544bd3cd8ce3a.css similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/css/429544bd3cd8ce3a.css rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/css/429544bd3cd8ce3a.css diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/css/5b67f082b31cfc7b.css b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/css/5b67f082b31cfc7b.css similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/css/5b67f082b31cfc7b.css rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/css/5b67f082b31cfc7b.css diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/css/9e63023b20ddb15e.css b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/css/9e63023b20ddb15e.css similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/_next/static/css/9e63023b20ddb15e.css rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/_next/static/css/9e63023b20ddb15e.css diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/apple-touch-icon.png b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/apple-touch-icon.png similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/apple-touch-icon.png rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/apple-touch-icon.png diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/chat.html b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/chat.html similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/chat.html rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/chat.html diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/chat.txt b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/chat.txt similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/chat.txt rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/chat.txt diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/favicon-16x16.png b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/favicon-16x16.png similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/favicon-16x16.png rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/favicon-16x16.png diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/favicon.ico b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/favicon.ico similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/favicon.ico rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/favicon.ico diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/index.html b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/index.html similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/index.html rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/index.html diff --git a/bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/index.txt b/bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/index.txt similarity index 100% rename from bentoml/bentos/gemma/7b-instruct-fp16-dafc/src/ui/index.txt rename to bentoml/bentos/gemma/7b-instruct-fp16-ae3e/src/ui/index.txt diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/README.md b/bentoml/bentos/llama2/13b-chat-fp16-4059/README.md similarity index 97% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/README.md rename to bentoml/bentos/llama2/13b-chat-fp16-4059/README.md index 77ada4da..13f594c3 100644 --- a/bentoml/bentos/llama2/70b-chat-fp16-4512/README.md +++ b/bentoml/bentos/llama2/13b-chat-fp16-4059/README.md @@ -1,4 +1,4 @@ -# llama2:70b-chat-fp16-4512 +# llama2:13b-chat-fp16-4059 [![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML) [![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/) diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/apis/openapi.yaml b/bentoml/bentos/llama2/13b-chat-fp16-4059/apis/openapi.yaml similarity index 99% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/apis/openapi.yaml rename to bentoml/bentos/llama2/13b-chat-fp16-4059/apis/openapi.yaml index ead9279a..f6916886 100644 --- a/bentoml/bentos/llama2/13b-chat-fp16-0b04/apis/openapi.yaml +++ b/bentoml/bentos/llama2/13b-chat-fp16-4059/apis/openapi.yaml @@ -1036,6 +1036,39 @@ paths: tags: - Service APIs x-bentoml-name: generate + /chat/: + get: + operationId: serve_chat_html__get + responses: + '200': + content: + application/json: + schema: {} + description: Successful Response + summary: Serve Chat Html + /chat/{full_path}: + get: + operationId: catch_all__full_path__get + parameters: + - in: path + name: full_path + required: true + schema: + title: Full Path + type: string + responses: + '200': + content: + application/json: + schema: {} + description: Successful Response + '422': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + description: Validation Error + summary: Catch All /healthz: get: description: Health check endpoint. Expecting an empty response with status @@ -1076,39 +1109,6 @@ paths: description: Successful Response tags: - Infrastructure - /ui/: - get: - operationId: serve_chat_html__get - responses: - '200': - content: - application/json: - schema: {} - description: Successful Response - summary: Serve Chat Html - /ui/{full_path}: - get: - operationId: catch_all__full_path__get - parameters: - - in: path - name: full_path - required: true - schema: - title: Full Path - type: string - responses: - '200': - content: - application/json: - schema: {} - description: Successful Response - '422': - content: - application/json: - schema: - $ref: '#/components/schemas/HTTPValidationError' - description: Validation Error - summary: Catch All /v1/chat/completions: post: operationId: create_chat_completion_chat_completions_post diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/apis/schema.json b/bentoml/bentos/llama2/13b-chat-fp16-4059/apis/schema.json similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/apis/schema.json rename to bentoml/bentos/llama2/13b-chat-fp16-4059/apis/schema.json diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/bento.yaml b/bentoml/bentos/llama2/13b-chat-fp16-4059/bento.yaml similarity index 97% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/bento.yaml rename to bentoml/bentos/llama2/13b-chat-fp16-4059/bento.yaml index 6f8aaf79..b7ac776e 100644 --- a/bentoml/bentos/llama2/13b-chat-fp16-0b04/bento.yaml +++ b/bentoml/bentos/llama2/13b-chat-fp16-4059/bento.yaml @@ -1,8 +1,8 @@ service: service:VLLM name: llama2 -version: 13b-chat-fp16-0b04 +version: 13b-chat-fp16-4059 bentoml_version: 1.2.19 -creation_time: '2024-07-05T07:26:13.248771+00:00' +creation_time: '2024-07-08T07:49:27.527664+00:00' labels: openllm_alias: 13b,13b-chat openllm_hf_model_id: meta-llama/Llama-2-13b-chat-hf diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/env/docker/Dockerfile b/bentoml/bentos/llama2/13b-chat-fp16-4059/env/docker/Dockerfile similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/env/docker/Dockerfile rename to bentoml/bentos/llama2/13b-chat-fp16-4059/env/docker/Dockerfile diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/env/docker/entrypoint.sh b/bentoml/bentos/llama2/13b-chat-fp16-4059/env/docker/entrypoint.sh similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/env/docker/entrypoint.sh rename to bentoml/bentos/llama2/13b-chat-fp16-4059/env/docker/entrypoint.sh diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/env/python/install.sh b/bentoml/bentos/llama2/13b-chat-fp16-4059/env/python/install.sh similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/env/python/install.sh rename to bentoml/bentos/llama2/13b-chat-fp16-4059/env/python/install.sh diff --git a/bentoml/bentos/gemma/2b-instruct-fp16-da36/env/python/requirements.lock.txt b/bentoml/bentos/llama2/13b-chat-fp16-4059/env/python/requirements.lock.txt similarity index 99% rename from bentoml/bentos/gemma/2b-instruct-fp16-da36/env/python/requirements.lock.txt rename to bentoml/bentos/llama2/13b-chat-fp16-4059/env/python/requirements.lock.txt index 9118ea44..925ce193 100644 --- a/bentoml/bentos/gemma/2b-instruct-fp16-da36/env/python/requirements.lock.txt +++ b/bentoml/bentos/llama2/13b-chat-fp16-4059/env/python/requirements.lock.txt @@ -143,5 +143,5 @@ yarl==1.9.4 zipp==3.19.2 # The following packages are considered to be unsafe in a requirements file: -pip==24.1.1 +pip==24.1.2 setuptools==70.2.0 diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/env/python/requirements.txt b/bentoml/bentos/llama2/13b-chat-fp16-4059/env/python/requirements.txt similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/env/python/requirements.txt rename to bentoml/bentos/llama2/13b-chat-fp16-4059/env/python/requirements.txt diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/env/python/version.txt b/bentoml/bentos/llama2/13b-chat-fp16-4059/env/python/version.txt similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/env/python/version.txt rename to bentoml/bentos/llama2/13b-chat-fp16-4059/env/python/version.txt diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/bento_constants.py b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/bento_constants.py similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/bento_constants.py rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/bento_constants.py diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/bentofile.yaml b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/bentofile.yaml similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/bentofile.yaml rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/bentofile.yaml diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/chat_templates/alpaca.jinja b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/chat_templates/alpaca.jinja similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/chat_templates/alpaca.jinja rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/chat_templates/alpaca.jinja diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/chat_templates/amberchat.jinja b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/chat_templates/amberchat.jinja similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/chat_templates/amberchat.jinja rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/chat_templates/amberchat.jinja diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/chat_templates/chatml.jinja b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/chat_templates/chatml.jinja similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/chat_templates/chatml.jinja rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/chat_templates/chatml.jinja diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/chat_templates/chatqa.jinja b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/chat_templates/chatqa.jinja similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/chat_templates/chatqa.jinja rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/chat_templates/chatqa.jinja diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/chat_templates/falcon-instruct.jinja b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/chat_templates/falcon-instruct.jinja similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/chat_templates/falcon-instruct.jinja rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/chat_templates/falcon-instruct.jinja diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/chat_templates/gemma-it.jinja b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/chat_templates/gemma-it.jinja similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/chat_templates/gemma-it.jinja rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/chat_templates/gemma-it.jinja diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/chat_templates/llama-2-chat.jinja b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/chat_templates/llama-2-chat.jinja similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/chat_templates/llama-2-chat.jinja rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/chat_templates/llama-2-chat.jinja diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/chat_templates/llama-3-chat.jinja b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/chat_templates/llama-3-chat.jinja similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/chat_templates/llama-3-chat.jinja rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/chat_templates/llama-3-chat.jinja diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/chat_templates/mistral-instruct.jinja b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/chat_templates/mistral-instruct.jinja similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/chat_templates/mistral-instruct.jinja rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/chat_templates/mistral-instruct.jinja diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/chat_templates/openchat.jinja b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/chat_templates/openchat.jinja similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/chat_templates/openchat.jinja rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/chat_templates/openchat.jinja diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/chat_templates/phi-3.jinja b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/chat_templates/phi-3.jinja similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/chat_templates/phi-3.jinja rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/chat_templates/phi-3.jinja diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/chat_templates/saiga.jinja b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/chat_templates/saiga.jinja similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/chat_templates/saiga.jinja rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/chat_templates/saiga.jinja diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/chat_templates/solar-instruct.jinja b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/chat_templates/solar-instruct.jinja similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/chat_templates/solar-instruct.jinja rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/chat_templates/solar-instruct.jinja diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/chat_templates/vicuna.jinja b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/chat_templates/vicuna.jinja similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/chat_templates/vicuna.jinja rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/chat_templates/vicuna.jinja diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/chat_templates/zephyr.jinja b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/chat_templates/zephyr.jinja similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/chat_templates/zephyr.jinja rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/chat_templates/zephyr.jinja diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/generation_configs/alpaca.json b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/generation_configs/alpaca.json similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/generation_configs/alpaca.json rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/generation_configs/alpaca.json diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/generation_configs/amberchat.json b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/generation_configs/amberchat.json similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/generation_configs/amberchat.json rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/generation_configs/amberchat.json diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/generation_configs/chatqa.json b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/generation_configs/chatqa.json similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/generation_configs/chatqa.json rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/generation_configs/chatqa.json diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/generation_configs/gemma-it.json b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/generation_configs/gemma-it.json similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/generation_configs/gemma-it.json rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/generation_configs/gemma-it.json diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/generation_configs/llama-2-chat.json b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/generation_configs/llama-2-chat.json similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/generation_configs/llama-2-chat.json rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/generation_configs/llama-2-chat.json diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/generation_configs/llama-3-chat.json b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/generation_configs/llama-3-chat.json similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/generation_configs/llama-3-chat.json rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/generation_configs/llama-3-chat.json diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/generation_configs/mistral-instruct.json b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/generation_configs/mistral-instruct.json similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/generation_configs/mistral-instruct.json rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/generation_configs/mistral-instruct.json diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/generation_configs/openchat.json b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/generation_configs/openchat.json similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/generation_configs/openchat.json rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/generation_configs/openchat.json diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/generation_configs/orca-2.json b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/generation_configs/orca-2.json similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/generation_configs/orca-2.json rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/generation_configs/orca-2.json diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/generation_configs/phi-3.json b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/generation_configs/phi-3.json similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/generation_configs/phi-3.json rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/generation_configs/phi-3.json diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/generation_configs/qwen2-chat.json b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/generation_configs/qwen2-chat.json similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/generation_configs/qwen2-chat.json rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/generation_configs/qwen2-chat.json diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/generation_configs/saiga.json b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/generation_configs/saiga.json similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/generation_configs/saiga.json rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/generation_configs/saiga.json diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/generation_configs/solar-instruct.json b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/generation_configs/solar-instruct.json similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/generation_configs/solar-instruct.json rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/generation_configs/solar-instruct.json diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/generation_configs/vicuna.json b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/generation_configs/vicuna.json similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/generation_configs/vicuna.json rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/generation_configs/vicuna.json diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/generation_configs/yi-chat.json b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/generation_configs/yi-chat.json similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/generation_configs/yi-chat.json rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/generation_configs/yi-chat.json diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/generation_configs/zephyr.json b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/generation_configs/zephyr.json similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/chat_templates/generation_configs/zephyr.json rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/chat_templates/generation_configs/zephyr.json diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/service.py b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/service.py similarity index 99% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/service.py rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/service.py index 94af0b0d..42152243 100644 --- a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/service.py +++ b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/service.py @@ -85,7 +85,7 @@ async def catch_all(full_path: str): @bentoml.mount_asgi_app(openai_api_app, path="/v1") -@bentoml.mount_asgi_app(ui_app, path="/ui") +@bentoml.mount_asgi_app(ui_app, path="/chat") @bentoml.service(**SERVICE_CONFIG) class VLLM: def __init__(self) -> None: diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/404.html b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/404.html similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/404.html rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/404.html diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/700-532b1fe2415e5859.js b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/700-532b1fe2415e5859.js similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/700-532b1fe2415e5859.js rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/700-532b1fe2415e5859.js diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/995-34374f39bb210839.js b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/995-34374f39bb210839.js similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/995-34374f39bb210839.js rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/995-34374f39bb210839.js diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/css/429544bd3cd8ce3a.css b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/css/429544bd3cd8ce3a.css similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/css/429544bd3cd8ce3a.css rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/css/429544bd3cd8ce3a.css diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/css/5b67f082b31cfc7b.css b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/css/5b67f082b31cfc7b.css similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/css/5b67f082b31cfc7b.css rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/css/5b67f082b31cfc7b.css diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/css/9e63023b20ddb15e.css b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/css/9e63023b20ddb15e.css similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/_next/static/css/9e63023b20ddb15e.css rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/_next/static/css/9e63023b20ddb15e.css diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/apple-touch-icon.png b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/apple-touch-icon.png similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/apple-touch-icon.png rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/apple-touch-icon.png diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/chat.html b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/chat.html similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/chat.html rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/chat.html diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/chat.txt b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/chat.txt similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/chat.txt rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/chat.txt diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/favicon-16x16.png b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/favicon-16x16.png similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/favicon-16x16.png rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/favicon-16x16.png diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/favicon.ico b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/favicon.ico similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/favicon.ico rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/favicon.ico diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/index.html b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/index.html similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/index.html rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/index.html diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/index.txt b/bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/index.txt similarity index 100% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/src/ui/index.txt rename to bentoml/bentos/llama2/13b-chat-fp16-4059/src/ui/index.txt diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/env/python/requirements.lock.txt b/bentoml/bentos/llama2/70b-chat-fp16-4512/env/python/requirements.lock.txt deleted file mode 100644 index 9118ea44..00000000 --- a/bentoml/bentos/llama2/70b-chat-fp16-4512/env/python/requirements.lock.txt +++ /dev/null @@ -1,147 +0,0 @@ -aiohttp==3.9.5 -aiosignal==1.3.1 -annotated-types==0.7.0 -anyio==4.4.0 -appdirs==1.4.4 -asgiref==3.8.1 -async-timeout==4.0.3 -attrs==23.2.0 -bentoml==1.2.19 -build==1.2.1 -cattrs==23.1.2 -certifi==2024.7.4 -charset-normalizer==3.3.2 -circus==0.18.0 -click==8.1.7 -click-option-group==0.5.6 -cloudpickle==3.0.0 -cmake==3.30.0 -deepmerge==1.1.1 -deprecated==1.2.14 -diskcache==5.6.3 -distro==1.9.0 -dnspython==2.6.1 -email-validator==2.2.0 -exceptiongroup==1.2.1 -fastapi==0.111.0 -fastapi-cli==0.0.4 -filelock==3.15.4 -frozenlist==1.4.1 -fs==2.4.16 -fsspec==2024.6.1 -h11==0.14.0 -httpcore==1.0.5 -httptools==0.6.1 -httpx==0.27.0 -httpx-ws==0.6.0 -huggingface-hub==0.23.4 -idna==3.7 -importlib-metadata==6.11.0 -inflection==0.5.1 -interegular==0.3.3 -jinja2==3.1.4 -joblib==1.4.2 -jsonschema==4.22.0 -jsonschema-specifications==2023.12.1 -lark==1.1.9 -llvmlite==0.43.0 -lm-format-enforcer==0.10.1 -markdown-it-py==3.0.0 -markupsafe==2.1.5 -mdurl==0.1.2 -mpmath==1.3.0 -msgpack==1.0.8 -multidict==6.0.5 -nest-asyncio==1.6.0 -networkx==3.2.1 -ninja==1.11.1.1 -numba==0.60.0 -numpy==1.26.0 -nvidia-cublas-cu12==12.1.3.1 -nvidia-cuda-cupti-cu12==12.1.105 -nvidia-cuda-nvrtc-cu12==12.1.105 -nvidia-cuda-runtime-cu12==12.1.105 -nvidia-cudnn-cu12==8.9.2.26 -nvidia-cufft-cu12==11.0.2.54 -nvidia-curand-cu12==10.3.2.106 -nvidia-cusolver-cu12==11.4.5.107 -nvidia-cusparse-cu12==12.1.0.106 -nvidia-ml-py==11.525.150 -nvidia-nccl-cu12==2.20.5 -nvidia-nvjitlink-cu12==12.5.82 -nvidia-nvtx-cu12==12.1.105 -openai==1.35.10 -opentelemetry-api==1.20.0 -opentelemetry-instrumentation==0.41b0 -opentelemetry-instrumentation-aiohttp-client==0.41b0 -opentelemetry-instrumentation-asgi==0.41b0 -opentelemetry-sdk==1.20.0 -opentelemetry-semantic-conventions==0.41b0 -opentelemetry-util-http==0.41b0 -orjson==3.10.6 -outlines==0.0.34 -packaging==24.1 -pathspec==0.12.1 -pip-requirements-parser==32.0.1 -pip-tools==7.4.1 -prometheus-client==0.20.0 -prometheus-fastapi-instrumentator==7.0.0 -protobuf==5.27.2 -psutil==6.0.0 -py-cpuinfo==9.0.0 -pydantic==2.8.2 -pydantic-core==2.20.1 -pygments==2.18.0 -pyparsing==3.1.2 -pyproject-hooks==1.1.0 -python-dateutil==2.9.0.post0 -python-dotenv==1.0.1 -python-json-logger==2.0.7 -python-multipart==0.0.9 -pyyaml==6.0.1 -pyzmq==26.0.3 -ray==2.31.0 -referencing==0.35.1 -regex==2024.5.15 -requests==2.32.3 -rich==13.7.1 -rpds-py==0.18.1 -safetensors==0.4.3 -schema==0.7.7 -scipy==1.13.1 -sentencepiece==0.2.0 -shellingham==1.5.4 -simple-di==0.1.5 -six==1.16.0 -sniffio==1.3.1 -starlette==0.37.2 -sympy==1.12.1 -tiktoken==0.7.0 -tokenizers==0.19.1 -tomli==2.0.1 -tomli-w==1.0.0 -torch==2.3.0 -tornado==6.4.1 -tqdm==4.66.4 -transformers==4.41.0 -triton==2.3.0 -typer==0.12.3 -typing-extensions==4.12.2 -ujson==5.10.0 -urllib3==2.2.2 -uvicorn[standard]==0.30.1 -uvloop==0.19.0 -vllm==0.4.3 -vllm-flash-attn==2.5.8.post2 -watchfiles==0.22.0 -websockets==12.0 -wheel==0.43.0 -wrapt==1.16.0 -wsproto==1.2.0 -xformers==0.0.26.post1 -yarl==1.9.4 -zipp==3.19.2 - -# The following packages are considered to be unsafe in a requirements file: -pip==24.1.1 -setuptools==70.2.0 diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/service.py b/bentoml/bentos/llama2/70b-chat-fp16-4512/src/service.py deleted file mode 100644 index 94af0b0d..00000000 --- a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/service.py +++ /dev/null @@ -1,239 +0,0 @@ -import functools -import json -import logging -import os -import sys -import uuid -from typing import AsyncGenerator, Literal, Optional - -import bentoml -import fastapi -import fastapi.staticfiles -import pydantic -import vllm.entrypoints.openai.api_server as vllm_api_server -import yaml -from annotated_types import Ge, Le -from bento_constants import CONSTANT_YAML -from fastapi.responses import FileResponse -from typing_extensions import Annotated, Literal - - -class Message(pydantic.BaseModel): - content: str - role: Literal["system", "user", "assistant"] - - -CONSTANTS = yaml.safe_load(CONSTANT_YAML) - -ENGINE_CONFIG = CONSTANTS["engine_config"] -SERVICE_CONFIG = CONSTANTS["service_config"] -OVERRIDE_CHAT_TEMPLATE = CONSTANTS.get("chat_template") - -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) - - -openai_api_app = fastapi.FastAPI() -static_app = fastapi.FastAPI() -ui_app = fastapi.FastAPI() - - -OPENAI_ENDPOINTS = [ - ["/chat/completions", vllm_api_server.create_chat_completion, ["POST"]], - ["/completions", vllm_api_server.create_completion, ["POST"]], - ["/models", vllm_api_server.show_available_models, ["GET"]], -] - - -class Message(pydantic.BaseModel): - role: Literal["system", "user", "assistant"] - content: str - - -for route, endpoint, methods in OPENAI_ENDPOINTS: - openai_api_app.add_api_route( - path=route, - endpoint=endpoint, - methods=methods, - include_in_schema=True, - ) - - -STATIC_DIR = os.path.join(os.path.dirname(__file__), "ui") - -ui_app.mount( - "/static", fastapi.staticfiles.StaticFiles(directory=STATIC_DIR), name="static" -) - - -@ui_app.get("/") -async def serve_chat_html(): - return FileResponse(os.path.join(STATIC_DIR, "chat.html")) - - -@ui_app.get("/{full_path:path}") -async def catch_all(full_path: str): - file_path = os.path.join(STATIC_DIR, full_path) - if os.path.exists(file_path): - return FileResponse(file_path) - return FileResponse(os.path.join(STATIC_DIR, "chat.html")) - - -# special handling for prometheus_client of bentoml -if "prometheus_client" in sys.modules: - sys.modules.pop("prometheus_client") - - -@bentoml.mount_asgi_app(openai_api_app, path="/v1") -@bentoml.mount_asgi_app(ui_app, path="/ui") -@bentoml.service(**SERVICE_CONFIG) -class VLLM: - def __init__(self) -> None: - from transformers import AutoTokenizer - from vllm import AsyncEngineArgs, AsyncLLMEngine - from vllm.entrypoints.openai.serving_chat import OpenAIServingChat - from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion - - ENGINE_ARGS = AsyncEngineArgs(**ENGINE_CONFIG) - self.engine = AsyncLLMEngine.from_engine_args(ENGINE_ARGS) - self.tokenizer = AutoTokenizer.from_pretrained(ENGINE_CONFIG["model"]) - logger.info(f"VLLM service initialized with model: {ENGINE_CONFIG['model']}") - - if OVERRIDE_CHAT_TEMPLATE: # use community chat template - gen_config = _get_gen_config(CONSTANTS["chat_template"]) - chat_template = gen_config["template"] - else: - chat_template = None - - model_config = self.engine.engine.get_model_config() - - # inject the engine into the openai serving chat and completion - vllm_api_server.openai_serving_chat = OpenAIServingChat( - engine=self.engine, - served_model_names=[ENGINE_CONFIG["model"]], - response_role="assistant", - chat_template=chat_template, - model_config=model_config, - ) - vllm_api_server.openai_serving_completion = OpenAIServingCompletion( - engine=self.engine, - served_model_names=[ENGINE_CONFIG["model"]], - model_config=model_config, - lora_modules=None, - ) - - @bentoml.api(route="/api/generate") - async def generate( - self, - prompt: str = "Explain superconductors like I'm five years old", - model: str = ENGINE_CONFIG["model"], - max_tokens: Annotated[ - int, - Ge(128), - Le(ENGINE_CONFIG["max_model_len"]), - ] = ENGINE_CONFIG["max_model_len"], - stop: Optional[list[str]] = None, - ) -> AsyncGenerator[str, None]: - if stop is None: - stop = [] - - from vllm import SamplingParams - - SAMPLING_PARAM = SamplingParams( - max_tokens=max_tokens, - stop=stop, - ) - stream = await self.engine.add_request(uuid.uuid4().hex, prompt, SAMPLING_PARAM) - - cursor = 0 - async for request_output in stream: - text = request_output.outputs[0].text - yield text[cursor:] - cursor = len(text) - - @bentoml.api(route="/api/chat") - async def chat( - self, - messages: list[Message] = [ - Message(content="what is the meaning of life?", role="user") - ], - model: str = ENGINE_CONFIG["model"], - max_tokens: Annotated[ - int, - Ge(128), - Le(ENGINE_CONFIG["max_model_len"]), - ] = ENGINE_CONFIG["max_model_len"], - stop: Optional[list[str]] = None, - ) -> AsyncGenerator[str, None]: - """ - light-weight chat API that takes in a list of messages and returns a response - """ - from vllm import SamplingParams - - try: - if OVERRIDE_CHAT_TEMPLATE: # community chat template - gen_config = _get_gen_config(CONSTANTS["chat_template"]) - if not stop: - if gen_config["stop_str"]: - stop = [gen_config["stop_str"]] - else: - stop = [] - system_prompt = gen_config["system_prompt"] - self.tokenizer.chat_template = gen_config["template"] - else: - if not stop: - if self.tokenizer.eos_token is not None: - stop = [self.tokenizer.eos_token] - else: - stop = [] - system_prompt = None - - SAMPLING_PARAM = SamplingParams( - max_tokens=max_tokens, - stop=stop, - ) - if system_prompt and messages[0].role != "system": - messages = [dict(role="system", content=system_prompt)] + messages - - prompt = self.tokenizer.apply_chat_template( - messages, - tokenize=False, - add_generation_prompt=True, - ) - - stream = await self.engine.add_request( - uuid.uuid4().hex, prompt, SAMPLING_PARAM - ) - - cursor = 0 - strip_flag = True - async for request_output in stream: - text = request_output.outputs[0].text - assistant_message = text[cursor:] - if not strip_flag: # strip the leading whitespace - yield assistant_message - elif assistant_message.strip(): - strip_flag = False - yield assistant_message.lstrip() - cursor = len(text) - except Exception as e: - logger.error(f"Error in chat API: {e}") - yield f"Error in chat API: {e}" - - -@functools.lru_cache(maxsize=1) -def _get_gen_config(community_chat_template: str) -> dict: - logger.info(f"Load community_chat_template: {community_chat_template}") - chat_template_path = os.path.join( - os.path.dirname(__file__), "chat_templates", "chat_templates" - ) - config_path = os.path.join( - os.path.dirname(__file__), "chat_templates", "generation_configs" - ) - with open(os.path.join(config_path, f"{community_chat_template}.json")) as f: - gen_config = json.load(f) - chat_template_file = gen_config["chat_template"].split("/")[-1] - with open(os.path.join(chat_template_path, chat_template_file)) as f: - chat_template = f.read() - gen_config["template"] = chat_template.replace(" ", "").replace("\n", "") - return gen_config diff --git a/bentoml/bentos/llama2/13b-chat-fp16-0b04/README.md b/bentoml/bentos/llama2/70b-chat-fp16-b0de/README.md similarity index 97% rename from bentoml/bentos/llama2/13b-chat-fp16-0b04/README.md rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/README.md index 06b8e6ee..6e491b03 100644 --- a/bentoml/bentos/llama2/13b-chat-fp16-0b04/README.md +++ b/bentoml/bentos/llama2/70b-chat-fp16-b0de/README.md @@ -1,4 +1,4 @@ -# llama2:13b-chat-fp16-0b04 +# llama2:70b-chat-fp16-b0de [![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML) [![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/) diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/apis/openapi.yaml b/bentoml/bentos/llama2/70b-chat-fp16-b0de/apis/openapi.yaml similarity index 99% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/apis/openapi.yaml rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/apis/openapi.yaml index 6c11dc34..db09bf21 100644 --- a/bentoml/bentos/llama2/70b-chat-fp16-4512/apis/openapi.yaml +++ b/bentoml/bentos/llama2/70b-chat-fp16-b0de/apis/openapi.yaml @@ -1036,6 +1036,39 @@ paths: tags: - Service APIs x-bentoml-name: generate + /chat/: + get: + operationId: serve_chat_html__get + responses: + '200': + content: + application/json: + schema: {} + description: Successful Response + summary: Serve Chat Html + /chat/{full_path}: + get: + operationId: catch_all__full_path__get + parameters: + - in: path + name: full_path + required: true + schema: + title: Full Path + type: string + responses: + '200': + content: + application/json: + schema: {} + description: Successful Response + '422': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + description: Validation Error + summary: Catch All /healthz: get: description: Health check endpoint. Expecting an empty response with status @@ -1076,39 +1109,6 @@ paths: description: Successful Response tags: - Infrastructure - /ui/: - get: - operationId: serve_chat_html__get - responses: - '200': - content: - application/json: - schema: {} - description: Successful Response - summary: Serve Chat Html - /ui/{full_path}: - get: - operationId: catch_all__full_path__get - parameters: - - in: path - name: full_path - required: true - schema: - title: Full Path - type: string - responses: - '200': - content: - application/json: - schema: {} - description: Successful Response - '422': - content: - application/json: - schema: - $ref: '#/components/schemas/HTTPValidationError' - description: Validation Error - summary: Catch All /v1/chat/completions: post: operationId: create_chat_completion_chat_completions_post diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/apis/schema.json b/bentoml/bentos/llama2/70b-chat-fp16-b0de/apis/schema.json similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/apis/schema.json rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/apis/schema.json diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/bento.yaml b/bentoml/bentos/llama2/70b-chat-fp16-b0de/bento.yaml similarity index 97% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/bento.yaml rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/bento.yaml index 85bb68a0..8bcf39e3 100644 --- a/bentoml/bentos/llama2/70b-chat-fp16-4512/bento.yaml +++ b/bentoml/bentos/llama2/70b-chat-fp16-b0de/bento.yaml @@ -1,8 +1,8 @@ service: service:VLLM name: llama2 -version: 70b-chat-fp16-4512 +version: 70b-chat-fp16-b0de bentoml_version: 1.2.19 -creation_time: '2024-07-05T07:26:44.922045+00:00' +creation_time: '2024-07-08T07:50:16.964511+00:00' labels: openllm_alias: 70b,70b-chat openllm_hf_model_id: meta-llama/Llama-2-70b-chat-hf diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/env/docker/Dockerfile b/bentoml/bentos/llama2/70b-chat-fp16-b0de/env/docker/Dockerfile similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/env/docker/Dockerfile rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/env/docker/Dockerfile diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/env/docker/entrypoint.sh b/bentoml/bentos/llama2/70b-chat-fp16-b0de/env/docker/entrypoint.sh similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/env/docker/entrypoint.sh rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/env/docker/entrypoint.sh diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/env/python/install.sh b/bentoml/bentos/llama2/70b-chat-fp16-b0de/env/python/install.sh similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/env/python/install.sh rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/env/python/install.sh diff --git a/bentoml/bentos/llama2/70b-chat-fp16-b0de/env/python/requirements.lock.txt b/bentoml/bentos/llama2/70b-chat-fp16-b0de/env/python/requirements.lock.txt new file mode 100644 index 00000000..925ce193 --- /dev/null +++ b/bentoml/bentos/llama2/70b-chat-fp16-b0de/env/python/requirements.lock.txt @@ -0,0 +1,147 @@ +aiohttp==3.9.5 +aiosignal==1.3.1 +annotated-types==0.7.0 +anyio==4.4.0 +appdirs==1.4.4 +asgiref==3.8.1 +async-timeout==4.0.3 +attrs==23.2.0 +bentoml==1.2.19 +build==1.2.1 +cattrs==23.1.2 +certifi==2024.7.4 +charset-normalizer==3.3.2 +circus==0.18.0 +click==8.1.7 +click-option-group==0.5.6 +cloudpickle==3.0.0 +cmake==3.30.0 +deepmerge==1.1.1 +deprecated==1.2.14 +diskcache==5.6.3 +distro==1.9.0 +dnspython==2.6.1 +email-validator==2.2.0 +exceptiongroup==1.2.1 +fastapi==0.111.0 +fastapi-cli==0.0.4 +filelock==3.15.4 +frozenlist==1.4.1 +fs==2.4.16 +fsspec==2024.6.1 +h11==0.14.0 +httpcore==1.0.5 +httptools==0.6.1 +httpx==0.27.0 +httpx-ws==0.6.0 +huggingface-hub==0.23.4 +idna==3.7 +importlib-metadata==6.11.0 +inflection==0.5.1 +interegular==0.3.3 +jinja2==3.1.4 +joblib==1.4.2 +jsonschema==4.22.0 +jsonschema-specifications==2023.12.1 +lark==1.1.9 +llvmlite==0.43.0 +lm-format-enforcer==0.10.1 +markdown-it-py==3.0.0 +markupsafe==2.1.5 +mdurl==0.1.2 +mpmath==1.3.0 +msgpack==1.0.8 +multidict==6.0.5 +nest-asyncio==1.6.0 +networkx==3.2.1 +ninja==1.11.1.1 +numba==0.60.0 +numpy==1.26.0 +nvidia-cublas-cu12==12.1.3.1 +nvidia-cuda-cupti-cu12==12.1.105 +nvidia-cuda-nvrtc-cu12==12.1.105 +nvidia-cuda-runtime-cu12==12.1.105 +nvidia-cudnn-cu12==8.9.2.26 +nvidia-cufft-cu12==11.0.2.54 +nvidia-curand-cu12==10.3.2.106 +nvidia-cusolver-cu12==11.4.5.107 +nvidia-cusparse-cu12==12.1.0.106 +nvidia-ml-py==11.525.150 +nvidia-nccl-cu12==2.20.5 +nvidia-nvjitlink-cu12==12.5.82 +nvidia-nvtx-cu12==12.1.105 +openai==1.35.10 +opentelemetry-api==1.20.0 +opentelemetry-instrumentation==0.41b0 +opentelemetry-instrumentation-aiohttp-client==0.41b0 +opentelemetry-instrumentation-asgi==0.41b0 +opentelemetry-sdk==1.20.0 +opentelemetry-semantic-conventions==0.41b0 +opentelemetry-util-http==0.41b0 +orjson==3.10.6 +outlines==0.0.34 +packaging==24.1 +pathspec==0.12.1 +pip-requirements-parser==32.0.1 +pip-tools==7.4.1 +prometheus-client==0.20.0 +prometheus-fastapi-instrumentator==7.0.0 +protobuf==5.27.2 +psutil==6.0.0 +py-cpuinfo==9.0.0 +pydantic==2.8.2 +pydantic-core==2.20.1 +pygments==2.18.0 +pyparsing==3.1.2 +pyproject-hooks==1.1.0 +python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 +python-json-logger==2.0.7 +python-multipart==0.0.9 +pyyaml==6.0.1 +pyzmq==26.0.3 +ray==2.31.0 +referencing==0.35.1 +regex==2024.5.15 +requests==2.32.3 +rich==13.7.1 +rpds-py==0.18.1 +safetensors==0.4.3 +schema==0.7.7 +scipy==1.13.1 +sentencepiece==0.2.0 +shellingham==1.5.4 +simple-di==0.1.5 +six==1.16.0 +sniffio==1.3.1 +starlette==0.37.2 +sympy==1.12.1 +tiktoken==0.7.0 +tokenizers==0.19.1 +tomli==2.0.1 +tomli-w==1.0.0 +torch==2.3.0 +tornado==6.4.1 +tqdm==4.66.4 +transformers==4.41.0 +triton==2.3.0 +typer==0.12.3 +typing-extensions==4.12.2 +ujson==5.10.0 +urllib3==2.2.2 +uvicorn[standard]==0.30.1 +uvloop==0.19.0 +vllm==0.4.3 +vllm-flash-attn==2.5.8.post2 +watchfiles==0.22.0 +websockets==12.0 +wheel==0.43.0 +wrapt==1.16.0 +wsproto==1.2.0 +xformers==0.0.26.post1 +yarl==1.9.4 +zipp==3.19.2 + +# The following packages are considered to be unsafe in a requirements file: +pip==24.1.2 +setuptools==70.2.0 diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/env/python/requirements.txt b/bentoml/bentos/llama2/70b-chat-fp16-b0de/env/python/requirements.txt similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/env/python/requirements.txt rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/env/python/requirements.txt diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/env/python/version.txt b/bentoml/bentos/llama2/70b-chat-fp16-b0de/env/python/version.txt similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/env/python/version.txt rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/env/python/version.txt diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/bento_constants.py b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/bento_constants.py similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/bento_constants.py rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/bento_constants.py diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/bentofile.yaml b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/bentofile.yaml similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/bentofile.yaml rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/bentofile.yaml diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/chat_templates/alpaca.jinja b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/chat_templates/alpaca.jinja similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/chat_templates/alpaca.jinja rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/chat_templates/alpaca.jinja diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/chat_templates/amberchat.jinja b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/chat_templates/amberchat.jinja similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/chat_templates/amberchat.jinja rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/chat_templates/amberchat.jinja diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/chat_templates/chatml.jinja b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/chat_templates/chatml.jinja similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/chat_templates/chatml.jinja rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/chat_templates/chatml.jinja diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/chat_templates/chatqa.jinja b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/chat_templates/chatqa.jinja similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/chat_templates/chatqa.jinja rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/chat_templates/chatqa.jinja diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/chat_templates/falcon-instruct.jinja b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/chat_templates/falcon-instruct.jinja similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/chat_templates/falcon-instruct.jinja rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/chat_templates/falcon-instruct.jinja diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/chat_templates/gemma-it.jinja b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/chat_templates/gemma-it.jinja similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/chat_templates/gemma-it.jinja rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/chat_templates/gemma-it.jinja diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/chat_templates/llama-2-chat.jinja b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/chat_templates/llama-2-chat.jinja similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/chat_templates/llama-2-chat.jinja rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/chat_templates/llama-2-chat.jinja diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/chat_templates/llama-3-chat.jinja b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/chat_templates/llama-3-chat.jinja similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/chat_templates/llama-3-chat.jinja rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/chat_templates/llama-3-chat.jinja diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/chat_templates/mistral-instruct.jinja b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/chat_templates/mistral-instruct.jinja similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/chat_templates/mistral-instruct.jinja rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/chat_templates/mistral-instruct.jinja diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/chat_templates/openchat.jinja b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/chat_templates/openchat.jinja similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/chat_templates/openchat.jinja rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/chat_templates/openchat.jinja diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/chat_templates/phi-3.jinja b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/chat_templates/phi-3.jinja similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/chat_templates/phi-3.jinja rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/chat_templates/phi-3.jinja diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/chat_templates/saiga.jinja b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/chat_templates/saiga.jinja similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/chat_templates/saiga.jinja rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/chat_templates/saiga.jinja diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/chat_templates/solar-instruct.jinja b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/chat_templates/solar-instruct.jinja similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/chat_templates/solar-instruct.jinja rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/chat_templates/solar-instruct.jinja diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/chat_templates/vicuna.jinja b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/chat_templates/vicuna.jinja similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/chat_templates/vicuna.jinja rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/chat_templates/vicuna.jinja diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/chat_templates/zephyr.jinja b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/chat_templates/zephyr.jinja similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/chat_templates/zephyr.jinja rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/chat_templates/zephyr.jinja diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/generation_configs/alpaca.json b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/generation_configs/alpaca.json similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/generation_configs/alpaca.json rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/generation_configs/alpaca.json diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/generation_configs/amberchat.json b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/generation_configs/amberchat.json similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/generation_configs/amberchat.json rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/generation_configs/amberchat.json diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/generation_configs/chatqa.json b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/generation_configs/chatqa.json similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/generation_configs/chatqa.json rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/generation_configs/chatqa.json diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/generation_configs/gemma-it.json b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/generation_configs/gemma-it.json similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/generation_configs/gemma-it.json rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/generation_configs/gemma-it.json diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/generation_configs/llama-2-chat.json b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/generation_configs/llama-2-chat.json similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/generation_configs/llama-2-chat.json rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/generation_configs/llama-2-chat.json diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/generation_configs/llama-3-chat.json b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/generation_configs/llama-3-chat.json similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/generation_configs/llama-3-chat.json rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/generation_configs/llama-3-chat.json diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/generation_configs/mistral-instruct.json b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/generation_configs/mistral-instruct.json similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/generation_configs/mistral-instruct.json rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/generation_configs/mistral-instruct.json diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/generation_configs/openchat.json b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/generation_configs/openchat.json similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/generation_configs/openchat.json rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/generation_configs/openchat.json diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/generation_configs/orca-2.json b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/generation_configs/orca-2.json similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/generation_configs/orca-2.json rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/generation_configs/orca-2.json diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/generation_configs/phi-3.json b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/generation_configs/phi-3.json similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/generation_configs/phi-3.json rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/generation_configs/phi-3.json diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/generation_configs/qwen2-chat.json b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/generation_configs/qwen2-chat.json similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/generation_configs/qwen2-chat.json rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/generation_configs/qwen2-chat.json diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/generation_configs/saiga.json b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/generation_configs/saiga.json similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/generation_configs/saiga.json rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/generation_configs/saiga.json diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/generation_configs/solar-instruct.json b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/generation_configs/solar-instruct.json similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/generation_configs/solar-instruct.json rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/generation_configs/solar-instruct.json diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/generation_configs/vicuna.json b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/generation_configs/vicuna.json similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/generation_configs/vicuna.json rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/generation_configs/vicuna.json diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/generation_configs/yi-chat.json b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/generation_configs/yi-chat.json similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/generation_configs/yi-chat.json rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/generation_configs/yi-chat.json diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/generation_configs/zephyr.json b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/generation_configs/zephyr.json similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/chat_templates/generation_configs/zephyr.json rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/chat_templates/generation_configs/zephyr.json diff --git a/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/service.py b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/service.py new file mode 100644 index 00000000..42152243 --- /dev/null +++ b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/service.py @@ -0,0 +1,239 @@ +import functools +import json +import logging +import os +import sys +import uuid +from typing import AsyncGenerator, Literal, Optional + +import bentoml +import fastapi +import fastapi.staticfiles +import pydantic +import vllm.entrypoints.openai.api_server as vllm_api_server +import yaml +from annotated_types import Ge, Le +from bento_constants import CONSTANT_YAML +from fastapi.responses import FileResponse +from typing_extensions import Annotated, Literal + + +class Message(pydantic.BaseModel): + content: str + role: Literal["system", "user", "assistant"] + + +CONSTANTS = yaml.safe_load(CONSTANT_YAML) + +ENGINE_CONFIG = CONSTANTS["engine_config"] +SERVICE_CONFIG = CONSTANTS["service_config"] +OVERRIDE_CHAT_TEMPLATE = CONSTANTS.get("chat_template") + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +openai_api_app = fastapi.FastAPI() +static_app = fastapi.FastAPI() +ui_app = fastapi.FastAPI() + + +OPENAI_ENDPOINTS = [ + ["/chat/completions", vllm_api_server.create_chat_completion, ["POST"]], + ["/completions", vllm_api_server.create_completion, ["POST"]], + ["/models", vllm_api_server.show_available_models, ["GET"]], +] + + +class Message(pydantic.BaseModel): + role: Literal["system", "user", "assistant"] + content: str + + +for route, endpoint, methods in OPENAI_ENDPOINTS: + openai_api_app.add_api_route( + path=route, + endpoint=endpoint, + methods=methods, + include_in_schema=True, + ) + + +STATIC_DIR = os.path.join(os.path.dirname(__file__), "ui") + +ui_app.mount( + "/static", fastapi.staticfiles.StaticFiles(directory=STATIC_DIR), name="static" +) + + +@ui_app.get("/") +async def serve_chat_html(): + return FileResponse(os.path.join(STATIC_DIR, "chat.html")) + + +@ui_app.get("/{full_path:path}") +async def catch_all(full_path: str): + file_path = os.path.join(STATIC_DIR, full_path) + if os.path.exists(file_path): + return FileResponse(file_path) + return FileResponse(os.path.join(STATIC_DIR, "chat.html")) + + +# special handling for prometheus_client of bentoml +if "prometheus_client" in sys.modules: + sys.modules.pop("prometheus_client") + + +@bentoml.mount_asgi_app(openai_api_app, path="/v1") +@bentoml.mount_asgi_app(ui_app, path="/chat") +@bentoml.service(**SERVICE_CONFIG) +class VLLM: + def __init__(self) -> None: + from transformers import AutoTokenizer + from vllm import AsyncEngineArgs, AsyncLLMEngine + from vllm.entrypoints.openai.serving_chat import OpenAIServingChat + from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion + + ENGINE_ARGS = AsyncEngineArgs(**ENGINE_CONFIG) + self.engine = AsyncLLMEngine.from_engine_args(ENGINE_ARGS) + self.tokenizer = AutoTokenizer.from_pretrained(ENGINE_CONFIG["model"]) + logger.info(f"VLLM service initialized with model: {ENGINE_CONFIG['model']}") + + if OVERRIDE_CHAT_TEMPLATE: # use community chat template + gen_config = _get_gen_config(CONSTANTS["chat_template"]) + chat_template = gen_config["template"] + else: + chat_template = None + + model_config = self.engine.engine.get_model_config() + + # inject the engine into the openai serving chat and completion + vllm_api_server.openai_serving_chat = OpenAIServingChat( + engine=self.engine, + served_model_names=[ENGINE_CONFIG["model"]], + response_role="assistant", + chat_template=chat_template, + model_config=model_config, + ) + vllm_api_server.openai_serving_completion = OpenAIServingCompletion( + engine=self.engine, + served_model_names=[ENGINE_CONFIG["model"]], + model_config=model_config, + lora_modules=None, + ) + + @bentoml.api(route="/api/generate") + async def generate( + self, + prompt: str = "Explain superconductors like I'm five years old", + model: str = ENGINE_CONFIG["model"], + max_tokens: Annotated[ + int, + Ge(128), + Le(ENGINE_CONFIG["max_model_len"]), + ] = ENGINE_CONFIG["max_model_len"], + stop: Optional[list[str]] = None, + ) -> AsyncGenerator[str, None]: + if stop is None: + stop = [] + + from vllm import SamplingParams + + SAMPLING_PARAM = SamplingParams( + max_tokens=max_tokens, + stop=stop, + ) + stream = await self.engine.add_request(uuid.uuid4().hex, prompt, SAMPLING_PARAM) + + cursor = 0 + async for request_output in stream: + text = request_output.outputs[0].text + yield text[cursor:] + cursor = len(text) + + @bentoml.api(route="/api/chat") + async def chat( + self, + messages: list[Message] = [ + Message(content="what is the meaning of life?", role="user") + ], + model: str = ENGINE_CONFIG["model"], + max_tokens: Annotated[ + int, + Ge(128), + Le(ENGINE_CONFIG["max_model_len"]), + ] = ENGINE_CONFIG["max_model_len"], + stop: Optional[list[str]] = None, + ) -> AsyncGenerator[str, None]: + """ + light-weight chat API that takes in a list of messages and returns a response + """ + from vllm import SamplingParams + + try: + if OVERRIDE_CHAT_TEMPLATE: # community chat template + gen_config = _get_gen_config(CONSTANTS["chat_template"]) + if not stop: + if gen_config["stop_str"]: + stop = [gen_config["stop_str"]] + else: + stop = [] + system_prompt = gen_config["system_prompt"] + self.tokenizer.chat_template = gen_config["template"] + else: + if not stop: + if self.tokenizer.eos_token is not None: + stop = [self.tokenizer.eos_token] + else: + stop = [] + system_prompt = None + + SAMPLING_PARAM = SamplingParams( + max_tokens=max_tokens, + stop=stop, + ) + if system_prompt and messages[0].role != "system": + messages = [dict(role="system", content=system_prompt)] + messages + + prompt = self.tokenizer.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True, + ) + + stream = await self.engine.add_request( + uuid.uuid4().hex, prompt, SAMPLING_PARAM + ) + + cursor = 0 + strip_flag = True + async for request_output in stream: + text = request_output.outputs[0].text + assistant_message = text[cursor:] + if not strip_flag: # strip the leading whitespace + yield assistant_message + elif assistant_message.strip(): + strip_flag = False + yield assistant_message.lstrip() + cursor = len(text) + except Exception as e: + logger.error(f"Error in chat API: {e}") + yield f"Error in chat API: {e}" + + +@functools.lru_cache(maxsize=1) +def _get_gen_config(community_chat_template: str) -> dict: + logger.info(f"Load community_chat_template: {community_chat_template}") + chat_template_path = os.path.join( + os.path.dirname(__file__), "chat_templates", "chat_templates" + ) + config_path = os.path.join( + os.path.dirname(__file__), "chat_templates", "generation_configs" + ) + with open(os.path.join(config_path, f"{community_chat_template}.json")) as f: + gen_config = json.load(f) + chat_template_file = gen_config["chat_template"].split("/")[-1] + with open(os.path.join(chat_template_path, chat_template_file)) as f: + chat_template = f.read() + gen_config["template"] = chat_template.replace(" ", "").replace("\n", "") + return gen_config diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/404.html b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/404.html similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/404.html rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/404.html diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/700-532b1fe2415e5859.js b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/700-532b1fe2415e5859.js similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/700-532b1fe2415e5859.js rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/700-532b1fe2415e5859.js diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/995-34374f39bb210839.js b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/995-34374f39bb210839.js similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/995-34374f39bb210839.js rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/995-34374f39bb210839.js diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/css/429544bd3cd8ce3a.css b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/css/429544bd3cd8ce3a.css similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/css/429544bd3cd8ce3a.css rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/css/429544bd3cd8ce3a.css diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/css/5b67f082b31cfc7b.css b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/css/5b67f082b31cfc7b.css similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/css/5b67f082b31cfc7b.css rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/css/5b67f082b31cfc7b.css diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/css/9e63023b20ddb15e.css b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/css/9e63023b20ddb15e.css similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/_next/static/css/9e63023b20ddb15e.css rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/_next/static/css/9e63023b20ddb15e.css diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/apple-touch-icon.png b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/apple-touch-icon.png similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/apple-touch-icon.png rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/apple-touch-icon.png diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/chat.html b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/chat.html similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/chat.html rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/chat.html diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/chat.txt b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/chat.txt similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/chat.txt rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/chat.txt diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/favicon-16x16.png b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/favicon-16x16.png similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/favicon-16x16.png rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/favicon-16x16.png diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/favicon.ico b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/favicon.ico similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/favicon.ico rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/favicon.ico diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/index.html b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/index.html similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/index.html rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/index.html diff --git a/bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/index.txt b/bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/index.txt similarity index 100% rename from bentoml/bentos/llama2/70b-chat-fp16-4512/src/ui/index.txt rename to bentoml/bentos/llama2/70b-chat-fp16-b0de/src/ui/index.txt diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/README.md b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/README.md new file mode 100644 index 00000000..35641bf0 --- /dev/null +++ b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/README.md @@ -0,0 +1,16 @@ +# llama2:7b-chat-awq-4bit-a35d + +[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML) +[![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/) +[![join_slack](https://badgen.net/badge/Join/BentoML%20Slack/cyan?icon=slack)](https://l.bentoml.com/join-slack-swagger) +[![BentoML GitHub Repo](https://img.shields.io/github/stars/bentoml/bentoml?style=social)](https://github.com/bentoml/BentoML) +[![Twitter Follow](https://img.shields.io/twitter/follow/bentomlai?label=Follow%20BentoML&style=social)](https://twitter.com/bentomlai) + +This is a Machine Learning Service created with BentoML. + +## Help + +* [📖 Documentation](https://docs.bentoml.com/en/latest/): Learn how to use BentoML. +* [💬 Community](https://l.bentoml.com/join-slack-swagger): Join the BentoML Slack community. +* [🐛 GitHub Issues](https://github.com/bentoml/BentoML/issues): Report bugs and feature requests. +* Tip: you can also [customize this README](https://docs.bentoml.com/en/latest/concepts/bento.html#description). diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/apis/openapi.yaml b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/apis/openapi.yaml similarity index 99% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/apis/openapi.yaml rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/apis/openapi.yaml index 6b47f503..a0320a7c 100644 --- a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/apis/openapi.yaml +++ b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/apis/openapi.yaml @@ -1036,6 +1036,39 @@ paths: tags: - Service APIs x-bentoml-name: generate + /chat/: + get: + operationId: serve_chat_html__get + responses: + '200': + content: + application/json: + schema: {} + description: Successful Response + summary: Serve Chat Html + /chat/{full_path}: + get: + operationId: catch_all__full_path__get + parameters: + - in: path + name: full_path + required: true + schema: + title: Full Path + type: string + responses: + '200': + content: + application/json: + schema: {} + description: Successful Response + '422': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + description: Validation Error + summary: Catch All /healthz: get: description: Health check endpoint. Expecting an empty response with status @@ -1076,39 +1109,6 @@ paths: description: Successful Response tags: - Infrastructure - /ui/: - get: - operationId: serve_chat_html__get - responses: - '200': - content: - application/json: - schema: {} - description: Successful Response - summary: Serve Chat Html - /ui/{full_path}: - get: - operationId: catch_all__full_path__get - parameters: - - in: path - name: full_path - required: true - schema: - title: Full Path - type: string - responses: - '200': - content: - application/json: - schema: {} - description: Successful Response - '422': - content: - application/json: - schema: - $ref: '#/components/schemas/HTTPValidationError' - description: Validation Error - summary: Catch All /v1/chat/completions: post: operationId: create_chat_completion_chat_completions_post diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/apis/schema.json b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/apis/schema.json similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/apis/schema.json rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/apis/schema.json diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/bento.yaml b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/bento.yaml similarity index 97% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/bento.yaml rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/bento.yaml index 36f78348..33276542 100644 --- a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/bento.yaml +++ b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/bento.yaml @@ -1,8 +1,8 @@ service: service:VLLM name: llama2 -version: 7b-chat-awq-4bit-dcbf +version: 7b-chat-awq-4bit-a35d bentoml_version: 1.2.19 -creation_time: '2024-07-05T07:27:17.138012+00:00' +creation_time: '2024-07-08T07:51:07.606369+00:00' labels: openllm_alias: 7b-4bit,7b-chat-4bit openllm_hf_model_id: TheBloke/Llama-2-7B-Chat-AWQ diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/env/docker/Dockerfile b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/env/docker/Dockerfile similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/env/docker/Dockerfile rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/env/docker/Dockerfile diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/env/docker/entrypoint.sh b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/env/docker/entrypoint.sh similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/env/docker/entrypoint.sh rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/env/docker/entrypoint.sh diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/env/python/install.sh b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/env/python/install.sh similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/env/python/install.sh rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/env/python/install.sh diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/env/python/requirements.lock.txt b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/env/python/requirements.lock.txt new file mode 100644 index 00000000..925ce193 --- /dev/null +++ b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/env/python/requirements.lock.txt @@ -0,0 +1,147 @@ +aiohttp==3.9.5 +aiosignal==1.3.1 +annotated-types==0.7.0 +anyio==4.4.0 +appdirs==1.4.4 +asgiref==3.8.1 +async-timeout==4.0.3 +attrs==23.2.0 +bentoml==1.2.19 +build==1.2.1 +cattrs==23.1.2 +certifi==2024.7.4 +charset-normalizer==3.3.2 +circus==0.18.0 +click==8.1.7 +click-option-group==0.5.6 +cloudpickle==3.0.0 +cmake==3.30.0 +deepmerge==1.1.1 +deprecated==1.2.14 +diskcache==5.6.3 +distro==1.9.0 +dnspython==2.6.1 +email-validator==2.2.0 +exceptiongroup==1.2.1 +fastapi==0.111.0 +fastapi-cli==0.0.4 +filelock==3.15.4 +frozenlist==1.4.1 +fs==2.4.16 +fsspec==2024.6.1 +h11==0.14.0 +httpcore==1.0.5 +httptools==0.6.1 +httpx==0.27.0 +httpx-ws==0.6.0 +huggingface-hub==0.23.4 +idna==3.7 +importlib-metadata==6.11.0 +inflection==0.5.1 +interegular==0.3.3 +jinja2==3.1.4 +joblib==1.4.2 +jsonschema==4.22.0 +jsonschema-specifications==2023.12.1 +lark==1.1.9 +llvmlite==0.43.0 +lm-format-enforcer==0.10.1 +markdown-it-py==3.0.0 +markupsafe==2.1.5 +mdurl==0.1.2 +mpmath==1.3.0 +msgpack==1.0.8 +multidict==6.0.5 +nest-asyncio==1.6.0 +networkx==3.2.1 +ninja==1.11.1.1 +numba==0.60.0 +numpy==1.26.0 +nvidia-cublas-cu12==12.1.3.1 +nvidia-cuda-cupti-cu12==12.1.105 +nvidia-cuda-nvrtc-cu12==12.1.105 +nvidia-cuda-runtime-cu12==12.1.105 +nvidia-cudnn-cu12==8.9.2.26 +nvidia-cufft-cu12==11.0.2.54 +nvidia-curand-cu12==10.3.2.106 +nvidia-cusolver-cu12==11.4.5.107 +nvidia-cusparse-cu12==12.1.0.106 +nvidia-ml-py==11.525.150 +nvidia-nccl-cu12==2.20.5 +nvidia-nvjitlink-cu12==12.5.82 +nvidia-nvtx-cu12==12.1.105 +openai==1.35.10 +opentelemetry-api==1.20.0 +opentelemetry-instrumentation==0.41b0 +opentelemetry-instrumentation-aiohttp-client==0.41b0 +opentelemetry-instrumentation-asgi==0.41b0 +opentelemetry-sdk==1.20.0 +opentelemetry-semantic-conventions==0.41b0 +opentelemetry-util-http==0.41b0 +orjson==3.10.6 +outlines==0.0.34 +packaging==24.1 +pathspec==0.12.1 +pip-requirements-parser==32.0.1 +pip-tools==7.4.1 +prometheus-client==0.20.0 +prometheus-fastapi-instrumentator==7.0.0 +protobuf==5.27.2 +psutil==6.0.0 +py-cpuinfo==9.0.0 +pydantic==2.8.2 +pydantic-core==2.20.1 +pygments==2.18.0 +pyparsing==3.1.2 +pyproject-hooks==1.1.0 +python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 +python-json-logger==2.0.7 +python-multipart==0.0.9 +pyyaml==6.0.1 +pyzmq==26.0.3 +ray==2.31.0 +referencing==0.35.1 +regex==2024.5.15 +requests==2.32.3 +rich==13.7.1 +rpds-py==0.18.1 +safetensors==0.4.3 +schema==0.7.7 +scipy==1.13.1 +sentencepiece==0.2.0 +shellingham==1.5.4 +simple-di==0.1.5 +six==1.16.0 +sniffio==1.3.1 +starlette==0.37.2 +sympy==1.12.1 +tiktoken==0.7.0 +tokenizers==0.19.1 +tomli==2.0.1 +tomli-w==1.0.0 +torch==2.3.0 +tornado==6.4.1 +tqdm==4.66.4 +transformers==4.41.0 +triton==2.3.0 +typer==0.12.3 +typing-extensions==4.12.2 +ujson==5.10.0 +urllib3==2.2.2 +uvicorn[standard]==0.30.1 +uvloop==0.19.0 +vllm==0.4.3 +vllm-flash-attn==2.5.8.post2 +watchfiles==0.22.0 +websockets==12.0 +wheel==0.43.0 +wrapt==1.16.0 +wsproto==1.2.0 +xformers==0.0.26.post1 +yarl==1.9.4 +zipp==3.19.2 + +# The following packages are considered to be unsafe in a requirements file: +pip==24.1.2 +setuptools==70.2.0 diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/env/python/requirements.txt b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/env/python/requirements.txt similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/env/python/requirements.txt rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/env/python/requirements.txt diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/env/python/version.txt b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/env/python/version.txt similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/env/python/version.txt rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/env/python/version.txt diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/bento_constants.py b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/bento_constants.py similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/bento_constants.py rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/bento_constants.py diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/bentofile.yaml b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/bentofile.yaml similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/bentofile.yaml rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/bentofile.yaml diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/chat_templates/alpaca.jinja b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/chat_templates/alpaca.jinja similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/chat_templates/alpaca.jinja rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/chat_templates/alpaca.jinja diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/chat_templates/amberchat.jinja b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/chat_templates/amberchat.jinja similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/chat_templates/amberchat.jinja rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/chat_templates/amberchat.jinja diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/chat_templates/chatml.jinja b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/chat_templates/chatml.jinja similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/chat_templates/chatml.jinja rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/chat_templates/chatml.jinja diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/chat_templates/chatqa.jinja b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/chat_templates/chatqa.jinja similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/chat_templates/chatqa.jinja rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/chat_templates/chatqa.jinja diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/chat_templates/falcon-instruct.jinja b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/chat_templates/falcon-instruct.jinja similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/chat_templates/falcon-instruct.jinja rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/chat_templates/falcon-instruct.jinja diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/chat_templates/gemma-it.jinja b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/chat_templates/gemma-it.jinja similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/chat_templates/gemma-it.jinja rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/chat_templates/gemma-it.jinja diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/chat_templates/llama-2-chat.jinja b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/chat_templates/llama-2-chat.jinja similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/chat_templates/llama-2-chat.jinja rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/chat_templates/llama-2-chat.jinja diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/chat_templates/llama-3-chat.jinja b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/chat_templates/llama-3-chat.jinja similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/chat_templates/llama-3-chat.jinja rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/chat_templates/llama-3-chat.jinja diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/chat_templates/mistral-instruct.jinja b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/chat_templates/mistral-instruct.jinja similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/chat_templates/mistral-instruct.jinja rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/chat_templates/mistral-instruct.jinja diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/chat_templates/openchat.jinja b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/chat_templates/openchat.jinja similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/chat_templates/openchat.jinja rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/chat_templates/openchat.jinja diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/chat_templates/phi-3.jinja b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/chat_templates/phi-3.jinja similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/chat_templates/phi-3.jinja rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/chat_templates/phi-3.jinja diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/chat_templates/saiga.jinja b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/chat_templates/saiga.jinja similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/chat_templates/saiga.jinja rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/chat_templates/saiga.jinja diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/chat_templates/solar-instruct.jinja b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/chat_templates/solar-instruct.jinja similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/chat_templates/solar-instruct.jinja rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/chat_templates/solar-instruct.jinja diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/chat_templates/vicuna.jinja b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/chat_templates/vicuna.jinja similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/chat_templates/vicuna.jinja rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/chat_templates/vicuna.jinja diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/chat_templates/zephyr.jinja b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/chat_templates/zephyr.jinja similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/chat_templates/zephyr.jinja rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/chat_templates/zephyr.jinja diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/generation_configs/alpaca.json b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/generation_configs/alpaca.json similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/generation_configs/alpaca.json rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/generation_configs/alpaca.json diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/generation_configs/amberchat.json b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/generation_configs/amberchat.json similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/generation_configs/amberchat.json rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/generation_configs/amberchat.json diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/generation_configs/chatqa.json b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/generation_configs/chatqa.json similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/generation_configs/chatqa.json rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/generation_configs/chatqa.json diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/generation_configs/gemma-it.json b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/generation_configs/gemma-it.json similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/generation_configs/gemma-it.json rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/generation_configs/gemma-it.json diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/generation_configs/llama-2-chat.json b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/generation_configs/llama-2-chat.json similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/generation_configs/llama-2-chat.json rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/generation_configs/llama-2-chat.json diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/generation_configs/llama-3-chat.json b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/generation_configs/llama-3-chat.json similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/generation_configs/llama-3-chat.json rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/generation_configs/llama-3-chat.json diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/generation_configs/mistral-instruct.json b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/generation_configs/mistral-instruct.json similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/generation_configs/mistral-instruct.json rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/generation_configs/mistral-instruct.json diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/generation_configs/openchat.json b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/generation_configs/openchat.json similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/generation_configs/openchat.json rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/generation_configs/openchat.json diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/generation_configs/orca-2.json b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/generation_configs/orca-2.json similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/generation_configs/orca-2.json rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/generation_configs/orca-2.json diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/generation_configs/phi-3.json b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/generation_configs/phi-3.json similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/generation_configs/phi-3.json rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/generation_configs/phi-3.json diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/generation_configs/qwen2-chat.json b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/generation_configs/qwen2-chat.json similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/generation_configs/qwen2-chat.json rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/generation_configs/qwen2-chat.json diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/generation_configs/saiga.json b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/generation_configs/saiga.json similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/generation_configs/saiga.json rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/generation_configs/saiga.json diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/generation_configs/solar-instruct.json b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/generation_configs/solar-instruct.json similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/generation_configs/solar-instruct.json rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/generation_configs/solar-instruct.json diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/generation_configs/vicuna.json b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/generation_configs/vicuna.json similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/generation_configs/vicuna.json rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/generation_configs/vicuna.json diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/generation_configs/yi-chat.json b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/generation_configs/yi-chat.json similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/generation_configs/yi-chat.json rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/generation_configs/yi-chat.json diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/generation_configs/zephyr.json b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/generation_configs/zephyr.json similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/chat_templates/generation_configs/zephyr.json rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/chat_templates/generation_configs/zephyr.json diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/service.py b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/service.py new file mode 100644 index 00000000..42152243 --- /dev/null +++ b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/service.py @@ -0,0 +1,239 @@ +import functools +import json +import logging +import os +import sys +import uuid +from typing import AsyncGenerator, Literal, Optional + +import bentoml +import fastapi +import fastapi.staticfiles +import pydantic +import vllm.entrypoints.openai.api_server as vllm_api_server +import yaml +from annotated_types import Ge, Le +from bento_constants import CONSTANT_YAML +from fastapi.responses import FileResponse +from typing_extensions import Annotated, Literal + + +class Message(pydantic.BaseModel): + content: str + role: Literal["system", "user", "assistant"] + + +CONSTANTS = yaml.safe_load(CONSTANT_YAML) + +ENGINE_CONFIG = CONSTANTS["engine_config"] +SERVICE_CONFIG = CONSTANTS["service_config"] +OVERRIDE_CHAT_TEMPLATE = CONSTANTS.get("chat_template") + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +openai_api_app = fastapi.FastAPI() +static_app = fastapi.FastAPI() +ui_app = fastapi.FastAPI() + + +OPENAI_ENDPOINTS = [ + ["/chat/completions", vllm_api_server.create_chat_completion, ["POST"]], + ["/completions", vllm_api_server.create_completion, ["POST"]], + ["/models", vllm_api_server.show_available_models, ["GET"]], +] + + +class Message(pydantic.BaseModel): + role: Literal["system", "user", "assistant"] + content: str + + +for route, endpoint, methods in OPENAI_ENDPOINTS: + openai_api_app.add_api_route( + path=route, + endpoint=endpoint, + methods=methods, + include_in_schema=True, + ) + + +STATIC_DIR = os.path.join(os.path.dirname(__file__), "ui") + +ui_app.mount( + "/static", fastapi.staticfiles.StaticFiles(directory=STATIC_DIR), name="static" +) + + +@ui_app.get("/") +async def serve_chat_html(): + return FileResponse(os.path.join(STATIC_DIR, "chat.html")) + + +@ui_app.get("/{full_path:path}") +async def catch_all(full_path: str): + file_path = os.path.join(STATIC_DIR, full_path) + if os.path.exists(file_path): + return FileResponse(file_path) + return FileResponse(os.path.join(STATIC_DIR, "chat.html")) + + +# special handling for prometheus_client of bentoml +if "prometheus_client" in sys.modules: + sys.modules.pop("prometheus_client") + + +@bentoml.mount_asgi_app(openai_api_app, path="/v1") +@bentoml.mount_asgi_app(ui_app, path="/chat") +@bentoml.service(**SERVICE_CONFIG) +class VLLM: + def __init__(self) -> None: + from transformers import AutoTokenizer + from vllm import AsyncEngineArgs, AsyncLLMEngine + from vllm.entrypoints.openai.serving_chat import OpenAIServingChat + from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion + + ENGINE_ARGS = AsyncEngineArgs(**ENGINE_CONFIG) + self.engine = AsyncLLMEngine.from_engine_args(ENGINE_ARGS) + self.tokenizer = AutoTokenizer.from_pretrained(ENGINE_CONFIG["model"]) + logger.info(f"VLLM service initialized with model: {ENGINE_CONFIG['model']}") + + if OVERRIDE_CHAT_TEMPLATE: # use community chat template + gen_config = _get_gen_config(CONSTANTS["chat_template"]) + chat_template = gen_config["template"] + else: + chat_template = None + + model_config = self.engine.engine.get_model_config() + + # inject the engine into the openai serving chat and completion + vllm_api_server.openai_serving_chat = OpenAIServingChat( + engine=self.engine, + served_model_names=[ENGINE_CONFIG["model"]], + response_role="assistant", + chat_template=chat_template, + model_config=model_config, + ) + vllm_api_server.openai_serving_completion = OpenAIServingCompletion( + engine=self.engine, + served_model_names=[ENGINE_CONFIG["model"]], + model_config=model_config, + lora_modules=None, + ) + + @bentoml.api(route="/api/generate") + async def generate( + self, + prompt: str = "Explain superconductors like I'm five years old", + model: str = ENGINE_CONFIG["model"], + max_tokens: Annotated[ + int, + Ge(128), + Le(ENGINE_CONFIG["max_model_len"]), + ] = ENGINE_CONFIG["max_model_len"], + stop: Optional[list[str]] = None, + ) -> AsyncGenerator[str, None]: + if stop is None: + stop = [] + + from vllm import SamplingParams + + SAMPLING_PARAM = SamplingParams( + max_tokens=max_tokens, + stop=stop, + ) + stream = await self.engine.add_request(uuid.uuid4().hex, prompt, SAMPLING_PARAM) + + cursor = 0 + async for request_output in stream: + text = request_output.outputs[0].text + yield text[cursor:] + cursor = len(text) + + @bentoml.api(route="/api/chat") + async def chat( + self, + messages: list[Message] = [ + Message(content="what is the meaning of life?", role="user") + ], + model: str = ENGINE_CONFIG["model"], + max_tokens: Annotated[ + int, + Ge(128), + Le(ENGINE_CONFIG["max_model_len"]), + ] = ENGINE_CONFIG["max_model_len"], + stop: Optional[list[str]] = None, + ) -> AsyncGenerator[str, None]: + """ + light-weight chat API that takes in a list of messages and returns a response + """ + from vllm import SamplingParams + + try: + if OVERRIDE_CHAT_TEMPLATE: # community chat template + gen_config = _get_gen_config(CONSTANTS["chat_template"]) + if not stop: + if gen_config["stop_str"]: + stop = [gen_config["stop_str"]] + else: + stop = [] + system_prompt = gen_config["system_prompt"] + self.tokenizer.chat_template = gen_config["template"] + else: + if not stop: + if self.tokenizer.eos_token is not None: + stop = [self.tokenizer.eos_token] + else: + stop = [] + system_prompt = None + + SAMPLING_PARAM = SamplingParams( + max_tokens=max_tokens, + stop=stop, + ) + if system_prompt and messages[0].role != "system": + messages = [dict(role="system", content=system_prompt)] + messages + + prompt = self.tokenizer.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True, + ) + + stream = await self.engine.add_request( + uuid.uuid4().hex, prompt, SAMPLING_PARAM + ) + + cursor = 0 + strip_flag = True + async for request_output in stream: + text = request_output.outputs[0].text + assistant_message = text[cursor:] + if not strip_flag: # strip the leading whitespace + yield assistant_message + elif assistant_message.strip(): + strip_flag = False + yield assistant_message.lstrip() + cursor = len(text) + except Exception as e: + logger.error(f"Error in chat API: {e}") + yield f"Error in chat API: {e}" + + +@functools.lru_cache(maxsize=1) +def _get_gen_config(community_chat_template: str) -> dict: + logger.info(f"Load community_chat_template: {community_chat_template}") + chat_template_path = os.path.join( + os.path.dirname(__file__), "chat_templates", "chat_templates" + ) + config_path = os.path.join( + os.path.dirname(__file__), "chat_templates", "generation_configs" + ) + with open(os.path.join(config_path, f"{community_chat_template}.json")) as f: + gen_config = json.load(f) + chat_template_file = gen_config["chat_template"].split("/")[-1] + with open(os.path.join(chat_template_path, chat_template_file)) as f: + chat_template = f.read() + gen_config["template"] = chat_template.replace(" ", "").replace("\n", "") + return gen_config diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/404.html b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/404.html similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/404.html rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/404.html diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/700-532b1fe2415e5859.js b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/700-532b1fe2415e5859.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/700-532b1fe2415e5859.js rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/700-532b1fe2415e5859.js diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/995-34374f39bb210839.js b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/995-34374f39bb210839.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/995-34374f39bb210839.js rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/995-34374f39bb210839.js diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/css/429544bd3cd8ce3a.css b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/css/429544bd3cd8ce3a.css similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/css/429544bd3cd8ce3a.css rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/css/429544bd3cd8ce3a.css diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/css/5b67f082b31cfc7b.css b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/css/5b67f082b31cfc7b.css similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/css/5b67f082b31cfc7b.css rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/css/5b67f082b31cfc7b.css diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/css/9e63023b20ddb15e.css b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/css/9e63023b20ddb15e.css similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/_next/static/css/9e63023b20ddb15e.css rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/_next/static/css/9e63023b20ddb15e.css diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/apple-touch-icon.png b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/apple-touch-icon.png similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/apple-touch-icon.png rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/apple-touch-icon.png diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/chat.html b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/chat.html similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/chat.html rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/chat.html diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/chat.txt b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/chat.txt similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/chat.txt rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/chat.txt diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/favicon-16x16.png b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/favicon-16x16.png similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/favicon-16x16.png rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/favicon-16x16.png diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/favicon.ico b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/favicon.ico similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/favicon.ico rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/favicon.ico diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/index.html b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/index.html similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/index.html rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/index.html diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/index.txt b/bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/index.txt similarity index 100% rename from bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/ui/index.txt rename to bentoml/bentos/llama2/7b-chat-awq-4bit-a35d/src/ui/index.txt diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/README.md b/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/README.md deleted file mode 100644 index 01706ae2..00000000 --- a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/README.md +++ /dev/null @@ -1,16 +0,0 @@ -# llama2:7b-chat-awq-4bit-dcbf - -[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML) -[![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/) -[![join_slack](https://badgen.net/badge/Join/BentoML%20Slack/cyan?icon=slack)](https://l.bentoml.com/join-slack-swagger) -[![BentoML GitHub Repo](https://img.shields.io/github/stars/bentoml/bentoml?style=social)](https://github.com/bentoml/BentoML) -[![Twitter Follow](https://img.shields.io/twitter/follow/bentomlai?label=Follow%20BentoML&style=social)](https://twitter.com/bentomlai) - -This is a Machine Learning Service created with BentoML. - -## Help - -* [📖 Documentation](https://docs.bentoml.com/en/latest/): Learn how to use BentoML. -* [💬 Community](https://l.bentoml.com/join-slack-swagger): Join the BentoML Slack community. -* [🐛 GitHub Issues](https://github.com/bentoml/BentoML/issues): Report bugs and feature requests. -* Tip: you can also [customize this README](https://docs.bentoml.com/en/latest/concepts/bento.html#description). diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/env/python/requirements.lock.txt b/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/env/python/requirements.lock.txt deleted file mode 100644 index 9118ea44..00000000 --- a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/env/python/requirements.lock.txt +++ /dev/null @@ -1,147 +0,0 @@ -aiohttp==3.9.5 -aiosignal==1.3.1 -annotated-types==0.7.0 -anyio==4.4.0 -appdirs==1.4.4 -asgiref==3.8.1 -async-timeout==4.0.3 -attrs==23.2.0 -bentoml==1.2.19 -build==1.2.1 -cattrs==23.1.2 -certifi==2024.7.4 -charset-normalizer==3.3.2 -circus==0.18.0 -click==8.1.7 -click-option-group==0.5.6 -cloudpickle==3.0.0 -cmake==3.30.0 -deepmerge==1.1.1 -deprecated==1.2.14 -diskcache==5.6.3 -distro==1.9.0 -dnspython==2.6.1 -email-validator==2.2.0 -exceptiongroup==1.2.1 -fastapi==0.111.0 -fastapi-cli==0.0.4 -filelock==3.15.4 -frozenlist==1.4.1 -fs==2.4.16 -fsspec==2024.6.1 -h11==0.14.0 -httpcore==1.0.5 -httptools==0.6.1 -httpx==0.27.0 -httpx-ws==0.6.0 -huggingface-hub==0.23.4 -idna==3.7 -importlib-metadata==6.11.0 -inflection==0.5.1 -interegular==0.3.3 -jinja2==3.1.4 -joblib==1.4.2 -jsonschema==4.22.0 -jsonschema-specifications==2023.12.1 -lark==1.1.9 -llvmlite==0.43.0 -lm-format-enforcer==0.10.1 -markdown-it-py==3.0.0 -markupsafe==2.1.5 -mdurl==0.1.2 -mpmath==1.3.0 -msgpack==1.0.8 -multidict==6.0.5 -nest-asyncio==1.6.0 -networkx==3.2.1 -ninja==1.11.1.1 -numba==0.60.0 -numpy==1.26.0 -nvidia-cublas-cu12==12.1.3.1 -nvidia-cuda-cupti-cu12==12.1.105 -nvidia-cuda-nvrtc-cu12==12.1.105 -nvidia-cuda-runtime-cu12==12.1.105 -nvidia-cudnn-cu12==8.9.2.26 -nvidia-cufft-cu12==11.0.2.54 -nvidia-curand-cu12==10.3.2.106 -nvidia-cusolver-cu12==11.4.5.107 -nvidia-cusparse-cu12==12.1.0.106 -nvidia-ml-py==11.525.150 -nvidia-nccl-cu12==2.20.5 -nvidia-nvjitlink-cu12==12.5.82 -nvidia-nvtx-cu12==12.1.105 -openai==1.35.10 -opentelemetry-api==1.20.0 -opentelemetry-instrumentation==0.41b0 -opentelemetry-instrumentation-aiohttp-client==0.41b0 -opentelemetry-instrumentation-asgi==0.41b0 -opentelemetry-sdk==1.20.0 -opentelemetry-semantic-conventions==0.41b0 -opentelemetry-util-http==0.41b0 -orjson==3.10.6 -outlines==0.0.34 -packaging==24.1 -pathspec==0.12.1 -pip-requirements-parser==32.0.1 -pip-tools==7.4.1 -prometheus-client==0.20.0 -prometheus-fastapi-instrumentator==7.0.0 -protobuf==5.27.2 -psutil==6.0.0 -py-cpuinfo==9.0.0 -pydantic==2.8.2 -pydantic-core==2.20.1 -pygments==2.18.0 -pyparsing==3.1.2 -pyproject-hooks==1.1.0 -python-dateutil==2.9.0.post0 -python-dotenv==1.0.1 -python-json-logger==2.0.7 -python-multipart==0.0.9 -pyyaml==6.0.1 -pyzmq==26.0.3 -ray==2.31.0 -referencing==0.35.1 -regex==2024.5.15 -requests==2.32.3 -rich==13.7.1 -rpds-py==0.18.1 -safetensors==0.4.3 -schema==0.7.7 -scipy==1.13.1 -sentencepiece==0.2.0 -shellingham==1.5.4 -simple-di==0.1.5 -six==1.16.0 -sniffio==1.3.1 -starlette==0.37.2 -sympy==1.12.1 -tiktoken==0.7.0 -tokenizers==0.19.1 -tomli==2.0.1 -tomli-w==1.0.0 -torch==2.3.0 -tornado==6.4.1 -tqdm==4.66.4 -transformers==4.41.0 -triton==2.3.0 -typer==0.12.3 -typing-extensions==4.12.2 -ujson==5.10.0 -urllib3==2.2.2 -uvicorn[standard]==0.30.1 -uvloop==0.19.0 -vllm==0.4.3 -vllm-flash-attn==2.5.8.post2 -watchfiles==0.22.0 -websockets==12.0 -wheel==0.43.0 -wrapt==1.16.0 -wsproto==1.2.0 -xformers==0.0.26.post1 -yarl==1.9.4 -zipp==3.19.2 - -# The following packages are considered to be unsafe in a requirements file: -pip==24.1.1 -setuptools==70.2.0 diff --git a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/service.py b/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/service.py deleted file mode 100644 index 94af0b0d..00000000 --- a/bentoml/bentos/llama2/7b-chat-awq-4bit-dcbf/src/service.py +++ /dev/null @@ -1,239 +0,0 @@ -import functools -import json -import logging -import os -import sys -import uuid -from typing import AsyncGenerator, Literal, Optional - -import bentoml -import fastapi -import fastapi.staticfiles -import pydantic -import vllm.entrypoints.openai.api_server as vllm_api_server -import yaml -from annotated_types import Ge, Le -from bento_constants import CONSTANT_YAML -from fastapi.responses import FileResponse -from typing_extensions import Annotated, Literal - - -class Message(pydantic.BaseModel): - content: str - role: Literal["system", "user", "assistant"] - - -CONSTANTS = yaml.safe_load(CONSTANT_YAML) - -ENGINE_CONFIG = CONSTANTS["engine_config"] -SERVICE_CONFIG = CONSTANTS["service_config"] -OVERRIDE_CHAT_TEMPLATE = CONSTANTS.get("chat_template") - -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) - - -openai_api_app = fastapi.FastAPI() -static_app = fastapi.FastAPI() -ui_app = fastapi.FastAPI() - - -OPENAI_ENDPOINTS = [ - ["/chat/completions", vllm_api_server.create_chat_completion, ["POST"]], - ["/completions", vllm_api_server.create_completion, ["POST"]], - ["/models", vllm_api_server.show_available_models, ["GET"]], -] - - -class Message(pydantic.BaseModel): - role: Literal["system", "user", "assistant"] - content: str - - -for route, endpoint, methods in OPENAI_ENDPOINTS: - openai_api_app.add_api_route( - path=route, - endpoint=endpoint, - methods=methods, - include_in_schema=True, - ) - - -STATIC_DIR = os.path.join(os.path.dirname(__file__), "ui") - -ui_app.mount( - "/static", fastapi.staticfiles.StaticFiles(directory=STATIC_DIR), name="static" -) - - -@ui_app.get("/") -async def serve_chat_html(): - return FileResponse(os.path.join(STATIC_DIR, "chat.html")) - - -@ui_app.get("/{full_path:path}") -async def catch_all(full_path: str): - file_path = os.path.join(STATIC_DIR, full_path) - if os.path.exists(file_path): - return FileResponse(file_path) - return FileResponse(os.path.join(STATIC_DIR, "chat.html")) - - -# special handling for prometheus_client of bentoml -if "prometheus_client" in sys.modules: - sys.modules.pop("prometheus_client") - - -@bentoml.mount_asgi_app(openai_api_app, path="/v1") -@bentoml.mount_asgi_app(ui_app, path="/ui") -@bentoml.service(**SERVICE_CONFIG) -class VLLM: - def __init__(self) -> None: - from transformers import AutoTokenizer - from vllm import AsyncEngineArgs, AsyncLLMEngine - from vllm.entrypoints.openai.serving_chat import OpenAIServingChat - from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion - - ENGINE_ARGS = AsyncEngineArgs(**ENGINE_CONFIG) - self.engine = AsyncLLMEngine.from_engine_args(ENGINE_ARGS) - self.tokenizer = AutoTokenizer.from_pretrained(ENGINE_CONFIG["model"]) - logger.info(f"VLLM service initialized with model: {ENGINE_CONFIG['model']}") - - if OVERRIDE_CHAT_TEMPLATE: # use community chat template - gen_config = _get_gen_config(CONSTANTS["chat_template"]) - chat_template = gen_config["template"] - else: - chat_template = None - - model_config = self.engine.engine.get_model_config() - - # inject the engine into the openai serving chat and completion - vllm_api_server.openai_serving_chat = OpenAIServingChat( - engine=self.engine, - served_model_names=[ENGINE_CONFIG["model"]], - response_role="assistant", - chat_template=chat_template, - model_config=model_config, - ) - vllm_api_server.openai_serving_completion = OpenAIServingCompletion( - engine=self.engine, - served_model_names=[ENGINE_CONFIG["model"]], - model_config=model_config, - lora_modules=None, - ) - - @bentoml.api(route="/api/generate") - async def generate( - self, - prompt: str = "Explain superconductors like I'm five years old", - model: str = ENGINE_CONFIG["model"], - max_tokens: Annotated[ - int, - Ge(128), - Le(ENGINE_CONFIG["max_model_len"]), - ] = ENGINE_CONFIG["max_model_len"], - stop: Optional[list[str]] = None, - ) -> AsyncGenerator[str, None]: - if stop is None: - stop = [] - - from vllm import SamplingParams - - SAMPLING_PARAM = SamplingParams( - max_tokens=max_tokens, - stop=stop, - ) - stream = await self.engine.add_request(uuid.uuid4().hex, prompt, SAMPLING_PARAM) - - cursor = 0 - async for request_output in stream: - text = request_output.outputs[0].text - yield text[cursor:] - cursor = len(text) - - @bentoml.api(route="/api/chat") - async def chat( - self, - messages: list[Message] = [ - Message(content="what is the meaning of life?", role="user") - ], - model: str = ENGINE_CONFIG["model"], - max_tokens: Annotated[ - int, - Ge(128), - Le(ENGINE_CONFIG["max_model_len"]), - ] = ENGINE_CONFIG["max_model_len"], - stop: Optional[list[str]] = None, - ) -> AsyncGenerator[str, None]: - """ - light-weight chat API that takes in a list of messages and returns a response - """ - from vllm import SamplingParams - - try: - if OVERRIDE_CHAT_TEMPLATE: # community chat template - gen_config = _get_gen_config(CONSTANTS["chat_template"]) - if not stop: - if gen_config["stop_str"]: - stop = [gen_config["stop_str"]] - else: - stop = [] - system_prompt = gen_config["system_prompt"] - self.tokenizer.chat_template = gen_config["template"] - else: - if not stop: - if self.tokenizer.eos_token is not None: - stop = [self.tokenizer.eos_token] - else: - stop = [] - system_prompt = None - - SAMPLING_PARAM = SamplingParams( - max_tokens=max_tokens, - stop=stop, - ) - if system_prompt and messages[0].role != "system": - messages = [dict(role="system", content=system_prompt)] + messages - - prompt = self.tokenizer.apply_chat_template( - messages, - tokenize=False, - add_generation_prompt=True, - ) - - stream = await self.engine.add_request( - uuid.uuid4().hex, prompt, SAMPLING_PARAM - ) - - cursor = 0 - strip_flag = True - async for request_output in stream: - text = request_output.outputs[0].text - assistant_message = text[cursor:] - if not strip_flag: # strip the leading whitespace - yield assistant_message - elif assistant_message.strip(): - strip_flag = False - yield assistant_message.lstrip() - cursor = len(text) - except Exception as e: - logger.error(f"Error in chat API: {e}") - yield f"Error in chat API: {e}" - - -@functools.lru_cache(maxsize=1) -def _get_gen_config(community_chat_template: str) -> dict: - logger.info(f"Load community_chat_template: {community_chat_template}") - chat_template_path = os.path.join( - os.path.dirname(__file__), "chat_templates", "chat_templates" - ) - config_path = os.path.join( - os.path.dirname(__file__), "chat_templates", "generation_configs" - ) - with open(os.path.join(config_path, f"{community_chat_template}.json")) as f: - gen_config = json.load(f) - chat_template_file = gen_config["chat_template"].split("/")[-1] - with open(os.path.join(chat_template_path, chat_template_file)) as f: - chat_template = f.read() - gen_config["template"] = chat_template.replace(" ", "").replace("\n", "") - return gen_config diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/env/python/requirements.lock.txt b/bentoml/bentos/llama2/7b-chat-fp16-d960/env/python/requirements.lock.txt deleted file mode 100644 index 9118ea44..00000000 --- a/bentoml/bentos/llama2/7b-chat-fp16-d960/env/python/requirements.lock.txt +++ /dev/null @@ -1,147 +0,0 @@ -aiohttp==3.9.5 -aiosignal==1.3.1 -annotated-types==0.7.0 -anyio==4.4.0 -appdirs==1.4.4 -asgiref==3.8.1 -async-timeout==4.0.3 -attrs==23.2.0 -bentoml==1.2.19 -build==1.2.1 -cattrs==23.1.2 -certifi==2024.7.4 -charset-normalizer==3.3.2 -circus==0.18.0 -click==8.1.7 -click-option-group==0.5.6 -cloudpickle==3.0.0 -cmake==3.30.0 -deepmerge==1.1.1 -deprecated==1.2.14 -diskcache==5.6.3 -distro==1.9.0 -dnspython==2.6.1 -email-validator==2.2.0 -exceptiongroup==1.2.1 -fastapi==0.111.0 -fastapi-cli==0.0.4 -filelock==3.15.4 -frozenlist==1.4.1 -fs==2.4.16 -fsspec==2024.6.1 -h11==0.14.0 -httpcore==1.0.5 -httptools==0.6.1 -httpx==0.27.0 -httpx-ws==0.6.0 -huggingface-hub==0.23.4 -idna==3.7 -importlib-metadata==6.11.0 -inflection==0.5.1 -interegular==0.3.3 -jinja2==3.1.4 -joblib==1.4.2 -jsonschema==4.22.0 -jsonschema-specifications==2023.12.1 -lark==1.1.9 -llvmlite==0.43.0 -lm-format-enforcer==0.10.1 -markdown-it-py==3.0.0 -markupsafe==2.1.5 -mdurl==0.1.2 -mpmath==1.3.0 -msgpack==1.0.8 -multidict==6.0.5 -nest-asyncio==1.6.0 -networkx==3.2.1 -ninja==1.11.1.1 -numba==0.60.0 -numpy==1.26.0 -nvidia-cublas-cu12==12.1.3.1 -nvidia-cuda-cupti-cu12==12.1.105 -nvidia-cuda-nvrtc-cu12==12.1.105 -nvidia-cuda-runtime-cu12==12.1.105 -nvidia-cudnn-cu12==8.9.2.26 -nvidia-cufft-cu12==11.0.2.54 -nvidia-curand-cu12==10.3.2.106 -nvidia-cusolver-cu12==11.4.5.107 -nvidia-cusparse-cu12==12.1.0.106 -nvidia-ml-py==11.525.150 -nvidia-nccl-cu12==2.20.5 -nvidia-nvjitlink-cu12==12.5.82 -nvidia-nvtx-cu12==12.1.105 -openai==1.35.10 -opentelemetry-api==1.20.0 -opentelemetry-instrumentation==0.41b0 -opentelemetry-instrumentation-aiohttp-client==0.41b0 -opentelemetry-instrumentation-asgi==0.41b0 -opentelemetry-sdk==1.20.0 -opentelemetry-semantic-conventions==0.41b0 -opentelemetry-util-http==0.41b0 -orjson==3.10.6 -outlines==0.0.34 -packaging==24.1 -pathspec==0.12.1 -pip-requirements-parser==32.0.1 -pip-tools==7.4.1 -prometheus-client==0.20.0 -prometheus-fastapi-instrumentator==7.0.0 -protobuf==5.27.2 -psutil==6.0.0 -py-cpuinfo==9.0.0 -pydantic==2.8.2 -pydantic-core==2.20.1 -pygments==2.18.0 -pyparsing==3.1.2 -pyproject-hooks==1.1.0 -python-dateutil==2.9.0.post0 -python-dotenv==1.0.1 -python-json-logger==2.0.7 -python-multipart==0.0.9 -pyyaml==6.0.1 -pyzmq==26.0.3 -ray==2.31.0 -referencing==0.35.1 -regex==2024.5.15 -requests==2.32.3 -rich==13.7.1 -rpds-py==0.18.1 -safetensors==0.4.3 -schema==0.7.7 -scipy==1.13.1 -sentencepiece==0.2.0 -shellingham==1.5.4 -simple-di==0.1.5 -six==1.16.0 -sniffio==1.3.1 -starlette==0.37.2 -sympy==1.12.1 -tiktoken==0.7.0 -tokenizers==0.19.1 -tomli==2.0.1 -tomli-w==1.0.0 -torch==2.3.0 -tornado==6.4.1 -tqdm==4.66.4 -transformers==4.41.0 -triton==2.3.0 -typer==0.12.3 -typing-extensions==4.12.2 -ujson==5.10.0 -urllib3==2.2.2 -uvicorn[standard]==0.30.1 -uvloop==0.19.0 -vllm==0.4.3 -vllm-flash-attn==2.5.8.post2 -watchfiles==0.22.0 -websockets==12.0 -wheel==0.43.0 -wrapt==1.16.0 -wsproto==1.2.0 -xformers==0.0.26.post1 -yarl==1.9.4 -zipp==3.19.2 - -# The following packages are considered to be unsafe in a requirements file: -pip==24.1.1 -setuptools==70.2.0 diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/service.py b/bentoml/bentos/llama2/7b-chat-fp16-d960/src/service.py deleted file mode 100644 index 94af0b0d..00000000 --- a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/service.py +++ /dev/null @@ -1,239 +0,0 @@ -import functools -import json -import logging -import os -import sys -import uuid -from typing import AsyncGenerator, Literal, Optional - -import bentoml -import fastapi -import fastapi.staticfiles -import pydantic -import vllm.entrypoints.openai.api_server as vllm_api_server -import yaml -from annotated_types import Ge, Le -from bento_constants import CONSTANT_YAML -from fastapi.responses import FileResponse -from typing_extensions import Annotated, Literal - - -class Message(pydantic.BaseModel): - content: str - role: Literal["system", "user", "assistant"] - - -CONSTANTS = yaml.safe_load(CONSTANT_YAML) - -ENGINE_CONFIG = CONSTANTS["engine_config"] -SERVICE_CONFIG = CONSTANTS["service_config"] -OVERRIDE_CHAT_TEMPLATE = CONSTANTS.get("chat_template") - -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) - - -openai_api_app = fastapi.FastAPI() -static_app = fastapi.FastAPI() -ui_app = fastapi.FastAPI() - - -OPENAI_ENDPOINTS = [ - ["/chat/completions", vllm_api_server.create_chat_completion, ["POST"]], - ["/completions", vllm_api_server.create_completion, ["POST"]], - ["/models", vllm_api_server.show_available_models, ["GET"]], -] - - -class Message(pydantic.BaseModel): - role: Literal["system", "user", "assistant"] - content: str - - -for route, endpoint, methods in OPENAI_ENDPOINTS: - openai_api_app.add_api_route( - path=route, - endpoint=endpoint, - methods=methods, - include_in_schema=True, - ) - - -STATIC_DIR = os.path.join(os.path.dirname(__file__), "ui") - -ui_app.mount( - "/static", fastapi.staticfiles.StaticFiles(directory=STATIC_DIR), name="static" -) - - -@ui_app.get("/") -async def serve_chat_html(): - return FileResponse(os.path.join(STATIC_DIR, "chat.html")) - - -@ui_app.get("/{full_path:path}") -async def catch_all(full_path: str): - file_path = os.path.join(STATIC_DIR, full_path) - if os.path.exists(file_path): - return FileResponse(file_path) - return FileResponse(os.path.join(STATIC_DIR, "chat.html")) - - -# special handling for prometheus_client of bentoml -if "prometheus_client" in sys.modules: - sys.modules.pop("prometheus_client") - - -@bentoml.mount_asgi_app(openai_api_app, path="/v1") -@bentoml.mount_asgi_app(ui_app, path="/ui") -@bentoml.service(**SERVICE_CONFIG) -class VLLM: - def __init__(self) -> None: - from transformers import AutoTokenizer - from vllm import AsyncEngineArgs, AsyncLLMEngine - from vllm.entrypoints.openai.serving_chat import OpenAIServingChat - from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion - - ENGINE_ARGS = AsyncEngineArgs(**ENGINE_CONFIG) - self.engine = AsyncLLMEngine.from_engine_args(ENGINE_ARGS) - self.tokenizer = AutoTokenizer.from_pretrained(ENGINE_CONFIG["model"]) - logger.info(f"VLLM service initialized with model: {ENGINE_CONFIG['model']}") - - if OVERRIDE_CHAT_TEMPLATE: # use community chat template - gen_config = _get_gen_config(CONSTANTS["chat_template"]) - chat_template = gen_config["template"] - else: - chat_template = None - - model_config = self.engine.engine.get_model_config() - - # inject the engine into the openai serving chat and completion - vllm_api_server.openai_serving_chat = OpenAIServingChat( - engine=self.engine, - served_model_names=[ENGINE_CONFIG["model"]], - response_role="assistant", - chat_template=chat_template, - model_config=model_config, - ) - vllm_api_server.openai_serving_completion = OpenAIServingCompletion( - engine=self.engine, - served_model_names=[ENGINE_CONFIG["model"]], - model_config=model_config, - lora_modules=None, - ) - - @bentoml.api(route="/api/generate") - async def generate( - self, - prompt: str = "Explain superconductors like I'm five years old", - model: str = ENGINE_CONFIG["model"], - max_tokens: Annotated[ - int, - Ge(128), - Le(ENGINE_CONFIG["max_model_len"]), - ] = ENGINE_CONFIG["max_model_len"], - stop: Optional[list[str]] = None, - ) -> AsyncGenerator[str, None]: - if stop is None: - stop = [] - - from vllm import SamplingParams - - SAMPLING_PARAM = SamplingParams( - max_tokens=max_tokens, - stop=stop, - ) - stream = await self.engine.add_request(uuid.uuid4().hex, prompt, SAMPLING_PARAM) - - cursor = 0 - async for request_output in stream: - text = request_output.outputs[0].text - yield text[cursor:] - cursor = len(text) - - @bentoml.api(route="/api/chat") - async def chat( - self, - messages: list[Message] = [ - Message(content="what is the meaning of life?", role="user") - ], - model: str = ENGINE_CONFIG["model"], - max_tokens: Annotated[ - int, - Ge(128), - Le(ENGINE_CONFIG["max_model_len"]), - ] = ENGINE_CONFIG["max_model_len"], - stop: Optional[list[str]] = None, - ) -> AsyncGenerator[str, None]: - """ - light-weight chat API that takes in a list of messages and returns a response - """ - from vllm import SamplingParams - - try: - if OVERRIDE_CHAT_TEMPLATE: # community chat template - gen_config = _get_gen_config(CONSTANTS["chat_template"]) - if not stop: - if gen_config["stop_str"]: - stop = [gen_config["stop_str"]] - else: - stop = [] - system_prompt = gen_config["system_prompt"] - self.tokenizer.chat_template = gen_config["template"] - else: - if not stop: - if self.tokenizer.eos_token is not None: - stop = [self.tokenizer.eos_token] - else: - stop = [] - system_prompt = None - - SAMPLING_PARAM = SamplingParams( - max_tokens=max_tokens, - stop=stop, - ) - if system_prompt and messages[0].role != "system": - messages = [dict(role="system", content=system_prompt)] + messages - - prompt = self.tokenizer.apply_chat_template( - messages, - tokenize=False, - add_generation_prompt=True, - ) - - stream = await self.engine.add_request( - uuid.uuid4().hex, prompt, SAMPLING_PARAM - ) - - cursor = 0 - strip_flag = True - async for request_output in stream: - text = request_output.outputs[0].text - assistant_message = text[cursor:] - if not strip_flag: # strip the leading whitespace - yield assistant_message - elif assistant_message.strip(): - strip_flag = False - yield assistant_message.lstrip() - cursor = len(text) - except Exception as e: - logger.error(f"Error in chat API: {e}") - yield f"Error in chat API: {e}" - - -@functools.lru_cache(maxsize=1) -def _get_gen_config(community_chat_template: str) -> dict: - logger.info(f"Load community_chat_template: {community_chat_template}") - chat_template_path = os.path.join( - os.path.dirname(__file__), "chat_templates", "chat_templates" - ) - config_path = os.path.join( - os.path.dirname(__file__), "chat_templates", "generation_configs" - ) - with open(os.path.join(config_path, f"{community_chat_template}.json")) as f: - gen_config = json.load(f) - chat_template_file = gen_config["chat_template"].split("/")[-1] - with open(os.path.join(chat_template_path, chat_template_file)) as f: - chat_template = f.read() - gen_config["template"] = chat_template.replace(" ", "").replace("\n", "") - return gen_config diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/README.md b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/README.md similarity index 97% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/README.md rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/README.md index d919c09c..80f71b9a 100644 --- a/bentoml/bentos/llama2/7b-chat-fp16-d960/README.md +++ b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/README.md @@ -1,4 +1,4 @@ -# llama2:7b-chat-fp16-d960 +# llama2:7b-chat-fp16-f9ef [![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML) [![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/) diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/apis/openapi.yaml b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/apis/openapi.yaml similarity index 99% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/apis/openapi.yaml rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/apis/openapi.yaml index 68dc4977..39a14a61 100644 --- a/bentoml/bentos/llama2/7b-chat-fp16-d960/apis/openapi.yaml +++ b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/apis/openapi.yaml @@ -1036,6 +1036,39 @@ paths: tags: - Service APIs x-bentoml-name: generate + /chat/: + get: + operationId: serve_chat_html__get + responses: + '200': + content: + application/json: + schema: {} + description: Successful Response + summary: Serve Chat Html + /chat/{full_path}: + get: + operationId: catch_all__full_path__get + parameters: + - in: path + name: full_path + required: true + schema: + title: Full Path + type: string + responses: + '200': + content: + application/json: + schema: {} + description: Successful Response + '422': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + description: Validation Error + summary: Catch All /healthz: get: description: Health check endpoint. Expecting an empty response with status @@ -1076,39 +1109,6 @@ paths: description: Successful Response tags: - Infrastructure - /ui/: - get: - operationId: serve_chat_html__get - responses: - '200': - content: - application/json: - schema: {} - description: Successful Response - summary: Serve Chat Html - /ui/{full_path}: - get: - operationId: catch_all__full_path__get - parameters: - - in: path - name: full_path - required: true - schema: - title: Full Path - type: string - responses: - '200': - content: - application/json: - schema: {} - description: Successful Response - '422': - content: - application/json: - schema: - $ref: '#/components/schemas/HTTPValidationError' - description: Validation Error - summary: Catch All /v1/chat/completions: post: operationId: create_chat_completion_chat_completions_post diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/apis/schema.json b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/apis/schema.json similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/apis/schema.json rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/apis/schema.json diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/bento.yaml b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/bento.yaml similarity index 97% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/bento.yaml rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/bento.yaml index 884a8fa5..e65c062f 100644 --- a/bentoml/bentos/llama2/7b-chat-fp16-d960/bento.yaml +++ b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/bento.yaml @@ -1,8 +1,8 @@ service: service:VLLM name: llama2 -version: 7b-chat-fp16-d960 +version: 7b-chat-fp16-f9ef bentoml_version: 1.2.19 -creation_time: '2024-07-05T07:25:41.044102+00:00' +creation_time: '2024-07-08T07:48:39.737970+00:00' labels: openllm_alias: 7b,7b-chat openllm_hf_model_id: meta-llama/Llama-2-7b-chat-hf diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/env/docker/Dockerfile b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/env/docker/Dockerfile similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/env/docker/Dockerfile rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/env/docker/Dockerfile diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/env/docker/entrypoint.sh b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/env/docker/entrypoint.sh similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/env/docker/entrypoint.sh rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/env/docker/entrypoint.sh diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/env/python/install.sh b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/env/python/install.sh similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/env/python/install.sh rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/env/python/install.sh diff --git a/bentoml/bentos/llama2/7b-chat-fp16-f9ef/env/python/requirements.lock.txt b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/env/python/requirements.lock.txt new file mode 100644 index 00000000..925ce193 --- /dev/null +++ b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/env/python/requirements.lock.txt @@ -0,0 +1,147 @@ +aiohttp==3.9.5 +aiosignal==1.3.1 +annotated-types==0.7.0 +anyio==4.4.0 +appdirs==1.4.4 +asgiref==3.8.1 +async-timeout==4.0.3 +attrs==23.2.0 +bentoml==1.2.19 +build==1.2.1 +cattrs==23.1.2 +certifi==2024.7.4 +charset-normalizer==3.3.2 +circus==0.18.0 +click==8.1.7 +click-option-group==0.5.6 +cloudpickle==3.0.0 +cmake==3.30.0 +deepmerge==1.1.1 +deprecated==1.2.14 +diskcache==5.6.3 +distro==1.9.0 +dnspython==2.6.1 +email-validator==2.2.0 +exceptiongroup==1.2.1 +fastapi==0.111.0 +fastapi-cli==0.0.4 +filelock==3.15.4 +frozenlist==1.4.1 +fs==2.4.16 +fsspec==2024.6.1 +h11==0.14.0 +httpcore==1.0.5 +httptools==0.6.1 +httpx==0.27.0 +httpx-ws==0.6.0 +huggingface-hub==0.23.4 +idna==3.7 +importlib-metadata==6.11.0 +inflection==0.5.1 +interegular==0.3.3 +jinja2==3.1.4 +joblib==1.4.2 +jsonschema==4.22.0 +jsonschema-specifications==2023.12.1 +lark==1.1.9 +llvmlite==0.43.0 +lm-format-enforcer==0.10.1 +markdown-it-py==3.0.0 +markupsafe==2.1.5 +mdurl==0.1.2 +mpmath==1.3.0 +msgpack==1.0.8 +multidict==6.0.5 +nest-asyncio==1.6.0 +networkx==3.2.1 +ninja==1.11.1.1 +numba==0.60.0 +numpy==1.26.0 +nvidia-cublas-cu12==12.1.3.1 +nvidia-cuda-cupti-cu12==12.1.105 +nvidia-cuda-nvrtc-cu12==12.1.105 +nvidia-cuda-runtime-cu12==12.1.105 +nvidia-cudnn-cu12==8.9.2.26 +nvidia-cufft-cu12==11.0.2.54 +nvidia-curand-cu12==10.3.2.106 +nvidia-cusolver-cu12==11.4.5.107 +nvidia-cusparse-cu12==12.1.0.106 +nvidia-ml-py==11.525.150 +nvidia-nccl-cu12==2.20.5 +nvidia-nvjitlink-cu12==12.5.82 +nvidia-nvtx-cu12==12.1.105 +openai==1.35.10 +opentelemetry-api==1.20.0 +opentelemetry-instrumentation==0.41b0 +opentelemetry-instrumentation-aiohttp-client==0.41b0 +opentelemetry-instrumentation-asgi==0.41b0 +opentelemetry-sdk==1.20.0 +opentelemetry-semantic-conventions==0.41b0 +opentelemetry-util-http==0.41b0 +orjson==3.10.6 +outlines==0.0.34 +packaging==24.1 +pathspec==0.12.1 +pip-requirements-parser==32.0.1 +pip-tools==7.4.1 +prometheus-client==0.20.0 +prometheus-fastapi-instrumentator==7.0.0 +protobuf==5.27.2 +psutil==6.0.0 +py-cpuinfo==9.0.0 +pydantic==2.8.2 +pydantic-core==2.20.1 +pygments==2.18.0 +pyparsing==3.1.2 +pyproject-hooks==1.1.0 +python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 +python-json-logger==2.0.7 +python-multipart==0.0.9 +pyyaml==6.0.1 +pyzmq==26.0.3 +ray==2.31.0 +referencing==0.35.1 +regex==2024.5.15 +requests==2.32.3 +rich==13.7.1 +rpds-py==0.18.1 +safetensors==0.4.3 +schema==0.7.7 +scipy==1.13.1 +sentencepiece==0.2.0 +shellingham==1.5.4 +simple-di==0.1.5 +six==1.16.0 +sniffio==1.3.1 +starlette==0.37.2 +sympy==1.12.1 +tiktoken==0.7.0 +tokenizers==0.19.1 +tomli==2.0.1 +tomli-w==1.0.0 +torch==2.3.0 +tornado==6.4.1 +tqdm==4.66.4 +transformers==4.41.0 +triton==2.3.0 +typer==0.12.3 +typing-extensions==4.12.2 +ujson==5.10.0 +urllib3==2.2.2 +uvicorn[standard]==0.30.1 +uvloop==0.19.0 +vllm==0.4.3 +vllm-flash-attn==2.5.8.post2 +watchfiles==0.22.0 +websockets==12.0 +wheel==0.43.0 +wrapt==1.16.0 +wsproto==1.2.0 +xformers==0.0.26.post1 +yarl==1.9.4 +zipp==3.19.2 + +# The following packages are considered to be unsafe in a requirements file: +pip==24.1.2 +setuptools==70.2.0 diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/env/python/requirements.txt b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/env/python/requirements.txt similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/env/python/requirements.txt rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/env/python/requirements.txt diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/env/python/version.txt b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/env/python/version.txt similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/env/python/version.txt rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/env/python/version.txt diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/bento_constants.py b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/bento_constants.py similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/bento_constants.py rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/bento_constants.py diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/bentofile.yaml b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/bentofile.yaml similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/bentofile.yaml rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/bentofile.yaml diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/chat_templates/alpaca.jinja b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/chat_templates/alpaca.jinja similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/chat_templates/alpaca.jinja rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/chat_templates/alpaca.jinja diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/chat_templates/amberchat.jinja b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/chat_templates/amberchat.jinja similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/chat_templates/amberchat.jinja rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/chat_templates/amberchat.jinja diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/chat_templates/chatml.jinja b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/chat_templates/chatml.jinja similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/chat_templates/chatml.jinja rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/chat_templates/chatml.jinja diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/chat_templates/chatqa.jinja b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/chat_templates/chatqa.jinja similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/chat_templates/chatqa.jinja rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/chat_templates/chatqa.jinja diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/chat_templates/falcon-instruct.jinja b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/chat_templates/falcon-instruct.jinja similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/chat_templates/falcon-instruct.jinja rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/chat_templates/falcon-instruct.jinja diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/chat_templates/gemma-it.jinja b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/chat_templates/gemma-it.jinja similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/chat_templates/gemma-it.jinja rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/chat_templates/gemma-it.jinja diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/chat_templates/llama-2-chat.jinja b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/chat_templates/llama-2-chat.jinja similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/chat_templates/llama-2-chat.jinja rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/chat_templates/llama-2-chat.jinja diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/chat_templates/llama-3-chat.jinja b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/chat_templates/llama-3-chat.jinja similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/chat_templates/llama-3-chat.jinja rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/chat_templates/llama-3-chat.jinja diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/chat_templates/mistral-instruct.jinja b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/chat_templates/mistral-instruct.jinja similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/chat_templates/mistral-instruct.jinja rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/chat_templates/mistral-instruct.jinja diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/chat_templates/openchat.jinja b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/chat_templates/openchat.jinja similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/chat_templates/openchat.jinja rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/chat_templates/openchat.jinja diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/chat_templates/phi-3.jinja b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/chat_templates/phi-3.jinja similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/chat_templates/phi-3.jinja rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/chat_templates/phi-3.jinja diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/chat_templates/saiga.jinja b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/chat_templates/saiga.jinja similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/chat_templates/saiga.jinja rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/chat_templates/saiga.jinja diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/chat_templates/solar-instruct.jinja b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/chat_templates/solar-instruct.jinja similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/chat_templates/solar-instruct.jinja rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/chat_templates/solar-instruct.jinja diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/chat_templates/vicuna.jinja b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/chat_templates/vicuna.jinja similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/chat_templates/vicuna.jinja rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/chat_templates/vicuna.jinja diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/chat_templates/zephyr.jinja b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/chat_templates/zephyr.jinja similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/chat_templates/zephyr.jinja rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/chat_templates/zephyr.jinja diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/generation_configs/alpaca.json b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/generation_configs/alpaca.json similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/generation_configs/alpaca.json rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/generation_configs/alpaca.json diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/generation_configs/amberchat.json b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/generation_configs/amberchat.json similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/generation_configs/amberchat.json rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/generation_configs/amberchat.json diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/generation_configs/chatqa.json b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/generation_configs/chatqa.json similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/generation_configs/chatqa.json rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/generation_configs/chatqa.json diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/generation_configs/gemma-it.json b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/generation_configs/gemma-it.json similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/generation_configs/gemma-it.json rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/generation_configs/gemma-it.json diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/generation_configs/llama-2-chat.json b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/generation_configs/llama-2-chat.json similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/generation_configs/llama-2-chat.json rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/generation_configs/llama-2-chat.json diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/generation_configs/llama-3-chat.json b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/generation_configs/llama-3-chat.json similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/generation_configs/llama-3-chat.json rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/generation_configs/llama-3-chat.json diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/generation_configs/mistral-instruct.json b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/generation_configs/mistral-instruct.json similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/generation_configs/mistral-instruct.json rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/generation_configs/mistral-instruct.json diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/generation_configs/openchat.json b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/generation_configs/openchat.json similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/generation_configs/openchat.json rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/generation_configs/openchat.json diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/generation_configs/orca-2.json b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/generation_configs/orca-2.json similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/generation_configs/orca-2.json rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/generation_configs/orca-2.json diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/generation_configs/phi-3.json b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/generation_configs/phi-3.json similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/generation_configs/phi-3.json rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/generation_configs/phi-3.json diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/generation_configs/qwen2-chat.json b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/generation_configs/qwen2-chat.json similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/generation_configs/qwen2-chat.json rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/generation_configs/qwen2-chat.json diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/generation_configs/saiga.json b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/generation_configs/saiga.json similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/generation_configs/saiga.json rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/generation_configs/saiga.json diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/generation_configs/solar-instruct.json b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/generation_configs/solar-instruct.json similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/generation_configs/solar-instruct.json rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/generation_configs/solar-instruct.json diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/generation_configs/vicuna.json b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/generation_configs/vicuna.json similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/generation_configs/vicuna.json rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/generation_configs/vicuna.json diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/generation_configs/yi-chat.json b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/generation_configs/yi-chat.json similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/generation_configs/yi-chat.json rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/generation_configs/yi-chat.json diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/generation_configs/zephyr.json b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/generation_configs/zephyr.json similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/chat_templates/generation_configs/zephyr.json rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/chat_templates/generation_configs/zephyr.json diff --git a/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/service.py b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/service.py new file mode 100644 index 00000000..42152243 --- /dev/null +++ b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/service.py @@ -0,0 +1,239 @@ +import functools +import json +import logging +import os +import sys +import uuid +from typing import AsyncGenerator, Literal, Optional + +import bentoml +import fastapi +import fastapi.staticfiles +import pydantic +import vllm.entrypoints.openai.api_server as vllm_api_server +import yaml +from annotated_types import Ge, Le +from bento_constants import CONSTANT_YAML +from fastapi.responses import FileResponse +from typing_extensions import Annotated, Literal + + +class Message(pydantic.BaseModel): + content: str + role: Literal["system", "user", "assistant"] + + +CONSTANTS = yaml.safe_load(CONSTANT_YAML) + +ENGINE_CONFIG = CONSTANTS["engine_config"] +SERVICE_CONFIG = CONSTANTS["service_config"] +OVERRIDE_CHAT_TEMPLATE = CONSTANTS.get("chat_template") + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +openai_api_app = fastapi.FastAPI() +static_app = fastapi.FastAPI() +ui_app = fastapi.FastAPI() + + +OPENAI_ENDPOINTS = [ + ["/chat/completions", vllm_api_server.create_chat_completion, ["POST"]], + ["/completions", vllm_api_server.create_completion, ["POST"]], + ["/models", vllm_api_server.show_available_models, ["GET"]], +] + + +class Message(pydantic.BaseModel): + role: Literal["system", "user", "assistant"] + content: str + + +for route, endpoint, methods in OPENAI_ENDPOINTS: + openai_api_app.add_api_route( + path=route, + endpoint=endpoint, + methods=methods, + include_in_schema=True, + ) + + +STATIC_DIR = os.path.join(os.path.dirname(__file__), "ui") + +ui_app.mount( + "/static", fastapi.staticfiles.StaticFiles(directory=STATIC_DIR), name="static" +) + + +@ui_app.get("/") +async def serve_chat_html(): + return FileResponse(os.path.join(STATIC_DIR, "chat.html")) + + +@ui_app.get("/{full_path:path}") +async def catch_all(full_path: str): + file_path = os.path.join(STATIC_DIR, full_path) + if os.path.exists(file_path): + return FileResponse(file_path) + return FileResponse(os.path.join(STATIC_DIR, "chat.html")) + + +# special handling for prometheus_client of bentoml +if "prometheus_client" in sys.modules: + sys.modules.pop("prometheus_client") + + +@bentoml.mount_asgi_app(openai_api_app, path="/v1") +@bentoml.mount_asgi_app(ui_app, path="/chat") +@bentoml.service(**SERVICE_CONFIG) +class VLLM: + def __init__(self) -> None: + from transformers import AutoTokenizer + from vllm import AsyncEngineArgs, AsyncLLMEngine + from vllm.entrypoints.openai.serving_chat import OpenAIServingChat + from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion + + ENGINE_ARGS = AsyncEngineArgs(**ENGINE_CONFIG) + self.engine = AsyncLLMEngine.from_engine_args(ENGINE_ARGS) + self.tokenizer = AutoTokenizer.from_pretrained(ENGINE_CONFIG["model"]) + logger.info(f"VLLM service initialized with model: {ENGINE_CONFIG['model']}") + + if OVERRIDE_CHAT_TEMPLATE: # use community chat template + gen_config = _get_gen_config(CONSTANTS["chat_template"]) + chat_template = gen_config["template"] + else: + chat_template = None + + model_config = self.engine.engine.get_model_config() + + # inject the engine into the openai serving chat and completion + vllm_api_server.openai_serving_chat = OpenAIServingChat( + engine=self.engine, + served_model_names=[ENGINE_CONFIG["model"]], + response_role="assistant", + chat_template=chat_template, + model_config=model_config, + ) + vllm_api_server.openai_serving_completion = OpenAIServingCompletion( + engine=self.engine, + served_model_names=[ENGINE_CONFIG["model"]], + model_config=model_config, + lora_modules=None, + ) + + @bentoml.api(route="/api/generate") + async def generate( + self, + prompt: str = "Explain superconductors like I'm five years old", + model: str = ENGINE_CONFIG["model"], + max_tokens: Annotated[ + int, + Ge(128), + Le(ENGINE_CONFIG["max_model_len"]), + ] = ENGINE_CONFIG["max_model_len"], + stop: Optional[list[str]] = None, + ) -> AsyncGenerator[str, None]: + if stop is None: + stop = [] + + from vllm import SamplingParams + + SAMPLING_PARAM = SamplingParams( + max_tokens=max_tokens, + stop=stop, + ) + stream = await self.engine.add_request(uuid.uuid4().hex, prompt, SAMPLING_PARAM) + + cursor = 0 + async for request_output in stream: + text = request_output.outputs[0].text + yield text[cursor:] + cursor = len(text) + + @bentoml.api(route="/api/chat") + async def chat( + self, + messages: list[Message] = [ + Message(content="what is the meaning of life?", role="user") + ], + model: str = ENGINE_CONFIG["model"], + max_tokens: Annotated[ + int, + Ge(128), + Le(ENGINE_CONFIG["max_model_len"]), + ] = ENGINE_CONFIG["max_model_len"], + stop: Optional[list[str]] = None, + ) -> AsyncGenerator[str, None]: + """ + light-weight chat API that takes in a list of messages and returns a response + """ + from vllm import SamplingParams + + try: + if OVERRIDE_CHAT_TEMPLATE: # community chat template + gen_config = _get_gen_config(CONSTANTS["chat_template"]) + if not stop: + if gen_config["stop_str"]: + stop = [gen_config["stop_str"]] + else: + stop = [] + system_prompt = gen_config["system_prompt"] + self.tokenizer.chat_template = gen_config["template"] + else: + if not stop: + if self.tokenizer.eos_token is not None: + stop = [self.tokenizer.eos_token] + else: + stop = [] + system_prompt = None + + SAMPLING_PARAM = SamplingParams( + max_tokens=max_tokens, + stop=stop, + ) + if system_prompt and messages[0].role != "system": + messages = [dict(role="system", content=system_prompt)] + messages + + prompt = self.tokenizer.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True, + ) + + stream = await self.engine.add_request( + uuid.uuid4().hex, prompt, SAMPLING_PARAM + ) + + cursor = 0 + strip_flag = True + async for request_output in stream: + text = request_output.outputs[0].text + assistant_message = text[cursor:] + if not strip_flag: # strip the leading whitespace + yield assistant_message + elif assistant_message.strip(): + strip_flag = False + yield assistant_message.lstrip() + cursor = len(text) + except Exception as e: + logger.error(f"Error in chat API: {e}") + yield f"Error in chat API: {e}" + + +@functools.lru_cache(maxsize=1) +def _get_gen_config(community_chat_template: str) -> dict: + logger.info(f"Load community_chat_template: {community_chat_template}") + chat_template_path = os.path.join( + os.path.dirname(__file__), "chat_templates", "chat_templates" + ) + config_path = os.path.join( + os.path.dirname(__file__), "chat_templates", "generation_configs" + ) + with open(os.path.join(config_path, f"{community_chat_template}.json")) as f: + gen_config = json.load(f) + chat_template_file = gen_config["chat_template"].split("/")[-1] + with open(os.path.join(chat_template_path, chat_template_file)) as f: + chat_template = f.read() + gen_config["template"] = chat_template.replace(" ", "").replace("\n", "") + return gen_config diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/404.html b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/404.html similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/404.html rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/404.html diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/700-532b1fe2415e5859.js b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/700-532b1fe2415e5859.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/700-532b1fe2415e5859.js rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/700-532b1fe2415e5859.js diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/995-34374f39bb210839.js b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/995-34374f39bb210839.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/995-34374f39bb210839.js rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/995-34374f39bb210839.js diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/css/429544bd3cd8ce3a.css b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/css/429544bd3cd8ce3a.css similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/css/429544bd3cd8ce3a.css rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/css/429544bd3cd8ce3a.css diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/css/5b67f082b31cfc7b.css b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/css/5b67f082b31cfc7b.css similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/css/5b67f082b31cfc7b.css rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/css/5b67f082b31cfc7b.css diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/css/9e63023b20ddb15e.css b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/css/9e63023b20ddb15e.css similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/_next/static/css/9e63023b20ddb15e.css rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/_next/static/css/9e63023b20ddb15e.css diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/apple-touch-icon.png b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/apple-touch-icon.png similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/apple-touch-icon.png rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/apple-touch-icon.png diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/chat.html b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/chat.html similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/chat.html rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/chat.html diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/chat.txt b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/chat.txt similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/chat.txt rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/chat.txt diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/favicon-16x16.png b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/favicon-16x16.png similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/favicon-16x16.png rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/favicon-16x16.png diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/favicon.ico b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/favicon.ico similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/favicon.ico rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/favicon.ico diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/index.html b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/index.html similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/index.html rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/index.html diff --git a/bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/index.txt b/bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/index.txt similarity index 100% rename from bentoml/bentos/llama2/7b-chat-fp16-d960/src/ui/index.txt rename to bentoml/bentos/llama2/7b-chat-fp16-f9ef/src/ui/index.txt diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/README.md b/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/README.md deleted file mode 100644 index 75c828d3..00000000 --- a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/README.md +++ /dev/null @@ -1,16 +0,0 @@ -# llama3:70b-instruct-awq-4bit-3948 - -[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML) -[![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/) -[![join_slack](https://badgen.net/badge/Join/BentoML%20Slack/cyan?icon=slack)](https://l.bentoml.com/join-slack-swagger) -[![BentoML GitHub Repo](https://img.shields.io/github/stars/bentoml/bentoml?style=social)](https://github.com/bentoml/BentoML) -[![Twitter Follow](https://img.shields.io/twitter/follow/bentomlai?label=Follow%20BentoML&style=social)](https://twitter.com/bentomlai) - -This is a Machine Learning Service created with BentoML. - -## Help - -* [📖 Documentation](https://docs.bentoml.com/en/latest/): Learn how to use BentoML. -* [💬 Community](https://l.bentoml.com/join-slack-swagger): Join the BentoML Slack community. -* [🐛 GitHub Issues](https://github.com/bentoml/BentoML/issues): Report bugs and feature requests. -* Tip: you can also [customize this README](https://docs.bentoml.com/en/latest/concepts/bento.html#description). diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/env/python/requirements.lock.txt b/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/env/python/requirements.lock.txt deleted file mode 100644 index 9118ea44..00000000 --- a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/env/python/requirements.lock.txt +++ /dev/null @@ -1,147 +0,0 @@ -aiohttp==3.9.5 -aiosignal==1.3.1 -annotated-types==0.7.0 -anyio==4.4.0 -appdirs==1.4.4 -asgiref==3.8.1 -async-timeout==4.0.3 -attrs==23.2.0 -bentoml==1.2.19 -build==1.2.1 -cattrs==23.1.2 -certifi==2024.7.4 -charset-normalizer==3.3.2 -circus==0.18.0 -click==8.1.7 -click-option-group==0.5.6 -cloudpickle==3.0.0 -cmake==3.30.0 -deepmerge==1.1.1 -deprecated==1.2.14 -diskcache==5.6.3 -distro==1.9.0 -dnspython==2.6.1 -email-validator==2.2.0 -exceptiongroup==1.2.1 -fastapi==0.111.0 -fastapi-cli==0.0.4 -filelock==3.15.4 -frozenlist==1.4.1 -fs==2.4.16 -fsspec==2024.6.1 -h11==0.14.0 -httpcore==1.0.5 -httptools==0.6.1 -httpx==0.27.0 -httpx-ws==0.6.0 -huggingface-hub==0.23.4 -idna==3.7 -importlib-metadata==6.11.0 -inflection==0.5.1 -interegular==0.3.3 -jinja2==3.1.4 -joblib==1.4.2 -jsonschema==4.22.0 -jsonschema-specifications==2023.12.1 -lark==1.1.9 -llvmlite==0.43.0 -lm-format-enforcer==0.10.1 -markdown-it-py==3.0.0 -markupsafe==2.1.5 -mdurl==0.1.2 -mpmath==1.3.0 -msgpack==1.0.8 -multidict==6.0.5 -nest-asyncio==1.6.0 -networkx==3.2.1 -ninja==1.11.1.1 -numba==0.60.0 -numpy==1.26.0 -nvidia-cublas-cu12==12.1.3.1 -nvidia-cuda-cupti-cu12==12.1.105 -nvidia-cuda-nvrtc-cu12==12.1.105 -nvidia-cuda-runtime-cu12==12.1.105 -nvidia-cudnn-cu12==8.9.2.26 -nvidia-cufft-cu12==11.0.2.54 -nvidia-curand-cu12==10.3.2.106 -nvidia-cusolver-cu12==11.4.5.107 -nvidia-cusparse-cu12==12.1.0.106 -nvidia-ml-py==11.525.150 -nvidia-nccl-cu12==2.20.5 -nvidia-nvjitlink-cu12==12.5.82 -nvidia-nvtx-cu12==12.1.105 -openai==1.35.10 -opentelemetry-api==1.20.0 -opentelemetry-instrumentation==0.41b0 -opentelemetry-instrumentation-aiohttp-client==0.41b0 -opentelemetry-instrumentation-asgi==0.41b0 -opentelemetry-sdk==1.20.0 -opentelemetry-semantic-conventions==0.41b0 -opentelemetry-util-http==0.41b0 -orjson==3.10.6 -outlines==0.0.34 -packaging==24.1 -pathspec==0.12.1 -pip-requirements-parser==32.0.1 -pip-tools==7.4.1 -prometheus-client==0.20.0 -prometheus-fastapi-instrumentator==7.0.0 -protobuf==5.27.2 -psutil==6.0.0 -py-cpuinfo==9.0.0 -pydantic==2.8.2 -pydantic-core==2.20.1 -pygments==2.18.0 -pyparsing==3.1.2 -pyproject-hooks==1.1.0 -python-dateutil==2.9.0.post0 -python-dotenv==1.0.1 -python-json-logger==2.0.7 -python-multipart==0.0.9 -pyyaml==6.0.1 -pyzmq==26.0.3 -ray==2.31.0 -referencing==0.35.1 -regex==2024.5.15 -requests==2.32.3 -rich==13.7.1 -rpds-py==0.18.1 -safetensors==0.4.3 -schema==0.7.7 -scipy==1.13.1 -sentencepiece==0.2.0 -shellingham==1.5.4 -simple-di==0.1.5 -six==1.16.0 -sniffio==1.3.1 -starlette==0.37.2 -sympy==1.12.1 -tiktoken==0.7.0 -tokenizers==0.19.1 -tomli==2.0.1 -tomli-w==1.0.0 -torch==2.3.0 -tornado==6.4.1 -tqdm==4.66.4 -transformers==4.41.0 -triton==2.3.0 -typer==0.12.3 -typing-extensions==4.12.2 -ujson==5.10.0 -urllib3==2.2.2 -uvicorn[standard]==0.30.1 -uvloop==0.19.0 -vllm==0.4.3 -vllm-flash-attn==2.5.8.post2 -watchfiles==0.22.0 -websockets==12.0 -wheel==0.43.0 -wrapt==1.16.0 -wsproto==1.2.0 -xformers==0.0.26.post1 -yarl==1.9.4 -zipp==3.19.2 - -# The following packages are considered to be unsafe in a requirements file: -pip==24.1.1 -setuptools==70.2.0 diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/service.py b/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/service.py deleted file mode 100644 index 94af0b0d..00000000 --- a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/service.py +++ /dev/null @@ -1,239 +0,0 @@ -import functools -import json -import logging -import os -import sys -import uuid -from typing import AsyncGenerator, Literal, Optional - -import bentoml -import fastapi -import fastapi.staticfiles -import pydantic -import vllm.entrypoints.openai.api_server as vllm_api_server -import yaml -from annotated_types import Ge, Le -from bento_constants import CONSTANT_YAML -from fastapi.responses import FileResponse -from typing_extensions import Annotated, Literal - - -class Message(pydantic.BaseModel): - content: str - role: Literal["system", "user", "assistant"] - - -CONSTANTS = yaml.safe_load(CONSTANT_YAML) - -ENGINE_CONFIG = CONSTANTS["engine_config"] -SERVICE_CONFIG = CONSTANTS["service_config"] -OVERRIDE_CHAT_TEMPLATE = CONSTANTS.get("chat_template") - -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) - - -openai_api_app = fastapi.FastAPI() -static_app = fastapi.FastAPI() -ui_app = fastapi.FastAPI() - - -OPENAI_ENDPOINTS = [ - ["/chat/completions", vllm_api_server.create_chat_completion, ["POST"]], - ["/completions", vllm_api_server.create_completion, ["POST"]], - ["/models", vllm_api_server.show_available_models, ["GET"]], -] - - -class Message(pydantic.BaseModel): - role: Literal["system", "user", "assistant"] - content: str - - -for route, endpoint, methods in OPENAI_ENDPOINTS: - openai_api_app.add_api_route( - path=route, - endpoint=endpoint, - methods=methods, - include_in_schema=True, - ) - - -STATIC_DIR = os.path.join(os.path.dirname(__file__), "ui") - -ui_app.mount( - "/static", fastapi.staticfiles.StaticFiles(directory=STATIC_DIR), name="static" -) - - -@ui_app.get("/") -async def serve_chat_html(): - return FileResponse(os.path.join(STATIC_DIR, "chat.html")) - - -@ui_app.get("/{full_path:path}") -async def catch_all(full_path: str): - file_path = os.path.join(STATIC_DIR, full_path) - if os.path.exists(file_path): - return FileResponse(file_path) - return FileResponse(os.path.join(STATIC_DIR, "chat.html")) - - -# special handling for prometheus_client of bentoml -if "prometheus_client" in sys.modules: - sys.modules.pop("prometheus_client") - - -@bentoml.mount_asgi_app(openai_api_app, path="/v1") -@bentoml.mount_asgi_app(ui_app, path="/ui") -@bentoml.service(**SERVICE_CONFIG) -class VLLM: - def __init__(self) -> None: - from transformers import AutoTokenizer - from vllm import AsyncEngineArgs, AsyncLLMEngine - from vllm.entrypoints.openai.serving_chat import OpenAIServingChat - from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion - - ENGINE_ARGS = AsyncEngineArgs(**ENGINE_CONFIG) - self.engine = AsyncLLMEngine.from_engine_args(ENGINE_ARGS) - self.tokenizer = AutoTokenizer.from_pretrained(ENGINE_CONFIG["model"]) - logger.info(f"VLLM service initialized with model: {ENGINE_CONFIG['model']}") - - if OVERRIDE_CHAT_TEMPLATE: # use community chat template - gen_config = _get_gen_config(CONSTANTS["chat_template"]) - chat_template = gen_config["template"] - else: - chat_template = None - - model_config = self.engine.engine.get_model_config() - - # inject the engine into the openai serving chat and completion - vllm_api_server.openai_serving_chat = OpenAIServingChat( - engine=self.engine, - served_model_names=[ENGINE_CONFIG["model"]], - response_role="assistant", - chat_template=chat_template, - model_config=model_config, - ) - vllm_api_server.openai_serving_completion = OpenAIServingCompletion( - engine=self.engine, - served_model_names=[ENGINE_CONFIG["model"]], - model_config=model_config, - lora_modules=None, - ) - - @bentoml.api(route="/api/generate") - async def generate( - self, - prompt: str = "Explain superconductors like I'm five years old", - model: str = ENGINE_CONFIG["model"], - max_tokens: Annotated[ - int, - Ge(128), - Le(ENGINE_CONFIG["max_model_len"]), - ] = ENGINE_CONFIG["max_model_len"], - stop: Optional[list[str]] = None, - ) -> AsyncGenerator[str, None]: - if stop is None: - stop = [] - - from vllm import SamplingParams - - SAMPLING_PARAM = SamplingParams( - max_tokens=max_tokens, - stop=stop, - ) - stream = await self.engine.add_request(uuid.uuid4().hex, prompt, SAMPLING_PARAM) - - cursor = 0 - async for request_output in stream: - text = request_output.outputs[0].text - yield text[cursor:] - cursor = len(text) - - @bentoml.api(route="/api/chat") - async def chat( - self, - messages: list[Message] = [ - Message(content="what is the meaning of life?", role="user") - ], - model: str = ENGINE_CONFIG["model"], - max_tokens: Annotated[ - int, - Ge(128), - Le(ENGINE_CONFIG["max_model_len"]), - ] = ENGINE_CONFIG["max_model_len"], - stop: Optional[list[str]] = None, - ) -> AsyncGenerator[str, None]: - """ - light-weight chat API that takes in a list of messages and returns a response - """ - from vllm import SamplingParams - - try: - if OVERRIDE_CHAT_TEMPLATE: # community chat template - gen_config = _get_gen_config(CONSTANTS["chat_template"]) - if not stop: - if gen_config["stop_str"]: - stop = [gen_config["stop_str"]] - else: - stop = [] - system_prompt = gen_config["system_prompt"] - self.tokenizer.chat_template = gen_config["template"] - else: - if not stop: - if self.tokenizer.eos_token is not None: - stop = [self.tokenizer.eos_token] - else: - stop = [] - system_prompt = None - - SAMPLING_PARAM = SamplingParams( - max_tokens=max_tokens, - stop=stop, - ) - if system_prompt and messages[0].role != "system": - messages = [dict(role="system", content=system_prompt)] + messages - - prompt = self.tokenizer.apply_chat_template( - messages, - tokenize=False, - add_generation_prompt=True, - ) - - stream = await self.engine.add_request( - uuid.uuid4().hex, prompt, SAMPLING_PARAM - ) - - cursor = 0 - strip_flag = True - async for request_output in stream: - text = request_output.outputs[0].text - assistant_message = text[cursor:] - if not strip_flag: # strip the leading whitespace - yield assistant_message - elif assistant_message.strip(): - strip_flag = False - yield assistant_message.lstrip() - cursor = len(text) - except Exception as e: - logger.error(f"Error in chat API: {e}") - yield f"Error in chat API: {e}" - - -@functools.lru_cache(maxsize=1) -def _get_gen_config(community_chat_template: str) -> dict: - logger.info(f"Load community_chat_template: {community_chat_template}") - chat_template_path = os.path.join( - os.path.dirname(__file__), "chat_templates", "chat_templates" - ) - config_path = os.path.join( - os.path.dirname(__file__), "chat_templates", "generation_configs" - ) - with open(os.path.join(config_path, f"{community_chat_template}.json")) as f: - gen_config = json.load(f) - chat_template_file = gen_config["chat_template"].split("/")[-1] - with open(os.path.join(chat_template_path, chat_template_file)) as f: - chat_template = f.read() - gen_config["template"] = chat_template.replace(" ", "").replace("\n", "") - return gen_config diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/README.md b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/README.md new file mode 100644 index 00000000..41865b54 --- /dev/null +++ b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/README.md @@ -0,0 +1,16 @@ +# llama3:70b-instruct-awq-4bit-a5d5 + +[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML) +[![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/) +[![join_slack](https://badgen.net/badge/Join/BentoML%20Slack/cyan?icon=slack)](https://l.bentoml.com/join-slack-swagger) +[![BentoML GitHub Repo](https://img.shields.io/github/stars/bentoml/bentoml?style=social)](https://github.com/bentoml/BentoML) +[![Twitter Follow](https://img.shields.io/twitter/follow/bentomlai?label=Follow%20BentoML&style=social)](https://twitter.com/bentomlai) + +This is a Machine Learning Service created with BentoML. + +## Help + +* [📖 Documentation](https://docs.bentoml.com/en/latest/): Learn how to use BentoML. +* [💬 Community](https://l.bentoml.com/join-slack-swagger): Join the BentoML Slack community. +* [🐛 GitHub Issues](https://github.com/bentoml/BentoML/issues): Report bugs and feature requests. +* Tip: you can also [customize this README](https://docs.bentoml.com/en/latest/concepts/bento.html#description). diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/apis/openapi.yaml b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/apis/openapi.yaml similarity index 99% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/apis/openapi.yaml rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/apis/openapi.yaml index ebcf6cad..428352e1 100644 --- a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/apis/openapi.yaml +++ b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/apis/openapi.yaml @@ -1036,6 +1036,39 @@ paths: tags: - Service APIs x-bentoml-name: generate + /chat/: + get: + operationId: serve_chat_html__get + responses: + '200': + content: + application/json: + schema: {} + description: Successful Response + summary: Serve Chat Html + /chat/{full_path}: + get: + operationId: catch_all__full_path__get + parameters: + - in: path + name: full_path + required: true + schema: + title: Full Path + type: string + responses: + '200': + content: + application/json: + schema: {} + description: Successful Response + '422': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + description: Validation Error + summary: Catch All /healthz: get: description: Health check endpoint. Expecting an empty response with status @@ -1076,39 +1109,6 @@ paths: description: Successful Response tags: - Infrastructure - /ui/: - get: - operationId: serve_chat_html__get - responses: - '200': - content: - application/json: - schema: {} - description: Successful Response - summary: Serve Chat Html - /ui/{full_path}: - get: - operationId: catch_all__full_path__get - parameters: - - in: path - name: full_path - required: true - schema: - title: Full Path - type: string - responses: - '200': - content: - application/json: - schema: {} - description: Successful Response - '422': - content: - application/json: - schema: - $ref: '#/components/schemas/HTTPValidationError' - description: Validation Error - summary: Catch All /v1/chat/completions: post: operationId: create_chat_completion_chat_completions_post diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/apis/schema.json b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/apis/schema.json similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/apis/schema.json rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/apis/schema.json diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/bento.yaml b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/bento.yaml similarity index 97% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/bento.yaml rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/bento.yaml index e9506746..e8220dd2 100644 --- a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/bento.yaml +++ b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/bento.yaml @@ -1,8 +1,8 @@ service: service:VLLM name: llama3 -version: 70b-instruct-awq-4bit-3948 +version: 70b-instruct-awq-4bit-a5d5 bentoml_version: 1.2.19 -creation_time: '2024-07-05T07:29:29.883840+00:00' +creation_time: '2024-07-08T07:54:39.183603+00:00' labels: openllm_alias: 70b-4bit,70b-instruct-4bit openllm_hf_model_id: casperhansen/llama-3-70b-instruct-awq diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/env/docker/Dockerfile b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/env/docker/Dockerfile similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/env/docker/Dockerfile rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/env/docker/Dockerfile diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/env/docker/entrypoint.sh b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/env/docker/entrypoint.sh similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/env/docker/entrypoint.sh rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/env/docker/entrypoint.sh diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/env/python/install.sh b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/env/python/install.sh similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/env/python/install.sh rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/env/python/install.sh diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/env/python/requirements.lock.txt b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/env/python/requirements.lock.txt new file mode 100644 index 00000000..925ce193 --- /dev/null +++ b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/env/python/requirements.lock.txt @@ -0,0 +1,147 @@ +aiohttp==3.9.5 +aiosignal==1.3.1 +annotated-types==0.7.0 +anyio==4.4.0 +appdirs==1.4.4 +asgiref==3.8.1 +async-timeout==4.0.3 +attrs==23.2.0 +bentoml==1.2.19 +build==1.2.1 +cattrs==23.1.2 +certifi==2024.7.4 +charset-normalizer==3.3.2 +circus==0.18.0 +click==8.1.7 +click-option-group==0.5.6 +cloudpickle==3.0.0 +cmake==3.30.0 +deepmerge==1.1.1 +deprecated==1.2.14 +diskcache==5.6.3 +distro==1.9.0 +dnspython==2.6.1 +email-validator==2.2.0 +exceptiongroup==1.2.1 +fastapi==0.111.0 +fastapi-cli==0.0.4 +filelock==3.15.4 +frozenlist==1.4.1 +fs==2.4.16 +fsspec==2024.6.1 +h11==0.14.0 +httpcore==1.0.5 +httptools==0.6.1 +httpx==0.27.0 +httpx-ws==0.6.0 +huggingface-hub==0.23.4 +idna==3.7 +importlib-metadata==6.11.0 +inflection==0.5.1 +interegular==0.3.3 +jinja2==3.1.4 +joblib==1.4.2 +jsonschema==4.22.0 +jsonschema-specifications==2023.12.1 +lark==1.1.9 +llvmlite==0.43.0 +lm-format-enforcer==0.10.1 +markdown-it-py==3.0.0 +markupsafe==2.1.5 +mdurl==0.1.2 +mpmath==1.3.0 +msgpack==1.0.8 +multidict==6.0.5 +nest-asyncio==1.6.0 +networkx==3.2.1 +ninja==1.11.1.1 +numba==0.60.0 +numpy==1.26.0 +nvidia-cublas-cu12==12.1.3.1 +nvidia-cuda-cupti-cu12==12.1.105 +nvidia-cuda-nvrtc-cu12==12.1.105 +nvidia-cuda-runtime-cu12==12.1.105 +nvidia-cudnn-cu12==8.9.2.26 +nvidia-cufft-cu12==11.0.2.54 +nvidia-curand-cu12==10.3.2.106 +nvidia-cusolver-cu12==11.4.5.107 +nvidia-cusparse-cu12==12.1.0.106 +nvidia-ml-py==11.525.150 +nvidia-nccl-cu12==2.20.5 +nvidia-nvjitlink-cu12==12.5.82 +nvidia-nvtx-cu12==12.1.105 +openai==1.35.10 +opentelemetry-api==1.20.0 +opentelemetry-instrumentation==0.41b0 +opentelemetry-instrumentation-aiohttp-client==0.41b0 +opentelemetry-instrumentation-asgi==0.41b0 +opentelemetry-sdk==1.20.0 +opentelemetry-semantic-conventions==0.41b0 +opentelemetry-util-http==0.41b0 +orjson==3.10.6 +outlines==0.0.34 +packaging==24.1 +pathspec==0.12.1 +pip-requirements-parser==32.0.1 +pip-tools==7.4.1 +prometheus-client==0.20.0 +prometheus-fastapi-instrumentator==7.0.0 +protobuf==5.27.2 +psutil==6.0.0 +py-cpuinfo==9.0.0 +pydantic==2.8.2 +pydantic-core==2.20.1 +pygments==2.18.0 +pyparsing==3.1.2 +pyproject-hooks==1.1.0 +python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 +python-json-logger==2.0.7 +python-multipart==0.0.9 +pyyaml==6.0.1 +pyzmq==26.0.3 +ray==2.31.0 +referencing==0.35.1 +regex==2024.5.15 +requests==2.32.3 +rich==13.7.1 +rpds-py==0.18.1 +safetensors==0.4.3 +schema==0.7.7 +scipy==1.13.1 +sentencepiece==0.2.0 +shellingham==1.5.4 +simple-di==0.1.5 +six==1.16.0 +sniffio==1.3.1 +starlette==0.37.2 +sympy==1.12.1 +tiktoken==0.7.0 +tokenizers==0.19.1 +tomli==2.0.1 +tomli-w==1.0.0 +torch==2.3.0 +tornado==6.4.1 +tqdm==4.66.4 +transformers==4.41.0 +triton==2.3.0 +typer==0.12.3 +typing-extensions==4.12.2 +ujson==5.10.0 +urllib3==2.2.2 +uvicorn[standard]==0.30.1 +uvloop==0.19.0 +vllm==0.4.3 +vllm-flash-attn==2.5.8.post2 +watchfiles==0.22.0 +websockets==12.0 +wheel==0.43.0 +wrapt==1.16.0 +wsproto==1.2.0 +xformers==0.0.26.post1 +yarl==1.9.4 +zipp==3.19.2 + +# The following packages are considered to be unsafe in a requirements file: +pip==24.1.2 +setuptools==70.2.0 diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/env/python/requirements.txt b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/env/python/requirements.txt similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/env/python/requirements.txt rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/env/python/requirements.txt diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/env/python/version.txt b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/env/python/version.txt similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/env/python/version.txt rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/env/python/version.txt diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/bento_constants.py b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/bento_constants.py similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/bento_constants.py rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/bento_constants.py diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/bentofile.yaml b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/bentofile.yaml similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/bentofile.yaml rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/bentofile.yaml diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/chat_templates/alpaca.jinja b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/chat_templates/alpaca.jinja similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/chat_templates/alpaca.jinja rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/chat_templates/alpaca.jinja diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/chat_templates/amberchat.jinja b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/chat_templates/amberchat.jinja similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/chat_templates/amberchat.jinja rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/chat_templates/amberchat.jinja diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/chat_templates/chatml.jinja b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/chat_templates/chatml.jinja similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/chat_templates/chatml.jinja rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/chat_templates/chatml.jinja diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/chat_templates/chatqa.jinja b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/chat_templates/chatqa.jinja similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/chat_templates/chatqa.jinja rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/chat_templates/chatqa.jinja diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/chat_templates/falcon-instruct.jinja b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/chat_templates/falcon-instruct.jinja similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/chat_templates/falcon-instruct.jinja rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/chat_templates/falcon-instruct.jinja diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/chat_templates/gemma-it.jinja b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/chat_templates/gemma-it.jinja similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/chat_templates/gemma-it.jinja rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/chat_templates/gemma-it.jinja diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/chat_templates/llama-2-chat.jinja b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/chat_templates/llama-2-chat.jinja similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/chat_templates/llama-2-chat.jinja rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/chat_templates/llama-2-chat.jinja diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/chat_templates/llama-3-chat.jinja b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/chat_templates/llama-3-chat.jinja similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/chat_templates/llama-3-chat.jinja rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/chat_templates/llama-3-chat.jinja diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/chat_templates/mistral-instruct.jinja b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/chat_templates/mistral-instruct.jinja similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/chat_templates/mistral-instruct.jinja rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/chat_templates/mistral-instruct.jinja diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/chat_templates/openchat.jinja b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/chat_templates/openchat.jinja similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/chat_templates/openchat.jinja rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/chat_templates/openchat.jinja diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/chat_templates/phi-3.jinja b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/chat_templates/phi-3.jinja similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/chat_templates/phi-3.jinja rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/chat_templates/phi-3.jinja diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/chat_templates/saiga.jinja b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/chat_templates/saiga.jinja similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/chat_templates/saiga.jinja rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/chat_templates/saiga.jinja diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/chat_templates/solar-instruct.jinja b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/chat_templates/solar-instruct.jinja similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/chat_templates/solar-instruct.jinja rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/chat_templates/solar-instruct.jinja diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/chat_templates/vicuna.jinja b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/chat_templates/vicuna.jinja similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/chat_templates/vicuna.jinja rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/chat_templates/vicuna.jinja diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/chat_templates/zephyr.jinja b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/chat_templates/zephyr.jinja similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/chat_templates/zephyr.jinja rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/chat_templates/zephyr.jinja diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/generation_configs/alpaca.json b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/generation_configs/alpaca.json similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/generation_configs/alpaca.json rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/generation_configs/alpaca.json diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/generation_configs/amberchat.json b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/generation_configs/amberchat.json similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/generation_configs/amberchat.json rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/generation_configs/amberchat.json diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/generation_configs/chatqa.json b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/generation_configs/chatqa.json similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/generation_configs/chatqa.json rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/generation_configs/chatqa.json diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/generation_configs/gemma-it.json b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/generation_configs/gemma-it.json similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/generation_configs/gemma-it.json rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/generation_configs/gemma-it.json diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/generation_configs/llama-2-chat.json b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/generation_configs/llama-2-chat.json similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/generation_configs/llama-2-chat.json rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/generation_configs/llama-2-chat.json diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/generation_configs/llama-3-chat.json b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/generation_configs/llama-3-chat.json similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/generation_configs/llama-3-chat.json rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/generation_configs/llama-3-chat.json diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/generation_configs/mistral-instruct.json b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/generation_configs/mistral-instruct.json similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/generation_configs/mistral-instruct.json rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/generation_configs/mistral-instruct.json diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/generation_configs/openchat.json b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/generation_configs/openchat.json similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/generation_configs/openchat.json rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/generation_configs/openchat.json diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/generation_configs/orca-2.json b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/generation_configs/orca-2.json similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/generation_configs/orca-2.json rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/generation_configs/orca-2.json diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/generation_configs/phi-3.json b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/generation_configs/phi-3.json similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/generation_configs/phi-3.json rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/generation_configs/phi-3.json diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/generation_configs/qwen2-chat.json b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/generation_configs/qwen2-chat.json similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/generation_configs/qwen2-chat.json rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/generation_configs/qwen2-chat.json diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/generation_configs/saiga.json b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/generation_configs/saiga.json similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/generation_configs/saiga.json rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/generation_configs/saiga.json diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/generation_configs/solar-instruct.json b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/generation_configs/solar-instruct.json similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/generation_configs/solar-instruct.json rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/generation_configs/solar-instruct.json diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/generation_configs/vicuna.json b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/generation_configs/vicuna.json similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/generation_configs/vicuna.json rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/generation_configs/vicuna.json diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/generation_configs/yi-chat.json b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/generation_configs/yi-chat.json similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/generation_configs/yi-chat.json rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/generation_configs/yi-chat.json diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/generation_configs/zephyr.json b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/generation_configs/zephyr.json similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/chat_templates/generation_configs/zephyr.json rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/chat_templates/generation_configs/zephyr.json diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/service.py b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/service.py new file mode 100644 index 00000000..42152243 --- /dev/null +++ b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/service.py @@ -0,0 +1,239 @@ +import functools +import json +import logging +import os +import sys +import uuid +from typing import AsyncGenerator, Literal, Optional + +import bentoml +import fastapi +import fastapi.staticfiles +import pydantic +import vllm.entrypoints.openai.api_server as vllm_api_server +import yaml +from annotated_types import Ge, Le +from bento_constants import CONSTANT_YAML +from fastapi.responses import FileResponse +from typing_extensions import Annotated, Literal + + +class Message(pydantic.BaseModel): + content: str + role: Literal["system", "user", "assistant"] + + +CONSTANTS = yaml.safe_load(CONSTANT_YAML) + +ENGINE_CONFIG = CONSTANTS["engine_config"] +SERVICE_CONFIG = CONSTANTS["service_config"] +OVERRIDE_CHAT_TEMPLATE = CONSTANTS.get("chat_template") + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +openai_api_app = fastapi.FastAPI() +static_app = fastapi.FastAPI() +ui_app = fastapi.FastAPI() + + +OPENAI_ENDPOINTS = [ + ["/chat/completions", vllm_api_server.create_chat_completion, ["POST"]], + ["/completions", vllm_api_server.create_completion, ["POST"]], + ["/models", vllm_api_server.show_available_models, ["GET"]], +] + + +class Message(pydantic.BaseModel): + role: Literal["system", "user", "assistant"] + content: str + + +for route, endpoint, methods in OPENAI_ENDPOINTS: + openai_api_app.add_api_route( + path=route, + endpoint=endpoint, + methods=methods, + include_in_schema=True, + ) + + +STATIC_DIR = os.path.join(os.path.dirname(__file__), "ui") + +ui_app.mount( + "/static", fastapi.staticfiles.StaticFiles(directory=STATIC_DIR), name="static" +) + + +@ui_app.get("/") +async def serve_chat_html(): + return FileResponse(os.path.join(STATIC_DIR, "chat.html")) + + +@ui_app.get("/{full_path:path}") +async def catch_all(full_path: str): + file_path = os.path.join(STATIC_DIR, full_path) + if os.path.exists(file_path): + return FileResponse(file_path) + return FileResponse(os.path.join(STATIC_DIR, "chat.html")) + + +# special handling for prometheus_client of bentoml +if "prometheus_client" in sys.modules: + sys.modules.pop("prometheus_client") + + +@bentoml.mount_asgi_app(openai_api_app, path="/v1") +@bentoml.mount_asgi_app(ui_app, path="/chat") +@bentoml.service(**SERVICE_CONFIG) +class VLLM: + def __init__(self) -> None: + from transformers import AutoTokenizer + from vllm import AsyncEngineArgs, AsyncLLMEngine + from vllm.entrypoints.openai.serving_chat import OpenAIServingChat + from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion + + ENGINE_ARGS = AsyncEngineArgs(**ENGINE_CONFIG) + self.engine = AsyncLLMEngine.from_engine_args(ENGINE_ARGS) + self.tokenizer = AutoTokenizer.from_pretrained(ENGINE_CONFIG["model"]) + logger.info(f"VLLM service initialized with model: {ENGINE_CONFIG['model']}") + + if OVERRIDE_CHAT_TEMPLATE: # use community chat template + gen_config = _get_gen_config(CONSTANTS["chat_template"]) + chat_template = gen_config["template"] + else: + chat_template = None + + model_config = self.engine.engine.get_model_config() + + # inject the engine into the openai serving chat and completion + vllm_api_server.openai_serving_chat = OpenAIServingChat( + engine=self.engine, + served_model_names=[ENGINE_CONFIG["model"]], + response_role="assistant", + chat_template=chat_template, + model_config=model_config, + ) + vllm_api_server.openai_serving_completion = OpenAIServingCompletion( + engine=self.engine, + served_model_names=[ENGINE_CONFIG["model"]], + model_config=model_config, + lora_modules=None, + ) + + @bentoml.api(route="/api/generate") + async def generate( + self, + prompt: str = "Explain superconductors like I'm five years old", + model: str = ENGINE_CONFIG["model"], + max_tokens: Annotated[ + int, + Ge(128), + Le(ENGINE_CONFIG["max_model_len"]), + ] = ENGINE_CONFIG["max_model_len"], + stop: Optional[list[str]] = None, + ) -> AsyncGenerator[str, None]: + if stop is None: + stop = [] + + from vllm import SamplingParams + + SAMPLING_PARAM = SamplingParams( + max_tokens=max_tokens, + stop=stop, + ) + stream = await self.engine.add_request(uuid.uuid4().hex, prompt, SAMPLING_PARAM) + + cursor = 0 + async for request_output in stream: + text = request_output.outputs[0].text + yield text[cursor:] + cursor = len(text) + + @bentoml.api(route="/api/chat") + async def chat( + self, + messages: list[Message] = [ + Message(content="what is the meaning of life?", role="user") + ], + model: str = ENGINE_CONFIG["model"], + max_tokens: Annotated[ + int, + Ge(128), + Le(ENGINE_CONFIG["max_model_len"]), + ] = ENGINE_CONFIG["max_model_len"], + stop: Optional[list[str]] = None, + ) -> AsyncGenerator[str, None]: + """ + light-weight chat API that takes in a list of messages and returns a response + """ + from vllm import SamplingParams + + try: + if OVERRIDE_CHAT_TEMPLATE: # community chat template + gen_config = _get_gen_config(CONSTANTS["chat_template"]) + if not stop: + if gen_config["stop_str"]: + stop = [gen_config["stop_str"]] + else: + stop = [] + system_prompt = gen_config["system_prompt"] + self.tokenizer.chat_template = gen_config["template"] + else: + if not stop: + if self.tokenizer.eos_token is not None: + stop = [self.tokenizer.eos_token] + else: + stop = [] + system_prompt = None + + SAMPLING_PARAM = SamplingParams( + max_tokens=max_tokens, + stop=stop, + ) + if system_prompt and messages[0].role != "system": + messages = [dict(role="system", content=system_prompt)] + messages + + prompt = self.tokenizer.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True, + ) + + stream = await self.engine.add_request( + uuid.uuid4().hex, prompt, SAMPLING_PARAM + ) + + cursor = 0 + strip_flag = True + async for request_output in stream: + text = request_output.outputs[0].text + assistant_message = text[cursor:] + if not strip_flag: # strip the leading whitespace + yield assistant_message + elif assistant_message.strip(): + strip_flag = False + yield assistant_message.lstrip() + cursor = len(text) + except Exception as e: + logger.error(f"Error in chat API: {e}") + yield f"Error in chat API: {e}" + + +@functools.lru_cache(maxsize=1) +def _get_gen_config(community_chat_template: str) -> dict: + logger.info(f"Load community_chat_template: {community_chat_template}") + chat_template_path = os.path.join( + os.path.dirname(__file__), "chat_templates", "chat_templates" + ) + config_path = os.path.join( + os.path.dirname(__file__), "chat_templates", "generation_configs" + ) + with open(os.path.join(config_path, f"{community_chat_template}.json")) as f: + gen_config = json.load(f) + chat_template_file = gen_config["chat_template"].split("/")[-1] + with open(os.path.join(chat_template_path, chat_template_file)) as f: + chat_template = f.read() + gen_config["template"] = chat_template.replace(" ", "").replace("\n", "") + return gen_config diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/404.html b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/404.html similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/404.html rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/404.html diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/700-532b1fe2415e5859.js b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/700-532b1fe2415e5859.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/700-532b1fe2415e5859.js rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/700-532b1fe2415e5859.js diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/995-34374f39bb210839.js b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/995-34374f39bb210839.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/995-34374f39bb210839.js rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/995-34374f39bb210839.js diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/css/429544bd3cd8ce3a.css b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/css/429544bd3cd8ce3a.css similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/css/429544bd3cd8ce3a.css rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/css/429544bd3cd8ce3a.css diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/css/5b67f082b31cfc7b.css b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/css/5b67f082b31cfc7b.css similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/css/5b67f082b31cfc7b.css rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/css/5b67f082b31cfc7b.css diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/css/9e63023b20ddb15e.css b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/css/9e63023b20ddb15e.css similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/_next/static/css/9e63023b20ddb15e.css rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/_next/static/css/9e63023b20ddb15e.css diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/apple-touch-icon.png b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/apple-touch-icon.png similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/apple-touch-icon.png rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/apple-touch-icon.png diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/chat.html b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/chat.html similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/chat.html rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/chat.html diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/chat.txt b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/chat.txt similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/chat.txt rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/chat.txt diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/favicon-16x16.png b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/favicon-16x16.png similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/favicon-16x16.png rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/favicon-16x16.png diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/favicon.ico b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/favicon.ico similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/favicon.ico rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/favicon.ico diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/index.html b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/index.html similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/index.html rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/index.html diff --git a/bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/index.txt b/bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/index.txt similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-awq-4bit-3948/src/ui/index.txt rename to bentoml/bentos/llama3/70b-instruct-awq-4bit-a5d5/src/ui/index.txt diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/README.md b/bentoml/bentos/llama3/70b-instruct-fp16-8678/README.md deleted file mode 100644 index d4344339..00000000 --- a/bentoml/bentos/llama3/70b-instruct-fp16-8678/README.md +++ /dev/null @@ -1,16 +0,0 @@ -# llama3:70b-instruct-fp16-8678 - -[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML) -[![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/) -[![join_slack](https://badgen.net/badge/Join/BentoML%20Slack/cyan?icon=slack)](https://l.bentoml.com/join-slack-swagger) -[![BentoML GitHub Repo](https://img.shields.io/github/stars/bentoml/bentoml?style=social)](https://github.com/bentoml/BentoML) -[![Twitter Follow](https://img.shields.io/twitter/follow/bentomlai?label=Follow%20BentoML&style=social)](https://twitter.com/bentomlai) - -This is a Machine Learning Service created with BentoML. - -## Help - -* [📖 Documentation](https://docs.bentoml.com/en/latest/): Learn how to use BentoML. -* [💬 Community](https://l.bentoml.com/join-slack-swagger): Join the BentoML Slack community. -* [🐛 GitHub Issues](https://github.com/bentoml/BentoML/issues): Report bugs and feature requests. -* Tip: you can also [customize this README](https://docs.bentoml.com/en/latest/concepts/bento.html#description). diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/env/python/requirements.lock.txt b/bentoml/bentos/llama3/70b-instruct-fp16-8678/env/python/requirements.lock.txt deleted file mode 100644 index 9118ea44..00000000 --- a/bentoml/bentos/llama3/70b-instruct-fp16-8678/env/python/requirements.lock.txt +++ /dev/null @@ -1,147 +0,0 @@ -aiohttp==3.9.5 -aiosignal==1.3.1 -annotated-types==0.7.0 -anyio==4.4.0 -appdirs==1.4.4 -asgiref==3.8.1 -async-timeout==4.0.3 -attrs==23.2.0 -bentoml==1.2.19 -build==1.2.1 -cattrs==23.1.2 -certifi==2024.7.4 -charset-normalizer==3.3.2 -circus==0.18.0 -click==8.1.7 -click-option-group==0.5.6 -cloudpickle==3.0.0 -cmake==3.30.0 -deepmerge==1.1.1 -deprecated==1.2.14 -diskcache==5.6.3 -distro==1.9.0 -dnspython==2.6.1 -email-validator==2.2.0 -exceptiongroup==1.2.1 -fastapi==0.111.0 -fastapi-cli==0.0.4 -filelock==3.15.4 -frozenlist==1.4.1 -fs==2.4.16 -fsspec==2024.6.1 -h11==0.14.0 -httpcore==1.0.5 -httptools==0.6.1 -httpx==0.27.0 -httpx-ws==0.6.0 -huggingface-hub==0.23.4 -idna==3.7 -importlib-metadata==6.11.0 -inflection==0.5.1 -interegular==0.3.3 -jinja2==3.1.4 -joblib==1.4.2 -jsonschema==4.22.0 -jsonschema-specifications==2023.12.1 -lark==1.1.9 -llvmlite==0.43.0 -lm-format-enforcer==0.10.1 -markdown-it-py==3.0.0 -markupsafe==2.1.5 -mdurl==0.1.2 -mpmath==1.3.0 -msgpack==1.0.8 -multidict==6.0.5 -nest-asyncio==1.6.0 -networkx==3.2.1 -ninja==1.11.1.1 -numba==0.60.0 -numpy==1.26.0 -nvidia-cublas-cu12==12.1.3.1 -nvidia-cuda-cupti-cu12==12.1.105 -nvidia-cuda-nvrtc-cu12==12.1.105 -nvidia-cuda-runtime-cu12==12.1.105 -nvidia-cudnn-cu12==8.9.2.26 -nvidia-cufft-cu12==11.0.2.54 -nvidia-curand-cu12==10.3.2.106 -nvidia-cusolver-cu12==11.4.5.107 -nvidia-cusparse-cu12==12.1.0.106 -nvidia-ml-py==11.525.150 -nvidia-nccl-cu12==2.20.5 -nvidia-nvjitlink-cu12==12.5.82 -nvidia-nvtx-cu12==12.1.105 -openai==1.35.10 -opentelemetry-api==1.20.0 -opentelemetry-instrumentation==0.41b0 -opentelemetry-instrumentation-aiohttp-client==0.41b0 -opentelemetry-instrumentation-asgi==0.41b0 -opentelemetry-sdk==1.20.0 -opentelemetry-semantic-conventions==0.41b0 -opentelemetry-util-http==0.41b0 -orjson==3.10.6 -outlines==0.0.34 -packaging==24.1 -pathspec==0.12.1 -pip-requirements-parser==32.0.1 -pip-tools==7.4.1 -prometheus-client==0.20.0 -prometheus-fastapi-instrumentator==7.0.0 -protobuf==5.27.2 -psutil==6.0.0 -py-cpuinfo==9.0.0 -pydantic==2.8.2 -pydantic-core==2.20.1 -pygments==2.18.0 -pyparsing==3.1.2 -pyproject-hooks==1.1.0 -python-dateutil==2.9.0.post0 -python-dotenv==1.0.1 -python-json-logger==2.0.7 -python-multipart==0.0.9 -pyyaml==6.0.1 -pyzmq==26.0.3 -ray==2.31.0 -referencing==0.35.1 -regex==2024.5.15 -requests==2.32.3 -rich==13.7.1 -rpds-py==0.18.1 -safetensors==0.4.3 -schema==0.7.7 -scipy==1.13.1 -sentencepiece==0.2.0 -shellingham==1.5.4 -simple-di==0.1.5 -six==1.16.0 -sniffio==1.3.1 -starlette==0.37.2 -sympy==1.12.1 -tiktoken==0.7.0 -tokenizers==0.19.1 -tomli==2.0.1 -tomli-w==1.0.0 -torch==2.3.0 -tornado==6.4.1 -tqdm==4.66.4 -transformers==4.41.0 -triton==2.3.0 -typer==0.12.3 -typing-extensions==4.12.2 -ujson==5.10.0 -urllib3==2.2.2 -uvicorn[standard]==0.30.1 -uvloop==0.19.0 -vllm==0.4.3 -vllm-flash-attn==2.5.8.post2 -watchfiles==0.22.0 -websockets==12.0 -wheel==0.43.0 -wrapt==1.16.0 -wsproto==1.2.0 -xformers==0.0.26.post1 -yarl==1.9.4 -zipp==3.19.2 - -# The following packages are considered to be unsafe in a requirements file: -pip==24.1.1 -setuptools==70.2.0 diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/service.py b/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/service.py deleted file mode 100644 index 94af0b0d..00000000 --- a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/service.py +++ /dev/null @@ -1,239 +0,0 @@ -import functools -import json -import logging -import os -import sys -import uuid -from typing import AsyncGenerator, Literal, Optional - -import bentoml -import fastapi -import fastapi.staticfiles -import pydantic -import vllm.entrypoints.openai.api_server as vllm_api_server -import yaml -from annotated_types import Ge, Le -from bento_constants import CONSTANT_YAML -from fastapi.responses import FileResponse -from typing_extensions import Annotated, Literal - - -class Message(pydantic.BaseModel): - content: str - role: Literal["system", "user", "assistant"] - - -CONSTANTS = yaml.safe_load(CONSTANT_YAML) - -ENGINE_CONFIG = CONSTANTS["engine_config"] -SERVICE_CONFIG = CONSTANTS["service_config"] -OVERRIDE_CHAT_TEMPLATE = CONSTANTS.get("chat_template") - -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) - - -openai_api_app = fastapi.FastAPI() -static_app = fastapi.FastAPI() -ui_app = fastapi.FastAPI() - - -OPENAI_ENDPOINTS = [ - ["/chat/completions", vllm_api_server.create_chat_completion, ["POST"]], - ["/completions", vllm_api_server.create_completion, ["POST"]], - ["/models", vllm_api_server.show_available_models, ["GET"]], -] - - -class Message(pydantic.BaseModel): - role: Literal["system", "user", "assistant"] - content: str - - -for route, endpoint, methods in OPENAI_ENDPOINTS: - openai_api_app.add_api_route( - path=route, - endpoint=endpoint, - methods=methods, - include_in_schema=True, - ) - - -STATIC_DIR = os.path.join(os.path.dirname(__file__), "ui") - -ui_app.mount( - "/static", fastapi.staticfiles.StaticFiles(directory=STATIC_DIR), name="static" -) - - -@ui_app.get("/") -async def serve_chat_html(): - return FileResponse(os.path.join(STATIC_DIR, "chat.html")) - - -@ui_app.get("/{full_path:path}") -async def catch_all(full_path: str): - file_path = os.path.join(STATIC_DIR, full_path) - if os.path.exists(file_path): - return FileResponse(file_path) - return FileResponse(os.path.join(STATIC_DIR, "chat.html")) - - -# special handling for prometheus_client of bentoml -if "prometheus_client" in sys.modules: - sys.modules.pop("prometheus_client") - - -@bentoml.mount_asgi_app(openai_api_app, path="/v1") -@bentoml.mount_asgi_app(ui_app, path="/ui") -@bentoml.service(**SERVICE_CONFIG) -class VLLM: - def __init__(self) -> None: - from transformers import AutoTokenizer - from vllm import AsyncEngineArgs, AsyncLLMEngine - from vllm.entrypoints.openai.serving_chat import OpenAIServingChat - from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion - - ENGINE_ARGS = AsyncEngineArgs(**ENGINE_CONFIG) - self.engine = AsyncLLMEngine.from_engine_args(ENGINE_ARGS) - self.tokenizer = AutoTokenizer.from_pretrained(ENGINE_CONFIG["model"]) - logger.info(f"VLLM service initialized with model: {ENGINE_CONFIG['model']}") - - if OVERRIDE_CHAT_TEMPLATE: # use community chat template - gen_config = _get_gen_config(CONSTANTS["chat_template"]) - chat_template = gen_config["template"] - else: - chat_template = None - - model_config = self.engine.engine.get_model_config() - - # inject the engine into the openai serving chat and completion - vllm_api_server.openai_serving_chat = OpenAIServingChat( - engine=self.engine, - served_model_names=[ENGINE_CONFIG["model"]], - response_role="assistant", - chat_template=chat_template, - model_config=model_config, - ) - vllm_api_server.openai_serving_completion = OpenAIServingCompletion( - engine=self.engine, - served_model_names=[ENGINE_CONFIG["model"]], - model_config=model_config, - lora_modules=None, - ) - - @bentoml.api(route="/api/generate") - async def generate( - self, - prompt: str = "Explain superconductors like I'm five years old", - model: str = ENGINE_CONFIG["model"], - max_tokens: Annotated[ - int, - Ge(128), - Le(ENGINE_CONFIG["max_model_len"]), - ] = ENGINE_CONFIG["max_model_len"], - stop: Optional[list[str]] = None, - ) -> AsyncGenerator[str, None]: - if stop is None: - stop = [] - - from vllm import SamplingParams - - SAMPLING_PARAM = SamplingParams( - max_tokens=max_tokens, - stop=stop, - ) - stream = await self.engine.add_request(uuid.uuid4().hex, prompt, SAMPLING_PARAM) - - cursor = 0 - async for request_output in stream: - text = request_output.outputs[0].text - yield text[cursor:] - cursor = len(text) - - @bentoml.api(route="/api/chat") - async def chat( - self, - messages: list[Message] = [ - Message(content="what is the meaning of life?", role="user") - ], - model: str = ENGINE_CONFIG["model"], - max_tokens: Annotated[ - int, - Ge(128), - Le(ENGINE_CONFIG["max_model_len"]), - ] = ENGINE_CONFIG["max_model_len"], - stop: Optional[list[str]] = None, - ) -> AsyncGenerator[str, None]: - """ - light-weight chat API that takes in a list of messages and returns a response - """ - from vllm import SamplingParams - - try: - if OVERRIDE_CHAT_TEMPLATE: # community chat template - gen_config = _get_gen_config(CONSTANTS["chat_template"]) - if not stop: - if gen_config["stop_str"]: - stop = [gen_config["stop_str"]] - else: - stop = [] - system_prompt = gen_config["system_prompt"] - self.tokenizer.chat_template = gen_config["template"] - else: - if not stop: - if self.tokenizer.eos_token is not None: - stop = [self.tokenizer.eos_token] - else: - stop = [] - system_prompt = None - - SAMPLING_PARAM = SamplingParams( - max_tokens=max_tokens, - stop=stop, - ) - if system_prompt and messages[0].role != "system": - messages = [dict(role="system", content=system_prompt)] + messages - - prompt = self.tokenizer.apply_chat_template( - messages, - tokenize=False, - add_generation_prompt=True, - ) - - stream = await self.engine.add_request( - uuid.uuid4().hex, prompt, SAMPLING_PARAM - ) - - cursor = 0 - strip_flag = True - async for request_output in stream: - text = request_output.outputs[0].text - assistant_message = text[cursor:] - if not strip_flag: # strip the leading whitespace - yield assistant_message - elif assistant_message.strip(): - strip_flag = False - yield assistant_message.lstrip() - cursor = len(text) - except Exception as e: - logger.error(f"Error in chat API: {e}") - yield f"Error in chat API: {e}" - - -@functools.lru_cache(maxsize=1) -def _get_gen_config(community_chat_template: str) -> dict: - logger.info(f"Load community_chat_template: {community_chat_template}") - chat_template_path = os.path.join( - os.path.dirname(__file__), "chat_templates", "chat_templates" - ) - config_path = os.path.join( - os.path.dirname(__file__), "chat_templates", "generation_configs" - ) - with open(os.path.join(config_path, f"{community_chat_template}.json")) as f: - gen_config = json.load(f) - chat_template_file = gen_config["chat_template"].split("/")[-1] - with open(os.path.join(chat_template_path, chat_template_file)) as f: - chat_template = f.read() - gen_config["template"] = chat_template.replace(" ", "").replace("\n", "") - return gen_config diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/README.md b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/README.md new file mode 100644 index 00000000..91bc230a --- /dev/null +++ b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/README.md @@ -0,0 +1,16 @@ +# llama3:70b-instruct-fp16-d0d4 + +[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML) +[![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/) +[![join_slack](https://badgen.net/badge/Join/BentoML%20Slack/cyan?icon=slack)](https://l.bentoml.com/join-slack-swagger) +[![BentoML GitHub Repo](https://img.shields.io/github/stars/bentoml/bentoml?style=social)](https://github.com/bentoml/BentoML) +[![Twitter Follow](https://img.shields.io/twitter/follow/bentomlai?label=Follow%20BentoML&style=social)](https://twitter.com/bentomlai) + +This is a Machine Learning Service created with BentoML. + +## Help + +* [📖 Documentation](https://docs.bentoml.com/en/latest/): Learn how to use BentoML. +* [💬 Community](https://l.bentoml.com/join-slack-swagger): Join the BentoML Slack community. +* [🐛 GitHub Issues](https://github.com/bentoml/BentoML/issues): Report bugs and feature requests. +* Tip: you can also [customize this README](https://docs.bentoml.com/en/latest/concepts/bento.html#description). diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/apis/openapi.yaml b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/apis/openapi.yaml similarity index 99% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/apis/openapi.yaml rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/apis/openapi.yaml index e37db675..6c8a31fc 100644 --- a/bentoml/bentos/llama3/70b-instruct-fp16-8678/apis/openapi.yaml +++ b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/apis/openapi.yaml @@ -1036,6 +1036,39 @@ paths: tags: - Service APIs x-bentoml-name: generate + /chat/: + get: + operationId: serve_chat_html__get + responses: + '200': + content: + application/json: + schema: {} + description: Successful Response + summary: Serve Chat Html + /chat/{full_path}: + get: + operationId: catch_all__full_path__get + parameters: + - in: path + name: full_path + required: true + schema: + title: Full Path + type: string + responses: + '200': + content: + application/json: + schema: {} + description: Successful Response + '422': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + description: Validation Error + summary: Catch All /healthz: get: description: Health check endpoint. Expecting an empty response with status @@ -1076,39 +1109,6 @@ paths: description: Successful Response tags: - Infrastructure - /ui/: - get: - operationId: serve_chat_html__get - responses: - '200': - content: - application/json: - schema: {} - description: Successful Response - summary: Serve Chat Html - /ui/{full_path}: - get: - operationId: catch_all__full_path__get - parameters: - - in: path - name: full_path - required: true - schema: - title: Full Path - type: string - responses: - '200': - content: - application/json: - schema: {} - description: Successful Response - '422': - content: - application/json: - schema: - $ref: '#/components/schemas/HTTPValidationError' - description: Validation Error - summary: Catch All /v1/chat/completions: post: operationId: create_chat_completion_chat_completions_post diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/apis/schema.json b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/apis/schema.json similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/apis/schema.json rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/apis/schema.json diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/bento.yaml b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/bento.yaml similarity index 97% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/bento.yaml rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/bento.yaml index c4b2eb5f..e6e8e81f 100644 --- a/bentoml/bentos/llama3/70b-instruct-fp16-8678/bento.yaml +++ b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/bento.yaml @@ -1,8 +1,8 @@ service: service:VLLM name: llama3 -version: 70b-instruct-fp16-8678 +version: 70b-instruct-fp16-d0d4 bentoml_version: 1.2.19 -creation_time: '2024-07-05T07:30:34.297813+00:00' +creation_time: '2024-07-08T07:56:30.483132+00:00' labels: openllm_alias: 70b,70b-instruct openllm_hf_model_id: meta-llama/Meta-Llama-3-70B-Instruct diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/env/docker/Dockerfile b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/env/docker/Dockerfile similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/env/docker/Dockerfile rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/env/docker/Dockerfile diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/env/docker/entrypoint.sh b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/env/docker/entrypoint.sh similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/env/docker/entrypoint.sh rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/env/docker/entrypoint.sh diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/env/python/install.sh b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/env/python/install.sh similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/env/python/install.sh rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/env/python/install.sh diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/env/python/requirements.lock.txt b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/env/python/requirements.lock.txt new file mode 100644 index 00000000..925ce193 --- /dev/null +++ b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/env/python/requirements.lock.txt @@ -0,0 +1,147 @@ +aiohttp==3.9.5 +aiosignal==1.3.1 +annotated-types==0.7.0 +anyio==4.4.0 +appdirs==1.4.4 +asgiref==3.8.1 +async-timeout==4.0.3 +attrs==23.2.0 +bentoml==1.2.19 +build==1.2.1 +cattrs==23.1.2 +certifi==2024.7.4 +charset-normalizer==3.3.2 +circus==0.18.0 +click==8.1.7 +click-option-group==0.5.6 +cloudpickle==3.0.0 +cmake==3.30.0 +deepmerge==1.1.1 +deprecated==1.2.14 +diskcache==5.6.3 +distro==1.9.0 +dnspython==2.6.1 +email-validator==2.2.0 +exceptiongroup==1.2.1 +fastapi==0.111.0 +fastapi-cli==0.0.4 +filelock==3.15.4 +frozenlist==1.4.1 +fs==2.4.16 +fsspec==2024.6.1 +h11==0.14.0 +httpcore==1.0.5 +httptools==0.6.1 +httpx==0.27.0 +httpx-ws==0.6.0 +huggingface-hub==0.23.4 +idna==3.7 +importlib-metadata==6.11.0 +inflection==0.5.1 +interegular==0.3.3 +jinja2==3.1.4 +joblib==1.4.2 +jsonschema==4.22.0 +jsonschema-specifications==2023.12.1 +lark==1.1.9 +llvmlite==0.43.0 +lm-format-enforcer==0.10.1 +markdown-it-py==3.0.0 +markupsafe==2.1.5 +mdurl==0.1.2 +mpmath==1.3.0 +msgpack==1.0.8 +multidict==6.0.5 +nest-asyncio==1.6.0 +networkx==3.2.1 +ninja==1.11.1.1 +numba==0.60.0 +numpy==1.26.0 +nvidia-cublas-cu12==12.1.3.1 +nvidia-cuda-cupti-cu12==12.1.105 +nvidia-cuda-nvrtc-cu12==12.1.105 +nvidia-cuda-runtime-cu12==12.1.105 +nvidia-cudnn-cu12==8.9.2.26 +nvidia-cufft-cu12==11.0.2.54 +nvidia-curand-cu12==10.3.2.106 +nvidia-cusolver-cu12==11.4.5.107 +nvidia-cusparse-cu12==12.1.0.106 +nvidia-ml-py==11.525.150 +nvidia-nccl-cu12==2.20.5 +nvidia-nvjitlink-cu12==12.5.82 +nvidia-nvtx-cu12==12.1.105 +openai==1.35.10 +opentelemetry-api==1.20.0 +opentelemetry-instrumentation==0.41b0 +opentelemetry-instrumentation-aiohttp-client==0.41b0 +opentelemetry-instrumentation-asgi==0.41b0 +opentelemetry-sdk==1.20.0 +opentelemetry-semantic-conventions==0.41b0 +opentelemetry-util-http==0.41b0 +orjson==3.10.6 +outlines==0.0.34 +packaging==24.1 +pathspec==0.12.1 +pip-requirements-parser==32.0.1 +pip-tools==7.4.1 +prometheus-client==0.20.0 +prometheus-fastapi-instrumentator==7.0.0 +protobuf==5.27.2 +psutil==6.0.0 +py-cpuinfo==9.0.0 +pydantic==2.8.2 +pydantic-core==2.20.1 +pygments==2.18.0 +pyparsing==3.1.2 +pyproject-hooks==1.1.0 +python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 +python-json-logger==2.0.7 +python-multipart==0.0.9 +pyyaml==6.0.1 +pyzmq==26.0.3 +ray==2.31.0 +referencing==0.35.1 +regex==2024.5.15 +requests==2.32.3 +rich==13.7.1 +rpds-py==0.18.1 +safetensors==0.4.3 +schema==0.7.7 +scipy==1.13.1 +sentencepiece==0.2.0 +shellingham==1.5.4 +simple-di==0.1.5 +six==1.16.0 +sniffio==1.3.1 +starlette==0.37.2 +sympy==1.12.1 +tiktoken==0.7.0 +tokenizers==0.19.1 +tomli==2.0.1 +tomli-w==1.0.0 +torch==2.3.0 +tornado==6.4.1 +tqdm==4.66.4 +transformers==4.41.0 +triton==2.3.0 +typer==0.12.3 +typing-extensions==4.12.2 +ujson==5.10.0 +urllib3==2.2.2 +uvicorn[standard]==0.30.1 +uvloop==0.19.0 +vllm==0.4.3 +vllm-flash-attn==2.5.8.post2 +watchfiles==0.22.0 +websockets==12.0 +wheel==0.43.0 +wrapt==1.16.0 +wsproto==1.2.0 +xformers==0.0.26.post1 +yarl==1.9.4 +zipp==3.19.2 + +# The following packages are considered to be unsafe in a requirements file: +pip==24.1.2 +setuptools==70.2.0 diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/env/python/requirements.txt b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/env/python/requirements.txt similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/env/python/requirements.txt rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/env/python/requirements.txt diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/env/python/version.txt b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/env/python/version.txt similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/env/python/version.txt rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/env/python/version.txt diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/bento_constants.py b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/bento_constants.py similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/bento_constants.py rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/bento_constants.py diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/bentofile.yaml b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/bentofile.yaml similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/bentofile.yaml rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/bentofile.yaml diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/chat_templates/alpaca.jinja b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/chat_templates/alpaca.jinja similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/chat_templates/alpaca.jinja rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/chat_templates/alpaca.jinja diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/chat_templates/amberchat.jinja b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/chat_templates/amberchat.jinja similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/chat_templates/amberchat.jinja rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/chat_templates/amberchat.jinja diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/chat_templates/chatml.jinja b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/chat_templates/chatml.jinja similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/chat_templates/chatml.jinja rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/chat_templates/chatml.jinja diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/chat_templates/chatqa.jinja b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/chat_templates/chatqa.jinja similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/chat_templates/chatqa.jinja rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/chat_templates/chatqa.jinja diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/chat_templates/falcon-instruct.jinja b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/chat_templates/falcon-instruct.jinja similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/chat_templates/falcon-instruct.jinja rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/chat_templates/falcon-instruct.jinja diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/chat_templates/gemma-it.jinja b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/chat_templates/gemma-it.jinja similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/chat_templates/gemma-it.jinja rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/chat_templates/gemma-it.jinja diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/chat_templates/llama-2-chat.jinja b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/chat_templates/llama-2-chat.jinja similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/chat_templates/llama-2-chat.jinja rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/chat_templates/llama-2-chat.jinja diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/chat_templates/llama-3-chat.jinja b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/chat_templates/llama-3-chat.jinja similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/chat_templates/llama-3-chat.jinja rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/chat_templates/llama-3-chat.jinja diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/chat_templates/mistral-instruct.jinja b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/chat_templates/mistral-instruct.jinja similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/chat_templates/mistral-instruct.jinja rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/chat_templates/mistral-instruct.jinja diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/chat_templates/openchat.jinja b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/chat_templates/openchat.jinja similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/chat_templates/openchat.jinja rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/chat_templates/openchat.jinja diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/chat_templates/phi-3.jinja b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/chat_templates/phi-3.jinja similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/chat_templates/phi-3.jinja rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/chat_templates/phi-3.jinja diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/chat_templates/saiga.jinja b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/chat_templates/saiga.jinja similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/chat_templates/saiga.jinja rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/chat_templates/saiga.jinja diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/chat_templates/solar-instruct.jinja b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/chat_templates/solar-instruct.jinja similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/chat_templates/solar-instruct.jinja rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/chat_templates/solar-instruct.jinja diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/chat_templates/vicuna.jinja b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/chat_templates/vicuna.jinja similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/chat_templates/vicuna.jinja rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/chat_templates/vicuna.jinja diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/chat_templates/zephyr.jinja b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/chat_templates/zephyr.jinja similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/chat_templates/zephyr.jinja rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/chat_templates/zephyr.jinja diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/generation_configs/alpaca.json b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/generation_configs/alpaca.json similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/generation_configs/alpaca.json rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/generation_configs/alpaca.json diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/generation_configs/amberchat.json b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/generation_configs/amberchat.json similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/generation_configs/amberchat.json rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/generation_configs/amberchat.json diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/generation_configs/chatqa.json b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/generation_configs/chatqa.json similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/generation_configs/chatqa.json rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/generation_configs/chatqa.json diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/generation_configs/gemma-it.json b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/generation_configs/gemma-it.json similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/generation_configs/gemma-it.json rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/generation_configs/gemma-it.json diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/generation_configs/llama-2-chat.json b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/generation_configs/llama-2-chat.json similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/generation_configs/llama-2-chat.json rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/generation_configs/llama-2-chat.json diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/generation_configs/llama-3-chat.json b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/generation_configs/llama-3-chat.json similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/generation_configs/llama-3-chat.json rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/generation_configs/llama-3-chat.json diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/generation_configs/mistral-instruct.json b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/generation_configs/mistral-instruct.json similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/generation_configs/mistral-instruct.json rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/generation_configs/mistral-instruct.json diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/generation_configs/openchat.json b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/generation_configs/openchat.json similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/generation_configs/openchat.json rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/generation_configs/openchat.json diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/generation_configs/orca-2.json b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/generation_configs/orca-2.json similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/generation_configs/orca-2.json rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/generation_configs/orca-2.json diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/generation_configs/phi-3.json b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/generation_configs/phi-3.json similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/generation_configs/phi-3.json rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/generation_configs/phi-3.json diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/generation_configs/qwen2-chat.json b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/generation_configs/qwen2-chat.json similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/generation_configs/qwen2-chat.json rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/generation_configs/qwen2-chat.json diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/generation_configs/saiga.json b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/generation_configs/saiga.json similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/generation_configs/saiga.json rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/generation_configs/saiga.json diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/generation_configs/solar-instruct.json b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/generation_configs/solar-instruct.json similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/generation_configs/solar-instruct.json rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/generation_configs/solar-instruct.json diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/generation_configs/vicuna.json b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/generation_configs/vicuna.json similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/generation_configs/vicuna.json rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/generation_configs/vicuna.json diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/generation_configs/yi-chat.json b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/generation_configs/yi-chat.json similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/generation_configs/yi-chat.json rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/generation_configs/yi-chat.json diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/generation_configs/zephyr.json b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/generation_configs/zephyr.json similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/chat_templates/generation_configs/zephyr.json rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/chat_templates/generation_configs/zephyr.json diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/service.py b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/service.py new file mode 100644 index 00000000..42152243 --- /dev/null +++ b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/service.py @@ -0,0 +1,239 @@ +import functools +import json +import logging +import os +import sys +import uuid +from typing import AsyncGenerator, Literal, Optional + +import bentoml +import fastapi +import fastapi.staticfiles +import pydantic +import vllm.entrypoints.openai.api_server as vllm_api_server +import yaml +from annotated_types import Ge, Le +from bento_constants import CONSTANT_YAML +from fastapi.responses import FileResponse +from typing_extensions import Annotated, Literal + + +class Message(pydantic.BaseModel): + content: str + role: Literal["system", "user", "assistant"] + + +CONSTANTS = yaml.safe_load(CONSTANT_YAML) + +ENGINE_CONFIG = CONSTANTS["engine_config"] +SERVICE_CONFIG = CONSTANTS["service_config"] +OVERRIDE_CHAT_TEMPLATE = CONSTANTS.get("chat_template") + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +openai_api_app = fastapi.FastAPI() +static_app = fastapi.FastAPI() +ui_app = fastapi.FastAPI() + + +OPENAI_ENDPOINTS = [ + ["/chat/completions", vllm_api_server.create_chat_completion, ["POST"]], + ["/completions", vllm_api_server.create_completion, ["POST"]], + ["/models", vllm_api_server.show_available_models, ["GET"]], +] + + +class Message(pydantic.BaseModel): + role: Literal["system", "user", "assistant"] + content: str + + +for route, endpoint, methods in OPENAI_ENDPOINTS: + openai_api_app.add_api_route( + path=route, + endpoint=endpoint, + methods=methods, + include_in_schema=True, + ) + + +STATIC_DIR = os.path.join(os.path.dirname(__file__), "ui") + +ui_app.mount( + "/static", fastapi.staticfiles.StaticFiles(directory=STATIC_DIR), name="static" +) + + +@ui_app.get("/") +async def serve_chat_html(): + return FileResponse(os.path.join(STATIC_DIR, "chat.html")) + + +@ui_app.get("/{full_path:path}") +async def catch_all(full_path: str): + file_path = os.path.join(STATIC_DIR, full_path) + if os.path.exists(file_path): + return FileResponse(file_path) + return FileResponse(os.path.join(STATIC_DIR, "chat.html")) + + +# special handling for prometheus_client of bentoml +if "prometheus_client" in sys.modules: + sys.modules.pop("prometheus_client") + + +@bentoml.mount_asgi_app(openai_api_app, path="/v1") +@bentoml.mount_asgi_app(ui_app, path="/chat") +@bentoml.service(**SERVICE_CONFIG) +class VLLM: + def __init__(self) -> None: + from transformers import AutoTokenizer + from vllm import AsyncEngineArgs, AsyncLLMEngine + from vllm.entrypoints.openai.serving_chat import OpenAIServingChat + from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion + + ENGINE_ARGS = AsyncEngineArgs(**ENGINE_CONFIG) + self.engine = AsyncLLMEngine.from_engine_args(ENGINE_ARGS) + self.tokenizer = AutoTokenizer.from_pretrained(ENGINE_CONFIG["model"]) + logger.info(f"VLLM service initialized with model: {ENGINE_CONFIG['model']}") + + if OVERRIDE_CHAT_TEMPLATE: # use community chat template + gen_config = _get_gen_config(CONSTANTS["chat_template"]) + chat_template = gen_config["template"] + else: + chat_template = None + + model_config = self.engine.engine.get_model_config() + + # inject the engine into the openai serving chat and completion + vllm_api_server.openai_serving_chat = OpenAIServingChat( + engine=self.engine, + served_model_names=[ENGINE_CONFIG["model"]], + response_role="assistant", + chat_template=chat_template, + model_config=model_config, + ) + vllm_api_server.openai_serving_completion = OpenAIServingCompletion( + engine=self.engine, + served_model_names=[ENGINE_CONFIG["model"]], + model_config=model_config, + lora_modules=None, + ) + + @bentoml.api(route="/api/generate") + async def generate( + self, + prompt: str = "Explain superconductors like I'm five years old", + model: str = ENGINE_CONFIG["model"], + max_tokens: Annotated[ + int, + Ge(128), + Le(ENGINE_CONFIG["max_model_len"]), + ] = ENGINE_CONFIG["max_model_len"], + stop: Optional[list[str]] = None, + ) -> AsyncGenerator[str, None]: + if stop is None: + stop = [] + + from vllm import SamplingParams + + SAMPLING_PARAM = SamplingParams( + max_tokens=max_tokens, + stop=stop, + ) + stream = await self.engine.add_request(uuid.uuid4().hex, prompt, SAMPLING_PARAM) + + cursor = 0 + async for request_output in stream: + text = request_output.outputs[0].text + yield text[cursor:] + cursor = len(text) + + @bentoml.api(route="/api/chat") + async def chat( + self, + messages: list[Message] = [ + Message(content="what is the meaning of life?", role="user") + ], + model: str = ENGINE_CONFIG["model"], + max_tokens: Annotated[ + int, + Ge(128), + Le(ENGINE_CONFIG["max_model_len"]), + ] = ENGINE_CONFIG["max_model_len"], + stop: Optional[list[str]] = None, + ) -> AsyncGenerator[str, None]: + """ + light-weight chat API that takes in a list of messages and returns a response + """ + from vllm import SamplingParams + + try: + if OVERRIDE_CHAT_TEMPLATE: # community chat template + gen_config = _get_gen_config(CONSTANTS["chat_template"]) + if not stop: + if gen_config["stop_str"]: + stop = [gen_config["stop_str"]] + else: + stop = [] + system_prompt = gen_config["system_prompt"] + self.tokenizer.chat_template = gen_config["template"] + else: + if not stop: + if self.tokenizer.eos_token is not None: + stop = [self.tokenizer.eos_token] + else: + stop = [] + system_prompt = None + + SAMPLING_PARAM = SamplingParams( + max_tokens=max_tokens, + stop=stop, + ) + if system_prompt and messages[0].role != "system": + messages = [dict(role="system", content=system_prompt)] + messages + + prompt = self.tokenizer.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True, + ) + + stream = await self.engine.add_request( + uuid.uuid4().hex, prompt, SAMPLING_PARAM + ) + + cursor = 0 + strip_flag = True + async for request_output in stream: + text = request_output.outputs[0].text + assistant_message = text[cursor:] + if not strip_flag: # strip the leading whitespace + yield assistant_message + elif assistant_message.strip(): + strip_flag = False + yield assistant_message.lstrip() + cursor = len(text) + except Exception as e: + logger.error(f"Error in chat API: {e}") + yield f"Error in chat API: {e}" + + +@functools.lru_cache(maxsize=1) +def _get_gen_config(community_chat_template: str) -> dict: + logger.info(f"Load community_chat_template: {community_chat_template}") + chat_template_path = os.path.join( + os.path.dirname(__file__), "chat_templates", "chat_templates" + ) + config_path = os.path.join( + os.path.dirname(__file__), "chat_templates", "generation_configs" + ) + with open(os.path.join(config_path, f"{community_chat_template}.json")) as f: + gen_config = json.load(f) + chat_template_file = gen_config["chat_template"].split("/")[-1] + with open(os.path.join(chat_template_path, chat_template_file)) as f: + chat_template = f.read() + gen_config["template"] = chat_template.replace(" ", "").replace("\n", "") + return gen_config diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/404.html b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/404.html similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/404.html rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/404.html diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/700-532b1fe2415e5859.js b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/700-532b1fe2415e5859.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/700-532b1fe2415e5859.js rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/700-532b1fe2415e5859.js diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/995-34374f39bb210839.js b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/995-34374f39bb210839.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/995-34374f39bb210839.js rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/995-34374f39bb210839.js diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/css/429544bd3cd8ce3a.css b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/css/429544bd3cd8ce3a.css similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/css/429544bd3cd8ce3a.css rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/css/429544bd3cd8ce3a.css diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/css/5b67f082b31cfc7b.css b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/css/5b67f082b31cfc7b.css similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/css/5b67f082b31cfc7b.css rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/css/5b67f082b31cfc7b.css diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/css/9e63023b20ddb15e.css b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/css/9e63023b20ddb15e.css similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/_next/static/css/9e63023b20ddb15e.css rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/_next/static/css/9e63023b20ddb15e.css diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/apple-touch-icon.png b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/apple-touch-icon.png similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/apple-touch-icon.png rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/apple-touch-icon.png diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/chat.html b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/chat.html similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/chat.html rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/chat.html diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/chat.txt b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/chat.txt similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/chat.txt rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/chat.txt diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/favicon-16x16.png b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/favicon-16x16.png similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/favicon-16x16.png rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/favicon-16x16.png diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/favicon.ico b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/favicon.ico similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/favicon.ico rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/favicon.ico diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/index.html b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/index.html similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/index.html rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/index.html diff --git a/bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/index.txt b/bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/index.txt similarity index 100% rename from bentoml/bentos/llama3/70b-instruct-fp16-8678/src/ui/index.txt rename to bentoml/bentos/llama3/70b-instruct-fp16-d0d4/src/ui/index.txt diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/README.md b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/README.md new file mode 100644 index 00000000..33d89ae3 --- /dev/null +++ b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/README.md @@ -0,0 +1,16 @@ +# llama3:8b-instruct-awq-4bit-e780 + +[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML) +[![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/) +[![join_slack](https://badgen.net/badge/Join/BentoML%20Slack/cyan?icon=slack)](https://l.bentoml.com/join-slack-swagger) +[![BentoML GitHub Repo](https://img.shields.io/github/stars/bentoml/bentoml?style=social)](https://github.com/bentoml/BentoML) +[![Twitter Follow](https://img.shields.io/twitter/follow/bentomlai?label=Follow%20BentoML&style=social)](https://twitter.com/bentomlai) + +This is a Machine Learning Service created with BentoML. + +## Help + +* [📖 Documentation](https://docs.bentoml.com/en/latest/): Learn how to use BentoML. +* [💬 Community](https://l.bentoml.com/join-slack-swagger): Join the BentoML Slack community. +* [🐛 GitHub Issues](https://github.com/bentoml/BentoML/issues): Report bugs and feature requests. +* Tip: you can also [customize this README](https://docs.bentoml.com/en/latest/concepts/bento.html#description). diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/apis/openapi.yaml b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/apis/openapi.yaml similarity index 99% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/apis/openapi.yaml rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/apis/openapi.yaml index 02bb1ae8..c2e3789e 100644 --- a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/apis/openapi.yaml +++ b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/apis/openapi.yaml @@ -1036,6 +1036,39 @@ paths: tags: - Service APIs x-bentoml-name: generate + /chat/: + get: + operationId: serve_chat_html__get + responses: + '200': + content: + application/json: + schema: {} + description: Successful Response + summary: Serve Chat Html + /chat/{full_path}: + get: + operationId: catch_all__full_path__get + parameters: + - in: path + name: full_path + required: true + schema: + title: Full Path + type: string + responses: + '200': + content: + application/json: + schema: {} + description: Successful Response + '422': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + description: Validation Error + summary: Catch All /healthz: get: description: Health check endpoint. Expecting an empty response with status @@ -1076,39 +1109,6 @@ paths: description: Successful Response tags: - Infrastructure - /ui/: - get: - operationId: serve_chat_html__get - responses: - '200': - content: - application/json: - schema: {} - description: Successful Response - summary: Serve Chat Html - /ui/{full_path}: - get: - operationId: catch_all__full_path__get - parameters: - - in: path - name: full_path - required: true - schema: - title: Full Path - type: string - responses: - '200': - content: - application/json: - schema: {} - description: Successful Response - '422': - content: - application/json: - schema: - $ref: '#/components/schemas/HTTPValidationError' - description: Validation Error - summary: Catch All /v1/chat/completions: post: operationId: create_chat_completion_chat_completions_post diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/apis/schema.json b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/apis/schema.json similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/apis/schema.json rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/apis/schema.json diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/bento.yaml b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/bento.yaml similarity index 97% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/bento.yaml rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/bento.yaml index 231cd61c..bb8fdbf9 100644 --- a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/bento.yaml +++ b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/bento.yaml @@ -1,8 +1,8 @@ service: service:VLLM name: llama3 -version: 8b-instruct-awq-4bit-f711 +version: 8b-instruct-awq-4bit-e780 bentoml_version: 1.2.19 -creation_time: '2024-07-05T07:28:57.744403+00:00' +creation_time: '2024-07-08T07:53:44.539551+00:00' labels: openllm_alias: 8b-4bit,8b-instruct-4bit openllm_hf_model_id: casperhansen/llama-3-8b-instruct-awq diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/env/docker/Dockerfile b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/env/docker/Dockerfile similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/env/docker/Dockerfile rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/env/docker/Dockerfile diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/env/docker/entrypoint.sh b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/env/docker/entrypoint.sh similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/env/docker/entrypoint.sh rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/env/docker/entrypoint.sh diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/env/python/install.sh b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/env/python/install.sh similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/env/python/install.sh rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/env/python/install.sh diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/env/python/requirements.lock.txt b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/env/python/requirements.lock.txt new file mode 100644 index 00000000..925ce193 --- /dev/null +++ b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/env/python/requirements.lock.txt @@ -0,0 +1,147 @@ +aiohttp==3.9.5 +aiosignal==1.3.1 +annotated-types==0.7.0 +anyio==4.4.0 +appdirs==1.4.4 +asgiref==3.8.1 +async-timeout==4.0.3 +attrs==23.2.0 +bentoml==1.2.19 +build==1.2.1 +cattrs==23.1.2 +certifi==2024.7.4 +charset-normalizer==3.3.2 +circus==0.18.0 +click==8.1.7 +click-option-group==0.5.6 +cloudpickle==3.0.0 +cmake==3.30.0 +deepmerge==1.1.1 +deprecated==1.2.14 +diskcache==5.6.3 +distro==1.9.0 +dnspython==2.6.1 +email-validator==2.2.0 +exceptiongroup==1.2.1 +fastapi==0.111.0 +fastapi-cli==0.0.4 +filelock==3.15.4 +frozenlist==1.4.1 +fs==2.4.16 +fsspec==2024.6.1 +h11==0.14.0 +httpcore==1.0.5 +httptools==0.6.1 +httpx==0.27.0 +httpx-ws==0.6.0 +huggingface-hub==0.23.4 +idna==3.7 +importlib-metadata==6.11.0 +inflection==0.5.1 +interegular==0.3.3 +jinja2==3.1.4 +joblib==1.4.2 +jsonschema==4.22.0 +jsonschema-specifications==2023.12.1 +lark==1.1.9 +llvmlite==0.43.0 +lm-format-enforcer==0.10.1 +markdown-it-py==3.0.0 +markupsafe==2.1.5 +mdurl==0.1.2 +mpmath==1.3.0 +msgpack==1.0.8 +multidict==6.0.5 +nest-asyncio==1.6.0 +networkx==3.2.1 +ninja==1.11.1.1 +numba==0.60.0 +numpy==1.26.0 +nvidia-cublas-cu12==12.1.3.1 +nvidia-cuda-cupti-cu12==12.1.105 +nvidia-cuda-nvrtc-cu12==12.1.105 +nvidia-cuda-runtime-cu12==12.1.105 +nvidia-cudnn-cu12==8.9.2.26 +nvidia-cufft-cu12==11.0.2.54 +nvidia-curand-cu12==10.3.2.106 +nvidia-cusolver-cu12==11.4.5.107 +nvidia-cusparse-cu12==12.1.0.106 +nvidia-ml-py==11.525.150 +nvidia-nccl-cu12==2.20.5 +nvidia-nvjitlink-cu12==12.5.82 +nvidia-nvtx-cu12==12.1.105 +openai==1.35.10 +opentelemetry-api==1.20.0 +opentelemetry-instrumentation==0.41b0 +opentelemetry-instrumentation-aiohttp-client==0.41b0 +opentelemetry-instrumentation-asgi==0.41b0 +opentelemetry-sdk==1.20.0 +opentelemetry-semantic-conventions==0.41b0 +opentelemetry-util-http==0.41b0 +orjson==3.10.6 +outlines==0.0.34 +packaging==24.1 +pathspec==0.12.1 +pip-requirements-parser==32.0.1 +pip-tools==7.4.1 +prometheus-client==0.20.0 +prometheus-fastapi-instrumentator==7.0.0 +protobuf==5.27.2 +psutil==6.0.0 +py-cpuinfo==9.0.0 +pydantic==2.8.2 +pydantic-core==2.20.1 +pygments==2.18.0 +pyparsing==3.1.2 +pyproject-hooks==1.1.0 +python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 +python-json-logger==2.0.7 +python-multipart==0.0.9 +pyyaml==6.0.1 +pyzmq==26.0.3 +ray==2.31.0 +referencing==0.35.1 +regex==2024.5.15 +requests==2.32.3 +rich==13.7.1 +rpds-py==0.18.1 +safetensors==0.4.3 +schema==0.7.7 +scipy==1.13.1 +sentencepiece==0.2.0 +shellingham==1.5.4 +simple-di==0.1.5 +six==1.16.0 +sniffio==1.3.1 +starlette==0.37.2 +sympy==1.12.1 +tiktoken==0.7.0 +tokenizers==0.19.1 +tomli==2.0.1 +tomli-w==1.0.0 +torch==2.3.0 +tornado==6.4.1 +tqdm==4.66.4 +transformers==4.41.0 +triton==2.3.0 +typer==0.12.3 +typing-extensions==4.12.2 +ujson==5.10.0 +urllib3==2.2.2 +uvicorn[standard]==0.30.1 +uvloop==0.19.0 +vllm==0.4.3 +vllm-flash-attn==2.5.8.post2 +watchfiles==0.22.0 +websockets==12.0 +wheel==0.43.0 +wrapt==1.16.0 +wsproto==1.2.0 +xformers==0.0.26.post1 +yarl==1.9.4 +zipp==3.19.2 + +# The following packages are considered to be unsafe in a requirements file: +pip==24.1.2 +setuptools==70.2.0 diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/env/python/requirements.txt b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/env/python/requirements.txt similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/env/python/requirements.txt rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/env/python/requirements.txt diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/env/python/version.txt b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/env/python/version.txt similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/env/python/version.txt rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/env/python/version.txt diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/bento_constants.py b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/bento_constants.py similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/bento_constants.py rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/bento_constants.py diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/bentofile.yaml b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/bentofile.yaml similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/bentofile.yaml rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/bentofile.yaml diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/chat_templates/alpaca.jinja b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/chat_templates/alpaca.jinja similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/chat_templates/alpaca.jinja rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/chat_templates/alpaca.jinja diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/chat_templates/amberchat.jinja b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/chat_templates/amberchat.jinja similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/chat_templates/amberchat.jinja rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/chat_templates/amberchat.jinja diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/chat_templates/chatml.jinja b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/chat_templates/chatml.jinja similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/chat_templates/chatml.jinja rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/chat_templates/chatml.jinja diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/chat_templates/chatqa.jinja b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/chat_templates/chatqa.jinja similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/chat_templates/chatqa.jinja rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/chat_templates/chatqa.jinja diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/chat_templates/falcon-instruct.jinja b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/chat_templates/falcon-instruct.jinja similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/chat_templates/falcon-instruct.jinja rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/chat_templates/falcon-instruct.jinja diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/chat_templates/gemma-it.jinja b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/chat_templates/gemma-it.jinja similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/chat_templates/gemma-it.jinja rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/chat_templates/gemma-it.jinja diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/chat_templates/llama-2-chat.jinja b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/chat_templates/llama-2-chat.jinja similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/chat_templates/llama-2-chat.jinja rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/chat_templates/llama-2-chat.jinja diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/chat_templates/llama-3-chat.jinja b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/chat_templates/llama-3-chat.jinja similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/chat_templates/llama-3-chat.jinja rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/chat_templates/llama-3-chat.jinja diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/chat_templates/mistral-instruct.jinja b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/chat_templates/mistral-instruct.jinja similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/chat_templates/mistral-instruct.jinja rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/chat_templates/mistral-instruct.jinja diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/chat_templates/openchat.jinja b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/chat_templates/openchat.jinja similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/chat_templates/openchat.jinja rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/chat_templates/openchat.jinja diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/chat_templates/phi-3.jinja b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/chat_templates/phi-3.jinja similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/chat_templates/phi-3.jinja rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/chat_templates/phi-3.jinja diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/chat_templates/saiga.jinja b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/chat_templates/saiga.jinja similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/chat_templates/saiga.jinja rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/chat_templates/saiga.jinja diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/chat_templates/solar-instruct.jinja b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/chat_templates/solar-instruct.jinja similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/chat_templates/solar-instruct.jinja rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/chat_templates/solar-instruct.jinja diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/chat_templates/vicuna.jinja b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/chat_templates/vicuna.jinja similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/chat_templates/vicuna.jinja rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/chat_templates/vicuna.jinja diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/chat_templates/zephyr.jinja b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/chat_templates/zephyr.jinja similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/chat_templates/zephyr.jinja rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/chat_templates/zephyr.jinja diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/generation_configs/alpaca.json b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/generation_configs/alpaca.json similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/generation_configs/alpaca.json rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/generation_configs/alpaca.json diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/generation_configs/amberchat.json b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/generation_configs/amberchat.json similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/generation_configs/amberchat.json rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/generation_configs/amberchat.json diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/generation_configs/chatqa.json b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/generation_configs/chatqa.json similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/generation_configs/chatqa.json rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/generation_configs/chatqa.json diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/generation_configs/gemma-it.json b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/generation_configs/gemma-it.json similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/generation_configs/gemma-it.json rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/generation_configs/gemma-it.json diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/generation_configs/llama-2-chat.json b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/generation_configs/llama-2-chat.json similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/generation_configs/llama-2-chat.json rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/generation_configs/llama-2-chat.json diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/generation_configs/llama-3-chat.json b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/generation_configs/llama-3-chat.json similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/generation_configs/llama-3-chat.json rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/generation_configs/llama-3-chat.json diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/generation_configs/mistral-instruct.json b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/generation_configs/mistral-instruct.json similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/generation_configs/mistral-instruct.json rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/generation_configs/mistral-instruct.json diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/generation_configs/openchat.json b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/generation_configs/openchat.json similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/generation_configs/openchat.json rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/generation_configs/openchat.json diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/generation_configs/orca-2.json b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/generation_configs/orca-2.json similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/generation_configs/orca-2.json rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/generation_configs/orca-2.json diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/generation_configs/phi-3.json b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/generation_configs/phi-3.json similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/generation_configs/phi-3.json rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/generation_configs/phi-3.json diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/generation_configs/qwen2-chat.json b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/generation_configs/qwen2-chat.json similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/generation_configs/qwen2-chat.json rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/generation_configs/qwen2-chat.json diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/generation_configs/saiga.json b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/generation_configs/saiga.json similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/generation_configs/saiga.json rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/generation_configs/saiga.json diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/generation_configs/solar-instruct.json b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/generation_configs/solar-instruct.json similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/generation_configs/solar-instruct.json rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/generation_configs/solar-instruct.json diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/generation_configs/vicuna.json b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/generation_configs/vicuna.json similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/generation_configs/vicuna.json rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/generation_configs/vicuna.json diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/generation_configs/yi-chat.json b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/generation_configs/yi-chat.json similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/generation_configs/yi-chat.json rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/generation_configs/yi-chat.json diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/generation_configs/zephyr.json b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/generation_configs/zephyr.json similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/chat_templates/generation_configs/zephyr.json rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/chat_templates/generation_configs/zephyr.json diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/service.py b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/service.py new file mode 100644 index 00000000..42152243 --- /dev/null +++ b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/service.py @@ -0,0 +1,239 @@ +import functools +import json +import logging +import os +import sys +import uuid +from typing import AsyncGenerator, Literal, Optional + +import bentoml +import fastapi +import fastapi.staticfiles +import pydantic +import vllm.entrypoints.openai.api_server as vllm_api_server +import yaml +from annotated_types import Ge, Le +from bento_constants import CONSTANT_YAML +from fastapi.responses import FileResponse +from typing_extensions import Annotated, Literal + + +class Message(pydantic.BaseModel): + content: str + role: Literal["system", "user", "assistant"] + + +CONSTANTS = yaml.safe_load(CONSTANT_YAML) + +ENGINE_CONFIG = CONSTANTS["engine_config"] +SERVICE_CONFIG = CONSTANTS["service_config"] +OVERRIDE_CHAT_TEMPLATE = CONSTANTS.get("chat_template") + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +openai_api_app = fastapi.FastAPI() +static_app = fastapi.FastAPI() +ui_app = fastapi.FastAPI() + + +OPENAI_ENDPOINTS = [ + ["/chat/completions", vllm_api_server.create_chat_completion, ["POST"]], + ["/completions", vllm_api_server.create_completion, ["POST"]], + ["/models", vllm_api_server.show_available_models, ["GET"]], +] + + +class Message(pydantic.BaseModel): + role: Literal["system", "user", "assistant"] + content: str + + +for route, endpoint, methods in OPENAI_ENDPOINTS: + openai_api_app.add_api_route( + path=route, + endpoint=endpoint, + methods=methods, + include_in_schema=True, + ) + + +STATIC_DIR = os.path.join(os.path.dirname(__file__), "ui") + +ui_app.mount( + "/static", fastapi.staticfiles.StaticFiles(directory=STATIC_DIR), name="static" +) + + +@ui_app.get("/") +async def serve_chat_html(): + return FileResponse(os.path.join(STATIC_DIR, "chat.html")) + + +@ui_app.get("/{full_path:path}") +async def catch_all(full_path: str): + file_path = os.path.join(STATIC_DIR, full_path) + if os.path.exists(file_path): + return FileResponse(file_path) + return FileResponse(os.path.join(STATIC_DIR, "chat.html")) + + +# special handling for prometheus_client of bentoml +if "prometheus_client" in sys.modules: + sys.modules.pop("prometheus_client") + + +@bentoml.mount_asgi_app(openai_api_app, path="/v1") +@bentoml.mount_asgi_app(ui_app, path="/chat") +@bentoml.service(**SERVICE_CONFIG) +class VLLM: + def __init__(self) -> None: + from transformers import AutoTokenizer + from vllm import AsyncEngineArgs, AsyncLLMEngine + from vllm.entrypoints.openai.serving_chat import OpenAIServingChat + from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion + + ENGINE_ARGS = AsyncEngineArgs(**ENGINE_CONFIG) + self.engine = AsyncLLMEngine.from_engine_args(ENGINE_ARGS) + self.tokenizer = AutoTokenizer.from_pretrained(ENGINE_CONFIG["model"]) + logger.info(f"VLLM service initialized with model: {ENGINE_CONFIG['model']}") + + if OVERRIDE_CHAT_TEMPLATE: # use community chat template + gen_config = _get_gen_config(CONSTANTS["chat_template"]) + chat_template = gen_config["template"] + else: + chat_template = None + + model_config = self.engine.engine.get_model_config() + + # inject the engine into the openai serving chat and completion + vllm_api_server.openai_serving_chat = OpenAIServingChat( + engine=self.engine, + served_model_names=[ENGINE_CONFIG["model"]], + response_role="assistant", + chat_template=chat_template, + model_config=model_config, + ) + vllm_api_server.openai_serving_completion = OpenAIServingCompletion( + engine=self.engine, + served_model_names=[ENGINE_CONFIG["model"]], + model_config=model_config, + lora_modules=None, + ) + + @bentoml.api(route="/api/generate") + async def generate( + self, + prompt: str = "Explain superconductors like I'm five years old", + model: str = ENGINE_CONFIG["model"], + max_tokens: Annotated[ + int, + Ge(128), + Le(ENGINE_CONFIG["max_model_len"]), + ] = ENGINE_CONFIG["max_model_len"], + stop: Optional[list[str]] = None, + ) -> AsyncGenerator[str, None]: + if stop is None: + stop = [] + + from vllm import SamplingParams + + SAMPLING_PARAM = SamplingParams( + max_tokens=max_tokens, + stop=stop, + ) + stream = await self.engine.add_request(uuid.uuid4().hex, prompt, SAMPLING_PARAM) + + cursor = 0 + async for request_output in stream: + text = request_output.outputs[0].text + yield text[cursor:] + cursor = len(text) + + @bentoml.api(route="/api/chat") + async def chat( + self, + messages: list[Message] = [ + Message(content="what is the meaning of life?", role="user") + ], + model: str = ENGINE_CONFIG["model"], + max_tokens: Annotated[ + int, + Ge(128), + Le(ENGINE_CONFIG["max_model_len"]), + ] = ENGINE_CONFIG["max_model_len"], + stop: Optional[list[str]] = None, + ) -> AsyncGenerator[str, None]: + """ + light-weight chat API that takes in a list of messages and returns a response + """ + from vllm import SamplingParams + + try: + if OVERRIDE_CHAT_TEMPLATE: # community chat template + gen_config = _get_gen_config(CONSTANTS["chat_template"]) + if not stop: + if gen_config["stop_str"]: + stop = [gen_config["stop_str"]] + else: + stop = [] + system_prompt = gen_config["system_prompt"] + self.tokenizer.chat_template = gen_config["template"] + else: + if not stop: + if self.tokenizer.eos_token is not None: + stop = [self.tokenizer.eos_token] + else: + stop = [] + system_prompt = None + + SAMPLING_PARAM = SamplingParams( + max_tokens=max_tokens, + stop=stop, + ) + if system_prompt and messages[0].role != "system": + messages = [dict(role="system", content=system_prompt)] + messages + + prompt = self.tokenizer.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True, + ) + + stream = await self.engine.add_request( + uuid.uuid4().hex, prompt, SAMPLING_PARAM + ) + + cursor = 0 + strip_flag = True + async for request_output in stream: + text = request_output.outputs[0].text + assistant_message = text[cursor:] + if not strip_flag: # strip the leading whitespace + yield assistant_message + elif assistant_message.strip(): + strip_flag = False + yield assistant_message.lstrip() + cursor = len(text) + except Exception as e: + logger.error(f"Error in chat API: {e}") + yield f"Error in chat API: {e}" + + +@functools.lru_cache(maxsize=1) +def _get_gen_config(community_chat_template: str) -> dict: + logger.info(f"Load community_chat_template: {community_chat_template}") + chat_template_path = os.path.join( + os.path.dirname(__file__), "chat_templates", "chat_templates" + ) + config_path = os.path.join( + os.path.dirname(__file__), "chat_templates", "generation_configs" + ) + with open(os.path.join(config_path, f"{community_chat_template}.json")) as f: + gen_config = json.load(f) + chat_template_file = gen_config["chat_template"].split("/")[-1] + with open(os.path.join(chat_template_path, chat_template_file)) as f: + chat_template = f.read() + gen_config["template"] = chat_template.replace(" ", "").replace("\n", "") + return gen_config diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/404.html b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/404.html similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/404.html rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/404.html diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/700-532b1fe2415e5859.js b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/700-532b1fe2415e5859.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/700-532b1fe2415e5859.js rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/700-532b1fe2415e5859.js diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/995-34374f39bb210839.js b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/995-34374f39bb210839.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/995-34374f39bb210839.js rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/995-34374f39bb210839.js diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/css/429544bd3cd8ce3a.css b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/css/429544bd3cd8ce3a.css similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/css/429544bd3cd8ce3a.css rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/css/429544bd3cd8ce3a.css diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/css/5b67f082b31cfc7b.css b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/css/5b67f082b31cfc7b.css similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/css/5b67f082b31cfc7b.css rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/css/5b67f082b31cfc7b.css diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/css/9e63023b20ddb15e.css b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/css/9e63023b20ddb15e.css similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/_next/static/css/9e63023b20ddb15e.css rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/_next/static/css/9e63023b20ddb15e.css diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/apple-touch-icon.png b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/apple-touch-icon.png similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/apple-touch-icon.png rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/apple-touch-icon.png diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/chat.html b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/chat.html similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/chat.html rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/chat.html diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/chat.txt b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/chat.txt similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/chat.txt rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/chat.txt diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/favicon-16x16.png b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/favicon-16x16.png similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/favicon-16x16.png rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/favicon-16x16.png diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/favicon.ico b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/favicon.ico similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/favicon.ico rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/favicon.ico diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/index.html b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/index.html similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/index.html rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/index.html diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/index.txt b/bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/index.txt similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/ui/index.txt rename to bentoml/bentos/llama3/8b-instruct-awq-4bit-e780/src/ui/index.txt diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/README.md b/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/README.md deleted file mode 100644 index 02a87d53..00000000 --- a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/README.md +++ /dev/null @@ -1,16 +0,0 @@ -# llama3:8b-instruct-awq-4bit-f711 - -[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML) -[![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/) -[![join_slack](https://badgen.net/badge/Join/BentoML%20Slack/cyan?icon=slack)](https://l.bentoml.com/join-slack-swagger) -[![BentoML GitHub Repo](https://img.shields.io/github/stars/bentoml/bentoml?style=social)](https://github.com/bentoml/BentoML) -[![Twitter Follow](https://img.shields.io/twitter/follow/bentomlai?label=Follow%20BentoML&style=social)](https://twitter.com/bentomlai) - -This is a Machine Learning Service created with BentoML. - -## Help - -* [📖 Documentation](https://docs.bentoml.com/en/latest/): Learn how to use BentoML. -* [💬 Community](https://l.bentoml.com/join-slack-swagger): Join the BentoML Slack community. -* [🐛 GitHub Issues](https://github.com/bentoml/BentoML/issues): Report bugs and feature requests. -* Tip: you can also [customize this README](https://docs.bentoml.com/en/latest/concepts/bento.html#description). diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/env/python/requirements.lock.txt b/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/env/python/requirements.lock.txt deleted file mode 100644 index 9118ea44..00000000 --- a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/env/python/requirements.lock.txt +++ /dev/null @@ -1,147 +0,0 @@ -aiohttp==3.9.5 -aiosignal==1.3.1 -annotated-types==0.7.0 -anyio==4.4.0 -appdirs==1.4.4 -asgiref==3.8.1 -async-timeout==4.0.3 -attrs==23.2.0 -bentoml==1.2.19 -build==1.2.1 -cattrs==23.1.2 -certifi==2024.7.4 -charset-normalizer==3.3.2 -circus==0.18.0 -click==8.1.7 -click-option-group==0.5.6 -cloudpickle==3.0.0 -cmake==3.30.0 -deepmerge==1.1.1 -deprecated==1.2.14 -diskcache==5.6.3 -distro==1.9.0 -dnspython==2.6.1 -email-validator==2.2.0 -exceptiongroup==1.2.1 -fastapi==0.111.0 -fastapi-cli==0.0.4 -filelock==3.15.4 -frozenlist==1.4.1 -fs==2.4.16 -fsspec==2024.6.1 -h11==0.14.0 -httpcore==1.0.5 -httptools==0.6.1 -httpx==0.27.0 -httpx-ws==0.6.0 -huggingface-hub==0.23.4 -idna==3.7 -importlib-metadata==6.11.0 -inflection==0.5.1 -interegular==0.3.3 -jinja2==3.1.4 -joblib==1.4.2 -jsonschema==4.22.0 -jsonschema-specifications==2023.12.1 -lark==1.1.9 -llvmlite==0.43.0 -lm-format-enforcer==0.10.1 -markdown-it-py==3.0.0 -markupsafe==2.1.5 -mdurl==0.1.2 -mpmath==1.3.0 -msgpack==1.0.8 -multidict==6.0.5 -nest-asyncio==1.6.0 -networkx==3.2.1 -ninja==1.11.1.1 -numba==0.60.0 -numpy==1.26.0 -nvidia-cublas-cu12==12.1.3.1 -nvidia-cuda-cupti-cu12==12.1.105 -nvidia-cuda-nvrtc-cu12==12.1.105 -nvidia-cuda-runtime-cu12==12.1.105 -nvidia-cudnn-cu12==8.9.2.26 -nvidia-cufft-cu12==11.0.2.54 -nvidia-curand-cu12==10.3.2.106 -nvidia-cusolver-cu12==11.4.5.107 -nvidia-cusparse-cu12==12.1.0.106 -nvidia-ml-py==11.525.150 -nvidia-nccl-cu12==2.20.5 -nvidia-nvjitlink-cu12==12.5.82 -nvidia-nvtx-cu12==12.1.105 -openai==1.35.10 -opentelemetry-api==1.20.0 -opentelemetry-instrumentation==0.41b0 -opentelemetry-instrumentation-aiohttp-client==0.41b0 -opentelemetry-instrumentation-asgi==0.41b0 -opentelemetry-sdk==1.20.0 -opentelemetry-semantic-conventions==0.41b0 -opentelemetry-util-http==0.41b0 -orjson==3.10.6 -outlines==0.0.34 -packaging==24.1 -pathspec==0.12.1 -pip-requirements-parser==32.0.1 -pip-tools==7.4.1 -prometheus-client==0.20.0 -prometheus-fastapi-instrumentator==7.0.0 -protobuf==5.27.2 -psutil==6.0.0 -py-cpuinfo==9.0.0 -pydantic==2.8.2 -pydantic-core==2.20.1 -pygments==2.18.0 -pyparsing==3.1.2 -pyproject-hooks==1.1.0 -python-dateutil==2.9.0.post0 -python-dotenv==1.0.1 -python-json-logger==2.0.7 -python-multipart==0.0.9 -pyyaml==6.0.1 -pyzmq==26.0.3 -ray==2.31.0 -referencing==0.35.1 -regex==2024.5.15 -requests==2.32.3 -rich==13.7.1 -rpds-py==0.18.1 -safetensors==0.4.3 -schema==0.7.7 -scipy==1.13.1 -sentencepiece==0.2.0 -shellingham==1.5.4 -simple-di==0.1.5 -six==1.16.0 -sniffio==1.3.1 -starlette==0.37.2 -sympy==1.12.1 -tiktoken==0.7.0 -tokenizers==0.19.1 -tomli==2.0.1 -tomli-w==1.0.0 -torch==2.3.0 -tornado==6.4.1 -tqdm==4.66.4 -transformers==4.41.0 -triton==2.3.0 -typer==0.12.3 -typing-extensions==4.12.2 -ujson==5.10.0 -urllib3==2.2.2 -uvicorn[standard]==0.30.1 -uvloop==0.19.0 -vllm==0.4.3 -vllm-flash-attn==2.5.8.post2 -watchfiles==0.22.0 -websockets==12.0 -wheel==0.43.0 -wrapt==1.16.0 -wsproto==1.2.0 -xformers==0.0.26.post1 -yarl==1.9.4 -zipp==3.19.2 - -# The following packages are considered to be unsafe in a requirements file: -pip==24.1.1 -setuptools==70.2.0 diff --git a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/service.py b/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/service.py deleted file mode 100644 index 94af0b0d..00000000 --- a/bentoml/bentos/llama3/8b-instruct-awq-4bit-f711/src/service.py +++ /dev/null @@ -1,239 +0,0 @@ -import functools -import json -import logging -import os -import sys -import uuid -from typing import AsyncGenerator, Literal, Optional - -import bentoml -import fastapi -import fastapi.staticfiles -import pydantic -import vllm.entrypoints.openai.api_server as vllm_api_server -import yaml -from annotated_types import Ge, Le -from bento_constants import CONSTANT_YAML -from fastapi.responses import FileResponse -from typing_extensions import Annotated, Literal - - -class Message(pydantic.BaseModel): - content: str - role: Literal["system", "user", "assistant"] - - -CONSTANTS = yaml.safe_load(CONSTANT_YAML) - -ENGINE_CONFIG = CONSTANTS["engine_config"] -SERVICE_CONFIG = CONSTANTS["service_config"] -OVERRIDE_CHAT_TEMPLATE = CONSTANTS.get("chat_template") - -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) - - -openai_api_app = fastapi.FastAPI() -static_app = fastapi.FastAPI() -ui_app = fastapi.FastAPI() - - -OPENAI_ENDPOINTS = [ - ["/chat/completions", vllm_api_server.create_chat_completion, ["POST"]], - ["/completions", vllm_api_server.create_completion, ["POST"]], - ["/models", vllm_api_server.show_available_models, ["GET"]], -] - - -class Message(pydantic.BaseModel): - role: Literal["system", "user", "assistant"] - content: str - - -for route, endpoint, methods in OPENAI_ENDPOINTS: - openai_api_app.add_api_route( - path=route, - endpoint=endpoint, - methods=methods, - include_in_schema=True, - ) - - -STATIC_DIR = os.path.join(os.path.dirname(__file__), "ui") - -ui_app.mount( - "/static", fastapi.staticfiles.StaticFiles(directory=STATIC_DIR), name="static" -) - - -@ui_app.get("/") -async def serve_chat_html(): - return FileResponse(os.path.join(STATIC_DIR, "chat.html")) - - -@ui_app.get("/{full_path:path}") -async def catch_all(full_path: str): - file_path = os.path.join(STATIC_DIR, full_path) - if os.path.exists(file_path): - return FileResponse(file_path) - return FileResponse(os.path.join(STATIC_DIR, "chat.html")) - - -# special handling for prometheus_client of bentoml -if "prometheus_client" in sys.modules: - sys.modules.pop("prometheus_client") - - -@bentoml.mount_asgi_app(openai_api_app, path="/v1") -@bentoml.mount_asgi_app(ui_app, path="/ui") -@bentoml.service(**SERVICE_CONFIG) -class VLLM: - def __init__(self) -> None: - from transformers import AutoTokenizer - from vllm import AsyncEngineArgs, AsyncLLMEngine - from vllm.entrypoints.openai.serving_chat import OpenAIServingChat - from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion - - ENGINE_ARGS = AsyncEngineArgs(**ENGINE_CONFIG) - self.engine = AsyncLLMEngine.from_engine_args(ENGINE_ARGS) - self.tokenizer = AutoTokenizer.from_pretrained(ENGINE_CONFIG["model"]) - logger.info(f"VLLM service initialized with model: {ENGINE_CONFIG['model']}") - - if OVERRIDE_CHAT_TEMPLATE: # use community chat template - gen_config = _get_gen_config(CONSTANTS["chat_template"]) - chat_template = gen_config["template"] - else: - chat_template = None - - model_config = self.engine.engine.get_model_config() - - # inject the engine into the openai serving chat and completion - vllm_api_server.openai_serving_chat = OpenAIServingChat( - engine=self.engine, - served_model_names=[ENGINE_CONFIG["model"]], - response_role="assistant", - chat_template=chat_template, - model_config=model_config, - ) - vllm_api_server.openai_serving_completion = OpenAIServingCompletion( - engine=self.engine, - served_model_names=[ENGINE_CONFIG["model"]], - model_config=model_config, - lora_modules=None, - ) - - @bentoml.api(route="/api/generate") - async def generate( - self, - prompt: str = "Explain superconductors like I'm five years old", - model: str = ENGINE_CONFIG["model"], - max_tokens: Annotated[ - int, - Ge(128), - Le(ENGINE_CONFIG["max_model_len"]), - ] = ENGINE_CONFIG["max_model_len"], - stop: Optional[list[str]] = None, - ) -> AsyncGenerator[str, None]: - if stop is None: - stop = [] - - from vllm import SamplingParams - - SAMPLING_PARAM = SamplingParams( - max_tokens=max_tokens, - stop=stop, - ) - stream = await self.engine.add_request(uuid.uuid4().hex, prompt, SAMPLING_PARAM) - - cursor = 0 - async for request_output in stream: - text = request_output.outputs[0].text - yield text[cursor:] - cursor = len(text) - - @bentoml.api(route="/api/chat") - async def chat( - self, - messages: list[Message] = [ - Message(content="what is the meaning of life?", role="user") - ], - model: str = ENGINE_CONFIG["model"], - max_tokens: Annotated[ - int, - Ge(128), - Le(ENGINE_CONFIG["max_model_len"]), - ] = ENGINE_CONFIG["max_model_len"], - stop: Optional[list[str]] = None, - ) -> AsyncGenerator[str, None]: - """ - light-weight chat API that takes in a list of messages and returns a response - """ - from vllm import SamplingParams - - try: - if OVERRIDE_CHAT_TEMPLATE: # community chat template - gen_config = _get_gen_config(CONSTANTS["chat_template"]) - if not stop: - if gen_config["stop_str"]: - stop = [gen_config["stop_str"]] - else: - stop = [] - system_prompt = gen_config["system_prompt"] - self.tokenizer.chat_template = gen_config["template"] - else: - if not stop: - if self.tokenizer.eos_token is not None: - stop = [self.tokenizer.eos_token] - else: - stop = [] - system_prompt = None - - SAMPLING_PARAM = SamplingParams( - max_tokens=max_tokens, - stop=stop, - ) - if system_prompt and messages[0].role != "system": - messages = [dict(role="system", content=system_prompt)] + messages - - prompt = self.tokenizer.apply_chat_template( - messages, - tokenize=False, - add_generation_prompt=True, - ) - - stream = await self.engine.add_request( - uuid.uuid4().hex, prompt, SAMPLING_PARAM - ) - - cursor = 0 - strip_flag = True - async for request_output in stream: - text = request_output.outputs[0].text - assistant_message = text[cursor:] - if not strip_flag: # strip the leading whitespace - yield assistant_message - elif assistant_message.strip(): - strip_flag = False - yield assistant_message.lstrip() - cursor = len(text) - except Exception as e: - logger.error(f"Error in chat API: {e}") - yield f"Error in chat API: {e}" - - -@functools.lru_cache(maxsize=1) -def _get_gen_config(community_chat_template: str) -> dict: - logger.info(f"Load community_chat_template: {community_chat_template}") - chat_template_path = os.path.join( - os.path.dirname(__file__), "chat_templates", "chat_templates" - ) - config_path = os.path.join( - os.path.dirname(__file__), "chat_templates", "generation_configs" - ) - with open(os.path.join(config_path, f"{community_chat_template}.json")) as f: - gen_config = json.load(f) - chat_template_file = gen_config["chat_template"].split("/")[-1] - with open(os.path.join(chat_template_path, chat_template_file)) as f: - chat_template = f.read() - gen_config["template"] = chat_template.replace(" ", "").replace("\n", "") - return gen_config diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-0083/README.md b/bentoml/bentos/llama3/8b-instruct-fp16-0083/README.md new file mode 100644 index 00000000..2ca7abdb --- /dev/null +++ b/bentoml/bentos/llama3/8b-instruct-fp16-0083/README.md @@ -0,0 +1,16 @@ +# llama3:8b-instruct-fp16-0083 + +[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML) +[![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/) +[![join_slack](https://badgen.net/badge/Join/BentoML%20Slack/cyan?icon=slack)](https://l.bentoml.com/join-slack-swagger) +[![BentoML GitHub Repo](https://img.shields.io/github/stars/bentoml/bentoml?style=social)](https://github.com/bentoml/BentoML) +[![Twitter Follow](https://img.shields.io/twitter/follow/bentomlai?label=Follow%20BentoML&style=social)](https://twitter.com/bentomlai) + +This is a Machine Learning Service created with BentoML. + +## Help + +* [📖 Documentation](https://docs.bentoml.com/en/latest/): Learn how to use BentoML. +* [💬 Community](https://l.bentoml.com/join-slack-swagger): Join the BentoML Slack community. +* [🐛 GitHub Issues](https://github.com/bentoml/BentoML/issues): Report bugs and feature requests. +* Tip: you can also [customize this README](https://docs.bentoml.com/en/latest/concepts/bento.html#description). diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/apis/openapi.yaml b/bentoml/bentos/llama3/8b-instruct-fp16-0083/apis/openapi.yaml similarity index 99% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/apis/openapi.yaml rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/apis/openapi.yaml index cd2e710d..24e12850 100644 --- a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/apis/openapi.yaml +++ b/bentoml/bentos/llama3/8b-instruct-fp16-0083/apis/openapi.yaml @@ -1036,6 +1036,39 @@ paths: tags: - Service APIs x-bentoml-name: generate + /chat/: + get: + operationId: serve_chat_html__get + responses: + '200': + content: + application/json: + schema: {} + description: Successful Response + summary: Serve Chat Html + /chat/{full_path}: + get: + operationId: catch_all__full_path__get + parameters: + - in: path + name: full_path + required: true + schema: + title: Full Path + type: string + responses: + '200': + content: + application/json: + schema: {} + description: Successful Response + '422': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + description: Validation Error + summary: Catch All /healthz: get: description: Health check endpoint. Expecting an empty response with status @@ -1076,39 +1109,6 @@ paths: description: Successful Response tags: - Infrastructure - /ui/: - get: - operationId: serve_chat_html__get - responses: - '200': - content: - application/json: - schema: {} - description: Successful Response - summary: Serve Chat Html - /ui/{full_path}: - get: - operationId: catch_all__full_path__get - parameters: - - in: path - name: full_path - required: true - schema: - title: Full Path - type: string - responses: - '200': - content: - application/json: - schema: {} - description: Successful Response - '422': - content: - application/json: - schema: - $ref: '#/components/schemas/HTTPValidationError' - description: Validation Error - summary: Catch All /v1/chat/completions: post: operationId: create_chat_completion_chat_completions_post diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/apis/schema.json b/bentoml/bentos/llama3/8b-instruct-fp16-0083/apis/schema.json similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/apis/schema.json rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/apis/schema.json diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/bento.yaml b/bentoml/bentos/llama3/8b-instruct-fp16-0083/bento.yaml similarity index 97% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/bento.yaml rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/bento.yaml index 3f4cfe4c..d53bfee8 100644 --- a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/bento.yaml +++ b/bentoml/bentos/llama3/8b-instruct-fp16-0083/bento.yaml @@ -1,8 +1,8 @@ service: service:VLLM name: llama3 -version: 8b-instruct-fp16-46f3 +version: 8b-instruct-fp16-0083 bentoml_version: 1.2.19 -creation_time: '2024-07-05T07:30:02.056933+00:00' +creation_time: '2024-07-08T07:55:34.916627+00:00' labels: openllm_alias: 8b,8b-instruct openllm_hf_model_id: meta-llama/Meta-Llama-3-8B-Instruct diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/env/docker/Dockerfile b/bentoml/bentos/llama3/8b-instruct-fp16-0083/env/docker/Dockerfile similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/env/docker/Dockerfile rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/env/docker/Dockerfile diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/env/docker/entrypoint.sh b/bentoml/bentos/llama3/8b-instruct-fp16-0083/env/docker/entrypoint.sh similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/env/docker/entrypoint.sh rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/env/docker/entrypoint.sh diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/env/python/install.sh b/bentoml/bentos/llama3/8b-instruct-fp16-0083/env/python/install.sh similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/env/python/install.sh rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/env/python/install.sh diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-0083/env/python/requirements.lock.txt b/bentoml/bentos/llama3/8b-instruct-fp16-0083/env/python/requirements.lock.txt new file mode 100644 index 00000000..925ce193 --- /dev/null +++ b/bentoml/bentos/llama3/8b-instruct-fp16-0083/env/python/requirements.lock.txt @@ -0,0 +1,147 @@ +aiohttp==3.9.5 +aiosignal==1.3.1 +annotated-types==0.7.0 +anyio==4.4.0 +appdirs==1.4.4 +asgiref==3.8.1 +async-timeout==4.0.3 +attrs==23.2.0 +bentoml==1.2.19 +build==1.2.1 +cattrs==23.1.2 +certifi==2024.7.4 +charset-normalizer==3.3.2 +circus==0.18.0 +click==8.1.7 +click-option-group==0.5.6 +cloudpickle==3.0.0 +cmake==3.30.0 +deepmerge==1.1.1 +deprecated==1.2.14 +diskcache==5.6.3 +distro==1.9.0 +dnspython==2.6.1 +email-validator==2.2.0 +exceptiongroup==1.2.1 +fastapi==0.111.0 +fastapi-cli==0.0.4 +filelock==3.15.4 +frozenlist==1.4.1 +fs==2.4.16 +fsspec==2024.6.1 +h11==0.14.0 +httpcore==1.0.5 +httptools==0.6.1 +httpx==0.27.0 +httpx-ws==0.6.0 +huggingface-hub==0.23.4 +idna==3.7 +importlib-metadata==6.11.0 +inflection==0.5.1 +interegular==0.3.3 +jinja2==3.1.4 +joblib==1.4.2 +jsonschema==4.22.0 +jsonschema-specifications==2023.12.1 +lark==1.1.9 +llvmlite==0.43.0 +lm-format-enforcer==0.10.1 +markdown-it-py==3.0.0 +markupsafe==2.1.5 +mdurl==0.1.2 +mpmath==1.3.0 +msgpack==1.0.8 +multidict==6.0.5 +nest-asyncio==1.6.0 +networkx==3.2.1 +ninja==1.11.1.1 +numba==0.60.0 +numpy==1.26.0 +nvidia-cublas-cu12==12.1.3.1 +nvidia-cuda-cupti-cu12==12.1.105 +nvidia-cuda-nvrtc-cu12==12.1.105 +nvidia-cuda-runtime-cu12==12.1.105 +nvidia-cudnn-cu12==8.9.2.26 +nvidia-cufft-cu12==11.0.2.54 +nvidia-curand-cu12==10.3.2.106 +nvidia-cusolver-cu12==11.4.5.107 +nvidia-cusparse-cu12==12.1.0.106 +nvidia-ml-py==11.525.150 +nvidia-nccl-cu12==2.20.5 +nvidia-nvjitlink-cu12==12.5.82 +nvidia-nvtx-cu12==12.1.105 +openai==1.35.10 +opentelemetry-api==1.20.0 +opentelemetry-instrumentation==0.41b0 +opentelemetry-instrumentation-aiohttp-client==0.41b0 +opentelemetry-instrumentation-asgi==0.41b0 +opentelemetry-sdk==1.20.0 +opentelemetry-semantic-conventions==0.41b0 +opentelemetry-util-http==0.41b0 +orjson==3.10.6 +outlines==0.0.34 +packaging==24.1 +pathspec==0.12.1 +pip-requirements-parser==32.0.1 +pip-tools==7.4.1 +prometheus-client==0.20.0 +prometheus-fastapi-instrumentator==7.0.0 +protobuf==5.27.2 +psutil==6.0.0 +py-cpuinfo==9.0.0 +pydantic==2.8.2 +pydantic-core==2.20.1 +pygments==2.18.0 +pyparsing==3.1.2 +pyproject-hooks==1.1.0 +python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 +python-json-logger==2.0.7 +python-multipart==0.0.9 +pyyaml==6.0.1 +pyzmq==26.0.3 +ray==2.31.0 +referencing==0.35.1 +regex==2024.5.15 +requests==2.32.3 +rich==13.7.1 +rpds-py==0.18.1 +safetensors==0.4.3 +schema==0.7.7 +scipy==1.13.1 +sentencepiece==0.2.0 +shellingham==1.5.4 +simple-di==0.1.5 +six==1.16.0 +sniffio==1.3.1 +starlette==0.37.2 +sympy==1.12.1 +tiktoken==0.7.0 +tokenizers==0.19.1 +tomli==2.0.1 +tomli-w==1.0.0 +torch==2.3.0 +tornado==6.4.1 +tqdm==4.66.4 +transformers==4.41.0 +triton==2.3.0 +typer==0.12.3 +typing-extensions==4.12.2 +ujson==5.10.0 +urllib3==2.2.2 +uvicorn[standard]==0.30.1 +uvloop==0.19.0 +vllm==0.4.3 +vllm-flash-attn==2.5.8.post2 +watchfiles==0.22.0 +websockets==12.0 +wheel==0.43.0 +wrapt==1.16.0 +wsproto==1.2.0 +xformers==0.0.26.post1 +yarl==1.9.4 +zipp==3.19.2 + +# The following packages are considered to be unsafe in a requirements file: +pip==24.1.2 +setuptools==70.2.0 diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/env/python/requirements.txt b/bentoml/bentos/llama3/8b-instruct-fp16-0083/env/python/requirements.txt similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/env/python/requirements.txt rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/env/python/requirements.txt diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/env/python/version.txt b/bentoml/bentos/llama3/8b-instruct-fp16-0083/env/python/version.txt similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/env/python/version.txt rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/env/python/version.txt diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/bento_constants.py b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/bento_constants.py similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/bento_constants.py rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/bento_constants.py diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/bentofile.yaml b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/bentofile.yaml similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/bentofile.yaml rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/bentofile.yaml diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/chat_templates/alpaca.jinja b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/chat_templates/alpaca.jinja similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/chat_templates/alpaca.jinja rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/chat_templates/alpaca.jinja diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/chat_templates/amberchat.jinja b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/chat_templates/amberchat.jinja similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/chat_templates/amberchat.jinja rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/chat_templates/amberchat.jinja diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/chat_templates/chatml.jinja b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/chat_templates/chatml.jinja similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/chat_templates/chatml.jinja rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/chat_templates/chatml.jinja diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/chat_templates/chatqa.jinja b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/chat_templates/chatqa.jinja similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/chat_templates/chatqa.jinja rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/chat_templates/chatqa.jinja diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/chat_templates/falcon-instruct.jinja b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/chat_templates/falcon-instruct.jinja similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/chat_templates/falcon-instruct.jinja rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/chat_templates/falcon-instruct.jinja diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/chat_templates/gemma-it.jinja b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/chat_templates/gemma-it.jinja similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/chat_templates/gemma-it.jinja rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/chat_templates/gemma-it.jinja diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/chat_templates/llama-2-chat.jinja b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/chat_templates/llama-2-chat.jinja similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/chat_templates/llama-2-chat.jinja rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/chat_templates/llama-2-chat.jinja diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/chat_templates/llama-3-chat.jinja b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/chat_templates/llama-3-chat.jinja similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/chat_templates/llama-3-chat.jinja rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/chat_templates/llama-3-chat.jinja diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/chat_templates/mistral-instruct.jinja b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/chat_templates/mistral-instruct.jinja similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/chat_templates/mistral-instruct.jinja rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/chat_templates/mistral-instruct.jinja diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/chat_templates/openchat.jinja b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/chat_templates/openchat.jinja similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/chat_templates/openchat.jinja rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/chat_templates/openchat.jinja diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/chat_templates/phi-3.jinja b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/chat_templates/phi-3.jinja similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/chat_templates/phi-3.jinja rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/chat_templates/phi-3.jinja diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/chat_templates/saiga.jinja b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/chat_templates/saiga.jinja similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/chat_templates/saiga.jinja rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/chat_templates/saiga.jinja diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/chat_templates/solar-instruct.jinja b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/chat_templates/solar-instruct.jinja similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/chat_templates/solar-instruct.jinja rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/chat_templates/solar-instruct.jinja diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/chat_templates/vicuna.jinja b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/chat_templates/vicuna.jinja similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/chat_templates/vicuna.jinja rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/chat_templates/vicuna.jinja diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/chat_templates/zephyr.jinja b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/chat_templates/zephyr.jinja similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/chat_templates/zephyr.jinja rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/chat_templates/zephyr.jinja diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/generation_configs/alpaca.json b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/generation_configs/alpaca.json similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/generation_configs/alpaca.json rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/generation_configs/alpaca.json diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/generation_configs/amberchat.json b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/generation_configs/amberchat.json similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/generation_configs/amberchat.json rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/generation_configs/amberchat.json diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/generation_configs/chatqa.json b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/generation_configs/chatqa.json similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/generation_configs/chatqa.json rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/generation_configs/chatqa.json diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/generation_configs/gemma-it.json b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/generation_configs/gemma-it.json similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/generation_configs/gemma-it.json rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/generation_configs/gemma-it.json diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/generation_configs/llama-2-chat.json b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/generation_configs/llama-2-chat.json similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/generation_configs/llama-2-chat.json rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/generation_configs/llama-2-chat.json diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/generation_configs/llama-3-chat.json b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/generation_configs/llama-3-chat.json similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/generation_configs/llama-3-chat.json rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/generation_configs/llama-3-chat.json diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/generation_configs/mistral-instruct.json b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/generation_configs/mistral-instruct.json similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/generation_configs/mistral-instruct.json rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/generation_configs/mistral-instruct.json diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/generation_configs/openchat.json b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/generation_configs/openchat.json similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/generation_configs/openchat.json rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/generation_configs/openchat.json diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/generation_configs/orca-2.json b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/generation_configs/orca-2.json similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/generation_configs/orca-2.json rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/generation_configs/orca-2.json diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/generation_configs/phi-3.json b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/generation_configs/phi-3.json similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/generation_configs/phi-3.json rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/generation_configs/phi-3.json diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/generation_configs/qwen2-chat.json b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/generation_configs/qwen2-chat.json similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/generation_configs/qwen2-chat.json rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/generation_configs/qwen2-chat.json diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/generation_configs/saiga.json b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/generation_configs/saiga.json similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/generation_configs/saiga.json rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/generation_configs/saiga.json diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/generation_configs/solar-instruct.json b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/generation_configs/solar-instruct.json similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/generation_configs/solar-instruct.json rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/generation_configs/solar-instruct.json diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/generation_configs/vicuna.json b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/generation_configs/vicuna.json similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/generation_configs/vicuna.json rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/generation_configs/vicuna.json diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/generation_configs/yi-chat.json b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/generation_configs/yi-chat.json similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/generation_configs/yi-chat.json rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/generation_configs/yi-chat.json diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/generation_configs/zephyr.json b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/generation_configs/zephyr.json similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/chat_templates/generation_configs/zephyr.json rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/chat_templates/generation_configs/zephyr.json diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/service.py b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/service.py new file mode 100644 index 00000000..42152243 --- /dev/null +++ b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/service.py @@ -0,0 +1,239 @@ +import functools +import json +import logging +import os +import sys +import uuid +from typing import AsyncGenerator, Literal, Optional + +import bentoml +import fastapi +import fastapi.staticfiles +import pydantic +import vllm.entrypoints.openai.api_server as vllm_api_server +import yaml +from annotated_types import Ge, Le +from bento_constants import CONSTANT_YAML +from fastapi.responses import FileResponse +from typing_extensions import Annotated, Literal + + +class Message(pydantic.BaseModel): + content: str + role: Literal["system", "user", "assistant"] + + +CONSTANTS = yaml.safe_load(CONSTANT_YAML) + +ENGINE_CONFIG = CONSTANTS["engine_config"] +SERVICE_CONFIG = CONSTANTS["service_config"] +OVERRIDE_CHAT_TEMPLATE = CONSTANTS.get("chat_template") + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +openai_api_app = fastapi.FastAPI() +static_app = fastapi.FastAPI() +ui_app = fastapi.FastAPI() + + +OPENAI_ENDPOINTS = [ + ["/chat/completions", vllm_api_server.create_chat_completion, ["POST"]], + ["/completions", vllm_api_server.create_completion, ["POST"]], + ["/models", vllm_api_server.show_available_models, ["GET"]], +] + + +class Message(pydantic.BaseModel): + role: Literal["system", "user", "assistant"] + content: str + + +for route, endpoint, methods in OPENAI_ENDPOINTS: + openai_api_app.add_api_route( + path=route, + endpoint=endpoint, + methods=methods, + include_in_schema=True, + ) + + +STATIC_DIR = os.path.join(os.path.dirname(__file__), "ui") + +ui_app.mount( + "/static", fastapi.staticfiles.StaticFiles(directory=STATIC_DIR), name="static" +) + + +@ui_app.get("/") +async def serve_chat_html(): + return FileResponse(os.path.join(STATIC_DIR, "chat.html")) + + +@ui_app.get("/{full_path:path}") +async def catch_all(full_path: str): + file_path = os.path.join(STATIC_DIR, full_path) + if os.path.exists(file_path): + return FileResponse(file_path) + return FileResponse(os.path.join(STATIC_DIR, "chat.html")) + + +# special handling for prometheus_client of bentoml +if "prometheus_client" in sys.modules: + sys.modules.pop("prometheus_client") + + +@bentoml.mount_asgi_app(openai_api_app, path="/v1") +@bentoml.mount_asgi_app(ui_app, path="/chat") +@bentoml.service(**SERVICE_CONFIG) +class VLLM: + def __init__(self) -> None: + from transformers import AutoTokenizer + from vllm import AsyncEngineArgs, AsyncLLMEngine + from vllm.entrypoints.openai.serving_chat import OpenAIServingChat + from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion + + ENGINE_ARGS = AsyncEngineArgs(**ENGINE_CONFIG) + self.engine = AsyncLLMEngine.from_engine_args(ENGINE_ARGS) + self.tokenizer = AutoTokenizer.from_pretrained(ENGINE_CONFIG["model"]) + logger.info(f"VLLM service initialized with model: {ENGINE_CONFIG['model']}") + + if OVERRIDE_CHAT_TEMPLATE: # use community chat template + gen_config = _get_gen_config(CONSTANTS["chat_template"]) + chat_template = gen_config["template"] + else: + chat_template = None + + model_config = self.engine.engine.get_model_config() + + # inject the engine into the openai serving chat and completion + vllm_api_server.openai_serving_chat = OpenAIServingChat( + engine=self.engine, + served_model_names=[ENGINE_CONFIG["model"]], + response_role="assistant", + chat_template=chat_template, + model_config=model_config, + ) + vllm_api_server.openai_serving_completion = OpenAIServingCompletion( + engine=self.engine, + served_model_names=[ENGINE_CONFIG["model"]], + model_config=model_config, + lora_modules=None, + ) + + @bentoml.api(route="/api/generate") + async def generate( + self, + prompt: str = "Explain superconductors like I'm five years old", + model: str = ENGINE_CONFIG["model"], + max_tokens: Annotated[ + int, + Ge(128), + Le(ENGINE_CONFIG["max_model_len"]), + ] = ENGINE_CONFIG["max_model_len"], + stop: Optional[list[str]] = None, + ) -> AsyncGenerator[str, None]: + if stop is None: + stop = [] + + from vllm import SamplingParams + + SAMPLING_PARAM = SamplingParams( + max_tokens=max_tokens, + stop=stop, + ) + stream = await self.engine.add_request(uuid.uuid4().hex, prompt, SAMPLING_PARAM) + + cursor = 0 + async for request_output in stream: + text = request_output.outputs[0].text + yield text[cursor:] + cursor = len(text) + + @bentoml.api(route="/api/chat") + async def chat( + self, + messages: list[Message] = [ + Message(content="what is the meaning of life?", role="user") + ], + model: str = ENGINE_CONFIG["model"], + max_tokens: Annotated[ + int, + Ge(128), + Le(ENGINE_CONFIG["max_model_len"]), + ] = ENGINE_CONFIG["max_model_len"], + stop: Optional[list[str]] = None, + ) -> AsyncGenerator[str, None]: + """ + light-weight chat API that takes in a list of messages and returns a response + """ + from vllm import SamplingParams + + try: + if OVERRIDE_CHAT_TEMPLATE: # community chat template + gen_config = _get_gen_config(CONSTANTS["chat_template"]) + if not stop: + if gen_config["stop_str"]: + stop = [gen_config["stop_str"]] + else: + stop = [] + system_prompt = gen_config["system_prompt"] + self.tokenizer.chat_template = gen_config["template"] + else: + if not stop: + if self.tokenizer.eos_token is not None: + stop = [self.tokenizer.eos_token] + else: + stop = [] + system_prompt = None + + SAMPLING_PARAM = SamplingParams( + max_tokens=max_tokens, + stop=stop, + ) + if system_prompt and messages[0].role != "system": + messages = [dict(role="system", content=system_prompt)] + messages + + prompt = self.tokenizer.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True, + ) + + stream = await self.engine.add_request( + uuid.uuid4().hex, prompt, SAMPLING_PARAM + ) + + cursor = 0 + strip_flag = True + async for request_output in stream: + text = request_output.outputs[0].text + assistant_message = text[cursor:] + if not strip_flag: # strip the leading whitespace + yield assistant_message + elif assistant_message.strip(): + strip_flag = False + yield assistant_message.lstrip() + cursor = len(text) + except Exception as e: + logger.error(f"Error in chat API: {e}") + yield f"Error in chat API: {e}" + + +@functools.lru_cache(maxsize=1) +def _get_gen_config(community_chat_template: str) -> dict: + logger.info(f"Load community_chat_template: {community_chat_template}") + chat_template_path = os.path.join( + os.path.dirname(__file__), "chat_templates", "chat_templates" + ) + config_path = os.path.join( + os.path.dirname(__file__), "chat_templates", "generation_configs" + ) + with open(os.path.join(config_path, f"{community_chat_template}.json")) as f: + gen_config = json.load(f) + chat_template_file = gen_config["chat_template"].split("/")[-1] + with open(os.path.join(chat_template_path, chat_template_file)) as f: + chat_template = f.read() + gen_config["template"] = chat_template.replace(" ", "").replace("\n", "") + return gen_config diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/404.html b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/404.html similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/404.html rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/404.html diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/700-532b1fe2415e5859.js b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/700-532b1fe2415e5859.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/700-532b1fe2415e5859.js rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/700-532b1fe2415e5859.js diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/995-34374f39bb210839.js b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/995-34374f39bb210839.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/995-34374f39bb210839.js rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/995-34374f39bb210839.js diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/css/429544bd3cd8ce3a.css b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/css/429544bd3cd8ce3a.css similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/css/429544bd3cd8ce3a.css rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/css/429544bd3cd8ce3a.css diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/css/5b67f082b31cfc7b.css b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/css/5b67f082b31cfc7b.css similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/css/5b67f082b31cfc7b.css rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/css/5b67f082b31cfc7b.css diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/css/9e63023b20ddb15e.css b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/css/9e63023b20ddb15e.css similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/_next/static/css/9e63023b20ddb15e.css rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/_next/static/css/9e63023b20ddb15e.css diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/apple-touch-icon.png b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/apple-touch-icon.png similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/apple-touch-icon.png rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/apple-touch-icon.png diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/chat.html b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/chat.html similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/chat.html rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/chat.html diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/chat.txt b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/chat.txt similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/chat.txt rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/chat.txt diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/favicon-16x16.png b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/favicon-16x16.png similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/favicon-16x16.png rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/favicon-16x16.png diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/favicon.ico b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/favicon.ico similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/favicon.ico rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/favicon.ico diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/index.html b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/index.html similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/index.html rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/index.html diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/index.txt b/bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/index.txt similarity index 100% rename from bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/ui/index.txt rename to bentoml/bentos/llama3/8b-instruct-fp16-0083/src/ui/index.txt diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/README.md b/bentoml/bentos/llama3/8b-instruct-fp16-46f3/README.md deleted file mode 100644 index 46bdfbb0..00000000 --- a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/README.md +++ /dev/null @@ -1,16 +0,0 @@ -# llama3:8b-instruct-fp16-46f3 - -[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML) -[![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/) -[![join_slack](https://badgen.net/badge/Join/BentoML%20Slack/cyan?icon=slack)](https://l.bentoml.com/join-slack-swagger) -[![BentoML GitHub Repo](https://img.shields.io/github/stars/bentoml/bentoml?style=social)](https://github.com/bentoml/BentoML) -[![Twitter Follow](https://img.shields.io/twitter/follow/bentomlai?label=Follow%20BentoML&style=social)](https://twitter.com/bentomlai) - -This is a Machine Learning Service created with BentoML. - -## Help - -* [📖 Documentation](https://docs.bentoml.com/en/latest/): Learn how to use BentoML. -* [💬 Community](https://l.bentoml.com/join-slack-swagger): Join the BentoML Slack community. -* [🐛 GitHub Issues](https://github.com/bentoml/BentoML/issues): Report bugs and feature requests. -* Tip: you can also [customize this README](https://docs.bentoml.com/en/latest/concepts/bento.html#description). diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/env/python/requirements.lock.txt b/bentoml/bentos/llama3/8b-instruct-fp16-46f3/env/python/requirements.lock.txt deleted file mode 100644 index 9118ea44..00000000 --- a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/env/python/requirements.lock.txt +++ /dev/null @@ -1,147 +0,0 @@ -aiohttp==3.9.5 -aiosignal==1.3.1 -annotated-types==0.7.0 -anyio==4.4.0 -appdirs==1.4.4 -asgiref==3.8.1 -async-timeout==4.0.3 -attrs==23.2.0 -bentoml==1.2.19 -build==1.2.1 -cattrs==23.1.2 -certifi==2024.7.4 -charset-normalizer==3.3.2 -circus==0.18.0 -click==8.1.7 -click-option-group==0.5.6 -cloudpickle==3.0.0 -cmake==3.30.0 -deepmerge==1.1.1 -deprecated==1.2.14 -diskcache==5.6.3 -distro==1.9.0 -dnspython==2.6.1 -email-validator==2.2.0 -exceptiongroup==1.2.1 -fastapi==0.111.0 -fastapi-cli==0.0.4 -filelock==3.15.4 -frozenlist==1.4.1 -fs==2.4.16 -fsspec==2024.6.1 -h11==0.14.0 -httpcore==1.0.5 -httptools==0.6.1 -httpx==0.27.0 -httpx-ws==0.6.0 -huggingface-hub==0.23.4 -idna==3.7 -importlib-metadata==6.11.0 -inflection==0.5.1 -interegular==0.3.3 -jinja2==3.1.4 -joblib==1.4.2 -jsonschema==4.22.0 -jsonschema-specifications==2023.12.1 -lark==1.1.9 -llvmlite==0.43.0 -lm-format-enforcer==0.10.1 -markdown-it-py==3.0.0 -markupsafe==2.1.5 -mdurl==0.1.2 -mpmath==1.3.0 -msgpack==1.0.8 -multidict==6.0.5 -nest-asyncio==1.6.0 -networkx==3.2.1 -ninja==1.11.1.1 -numba==0.60.0 -numpy==1.26.0 -nvidia-cublas-cu12==12.1.3.1 -nvidia-cuda-cupti-cu12==12.1.105 -nvidia-cuda-nvrtc-cu12==12.1.105 -nvidia-cuda-runtime-cu12==12.1.105 -nvidia-cudnn-cu12==8.9.2.26 -nvidia-cufft-cu12==11.0.2.54 -nvidia-curand-cu12==10.3.2.106 -nvidia-cusolver-cu12==11.4.5.107 -nvidia-cusparse-cu12==12.1.0.106 -nvidia-ml-py==11.525.150 -nvidia-nccl-cu12==2.20.5 -nvidia-nvjitlink-cu12==12.5.82 -nvidia-nvtx-cu12==12.1.105 -openai==1.35.10 -opentelemetry-api==1.20.0 -opentelemetry-instrumentation==0.41b0 -opentelemetry-instrumentation-aiohttp-client==0.41b0 -opentelemetry-instrumentation-asgi==0.41b0 -opentelemetry-sdk==1.20.0 -opentelemetry-semantic-conventions==0.41b0 -opentelemetry-util-http==0.41b0 -orjson==3.10.6 -outlines==0.0.34 -packaging==24.1 -pathspec==0.12.1 -pip-requirements-parser==32.0.1 -pip-tools==7.4.1 -prometheus-client==0.20.0 -prometheus-fastapi-instrumentator==7.0.0 -protobuf==5.27.2 -psutil==6.0.0 -py-cpuinfo==9.0.0 -pydantic==2.8.2 -pydantic-core==2.20.1 -pygments==2.18.0 -pyparsing==3.1.2 -pyproject-hooks==1.1.0 -python-dateutil==2.9.0.post0 -python-dotenv==1.0.1 -python-json-logger==2.0.7 -python-multipart==0.0.9 -pyyaml==6.0.1 -pyzmq==26.0.3 -ray==2.31.0 -referencing==0.35.1 -regex==2024.5.15 -requests==2.32.3 -rich==13.7.1 -rpds-py==0.18.1 -safetensors==0.4.3 -schema==0.7.7 -scipy==1.13.1 -sentencepiece==0.2.0 -shellingham==1.5.4 -simple-di==0.1.5 -six==1.16.0 -sniffio==1.3.1 -starlette==0.37.2 -sympy==1.12.1 -tiktoken==0.7.0 -tokenizers==0.19.1 -tomli==2.0.1 -tomli-w==1.0.0 -torch==2.3.0 -tornado==6.4.1 -tqdm==4.66.4 -transformers==4.41.0 -triton==2.3.0 -typer==0.12.3 -typing-extensions==4.12.2 -ujson==5.10.0 -urllib3==2.2.2 -uvicorn[standard]==0.30.1 -uvloop==0.19.0 -vllm==0.4.3 -vllm-flash-attn==2.5.8.post2 -watchfiles==0.22.0 -websockets==12.0 -wheel==0.43.0 -wrapt==1.16.0 -wsproto==1.2.0 -xformers==0.0.26.post1 -yarl==1.9.4 -zipp==3.19.2 - -# The following packages are considered to be unsafe in a requirements file: -pip==24.1.1 -setuptools==70.2.0 diff --git a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/service.py b/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/service.py deleted file mode 100644 index 94af0b0d..00000000 --- a/bentoml/bentos/llama3/8b-instruct-fp16-46f3/src/service.py +++ /dev/null @@ -1,239 +0,0 @@ -import functools -import json -import logging -import os -import sys -import uuid -from typing import AsyncGenerator, Literal, Optional - -import bentoml -import fastapi -import fastapi.staticfiles -import pydantic -import vllm.entrypoints.openai.api_server as vllm_api_server -import yaml -from annotated_types import Ge, Le -from bento_constants import CONSTANT_YAML -from fastapi.responses import FileResponse -from typing_extensions import Annotated, Literal - - -class Message(pydantic.BaseModel): - content: str - role: Literal["system", "user", "assistant"] - - -CONSTANTS = yaml.safe_load(CONSTANT_YAML) - -ENGINE_CONFIG = CONSTANTS["engine_config"] -SERVICE_CONFIG = CONSTANTS["service_config"] -OVERRIDE_CHAT_TEMPLATE = CONSTANTS.get("chat_template") - -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) - - -openai_api_app = fastapi.FastAPI() -static_app = fastapi.FastAPI() -ui_app = fastapi.FastAPI() - - -OPENAI_ENDPOINTS = [ - ["/chat/completions", vllm_api_server.create_chat_completion, ["POST"]], - ["/completions", vllm_api_server.create_completion, ["POST"]], - ["/models", vllm_api_server.show_available_models, ["GET"]], -] - - -class Message(pydantic.BaseModel): - role: Literal["system", "user", "assistant"] - content: str - - -for route, endpoint, methods in OPENAI_ENDPOINTS: - openai_api_app.add_api_route( - path=route, - endpoint=endpoint, - methods=methods, - include_in_schema=True, - ) - - -STATIC_DIR = os.path.join(os.path.dirname(__file__), "ui") - -ui_app.mount( - "/static", fastapi.staticfiles.StaticFiles(directory=STATIC_DIR), name="static" -) - - -@ui_app.get("/") -async def serve_chat_html(): - return FileResponse(os.path.join(STATIC_DIR, "chat.html")) - - -@ui_app.get("/{full_path:path}") -async def catch_all(full_path: str): - file_path = os.path.join(STATIC_DIR, full_path) - if os.path.exists(file_path): - return FileResponse(file_path) - return FileResponse(os.path.join(STATIC_DIR, "chat.html")) - - -# special handling for prometheus_client of bentoml -if "prometheus_client" in sys.modules: - sys.modules.pop("prometheus_client") - - -@bentoml.mount_asgi_app(openai_api_app, path="/v1") -@bentoml.mount_asgi_app(ui_app, path="/ui") -@bentoml.service(**SERVICE_CONFIG) -class VLLM: - def __init__(self) -> None: - from transformers import AutoTokenizer - from vllm import AsyncEngineArgs, AsyncLLMEngine - from vllm.entrypoints.openai.serving_chat import OpenAIServingChat - from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion - - ENGINE_ARGS = AsyncEngineArgs(**ENGINE_CONFIG) - self.engine = AsyncLLMEngine.from_engine_args(ENGINE_ARGS) - self.tokenizer = AutoTokenizer.from_pretrained(ENGINE_CONFIG["model"]) - logger.info(f"VLLM service initialized with model: {ENGINE_CONFIG['model']}") - - if OVERRIDE_CHAT_TEMPLATE: # use community chat template - gen_config = _get_gen_config(CONSTANTS["chat_template"]) - chat_template = gen_config["template"] - else: - chat_template = None - - model_config = self.engine.engine.get_model_config() - - # inject the engine into the openai serving chat and completion - vllm_api_server.openai_serving_chat = OpenAIServingChat( - engine=self.engine, - served_model_names=[ENGINE_CONFIG["model"]], - response_role="assistant", - chat_template=chat_template, - model_config=model_config, - ) - vllm_api_server.openai_serving_completion = OpenAIServingCompletion( - engine=self.engine, - served_model_names=[ENGINE_CONFIG["model"]], - model_config=model_config, - lora_modules=None, - ) - - @bentoml.api(route="/api/generate") - async def generate( - self, - prompt: str = "Explain superconductors like I'm five years old", - model: str = ENGINE_CONFIG["model"], - max_tokens: Annotated[ - int, - Ge(128), - Le(ENGINE_CONFIG["max_model_len"]), - ] = ENGINE_CONFIG["max_model_len"], - stop: Optional[list[str]] = None, - ) -> AsyncGenerator[str, None]: - if stop is None: - stop = [] - - from vllm import SamplingParams - - SAMPLING_PARAM = SamplingParams( - max_tokens=max_tokens, - stop=stop, - ) - stream = await self.engine.add_request(uuid.uuid4().hex, prompt, SAMPLING_PARAM) - - cursor = 0 - async for request_output in stream: - text = request_output.outputs[0].text - yield text[cursor:] - cursor = len(text) - - @bentoml.api(route="/api/chat") - async def chat( - self, - messages: list[Message] = [ - Message(content="what is the meaning of life?", role="user") - ], - model: str = ENGINE_CONFIG["model"], - max_tokens: Annotated[ - int, - Ge(128), - Le(ENGINE_CONFIG["max_model_len"]), - ] = ENGINE_CONFIG["max_model_len"], - stop: Optional[list[str]] = None, - ) -> AsyncGenerator[str, None]: - """ - light-weight chat API that takes in a list of messages and returns a response - """ - from vllm import SamplingParams - - try: - if OVERRIDE_CHAT_TEMPLATE: # community chat template - gen_config = _get_gen_config(CONSTANTS["chat_template"]) - if not stop: - if gen_config["stop_str"]: - stop = [gen_config["stop_str"]] - else: - stop = [] - system_prompt = gen_config["system_prompt"] - self.tokenizer.chat_template = gen_config["template"] - else: - if not stop: - if self.tokenizer.eos_token is not None: - stop = [self.tokenizer.eos_token] - else: - stop = [] - system_prompt = None - - SAMPLING_PARAM = SamplingParams( - max_tokens=max_tokens, - stop=stop, - ) - if system_prompt and messages[0].role != "system": - messages = [dict(role="system", content=system_prompt)] + messages - - prompt = self.tokenizer.apply_chat_template( - messages, - tokenize=False, - add_generation_prompt=True, - ) - - stream = await self.engine.add_request( - uuid.uuid4().hex, prompt, SAMPLING_PARAM - ) - - cursor = 0 - strip_flag = True - async for request_output in stream: - text = request_output.outputs[0].text - assistant_message = text[cursor:] - if not strip_flag: # strip the leading whitespace - yield assistant_message - elif assistant_message.strip(): - strip_flag = False - yield assistant_message.lstrip() - cursor = len(text) - except Exception as e: - logger.error(f"Error in chat API: {e}") - yield f"Error in chat API: {e}" - - -@functools.lru_cache(maxsize=1) -def _get_gen_config(community_chat_template: str) -> dict: - logger.info(f"Load community_chat_template: {community_chat_template}") - chat_template_path = os.path.join( - os.path.dirname(__file__), "chat_templates", "chat_templates" - ) - config_path = os.path.join( - os.path.dirname(__file__), "chat_templates", "generation_configs" - ) - with open(os.path.join(config_path, f"{community_chat_template}.json")) as f: - gen_config = json.load(f) - chat_template_file = gen_config["chat_template"].split("/")[-1] - with open(os.path.join(chat_template_path, chat_template_file)) as f: - chat_template = f.read() - gen_config["template"] = chat_template.replace(" ", "").replace("\n", "") - return gen_config diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/README.md b/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/README.md deleted file mode 100644 index 8de4e3e2..00000000 --- a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/README.md +++ /dev/null @@ -1,16 +0,0 @@ -# mistral:7b-instruct-awq-4bit-2a5e - -[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML) -[![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/) -[![join_slack](https://badgen.net/badge/Join/BentoML%20Slack/cyan?icon=slack)](https://l.bentoml.com/join-slack-swagger) -[![BentoML GitHub Repo](https://img.shields.io/github/stars/bentoml/bentoml?style=social)](https://github.com/bentoml/BentoML) -[![Twitter Follow](https://img.shields.io/twitter/follow/bentomlai?label=Follow%20BentoML&style=social)](https://twitter.com/bentomlai) - -This is a Machine Learning Service created with BentoML. - -## Help - -* [📖 Documentation](https://docs.bentoml.com/en/latest/): Learn how to use BentoML. -* [💬 Community](https://l.bentoml.com/join-slack-swagger): Join the BentoML Slack community. -* [🐛 GitHub Issues](https://github.com/bentoml/BentoML/issues): Report bugs and feature requests. -* Tip: you can also [customize this README](https://docs.bentoml.com/en/latest/concepts/bento.html#description). diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/env/python/requirements.lock.txt b/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/env/python/requirements.lock.txt deleted file mode 100644 index 9118ea44..00000000 --- a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/env/python/requirements.lock.txt +++ /dev/null @@ -1,147 +0,0 @@ -aiohttp==3.9.5 -aiosignal==1.3.1 -annotated-types==0.7.0 -anyio==4.4.0 -appdirs==1.4.4 -asgiref==3.8.1 -async-timeout==4.0.3 -attrs==23.2.0 -bentoml==1.2.19 -build==1.2.1 -cattrs==23.1.2 -certifi==2024.7.4 -charset-normalizer==3.3.2 -circus==0.18.0 -click==8.1.7 -click-option-group==0.5.6 -cloudpickle==3.0.0 -cmake==3.30.0 -deepmerge==1.1.1 -deprecated==1.2.14 -diskcache==5.6.3 -distro==1.9.0 -dnspython==2.6.1 -email-validator==2.2.0 -exceptiongroup==1.2.1 -fastapi==0.111.0 -fastapi-cli==0.0.4 -filelock==3.15.4 -frozenlist==1.4.1 -fs==2.4.16 -fsspec==2024.6.1 -h11==0.14.0 -httpcore==1.0.5 -httptools==0.6.1 -httpx==0.27.0 -httpx-ws==0.6.0 -huggingface-hub==0.23.4 -idna==3.7 -importlib-metadata==6.11.0 -inflection==0.5.1 -interegular==0.3.3 -jinja2==3.1.4 -joblib==1.4.2 -jsonschema==4.22.0 -jsonschema-specifications==2023.12.1 -lark==1.1.9 -llvmlite==0.43.0 -lm-format-enforcer==0.10.1 -markdown-it-py==3.0.0 -markupsafe==2.1.5 -mdurl==0.1.2 -mpmath==1.3.0 -msgpack==1.0.8 -multidict==6.0.5 -nest-asyncio==1.6.0 -networkx==3.2.1 -ninja==1.11.1.1 -numba==0.60.0 -numpy==1.26.0 -nvidia-cublas-cu12==12.1.3.1 -nvidia-cuda-cupti-cu12==12.1.105 -nvidia-cuda-nvrtc-cu12==12.1.105 -nvidia-cuda-runtime-cu12==12.1.105 -nvidia-cudnn-cu12==8.9.2.26 -nvidia-cufft-cu12==11.0.2.54 -nvidia-curand-cu12==10.3.2.106 -nvidia-cusolver-cu12==11.4.5.107 -nvidia-cusparse-cu12==12.1.0.106 -nvidia-ml-py==11.525.150 -nvidia-nccl-cu12==2.20.5 -nvidia-nvjitlink-cu12==12.5.82 -nvidia-nvtx-cu12==12.1.105 -openai==1.35.10 -opentelemetry-api==1.20.0 -opentelemetry-instrumentation==0.41b0 -opentelemetry-instrumentation-aiohttp-client==0.41b0 -opentelemetry-instrumentation-asgi==0.41b0 -opentelemetry-sdk==1.20.0 -opentelemetry-semantic-conventions==0.41b0 -opentelemetry-util-http==0.41b0 -orjson==3.10.6 -outlines==0.0.34 -packaging==24.1 -pathspec==0.12.1 -pip-requirements-parser==32.0.1 -pip-tools==7.4.1 -prometheus-client==0.20.0 -prometheus-fastapi-instrumentator==7.0.0 -protobuf==5.27.2 -psutil==6.0.0 -py-cpuinfo==9.0.0 -pydantic==2.8.2 -pydantic-core==2.20.1 -pygments==2.18.0 -pyparsing==3.1.2 -pyproject-hooks==1.1.0 -python-dateutil==2.9.0.post0 -python-dotenv==1.0.1 -python-json-logger==2.0.7 -python-multipart==0.0.9 -pyyaml==6.0.1 -pyzmq==26.0.3 -ray==2.31.0 -referencing==0.35.1 -regex==2024.5.15 -requests==2.32.3 -rich==13.7.1 -rpds-py==0.18.1 -safetensors==0.4.3 -schema==0.7.7 -scipy==1.13.1 -sentencepiece==0.2.0 -shellingham==1.5.4 -simple-di==0.1.5 -six==1.16.0 -sniffio==1.3.1 -starlette==0.37.2 -sympy==1.12.1 -tiktoken==0.7.0 -tokenizers==0.19.1 -tomli==2.0.1 -tomli-w==1.0.0 -torch==2.3.0 -tornado==6.4.1 -tqdm==4.66.4 -transformers==4.41.0 -triton==2.3.0 -typer==0.12.3 -typing-extensions==4.12.2 -ujson==5.10.0 -urllib3==2.2.2 -uvicorn[standard]==0.30.1 -uvloop==0.19.0 -vllm==0.4.3 -vllm-flash-attn==2.5.8.post2 -watchfiles==0.22.0 -websockets==12.0 -wheel==0.43.0 -wrapt==1.16.0 -wsproto==1.2.0 -xformers==0.0.26.post1 -yarl==1.9.4 -zipp==3.19.2 - -# The following packages are considered to be unsafe in a requirements file: -pip==24.1.1 -setuptools==70.2.0 diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/service.py b/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/service.py deleted file mode 100644 index 94af0b0d..00000000 --- a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/service.py +++ /dev/null @@ -1,239 +0,0 @@ -import functools -import json -import logging -import os -import sys -import uuid -from typing import AsyncGenerator, Literal, Optional - -import bentoml -import fastapi -import fastapi.staticfiles -import pydantic -import vllm.entrypoints.openai.api_server as vllm_api_server -import yaml -from annotated_types import Ge, Le -from bento_constants import CONSTANT_YAML -from fastapi.responses import FileResponse -from typing_extensions import Annotated, Literal - - -class Message(pydantic.BaseModel): - content: str - role: Literal["system", "user", "assistant"] - - -CONSTANTS = yaml.safe_load(CONSTANT_YAML) - -ENGINE_CONFIG = CONSTANTS["engine_config"] -SERVICE_CONFIG = CONSTANTS["service_config"] -OVERRIDE_CHAT_TEMPLATE = CONSTANTS.get("chat_template") - -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) - - -openai_api_app = fastapi.FastAPI() -static_app = fastapi.FastAPI() -ui_app = fastapi.FastAPI() - - -OPENAI_ENDPOINTS = [ - ["/chat/completions", vllm_api_server.create_chat_completion, ["POST"]], - ["/completions", vllm_api_server.create_completion, ["POST"]], - ["/models", vllm_api_server.show_available_models, ["GET"]], -] - - -class Message(pydantic.BaseModel): - role: Literal["system", "user", "assistant"] - content: str - - -for route, endpoint, methods in OPENAI_ENDPOINTS: - openai_api_app.add_api_route( - path=route, - endpoint=endpoint, - methods=methods, - include_in_schema=True, - ) - - -STATIC_DIR = os.path.join(os.path.dirname(__file__), "ui") - -ui_app.mount( - "/static", fastapi.staticfiles.StaticFiles(directory=STATIC_DIR), name="static" -) - - -@ui_app.get("/") -async def serve_chat_html(): - return FileResponse(os.path.join(STATIC_DIR, "chat.html")) - - -@ui_app.get("/{full_path:path}") -async def catch_all(full_path: str): - file_path = os.path.join(STATIC_DIR, full_path) - if os.path.exists(file_path): - return FileResponse(file_path) - return FileResponse(os.path.join(STATIC_DIR, "chat.html")) - - -# special handling for prometheus_client of bentoml -if "prometheus_client" in sys.modules: - sys.modules.pop("prometheus_client") - - -@bentoml.mount_asgi_app(openai_api_app, path="/v1") -@bentoml.mount_asgi_app(ui_app, path="/ui") -@bentoml.service(**SERVICE_CONFIG) -class VLLM: - def __init__(self) -> None: - from transformers import AutoTokenizer - from vllm import AsyncEngineArgs, AsyncLLMEngine - from vllm.entrypoints.openai.serving_chat import OpenAIServingChat - from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion - - ENGINE_ARGS = AsyncEngineArgs(**ENGINE_CONFIG) - self.engine = AsyncLLMEngine.from_engine_args(ENGINE_ARGS) - self.tokenizer = AutoTokenizer.from_pretrained(ENGINE_CONFIG["model"]) - logger.info(f"VLLM service initialized with model: {ENGINE_CONFIG['model']}") - - if OVERRIDE_CHAT_TEMPLATE: # use community chat template - gen_config = _get_gen_config(CONSTANTS["chat_template"]) - chat_template = gen_config["template"] - else: - chat_template = None - - model_config = self.engine.engine.get_model_config() - - # inject the engine into the openai serving chat and completion - vllm_api_server.openai_serving_chat = OpenAIServingChat( - engine=self.engine, - served_model_names=[ENGINE_CONFIG["model"]], - response_role="assistant", - chat_template=chat_template, - model_config=model_config, - ) - vllm_api_server.openai_serving_completion = OpenAIServingCompletion( - engine=self.engine, - served_model_names=[ENGINE_CONFIG["model"]], - model_config=model_config, - lora_modules=None, - ) - - @bentoml.api(route="/api/generate") - async def generate( - self, - prompt: str = "Explain superconductors like I'm five years old", - model: str = ENGINE_CONFIG["model"], - max_tokens: Annotated[ - int, - Ge(128), - Le(ENGINE_CONFIG["max_model_len"]), - ] = ENGINE_CONFIG["max_model_len"], - stop: Optional[list[str]] = None, - ) -> AsyncGenerator[str, None]: - if stop is None: - stop = [] - - from vllm import SamplingParams - - SAMPLING_PARAM = SamplingParams( - max_tokens=max_tokens, - stop=stop, - ) - stream = await self.engine.add_request(uuid.uuid4().hex, prompt, SAMPLING_PARAM) - - cursor = 0 - async for request_output in stream: - text = request_output.outputs[0].text - yield text[cursor:] - cursor = len(text) - - @bentoml.api(route="/api/chat") - async def chat( - self, - messages: list[Message] = [ - Message(content="what is the meaning of life?", role="user") - ], - model: str = ENGINE_CONFIG["model"], - max_tokens: Annotated[ - int, - Ge(128), - Le(ENGINE_CONFIG["max_model_len"]), - ] = ENGINE_CONFIG["max_model_len"], - stop: Optional[list[str]] = None, - ) -> AsyncGenerator[str, None]: - """ - light-weight chat API that takes in a list of messages and returns a response - """ - from vllm import SamplingParams - - try: - if OVERRIDE_CHAT_TEMPLATE: # community chat template - gen_config = _get_gen_config(CONSTANTS["chat_template"]) - if not stop: - if gen_config["stop_str"]: - stop = [gen_config["stop_str"]] - else: - stop = [] - system_prompt = gen_config["system_prompt"] - self.tokenizer.chat_template = gen_config["template"] - else: - if not stop: - if self.tokenizer.eos_token is not None: - stop = [self.tokenizer.eos_token] - else: - stop = [] - system_prompt = None - - SAMPLING_PARAM = SamplingParams( - max_tokens=max_tokens, - stop=stop, - ) - if system_prompt and messages[0].role != "system": - messages = [dict(role="system", content=system_prompt)] + messages - - prompt = self.tokenizer.apply_chat_template( - messages, - tokenize=False, - add_generation_prompt=True, - ) - - stream = await self.engine.add_request( - uuid.uuid4().hex, prompt, SAMPLING_PARAM - ) - - cursor = 0 - strip_flag = True - async for request_output in stream: - text = request_output.outputs[0].text - assistant_message = text[cursor:] - if not strip_flag: # strip the leading whitespace - yield assistant_message - elif assistant_message.strip(): - strip_flag = False - yield assistant_message.lstrip() - cursor = len(text) - except Exception as e: - logger.error(f"Error in chat API: {e}") - yield f"Error in chat API: {e}" - - -@functools.lru_cache(maxsize=1) -def _get_gen_config(community_chat_template: str) -> dict: - logger.info(f"Load community_chat_template: {community_chat_template}") - chat_template_path = os.path.join( - os.path.dirname(__file__), "chat_templates", "chat_templates" - ) - config_path = os.path.join( - os.path.dirname(__file__), "chat_templates", "generation_configs" - ) - with open(os.path.join(config_path, f"{community_chat_template}.json")) as f: - gen_config = json.load(f) - chat_template_file = gen_config["chat_template"].split("/")[-1] - with open(os.path.join(chat_template_path, chat_template_file)) as f: - chat_template = f.read() - gen_config["template"] = chat_template.replace(" ", "").replace("\n", "") - return gen_config diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/README.md b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/README.md new file mode 100644 index 00000000..d0d777b9 --- /dev/null +++ b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/README.md @@ -0,0 +1,16 @@ +# mistral:7b-instruct-awq-4bit-33ce + +[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML) +[![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/) +[![join_slack](https://badgen.net/badge/Join/BentoML%20Slack/cyan?icon=slack)](https://l.bentoml.com/join-slack-swagger) +[![BentoML GitHub Repo](https://img.shields.io/github/stars/bentoml/bentoml?style=social)](https://github.com/bentoml/BentoML) +[![Twitter Follow](https://img.shields.io/twitter/follow/bentomlai?label=Follow%20BentoML&style=social)](https://twitter.com/bentomlai) + +This is a Machine Learning Service created with BentoML. + +## Help + +* [📖 Documentation](https://docs.bentoml.com/en/latest/): Learn how to use BentoML. +* [💬 Community](https://l.bentoml.com/join-slack-swagger): Join the BentoML Slack community. +* [🐛 GitHub Issues](https://github.com/bentoml/BentoML/issues): Report bugs and feature requests. +* Tip: you can also [customize this README](https://docs.bentoml.com/en/latest/concepts/bento.html#description). diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/apis/openapi.yaml b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/apis/openapi.yaml similarity index 99% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/apis/openapi.yaml rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/apis/openapi.yaml index b2d4a6ae..b709eb38 100644 --- a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/apis/openapi.yaml +++ b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/apis/openapi.yaml @@ -1036,6 +1036,39 @@ paths: tags: - Service APIs x-bentoml-name: generate + /chat/: + get: + operationId: serve_chat_html__get + responses: + '200': + content: + application/json: + schema: {} + description: Successful Response + summary: Serve Chat Html + /chat/{full_path}: + get: + operationId: catch_all__full_path__get + parameters: + - in: path + name: full_path + required: true + schema: + title: Full Path + type: string + responses: + '200': + content: + application/json: + schema: {} + description: Successful Response + '422': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + description: Validation Error + summary: Catch All /healthz: get: description: Health check endpoint. Expecting an empty response with status @@ -1076,39 +1109,6 @@ paths: description: Successful Response tags: - Infrastructure - /ui/: - get: - operationId: serve_chat_html__get - responses: - '200': - content: - application/json: - schema: {} - description: Successful Response - summary: Serve Chat Html - /ui/{full_path}: - get: - operationId: catch_all__full_path__get - parameters: - - in: path - name: full_path - required: true - schema: - title: Full Path - type: string - responses: - '200': - content: - application/json: - schema: {} - description: Successful Response - '422': - content: - application/json: - schema: - $ref: '#/components/schemas/HTTPValidationError' - description: Validation Error - summary: Catch All /v1/chat/completions: post: operationId: create_chat_completion_chat_completions_post diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/apis/schema.json b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/apis/schema.json similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/apis/schema.json rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/apis/schema.json diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/bento.yaml b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/bento.yaml similarity index 97% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/bento.yaml rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/bento.yaml index 7bbe7ee5..e078bf6f 100644 --- a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/bento.yaml +++ b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/bento.yaml @@ -1,8 +1,8 @@ service: service:VLLM name: mistral -version: 7b-instruct-awq-4bit-2a5e +version: 7b-instruct-awq-4bit-33ce bentoml_version: 1.2.19 -creation_time: '2024-07-05T07:27:48.637768+00:00' +creation_time: '2024-07-08T07:52:01.835826+00:00' labels: openllm_alias: 7b-4bit,7b-instruct-4bit openllm_hf_model_id: TheBloke/Mistral-7B-Instruct-v0.1-AWQ diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/env/docker/Dockerfile b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/env/docker/Dockerfile similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/env/docker/Dockerfile rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/env/docker/Dockerfile diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/env/docker/entrypoint.sh b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/env/docker/entrypoint.sh similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/env/docker/entrypoint.sh rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/env/docker/entrypoint.sh diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/env/python/install.sh b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/env/python/install.sh similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/env/python/install.sh rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/env/python/install.sh diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/env/python/requirements.lock.txt b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/env/python/requirements.lock.txt new file mode 100644 index 00000000..925ce193 --- /dev/null +++ b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/env/python/requirements.lock.txt @@ -0,0 +1,147 @@ +aiohttp==3.9.5 +aiosignal==1.3.1 +annotated-types==0.7.0 +anyio==4.4.0 +appdirs==1.4.4 +asgiref==3.8.1 +async-timeout==4.0.3 +attrs==23.2.0 +bentoml==1.2.19 +build==1.2.1 +cattrs==23.1.2 +certifi==2024.7.4 +charset-normalizer==3.3.2 +circus==0.18.0 +click==8.1.7 +click-option-group==0.5.6 +cloudpickle==3.0.0 +cmake==3.30.0 +deepmerge==1.1.1 +deprecated==1.2.14 +diskcache==5.6.3 +distro==1.9.0 +dnspython==2.6.1 +email-validator==2.2.0 +exceptiongroup==1.2.1 +fastapi==0.111.0 +fastapi-cli==0.0.4 +filelock==3.15.4 +frozenlist==1.4.1 +fs==2.4.16 +fsspec==2024.6.1 +h11==0.14.0 +httpcore==1.0.5 +httptools==0.6.1 +httpx==0.27.0 +httpx-ws==0.6.0 +huggingface-hub==0.23.4 +idna==3.7 +importlib-metadata==6.11.0 +inflection==0.5.1 +interegular==0.3.3 +jinja2==3.1.4 +joblib==1.4.2 +jsonschema==4.22.0 +jsonschema-specifications==2023.12.1 +lark==1.1.9 +llvmlite==0.43.0 +lm-format-enforcer==0.10.1 +markdown-it-py==3.0.0 +markupsafe==2.1.5 +mdurl==0.1.2 +mpmath==1.3.0 +msgpack==1.0.8 +multidict==6.0.5 +nest-asyncio==1.6.0 +networkx==3.2.1 +ninja==1.11.1.1 +numba==0.60.0 +numpy==1.26.0 +nvidia-cublas-cu12==12.1.3.1 +nvidia-cuda-cupti-cu12==12.1.105 +nvidia-cuda-nvrtc-cu12==12.1.105 +nvidia-cuda-runtime-cu12==12.1.105 +nvidia-cudnn-cu12==8.9.2.26 +nvidia-cufft-cu12==11.0.2.54 +nvidia-curand-cu12==10.3.2.106 +nvidia-cusolver-cu12==11.4.5.107 +nvidia-cusparse-cu12==12.1.0.106 +nvidia-ml-py==11.525.150 +nvidia-nccl-cu12==2.20.5 +nvidia-nvjitlink-cu12==12.5.82 +nvidia-nvtx-cu12==12.1.105 +openai==1.35.10 +opentelemetry-api==1.20.0 +opentelemetry-instrumentation==0.41b0 +opentelemetry-instrumentation-aiohttp-client==0.41b0 +opentelemetry-instrumentation-asgi==0.41b0 +opentelemetry-sdk==1.20.0 +opentelemetry-semantic-conventions==0.41b0 +opentelemetry-util-http==0.41b0 +orjson==3.10.6 +outlines==0.0.34 +packaging==24.1 +pathspec==0.12.1 +pip-requirements-parser==32.0.1 +pip-tools==7.4.1 +prometheus-client==0.20.0 +prometheus-fastapi-instrumentator==7.0.0 +protobuf==5.27.2 +psutil==6.0.0 +py-cpuinfo==9.0.0 +pydantic==2.8.2 +pydantic-core==2.20.1 +pygments==2.18.0 +pyparsing==3.1.2 +pyproject-hooks==1.1.0 +python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 +python-json-logger==2.0.7 +python-multipart==0.0.9 +pyyaml==6.0.1 +pyzmq==26.0.3 +ray==2.31.0 +referencing==0.35.1 +regex==2024.5.15 +requests==2.32.3 +rich==13.7.1 +rpds-py==0.18.1 +safetensors==0.4.3 +schema==0.7.7 +scipy==1.13.1 +sentencepiece==0.2.0 +shellingham==1.5.4 +simple-di==0.1.5 +six==1.16.0 +sniffio==1.3.1 +starlette==0.37.2 +sympy==1.12.1 +tiktoken==0.7.0 +tokenizers==0.19.1 +tomli==2.0.1 +tomli-w==1.0.0 +torch==2.3.0 +tornado==6.4.1 +tqdm==4.66.4 +transformers==4.41.0 +triton==2.3.0 +typer==0.12.3 +typing-extensions==4.12.2 +ujson==5.10.0 +urllib3==2.2.2 +uvicorn[standard]==0.30.1 +uvloop==0.19.0 +vllm==0.4.3 +vllm-flash-attn==2.5.8.post2 +watchfiles==0.22.0 +websockets==12.0 +wheel==0.43.0 +wrapt==1.16.0 +wsproto==1.2.0 +xformers==0.0.26.post1 +yarl==1.9.4 +zipp==3.19.2 + +# The following packages are considered to be unsafe in a requirements file: +pip==24.1.2 +setuptools==70.2.0 diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/env/python/requirements.txt b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/env/python/requirements.txt similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/env/python/requirements.txt rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/env/python/requirements.txt diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/env/python/version.txt b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/env/python/version.txt similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/env/python/version.txt rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/env/python/version.txt diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/bento_constants.py b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/bento_constants.py similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/bento_constants.py rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/bento_constants.py diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/bentofile.yaml b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/bentofile.yaml similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/bentofile.yaml rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/bentofile.yaml diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/chat_templates/alpaca.jinja b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/chat_templates/alpaca.jinja similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/chat_templates/alpaca.jinja rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/chat_templates/alpaca.jinja diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/chat_templates/amberchat.jinja b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/chat_templates/amberchat.jinja similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/chat_templates/amberchat.jinja rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/chat_templates/amberchat.jinja diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/chat_templates/chatml.jinja b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/chat_templates/chatml.jinja similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/chat_templates/chatml.jinja rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/chat_templates/chatml.jinja diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/chat_templates/chatqa.jinja b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/chat_templates/chatqa.jinja similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/chat_templates/chatqa.jinja rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/chat_templates/chatqa.jinja diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/chat_templates/falcon-instruct.jinja b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/chat_templates/falcon-instruct.jinja similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/chat_templates/falcon-instruct.jinja rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/chat_templates/falcon-instruct.jinja diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/chat_templates/gemma-it.jinja b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/chat_templates/gemma-it.jinja similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/chat_templates/gemma-it.jinja rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/chat_templates/gemma-it.jinja diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/chat_templates/llama-2-chat.jinja b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/chat_templates/llama-2-chat.jinja similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/chat_templates/llama-2-chat.jinja rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/chat_templates/llama-2-chat.jinja diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/chat_templates/llama-3-chat.jinja b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/chat_templates/llama-3-chat.jinja similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/chat_templates/llama-3-chat.jinja rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/chat_templates/llama-3-chat.jinja diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/chat_templates/mistral-instruct.jinja b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/chat_templates/mistral-instruct.jinja similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/chat_templates/mistral-instruct.jinja rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/chat_templates/mistral-instruct.jinja diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/chat_templates/openchat.jinja b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/chat_templates/openchat.jinja similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/chat_templates/openchat.jinja rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/chat_templates/openchat.jinja diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/chat_templates/phi-3.jinja b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/chat_templates/phi-3.jinja similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/chat_templates/phi-3.jinja rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/chat_templates/phi-3.jinja diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/chat_templates/saiga.jinja b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/chat_templates/saiga.jinja similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/chat_templates/saiga.jinja rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/chat_templates/saiga.jinja diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/chat_templates/solar-instruct.jinja b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/chat_templates/solar-instruct.jinja similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/chat_templates/solar-instruct.jinja rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/chat_templates/solar-instruct.jinja diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/chat_templates/vicuna.jinja b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/chat_templates/vicuna.jinja similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/chat_templates/vicuna.jinja rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/chat_templates/vicuna.jinja diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/chat_templates/zephyr.jinja b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/chat_templates/zephyr.jinja similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/chat_templates/zephyr.jinja rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/chat_templates/zephyr.jinja diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/generation_configs/alpaca.json b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/generation_configs/alpaca.json similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/generation_configs/alpaca.json rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/generation_configs/alpaca.json diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/generation_configs/amberchat.json b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/generation_configs/amberchat.json similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/generation_configs/amberchat.json rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/generation_configs/amberchat.json diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/generation_configs/chatqa.json b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/generation_configs/chatqa.json similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/generation_configs/chatqa.json rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/generation_configs/chatqa.json diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/generation_configs/gemma-it.json b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/generation_configs/gemma-it.json similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/generation_configs/gemma-it.json rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/generation_configs/gemma-it.json diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/generation_configs/llama-2-chat.json b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/generation_configs/llama-2-chat.json similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/generation_configs/llama-2-chat.json rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/generation_configs/llama-2-chat.json diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/generation_configs/llama-3-chat.json b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/generation_configs/llama-3-chat.json similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/generation_configs/llama-3-chat.json rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/generation_configs/llama-3-chat.json diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/generation_configs/mistral-instruct.json b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/generation_configs/mistral-instruct.json similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/generation_configs/mistral-instruct.json rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/generation_configs/mistral-instruct.json diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/generation_configs/openchat.json b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/generation_configs/openchat.json similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/generation_configs/openchat.json rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/generation_configs/openchat.json diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/generation_configs/orca-2.json b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/generation_configs/orca-2.json similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/generation_configs/orca-2.json rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/generation_configs/orca-2.json diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/generation_configs/phi-3.json b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/generation_configs/phi-3.json similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/generation_configs/phi-3.json rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/generation_configs/phi-3.json diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/generation_configs/qwen2-chat.json b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/generation_configs/qwen2-chat.json similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/generation_configs/qwen2-chat.json rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/generation_configs/qwen2-chat.json diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/generation_configs/saiga.json b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/generation_configs/saiga.json similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/generation_configs/saiga.json rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/generation_configs/saiga.json diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/generation_configs/solar-instruct.json b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/generation_configs/solar-instruct.json similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/generation_configs/solar-instruct.json rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/generation_configs/solar-instruct.json diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/generation_configs/vicuna.json b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/generation_configs/vicuna.json similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/generation_configs/vicuna.json rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/generation_configs/vicuna.json diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/generation_configs/yi-chat.json b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/generation_configs/yi-chat.json similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/generation_configs/yi-chat.json rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/generation_configs/yi-chat.json diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/generation_configs/zephyr.json b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/generation_configs/zephyr.json similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/chat_templates/generation_configs/zephyr.json rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/chat_templates/generation_configs/zephyr.json diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/service.py b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/service.py new file mode 100644 index 00000000..42152243 --- /dev/null +++ b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/service.py @@ -0,0 +1,239 @@ +import functools +import json +import logging +import os +import sys +import uuid +from typing import AsyncGenerator, Literal, Optional + +import bentoml +import fastapi +import fastapi.staticfiles +import pydantic +import vllm.entrypoints.openai.api_server as vllm_api_server +import yaml +from annotated_types import Ge, Le +from bento_constants import CONSTANT_YAML +from fastapi.responses import FileResponse +from typing_extensions import Annotated, Literal + + +class Message(pydantic.BaseModel): + content: str + role: Literal["system", "user", "assistant"] + + +CONSTANTS = yaml.safe_load(CONSTANT_YAML) + +ENGINE_CONFIG = CONSTANTS["engine_config"] +SERVICE_CONFIG = CONSTANTS["service_config"] +OVERRIDE_CHAT_TEMPLATE = CONSTANTS.get("chat_template") + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +openai_api_app = fastapi.FastAPI() +static_app = fastapi.FastAPI() +ui_app = fastapi.FastAPI() + + +OPENAI_ENDPOINTS = [ + ["/chat/completions", vllm_api_server.create_chat_completion, ["POST"]], + ["/completions", vllm_api_server.create_completion, ["POST"]], + ["/models", vllm_api_server.show_available_models, ["GET"]], +] + + +class Message(pydantic.BaseModel): + role: Literal["system", "user", "assistant"] + content: str + + +for route, endpoint, methods in OPENAI_ENDPOINTS: + openai_api_app.add_api_route( + path=route, + endpoint=endpoint, + methods=methods, + include_in_schema=True, + ) + + +STATIC_DIR = os.path.join(os.path.dirname(__file__), "ui") + +ui_app.mount( + "/static", fastapi.staticfiles.StaticFiles(directory=STATIC_DIR), name="static" +) + + +@ui_app.get("/") +async def serve_chat_html(): + return FileResponse(os.path.join(STATIC_DIR, "chat.html")) + + +@ui_app.get("/{full_path:path}") +async def catch_all(full_path: str): + file_path = os.path.join(STATIC_DIR, full_path) + if os.path.exists(file_path): + return FileResponse(file_path) + return FileResponse(os.path.join(STATIC_DIR, "chat.html")) + + +# special handling for prometheus_client of bentoml +if "prometheus_client" in sys.modules: + sys.modules.pop("prometheus_client") + + +@bentoml.mount_asgi_app(openai_api_app, path="/v1") +@bentoml.mount_asgi_app(ui_app, path="/chat") +@bentoml.service(**SERVICE_CONFIG) +class VLLM: + def __init__(self) -> None: + from transformers import AutoTokenizer + from vllm import AsyncEngineArgs, AsyncLLMEngine + from vllm.entrypoints.openai.serving_chat import OpenAIServingChat + from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion + + ENGINE_ARGS = AsyncEngineArgs(**ENGINE_CONFIG) + self.engine = AsyncLLMEngine.from_engine_args(ENGINE_ARGS) + self.tokenizer = AutoTokenizer.from_pretrained(ENGINE_CONFIG["model"]) + logger.info(f"VLLM service initialized with model: {ENGINE_CONFIG['model']}") + + if OVERRIDE_CHAT_TEMPLATE: # use community chat template + gen_config = _get_gen_config(CONSTANTS["chat_template"]) + chat_template = gen_config["template"] + else: + chat_template = None + + model_config = self.engine.engine.get_model_config() + + # inject the engine into the openai serving chat and completion + vllm_api_server.openai_serving_chat = OpenAIServingChat( + engine=self.engine, + served_model_names=[ENGINE_CONFIG["model"]], + response_role="assistant", + chat_template=chat_template, + model_config=model_config, + ) + vllm_api_server.openai_serving_completion = OpenAIServingCompletion( + engine=self.engine, + served_model_names=[ENGINE_CONFIG["model"]], + model_config=model_config, + lora_modules=None, + ) + + @bentoml.api(route="/api/generate") + async def generate( + self, + prompt: str = "Explain superconductors like I'm five years old", + model: str = ENGINE_CONFIG["model"], + max_tokens: Annotated[ + int, + Ge(128), + Le(ENGINE_CONFIG["max_model_len"]), + ] = ENGINE_CONFIG["max_model_len"], + stop: Optional[list[str]] = None, + ) -> AsyncGenerator[str, None]: + if stop is None: + stop = [] + + from vllm import SamplingParams + + SAMPLING_PARAM = SamplingParams( + max_tokens=max_tokens, + stop=stop, + ) + stream = await self.engine.add_request(uuid.uuid4().hex, prompt, SAMPLING_PARAM) + + cursor = 0 + async for request_output in stream: + text = request_output.outputs[0].text + yield text[cursor:] + cursor = len(text) + + @bentoml.api(route="/api/chat") + async def chat( + self, + messages: list[Message] = [ + Message(content="what is the meaning of life?", role="user") + ], + model: str = ENGINE_CONFIG["model"], + max_tokens: Annotated[ + int, + Ge(128), + Le(ENGINE_CONFIG["max_model_len"]), + ] = ENGINE_CONFIG["max_model_len"], + stop: Optional[list[str]] = None, + ) -> AsyncGenerator[str, None]: + """ + light-weight chat API that takes in a list of messages and returns a response + """ + from vllm import SamplingParams + + try: + if OVERRIDE_CHAT_TEMPLATE: # community chat template + gen_config = _get_gen_config(CONSTANTS["chat_template"]) + if not stop: + if gen_config["stop_str"]: + stop = [gen_config["stop_str"]] + else: + stop = [] + system_prompt = gen_config["system_prompt"] + self.tokenizer.chat_template = gen_config["template"] + else: + if not stop: + if self.tokenizer.eos_token is not None: + stop = [self.tokenizer.eos_token] + else: + stop = [] + system_prompt = None + + SAMPLING_PARAM = SamplingParams( + max_tokens=max_tokens, + stop=stop, + ) + if system_prompt and messages[0].role != "system": + messages = [dict(role="system", content=system_prompt)] + messages + + prompt = self.tokenizer.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True, + ) + + stream = await self.engine.add_request( + uuid.uuid4().hex, prompt, SAMPLING_PARAM + ) + + cursor = 0 + strip_flag = True + async for request_output in stream: + text = request_output.outputs[0].text + assistant_message = text[cursor:] + if not strip_flag: # strip the leading whitespace + yield assistant_message + elif assistant_message.strip(): + strip_flag = False + yield assistant_message.lstrip() + cursor = len(text) + except Exception as e: + logger.error(f"Error in chat API: {e}") + yield f"Error in chat API: {e}" + + +@functools.lru_cache(maxsize=1) +def _get_gen_config(community_chat_template: str) -> dict: + logger.info(f"Load community_chat_template: {community_chat_template}") + chat_template_path = os.path.join( + os.path.dirname(__file__), "chat_templates", "chat_templates" + ) + config_path = os.path.join( + os.path.dirname(__file__), "chat_templates", "generation_configs" + ) + with open(os.path.join(config_path, f"{community_chat_template}.json")) as f: + gen_config = json.load(f) + chat_template_file = gen_config["chat_template"].split("/")[-1] + with open(os.path.join(chat_template_path, chat_template_file)) as f: + chat_template = f.read() + gen_config["template"] = chat_template.replace(" ", "").replace("\n", "") + return gen_config diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/404.html b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/404.html similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/404.html rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/404.html diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/700-532b1fe2415e5859.js b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/700-532b1fe2415e5859.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/700-532b1fe2415e5859.js rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/700-532b1fe2415e5859.js diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/995-34374f39bb210839.js b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/995-34374f39bb210839.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/995-34374f39bb210839.js rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/995-34374f39bb210839.js diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/css/429544bd3cd8ce3a.css b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/css/429544bd3cd8ce3a.css similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/css/429544bd3cd8ce3a.css rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/css/429544bd3cd8ce3a.css diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/css/5b67f082b31cfc7b.css b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/css/5b67f082b31cfc7b.css similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/css/5b67f082b31cfc7b.css rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/css/5b67f082b31cfc7b.css diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/css/9e63023b20ddb15e.css b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/css/9e63023b20ddb15e.css similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/_next/static/css/9e63023b20ddb15e.css rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/_next/static/css/9e63023b20ddb15e.css diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/apple-touch-icon.png b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/apple-touch-icon.png similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/apple-touch-icon.png rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/apple-touch-icon.png diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/chat.html b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/chat.html similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/chat.html rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/chat.html diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/chat.txt b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/chat.txt similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/chat.txt rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/chat.txt diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/favicon-16x16.png b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/favicon-16x16.png similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/favicon-16x16.png rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/favicon-16x16.png diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/favicon.ico b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/favicon.ico similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/favicon.ico rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/favicon.ico diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/index.html b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/index.html similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/index.html rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/index.html diff --git a/bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/index.txt b/bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/index.txt similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-awq-4bit-2a5e/src/ui/index.txt rename to bentoml/bentos/mistral/7b-instruct-awq-4bit-33ce/src/ui/index.txt diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/README.md b/bentoml/bentos/mistral/7b-instruct-fp16-e205/README.md deleted file mode 100644 index 0feac4b5..00000000 --- a/bentoml/bentos/mistral/7b-instruct-fp16-e205/README.md +++ /dev/null @@ -1,16 +0,0 @@ -# mistral:7b-instruct-fp16-e205 - -[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML) -[![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/) -[![join_slack](https://badgen.net/badge/Join/BentoML%20Slack/cyan?icon=slack)](https://l.bentoml.com/join-slack-swagger) -[![BentoML GitHub Repo](https://img.shields.io/github/stars/bentoml/bentoml?style=social)](https://github.com/bentoml/BentoML) -[![Twitter Follow](https://img.shields.io/twitter/follow/bentomlai?label=Follow%20BentoML&style=social)](https://twitter.com/bentomlai) - -This is a Machine Learning Service created with BentoML. - -## Help - -* [📖 Documentation](https://docs.bentoml.com/en/latest/): Learn how to use BentoML. -* [💬 Community](https://l.bentoml.com/join-slack-swagger): Join the BentoML Slack community. -* [🐛 GitHub Issues](https://github.com/bentoml/BentoML/issues): Report bugs and feature requests. -* Tip: you can also [customize this README](https://docs.bentoml.com/en/latest/concepts/bento.html#description). diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/env/python/requirements.lock.txt b/bentoml/bentos/mistral/7b-instruct-fp16-e205/env/python/requirements.lock.txt deleted file mode 100644 index 9118ea44..00000000 --- a/bentoml/bentos/mistral/7b-instruct-fp16-e205/env/python/requirements.lock.txt +++ /dev/null @@ -1,147 +0,0 @@ -aiohttp==3.9.5 -aiosignal==1.3.1 -annotated-types==0.7.0 -anyio==4.4.0 -appdirs==1.4.4 -asgiref==3.8.1 -async-timeout==4.0.3 -attrs==23.2.0 -bentoml==1.2.19 -build==1.2.1 -cattrs==23.1.2 -certifi==2024.7.4 -charset-normalizer==3.3.2 -circus==0.18.0 -click==8.1.7 -click-option-group==0.5.6 -cloudpickle==3.0.0 -cmake==3.30.0 -deepmerge==1.1.1 -deprecated==1.2.14 -diskcache==5.6.3 -distro==1.9.0 -dnspython==2.6.1 -email-validator==2.2.0 -exceptiongroup==1.2.1 -fastapi==0.111.0 -fastapi-cli==0.0.4 -filelock==3.15.4 -frozenlist==1.4.1 -fs==2.4.16 -fsspec==2024.6.1 -h11==0.14.0 -httpcore==1.0.5 -httptools==0.6.1 -httpx==0.27.0 -httpx-ws==0.6.0 -huggingface-hub==0.23.4 -idna==3.7 -importlib-metadata==6.11.0 -inflection==0.5.1 -interegular==0.3.3 -jinja2==3.1.4 -joblib==1.4.2 -jsonschema==4.22.0 -jsonschema-specifications==2023.12.1 -lark==1.1.9 -llvmlite==0.43.0 -lm-format-enforcer==0.10.1 -markdown-it-py==3.0.0 -markupsafe==2.1.5 -mdurl==0.1.2 -mpmath==1.3.0 -msgpack==1.0.8 -multidict==6.0.5 -nest-asyncio==1.6.0 -networkx==3.2.1 -ninja==1.11.1.1 -numba==0.60.0 -numpy==1.26.0 -nvidia-cublas-cu12==12.1.3.1 -nvidia-cuda-cupti-cu12==12.1.105 -nvidia-cuda-nvrtc-cu12==12.1.105 -nvidia-cuda-runtime-cu12==12.1.105 -nvidia-cudnn-cu12==8.9.2.26 -nvidia-cufft-cu12==11.0.2.54 -nvidia-curand-cu12==10.3.2.106 -nvidia-cusolver-cu12==11.4.5.107 -nvidia-cusparse-cu12==12.1.0.106 -nvidia-ml-py==11.525.150 -nvidia-nccl-cu12==2.20.5 -nvidia-nvjitlink-cu12==12.5.82 -nvidia-nvtx-cu12==12.1.105 -openai==1.35.10 -opentelemetry-api==1.20.0 -opentelemetry-instrumentation==0.41b0 -opentelemetry-instrumentation-aiohttp-client==0.41b0 -opentelemetry-instrumentation-asgi==0.41b0 -opentelemetry-sdk==1.20.0 -opentelemetry-semantic-conventions==0.41b0 -opentelemetry-util-http==0.41b0 -orjson==3.10.6 -outlines==0.0.34 -packaging==24.1 -pathspec==0.12.1 -pip-requirements-parser==32.0.1 -pip-tools==7.4.1 -prometheus-client==0.20.0 -prometheus-fastapi-instrumentator==7.0.0 -protobuf==5.27.2 -psutil==6.0.0 -py-cpuinfo==9.0.0 -pydantic==2.8.2 -pydantic-core==2.20.1 -pygments==2.18.0 -pyparsing==3.1.2 -pyproject-hooks==1.1.0 -python-dateutil==2.9.0.post0 -python-dotenv==1.0.1 -python-json-logger==2.0.7 -python-multipart==0.0.9 -pyyaml==6.0.1 -pyzmq==26.0.3 -ray==2.31.0 -referencing==0.35.1 -regex==2024.5.15 -requests==2.32.3 -rich==13.7.1 -rpds-py==0.18.1 -safetensors==0.4.3 -schema==0.7.7 -scipy==1.13.1 -sentencepiece==0.2.0 -shellingham==1.5.4 -simple-di==0.1.5 -six==1.16.0 -sniffio==1.3.1 -starlette==0.37.2 -sympy==1.12.1 -tiktoken==0.7.0 -tokenizers==0.19.1 -tomli==2.0.1 -tomli-w==1.0.0 -torch==2.3.0 -tornado==6.4.1 -tqdm==4.66.4 -transformers==4.41.0 -triton==2.3.0 -typer==0.12.3 -typing-extensions==4.12.2 -ujson==5.10.0 -urllib3==2.2.2 -uvicorn[standard]==0.30.1 -uvloop==0.19.0 -vllm==0.4.3 -vllm-flash-attn==2.5.8.post2 -watchfiles==0.22.0 -websockets==12.0 -wheel==0.43.0 -wrapt==1.16.0 -wsproto==1.2.0 -xformers==0.0.26.post1 -yarl==1.9.4 -zipp==3.19.2 - -# The following packages are considered to be unsafe in a requirements file: -pip==24.1.1 -setuptools==70.2.0 diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/service.py b/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/service.py deleted file mode 100644 index 94af0b0d..00000000 --- a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/service.py +++ /dev/null @@ -1,239 +0,0 @@ -import functools -import json -import logging -import os -import sys -import uuid -from typing import AsyncGenerator, Literal, Optional - -import bentoml -import fastapi -import fastapi.staticfiles -import pydantic -import vllm.entrypoints.openai.api_server as vllm_api_server -import yaml -from annotated_types import Ge, Le -from bento_constants import CONSTANT_YAML -from fastapi.responses import FileResponse -from typing_extensions import Annotated, Literal - - -class Message(pydantic.BaseModel): - content: str - role: Literal["system", "user", "assistant"] - - -CONSTANTS = yaml.safe_load(CONSTANT_YAML) - -ENGINE_CONFIG = CONSTANTS["engine_config"] -SERVICE_CONFIG = CONSTANTS["service_config"] -OVERRIDE_CHAT_TEMPLATE = CONSTANTS.get("chat_template") - -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) - - -openai_api_app = fastapi.FastAPI() -static_app = fastapi.FastAPI() -ui_app = fastapi.FastAPI() - - -OPENAI_ENDPOINTS = [ - ["/chat/completions", vllm_api_server.create_chat_completion, ["POST"]], - ["/completions", vllm_api_server.create_completion, ["POST"]], - ["/models", vllm_api_server.show_available_models, ["GET"]], -] - - -class Message(pydantic.BaseModel): - role: Literal["system", "user", "assistant"] - content: str - - -for route, endpoint, methods in OPENAI_ENDPOINTS: - openai_api_app.add_api_route( - path=route, - endpoint=endpoint, - methods=methods, - include_in_schema=True, - ) - - -STATIC_DIR = os.path.join(os.path.dirname(__file__), "ui") - -ui_app.mount( - "/static", fastapi.staticfiles.StaticFiles(directory=STATIC_DIR), name="static" -) - - -@ui_app.get("/") -async def serve_chat_html(): - return FileResponse(os.path.join(STATIC_DIR, "chat.html")) - - -@ui_app.get("/{full_path:path}") -async def catch_all(full_path: str): - file_path = os.path.join(STATIC_DIR, full_path) - if os.path.exists(file_path): - return FileResponse(file_path) - return FileResponse(os.path.join(STATIC_DIR, "chat.html")) - - -# special handling for prometheus_client of bentoml -if "prometheus_client" in sys.modules: - sys.modules.pop("prometheus_client") - - -@bentoml.mount_asgi_app(openai_api_app, path="/v1") -@bentoml.mount_asgi_app(ui_app, path="/ui") -@bentoml.service(**SERVICE_CONFIG) -class VLLM: - def __init__(self) -> None: - from transformers import AutoTokenizer - from vllm import AsyncEngineArgs, AsyncLLMEngine - from vllm.entrypoints.openai.serving_chat import OpenAIServingChat - from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion - - ENGINE_ARGS = AsyncEngineArgs(**ENGINE_CONFIG) - self.engine = AsyncLLMEngine.from_engine_args(ENGINE_ARGS) - self.tokenizer = AutoTokenizer.from_pretrained(ENGINE_CONFIG["model"]) - logger.info(f"VLLM service initialized with model: {ENGINE_CONFIG['model']}") - - if OVERRIDE_CHAT_TEMPLATE: # use community chat template - gen_config = _get_gen_config(CONSTANTS["chat_template"]) - chat_template = gen_config["template"] - else: - chat_template = None - - model_config = self.engine.engine.get_model_config() - - # inject the engine into the openai serving chat and completion - vllm_api_server.openai_serving_chat = OpenAIServingChat( - engine=self.engine, - served_model_names=[ENGINE_CONFIG["model"]], - response_role="assistant", - chat_template=chat_template, - model_config=model_config, - ) - vllm_api_server.openai_serving_completion = OpenAIServingCompletion( - engine=self.engine, - served_model_names=[ENGINE_CONFIG["model"]], - model_config=model_config, - lora_modules=None, - ) - - @bentoml.api(route="/api/generate") - async def generate( - self, - prompt: str = "Explain superconductors like I'm five years old", - model: str = ENGINE_CONFIG["model"], - max_tokens: Annotated[ - int, - Ge(128), - Le(ENGINE_CONFIG["max_model_len"]), - ] = ENGINE_CONFIG["max_model_len"], - stop: Optional[list[str]] = None, - ) -> AsyncGenerator[str, None]: - if stop is None: - stop = [] - - from vllm import SamplingParams - - SAMPLING_PARAM = SamplingParams( - max_tokens=max_tokens, - stop=stop, - ) - stream = await self.engine.add_request(uuid.uuid4().hex, prompt, SAMPLING_PARAM) - - cursor = 0 - async for request_output in stream: - text = request_output.outputs[0].text - yield text[cursor:] - cursor = len(text) - - @bentoml.api(route="/api/chat") - async def chat( - self, - messages: list[Message] = [ - Message(content="what is the meaning of life?", role="user") - ], - model: str = ENGINE_CONFIG["model"], - max_tokens: Annotated[ - int, - Ge(128), - Le(ENGINE_CONFIG["max_model_len"]), - ] = ENGINE_CONFIG["max_model_len"], - stop: Optional[list[str]] = None, - ) -> AsyncGenerator[str, None]: - """ - light-weight chat API that takes in a list of messages and returns a response - """ - from vllm import SamplingParams - - try: - if OVERRIDE_CHAT_TEMPLATE: # community chat template - gen_config = _get_gen_config(CONSTANTS["chat_template"]) - if not stop: - if gen_config["stop_str"]: - stop = [gen_config["stop_str"]] - else: - stop = [] - system_prompt = gen_config["system_prompt"] - self.tokenizer.chat_template = gen_config["template"] - else: - if not stop: - if self.tokenizer.eos_token is not None: - stop = [self.tokenizer.eos_token] - else: - stop = [] - system_prompt = None - - SAMPLING_PARAM = SamplingParams( - max_tokens=max_tokens, - stop=stop, - ) - if system_prompt and messages[0].role != "system": - messages = [dict(role="system", content=system_prompt)] + messages - - prompt = self.tokenizer.apply_chat_template( - messages, - tokenize=False, - add_generation_prompt=True, - ) - - stream = await self.engine.add_request( - uuid.uuid4().hex, prompt, SAMPLING_PARAM - ) - - cursor = 0 - strip_flag = True - async for request_output in stream: - text = request_output.outputs[0].text - assistant_message = text[cursor:] - if not strip_flag: # strip the leading whitespace - yield assistant_message - elif assistant_message.strip(): - strip_flag = False - yield assistant_message.lstrip() - cursor = len(text) - except Exception as e: - logger.error(f"Error in chat API: {e}") - yield f"Error in chat API: {e}" - - -@functools.lru_cache(maxsize=1) -def _get_gen_config(community_chat_template: str) -> dict: - logger.info(f"Load community_chat_template: {community_chat_template}") - chat_template_path = os.path.join( - os.path.dirname(__file__), "chat_templates", "chat_templates" - ) - config_path = os.path.join( - os.path.dirname(__file__), "chat_templates", "generation_configs" - ) - with open(os.path.join(config_path, f"{community_chat_template}.json")) as f: - gen_config = json.load(f) - chat_template_file = gen_config["chat_template"].split("/")[-1] - with open(os.path.join(chat_template_path, chat_template_file)) as f: - chat_template = f.read() - gen_config["template"] = chat_template.replace(" ", "").replace("\n", "") - return gen_config diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-fe78/README.md b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/README.md new file mode 100644 index 00000000..932624bb --- /dev/null +++ b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/README.md @@ -0,0 +1,16 @@ +# mistral:7b-instruct-fp16-fe78 + +[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML) +[![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/) +[![join_slack](https://badgen.net/badge/Join/BentoML%20Slack/cyan?icon=slack)](https://l.bentoml.com/join-slack-swagger) +[![BentoML GitHub Repo](https://img.shields.io/github/stars/bentoml/bentoml?style=social)](https://github.com/bentoml/BentoML) +[![Twitter Follow](https://img.shields.io/twitter/follow/bentomlai?label=Follow%20BentoML&style=social)](https://twitter.com/bentomlai) + +This is a Machine Learning Service created with BentoML. + +## Help + +* [📖 Documentation](https://docs.bentoml.com/en/latest/): Learn how to use BentoML. +* [💬 Community](https://l.bentoml.com/join-slack-swagger): Join the BentoML Slack community. +* [🐛 GitHub Issues](https://github.com/bentoml/BentoML/issues): Report bugs and feature requests. +* Tip: you can also [customize this README](https://docs.bentoml.com/en/latest/concepts/bento.html#description). diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/apis/openapi.yaml b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/apis/openapi.yaml similarity index 99% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/apis/openapi.yaml rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/apis/openapi.yaml index 8bce7f3c..58094c6e 100644 --- a/bentoml/bentos/mistral/7b-instruct-fp16-e205/apis/openapi.yaml +++ b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/apis/openapi.yaml @@ -1036,6 +1036,39 @@ paths: tags: - Service APIs x-bentoml-name: generate + /chat/: + get: + operationId: serve_chat_html__get + responses: + '200': + content: + application/json: + schema: {} + description: Successful Response + summary: Serve Chat Html + /chat/{full_path}: + get: + operationId: catch_all__full_path__get + parameters: + - in: path + name: full_path + required: true + schema: + title: Full Path + type: string + responses: + '200': + content: + application/json: + schema: {} + description: Successful Response + '422': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + description: Validation Error + summary: Catch All /healthz: get: description: Health check endpoint. Expecting an empty response with status @@ -1076,39 +1109,6 @@ paths: description: Successful Response tags: - Infrastructure - /ui/: - get: - operationId: serve_chat_html__get - responses: - '200': - content: - application/json: - schema: {} - description: Successful Response - summary: Serve Chat Html - /ui/{full_path}: - get: - operationId: catch_all__full_path__get - parameters: - - in: path - name: full_path - required: true - schema: - title: Full Path - type: string - responses: - '200': - content: - application/json: - schema: {} - description: Successful Response - '422': - content: - application/json: - schema: - $ref: '#/components/schemas/HTTPValidationError' - description: Validation Error - summary: Catch All /v1/chat/completions: post: operationId: create_chat_completion_chat_completions_post diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/apis/schema.json b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/apis/schema.json similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/apis/schema.json rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/apis/schema.json diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/bento.yaml b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/bento.yaml similarity index 97% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/bento.yaml rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/bento.yaml index de1227bb..e34f4c8d 100644 --- a/bentoml/bentos/mistral/7b-instruct-fp16-e205/bento.yaml +++ b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/bento.yaml @@ -1,8 +1,8 @@ service: service:VLLM name: mistral -version: 7b-instruct-fp16-e205 +version: 7b-instruct-fp16-fe78 bentoml_version: 1.2.19 -creation_time: '2024-07-05T07:28:21.369963+00:00' +creation_time: '2024-07-08T07:52:53.052699+00:00' labels: openllm_alias: 7b,7b-instruct openllm_hf_model_id: mistralai/Mistral-7B-Instruct-v0.1 diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/env/docker/Dockerfile b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/env/docker/Dockerfile similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/env/docker/Dockerfile rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/env/docker/Dockerfile diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/env/docker/entrypoint.sh b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/env/docker/entrypoint.sh similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/env/docker/entrypoint.sh rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/env/docker/entrypoint.sh diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/env/python/install.sh b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/env/python/install.sh similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/env/python/install.sh rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/env/python/install.sh diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-fe78/env/python/requirements.lock.txt b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/env/python/requirements.lock.txt new file mode 100644 index 00000000..925ce193 --- /dev/null +++ b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/env/python/requirements.lock.txt @@ -0,0 +1,147 @@ +aiohttp==3.9.5 +aiosignal==1.3.1 +annotated-types==0.7.0 +anyio==4.4.0 +appdirs==1.4.4 +asgiref==3.8.1 +async-timeout==4.0.3 +attrs==23.2.0 +bentoml==1.2.19 +build==1.2.1 +cattrs==23.1.2 +certifi==2024.7.4 +charset-normalizer==3.3.2 +circus==0.18.0 +click==8.1.7 +click-option-group==0.5.6 +cloudpickle==3.0.0 +cmake==3.30.0 +deepmerge==1.1.1 +deprecated==1.2.14 +diskcache==5.6.3 +distro==1.9.0 +dnspython==2.6.1 +email-validator==2.2.0 +exceptiongroup==1.2.1 +fastapi==0.111.0 +fastapi-cli==0.0.4 +filelock==3.15.4 +frozenlist==1.4.1 +fs==2.4.16 +fsspec==2024.6.1 +h11==0.14.0 +httpcore==1.0.5 +httptools==0.6.1 +httpx==0.27.0 +httpx-ws==0.6.0 +huggingface-hub==0.23.4 +idna==3.7 +importlib-metadata==6.11.0 +inflection==0.5.1 +interegular==0.3.3 +jinja2==3.1.4 +joblib==1.4.2 +jsonschema==4.22.0 +jsonschema-specifications==2023.12.1 +lark==1.1.9 +llvmlite==0.43.0 +lm-format-enforcer==0.10.1 +markdown-it-py==3.0.0 +markupsafe==2.1.5 +mdurl==0.1.2 +mpmath==1.3.0 +msgpack==1.0.8 +multidict==6.0.5 +nest-asyncio==1.6.0 +networkx==3.2.1 +ninja==1.11.1.1 +numba==0.60.0 +numpy==1.26.0 +nvidia-cublas-cu12==12.1.3.1 +nvidia-cuda-cupti-cu12==12.1.105 +nvidia-cuda-nvrtc-cu12==12.1.105 +nvidia-cuda-runtime-cu12==12.1.105 +nvidia-cudnn-cu12==8.9.2.26 +nvidia-cufft-cu12==11.0.2.54 +nvidia-curand-cu12==10.3.2.106 +nvidia-cusolver-cu12==11.4.5.107 +nvidia-cusparse-cu12==12.1.0.106 +nvidia-ml-py==11.525.150 +nvidia-nccl-cu12==2.20.5 +nvidia-nvjitlink-cu12==12.5.82 +nvidia-nvtx-cu12==12.1.105 +openai==1.35.10 +opentelemetry-api==1.20.0 +opentelemetry-instrumentation==0.41b0 +opentelemetry-instrumentation-aiohttp-client==0.41b0 +opentelemetry-instrumentation-asgi==0.41b0 +opentelemetry-sdk==1.20.0 +opentelemetry-semantic-conventions==0.41b0 +opentelemetry-util-http==0.41b0 +orjson==3.10.6 +outlines==0.0.34 +packaging==24.1 +pathspec==0.12.1 +pip-requirements-parser==32.0.1 +pip-tools==7.4.1 +prometheus-client==0.20.0 +prometheus-fastapi-instrumentator==7.0.0 +protobuf==5.27.2 +psutil==6.0.0 +py-cpuinfo==9.0.0 +pydantic==2.8.2 +pydantic-core==2.20.1 +pygments==2.18.0 +pyparsing==3.1.2 +pyproject-hooks==1.1.0 +python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 +python-json-logger==2.0.7 +python-multipart==0.0.9 +pyyaml==6.0.1 +pyzmq==26.0.3 +ray==2.31.0 +referencing==0.35.1 +regex==2024.5.15 +requests==2.32.3 +rich==13.7.1 +rpds-py==0.18.1 +safetensors==0.4.3 +schema==0.7.7 +scipy==1.13.1 +sentencepiece==0.2.0 +shellingham==1.5.4 +simple-di==0.1.5 +six==1.16.0 +sniffio==1.3.1 +starlette==0.37.2 +sympy==1.12.1 +tiktoken==0.7.0 +tokenizers==0.19.1 +tomli==2.0.1 +tomli-w==1.0.0 +torch==2.3.0 +tornado==6.4.1 +tqdm==4.66.4 +transformers==4.41.0 +triton==2.3.0 +typer==0.12.3 +typing-extensions==4.12.2 +ujson==5.10.0 +urllib3==2.2.2 +uvicorn[standard]==0.30.1 +uvloop==0.19.0 +vllm==0.4.3 +vllm-flash-attn==2.5.8.post2 +watchfiles==0.22.0 +websockets==12.0 +wheel==0.43.0 +wrapt==1.16.0 +wsproto==1.2.0 +xformers==0.0.26.post1 +yarl==1.9.4 +zipp==3.19.2 + +# The following packages are considered to be unsafe in a requirements file: +pip==24.1.2 +setuptools==70.2.0 diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/env/python/requirements.txt b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/env/python/requirements.txt similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/env/python/requirements.txt rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/env/python/requirements.txt diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/env/python/version.txt b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/env/python/version.txt similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/env/python/version.txt rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/env/python/version.txt diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/bento_constants.py b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/bento_constants.py similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/bento_constants.py rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/bento_constants.py diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/bentofile.yaml b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/bentofile.yaml similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/bentofile.yaml rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/bentofile.yaml diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/chat_templates/alpaca.jinja b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/chat_templates/alpaca.jinja similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/chat_templates/alpaca.jinja rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/chat_templates/alpaca.jinja diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/chat_templates/amberchat.jinja b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/chat_templates/amberchat.jinja similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/chat_templates/amberchat.jinja rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/chat_templates/amberchat.jinja diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/chat_templates/chatml.jinja b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/chat_templates/chatml.jinja similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/chat_templates/chatml.jinja rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/chat_templates/chatml.jinja diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/chat_templates/chatqa.jinja b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/chat_templates/chatqa.jinja similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/chat_templates/chatqa.jinja rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/chat_templates/chatqa.jinja diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/chat_templates/falcon-instruct.jinja b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/chat_templates/falcon-instruct.jinja similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/chat_templates/falcon-instruct.jinja rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/chat_templates/falcon-instruct.jinja diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/chat_templates/gemma-it.jinja b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/chat_templates/gemma-it.jinja similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/chat_templates/gemma-it.jinja rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/chat_templates/gemma-it.jinja diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/chat_templates/llama-2-chat.jinja b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/chat_templates/llama-2-chat.jinja similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/chat_templates/llama-2-chat.jinja rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/chat_templates/llama-2-chat.jinja diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/chat_templates/llama-3-chat.jinja b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/chat_templates/llama-3-chat.jinja similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/chat_templates/llama-3-chat.jinja rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/chat_templates/llama-3-chat.jinja diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/chat_templates/mistral-instruct.jinja b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/chat_templates/mistral-instruct.jinja similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/chat_templates/mistral-instruct.jinja rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/chat_templates/mistral-instruct.jinja diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/chat_templates/openchat.jinja b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/chat_templates/openchat.jinja similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/chat_templates/openchat.jinja rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/chat_templates/openchat.jinja diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/chat_templates/phi-3.jinja b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/chat_templates/phi-3.jinja similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/chat_templates/phi-3.jinja rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/chat_templates/phi-3.jinja diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/chat_templates/saiga.jinja b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/chat_templates/saiga.jinja similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/chat_templates/saiga.jinja rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/chat_templates/saiga.jinja diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/chat_templates/solar-instruct.jinja b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/chat_templates/solar-instruct.jinja similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/chat_templates/solar-instruct.jinja rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/chat_templates/solar-instruct.jinja diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/chat_templates/vicuna.jinja b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/chat_templates/vicuna.jinja similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/chat_templates/vicuna.jinja rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/chat_templates/vicuna.jinja diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/chat_templates/zephyr.jinja b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/chat_templates/zephyr.jinja similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/chat_templates/zephyr.jinja rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/chat_templates/zephyr.jinja diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/generation_configs/alpaca.json b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/generation_configs/alpaca.json similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/generation_configs/alpaca.json rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/generation_configs/alpaca.json diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/generation_configs/amberchat.json b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/generation_configs/amberchat.json similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/generation_configs/amberchat.json rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/generation_configs/amberchat.json diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/generation_configs/chatqa.json b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/generation_configs/chatqa.json similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/generation_configs/chatqa.json rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/generation_configs/chatqa.json diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/generation_configs/gemma-it.json b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/generation_configs/gemma-it.json similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/generation_configs/gemma-it.json rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/generation_configs/gemma-it.json diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/generation_configs/llama-2-chat.json b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/generation_configs/llama-2-chat.json similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/generation_configs/llama-2-chat.json rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/generation_configs/llama-2-chat.json diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/generation_configs/llama-3-chat.json b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/generation_configs/llama-3-chat.json similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/generation_configs/llama-3-chat.json rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/generation_configs/llama-3-chat.json diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/generation_configs/mistral-instruct.json b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/generation_configs/mistral-instruct.json similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/generation_configs/mistral-instruct.json rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/generation_configs/mistral-instruct.json diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/generation_configs/openchat.json b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/generation_configs/openchat.json similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/generation_configs/openchat.json rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/generation_configs/openchat.json diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/generation_configs/orca-2.json b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/generation_configs/orca-2.json similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/generation_configs/orca-2.json rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/generation_configs/orca-2.json diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/generation_configs/phi-3.json b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/generation_configs/phi-3.json similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/generation_configs/phi-3.json rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/generation_configs/phi-3.json diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/generation_configs/qwen2-chat.json b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/generation_configs/qwen2-chat.json similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/generation_configs/qwen2-chat.json rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/generation_configs/qwen2-chat.json diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/generation_configs/saiga.json b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/generation_configs/saiga.json similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/generation_configs/saiga.json rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/generation_configs/saiga.json diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/generation_configs/solar-instruct.json b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/generation_configs/solar-instruct.json similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/generation_configs/solar-instruct.json rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/generation_configs/solar-instruct.json diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/generation_configs/vicuna.json b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/generation_configs/vicuna.json similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/generation_configs/vicuna.json rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/generation_configs/vicuna.json diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/generation_configs/yi-chat.json b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/generation_configs/yi-chat.json similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/generation_configs/yi-chat.json rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/generation_configs/yi-chat.json diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/generation_configs/zephyr.json b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/generation_configs/zephyr.json similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/chat_templates/generation_configs/zephyr.json rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/chat_templates/generation_configs/zephyr.json diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/service.py b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/service.py new file mode 100644 index 00000000..42152243 --- /dev/null +++ b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/service.py @@ -0,0 +1,239 @@ +import functools +import json +import logging +import os +import sys +import uuid +from typing import AsyncGenerator, Literal, Optional + +import bentoml +import fastapi +import fastapi.staticfiles +import pydantic +import vllm.entrypoints.openai.api_server as vllm_api_server +import yaml +from annotated_types import Ge, Le +from bento_constants import CONSTANT_YAML +from fastapi.responses import FileResponse +from typing_extensions import Annotated, Literal + + +class Message(pydantic.BaseModel): + content: str + role: Literal["system", "user", "assistant"] + + +CONSTANTS = yaml.safe_load(CONSTANT_YAML) + +ENGINE_CONFIG = CONSTANTS["engine_config"] +SERVICE_CONFIG = CONSTANTS["service_config"] +OVERRIDE_CHAT_TEMPLATE = CONSTANTS.get("chat_template") + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +openai_api_app = fastapi.FastAPI() +static_app = fastapi.FastAPI() +ui_app = fastapi.FastAPI() + + +OPENAI_ENDPOINTS = [ + ["/chat/completions", vllm_api_server.create_chat_completion, ["POST"]], + ["/completions", vllm_api_server.create_completion, ["POST"]], + ["/models", vllm_api_server.show_available_models, ["GET"]], +] + + +class Message(pydantic.BaseModel): + role: Literal["system", "user", "assistant"] + content: str + + +for route, endpoint, methods in OPENAI_ENDPOINTS: + openai_api_app.add_api_route( + path=route, + endpoint=endpoint, + methods=methods, + include_in_schema=True, + ) + + +STATIC_DIR = os.path.join(os.path.dirname(__file__), "ui") + +ui_app.mount( + "/static", fastapi.staticfiles.StaticFiles(directory=STATIC_DIR), name="static" +) + + +@ui_app.get("/") +async def serve_chat_html(): + return FileResponse(os.path.join(STATIC_DIR, "chat.html")) + + +@ui_app.get("/{full_path:path}") +async def catch_all(full_path: str): + file_path = os.path.join(STATIC_DIR, full_path) + if os.path.exists(file_path): + return FileResponse(file_path) + return FileResponse(os.path.join(STATIC_DIR, "chat.html")) + + +# special handling for prometheus_client of bentoml +if "prometheus_client" in sys.modules: + sys.modules.pop("prometheus_client") + + +@bentoml.mount_asgi_app(openai_api_app, path="/v1") +@bentoml.mount_asgi_app(ui_app, path="/chat") +@bentoml.service(**SERVICE_CONFIG) +class VLLM: + def __init__(self) -> None: + from transformers import AutoTokenizer + from vllm import AsyncEngineArgs, AsyncLLMEngine + from vllm.entrypoints.openai.serving_chat import OpenAIServingChat + from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion + + ENGINE_ARGS = AsyncEngineArgs(**ENGINE_CONFIG) + self.engine = AsyncLLMEngine.from_engine_args(ENGINE_ARGS) + self.tokenizer = AutoTokenizer.from_pretrained(ENGINE_CONFIG["model"]) + logger.info(f"VLLM service initialized with model: {ENGINE_CONFIG['model']}") + + if OVERRIDE_CHAT_TEMPLATE: # use community chat template + gen_config = _get_gen_config(CONSTANTS["chat_template"]) + chat_template = gen_config["template"] + else: + chat_template = None + + model_config = self.engine.engine.get_model_config() + + # inject the engine into the openai serving chat and completion + vllm_api_server.openai_serving_chat = OpenAIServingChat( + engine=self.engine, + served_model_names=[ENGINE_CONFIG["model"]], + response_role="assistant", + chat_template=chat_template, + model_config=model_config, + ) + vllm_api_server.openai_serving_completion = OpenAIServingCompletion( + engine=self.engine, + served_model_names=[ENGINE_CONFIG["model"]], + model_config=model_config, + lora_modules=None, + ) + + @bentoml.api(route="/api/generate") + async def generate( + self, + prompt: str = "Explain superconductors like I'm five years old", + model: str = ENGINE_CONFIG["model"], + max_tokens: Annotated[ + int, + Ge(128), + Le(ENGINE_CONFIG["max_model_len"]), + ] = ENGINE_CONFIG["max_model_len"], + stop: Optional[list[str]] = None, + ) -> AsyncGenerator[str, None]: + if stop is None: + stop = [] + + from vllm import SamplingParams + + SAMPLING_PARAM = SamplingParams( + max_tokens=max_tokens, + stop=stop, + ) + stream = await self.engine.add_request(uuid.uuid4().hex, prompt, SAMPLING_PARAM) + + cursor = 0 + async for request_output in stream: + text = request_output.outputs[0].text + yield text[cursor:] + cursor = len(text) + + @bentoml.api(route="/api/chat") + async def chat( + self, + messages: list[Message] = [ + Message(content="what is the meaning of life?", role="user") + ], + model: str = ENGINE_CONFIG["model"], + max_tokens: Annotated[ + int, + Ge(128), + Le(ENGINE_CONFIG["max_model_len"]), + ] = ENGINE_CONFIG["max_model_len"], + stop: Optional[list[str]] = None, + ) -> AsyncGenerator[str, None]: + """ + light-weight chat API that takes in a list of messages and returns a response + """ + from vllm import SamplingParams + + try: + if OVERRIDE_CHAT_TEMPLATE: # community chat template + gen_config = _get_gen_config(CONSTANTS["chat_template"]) + if not stop: + if gen_config["stop_str"]: + stop = [gen_config["stop_str"]] + else: + stop = [] + system_prompt = gen_config["system_prompt"] + self.tokenizer.chat_template = gen_config["template"] + else: + if not stop: + if self.tokenizer.eos_token is not None: + stop = [self.tokenizer.eos_token] + else: + stop = [] + system_prompt = None + + SAMPLING_PARAM = SamplingParams( + max_tokens=max_tokens, + stop=stop, + ) + if system_prompt and messages[0].role != "system": + messages = [dict(role="system", content=system_prompt)] + messages + + prompt = self.tokenizer.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True, + ) + + stream = await self.engine.add_request( + uuid.uuid4().hex, prompt, SAMPLING_PARAM + ) + + cursor = 0 + strip_flag = True + async for request_output in stream: + text = request_output.outputs[0].text + assistant_message = text[cursor:] + if not strip_flag: # strip the leading whitespace + yield assistant_message + elif assistant_message.strip(): + strip_flag = False + yield assistant_message.lstrip() + cursor = len(text) + except Exception as e: + logger.error(f"Error in chat API: {e}") + yield f"Error in chat API: {e}" + + +@functools.lru_cache(maxsize=1) +def _get_gen_config(community_chat_template: str) -> dict: + logger.info(f"Load community_chat_template: {community_chat_template}") + chat_template_path = os.path.join( + os.path.dirname(__file__), "chat_templates", "chat_templates" + ) + config_path = os.path.join( + os.path.dirname(__file__), "chat_templates", "generation_configs" + ) + with open(os.path.join(config_path, f"{community_chat_template}.json")) as f: + gen_config = json.load(f) + chat_template_file = gen_config["chat_template"].split("/")[-1] + with open(os.path.join(chat_template_path, chat_template_file)) as f: + chat_template = f.read() + gen_config["template"] = chat_template.replace(" ", "").replace("\n", "") + return gen_config diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/404.html b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/404.html similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/404.html rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/404.html diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/700-532b1fe2415e5859.js b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/700-532b1fe2415e5859.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/700-532b1fe2415e5859.js rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/700-532b1fe2415e5859.js diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/995-34374f39bb210839.js b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/995-34374f39bb210839.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/995-34374f39bb210839.js rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/995-34374f39bb210839.js diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/css/429544bd3cd8ce3a.css b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/css/429544bd3cd8ce3a.css similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/css/429544bd3cd8ce3a.css rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/css/429544bd3cd8ce3a.css diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/css/5b67f082b31cfc7b.css b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/css/5b67f082b31cfc7b.css similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/css/5b67f082b31cfc7b.css rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/css/5b67f082b31cfc7b.css diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/css/9e63023b20ddb15e.css b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/css/9e63023b20ddb15e.css similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/_next/static/css/9e63023b20ddb15e.css rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/_next/static/css/9e63023b20ddb15e.css diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/apple-touch-icon.png b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/apple-touch-icon.png similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/apple-touch-icon.png rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/apple-touch-icon.png diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/chat.html b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/chat.html similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/chat.html rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/chat.html diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/chat.txt b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/chat.txt similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/chat.txt rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/chat.txt diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/favicon-16x16.png b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/favicon-16x16.png similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/favicon-16x16.png rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/favicon-16x16.png diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/favicon.ico b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/favicon.ico similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/favicon.ico rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/favicon.ico diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/index.html b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/index.html similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/index.html rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/index.html diff --git a/bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/index.txt b/bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/index.txt similarity index 100% rename from bentoml/bentos/mistral/7b-instruct-fp16-e205/src/ui/index.txt rename to bentoml/bentos/mistral/7b-instruct-fp16-fe78/src/ui/index.txt diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/README.md b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/README.md new file mode 100644 index 00000000..fac54194 --- /dev/null +++ b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/README.md @@ -0,0 +1,16 @@ +# mixtral:8x7b-instruct-v0.1-awq-4bit-0016 + +[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML) +[![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/) +[![join_slack](https://badgen.net/badge/Join/BentoML%20Slack/cyan?icon=slack)](https://l.bentoml.com/join-slack-swagger) +[![BentoML GitHub Repo](https://img.shields.io/github/stars/bentoml/bentoml?style=social)](https://github.com/bentoml/BentoML) +[![Twitter Follow](https://img.shields.io/twitter/follow/bentomlai?label=Follow%20BentoML&style=social)](https://twitter.com/bentomlai) + +This is a Machine Learning Service created with BentoML. + +## Help + +* [📖 Documentation](https://docs.bentoml.com/en/latest/): Learn how to use BentoML. +* [💬 Community](https://l.bentoml.com/join-slack-swagger): Join the BentoML Slack community. +* [🐛 GitHub Issues](https://github.com/bentoml/BentoML/issues): Report bugs and feature requests. +* Tip: you can also [customize this README](https://docs.bentoml.com/en/latest/concepts/bento.html#description). diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/apis/openapi.yaml b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/apis/openapi.yaml similarity index 99% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/apis/openapi.yaml rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/apis/openapi.yaml index 661e2e4b..6ac630ea 100644 --- a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/apis/openapi.yaml +++ b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/apis/openapi.yaml @@ -1036,6 +1036,39 @@ paths: tags: - Service APIs x-bentoml-name: generate + /chat/: + get: + operationId: serve_chat_html__get + responses: + '200': + content: + application/json: + schema: {} + description: Successful Response + summary: Serve Chat Html + /chat/{full_path}: + get: + operationId: catch_all__full_path__get + parameters: + - in: path + name: full_path + required: true + schema: + title: Full Path + type: string + responses: + '200': + content: + application/json: + schema: {} + description: Successful Response + '422': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + description: Validation Error + summary: Catch All /healthz: get: description: Health check endpoint. Expecting an empty response with status @@ -1076,39 +1109,6 @@ paths: description: Successful Response tags: - Infrastructure - /ui/: - get: - operationId: serve_chat_html__get - responses: - '200': - content: - application/json: - schema: {} - description: Successful Response - summary: Serve Chat Html - /ui/{full_path}: - get: - operationId: catch_all__full_path__get - parameters: - - in: path - name: full_path - required: true - schema: - title: Full Path - type: string - responses: - '200': - content: - application/json: - schema: {} - description: Successful Response - '422': - content: - application/json: - schema: - $ref: '#/components/schemas/HTTPValidationError' - description: Validation Error - summary: Catch All /v1/chat/completions: post: operationId: create_chat_completion_chat_completions_post diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/apis/schema.json b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/apis/schema.json similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/apis/schema.json rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/apis/schema.json diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/bento.yaml b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/bento.yaml similarity index 97% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/bento.yaml rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/bento.yaml index c0379167..26e05d44 100644 --- a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/bento.yaml +++ b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/bento.yaml @@ -1,8 +1,8 @@ service: service:VLLM name: mixtral -version: 8x7b-instruct-v0.1-awq-4bit-32eb +version: 8x7b-instruct-v0.1-awq-4bit-0016 bentoml_version: 1.2.19 -creation_time: '2024-07-05T07:33:21.450969+00:00' +creation_time: '2024-07-08T08:01:04.838303+00:00' labels: openllm_alias: 8x7b-4bit openllm_hf_model_id: casperhansen/mixtral-instruct-awq diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/env/docker/Dockerfile b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/env/docker/Dockerfile similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/env/docker/Dockerfile rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/env/docker/Dockerfile diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/env/docker/entrypoint.sh b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/env/docker/entrypoint.sh similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/env/docker/entrypoint.sh rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/env/docker/entrypoint.sh diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/env/python/install.sh b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/env/python/install.sh similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/env/python/install.sh rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/env/python/install.sh diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/env/python/requirements.lock.txt b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/env/python/requirements.lock.txt new file mode 100644 index 00000000..925ce193 --- /dev/null +++ b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/env/python/requirements.lock.txt @@ -0,0 +1,147 @@ +aiohttp==3.9.5 +aiosignal==1.3.1 +annotated-types==0.7.0 +anyio==4.4.0 +appdirs==1.4.4 +asgiref==3.8.1 +async-timeout==4.0.3 +attrs==23.2.0 +bentoml==1.2.19 +build==1.2.1 +cattrs==23.1.2 +certifi==2024.7.4 +charset-normalizer==3.3.2 +circus==0.18.0 +click==8.1.7 +click-option-group==0.5.6 +cloudpickle==3.0.0 +cmake==3.30.0 +deepmerge==1.1.1 +deprecated==1.2.14 +diskcache==5.6.3 +distro==1.9.0 +dnspython==2.6.1 +email-validator==2.2.0 +exceptiongroup==1.2.1 +fastapi==0.111.0 +fastapi-cli==0.0.4 +filelock==3.15.4 +frozenlist==1.4.1 +fs==2.4.16 +fsspec==2024.6.1 +h11==0.14.0 +httpcore==1.0.5 +httptools==0.6.1 +httpx==0.27.0 +httpx-ws==0.6.0 +huggingface-hub==0.23.4 +idna==3.7 +importlib-metadata==6.11.0 +inflection==0.5.1 +interegular==0.3.3 +jinja2==3.1.4 +joblib==1.4.2 +jsonschema==4.22.0 +jsonschema-specifications==2023.12.1 +lark==1.1.9 +llvmlite==0.43.0 +lm-format-enforcer==0.10.1 +markdown-it-py==3.0.0 +markupsafe==2.1.5 +mdurl==0.1.2 +mpmath==1.3.0 +msgpack==1.0.8 +multidict==6.0.5 +nest-asyncio==1.6.0 +networkx==3.2.1 +ninja==1.11.1.1 +numba==0.60.0 +numpy==1.26.0 +nvidia-cublas-cu12==12.1.3.1 +nvidia-cuda-cupti-cu12==12.1.105 +nvidia-cuda-nvrtc-cu12==12.1.105 +nvidia-cuda-runtime-cu12==12.1.105 +nvidia-cudnn-cu12==8.9.2.26 +nvidia-cufft-cu12==11.0.2.54 +nvidia-curand-cu12==10.3.2.106 +nvidia-cusolver-cu12==11.4.5.107 +nvidia-cusparse-cu12==12.1.0.106 +nvidia-ml-py==11.525.150 +nvidia-nccl-cu12==2.20.5 +nvidia-nvjitlink-cu12==12.5.82 +nvidia-nvtx-cu12==12.1.105 +openai==1.35.10 +opentelemetry-api==1.20.0 +opentelemetry-instrumentation==0.41b0 +opentelemetry-instrumentation-aiohttp-client==0.41b0 +opentelemetry-instrumentation-asgi==0.41b0 +opentelemetry-sdk==1.20.0 +opentelemetry-semantic-conventions==0.41b0 +opentelemetry-util-http==0.41b0 +orjson==3.10.6 +outlines==0.0.34 +packaging==24.1 +pathspec==0.12.1 +pip-requirements-parser==32.0.1 +pip-tools==7.4.1 +prometheus-client==0.20.0 +prometheus-fastapi-instrumentator==7.0.0 +protobuf==5.27.2 +psutil==6.0.0 +py-cpuinfo==9.0.0 +pydantic==2.8.2 +pydantic-core==2.20.1 +pygments==2.18.0 +pyparsing==3.1.2 +pyproject-hooks==1.1.0 +python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 +python-json-logger==2.0.7 +python-multipart==0.0.9 +pyyaml==6.0.1 +pyzmq==26.0.3 +ray==2.31.0 +referencing==0.35.1 +regex==2024.5.15 +requests==2.32.3 +rich==13.7.1 +rpds-py==0.18.1 +safetensors==0.4.3 +schema==0.7.7 +scipy==1.13.1 +sentencepiece==0.2.0 +shellingham==1.5.4 +simple-di==0.1.5 +six==1.16.0 +sniffio==1.3.1 +starlette==0.37.2 +sympy==1.12.1 +tiktoken==0.7.0 +tokenizers==0.19.1 +tomli==2.0.1 +tomli-w==1.0.0 +torch==2.3.0 +tornado==6.4.1 +tqdm==4.66.4 +transformers==4.41.0 +triton==2.3.0 +typer==0.12.3 +typing-extensions==4.12.2 +ujson==5.10.0 +urllib3==2.2.2 +uvicorn[standard]==0.30.1 +uvloop==0.19.0 +vllm==0.4.3 +vllm-flash-attn==2.5.8.post2 +watchfiles==0.22.0 +websockets==12.0 +wheel==0.43.0 +wrapt==1.16.0 +wsproto==1.2.0 +xformers==0.0.26.post1 +yarl==1.9.4 +zipp==3.19.2 + +# The following packages are considered to be unsafe in a requirements file: +pip==24.1.2 +setuptools==70.2.0 diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/env/python/requirements.txt b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/env/python/requirements.txt similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/env/python/requirements.txt rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/env/python/requirements.txt diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/env/python/version.txt b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/env/python/version.txt similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/env/python/version.txt rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/env/python/version.txt diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/bento_constants.py b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/bento_constants.py similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/bento_constants.py rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/bento_constants.py diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/bentofile.yaml b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/bentofile.yaml similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/bentofile.yaml rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/bentofile.yaml diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/chat_templates/alpaca.jinja b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/chat_templates/alpaca.jinja similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/chat_templates/alpaca.jinja rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/chat_templates/alpaca.jinja diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/chat_templates/amberchat.jinja b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/chat_templates/amberchat.jinja similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/chat_templates/amberchat.jinja rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/chat_templates/amberchat.jinja diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/chat_templates/chatml.jinja b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/chat_templates/chatml.jinja similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/chat_templates/chatml.jinja rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/chat_templates/chatml.jinja diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/chat_templates/chatqa.jinja b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/chat_templates/chatqa.jinja similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/chat_templates/chatqa.jinja rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/chat_templates/chatqa.jinja diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/chat_templates/falcon-instruct.jinja b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/chat_templates/falcon-instruct.jinja similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/chat_templates/falcon-instruct.jinja rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/chat_templates/falcon-instruct.jinja diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/chat_templates/gemma-it.jinja b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/chat_templates/gemma-it.jinja similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/chat_templates/gemma-it.jinja rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/chat_templates/gemma-it.jinja diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/chat_templates/llama-2-chat.jinja b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/chat_templates/llama-2-chat.jinja similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/chat_templates/llama-2-chat.jinja rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/chat_templates/llama-2-chat.jinja diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/chat_templates/llama-3-chat.jinja b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/chat_templates/llama-3-chat.jinja similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/chat_templates/llama-3-chat.jinja rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/chat_templates/llama-3-chat.jinja diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/chat_templates/mistral-instruct.jinja b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/chat_templates/mistral-instruct.jinja similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/chat_templates/mistral-instruct.jinja rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/chat_templates/mistral-instruct.jinja diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/chat_templates/openchat.jinja b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/chat_templates/openchat.jinja similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/chat_templates/openchat.jinja rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/chat_templates/openchat.jinja diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/chat_templates/phi-3.jinja b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/chat_templates/phi-3.jinja similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/chat_templates/phi-3.jinja rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/chat_templates/phi-3.jinja diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/chat_templates/saiga.jinja b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/chat_templates/saiga.jinja similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/chat_templates/saiga.jinja rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/chat_templates/saiga.jinja diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/chat_templates/solar-instruct.jinja b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/chat_templates/solar-instruct.jinja similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/chat_templates/solar-instruct.jinja rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/chat_templates/solar-instruct.jinja diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/chat_templates/vicuna.jinja b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/chat_templates/vicuna.jinja similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/chat_templates/vicuna.jinja rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/chat_templates/vicuna.jinja diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/chat_templates/zephyr.jinja b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/chat_templates/zephyr.jinja similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/chat_templates/zephyr.jinja rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/chat_templates/zephyr.jinja diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/generation_configs/alpaca.json b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/generation_configs/alpaca.json similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/generation_configs/alpaca.json rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/generation_configs/alpaca.json diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/generation_configs/amberchat.json b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/generation_configs/amberchat.json similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/generation_configs/amberchat.json rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/generation_configs/amberchat.json diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/generation_configs/chatqa.json b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/generation_configs/chatqa.json similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/generation_configs/chatqa.json rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/generation_configs/chatqa.json diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/generation_configs/gemma-it.json b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/generation_configs/gemma-it.json similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/generation_configs/gemma-it.json rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/generation_configs/gemma-it.json diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/generation_configs/llama-2-chat.json b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/generation_configs/llama-2-chat.json similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/generation_configs/llama-2-chat.json rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/generation_configs/llama-2-chat.json diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/generation_configs/llama-3-chat.json b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/generation_configs/llama-3-chat.json similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/generation_configs/llama-3-chat.json rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/generation_configs/llama-3-chat.json diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/generation_configs/mistral-instruct.json b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/generation_configs/mistral-instruct.json similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/generation_configs/mistral-instruct.json rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/generation_configs/mistral-instruct.json diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/generation_configs/openchat.json b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/generation_configs/openchat.json similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/generation_configs/openchat.json rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/generation_configs/openchat.json diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/generation_configs/orca-2.json b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/generation_configs/orca-2.json similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/generation_configs/orca-2.json rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/generation_configs/orca-2.json diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/generation_configs/phi-3.json b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/generation_configs/phi-3.json similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/generation_configs/phi-3.json rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/generation_configs/phi-3.json diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/generation_configs/qwen2-chat.json b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/generation_configs/qwen2-chat.json similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/generation_configs/qwen2-chat.json rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/generation_configs/qwen2-chat.json diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/generation_configs/saiga.json b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/generation_configs/saiga.json similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/generation_configs/saiga.json rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/generation_configs/saiga.json diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/generation_configs/solar-instruct.json b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/generation_configs/solar-instruct.json similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/generation_configs/solar-instruct.json rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/generation_configs/solar-instruct.json diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/generation_configs/vicuna.json b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/generation_configs/vicuna.json similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/generation_configs/vicuna.json rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/generation_configs/vicuna.json diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/generation_configs/yi-chat.json b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/generation_configs/yi-chat.json similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/generation_configs/yi-chat.json rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/generation_configs/yi-chat.json diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/generation_configs/zephyr.json b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/generation_configs/zephyr.json similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/chat_templates/generation_configs/zephyr.json rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/chat_templates/generation_configs/zephyr.json diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/service.py b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/service.py new file mode 100644 index 00000000..42152243 --- /dev/null +++ b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/service.py @@ -0,0 +1,239 @@ +import functools +import json +import logging +import os +import sys +import uuid +from typing import AsyncGenerator, Literal, Optional + +import bentoml +import fastapi +import fastapi.staticfiles +import pydantic +import vllm.entrypoints.openai.api_server as vllm_api_server +import yaml +from annotated_types import Ge, Le +from bento_constants import CONSTANT_YAML +from fastapi.responses import FileResponse +from typing_extensions import Annotated, Literal + + +class Message(pydantic.BaseModel): + content: str + role: Literal["system", "user", "assistant"] + + +CONSTANTS = yaml.safe_load(CONSTANT_YAML) + +ENGINE_CONFIG = CONSTANTS["engine_config"] +SERVICE_CONFIG = CONSTANTS["service_config"] +OVERRIDE_CHAT_TEMPLATE = CONSTANTS.get("chat_template") + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +openai_api_app = fastapi.FastAPI() +static_app = fastapi.FastAPI() +ui_app = fastapi.FastAPI() + + +OPENAI_ENDPOINTS = [ + ["/chat/completions", vllm_api_server.create_chat_completion, ["POST"]], + ["/completions", vllm_api_server.create_completion, ["POST"]], + ["/models", vllm_api_server.show_available_models, ["GET"]], +] + + +class Message(pydantic.BaseModel): + role: Literal["system", "user", "assistant"] + content: str + + +for route, endpoint, methods in OPENAI_ENDPOINTS: + openai_api_app.add_api_route( + path=route, + endpoint=endpoint, + methods=methods, + include_in_schema=True, + ) + + +STATIC_DIR = os.path.join(os.path.dirname(__file__), "ui") + +ui_app.mount( + "/static", fastapi.staticfiles.StaticFiles(directory=STATIC_DIR), name="static" +) + + +@ui_app.get("/") +async def serve_chat_html(): + return FileResponse(os.path.join(STATIC_DIR, "chat.html")) + + +@ui_app.get("/{full_path:path}") +async def catch_all(full_path: str): + file_path = os.path.join(STATIC_DIR, full_path) + if os.path.exists(file_path): + return FileResponse(file_path) + return FileResponse(os.path.join(STATIC_DIR, "chat.html")) + + +# special handling for prometheus_client of bentoml +if "prometheus_client" in sys.modules: + sys.modules.pop("prometheus_client") + + +@bentoml.mount_asgi_app(openai_api_app, path="/v1") +@bentoml.mount_asgi_app(ui_app, path="/chat") +@bentoml.service(**SERVICE_CONFIG) +class VLLM: + def __init__(self) -> None: + from transformers import AutoTokenizer + from vllm import AsyncEngineArgs, AsyncLLMEngine + from vllm.entrypoints.openai.serving_chat import OpenAIServingChat + from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion + + ENGINE_ARGS = AsyncEngineArgs(**ENGINE_CONFIG) + self.engine = AsyncLLMEngine.from_engine_args(ENGINE_ARGS) + self.tokenizer = AutoTokenizer.from_pretrained(ENGINE_CONFIG["model"]) + logger.info(f"VLLM service initialized with model: {ENGINE_CONFIG['model']}") + + if OVERRIDE_CHAT_TEMPLATE: # use community chat template + gen_config = _get_gen_config(CONSTANTS["chat_template"]) + chat_template = gen_config["template"] + else: + chat_template = None + + model_config = self.engine.engine.get_model_config() + + # inject the engine into the openai serving chat and completion + vllm_api_server.openai_serving_chat = OpenAIServingChat( + engine=self.engine, + served_model_names=[ENGINE_CONFIG["model"]], + response_role="assistant", + chat_template=chat_template, + model_config=model_config, + ) + vllm_api_server.openai_serving_completion = OpenAIServingCompletion( + engine=self.engine, + served_model_names=[ENGINE_CONFIG["model"]], + model_config=model_config, + lora_modules=None, + ) + + @bentoml.api(route="/api/generate") + async def generate( + self, + prompt: str = "Explain superconductors like I'm five years old", + model: str = ENGINE_CONFIG["model"], + max_tokens: Annotated[ + int, + Ge(128), + Le(ENGINE_CONFIG["max_model_len"]), + ] = ENGINE_CONFIG["max_model_len"], + stop: Optional[list[str]] = None, + ) -> AsyncGenerator[str, None]: + if stop is None: + stop = [] + + from vllm import SamplingParams + + SAMPLING_PARAM = SamplingParams( + max_tokens=max_tokens, + stop=stop, + ) + stream = await self.engine.add_request(uuid.uuid4().hex, prompt, SAMPLING_PARAM) + + cursor = 0 + async for request_output in stream: + text = request_output.outputs[0].text + yield text[cursor:] + cursor = len(text) + + @bentoml.api(route="/api/chat") + async def chat( + self, + messages: list[Message] = [ + Message(content="what is the meaning of life?", role="user") + ], + model: str = ENGINE_CONFIG["model"], + max_tokens: Annotated[ + int, + Ge(128), + Le(ENGINE_CONFIG["max_model_len"]), + ] = ENGINE_CONFIG["max_model_len"], + stop: Optional[list[str]] = None, + ) -> AsyncGenerator[str, None]: + """ + light-weight chat API that takes in a list of messages and returns a response + """ + from vllm import SamplingParams + + try: + if OVERRIDE_CHAT_TEMPLATE: # community chat template + gen_config = _get_gen_config(CONSTANTS["chat_template"]) + if not stop: + if gen_config["stop_str"]: + stop = [gen_config["stop_str"]] + else: + stop = [] + system_prompt = gen_config["system_prompt"] + self.tokenizer.chat_template = gen_config["template"] + else: + if not stop: + if self.tokenizer.eos_token is not None: + stop = [self.tokenizer.eos_token] + else: + stop = [] + system_prompt = None + + SAMPLING_PARAM = SamplingParams( + max_tokens=max_tokens, + stop=stop, + ) + if system_prompt and messages[0].role != "system": + messages = [dict(role="system", content=system_prompt)] + messages + + prompt = self.tokenizer.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True, + ) + + stream = await self.engine.add_request( + uuid.uuid4().hex, prompt, SAMPLING_PARAM + ) + + cursor = 0 + strip_flag = True + async for request_output in stream: + text = request_output.outputs[0].text + assistant_message = text[cursor:] + if not strip_flag: # strip the leading whitespace + yield assistant_message + elif assistant_message.strip(): + strip_flag = False + yield assistant_message.lstrip() + cursor = len(text) + except Exception as e: + logger.error(f"Error in chat API: {e}") + yield f"Error in chat API: {e}" + + +@functools.lru_cache(maxsize=1) +def _get_gen_config(community_chat_template: str) -> dict: + logger.info(f"Load community_chat_template: {community_chat_template}") + chat_template_path = os.path.join( + os.path.dirname(__file__), "chat_templates", "chat_templates" + ) + config_path = os.path.join( + os.path.dirname(__file__), "chat_templates", "generation_configs" + ) + with open(os.path.join(config_path, f"{community_chat_template}.json")) as f: + gen_config = json.load(f) + chat_template_file = gen_config["chat_template"].split("/")[-1] + with open(os.path.join(chat_template_path, chat_template_file)) as f: + chat_template = f.read() + gen_config["template"] = chat_template.replace(" ", "").replace("\n", "") + return gen_config diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/404.html b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/404.html similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/404.html rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/404.html diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/700-532b1fe2415e5859.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/700-532b1fe2415e5859.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/700-532b1fe2415e5859.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/700-532b1fe2415e5859.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/995-34374f39bb210839.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/995-34374f39bb210839.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/995-34374f39bb210839.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/995-34374f39bb210839.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/css/429544bd3cd8ce3a.css b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/css/429544bd3cd8ce3a.css similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/css/429544bd3cd8ce3a.css rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/css/429544bd3cd8ce3a.css diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/css/5b67f082b31cfc7b.css b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/css/5b67f082b31cfc7b.css similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/css/5b67f082b31cfc7b.css rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/css/5b67f082b31cfc7b.css diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/css/9e63023b20ddb15e.css b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/css/9e63023b20ddb15e.css similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/_next/static/css/9e63023b20ddb15e.css rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/_next/static/css/9e63023b20ddb15e.css diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/apple-touch-icon.png b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/apple-touch-icon.png similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/apple-touch-icon.png rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/apple-touch-icon.png diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/chat.html b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/chat.html similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/chat.html rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/chat.html diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/chat.txt b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/chat.txt similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/chat.txt rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/chat.txt diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/favicon-16x16.png b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/favicon-16x16.png similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/favicon-16x16.png rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/favicon-16x16.png diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/favicon.ico b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/favicon.ico similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/favicon.ico rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/favicon.ico diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/index.html b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/index.html similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/index.html rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/index.html diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/index.txt b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/index.txt similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/ui/index.txt rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-0016/src/ui/index.txt diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/README.md b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/README.md deleted file mode 100644 index 2bcd5f9e..00000000 --- a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/README.md +++ /dev/null @@ -1,16 +0,0 @@ -# mixtral:8x7b-instruct-v0.1-awq-4bit-32eb - -[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML) -[![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/) -[![join_slack](https://badgen.net/badge/Join/BentoML%20Slack/cyan?icon=slack)](https://l.bentoml.com/join-slack-swagger) -[![BentoML GitHub Repo](https://img.shields.io/github/stars/bentoml/bentoml?style=social)](https://github.com/bentoml/BentoML) -[![Twitter Follow](https://img.shields.io/twitter/follow/bentomlai?label=Follow%20BentoML&style=social)](https://twitter.com/bentomlai) - -This is a Machine Learning Service created with BentoML. - -## Help - -* [📖 Documentation](https://docs.bentoml.com/en/latest/): Learn how to use BentoML. -* [💬 Community](https://l.bentoml.com/join-slack-swagger): Join the BentoML Slack community. -* [🐛 GitHub Issues](https://github.com/bentoml/BentoML/issues): Report bugs and feature requests. -* Tip: you can also [customize this README](https://docs.bentoml.com/en/latest/concepts/bento.html#description). diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/env/python/requirements.lock.txt b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/env/python/requirements.lock.txt deleted file mode 100644 index 9118ea44..00000000 --- a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/env/python/requirements.lock.txt +++ /dev/null @@ -1,147 +0,0 @@ -aiohttp==3.9.5 -aiosignal==1.3.1 -annotated-types==0.7.0 -anyio==4.4.0 -appdirs==1.4.4 -asgiref==3.8.1 -async-timeout==4.0.3 -attrs==23.2.0 -bentoml==1.2.19 -build==1.2.1 -cattrs==23.1.2 -certifi==2024.7.4 -charset-normalizer==3.3.2 -circus==0.18.0 -click==8.1.7 -click-option-group==0.5.6 -cloudpickle==3.0.0 -cmake==3.30.0 -deepmerge==1.1.1 -deprecated==1.2.14 -diskcache==5.6.3 -distro==1.9.0 -dnspython==2.6.1 -email-validator==2.2.0 -exceptiongroup==1.2.1 -fastapi==0.111.0 -fastapi-cli==0.0.4 -filelock==3.15.4 -frozenlist==1.4.1 -fs==2.4.16 -fsspec==2024.6.1 -h11==0.14.0 -httpcore==1.0.5 -httptools==0.6.1 -httpx==0.27.0 -httpx-ws==0.6.0 -huggingface-hub==0.23.4 -idna==3.7 -importlib-metadata==6.11.0 -inflection==0.5.1 -interegular==0.3.3 -jinja2==3.1.4 -joblib==1.4.2 -jsonschema==4.22.0 -jsonschema-specifications==2023.12.1 -lark==1.1.9 -llvmlite==0.43.0 -lm-format-enforcer==0.10.1 -markdown-it-py==3.0.0 -markupsafe==2.1.5 -mdurl==0.1.2 -mpmath==1.3.0 -msgpack==1.0.8 -multidict==6.0.5 -nest-asyncio==1.6.0 -networkx==3.2.1 -ninja==1.11.1.1 -numba==0.60.0 -numpy==1.26.0 -nvidia-cublas-cu12==12.1.3.1 -nvidia-cuda-cupti-cu12==12.1.105 -nvidia-cuda-nvrtc-cu12==12.1.105 -nvidia-cuda-runtime-cu12==12.1.105 -nvidia-cudnn-cu12==8.9.2.26 -nvidia-cufft-cu12==11.0.2.54 -nvidia-curand-cu12==10.3.2.106 -nvidia-cusolver-cu12==11.4.5.107 -nvidia-cusparse-cu12==12.1.0.106 -nvidia-ml-py==11.525.150 -nvidia-nccl-cu12==2.20.5 -nvidia-nvjitlink-cu12==12.5.82 -nvidia-nvtx-cu12==12.1.105 -openai==1.35.10 -opentelemetry-api==1.20.0 -opentelemetry-instrumentation==0.41b0 -opentelemetry-instrumentation-aiohttp-client==0.41b0 -opentelemetry-instrumentation-asgi==0.41b0 -opentelemetry-sdk==1.20.0 -opentelemetry-semantic-conventions==0.41b0 -opentelemetry-util-http==0.41b0 -orjson==3.10.6 -outlines==0.0.34 -packaging==24.1 -pathspec==0.12.1 -pip-requirements-parser==32.0.1 -pip-tools==7.4.1 -prometheus-client==0.20.0 -prometheus-fastapi-instrumentator==7.0.0 -protobuf==5.27.2 -psutil==6.0.0 -py-cpuinfo==9.0.0 -pydantic==2.8.2 -pydantic-core==2.20.1 -pygments==2.18.0 -pyparsing==3.1.2 -pyproject-hooks==1.1.0 -python-dateutil==2.9.0.post0 -python-dotenv==1.0.1 -python-json-logger==2.0.7 -python-multipart==0.0.9 -pyyaml==6.0.1 -pyzmq==26.0.3 -ray==2.31.0 -referencing==0.35.1 -regex==2024.5.15 -requests==2.32.3 -rich==13.7.1 -rpds-py==0.18.1 -safetensors==0.4.3 -schema==0.7.7 -scipy==1.13.1 -sentencepiece==0.2.0 -shellingham==1.5.4 -simple-di==0.1.5 -six==1.16.0 -sniffio==1.3.1 -starlette==0.37.2 -sympy==1.12.1 -tiktoken==0.7.0 -tokenizers==0.19.1 -tomli==2.0.1 -tomli-w==1.0.0 -torch==2.3.0 -tornado==6.4.1 -tqdm==4.66.4 -transformers==4.41.0 -triton==2.3.0 -typer==0.12.3 -typing-extensions==4.12.2 -ujson==5.10.0 -urllib3==2.2.2 -uvicorn[standard]==0.30.1 -uvloop==0.19.0 -vllm==0.4.3 -vllm-flash-attn==2.5.8.post2 -watchfiles==0.22.0 -websockets==12.0 -wheel==0.43.0 -wrapt==1.16.0 -wsproto==1.2.0 -xformers==0.0.26.post1 -yarl==1.9.4 -zipp==3.19.2 - -# The following packages are considered to be unsafe in a requirements file: -pip==24.1.1 -setuptools==70.2.0 diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/service.py b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/service.py deleted file mode 100644 index 94af0b0d..00000000 --- a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-awq-4bit-32eb/src/service.py +++ /dev/null @@ -1,239 +0,0 @@ -import functools -import json -import logging -import os -import sys -import uuid -from typing import AsyncGenerator, Literal, Optional - -import bentoml -import fastapi -import fastapi.staticfiles -import pydantic -import vllm.entrypoints.openai.api_server as vllm_api_server -import yaml -from annotated_types import Ge, Le -from bento_constants import CONSTANT_YAML -from fastapi.responses import FileResponse -from typing_extensions import Annotated, Literal - - -class Message(pydantic.BaseModel): - content: str - role: Literal["system", "user", "assistant"] - - -CONSTANTS = yaml.safe_load(CONSTANT_YAML) - -ENGINE_CONFIG = CONSTANTS["engine_config"] -SERVICE_CONFIG = CONSTANTS["service_config"] -OVERRIDE_CHAT_TEMPLATE = CONSTANTS.get("chat_template") - -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) - - -openai_api_app = fastapi.FastAPI() -static_app = fastapi.FastAPI() -ui_app = fastapi.FastAPI() - - -OPENAI_ENDPOINTS = [ - ["/chat/completions", vllm_api_server.create_chat_completion, ["POST"]], - ["/completions", vllm_api_server.create_completion, ["POST"]], - ["/models", vllm_api_server.show_available_models, ["GET"]], -] - - -class Message(pydantic.BaseModel): - role: Literal["system", "user", "assistant"] - content: str - - -for route, endpoint, methods in OPENAI_ENDPOINTS: - openai_api_app.add_api_route( - path=route, - endpoint=endpoint, - methods=methods, - include_in_schema=True, - ) - - -STATIC_DIR = os.path.join(os.path.dirname(__file__), "ui") - -ui_app.mount( - "/static", fastapi.staticfiles.StaticFiles(directory=STATIC_DIR), name="static" -) - - -@ui_app.get("/") -async def serve_chat_html(): - return FileResponse(os.path.join(STATIC_DIR, "chat.html")) - - -@ui_app.get("/{full_path:path}") -async def catch_all(full_path: str): - file_path = os.path.join(STATIC_DIR, full_path) - if os.path.exists(file_path): - return FileResponse(file_path) - return FileResponse(os.path.join(STATIC_DIR, "chat.html")) - - -# special handling for prometheus_client of bentoml -if "prometheus_client" in sys.modules: - sys.modules.pop("prometheus_client") - - -@bentoml.mount_asgi_app(openai_api_app, path="/v1") -@bentoml.mount_asgi_app(ui_app, path="/ui") -@bentoml.service(**SERVICE_CONFIG) -class VLLM: - def __init__(self) -> None: - from transformers import AutoTokenizer - from vllm import AsyncEngineArgs, AsyncLLMEngine - from vllm.entrypoints.openai.serving_chat import OpenAIServingChat - from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion - - ENGINE_ARGS = AsyncEngineArgs(**ENGINE_CONFIG) - self.engine = AsyncLLMEngine.from_engine_args(ENGINE_ARGS) - self.tokenizer = AutoTokenizer.from_pretrained(ENGINE_CONFIG["model"]) - logger.info(f"VLLM service initialized with model: {ENGINE_CONFIG['model']}") - - if OVERRIDE_CHAT_TEMPLATE: # use community chat template - gen_config = _get_gen_config(CONSTANTS["chat_template"]) - chat_template = gen_config["template"] - else: - chat_template = None - - model_config = self.engine.engine.get_model_config() - - # inject the engine into the openai serving chat and completion - vllm_api_server.openai_serving_chat = OpenAIServingChat( - engine=self.engine, - served_model_names=[ENGINE_CONFIG["model"]], - response_role="assistant", - chat_template=chat_template, - model_config=model_config, - ) - vllm_api_server.openai_serving_completion = OpenAIServingCompletion( - engine=self.engine, - served_model_names=[ENGINE_CONFIG["model"]], - model_config=model_config, - lora_modules=None, - ) - - @bentoml.api(route="/api/generate") - async def generate( - self, - prompt: str = "Explain superconductors like I'm five years old", - model: str = ENGINE_CONFIG["model"], - max_tokens: Annotated[ - int, - Ge(128), - Le(ENGINE_CONFIG["max_model_len"]), - ] = ENGINE_CONFIG["max_model_len"], - stop: Optional[list[str]] = None, - ) -> AsyncGenerator[str, None]: - if stop is None: - stop = [] - - from vllm import SamplingParams - - SAMPLING_PARAM = SamplingParams( - max_tokens=max_tokens, - stop=stop, - ) - stream = await self.engine.add_request(uuid.uuid4().hex, prompt, SAMPLING_PARAM) - - cursor = 0 - async for request_output in stream: - text = request_output.outputs[0].text - yield text[cursor:] - cursor = len(text) - - @bentoml.api(route="/api/chat") - async def chat( - self, - messages: list[Message] = [ - Message(content="what is the meaning of life?", role="user") - ], - model: str = ENGINE_CONFIG["model"], - max_tokens: Annotated[ - int, - Ge(128), - Le(ENGINE_CONFIG["max_model_len"]), - ] = ENGINE_CONFIG["max_model_len"], - stop: Optional[list[str]] = None, - ) -> AsyncGenerator[str, None]: - """ - light-weight chat API that takes in a list of messages and returns a response - """ - from vllm import SamplingParams - - try: - if OVERRIDE_CHAT_TEMPLATE: # community chat template - gen_config = _get_gen_config(CONSTANTS["chat_template"]) - if not stop: - if gen_config["stop_str"]: - stop = [gen_config["stop_str"]] - else: - stop = [] - system_prompt = gen_config["system_prompt"] - self.tokenizer.chat_template = gen_config["template"] - else: - if not stop: - if self.tokenizer.eos_token is not None: - stop = [self.tokenizer.eos_token] - else: - stop = [] - system_prompt = None - - SAMPLING_PARAM = SamplingParams( - max_tokens=max_tokens, - stop=stop, - ) - if system_prompt and messages[0].role != "system": - messages = [dict(role="system", content=system_prompt)] + messages - - prompt = self.tokenizer.apply_chat_template( - messages, - tokenize=False, - add_generation_prompt=True, - ) - - stream = await self.engine.add_request( - uuid.uuid4().hex, prompt, SAMPLING_PARAM - ) - - cursor = 0 - strip_flag = True - async for request_output in stream: - text = request_output.outputs[0].text - assistant_message = text[cursor:] - if not strip_flag: # strip the leading whitespace - yield assistant_message - elif assistant_message.strip(): - strip_flag = False - yield assistant_message.lstrip() - cursor = len(text) - except Exception as e: - logger.error(f"Error in chat API: {e}") - yield f"Error in chat API: {e}" - - -@functools.lru_cache(maxsize=1) -def _get_gen_config(community_chat_template: str) -> dict: - logger.info(f"Load community_chat_template: {community_chat_template}") - chat_template_path = os.path.join( - os.path.dirname(__file__), "chat_templates", "chat_templates" - ) - config_path = os.path.join( - os.path.dirname(__file__), "chat_templates", "generation_configs" - ) - with open(os.path.join(config_path, f"{community_chat_template}.json")) as f: - gen_config = json.load(f) - chat_template_file = gen_config["chat_template"].split("/")[-1] - with open(os.path.join(chat_template_path, chat_template_file)) as f: - chat_template = f.read() - gen_config["template"] = chat_template.replace(" ", "").replace("\n", "") - return gen_config diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/README.md b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/README.md new file mode 100644 index 00000000..f24b3f88 --- /dev/null +++ b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/README.md @@ -0,0 +1,16 @@ +# mixtral:8x7b-instruct-v0.1-fp16-b90a + +[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML) +[![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/) +[![join_slack](https://badgen.net/badge/Join/BentoML%20Slack/cyan?icon=slack)](https://l.bentoml.com/join-slack-swagger) +[![BentoML GitHub Repo](https://img.shields.io/github/stars/bentoml/bentoml?style=social)](https://github.com/bentoml/BentoML) +[![Twitter Follow](https://img.shields.io/twitter/follow/bentomlai?label=Follow%20BentoML&style=social)](https://twitter.com/bentomlai) + +This is a Machine Learning Service created with BentoML. + +## Help + +* [📖 Documentation](https://docs.bentoml.com/en/latest/): Learn how to use BentoML. +* [💬 Community](https://l.bentoml.com/join-slack-swagger): Join the BentoML Slack community. +* [🐛 GitHub Issues](https://github.com/bentoml/BentoML/issues): Report bugs and feature requests. +* Tip: you can also [customize this README](https://docs.bentoml.com/en/latest/concepts/bento.html#description). diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/apis/openapi.yaml b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/apis/openapi.yaml similarity index 99% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/apis/openapi.yaml rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/apis/openapi.yaml index 77130619..70915c09 100644 --- a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/apis/openapi.yaml +++ b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/apis/openapi.yaml @@ -1036,6 +1036,39 @@ paths: tags: - Service APIs x-bentoml-name: generate + /chat/: + get: + operationId: serve_chat_html__get + responses: + '200': + content: + application/json: + schema: {} + description: Successful Response + summary: Serve Chat Html + /chat/{full_path}: + get: + operationId: catch_all__full_path__get + parameters: + - in: path + name: full_path + required: true + schema: + title: Full Path + type: string + responses: + '200': + content: + application/json: + schema: {} + description: Successful Response + '422': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + description: Validation Error + summary: Catch All /healthz: get: description: Health check endpoint. Expecting an empty response with status @@ -1076,39 +1109,6 @@ paths: description: Successful Response tags: - Infrastructure - /ui/: - get: - operationId: serve_chat_html__get - responses: - '200': - content: - application/json: - schema: {} - description: Successful Response - summary: Serve Chat Html - /ui/{full_path}: - get: - operationId: catch_all__full_path__get - parameters: - - in: path - name: full_path - required: true - schema: - title: Full Path - type: string - responses: - '200': - content: - application/json: - schema: {} - description: Successful Response - '422': - content: - application/json: - schema: - $ref: '#/components/schemas/HTTPValidationError' - description: Validation Error - summary: Catch All /v1/chat/completions: post: operationId: create_chat_completion_chat_completions_post diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/apis/schema.json b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/apis/schema.json similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/apis/schema.json rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/apis/schema.json diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/bento.yaml b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/bento.yaml similarity index 97% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/bento.yaml rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/bento.yaml index 6594c80f..62c773e9 100644 --- a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/bento.yaml +++ b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/bento.yaml @@ -1,8 +1,8 @@ service: service:VLLM name: mixtral -version: 8x7b-instruct-v0.1-fp16-d01a +version: 8x7b-instruct-v0.1-fp16-b90a bentoml_version: 1.2.19 -creation_time: '2024-07-05T07:32:49.140140+00:00' +creation_time: '2024-07-08T08:00:09.830997+00:00' labels: openllm_alias: 8x7b,8x7b-instruct openllm_hf_model_id: mistralai/Mixtral-8x7B-Instruct-v0.1 diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/env/docker/Dockerfile b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/env/docker/Dockerfile similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/env/docker/Dockerfile rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/env/docker/Dockerfile diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/env/docker/entrypoint.sh b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/env/docker/entrypoint.sh similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/env/docker/entrypoint.sh rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/env/docker/entrypoint.sh diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/env/python/install.sh b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/env/python/install.sh similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/env/python/install.sh rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/env/python/install.sh diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/env/python/requirements.lock.txt b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/env/python/requirements.lock.txt new file mode 100644 index 00000000..925ce193 --- /dev/null +++ b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/env/python/requirements.lock.txt @@ -0,0 +1,147 @@ +aiohttp==3.9.5 +aiosignal==1.3.1 +annotated-types==0.7.0 +anyio==4.4.0 +appdirs==1.4.4 +asgiref==3.8.1 +async-timeout==4.0.3 +attrs==23.2.0 +bentoml==1.2.19 +build==1.2.1 +cattrs==23.1.2 +certifi==2024.7.4 +charset-normalizer==3.3.2 +circus==0.18.0 +click==8.1.7 +click-option-group==0.5.6 +cloudpickle==3.0.0 +cmake==3.30.0 +deepmerge==1.1.1 +deprecated==1.2.14 +diskcache==5.6.3 +distro==1.9.0 +dnspython==2.6.1 +email-validator==2.2.0 +exceptiongroup==1.2.1 +fastapi==0.111.0 +fastapi-cli==0.0.4 +filelock==3.15.4 +frozenlist==1.4.1 +fs==2.4.16 +fsspec==2024.6.1 +h11==0.14.0 +httpcore==1.0.5 +httptools==0.6.1 +httpx==0.27.0 +httpx-ws==0.6.0 +huggingface-hub==0.23.4 +idna==3.7 +importlib-metadata==6.11.0 +inflection==0.5.1 +interegular==0.3.3 +jinja2==3.1.4 +joblib==1.4.2 +jsonschema==4.22.0 +jsonschema-specifications==2023.12.1 +lark==1.1.9 +llvmlite==0.43.0 +lm-format-enforcer==0.10.1 +markdown-it-py==3.0.0 +markupsafe==2.1.5 +mdurl==0.1.2 +mpmath==1.3.0 +msgpack==1.0.8 +multidict==6.0.5 +nest-asyncio==1.6.0 +networkx==3.2.1 +ninja==1.11.1.1 +numba==0.60.0 +numpy==1.26.0 +nvidia-cublas-cu12==12.1.3.1 +nvidia-cuda-cupti-cu12==12.1.105 +nvidia-cuda-nvrtc-cu12==12.1.105 +nvidia-cuda-runtime-cu12==12.1.105 +nvidia-cudnn-cu12==8.9.2.26 +nvidia-cufft-cu12==11.0.2.54 +nvidia-curand-cu12==10.3.2.106 +nvidia-cusolver-cu12==11.4.5.107 +nvidia-cusparse-cu12==12.1.0.106 +nvidia-ml-py==11.525.150 +nvidia-nccl-cu12==2.20.5 +nvidia-nvjitlink-cu12==12.5.82 +nvidia-nvtx-cu12==12.1.105 +openai==1.35.10 +opentelemetry-api==1.20.0 +opentelemetry-instrumentation==0.41b0 +opentelemetry-instrumentation-aiohttp-client==0.41b0 +opentelemetry-instrumentation-asgi==0.41b0 +opentelemetry-sdk==1.20.0 +opentelemetry-semantic-conventions==0.41b0 +opentelemetry-util-http==0.41b0 +orjson==3.10.6 +outlines==0.0.34 +packaging==24.1 +pathspec==0.12.1 +pip-requirements-parser==32.0.1 +pip-tools==7.4.1 +prometheus-client==0.20.0 +prometheus-fastapi-instrumentator==7.0.0 +protobuf==5.27.2 +psutil==6.0.0 +py-cpuinfo==9.0.0 +pydantic==2.8.2 +pydantic-core==2.20.1 +pygments==2.18.0 +pyparsing==3.1.2 +pyproject-hooks==1.1.0 +python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 +python-json-logger==2.0.7 +python-multipart==0.0.9 +pyyaml==6.0.1 +pyzmq==26.0.3 +ray==2.31.0 +referencing==0.35.1 +regex==2024.5.15 +requests==2.32.3 +rich==13.7.1 +rpds-py==0.18.1 +safetensors==0.4.3 +schema==0.7.7 +scipy==1.13.1 +sentencepiece==0.2.0 +shellingham==1.5.4 +simple-di==0.1.5 +six==1.16.0 +sniffio==1.3.1 +starlette==0.37.2 +sympy==1.12.1 +tiktoken==0.7.0 +tokenizers==0.19.1 +tomli==2.0.1 +tomli-w==1.0.0 +torch==2.3.0 +tornado==6.4.1 +tqdm==4.66.4 +transformers==4.41.0 +triton==2.3.0 +typer==0.12.3 +typing-extensions==4.12.2 +ujson==5.10.0 +urllib3==2.2.2 +uvicorn[standard]==0.30.1 +uvloop==0.19.0 +vllm==0.4.3 +vllm-flash-attn==2.5.8.post2 +watchfiles==0.22.0 +websockets==12.0 +wheel==0.43.0 +wrapt==1.16.0 +wsproto==1.2.0 +xformers==0.0.26.post1 +yarl==1.9.4 +zipp==3.19.2 + +# The following packages are considered to be unsafe in a requirements file: +pip==24.1.2 +setuptools==70.2.0 diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/env/python/requirements.txt b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/env/python/requirements.txt similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/env/python/requirements.txt rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/env/python/requirements.txt diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/env/python/version.txt b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/env/python/version.txt similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/env/python/version.txt rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/env/python/version.txt diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/bento_constants.py b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/bento_constants.py similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/bento_constants.py rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/bento_constants.py diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/bentofile.yaml b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/bentofile.yaml similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/bentofile.yaml rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/bentofile.yaml diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/chat_templates/alpaca.jinja b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/chat_templates/alpaca.jinja similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/chat_templates/alpaca.jinja rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/chat_templates/alpaca.jinja diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/chat_templates/amberchat.jinja b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/chat_templates/amberchat.jinja similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/chat_templates/amberchat.jinja rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/chat_templates/amberchat.jinja diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/chat_templates/chatml.jinja b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/chat_templates/chatml.jinja similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/chat_templates/chatml.jinja rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/chat_templates/chatml.jinja diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/chat_templates/chatqa.jinja b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/chat_templates/chatqa.jinja similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/chat_templates/chatqa.jinja rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/chat_templates/chatqa.jinja diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/chat_templates/falcon-instruct.jinja b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/chat_templates/falcon-instruct.jinja similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/chat_templates/falcon-instruct.jinja rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/chat_templates/falcon-instruct.jinja diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/chat_templates/gemma-it.jinja b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/chat_templates/gemma-it.jinja similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/chat_templates/gemma-it.jinja rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/chat_templates/gemma-it.jinja diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/chat_templates/llama-2-chat.jinja b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/chat_templates/llama-2-chat.jinja similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/chat_templates/llama-2-chat.jinja rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/chat_templates/llama-2-chat.jinja diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/chat_templates/llama-3-chat.jinja b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/chat_templates/llama-3-chat.jinja similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/chat_templates/llama-3-chat.jinja rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/chat_templates/llama-3-chat.jinja diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/chat_templates/mistral-instruct.jinja b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/chat_templates/mistral-instruct.jinja similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/chat_templates/mistral-instruct.jinja rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/chat_templates/mistral-instruct.jinja diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/chat_templates/openchat.jinja b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/chat_templates/openchat.jinja similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/chat_templates/openchat.jinja rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/chat_templates/openchat.jinja diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/chat_templates/phi-3.jinja b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/chat_templates/phi-3.jinja similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/chat_templates/phi-3.jinja rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/chat_templates/phi-3.jinja diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/chat_templates/saiga.jinja b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/chat_templates/saiga.jinja similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/chat_templates/saiga.jinja rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/chat_templates/saiga.jinja diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/chat_templates/solar-instruct.jinja b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/chat_templates/solar-instruct.jinja similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/chat_templates/solar-instruct.jinja rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/chat_templates/solar-instruct.jinja diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/chat_templates/vicuna.jinja b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/chat_templates/vicuna.jinja similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/chat_templates/vicuna.jinja rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/chat_templates/vicuna.jinja diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/chat_templates/zephyr.jinja b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/chat_templates/zephyr.jinja similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/chat_templates/zephyr.jinja rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/chat_templates/zephyr.jinja diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/generation_configs/alpaca.json b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/generation_configs/alpaca.json similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/generation_configs/alpaca.json rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/generation_configs/alpaca.json diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/generation_configs/amberchat.json b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/generation_configs/amberchat.json similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/generation_configs/amberchat.json rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/generation_configs/amberchat.json diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/generation_configs/chatqa.json b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/generation_configs/chatqa.json similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/generation_configs/chatqa.json rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/generation_configs/chatqa.json diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/generation_configs/gemma-it.json b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/generation_configs/gemma-it.json similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/generation_configs/gemma-it.json rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/generation_configs/gemma-it.json diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/generation_configs/llama-2-chat.json b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/generation_configs/llama-2-chat.json similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/generation_configs/llama-2-chat.json rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/generation_configs/llama-2-chat.json diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/generation_configs/llama-3-chat.json b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/generation_configs/llama-3-chat.json similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/generation_configs/llama-3-chat.json rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/generation_configs/llama-3-chat.json diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/generation_configs/mistral-instruct.json b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/generation_configs/mistral-instruct.json similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/generation_configs/mistral-instruct.json rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/generation_configs/mistral-instruct.json diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/generation_configs/openchat.json b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/generation_configs/openchat.json similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/generation_configs/openchat.json rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/generation_configs/openchat.json diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/generation_configs/orca-2.json b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/generation_configs/orca-2.json similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/generation_configs/orca-2.json rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/generation_configs/orca-2.json diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/generation_configs/phi-3.json b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/generation_configs/phi-3.json similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/generation_configs/phi-3.json rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/generation_configs/phi-3.json diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/generation_configs/qwen2-chat.json b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/generation_configs/qwen2-chat.json similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/generation_configs/qwen2-chat.json rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/generation_configs/qwen2-chat.json diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/generation_configs/saiga.json b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/generation_configs/saiga.json similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/generation_configs/saiga.json rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/generation_configs/saiga.json diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/generation_configs/solar-instruct.json b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/generation_configs/solar-instruct.json similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/generation_configs/solar-instruct.json rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/generation_configs/solar-instruct.json diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/generation_configs/vicuna.json b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/generation_configs/vicuna.json similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/generation_configs/vicuna.json rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/generation_configs/vicuna.json diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/generation_configs/yi-chat.json b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/generation_configs/yi-chat.json similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/generation_configs/yi-chat.json rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/generation_configs/yi-chat.json diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/generation_configs/zephyr.json b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/generation_configs/zephyr.json similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/chat_templates/generation_configs/zephyr.json rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/chat_templates/generation_configs/zephyr.json diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/service.py b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/service.py new file mode 100644 index 00000000..42152243 --- /dev/null +++ b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/service.py @@ -0,0 +1,239 @@ +import functools +import json +import logging +import os +import sys +import uuid +from typing import AsyncGenerator, Literal, Optional + +import bentoml +import fastapi +import fastapi.staticfiles +import pydantic +import vllm.entrypoints.openai.api_server as vllm_api_server +import yaml +from annotated_types import Ge, Le +from bento_constants import CONSTANT_YAML +from fastapi.responses import FileResponse +from typing_extensions import Annotated, Literal + + +class Message(pydantic.BaseModel): + content: str + role: Literal["system", "user", "assistant"] + + +CONSTANTS = yaml.safe_load(CONSTANT_YAML) + +ENGINE_CONFIG = CONSTANTS["engine_config"] +SERVICE_CONFIG = CONSTANTS["service_config"] +OVERRIDE_CHAT_TEMPLATE = CONSTANTS.get("chat_template") + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +openai_api_app = fastapi.FastAPI() +static_app = fastapi.FastAPI() +ui_app = fastapi.FastAPI() + + +OPENAI_ENDPOINTS = [ + ["/chat/completions", vllm_api_server.create_chat_completion, ["POST"]], + ["/completions", vllm_api_server.create_completion, ["POST"]], + ["/models", vllm_api_server.show_available_models, ["GET"]], +] + + +class Message(pydantic.BaseModel): + role: Literal["system", "user", "assistant"] + content: str + + +for route, endpoint, methods in OPENAI_ENDPOINTS: + openai_api_app.add_api_route( + path=route, + endpoint=endpoint, + methods=methods, + include_in_schema=True, + ) + + +STATIC_DIR = os.path.join(os.path.dirname(__file__), "ui") + +ui_app.mount( + "/static", fastapi.staticfiles.StaticFiles(directory=STATIC_DIR), name="static" +) + + +@ui_app.get("/") +async def serve_chat_html(): + return FileResponse(os.path.join(STATIC_DIR, "chat.html")) + + +@ui_app.get("/{full_path:path}") +async def catch_all(full_path: str): + file_path = os.path.join(STATIC_DIR, full_path) + if os.path.exists(file_path): + return FileResponse(file_path) + return FileResponse(os.path.join(STATIC_DIR, "chat.html")) + + +# special handling for prometheus_client of bentoml +if "prometheus_client" in sys.modules: + sys.modules.pop("prometheus_client") + + +@bentoml.mount_asgi_app(openai_api_app, path="/v1") +@bentoml.mount_asgi_app(ui_app, path="/chat") +@bentoml.service(**SERVICE_CONFIG) +class VLLM: + def __init__(self) -> None: + from transformers import AutoTokenizer + from vllm import AsyncEngineArgs, AsyncLLMEngine + from vllm.entrypoints.openai.serving_chat import OpenAIServingChat + from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion + + ENGINE_ARGS = AsyncEngineArgs(**ENGINE_CONFIG) + self.engine = AsyncLLMEngine.from_engine_args(ENGINE_ARGS) + self.tokenizer = AutoTokenizer.from_pretrained(ENGINE_CONFIG["model"]) + logger.info(f"VLLM service initialized with model: {ENGINE_CONFIG['model']}") + + if OVERRIDE_CHAT_TEMPLATE: # use community chat template + gen_config = _get_gen_config(CONSTANTS["chat_template"]) + chat_template = gen_config["template"] + else: + chat_template = None + + model_config = self.engine.engine.get_model_config() + + # inject the engine into the openai serving chat and completion + vllm_api_server.openai_serving_chat = OpenAIServingChat( + engine=self.engine, + served_model_names=[ENGINE_CONFIG["model"]], + response_role="assistant", + chat_template=chat_template, + model_config=model_config, + ) + vllm_api_server.openai_serving_completion = OpenAIServingCompletion( + engine=self.engine, + served_model_names=[ENGINE_CONFIG["model"]], + model_config=model_config, + lora_modules=None, + ) + + @bentoml.api(route="/api/generate") + async def generate( + self, + prompt: str = "Explain superconductors like I'm five years old", + model: str = ENGINE_CONFIG["model"], + max_tokens: Annotated[ + int, + Ge(128), + Le(ENGINE_CONFIG["max_model_len"]), + ] = ENGINE_CONFIG["max_model_len"], + stop: Optional[list[str]] = None, + ) -> AsyncGenerator[str, None]: + if stop is None: + stop = [] + + from vllm import SamplingParams + + SAMPLING_PARAM = SamplingParams( + max_tokens=max_tokens, + stop=stop, + ) + stream = await self.engine.add_request(uuid.uuid4().hex, prompt, SAMPLING_PARAM) + + cursor = 0 + async for request_output in stream: + text = request_output.outputs[0].text + yield text[cursor:] + cursor = len(text) + + @bentoml.api(route="/api/chat") + async def chat( + self, + messages: list[Message] = [ + Message(content="what is the meaning of life?", role="user") + ], + model: str = ENGINE_CONFIG["model"], + max_tokens: Annotated[ + int, + Ge(128), + Le(ENGINE_CONFIG["max_model_len"]), + ] = ENGINE_CONFIG["max_model_len"], + stop: Optional[list[str]] = None, + ) -> AsyncGenerator[str, None]: + """ + light-weight chat API that takes in a list of messages and returns a response + """ + from vllm import SamplingParams + + try: + if OVERRIDE_CHAT_TEMPLATE: # community chat template + gen_config = _get_gen_config(CONSTANTS["chat_template"]) + if not stop: + if gen_config["stop_str"]: + stop = [gen_config["stop_str"]] + else: + stop = [] + system_prompt = gen_config["system_prompt"] + self.tokenizer.chat_template = gen_config["template"] + else: + if not stop: + if self.tokenizer.eos_token is not None: + stop = [self.tokenizer.eos_token] + else: + stop = [] + system_prompt = None + + SAMPLING_PARAM = SamplingParams( + max_tokens=max_tokens, + stop=stop, + ) + if system_prompt and messages[0].role != "system": + messages = [dict(role="system", content=system_prompt)] + messages + + prompt = self.tokenizer.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True, + ) + + stream = await self.engine.add_request( + uuid.uuid4().hex, prompt, SAMPLING_PARAM + ) + + cursor = 0 + strip_flag = True + async for request_output in stream: + text = request_output.outputs[0].text + assistant_message = text[cursor:] + if not strip_flag: # strip the leading whitespace + yield assistant_message + elif assistant_message.strip(): + strip_flag = False + yield assistant_message.lstrip() + cursor = len(text) + except Exception as e: + logger.error(f"Error in chat API: {e}") + yield f"Error in chat API: {e}" + + +@functools.lru_cache(maxsize=1) +def _get_gen_config(community_chat_template: str) -> dict: + logger.info(f"Load community_chat_template: {community_chat_template}") + chat_template_path = os.path.join( + os.path.dirname(__file__), "chat_templates", "chat_templates" + ) + config_path = os.path.join( + os.path.dirname(__file__), "chat_templates", "generation_configs" + ) + with open(os.path.join(config_path, f"{community_chat_template}.json")) as f: + gen_config = json.load(f) + chat_template_file = gen_config["chat_template"].split("/")[-1] + with open(os.path.join(chat_template_path, chat_template_file)) as f: + chat_template = f.read() + gen_config["template"] = chat_template.replace(" ", "").replace("\n", "") + return gen_config diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/404.html b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/404.html similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/404.html rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/404.html diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/700-532b1fe2415e5859.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/700-532b1fe2415e5859.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/700-532b1fe2415e5859.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/700-532b1fe2415e5859.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/995-34374f39bb210839.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/995-34374f39bb210839.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/995-34374f39bb210839.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/995-34374f39bb210839.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/css/429544bd3cd8ce3a.css b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/css/429544bd3cd8ce3a.css similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/css/429544bd3cd8ce3a.css rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/css/429544bd3cd8ce3a.css diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/css/5b67f082b31cfc7b.css b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/css/5b67f082b31cfc7b.css similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/css/5b67f082b31cfc7b.css rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/css/5b67f082b31cfc7b.css diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/css/9e63023b20ddb15e.css b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/css/9e63023b20ddb15e.css similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/_next/static/css/9e63023b20ddb15e.css rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/_next/static/css/9e63023b20ddb15e.css diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/apple-touch-icon.png b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/apple-touch-icon.png similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/apple-touch-icon.png rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/apple-touch-icon.png diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/chat.html b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/chat.html similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/chat.html rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/chat.html diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/chat.txt b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/chat.txt similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/chat.txt rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/chat.txt diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/favicon-16x16.png b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/favicon-16x16.png similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/favicon-16x16.png rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/favicon-16x16.png diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/favicon.ico b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/favicon.ico similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/favicon.ico rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/favicon.ico diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/index.html b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/index.html similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/index.html rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/index.html diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/index.txt b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/index.txt similarity index 100% rename from bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/ui/index.txt rename to bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-b90a/src/ui/index.txt diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/README.md b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/README.md deleted file mode 100644 index 3d8102c2..00000000 --- a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/README.md +++ /dev/null @@ -1,16 +0,0 @@ -# mixtral:8x7b-instruct-v0.1-fp16-d01a - -[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML) -[![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/) -[![join_slack](https://badgen.net/badge/Join/BentoML%20Slack/cyan?icon=slack)](https://l.bentoml.com/join-slack-swagger) -[![BentoML GitHub Repo](https://img.shields.io/github/stars/bentoml/bentoml?style=social)](https://github.com/bentoml/BentoML) -[![Twitter Follow](https://img.shields.io/twitter/follow/bentomlai?label=Follow%20BentoML&style=social)](https://twitter.com/bentomlai) - -This is a Machine Learning Service created with BentoML. - -## Help - -* [📖 Documentation](https://docs.bentoml.com/en/latest/): Learn how to use BentoML. -* [💬 Community](https://l.bentoml.com/join-slack-swagger): Join the BentoML Slack community. -* [🐛 GitHub Issues](https://github.com/bentoml/BentoML/issues): Report bugs and feature requests. -* Tip: you can also [customize this README](https://docs.bentoml.com/en/latest/concepts/bento.html#description). diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/env/python/requirements.lock.txt b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/env/python/requirements.lock.txt deleted file mode 100644 index 9118ea44..00000000 --- a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/env/python/requirements.lock.txt +++ /dev/null @@ -1,147 +0,0 @@ -aiohttp==3.9.5 -aiosignal==1.3.1 -annotated-types==0.7.0 -anyio==4.4.0 -appdirs==1.4.4 -asgiref==3.8.1 -async-timeout==4.0.3 -attrs==23.2.0 -bentoml==1.2.19 -build==1.2.1 -cattrs==23.1.2 -certifi==2024.7.4 -charset-normalizer==3.3.2 -circus==0.18.0 -click==8.1.7 -click-option-group==0.5.6 -cloudpickle==3.0.0 -cmake==3.30.0 -deepmerge==1.1.1 -deprecated==1.2.14 -diskcache==5.6.3 -distro==1.9.0 -dnspython==2.6.1 -email-validator==2.2.0 -exceptiongroup==1.2.1 -fastapi==0.111.0 -fastapi-cli==0.0.4 -filelock==3.15.4 -frozenlist==1.4.1 -fs==2.4.16 -fsspec==2024.6.1 -h11==0.14.0 -httpcore==1.0.5 -httptools==0.6.1 -httpx==0.27.0 -httpx-ws==0.6.0 -huggingface-hub==0.23.4 -idna==3.7 -importlib-metadata==6.11.0 -inflection==0.5.1 -interegular==0.3.3 -jinja2==3.1.4 -joblib==1.4.2 -jsonschema==4.22.0 -jsonschema-specifications==2023.12.1 -lark==1.1.9 -llvmlite==0.43.0 -lm-format-enforcer==0.10.1 -markdown-it-py==3.0.0 -markupsafe==2.1.5 -mdurl==0.1.2 -mpmath==1.3.0 -msgpack==1.0.8 -multidict==6.0.5 -nest-asyncio==1.6.0 -networkx==3.2.1 -ninja==1.11.1.1 -numba==0.60.0 -numpy==1.26.0 -nvidia-cublas-cu12==12.1.3.1 -nvidia-cuda-cupti-cu12==12.1.105 -nvidia-cuda-nvrtc-cu12==12.1.105 -nvidia-cuda-runtime-cu12==12.1.105 -nvidia-cudnn-cu12==8.9.2.26 -nvidia-cufft-cu12==11.0.2.54 -nvidia-curand-cu12==10.3.2.106 -nvidia-cusolver-cu12==11.4.5.107 -nvidia-cusparse-cu12==12.1.0.106 -nvidia-ml-py==11.525.150 -nvidia-nccl-cu12==2.20.5 -nvidia-nvjitlink-cu12==12.5.82 -nvidia-nvtx-cu12==12.1.105 -openai==1.35.10 -opentelemetry-api==1.20.0 -opentelemetry-instrumentation==0.41b0 -opentelemetry-instrumentation-aiohttp-client==0.41b0 -opentelemetry-instrumentation-asgi==0.41b0 -opentelemetry-sdk==1.20.0 -opentelemetry-semantic-conventions==0.41b0 -opentelemetry-util-http==0.41b0 -orjson==3.10.6 -outlines==0.0.34 -packaging==24.1 -pathspec==0.12.1 -pip-requirements-parser==32.0.1 -pip-tools==7.4.1 -prometheus-client==0.20.0 -prometheus-fastapi-instrumentator==7.0.0 -protobuf==5.27.2 -psutil==6.0.0 -py-cpuinfo==9.0.0 -pydantic==2.8.2 -pydantic-core==2.20.1 -pygments==2.18.0 -pyparsing==3.1.2 -pyproject-hooks==1.1.0 -python-dateutil==2.9.0.post0 -python-dotenv==1.0.1 -python-json-logger==2.0.7 -python-multipart==0.0.9 -pyyaml==6.0.1 -pyzmq==26.0.3 -ray==2.31.0 -referencing==0.35.1 -regex==2024.5.15 -requests==2.32.3 -rich==13.7.1 -rpds-py==0.18.1 -safetensors==0.4.3 -schema==0.7.7 -scipy==1.13.1 -sentencepiece==0.2.0 -shellingham==1.5.4 -simple-di==0.1.5 -six==1.16.0 -sniffio==1.3.1 -starlette==0.37.2 -sympy==1.12.1 -tiktoken==0.7.0 -tokenizers==0.19.1 -tomli==2.0.1 -tomli-w==1.0.0 -torch==2.3.0 -tornado==6.4.1 -tqdm==4.66.4 -transformers==4.41.0 -triton==2.3.0 -typer==0.12.3 -typing-extensions==4.12.2 -ujson==5.10.0 -urllib3==2.2.2 -uvicorn[standard]==0.30.1 -uvloop==0.19.0 -vllm==0.4.3 -vllm-flash-attn==2.5.8.post2 -watchfiles==0.22.0 -websockets==12.0 -wheel==0.43.0 -wrapt==1.16.0 -wsproto==1.2.0 -xformers==0.0.26.post1 -yarl==1.9.4 -zipp==3.19.2 - -# The following packages are considered to be unsafe in a requirements file: -pip==24.1.1 -setuptools==70.2.0 diff --git a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/service.py b/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/service.py deleted file mode 100644 index 94af0b0d..00000000 --- a/bentoml/bentos/mixtral/8x7b-instruct-v0.1-fp16-d01a/src/service.py +++ /dev/null @@ -1,239 +0,0 @@ -import functools -import json -import logging -import os -import sys -import uuid -from typing import AsyncGenerator, Literal, Optional - -import bentoml -import fastapi -import fastapi.staticfiles -import pydantic -import vllm.entrypoints.openai.api_server as vllm_api_server -import yaml -from annotated_types import Ge, Le -from bento_constants import CONSTANT_YAML -from fastapi.responses import FileResponse -from typing_extensions import Annotated, Literal - - -class Message(pydantic.BaseModel): - content: str - role: Literal["system", "user", "assistant"] - - -CONSTANTS = yaml.safe_load(CONSTANT_YAML) - -ENGINE_CONFIG = CONSTANTS["engine_config"] -SERVICE_CONFIG = CONSTANTS["service_config"] -OVERRIDE_CHAT_TEMPLATE = CONSTANTS.get("chat_template") - -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) - - -openai_api_app = fastapi.FastAPI() -static_app = fastapi.FastAPI() -ui_app = fastapi.FastAPI() - - -OPENAI_ENDPOINTS = [ - ["/chat/completions", vllm_api_server.create_chat_completion, ["POST"]], - ["/completions", vllm_api_server.create_completion, ["POST"]], - ["/models", vllm_api_server.show_available_models, ["GET"]], -] - - -class Message(pydantic.BaseModel): - role: Literal["system", "user", "assistant"] - content: str - - -for route, endpoint, methods in OPENAI_ENDPOINTS: - openai_api_app.add_api_route( - path=route, - endpoint=endpoint, - methods=methods, - include_in_schema=True, - ) - - -STATIC_DIR = os.path.join(os.path.dirname(__file__), "ui") - -ui_app.mount( - "/static", fastapi.staticfiles.StaticFiles(directory=STATIC_DIR), name="static" -) - - -@ui_app.get("/") -async def serve_chat_html(): - return FileResponse(os.path.join(STATIC_DIR, "chat.html")) - - -@ui_app.get("/{full_path:path}") -async def catch_all(full_path: str): - file_path = os.path.join(STATIC_DIR, full_path) - if os.path.exists(file_path): - return FileResponse(file_path) - return FileResponse(os.path.join(STATIC_DIR, "chat.html")) - - -# special handling for prometheus_client of bentoml -if "prometheus_client" in sys.modules: - sys.modules.pop("prometheus_client") - - -@bentoml.mount_asgi_app(openai_api_app, path="/v1") -@bentoml.mount_asgi_app(ui_app, path="/ui") -@bentoml.service(**SERVICE_CONFIG) -class VLLM: - def __init__(self) -> None: - from transformers import AutoTokenizer - from vllm import AsyncEngineArgs, AsyncLLMEngine - from vllm.entrypoints.openai.serving_chat import OpenAIServingChat - from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion - - ENGINE_ARGS = AsyncEngineArgs(**ENGINE_CONFIG) - self.engine = AsyncLLMEngine.from_engine_args(ENGINE_ARGS) - self.tokenizer = AutoTokenizer.from_pretrained(ENGINE_CONFIG["model"]) - logger.info(f"VLLM service initialized with model: {ENGINE_CONFIG['model']}") - - if OVERRIDE_CHAT_TEMPLATE: # use community chat template - gen_config = _get_gen_config(CONSTANTS["chat_template"]) - chat_template = gen_config["template"] - else: - chat_template = None - - model_config = self.engine.engine.get_model_config() - - # inject the engine into the openai serving chat and completion - vllm_api_server.openai_serving_chat = OpenAIServingChat( - engine=self.engine, - served_model_names=[ENGINE_CONFIG["model"]], - response_role="assistant", - chat_template=chat_template, - model_config=model_config, - ) - vllm_api_server.openai_serving_completion = OpenAIServingCompletion( - engine=self.engine, - served_model_names=[ENGINE_CONFIG["model"]], - model_config=model_config, - lora_modules=None, - ) - - @bentoml.api(route="/api/generate") - async def generate( - self, - prompt: str = "Explain superconductors like I'm five years old", - model: str = ENGINE_CONFIG["model"], - max_tokens: Annotated[ - int, - Ge(128), - Le(ENGINE_CONFIG["max_model_len"]), - ] = ENGINE_CONFIG["max_model_len"], - stop: Optional[list[str]] = None, - ) -> AsyncGenerator[str, None]: - if stop is None: - stop = [] - - from vllm import SamplingParams - - SAMPLING_PARAM = SamplingParams( - max_tokens=max_tokens, - stop=stop, - ) - stream = await self.engine.add_request(uuid.uuid4().hex, prompt, SAMPLING_PARAM) - - cursor = 0 - async for request_output in stream: - text = request_output.outputs[0].text - yield text[cursor:] - cursor = len(text) - - @bentoml.api(route="/api/chat") - async def chat( - self, - messages: list[Message] = [ - Message(content="what is the meaning of life?", role="user") - ], - model: str = ENGINE_CONFIG["model"], - max_tokens: Annotated[ - int, - Ge(128), - Le(ENGINE_CONFIG["max_model_len"]), - ] = ENGINE_CONFIG["max_model_len"], - stop: Optional[list[str]] = None, - ) -> AsyncGenerator[str, None]: - """ - light-weight chat API that takes in a list of messages and returns a response - """ - from vllm import SamplingParams - - try: - if OVERRIDE_CHAT_TEMPLATE: # community chat template - gen_config = _get_gen_config(CONSTANTS["chat_template"]) - if not stop: - if gen_config["stop_str"]: - stop = [gen_config["stop_str"]] - else: - stop = [] - system_prompt = gen_config["system_prompt"] - self.tokenizer.chat_template = gen_config["template"] - else: - if not stop: - if self.tokenizer.eos_token is not None: - stop = [self.tokenizer.eos_token] - else: - stop = [] - system_prompt = None - - SAMPLING_PARAM = SamplingParams( - max_tokens=max_tokens, - stop=stop, - ) - if system_prompt and messages[0].role != "system": - messages = [dict(role="system", content=system_prompt)] + messages - - prompt = self.tokenizer.apply_chat_template( - messages, - tokenize=False, - add_generation_prompt=True, - ) - - stream = await self.engine.add_request( - uuid.uuid4().hex, prompt, SAMPLING_PARAM - ) - - cursor = 0 - strip_flag = True - async for request_output in stream: - text = request_output.outputs[0].text - assistant_message = text[cursor:] - if not strip_flag: # strip the leading whitespace - yield assistant_message - elif assistant_message.strip(): - strip_flag = False - yield assistant_message.lstrip() - cursor = len(text) - except Exception as e: - logger.error(f"Error in chat API: {e}") - yield f"Error in chat API: {e}" - - -@functools.lru_cache(maxsize=1) -def _get_gen_config(community_chat_template: str) -> dict: - logger.info(f"Load community_chat_template: {community_chat_template}") - chat_template_path = os.path.join( - os.path.dirname(__file__), "chat_templates", "chat_templates" - ) - config_path = os.path.join( - os.path.dirname(__file__), "chat_templates", "generation_configs" - ) - with open(os.path.join(config_path, f"{community_chat_template}.json")) as f: - gen_config = json.load(f) - chat_template_file = gen_config["chat_template"].split("/")[-1] - with open(os.path.join(chat_template_path, chat_template_file)) as f: - chat_template = f.read() - gen_config["template"] = chat_template.replace(" ", "").replace("\n", "") - return gen_config diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/README.md b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/README.md new file mode 100644 index 00000000..297cc7b9 --- /dev/null +++ b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/README.md @@ -0,0 +1,16 @@ +# phi3:3.8b-instruct-fp16-07bf + +[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML) +[![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/) +[![join_slack](https://badgen.net/badge/Join/BentoML%20Slack/cyan?icon=slack)](https://l.bentoml.com/join-slack-swagger) +[![BentoML GitHub Repo](https://img.shields.io/github/stars/bentoml/bentoml?style=social)](https://github.com/bentoml/BentoML) +[![Twitter Follow](https://img.shields.io/twitter/follow/bentomlai?label=Follow%20BentoML&style=social)](https://twitter.com/bentomlai) + +This is a Machine Learning Service created with BentoML. + +## Help + +* [📖 Documentation](https://docs.bentoml.com/en/latest/): Learn how to use BentoML. +* [💬 Community](https://l.bentoml.com/join-slack-swagger): Join the BentoML Slack community. +* [🐛 GitHub Issues](https://github.com/bentoml/BentoML/issues): Report bugs and feature requests. +* Tip: you can also [customize this README](https://docs.bentoml.com/en/latest/concepts/bento.html#description). diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/apis/openapi.yaml b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/apis/openapi.yaml similarity index 99% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/apis/openapi.yaml rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/apis/openapi.yaml index c9ff014e..601e159e 100644 --- a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/apis/openapi.yaml +++ b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/apis/openapi.yaml @@ -1036,6 +1036,39 @@ paths: tags: - Service APIs x-bentoml-name: generate + /chat/: + get: + operationId: serve_chat_html__get + responses: + '200': + content: + application/json: + schema: {} + description: Successful Response + summary: Serve Chat Html + /chat/{full_path}: + get: + operationId: catch_all__full_path__get + parameters: + - in: path + name: full_path + required: true + schema: + title: Full Path + type: string + responses: + '200': + content: + application/json: + schema: {} + description: Successful Response + '422': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + description: Validation Error + summary: Catch All /healthz: get: description: Health check endpoint. Expecting an empty response with status @@ -1076,39 +1109,6 @@ paths: description: Successful Response tags: - Infrastructure - /ui/: - get: - operationId: serve_chat_html__get - responses: - '200': - content: - application/json: - schema: {} - description: Successful Response - summary: Serve Chat Html - /ui/{full_path}: - get: - operationId: catch_all__full_path__get - parameters: - - in: path - name: full_path - required: true - schema: - title: Full Path - type: string - responses: - '200': - content: - application/json: - schema: {} - description: Successful Response - '422': - content: - application/json: - schema: - $ref: '#/components/schemas/HTTPValidationError' - description: Validation Error - summary: Catch All /v1/chat/completions: post: operationId: create_chat_completion_chat_completions_post diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/apis/schema.json b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/apis/schema.json similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/apis/schema.json rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/apis/schema.json diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/bento.yaml b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/bento.yaml similarity index 97% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/bento.yaml rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/bento.yaml index f0264884..7c133e53 100644 --- a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/bento.yaml +++ b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/bento.yaml @@ -1,8 +1,8 @@ service: service:VLLM name: phi3 -version: 3.8b-instruct-fp16-a912 +version: 3.8b-instruct-fp16-07bf bentoml_version: 1.2.19 -creation_time: '2024-07-05T07:25:09.006927+00:00' +creation_time: '2024-07-08T07:47:49.195324+00:00' labels: openllm_alias: 3.8b,3.8b-mini,3.8b-mini-instruct-4k-fp16 openllm_hf_model_id: microsoft/Phi-3-mini-4k-instruct diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/env/docker/Dockerfile b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/env/docker/Dockerfile similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/env/docker/Dockerfile rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/env/docker/Dockerfile diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/env/docker/entrypoint.sh b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/env/docker/entrypoint.sh similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/env/docker/entrypoint.sh rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/env/docker/entrypoint.sh diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/env/python/install.sh b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/env/python/install.sh similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/env/python/install.sh rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/env/python/install.sh diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/env/python/requirements.lock.txt b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/env/python/requirements.lock.txt new file mode 100644 index 00000000..925ce193 --- /dev/null +++ b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/env/python/requirements.lock.txt @@ -0,0 +1,147 @@ +aiohttp==3.9.5 +aiosignal==1.3.1 +annotated-types==0.7.0 +anyio==4.4.0 +appdirs==1.4.4 +asgiref==3.8.1 +async-timeout==4.0.3 +attrs==23.2.0 +bentoml==1.2.19 +build==1.2.1 +cattrs==23.1.2 +certifi==2024.7.4 +charset-normalizer==3.3.2 +circus==0.18.0 +click==8.1.7 +click-option-group==0.5.6 +cloudpickle==3.0.0 +cmake==3.30.0 +deepmerge==1.1.1 +deprecated==1.2.14 +diskcache==5.6.3 +distro==1.9.0 +dnspython==2.6.1 +email-validator==2.2.0 +exceptiongroup==1.2.1 +fastapi==0.111.0 +fastapi-cli==0.0.4 +filelock==3.15.4 +frozenlist==1.4.1 +fs==2.4.16 +fsspec==2024.6.1 +h11==0.14.0 +httpcore==1.0.5 +httptools==0.6.1 +httpx==0.27.0 +httpx-ws==0.6.0 +huggingface-hub==0.23.4 +idna==3.7 +importlib-metadata==6.11.0 +inflection==0.5.1 +interegular==0.3.3 +jinja2==3.1.4 +joblib==1.4.2 +jsonschema==4.22.0 +jsonschema-specifications==2023.12.1 +lark==1.1.9 +llvmlite==0.43.0 +lm-format-enforcer==0.10.1 +markdown-it-py==3.0.0 +markupsafe==2.1.5 +mdurl==0.1.2 +mpmath==1.3.0 +msgpack==1.0.8 +multidict==6.0.5 +nest-asyncio==1.6.0 +networkx==3.2.1 +ninja==1.11.1.1 +numba==0.60.0 +numpy==1.26.0 +nvidia-cublas-cu12==12.1.3.1 +nvidia-cuda-cupti-cu12==12.1.105 +nvidia-cuda-nvrtc-cu12==12.1.105 +nvidia-cuda-runtime-cu12==12.1.105 +nvidia-cudnn-cu12==8.9.2.26 +nvidia-cufft-cu12==11.0.2.54 +nvidia-curand-cu12==10.3.2.106 +nvidia-cusolver-cu12==11.4.5.107 +nvidia-cusparse-cu12==12.1.0.106 +nvidia-ml-py==11.525.150 +nvidia-nccl-cu12==2.20.5 +nvidia-nvjitlink-cu12==12.5.82 +nvidia-nvtx-cu12==12.1.105 +openai==1.35.10 +opentelemetry-api==1.20.0 +opentelemetry-instrumentation==0.41b0 +opentelemetry-instrumentation-aiohttp-client==0.41b0 +opentelemetry-instrumentation-asgi==0.41b0 +opentelemetry-sdk==1.20.0 +opentelemetry-semantic-conventions==0.41b0 +opentelemetry-util-http==0.41b0 +orjson==3.10.6 +outlines==0.0.34 +packaging==24.1 +pathspec==0.12.1 +pip-requirements-parser==32.0.1 +pip-tools==7.4.1 +prometheus-client==0.20.0 +prometheus-fastapi-instrumentator==7.0.0 +protobuf==5.27.2 +psutil==6.0.0 +py-cpuinfo==9.0.0 +pydantic==2.8.2 +pydantic-core==2.20.1 +pygments==2.18.0 +pyparsing==3.1.2 +pyproject-hooks==1.1.0 +python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 +python-json-logger==2.0.7 +python-multipart==0.0.9 +pyyaml==6.0.1 +pyzmq==26.0.3 +ray==2.31.0 +referencing==0.35.1 +regex==2024.5.15 +requests==2.32.3 +rich==13.7.1 +rpds-py==0.18.1 +safetensors==0.4.3 +schema==0.7.7 +scipy==1.13.1 +sentencepiece==0.2.0 +shellingham==1.5.4 +simple-di==0.1.5 +six==1.16.0 +sniffio==1.3.1 +starlette==0.37.2 +sympy==1.12.1 +tiktoken==0.7.0 +tokenizers==0.19.1 +tomli==2.0.1 +tomli-w==1.0.0 +torch==2.3.0 +tornado==6.4.1 +tqdm==4.66.4 +transformers==4.41.0 +triton==2.3.0 +typer==0.12.3 +typing-extensions==4.12.2 +ujson==5.10.0 +urllib3==2.2.2 +uvicorn[standard]==0.30.1 +uvloop==0.19.0 +vllm==0.4.3 +vllm-flash-attn==2.5.8.post2 +watchfiles==0.22.0 +websockets==12.0 +wheel==0.43.0 +wrapt==1.16.0 +wsproto==1.2.0 +xformers==0.0.26.post1 +yarl==1.9.4 +zipp==3.19.2 + +# The following packages are considered to be unsafe in a requirements file: +pip==24.1.2 +setuptools==70.2.0 diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/env/python/requirements.txt b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/env/python/requirements.txt similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/env/python/requirements.txt rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/env/python/requirements.txt diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/env/python/version.txt b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/env/python/version.txt similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/env/python/version.txt rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/env/python/version.txt diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/bento_constants.py b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/bento_constants.py similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/bento_constants.py rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/bento_constants.py diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/bentofile.yaml b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/bentofile.yaml similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/bentofile.yaml rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/bentofile.yaml diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/chat_templates/alpaca.jinja b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/chat_templates/alpaca.jinja similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/chat_templates/alpaca.jinja rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/chat_templates/alpaca.jinja diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/chat_templates/amberchat.jinja b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/chat_templates/amberchat.jinja similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/chat_templates/amberchat.jinja rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/chat_templates/amberchat.jinja diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/chat_templates/chatml.jinja b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/chat_templates/chatml.jinja similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/chat_templates/chatml.jinja rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/chat_templates/chatml.jinja diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/chat_templates/chatqa.jinja b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/chat_templates/chatqa.jinja similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/chat_templates/chatqa.jinja rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/chat_templates/chatqa.jinja diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/chat_templates/falcon-instruct.jinja b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/chat_templates/falcon-instruct.jinja similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/chat_templates/falcon-instruct.jinja rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/chat_templates/falcon-instruct.jinja diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/chat_templates/gemma-it.jinja b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/chat_templates/gemma-it.jinja similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/chat_templates/gemma-it.jinja rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/chat_templates/gemma-it.jinja diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/chat_templates/llama-2-chat.jinja b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/chat_templates/llama-2-chat.jinja similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/chat_templates/llama-2-chat.jinja rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/chat_templates/llama-2-chat.jinja diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/chat_templates/llama-3-chat.jinja b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/chat_templates/llama-3-chat.jinja similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/chat_templates/llama-3-chat.jinja rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/chat_templates/llama-3-chat.jinja diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/chat_templates/mistral-instruct.jinja b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/chat_templates/mistral-instruct.jinja similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/chat_templates/mistral-instruct.jinja rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/chat_templates/mistral-instruct.jinja diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/chat_templates/openchat.jinja b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/chat_templates/openchat.jinja similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/chat_templates/openchat.jinja rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/chat_templates/openchat.jinja diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/chat_templates/phi-3.jinja b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/chat_templates/phi-3.jinja similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/chat_templates/phi-3.jinja rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/chat_templates/phi-3.jinja diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/chat_templates/saiga.jinja b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/chat_templates/saiga.jinja similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/chat_templates/saiga.jinja rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/chat_templates/saiga.jinja diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/chat_templates/solar-instruct.jinja b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/chat_templates/solar-instruct.jinja similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/chat_templates/solar-instruct.jinja rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/chat_templates/solar-instruct.jinja diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/chat_templates/vicuna.jinja b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/chat_templates/vicuna.jinja similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/chat_templates/vicuna.jinja rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/chat_templates/vicuna.jinja diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/chat_templates/zephyr.jinja b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/chat_templates/zephyr.jinja similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/chat_templates/zephyr.jinja rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/chat_templates/zephyr.jinja diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/generation_configs/alpaca.json b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/generation_configs/alpaca.json similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/generation_configs/alpaca.json rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/generation_configs/alpaca.json diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/generation_configs/amberchat.json b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/generation_configs/amberchat.json similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/generation_configs/amberchat.json rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/generation_configs/amberchat.json diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/generation_configs/chatqa.json b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/generation_configs/chatqa.json similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/generation_configs/chatqa.json rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/generation_configs/chatqa.json diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/generation_configs/gemma-it.json b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/generation_configs/gemma-it.json similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/generation_configs/gemma-it.json rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/generation_configs/gemma-it.json diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/generation_configs/llama-2-chat.json b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/generation_configs/llama-2-chat.json similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/generation_configs/llama-2-chat.json rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/generation_configs/llama-2-chat.json diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/generation_configs/llama-3-chat.json b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/generation_configs/llama-3-chat.json similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/generation_configs/llama-3-chat.json rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/generation_configs/llama-3-chat.json diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/generation_configs/mistral-instruct.json b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/generation_configs/mistral-instruct.json similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/generation_configs/mistral-instruct.json rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/generation_configs/mistral-instruct.json diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/generation_configs/openchat.json b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/generation_configs/openchat.json similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/generation_configs/openchat.json rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/generation_configs/openchat.json diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/generation_configs/orca-2.json b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/generation_configs/orca-2.json similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/generation_configs/orca-2.json rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/generation_configs/orca-2.json diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/generation_configs/phi-3.json b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/generation_configs/phi-3.json similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/generation_configs/phi-3.json rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/generation_configs/phi-3.json diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/generation_configs/qwen2-chat.json b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/generation_configs/qwen2-chat.json similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/generation_configs/qwen2-chat.json rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/generation_configs/qwen2-chat.json diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/generation_configs/saiga.json b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/generation_configs/saiga.json similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/generation_configs/saiga.json rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/generation_configs/saiga.json diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/generation_configs/solar-instruct.json b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/generation_configs/solar-instruct.json similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/generation_configs/solar-instruct.json rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/generation_configs/solar-instruct.json diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/generation_configs/vicuna.json b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/generation_configs/vicuna.json similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/generation_configs/vicuna.json rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/generation_configs/vicuna.json diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/generation_configs/yi-chat.json b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/generation_configs/yi-chat.json similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/generation_configs/yi-chat.json rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/generation_configs/yi-chat.json diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/generation_configs/zephyr.json b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/generation_configs/zephyr.json similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/chat_templates/generation_configs/zephyr.json rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/chat_templates/generation_configs/zephyr.json diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/service.py b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/service.py new file mode 100644 index 00000000..42152243 --- /dev/null +++ b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/service.py @@ -0,0 +1,239 @@ +import functools +import json +import logging +import os +import sys +import uuid +from typing import AsyncGenerator, Literal, Optional + +import bentoml +import fastapi +import fastapi.staticfiles +import pydantic +import vllm.entrypoints.openai.api_server as vllm_api_server +import yaml +from annotated_types import Ge, Le +from bento_constants import CONSTANT_YAML +from fastapi.responses import FileResponse +from typing_extensions import Annotated, Literal + + +class Message(pydantic.BaseModel): + content: str + role: Literal["system", "user", "assistant"] + + +CONSTANTS = yaml.safe_load(CONSTANT_YAML) + +ENGINE_CONFIG = CONSTANTS["engine_config"] +SERVICE_CONFIG = CONSTANTS["service_config"] +OVERRIDE_CHAT_TEMPLATE = CONSTANTS.get("chat_template") + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +openai_api_app = fastapi.FastAPI() +static_app = fastapi.FastAPI() +ui_app = fastapi.FastAPI() + + +OPENAI_ENDPOINTS = [ + ["/chat/completions", vllm_api_server.create_chat_completion, ["POST"]], + ["/completions", vllm_api_server.create_completion, ["POST"]], + ["/models", vllm_api_server.show_available_models, ["GET"]], +] + + +class Message(pydantic.BaseModel): + role: Literal["system", "user", "assistant"] + content: str + + +for route, endpoint, methods in OPENAI_ENDPOINTS: + openai_api_app.add_api_route( + path=route, + endpoint=endpoint, + methods=methods, + include_in_schema=True, + ) + + +STATIC_DIR = os.path.join(os.path.dirname(__file__), "ui") + +ui_app.mount( + "/static", fastapi.staticfiles.StaticFiles(directory=STATIC_DIR), name="static" +) + + +@ui_app.get("/") +async def serve_chat_html(): + return FileResponse(os.path.join(STATIC_DIR, "chat.html")) + + +@ui_app.get("/{full_path:path}") +async def catch_all(full_path: str): + file_path = os.path.join(STATIC_DIR, full_path) + if os.path.exists(file_path): + return FileResponse(file_path) + return FileResponse(os.path.join(STATIC_DIR, "chat.html")) + + +# special handling for prometheus_client of bentoml +if "prometheus_client" in sys.modules: + sys.modules.pop("prometheus_client") + + +@bentoml.mount_asgi_app(openai_api_app, path="/v1") +@bentoml.mount_asgi_app(ui_app, path="/chat") +@bentoml.service(**SERVICE_CONFIG) +class VLLM: + def __init__(self) -> None: + from transformers import AutoTokenizer + from vllm import AsyncEngineArgs, AsyncLLMEngine + from vllm.entrypoints.openai.serving_chat import OpenAIServingChat + from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion + + ENGINE_ARGS = AsyncEngineArgs(**ENGINE_CONFIG) + self.engine = AsyncLLMEngine.from_engine_args(ENGINE_ARGS) + self.tokenizer = AutoTokenizer.from_pretrained(ENGINE_CONFIG["model"]) + logger.info(f"VLLM service initialized with model: {ENGINE_CONFIG['model']}") + + if OVERRIDE_CHAT_TEMPLATE: # use community chat template + gen_config = _get_gen_config(CONSTANTS["chat_template"]) + chat_template = gen_config["template"] + else: + chat_template = None + + model_config = self.engine.engine.get_model_config() + + # inject the engine into the openai serving chat and completion + vllm_api_server.openai_serving_chat = OpenAIServingChat( + engine=self.engine, + served_model_names=[ENGINE_CONFIG["model"]], + response_role="assistant", + chat_template=chat_template, + model_config=model_config, + ) + vllm_api_server.openai_serving_completion = OpenAIServingCompletion( + engine=self.engine, + served_model_names=[ENGINE_CONFIG["model"]], + model_config=model_config, + lora_modules=None, + ) + + @bentoml.api(route="/api/generate") + async def generate( + self, + prompt: str = "Explain superconductors like I'm five years old", + model: str = ENGINE_CONFIG["model"], + max_tokens: Annotated[ + int, + Ge(128), + Le(ENGINE_CONFIG["max_model_len"]), + ] = ENGINE_CONFIG["max_model_len"], + stop: Optional[list[str]] = None, + ) -> AsyncGenerator[str, None]: + if stop is None: + stop = [] + + from vllm import SamplingParams + + SAMPLING_PARAM = SamplingParams( + max_tokens=max_tokens, + stop=stop, + ) + stream = await self.engine.add_request(uuid.uuid4().hex, prompt, SAMPLING_PARAM) + + cursor = 0 + async for request_output in stream: + text = request_output.outputs[0].text + yield text[cursor:] + cursor = len(text) + + @bentoml.api(route="/api/chat") + async def chat( + self, + messages: list[Message] = [ + Message(content="what is the meaning of life?", role="user") + ], + model: str = ENGINE_CONFIG["model"], + max_tokens: Annotated[ + int, + Ge(128), + Le(ENGINE_CONFIG["max_model_len"]), + ] = ENGINE_CONFIG["max_model_len"], + stop: Optional[list[str]] = None, + ) -> AsyncGenerator[str, None]: + """ + light-weight chat API that takes in a list of messages and returns a response + """ + from vllm import SamplingParams + + try: + if OVERRIDE_CHAT_TEMPLATE: # community chat template + gen_config = _get_gen_config(CONSTANTS["chat_template"]) + if not stop: + if gen_config["stop_str"]: + stop = [gen_config["stop_str"]] + else: + stop = [] + system_prompt = gen_config["system_prompt"] + self.tokenizer.chat_template = gen_config["template"] + else: + if not stop: + if self.tokenizer.eos_token is not None: + stop = [self.tokenizer.eos_token] + else: + stop = [] + system_prompt = None + + SAMPLING_PARAM = SamplingParams( + max_tokens=max_tokens, + stop=stop, + ) + if system_prompt and messages[0].role != "system": + messages = [dict(role="system", content=system_prompt)] + messages + + prompt = self.tokenizer.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True, + ) + + stream = await self.engine.add_request( + uuid.uuid4().hex, prompt, SAMPLING_PARAM + ) + + cursor = 0 + strip_flag = True + async for request_output in stream: + text = request_output.outputs[0].text + assistant_message = text[cursor:] + if not strip_flag: # strip the leading whitespace + yield assistant_message + elif assistant_message.strip(): + strip_flag = False + yield assistant_message.lstrip() + cursor = len(text) + except Exception as e: + logger.error(f"Error in chat API: {e}") + yield f"Error in chat API: {e}" + + +@functools.lru_cache(maxsize=1) +def _get_gen_config(community_chat_template: str) -> dict: + logger.info(f"Load community_chat_template: {community_chat_template}") + chat_template_path = os.path.join( + os.path.dirname(__file__), "chat_templates", "chat_templates" + ) + config_path = os.path.join( + os.path.dirname(__file__), "chat_templates", "generation_configs" + ) + with open(os.path.join(config_path, f"{community_chat_template}.json")) as f: + gen_config = json.load(f) + chat_template_file = gen_config["chat_template"].split("/")[-1] + with open(os.path.join(chat_template_path, chat_template_file)) as f: + chat_template = f.read() + gen_config["template"] = chat_template.replace(" ", "").replace("\n", "") + return gen_config diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/404.html b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/404.html similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/404.html rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/404.html diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/700-532b1fe2415e5859.js b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/700-532b1fe2415e5859.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/700-532b1fe2415e5859.js rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/700-532b1fe2415e5859.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/995-34374f39bb210839.js b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/995-34374f39bb210839.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/995-34374f39bb210839.js rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/995-34374f39bb210839.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/css/429544bd3cd8ce3a.css b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/css/429544bd3cd8ce3a.css similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/css/429544bd3cd8ce3a.css rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/css/429544bd3cd8ce3a.css diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/css/5b67f082b31cfc7b.css b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/css/5b67f082b31cfc7b.css similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/css/5b67f082b31cfc7b.css rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/css/5b67f082b31cfc7b.css diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/css/9e63023b20ddb15e.css b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/css/9e63023b20ddb15e.css similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/_next/static/css/9e63023b20ddb15e.css rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/_next/static/css/9e63023b20ddb15e.css diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/apple-touch-icon.png b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/apple-touch-icon.png similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/apple-touch-icon.png rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/apple-touch-icon.png diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/chat.html b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/chat.html similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/chat.html rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/chat.html diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/chat.txt b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/chat.txt similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/chat.txt rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/chat.txt diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/favicon-16x16.png b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/favicon-16x16.png similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/favicon-16x16.png rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/favicon-16x16.png diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/favicon.ico b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/favicon.ico similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/favicon.ico rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/favicon.ico diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/index.html b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/index.html similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/index.html rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/index.html diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/index.txt b/bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/index.txt similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/ui/index.txt rename to bentoml/bentos/phi3/3.8b-instruct-fp16-07bf/src/ui/index.txt diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/README.md b/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/README.md deleted file mode 100644 index efcea327..00000000 --- a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/README.md +++ /dev/null @@ -1,16 +0,0 @@ -# phi3:3.8b-instruct-fp16-a912 - -[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML) -[![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/) -[![join_slack](https://badgen.net/badge/Join/BentoML%20Slack/cyan?icon=slack)](https://l.bentoml.com/join-slack-swagger) -[![BentoML GitHub Repo](https://img.shields.io/github/stars/bentoml/bentoml?style=social)](https://github.com/bentoml/BentoML) -[![Twitter Follow](https://img.shields.io/twitter/follow/bentomlai?label=Follow%20BentoML&style=social)](https://twitter.com/bentomlai) - -This is a Machine Learning Service created with BentoML. - -## Help - -* [📖 Documentation](https://docs.bentoml.com/en/latest/): Learn how to use BentoML. -* [💬 Community](https://l.bentoml.com/join-slack-swagger): Join the BentoML Slack community. -* [🐛 GitHub Issues](https://github.com/bentoml/BentoML/issues): Report bugs and feature requests. -* Tip: you can also [customize this README](https://docs.bentoml.com/en/latest/concepts/bento.html#description). diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/env/python/requirements.lock.txt b/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/env/python/requirements.lock.txt deleted file mode 100644 index 9118ea44..00000000 --- a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/env/python/requirements.lock.txt +++ /dev/null @@ -1,147 +0,0 @@ -aiohttp==3.9.5 -aiosignal==1.3.1 -annotated-types==0.7.0 -anyio==4.4.0 -appdirs==1.4.4 -asgiref==3.8.1 -async-timeout==4.0.3 -attrs==23.2.0 -bentoml==1.2.19 -build==1.2.1 -cattrs==23.1.2 -certifi==2024.7.4 -charset-normalizer==3.3.2 -circus==0.18.0 -click==8.1.7 -click-option-group==0.5.6 -cloudpickle==3.0.0 -cmake==3.30.0 -deepmerge==1.1.1 -deprecated==1.2.14 -diskcache==5.6.3 -distro==1.9.0 -dnspython==2.6.1 -email-validator==2.2.0 -exceptiongroup==1.2.1 -fastapi==0.111.0 -fastapi-cli==0.0.4 -filelock==3.15.4 -frozenlist==1.4.1 -fs==2.4.16 -fsspec==2024.6.1 -h11==0.14.0 -httpcore==1.0.5 -httptools==0.6.1 -httpx==0.27.0 -httpx-ws==0.6.0 -huggingface-hub==0.23.4 -idna==3.7 -importlib-metadata==6.11.0 -inflection==0.5.1 -interegular==0.3.3 -jinja2==3.1.4 -joblib==1.4.2 -jsonschema==4.22.0 -jsonschema-specifications==2023.12.1 -lark==1.1.9 -llvmlite==0.43.0 -lm-format-enforcer==0.10.1 -markdown-it-py==3.0.0 -markupsafe==2.1.5 -mdurl==0.1.2 -mpmath==1.3.0 -msgpack==1.0.8 -multidict==6.0.5 -nest-asyncio==1.6.0 -networkx==3.2.1 -ninja==1.11.1.1 -numba==0.60.0 -numpy==1.26.0 -nvidia-cublas-cu12==12.1.3.1 -nvidia-cuda-cupti-cu12==12.1.105 -nvidia-cuda-nvrtc-cu12==12.1.105 -nvidia-cuda-runtime-cu12==12.1.105 -nvidia-cudnn-cu12==8.9.2.26 -nvidia-cufft-cu12==11.0.2.54 -nvidia-curand-cu12==10.3.2.106 -nvidia-cusolver-cu12==11.4.5.107 -nvidia-cusparse-cu12==12.1.0.106 -nvidia-ml-py==11.525.150 -nvidia-nccl-cu12==2.20.5 -nvidia-nvjitlink-cu12==12.5.82 -nvidia-nvtx-cu12==12.1.105 -openai==1.35.10 -opentelemetry-api==1.20.0 -opentelemetry-instrumentation==0.41b0 -opentelemetry-instrumentation-aiohttp-client==0.41b0 -opentelemetry-instrumentation-asgi==0.41b0 -opentelemetry-sdk==1.20.0 -opentelemetry-semantic-conventions==0.41b0 -opentelemetry-util-http==0.41b0 -orjson==3.10.6 -outlines==0.0.34 -packaging==24.1 -pathspec==0.12.1 -pip-requirements-parser==32.0.1 -pip-tools==7.4.1 -prometheus-client==0.20.0 -prometheus-fastapi-instrumentator==7.0.0 -protobuf==5.27.2 -psutil==6.0.0 -py-cpuinfo==9.0.0 -pydantic==2.8.2 -pydantic-core==2.20.1 -pygments==2.18.0 -pyparsing==3.1.2 -pyproject-hooks==1.1.0 -python-dateutil==2.9.0.post0 -python-dotenv==1.0.1 -python-json-logger==2.0.7 -python-multipart==0.0.9 -pyyaml==6.0.1 -pyzmq==26.0.3 -ray==2.31.0 -referencing==0.35.1 -regex==2024.5.15 -requests==2.32.3 -rich==13.7.1 -rpds-py==0.18.1 -safetensors==0.4.3 -schema==0.7.7 -scipy==1.13.1 -sentencepiece==0.2.0 -shellingham==1.5.4 -simple-di==0.1.5 -six==1.16.0 -sniffio==1.3.1 -starlette==0.37.2 -sympy==1.12.1 -tiktoken==0.7.0 -tokenizers==0.19.1 -tomli==2.0.1 -tomli-w==1.0.0 -torch==2.3.0 -tornado==6.4.1 -tqdm==4.66.4 -transformers==4.41.0 -triton==2.3.0 -typer==0.12.3 -typing-extensions==4.12.2 -ujson==5.10.0 -urllib3==2.2.2 -uvicorn[standard]==0.30.1 -uvloop==0.19.0 -vllm==0.4.3 -vllm-flash-attn==2.5.8.post2 -watchfiles==0.22.0 -websockets==12.0 -wheel==0.43.0 -wrapt==1.16.0 -wsproto==1.2.0 -xformers==0.0.26.post1 -yarl==1.9.4 -zipp==3.19.2 - -# The following packages are considered to be unsafe in a requirements file: -pip==24.1.1 -setuptools==70.2.0 diff --git a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/service.py b/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/service.py deleted file mode 100644 index 94af0b0d..00000000 --- a/bentoml/bentos/phi3/3.8b-instruct-fp16-a912/src/service.py +++ /dev/null @@ -1,239 +0,0 @@ -import functools -import json -import logging -import os -import sys -import uuid -from typing import AsyncGenerator, Literal, Optional - -import bentoml -import fastapi -import fastapi.staticfiles -import pydantic -import vllm.entrypoints.openai.api_server as vllm_api_server -import yaml -from annotated_types import Ge, Le -from bento_constants import CONSTANT_YAML -from fastapi.responses import FileResponse -from typing_extensions import Annotated, Literal - - -class Message(pydantic.BaseModel): - content: str - role: Literal["system", "user", "assistant"] - - -CONSTANTS = yaml.safe_load(CONSTANT_YAML) - -ENGINE_CONFIG = CONSTANTS["engine_config"] -SERVICE_CONFIG = CONSTANTS["service_config"] -OVERRIDE_CHAT_TEMPLATE = CONSTANTS.get("chat_template") - -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) - - -openai_api_app = fastapi.FastAPI() -static_app = fastapi.FastAPI() -ui_app = fastapi.FastAPI() - - -OPENAI_ENDPOINTS = [ - ["/chat/completions", vllm_api_server.create_chat_completion, ["POST"]], - ["/completions", vllm_api_server.create_completion, ["POST"]], - ["/models", vllm_api_server.show_available_models, ["GET"]], -] - - -class Message(pydantic.BaseModel): - role: Literal["system", "user", "assistant"] - content: str - - -for route, endpoint, methods in OPENAI_ENDPOINTS: - openai_api_app.add_api_route( - path=route, - endpoint=endpoint, - methods=methods, - include_in_schema=True, - ) - - -STATIC_DIR = os.path.join(os.path.dirname(__file__), "ui") - -ui_app.mount( - "/static", fastapi.staticfiles.StaticFiles(directory=STATIC_DIR), name="static" -) - - -@ui_app.get("/") -async def serve_chat_html(): - return FileResponse(os.path.join(STATIC_DIR, "chat.html")) - - -@ui_app.get("/{full_path:path}") -async def catch_all(full_path: str): - file_path = os.path.join(STATIC_DIR, full_path) - if os.path.exists(file_path): - return FileResponse(file_path) - return FileResponse(os.path.join(STATIC_DIR, "chat.html")) - - -# special handling for prometheus_client of bentoml -if "prometheus_client" in sys.modules: - sys.modules.pop("prometheus_client") - - -@bentoml.mount_asgi_app(openai_api_app, path="/v1") -@bentoml.mount_asgi_app(ui_app, path="/ui") -@bentoml.service(**SERVICE_CONFIG) -class VLLM: - def __init__(self) -> None: - from transformers import AutoTokenizer - from vllm import AsyncEngineArgs, AsyncLLMEngine - from vllm.entrypoints.openai.serving_chat import OpenAIServingChat - from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion - - ENGINE_ARGS = AsyncEngineArgs(**ENGINE_CONFIG) - self.engine = AsyncLLMEngine.from_engine_args(ENGINE_ARGS) - self.tokenizer = AutoTokenizer.from_pretrained(ENGINE_CONFIG["model"]) - logger.info(f"VLLM service initialized with model: {ENGINE_CONFIG['model']}") - - if OVERRIDE_CHAT_TEMPLATE: # use community chat template - gen_config = _get_gen_config(CONSTANTS["chat_template"]) - chat_template = gen_config["template"] - else: - chat_template = None - - model_config = self.engine.engine.get_model_config() - - # inject the engine into the openai serving chat and completion - vllm_api_server.openai_serving_chat = OpenAIServingChat( - engine=self.engine, - served_model_names=[ENGINE_CONFIG["model"]], - response_role="assistant", - chat_template=chat_template, - model_config=model_config, - ) - vllm_api_server.openai_serving_completion = OpenAIServingCompletion( - engine=self.engine, - served_model_names=[ENGINE_CONFIG["model"]], - model_config=model_config, - lora_modules=None, - ) - - @bentoml.api(route="/api/generate") - async def generate( - self, - prompt: str = "Explain superconductors like I'm five years old", - model: str = ENGINE_CONFIG["model"], - max_tokens: Annotated[ - int, - Ge(128), - Le(ENGINE_CONFIG["max_model_len"]), - ] = ENGINE_CONFIG["max_model_len"], - stop: Optional[list[str]] = None, - ) -> AsyncGenerator[str, None]: - if stop is None: - stop = [] - - from vllm import SamplingParams - - SAMPLING_PARAM = SamplingParams( - max_tokens=max_tokens, - stop=stop, - ) - stream = await self.engine.add_request(uuid.uuid4().hex, prompt, SAMPLING_PARAM) - - cursor = 0 - async for request_output in stream: - text = request_output.outputs[0].text - yield text[cursor:] - cursor = len(text) - - @bentoml.api(route="/api/chat") - async def chat( - self, - messages: list[Message] = [ - Message(content="what is the meaning of life?", role="user") - ], - model: str = ENGINE_CONFIG["model"], - max_tokens: Annotated[ - int, - Ge(128), - Le(ENGINE_CONFIG["max_model_len"]), - ] = ENGINE_CONFIG["max_model_len"], - stop: Optional[list[str]] = None, - ) -> AsyncGenerator[str, None]: - """ - light-weight chat API that takes in a list of messages and returns a response - """ - from vllm import SamplingParams - - try: - if OVERRIDE_CHAT_TEMPLATE: # community chat template - gen_config = _get_gen_config(CONSTANTS["chat_template"]) - if not stop: - if gen_config["stop_str"]: - stop = [gen_config["stop_str"]] - else: - stop = [] - system_prompt = gen_config["system_prompt"] - self.tokenizer.chat_template = gen_config["template"] - else: - if not stop: - if self.tokenizer.eos_token is not None: - stop = [self.tokenizer.eos_token] - else: - stop = [] - system_prompt = None - - SAMPLING_PARAM = SamplingParams( - max_tokens=max_tokens, - stop=stop, - ) - if system_prompt and messages[0].role != "system": - messages = [dict(role="system", content=system_prompt)] + messages - - prompt = self.tokenizer.apply_chat_template( - messages, - tokenize=False, - add_generation_prompt=True, - ) - - stream = await self.engine.add_request( - uuid.uuid4().hex, prompt, SAMPLING_PARAM - ) - - cursor = 0 - strip_flag = True - async for request_output in stream: - text = request_output.outputs[0].text - assistant_message = text[cursor:] - if not strip_flag: # strip the leading whitespace - yield assistant_message - elif assistant_message.strip(): - strip_flag = False - yield assistant_message.lstrip() - cursor = len(text) - except Exception as e: - logger.error(f"Error in chat API: {e}") - yield f"Error in chat API: {e}" - - -@functools.lru_cache(maxsize=1) -def _get_gen_config(community_chat_template: str) -> dict: - logger.info(f"Load community_chat_template: {community_chat_template}") - chat_template_path = os.path.join( - os.path.dirname(__file__), "chat_templates", "chat_templates" - ) - config_path = os.path.join( - os.path.dirname(__file__), "chat_templates", "generation_configs" - ) - with open(os.path.join(config_path, f"{community_chat_template}.json")) as f: - gen_config = json.load(f) - chat_template_file = gen_config["chat_template"].split("/")[-1] - with open(os.path.join(chat_template_path, chat_template_file)) as f: - chat_template = f.read() - gen_config["template"] = chat_template.replace(" ", "").replace("\n", "") - return gen_config diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/README.md b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/README.md new file mode 100644 index 00000000..730e2b4b --- /dev/null +++ b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/README.md @@ -0,0 +1,16 @@ +# phi3:3.8b-instruct-ggml-q4-584f + +[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML) +[![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/) +[![join_slack](https://badgen.net/badge/Join/BentoML%20Slack/cyan?icon=slack)](https://l.bentoml.com/join-slack-swagger) +[![BentoML GitHub Repo](https://img.shields.io/github/stars/bentoml/bentoml?style=social)](https://github.com/bentoml/BentoML) +[![Twitter Follow](https://img.shields.io/twitter/follow/bentomlai?label=Follow%20BentoML&style=social)](https://twitter.com/bentomlai) + +This is a Machine Learning Service created with BentoML. + +## Help + +* [📖 Documentation](https://docs.bentoml.com/en/latest/): Learn how to use BentoML. +* [💬 Community](https://l.bentoml.com/join-slack-swagger): Join the BentoML Slack community. +* [🐛 GitHub Issues](https://github.com/bentoml/BentoML/issues): Report bugs and feature requests. +* Tip: you can also [customize this README](https://docs.bentoml.com/en/latest/concepts/bento.html#description). diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/apis/openapi.yaml b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/apis/openapi.yaml similarity index 99% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/apis/openapi.yaml rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/apis/openapi.yaml index 7610b16d..54d43aa4 100644 --- a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/apis/openapi.yaml +++ b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/apis/openapi.yaml @@ -161,6 +161,39 @@ info: version: None openapi: 3.0.2 paths: + /chat/: + get: + operationId: serve_chat_html__get + responses: + '200': + content: + application/json: + schema: {} + description: Successful Response + summary: Serve Chat Html + /chat/{full_path}: + get: + operationId: catch_all__full_path__get + parameters: + - in: path + name: full_path + required: true + schema: + title: Full Path + type: string + responses: + '200': + content: + application/json: + schema: {} + description: Successful Response + '422': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + description: Validation Error + summary: Catch All /healthz: get: description: Health check endpoint. Expecting an empty response with status @@ -201,39 +234,6 @@ paths: description: Successful Response tags: - Infrastructure - /ui/: - get: - operationId: serve_chat_html__get - responses: - '200': - content: - application/json: - schema: {} - description: Successful Response - summary: Serve Chat Html - /ui/{full_path}: - get: - operationId: catch_all__full_path__get - parameters: - - in: path - name: full_path - required: true - schema: - title: Full Path - type: string - responses: - '200': - content: - application/json: - schema: {} - description: Successful Response - '422': - content: - application/json: - schema: - $ref: '#/components/schemas/HTTPValidationError' - description: Validation Error - summary: Catch All /v1/chat/completions: post: description: "\n Chat API that takes in a list of messages and returns\ diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/apis/schema.json b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/apis/schema.json similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/apis/schema.json rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/apis/schema.json diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/bento.yaml b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/bento.yaml similarity index 96% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/bento.yaml rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/bento.yaml index bcd66628..955d0af3 100644 --- a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/bento.yaml +++ b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/bento.yaml @@ -1,8 +1,8 @@ service: service:LlamaCppChat name: phi3 -version: 3.8b-instruct-ggml-q4-6139 +version: 3.8b-instruct-ggml-q4-584f bentoml_version: 1.2.19 -creation_time: '2024-07-05T07:37:35.922668+00:00' +creation_time: '2024-07-08T08:07:54.266242+00:00' labels: openllm_alias: 3.8b-q4,3.8b-mini-q4,3.8b-mini-instruct-4k-ggml-q4 openllm_hf_model_id: microsoft/Phi-3-mini-4k-instruct-gguf diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/env/docker/Dockerfile b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/env/docker/Dockerfile similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/env/docker/Dockerfile rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/env/docker/Dockerfile diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/env/docker/entrypoint.sh b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/env/docker/entrypoint.sh similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/env/docker/entrypoint.sh rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/env/docker/entrypoint.sh diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/env/python/install.sh b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/env/python/install.sh similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/env/python/install.sh rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/env/python/install.sh diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/env/python/requirements.lock.txt b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/env/python/requirements.lock.txt similarity index 99% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/env/python/requirements.lock.txt rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/env/python/requirements.lock.txt index 8a362ece..35a2baad 100644 --- a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/env/python/requirements.lock.txt +++ b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/env/python/requirements.lock.txt @@ -96,5 +96,5 @@ yarl==1.9.4 zipp==3.19.2 # The following packages are considered to be unsafe in a requirements file: -pip==24.1.1 +pip==24.1.2 setuptools==70.2.0 diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/env/python/requirements.txt b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/env/python/requirements.txt similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/env/python/requirements.txt rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/env/python/requirements.txt diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/env/python/version.txt b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/env/python/version.txt similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/env/python/version.txt rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/env/python/version.txt diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/bento_constants.py b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/bento_constants.py similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/bento_constants.py rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/bento_constants.py diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/bentofile.yaml b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/bentofile.yaml similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/bentofile.yaml rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/bentofile.yaml diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/service.py b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/service.py similarity index 98% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/service.py rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/service.py index c93b0b3d..3cfd766d 100644 --- a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/service.py +++ b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/service.py @@ -72,7 +72,7 @@ async def catch_all(full_path: str): sys.modules.pop("prometheus_client") -@bentoml.mount_asgi_app(ui_app, path="/ui") +@bentoml.mount_asgi_app(ui_app, path="/chat") @bentoml.mount_asgi_app(openai_api_app, path="/v1") @bentoml.service(**SERVICE_CONFIG) class LlamaCppChat: diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/404.html b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/404.html similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/404.html rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/404.html diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/700-532b1fe2415e5859.js b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/700-532b1fe2415e5859.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/700-532b1fe2415e5859.js rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/700-532b1fe2415e5859.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/995-34374f39bb210839.js b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/995-34374f39bb210839.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/995-34374f39bb210839.js rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/995-34374f39bb210839.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/css/429544bd3cd8ce3a.css b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/css/429544bd3cd8ce3a.css similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/css/429544bd3cd8ce3a.css rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/css/429544bd3cd8ce3a.css diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/css/5b67f082b31cfc7b.css b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/css/5b67f082b31cfc7b.css similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/css/5b67f082b31cfc7b.css rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/css/5b67f082b31cfc7b.css diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/css/9e63023b20ddb15e.css b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/css/9e63023b20ddb15e.css similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/_next/static/css/9e63023b20ddb15e.css rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/_next/static/css/9e63023b20ddb15e.css diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/apple-touch-icon.png b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/apple-touch-icon.png similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/apple-touch-icon.png rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/apple-touch-icon.png diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/chat.html b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/chat.html similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/chat.html rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/chat.html diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/chat.txt b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/chat.txt similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/chat.txt rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/chat.txt diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/favicon-16x16.png b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/favicon-16x16.png similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/favicon-16x16.png rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/favicon-16x16.png diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/favicon.ico b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/favicon.ico similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/favicon.ico rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/favicon.ico diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/index.html b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/index.html similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/index.html rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/index.html diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/index.txt b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/index.txt similarity index 100% rename from bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/src/ui/index.txt rename to bentoml/bentos/phi3/3.8b-instruct-ggml-q4-584f/src/ui/index.txt diff --git a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/README.md b/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/README.md deleted file mode 100644 index 9bf072db..00000000 --- a/bentoml/bentos/phi3/3.8b-instruct-ggml-q4-6139/README.md +++ /dev/null @@ -1,16 +0,0 @@ -# phi3:3.8b-instruct-ggml-q4-6139 - -[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML) -[![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/) -[![join_slack](https://badgen.net/badge/Join/BentoML%20Slack/cyan?icon=slack)](https://l.bentoml.com/join-slack-swagger) -[![BentoML GitHub Repo](https://img.shields.io/github/stars/bentoml/bentoml?style=social)](https://github.com/bentoml/BentoML) -[![Twitter Follow](https://img.shields.io/twitter/follow/bentomlai?label=Follow%20BentoML&style=social)](https://twitter.com/bentomlai) - -This is a Machine Learning Service created with BentoML. - -## Help - -* [📖 Documentation](https://docs.bentoml.com/en/latest/): Learn how to use BentoML. -* [💬 Community](https://l.bentoml.com/join-slack-swagger): Join the BentoML Slack community. -* [🐛 GitHub Issues](https://github.com/bentoml/BentoML/issues): Report bugs and feature requests. -* Tip: you can also [customize this README](https://docs.bentoml.com/en/latest/concepts/bento.html#description). diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/README.md b/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/README.md deleted file mode 100644 index 5859cae4..00000000 --- a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/README.md +++ /dev/null @@ -1,16 +0,0 @@ -# qwen2:0.5b-instruct-fp16-855c - -[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML) -[![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/) -[![join_slack](https://badgen.net/badge/Join/BentoML%20Slack/cyan?icon=slack)](https://l.bentoml.com/join-slack-swagger) -[![BentoML GitHub Repo](https://img.shields.io/github/stars/bentoml/bentoml?style=social)](https://github.com/bentoml/BentoML) -[![Twitter Follow](https://img.shields.io/twitter/follow/bentomlai?label=Follow%20BentoML&style=social)](https://twitter.com/bentomlai) - -This is a Machine Learning Service created with BentoML. - -## Help - -* [📖 Documentation](https://docs.bentoml.com/en/latest/): Learn how to use BentoML. -* [💬 Community](https://l.bentoml.com/join-slack-swagger): Join the BentoML Slack community. -* [🐛 GitHub Issues](https://github.com/bentoml/BentoML/issues): Report bugs and feature requests. -* Tip: you can also [customize this README](https://docs.bentoml.com/en/latest/concepts/bento.html#description). diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/env/python/requirements.lock.txt b/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/env/python/requirements.lock.txt deleted file mode 100644 index 9118ea44..00000000 --- a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/env/python/requirements.lock.txt +++ /dev/null @@ -1,147 +0,0 @@ -aiohttp==3.9.5 -aiosignal==1.3.1 -annotated-types==0.7.0 -anyio==4.4.0 -appdirs==1.4.4 -asgiref==3.8.1 -async-timeout==4.0.3 -attrs==23.2.0 -bentoml==1.2.19 -build==1.2.1 -cattrs==23.1.2 -certifi==2024.7.4 -charset-normalizer==3.3.2 -circus==0.18.0 -click==8.1.7 -click-option-group==0.5.6 -cloudpickle==3.0.0 -cmake==3.30.0 -deepmerge==1.1.1 -deprecated==1.2.14 -diskcache==5.6.3 -distro==1.9.0 -dnspython==2.6.1 -email-validator==2.2.0 -exceptiongroup==1.2.1 -fastapi==0.111.0 -fastapi-cli==0.0.4 -filelock==3.15.4 -frozenlist==1.4.1 -fs==2.4.16 -fsspec==2024.6.1 -h11==0.14.0 -httpcore==1.0.5 -httptools==0.6.1 -httpx==0.27.0 -httpx-ws==0.6.0 -huggingface-hub==0.23.4 -idna==3.7 -importlib-metadata==6.11.0 -inflection==0.5.1 -interegular==0.3.3 -jinja2==3.1.4 -joblib==1.4.2 -jsonschema==4.22.0 -jsonschema-specifications==2023.12.1 -lark==1.1.9 -llvmlite==0.43.0 -lm-format-enforcer==0.10.1 -markdown-it-py==3.0.0 -markupsafe==2.1.5 -mdurl==0.1.2 -mpmath==1.3.0 -msgpack==1.0.8 -multidict==6.0.5 -nest-asyncio==1.6.0 -networkx==3.2.1 -ninja==1.11.1.1 -numba==0.60.0 -numpy==1.26.0 -nvidia-cublas-cu12==12.1.3.1 -nvidia-cuda-cupti-cu12==12.1.105 -nvidia-cuda-nvrtc-cu12==12.1.105 -nvidia-cuda-runtime-cu12==12.1.105 -nvidia-cudnn-cu12==8.9.2.26 -nvidia-cufft-cu12==11.0.2.54 -nvidia-curand-cu12==10.3.2.106 -nvidia-cusolver-cu12==11.4.5.107 -nvidia-cusparse-cu12==12.1.0.106 -nvidia-ml-py==11.525.150 -nvidia-nccl-cu12==2.20.5 -nvidia-nvjitlink-cu12==12.5.82 -nvidia-nvtx-cu12==12.1.105 -openai==1.35.10 -opentelemetry-api==1.20.0 -opentelemetry-instrumentation==0.41b0 -opentelemetry-instrumentation-aiohttp-client==0.41b0 -opentelemetry-instrumentation-asgi==0.41b0 -opentelemetry-sdk==1.20.0 -opentelemetry-semantic-conventions==0.41b0 -opentelemetry-util-http==0.41b0 -orjson==3.10.6 -outlines==0.0.34 -packaging==24.1 -pathspec==0.12.1 -pip-requirements-parser==32.0.1 -pip-tools==7.4.1 -prometheus-client==0.20.0 -prometheus-fastapi-instrumentator==7.0.0 -protobuf==5.27.2 -psutil==6.0.0 -py-cpuinfo==9.0.0 -pydantic==2.8.2 -pydantic-core==2.20.1 -pygments==2.18.0 -pyparsing==3.1.2 -pyproject-hooks==1.1.0 -python-dateutil==2.9.0.post0 -python-dotenv==1.0.1 -python-json-logger==2.0.7 -python-multipart==0.0.9 -pyyaml==6.0.1 -pyzmq==26.0.3 -ray==2.31.0 -referencing==0.35.1 -regex==2024.5.15 -requests==2.32.3 -rich==13.7.1 -rpds-py==0.18.1 -safetensors==0.4.3 -schema==0.7.7 -scipy==1.13.1 -sentencepiece==0.2.0 -shellingham==1.5.4 -simple-di==0.1.5 -six==1.16.0 -sniffio==1.3.1 -starlette==0.37.2 -sympy==1.12.1 -tiktoken==0.7.0 -tokenizers==0.19.1 -tomli==2.0.1 -tomli-w==1.0.0 -torch==2.3.0 -tornado==6.4.1 -tqdm==4.66.4 -transformers==4.41.0 -triton==2.3.0 -typer==0.12.3 -typing-extensions==4.12.2 -ujson==5.10.0 -urllib3==2.2.2 -uvicorn[standard]==0.30.1 -uvloop==0.19.0 -vllm==0.4.3 -vllm-flash-attn==2.5.8.post2 -watchfiles==0.22.0 -websockets==12.0 -wheel==0.43.0 -wrapt==1.16.0 -wsproto==1.2.0 -xformers==0.0.26.post1 -yarl==1.9.4 -zipp==3.19.2 - -# The following packages are considered to be unsafe in a requirements file: -pip==24.1.1 -setuptools==70.2.0 diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/service.py b/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/service.py deleted file mode 100644 index 94af0b0d..00000000 --- a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/service.py +++ /dev/null @@ -1,239 +0,0 @@ -import functools -import json -import logging -import os -import sys -import uuid -from typing import AsyncGenerator, Literal, Optional - -import bentoml -import fastapi -import fastapi.staticfiles -import pydantic -import vllm.entrypoints.openai.api_server as vllm_api_server -import yaml -from annotated_types import Ge, Le -from bento_constants import CONSTANT_YAML -from fastapi.responses import FileResponse -from typing_extensions import Annotated, Literal - - -class Message(pydantic.BaseModel): - content: str - role: Literal["system", "user", "assistant"] - - -CONSTANTS = yaml.safe_load(CONSTANT_YAML) - -ENGINE_CONFIG = CONSTANTS["engine_config"] -SERVICE_CONFIG = CONSTANTS["service_config"] -OVERRIDE_CHAT_TEMPLATE = CONSTANTS.get("chat_template") - -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) - - -openai_api_app = fastapi.FastAPI() -static_app = fastapi.FastAPI() -ui_app = fastapi.FastAPI() - - -OPENAI_ENDPOINTS = [ - ["/chat/completions", vllm_api_server.create_chat_completion, ["POST"]], - ["/completions", vllm_api_server.create_completion, ["POST"]], - ["/models", vllm_api_server.show_available_models, ["GET"]], -] - - -class Message(pydantic.BaseModel): - role: Literal["system", "user", "assistant"] - content: str - - -for route, endpoint, methods in OPENAI_ENDPOINTS: - openai_api_app.add_api_route( - path=route, - endpoint=endpoint, - methods=methods, - include_in_schema=True, - ) - - -STATIC_DIR = os.path.join(os.path.dirname(__file__), "ui") - -ui_app.mount( - "/static", fastapi.staticfiles.StaticFiles(directory=STATIC_DIR), name="static" -) - - -@ui_app.get("/") -async def serve_chat_html(): - return FileResponse(os.path.join(STATIC_DIR, "chat.html")) - - -@ui_app.get("/{full_path:path}") -async def catch_all(full_path: str): - file_path = os.path.join(STATIC_DIR, full_path) - if os.path.exists(file_path): - return FileResponse(file_path) - return FileResponse(os.path.join(STATIC_DIR, "chat.html")) - - -# special handling for prometheus_client of bentoml -if "prometheus_client" in sys.modules: - sys.modules.pop("prometheus_client") - - -@bentoml.mount_asgi_app(openai_api_app, path="/v1") -@bentoml.mount_asgi_app(ui_app, path="/ui") -@bentoml.service(**SERVICE_CONFIG) -class VLLM: - def __init__(self) -> None: - from transformers import AutoTokenizer - from vllm import AsyncEngineArgs, AsyncLLMEngine - from vllm.entrypoints.openai.serving_chat import OpenAIServingChat - from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion - - ENGINE_ARGS = AsyncEngineArgs(**ENGINE_CONFIG) - self.engine = AsyncLLMEngine.from_engine_args(ENGINE_ARGS) - self.tokenizer = AutoTokenizer.from_pretrained(ENGINE_CONFIG["model"]) - logger.info(f"VLLM service initialized with model: {ENGINE_CONFIG['model']}") - - if OVERRIDE_CHAT_TEMPLATE: # use community chat template - gen_config = _get_gen_config(CONSTANTS["chat_template"]) - chat_template = gen_config["template"] - else: - chat_template = None - - model_config = self.engine.engine.get_model_config() - - # inject the engine into the openai serving chat and completion - vllm_api_server.openai_serving_chat = OpenAIServingChat( - engine=self.engine, - served_model_names=[ENGINE_CONFIG["model"]], - response_role="assistant", - chat_template=chat_template, - model_config=model_config, - ) - vllm_api_server.openai_serving_completion = OpenAIServingCompletion( - engine=self.engine, - served_model_names=[ENGINE_CONFIG["model"]], - model_config=model_config, - lora_modules=None, - ) - - @bentoml.api(route="/api/generate") - async def generate( - self, - prompt: str = "Explain superconductors like I'm five years old", - model: str = ENGINE_CONFIG["model"], - max_tokens: Annotated[ - int, - Ge(128), - Le(ENGINE_CONFIG["max_model_len"]), - ] = ENGINE_CONFIG["max_model_len"], - stop: Optional[list[str]] = None, - ) -> AsyncGenerator[str, None]: - if stop is None: - stop = [] - - from vllm import SamplingParams - - SAMPLING_PARAM = SamplingParams( - max_tokens=max_tokens, - stop=stop, - ) - stream = await self.engine.add_request(uuid.uuid4().hex, prompt, SAMPLING_PARAM) - - cursor = 0 - async for request_output in stream: - text = request_output.outputs[0].text - yield text[cursor:] - cursor = len(text) - - @bentoml.api(route="/api/chat") - async def chat( - self, - messages: list[Message] = [ - Message(content="what is the meaning of life?", role="user") - ], - model: str = ENGINE_CONFIG["model"], - max_tokens: Annotated[ - int, - Ge(128), - Le(ENGINE_CONFIG["max_model_len"]), - ] = ENGINE_CONFIG["max_model_len"], - stop: Optional[list[str]] = None, - ) -> AsyncGenerator[str, None]: - """ - light-weight chat API that takes in a list of messages and returns a response - """ - from vllm import SamplingParams - - try: - if OVERRIDE_CHAT_TEMPLATE: # community chat template - gen_config = _get_gen_config(CONSTANTS["chat_template"]) - if not stop: - if gen_config["stop_str"]: - stop = [gen_config["stop_str"]] - else: - stop = [] - system_prompt = gen_config["system_prompt"] - self.tokenizer.chat_template = gen_config["template"] - else: - if not stop: - if self.tokenizer.eos_token is not None: - stop = [self.tokenizer.eos_token] - else: - stop = [] - system_prompt = None - - SAMPLING_PARAM = SamplingParams( - max_tokens=max_tokens, - stop=stop, - ) - if system_prompt and messages[0].role != "system": - messages = [dict(role="system", content=system_prompt)] + messages - - prompt = self.tokenizer.apply_chat_template( - messages, - tokenize=False, - add_generation_prompt=True, - ) - - stream = await self.engine.add_request( - uuid.uuid4().hex, prompt, SAMPLING_PARAM - ) - - cursor = 0 - strip_flag = True - async for request_output in stream: - text = request_output.outputs[0].text - assistant_message = text[cursor:] - if not strip_flag: # strip the leading whitespace - yield assistant_message - elif assistant_message.strip(): - strip_flag = False - yield assistant_message.lstrip() - cursor = len(text) - except Exception as e: - logger.error(f"Error in chat API: {e}") - yield f"Error in chat API: {e}" - - -@functools.lru_cache(maxsize=1) -def _get_gen_config(community_chat_template: str) -> dict: - logger.info(f"Load community_chat_template: {community_chat_template}") - chat_template_path = os.path.join( - os.path.dirname(__file__), "chat_templates", "chat_templates" - ) - config_path = os.path.join( - os.path.dirname(__file__), "chat_templates", "generation_configs" - ) - with open(os.path.join(config_path, f"{community_chat_template}.json")) as f: - gen_config = json.load(f) - chat_template_file = gen_config["chat_template"].split("/")[-1] - with open(os.path.join(chat_template_path, chat_template_file)) as f: - chat_template = f.read() - gen_config["template"] = chat_template.replace(" ", "").replace("\n", "") - return gen_config diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/README.md b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/README.md new file mode 100644 index 00000000..044b5173 --- /dev/null +++ b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/README.md @@ -0,0 +1,16 @@ +# qwen2:0.5b-instruct-fp16-96bb + +[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML) +[![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/) +[![join_slack](https://badgen.net/badge/Join/BentoML%20Slack/cyan?icon=slack)](https://l.bentoml.com/join-slack-swagger) +[![BentoML GitHub Repo](https://img.shields.io/github/stars/bentoml/bentoml?style=social)](https://github.com/bentoml/BentoML) +[![Twitter Follow](https://img.shields.io/twitter/follow/bentomlai?label=Follow%20BentoML&style=social)](https://twitter.com/bentomlai) + +This is a Machine Learning Service created with BentoML. + +## Help + +* [📖 Documentation](https://docs.bentoml.com/en/latest/): Learn how to use BentoML. +* [💬 Community](https://l.bentoml.com/join-slack-swagger): Join the BentoML Slack community. +* [🐛 GitHub Issues](https://github.com/bentoml/BentoML/issues): Report bugs and feature requests. +* Tip: you can also [customize this README](https://docs.bentoml.com/en/latest/concepts/bento.html#description). diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/apis/openapi.yaml b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/apis/openapi.yaml similarity index 99% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/apis/openapi.yaml rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/apis/openapi.yaml index 0a2548d3..d75e5a54 100644 --- a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/apis/openapi.yaml +++ b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/apis/openapi.yaml @@ -1036,6 +1036,39 @@ paths: tags: - Service APIs x-bentoml-name: generate + /chat/: + get: + operationId: serve_chat_html__get + responses: + '200': + content: + application/json: + schema: {} + description: Successful Response + summary: Serve Chat Html + /chat/{full_path}: + get: + operationId: catch_all__full_path__get + parameters: + - in: path + name: full_path + required: true + schema: + title: Full Path + type: string + responses: + '200': + content: + application/json: + schema: {} + description: Successful Response + '422': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + description: Validation Error + summary: Catch All /healthz: get: description: Health check endpoint. Expecting an empty response with status @@ -1076,39 +1109,6 @@ paths: description: Successful Response tags: - Infrastructure - /ui/: - get: - operationId: serve_chat_html__get - responses: - '200': - content: - application/json: - schema: {} - description: Successful Response - summary: Serve Chat Html - /ui/{full_path}: - get: - operationId: catch_all__full_path__get - parameters: - - in: path - name: full_path - required: true - schema: - title: Full Path - type: string - responses: - '200': - content: - application/json: - schema: {} - description: Successful Response - '422': - content: - application/json: - schema: - $ref: '#/components/schemas/HTTPValidationError' - description: Validation Error - summary: Catch All /v1/chat/completions: post: operationId: create_chat_completion_chat_completions_post diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/apis/schema.json b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/apis/schema.json similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/apis/schema.json rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/apis/schema.json diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/bento.yaml b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/bento.yaml similarity index 97% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/bento.yaml rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/bento.yaml index 86d012b4..70e7ad56 100644 --- a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/bento.yaml +++ b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/bento.yaml @@ -1,8 +1,8 @@ service: service:VLLM name: qwen2 -version: 0.5b-instruct-fp16-855c +version: 0.5b-instruct-fp16-96bb bentoml_version: 1.2.19 -creation_time: '2024-07-05T07:33:53.962663+00:00' +creation_time: '2024-07-08T08:01:59.846091+00:00' labels: openllm_alias: 0.5b,0.5b-instruct openllm_hf_model_id: Qwen/Qwen2-0.5B-Instruct diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/env/docker/Dockerfile b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/env/docker/Dockerfile similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/env/docker/Dockerfile rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/env/docker/Dockerfile diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/env/docker/entrypoint.sh b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/env/docker/entrypoint.sh similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/env/docker/entrypoint.sh rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/env/docker/entrypoint.sh diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/env/python/install.sh b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/env/python/install.sh similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/env/python/install.sh rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/env/python/install.sh diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/env/python/requirements.lock.txt b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/env/python/requirements.lock.txt new file mode 100644 index 00000000..925ce193 --- /dev/null +++ b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/env/python/requirements.lock.txt @@ -0,0 +1,147 @@ +aiohttp==3.9.5 +aiosignal==1.3.1 +annotated-types==0.7.0 +anyio==4.4.0 +appdirs==1.4.4 +asgiref==3.8.1 +async-timeout==4.0.3 +attrs==23.2.0 +bentoml==1.2.19 +build==1.2.1 +cattrs==23.1.2 +certifi==2024.7.4 +charset-normalizer==3.3.2 +circus==0.18.0 +click==8.1.7 +click-option-group==0.5.6 +cloudpickle==3.0.0 +cmake==3.30.0 +deepmerge==1.1.1 +deprecated==1.2.14 +diskcache==5.6.3 +distro==1.9.0 +dnspython==2.6.1 +email-validator==2.2.0 +exceptiongroup==1.2.1 +fastapi==0.111.0 +fastapi-cli==0.0.4 +filelock==3.15.4 +frozenlist==1.4.1 +fs==2.4.16 +fsspec==2024.6.1 +h11==0.14.0 +httpcore==1.0.5 +httptools==0.6.1 +httpx==0.27.0 +httpx-ws==0.6.0 +huggingface-hub==0.23.4 +idna==3.7 +importlib-metadata==6.11.0 +inflection==0.5.1 +interegular==0.3.3 +jinja2==3.1.4 +joblib==1.4.2 +jsonschema==4.22.0 +jsonschema-specifications==2023.12.1 +lark==1.1.9 +llvmlite==0.43.0 +lm-format-enforcer==0.10.1 +markdown-it-py==3.0.0 +markupsafe==2.1.5 +mdurl==0.1.2 +mpmath==1.3.0 +msgpack==1.0.8 +multidict==6.0.5 +nest-asyncio==1.6.0 +networkx==3.2.1 +ninja==1.11.1.1 +numba==0.60.0 +numpy==1.26.0 +nvidia-cublas-cu12==12.1.3.1 +nvidia-cuda-cupti-cu12==12.1.105 +nvidia-cuda-nvrtc-cu12==12.1.105 +nvidia-cuda-runtime-cu12==12.1.105 +nvidia-cudnn-cu12==8.9.2.26 +nvidia-cufft-cu12==11.0.2.54 +nvidia-curand-cu12==10.3.2.106 +nvidia-cusolver-cu12==11.4.5.107 +nvidia-cusparse-cu12==12.1.0.106 +nvidia-ml-py==11.525.150 +nvidia-nccl-cu12==2.20.5 +nvidia-nvjitlink-cu12==12.5.82 +nvidia-nvtx-cu12==12.1.105 +openai==1.35.10 +opentelemetry-api==1.20.0 +opentelemetry-instrumentation==0.41b0 +opentelemetry-instrumentation-aiohttp-client==0.41b0 +opentelemetry-instrumentation-asgi==0.41b0 +opentelemetry-sdk==1.20.0 +opentelemetry-semantic-conventions==0.41b0 +opentelemetry-util-http==0.41b0 +orjson==3.10.6 +outlines==0.0.34 +packaging==24.1 +pathspec==0.12.1 +pip-requirements-parser==32.0.1 +pip-tools==7.4.1 +prometheus-client==0.20.0 +prometheus-fastapi-instrumentator==7.0.0 +protobuf==5.27.2 +psutil==6.0.0 +py-cpuinfo==9.0.0 +pydantic==2.8.2 +pydantic-core==2.20.1 +pygments==2.18.0 +pyparsing==3.1.2 +pyproject-hooks==1.1.0 +python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 +python-json-logger==2.0.7 +python-multipart==0.0.9 +pyyaml==6.0.1 +pyzmq==26.0.3 +ray==2.31.0 +referencing==0.35.1 +regex==2024.5.15 +requests==2.32.3 +rich==13.7.1 +rpds-py==0.18.1 +safetensors==0.4.3 +schema==0.7.7 +scipy==1.13.1 +sentencepiece==0.2.0 +shellingham==1.5.4 +simple-di==0.1.5 +six==1.16.0 +sniffio==1.3.1 +starlette==0.37.2 +sympy==1.12.1 +tiktoken==0.7.0 +tokenizers==0.19.1 +tomli==2.0.1 +tomli-w==1.0.0 +torch==2.3.0 +tornado==6.4.1 +tqdm==4.66.4 +transformers==4.41.0 +triton==2.3.0 +typer==0.12.3 +typing-extensions==4.12.2 +ujson==5.10.0 +urllib3==2.2.2 +uvicorn[standard]==0.30.1 +uvloop==0.19.0 +vllm==0.4.3 +vllm-flash-attn==2.5.8.post2 +watchfiles==0.22.0 +websockets==12.0 +wheel==0.43.0 +wrapt==1.16.0 +wsproto==1.2.0 +xformers==0.0.26.post1 +yarl==1.9.4 +zipp==3.19.2 + +# The following packages are considered to be unsafe in a requirements file: +pip==24.1.2 +setuptools==70.2.0 diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/env/python/requirements.txt b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/env/python/requirements.txt similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/env/python/requirements.txt rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/env/python/requirements.txt diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/env/python/version.txt b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/env/python/version.txt similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/env/python/version.txt rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/env/python/version.txt diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/bento_constants.py b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/bento_constants.py similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/bento_constants.py rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/bento_constants.py diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/bentofile.yaml b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/bentofile.yaml similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/bentofile.yaml rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/bentofile.yaml diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/chat_templates/alpaca.jinja b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/chat_templates/alpaca.jinja similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/chat_templates/alpaca.jinja rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/chat_templates/alpaca.jinja diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/chat_templates/amberchat.jinja b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/chat_templates/amberchat.jinja similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/chat_templates/amberchat.jinja rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/chat_templates/amberchat.jinja diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/chat_templates/chatml.jinja b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/chat_templates/chatml.jinja similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/chat_templates/chatml.jinja rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/chat_templates/chatml.jinja diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/chat_templates/chatqa.jinja b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/chat_templates/chatqa.jinja similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/chat_templates/chatqa.jinja rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/chat_templates/chatqa.jinja diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/chat_templates/falcon-instruct.jinja b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/chat_templates/falcon-instruct.jinja similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/chat_templates/falcon-instruct.jinja rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/chat_templates/falcon-instruct.jinja diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/chat_templates/gemma-it.jinja b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/chat_templates/gemma-it.jinja similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/chat_templates/gemma-it.jinja rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/chat_templates/gemma-it.jinja diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/chat_templates/llama-2-chat.jinja b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/chat_templates/llama-2-chat.jinja similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/chat_templates/llama-2-chat.jinja rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/chat_templates/llama-2-chat.jinja diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/chat_templates/llama-3-chat.jinja b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/chat_templates/llama-3-chat.jinja similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/chat_templates/llama-3-chat.jinja rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/chat_templates/llama-3-chat.jinja diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/chat_templates/mistral-instruct.jinja b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/chat_templates/mistral-instruct.jinja similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/chat_templates/mistral-instruct.jinja rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/chat_templates/mistral-instruct.jinja diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/chat_templates/openchat.jinja b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/chat_templates/openchat.jinja similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/chat_templates/openchat.jinja rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/chat_templates/openchat.jinja diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/chat_templates/phi-3.jinja b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/chat_templates/phi-3.jinja similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/chat_templates/phi-3.jinja rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/chat_templates/phi-3.jinja diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/chat_templates/saiga.jinja b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/chat_templates/saiga.jinja similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/chat_templates/saiga.jinja rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/chat_templates/saiga.jinja diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/chat_templates/solar-instruct.jinja b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/chat_templates/solar-instruct.jinja similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/chat_templates/solar-instruct.jinja rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/chat_templates/solar-instruct.jinja diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/chat_templates/vicuna.jinja b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/chat_templates/vicuna.jinja similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/chat_templates/vicuna.jinja rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/chat_templates/vicuna.jinja diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/chat_templates/zephyr.jinja b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/chat_templates/zephyr.jinja similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/chat_templates/zephyr.jinja rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/chat_templates/zephyr.jinja diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/generation_configs/alpaca.json b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/generation_configs/alpaca.json similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/generation_configs/alpaca.json rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/generation_configs/alpaca.json diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/generation_configs/amberchat.json b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/generation_configs/amberchat.json similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/generation_configs/amberchat.json rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/generation_configs/amberchat.json diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/generation_configs/chatqa.json b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/generation_configs/chatqa.json similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/generation_configs/chatqa.json rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/generation_configs/chatqa.json diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/generation_configs/gemma-it.json b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/generation_configs/gemma-it.json similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/generation_configs/gemma-it.json rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/generation_configs/gemma-it.json diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/generation_configs/llama-2-chat.json b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/generation_configs/llama-2-chat.json similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/generation_configs/llama-2-chat.json rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/generation_configs/llama-2-chat.json diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/generation_configs/llama-3-chat.json b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/generation_configs/llama-3-chat.json similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/generation_configs/llama-3-chat.json rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/generation_configs/llama-3-chat.json diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/generation_configs/mistral-instruct.json b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/generation_configs/mistral-instruct.json similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/generation_configs/mistral-instruct.json rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/generation_configs/mistral-instruct.json diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/generation_configs/openchat.json b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/generation_configs/openchat.json similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/generation_configs/openchat.json rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/generation_configs/openchat.json diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/generation_configs/orca-2.json b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/generation_configs/orca-2.json similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/generation_configs/orca-2.json rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/generation_configs/orca-2.json diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/generation_configs/phi-3.json b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/generation_configs/phi-3.json similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/generation_configs/phi-3.json rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/generation_configs/phi-3.json diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/generation_configs/qwen2-chat.json b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/generation_configs/qwen2-chat.json similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/generation_configs/qwen2-chat.json rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/generation_configs/qwen2-chat.json diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/generation_configs/saiga.json b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/generation_configs/saiga.json similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/generation_configs/saiga.json rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/generation_configs/saiga.json diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/generation_configs/solar-instruct.json b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/generation_configs/solar-instruct.json similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/generation_configs/solar-instruct.json rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/generation_configs/solar-instruct.json diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/generation_configs/vicuna.json b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/generation_configs/vicuna.json similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/generation_configs/vicuna.json rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/generation_configs/vicuna.json diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/generation_configs/yi-chat.json b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/generation_configs/yi-chat.json similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/generation_configs/yi-chat.json rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/generation_configs/yi-chat.json diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/generation_configs/zephyr.json b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/generation_configs/zephyr.json similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/chat_templates/generation_configs/zephyr.json rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/chat_templates/generation_configs/zephyr.json diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/service.py b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/service.py new file mode 100644 index 00000000..42152243 --- /dev/null +++ b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/service.py @@ -0,0 +1,239 @@ +import functools +import json +import logging +import os +import sys +import uuid +from typing import AsyncGenerator, Literal, Optional + +import bentoml +import fastapi +import fastapi.staticfiles +import pydantic +import vllm.entrypoints.openai.api_server as vllm_api_server +import yaml +from annotated_types import Ge, Le +from bento_constants import CONSTANT_YAML +from fastapi.responses import FileResponse +from typing_extensions import Annotated, Literal + + +class Message(pydantic.BaseModel): + content: str + role: Literal["system", "user", "assistant"] + + +CONSTANTS = yaml.safe_load(CONSTANT_YAML) + +ENGINE_CONFIG = CONSTANTS["engine_config"] +SERVICE_CONFIG = CONSTANTS["service_config"] +OVERRIDE_CHAT_TEMPLATE = CONSTANTS.get("chat_template") + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +openai_api_app = fastapi.FastAPI() +static_app = fastapi.FastAPI() +ui_app = fastapi.FastAPI() + + +OPENAI_ENDPOINTS = [ + ["/chat/completions", vllm_api_server.create_chat_completion, ["POST"]], + ["/completions", vllm_api_server.create_completion, ["POST"]], + ["/models", vllm_api_server.show_available_models, ["GET"]], +] + + +class Message(pydantic.BaseModel): + role: Literal["system", "user", "assistant"] + content: str + + +for route, endpoint, methods in OPENAI_ENDPOINTS: + openai_api_app.add_api_route( + path=route, + endpoint=endpoint, + methods=methods, + include_in_schema=True, + ) + + +STATIC_DIR = os.path.join(os.path.dirname(__file__), "ui") + +ui_app.mount( + "/static", fastapi.staticfiles.StaticFiles(directory=STATIC_DIR), name="static" +) + + +@ui_app.get("/") +async def serve_chat_html(): + return FileResponse(os.path.join(STATIC_DIR, "chat.html")) + + +@ui_app.get("/{full_path:path}") +async def catch_all(full_path: str): + file_path = os.path.join(STATIC_DIR, full_path) + if os.path.exists(file_path): + return FileResponse(file_path) + return FileResponse(os.path.join(STATIC_DIR, "chat.html")) + + +# special handling for prometheus_client of bentoml +if "prometheus_client" in sys.modules: + sys.modules.pop("prometheus_client") + + +@bentoml.mount_asgi_app(openai_api_app, path="/v1") +@bentoml.mount_asgi_app(ui_app, path="/chat") +@bentoml.service(**SERVICE_CONFIG) +class VLLM: + def __init__(self) -> None: + from transformers import AutoTokenizer + from vllm import AsyncEngineArgs, AsyncLLMEngine + from vllm.entrypoints.openai.serving_chat import OpenAIServingChat + from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion + + ENGINE_ARGS = AsyncEngineArgs(**ENGINE_CONFIG) + self.engine = AsyncLLMEngine.from_engine_args(ENGINE_ARGS) + self.tokenizer = AutoTokenizer.from_pretrained(ENGINE_CONFIG["model"]) + logger.info(f"VLLM service initialized with model: {ENGINE_CONFIG['model']}") + + if OVERRIDE_CHAT_TEMPLATE: # use community chat template + gen_config = _get_gen_config(CONSTANTS["chat_template"]) + chat_template = gen_config["template"] + else: + chat_template = None + + model_config = self.engine.engine.get_model_config() + + # inject the engine into the openai serving chat and completion + vllm_api_server.openai_serving_chat = OpenAIServingChat( + engine=self.engine, + served_model_names=[ENGINE_CONFIG["model"]], + response_role="assistant", + chat_template=chat_template, + model_config=model_config, + ) + vllm_api_server.openai_serving_completion = OpenAIServingCompletion( + engine=self.engine, + served_model_names=[ENGINE_CONFIG["model"]], + model_config=model_config, + lora_modules=None, + ) + + @bentoml.api(route="/api/generate") + async def generate( + self, + prompt: str = "Explain superconductors like I'm five years old", + model: str = ENGINE_CONFIG["model"], + max_tokens: Annotated[ + int, + Ge(128), + Le(ENGINE_CONFIG["max_model_len"]), + ] = ENGINE_CONFIG["max_model_len"], + stop: Optional[list[str]] = None, + ) -> AsyncGenerator[str, None]: + if stop is None: + stop = [] + + from vllm import SamplingParams + + SAMPLING_PARAM = SamplingParams( + max_tokens=max_tokens, + stop=stop, + ) + stream = await self.engine.add_request(uuid.uuid4().hex, prompt, SAMPLING_PARAM) + + cursor = 0 + async for request_output in stream: + text = request_output.outputs[0].text + yield text[cursor:] + cursor = len(text) + + @bentoml.api(route="/api/chat") + async def chat( + self, + messages: list[Message] = [ + Message(content="what is the meaning of life?", role="user") + ], + model: str = ENGINE_CONFIG["model"], + max_tokens: Annotated[ + int, + Ge(128), + Le(ENGINE_CONFIG["max_model_len"]), + ] = ENGINE_CONFIG["max_model_len"], + stop: Optional[list[str]] = None, + ) -> AsyncGenerator[str, None]: + """ + light-weight chat API that takes in a list of messages and returns a response + """ + from vllm import SamplingParams + + try: + if OVERRIDE_CHAT_TEMPLATE: # community chat template + gen_config = _get_gen_config(CONSTANTS["chat_template"]) + if not stop: + if gen_config["stop_str"]: + stop = [gen_config["stop_str"]] + else: + stop = [] + system_prompt = gen_config["system_prompt"] + self.tokenizer.chat_template = gen_config["template"] + else: + if not stop: + if self.tokenizer.eos_token is not None: + stop = [self.tokenizer.eos_token] + else: + stop = [] + system_prompt = None + + SAMPLING_PARAM = SamplingParams( + max_tokens=max_tokens, + stop=stop, + ) + if system_prompt and messages[0].role != "system": + messages = [dict(role="system", content=system_prompt)] + messages + + prompt = self.tokenizer.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True, + ) + + stream = await self.engine.add_request( + uuid.uuid4().hex, prompt, SAMPLING_PARAM + ) + + cursor = 0 + strip_flag = True + async for request_output in stream: + text = request_output.outputs[0].text + assistant_message = text[cursor:] + if not strip_flag: # strip the leading whitespace + yield assistant_message + elif assistant_message.strip(): + strip_flag = False + yield assistant_message.lstrip() + cursor = len(text) + except Exception as e: + logger.error(f"Error in chat API: {e}") + yield f"Error in chat API: {e}" + + +@functools.lru_cache(maxsize=1) +def _get_gen_config(community_chat_template: str) -> dict: + logger.info(f"Load community_chat_template: {community_chat_template}") + chat_template_path = os.path.join( + os.path.dirname(__file__), "chat_templates", "chat_templates" + ) + config_path = os.path.join( + os.path.dirname(__file__), "chat_templates", "generation_configs" + ) + with open(os.path.join(config_path, f"{community_chat_template}.json")) as f: + gen_config = json.load(f) + chat_template_file = gen_config["chat_template"].split("/")[-1] + with open(os.path.join(chat_template_path, chat_template_file)) as f: + chat_template = f.read() + gen_config["template"] = chat_template.replace(" ", "").replace("\n", "") + return gen_config diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/404.html b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/404.html similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/404.html rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/404.html diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/700-532b1fe2415e5859.js b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/700-532b1fe2415e5859.js similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/700-532b1fe2415e5859.js rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/700-532b1fe2415e5859.js diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/995-34374f39bb210839.js b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/995-34374f39bb210839.js similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/995-34374f39bb210839.js rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/995-34374f39bb210839.js diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/css/429544bd3cd8ce3a.css b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/css/429544bd3cd8ce3a.css similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/css/429544bd3cd8ce3a.css rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/css/429544bd3cd8ce3a.css diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/css/5b67f082b31cfc7b.css b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/css/5b67f082b31cfc7b.css similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/css/5b67f082b31cfc7b.css rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/css/5b67f082b31cfc7b.css diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/css/9e63023b20ddb15e.css b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/css/9e63023b20ddb15e.css similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/_next/static/css/9e63023b20ddb15e.css rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/_next/static/css/9e63023b20ddb15e.css diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/apple-touch-icon.png b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/apple-touch-icon.png similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/apple-touch-icon.png rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/apple-touch-icon.png diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/chat.html b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/chat.html similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/chat.html rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/chat.html diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/chat.txt b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/chat.txt similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/chat.txt rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/chat.txt diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/favicon-16x16.png b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/favicon-16x16.png similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/favicon-16x16.png rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/favicon-16x16.png diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/favicon.ico b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/favicon.ico similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/favicon.ico rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/favicon.ico diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/index.html b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/index.html similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/index.html rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/index.html diff --git a/bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/index.txt b/bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/index.txt similarity index 100% rename from bentoml/bentos/qwen2/0.5b-instruct-fp16-855c/src/ui/index.txt rename to bentoml/bentos/qwen2/0.5b-instruct-fp16-96bb/src/ui/index.txt diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/README.md b/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/README.md deleted file mode 100644 index d5e5ae76..00000000 --- a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/README.md +++ /dev/null @@ -1,16 +0,0 @@ -# qwen2:1.5b-instruct-fp16-3346 - -[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML) -[![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/) -[![join_slack](https://badgen.net/badge/Join/BentoML%20Slack/cyan?icon=slack)](https://l.bentoml.com/join-slack-swagger) -[![BentoML GitHub Repo](https://img.shields.io/github/stars/bentoml/bentoml?style=social)](https://github.com/bentoml/BentoML) -[![Twitter Follow](https://img.shields.io/twitter/follow/bentomlai?label=Follow%20BentoML&style=social)](https://twitter.com/bentomlai) - -This is a Machine Learning Service created with BentoML. - -## Help - -* [📖 Documentation](https://docs.bentoml.com/en/latest/): Learn how to use BentoML. -* [💬 Community](https://l.bentoml.com/join-slack-swagger): Join the BentoML Slack community. -* [🐛 GitHub Issues](https://github.com/bentoml/BentoML/issues): Report bugs and feature requests. -* Tip: you can also [customize this README](https://docs.bentoml.com/en/latest/concepts/bento.html#description). diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/env/python/requirements.lock.txt b/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/env/python/requirements.lock.txt deleted file mode 100644 index 9118ea44..00000000 --- a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/env/python/requirements.lock.txt +++ /dev/null @@ -1,147 +0,0 @@ -aiohttp==3.9.5 -aiosignal==1.3.1 -annotated-types==0.7.0 -anyio==4.4.0 -appdirs==1.4.4 -asgiref==3.8.1 -async-timeout==4.0.3 -attrs==23.2.0 -bentoml==1.2.19 -build==1.2.1 -cattrs==23.1.2 -certifi==2024.7.4 -charset-normalizer==3.3.2 -circus==0.18.0 -click==8.1.7 -click-option-group==0.5.6 -cloudpickle==3.0.0 -cmake==3.30.0 -deepmerge==1.1.1 -deprecated==1.2.14 -diskcache==5.6.3 -distro==1.9.0 -dnspython==2.6.1 -email-validator==2.2.0 -exceptiongroup==1.2.1 -fastapi==0.111.0 -fastapi-cli==0.0.4 -filelock==3.15.4 -frozenlist==1.4.1 -fs==2.4.16 -fsspec==2024.6.1 -h11==0.14.0 -httpcore==1.0.5 -httptools==0.6.1 -httpx==0.27.0 -httpx-ws==0.6.0 -huggingface-hub==0.23.4 -idna==3.7 -importlib-metadata==6.11.0 -inflection==0.5.1 -interegular==0.3.3 -jinja2==3.1.4 -joblib==1.4.2 -jsonschema==4.22.0 -jsonschema-specifications==2023.12.1 -lark==1.1.9 -llvmlite==0.43.0 -lm-format-enforcer==0.10.1 -markdown-it-py==3.0.0 -markupsafe==2.1.5 -mdurl==0.1.2 -mpmath==1.3.0 -msgpack==1.0.8 -multidict==6.0.5 -nest-asyncio==1.6.0 -networkx==3.2.1 -ninja==1.11.1.1 -numba==0.60.0 -numpy==1.26.0 -nvidia-cublas-cu12==12.1.3.1 -nvidia-cuda-cupti-cu12==12.1.105 -nvidia-cuda-nvrtc-cu12==12.1.105 -nvidia-cuda-runtime-cu12==12.1.105 -nvidia-cudnn-cu12==8.9.2.26 -nvidia-cufft-cu12==11.0.2.54 -nvidia-curand-cu12==10.3.2.106 -nvidia-cusolver-cu12==11.4.5.107 -nvidia-cusparse-cu12==12.1.0.106 -nvidia-ml-py==11.525.150 -nvidia-nccl-cu12==2.20.5 -nvidia-nvjitlink-cu12==12.5.82 -nvidia-nvtx-cu12==12.1.105 -openai==1.35.10 -opentelemetry-api==1.20.0 -opentelemetry-instrumentation==0.41b0 -opentelemetry-instrumentation-aiohttp-client==0.41b0 -opentelemetry-instrumentation-asgi==0.41b0 -opentelemetry-sdk==1.20.0 -opentelemetry-semantic-conventions==0.41b0 -opentelemetry-util-http==0.41b0 -orjson==3.10.6 -outlines==0.0.34 -packaging==24.1 -pathspec==0.12.1 -pip-requirements-parser==32.0.1 -pip-tools==7.4.1 -prometheus-client==0.20.0 -prometheus-fastapi-instrumentator==7.0.0 -protobuf==5.27.2 -psutil==6.0.0 -py-cpuinfo==9.0.0 -pydantic==2.8.2 -pydantic-core==2.20.1 -pygments==2.18.0 -pyparsing==3.1.2 -pyproject-hooks==1.1.0 -python-dateutil==2.9.0.post0 -python-dotenv==1.0.1 -python-json-logger==2.0.7 -python-multipart==0.0.9 -pyyaml==6.0.1 -pyzmq==26.0.3 -ray==2.31.0 -referencing==0.35.1 -regex==2024.5.15 -requests==2.32.3 -rich==13.7.1 -rpds-py==0.18.1 -safetensors==0.4.3 -schema==0.7.7 -scipy==1.13.1 -sentencepiece==0.2.0 -shellingham==1.5.4 -simple-di==0.1.5 -six==1.16.0 -sniffio==1.3.1 -starlette==0.37.2 -sympy==1.12.1 -tiktoken==0.7.0 -tokenizers==0.19.1 -tomli==2.0.1 -tomli-w==1.0.0 -torch==2.3.0 -tornado==6.4.1 -tqdm==4.66.4 -transformers==4.41.0 -triton==2.3.0 -typer==0.12.3 -typing-extensions==4.12.2 -ujson==5.10.0 -urllib3==2.2.2 -uvicorn[standard]==0.30.1 -uvloop==0.19.0 -vllm==0.4.3 -vllm-flash-attn==2.5.8.post2 -watchfiles==0.22.0 -websockets==12.0 -wheel==0.43.0 -wrapt==1.16.0 -wsproto==1.2.0 -xformers==0.0.26.post1 -yarl==1.9.4 -zipp==3.19.2 - -# The following packages are considered to be unsafe in a requirements file: -pip==24.1.1 -setuptools==70.2.0 diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/service.py b/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/service.py deleted file mode 100644 index 94af0b0d..00000000 --- a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/service.py +++ /dev/null @@ -1,239 +0,0 @@ -import functools -import json -import logging -import os -import sys -import uuid -from typing import AsyncGenerator, Literal, Optional - -import bentoml -import fastapi -import fastapi.staticfiles -import pydantic -import vllm.entrypoints.openai.api_server as vllm_api_server -import yaml -from annotated_types import Ge, Le -from bento_constants import CONSTANT_YAML -from fastapi.responses import FileResponse -from typing_extensions import Annotated, Literal - - -class Message(pydantic.BaseModel): - content: str - role: Literal["system", "user", "assistant"] - - -CONSTANTS = yaml.safe_load(CONSTANT_YAML) - -ENGINE_CONFIG = CONSTANTS["engine_config"] -SERVICE_CONFIG = CONSTANTS["service_config"] -OVERRIDE_CHAT_TEMPLATE = CONSTANTS.get("chat_template") - -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) - - -openai_api_app = fastapi.FastAPI() -static_app = fastapi.FastAPI() -ui_app = fastapi.FastAPI() - - -OPENAI_ENDPOINTS = [ - ["/chat/completions", vllm_api_server.create_chat_completion, ["POST"]], - ["/completions", vllm_api_server.create_completion, ["POST"]], - ["/models", vllm_api_server.show_available_models, ["GET"]], -] - - -class Message(pydantic.BaseModel): - role: Literal["system", "user", "assistant"] - content: str - - -for route, endpoint, methods in OPENAI_ENDPOINTS: - openai_api_app.add_api_route( - path=route, - endpoint=endpoint, - methods=methods, - include_in_schema=True, - ) - - -STATIC_DIR = os.path.join(os.path.dirname(__file__), "ui") - -ui_app.mount( - "/static", fastapi.staticfiles.StaticFiles(directory=STATIC_DIR), name="static" -) - - -@ui_app.get("/") -async def serve_chat_html(): - return FileResponse(os.path.join(STATIC_DIR, "chat.html")) - - -@ui_app.get("/{full_path:path}") -async def catch_all(full_path: str): - file_path = os.path.join(STATIC_DIR, full_path) - if os.path.exists(file_path): - return FileResponse(file_path) - return FileResponse(os.path.join(STATIC_DIR, "chat.html")) - - -# special handling for prometheus_client of bentoml -if "prometheus_client" in sys.modules: - sys.modules.pop("prometheus_client") - - -@bentoml.mount_asgi_app(openai_api_app, path="/v1") -@bentoml.mount_asgi_app(ui_app, path="/ui") -@bentoml.service(**SERVICE_CONFIG) -class VLLM: - def __init__(self) -> None: - from transformers import AutoTokenizer - from vllm import AsyncEngineArgs, AsyncLLMEngine - from vllm.entrypoints.openai.serving_chat import OpenAIServingChat - from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion - - ENGINE_ARGS = AsyncEngineArgs(**ENGINE_CONFIG) - self.engine = AsyncLLMEngine.from_engine_args(ENGINE_ARGS) - self.tokenizer = AutoTokenizer.from_pretrained(ENGINE_CONFIG["model"]) - logger.info(f"VLLM service initialized with model: {ENGINE_CONFIG['model']}") - - if OVERRIDE_CHAT_TEMPLATE: # use community chat template - gen_config = _get_gen_config(CONSTANTS["chat_template"]) - chat_template = gen_config["template"] - else: - chat_template = None - - model_config = self.engine.engine.get_model_config() - - # inject the engine into the openai serving chat and completion - vllm_api_server.openai_serving_chat = OpenAIServingChat( - engine=self.engine, - served_model_names=[ENGINE_CONFIG["model"]], - response_role="assistant", - chat_template=chat_template, - model_config=model_config, - ) - vllm_api_server.openai_serving_completion = OpenAIServingCompletion( - engine=self.engine, - served_model_names=[ENGINE_CONFIG["model"]], - model_config=model_config, - lora_modules=None, - ) - - @bentoml.api(route="/api/generate") - async def generate( - self, - prompt: str = "Explain superconductors like I'm five years old", - model: str = ENGINE_CONFIG["model"], - max_tokens: Annotated[ - int, - Ge(128), - Le(ENGINE_CONFIG["max_model_len"]), - ] = ENGINE_CONFIG["max_model_len"], - stop: Optional[list[str]] = None, - ) -> AsyncGenerator[str, None]: - if stop is None: - stop = [] - - from vllm import SamplingParams - - SAMPLING_PARAM = SamplingParams( - max_tokens=max_tokens, - stop=stop, - ) - stream = await self.engine.add_request(uuid.uuid4().hex, prompt, SAMPLING_PARAM) - - cursor = 0 - async for request_output in stream: - text = request_output.outputs[0].text - yield text[cursor:] - cursor = len(text) - - @bentoml.api(route="/api/chat") - async def chat( - self, - messages: list[Message] = [ - Message(content="what is the meaning of life?", role="user") - ], - model: str = ENGINE_CONFIG["model"], - max_tokens: Annotated[ - int, - Ge(128), - Le(ENGINE_CONFIG["max_model_len"]), - ] = ENGINE_CONFIG["max_model_len"], - stop: Optional[list[str]] = None, - ) -> AsyncGenerator[str, None]: - """ - light-weight chat API that takes in a list of messages and returns a response - """ - from vllm import SamplingParams - - try: - if OVERRIDE_CHAT_TEMPLATE: # community chat template - gen_config = _get_gen_config(CONSTANTS["chat_template"]) - if not stop: - if gen_config["stop_str"]: - stop = [gen_config["stop_str"]] - else: - stop = [] - system_prompt = gen_config["system_prompt"] - self.tokenizer.chat_template = gen_config["template"] - else: - if not stop: - if self.tokenizer.eos_token is not None: - stop = [self.tokenizer.eos_token] - else: - stop = [] - system_prompt = None - - SAMPLING_PARAM = SamplingParams( - max_tokens=max_tokens, - stop=stop, - ) - if system_prompt and messages[0].role != "system": - messages = [dict(role="system", content=system_prompt)] + messages - - prompt = self.tokenizer.apply_chat_template( - messages, - tokenize=False, - add_generation_prompt=True, - ) - - stream = await self.engine.add_request( - uuid.uuid4().hex, prompt, SAMPLING_PARAM - ) - - cursor = 0 - strip_flag = True - async for request_output in stream: - text = request_output.outputs[0].text - assistant_message = text[cursor:] - if not strip_flag: # strip the leading whitespace - yield assistant_message - elif assistant_message.strip(): - strip_flag = False - yield assistant_message.lstrip() - cursor = len(text) - except Exception as e: - logger.error(f"Error in chat API: {e}") - yield f"Error in chat API: {e}" - - -@functools.lru_cache(maxsize=1) -def _get_gen_config(community_chat_template: str) -> dict: - logger.info(f"Load community_chat_template: {community_chat_template}") - chat_template_path = os.path.join( - os.path.dirname(__file__), "chat_templates", "chat_templates" - ) - config_path = os.path.join( - os.path.dirname(__file__), "chat_templates", "generation_configs" - ) - with open(os.path.join(config_path, f"{community_chat_template}.json")) as f: - gen_config = json.load(f) - chat_template_file = gen_config["chat_template"].split("/")[-1] - with open(os.path.join(chat_template_path, chat_template_file)) as f: - chat_template = f.read() - gen_config["template"] = chat_template.replace(" ", "").replace("\n", "") - return gen_config diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/README.md b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/README.md new file mode 100644 index 00000000..ea4c69c9 --- /dev/null +++ b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/README.md @@ -0,0 +1,16 @@ +# qwen2:1.5b-instruct-fp16-ec69 + +[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML) +[![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/) +[![join_slack](https://badgen.net/badge/Join/BentoML%20Slack/cyan?icon=slack)](https://l.bentoml.com/join-slack-swagger) +[![BentoML GitHub Repo](https://img.shields.io/github/stars/bentoml/bentoml?style=social)](https://github.com/bentoml/BentoML) +[![Twitter Follow](https://img.shields.io/twitter/follow/bentomlai?label=Follow%20BentoML&style=social)](https://twitter.com/bentomlai) + +This is a Machine Learning Service created with BentoML. + +## Help + +* [📖 Documentation](https://docs.bentoml.com/en/latest/): Learn how to use BentoML. +* [💬 Community](https://l.bentoml.com/join-slack-swagger): Join the BentoML Slack community. +* [🐛 GitHub Issues](https://github.com/bentoml/BentoML/issues): Report bugs and feature requests. +* Tip: you can also [customize this README](https://docs.bentoml.com/en/latest/concepts/bento.html#description). diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/apis/openapi.yaml b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/apis/openapi.yaml similarity index 99% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/apis/openapi.yaml rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/apis/openapi.yaml index d862f124..c18035b0 100644 --- a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/apis/openapi.yaml +++ b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/apis/openapi.yaml @@ -1036,6 +1036,39 @@ paths: tags: - Service APIs x-bentoml-name: generate + /chat/: + get: + operationId: serve_chat_html__get + responses: + '200': + content: + application/json: + schema: {} + description: Successful Response + summary: Serve Chat Html + /chat/{full_path}: + get: + operationId: catch_all__full_path__get + parameters: + - in: path + name: full_path + required: true + schema: + title: Full Path + type: string + responses: + '200': + content: + application/json: + schema: {} + description: Successful Response + '422': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + description: Validation Error + summary: Catch All /healthz: get: description: Health check endpoint. Expecting an empty response with status @@ -1076,39 +1109,6 @@ paths: description: Successful Response tags: - Infrastructure - /ui/: - get: - operationId: serve_chat_html__get - responses: - '200': - content: - application/json: - schema: {} - description: Successful Response - summary: Serve Chat Html - /ui/{full_path}: - get: - operationId: catch_all__full_path__get - parameters: - - in: path - name: full_path - required: true - schema: - title: Full Path - type: string - responses: - '200': - content: - application/json: - schema: {} - description: Successful Response - '422': - content: - application/json: - schema: - $ref: '#/components/schemas/HTTPValidationError' - description: Validation Error - summary: Catch All /v1/chat/completions: post: operationId: create_chat_completion_chat_completions_post diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/apis/schema.json b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/apis/schema.json similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/apis/schema.json rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/apis/schema.json diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/bento.yaml b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/bento.yaml similarity index 97% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/bento.yaml rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/bento.yaml index e5813e05..7d3c5659 100644 --- a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/bento.yaml +++ b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/bento.yaml @@ -1,8 +1,8 @@ service: service:VLLM name: qwen2 -version: 1.5b-instruct-fp16-3346 +version: 1.5b-instruct-fp16-ec69 bentoml_version: 1.2.19 -creation_time: '2024-07-05T07:34:25.926068+00:00' +creation_time: '2024-07-08T08:02:52.426817+00:00' labels: openllm_alias: 1.5b,1.5b-instruct openllm_hf_model_id: Qwen/Qwen2-1.5B-Instruct diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/env/docker/Dockerfile b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/env/docker/Dockerfile similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/env/docker/Dockerfile rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/env/docker/Dockerfile diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/env/docker/entrypoint.sh b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/env/docker/entrypoint.sh similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/env/docker/entrypoint.sh rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/env/docker/entrypoint.sh diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/env/python/install.sh b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/env/python/install.sh similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/env/python/install.sh rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/env/python/install.sh diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/env/python/requirements.lock.txt b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/env/python/requirements.lock.txt new file mode 100644 index 00000000..925ce193 --- /dev/null +++ b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/env/python/requirements.lock.txt @@ -0,0 +1,147 @@ +aiohttp==3.9.5 +aiosignal==1.3.1 +annotated-types==0.7.0 +anyio==4.4.0 +appdirs==1.4.4 +asgiref==3.8.1 +async-timeout==4.0.3 +attrs==23.2.0 +bentoml==1.2.19 +build==1.2.1 +cattrs==23.1.2 +certifi==2024.7.4 +charset-normalizer==3.3.2 +circus==0.18.0 +click==8.1.7 +click-option-group==0.5.6 +cloudpickle==3.0.0 +cmake==3.30.0 +deepmerge==1.1.1 +deprecated==1.2.14 +diskcache==5.6.3 +distro==1.9.0 +dnspython==2.6.1 +email-validator==2.2.0 +exceptiongroup==1.2.1 +fastapi==0.111.0 +fastapi-cli==0.0.4 +filelock==3.15.4 +frozenlist==1.4.1 +fs==2.4.16 +fsspec==2024.6.1 +h11==0.14.0 +httpcore==1.0.5 +httptools==0.6.1 +httpx==0.27.0 +httpx-ws==0.6.0 +huggingface-hub==0.23.4 +idna==3.7 +importlib-metadata==6.11.0 +inflection==0.5.1 +interegular==0.3.3 +jinja2==3.1.4 +joblib==1.4.2 +jsonschema==4.22.0 +jsonschema-specifications==2023.12.1 +lark==1.1.9 +llvmlite==0.43.0 +lm-format-enforcer==0.10.1 +markdown-it-py==3.0.0 +markupsafe==2.1.5 +mdurl==0.1.2 +mpmath==1.3.0 +msgpack==1.0.8 +multidict==6.0.5 +nest-asyncio==1.6.0 +networkx==3.2.1 +ninja==1.11.1.1 +numba==0.60.0 +numpy==1.26.0 +nvidia-cublas-cu12==12.1.3.1 +nvidia-cuda-cupti-cu12==12.1.105 +nvidia-cuda-nvrtc-cu12==12.1.105 +nvidia-cuda-runtime-cu12==12.1.105 +nvidia-cudnn-cu12==8.9.2.26 +nvidia-cufft-cu12==11.0.2.54 +nvidia-curand-cu12==10.3.2.106 +nvidia-cusolver-cu12==11.4.5.107 +nvidia-cusparse-cu12==12.1.0.106 +nvidia-ml-py==11.525.150 +nvidia-nccl-cu12==2.20.5 +nvidia-nvjitlink-cu12==12.5.82 +nvidia-nvtx-cu12==12.1.105 +openai==1.35.10 +opentelemetry-api==1.20.0 +opentelemetry-instrumentation==0.41b0 +opentelemetry-instrumentation-aiohttp-client==0.41b0 +opentelemetry-instrumentation-asgi==0.41b0 +opentelemetry-sdk==1.20.0 +opentelemetry-semantic-conventions==0.41b0 +opentelemetry-util-http==0.41b0 +orjson==3.10.6 +outlines==0.0.34 +packaging==24.1 +pathspec==0.12.1 +pip-requirements-parser==32.0.1 +pip-tools==7.4.1 +prometheus-client==0.20.0 +prometheus-fastapi-instrumentator==7.0.0 +protobuf==5.27.2 +psutil==6.0.0 +py-cpuinfo==9.0.0 +pydantic==2.8.2 +pydantic-core==2.20.1 +pygments==2.18.0 +pyparsing==3.1.2 +pyproject-hooks==1.1.0 +python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 +python-json-logger==2.0.7 +python-multipart==0.0.9 +pyyaml==6.0.1 +pyzmq==26.0.3 +ray==2.31.0 +referencing==0.35.1 +regex==2024.5.15 +requests==2.32.3 +rich==13.7.1 +rpds-py==0.18.1 +safetensors==0.4.3 +schema==0.7.7 +scipy==1.13.1 +sentencepiece==0.2.0 +shellingham==1.5.4 +simple-di==0.1.5 +six==1.16.0 +sniffio==1.3.1 +starlette==0.37.2 +sympy==1.12.1 +tiktoken==0.7.0 +tokenizers==0.19.1 +tomli==2.0.1 +tomli-w==1.0.0 +torch==2.3.0 +tornado==6.4.1 +tqdm==4.66.4 +transformers==4.41.0 +triton==2.3.0 +typer==0.12.3 +typing-extensions==4.12.2 +ujson==5.10.0 +urllib3==2.2.2 +uvicorn[standard]==0.30.1 +uvloop==0.19.0 +vllm==0.4.3 +vllm-flash-attn==2.5.8.post2 +watchfiles==0.22.0 +websockets==12.0 +wheel==0.43.0 +wrapt==1.16.0 +wsproto==1.2.0 +xformers==0.0.26.post1 +yarl==1.9.4 +zipp==3.19.2 + +# The following packages are considered to be unsafe in a requirements file: +pip==24.1.2 +setuptools==70.2.0 diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/env/python/requirements.txt b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/env/python/requirements.txt similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/env/python/requirements.txt rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/env/python/requirements.txt diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/env/python/version.txt b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/env/python/version.txt similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/env/python/version.txt rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/env/python/version.txt diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/bento_constants.py b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/bento_constants.py similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/bento_constants.py rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/bento_constants.py diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/bentofile.yaml b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/bentofile.yaml similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/bentofile.yaml rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/bentofile.yaml diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/chat_templates/alpaca.jinja b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/chat_templates/alpaca.jinja similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/chat_templates/alpaca.jinja rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/chat_templates/alpaca.jinja diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/chat_templates/amberchat.jinja b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/chat_templates/amberchat.jinja similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/chat_templates/amberchat.jinja rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/chat_templates/amberchat.jinja diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/chat_templates/chatml.jinja b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/chat_templates/chatml.jinja similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/chat_templates/chatml.jinja rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/chat_templates/chatml.jinja diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/chat_templates/chatqa.jinja b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/chat_templates/chatqa.jinja similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/chat_templates/chatqa.jinja rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/chat_templates/chatqa.jinja diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/chat_templates/falcon-instruct.jinja b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/chat_templates/falcon-instruct.jinja similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/chat_templates/falcon-instruct.jinja rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/chat_templates/falcon-instruct.jinja diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/chat_templates/gemma-it.jinja b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/chat_templates/gemma-it.jinja similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/chat_templates/gemma-it.jinja rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/chat_templates/gemma-it.jinja diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/chat_templates/llama-2-chat.jinja b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/chat_templates/llama-2-chat.jinja similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/chat_templates/llama-2-chat.jinja rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/chat_templates/llama-2-chat.jinja diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/chat_templates/llama-3-chat.jinja b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/chat_templates/llama-3-chat.jinja similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/chat_templates/llama-3-chat.jinja rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/chat_templates/llama-3-chat.jinja diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/chat_templates/mistral-instruct.jinja b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/chat_templates/mistral-instruct.jinja similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/chat_templates/mistral-instruct.jinja rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/chat_templates/mistral-instruct.jinja diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/chat_templates/openchat.jinja b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/chat_templates/openchat.jinja similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/chat_templates/openchat.jinja rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/chat_templates/openchat.jinja diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/chat_templates/phi-3.jinja b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/chat_templates/phi-3.jinja similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/chat_templates/phi-3.jinja rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/chat_templates/phi-3.jinja diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/chat_templates/saiga.jinja b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/chat_templates/saiga.jinja similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/chat_templates/saiga.jinja rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/chat_templates/saiga.jinja diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/chat_templates/solar-instruct.jinja b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/chat_templates/solar-instruct.jinja similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/chat_templates/solar-instruct.jinja rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/chat_templates/solar-instruct.jinja diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/chat_templates/vicuna.jinja b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/chat_templates/vicuna.jinja similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/chat_templates/vicuna.jinja rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/chat_templates/vicuna.jinja diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/chat_templates/zephyr.jinja b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/chat_templates/zephyr.jinja similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/chat_templates/zephyr.jinja rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/chat_templates/zephyr.jinja diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/generation_configs/alpaca.json b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/generation_configs/alpaca.json similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/generation_configs/alpaca.json rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/generation_configs/alpaca.json diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/generation_configs/amberchat.json b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/generation_configs/amberchat.json similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/generation_configs/amberchat.json rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/generation_configs/amberchat.json diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/generation_configs/chatqa.json b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/generation_configs/chatqa.json similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/generation_configs/chatqa.json rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/generation_configs/chatqa.json diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/generation_configs/gemma-it.json b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/generation_configs/gemma-it.json similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/generation_configs/gemma-it.json rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/generation_configs/gemma-it.json diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/generation_configs/llama-2-chat.json b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/generation_configs/llama-2-chat.json similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/generation_configs/llama-2-chat.json rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/generation_configs/llama-2-chat.json diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/generation_configs/llama-3-chat.json b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/generation_configs/llama-3-chat.json similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/generation_configs/llama-3-chat.json rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/generation_configs/llama-3-chat.json diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/generation_configs/mistral-instruct.json b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/generation_configs/mistral-instruct.json similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/generation_configs/mistral-instruct.json rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/generation_configs/mistral-instruct.json diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/generation_configs/openchat.json b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/generation_configs/openchat.json similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/generation_configs/openchat.json rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/generation_configs/openchat.json diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/generation_configs/orca-2.json b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/generation_configs/orca-2.json similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/generation_configs/orca-2.json rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/generation_configs/orca-2.json diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/generation_configs/phi-3.json b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/generation_configs/phi-3.json similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/generation_configs/phi-3.json rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/generation_configs/phi-3.json diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/generation_configs/qwen2-chat.json b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/generation_configs/qwen2-chat.json similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/generation_configs/qwen2-chat.json rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/generation_configs/qwen2-chat.json diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/generation_configs/saiga.json b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/generation_configs/saiga.json similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/generation_configs/saiga.json rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/generation_configs/saiga.json diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/generation_configs/solar-instruct.json b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/generation_configs/solar-instruct.json similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/generation_configs/solar-instruct.json rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/generation_configs/solar-instruct.json diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/generation_configs/vicuna.json b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/generation_configs/vicuna.json similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/generation_configs/vicuna.json rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/generation_configs/vicuna.json diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/generation_configs/yi-chat.json b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/generation_configs/yi-chat.json similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/generation_configs/yi-chat.json rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/generation_configs/yi-chat.json diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/generation_configs/zephyr.json b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/generation_configs/zephyr.json similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/chat_templates/generation_configs/zephyr.json rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/chat_templates/generation_configs/zephyr.json diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/service.py b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/service.py new file mode 100644 index 00000000..42152243 --- /dev/null +++ b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/service.py @@ -0,0 +1,239 @@ +import functools +import json +import logging +import os +import sys +import uuid +from typing import AsyncGenerator, Literal, Optional + +import bentoml +import fastapi +import fastapi.staticfiles +import pydantic +import vllm.entrypoints.openai.api_server as vllm_api_server +import yaml +from annotated_types import Ge, Le +from bento_constants import CONSTANT_YAML +from fastapi.responses import FileResponse +from typing_extensions import Annotated, Literal + + +class Message(pydantic.BaseModel): + content: str + role: Literal["system", "user", "assistant"] + + +CONSTANTS = yaml.safe_load(CONSTANT_YAML) + +ENGINE_CONFIG = CONSTANTS["engine_config"] +SERVICE_CONFIG = CONSTANTS["service_config"] +OVERRIDE_CHAT_TEMPLATE = CONSTANTS.get("chat_template") + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +openai_api_app = fastapi.FastAPI() +static_app = fastapi.FastAPI() +ui_app = fastapi.FastAPI() + + +OPENAI_ENDPOINTS = [ + ["/chat/completions", vllm_api_server.create_chat_completion, ["POST"]], + ["/completions", vllm_api_server.create_completion, ["POST"]], + ["/models", vllm_api_server.show_available_models, ["GET"]], +] + + +class Message(pydantic.BaseModel): + role: Literal["system", "user", "assistant"] + content: str + + +for route, endpoint, methods in OPENAI_ENDPOINTS: + openai_api_app.add_api_route( + path=route, + endpoint=endpoint, + methods=methods, + include_in_schema=True, + ) + + +STATIC_DIR = os.path.join(os.path.dirname(__file__), "ui") + +ui_app.mount( + "/static", fastapi.staticfiles.StaticFiles(directory=STATIC_DIR), name="static" +) + + +@ui_app.get("/") +async def serve_chat_html(): + return FileResponse(os.path.join(STATIC_DIR, "chat.html")) + + +@ui_app.get("/{full_path:path}") +async def catch_all(full_path: str): + file_path = os.path.join(STATIC_DIR, full_path) + if os.path.exists(file_path): + return FileResponse(file_path) + return FileResponse(os.path.join(STATIC_DIR, "chat.html")) + + +# special handling for prometheus_client of bentoml +if "prometheus_client" in sys.modules: + sys.modules.pop("prometheus_client") + + +@bentoml.mount_asgi_app(openai_api_app, path="/v1") +@bentoml.mount_asgi_app(ui_app, path="/chat") +@bentoml.service(**SERVICE_CONFIG) +class VLLM: + def __init__(self) -> None: + from transformers import AutoTokenizer + from vllm import AsyncEngineArgs, AsyncLLMEngine + from vllm.entrypoints.openai.serving_chat import OpenAIServingChat + from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion + + ENGINE_ARGS = AsyncEngineArgs(**ENGINE_CONFIG) + self.engine = AsyncLLMEngine.from_engine_args(ENGINE_ARGS) + self.tokenizer = AutoTokenizer.from_pretrained(ENGINE_CONFIG["model"]) + logger.info(f"VLLM service initialized with model: {ENGINE_CONFIG['model']}") + + if OVERRIDE_CHAT_TEMPLATE: # use community chat template + gen_config = _get_gen_config(CONSTANTS["chat_template"]) + chat_template = gen_config["template"] + else: + chat_template = None + + model_config = self.engine.engine.get_model_config() + + # inject the engine into the openai serving chat and completion + vllm_api_server.openai_serving_chat = OpenAIServingChat( + engine=self.engine, + served_model_names=[ENGINE_CONFIG["model"]], + response_role="assistant", + chat_template=chat_template, + model_config=model_config, + ) + vllm_api_server.openai_serving_completion = OpenAIServingCompletion( + engine=self.engine, + served_model_names=[ENGINE_CONFIG["model"]], + model_config=model_config, + lora_modules=None, + ) + + @bentoml.api(route="/api/generate") + async def generate( + self, + prompt: str = "Explain superconductors like I'm five years old", + model: str = ENGINE_CONFIG["model"], + max_tokens: Annotated[ + int, + Ge(128), + Le(ENGINE_CONFIG["max_model_len"]), + ] = ENGINE_CONFIG["max_model_len"], + stop: Optional[list[str]] = None, + ) -> AsyncGenerator[str, None]: + if stop is None: + stop = [] + + from vllm import SamplingParams + + SAMPLING_PARAM = SamplingParams( + max_tokens=max_tokens, + stop=stop, + ) + stream = await self.engine.add_request(uuid.uuid4().hex, prompt, SAMPLING_PARAM) + + cursor = 0 + async for request_output in stream: + text = request_output.outputs[0].text + yield text[cursor:] + cursor = len(text) + + @bentoml.api(route="/api/chat") + async def chat( + self, + messages: list[Message] = [ + Message(content="what is the meaning of life?", role="user") + ], + model: str = ENGINE_CONFIG["model"], + max_tokens: Annotated[ + int, + Ge(128), + Le(ENGINE_CONFIG["max_model_len"]), + ] = ENGINE_CONFIG["max_model_len"], + stop: Optional[list[str]] = None, + ) -> AsyncGenerator[str, None]: + """ + light-weight chat API that takes in a list of messages and returns a response + """ + from vllm import SamplingParams + + try: + if OVERRIDE_CHAT_TEMPLATE: # community chat template + gen_config = _get_gen_config(CONSTANTS["chat_template"]) + if not stop: + if gen_config["stop_str"]: + stop = [gen_config["stop_str"]] + else: + stop = [] + system_prompt = gen_config["system_prompt"] + self.tokenizer.chat_template = gen_config["template"] + else: + if not stop: + if self.tokenizer.eos_token is not None: + stop = [self.tokenizer.eos_token] + else: + stop = [] + system_prompt = None + + SAMPLING_PARAM = SamplingParams( + max_tokens=max_tokens, + stop=stop, + ) + if system_prompt and messages[0].role != "system": + messages = [dict(role="system", content=system_prompt)] + messages + + prompt = self.tokenizer.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True, + ) + + stream = await self.engine.add_request( + uuid.uuid4().hex, prompt, SAMPLING_PARAM + ) + + cursor = 0 + strip_flag = True + async for request_output in stream: + text = request_output.outputs[0].text + assistant_message = text[cursor:] + if not strip_flag: # strip the leading whitespace + yield assistant_message + elif assistant_message.strip(): + strip_flag = False + yield assistant_message.lstrip() + cursor = len(text) + except Exception as e: + logger.error(f"Error in chat API: {e}") + yield f"Error in chat API: {e}" + + +@functools.lru_cache(maxsize=1) +def _get_gen_config(community_chat_template: str) -> dict: + logger.info(f"Load community_chat_template: {community_chat_template}") + chat_template_path = os.path.join( + os.path.dirname(__file__), "chat_templates", "chat_templates" + ) + config_path = os.path.join( + os.path.dirname(__file__), "chat_templates", "generation_configs" + ) + with open(os.path.join(config_path, f"{community_chat_template}.json")) as f: + gen_config = json.load(f) + chat_template_file = gen_config["chat_template"].split("/")[-1] + with open(os.path.join(chat_template_path, chat_template_file)) as f: + chat_template = f.read() + gen_config["template"] = chat_template.replace(" ", "").replace("\n", "") + return gen_config diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/404.html b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/404.html similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/404.html rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/404.html diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/700-532b1fe2415e5859.js b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/700-532b1fe2415e5859.js similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/700-532b1fe2415e5859.js rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/700-532b1fe2415e5859.js diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/995-34374f39bb210839.js b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/995-34374f39bb210839.js similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/995-34374f39bb210839.js rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/995-34374f39bb210839.js diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/css/429544bd3cd8ce3a.css b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/css/429544bd3cd8ce3a.css similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/css/429544bd3cd8ce3a.css rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/css/429544bd3cd8ce3a.css diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/css/5b67f082b31cfc7b.css b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/css/5b67f082b31cfc7b.css similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/css/5b67f082b31cfc7b.css rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/css/5b67f082b31cfc7b.css diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/css/9e63023b20ddb15e.css b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/css/9e63023b20ddb15e.css similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/_next/static/css/9e63023b20ddb15e.css rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/_next/static/css/9e63023b20ddb15e.css diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/apple-touch-icon.png b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/apple-touch-icon.png similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/apple-touch-icon.png rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/apple-touch-icon.png diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/chat.html b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/chat.html similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/chat.html rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/chat.html diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/chat.txt b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/chat.txt similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/chat.txt rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/chat.txt diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/favicon-16x16.png b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/favicon-16x16.png similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/favicon-16x16.png rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/favicon-16x16.png diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/favicon.ico b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/favicon.ico similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/favicon.ico rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/favicon.ico diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/index.html b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/index.html similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/index.html rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/index.html diff --git a/bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/index.txt b/bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/index.txt similarity index 100% rename from bentoml/bentos/qwen2/1.5b-instruct-fp16-3346/src/ui/index.txt rename to bentoml/bentos/qwen2/1.5b-instruct-fp16-ec69/src/ui/index.txt diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/README.md b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/README.md deleted file mode 100644 index 3387db84..00000000 --- a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/README.md +++ /dev/null @@ -1,16 +0,0 @@ -# qwen2:57b-a14b-instruct-fp16-8d40 - -[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML) -[![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/) -[![join_slack](https://badgen.net/badge/Join/BentoML%20Slack/cyan?icon=slack)](https://l.bentoml.com/join-slack-swagger) -[![BentoML GitHub Repo](https://img.shields.io/github/stars/bentoml/bentoml?style=social)](https://github.com/bentoml/BentoML) -[![Twitter Follow](https://img.shields.io/twitter/follow/bentomlai?label=Follow%20BentoML&style=social)](https://twitter.com/bentomlai) - -This is a Machine Learning Service created with BentoML. - -## Help - -* [📖 Documentation](https://docs.bentoml.com/en/latest/): Learn how to use BentoML. -* [💬 Community](https://l.bentoml.com/join-slack-swagger): Join the BentoML Slack community. -* [🐛 GitHub Issues](https://github.com/bentoml/BentoML/issues): Report bugs and feature requests. -* Tip: you can also [customize this README](https://docs.bentoml.com/en/latest/concepts/bento.html#description). diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/env/python/requirements.lock.txt b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/env/python/requirements.lock.txt deleted file mode 100644 index 9118ea44..00000000 --- a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/env/python/requirements.lock.txt +++ /dev/null @@ -1,147 +0,0 @@ -aiohttp==3.9.5 -aiosignal==1.3.1 -annotated-types==0.7.0 -anyio==4.4.0 -appdirs==1.4.4 -asgiref==3.8.1 -async-timeout==4.0.3 -attrs==23.2.0 -bentoml==1.2.19 -build==1.2.1 -cattrs==23.1.2 -certifi==2024.7.4 -charset-normalizer==3.3.2 -circus==0.18.0 -click==8.1.7 -click-option-group==0.5.6 -cloudpickle==3.0.0 -cmake==3.30.0 -deepmerge==1.1.1 -deprecated==1.2.14 -diskcache==5.6.3 -distro==1.9.0 -dnspython==2.6.1 -email-validator==2.2.0 -exceptiongroup==1.2.1 -fastapi==0.111.0 -fastapi-cli==0.0.4 -filelock==3.15.4 -frozenlist==1.4.1 -fs==2.4.16 -fsspec==2024.6.1 -h11==0.14.0 -httpcore==1.0.5 -httptools==0.6.1 -httpx==0.27.0 -httpx-ws==0.6.0 -huggingface-hub==0.23.4 -idna==3.7 -importlib-metadata==6.11.0 -inflection==0.5.1 -interegular==0.3.3 -jinja2==3.1.4 -joblib==1.4.2 -jsonschema==4.22.0 -jsonschema-specifications==2023.12.1 -lark==1.1.9 -llvmlite==0.43.0 -lm-format-enforcer==0.10.1 -markdown-it-py==3.0.0 -markupsafe==2.1.5 -mdurl==0.1.2 -mpmath==1.3.0 -msgpack==1.0.8 -multidict==6.0.5 -nest-asyncio==1.6.0 -networkx==3.2.1 -ninja==1.11.1.1 -numba==0.60.0 -numpy==1.26.0 -nvidia-cublas-cu12==12.1.3.1 -nvidia-cuda-cupti-cu12==12.1.105 -nvidia-cuda-nvrtc-cu12==12.1.105 -nvidia-cuda-runtime-cu12==12.1.105 -nvidia-cudnn-cu12==8.9.2.26 -nvidia-cufft-cu12==11.0.2.54 -nvidia-curand-cu12==10.3.2.106 -nvidia-cusolver-cu12==11.4.5.107 -nvidia-cusparse-cu12==12.1.0.106 -nvidia-ml-py==11.525.150 -nvidia-nccl-cu12==2.20.5 -nvidia-nvjitlink-cu12==12.5.82 -nvidia-nvtx-cu12==12.1.105 -openai==1.35.10 -opentelemetry-api==1.20.0 -opentelemetry-instrumentation==0.41b0 -opentelemetry-instrumentation-aiohttp-client==0.41b0 -opentelemetry-instrumentation-asgi==0.41b0 -opentelemetry-sdk==1.20.0 -opentelemetry-semantic-conventions==0.41b0 -opentelemetry-util-http==0.41b0 -orjson==3.10.6 -outlines==0.0.34 -packaging==24.1 -pathspec==0.12.1 -pip-requirements-parser==32.0.1 -pip-tools==7.4.1 -prometheus-client==0.20.0 -prometheus-fastapi-instrumentator==7.0.0 -protobuf==5.27.2 -psutil==6.0.0 -py-cpuinfo==9.0.0 -pydantic==2.8.2 -pydantic-core==2.20.1 -pygments==2.18.0 -pyparsing==3.1.2 -pyproject-hooks==1.1.0 -python-dateutil==2.9.0.post0 -python-dotenv==1.0.1 -python-json-logger==2.0.7 -python-multipart==0.0.9 -pyyaml==6.0.1 -pyzmq==26.0.3 -ray==2.31.0 -referencing==0.35.1 -regex==2024.5.15 -requests==2.32.3 -rich==13.7.1 -rpds-py==0.18.1 -safetensors==0.4.3 -schema==0.7.7 -scipy==1.13.1 -sentencepiece==0.2.0 -shellingham==1.5.4 -simple-di==0.1.5 -six==1.16.0 -sniffio==1.3.1 -starlette==0.37.2 -sympy==1.12.1 -tiktoken==0.7.0 -tokenizers==0.19.1 -tomli==2.0.1 -tomli-w==1.0.0 -torch==2.3.0 -tornado==6.4.1 -tqdm==4.66.4 -transformers==4.41.0 -triton==2.3.0 -typer==0.12.3 -typing-extensions==4.12.2 -ujson==5.10.0 -urllib3==2.2.2 -uvicorn[standard]==0.30.1 -uvloop==0.19.0 -vllm==0.4.3 -vllm-flash-attn==2.5.8.post2 -watchfiles==0.22.0 -websockets==12.0 -wheel==0.43.0 -wrapt==1.16.0 -wsproto==1.2.0 -xformers==0.0.26.post1 -yarl==1.9.4 -zipp==3.19.2 - -# The following packages are considered to be unsafe in a requirements file: -pip==24.1.1 -setuptools==70.2.0 diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/service.py b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/service.py deleted file mode 100644 index 94af0b0d..00000000 --- a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/service.py +++ /dev/null @@ -1,239 +0,0 @@ -import functools -import json -import logging -import os -import sys -import uuid -from typing import AsyncGenerator, Literal, Optional - -import bentoml -import fastapi -import fastapi.staticfiles -import pydantic -import vllm.entrypoints.openai.api_server as vllm_api_server -import yaml -from annotated_types import Ge, Le -from bento_constants import CONSTANT_YAML -from fastapi.responses import FileResponse -from typing_extensions import Annotated, Literal - - -class Message(pydantic.BaseModel): - content: str - role: Literal["system", "user", "assistant"] - - -CONSTANTS = yaml.safe_load(CONSTANT_YAML) - -ENGINE_CONFIG = CONSTANTS["engine_config"] -SERVICE_CONFIG = CONSTANTS["service_config"] -OVERRIDE_CHAT_TEMPLATE = CONSTANTS.get("chat_template") - -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) - - -openai_api_app = fastapi.FastAPI() -static_app = fastapi.FastAPI() -ui_app = fastapi.FastAPI() - - -OPENAI_ENDPOINTS = [ - ["/chat/completions", vllm_api_server.create_chat_completion, ["POST"]], - ["/completions", vllm_api_server.create_completion, ["POST"]], - ["/models", vllm_api_server.show_available_models, ["GET"]], -] - - -class Message(pydantic.BaseModel): - role: Literal["system", "user", "assistant"] - content: str - - -for route, endpoint, methods in OPENAI_ENDPOINTS: - openai_api_app.add_api_route( - path=route, - endpoint=endpoint, - methods=methods, - include_in_schema=True, - ) - - -STATIC_DIR = os.path.join(os.path.dirname(__file__), "ui") - -ui_app.mount( - "/static", fastapi.staticfiles.StaticFiles(directory=STATIC_DIR), name="static" -) - - -@ui_app.get("/") -async def serve_chat_html(): - return FileResponse(os.path.join(STATIC_DIR, "chat.html")) - - -@ui_app.get("/{full_path:path}") -async def catch_all(full_path: str): - file_path = os.path.join(STATIC_DIR, full_path) - if os.path.exists(file_path): - return FileResponse(file_path) - return FileResponse(os.path.join(STATIC_DIR, "chat.html")) - - -# special handling for prometheus_client of bentoml -if "prometheus_client" in sys.modules: - sys.modules.pop("prometheus_client") - - -@bentoml.mount_asgi_app(openai_api_app, path="/v1") -@bentoml.mount_asgi_app(ui_app, path="/ui") -@bentoml.service(**SERVICE_CONFIG) -class VLLM: - def __init__(self) -> None: - from transformers import AutoTokenizer - from vllm import AsyncEngineArgs, AsyncLLMEngine - from vllm.entrypoints.openai.serving_chat import OpenAIServingChat - from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion - - ENGINE_ARGS = AsyncEngineArgs(**ENGINE_CONFIG) - self.engine = AsyncLLMEngine.from_engine_args(ENGINE_ARGS) - self.tokenizer = AutoTokenizer.from_pretrained(ENGINE_CONFIG["model"]) - logger.info(f"VLLM service initialized with model: {ENGINE_CONFIG['model']}") - - if OVERRIDE_CHAT_TEMPLATE: # use community chat template - gen_config = _get_gen_config(CONSTANTS["chat_template"]) - chat_template = gen_config["template"] - else: - chat_template = None - - model_config = self.engine.engine.get_model_config() - - # inject the engine into the openai serving chat and completion - vllm_api_server.openai_serving_chat = OpenAIServingChat( - engine=self.engine, - served_model_names=[ENGINE_CONFIG["model"]], - response_role="assistant", - chat_template=chat_template, - model_config=model_config, - ) - vllm_api_server.openai_serving_completion = OpenAIServingCompletion( - engine=self.engine, - served_model_names=[ENGINE_CONFIG["model"]], - model_config=model_config, - lora_modules=None, - ) - - @bentoml.api(route="/api/generate") - async def generate( - self, - prompt: str = "Explain superconductors like I'm five years old", - model: str = ENGINE_CONFIG["model"], - max_tokens: Annotated[ - int, - Ge(128), - Le(ENGINE_CONFIG["max_model_len"]), - ] = ENGINE_CONFIG["max_model_len"], - stop: Optional[list[str]] = None, - ) -> AsyncGenerator[str, None]: - if stop is None: - stop = [] - - from vllm import SamplingParams - - SAMPLING_PARAM = SamplingParams( - max_tokens=max_tokens, - stop=stop, - ) - stream = await self.engine.add_request(uuid.uuid4().hex, prompt, SAMPLING_PARAM) - - cursor = 0 - async for request_output in stream: - text = request_output.outputs[0].text - yield text[cursor:] - cursor = len(text) - - @bentoml.api(route="/api/chat") - async def chat( - self, - messages: list[Message] = [ - Message(content="what is the meaning of life?", role="user") - ], - model: str = ENGINE_CONFIG["model"], - max_tokens: Annotated[ - int, - Ge(128), - Le(ENGINE_CONFIG["max_model_len"]), - ] = ENGINE_CONFIG["max_model_len"], - stop: Optional[list[str]] = None, - ) -> AsyncGenerator[str, None]: - """ - light-weight chat API that takes in a list of messages and returns a response - """ - from vllm import SamplingParams - - try: - if OVERRIDE_CHAT_TEMPLATE: # community chat template - gen_config = _get_gen_config(CONSTANTS["chat_template"]) - if not stop: - if gen_config["stop_str"]: - stop = [gen_config["stop_str"]] - else: - stop = [] - system_prompt = gen_config["system_prompt"] - self.tokenizer.chat_template = gen_config["template"] - else: - if not stop: - if self.tokenizer.eos_token is not None: - stop = [self.tokenizer.eos_token] - else: - stop = [] - system_prompt = None - - SAMPLING_PARAM = SamplingParams( - max_tokens=max_tokens, - stop=stop, - ) - if system_prompt and messages[0].role != "system": - messages = [dict(role="system", content=system_prompt)] + messages - - prompt = self.tokenizer.apply_chat_template( - messages, - tokenize=False, - add_generation_prompt=True, - ) - - stream = await self.engine.add_request( - uuid.uuid4().hex, prompt, SAMPLING_PARAM - ) - - cursor = 0 - strip_flag = True - async for request_output in stream: - text = request_output.outputs[0].text - assistant_message = text[cursor:] - if not strip_flag: # strip the leading whitespace - yield assistant_message - elif assistant_message.strip(): - strip_flag = False - yield assistant_message.lstrip() - cursor = len(text) - except Exception as e: - logger.error(f"Error in chat API: {e}") - yield f"Error in chat API: {e}" - - -@functools.lru_cache(maxsize=1) -def _get_gen_config(community_chat_template: str) -> dict: - logger.info(f"Load community_chat_template: {community_chat_template}") - chat_template_path = os.path.join( - os.path.dirname(__file__), "chat_templates", "chat_templates" - ) - config_path = os.path.join( - os.path.dirname(__file__), "chat_templates", "generation_configs" - ) - with open(os.path.join(config_path, f"{community_chat_template}.json")) as f: - gen_config = json.load(f) - chat_template_file = gen_config["chat_template"].split("/")[-1] - with open(os.path.join(chat_template_path, chat_template_file)) as f: - chat_template = f.read() - gen_config["template"] = chat_template.replace(" ", "").replace("\n", "") - return gen_config diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/README.md b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/README.md new file mode 100644 index 00000000..486b076e --- /dev/null +++ b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/README.md @@ -0,0 +1,16 @@ +# qwen2:57b-a14b-instruct-fp16-f63b + +[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML) +[![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/) +[![join_slack](https://badgen.net/badge/Join/BentoML%20Slack/cyan?icon=slack)](https://l.bentoml.com/join-slack-swagger) +[![BentoML GitHub Repo](https://img.shields.io/github/stars/bentoml/bentoml?style=social)](https://github.com/bentoml/BentoML) +[![Twitter Follow](https://img.shields.io/twitter/follow/bentomlai?label=Follow%20BentoML&style=social)](https://twitter.com/bentomlai) + +This is a Machine Learning Service created with BentoML. + +## Help + +* [📖 Documentation](https://docs.bentoml.com/en/latest/): Learn how to use BentoML. +* [💬 Community](https://l.bentoml.com/join-slack-swagger): Join the BentoML Slack community. +* [🐛 GitHub Issues](https://github.com/bentoml/BentoML/issues): Report bugs and feature requests. +* Tip: you can also [customize this README](https://docs.bentoml.com/en/latest/concepts/bento.html#description). diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/apis/openapi.yaml b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/apis/openapi.yaml similarity index 99% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/apis/openapi.yaml rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/apis/openapi.yaml index d103883f..6283970e 100644 --- a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/apis/openapi.yaml +++ b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/apis/openapi.yaml @@ -1036,6 +1036,39 @@ paths: tags: - Service APIs x-bentoml-name: generate + /chat/: + get: + operationId: serve_chat_html__get + responses: + '200': + content: + application/json: + schema: {} + description: Successful Response + summary: Serve Chat Html + /chat/{full_path}: + get: + operationId: catch_all__full_path__get + parameters: + - in: path + name: full_path + required: true + schema: + title: Full Path + type: string + responses: + '200': + content: + application/json: + schema: {} + description: Successful Response + '422': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + description: Validation Error + summary: Catch All /healthz: get: description: Health check endpoint. Expecting an empty response with status @@ -1076,39 +1109,6 @@ paths: description: Successful Response tags: - Infrastructure - /ui/: - get: - operationId: serve_chat_html__get - responses: - '200': - content: - application/json: - schema: {} - description: Successful Response - summary: Serve Chat Html - /ui/{full_path}: - get: - operationId: catch_all__full_path__get - parameters: - - in: path - name: full_path - required: true - schema: - title: Full Path - type: string - responses: - '200': - content: - application/json: - schema: {} - description: Successful Response - '422': - content: - application/json: - schema: - $ref: '#/components/schemas/HTTPValidationError' - description: Validation Error - summary: Catch All /v1/chat/completions: post: operationId: create_chat_completion_chat_completions_post diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/apis/schema.json b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/apis/schema.json similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/apis/schema.json rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/apis/schema.json diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/bento.yaml b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/bento.yaml similarity index 97% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/bento.yaml rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/bento.yaml index 696fbbac..0ed33662 100644 --- a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/bento.yaml +++ b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/bento.yaml @@ -1,8 +1,8 @@ service: service:VLLM name: qwen2 -version: 57b-a14b-instruct-fp16-8d40 +version: 57b-a14b-instruct-fp16-f63b bentoml_version: 1.2.19 -creation_time: '2024-07-05T07:36:34.888300+00:00' +creation_time: '2024-07-08T08:06:13.245817+00:00' labels: openllm_alias: 57b-a14b,57b-a14b-instruct openllm_hf_model_id: Qwen/Qwen2-57B-A14B-Instruct diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/env/docker/Dockerfile b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/env/docker/Dockerfile similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/env/docker/Dockerfile rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/env/docker/Dockerfile diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/env/docker/entrypoint.sh b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/env/docker/entrypoint.sh similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/env/docker/entrypoint.sh rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/env/docker/entrypoint.sh diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/env/python/install.sh b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/env/python/install.sh similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/env/python/install.sh rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/env/python/install.sh diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/env/python/requirements.lock.txt b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/env/python/requirements.lock.txt new file mode 100644 index 00000000..925ce193 --- /dev/null +++ b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/env/python/requirements.lock.txt @@ -0,0 +1,147 @@ +aiohttp==3.9.5 +aiosignal==1.3.1 +annotated-types==0.7.0 +anyio==4.4.0 +appdirs==1.4.4 +asgiref==3.8.1 +async-timeout==4.0.3 +attrs==23.2.0 +bentoml==1.2.19 +build==1.2.1 +cattrs==23.1.2 +certifi==2024.7.4 +charset-normalizer==3.3.2 +circus==0.18.0 +click==8.1.7 +click-option-group==0.5.6 +cloudpickle==3.0.0 +cmake==3.30.0 +deepmerge==1.1.1 +deprecated==1.2.14 +diskcache==5.6.3 +distro==1.9.0 +dnspython==2.6.1 +email-validator==2.2.0 +exceptiongroup==1.2.1 +fastapi==0.111.0 +fastapi-cli==0.0.4 +filelock==3.15.4 +frozenlist==1.4.1 +fs==2.4.16 +fsspec==2024.6.1 +h11==0.14.0 +httpcore==1.0.5 +httptools==0.6.1 +httpx==0.27.0 +httpx-ws==0.6.0 +huggingface-hub==0.23.4 +idna==3.7 +importlib-metadata==6.11.0 +inflection==0.5.1 +interegular==0.3.3 +jinja2==3.1.4 +joblib==1.4.2 +jsonschema==4.22.0 +jsonschema-specifications==2023.12.1 +lark==1.1.9 +llvmlite==0.43.0 +lm-format-enforcer==0.10.1 +markdown-it-py==3.0.0 +markupsafe==2.1.5 +mdurl==0.1.2 +mpmath==1.3.0 +msgpack==1.0.8 +multidict==6.0.5 +nest-asyncio==1.6.0 +networkx==3.2.1 +ninja==1.11.1.1 +numba==0.60.0 +numpy==1.26.0 +nvidia-cublas-cu12==12.1.3.1 +nvidia-cuda-cupti-cu12==12.1.105 +nvidia-cuda-nvrtc-cu12==12.1.105 +nvidia-cuda-runtime-cu12==12.1.105 +nvidia-cudnn-cu12==8.9.2.26 +nvidia-cufft-cu12==11.0.2.54 +nvidia-curand-cu12==10.3.2.106 +nvidia-cusolver-cu12==11.4.5.107 +nvidia-cusparse-cu12==12.1.0.106 +nvidia-ml-py==11.525.150 +nvidia-nccl-cu12==2.20.5 +nvidia-nvjitlink-cu12==12.5.82 +nvidia-nvtx-cu12==12.1.105 +openai==1.35.10 +opentelemetry-api==1.20.0 +opentelemetry-instrumentation==0.41b0 +opentelemetry-instrumentation-aiohttp-client==0.41b0 +opentelemetry-instrumentation-asgi==0.41b0 +opentelemetry-sdk==1.20.0 +opentelemetry-semantic-conventions==0.41b0 +opentelemetry-util-http==0.41b0 +orjson==3.10.6 +outlines==0.0.34 +packaging==24.1 +pathspec==0.12.1 +pip-requirements-parser==32.0.1 +pip-tools==7.4.1 +prometheus-client==0.20.0 +prometheus-fastapi-instrumentator==7.0.0 +protobuf==5.27.2 +psutil==6.0.0 +py-cpuinfo==9.0.0 +pydantic==2.8.2 +pydantic-core==2.20.1 +pygments==2.18.0 +pyparsing==3.1.2 +pyproject-hooks==1.1.0 +python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 +python-json-logger==2.0.7 +python-multipart==0.0.9 +pyyaml==6.0.1 +pyzmq==26.0.3 +ray==2.31.0 +referencing==0.35.1 +regex==2024.5.15 +requests==2.32.3 +rich==13.7.1 +rpds-py==0.18.1 +safetensors==0.4.3 +schema==0.7.7 +scipy==1.13.1 +sentencepiece==0.2.0 +shellingham==1.5.4 +simple-di==0.1.5 +six==1.16.0 +sniffio==1.3.1 +starlette==0.37.2 +sympy==1.12.1 +tiktoken==0.7.0 +tokenizers==0.19.1 +tomli==2.0.1 +tomli-w==1.0.0 +torch==2.3.0 +tornado==6.4.1 +tqdm==4.66.4 +transformers==4.41.0 +triton==2.3.0 +typer==0.12.3 +typing-extensions==4.12.2 +ujson==5.10.0 +urllib3==2.2.2 +uvicorn[standard]==0.30.1 +uvloop==0.19.0 +vllm==0.4.3 +vllm-flash-attn==2.5.8.post2 +watchfiles==0.22.0 +websockets==12.0 +wheel==0.43.0 +wrapt==1.16.0 +wsproto==1.2.0 +xformers==0.0.26.post1 +yarl==1.9.4 +zipp==3.19.2 + +# The following packages are considered to be unsafe in a requirements file: +pip==24.1.2 +setuptools==70.2.0 diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/env/python/requirements.txt b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/env/python/requirements.txt similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/env/python/requirements.txt rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/env/python/requirements.txt diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/env/python/version.txt b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/env/python/version.txt similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/env/python/version.txt rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/env/python/version.txt diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/bento_constants.py b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/bento_constants.py similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/bento_constants.py rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/bento_constants.py diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/bentofile.yaml b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/bentofile.yaml similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/bentofile.yaml rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/bentofile.yaml diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/chat_templates/alpaca.jinja b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/chat_templates/alpaca.jinja similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/chat_templates/alpaca.jinja rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/chat_templates/alpaca.jinja diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/chat_templates/amberchat.jinja b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/chat_templates/amberchat.jinja similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/chat_templates/amberchat.jinja rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/chat_templates/amberchat.jinja diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/chat_templates/chatml.jinja b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/chat_templates/chatml.jinja similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/chat_templates/chatml.jinja rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/chat_templates/chatml.jinja diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/chat_templates/chatqa.jinja b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/chat_templates/chatqa.jinja similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/chat_templates/chatqa.jinja rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/chat_templates/chatqa.jinja diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/chat_templates/falcon-instruct.jinja b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/chat_templates/falcon-instruct.jinja similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/chat_templates/falcon-instruct.jinja rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/chat_templates/falcon-instruct.jinja diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/chat_templates/gemma-it.jinja b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/chat_templates/gemma-it.jinja similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/chat_templates/gemma-it.jinja rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/chat_templates/gemma-it.jinja diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/chat_templates/llama-2-chat.jinja b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/chat_templates/llama-2-chat.jinja similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/chat_templates/llama-2-chat.jinja rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/chat_templates/llama-2-chat.jinja diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/chat_templates/llama-3-chat.jinja b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/chat_templates/llama-3-chat.jinja similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/chat_templates/llama-3-chat.jinja rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/chat_templates/llama-3-chat.jinja diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/chat_templates/mistral-instruct.jinja b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/chat_templates/mistral-instruct.jinja similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/chat_templates/mistral-instruct.jinja rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/chat_templates/mistral-instruct.jinja diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/chat_templates/openchat.jinja b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/chat_templates/openchat.jinja similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/chat_templates/openchat.jinja rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/chat_templates/openchat.jinja diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/chat_templates/phi-3.jinja b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/chat_templates/phi-3.jinja similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/chat_templates/phi-3.jinja rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/chat_templates/phi-3.jinja diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/chat_templates/saiga.jinja b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/chat_templates/saiga.jinja similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/chat_templates/saiga.jinja rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/chat_templates/saiga.jinja diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/chat_templates/solar-instruct.jinja b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/chat_templates/solar-instruct.jinja similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/chat_templates/solar-instruct.jinja rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/chat_templates/solar-instruct.jinja diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/chat_templates/vicuna.jinja b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/chat_templates/vicuna.jinja similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/chat_templates/vicuna.jinja rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/chat_templates/vicuna.jinja diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/chat_templates/zephyr.jinja b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/chat_templates/zephyr.jinja similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/chat_templates/zephyr.jinja rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/chat_templates/zephyr.jinja diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/generation_configs/alpaca.json b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/generation_configs/alpaca.json similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/generation_configs/alpaca.json rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/generation_configs/alpaca.json diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/generation_configs/amberchat.json b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/generation_configs/amberchat.json similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/generation_configs/amberchat.json rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/generation_configs/amberchat.json diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/generation_configs/chatqa.json b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/generation_configs/chatqa.json similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/generation_configs/chatqa.json rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/generation_configs/chatqa.json diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/generation_configs/gemma-it.json b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/generation_configs/gemma-it.json similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/generation_configs/gemma-it.json rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/generation_configs/gemma-it.json diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/generation_configs/llama-2-chat.json b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/generation_configs/llama-2-chat.json similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/generation_configs/llama-2-chat.json rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/generation_configs/llama-2-chat.json diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/generation_configs/llama-3-chat.json b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/generation_configs/llama-3-chat.json similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/generation_configs/llama-3-chat.json rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/generation_configs/llama-3-chat.json diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/generation_configs/mistral-instruct.json b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/generation_configs/mistral-instruct.json similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/generation_configs/mistral-instruct.json rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/generation_configs/mistral-instruct.json diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/generation_configs/openchat.json b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/generation_configs/openchat.json similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/generation_configs/openchat.json rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/generation_configs/openchat.json diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/generation_configs/orca-2.json b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/generation_configs/orca-2.json similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/generation_configs/orca-2.json rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/generation_configs/orca-2.json diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/generation_configs/phi-3.json b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/generation_configs/phi-3.json similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/generation_configs/phi-3.json rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/generation_configs/phi-3.json diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/generation_configs/qwen2-chat.json b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/generation_configs/qwen2-chat.json similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/generation_configs/qwen2-chat.json rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/generation_configs/qwen2-chat.json diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/generation_configs/saiga.json b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/generation_configs/saiga.json similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/generation_configs/saiga.json rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/generation_configs/saiga.json diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/generation_configs/solar-instruct.json b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/generation_configs/solar-instruct.json similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/generation_configs/solar-instruct.json rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/generation_configs/solar-instruct.json diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/generation_configs/vicuna.json b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/generation_configs/vicuna.json similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/generation_configs/vicuna.json rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/generation_configs/vicuna.json diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/generation_configs/yi-chat.json b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/generation_configs/yi-chat.json similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/generation_configs/yi-chat.json rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/generation_configs/yi-chat.json diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/generation_configs/zephyr.json b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/generation_configs/zephyr.json similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/chat_templates/generation_configs/zephyr.json rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/chat_templates/generation_configs/zephyr.json diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/service.py b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/service.py new file mode 100644 index 00000000..42152243 --- /dev/null +++ b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/service.py @@ -0,0 +1,239 @@ +import functools +import json +import logging +import os +import sys +import uuid +from typing import AsyncGenerator, Literal, Optional + +import bentoml +import fastapi +import fastapi.staticfiles +import pydantic +import vllm.entrypoints.openai.api_server as vllm_api_server +import yaml +from annotated_types import Ge, Le +from bento_constants import CONSTANT_YAML +from fastapi.responses import FileResponse +from typing_extensions import Annotated, Literal + + +class Message(pydantic.BaseModel): + content: str + role: Literal["system", "user", "assistant"] + + +CONSTANTS = yaml.safe_load(CONSTANT_YAML) + +ENGINE_CONFIG = CONSTANTS["engine_config"] +SERVICE_CONFIG = CONSTANTS["service_config"] +OVERRIDE_CHAT_TEMPLATE = CONSTANTS.get("chat_template") + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +openai_api_app = fastapi.FastAPI() +static_app = fastapi.FastAPI() +ui_app = fastapi.FastAPI() + + +OPENAI_ENDPOINTS = [ + ["/chat/completions", vllm_api_server.create_chat_completion, ["POST"]], + ["/completions", vllm_api_server.create_completion, ["POST"]], + ["/models", vllm_api_server.show_available_models, ["GET"]], +] + + +class Message(pydantic.BaseModel): + role: Literal["system", "user", "assistant"] + content: str + + +for route, endpoint, methods in OPENAI_ENDPOINTS: + openai_api_app.add_api_route( + path=route, + endpoint=endpoint, + methods=methods, + include_in_schema=True, + ) + + +STATIC_DIR = os.path.join(os.path.dirname(__file__), "ui") + +ui_app.mount( + "/static", fastapi.staticfiles.StaticFiles(directory=STATIC_DIR), name="static" +) + + +@ui_app.get("/") +async def serve_chat_html(): + return FileResponse(os.path.join(STATIC_DIR, "chat.html")) + + +@ui_app.get("/{full_path:path}") +async def catch_all(full_path: str): + file_path = os.path.join(STATIC_DIR, full_path) + if os.path.exists(file_path): + return FileResponse(file_path) + return FileResponse(os.path.join(STATIC_DIR, "chat.html")) + + +# special handling for prometheus_client of bentoml +if "prometheus_client" in sys.modules: + sys.modules.pop("prometheus_client") + + +@bentoml.mount_asgi_app(openai_api_app, path="/v1") +@bentoml.mount_asgi_app(ui_app, path="/chat") +@bentoml.service(**SERVICE_CONFIG) +class VLLM: + def __init__(self) -> None: + from transformers import AutoTokenizer + from vllm import AsyncEngineArgs, AsyncLLMEngine + from vllm.entrypoints.openai.serving_chat import OpenAIServingChat + from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion + + ENGINE_ARGS = AsyncEngineArgs(**ENGINE_CONFIG) + self.engine = AsyncLLMEngine.from_engine_args(ENGINE_ARGS) + self.tokenizer = AutoTokenizer.from_pretrained(ENGINE_CONFIG["model"]) + logger.info(f"VLLM service initialized with model: {ENGINE_CONFIG['model']}") + + if OVERRIDE_CHAT_TEMPLATE: # use community chat template + gen_config = _get_gen_config(CONSTANTS["chat_template"]) + chat_template = gen_config["template"] + else: + chat_template = None + + model_config = self.engine.engine.get_model_config() + + # inject the engine into the openai serving chat and completion + vllm_api_server.openai_serving_chat = OpenAIServingChat( + engine=self.engine, + served_model_names=[ENGINE_CONFIG["model"]], + response_role="assistant", + chat_template=chat_template, + model_config=model_config, + ) + vllm_api_server.openai_serving_completion = OpenAIServingCompletion( + engine=self.engine, + served_model_names=[ENGINE_CONFIG["model"]], + model_config=model_config, + lora_modules=None, + ) + + @bentoml.api(route="/api/generate") + async def generate( + self, + prompt: str = "Explain superconductors like I'm five years old", + model: str = ENGINE_CONFIG["model"], + max_tokens: Annotated[ + int, + Ge(128), + Le(ENGINE_CONFIG["max_model_len"]), + ] = ENGINE_CONFIG["max_model_len"], + stop: Optional[list[str]] = None, + ) -> AsyncGenerator[str, None]: + if stop is None: + stop = [] + + from vllm import SamplingParams + + SAMPLING_PARAM = SamplingParams( + max_tokens=max_tokens, + stop=stop, + ) + stream = await self.engine.add_request(uuid.uuid4().hex, prompt, SAMPLING_PARAM) + + cursor = 0 + async for request_output in stream: + text = request_output.outputs[0].text + yield text[cursor:] + cursor = len(text) + + @bentoml.api(route="/api/chat") + async def chat( + self, + messages: list[Message] = [ + Message(content="what is the meaning of life?", role="user") + ], + model: str = ENGINE_CONFIG["model"], + max_tokens: Annotated[ + int, + Ge(128), + Le(ENGINE_CONFIG["max_model_len"]), + ] = ENGINE_CONFIG["max_model_len"], + stop: Optional[list[str]] = None, + ) -> AsyncGenerator[str, None]: + """ + light-weight chat API that takes in a list of messages and returns a response + """ + from vllm import SamplingParams + + try: + if OVERRIDE_CHAT_TEMPLATE: # community chat template + gen_config = _get_gen_config(CONSTANTS["chat_template"]) + if not stop: + if gen_config["stop_str"]: + stop = [gen_config["stop_str"]] + else: + stop = [] + system_prompt = gen_config["system_prompt"] + self.tokenizer.chat_template = gen_config["template"] + else: + if not stop: + if self.tokenizer.eos_token is not None: + stop = [self.tokenizer.eos_token] + else: + stop = [] + system_prompt = None + + SAMPLING_PARAM = SamplingParams( + max_tokens=max_tokens, + stop=stop, + ) + if system_prompt and messages[0].role != "system": + messages = [dict(role="system", content=system_prompt)] + messages + + prompt = self.tokenizer.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True, + ) + + stream = await self.engine.add_request( + uuid.uuid4().hex, prompt, SAMPLING_PARAM + ) + + cursor = 0 + strip_flag = True + async for request_output in stream: + text = request_output.outputs[0].text + assistant_message = text[cursor:] + if not strip_flag: # strip the leading whitespace + yield assistant_message + elif assistant_message.strip(): + strip_flag = False + yield assistant_message.lstrip() + cursor = len(text) + except Exception as e: + logger.error(f"Error in chat API: {e}") + yield f"Error in chat API: {e}" + + +@functools.lru_cache(maxsize=1) +def _get_gen_config(community_chat_template: str) -> dict: + logger.info(f"Load community_chat_template: {community_chat_template}") + chat_template_path = os.path.join( + os.path.dirname(__file__), "chat_templates", "chat_templates" + ) + config_path = os.path.join( + os.path.dirname(__file__), "chat_templates", "generation_configs" + ) + with open(os.path.join(config_path, f"{community_chat_template}.json")) as f: + gen_config = json.load(f) + chat_template_file = gen_config["chat_template"].split("/")[-1] + with open(os.path.join(chat_template_path, chat_template_file)) as f: + chat_template = f.read() + gen_config["template"] = chat_template.replace(" ", "").replace("\n", "") + return gen_config diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/404.html b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/404.html similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/404.html rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/404.html diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/700-532b1fe2415e5859.js b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/700-532b1fe2415e5859.js similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/700-532b1fe2415e5859.js rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/700-532b1fe2415e5859.js diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/995-34374f39bb210839.js b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/995-34374f39bb210839.js similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/995-34374f39bb210839.js rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/995-34374f39bb210839.js diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/css/429544bd3cd8ce3a.css b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/css/429544bd3cd8ce3a.css similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/css/429544bd3cd8ce3a.css rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/css/429544bd3cd8ce3a.css diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/css/5b67f082b31cfc7b.css b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/css/5b67f082b31cfc7b.css similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/css/5b67f082b31cfc7b.css rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/css/5b67f082b31cfc7b.css diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/css/9e63023b20ddb15e.css b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/css/9e63023b20ddb15e.css similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/_next/static/css/9e63023b20ddb15e.css rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/_next/static/css/9e63023b20ddb15e.css diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/apple-touch-icon.png b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/apple-touch-icon.png similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/apple-touch-icon.png rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/apple-touch-icon.png diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/chat.html b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/chat.html similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/chat.html rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/chat.html diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/chat.txt b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/chat.txt similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/chat.txt rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/chat.txt diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/favicon-16x16.png b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/favicon-16x16.png similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/favicon-16x16.png rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/favicon-16x16.png diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/favicon.ico b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/favicon.ico similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/favicon.ico rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/favicon.ico diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/index.html b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/index.html similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/index.html rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/index.html diff --git a/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/index.txt b/bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/index.txt similarity index 100% rename from bentoml/bentos/qwen2/57b-a14b-instruct-fp16-8d40/src/ui/index.txt rename to bentoml/bentos/qwen2/57b-a14b-instruct-fp16-f63b/src/ui/index.txt diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/README.md b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/README.md deleted file mode 100644 index db254975..00000000 --- a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/README.md +++ /dev/null @@ -1,16 +0,0 @@ -# qwen2:72b-instruct-awq-4bit-177e - -[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML) -[![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/) -[![join_slack](https://badgen.net/badge/Join/BentoML%20Slack/cyan?icon=slack)](https://l.bentoml.com/join-slack-swagger) -[![BentoML GitHub Repo](https://img.shields.io/github/stars/bentoml/bentoml?style=social)](https://github.com/bentoml/BentoML) -[![Twitter Follow](https://img.shields.io/twitter/follow/bentomlai?label=Follow%20BentoML&style=social)](https://twitter.com/bentomlai) - -This is a Machine Learning Service created with BentoML. - -## Help - -* [📖 Documentation](https://docs.bentoml.com/en/latest/): Learn how to use BentoML. -* [💬 Community](https://l.bentoml.com/join-slack-swagger): Join the BentoML Slack community. -* [🐛 GitHub Issues](https://github.com/bentoml/BentoML/issues): Report bugs and feature requests. -* Tip: you can also [customize this README](https://docs.bentoml.com/en/latest/concepts/bento.html#description). diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/env/python/requirements.lock.txt b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/env/python/requirements.lock.txt deleted file mode 100644 index 9118ea44..00000000 --- a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/env/python/requirements.lock.txt +++ /dev/null @@ -1,147 +0,0 @@ -aiohttp==3.9.5 -aiosignal==1.3.1 -annotated-types==0.7.0 -anyio==4.4.0 -appdirs==1.4.4 -asgiref==3.8.1 -async-timeout==4.0.3 -attrs==23.2.0 -bentoml==1.2.19 -build==1.2.1 -cattrs==23.1.2 -certifi==2024.7.4 -charset-normalizer==3.3.2 -circus==0.18.0 -click==8.1.7 -click-option-group==0.5.6 -cloudpickle==3.0.0 -cmake==3.30.0 -deepmerge==1.1.1 -deprecated==1.2.14 -diskcache==5.6.3 -distro==1.9.0 -dnspython==2.6.1 -email-validator==2.2.0 -exceptiongroup==1.2.1 -fastapi==0.111.0 -fastapi-cli==0.0.4 -filelock==3.15.4 -frozenlist==1.4.1 -fs==2.4.16 -fsspec==2024.6.1 -h11==0.14.0 -httpcore==1.0.5 -httptools==0.6.1 -httpx==0.27.0 -httpx-ws==0.6.0 -huggingface-hub==0.23.4 -idna==3.7 -importlib-metadata==6.11.0 -inflection==0.5.1 -interegular==0.3.3 -jinja2==3.1.4 -joblib==1.4.2 -jsonschema==4.22.0 -jsonschema-specifications==2023.12.1 -lark==1.1.9 -llvmlite==0.43.0 -lm-format-enforcer==0.10.1 -markdown-it-py==3.0.0 -markupsafe==2.1.5 -mdurl==0.1.2 -mpmath==1.3.0 -msgpack==1.0.8 -multidict==6.0.5 -nest-asyncio==1.6.0 -networkx==3.2.1 -ninja==1.11.1.1 -numba==0.60.0 -numpy==1.26.0 -nvidia-cublas-cu12==12.1.3.1 -nvidia-cuda-cupti-cu12==12.1.105 -nvidia-cuda-nvrtc-cu12==12.1.105 -nvidia-cuda-runtime-cu12==12.1.105 -nvidia-cudnn-cu12==8.9.2.26 -nvidia-cufft-cu12==11.0.2.54 -nvidia-curand-cu12==10.3.2.106 -nvidia-cusolver-cu12==11.4.5.107 -nvidia-cusparse-cu12==12.1.0.106 -nvidia-ml-py==11.525.150 -nvidia-nccl-cu12==2.20.5 -nvidia-nvjitlink-cu12==12.5.82 -nvidia-nvtx-cu12==12.1.105 -openai==1.35.10 -opentelemetry-api==1.20.0 -opentelemetry-instrumentation==0.41b0 -opentelemetry-instrumentation-aiohttp-client==0.41b0 -opentelemetry-instrumentation-asgi==0.41b0 -opentelemetry-sdk==1.20.0 -opentelemetry-semantic-conventions==0.41b0 -opentelemetry-util-http==0.41b0 -orjson==3.10.6 -outlines==0.0.34 -packaging==24.1 -pathspec==0.12.1 -pip-requirements-parser==32.0.1 -pip-tools==7.4.1 -prometheus-client==0.20.0 -prometheus-fastapi-instrumentator==7.0.0 -protobuf==5.27.2 -psutil==6.0.0 -py-cpuinfo==9.0.0 -pydantic==2.8.2 -pydantic-core==2.20.1 -pygments==2.18.0 -pyparsing==3.1.2 -pyproject-hooks==1.1.0 -python-dateutil==2.9.0.post0 -python-dotenv==1.0.1 -python-json-logger==2.0.7 -python-multipart==0.0.9 -pyyaml==6.0.1 -pyzmq==26.0.3 -ray==2.31.0 -referencing==0.35.1 -regex==2024.5.15 -requests==2.32.3 -rich==13.7.1 -rpds-py==0.18.1 -safetensors==0.4.3 -schema==0.7.7 -scipy==1.13.1 -sentencepiece==0.2.0 -shellingham==1.5.4 -simple-di==0.1.5 -six==1.16.0 -sniffio==1.3.1 -starlette==0.37.2 -sympy==1.12.1 -tiktoken==0.7.0 -tokenizers==0.19.1 -tomli==2.0.1 -tomli-w==1.0.0 -torch==2.3.0 -tornado==6.4.1 -tqdm==4.66.4 -transformers==4.41.0 -triton==2.3.0 -typer==0.12.3 -typing-extensions==4.12.2 -ujson==5.10.0 -urllib3==2.2.2 -uvicorn[standard]==0.30.1 -uvloop==0.19.0 -vllm==0.4.3 -vllm-flash-attn==2.5.8.post2 -watchfiles==0.22.0 -websockets==12.0 -wheel==0.43.0 -wrapt==1.16.0 -wsproto==1.2.0 -xformers==0.0.26.post1 -yarl==1.9.4 -zipp==3.19.2 - -# The following packages are considered to be unsafe in a requirements file: -pip==24.1.1 -setuptools==70.2.0 diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/service.py b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/service.py deleted file mode 100644 index 94af0b0d..00000000 --- a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/service.py +++ /dev/null @@ -1,239 +0,0 @@ -import functools -import json -import logging -import os -import sys -import uuid -from typing import AsyncGenerator, Literal, Optional - -import bentoml -import fastapi -import fastapi.staticfiles -import pydantic -import vllm.entrypoints.openai.api_server as vllm_api_server -import yaml -from annotated_types import Ge, Le -from bento_constants import CONSTANT_YAML -from fastapi.responses import FileResponse -from typing_extensions import Annotated, Literal - - -class Message(pydantic.BaseModel): - content: str - role: Literal["system", "user", "assistant"] - - -CONSTANTS = yaml.safe_load(CONSTANT_YAML) - -ENGINE_CONFIG = CONSTANTS["engine_config"] -SERVICE_CONFIG = CONSTANTS["service_config"] -OVERRIDE_CHAT_TEMPLATE = CONSTANTS.get("chat_template") - -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) - - -openai_api_app = fastapi.FastAPI() -static_app = fastapi.FastAPI() -ui_app = fastapi.FastAPI() - - -OPENAI_ENDPOINTS = [ - ["/chat/completions", vllm_api_server.create_chat_completion, ["POST"]], - ["/completions", vllm_api_server.create_completion, ["POST"]], - ["/models", vllm_api_server.show_available_models, ["GET"]], -] - - -class Message(pydantic.BaseModel): - role: Literal["system", "user", "assistant"] - content: str - - -for route, endpoint, methods in OPENAI_ENDPOINTS: - openai_api_app.add_api_route( - path=route, - endpoint=endpoint, - methods=methods, - include_in_schema=True, - ) - - -STATIC_DIR = os.path.join(os.path.dirname(__file__), "ui") - -ui_app.mount( - "/static", fastapi.staticfiles.StaticFiles(directory=STATIC_DIR), name="static" -) - - -@ui_app.get("/") -async def serve_chat_html(): - return FileResponse(os.path.join(STATIC_DIR, "chat.html")) - - -@ui_app.get("/{full_path:path}") -async def catch_all(full_path: str): - file_path = os.path.join(STATIC_DIR, full_path) - if os.path.exists(file_path): - return FileResponse(file_path) - return FileResponse(os.path.join(STATIC_DIR, "chat.html")) - - -# special handling for prometheus_client of bentoml -if "prometheus_client" in sys.modules: - sys.modules.pop("prometheus_client") - - -@bentoml.mount_asgi_app(openai_api_app, path="/v1") -@bentoml.mount_asgi_app(ui_app, path="/ui") -@bentoml.service(**SERVICE_CONFIG) -class VLLM: - def __init__(self) -> None: - from transformers import AutoTokenizer - from vllm import AsyncEngineArgs, AsyncLLMEngine - from vllm.entrypoints.openai.serving_chat import OpenAIServingChat - from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion - - ENGINE_ARGS = AsyncEngineArgs(**ENGINE_CONFIG) - self.engine = AsyncLLMEngine.from_engine_args(ENGINE_ARGS) - self.tokenizer = AutoTokenizer.from_pretrained(ENGINE_CONFIG["model"]) - logger.info(f"VLLM service initialized with model: {ENGINE_CONFIG['model']}") - - if OVERRIDE_CHAT_TEMPLATE: # use community chat template - gen_config = _get_gen_config(CONSTANTS["chat_template"]) - chat_template = gen_config["template"] - else: - chat_template = None - - model_config = self.engine.engine.get_model_config() - - # inject the engine into the openai serving chat and completion - vllm_api_server.openai_serving_chat = OpenAIServingChat( - engine=self.engine, - served_model_names=[ENGINE_CONFIG["model"]], - response_role="assistant", - chat_template=chat_template, - model_config=model_config, - ) - vllm_api_server.openai_serving_completion = OpenAIServingCompletion( - engine=self.engine, - served_model_names=[ENGINE_CONFIG["model"]], - model_config=model_config, - lora_modules=None, - ) - - @bentoml.api(route="/api/generate") - async def generate( - self, - prompt: str = "Explain superconductors like I'm five years old", - model: str = ENGINE_CONFIG["model"], - max_tokens: Annotated[ - int, - Ge(128), - Le(ENGINE_CONFIG["max_model_len"]), - ] = ENGINE_CONFIG["max_model_len"], - stop: Optional[list[str]] = None, - ) -> AsyncGenerator[str, None]: - if stop is None: - stop = [] - - from vllm import SamplingParams - - SAMPLING_PARAM = SamplingParams( - max_tokens=max_tokens, - stop=stop, - ) - stream = await self.engine.add_request(uuid.uuid4().hex, prompt, SAMPLING_PARAM) - - cursor = 0 - async for request_output in stream: - text = request_output.outputs[0].text - yield text[cursor:] - cursor = len(text) - - @bentoml.api(route="/api/chat") - async def chat( - self, - messages: list[Message] = [ - Message(content="what is the meaning of life?", role="user") - ], - model: str = ENGINE_CONFIG["model"], - max_tokens: Annotated[ - int, - Ge(128), - Le(ENGINE_CONFIG["max_model_len"]), - ] = ENGINE_CONFIG["max_model_len"], - stop: Optional[list[str]] = None, - ) -> AsyncGenerator[str, None]: - """ - light-weight chat API that takes in a list of messages and returns a response - """ - from vllm import SamplingParams - - try: - if OVERRIDE_CHAT_TEMPLATE: # community chat template - gen_config = _get_gen_config(CONSTANTS["chat_template"]) - if not stop: - if gen_config["stop_str"]: - stop = [gen_config["stop_str"]] - else: - stop = [] - system_prompt = gen_config["system_prompt"] - self.tokenizer.chat_template = gen_config["template"] - else: - if not stop: - if self.tokenizer.eos_token is not None: - stop = [self.tokenizer.eos_token] - else: - stop = [] - system_prompt = None - - SAMPLING_PARAM = SamplingParams( - max_tokens=max_tokens, - stop=stop, - ) - if system_prompt and messages[0].role != "system": - messages = [dict(role="system", content=system_prompt)] + messages - - prompt = self.tokenizer.apply_chat_template( - messages, - tokenize=False, - add_generation_prompt=True, - ) - - stream = await self.engine.add_request( - uuid.uuid4().hex, prompt, SAMPLING_PARAM - ) - - cursor = 0 - strip_flag = True - async for request_output in stream: - text = request_output.outputs[0].text - assistant_message = text[cursor:] - if not strip_flag: # strip the leading whitespace - yield assistant_message - elif assistant_message.strip(): - strip_flag = False - yield assistant_message.lstrip() - cursor = len(text) - except Exception as e: - logger.error(f"Error in chat API: {e}") - yield f"Error in chat API: {e}" - - -@functools.lru_cache(maxsize=1) -def _get_gen_config(community_chat_template: str) -> dict: - logger.info(f"Load community_chat_template: {community_chat_template}") - chat_template_path = os.path.join( - os.path.dirname(__file__), "chat_templates", "chat_templates" - ) - config_path = os.path.join( - os.path.dirname(__file__), "chat_templates", "generation_configs" - ) - with open(os.path.join(config_path, f"{community_chat_template}.json")) as f: - gen_config = json.load(f) - chat_template_file = gen_config["chat_template"].split("/")[-1] - with open(os.path.join(chat_template_path, chat_template_file)) as f: - chat_template = f.read() - gen_config["template"] = chat_template.replace(" ", "").replace("\n", "") - return gen_config diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/README.md b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/README.md new file mode 100644 index 00000000..fd5a6a8a --- /dev/null +++ b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/README.md @@ -0,0 +1,16 @@ +# qwen2:72b-instruct-awq-4bit-f44a + +[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML) +[![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/) +[![join_slack](https://badgen.net/badge/Join/BentoML%20Slack/cyan?icon=slack)](https://l.bentoml.com/join-slack-swagger) +[![BentoML GitHub Repo](https://img.shields.io/github/stars/bentoml/bentoml?style=social)](https://github.com/bentoml/BentoML) +[![Twitter Follow](https://img.shields.io/twitter/follow/bentomlai?label=Follow%20BentoML&style=social)](https://twitter.com/bentomlai) + +This is a Machine Learning Service created with BentoML. + +## Help + +* [📖 Documentation](https://docs.bentoml.com/en/latest/): Learn how to use BentoML. +* [💬 Community](https://l.bentoml.com/join-slack-swagger): Join the BentoML Slack community. +* [🐛 GitHub Issues](https://github.com/bentoml/BentoML/issues): Report bugs and feature requests. +* Tip: you can also [customize this README](https://docs.bentoml.com/en/latest/concepts/bento.html#description). diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/apis/openapi.yaml b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/apis/openapi.yaml similarity index 99% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/apis/openapi.yaml rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/apis/openapi.yaml index 39589566..6cedb5d5 100644 --- a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/apis/openapi.yaml +++ b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/apis/openapi.yaml @@ -1036,6 +1036,39 @@ paths: tags: - Service APIs x-bentoml-name: generate + /chat/: + get: + operationId: serve_chat_html__get + responses: + '200': + content: + application/json: + schema: {} + description: Successful Response + summary: Serve Chat Html + /chat/{full_path}: + get: + operationId: catch_all__full_path__get + parameters: + - in: path + name: full_path + required: true + schema: + title: Full Path + type: string + responses: + '200': + content: + application/json: + schema: {} + description: Successful Response + '422': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + description: Validation Error + summary: Catch All /healthz: get: description: Health check endpoint. Expecting an empty response with status @@ -1076,39 +1109,6 @@ paths: description: Successful Response tags: - Infrastructure - /ui/: - get: - operationId: serve_chat_html__get - responses: - '200': - content: - application/json: - schema: {} - description: Successful Response - summary: Serve Chat Html - /ui/{full_path}: - get: - operationId: catch_all__full_path__get - parameters: - - in: path - name: full_path - required: true - schema: - title: Full Path - type: string - responses: - '200': - content: - application/json: - schema: {} - description: Successful Response - '422': - content: - application/json: - schema: - $ref: '#/components/schemas/HTTPValidationError' - description: Validation Error - summary: Catch All /v1/chat/completions: post: operationId: create_chat_completion_chat_completions_post diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/apis/schema.json b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/apis/schema.json similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/apis/schema.json rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/apis/schema.json diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/bento.yaml b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/bento.yaml similarity index 97% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/bento.yaml rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/bento.yaml index 23f54d12..dad942e7 100644 --- a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/bento.yaml +++ b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/bento.yaml @@ -1,8 +1,8 @@ service: service:VLLM name: qwen2 -version: 72b-instruct-awq-4bit-177e +version: 72b-instruct-awq-4bit-f44a bentoml_version: 1.2.19 -creation_time: '2024-07-05T07:36:02.542794+00:00' +creation_time: '2024-07-08T08:05:19.616286+00:00' labels: openllm_alias: 72b-4bit,72b-instruct-4bit openllm_hf_model_id: Qwen/Qwen2-72B-Instruct-AWQ diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/env/docker/Dockerfile b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/env/docker/Dockerfile similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/env/docker/Dockerfile rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/env/docker/Dockerfile diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/env/docker/entrypoint.sh b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/env/docker/entrypoint.sh similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/env/docker/entrypoint.sh rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/env/docker/entrypoint.sh diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/env/python/install.sh b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/env/python/install.sh similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/env/python/install.sh rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/env/python/install.sh diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/env/python/requirements.lock.txt b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/env/python/requirements.lock.txt new file mode 100644 index 00000000..925ce193 --- /dev/null +++ b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/env/python/requirements.lock.txt @@ -0,0 +1,147 @@ +aiohttp==3.9.5 +aiosignal==1.3.1 +annotated-types==0.7.0 +anyio==4.4.0 +appdirs==1.4.4 +asgiref==3.8.1 +async-timeout==4.0.3 +attrs==23.2.0 +bentoml==1.2.19 +build==1.2.1 +cattrs==23.1.2 +certifi==2024.7.4 +charset-normalizer==3.3.2 +circus==0.18.0 +click==8.1.7 +click-option-group==0.5.6 +cloudpickle==3.0.0 +cmake==3.30.0 +deepmerge==1.1.1 +deprecated==1.2.14 +diskcache==5.6.3 +distro==1.9.0 +dnspython==2.6.1 +email-validator==2.2.0 +exceptiongroup==1.2.1 +fastapi==0.111.0 +fastapi-cli==0.0.4 +filelock==3.15.4 +frozenlist==1.4.1 +fs==2.4.16 +fsspec==2024.6.1 +h11==0.14.0 +httpcore==1.0.5 +httptools==0.6.1 +httpx==0.27.0 +httpx-ws==0.6.0 +huggingface-hub==0.23.4 +idna==3.7 +importlib-metadata==6.11.0 +inflection==0.5.1 +interegular==0.3.3 +jinja2==3.1.4 +joblib==1.4.2 +jsonschema==4.22.0 +jsonschema-specifications==2023.12.1 +lark==1.1.9 +llvmlite==0.43.0 +lm-format-enforcer==0.10.1 +markdown-it-py==3.0.0 +markupsafe==2.1.5 +mdurl==0.1.2 +mpmath==1.3.0 +msgpack==1.0.8 +multidict==6.0.5 +nest-asyncio==1.6.0 +networkx==3.2.1 +ninja==1.11.1.1 +numba==0.60.0 +numpy==1.26.0 +nvidia-cublas-cu12==12.1.3.1 +nvidia-cuda-cupti-cu12==12.1.105 +nvidia-cuda-nvrtc-cu12==12.1.105 +nvidia-cuda-runtime-cu12==12.1.105 +nvidia-cudnn-cu12==8.9.2.26 +nvidia-cufft-cu12==11.0.2.54 +nvidia-curand-cu12==10.3.2.106 +nvidia-cusolver-cu12==11.4.5.107 +nvidia-cusparse-cu12==12.1.0.106 +nvidia-ml-py==11.525.150 +nvidia-nccl-cu12==2.20.5 +nvidia-nvjitlink-cu12==12.5.82 +nvidia-nvtx-cu12==12.1.105 +openai==1.35.10 +opentelemetry-api==1.20.0 +opentelemetry-instrumentation==0.41b0 +opentelemetry-instrumentation-aiohttp-client==0.41b0 +opentelemetry-instrumentation-asgi==0.41b0 +opentelemetry-sdk==1.20.0 +opentelemetry-semantic-conventions==0.41b0 +opentelemetry-util-http==0.41b0 +orjson==3.10.6 +outlines==0.0.34 +packaging==24.1 +pathspec==0.12.1 +pip-requirements-parser==32.0.1 +pip-tools==7.4.1 +prometheus-client==0.20.0 +prometheus-fastapi-instrumentator==7.0.0 +protobuf==5.27.2 +psutil==6.0.0 +py-cpuinfo==9.0.0 +pydantic==2.8.2 +pydantic-core==2.20.1 +pygments==2.18.0 +pyparsing==3.1.2 +pyproject-hooks==1.1.0 +python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 +python-json-logger==2.0.7 +python-multipart==0.0.9 +pyyaml==6.0.1 +pyzmq==26.0.3 +ray==2.31.0 +referencing==0.35.1 +regex==2024.5.15 +requests==2.32.3 +rich==13.7.1 +rpds-py==0.18.1 +safetensors==0.4.3 +schema==0.7.7 +scipy==1.13.1 +sentencepiece==0.2.0 +shellingham==1.5.4 +simple-di==0.1.5 +six==1.16.0 +sniffio==1.3.1 +starlette==0.37.2 +sympy==1.12.1 +tiktoken==0.7.0 +tokenizers==0.19.1 +tomli==2.0.1 +tomli-w==1.0.0 +torch==2.3.0 +tornado==6.4.1 +tqdm==4.66.4 +transformers==4.41.0 +triton==2.3.0 +typer==0.12.3 +typing-extensions==4.12.2 +ujson==5.10.0 +urllib3==2.2.2 +uvicorn[standard]==0.30.1 +uvloop==0.19.0 +vllm==0.4.3 +vllm-flash-attn==2.5.8.post2 +watchfiles==0.22.0 +websockets==12.0 +wheel==0.43.0 +wrapt==1.16.0 +wsproto==1.2.0 +xformers==0.0.26.post1 +yarl==1.9.4 +zipp==3.19.2 + +# The following packages are considered to be unsafe in a requirements file: +pip==24.1.2 +setuptools==70.2.0 diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/env/python/requirements.txt b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/env/python/requirements.txt similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/env/python/requirements.txt rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/env/python/requirements.txt diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/env/python/version.txt b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/env/python/version.txt similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/env/python/version.txt rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/env/python/version.txt diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/bento_constants.py b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/bento_constants.py similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/bento_constants.py rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/bento_constants.py diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/bentofile.yaml b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/bentofile.yaml similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/bentofile.yaml rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/bentofile.yaml diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/chat_templates/alpaca.jinja b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/chat_templates/alpaca.jinja similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/chat_templates/alpaca.jinja rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/chat_templates/alpaca.jinja diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/chat_templates/amberchat.jinja b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/chat_templates/amberchat.jinja similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/chat_templates/amberchat.jinja rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/chat_templates/amberchat.jinja diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/chat_templates/chatml.jinja b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/chat_templates/chatml.jinja similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/chat_templates/chatml.jinja rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/chat_templates/chatml.jinja diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/chat_templates/chatqa.jinja b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/chat_templates/chatqa.jinja similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/chat_templates/chatqa.jinja rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/chat_templates/chatqa.jinja diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/chat_templates/falcon-instruct.jinja b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/chat_templates/falcon-instruct.jinja similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/chat_templates/falcon-instruct.jinja rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/chat_templates/falcon-instruct.jinja diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/chat_templates/gemma-it.jinja b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/chat_templates/gemma-it.jinja similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/chat_templates/gemma-it.jinja rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/chat_templates/gemma-it.jinja diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/chat_templates/llama-2-chat.jinja b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/chat_templates/llama-2-chat.jinja similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/chat_templates/llama-2-chat.jinja rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/chat_templates/llama-2-chat.jinja diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/chat_templates/llama-3-chat.jinja b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/chat_templates/llama-3-chat.jinja similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/chat_templates/llama-3-chat.jinja rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/chat_templates/llama-3-chat.jinja diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/chat_templates/mistral-instruct.jinja b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/chat_templates/mistral-instruct.jinja similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/chat_templates/mistral-instruct.jinja rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/chat_templates/mistral-instruct.jinja diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/chat_templates/openchat.jinja b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/chat_templates/openchat.jinja similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/chat_templates/openchat.jinja rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/chat_templates/openchat.jinja diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/chat_templates/phi-3.jinja b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/chat_templates/phi-3.jinja similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/chat_templates/phi-3.jinja rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/chat_templates/phi-3.jinja diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/chat_templates/saiga.jinja b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/chat_templates/saiga.jinja similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/chat_templates/saiga.jinja rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/chat_templates/saiga.jinja diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/chat_templates/solar-instruct.jinja b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/chat_templates/solar-instruct.jinja similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/chat_templates/solar-instruct.jinja rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/chat_templates/solar-instruct.jinja diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/chat_templates/vicuna.jinja b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/chat_templates/vicuna.jinja similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/chat_templates/vicuna.jinja rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/chat_templates/vicuna.jinja diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/chat_templates/zephyr.jinja b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/chat_templates/zephyr.jinja similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/chat_templates/zephyr.jinja rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/chat_templates/zephyr.jinja diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/generation_configs/alpaca.json b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/generation_configs/alpaca.json similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/generation_configs/alpaca.json rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/generation_configs/alpaca.json diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/generation_configs/amberchat.json b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/generation_configs/amberchat.json similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/generation_configs/amberchat.json rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/generation_configs/amberchat.json diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/generation_configs/chatqa.json b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/generation_configs/chatqa.json similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/generation_configs/chatqa.json rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/generation_configs/chatqa.json diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/generation_configs/gemma-it.json b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/generation_configs/gemma-it.json similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/generation_configs/gemma-it.json rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/generation_configs/gemma-it.json diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/generation_configs/llama-2-chat.json b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/generation_configs/llama-2-chat.json similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/generation_configs/llama-2-chat.json rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/generation_configs/llama-2-chat.json diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/generation_configs/llama-3-chat.json b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/generation_configs/llama-3-chat.json similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/generation_configs/llama-3-chat.json rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/generation_configs/llama-3-chat.json diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/generation_configs/mistral-instruct.json b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/generation_configs/mistral-instruct.json similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/generation_configs/mistral-instruct.json rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/generation_configs/mistral-instruct.json diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/generation_configs/openchat.json b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/generation_configs/openchat.json similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/generation_configs/openchat.json rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/generation_configs/openchat.json diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/generation_configs/orca-2.json b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/generation_configs/orca-2.json similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/generation_configs/orca-2.json rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/generation_configs/orca-2.json diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/generation_configs/phi-3.json b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/generation_configs/phi-3.json similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/generation_configs/phi-3.json rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/generation_configs/phi-3.json diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/generation_configs/qwen2-chat.json b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/generation_configs/qwen2-chat.json similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/generation_configs/qwen2-chat.json rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/generation_configs/qwen2-chat.json diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/generation_configs/saiga.json b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/generation_configs/saiga.json similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/generation_configs/saiga.json rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/generation_configs/saiga.json diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/generation_configs/solar-instruct.json b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/generation_configs/solar-instruct.json similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/generation_configs/solar-instruct.json rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/generation_configs/solar-instruct.json diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/generation_configs/vicuna.json b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/generation_configs/vicuna.json similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/generation_configs/vicuna.json rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/generation_configs/vicuna.json diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/generation_configs/yi-chat.json b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/generation_configs/yi-chat.json similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/generation_configs/yi-chat.json rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/generation_configs/yi-chat.json diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/generation_configs/zephyr.json b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/generation_configs/zephyr.json similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/chat_templates/generation_configs/zephyr.json rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/chat_templates/generation_configs/zephyr.json diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/service.py b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/service.py new file mode 100644 index 00000000..42152243 --- /dev/null +++ b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/service.py @@ -0,0 +1,239 @@ +import functools +import json +import logging +import os +import sys +import uuid +from typing import AsyncGenerator, Literal, Optional + +import bentoml +import fastapi +import fastapi.staticfiles +import pydantic +import vllm.entrypoints.openai.api_server as vllm_api_server +import yaml +from annotated_types import Ge, Le +from bento_constants import CONSTANT_YAML +from fastapi.responses import FileResponse +from typing_extensions import Annotated, Literal + + +class Message(pydantic.BaseModel): + content: str + role: Literal["system", "user", "assistant"] + + +CONSTANTS = yaml.safe_load(CONSTANT_YAML) + +ENGINE_CONFIG = CONSTANTS["engine_config"] +SERVICE_CONFIG = CONSTANTS["service_config"] +OVERRIDE_CHAT_TEMPLATE = CONSTANTS.get("chat_template") + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +openai_api_app = fastapi.FastAPI() +static_app = fastapi.FastAPI() +ui_app = fastapi.FastAPI() + + +OPENAI_ENDPOINTS = [ + ["/chat/completions", vllm_api_server.create_chat_completion, ["POST"]], + ["/completions", vllm_api_server.create_completion, ["POST"]], + ["/models", vllm_api_server.show_available_models, ["GET"]], +] + + +class Message(pydantic.BaseModel): + role: Literal["system", "user", "assistant"] + content: str + + +for route, endpoint, methods in OPENAI_ENDPOINTS: + openai_api_app.add_api_route( + path=route, + endpoint=endpoint, + methods=methods, + include_in_schema=True, + ) + + +STATIC_DIR = os.path.join(os.path.dirname(__file__), "ui") + +ui_app.mount( + "/static", fastapi.staticfiles.StaticFiles(directory=STATIC_DIR), name="static" +) + + +@ui_app.get("/") +async def serve_chat_html(): + return FileResponse(os.path.join(STATIC_DIR, "chat.html")) + + +@ui_app.get("/{full_path:path}") +async def catch_all(full_path: str): + file_path = os.path.join(STATIC_DIR, full_path) + if os.path.exists(file_path): + return FileResponse(file_path) + return FileResponse(os.path.join(STATIC_DIR, "chat.html")) + + +# special handling for prometheus_client of bentoml +if "prometheus_client" in sys.modules: + sys.modules.pop("prometheus_client") + + +@bentoml.mount_asgi_app(openai_api_app, path="/v1") +@bentoml.mount_asgi_app(ui_app, path="/chat") +@bentoml.service(**SERVICE_CONFIG) +class VLLM: + def __init__(self) -> None: + from transformers import AutoTokenizer + from vllm import AsyncEngineArgs, AsyncLLMEngine + from vllm.entrypoints.openai.serving_chat import OpenAIServingChat + from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion + + ENGINE_ARGS = AsyncEngineArgs(**ENGINE_CONFIG) + self.engine = AsyncLLMEngine.from_engine_args(ENGINE_ARGS) + self.tokenizer = AutoTokenizer.from_pretrained(ENGINE_CONFIG["model"]) + logger.info(f"VLLM service initialized with model: {ENGINE_CONFIG['model']}") + + if OVERRIDE_CHAT_TEMPLATE: # use community chat template + gen_config = _get_gen_config(CONSTANTS["chat_template"]) + chat_template = gen_config["template"] + else: + chat_template = None + + model_config = self.engine.engine.get_model_config() + + # inject the engine into the openai serving chat and completion + vllm_api_server.openai_serving_chat = OpenAIServingChat( + engine=self.engine, + served_model_names=[ENGINE_CONFIG["model"]], + response_role="assistant", + chat_template=chat_template, + model_config=model_config, + ) + vllm_api_server.openai_serving_completion = OpenAIServingCompletion( + engine=self.engine, + served_model_names=[ENGINE_CONFIG["model"]], + model_config=model_config, + lora_modules=None, + ) + + @bentoml.api(route="/api/generate") + async def generate( + self, + prompt: str = "Explain superconductors like I'm five years old", + model: str = ENGINE_CONFIG["model"], + max_tokens: Annotated[ + int, + Ge(128), + Le(ENGINE_CONFIG["max_model_len"]), + ] = ENGINE_CONFIG["max_model_len"], + stop: Optional[list[str]] = None, + ) -> AsyncGenerator[str, None]: + if stop is None: + stop = [] + + from vllm import SamplingParams + + SAMPLING_PARAM = SamplingParams( + max_tokens=max_tokens, + stop=stop, + ) + stream = await self.engine.add_request(uuid.uuid4().hex, prompt, SAMPLING_PARAM) + + cursor = 0 + async for request_output in stream: + text = request_output.outputs[0].text + yield text[cursor:] + cursor = len(text) + + @bentoml.api(route="/api/chat") + async def chat( + self, + messages: list[Message] = [ + Message(content="what is the meaning of life?", role="user") + ], + model: str = ENGINE_CONFIG["model"], + max_tokens: Annotated[ + int, + Ge(128), + Le(ENGINE_CONFIG["max_model_len"]), + ] = ENGINE_CONFIG["max_model_len"], + stop: Optional[list[str]] = None, + ) -> AsyncGenerator[str, None]: + """ + light-weight chat API that takes in a list of messages and returns a response + """ + from vllm import SamplingParams + + try: + if OVERRIDE_CHAT_TEMPLATE: # community chat template + gen_config = _get_gen_config(CONSTANTS["chat_template"]) + if not stop: + if gen_config["stop_str"]: + stop = [gen_config["stop_str"]] + else: + stop = [] + system_prompt = gen_config["system_prompt"] + self.tokenizer.chat_template = gen_config["template"] + else: + if not stop: + if self.tokenizer.eos_token is not None: + stop = [self.tokenizer.eos_token] + else: + stop = [] + system_prompt = None + + SAMPLING_PARAM = SamplingParams( + max_tokens=max_tokens, + stop=stop, + ) + if system_prompt and messages[0].role != "system": + messages = [dict(role="system", content=system_prompt)] + messages + + prompt = self.tokenizer.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True, + ) + + stream = await self.engine.add_request( + uuid.uuid4().hex, prompt, SAMPLING_PARAM + ) + + cursor = 0 + strip_flag = True + async for request_output in stream: + text = request_output.outputs[0].text + assistant_message = text[cursor:] + if not strip_flag: # strip the leading whitespace + yield assistant_message + elif assistant_message.strip(): + strip_flag = False + yield assistant_message.lstrip() + cursor = len(text) + except Exception as e: + logger.error(f"Error in chat API: {e}") + yield f"Error in chat API: {e}" + + +@functools.lru_cache(maxsize=1) +def _get_gen_config(community_chat_template: str) -> dict: + logger.info(f"Load community_chat_template: {community_chat_template}") + chat_template_path = os.path.join( + os.path.dirname(__file__), "chat_templates", "chat_templates" + ) + config_path = os.path.join( + os.path.dirname(__file__), "chat_templates", "generation_configs" + ) + with open(os.path.join(config_path, f"{community_chat_template}.json")) as f: + gen_config = json.load(f) + chat_template_file = gen_config["chat_template"].split("/")[-1] + with open(os.path.join(chat_template_path, chat_template_file)) as f: + chat_template = f.read() + gen_config["template"] = chat_template.replace(" ", "").replace("\n", "") + return gen_config diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/404.html b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/404.html similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/404.html rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/404.html diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/700-532b1fe2415e5859.js b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/700-532b1fe2415e5859.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/700-532b1fe2415e5859.js rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/700-532b1fe2415e5859.js diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/995-34374f39bb210839.js b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/995-34374f39bb210839.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/995-34374f39bb210839.js rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/995-34374f39bb210839.js diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/css/429544bd3cd8ce3a.css b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/css/429544bd3cd8ce3a.css similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/css/429544bd3cd8ce3a.css rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/css/429544bd3cd8ce3a.css diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/css/5b67f082b31cfc7b.css b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/css/5b67f082b31cfc7b.css similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/css/5b67f082b31cfc7b.css rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/css/5b67f082b31cfc7b.css diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/css/9e63023b20ddb15e.css b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/css/9e63023b20ddb15e.css similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/_next/static/css/9e63023b20ddb15e.css rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/_next/static/css/9e63023b20ddb15e.css diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/apple-touch-icon.png b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/apple-touch-icon.png similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/apple-touch-icon.png rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/apple-touch-icon.png diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/chat.html b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/chat.html similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/chat.html rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/chat.html diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/chat.txt b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/chat.txt similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/chat.txt rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/chat.txt diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/favicon-16x16.png b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/favicon-16x16.png similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/favicon-16x16.png rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/favicon-16x16.png diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/favicon.ico b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/favicon.ico similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/favicon.ico rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/favicon.ico diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/index.html b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/index.html similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/index.html rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/index.html diff --git a/bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/index.txt b/bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/index.txt similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-awq-4bit-177e/src/ui/index.txt rename to bentoml/bentos/qwen2/72b-instruct-awq-4bit-f44a/src/ui/index.txt diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/README.md b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/README.md new file mode 100644 index 00000000..15efac16 --- /dev/null +++ b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/README.md @@ -0,0 +1,16 @@ +# qwen2:72b-instruct-fp16-8d53 + +[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML) +[![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/) +[![join_slack](https://badgen.net/badge/Join/BentoML%20Slack/cyan?icon=slack)](https://l.bentoml.com/join-slack-swagger) +[![BentoML GitHub Repo](https://img.shields.io/github/stars/bentoml/bentoml?style=social)](https://github.com/bentoml/BentoML) +[![Twitter Follow](https://img.shields.io/twitter/follow/bentomlai?label=Follow%20BentoML&style=social)](https://twitter.com/bentomlai) + +This is a Machine Learning Service created with BentoML. + +## Help + +* [📖 Documentation](https://docs.bentoml.com/en/latest/): Learn how to use BentoML. +* [💬 Community](https://l.bentoml.com/join-slack-swagger): Join the BentoML Slack community. +* [🐛 GitHub Issues](https://github.com/bentoml/BentoML/issues): Report bugs and feature requests. +* Tip: you can also [customize this README](https://docs.bentoml.com/en/latest/concepts/bento.html#description). diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/apis/openapi.yaml b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/apis/openapi.yaml similarity index 99% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/apis/openapi.yaml rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/apis/openapi.yaml index 59a4fbda..9ba9f5c6 100644 --- a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/apis/openapi.yaml +++ b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/apis/openapi.yaml @@ -1036,6 +1036,39 @@ paths: tags: - Service APIs x-bentoml-name: generate + /chat/: + get: + operationId: serve_chat_html__get + responses: + '200': + content: + application/json: + schema: {} + description: Successful Response + summary: Serve Chat Html + /chat/{full_path}: + get: + operationId: catch_all__full_path__get + parameters: + - in: path + name: full_path + required: true + schema: + title: Full Path + type: string + responses: + '200': + content: + application/json: + schema: {} + description: Successful Response + '422': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + description: Validation Error + summary: Catch All /healthz: get: description: Health check endpoint. Expecting an empty response with status @@ -1076,39 +1109,6 @@ paths: description: Successful Response tags: - Infrastructure - /ui/: - get: - operationId: serve_chat_html__get - responses: - '200': - content: - application/json: - schema: {} - description: Successful Response - summary: Serve Chat Html - /ui/{full_path}: - get: - operationId: catch_all__full_path__get - parameters: - - in: path - name: full_path - required: true - schema: - title: Full Path - type: string - responses: - '200': - content: - application/json: - schema: {} - description: Successful Response - '422': - content: - application/json: - schema: - $ref: '#/components/schemas/HTTPValidationError' - description: Validation Error - summary: Catch All /v1/chat/completions: post: operationId: create_chat_completion_chat_completions_post diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/apis/schema.json b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/apis/schema.json similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/apis/schema.json rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/apis/schema.json diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/bento.yaml b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/bento.yaml similarity index 97% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/bento.yaml rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/bento.yaml index 2b33f8b8..bdbe2a80 100644 --- a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/bento.yaml +++ b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/bento.yaml @@ -1,8 +1,8 @@ service: service:VLLM name: qwen2 -version: 72b-instruct-fp16-e6e1 +version: 72b-instruct-fp16-8d53 bentoml_version: 1.2.19 -creation_time: '2024-07-05T07:37:07.218272+00:00' +creation_time: '2024-07-08T08:07:07.352770+00:00' labels: openllm_alias: 72b,72b-instruct openllm_hf_model_id: Qwen/Qwen2-72B-Instruct diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/env/docker/Dockerfile b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/env/docker/Dockerfile similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/env/docker/Dockerfile rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/env/docker/Dockerfile diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/env/docker/entrypoint.sh b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/env/docker/entrypoint.sh similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/env/docker/entrypoint.sh rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/env/docker/entrypoint.sh diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/env/python/install.sh b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/env/python/install.sh similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/env/python/install.sh rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/env/python/install.sh diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/env/python/requirements.lock.txt b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/env/python/requirements.lock.txt new file mode 100644 index 00000000..925ce193 --- /dev/null +++ b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/env/python/requirements.lock.txt @@ -0,0 +1,147 @@ +aiohttp==3.9.5 +aiosignal==1.3.1 +annotated-types==0.7.0 +anyio==4.4.0 +appdirs==1.4.4 +asgiref==3.8.1 +async-timeout==4.0.3 +attrs==23.2.0 +bentoml==1.2.19 +build==1.2.1 +cattrs==23.1.2 +certifi==2024.7.4 +charset-normalizer==3.3.2 +circus==0.18.0 +click==8.1.7 +click-option-group==0.5.6 +cloudpickle==3.0.0 +cmake==3.30.0 +deepmerge==1.1.1 +deprecated==1.2.14 +diskcache==5.6.3 +distro==1.9.0 +dnspython==2.6.1 +email-validator==2.2.0 +exceptiongroup==1.2.1 +fastapi==0.111.0 +fastapi-cli==0.0.4 +filelock==3.15.4 +frozenlist==1.4.1 +fs==2.4.16 +fsspec==2024.6.1 +h11==0.14.0 +httpcore==1.0.5 +httptools==0.6.1 +httpx==0.27.0 +httpx-ws==0.6.0 +huggingface-hub==0.23.4 +idna==3.7 +importlib-metadata==6.11.0 +inflection==0.5.1 +interegular==0.3.3 +jinja2==3.1.4 +joblib==1.4.2 +jsonschema==4.22.0 +jsonschema-specifications==2023.12.1 +lark==1.1.9 +llvmlite==0.43.0 +lm-format-enforcer==0.10.1 +markdown-it-py==3.0.0 +markupsafe==2.1.5 +mdurl==0.1.2 +mpmath==1.3.0 +msgpack==1.0.8 +multidict==6.0.5 +nest-asyncio==1.6.0 +networkx==3.2.1 +ninja==1.11.1.1 +numba==0.60.0 +numpy==1.26.0 +nvidia-cublas-cu12==12.1.3.1 +nvidia-cuda-cupti-cu12==12.1.105 +nvidia-cuda-nvrtc-cu12==12.1.105 +nvidia-cuda-runtime-cu12==12.1.105 +nvidia-cudnn-cu12==8.9.2.26 +nvidia-cufft-cu12==11.0.2.54 +nvidia-curand-cu12==10.3.2.106 +nvidia-cusolver-cu12==11.4.5.107 +nvidia-cusparse-cu12==12.1.0.106 +nvidia-ml-py==11.525.150 +nvidia-nccl-cu12==2.20.5 +nvidia-nvjitlink-cu12==12.5.82 +nvidia-nvtx-cu12==12.1.105 +openai==1.35.10 +opentelemetry-api==1.20.0 +opentelemetry-instrumentation==0.41b0 +opentelemetry-instrumentation-aiohttp-client==0.41b0 +opentelemetry-instrumentation-asgi==0.41b0 +opentelemetry-sdk==1.20.0 +opentelemetry-semantic-conventions==0.41b0 +opentelemetry-util-http==0.41b0 +orjson==3.10.6 +outlines==0.0.34 +packaging==24.1 +pathspec==0.12.1 +pip-requirements-parser==32.0.1 +pip-tools==7.4.1 +prometheus-client==0.20.0 +prometheus-fastapi-instrumentator==7.0.0 +protobuf==5.27.2 +psutil==6.0.0 +py-cpuinfo==9.0.0 +pydantic==2.8.2 +pydantic-core==2.20.1 +pygments==2.18.0 +pyparsing==3.1.2 +pyproject-hooks==1.1.0 +python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 +python-json-logger==2.0.7 +python-multipart==0.0.9 +pyyaml==6.0.1 +pyzmq==26.0.3 +ray==2.31.0 +referencing==0.35.1 +regex==2024.5.15 +requests==2.32.3 +rich==13.7.1 +rpds-py==0.18.1 +safetensors==0.4.3 +schema==0.7.7 +scipy==1.13.1 +sentencepiece==0.2.0 +shellingham==1.5.4 +simple-di==0.1.5 +six==1.16.0 +sniffio==1.3.1 +starlette==0.37.2 +sympy==1.12.1 +tiktoken==0.7.0 +tokenizers==0.19.1 +tomli==2.0.1 +tomli-w==1.0.0 +torch==2.3.0 +tornado==6.4.1 +tqdm==4.66.4 +transformers==4.41.0 +triton==2.3.0 +typer==0.12.3 +typing-extensions==4.12.2 +ujson==5.10.0 +urllib3==2.2.2 +uvicorn[standard]==0.30.1 +uvloop==0.19.0 +vllm==0.4.3 +vllm-flash-attn==2.5.8.post2 +watchfiles==0.22.0 +websockets==12.0 +wheel==0.43.0 +wrapt==1.16.0 +wsproto==1.2.0 +xformers==0.0.26.post1 +yarl==1.9.4 +zipp==3.19.2 + +# The following packages are considered to be unsafe in a requirements file: +pip==24.1.2 +setuptools==70.2.0 diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/env/python/requirements.txt b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/env/python/requirements.txt similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/env/python/requirements.txt rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/env/python/requirements.txt diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/env/python/version.txt b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/env/python/version.txt similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/env/python/version.txt rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/env/python/version.txt diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/bento_constants.py b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/bento_constants.py similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/bento_constants.py rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/bento_constants.py diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/bentofile.yaml b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/bentofile.yaml similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/bentofile.yaml rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/bentofile.yaml diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/chat_templates/alpaca.jinja b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/chat_templates/alpaca.jinja similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/chat_templates/alpaca.jinja rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/chat_templates/alpaca.jinja diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/chat_templates/amberchat.jinja b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/chat_templates/amberchat.jinja similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/chat_templates/amberchat.jinja rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/chat_templates/amberchat.jinja diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/chat_templates/chatml.jinja b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/chat_templates/chatml.jinja similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/chat_templates/chatml.jinja rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/chat_templates/chatml.jinja diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/chat_templates/chatqa.jinja b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/chat_templates/chatqa.jinja similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/chat_templates/chatqa.jinja rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/chat_templates/chatqa.jinja diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/chat_templates/falcon-instruct.jinja b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/chat_templates/falcon-instruct.jinja similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/chat_templates/falcon-instruct.jinja rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/chat_templates/falcon-instruct.jinja diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/chat_templates/gemma-it.jinja b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/chat_templates/gemma-it.jinja similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/chat_templates/gemma-it.jinja rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/chat_templates/gemma-it.jinja diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/chat_templates/llama-2-chat.jinja b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/chat_templates/llama-2-chat.jinja similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/chat_templates/llama-2-chat.jinja rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/chat_templates/llama-2-chat.jinja diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/chat_templates/llama-3-chat.jinja b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/chat_templates/llama-3-chat.jinja similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/chat_templates/llama-3-chat.jinja rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/chat_templates/llama-3-chat.jinja diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/chat_templates/mistral-instruct.jinja b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/chat_templates/mistral-instruct.jinja similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/chat_templates/mistral-instruct.jinja rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/chat_templates/mistral-instruct.jinja diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/chat_templates/openchat.jinja b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/chat_templates/openchat.jinja similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/chat_templates/openchat.jinja rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/chat_templates/openchat.jinja diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/chat_templates/phi-3.jinja b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/chat_templates/phi-3.jinja similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/chat_templates/phi-3.jinja rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/chat_templates/phi-3.jinja diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/chat_templates/saiga.jinja b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/chat_templates/saiga.jinja similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/chat_templates/saiga.jinja rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/chat_templates/saiga.jinja diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/chat_templates/solar-instruct.jinja b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/chat_templates/solar-instruct.jinja similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/chat_templates/solar-instruct.jinja rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/chat_templates/solar-instruct.jinja diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/chat_templates/vicuna.jinja b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/chat_templates/vicuna.jinja similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/chat_templates/vicuna.jinja rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/chat_templates/vicuna.jinja diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/chat_templates/zephyr.jinja b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/chat_templates/zephyr.jinja similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/chat_templates/zephyr.jinja rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/chat_templates/zephyr.jinja diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/generation_configs/alpaca.json b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/generation_configs/alpaca.json similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/generation_configs/alpaca.json rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/generation_configs/alpaca.json diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/generation_configs/amberchat.json b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/generation_configs/amberchat.json similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/generation_configs/amberchat.json rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/generation_configs/amberchat.json diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/generation_configs/chatqa.json b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/generation_configs/chatqa.json similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/generation_configs/chatqa.json rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/generation_configs/chatqa.json diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/generation_configs/gemma-it.json b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/generation_configs/gemma-it.json similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/generation_configs/gemma-it.json rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/generation_configs/gemma-it.json diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/generation_configs/llama-2-chat.json b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/generation_configs/llama-2-chat.json similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/generation_configs/llama-2-chat.json rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/generation_configs/llama-2-chat.json diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/generation_configs/llama-3-chat.json b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/generation_configs/llama-3-chat.json similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/generation_configs/llama-3-chat.json rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/generation_configs/llama-3-chat.json diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/generation_configs/mistral-instruct.json b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/generation_configs/mistral-instruct.json similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/generation_configs/mistral-instruct.json rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/generation_configs/mistral-instruct.json diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/generation_configs/openchat.json b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/generation_configs/openchat.json similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/generation_configs/openchat.json rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/generation_configs/openchat.json diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/generation_configs/orca-2.json b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/generation_configs/orca-2.json similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/generation_configs/orca-2.json rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/generation_configs/orca-2.json diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/generation_configs/phi-3.json b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/generation_configs/phi-3.json similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/generation_configs/phi-3.json rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/generation_configs/phi-3.json diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/generation_configs/qwen2-chat.json b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/generation_configs/qwen2-chat.json similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/generation_configs/qwen2-chat.json rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/generation_configs/qwen2-chat.json diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/generation_configs/saiga.json b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/generation_configs/saiga.json similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/generation_configs/saiga.json rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/generation_configs/saiga.json diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/generation_configs/solar-instruct.json b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/generation_configs/solar-instruct.json similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/generation_configs/solar-instruct.json rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/generation_configs/solar-instruct.json diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/generation_configs/vicuna.json b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/generation_configs/vicuna.json similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/generation_configs/vicuna.json rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/generation_configs/vicuna.json diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/generation_configs/yi-chat.json b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/generation_configs/yi-chat.json similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/generation_configs/yi-chat.json rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/generation_configs/yi-chat.json diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/generation_configs/zephyr.json b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/generation_configs/zephyr.json similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/chat_templates/generation_configs/zephyr.json rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/chat_templates/generation_configs/zephyr.json diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/service.py b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/service.py new file mode 100644 index 00000000..42152243 --- /dev/null +++ b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/service.py @@ -0,0 +1,239 @@ +import functools +import json +import logging +import os +import sys +import uuid +from typing import AsyncGenerator, Literal, Optional + +import bentoml +import fastapi +import fastapi.staticfiles +import pydantic +import vllm.entrypoints.openai.api_server as vllm_api_server +import yaml +from annotated_types import Ge, Le +from bento_constants import CONSTANT_YAML +from fastapi.responses import FileResponse +from typing_extensions import Annotated, Literal + + +class Message(pydantic.BaseModel): + content: str + role: Literal["system", "user", "assistant"] + + +CONSTANTS = yaml.safe_load(CONSTANT_YAML) + +ENGINE_CONFIG = CONSTANTS["engine_config"] +SERVICE_CONFIG = CONSTANTS["service_config"] +OVERRIDE_CHAT_TEMPLATE = CONSTANTS.get("chat_template") + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +openai_api_app = fastapi.FastAPI() +static_app = fastapi.FastAPI() +ui_app = fastapi.FastAPI() + + +OPENAI_ENDPOINTS = [ + ["/chat/completions", vllm_api_server.create_chat_completion, ["POST"]], + ["/completions", vllm_api_server.create_completion, ["POST"]], + ["/models", vllm_api_server.show_available_models, ["GET"]], +] + + +class Message(pydantic.BaseModel): + role: Literal["system", "user", "assistant"] + content: str + + +for route, endpoint, methods in OPENAI_ENDPOINTS: + openai_api_app.add_api_route( + path=route, + endpoint=endpoint, + methods=methods, + include_in_schema=True, + ) + + +STATIC_DIR = os.path.join(os.path.dirname(__file__), "ui") + +ui_app.mount( + "/static", fastapi.staticfiles.StaticFiles(directory=STATIC_DIR), name="static" +) + + +@ui_app.get("/") +async def serve_chat_html(): + return FileResponse(os.path.join(STATIC_DIR, "chat.html")) + + +@ui_app.get("/{full_path:path}") +async def catch_all(full_path: str): + file_path = os.path.join(STATIC_DIR, full_path) + if os.path.exists(file_path): + return FileResponse(file_path) + return FileResponse(os.path.join(STATIC_DIR, "chat.html")) + + +# special handling for prometheus_client of bentoml +if "prometheus_client" in sys.modules: + sys.modules.pop("prometheus_client") + + +@bentoml.mount_asgi_app(openai_api_app, path="/v1") +@bentoml.mount_asgi_app(ui_app, path="/chat") +@bentoml.service(**SERVICE_CONFIG) +class VLLM: + def __init__(self) -> None: + from transformers import AutoTokenizer + from vllm import AsyncEngineArgs, AsyncLLMEngine + from vllm.entrypoints.openai.serving_chat import OpenAIServingChat + from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion + + ENGINE_ARGS = AsyncEngineArgs(**ENGINE_CONFIG) + self.engine = AsyncLLMEngine.from_engine_args(ENGINE_ARGS) + self.tokenizer = AutoTokenizer.from_pretrained(ENGINE_CONFIG["model"]) + logger.info(f"VLLM service initialized with model: {ENGINE_CONFIG['model']}") + + if OVERRIDE_CHAT_TEMPLATE: # use community chat template + gen_config = _get_gen_config(CONSTANTS["chat_template"]) + chat_template = gen_config["template"] + else: + chat_template = None + + model_config = self.engine.engine.get_model_config() + + # inject the engine into the openai serving chat and completion + vllm_api_server.openai_serving_chat = OpenAIServingChat( + engine=self.engine, + served_model_names=[ENGINE_CONFIG["model"]], + response_role="assistant", + chat_template=chat_template, + model_config=model_config, + ) + vllm_api_server.openai_serving_completion = OpenAIServingCompletion( + engine=self.engine, + served_model_names=[ENGINE_CONFIG["model"]], + model_config=model_config, + lora_modules=None, + ) + + @bentoml.api(route="/api/generate") + async def generate( + self, + prompt: str = "Explain superconductors like I'm five years old", + model: str = ENGINE_CONFIG["model"], + max_tokens: Annotated[ + int, + Ge(128), + Le(ENGINE_CONFIG["max_model_len"]), + ] = ENGINE_CONFIG["max_model_len"], + stop: Optional[list[str]] = None, + ) -> AsyncGenerator[str, None]: + if stop is None: + stop = [] + + from vllm import SamplingParams + + SAMPLING_PARAM = SamplingParams( + max_tokens=max_tokens, + stop=stop, + ) + stream = await self.engine.add_request(uuid.uuid4().hex, prompt, SAMPLING_PARAM) + + cursor = 0 + async for request_output in stream: + text = request_output.outputs[0].text + yield text[cursor:] + cursor = len(text) + + @bentoml.api(route="/api/chat") + async def chat( + self, + messages: list[Message] = [ + Message(content="what is the meaning of life?", role="user") + ], + model: str = ENGINE_CONFIG["model"], + max_tokens: Annotated[ + int, + Ge(128), + Le(ENGINE_CONFIG["max_model_len"]), + ] = ENGINE_CONFIG["max_model_len"], + stop: Optional[list[str]] = None, + ) -> AsyncGenerator[str, None]: + """ + light-weight chat API that takes in a list of messages and returns a response + """ + from vllm import SamplingParams + + try: + if OVERRIDE_CHAT_TEMPLATE: # community chat template + gen_config = _get_gen_config(CONSTANTS["chat_template"]) + if not stop: + if gen_config["stop_str"]: + stop = [gen_config["stop_str"]] + else: + stop = [] + system_prompt = gen_config["system_prompt"] + self.tokenizer.chat_template = gen_config["template"] + else: + if not stop: + if self.tokenizer.eos_token is not None: + stop = [self.tokenizer.eos_token] + else: + stop = [] + system_prompt = None + + SAMPLING_PARAM = SamplingParams( + max_tokens=max_tokens, + stop=stop, + ) + if system_prompt and messages[0].role != "system": + messages = [dict(role="system", content=system_prompt)] + messages + + prompt = self.tokenizer.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True, + ) + + stream = await self.engine.add_request( + uuid.uuid4().hex, prompt, SAMPLING_PARAM + ) + + cursor = 0 + strip_flag = True + async for request_output in stream: + text = request_output.outputs[0].text + assistant_message = text[cursor:] + if not strip_flag: # strip the leading whitespace + yield assistant_message + elif assistant_message.strip(): + strip_flag = False + yield assistant_message.lstrip() + cursor = len(text) + except Exception as e: + logger.error(f"Error in chat API: {e}") + yield f"Error in chat API: {e}" + + +@functools.lru_cache(maxsize=1) +def _get_gen_config(community_chat_template: str) -> dict: + logger.info(f"Load community_chat_template: {community_chat_template}") + chat_template_path = os.path.join( + os.path.dirname(__file__), "chat_templates", "chat_templates" + ) + config_path = os.path.join( + os.path.dirname(__file__), "chat_templates", "generation_configs" + ) + with open(os.path.join(config_path, f"{community_chat_template}.json")) as f: + gen_config = json.load(f) + chat_template_file = gen_config["chat_template"].split("/")[-1] + with open(os.path.join(chat_template_path, chat_template_file)) as f: + chat_template = f.read() + gen_config["template"] = chat_template.replace(" ", "").replace("\n", "") + return gen_config diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/404.html b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/404.html similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/404.html rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/404.html diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/700-532b1fe2415e5859.js b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/700-532b1fe2415e5859.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/700-532b1fe2415e5859.js rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/700-532b1fe2415e5859.js diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/995-34374f39bb210839.js b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/995-34374f39bb210839.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/995-34374f39bb210839.js rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/995-34374f39bb210839.js diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/css/429544bd3cd8ce3a.css b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/css/429544bd3cd8ce3a.css similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/css/429544bd3cd8ce3a.css rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/css/429544bd3cd8ce3a.css diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/css/5b67f082b31cfc7b.css b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/css/5b67f082b31cfc7b.css similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/css/5b67f082b31cfc7b.css rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/css/5b67f082b31cfc7b.css diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/css/9e63023b20ddb15e.css b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/css/9e63023b20ddb15e.css similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/_next/static/css/9e63023b20ddb15e.css rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/_next/static/css/9e63023b20ddb15e.css diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/apple-touch-icon.png b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/apple-touch-icon.png similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/apple-touch-icon.png rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/apple-touch-icon.png diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/chat.html b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/chat.html similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/chat.html rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/chat.html diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/chat.txt b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/chat.txt similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/chat.txt rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/chat.txt diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/favicon-16x16.png b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/favicon-16x16.png similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/favicon-16x16.png rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/favicon-16x16.png diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/favicon.ico b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/favicon.ico similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/favicon.ico rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/favicon.ico diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/index.html b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/index.html similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/index.html rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/index.html diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/index.txt b/bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/index.txt similarity index 100% rename from bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/ui/index.txt rename to bentoml/bentos/qwen2/72b-instruct-fp16-8d53/src/ui/index.txt diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/README.md b/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/README.md deleted file mode 100644 index 218ece4b..00000000 --- a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/README.md +++ /dev/null @@ -1,16 +0,0 @@ -# qwen2:72b-instruct-fp16-e6e1 - -[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML) -[![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/) -[![join_slack](https://badgen.net/badge/Join/BentoML%20Slack/cyan?icon=slack)](https://l.bentoml.com/join-slack-swagger) -[![BentoML GitHub Repo](https://img.shields.io/github/stars/bentoml/bentoml?style=social)](https://github.com/bentoml/BentoML) -[![Twitter Follow](https://img.shields.io/twitter/follow/bentomlai?label=Follow%20BentoML&style=social)](https://twitter.com/bentomlai) - -This is a Machine Learning Service created with BentoML. - -## Help - -* [📖 Documentation](https://docs.bentoml.com/en/latest/): Learn how to use BentoML. -* [💬 Community](https://l.bentoml.com/join-slack-swagger): Join the BentoML Slack community. -* [🐛 GitHub Issues](https://github.com/bentoml/BentoML/issues): Report bugs and feature requests. -* Tip: you can also [customize this README](https://docs.bentoml.com/en/latest/concepts/bento.html#description). diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/env/python/requirements.lock.txt b/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/env/python/requirements.lock.txt deleted file mode 100644 index 9118ea44..00000000 --- a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/env/python/requirements.lock.txt +++ /dev/null @@ -1,147 +0,0 @@ -aiohttp==3.9.5 -aiosignal==1.3.1 -annotated-types==0.7.0 -anyio==4.4.0 -appdirs==1.4.4 -asgiref==3.8.1 -async-timeout==4.0.3 -attrs==23.2.0 -bentoml==1.2.19 -build==1.2.1 -cattrs==23.1.2 -certifi==2024.7.4 -charset-normalizer==3.3.2 -circus==0.18.0 -click==8.1.7 -click-option-group==0.5.6 -cloudpickle==3.0.0 -cmake==3.30.0 -deepmerge==1.1.1 -deprecated==1.2.14 -diskcache==5.6.3 -distro==1.9.0 -dnspython==2.6.1 -email-validator==2.2.0 -exceptiongroup==1.2.1 -fastapi==0.111.0 -fastapi-cli==0.0.4 -filelock==3.15.4 -frozenlist==1.4.1 -fs==2.4.16 -fsspec==2024.6.1 -h11==0.14.0 -httpcore==1.0.5 -httptools==0.6.1 -httpx==0.27.0 -httpx-ws==0.6.0 -huggingface-hub==0.23.4 -idna==3.7 -importlib-metadata==6.11.0 -inflection==0.5.1 -interegular==0.3.3 -jinja2==3.1.4 -joblib==1.4.2 -jsonschema==4.22.0 -jsonschema-specifications==2023.12.1 -lark==1.1.9 -llvmlite==0.43.0 -lm-format-enforcer==0.10.1 -markdown-it-py==3.0.0 -markupsafe==2.1.5 -mdurl==0.1.2 -mpmath==1.3.0 -msgpack==1.0.8 -multidict==6.0.5 -nest-asyncio==1.6.0 -networkx==3.2.1 -ninja==1.11.1.1 -numba==0.60.0 -numpy==1.26.0 -nvidia-cublas-cu12==12.1.3.1 -nvidia-cuda-cupti-cu12==12.1.105 -nvidia-cuda-nvrtc-cu12==12.1.105 -nvidia-cuda-runtime-cu12==12.1.105 -nvidia-cudnn-cu12==8.9.2.26 -nvidia-cufft-cu12==11.0.2.54 -nvidia-curand-cu12==10.3.2.106 -nvidia-cusolver-cu12==11.4.5.107 -nvidia-cusparse-cu12==12.1.0.106 -nvidia-ml-py==11.525.150 -nvidia-nccl-cu12==2.20.5 -nvidia-nvjitlink-cu12==12.5.82 -nvidia-nvtx-cu12==12.1.105 -openai==1.35.10 -opentelemetry-api==1.20.0 -opentelemetry-instrumentation==0.41b0 -opentelemetry-instrumentation-aiohttp-client==0.41b0 -opentelemetry-instrumentation-asgi==0.41b0 -opentelemetry-sdk==1.20.0 -opentelemetry-semantic-conventions==0.41b0 -opentelemetry-util-http==0.41b0 -orjson==3.10.6 -outlines==0.0.34 -packaging==24.1 -pathspec==0.12.1 -pip-requirements-parser==32.0.1 -pip-tools==7.4.1 -prometheus-client==0.20.0 -prometheus-fastapi-instrumentator==7.0.0 -protobuf==5.27.2 -psutil==6.0.0 -py-cpuinfo==9.0.0 -pydantic==2.8.2 -pydantic-core==2.20.1 -pygments==2.18.0 -pyparsing==3.1.2 -pyproject-hooks==1.1.0 -python-dateutil==2.9.0.post0 -python-dotenv==1.0.1 -python-json-logger==2.0.7 -python-multipart==0.0.9 -pyyaml==6.0.1 -pyzmq==26.0.3 -ray==2.31.0 -referencing==0.35.1 -regex==2024.5.15 -requests==2.32.3 -rich==13.7.1 -rpds-py==0.18.1 -safetensors==0.4.3 -schema==0.7.7 -scipy==1.13.1 -sentencepiece==0.2.0 -shellingham==1.5.4 -simple-di==0.1.5 -six==1.16.0 -sniffio==1.3.1 -starlette==0.37.2 -sympy==1.12.1 -tiktoken==0.7.0 -tokenizers==0.19.1 -tomli==2.0.1 -tomli-w==1.0.0 -torch==2.3.0 -tornado==6.4.1 -tqdm==4.66.4 -transformers==4.41.0 -triton==2.3.0 -typer==0.12.3 -typing-extensions==4.12.2 -ujson==5.10.0 -urllib3==2.2.2 -uvicorn[standard]==0.30.1 -uvloop==0.19.0 -vllm==0.4.3 -vllm-flash-attn==2.5.8.post2 -watchfiles==0.22.0 -websockets==12.0 -wheel==0.43.0 -wrapt==1.16.0 -wsproto==1.2.0 -xformers==0.0.26.post1 -yarl==1.9.4 -zipp==3.19.2 - -# The following packages are considered to be unsafe in a requirements file: -pip==24.1.1 -setuptools==70.2.0 diff --git a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/service.py b/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/service.py deleted file mode 100644 index 94af0b0d..00000000 --- a/bentoml/bentos/qwen2/72b-instruct-fp16-e6e1/src/service.py +++ /dev/null @@ -1,239 +0,0 @@ -import functools -import json -import logging -import os -import sys -import uuid -from typing import AsyncGenerator, Literal, Optional - -import bentoml -import fastapi -import fastapi.staticfiles -import pydantic -import vllm.entrypoints.openai.api_server as vllm_api_server -import yaml -from annotated_types import Ge, Le -from bento_constants import CONSTANT_YAML -from fastapi.responses import FileResponse -from typing_extensions import Annotated, Literal - - -class Message(pydantic.BaseModel): - content: str - role: Literal["system", "user", "assistant"] - - -CONSTANTS = yaml.safe_load(CONSTANT_YAML) - -ENGINE_CONFIG = CONSTANTS["engine_config"] -SERVICE_CONFIG = CONSTANTS["service_config"] -OVERRIDE_CHAT_TEMPLATE = CONSTANTS.get("chat_template") - -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) - - -openai_api_app = fastapi.FastAPI() -static_app = fastapi.FastAPI() -ui_app = fastapi.FastAPI() - - -OPENAI_ENDPOINTS = [ - ["/chat/completions", vllm_api_server.create_chat_completion, ["POST"]], - ["/completions", vllm_api_server.create_completion, ["POST"]], - ["/models", vllm_api_server.show_available_models, ["GET"]], -] - - -class Message(pydantic.BaseModel): - role: Literal["system", "user", "assistant"] - content: str - - -for route, endpoint, methods in OPENAI_ENDPOINTS: - openai_api_app.add_api_route( - path=route, - endpoint=endpoint, - methods=methods, - include_in_schema=True, - ) - - -STATIC_DIR = os.path.join(os.path.dirname(__file__), "ui") - -ui_app.mount( - "/static", fastapi.staticfiles.StaticFiles(directory=STATIC_DIR), name="static" -) - - -@ui_app.get("/") -async def serve_chat_html(): - return FileResponse(os.path.join(STATIC_DIR, "chat.html")) - - -@ui_app.get("/{full_path:path}") -async def catch_all(full_path: str): - file_path = os.path.join(STATIC_DIR, full_path) - if os.path.exists(file_path): - return FileResponse(file_path) - return FileResponse(os.path.join(STATIC_DIR, "chat.html")) - - -# special handling for prometheus_client of bentoml -if "prometheus_client" in sys.modules: - sys.modules.pop("prometheus_client") - - -@bentoml.mount_asgi_app(openai_api_app, path="/v1") -@bentoml.mount_asgi_app(ui_app, path="/ui") -@bentoml.service(**SERVICE_CONFIG) -class VLLM: - def __init__(self) -> None: - from transformers import AutoTokenizer - from vllm import AsyncEngineArgs, AsyncLLMEngine - from vllm.entrypoints.openai.serving_chat import OpenAIServingChat - from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion - - ENGINE_ARGS = AsyncEngineArgs(**ENGINE_CONFIG) - self.engine = AsyncLLMEngine.from_engine_args(ENGINE_ARGS) - self.tokenizer = AutoTokenizer.from_pretrained(ENGINE_CONFIG["model"]) - logger.info(f"VLLM service initialized with model: {ENGINE_CONFIG['model']}") - - if OVERRIDE_CHAT_TEMPLATE: # use community chat template - gen_config = _get_gen_config(CONSTANTS["chat_template"]) - chat_template = gen_config["template"] - else: - chat_template = None - - model_config = self.engine.engine.get_model_config() - - # inject the engine into the openai serving chat and completion - vllm_api_server.openai_serving_chat = OpenAIServingChat( - engine=self.engine, - served_model_names=[ENGINE_CONFIG["model"]], - response_role="assistant", - chat_template=chat_template, - model_config=model_config, - ) - vllm_api_server.openai_serving_completion = OpenAIServingCompletion( - engine=self.engine, - served_model_names=[ENGINE_CONFIG["model"]], - model_config=model_config, - lora_modules=None, - ) - - @bentoml.api(route="/api/generate") - async def generate( - self, - prompt: str = "Explain superconductors like I'm five years old", - model: str = ENGINE_CONFIG["model"], - max_tokens: Annotated[ - int, - Ge(128), - Le(ENGINE_CONFIG["max_model_len"]), - ] = ENGINE_CONFIG["max_model_len"], - stop: Optional[list[str]] = None, - ) -> AsyncGenerator[str, None]: - if stop is None: - stop = [] - - from vllm import SamplingParams - - SAMPLING_PARAM = SamplingParams( - max_tokens=max_tokens, - stop=stop, - ) - stream = await self.engine.add_request(uuid.uuid4().hex, prompt, SAMPLING_PARAM) - - cursor = 0 - async for request_output in stream: - text = request_output.outputs[0].text - yield text[cursor:] - cursor = len(text) - - @bentoml.api(route="/api/chat") - async def chat( - self, - messages: list[Message] = [ - Message(content="what is the meaning of life?", role="user") - ], - model: str = ENGINE_CONFIG["model"], - max_tokens: Annotated[ - int, - Ge(128), - Le(ENGINE_CONFIG["max_model_len"]), - ] = ENGINE_CONFIG["max_model_len"], - stop: Optional[list[str]] = None, - ) -> AsyncGenerator[str, None]: - """ - light-weight chat API that takes in a list of messages and returns a response - """ - from vllm import SamplingParams - - try: - if OVERRIDE_CHAT_TEMPLATE: # community chat template - gen_config = _get_gen_config(CONSTANTS["chat_template"]) - if not stop: - if gen_config["stop_str"]: - stop = [gen_config["stop_str"]] - else: - stop = [] - system_prompt = gen_config["system_prompt"] - self.tokenizer.chat_template = gen_config["template"] - else: - if not stop: - if self.tokenizer.eos_token is not None: - stop = [self.tokenizer.eos_token] - else: - stop = [] - system_prompt = None - - SAMPLING_PARAM = SamplingParams( - max_tokens=max_tokens, - stop=stop, - ) - if system_prompt and messages[0].role != "system": - messages = [dict(role="system", content=system_prompt)] + messages - - prompt = self.tokenizer.apply_chat_template( - messages, - tokenize=False, - add_generation_prompt=True, - ) - - stream = await self.engine.add_request( - uuid.uuid4().hex, prompt, SAMPLING_PARAM - ) - - cursor = 0 - strip_flag = True - async for request_output in stream: - text = request_output.outputs[0].text - assistant_message = text[cursor:] - if not strip_flag: # strip the leading whitespace - yield assistant_message - elif assistant_message.strip(): - strip_flag = False - yield assistant_message.lstrip() - cursor = len(text) - except Exception as e: - logger.error(f"Error in chat API: {e}") - yield f"Error in chat API: {e}" - - -@functools.lru_cache(maxsize=1) -def _get_gen_config(community_chat_template: str) -> dict: - logger.info(f"Load community_chat_template: {community_chat_template}") - chat_template_path = os.path.join( - os.path.dirname(__file__), "chat_templates", "chat_templates" - ) - config_path = os.path.join( - os.path.dirname(__file__), "chat_templates", "generation_configs" - ) - with open(os.path.join(config_path, f"{community_chat_template}.json")) as f: - gen_config = json.load(f) - chat_template_file = gen_config["chat_template"].split("/")[-1] - with open(os.path.join(chat_template_path, chat_template_file)) as f: - chat_template = f.read() - gen_config["template"] = chat_template.replace(" ", "").replace("\n", "") - return gen_config diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/README.md b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/README.md new file mode 100644 index 00000000..1c54b21b --- /dev/null +++ b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/README.md @@ -0,0 +1,16 @@ +# qwen2:7b-instruct-awq-4bit-6b70 + +[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML) +[![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/) +[![join_slack](https://badgen.net/badge/Join/BentoML%20Slack/cyan?icon=slack)](https://l.bentoml.com/join-slack-swagger) +[![BentoML GitHub Repo](https://img.shields.io/github/stars/bentoml/bentoml?style=social)](https://github.com/bentoml/BentoML) +[![Twitter Follow](https://img.shields.io/twitter/follow/bentomlai?label=Follow%20BentoML&style=social)](https://twitter.com/bentomlai) + +This is a Machine Learning Service created with BentoML. + +## Help + +* [📖 Documentation](https://docs.bentoml.com/en/latest/): Learn how to use BentoML. +* [💬 Community](https://l.bentoml.com/join-slack-swagger): Join the BentoML Slack community. +* [🐛 GitHub Issues](https://github.com/bentoml/BentoML/issues): Report bugs and feature requests. +* Tip: you can also [customize this README](https://docs.bentoml.com/en/latest/concepts/bento.html#description). diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/apis/openapi.yaml b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/apis/openapi.yaml similarity index 99% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/apis/openapi.yaml rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/apis/openapi.yaml index b28dec19..a57267f8 100644 --- a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/apis/openapi.yaml +++ b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/apis/openapi.yaml @@ -1036,6 +1036,39 @@ paths: tags: - Service APIs x-bentoml-name: generate + /chat/: + get: + operationId: serve_chat_html__get + responses: + '200': + content: + application/json: + schema: {} + description: Successful Response + summary: Serve Chat Html + /chat/{full_path}: + get: + operationId: catch_all__full_path__get + parameters: + - in: path + name: full_path + required: true + schema: + title: Full Path + type: string + responses: + '200': + content: + application/json: + schema: {} + description: Successful Response + '422': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + description: Validation Error + summary: Catch All /healthz: get: description: Health check endpoint. Expecting an empty response with status @@ -1076,39 +1109,6 @@ paths: description: Successful Response tags: - Infrastructure - /ui/: - get: - operationId: serve_chat_html__get - responses: - '200': - content: - application/json: - schema: {} - description: Successful Response - summary: Serve Chat Html - /ui/{full_path}: - get: - operationId: catch_all__full_path__get - parameters: - - in: path - name: full_path - required: true - schema: - title: Full Path - type: string - responses: - '200': - content: - application/json: - schema: {} - description: Successful Response - '422': - content: - application/json: - schema: - $ref: '#/components/schemas/HTTPValidationError' - description: Validation Error - summary: Catch All /v1/chat/completions: post: operationId: create_chat_completion_chat_completions_post diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/apis/schema.json b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/apis/schema.json similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/apis/schema.json rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/apis/schema.json diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/bento.yaml b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/bento.yaml similarity index 97% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/bento.yaml rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/bento.yaml index 8e1e23bf..80a1dd60 100644 --- a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/bento.yaml +++ b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/bento.yaml @@ -1,8 +1,8 @@ service: service:VLLM name: qwen2 -version: 7b-instruct-awq-4bit-77be +version: 7b-instruct-awq-4bit-6b70 bentoml_version: 1.2.19 -creation_time: '2024-07-05T07:34:58.013451+00:00' +creation_time: '2024-07-08T08:03:39.433813+00:00' labels: openllm_alias: 7b-4bit,7b-instruct-4bit openllm_hf_model_id: Qwen/Qwen2-7B-Instruct-AWQ diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/env/docker/Dockerfile b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/env/docker/Dockerfile similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/env/docker/Dockerfile rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/env/docker/Dockerfile diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/env/docker/entrypoint.sh b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/env/docker/entrypoint.sh similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/env/docker/entrypoint.sh rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/env/docker/entrypoint.sh diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/env/python/install.sh b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/env/python/install.sh similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/env/python/install.sh rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/env/python/install.sh diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/env/python/requirements.lock.txt b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/env/python/requirements.lock.txt new file mode 100644 index 00000000..925ce193 --- /dev/null +++ b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/env/python/requirements.lock.txt @@ -0,0 +1,147 @@ +aiohttp==3.9.5 +aiosignal==1.3.1 +annotated-types==0.7.0 +anyio==4.4.0 +appdirs==1.4.4 +asgiref==3.8.1 +async-timeout==4.0.3 +attrs==23.2.0 +bentoml==1.2.19 +build==1.2.1 +cattrs==23.1.2 +certifi==2024.7.4 +charset-normalizer==3.3.2 +circus==0.18.0 +click==8.1.7 +click-option-group==0.5.6 +cloudpickle==3.0.0 +cmake==3.30.0 +deepmerge==1.1.1 +deprecated==1.2.14 +diskcache==5.6.3 +distro==1.9.0 +dnspython==2.6.1 +email-validator==2.2.0 +exceptiongroup==1.2.1 +fastapi==0.111.0 +fastapi-cli==0.0.4 +filelock==3.15.4 +frozenlist==1.4.1 +fs==2.4.16 +fsspec==2024.6.1 +h11==0.14.0 +httpcore==1.0.5 +httptools==0.6.1 +httpx==0.27.0 +httpx-ws==0.6.0 +huggingface-hub==0.23.4 +idna==3.7 +importlib-metadata==6.11.0 +inflection==0.5.1 +interegular==0.3.3 +jinja2==3.1.4 +joblib==1.4.2 +jsonschema==4.22.0 +jsonschema-specifications==2023.12.1 +lark==1.1.9 +llvmlite==0.43.0 +lm-format-enforcer==0.10.1 +markdown-it-py==3.0.0 +markupsafe==2.1.5 +mdurl==0.1.2 +mpmath==1.3.0 +msgpack==1.0.8 +multidict==6.0.5 +nest-asyncio==1.6.0 +networkx==3.2.1 +ninja==1.11.1.1 +numba==0.60.0 +numpy==1.26.0 +nvidia-cublas-cu12==12.1.3.1 +nvidia-cuda-cupti-cu12==12.1.105 +nvidia-cuda-nvrtc-cu12==12.1.105 +nvidia-cuda-runtime-cu12==12.1.105 +nvidia-cudnn-cu12==8.9.2.26 +nvidia-cufft-cu12==11.0.2.54 +nvidia-curand-cu12==10.3.2.106 +nvidia-cusolver-cu12==11.4.5.107 +nvidia-cusparse-cu12==12.1.0.106 +nvidia-ml-py==11.525.150 +nvidia-nccl-cu12==2.20.5 +nvidia-nvjitlink-cu12==12.5.82 +nvidia-nvtx-cu12==12.1.105 +openai==1.35.10 +opentelemetry-api==1.20.0 +opentelemetry-instrumentation==0.41b0 +opentelemetry-instrumentation-aiohttp-client==0.41b0 +opentelemetry-instrumentation-asgi==0.41b0 +opentelemetry-sdk==1.20.0 +opentelemetry-semantic-conventions==0.41b0 +opentelemetry-util-http==0.41b0 +orjson==3.10.6 +outlines==0.0.34 +packaging==24.1 +pathspec==0.12.1 +pip-requirements-parser==32.0.1 +pip-tools==7.4.1 +prometheus-client==0.20.0 +prometheus-fastapi-instrumentator==7.0.0 +protobuf==5.27.2 +psutil==6.0.0 +py-cpuinfo==9.0.0 +pydantic==2.8.2 +pydantic-core==2.20.1 +pygments==2.18.0 +pyparsing==3.1.2 +pyproject-hooks==1.1.0 +python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 +python-json-logger==2.0.7 +python-multipart==0.0.9 +pyyaml==6.0.1 +pyzmq==26.0.3 +ray==2.31.0 +referencing==0.35.1 +regex==2024.5.15 +requests==2.32.3 +rich==13.7.1 +rpds-py==0.18.1 +safetensors==0.4.3 +schema==0.7.7 +scipy==1.13.1 +sentencepiece==0.2.0 +shellingham==1.5.4 +simple-di==0.1.5 +six==1.16.0 +sniffio==1.3.1 +starlette==0.37.2 +sympy==1.12.1 +tiktoken==0.7.0 +tokenizers==0.19.1 +tomli==2.0.1 +tomli-w==1.0.0 +torch==2.3.0 +tornado==6.4.1 +tqdm==4.66.4 +transformers==4.41.0 +triton==2.3.0 +typer==0.12.3 +typing-extensions==4.12.2 +ujson==5.10.0 +urllib3==2.2.2 +uvicorn[standard]==0.30.1 +uvloop==0.19.0 +vllm==0.4.3 +vllm-flash-attn==2.5.8.post2 +watchfiles==0.22.0 +websockets==12.0 +wheel==0.43.0 +wrapt==1.16.0 +wsproto==1.2.0 +xformers==0.0.26.post1 +yarl==1.9.4 +zipp==3.19.2 + +# The following packages are considered to be unsafe in a requirements file: +pip==24.1.2 +setuptools==70.2.0 diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/env/python/requirements.txt b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/env/python/requirements.txt similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/env/python/requirements.txt rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/env/python/requirements.txt diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/env/python/version.txt b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/env/python/version.txt similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/env/python/version.txt rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/env/python/version.txt diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/bento_constants.py b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/bento_constants.py similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/bento_constants.py rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/bento_constants.py diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/bentofile.yaml b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/bentofile.yaml similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/bentofile.yaml rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/bentofile.yaml diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/chat_templates/alpaca.jinja b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/chat_templates/alpaca.jinja similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/chat_templates/alpaca.jinja rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/chat_templates/alpaca.jinja diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/chat_templates/amberchat.jinja b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/chat_templates/amberchat.jinja similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/chat_templates/amberchat.jinja rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/chat_templates/amberchat.jinja diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/chat_templates/chatml.jinja b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/chat_templates/chatml.jinja similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/chat_templates/chatml.jinja rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/chat_templates/chatml.jinja diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/chat_templates/chatqa.jinja b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/chat_templates/chatqa.jinja similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/chat_templates/chatqa.jinja rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/chat_templates/chatqa.jinja diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/chat_templates/falcon-instruct.jinja b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/chat_templates/falcon-instruct.jinja similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/chat_templates/falcon-instruct.jinja rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/chat_templates/falcon-instruct.jinja diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/chat_templates/gemma-it.jinja b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/chat_templates/gemma-it.jinja similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/chat_templates/gemma-it.jinja rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/chat_templates/gemma-it.jinja diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/chat_templates/llama-2-chat.jinja b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/chat_templates/llama-2-chat.jinja similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/chat_templates/llama-2-chat.jinja rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/chat_templates/llama-2-chat.jinja diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/chat_templates/llama-3-chat.jinja b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/chat_templates/llama-3-chat.jinja similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/chat_templates/llama-3-chat.jinja rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/chat_templates/llama-3-chat.jinja diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/chat_templates/mistral-instruct.jinja b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/chat_templates/mistral-instruct.jinja similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/chat_templates/mistral-instruct.jinja rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/chat_templates/mistral-instruct.jinja diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/chat_templates/openchat.jinja b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/chat_templates/openchat.jinja similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/chat_templates/openchat.jinja rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/chat_templates/openchat.jinja diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/chat_templates/phi-3.jinja b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/chat_templates/phi-3.jinja similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/chat_templates/phi-3.jinja rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/chat_templates/phi-3.jinja diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/chat_templates/saiga.jinja b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/chat_templates/saiga.jinja similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/chat_templates/saiga.jinja rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/chat_templates/saiga.jinja diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/chat_templates/solar-instruct.jinja b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/chat_templates/solar-instruct.jinja similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/chat_templates/solar-instruct.jinja rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/chat_templates/solar-instruct.jinja diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/chat_templates/vicuna.jinja b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/chat_templates/vicuna.jinja similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/chat_templates/vicuna.jinja rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/chat_templates/vicuna.jinja diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/chat_templates/zephyr.jinja b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/chat_templates/zephyr.jinja similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/chat_templates/zephyr.jinja rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/chat_templates/zephyr.jinja diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/generation_configs/alpaca.json b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/generation_configs/alpaca.json similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/generation_configs/alpaca.json rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/generation_configs/alpaca.json diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/generation_configs/amberchat.json b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/generation_configs/amberchat.json similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/generation_configs/amberchat.json rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/generation_configs/amberchat.json diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/generation_configs/chatqa.json b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/generation_configs/chatqa.json similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/generation_configs/chatqa.json rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/generation_configs/chatqa.json diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/generation_configs/gemma-it.json b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/generation_configs/gemma-it.json similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/generation_configs/gemma-it.json rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/generation_configs/gemma-it.json diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/generation_configs/llama-2-chat.json b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/generation_configs/llama-2-chat.json similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/generation_configs/llama-2-chat.json rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/generation_configs/llama-2-chat.json diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/generation_configs/llama-3-chat.json b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/generation_configs/llama-3-chat.json similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/generation_configs/llama-3-chat.json rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/generation_configs/llama-3-chat.json diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/generation_configs/mistral-instruct.json b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/generation_configs/mistral-instruct.json similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/generation_configs/mistral-instruct.json rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/generation_configs/mistral-instruct.json diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/generation_configs/openchat.json b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/generation_configs/openchat.json similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/generation_configs/openchat.json rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/generation_configs/openchat.json diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/generation_configs/orca-2.json b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/generation_configs/orca-2.json similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/generation_configs/orca-2.json rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/generation_configs/orca-2.json diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/generation_configs/phi-3.json b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/generation_configs/phi-3.json similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/generation_configs/phi-3.json rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/generation_configs/phi-3.json diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/generation_configs/qwen2-chat.json b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/generation_configs/qwen2-chat.json similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/generation_configs/qwen2-chat.json rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/generation_configs/qwen2-chat.json diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/generation_configs/saiga.json b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/generation_configs/saiga.json similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/generation_configs/saiga.json rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/generation_configs/saiga.json diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/generation_configs/solar-instruct.json b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/generation_configs/solar-instruct.json similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/generation_configs/solar-instruct.json rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/generation_configs/solar-instruct.json diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/generation_configs/vicuna.json b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/generation_configs/vicuna.json similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/generation_configs/vicuna.json rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/generation_configs/vicuna.json diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/generation_configs/yi-chat.json b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/generation_configs/yi-chat.json similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/generation_configs/yi-chat.json rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/generation_configs/yi-chat.json diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/generation_configs/zephyr.json b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/generation_configs/zephyr.json similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/chat_templates/generation_configs/zephyr.json rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/chat_templates/generation_configs/zephyr.json diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/service.py b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/service.py new file mode 100644 index 00000000..42152243 --- /dev/null +++ b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/service.py @@ -0,0 +1,239 @@ +import functools +import json +import logging +import os +import sys +import uuid +from typing import AsyncGenerator, Literal, Optional + +import bentoml +import fastapi +import fastapi.staticfiles +import pydantic +import vllm.entrypoints.openai.api_server as vllm_api_server +import yaml +from annotated_types import Ge, Le +from bento_constants import CONSTANT_YAML +from fastapi.responses import FileResponse +from typing_extensions import Annotated, Literal + + +class Message(pydantic.BaseModel): + content: str + role: Literal["system", "user", "assistant"] + + +CONSTANTS = yaml.safe_load(CONSTANT_YAML) + +ENGINE_CONFIG = CONSTANTS["engine_config"] +SERVICE_CONFIG = CONSTANTS["service_config"] +OVERRIDE_CHAT_TEMPLATE = CONSTANTS.get("chat_template") + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +openai_api_app = fastapi.FastAPI() +static_app = fastapi.FastAPI() +ui_app = fastapi.FastAPI() + + +OPENAI_ENDPOINTS = [ + ["/chat/completions", vllm_api_server.create_chat_completion, ["POST"]], + ["/completions", vllm_api_server.create_completion, ["POST"]], + ["/models", vllm_api_server.show_available_models, ["GET"]], +] + + +class Message(pydantic.BaseModel): + role: Literal["system", "user", "assistant"] + content: str + + +for route, endpoint, methods in OPENAI_ENDPOINTS: + openai_api_app.add_api_route( + path=route, + endpoint=endpoint, + methods=methods, + include_in_schema=True, + ) + + +STATIC_DIR = os.path.join(os.path.dirname(__file__), "ui") + +ui_app.mount( + "/static", fastapi.staticfiles.StaticFiles(directory=STATIC_DIR), name="static" +) + + +@ui_app.get("/") +async def serve_chat_html(): + return FileResponse(os.path.join(STATIC_DIR, "chat.html")) + + +@ui_app.get("/{full_path:path}") +async def catch_all(full_path: str): + file_path = os.path.join(STATIC_DIR, full_path) + if os.path.exists(file_path): + return FileResponse(file_path) + return FileResponse(os.path.join(STATIC_DIR, "chat.html")) + + +# special handling for prometheus_client of bentoml +if "prometheus_client" in sys.modules: + sys.modules.pop("prometheus_client") + + +@bentoml.mount_asgi_app(openai_api_app, path="/v1") +@bentoml.mount_asgi_app(ui_app, path="/chat") +@bentoml.service(**SERVICE_CONFIG) +class VLLM: + def __init__(self) -> None: + from transformers import AutoTokenizer + from vllm import AsyncEngineArgs, AsyncLLMEngine + from vllm.entrypoints.openai.serving_chat import OpenAIServingChat + from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion + + ENGINE_ARGS = AsyncEngineArgs(**ENGINE_CONFIG) + self.engine = AsyncLLMEngine.from_engine_args(ENGINE_ARGS) + self.tokenizer = AutoTokenizer.from_pretrained(ENGINE_CONFIG["model"]) + logger.info(f"VLLM service initialized with model: {ENGINE_CONFIG['model']}") + + if OVERRIDE_CHAT_TEMPLATE: # use community chat template + gen_config = _get_gen_config(CONSTANTS["chat_template"]) + chat_template = gen_config["template"] + else: + chat_template = None + + model_config = self.engine.engine.get_model_config() + + # inject the engine into the openai serving chat and completion + vllm_api_server.openai_serving_chat = OpenAIServingChat( + engine=self.engine, + served_model_names=[ENGINE_CONFIG["model"]], + response_role="assistant", + chat_template=chat_template, + model_config=model_config, + ) + vllm_api_server.openai_serving_completion = OpenAIServingCompletion( + engine=self.engine, + served_model_names=[ENGINE_CONFIG["model"]], + model_config=model_config, + lora_modules=None, + ) + + @bentoml.api(route="/api/generate") + async def generate( + self, + prompt: str = "Explain superconductors like I'm five years old", + model: str = ENGINE_CONFIG["model"], + max_tokens: Annotated[ + int, + Ge(128), + Le(ENGINE_CONFIG["max_model_len"]), + ] = ENGINE_CONFIG["max_model_len"], + stop: Optional[list[str]] = None, + ) -> AsyncGenerator[str, None]: + if stop is None: + stop = [] + + from vllm import SamplingParams + + SAMPLING_PARAM = SamplingParams( + max_tokens=max_tokens, + stop=stop, + ) + stream = await self.engine.add_request(uuid.uuid4().hex, prompt, SAMPLING_PARAM) + + cursor = 0 + async for request_output in stream: + text = request_output.outputs[0].text + yield text[cursor:] + cursor = len(text) + + @bentoml.api(route="/api/chat") + async def chat( + self, + messages: list[Message] = [ + Message(content="what is the meaning of life?", role="user") + ], + model: str = ENGINE_CONFIG["model"], + max_tokens: Annotated[ + int, + Ge(128), + Le(ENGINE_CONFIG["max_model_len"]), + ] = ENGINE_CONFIG["max_model_len"], + stop: Optional[list[str]] = None, + ) -> AsyncGenerator[str, None]: + """ + light-weight chat API that takes in a list of messages and returns a response + """ + from vllm import SamplingParams + + try: + if OVERRIDE_CHAT_TEMPLATE: # community chat template + gen_config = _get_gen_config(CONSTANTS["chat_template"]) + if not stop: + if gen_config["stop_str"]: + stop = [gen_config["stop_str"]] + else: + stop = [] + system_prompt = gen_config["system_prompt"] + self.tokenizer.chat_template = gen_config["template"] + else: + if not stop: + if self.tokenizer.eos_token is not None: + stop = [self.tokenizer.eos_token] + else: + stop = [] + system_prompt = None + + SAMPLING_PARAM = SamplingParams( + max_tokens=max_tokens, + stop=stop, + ) + if system_prompt and messages[0].role != "system": + messages = [dict(role="system", content=system_prompt)] + messages + + prompt = self.tokenizer.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True, + ) + + stream = await self.engine.add_request( + uuid.uuid4().hex, prompt, SAMPLING_PARAM + ) + + cursor = 0 + strip_flag = True + async for request_output in stream: + text = request_output.outputs[0].text + assistant_message = text[cursor:] + if not strip_flag: # strip the leading whitespace + yield assistant_message + elif assistant_message.strip(): + strip_flag = False + yield assistant_message.lstrip() + cursor = len(text) + except Exception as e: + logger.error(f"Error in chat API: {e}") + yield f"Error in chat API: {e}" + + +@functools.lru_cache(maxsize=1) +def _get_gen_config(community_chat_template: str) -> dict: + logger.info(f"Load community_chat_template: {community_chat_template}") + chat_template_path = os.path.join( + os.path.dirname(__file__), "chat_templates", "chat_templates" + ) + config_path = os.path.join( + os.path.dirname(__file__), "chat_templates", "generation_configs" + ) + with open(os.path.join(config_path, f"{community_chat_template}.json")) as f: + gen_config = json.load(f) + chat_template_file = gen_config["chat_template"].split("/")[-1] + with open(os.path.join(chat_template_path, chat_template_file)) as f: + chat_template = f.read() + gen_config["template"] = chat_template.replace(" ", "").replace("\n", "") + return gen_config diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/404.html b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/404.html similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/404.html rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/404.html diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/700-532b1fe2415e5859.js b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/700-532b1fe2415e5859.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/700-532b1fe2415e5859.js rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/700-532b1fe2415e5859.js diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/995-34374f39bb210839.js b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/995-34374f39bb210839.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/995-34374f39bb210839.js rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/995-34374f39bb210839.js diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/css/429544bd3cd8ce3a.css b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/css/429544bd3cd8ce3a.css similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/css/429544bd3cd8ce3a.css rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/css/429544bd3cd8ce3a.css diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/css/5b67f082b31cfc7b.css b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/css/5b67f082b31cfc7b.css similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/css/5b67f082b31cfc7b.css rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/css/5b67f082b31cfc7b.css diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/css/9e63023b20ddb15e.css b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/css/9e63023b20ddb15e.css similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/_next/static/css/9e63023b20ddb15e.css rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/_next/static/css/9e63023b20ddb15e.css diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/apple-touch-icon.png b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/apple-touch-icon.png similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/apple-touch-icon.png rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/apple-touch-icon.png diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/chat.html b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/chat.html similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/chat.html rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/chat.html diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/chat.txt b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/chat.txt similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/chat.txt rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/chat.txt diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/favicon-16x16.png b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/favicon-16x16.png similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/favicon-16x16.png rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/favicon-16x16.png diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/favicon.ico b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/favicon.ico similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/favicon.ico rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/favicon.ico diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/index.html b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/index.html similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/index.html rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/index.html diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/index.txt b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/index.txt similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/ui/index.txt rename to bentoml/bentos/qwen2/7b-instruct-awq-4bit-6b70/src/ui/index.txt diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/README.md b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/README.md deleted file mode 100644 index 2d1409cc..00000000 --- a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/README.md +++ /dev/null @@ -1,16 +0,0 @@ -# qwen2:7b-instruct-awq-4bit-77be - -[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML) -[![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/) -[![join_slack](https://badgen.net/badge/Join/BentoML%20Slack/cyan?icon=slack)](https://l.bentoml.com/join-slack-swagger) -[![BentoML GitHub Repo](https://img.shields.io/github/stars/bentoml/bentoml?style=social)](https://github.com/bentoml/BentoML) -[![Twitter Follow](https://img.shields.io/twitter/follow/bentomlai?label=Follow%20BentoML&style=social)](https://twitter.com/bentomlai) - -This is a Machine Learning Service created with BentoML. - -## Help - -* [📖 Documentation](https://docs.bentoml.com/en/latest/): Learn how to use BentoML. -* [💬 Community](https://l.bentoml.com/join-slack-swagger): Join the BentoML Slack community. -* [🐛 GitHub Issues](https://github.com/bentoml/BentoML/issues): Report bugs and feature requests. -* Tip: you can also [customize this README](https://docs.bentoml.com/en/latest/concepts/bento.html#description). diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/env/python/requirements.lock.txt b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/env/python/requirements.lock.txt deleted file mode 100644 index 9118ea44..00000000 --- a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/env/python/requirements.lock.txt +++ /dev/null @@ -1,147 +0,0 @@ -aiohttp==3.9.5 -aiosignal==1.3.1 -annotated-types==0.7.0 -anyio==4.4.0 -appdirs==1.4.4 -asgiref==3.8.1 -async-timeout==4.0.3 -attrs==23.2.0 -bentoml==1.2.19 -build==1.2.1 -cattrs==23.1.2 -certifi==2024.7.4 -charset-normalizer==3.3.2 -circus==0.18.0 -click==8.1.7 -click-option-group==0.5.6 -cloudpickle==3.0.0 -cmake==3.30.0 -deepmerge==1.1.1 -deprecated==1.2.14 -diskcache==5.6.3 -distro==1.9.0 -dnspython==2.6.1 -email-validator==2.2.0 -exceptiongroup==1.2.1 -fastapi==0.111.0 -fastapi-cli==0.0.4 -filelock==3.15.4 -frozenlist==1.4.1 -fs==2.4.16 -fsspec==2024.6.1 -h11==0.14.0 -httpcore==1.0.5 -httptools==0.6.1 -httpx==0.27.0 -httpx-ws==0.6.0 -huggingface-hub==0.23.4 -idna==3.7 -importlib-metadata==6.11.0 -inflection==0.5.1 -interegular==0.3.3 -jinja2==3.1.4 -joblib==1.4.2 -jsonschema==4.22.0 -jsonschema-specifications==2023.12.1 -lark==1.1.9 -llvmlite==0.43.0 -lm-format-enforcer==0.10.1 -markdown-it-py==3.0.0 -markupsafe==2.1.5 -mdurl==0.1.2 -mpmath==1.3.0 -msgpack==1.0.8 -multidict==6.0.5 -nest-asyncio==1.6.0 -networkx==3.2.1 -ninja==1.11.1.1 -numba==0.60.0 -numpy==1.26.0 -nvidia-cublas-cu12==12.1.3.1 -nvidia-cuda-cupti-cu12==12.1.105 -nvidia-cuda-nvrtc-cu12==12.1.105 -nvidia-cuda-runtime-cu12==12.1.105 -nvidia-cudnn-cu12==8.9.2.26 -nvidia-cufft-cu12==11.0.2.54 -nvidia-curand-cu12==10.3.2.106 -nvidia-cusolver-cu12==11.4.5.107 -nvidia-cusparse-cu12==12.1.0.106 -nvidia-ml-py==11.525.150 -nvidia-nccl-cu12==2.20.5 -nvidia-nvjitlink-cu12==12.5.82 -nvidia-nvtx-cu12==12.1.105 -openai==1.35.10 -opentelemetry-api==1.20.0 -opentelemetry-instrumentation==0.41b0 -opentelemetry-instrumentation-aiohttp-client==0.41b0 -opentelemetry-instrumentation-asgi==0.41b0 -opentelemetry-sdk==1.20.0 -opentelemetry-semantic-conventions==0.41b0 -opentelemetry-util-http==0.41b0 -orjson==3.10.6 -outlines==0.0.34 -packaging==24.1 -pathspec==0.12.1 -pip-requirements-parser==32.0.1 -pip-tools==7.4.1 -prometheus-client==0.20.0 -prometheus-fastapi-instrumentator==7.0.0 -protobuf==5.27.2 -psutil==6.0.0 -py-cpuinfo==9.0.0 -pydantic==2.8.2 -pydantic-core==2.20.1 -pygments==2.18.0 -pyparsing==3.1.2 -pyproject-hooks==1.1.0 -python-dateutil==2.9.0.post0 -python-dotenv==1.0.1 -python-json-logger==2.0.7 -python-multipart==0.0.9 -pyyaml==6.0.1 -pyzmq==26.0.3 -ray==2.31.0 -referencing==0.35.1 -regex==2024.5.15 -requests==2.32.3 -rich==13.7.1 -rpds-py==0.18.1 -safetensors==0.4.3 -schema==0.7.7 -scipy==1.13.1 -sentencepiece==0.2.0 -shellingham==1.5.4 -simple-di==0.1.5 -six==1.16.0 -sniffio==1.3.1 -starlette==0.37.2 -sympy==1.12.1 -tiktoken==0.7.0 -tokenizers==0.19.1 -tomli==2.0.1 -tomli-w==1.0.0 -torch==2.3.0 -tornado==6.4.1 -tqdm==4.66.4 -transformers==4.41.0 -triton==2.3.0 -typer==0.12.3 -typing-extensions==4.12.2 -ujson==5.10.0 -urllib3==2.2.2 -uvicorn[standard]==0.30.1 -uvloop==0.19.0 -vllm==0.4.3 -vllm-flash-attn==2.5.8.post2 -watchfiles==0.22.0 -websockets==12.0 -wheel==0.43.0 -wrapt==1.16.0 -wsproto==1.2.0 -xformers==0.0.26.post1 -yarl==1.9.4 -zipp==3.19.2 - -# The following packages are considered to be unsafe in a requirements file: -pip==24.1.1 -setuptools==70.2.0 diff --git a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/service.py b/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/service.py deleted file mode 100644 index 94af0b0d..00000000 --- a/bentoml/bentos/qwen2/7b-instruct-awq-4bit-77be/src/service.py +++ /dev/null @@ -1,239 +0,0 @@ -import functools -import json -import logging -import os -import sys -import uuid -from typing import AsyncGenerator, Literal, Optional - -import bentoml -import fastapi -import fastapi.staticfiles -import pydantic -import vllm.entrypoints.openai.api_server as vllm_api_server -import yaml -from annotated_types import Ge, Le -from bento_constants import CONSTANT_YAML -from fastapi.responses import FileResponse -from typing_extensions import Annotated, Literal - - -class Message(pydantic.BaseModel): - content: str - role: Literal["system", "user", "assistant"] - - -CONSTANTS = yaml.safe_load(CONSTANT_YAML) - -ENGINE_CONFIG = CONSTANTS["engine_config"] -SERVICE_CONFIG = CONSTANTS["service_config"] -OVERRIDE_CHAT_TEMPLATE = CONSTANTS.get("chat_template") - -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) - - -openai_api_app = fastapi.FastAPI() -static_app = fastapi.FastAPI() -ui_app = fastapi.FastAPI() - - -OPENAI_ENDPOINTS = [ - ["/chat/completions", vllm_api_server.create_chat_completion, ["POST"]], - ["/completions", vllm_api_server.create_completion, ["POST"]], - ["/models", vllm_api_server.show_available_models, ["GET"]], -] - - -class Message(pydantic.BaseModel): - role: Literal["system", "user", "assistant"] - content: str - - -for route, endpoint, methods in OPENAI_ENDPOINTS: - openai_api_app.add_api_route( - path=route, - endpoint=endpoint, - methods=methods, - include_in_schema=True, - ) - - -STATIC_DIR = os.path.join(os.path.dirname(__file__), "ui") - -ui_app.mount( - "/static", fastapi.staticfiles.StaticFiles(directory=STATIC_DIR), name="static" -) - - -@ui_app.get("/") -async def serve_chat_html(): - return FileResponse(os.path.join(STATIC_DIR, "chat.html")) - - -@ui_app.get("/{full_path:path}") -async def catch_all(full_path: str): - file_path = os.path.join(STATIC_DIR, full_path) - if os.path.exists(file_path): - return FileResponse(file_path) - return FileResponse(os.path.join(STATIC_DIR, "chat.html")) - - -# special handling for prometheus_client of bentoml -if "prometheus_client" in sys.modules: - sys.modules.pop("prometheus_client") - - -@bentoml.mount_asgi_app(openai_api_app, path="/v1") -@bentoml.mount_asgi_app(ui_app, path="/ui") -@bentoml.service(**SERVICE_CONFIG) -class VLLM: - def __init__(self) -> None: - from transformers import AutoTokenizer - from vllm import AsyncEngineArgs, AsyncLLMEngine - from vllm.entrypoints.openai.serving_chat import OpenAIServingChat - from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion - - ENGINE_ARGS = AsyncEngineArgs(**ENGINE_CONFIG) - self.engine = AsyncLLMEngine.from_engine_args(ENGINE_ARGS) - self.tokenizer = AutoTokenizer.from_pretrained(ENGINE_CONFIG["model"]) - logger.info(f"VLLM service initialized with model: {ENGINE_CONFIG['model']}") - - if OVERRIDE_CHAT_TEMPLATE: # use community chat template - gen_config = _get_gen_config(CONSTANTS["chat_template"]) - chat_template = gen_config["template"] - else: - chat_template = None - - model_config = self.engine.engine.get_model_config() - - # inject the engine into the openai serving chat and completion - vllm_api_server.openai_serving_chat = OpenAIServingChat( - engine=self.engine, - served_model_names=[ENGINE_CONFIG["model"]], - response_role="assistant", - chat_template=chat_template, - model_config=model_config, - ) - vllm_api_server.openai_serving_completion = OpenAIServingCompletion( - engine=self.engine, - served_model_names=[ENGINE_CONFIG["model"]], - model_config=model_config, - lora_modules=None, - ) - - @bentoml.api(route="/api/generate") - async def generate( - self, - prompt: str = "Explain superconductors like I'm five years old", - model: str = ENGINE_CONFIG["model"], - max_tokens: Annotated[ - int, - Ge(128), - Le(ENGINE_CONFIG["max_model_len"]), - ] = ENGINE_CONFIG["max_model_len"], - stop: Optional[list[str]] = None, - ) -> AsyncGenerator[str, None]: - if stop is None: - stop = [] - - from vllm import SamplingParams - - SAMPLING_PARAM = SamplingParams( - max_tokens=max_tokens, - stop=stop, - ) - stream = await self.engine.add_request(uuid.uuid4().hex, prompt, SAMPLING_PARAM) - - cursor = 0 - async for request_output in stream: - text = request_output.outputs[0].text - yield text[cursor:] - cursor = len(text) - - @bentoml.api(route="/api/chat") - async def chat( - self, - messages: list[Message] = [ - Message(content="what is the meaning of life?", role="user") - ], - model: str = ENGINE_CONFIG["model"], - max_tokens: Annotated[ - int, - Ge(128), - Le(ENGINE_CONFIG["max_model_len"]), - ] = ENGINE_CONFIG["max_model_len"], - stop: Optional[list[str]] = None, - ) -> AsyncGenerator[str, None]: - """ - light-weight chat API that takes in a list of messages and returns a response - """ - from vllm import SamplingParams - - try: - if OVERRIDE_CHAT_TEMPLATE: # community chat template - gen_config = _get_gen_config(CONSTANTS["chat_template"]) - if not stop: - if gen_config["stop_str"]: - stop = [gen_config["stop_str"]] - else: - stop = [] - system_prompt = gen_config["system_prompt"] - self.tokenizer.chat_template = gen_config["template"] - else: - if not stop: - if self.tokenizer.eos_token is not None: - stop = [self.tokenizer.eos_token] - else: - stop = [] - system_prompt = None - - SAMPLING_PARAM = SamplingParams( - max_tokens=max_tokens, - stop=stop, - ) - if system_prompt and messages[0].role != "system": - messages = [dict(role="system", content=system_prompt)] + messages - - prompt = self.tokenizer.apply_chat_template( - messages, - tokenize=False, - add_generation_prompt=True, - ) - - stream = await self.engine.add_request( - uuid.uuid4().hex, prompt, SAMPLING_PARAM - ) - - cursor = 0 - strip_flag = True - async for request_output in stream: - text = request_output.outputs[0].text - assistant_message = text[cursor:] - if not strip_flag: # strip the leading whitespace - yield assistant_message - elif assistant_message.strip(): - strip_flag = False - yield assistant_message.lstrip() - cursor = len(text) - except Exception as e: - logger.error(f"Error in chat API: {e}") - yield f"Error in chat API: {e}" - - -@functools.lru_cache(maxsize=1) -def _get_gen_config(community_chat_template: str) -> dict: - logger.info(f"Load community_chat_template: {community_chat_template}") - chat_template_path = os.path.join( - os.path.dirname(__file__), "chat_templates", "chat_templates" - ) - config_path = os.path.join( - os.path.dirname(__file__), "chat_templates", "generation_configs" - ) - with open(os.path.join(config_path, f"{community_chat_template}.json")) as f: - gen_config = json.load(f) - chat_template_file = gen_config["chat_template"].split("/")[-1] - with open(os.path.join(chat_template_path, chat_template_file)) as f: - chat_template = f.read() - gen_config["template"] = chat_template.replace(" ", "").replace("\n", "") - return gen_config diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-805b/README.md b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/README.md new file mode 100644 index 00000000..e10ffbd4 --- /dev/null +++ b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/README.md @@ -0,0 +1,16 @@ +# qwen2:7b-instruct-fp16-805b + +[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML) +[![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/) +[![join_slack](https://badgen.net/badge/Join/BentoML%20Slack/cyan?icon=slack)](https://l.bentoml.com/join-slack-swagger) +[![BentoML GitHub Repo](https://img.shields.io/github/stars/bentoml/bentoml?style=social)](https://github.com/bentoml/BentoML) +[![Twitter Follow](https://img.shields.io/twitter/follow/bentomlai?label=Follow%20BentoML&style=social)](https://twitter.com/bentomlai) + +This is a Machine Learning Service created with BentoML. + +## Help + +* [📖 Documentation](https://docs.bentoml.com/en/latest/): Learn how to use BentoML. +* [💬 Community](https://l.bentoml.com/join-slack-swagger): Join the BentoML Slack community. +* [🐛 GitHub Issues](https://github.com/bentoml/BentoML/issues): Report bugs and feature requests. +* Tip: you can also [customize this README](https://docs.bentoml.com/en/latest/concepts/bento.html#description). diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/apis/openapi.yaml b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/apis/openapi.yaml similarity index 99% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/apis/openapi.yaml rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/apis/openapi.yaml index f74d2182..2cafa05c 100644 --- a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/apis/openapi.yaml +++ b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/apis/openapi.yaml @@ -1036,6 +1036,39 @@ paths: tags: - Service APIs x-bentoml-name: generate + /chat/: + get: + operationId: serve_chat_html__get + responses: + '200': + content: + application/json: + schema: {} + description: Successful Response + summary: Serve Chat Html + /chat/{full_path}: + get: + operationId: catch_all__full_path__get + parameters: + - in: path + name: full_path + required: true + schema: + title: Full Path + type: string + responses: + '200': + content: + application/json: + schema: {} + description: Successful Response + '422': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + description: Validation Error + summary: Catch All /healthz: get: description: Health check endpoint. Expecting an empty response with status @@ -1076,39 +1109,6 @@ paths: description: Successful Response tags: - Infrastructure - /ui/: - get: - operationId: serve_chat_html__get - responses: - '200': - content: - application/json: - schema: {} - description: Successful Response - summary: Serve Chat Html - /ui/{full_path}: - get: - operationId: catch_all__full_path__get - parameters: - - in: path - name: full_path - required: true - schema: - title: Full Path - type: string - responses: - '200': - content: - application/json: - schema: {} - description: Successful Response - '422': - content: - application/json: - schema: - $ref: '#/components/schemas/HTTPValidationError' - description: Validation Error - summary: Catch All /v1/chat/completions: post: operationId: create_chat_completion_chat_completions_post diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/apis/schema.json b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/apis/schema.json similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/apis/schema.json rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/apis/schema.json diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/bento.yaml b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/bento.yaml similarity index 97% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/bento.yaml rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/bento.yaml index fc6ea9d5..936e46a3 100644 --- a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/bento.yaml +++ b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/bento.yaml @@ -1,8 +1,8 @@ service: service:VLLM name: qwen2 -version: 7b-instruct-fp16-9381 +version: 7b-instruct-fp16-805b bentoml_version: 1.2.19 -creation_time: '2024-07-05T07:35:30.163927+00:00' +creation_time: '2024-07-08T08:04:26.795593+00:00' labels: openllm_alias: 7b,7b-instruct openllm_hf_model_id: Qwen/Qwen2-7B-Instruct diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/env/docker/Dockerfile b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/env/docker/Dockerfile similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/env/docker/Dockerfile rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/env/docker/Dockerfile diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/env/docker/entrypoint.sh b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/env/docker/entrypoint.sh similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/env/docker/entrypoint.sh rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/env/docker/entrypoint.sh diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/env/python/install.sh b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/env/python/install.sh similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/env/python/install.sh rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/env/python/install.sh diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-805b/env/python/requirements.lock.txt b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/env/python/requirements.lock.txt new file mode 100644 index 00000000..925ce193 --- /dev/null +++ b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/env/python/requirements.lock.txt @@ -0,0 +1,147 @@ +aiohttp==3.9.5 +aiosignal==1.3.1 +annotated-types==0.7.0 +anyio==4.4.0 +appdirs==1.4.4 +asgiref==3.8.1 +async-timeout==4.0.3 +attrs==23.2.0 +bentoml==1.2.19 +build==1.2.1 +cattrs==23.1.2 +certifi==2024.7.4 +charset-normalizer==3.3.2 +circus==0.18.0 +click==8.1.7 +click-option-group==0.5.6 +cloudpickle==3.0.0 +cmake==3.30.0 +deepmerge==1.1.1 +deprecated==1.2.14 +diskcache==5.6.3 +distro==1.9.0 +dnspython==2.6.1 +email-validator==2.2.0 +exceptiongroup==1.2.1 +fastapi==0.111.0 +fastapi-cli==0.0.4 +filelock==3.15.4 +frozenlist==1.4.1 +fs==2.4.16 +fsspec==2024.6.1 +h11==0.14.0 +httpcore==1.0.5 +httptools==0.6.1 +httpx==0.27.0 +httpx-ws==0.6.0 +huggingface-hub==0.23.4 +idna==3.7 +importlib-metadata==6.11.0 +inflection==0.5.1 +interegular==0.3.3 +jinja2==3.1.4 +joblib==1.4.2 +jsonschema==4.22.0 +jsonschema-specifications==2023.12.1 +lark==1.1.9 +llvmlite==0.43.0 +lm-format-enforcer==0.10.1 +markdown-it-py==3.0.0 +markupsafe==2.1.5 +mdurl==0.1.2 +mpmath==1.3.0 +msgpack==1.0.8 +multidict==6.0.5 +nest-asyncio==1.6.0 +networkx==3.2.1 +ninja==1.11.1.1 +numba==0.60.0 +numpy==1.26.0 +nvidia-cublas-cu12==12.1.3.1 +nvidia-cuda-cupti-cu12==12.1.105 +nvidia-cuda-nvrtc-cu12==12.1.105 +nvidia-cuda-runtime-cu12==12.1.105 +nvidia-cudnn-cu12==8.9.2.26 +nvidia-cufft-cu12==11.0.2.54 +nvidia-curand-cu12==10.3.2.106 +nvidia-cusolver-cu12==11.4.5.107 +nvidia-cusparse-cu12==12.1.0.106 +nvidia-ml-py==11.525.150 +nvidia-nccl-cu12==2.20.5 +nvidia-nvjitlink-cu12==12.5.82 +nvidia-nvtx-cu12==12.1.105 +openai==1.35.10 +opentelemetry-api==1.20.0 +opentelemetry-instrumentation==0.41b0 +opentelemetry-instrumentation-aiohttp-client==0.41b0 +opentelemetry-instrumentation-asgi==0.41b0 +opentelemetry-sdk==1.20.0 +opentelemetry-semantic-conventions==0.41b0 +opentelemetry-util-http==0.41b0 +orjson==3.10.6 +outlines==0.0.34 +packaging==24.1 +pathspec==0.12.1 +pip-requirements-parser==32.0.1 +pip-tools==7.4.1 +prometheus-client==0.20.0 +prometheus-fastapi-instrumentator==7.0.0 +protobuf==5.27.2 +psutil==6.0.0 +py-cpuinfo==9.0.0 +pydantic==2.8.2 +pydantic-core==2.20.1 +pygments==2.18.0 +pyparsing==3.1.2 +pyproject-hooks==1.1.0 +python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 +python-json-logger==2.0.7 +python-multipart==0.0.9 +pyyaml==6.0.1 +pyzmq==26.0.3 +ray==2.31.0 +referencing==0.35.1 +regex==2024.5.15 +requests==2.32.3 +rich==13.7.1 +rpds-py==0.18.1 +safetensors==0.4.3 +schema==0.7.7 +scipy==1.13.1 +sentencepiece==0.2.0 +shellingham==1.5.4 +simple-di==0.1.5 +six==1.16.0 +sniffio==1.3.1 +starlette==0.37.2 +sympy==1.12.1 +tiktoken==0.7.0 +tokenizers==0.19.1 +tomli==2.0.1 +tomli-w==1.0.0 +torch==2.3.0 +tornado==6.4.1 +tqdm==4.66.4 +transformers==4.41.0 +triton==2.3.0 +typer==0.12.3 +typing-extensions==4.12.2 +ujson==5.10.0 +urllib3==2.2.2 +uvicorn[standard]==0.30.1 +uvloop==0.19.0 +vllm==0.4.3 +vllm-flash-attn==2.5.8.post2 +watchfiles==0.22.0 +websockets==12.0 +wheel==0.43.0 +wrapt==1.16.0 +wsproto==1.2.0 +xformers==0.0.26.post1 +yarl==1.9.4 +zipp==3.19.2 + +# The following packages are considered to be unsafe in a requirements file: +pip==24.1.2 +setuptools==70.2.0 diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/env/python/requirements.txt b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/env/python/requirements.txt similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/env/python/requirements.txt rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/env/python/requirements.txt diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/env/python/version.txt b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/env/python/version.txt similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/env/python/version.txt rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/env/python/version.txt diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/bento_constants.py b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/bento_constants.py similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/bento_constants.py rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/bento_constants.py diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/bentofile.yaml b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/bentofile.yaml similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/bentofile.yaml rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/bentofile.yaml diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/chat_templates/alpaca.jinja b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/chat_templates/alpaca.jinja similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/chat_templates/alpaca.jinja rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/chat_templates/alpaca.jinja diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/chat_templates/amberchat.jinja b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/chat_templates/amberchat.jinja similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/chat_templates/amberchat.jinja rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/chat_templates/amberchat.jinja diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/chat_templates/chatml.jinja b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/chat_templates/chatml.jinja similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/chat_templates/chatml.jinja rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/chat_templates/chatml.jinja diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/chat_templates/chatqa.jinja b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/chat_templates/chatqa.jinja similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/chat_templates/chatqa.jinja rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/chat_templates/chatqa.jinja diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/chat_templates/falcon-instruct.jinja b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/chat_templates/falcon-instruct.jinja similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/chat_templates/falcon-instruct.jinja rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/chat_templates/falcon-instruct.jinja diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/chat_templates/gemma-it.jinja b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/chat_templates/gemma-it.jinja similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/chat_templates/gemma-it.jinja rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/chat_templates/gemma-it.jinja diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/chat_templates/llama-2-chat.jinja b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/chat_templates/llama-2-chat.jinja similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/chat_templates/llama-2-chat.jinja rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/chat_templates/llama-2-chat.jinja diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/chat_templates/llama-3-chat.jinja b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/chat_templates/llama-3-chat.jinja similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/chat_templates/llama-3-chat.jinja rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/chat_templates/llama-3-chat.jinja diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/chat_templates/mistral-instruct.jinja b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/chat_templates/mistral-instruct.jinja similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/chat_templates/mistral-instruct.jinja rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/chat_templates/mistral-instruct.jinja diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/chat_templates/openchat.jinja b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/chat_templates/openchat.jinja similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/chat_templates/openchat.jinja rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/chat_templates/openchat.jinja diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/chat_templates/phi-3.jinja b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/chat_templates/phi-3.jinja similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/chat_templates/phi-3.jinja rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/chat_templates/phi-3.jinja diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/chat_templates/saiga.jinja b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/chat_templates/saiga.jinja similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/chat_templates/saiga.jinja rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/chat_templates/saiga.jinja diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/chat_templates/solar-instruct.jinja b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/chat_templates/solar-instruct.jinja similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/chat_templates/solar-instruct.jinja rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/chat_templates/solar-instruct.jinja diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/chat_templates/vicuna.jinja b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/chat_templates/vicuna.jinja similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/chat_templates/vicuna.jinja rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/chat_templates/vicuna.jinja diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/chat_templates/zephyr.jinja b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/chat_templates/zephyr.jinja similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/chat_templates/zephyr.jinja rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/chat_templates/zephyr.jinja diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/generation_configs/alpaca.json b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/generation_configs/alpaca.json similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/generation_configs/alpaca.json rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/generation_configs/alpaca.json diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/generation_configs/amberchat.json b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/generation_configs/amberchat.json similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/generation_configs/amberchat.json rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/generation_configs/amberchat.json diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/generation_configs/chatqa.json b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/generation_configs/chatqa.json similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/generation_configs/chatqa.json rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/generation_configs/chatqa.json diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/generation_configs/gemma-it.json b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/generation_configs/gemma-it.json similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/generation_configs/gemma-it.json rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/generation_configs/gemma-it.json diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/generation_configs/llama-2-chat.json b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/generation_configs/llama-2-chat.json similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/generation_configs/llama-2-chat.json rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/generation_configs/llama-2-chat.json diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/generation_configs/llama-3-chat.json b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/generation_configs/llama-3-chat.json similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/generation_configs/llama-3-chat.json rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/generation_configs/llama-3-chat.json diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/generation_configs/mistral-instruct.json b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/generation_configs/mistral-instruct.json similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/generation_configs/mistral-instruct.json rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/generation_configs/mistral-instruct.json diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/generation_configs/openchat.json b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/generation_configs/openchat.json similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/generation_configs/openchat.json rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/generation_configs/openchat.json diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/generation_configs/orca-2.json b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/generation_configs/orca-2.json similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/generation_configs/orca-2.json rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/generation_configs/orca-2.json diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/generation_configs/phi-3.json b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/generation_configs/phi-3.json similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/generation_configs/phi-3.json rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/generation_configs/phi-3.json diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/generation_configs/qwen2-chat.json b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/generation_configs/qwen2-chat.json similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/generation_configs/qwen2-chat.json rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/generation_configs/qwen2-chat.json diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/generation_configs/saiga.json b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/generation_configs/saiga.json similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/generation_configs/saiga.json rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/generation_configs/saiga.json diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/generation_configs/solar-instruct.json b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/generation_configs/solar-instruct.json similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/generation_configs/solar-instruct.json rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/generation_configs/solar-instruct.json diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/generation_configs/vicuna.json b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/generation_configs/vicuna.json similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/generation_configs/vicuna.json rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/generation_configs/vicuna.json diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/generation_configs/yi-chat.json b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/generation_configs/yi-chat.json similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/generation_configs/yi-chat.json rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/generation_configs/yi-chat.json diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/generation_configs/zephyr.json b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/generation_configs/zephyr.json similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/chat_templates/generation_configs/zephyr.json rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/chat_templates/generation_configs/zephyr.json diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/service.py b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/service.py new file mode 100644 index 00000000..42152243 --- /dev/null +++ b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/service.py @@ -0,0 +1,239 @@ +import functools +import json +import logging +import os +import sys +import uuid +from typing import AsyncGenerator, Literal, Optional + +import bentoml +import fastapi +import fastapi.staticfiles +import pydantic +import vllm.entrypoints.openai.api_server as vllm_api_server +import yaml +from annotated_types import Ge, Le +from bento_constants import CONSTANT_YAML +from fastapi.responses import FileResponse +from typing_extensions import Annotated, Literal + + +class Message(pydantic.BaseModel): + content: str + role: Literal["system", "user", "assistant"] + + +CONSTANTS = yaml.safe_load(CONSTANT_YAML) + +ENGINE_CONFIG = CONSTANTS["engine_config"] +SERVICE_CONFIG = CONSTANTS["service_config"] +OVERRIDE_CHAT_TEMPLATE = CONSTANTS.get("chat_template") + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +openai_api_app = fastapi.FastAPI() +static_app = fastapi.FastAPI() +ui_app = fastapi.FastAPI() + + +OPENAI_ENDPOINTS = [ + ["/chat/completions", vllm_api_server.create_chat_completion, ["POST"]], + ["/completions", vllm_api_server.create_completion, ["POST"]], + ["/models", vllm_api_server.show_available_models, ["GET"]], +] + + +class Message(pydantic.BaseModel): + role: Literal["system", "user", "assistant"] + content: str + + +for route, endpoint, methods in OPENAI_ENDPOINTS: + openai_api_app.add_api_route( + path=route, + endpoint=endpoint, + methods=methods, + include_in_schema=True, + ) + + +STATIC_DIR = os.path.join(os.path.dirname(__file__), "ui") + +ui_app.mount( + "/static", fastapi.staticfiles.StaticFiles(directory=STATIC_DIR), name="static" +) + + +@ui_app.get("/") +async def serve_chat_html(): + return FileResponse(os.path.join(STATIC_DIR, "chat.html")) + + +@ui_app.get("/{full_path:path}") +async def catch_all(full_path: str): + file_path = os.path.join(STATIC_DIR, full_path) + if os.path.exists(file_path): + return FileResponse(file_path) + return FileResponse(os.path.join(STATIC_DIR, "chat.html")) + + +# special handling for prometheus_client of bentoml +if "prometheus_client" in sys.modules: + sys.modules.pop("prometheus_client") + + +@bentoml.mount_asgi_app(openai_api_app, path="/v1") +@bentoml.mount_asgi_app(ui_app, path="/chat") +@bentoml.service(**SERVICE_CONFIG) +class VLLM: + def __init__(self) -> None: + from transformers import AutoTokenizer + from vllm import AsyncEngineArgs, AsyncLLMEngine + from vllm.entrypoints.openai.serving_chat import OpenAIServingChat + from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion + + ENGINE_ARGS = AsyncEngineArgs(**ENGINE_CONFIG) + self.engine = AsyncLLMEngine.from_engine_args(ENGINE_ARGS) + self.tokenizer = AutoTokenizer.from_pretrained(ENGINE_CONFIG["model"]) + logger.info(f"VLLM service initialized with model: {ENGINE_CONFIG['model']}") + + if OVERRIDE_CHAT_TEMPLATE: # use community chat template + gen_config = _get_gen_config(CONSTANTS["chat_template"]) + chat_template = gen_config["template"] + else: + chat_template = None + + model_config = self.engine.engine.get_model_config() + + # inject the engine into the openai serving chat and completion + vllm_api_server.openai_serving_chat = OpenAIServingChat( + engine=self.engine, + served_model_names=[ENGINE_CONFIG["model"]], + response_role="assistant", + chat_template=chat_template, + model_config=model_config, + ) + vllm_api_server.openai_serving_completion = OpenAIServingCompletion( + engine=self.engine, + served_model_names=[ENGINE_CONFIG["model"]], + model_config=model_config, + lora_modules=None, + ) + + @bentoml.api(route="/api/generate") + async def generate( + self, + prompt: str = "Explain superconductors like I'm five years old", + model: str = ENGINE_CONFIG["model"], + max_tokens: Annotated[ + int, + Ge(128), + Le(ENGINE_CONFIG["max_model_len"]), + ] = ENGINE_CONFIG["max_model_len"], + stop: Optional[list[str]] = None, + ) -> AsyncGenerator[str, None]: + if stop is None: + stop = [] + + from vllm import SamplingParams + + SAMPLING_PARAM = SamplingParams( + max_tokens=max_tokens, + stop=stop, + ) + stream = await self.engine.add_request(uuid.uuid4().hex, prompt, SAMPLING_PARAM) + + cursor = 0 + async for request_output in stream: + text = request_output.outputs[0].text + yield text[cursor:] + cursor = len(text) + + @bentoml.api(route="/api/chat") + async def chat( + self, + messages: list[Message] = [ + Message(content="what is the meaning of life?", role="user") + ], + model: str = ENGINE_CONFIG["model"], + max_tokens: Annotated[ + int, + Ge(128), + Le(ENGINE_CONFIG["max_model_len"]), + ] = ENGINE_CONFIG["max_model_len"], + stop: Optional[list[str]] = None, + ) -> AsyncGenerator[str, None]: + """ + light-weight chat API that takes in a list of messages and returns a response + """ + from vllm import SamplingParams + + try: + if OVERRIDE_CHAT_TEMPLATE: # community chat template + gen_config = _get_gen_config(CONSTANTS["chat_template"]) + if not stop: + if gen_config["stop_str"]: + stop = [gen_config["stop_str"]] + else: + stop = [] + system_prompt = gen_config["system_prompt"] + self.tokenizer.chat_template = gen_config["template"] + else: + if not stop: + if self.tokenizer.eos_token is not None: + stop = [self.tokenizer.eos_token] + else: + stop = [] + system_prompt = None + + SAMPLING_PARAM = SamplingParams( + max_tokens=max_tokens, + stop=stop, + ) + if system_prompt and messages[0].role != "system": + messages = [dict(role="system", content=system_prompt)] + messages + + prompt = self.tokenizer.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True, + ) + + stream = await self.engine.add_request( + uuid.uuid4().hex, prompt, SAMPLING_PARAM + ) + + cursor = 0 + strip_flag = True + async for request_output in stream: + text = request_output.outputs[0].text + assistant_message = text[cursor:] + if not strip_flag: # strip the leading whitespace + yield assistant_message + elif assistant_message.strip(): + strip_flag = False + yield assistant_message.lstrip() + cursor = len(text) + except Exception as e: + logger.error(f"Error in chat API: {e}") + yield f"Error in chat API: {e}" + + +@functools.lru_cache(maxsize=1) +def _get_gen_config(community_chat_template: str) -> dict: + logger.info(f"Load community_chat_template: {community_chat_template}") + chat_template_path = os.path.join( + os.path.dirname(__file__), "chat_templates", "chat_templates" + ) + config_path = os.path.join( + os.path.dirname(__file__), "chat_templates", "generation_configs" + ) + with open(os.path.join(config_path, f"{community_chat_template}.json")) as f: + gen_config = json.load(f) + chat_template_file = gen_config["chat_template"].split("/")[-1] + with open(os.path.join(chat_template_path, chat_template_file)) as f: + chat_template = f.read() + gen_config["template"] = chat_template.replace(" ", "").replace("\n", "") + return gen_config diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/404.html b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/404.html similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/404.html rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/404.html diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_buildManifest.js diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/UFSnOXBHq5ysU6-5BuENB/_ssgManifest.js diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/0e5ce63c-f5957df8d97fa48f.js diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/370b0802-87e84e603248538e.js diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/3d47b92a-f8bda4b39f1e2d9d.js diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/479ba886-0c92f49cb8e74e58.js diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/59650de3-87b10f0662b51900.js diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/66ec4792-34336521b476aa45.js diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/674-a1fcdac3696c5ed0.js diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/69-bf2efb63b1299e3b.js diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/700-532b1fe2415e5859.js b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/700-532b1fe2415e5859.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/700-532b1fe2415e5859.js rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/700-532b1fe2415e5859.js diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/899-fa939dd99dc7a1df.js diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/8e1d74a4-a6b9a2554f9153c0.js diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/94730671-0f73873f7f5896de.js diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/995-34374f39bb210839.js b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/995-34374f39bb210839.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/995-34374f39bb210839.js rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/995-34374f39bb210839.js diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/app/(site)/page-5b6e14439f55739b.js diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/app/chat/page-9c8e223f40771eb6.js diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/app/layout-df2dea9dba0ceb06.js diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/app/not-found-c76dccfb8b88da53.js diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/d3ac728e-0c798b3b8aa3bf53.js diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/fd9d1056-32c33f3919735051.js diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/framework-00a8ba1a63cfdc9e.js diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/main-app-e95f89b5006af8a8.js diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/main-bf1416cb53f2b4c0.js diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/pages/_app-d21e88acd55d90f1.js diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/pages/_error-d6107f1aac0c574c.js diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/chunks/webpack-ee8b17d5a5297ccd.js diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/css/429544bd3cd8ce3a.css b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/css/429544bd3cd8ce3a.css similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/css/429544bd3cd8ce3a.css rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/css/429544bd3cd8ce3a.css diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/css/5b67f082b31cfc7b.css b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/css/5b67f082b31cfc7b.css similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/css/5b67f082b31cfc7b.css rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/css/5b67f082b31cfc7b.css diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/css/9e63023b20ddb15e.css b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/css/9e63023b20ddb15e.css similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/_next/static/css/9e63023b20ddb15e.css rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/_next/static/css/9e63023b20ddb15e.css diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/apple-touch-icon.png b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/apple-touch-icon.png similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/apple-touch-icon.png rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/apple-touch-icon.png diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/chat.html b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/chat.html similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/chat.html rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/chat.html diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/chat.txt b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/chat.txt similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/chat.txt rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/chat.txt diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/favicon-16x16.png b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/favicon-16x16.png similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/favicon-16x16.png rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/favicon-16x16.png diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/favicon.ico b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/favicon.ico similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/favicon.ico rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/favicon.ico diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/index.html b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/index.html similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/index.html rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/index.html diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/index.txt b/bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/index.txt similarity index 100% rename from bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/ui/index.txt rename to bentoml/bentos/qwen2/7b-instruct-fp16-805b/src/ui/index.txt diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/README.md b/bentoml/bentos/qwen2/7b-instruct-fp16-9381/README.md deleted file mode 100644 index d3a8d976..00000000 --- a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/README.md +++ /dev/null @@ -1,16 +0,0 @@ -# qwen2:7b-instruct-fp16-9381 - -[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML) -[![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/) -[![join_slack](https://badgen.net/badge/Join/BentoML%20Slack/cyan?icon=slack)](https://l.bentoml.com/join-slack-swagger) -[![BentoML GitHub Repo](https://img.shields.io/github/stars/bentoml/bentoml?style=social)](https://github.com/bentoml/BentoML) -[![Twitter Follow](https://img.shields.io/twitter/follow/bentomlai?label=Follow%20BentoML&style=social)](https://twitter.com/bentomlai) - -This is a Machine Learning Service created with BentoML. - -## Help - -* [📖 Documentation](https://docs.bentoml.com/en/latest/): Learn how to use BentoML. -* [💬 Community](https://l.bentoml.com/join-slack-swagger): Join the BentoML Slack community. -* [🐛 GitHub Issues](https://github.com/bentoml/BentoML/issues): Report bugs and feature requests. -* Tip: you can also [customize this README](https://docs.bentoml.com/en/latest/concepts/bento.html#description). diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/env/python/requirements.lock.txt b/bentoml/bentos/qwen2/7b-instruct-fp16-9381/env/python/requirements.lock.txt deleted file mode 100644 index 9118ea44..00000000 --- a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/env/python/requirements.lock.txt +++ /dev/null @@ -1,147 +0,0 @@ -aiohttp==3.9.5 -aiosignal==1.3.1 -annotated-types==0.7.0 -anyio==4.4.0 -appdirs==1.4.4 -asgiref==3.8.1 -async-timeout==4.0.3 -attrs==23.2.0 -bentoml==1.2.19 -build==1.2.1 -cattrs==23.1.2 -certifi==2024.7.4 -charset-normalizer==3.3.2 -circus==0.18.0 -click==8.1.7 -click-option-group==0.5.6 -cloudpickle==3.0.0 -cmake==3.30.0 -deepmerge==1.1.1 -deprecated==1.2.14 -diskcache==5.6.3 -distro==1.9.0 -dnspython==2.6.1 -email-validator==2.2.0 -exceptiongroup==1.2.1 -fastapi==0.111.0 -fastapi-cli==0.0.4 -filelock==3.15.4 -frozenlist==1.4.1 -fs==2.4.16 -fsspec==2024.6.1 -h11==0.14.0 -httpcore==1.0.5 -httptools==0.6.1 -httpx==0.27.0 -httpx-ws==0.6.0 -huggingface-hub==0.23.4 -idna==3.7 -importlib-metadata==6.11.0 -inflection==0.5.1 -interegular==0.3.3 -jinja2==3.1.4 -joblib==1.4.2 -jsonschema==4.22.0 -jsonschema-specifications==2023.12.1 -lark==1.1.9 -llvmlite==0.43.0 -lm-format-enforcer==0.10.1 -markdown-it-py==3.0.0 -markupsafe==2.1.5 -mdurl==0.1.2 -mpmath==1.3.0 -msgpack==1.0.8 -multidict==6.0.5 -nest-asyncio==1.6.0 -networkx==3.2.1 -ninja==1.11.1.1 -numba==0.60.0 -numpy==1.26.0 -nvidia-cublas-cu12==12.1.3.1 -nvidia-cuda-cupti-cu12==12.1.105 -nvidia-cuda-nvrtc-cu12==12.1.105 -nvidia-cuda-runtime-cu12==12.1.105 -nvidia-cudnn-cu12==8.9.2.26 -nvidia-cufft-cu12==11.0.2.54 -nvidia-curand-cu12==10.3.2.106 -nvidia-cusolver-cu12==11.4.5.107 -nvidia-cusparse-cu12==12.1.0.106 -nvidia-ml-py==11.525.150 -nvidia-nccl-cu12==2.20.5 -nvidia-nvjitlink-cu12==12.5.82 -nvidia-nvtx-cu12==12.1.105 -openai==1.35.10 -opentelemetry-api==1.20.0 -opentelemetry-instrumentation==0.41b0 -opentelemetry-instrumentation-aiohttp-client==0.41b0 -opentelemetry-instrumentation-asgi==0.41b0 -opentelemetry-sdk==1.20.0 -opentelemetry-semantic-conventions==0.41b0 -opentelemetry-util-http==0.41b0 -orjson==3.10.6 -outlines==0.0.34 -packaging==24.1 -pathspec==0.12.1 -pip-requirements-parser==32.0.1 -pip-tools==7.4.1 -prometheus-client==0.20.0 -prometheus-fastapi-instrumentator==7.0.0 -protobuf==5.27.2 -psutil==6.0.0 -py-cpuinfo==9.0.0 -pydantic==2.8.2 -pydantic-core==2.20.1 -pygments==2.18.0 -pyparsing==3.1.2 -pyproject-hooks==1.1.0 -python-dateutil==2.9.0.post0 -python-dotenv==1.0.1 -python-json-logger==2.0.7 -python-multipart==0.0.9 -pyyaml==6.0.1 -pyzmq==26.0.3 -ray==2.31.0 -referencing==0.35.1 -regex==2024.5.15 -requests==2.32.3 -rich==13.7.1 -rpds-py==0.18.1 -safetensors==0.4.3 -schema==0.7.7 -scipy==1.13.1 -sentencepiece==0.2.0 -shellingham==1.5.4 -simple-di==0.1.5 -six==1.16.0 -sniffio==1.3.1 -starlette==0.37.2 -sympy==1.12.1 -tiktoken==0.7.0 -tokenizers==0.19.1 -tomli==2.0.1 -tomli-w==1.0.0 -torch==2.3.0 -tornado==6.4.1 -tqdm==4.66.4 -transformers==4.41.0 -triton==2.3.0 -typer==0.12.3 -typing-extensions==4.12.2 -ujson==5.10.0 -urllib3==2.2.2 -uvicorn[standard]==0.30.1 -uvloop==0.19.0 -vllm==0.4.3 -vllm-flash-attn==2.5.8.post2 -watchfiles==0.22.0 -websockets==12.0 -wheel==0.43.0 -wrapt==1.16.0 -wsproto==1.2.0 -xformers==0.0.26.post1 -yarl==1.9.4 -zipp==3.19.2 - -# The following packages are considered to be unsafe in a requirements file: -pip==24.1.1 -setuptools==70.2.0 diff --git a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/service.py b/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/service.py deleted file mode 100644 index 94af0b0d..00000000 --- a/bentoml/bentos/qwen2/7b-instruct-fp16-9381/src/service.py +++ /dev/null @@ -1,239 +0,0 @@ -import functools -import json -import logging -import os -import sys -import uuid -from typing import AsyncGenerator, Literal, Optional - -import bentoml -import fastapi -import fastapi.staticfiles -import pydantic -import vllm.entrypoints.openai.api_server as vllm_api_server -import yaml -from annotated_types import Ge, Le -from bento_constants import CONSTANT_YAML -from fastapi.responses import FileResponse -from typing_extensions import Annotated, Literal - - -class Message(pydantic.BaseModel): - content: str - role: Literal["system", "user", "assistant"] - - -CONSTANTS = yaml.safe_load(CONSTANT_YAML) - -ENGINE_CONFIG = CONSTANTS["engine_config"] -SERVICE_CONFIG = CONSTANTS["service_config"] -OVERRIDE_CHAT_TEMPLATE = CONSTANTS.get("chat_template") - -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) - - -openai_api_app = fastapi.FastAPI() -static_app = fastapi.FastAPI() -ui_app = fastapi.FastAPI() - - -OPENAI_ENDPOINTS = [ - ["/chat/completions", vllm_api_server.create_chat_completion, ["POST"]], - ["/completions", vllm_api_server.create_completion, ["POST"]], - ["/models", vllm_api_server.show_available_models, ["GET"]], -] - - -class Message(pydantic.BaseModel): - role: Literal["system", "user", "assistant"] - content: str - - -for route, endpoint, methods in OPENAI_ENDPOINTS: - openai_api_app.add_api_route( - path=route, - endpoint=endpoint, - methods=methods, - include_in_schema=True, - ) - - -STATIC_DIR = os.path.join(os.path.dirname(__file__), "ui") - -ui_app.mount( - "/static", fastapi.staticfiles.StaticFiles(directory=STATIC_DIR), name="static" -) - - -@ui_app.get("/") -async def serve_chat_html(): - return FileResponse(os.path.join(STATIC_DIR, "chat.html")) - - -@ui_app.get("/{full_path:path}") -async def catch_all(full_path: str): - file_path = os.path.join(STATIC_DIR, full_path) - if os.path.exists(file_path): - return FileResponse(file_path) - return FileResponse(os.path.join(STATIC_DIR, "chat.html")) - - -# special handling for prometheus_client of bentoml -if "prometheus_client" in sys.modules: - sys.modules.pop("prometheus_client") - - -@bentoml.mount_asgi_app(openai_api_app, path="/v1") -@bentoml.mount_asgi_app(ui_app, path="/ui") -@bentoml.service(**SERVICE_CONFIG) -class VLLM: - def __init__(self) -> None: - from transformers import AutoTokenizer - from vllm import AsyncEngineArgs, AsyncLLMEngine - from vllm.entrypoints.openai.serving_chat import OpenAIServingChat - from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion - - ENGINE_ARGS = AsyncEngineArgs(**ENGINE_CONFIG) - self.engine = AsyncLLMEngine.from_engine_args(ENGINE_ARGS) - self.tokenizer = AutoTokenizer.from_pretrained(ENGINE_CONFIG["model"]) - logger.info(f"VLLM service initialized with model: {ENGINE_CONFIG['model']}") - - if OVERRIDE_CHAT_TEMPLATE: # use community chat template - gen_config = _get_gen_config(CONSTANTS["chat_template"]) - chat_template = gen_config["template"] - else: - chat_template = None - - model_config = self.engine.engine.get_model_config() - - # inject the engine into the openai serving chat and completion - vllm_api_server.openai_serving_chat = OpenAIServingChat( - engine=self.engine, - served_model_names=[ENGINE_CONFIG["model"]], - response_role="assistant", - chat_template=chat_template, - model_config=model_config, - ) - vllm_api_server.openai_serving_completion = OpenAIServingCompletion( - engine=self.engine, - served_model_names=[ENGINE_CONFIG["model"]], - model_config=model_config, - lora_modules=None, - ) - - @bentoml.api(route="/api/generate") - async def generate( - self, - prompt: str = "Explain superconductors like I'm five years old", - model: str = ENGINE_CONFIG["model"], - max_tokens: Annotated[ - int, - Ge(128), - Le(ENGINE_CONFIG["max_model_len"]), - ] = ENGINE_CONFIG["max_model_len"], - stop: Optional[list[str]] = None, - ) -> AsyncGenerator[str, None]: - if stop is None: - stop = [] - - from vllm import SamplingParams - - SAMPLING_PARAM = SamplingParams( - max_tokens=max_tokens, - stop=stop, - ) - stream = await self.engine.add_request(uuid.uuid4().hex, prompt, SAMPLING_PARAM) - - cursor = 0 - async for request_output in stream: - text = request_output.outputs[0].text - yield text[cursor:] - cursor = len(text) - - @bentoml.api(route="/api/chat") - async def chat( - self, - messages: list[Message] = [ - Message(content="what is the meaning of life?", role="user") - ], - model: str = ENGINE_CONFIG["model"], - max_tokens: Annotated[ - int, - Ge(128), - Le(ENGINE_CONFIG["max_model_len"]), - ] = ENGINE_CONFIG["max_model_len"], - stop: Optional[list[str]] = None, - ) -> AsyncGenerator[str, None]: - """ - light-weight chat API that takes in a list of messages and returns a response - """ - from vllm import SamplingParams - - try: - if OVERRIDE_CHAT_TEMPLATE: # community chat template - gen_config = _get_gen_config(CONSTANTS["chat_template"]) - if not stop: - if gen_config["stop_str"]: - stop = [gen_config["stop_str"]] - else: - stop = [] - system_prompt = gen_config["system_prompt"] - self.tokenizer.chat_template = gen_config["template"] - else: - if not stop: - if self.tokenizer.eos_token is not None: - stop = [self.tokenizer.eos_token] - else: - stop = [] - system_prompt = None - - SAMPLING_PARAM = SamplingParams( - max_tokens=max_tokens, - stop=stop, - ) - if system_prompt and messages[0].role != "system": - messages = [dict(role="system", content=system_prompt)] + messages - - prompt = self.tokenizer.apply_chat_template( - messages, - tokenize=False, - add_generation_prompt=True, - ) - - stream = await self.engine.add_request( - uuid.uuid4().hex, prompt, SAMPLING_PARAM - ) - - cursor = 0 - strip_flag = True - async for request_output in stream: - text = request_output.outputs[0].text - assistant_message = text[cursor:] - if not strip_flag: # strip the leading whitespace - yield assistant_message - elif assistant_message.strip(): - strip_flag = False - yield assistant_message.lstrip() - cursor = len(text) - except Exception as e: - logger.error(f"Error in chat API: {e}") - yield f"Error in chat API: {e}" - - -@functools.lru_cache(maxsize=1) -def _get_gen_config(community_chat_template: str) -> dict: - logger.info(f"Load community_chat_template: {community_chat_template}") - chat_template_path = os.path.join( - os.path.dirname(__file__), "chat_templates", "chat_templates" - ) - config_path = os.path.join( - os.path.dirname(__file__), "chat_templates", "generation_configs" - ) - with open(os.path.join(config_path, f"{community_chat_template}.json")) as f: - gen_config = json.load(f) - chat_template_file = gen_config["chat_template"].split("/")[-1] - with open(os.path.join(chat_template_path, chat_template_file)) as f: - chat_template = f.read() - gen_config["template"] = chat_template.replace(" ", "").replace("\n", "") - return gen_config