Commit

update

bojiang committed Jul 3, 2024
1 parent 2514be6 commit c8bc7c3
Showing 1,996 changed files with 14,392 additions and 8,217 deletions.

@@ -1,4 +1,4 @@
# llama2:7b-chat-fp16-50fcbe9
# gemma:2b-instruct-fp16-ad2d

[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML)
[![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/)
@@ -230,6 +230,7 @@ components:
- $ref: '#/components/schemas/ChatCompletionAssistantMessageParam'
- $ref: '#/components/schemas/ChatCompletionToolMessageParam'
- $ref: '#/components/schemas/ChatCompletionFunctionMessageParam'
- $ref: '#/components/schemas/CustomChatCompletionMessageParam'
title: Messages
type: array
min_p:
@@ -325,6 +326,7 @@ components:
anyOf:
- type: integer
- type: 'null'
default: 0
title: Top Logprobs
top_p:
anyOf:
@@ -396,6 +398,7 @@ components:
anyOf:
- $ref: '#/components/schemas/ChatCompletionContentPartTextParam'
- $ref: '#/components/schemas/ChatCompletionContentPartImageParam'
- $ref: '#/components/schemas/CustomChatCompletionContentPartParam'
type: array
title: Content
name:
@@ -657,6 +660,39 @@ components:
- prompt
title: CompletionRequest
type: object
CustomChatCompletionContentPartParam:
additionalProperties: true
properties:
type:
title: Type
type: string
required:
- type
title: CustomChatCompletionContentPartParam
type: object
CustomChatCompletionMessageParam:
description: Enables custom roles in the Chat Completion API.
properties:
content:
anyOf:
- type: string
- items:
anyOf:
- $ref: '#/components/schemas/ChatCompletionContentPartTextParam'
- $ref: '#/components/schemas/ChatCompletionContentPartImageParam'
- $ref: '#/components/schemas/CustomChatCompletionContentPartParam'
type: array
title: Content
name:
title: Name
type: string
role:
title: Role
type: string
required:
- role
title: CustomChatCompletionMessageParam
type: object
Function:
properties:
arguments:
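
The CustomChatCompletionMessageParam schema added above relaxes the fixed set of OpenAI chat roles, so a message may carry an arbitrary role string plus an optional name and content parts. A minimal request sketch that exercises it follows; the localhost:3000 host and the OpenAI-compatible /v1/chat/completions route are assumptions for illustration, not something this diff shows.

```python
import requests

# Hypothetical local endpoint: host, port, and route are assumptions,
# not taken from this commit.
URL = "http://localhost:3000/v1/chat/completions"

payload = {
    "model": "google/gemma-2b-it",
    "messages": [
        # A non-standard role, allowed by CustomChatCompletionMessageParam.
        {"role": "narrator", "content": "You narrate a short story in one paragraph."},
        {"role": "user", "content": "Begin the story."},
    ],
    "max_tokens": 128,
}

resp = requests.post(URL, json=payload, timeout=60)
resp.raise_for_status()
# Assumes the usual OpenAI-style response shape.
print(resp.json()["choices"][0]["message"]["content"])
```
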
@@ -1,16 +1,14 @@
service: service:VLLM
name: gemma
version: 2b-instruct-fp16-0f34ff9
version: 2b-instruct-fp16-ad2d
bentoml_version: 1.2.19
creation_time: '2024-07-02T14:28:31.122616+00:00'
creation_time: '2024-07-03T09:42:28.113622+00:00'
labels:
openllm_alias: 2b,2b-instruct
openllm_hf_model_id: google/gemma-2b-it
owner: bentoml-team
platforms: linux
service_home: /chat
source_directory: vllm-chat
source_repo: https://github.com/bentoml/openllm-repo-recipe.git
source: https://github.com/bentoml/openllm-repo-recipe/tree/main/vllm-chat
models: []
runners: []
entry_service: gemma
@@ -118,7 +116,7 @@ schema:
apis: []
docker:
distro: debian
python_version: '3.11'
python_version: '3.9'
cuda_version: null
env:
HF_TOKEN: ''
@@ -5,7 +5,7 @@
# ===========================================

# Block SETUP_BENTO_BASE_IMAGE
FROM python:3.11-slim as base-container
FROM python:3.9-slim as base-container

ENV LANG=C.UTF-8

@@ -4,6 +4,7 @@ annotated-types==0.7.0
anyio==4.4.0
appdirs==1.4.4
asgiref==3.8.1
async-timeout==4.0.3
attrs==23.2.0
bentoml==1.2.19
build==1.2.1
@@ -21,6 +22,7 @@ diskcache==5.6.3
distro==1.9.0
dnspython==2.6.1
email-validator==2.2.0
exceptiongroup==1.2.1
fastapi==0.111.0
fastapi-cli==0.0.4
filelock==3.15.4
@@ -51,7 +53,7 @@ mpmath==1.3.0
msgpack==1.0.8
multidict==6.0.5
nest-asyncio==1.6.0
networkx==3.3
networkx==3.2.1
ninja==1.11.1.1
numba==0.60.0
numpy==1.26.0
@@ -68,15 +70,15 @@ nvidia-ml-py==11.525.150
nvidia-nccl-cu12==2.20.5
nvidia-nvjitlink-cu12==12.5.82
nvidia-nvtx-cu12==12.1.105
openai==1.35.7
openai==1.35.9
opentelemetry-api==1.20.0
opentelemetry-instrumentation==0.41b0
opentelemetry-instrumentation-aiohttp-client==0.41b0
opentelemetry-instrumentation-asgi==0.41b0
opentelemetry-sdk==1.20.0
opentelemetry-semantic-conventions==0.41b0
opentelemetry-util-http==0.41b0
orjson==3.10.5
orjson==3.10.6
outlines==0.0.34
packaging==24.1
pathspec==0.12.1
@@ -106,7 +108,7 @@ rich==13.7.1
rpds-py==0.18.1
safetensors==0.4.3
schema==0.7.7
scipy==1.14.0
scipy==1.13.1
sentencepiece==0.2.0
shellingham==1.5.4
simple-di==0.1.5
@@ -116,6 +118,7 @@ starlette==0.37.2
sympy==1.12.1
tiktoken==0.7.0
tokenizers==0.19.1
tomli==2.0.1
tomli-w==1.0.0
torch==2.3.0
tornado==6.4.1
@@ -0,0 +1 @@
3.9.19
@@ -11,7 +11,7 @@ docker:
dockerfile_template: null
env:
HF_TOKEN: ''
python_version: '3.11'
python_version: '3.9'
setup_script: null
system_packages: null
envs:
@@ -32,9 +32,7 @@ labels:
openllm_hf_model_id: google/gemma-2b-it
owner: bentoml-team
platforms: linux
service_home: /chat
source_directory: vllm-chat
source_repo: https://github.com/bentoml/openllm-repo-recipe.git
source: https://github.com/bentoml/openllm-repo-recipe/tree/main/vllm-chat
models: []
name: null
python:
@@ -170,49 +170,55 @@ async def chat(
"""
from vllm import SamplingParams

if OVERRIDE_CHAT_TEMPLATE: # community chat template
gen_config = _get_gen_config(CONSTANTS["chat_template"])
if not stop:
if gen_config["stop_str"]:
stop = [gen_config["stop_str"]]
else:
stop = []
system_prompt = gen_config["system_prompt"]
self.tokenizer.chat_template = gen_config["template"]
else:
if not stop:
if self.tokenizer.eos_token is not None:
stop = [self.tokenizer.eos_token]
else:
stop = []
system_prompt = None

SAMPLING_PARAM = SamplingParams(
max_tokens=max_tokens,
stop=stop,
)
if system_prompt and messages[0].get("role") != "system":
messages = [dict(role="system", content=system_prompt)] + messages

prompt = self.tokenizer.apply_chat_template(
messages,
tokenize=False,
add_generation_prompt=True,
)

stream = await self.engine.add_request(uuid.uuid4().hex, prompt, SAMPLING_PARAM)

cursor = 0
strip_flag = True
async for request_output in stream:
text = request_output.outputs[0].text
assistant_message = text[cursor:]
if not strip_flag: # strip the leading whitespace
yield assistant_message
elif assistant_message.strip():
strip_flag = False
yield assistant_message.lstrip()
cursor = len(text)
try:
if OVERRIDE_CHAT_TEMPLATE: # community chat template
gen_config = _get_gen_config(CONSTANTS["chat_template"])
if not stop:
if gen_config["stop_str"]:
stop = [gen_config["stop_str"]]
else:
stop = []
system_prompt = gen_config["system_prompt"]
self.tokenizer.chat_template = gen_config["template"]
else:
if not stop:
if self.tokenizer.eos_token is not None:
stop = [self.tokenizer.eos_token]
else:
stop = []
system_prompt = None

SAMPLING_PARAM = SamplingParams(
max_tokens=max_tokens,
stop=stop,
)
if system_prompt and messages[0].role != "system":
messages = [dict(role="system", content=system_prompt)] + messages

prompt = self.tokenizer.apply_chat_template(
messages,
tokenize=False,
add_generation_prompt=True,
)

stream = await self.engine.add_request(
uuid.uuid4().hex, prompt, SAMPLING_PARAM
)

cursor = 0
strip_flag = True
async for request_output in stream:
text = request_output.outputs[0].text
assistant_message = text[cursor:]
if not strip_flag: # strip the leading whitespace
yield assistant_message
elif assistant_message.strip():
strip_flag = False
yield assistant_message.lstrip()
cursor = len(text)
except Exception as e:
logger.error(f"Error in chat API: {e}")
yield f"Error in chat API: {e}"


@functools.lru_cache(maxsize=1)
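
The rewritten chat() above consumes vLLM's cumulative output stream and re-emits only the new suffix on each iteration, stripping leading whitespace from the first non-empty piece so the reply does not start with a stray space. Below is a standalone sketch of that cursor/strip_flag pattern, with an in-memory fake stream standing in for the engine; all names here are illustrative, not from the source.

```python
import asyncio
from typing import AsyncIterator, List


async def fake_engine_stream(chunks: List[str]) -> AsyncIterator[str]:
    # Stand-in for request_output.outputs[0].text: each yielded item is the
    # *cumulative* generated text so far, not just the newest tokens.
    text = ""
    for chunk in chunks:
        text += chunk
        await asyncio.sleep(0)  # give control back, as a real engine would
        yield text


async def stream_deltas(cumulative: AsyncIterator[str]) -> AsyncIterator[str]:
    # Mirrors the cursor/strip_flag logic in chat(): emit only the new suffix
    # of the cumulative text, and lstrip the first non-empty piece.
    cursor = 0
    strip_flag = True
    async for text in cumulative:
        delta = text[cursor:]
        if not strip_flag:
            yield delta
        elif delta.strip():
            strip_flag = False
            yield delta.lstrip()
        cursor = len(text)


async def main() -> None:
    pieces = fake_engine_stream(["  ", "Hello", ",", " world", "!"])
    async for piece in stream_deltas(pieces):
        print(repr(piece))


if __name__ == "__main__":
    asyncio.run(main())
```

Running it prints 'Hello', ',', ' world' and '!' in order: the leading whitespace is dropped once, and every later delta passes through unchanged, which is the delta stream a client would concatenate.
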
@@ -1,4 +1,4 @@
# gemma:2b-instruct-fp16-0f34ff9
# gemma:7b-instruct-awq-4bit-5b23

[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML)
[![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/)
@@ -230,6 +230,7 @@ components:
- $ref: '#/components/schemas/ChatCompletionAssistantMessageParam'
- $ref: '#/components/schemas/ChatCompletionToolMessageParam'
- $ref: '#/components/schemas/ChatCompletionFunctionMessageParam'
- $ref: '#/components/schemas/CustomChatCompletionMessageParam'
title: Messages
type: array
min_p:
@@ -325,6 +326,7 @@ components:
anyOf:
- type: integer
- type: 'null'
default: 0
title: Top Logprobs
top_p:
anyOf:
@@ -396,6 +398,7 @@ components:
anyOf:
- $ref: '#/components/schemas/ChatCompletionContentPartTextParam'
- $ref: '#/components/schemas/ChatCompletionContentPartImageParam'
- $ref: '#/components/schemas/CustomChatCompletionContentPartParam'
type: array
title: Content
name:
@@ -657,6 +660,39 @@ components:
- prompt
title: CompletionRequest
type: object
CustomChatCompletionContentPartParam:
additionalProperties: true
properties:
type:
title: Type
type: string
required:
- type
title: CustomChatCompletionContentPartParam
type: object
CustomChatCompletionMessageParam:
description: Enables custom roles in the Chat Completion API.
properties:
content:
anyOf:
- type: string
- items:
anyOf:
- $ref: '#/components/schemas/ChatCompletionContentPartTextParam'
- $ref: '#/components/schemas/ChatCompletionContentPartImageParam'
- $ref: '#/components/schemas/CustomChatCompletionContentPartParam'
type: array
title: Content
name:
title: Name
type: string
role:
title: Role
type: string
required:
- role
title: CustomChatCompletionMessageParam
type: object
Function:
properties:
arguments:
@@ -1,16 +1,14 @@
service: service:VLLM
name: gemma
version: 7b-instruct-awq-4bit-5dd6145
version: 7b-instruct-awq-4bit-5b23
bentoml_version: 1.2.19
creation_time: '2024-07-02T14:30:04.464984+00:00'
creation_time: '2024-07-03T09:43:50.470130+00:00'
labels:
openllm_alias: 7b-4bit,7b-instruct-4bit
openllm_hf_model_id: casperhansen/gemma-7b-it-awq
owner: bentoml-team
platforms: linux
service_home: /chat
source_directory: vllm-chat
source_repo: https://github.com/bentoml/openllm-repo-recipe.git
source: https://github.com/bentoml/openllm-repo-recipe/tree/main/vllm-chat
models: []
runners: []
entry_service: gemma
@@ -118,7 +116,7 @@ schema:
apis: []
docker:
distro: debian
python_version: '3.11'
python_version: '3.9'
cuda_version: null
env:
HF_TOKEN: ''