Update all bentos

bentoml · Jul 3, 2024 · 46d3ccb · 46d3ccb
1 parent 9ff7b16
commit 46d3ccb
Show file tree

Hide file tree

Showing 665 changed files with 17,193 additions and 2,775 deletions.
diff --git a/bentoml/bentos/gemma/2b-instruct-fp16/README.md b/bentoml/bentos/gemma/2b-instruct-fp16/README.md
@@ -1,6 +1,6 @@
 # gemma:2b-instruct-fp16
 
-[![pypi_status](https://img.shields.io/badge/BentoML-1.2.17-informational)](https://pypi.org/project/BentoML)
+[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML)
 [![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/)
 [![join_slack](https://badgen.net/badge/Join/BentoML%20Slack/cyan?icon=slack)](https://l.bentoml.com/join-slack-swagger)
 [![BentoML GitHub Repo](https://img.shields.io/github/stars/bentoml/bentoml?style=social)](https://github.com/bentoml/BentoML)

diff --git a/bentoml/bentos/gemma/2b-instruct-fp16/apis/openapi.yaml b/bentoml/bentos/gemma/2b-instruct-fp16/apis/openapi.yaml
@@ -770,6 +770,23 @@ components:
       - type
       title: InvalidArgument
       type: object
+    Message:
+      properties:
+        content:
+          title: Content
+          type: string
+        role:
+          enum:
+          - system
+          - user
+          - assistant
+          title: Role
+          type: string
+      required:
+      - role
+      - content
+      title: Message
+      type: object
     NotFound:
       description: Not Found
       properties:
@@ -831,9 +848,7 @@ components:
           - content: What is the meaning of life?
             role: user
           items:
-            additionalProperties:
-              type: string
-            type: object
+            $ref: '#/components/schemas/Message'
           title: Messages
           type: array
         model:
@@ -888,7 +903,7 @@ info:
   contact:
     email: contact@bentoml.com
     name: BentoML Team
-  description: "# gemma:dev\n\n[![pypi_status](https://img.shields.io/badge/BentoML-1.2.17-informational)](https://pypi.org/project/BentoML)\n\
+  description: "# gemma:dev\n\n[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML)\n\
     [![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/)\n\
     [![join_slack](https://badgen.net/badge/Join/BentoML%20Slack/cyan?icon=slack)](https://l.bentoml.com/join-slack-swagger)\n\
     [![BentoML GitHub Repo](https://img.shields.io/github/stars/bentoml/bentoml?style=social)](https://github.com/bentoml/BentoML)\n\
@@ -923,9 +938,7 @@ paths:
                   - content: What is the meaning of life?
                     role: user
                   items:
-                    additionalProperties:
-                      type: string
-                    type: object
+                    $ref: '#/components/schemas/Message'
                   title: Messages
                   type: array
                 model:

diff --git a/bentoml/bentos/gemma/2b-instruct-fp16/apis/schema.json b/bentoml/bentos/gemma/2b-instruct-fp16/apis/schema.json
@@ -16,9 +16,26 @@
               }
             ],
             "items": {
-              "additionalProperties": {
-                "type": "string"
+              "properties": {
+                "role": {
+                  "enum": [
+                    "system",
+                    "user",
+                    "assistant"
+                  ],
+                  "title": "Role",
+                  "type": "string"
+                },
+                "content": {
+                  "title": "Content",
+                  "type": "string"
+                }
               },
+              "required": [
+                "role",
+                "content"
+              ],
+              "title": "Message",
               "type": "object"
             },
             "title": "Messages",

diff --git a/bentoml/bentos/gemma/2b-instruct-fp16/bento.yaml b/bentoml/bentos/gemma/2b-instruct-fp16/bento.yaml
@@ -1,8 +1,8 @@
 service: service:VLLM
 name: gemma
 version: 2b-instruct-fp16
-bentoml_version: 1.2.17
-creation_time: '2024-06-21T07:57:18.133866+00:00'
+bentoml_version: 1.2.19
+creation_time: '2024-07-03T06:36:38.163948+00:00'
 labels:
   owner: bentoml-team
   platforms: linux
@@ -40,8 +40,21 @@ schema:
           - role: user
             content: What is the meaning of life?
           items:
-            additionalProperties:
-              type: string
+            properties:
+              role:
+                enum:
+                - system
+                - user
+                - assistant
+                title: Role
+                type: string
+              content:
+                title: Content
+                type: string
+            required:
+            - role
+            - content
+            title: Message
             type: object
           title: Messages
           type: array

diff --git a/bentoml/bentos/gemma/2b-instruct-fp16/env/python/install.sh b/bentoml/bentos/gemma/2b-instruct-fp16/env/python/install.sh
@@ -12,7 +12,7 @@ PIP_ARGS=()
 REQUIREMENTS_TXT="$BASEDIR/requirements.txt"
 REQUIREMENTS_LOCK="$BASEDIR/requirements.lock.txt"
 WHEELS_DIR="$BASEDIR/wheels"
-BENTOML_VERSION=${BENTOML_VERSION:-1.2.17}
+BENTOML_VERSION=${BENTOML_VERSION:-1.2.19}
 # Install python packages, prefer installing the requirements.lock.txt file if it exist
 pushd "$BASEDIR" &>/dev/null
 if [ -f "$REQUIREMENTS_LOCK" ]; then

diff --git a/bentoml/bentos/gemma/2b-instruct-fp16/env/python/requirements.txt b/bentoml/bentos/gemma/2b-instruct-fp16/env/python/requirements.txt
@@ -1,4 +1,4 @@
-bentoml==1.2.17
+bentoml==1.2.19
 torch==2.3.0
 vllm==0.4.3
 numpy==1.26.0

diff --git a/bentoml/bentos/gemma/2b-instruct-fp16/src/bentofile.yaml b/bentoml/bentos/gemma/2b-instruct-fp16/src/bentofile.yaml
@@ -0,0 +1,50 @@
+conda:
+  channels: null
+  dependencies: null
+  environment_yml: null
+  pip: null
+description: null
+docker:
+  base_image: null
+  cuda_version: null
+  distro: debian
+  dockerfile_template: null
+  env:
+    HF_TOKEN: ''
+  python_version: '3.11'
+  setup_script: null
+  system_packages: null
+envs:
+- name: HF_TOKEN
+exclude: []
+include:
+- '*.py'
+- ui/*
+- ui/chunks/*
+- ui/css/*
+- ui/media/*
+- ui/chunks/pages/*
+- bentovllm_openai/*.py
+- chat_templates/chat_templates/*.jinja
+- chat_templates/generation_configs/*.json
+labels:
+  owner: bentoml-team
+  platforms: linux
+  service_home: /chat
+  source_directory: vllm-chat
+  source_repo: https://github.com/bentoml/openllm-repo-recipe.git
+models: []
+name: null
+python:
+  extra_index_url: null
+  find_links: null
+  index_url: null
+  lock_packages: false
+  no_index: null
+  pack_git_packages: true
+  packages: null
+  pip_args: null
+  requirements_txt: ./requirements.txt
+  trusted_host: null
+  wheels: null
+service: service:VLLM
diff --git a/bentoml/bentos/gemma/2b-instruct-fp16/src/service.py b/bentoml/bentos/gemma/2b-instruct-fp16/src/service.py
@@ -9,12 +9,13 @@
 import bentoml
 import fastapi
 import fastapi.staticfiles
+import pydantic
 import vllm.entrypoints.openai.api_server as vllm_api_server
 import yaml
 from annotated_types import Ge, Le
 from bento_constants import CONSTANT_YAML
 from fastapi.responses import FileResponse
-from typing_extensions import Annotated
+from typing_extensions import Annotated, Literal
 
 CONSTANTS = yaml.safe_load(CONSTANT_YAML)
 
@@ -37,6 +38,10 @@
     ["/models", vllm_api_server.show_available_models, ["GET"]],
 ]
 
+class Message(pydantic.BaseModel):
+    role: Literal["system", "user", "assistant"]
+    content: str
+
 
 for route, endpoint, methods in OPENAI_ENDPOINTS:
     openai_api_app.add_api_route(
@@ -143,7 +148,7 @@ async def generate(
     @bentoml.api(route="/api/chat")
     async def chat(
         self,
-        messages: list[dict[str, str]] = [
+        messages: list[Message] = [
             {"role": "user", "content": "What is the meaning of life?"}
         ],
         model: str = ENGINE_CONFIG["model"],
@@ -160,54 +165,58 @@ async def chat(
         """
         from vllm import SamplingParams
 
-        if OVERRIDE_CHAT_TEMPLATE:  # community chat template
-            gen_config = _get_gen_config(CONSTANTS["chat_template"])
-            if not stop:
-                if gen_config["stop_str"]:
-                    stop = [gen_config["stop_str"]]
-                else:
-                    stop = []
-            system_prompt = gen_config["system_prompt"]
-            self.tokenizer.chat_template = gen_config["template"]
-        else:
-            if not stop:
-                if self.tokenizer.eos_token is not None:
-                    stop = [self.tokenizer.eos_token]
-                else:
-                    stop = []
-            system_prompt = None
-
-        # normalize inputs
-        if stop_token_ids is None:
-            stop_token_ids = []
-
-        SAMPLING_PARAM = SamplingParams(
-            max_tokens=max_tokens,
-            stop_token_ids=stop_token_ids,
-            stop=stop,
-        )
-        if system_prompt and messages[0].get("role") != "system":
-            messages = [dict(role="system", content=system_prompt)] + messages
-
-        prompt = self.tokenizer.apply_chat_template(
-            messages,
-            tokenize=False,
-            add_generation_prompt=True,
-        )
-
-        stream = await self.engine.add_request(uuid.uuid4().hex, prompt, SAMPLING_PARAM)
-
-        cursor = 0
-        strip_flag = True
-        async for request_output in stream:
-            text = request_output.outputs[0].text
-            assistant_message = text[cursor:]
-            if not strip_flag:  # strip the leading whitespace
-                yield assistant_message
-            elif assistant_message.strip():
-                strip_flag = False
-                yield assistant_message.lstrip()
-            cursor = len(text)
+        try:
+            if OVERRIDE_CHAT_TEMPLATE:  # community chat template
+                gen_config = _get_gen_config(CONSTANTS["chat_template"])
+                if not stop:
+                    if gen_config["stop_str"]:
+                        stop = [gen_config["stop_str"]]
+                    else:
+                        stop = []
+                system_prompt = gen_config["system_prompt"]
+                self.tokenizer.chat_template = gen_config["template"]
+            else:
+                if not stop:
+                    if self.tokenizer.eos_token is not None:
+                        stop = [self.tokenizer.eos_token]
+                    else:
+                        stop = []
+                system_prompt = None
+
+            # normalize inputs
+            if stop_token_ids is None:
+                stop_token_ids = []
+
+            SAMPLING_PARAM = SamplingParams(
+                max_tokens=max_tokens,
+                stop_token_ids=stop_token_ids,
+                stop=stop,
+            )
+            if system_prompt and messages[0].role != "system":
+                messages = [dict(role="system", content=system_prompt)] + messages
+
+            prompt = self.tokenizer.apply_chat_template(
+                messages,
+                tokenize=False,
+                add_generation_prompt=True,
+            )
+
+            stream = await self.engine.add_request(uuid.uuid4().hex, prompt, SAMPLING_PARAM)
+
+            cursor = 0
+            strip_flag = True
+            async for request_output in stream:
+                text = request_output.outputs[0].text
+                assistant_message = text[cursor:]
+                if not strip_flag:  # strip the leading whitespace
+                    yield assistant_message
+                elif assistant_message.strip():
+                    strip_flag = False
+                    yield assistant_message.lstrip()
+                cursor = len(text)
+        except Exception as e:
+            logger.error(f"Error in chat API: {e}")
+            yield f"Error in chat API: {e}"
 
 
 @functools.lru_cache(maxsize=1)

diff --git a/bentoml/bentos/gemma/2b/README.md b/bentoml/bentos/gemma/2b/README.md
@@ -1,6 +1,6 @@
 # gemma:2b-instruct-fp16
 
-[![pypi_status](https://img.shields.io/badge/BentoML-1.2.17-informational)](https://pypi.org/project/BentoML)
+[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML)
 [![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/)
 [![join_slack](https://badgen.net/badge/Join/BentoML%20Slack/cyan?icon=slack)](https://l.bentoml.com/join-slack-swagger)
 [![BentoML GitHub Repo](https://img.shields.io/github/stars/bentoml/bentoml?style=social)](https://github.com/bentoml/BentoML)

diff --git a/bentoml/bentos/gemma/2b/apis/openapi.yaml b/bentoml/bentos/gemma/2b/apis/openapi.yaml
@@ -770,6 +770,23 @@ components:
       - type
       title: InvalidArgument
       type: object
+    Message:
+      properties:
+        content:
+          title: Content
+          type: string
+        role:
+          enum:
+          - system
+          - user
+          - assistant
+          title: Role
+          type: string
+      required:
+      - role
+      - content
+      title: Message
+      type: object
     NotFound:
       description: Not Found
       properties:
@@ -831,9 +848,7 @@ components:
           - content: What is the meaning of life?
             role: user
           items:
-            additionalProperties:
-              type: string
-            type: object
+            $ref: '#/components/schemas/Message'
           title: Messages
           type: array
         model:
@@ -888,7 +903,7 @@ info:
   contact:
     email: contact@bentoml.com
     name: BentoML Team
-  description: "# gemma:dev\n\n[![pypi_status](https://img.shields.io/badge/BentoML-1.2.17-informational)](https://pypi.org/project/BentoML)\n\
+  description: "# gemma:dev\n\n[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML)\n\
     [![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/)\n\
     [![join_slack](https://badgen.net/badge/Join/BentoML%20Slack/cyan?icon=slack)](https://l.bentoml.com/join-slack-swagger)\n\
     [![BentoML GitHub Repo](https://img.shields.io/github/stars/bentoml/bentoml?style=social)](https://github.com/bentoml/BentoML)\n\
@@ -923,9 +938,7 @@ paths:
                   - content: What is the meaning of life?
                     role: user
                   items:
-                    additionalProperties:
-                      type: string
-                    type: object
+                    $ref: '#/components/schemas/Message'
                   title: Messages
                   type: array
                 model: