Skip to content

Commit

Permalink
Update all bentos
Browse files Browse the repository at this point in the history
  • Loading branch information
rickzx committed Jul 3, 2024
1 parent 9ff7b16 commit 46d3ccb
Show file tree
Hide file tree
Showing 665 changed files with 17,193 additions and 2,775 deletions.
2 changes: 1 addition & 1 deletion bentoml/bentos/gemma/2b-instruct-fp16/README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# gemma:2b-instruct-fp16

[![pypi_status](https://img.shields.io/badge/BentoML-1.2.17-informational)](https://pypi.org/project/BentoML)
[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML)
[![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/)
[![join_slack](https://badgen.net/badge/Join/BentoML%20Slack/cyan?icon=slack)](https://l.bentoml.com/join-slack-swagger)
[![BentoML GitHub Repo](https://img.shields.io/github/stars/bentoml/bentoml?style=social)](https://github.com/bentoml/BentoML)
Expand Down
27 changes: 20 additions & 7 deletions bentoml/bentos/gemma/2b-instruct-fp16/apis/openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -770,6 +770,23 @@ components:
- type
title: InvalidArgument
type: object
Message:
properties:
content:
title: Content
type: string
role:
enum:
- system
- user
- assistant
title: Role
type: string
required:
- role
- content
title: Message
type: object
NotFound:
description: Not Found
properties:
Expand Down Expand Up @@ -831,9 +848,7 @@ components:
- content: What is the meaning of life?
role: user
items:
additionalProperties:
type: string
type: object
$ref: '#/components/schemas/Message'
title: Messages
type: array
model:
Expand Down Expand Up @@ -888,7 +903,7 @@ info:
contact:
email: contact@bentoml.com
name: BentoML Team
description: "# gemma:dev\n\n[![pypi_status](https://img.shields.io/badge/BentoML-1.2.17-informational)](https://pypi.org/project/BentoML)\n\
description: "# gemma:dev\n\n[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML)\n\
[![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/)\n\
[![join_slack](https://badgen.net/badge/Join/BentoML%20Slack/cyan?icon=slack)](https://l.bentoml.com/join-slack-swagger)\n\
[![BentoML GitHub Repo](https://img.shields.io/github/stars/bentoml/bentoml?style=social)](https://github.com/bentoml/BentoML)\n\
Expand Down Expand Up @@ -923,9 +938,7 @@ paths:
- content: What is the meaning of life?
role: user
items:
additionalProperties:
type: string
type: object
$ref: '#/components/schemas/Message'
title: Messages
type: array
model:
Expand Down
21 changes: 19 additions & 2 deletions bentoml/bentos/gemma/2b-instruct-fp16/apis/schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,26 @@
}
],
"items": {
"additionalProperties": {
"type": "string"
"properties": {
"role": {
"enum": [
"system",
"user",
"assistant"
],
"title": "Role",
"type": "string"
},
"content": {
"title": "Content",
"type": "string"
}
},
"required": [
"role",
"content"
],
"title": "Message",
"type": "object"
},
"title": "Messages",
Expand Down
21 changes: 17 additions & 4 deletions bentoml/bentos/gemma/2b-instruct-fp16/bento.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
service: service:VLLM
name: gemma
version: 2b-instruct-fp16
bentoml_version: 1.2.17
creation_time: '2024-06-21T07:57:18.133866+00:00'
bentoml_version: 1.2.19
creation_time: '2024-07-03T06:36:38.163948+00:00'
labels:
owner: bentoml-team
platforms: linux
Expand Down Expand Up @@ -40,8 +40,21 @@ schema:
- role: user
content: What is the meaning of life?
items:
additionalProperties:
type: string
properties:
role:
enum:
- system
- user
- assistant
title: Role
type: string
content:
title: Content
type: string
required:
- role
- content
title: Message
type: object
title: Messages
type: array
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ PIP_ARGS=()
REQUIREMENTS_TXT="$BASEDIR/requirements.txt"
REQUIREMENTS_LOCK="$BASEDIR/requirements.lock.txt"
WHEELS_DIR="$BASEDIR/wheels"
BENTOML_VERSION=${BENTOML_VERSION:-1.2.17}
BENTOML_VERSION=${BENTOML_VERSION:-1.2.19}
# Install python packages, prefer installing the requirements.lock.txt file if it exist
pushd "$BASEDIR" &>/dev/null
if [ -f "$REQUIREMENTS_LOCK" ]; then
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
bentoml==1.2.17
bentoml==1.2.19
torch==2.3.0
vllm==0.4.3
numpy==1.26.0
Expand Down
50 changes: 50 additions & 0 deletions bentoml/bentos/gemma/2b-instruct-fp16/src/bentofile.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
conda:
channels: null
dependencies: null
environment_yml: null
pip: null
description: null
docker:
base_image: null
cuda_version: null
distro: debian
dockerfile_template: null
env:
HF_TOKEN: ''
python_version: '3.11'
setup_script: null
system_packages: null
envs:
- name: HF_TOKEN
exclude: []
include:
- '*.py'
- ui/*
- ui/chunks/*
- ui/css/*
- ui/media/*
- ui/chunks/pages/*
- bentovllm_openai/*.py
- chat_templates/chat_templates/*.jinja
- chat_templates/generation_configs/*.json
labels:
owner: bentoml-team
platforms: linux
service_home: /chat
source_directory: vllm-chat
source_repo: https://github.com/bentoml/openllm-repo-recipe.git
models: []
name: null
python:
extra_index_url: null
find_links: null
index_url: null
lock_packages: false
no_index: null
pack_git_packages: true
packages: null
pip_args: null
requirements_txt: ./requirements.txt
trusted_host: null
wheels: null
service: service:VLLM
109 changes: 59 additions & 50 deletions bentoml/bentos/gemma/2b-instruct-fp16/src/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,13 @@
import bentoml
import fastapi
import fastapi.staticfiles
import pydantic
import vllm.entrypoints.openai.api_server as vllm_api_server
import yaml
from annotated_types import Ge, Le
from bento_constants import CONSTANT_YAML
from fastapi.responses import FileResponse
from typing_extensions import Annotated
from typing_extensions import Annotated, Literal

CONSTANTS = yaml.safe_load(CONSTANT_YAML)

Expand All @@ -37,6 +38,10 @@
["/models", vllm_api_server.show_available_models, ["GET"]],
]

class Message(pydantic.BaseModel):
role: Literal["system", "user", "assistant"]
content: str


for route, endpoint, methods in OPENAI_ENDPOINTS:
openai_api_app.add_api_route(
Expand Down Expand Up @@ -143,7 +148,7 @@ async def generate(
@bentoml.api(route="/api/chat")
async def chat(
self,
messages: list[dict[str, str]] = [
messages: list[Message] = [
{"role": "user", "content": "What is the meaning of life?"}
],
model: str = ENGINE_CONFIG["model"],
Expand All @@ -160,54 +165,58 @@ async def chat(
"""
from vllm import SamplingParams

if OVERRIDE_CHAT_TEMPLATE: # community chat template
gen_config = _get_gen_config(CONSTANTS["chat_template"])
if not stop:
if gen_config["stop_str"]:
stop = [gen_config["stop_str"]]
else:
stop = []
system_prompt = gen_config["system_prompt"]
self.tokenizer.chat_template = gen_config["template"]
else:
if not stop:
if self.tokenizer.eos_token is not None:
stop = [self.tokenizer.eos_token]
else:
stop = []
system_prompt = None

# normalize inputs
if stop_token_ids is None:
stop_token_ids = []

SAMPLING_PARAM = SamplingParams(
max_tokens=max_tokens,
stop_token_ids=stop_token_ids,
stop=stop,
)
if system_prompt and messages[0].get("role") != "system":
messages = [dict(role="system", content=system_prompt)] + messages

prompt = self.tokenizer.apply_chat_template(
messages,
tokenize=False,
add_generation_prompt=True,
)

stream = await self.engine.add_request(uuid.uuid4().hex, prompt, SAMPLING_PARAM)

cursor = 0
strip_flag = True
async for request_output in stream:
text = request_output.outputs[0].text
assistant_message = text[cursor:]
if not strip_flag: # strip the leading whitespace
yield assistant_message
elif assistant_message.strip():
strip_flag = False
yield assistant_message.lstrip()
cursor = len(text)
try:
if OVERRIDE_CHAT_TEMPLATE: # community chat template
gen_config = _get_gen_config(CONSTANTS["chat_template"])
if not stop:
if gen_config["stop_str"]:
stop = [gen_config["stop_str"]]
else:
stop = []
system_prompt = gen_config["system_prompt"]
self.tokenizer.chat_template = gen_config["template"]
else:
if not stop:
if self.tokenizer.eos_token is not None:
stop = [self.tokenizer.eos_token]
else:
stop = []
system_prompt = None

# normalize inputs
if stop_token_ids is None:
stop_token_ids = []

SAMPLING_PARAM = SamplingParams(
max_tokens=max_tokens,
stop_token_ids=stop_token_ids,
stop=stop,
)
if system_prompt and messages[0].role != "system":
messages = [dict(role="system", content=system_prompt)] + messages

prompt = self.tokenizer.apply_chat_template(
messages,
tokenize=False,
add_generation_prompt=True,
)

stream = await self.engine.add_request(uuid.uuid4().hex, prompt, SAMPLING_PARAM)

cursor = 0
strip_flag = True
async for request_output in stream:
text = request_output.outputs[0].text
assistant_message = text[cursor:]
if not strip_flag: # strip the leading whitespace
yield assistant_message
elif assistant_message.strip():
strip_flag = False
yield assistant_message.lstrip()
cursor = len(text)
except Exception as e:
logger.error(f"Error in chat API: {e}")
yield f"Error in chat API: {e}"


@functools.lru_cache(maxsize=1)
Expand Down
2 changes: 1 addition & 1 deletion bentoml/bentos/gemma/2b/README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# gemma:2b-instruct-fp16

[![pypi_status](https://img.shields.io/badge/BentoML-1.2.17-informational)](https://pypi.org/project/BentoML)
[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML)
[![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/)
[![join_slack](https://badgen.net/badge/Join/BentoML%20Slack/cyan?icon=slack)](https://l.bentoml.com/join-slack-swagger)
[![BentoML GitHub Repo](https://img.shields.io/github/stars/bentoml/bentoml?style=social)](https://github.com/bentoml/BentoML)
Expand Down
27 changes: 20 additions & 7 deletions bentoml/bentos/gemma/2b/apis/openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -770,6 +770,23 @@ components:
- type
title: InvalidArgument
type: object
Message:
properties:
content:
title: Content
type: string
role:
enum:
- system
- user
- assistant
title: Role
type: string
required:
- role
- content
title: Message
type: object
NotFound:
description: Not Found
properties:
Expand Down Expand Up @@ -831,9 +848,7 @@ components:
- content: What is the meaning of life?
role: user
items:
additionalProperties:
type: string
type: object
$ref: '#/components/schemas/Message'
title: Messages
type: array
model:
Expand Down Expand Up @@ -888,7 +903,7 @@ info:
contact:
email: contact@bentoml.com
name: BentoML Team
description: "# gemma:dev\n\n[![pypi_status](https://img.shields.io/badge/BentoML-1.2.17-informational)](https://pypi.org/project/BentoML)\n\
description: "# gemma:dev\n\n[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML)\n\
[![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/)\n\
[![join_slack](https://badgen.net/badge/Join/BentoML%20Slack/cyan?icon=slack)](https://l.bentoml.com/join-slack-swagger)\n\
[![BentoML GitHub Repo](https://img.shields.io/github/stars/bentoml/bentoml?style=social)](https://github.com/bentoml/BentoML)\n\
Expand Down Expand Up @@ -923,9 +938,7 @@ paths:
- content: What is the meaning of life?
role: user
items:
additionalProperties:
type: string
type: object
$ref: '#/components/schemas/Message'
title: Messages
type: array
model:
Expand Down
Loading

0 comments on commit 46d3ccb

Please sign in to comment.