fix for pixtral
bojiang committed Sep 27, 2024
1 parent 35c3e49 commit 2e9a9e2
Showing 2 changed files with 30 additions and 24 deletions.
22 changes: 22 additions & 0 deletions src/recipe.yaml
@@ -757,3 +757,25 @@
   extra_labels:
     openllm_alias: 11b-vision
     model_name: meta-llama/Llama-3.2-11B-Vision-Instruct
+'pixtral:12b-240910':
+  project: vllm-chat
+  service_config:
+    name: pixtral
+    traffic:
+      timeout: 300
+    resources:
+      gpu: 1
+      gpu_type: nvidia-a100-80gb
+  engine_config:
+    model: mistral-community/pixtral-12b-240910
+    tokenizer_mode: mistral
+    enable_prefix_caching: true
+    enable_chunked_prefill: false
+    limit_mm_per_prompt:
+      image: 1
+    max_model_len: 16384
+  extra_labels:
+    openllm_alias: 12b, 12b-vision
+    model_name: mistral-community/pixtral-12b-240910
+  extra_requirements:
+    - mistral_common[opencv]
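
Since the vllm-chat project exposes an OpenAI-compatible API (mounted via init_app_state in service.py below), the new recipe entry can be exercised with a standard chat-completions request once the Bento is serving. A minimal sketch, not part of the commit, assuming the service listens on 127.0.0.1:3000 (as in the old generate code) and using a placeholder image URL:

from openai import OpenAI

# Assumption: the pixtral recipe is served locally; the API key is unused ("dummy").
client = OpenAI(base_url="http://127.0.0.1:3000/v1", api_key="dummy")
stream = client.chat.completions.create(
    model="mistral-community/pixtral-12b-240910",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "what is this?"},
                # limit_mm_per_prompt caps requests at one image each.
                {"type": "image_url", "image_url": {"url": "https://example.com/cat.jpg"}},
            ],
        }
    ],
    stream=True,
)
for chunk in stream:
    print(chunk.choices[0].delta.content or "", end="", flush=True)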
32 changes: 8 additions & 24 deletions src/vllm-chat/service.py
@@ -4,7 +4,7 @@
 import os
 import traceback
 from argparse import Namespace
-from typing import AsyncGenerator, Literal, Optional, Union
+from typing import AsyncGenerator, Literal, Optional, Union, Sequence

 import bentoml
 import fastapi
@@ -29,7 +29,7 @@ class ImageContent(pydantic.BaseModel):

 class Message(pydantic.BaseModel):
     role: Literal["system", "user", "assistant"] = "user"
-    content: list[Union[TextContent, ImageContent]]
+    content: Sequence[Union[TextContent, ImageContent]]


 PARAMETER_YAML = os.path.join(os.path.dirname(__file__), "openllm_config.yaml")
@@ -114,31 +114,15 @@ def __init__(self) -> None:
         init_app_state(self.engine, model_config, openai_api_app.state, args)

     @bentoml.api
-    async def generate(
-        self, prompt: str = "what is this?"
-    ) -> AsyncGenerator[str, None]:
-        from openai import AsyncOpenAI
-
-        client = AsyncOpenAI(base_url="http://127.0.0.1:3000/v1", api_key="dummy")
-        content = [TextContent(text=prompt)]
-        message = Message(role="user", content=content)
-
-        try:
-            completion = await client.chat.completions.create(  # type: ignore
-                model=ENGINE_CONFIG["model"],
-                messages=[message.model_dump()],  # type: ignore
-                stream=True,
-            )
-            async for chunk in completion:
-                yield chunk.choices[0].delta.content or ""
-        except Exception:
-            yield traceback.format_exc()
-        # async for text in self.generate_with_image(prompt):
-        #     yield text
+    async def generate(self, prompt: str = "what is this?") -> AsyncGenerator[str, None]:
+        async for text in self.generate_with_image(prompt):
+            yield text

     @bentoml.api
     async def generate_with_image(
-        self, prompt: str = "what is this?", image: Optional[PIL.Image.Image] = None
+        self,
+        prompt: str = "what is this?",
+        image: Optional[PIL.Image.Image] = None,
     ) -> AsyncGenerator[str, None]:
         from openai import AsyncOpenAI
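With this change, generate simply delegates to generate_with_image instead of issuing its own OpenAI-client request, so both endpoints share one code path. A minimal sketch of calling them, not part of the commit, assuming BentoML's HTTP client against a local service and a hypothetical example.jpg on disk:

import bentoml
import PIL.Image

with bentoml.SyncHTTPClient("http://localhost:3000") as client:
    # Text-only endpoint; now routed through generate_with_image internally.
    for chunk in client.generate(prompt="what is this?"):
        print(chunk, end="", flush=True)

    # Prompt plus a single image (the recipe allows at most one per request).
    image = PIL.Image.open("example.jpg")  # hypothetical local file
    for chunk in client.generate_with_image(prompt="what is this?", image=image):
        print(chunk, end="", flush=True)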
