
Commit

fix(llama-cpp): always set stream to true (#2)
Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
aarnphm authored Jul 11, 2024
1 parent 36b74d0 commit a417bc4
Showing 1 changed file with 5 additions and 4 deletions.
source/llamacpp-chat/service.py (9 changes: 5 additions & 4 deletions)
@@ -9,7 +9,7 @@
 import fastapi.staticfiles
 import os
 from fastapi.responses import FileResponse
-from typing_extensions import Annotated, Literal
+from typing_extensions import Literal
 import sys
 import pydantic
 from bentoml.io import SSE
@@ -76,7 +76,7 @@ async def catch_all(full_path: str):
 @bentoml.mount_asgi_app(openai_api_app, path="/v1")
 @bentoml.service(**SERVICE_CONFIG)
 class LlamaCppChat:
-
+
     def __init__(self) -> None:
         self.llm = Llama.from_pretrained(
             repo_id=ENGINE_CONFIG["model"],
@@ -97,7 +97,7 @@ async def chat_completions(
             Le(ENGINE_CONFIG["max_model_len"]),
         ] = ENGINE_CONFIG["max_model_len"],
         stop: Optional[list[str]] = None,
-        stream: Optional[bool] = False,
+        stream: Optional[bool] = True,
         temperature: Optional[float] = 0,
         top_p: Optional[float] = 1.0,
         frequency_penalty: Optional[float] = 0.0,
@@ -107,6 +107,7 @@ async def chat_completions(
         """
         try:
             response = self.llm.create_chat_completion(
+                model=model,
                 messages=messages,
                 max_tokens=max_tokens,
                 stream=stream,
@@ -125,7 +126,7 @@ async def chat_completions(
                     except Exception as e:
                         print(e)
                         yield SSE(data=str(e)).marshal()
-
+
                 yield SSE(data="[DONE]").marshal()
         except Exception as e:
             yield SSE(data=str(e)).marshal()
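For context, the change leans on llama-cpp-python's streaming contract: with stream=True, Llama.create_chat_completion returns an iterator of OpenAI-style chunk dicts rather than a single completion dict, which is what the service marshals into SSE events. A minimal standalone sketch of that behaviour (the repo id and filename below are placeholder assumptions, not taken from this commit):

    import json
    from llama_cpp import Llama

    # Placeholder: any small GGUF chat model from the Hugging Face Hub works here.
    llm = Llama.from_pretrained(
        repo_id="Qwen/Qwen2-0.5B-Instruct-GGUF",
        filename="*q8_0.gguf",
    )

    # With stream=True the call yields chunk dicts; each chunk's
    # choices[0]["delta"] carries the newly generated tokens.
    for chunk in llm.create_chat_completion(
        messages=[{"role": "user", "content": "Hello!"}],
        stream=True,
    ):
        print(json.dumps(chunk))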

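On the client side, since openai_api_app is mounted at /v1, the streamed endpoint can be consumed with the official OpenAI SDK. A hedged sketch, assuming the Bento is served locally on BentoML's default port 3000, that the mounted app exposes the standard /v1/chat/completions route, and that the model id placeholder is replaced with the id reported by the running service:

    from openai import OpenAI

    client = OpenAI(base_url="http://localhost:3000/v1", api_key="na")  # key unused locally

    stream = client.chat.completions.create(
        model="<model-id>",  # placeholder; query /v1/models for the actual id
        messages=[{"role": "user", "content": "Hello!"}],
        stream=True,  # mirrors the new server-side default
    )
    for chunk in stream:
        # Each chunk carries an incremental delta of the assistant message.
        delta = chunk.choices[0].delta.content
        if delta:
            print(delta, end="", flush=True)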