format llamacpp
bojiang committed Jul 5, 2024
1 parent b666737 commit ddd7094
Showing 6 changed files with 10 additions and 53 deletions.
6 changes: 3 additions & 3 deletions source/llamacpp-chat/bentofile.yaml
@@ -1,14 +1,14 @@
service: "service:Phi3"
service: "service:LlamaCppChat"
labels:
owner: bentoml-team
platforms: macos
-source_repo: https://github.com/bentoml/openllm-repo-recipe/tree/main/llamacpp-chat
+source: https://github.com/bentoml/openllm-models-feed/tree/main/source/llamacpp-chat
include:
- "*.py"
- "ui/*"
python:
requirements_txt: "./requirements.txt"
-lock_packages: false
+lock_packages: true
envs:
- name: CMAKE_ARGS
value: "-DLLAMA_METAL=on"
4 changes: 2 additions & 2 deletions source/llamacpp-chat/requirements.txt
@@ -1,3 +1,3 @@
-huggingface-hub==0.23.4
+huggingface-hub
llama_cpp_python==0.2.79
-fastapi==0.111.0
+fastapi
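
Note: with the version pins dropped here and lock_packages: true set in bentofile.yaml above, exact versions are resolved and frozen at build time rather than hand-maintained. A quick sketch (not part of the repo) to see what the loose requirements resolved to in the current environment:

# Sketch: print the versions the unpinned requirements actually resolved to.
from importlib.metadata import PackageNotFoundError, version

for dist in ("huggingface-hub", "llama_cpp_python", "fastapi"):
    try:
        print(dist, version(dist))
    except PackageNotFoundError:
        print(dist, "not installed")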
47 changes: 2 additions & 45 deletions source/llamacpp-chat/service.py
@@ -72,10 +72,10 @@ async def catch_all(full_path: str):
sys.modules.pop("prometheus_client")


-@bentoml.mount_asgi_app(ui_app, path="/chat")
+@bentoml.mount_asgi_app(ui_app, path="/ui")
@bentoml.mount_asgi_app(openai_api_app, path="/v1")
@bentoml.service(**SERVICE_CONFIG)
-class Phi3:
+class LlamaCppChat:

    def __init__(self) -> None:
        self.llm = Llama.from_pretrained(
@@ -84,37 +84,6 @@ def __init__(self) -> None:
            verbose=False,
        )

@bentoml.api(route="/api/chat")
async def chat(
self,
messages: list[Message] = [
{"role": "user", "content": "What is the meaning of life?"}
],
model: str = ENGINE_CONFIG["model"],
max_tokens: Annotated[
int,
Ge(128),
Le(ENGINE_CONFIG["max_model_len"]),
] = ENGINE_CONFIG["max_model_len"],
stop: Optional[list[str]] = None,
stop_token_ids: Optional[list[int]] = None,
) -> AsyncGenerator[str, None]:
"""
light-weight chat API that takes in a list of messages and returns a response
"""
response = self.llm.create_chat_completion(
messages=messages,
max_tokens=max_tokens,
stream=True,
stop=stop,
)

for chunk in response:
try:
yield chunk["choices"][0]["delta"]["content"]
except KeyError:
yield ""

@bentoml.api(route="/v1/chat/completions")
async def chat_completions(
self,
@@ -161,15 +130,3 @@ async def chat_completions(
        except Exception as e:
            yield SSE(data=str(e)).marshal()
        yield SSE(data="[DONE]").marshal()

-
-if __name__ == "__main__":
-    phi3 = Phi3()
-    response = phi3.llm.create_chat_completion(
-        messages = [
-            {"role": "system", "content": SYS_PROMPT},
-            {"role": "user", "content": "Explain superconductors like I'm five years old"}
-        ],
-        max_tokens=256,
-        #stream=True,
-    )
-    print(response)
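
Note: with the bespoke /api/chat route and the __main__ smoke test removed, the OpenAI-compatible /v1/chat/completions route is the remaining chat surface. A client sketch; the base URL, port, and model id are assumptions, not taken from the diff:

# Sketch: stream a chat completion from the remaining OpenAI-compatible route.
# Assumes the service is running locally on port 3000.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:3000/v1", api_key="not-needed")
stream = client.chat.completions.create(
    model="phi3",  # hypothetical id; list real ones via client.models.list()
    messages=[{"role": "user", "content": "What is the meaning of life?"}],
    stream=True,
)
for chunk in stream:
    print(chunk.choices[0].delta.content or "", end="", flush=True)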
2 changes: 1 addition & 1 deletion source/make.py
@@ -74,7 +74,7 @@ def hash_directory(directory_path):
model_version = f"{model_version}-{directory_hash[:4]}"

subprocess.run(
["bentoml", "build", str(tempdir), "--version", model_version],
[sys.executable, "-m", "bentoml", "build", str(tempdir), "--version", model_version],
check=True,
cwd=tempdir,
env=os.environ,
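
Note: invoking the CLI as sys.executable -m bentoml pins the build to the interpreter running make.py, instead of whatever bentoml binary happens to be first on PATH. The same pattern in isolation, using pip purely as a stand-in module:

# Sketch: run a console tool through the current interpreter so the child
# process uses this environment's packages, not a stray PATH entry.
import subprocess
import sys

subprocess.run([sys.executable, "-m", "pip", "--version"], check=True)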
2 changes: 1 addition & 1 deletion source/vllm-chat/bentofile.yaml
@@ -2,7 +2,7 @@ service: "service:VLLM"
labels:
owner: bentoml-team
platforms: linux
source: https://github.com/bentoml/openllm-repo-recipe/tree/main/vllm-chat
source: https://github.com/bentoml/openllm-models-feed/tree/main/source/vllm-chat
include:
- "*.py"
- "ui/*"
2 changes: 1 addition & 1 deletion source/vllm-chat/service.py
@@ -85,7 +85,7 @@ async def catch_all(full_path: str):


@bentoml.mount_asgi_app(openai_api_app, path="/v1")
-@bentoml.mount_asgi_app(ui_app, path="/chat")
+@bentoml.mount_asgi_app(ui_app, path="/ui")
@bentoml.service(**SERVICE_CONFIG)
class VLLM:
def __init__(self) -> None:
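
Note: both services now mount the UI at /ui instead of /chat, keeping paths consistent across recipes. The mounting pattern in miniature; the FastAPI app and service body here are placeholders, not the repo's actual definitions:

# Sketch of the decorator stack after this change.
import bentoml
from fastapi import FastAPI

ui_app = FastAPI()

@bentoml.mount_asgi_app(ui_app, path="/ui")
@bentoml.service
class VLLM:
    pass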
