Skip to content

Commit

Permalink
add llamacpp
Browse files Browse the repository at this point in the history
  • Loading branch information
bojiang committed Jul 5, 2024
1 parent ddd7094 commit a3ced7c
Show file tree
Hide file tree
Showing 2,016 changed files with 9,654 additions and 8,487 deletions.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# gemma:2b-instruct-fp16-ad2d
# gemma:2b-instruct-fp16-da36

[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML)
[![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1036,39 +1036,6 @@ paths:
tags:
- Service APIs
x-bentoml-name: generate
/chat/:
get:
operationId: serve_chat_html__get
responses:
'200':
content:
application/json:
schema: {}
description: Successful Response
summary: Serve Chat Html
/chat/{full_path}:
get:
operationId: catch_all__full_path__get
parameters:
- in: path
name: full_path
required: true
schema:
title: Full Path
type: string
responses:
'200':
content:
application/json:
schema: {}
description: Successful Response
'422':
content:
application/json:
schema:
$ref: '#/components/schemas/HTTPValidationError'
description: Validation Error
summary: Catch All
/healthz:
get:
description: Health check endpoint. Expecting an empty response with status
Expand Down Expand Up @@ -1109,6 +1076,39 @@ paths:
description: Successful Response
tags:
- Infrastructure
/ui/:
get:
operationId: serve_chat_html__get
responses:
'200':
content:
application/json:
schema: {}
description: Successful Response
summary: Serve Chat Html
/ui/{full_path}:
get:
operationId: catch_all__full_path__get
parameters:
- in: path
name: full_path
required: true
schema:
title: Full Path
type: string
responses:
'200':
content:
application/json:
schema: {}
description: Successful Response
'422':
content:
application/json:
schema:
$ref: '#/components/schemas/HTTPValidationError'
description: Validation Error
summary: Catch All
/v1/chat/completions:
post:
operationId: create_chat_completion_chat_completions_post
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
service: service:VLLM
name: gemma
version: 2b-instruct-fp16-ad2d
version: 2b-instruct-fp16-da36
bentoml_version: 1.2.19
creation_time: '2024-07-03T09:42:28.113622+00:00'
creation_time: '2024-07-05T07:31:05.934035+00:00'
labels:
openllm_alias: 2b,2b-instruct
openllm_hf_model_id: google/gemma-2b-it
owner: bentoml-team
platforms: linux
source: https://github.com/bentoml/openllm-repo-recipe/tree/main/vllm-chat
source: https://github.com/bentoml/openllm-models-feed/tree/main/source/vllm-chat
models: []
runners: []
entry_service: gemma
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@ attrs==23.2.0
bentoml==1.2.19
build==1.2.1
cattrs==23.1.2
certifi==2024.6.2
certifi==2024.7.4
charset-normalizer==3.3.2
circus==0.18.0
click==8.1.7
click-option-group==0.5.6
cloudpickle==3.0.0
cmake==3.29.6
cmake==3.30.0
deepmerge==1.1.1
deprecated==1.2.14
diskcache==5.6.3
Expand Down Expand Up @@ -70,7 +70,7 @@ nvidia-ml-py==11.525.150
nvidia-nccl-cu12==2.20.5
nvidia-nvjitlink-cu12==12.5.82
nvidia-nvtx-cu12==12.1.105
openai==1.35.9
openai==1.35.10
opentelemetry-api==1.20.0
opentelemetry-instrumentation==0.41b0
opentelemetry-instrumentation-aiohttp-client==0.41b0
Expand All @@ -89,8 +89,8 @@ prometheus-fastapi-instrumentator==7.0.0
protobuf==5.27.2
psutil==6.0.0
py-cpuinfo==9.0.0
pydantic==2.8.0
pydantic-core==2.20.0
pydantic==2.8.2
pydantic-core==2.20.1
pygments==2.18.0
pyparsing==3.1.2
pyproject-hooks==1.1.0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ labels:
openllm_hf_model_id: google/gemma-2b-it
owner: bentoml-team
platforms: linux
source: https://github.com/bentoml/openllm-repo-recipe/tree/main/vllm-chat
source: https://github.com/bentoml/openllm-models-feed/tree/main/source/vllm-chat
models: []
name: null
python:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ async def catch_all(full_path: str):


@bentoml.mount_asgi_app(openai_api_app, path="/v1")
@bentoml.mount_asgi_app(ui_app, path="/chat")
@bentoml.mount_asgi_app(ui_app, path="/ui")
@bentoml.service(**SERVICE_CONFIG)
class VLLM:
def __init__(self) -> None:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# gemma:7b-instruct-awq-4bit-5b23
# gemma:7b-instruct-awq-4bit-6a74

[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML)
[![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1036,39 +1036,6 @@ paths:
tags:
- Service APIs
x-bentoml-name: generate
/chat/:
get:
operationId: serve_chat_html__get
responses:
'200':
content:
application/json:
schema: {}
description: Successful Response
summary: Serve Chat Html
/chat/{full_path}:
get:
operationId: catch_all__full_path__get
parameters:
- in: path
name: full_path
required: true
schema:
title: Full Path
type: string
responses:
'200':
content:
application/json:
schema: {}
description: Successful Response
'422':
content:
application/json:
schema:
$ref: '#/components/schemas/HTTPValidationError'
description: Validation Error
summary: Catch All
/healthz:
get:
description: Health check endpoint. Expecting an empty response with status
Expand Down Expand Up @@ -1109,6 +1076,39 @@ paths:
description: Successful Response
tags:
- Infrastructure
/ui/:
get:
operationId: serve_chat_html__get
responses:
'200':
content:
application/json:
schema: {}
description: Successful Response
summary: Serve Chat Html
/ui/{full_path}:
get:
operationId: catch_all__full_path__get
parameters:
- in: path
name: full_path
required: true
schema:
title: Full Path
type: string
responses:
'200':
content:
application/json:
schema: {}
description: Successful Response
'422':
content:
application/json:
schema:
$ref: '#/components/schemas/HTTPValidationError'
description: Validation Error
summary: Catch All
/v1/chat/completions:
post:
operationId: create_chat_completion_chat_completions_post
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
service: service:VLLM
name: gemma
version: 7b-instruct-awq-4bit-5b23
version: 7b-instruct-awq-4bit-6a74
bentoml_version: 1.2.19
creation_time: '2024-07-03T09:43:50.470130+00:00'
creation_time: '2024-07-05T07:32:14.773611+00:00'
labels:
openllm_alias: 7b-4bit,7b-instruct-4bit
openllm_hf_model_id: casperhansen/gemma-7b-it-awq
owner: bentoml-team
platforms: linux
source: https://github.com/bentoml/openllm-repo-recipe/tree/main/vllm-chat
source: https://github.com/bentoml/openllm-models-feed/tree/main/source/vllm-chat
models: []
runners: []
entry_service: gemma
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@ attrs==23.2.0
bentoml==1.2.19
build==1.2.1
cattrs==23.1.2
certifi==2024.6.2
certifi==2024.7.4
charset-normalizer==3.3.2
circus==0.18.0
click==8.1.7
click-option-group==0.5.6
cloudpickle==3.0.0
cmake==3.29.6
cmake==3.30.0
deepmerge==1.1.1
deprecated==1.2.14
diskcache==5.6.3
Expand Down Expand Up @@ -70,7 +70,7 @@ nvidia-ml-py==11.525.150
nvidia-nccl-cu12==2.20.5
nvidia-nvjitlink-cu12==12.5.82
nvidia-nvtx-cu12==12.1.105
openai==1.35.9
openai==1.35.10
opentelemetry-api==1.20.0
opentelemetry-instrumentation==0.41b0
opentelemetry-instrumentation-aiohttp-client==0.41b0
Expand All @@ -89,8 +89,8 @@ prometheus-fastapi-instrumentator==7.0.0
protobuf==5.27.2
psutil==6.0.0
py-cpuinfo==9.0.0
pydantic==2.8.0
pydantic-core==2.20.0
pydantic==2.8.2
pydantic-core==2.20.1
pygments==2.18.0
pyparsing==3.1.2
pyproject-hooks==1.1.0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ labels:
openllm_hf_model_id: casperhansen/gemma-7b-it-awq
owner: bentoml-team
platforms: linux
source: https://github.com/bentoml/openllm-repo-recipe/tree/main/vllm-chat
source: https://github.com/bentoml/openllm-models-feed/tree/main/source/vllm-chat
models: []
name: null
python:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ async def catch_all(full_path: str):


@bentoml.mount_asgi_app(openai_api_app, path="/v1")
@bentoml.mount_asgi_app(ui_app, path="/chat")
@bentoml.mount_asgi_app(ui_app, path="/ui")
@bentoml.service(**SERVICE_CONFIG)
class VLLM:
def __init__(self) -> None:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# gemma:7b-instruct-fp16-262a
# gemma:7b-instruct-fp16-dafc

[![pypi_status](https://img.shields.io/badge/BentoML-1.2.19-informational)](https://pypi.org/project/BentoML)
[![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.com/)
Expand Down
Loading

0 comments on commit a3ced7c

Please sign in to comment.