Skip to content

Commit

Permalink
Add new custom inference service for Kserve
Browse files Browse the repository at this point in the history
  • Loading branch information
alemorm committed Mar 27, 2024
1 parent 94737e6 commit 144cf0f
Show file tree
Hide file tree
Showing 5 changed files with 32 additions and 33 deletions.
26 changes: 18 additions & 8 deletions examples/3d-brain-mri/container/deploy/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from kserve import (V1beta1InferenceService, V1beta1InferenceServiceSpec,
V1beta1PredictorSpec, V1beta1TorchServeSpec, constants)
from kubernetes import client
from kubernetes.client import V1ResourceRequirements, V1Toleration
from kubernetes.client import V1ResourceRequirements, V1Toleration, V1Container, V1EnvVar

# =====================================================================================

Expand Down Expand Up @@ -113,6 +113,12 @@ def parse_args():
help="TorchServe maximum delay in ms for batch aggregation",
default=5000,
)
parser.add_argument(
"--response-timeout",
type=str,
help="TorchServe maximum response timeout in s for inference",
default=240,
)
parser.add_argument(
"--k8s-config-file",
type=str,
Expand Down Expand Up @@ -308,19 +314,23 @@ def create_inference_service(
else:
predictor_spec = V1beta1PredictorSpec(
tolerations=tol,
pytorch=(
V1beta1TorchServeSpec(
protocol_version="v2",
storage_uri=f"s3://{commit}.master.{repo}.{project}/{model_name}",
containers=[
V1Container(
name='kserve-container',
args=[ 'torchserve', '--start', '--model-store=/mnt/models/model-store', '--ts-config=/mnt/models/config/config.properties'],
image='pytorch/torchserve-kfs:0.9.0-gpu',
env=[V1EnvVar(name='STORAGE_URI',value=f"s3://{commit}.master.{repo}.{project}/{model_name}"),
V1EnvVar(name='TS_SERVICE_ENVELOPE',value='kservev2'),
V1EnvVar(name='PROTOCOL_VERSION',value='v2')],
resources=(
V1ResourceRequirements(
requests=resource_requirements["requests"],
limits=resource_requirements["limits"],
)
),
)
)
),
service_account_name=sa,
],
service_account_name=sa
)
isvc = V1beta1InferenceService(
api_version=api_version,
Expand Down
5 changes: 3 additions & 2 deletions examples/3d-brain-mri/container/deploy/deploy.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def create_properties_file(model_name, model_version, args):
model_store=/mnt/models/model-store
max_request_size=%s
max_response_size=%s
model_snapshot={"name":"startup.cfg","modelCount":1,"models":{"%s":{"%s":{"defaultVersion":true,"marName":"%s.mar","minWorkers":%s,"maxWorkers":%s,"batchSize":%s,"maxBatchDelay":%s,"responseTimeout":120}}}}""" % (
model_snapshot={"name":"startup.cfg","modelCount":1,"models":{"%s":{"%s":{"defaultVersion":true,"marName":"%s.mar","minWorkers":%s,"maxWorkers":%s,"batchSize":%s,"maxBatchDelay":%s,"responseTimeout":%s}}}}""" % (
args.max_request_size,
args.max_response_size,
model_name,
Expand All @@ -85,7 +85,8 @@ def create_properties_file(model_name, model_version, args):
args.min_workers,
args.max_workers,
args.batch_size,
args.batch_delay
args.batch_delay,
args.response_timeout
)

conf_prop = open("config.properties", "w")
Expand Down
17 changes: 3 additions & 14 deletions examples/3d-brain-mri/pipelines/_on_prem_deployment-pipeline.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@
"/bin/sh"
],
"stdin": [
"python deploy.py --deployment-name pdk-3d-brain-mri-deploy --service-account-name pach-deploy --max-request-size 400553500 --max-response-size 400553500 --resource-requests nvidia.com/gpu=1,memory=8Gi --resource-limits nvidia.com/gpu=1,memory=8Gi --tolerations accelerator=Tesla-T4"
"python deploy.py --deployment-name pdk-3d-brain-mri-deploy --git-url https://git@github.com:/determined-ai/pdk.git --handler brain_mri_handler.py --git-ref dev_3dmri --sub-dir examples/3d-brain-mri/container/deploy --service-account-name pach-deploy --max-request-size 400553500 --max-response-size 400553500 --resource-requests nvidia.com/gpu=1,memory=16Gi --resource-limits nvidia.com/gpu=1,memory=16Gi --tolerations accelerator=Tesla-T4"
],
"image": "alemor/pdk:3d-brain-deploy-v0.0.5",
"image": "alemor/pdk:3d-brain-deploy-v0.0.6",
"secrets": [
{
"name": "pipeline-secret",
Expand All @@ -40,19 +40,8 @@
"name": "pipeline-secret",
"key": "kserve_namespace",
"env_var": "KSERVE_NAMESPACE"
},
{
"name": "pach-kserve-creds",
"key": "AWS_ACCESS_KEY_ID",
"env_var": "AWS_ACCESS_KEY_ID"
},
{
"name": "pach-kserve-creds",
"key": "AWS_SECRET_ACCESS_KEY",
"env_var": "AWS_SECRET_ACCESS_KEY"
}
]
},
"autoscaling": true,
"pod_patch": "[{\"op\": \"add\",\"path\": \"/volumes/-\",\"value\": {\"name\": \"det-checkpoints\",\"hostpath\": {\"path\": \"/mnt/efs/shared_fs/determined\",\"type\": \"Directory\"}}}, {\"op\": \"add\",\"path\": \"/containers/0/volumeMounts/-\",\"value\": {\"mountPath\": \"/determined_shared_fs\",\"name\": \"det-checkpoints\"}}]"
"autoscaling": true
}
13 changes: 6 additions & 7 deletions examples/3d-brain-mri/pipelines/_on_prem_training-pipeline.json
Original file line number Diff line number Diff line change
@@ -1,23 +1,23 @@
{
"pipeline": {
"name": "brain-mri-train"
"name": "3d-brain-mri-train"
},
"description": "Detects changed files in a repository and triggers retraining on that dataset",
"input": {
"pfs": {
"project": "pdk-3d-brain-mri",
"repo": "3d-brain-mri-data",
"name": "data",
"repo": "brain-mri-data",
"branch": "master",
"glob": "/",
"empty_files": true
"emptyFiles": true
}
},
"transform": {
"cmd": [
"/bin/sh"
],
"stdin": [
"python train.py --git-url https://git@github.com:/determined-ai/pdk.git --git-ref main --sub-dir examples/brain-mri/experiment --config const.yaml --repo brain-mri-data --model brain-mri --project pdk-brain-mri"
"python train.py --git-url https://git@github.com:/determined-ai/pdk.git --git-ref main --sub-dir examples/brain-mri/experiment --config const.yaml --repo 3d-brain-mri-data --model brain-mri --project pdk-3d-brain-mri"
],
"image": "pachyderm/pdk:train-v0.0.5",
"secrets": [
Expand All @@ -43,6 +43,5 @@
}
]
},
"autoscaling": true,
"pod_patch": "[{\"op\": \"add\",\"path\": \"/volumes/-\",\"value\": {\"name\": \"det-checkpoints\",\"hostpath\": {\"path\": \"/mnt/efs/shared_fs/determined\",\"type\": \"Directory\"}}}, {\"op\": \"add\",\"path\": \"/containers/0/volumeMounts/-\",\"value\": {\"mountPath\": \"/determined_shared_fs\",\"name\": \"det-checkpoints\"}}]"
"autoscaling": true
}
4 changes: 2 additions & 2 deletions examples/3d-brain-mri/pipelines/deployment-pipeline.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@
"/bin/sh"
],
"stdin": [
"python deploy.py --deployment-name pdk-3d-brain-mri-deploy --cloud-model-host gcp --cloud-model-bucket alemor-pdk-gcp-repo-models --git-url https://git@github.com:/determined-ai/pdk.git --handler brain_mri_handler.py --git-ref dev_3dmri --sub-dir examples/3d-brain-mri/container/deploy --max-request-size 400553500 --max-response-size 400553500 --resource-requests nvidia.com/gpu=1,memory=8Gi --resource-limits nvidia.com/gpu=4,memory=8Gi --tolerations nvidia.com/gpu=present"
"python deploy.py --deployment-name pdk-3d-brain-mri-deploy --cloud-model-host aws --cloud-model-bucket alemor-pdk-gcp-repo-models --git-url https://git@github.com:/determined-ai/pdk.git --handler brain_mri_handler.py --git-ref dev_3dmri --sub-dir examples/3d-brain-mri/container/deploy --max-request-size 400553500 --max-response-size 400553500 --resource-requests nvidia.com/gpu=1,memory=16Gi --resource-limits nvidia.com/gpu=4,memory=16Gi --tolerations nvidia.com/gpu=present"
],
"image": "alemor/pdk:3d-brain-deploy-v0.0.5",
"image": "alemor/pdk:3d-brain-deploy-v0.0.6",
"secrets": [
{
"name": "pipeline-secret",
Expand Down

0 comments on commit 144cf0f

Please sign in to comment.