Fixes - undo lifespan context, doesn't work don't want to debug
marius-baseten committed Sep 19, 2024
1 parent c5e0d36 commit 7d39fde
Showing 6 changed files with 12 additions and 26 deletions.
26 changes: 10 additions & 16 deletions truss/templates/control/control/application.py
@@ -1,5 +1,4 @@
 import asyncio
-import contextlib
 import logging
 import re
 from pathlib import Path
@@ -46,20 +45,6 @@ async def handle_model_load_failed(_, error):
     return JSONResponse({"error": str(error)}, 503)
 
 
-@contextlib.asynccontextmanager
-async def lifespan_context(app: FastAPI):
-    # Before start.
-    yield  # Run.
-    # Shutdown.
-    # FastAPI handles the term signal to start the shutdown flow. Here we
-    # make sure that the inference server is stopped when the control server
-    # shuts down. The inference server has logic to wait until all requests
-    # are finished before exiting. By waiting on that, we inherit the same
-    # behavior for the control server.
-    app.state.logger.info("Term signal received, shutting down.")
-    app.state.inference_server_process_controller.terminate_with_wait()
-
-
 def create_app(base_config: Dict):
     app_state = State()
     setup_logging()
@@ -114,11 +99,20 @@ async def start_background_inference_startup():
             ModelLoadFailed: handle_model_load_failed,
             Exception: generic_error_handler,
         },
-        lifespan=lifespan_context,
     )
     app.state = app_state
     app.include_router(control_app)
 
+    @app.on_event("shutdown")
+    def on_shutdown():
+        # FastAPI handles the term signal to start the shutdown flow. Here we
+        # make sure that the inference server is stopped when the control server
+        # shuts down. The inference server has logic to wait until all requests
+        # are finished before exiting. By waiting on that, we inherit the same
+        # behavior for the control server.
+        app.state.logger.info("Term signal received, shutting down.")
+        app.state.inference_server_process_controller.terminate_with_wait()
+
     return app


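For reference, the two FastAPI shutdown-hook styles this commit swaps between look roughly like the sketch below (minimal and self-contained; stop_inference_server is a hypothetical stand-in for the real terminate_with_wait call):

import contextlib

from fastapi import FastAPI

def stop_inference_server() -> None:
    # Hypothetical stand-in for
    # app.state.inference_server_process_controller.terminate_with_wait().
    print("stopping inference server")

# Style A: lifespan context manager (removed by this commit).
@contextlib.asynccontextmanager
async def lifespan(app: FastAPI):
    yield  # The app serves requests while suspended here.
    stop_inference_server()  # Runs once graceful shutdown begins.

app_a = FastAPI(lifespan=lifespan)

# Style B: the older on_event hook (reinstated by this commit).
app_b = FastAPI()

@app_b.on_event("shutdown")
def on_shutdown() -> None:
    stop_inference_server()

Both hooks fire when the server begins graceful shutdown; per the commit message, the lifespan variant misbehaved here, so the older hook was restored.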
1 change: 0 additions & 1 deletion truss/templates/control/control/endpoints.py
@@ -57,7 +57,6 @@ async def proxy(request: Request):
         ),
         stop=stop_after_attempt(INFERENCE_SERVER_START_WAIT_SECS),
         wait=wait_fixed(1),
-        reraise=True,
     ):
         with attempt:
             try:
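Dropping reraise=True changes what callers see once retries are exhausted: tenacity then raises its own RetryError wrapping the last exception, instead of re-raising that exception directly (the same change is made in the next hunk). A minimal sketch of the difference:

from tenacity import RetryError, Retrying, stop_after_attempt

def flaky():
    raise ConnectionError("inference server not ready")

# Without reraise (behavior after this commit): RetryError wraps the cause.
try:
    for attempt in Retrying(stop=stop_after_attempt(2)):
        with attempt:
            flaky()
except RetryError as e:
    print(type(e.last_attempt.exception()))  # <class 'ConnectionError'>

# With reraise=True (behavior before): the original exception surfaces.
try:
    for attempt in Retrying(stop=stop_after_attempt(2), reraise=True):
        with attempt:
            flaky()
except ConnectionError as e:
    print(e)  # inference server not ready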
@@ -41,7 +41,6 @@ def inference_server_startup_flow(
     for attempt in Retrying(
         stop=stop_after_attempt(15),
         wait=wait_exponential(multiplier=2, min=1, max=4),
-        reraise=True,
     ):
         with attempt:
             try:
7 changes: 1 addition & 6 deletions truss/templates/shared/util.py
@@ -3,7 +3,7 @@
 import shutil
 import sys
 from pathlib import Path
-from typing import List, TypeVar
+from typing import List
 
 import psutil
 import requests
@@ -80,11 +80,6 @@ def kill_child_processes(parent_pid: int):
         process.kill()
 
 
-X = TypeVar("X")
-Y = TypeVar("Y")
-Z = TypeVar("Z")
-
-
 def download_from_url_using_requests(URL: str, download_to: Path):
     # Streaming download to keep memory usage low
     resp = requests.get(
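The body of download_from_url_using_requests is truncated above, but its comment names the standard requests streaming pattern; a sketch of that pattern under the same signature (not necessarily the file's exact body):

from pathlib import Path

import requests

def download_from_url_using_requests(URL: str, download_to: Path):
    # stream=True defers the body; iter_content pulls it in fixed-size
    # chunks, keeping memory usage low for large files.
    resp = requests.get(URL, stream=True)
    resp.raise_for_status()
    with download_to.open("wb") as f:
        for chunk in resp.iter_content(chunk_size=8 * 1024):
            f.write(chunk)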
@@ -3,5 +3,5 @@

 class Model:
     async def predict(self, model_input: Any) -> Dict[str, List]:
-        for i in range(100):
+        for i in range(5):
             yield str(i)
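Because predict is an async generator, its output streams one chunk at a time; this change just shortens the test fixture's stream from 100 chunks to 5. A minimal consumer sketch for a streaming predict like the one above:

import asyncio
from typing import Any, Dict, List

class Model:
    async def predict(self, model_input: Any) -> Dict[str, List]:
        for i in range(5):
            yield str(i)

async def main():
    model = Model()
    async for chunk in model.predict(None):
        print(chunk)  # prints "0" through "4", one chunk at a time

asyncio.run(main())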
1 change: 0 additions & 1 deletion truss/util/data_structures.py
@@ -2,7 +2,6 @@

 X = TypeVar("X")
 Y = TypeVar("Y")
-Z = TypeVar("Z")
 
 
 def transform_optional(x: Optional[X], fn: Callable[[X], Optional[Y]]) -> Optional[Y]:
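transform_optional is a None-propagating map; its body is cut off above, but the signature implies behavior like this sketch (the body shown is a presumed implementation, not necessarily the file's):

from typing import Callable, Optional, TypeVar

X = TypeVar("X")
Y = TypeVar("Y")

def transform_optional(x: Optional[X], fn: Callable[[X], Optional[Y]]) -> Optional[Y]:
    # Apply fn only when a value is present; otherwise pass None through.
    return None if x is None else fn(x)

assert transform_optional("41", int) == 41
assert transform_optional(None, int) is None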
