basetenlabs · marius-baseten · Sep 19, 2024 · Sep 16, 2024 · Sep 18, 2024 · Sep 18, 2024
diff --git a/truss/__init__.py b/truss/__init__.py
@@ -1,7 +1,13 @@
+import warnings
 from pathlib import Path
 
+from pydantic import PydanticDeprecatedSince20
 from single_source import get_version
 
+# Suppress Pydantic V1 warnings, because we have to use it for backwards compat.
+warnings.filterwarnings("ignore", category=PydanticDeprecatedSince20)
+
+
 __version__ = get_version(__name__, Path(__file__).parent.parent)
 
 

diff --git a/truss/config/trt_llm.py b/truss/config/trt_llm.py
@@ -1,13 +1,17 @@
 import json
 import logging
+import warnings
 from enum import Enum
 from typing import Optional
 
 from huggingface_hub.errors import HFValidationError
 from huggingface_hub.utils import validate_repo_id
-from pydantic import BaseModel, validator
+from pydantic import BaseModel, PydanticDeprecatedSince20, validator
 from rich.console import Console
 
+# Suppress Pydantic V1 warnings, because we have to use it for backwards compat.
+warnings.filterwarnings("ignore", category=PydanticDeprecatedSince20)
+
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 

diff --git a/truss/remote/baseten/service.py b/truss/remote/baseten/service.py
@@ -1,6 +1,7 @@
 import enum
 import time
 import urllib.parse
+import warnings
 from typing import (
     Any,
     Dict,
@@ -17,6 +18,9 @@
 from truss.truss_handle import TrussHandle
 from truss.util.errors import RemoteNetworkError
 
+# "classes created inside an enum will not become a member" -> intended here anyway.
+warnings.filterwarnings("ignore", category=DeprecationWarning, message=".*enum.*")
+
 DEFAULT_STREAM_ENCODING = "utf-8"
 
 

diff --git a/truss/remote/remote_factory.py b/truss/remote/remote_factory.py
@@ -1,11 +1,12 @@
 import inspect
 
 try:
+    from configparser import DEFAULTSECT, ConfigParser  # type: ignore
+except ImportError:
+    # We need to do this for old python.
     from configparser import DEFAULTSECT
     from configparser import SafeConfigParser as ConfigParser
-except ImportError:
-    # We need to do this for py312 and onwards.
-    from configparser import DEFAULTSECT, ConfigParser  # type: ignore
+
 
 from functools import partial
 from operator import is_not

diff --git a/truss/templates/control/control/application.py b/truss/templates/control/control/application.py
@@ -1,4 +1,5 @@
 import asyncio
+import contextlib
 import logging
 import re
 from pathlib import Path
@@ -45,6 +46,20 @@ async def handle_model_load_failed(_, error):
     return JSONResponse({"error": str(error)}, 503)
 
 
+@contextlib.asynccontextmanager
+async def lifespan_context(app: FastAPI):
+    # Before start.
+    yield  # Run.
+    # Shutdown.
+    # FastApi handles the term signal to start the shutdown flow. Here we
+    # make sure that the inference server is stopped when control server
+    # shuts down. Inference server has logic to wait until all requests are
+    # finished before exiting. By waiting on that, we inherit the same
+    # behavior for control server.
+    app.state.logger.info("Term signal received, shutting down.")
+    app.state.inference_server_process_controller.terminate_with_wait()
+
+
 def create_app(base_config: Dict):
     app_state = State()
     setup_logging()
@@ -99,20 +114,11 @@ async def start_background_inference_startup():
             ModelLoadFailed: handle_model_load_failed,
             Exception: generic_error_handler,
         },
+        lifespan=lifespan_context,
     )
     app.state = app_state
     app.include_router(control_app)
 
-    @app.on_event("shutdown")
-    def on_shutdown():
-        # FastApi handles the term signal to start the shutdown flow. Here we
-        # make sure that the inference server is stopeed when control server
-        # shuts down. Inference server has logic to wait until all requests are
-        # finished before exiting. By waiting on that, we inherit the same
-        # behavior for control server.
-        app.state.logger.info("Term signal received, shutting down.")
-        app.state.inference_server_process_controller.terminate_with_wait()
-
     return app
 
 

diff --git a/truss/templates/server/common/errors.py b/truss/templates/server/common/errors.py
@@ -14,6 +14,7 @@
 )
 
 import fastapi
+import starlette.responses
 from fastapi import HTTPException
 from fastapi.responses import JSONResponse
 
@@ -62,6 +63,10 @@ def _make_baseten_error_headers(error_code: int) -> Mapping[str, str]:
     }
 
 
+def add_error_headers_to_user_response(response: starlette.responses.Response) -> None:
+    response.headers.update(_make_baseten_error_headers(_BASETEN_CLIENT_ERROR_CODE))
+
+
 def _make_baseten_response(
     http_status: int,
     info: Union[str, Exception],
@@ -75,9 +80,7 @@ def _make_baseten_response(
     )
 
 
-async def exception_handler(
-    request: fastapi.Request, exc: Exception
-) -> fastapi.Response:
+async def exception_handler(_: fastapi.Request, exc: Exception) -> fastapi.Response:
     if isinstance(exc, ModelMissingError):
         return _make_baseten_response(
             HTTPStatus.NOT_FOUND.value, exc, _BASETEN_DOWNSTREAM_ERROR_CODE

diff --git a/truss/templates/server/model_wrapper.py b/truss/templates/server/model_wrapper.py
@@ -119,18 +119,18 @@ def from_signature(
                 if _is_request_type(param1.annotation):
                     raise errors.ModelDefinitionError(
                         f"`{method_name}` method with two arguments is not allowed to "
-                        "have only request as first argument, must be second. "
+                        "have request as first argument, request must be second. "
                         f"Got: {signature}"
                     )
-            if not (param2.annotation and _is_request_type(param1.annotation)):
+            if not (param2.annotation and _is_request_type(param2.annotation)):
                 raise errors.ModelDefinitionError(
                     f"`{method_name}` method with two arguments must have request as "
                     f"second argument (type annotated). Got: {signature} "
                 )
             return cls.INPUTS_AND_REQUEST
         else:
             raise errors.ModelDefinitionError(
-                f"`{method_name}` method cannot have more than to arguments. "
+                f"`{method_name}` method cannot have more than two arguments. "
                 f"Got: {signature}"
             )
 
@@ -545,7 +545,8 @@ async def __call__(
                     with errors.intercept_exceptions(self._logger):
                         raise errors.ModelDefinitionError(
                             "If the predict function returns a generator (streaming), "
-                            "you cannot use postprocessing."
+                            "you cannot use postprocessing. Include all processing in "
+                            "the predict method."
                         )
 
                 if request.headers.get("accept") == "application/json":
@@ -564,8 +565,8 @@ async def __call__(
             if self.model_descriptor.postprocess:
                 with errors.intercept_exceptions(self._logger):
                     raise errors.ModelDefinitionError(
-                        "If the predict function returns a response object, "
-                        "you cannot use postprocessing."
+                        "If the predict function returns a response object, you cannot "
+                        "use postprocessing."
                     )
             else:
                 return predict_result

diff --git a/truss/templates/server/truss_server.py b/truss/templates/server/truss_server.py
@@ -7,6 +7,7 @@
 import socket
 import sys
 import time
+from http import HTTPStatus
 from pathlib import Path
 from typing import Dict, List, Optional, Union
 
@@ -195,6 +196,8 @@ async def predict(
                 # media_type in StreamingResponse sets the Content-Type header
                 return StreamingResponse(result, media_type="application/octet-stream")
             elif isinstance(result, Response):
+                if result.status_code >= HTTPStatus.MULTIPLE_CHOICES.value:
+                    errors.add_error_headers_to_user_response(result)
                 return result
 
             response_headers = {}

diff --git a/truss/tests/templates/server/test_model_wrapper.py b/truss/tests/templates/server/test_model_wrapper.py
@@ -1,4 +1,4 @@
 import importlib
 import os
 import sys
 import time
@@ -75,6 +75,7 @@
         assert model_wrapper.load_failed()
 
 
+@pytest.mark.anyio
 @pytest.mark.integration
 async def test_model_wrapper_streaming_timeout(app_path):
     if "model_wrapper" in sys.modules:
@@ -92,7 +93,7 @@
     assert model_wrapper._config.get("runtime").get("streaming_read_timeout") == 5
 
 
-@pytest.mark.asyncio
+@pytest.mark.anyio
 async def test_trt_llm_truss_init_extension(trt_llm_truss_container_fs, helpers):
     app_path = trt_llm_truss_container_fs / "app"
     packages_path = trt_llm_truss_container_fs / "packages"
@@ -116,7 +117,7 @@
             ), "Expected extension_name was not called"
 
 
-@pytest.mark.asyncio
+@pytest.mark.anyio
 async def test_trt_llm_truss_predict(trt_llm_truss_container_fs, helpers):
     app_path = trt_llm_truss_container_fs / "app"
     packages_path = trt_llm_truss_container_fs / "packages"
@@ -151,7 +152,7 @@
             assert resp == expected_predict_response
 
 
-@pytest.mark.asyncio
+@pytest.mark.anyio
 async def test_trt_llm_truss_missing_model_py(trt_llm_truss_container_fs, helpers):
     app_path = trt_llm_truss_container_fs / "app"
     (app_path / "model" / "model.py").unlink()