From f3f1664d2d8a8ad7278832d254cd15a0fdbdbac0 Mon Sep 17 00:00:00 2001
From: Abu Qader <48742992+aspctu@users.noreply.github.com>
Date: Thu, 7 Mar 2024 17:24:42 -0500
Subject: [PATCH 1/2] Add DEEPSEEK architecture and adjust TRT-LLM config fields (#855)

---
 truss/config/trt_llm.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/truss/config/trt_llm.py b/truss/config/trt_llm.py
index c223070c8..cfd2783f3 100644
--- a/truss/config/trt_llm.py
+++ b/truss/config/trt_llm.py
@@ -11,6 +11,7 @@
 class TRTLLMModelArchitecture(Enum):
     LLAMA: str = "llama"
     MISTRAL: str = "mistral"
+    DEEPSEEK: str = "deepseek"
 
 
 class TRTLLMQuantizationType(Enum):
@@ -26,18 +27,18 @@ class TRTLLMQuantizationType(Enum):
 
 class TrussTRTLLMPluginConfiguration(BaseModel):
     multi_block_mode: bool = False
-    paged_kv_cache: bool = False
+    paged_kv_cache: bool = True
     use_fused_mlp: bool = False
 
 
 class TrussTRTLLMBuildConfiguration(BaseModel):
-    huggingface_ckpt_repository: str
     base_model_architecture: TRTLLMModelArchitecture
     max_input_len: int
     max_output_len: int
     max_batch_size: int
-    max_beam_width: int = 1
+    max_beam_width: int
     max_prompt_embedding_table_size: int = 0
+    huggingface_ckpt_repository: Optional[str]
     gather_all_token_logits: bool = False
     strongly_typed: bool = False
     quantization_type: TRTLLMQuantizationType = TRTLLMQuantizationType.NO_QUANT

From 58b10938647662d34439e21e43dda36279a9b840 Mon Sep 17 00:00:00 2001
From: Sidharth Shanker <sid.shanker@baseten.co>
Date: Thu, 7 Mar 2024 22:28:21 +0000
Subject: [PATCH 2/2] Bump version to 0.9.3

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 552bd66ff..c090123fe 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "truss"
-version = "0.9.2"
+version = "0.9.3"
 description = "A seamless bridge from model development to model delivery"
 license = "MIT"
 readme = "README.md"