Merge branch 'release'

basetenlabs · Mar 7, 2024 · 09b3f5c
2 parents 57d4b74 + 58b1093
commit 09b3f5c
Showing 1 changed file with 4 additions and 3 deletions.
diff --git a/truss/config/trt_llm.py b/truss/config/trt_llm.py
@@ -11,6 +11,7 @@
 class TRTLLMModelArchitecture(Enum):
     LLAMA: str = "llama"
     MISTRAL: str = "mistral"
+    DEEPSEEK: str = "deepseek"
 
 
 class TRTLLMQuantizationType(Enum):
@@ -26,18 +27,18 @@ class TRTLLMQuantizationType(Enum):
 
 class TrussTRTLLMPluginConfiguration(BaseModel):
     multi_block_mode: bool = False
-    paged_kv_cache: bool = False
+    paged_kv_cache: bool = True
     use_fused_mlp: bool = False
 
 
 class TrussTRTLLMBuildConfiguration(BaseModel):
-    huggingface_ckpt_repository: str
     base_model_architecture: TRTLLMModelArchitecture
     max_input_len: int
     max_output_len: int
     max_batch_size: int
-    max_beam_width: int = 1
+    max_beam_width: int
     max_prompt_embedding_table_size: int = 0
+    huggingface_ckpt_repository: Optional[str]
     gather_all_token_logits: bool = False
     strongly_typed: bool = False
     quantization_type: TRTLLMQuantizationType = TRTLLMQuantizationType.NO_QUANT