From f3f1664d2d8a8ad7278832d254cd15a0fdbdbac0 Mon Sep 17 00:00:00 2001 From: Abu Qader <48742992+aspctu@users.noreply.github.com> Date: Thu, 7 Mar 2024 17:24:42 -0500 Subject: [PATCH 1/2] init (#855) --- truss/config/trt_llm.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/truss/config/trt_llm.py b/truss/config/trt_llm.py index c223070c8..cfd2783f3 100644 --- a/truss/config/trt_llm.py +++ b/truss/config/trt_llm.py @@ -11,6 +11,7 @@ class TRTLLMModelArchitecture(Enum): LLAMA: str = "llama" MISTRAL: str = "mistral" + DEEPSEEK: str = "deepseek" class TRTLLMQuantizationType(Enum): @@ -26,18 +27,18 @@ class TRTLLMQuantizationType(Enum): class TrussTRTLLMPluginConfiguration(BaseModel): multi_block_mode: bool = False - paged_kv_cache: bool = False + paged_kv_cache: bool = True use_fused_mlp: bool = False class TrussTRTLLMBuildConfiguration(BaseModel): - huggingface_ckpt_repository: str base_model_architecture: TRTLLMModelArchitecture max_input_len: int max_output_len: int max_batch_size: int - max_beam_width: int = 1 + max_beam_width: int max_prompt_embedding_table_size: int = 0 + huggingface_ckpt_repository: Optional[str] gather_all_token_logits: bool = False strongly_typed: bool = False quantization_type: TRTLLMQuantizationType = TRTLLMQuantizationType.NO_QUANT From 58b10938647662d34439e21e43dda36279a9b840 Mon Sep 17 00:00:00 2001 From: Sidharth Shanker Date: Thu, 7 Mar 2024 22:28:21 +0000 Subject: [PATCH 2/2] 0.9.3. --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 552bd66ff..c090123fe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "truss" -version = "0.9.2" +version = "0.9.3" description = "A seamless bridge from model development to model delivery" license = "MIT" readme = "README.md"