Skip to content

Commit

Permalink
Merge branch 'release'
Browse files: browse the repository at this point in the history
  • Loading branch information
basetenbot committed Mar 7, 2024
2 parents 57d4b74 + 58b1093 commit 09b3f5c
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions truss/config/trt_llm.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -11,6 +11,7 @@
class TRTLLMModelArchitecture(Enum):
LLAMA: str = "llama"
MISTRAL: str = "mistral"
DEEPSEEK: str = "deepseek"


class TRTLLMQuantizationType(Enum):
Expand All @@ -26,18 +27,18 @@ class TRTLLMQuantizationType(Enum):

class TrussTRTLLMPluginConfiguration(BaseModel):
multi_block_mode: bool = False
paged_kv_cache: bool = False
paged_kv_cache: bool = True
use_fused_mlp: bool = False


class TrussTRTLLMBuildConfiguration(BaseModel):
huggingface_ckpt_repository: str
base_model_architecture: TRTLLMModelArchitecture
max_input_len: int
max_output_len: int
max_batch_size: int
max_beam_width: int = 1
max_beam_width: int
max_prompt_embedding_table_size: int = 0
huggingface_ckpt_repository: Optional[str]
gather_all_token_logits: bool = False
strongly_typed: bool = False
quantization_type: TRTLLMQuantizationType = TRTLLMQuantizationType.NO_QUANT
Expand Down

0 comments on commit 09b3f5c

Please sign in to comment.