From 813bcd290cad58ded4689617d7b3c55cacb23669 Mon Sep 17 00:00:00 2001
From: agent
Date: Thu, 17 Oct 2024 11:09:31 +0800
Subject: [PATCH] chore: add qwen2.5 32b awq

---
Review note: the entry originally declared `project: vllm-chat` twice; the
second occurrence was dropped and a one-line YAML comment added, so the
insertion count is unchanged. The post-image blob hash on the `index` line
below was not regenerated.

 src/recipe.yaml | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/src/recipe.yaml b/src/recipe.yaml
index 7b5ed356..6788baf9 100644
--- a/src/recipe.yaml
+++ b/src/recipe.yaml
@@ -976,3 +976,20 @@
       memory: 60Gi
     traffic:
       timeout: 300
+# Qwen2.5 32B Instruct, AWQ 4-bit: vllm-chat project, one A100 GPU.
+'qwen2.5:32b-instruct-awq-4bit':
+  project: vllm-chat
+  extra_labels:
+    model_name: Qwen/Qwen2.5-32B-Instruct-AWQ
+    openllm_alias: 32b-4bit
+    platforms: linux
+  engine_config:
+    max_model_len: 20480
+    model: Qwen/Qwen2.5-32B-Instruct-AWQ
+  service_config:
+    name: qwen2.5
+    resources:
+      gpu: 1
+      gpu_type: nvidia-tesla-a100
+    traffic:
+      timeout: 300