Skip to content

Commit

Permalink
fix(llamacpp): qwen2.5 for mac
Browse files: browse the repository at this point in the history
  • Loading branch information
agent authored and agent committed Oct 8, 2024
1 parent 14433c1 commit 123b177
Showing 1 changed file with 49 additions and 2 deletions.
51 changes: 49 additions & 2 deletions src/recipe.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -891,6 +891,13 @@
value: "-DGGML_METAL=on"
'qwen2.5:32b-instruct-ggml-fp16-darwin':
project: llamacpp-chat
extra_labels:
model_name: Qwen/Qwen2.5-32B-Instruct-GGUF
openllm_alias: 32b-ggml-fp16
platforms: macos
extra_envs:
- name: CMAKE_ARGS
value: "-DGGML_METAL=on"
engine_config:
max_model_len: 2048
additional_files:
Expand All @@ -913,16 +920,56 @@
- qwen2.5-32b-instruct-fp16-00017-of-00017.gguf
filename: qwen2.5-32b-instruct-fp16-00001-of-00017.gguf
repo_id: Qwen/Qwen2.5-32B-Instruct-GGUF
service_config:
name: qwen2.5
resources:
memory: 60Gi
traffic:
timeout: 300
# Qwen2.5 14B Instruct, q4_0 GGUF shards, served by the llamacpp-chat project
# on macOS.
# NOTE(review): the nesting below is reconstructed from a flattened diff view;
# confirm it against the recipe's other llamacpp-chat entries.
'qwen2.5:14b-instruct-ggml-q4-darwin':
  project: llamacpp-chat
  extra_labels:
    # The stale 72B / 32b-ggml-fp16 label pair (removed lines of the diff) is
    # dropped: it duplicated these keys and contradicted the entry key and
    # repo_id.
    model_name: Qwen/Qwen2.5-14B-Instruct-GGUF
    openllm_alias: 14b-ggml-q4
    platforms: macos
  extra_envs:
    # Presumably enables ggml's Metal backend at build time -- TODO confirm.
    - name: CMAKE_ARGS
      value: "-DGGML_METAL=on"
  engine_config:
    max_model_len: 2048
    # Every shard is listed, including the first one that `filename` names.
    additional_files:
      - qwen2.5-14b-instruct-q4_0-00001-of-00003.gguf
      - qwen2.5-14b-instruct-q4_0-00002-of-00003.gguf
      - qwen2.5-14b-instruct-q4_0-00003-of-00003.gguf
    filename: qwen2.5-14b-instruct-q4_0-00001-of-00003.gguf
    repo_id: Qwen/Qwen2.5-14B-Instruct-GGUF
  service_config:
    name: qwen2.5
    resources:
      # NOTE(review): same value as the q8 variant; confirm it is intended
      # for the smaller q4 quantization.
      memory: 60Gi
    traffic:
      timeout: 300
# Qwen2.5 14B Instruct, q8_0 GGUF shards, served by the llamacpp-chat project
# on macOS.
# NOTE(review): the nesting below is reconstructed from a flattened diff view;
# confirm it against the recipe's other llamacpp-chat entries.
'qwen2.5:14b-instruct-ggml-q8-darwin':
  project: llamacpp-chat
  extra_labels:
    model_name: Qwen/Qwen2.5-14B-Instruct-GGUF
    openllm_alias: 14b-ggml-q8
    platforms: macos
  extra_envs:
    # Presumably enables ggml's Metal backend at build time -- TODO confirm.
    - name: CMAKE_ARGS
      value: "-DGGML_METAL=on"
  engine_config:
    max_model_len: 2048
    # Every shard is listed, including the first one that `filename` names.
    additional_files:
      - qwen2.5-14b-instruct-q8_0-00001-of-00004.gguf
      - qwen2.5-14b-instruct-q8_0-00002-of-00004.gguf
      - qwen2.5-14b-instruct-q8_0-00003-of-00004.gguf
      - qwen2.5-14b-instruct-q8_0-00004-of-00004.gguf
    filename: qwen2.5-14b-instruct-q8_0-00001-of-00004.gguf
    repo_id: Qwen/Qwen2.5-14B-Instruct-GGUF
  service_config:
    name: qwen2.5
    resources:
      memory: 60Gi
    traffic:
      timeout: 300

0 comments on commit 123b177

Please sign in to comment.