From 38b3193359e5d18222c1e855e216178fb878ff93 Mon Sep 17 00:00:00 2001 From: Mishig Davaadorj Date: Wed, 29 May 2024 15:10:41 +0200 Subject: [PATCH 01/11] Revamp llama.cpp docs --- .../models/providers/llamacpp.md | 49 +++++++++++-------- 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/docs/source/configuration/models/providers/llamacpp.md b/docs/source/configuration/models/providers/llamacpp.md index 85d6dc72f61..c86c63ddfda 100644 --- a/docs/source/configuration/models/providers/llamacpp.md +++ b/docs/source/configuration/models/providers/llamacpp.md @@ -7,32 +7,41 @@ Chat UI supports the llama.cpp API server directly without the need for an adapter. You can do this using the `llamacpp` endpoint type. -If you want to run Chat UI with llama.cpp, you can do the following, using Zephyr as an example model: +If you want to run Chat UI with llama.cpp, you can do the following, using [microsoft/Phi-3-mini-4k-instruct-gguf](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf) as an example model: -1. Get [the weights](https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/tree/main) from the hub -2. Run the server with the following command: `./server -m models/zephyr-7b-beta.Q4_K_M.gguf -c 2048 -np 3` -3. Add the following to your `.env.local`: +```bash +# install llama.cpp +brew install llama.cpp +# start llama.cpp server +llama-server --hf-repo microsoft/Phi-3-mini-4k-instruct-gguf --hf-file Phi-3-mini-4k-instruct-q4.gguf -c 4096 +``` + +A LLaMA.cpp HTTP Server will start on `http://localhost:8080` (to change the port or any other default options, please find [LLaMA.cpp HTTP Server readme](https://github.com/ggerganov/llama.cpp/tree/master/examples/server)). + +Add the following to your `.env.local`: ```ini MODELS=`[ { - "name": "Local Zephyr", - "chatPromptTemplate": "<|system|>\n{{preprompt}}\n{{#each messages}}{{#ifUser}}<|user|>\n{{content}}\n<|assistant|>\n{{/ifUser}}{{#ifAssistant}}{{content}}\n{{/ifAssistant}}{{/each}}", + "name": "Local microsoft/Phi-3-mini-4k-instruct-gguf", + "tokenizer": "microsoft/Phi-3-mini-4k-instruct-gguf", + "preprompt": "", + "chatPromptTemplate": "{{preprompt}}{{#each messages}}{{#ifUser}}<|user|>\n{{content}}<|end|>\n<|assistant|>\n{{/ifUser}}{{#ifAssistant}}{{content}}<|end|>\n{{/ifAssistant}}{{/each}}", "parameters": { - "temperature": 0.1, - "top_p": 0.95, - "repetition_penalty": 1.2, - "top_k": 50, - "truncate": 1000, - "max_new_tokens": 2048, - "stop": [""] + "stop": ["<|end|>", "<|endoftext|>", "<|assistant|>"], + "temperature": 0.7, + "max_new_tokens": 1024, + "truncate": 3071 }, - "endpoints": [ - { - "url": "http://127.0.0.1:8080", - "type": "llamacpp" - } - ] - } + "endpoints": [{ + "type" : "llamacpp", + "baseURL": "http://localhost:8080" + }], + }, ]` ``` + +
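Before pointing Chat UI at the server, it is worth confirming that llama.cpp is actually answering requests. A minimal sanity check could look like the sketch below; it assumes the default port `8080` and the stock llama.cpp HTTP server routes (`/health` and `/completion`):

```bash
# check that the llama.cpp server is up and the model has loaded
curl http://localhost:8080/health

# request a short completion directly, bypassing Chat UI
# (the prompt mirrors the Phi-3 chat template used above)
curl http://localhost:8080/completion \
  -H "Content-Type: application/json" \
  -d '{"prompt": "<|user|>\nHello!<|end|>\n<|assistant|>\n", "n_predict": 64}'
```

If both calls return JSON, the `baseURL` configured above should work as-is.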
\ No newline at end of file From f62d124a6299bba00a242bc72042c7374ce9b86d Mon Sep 17 00:00:00 2001 From: Mishig Davaadorj Date: Wed, 29 May 2024 15:12:08 +0200 Subject: [PATCH 02/11] format --- docs/source/configuration/models/providers/llamacpp.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/configuration/models/providers/llamacpp.md b/docs/source/configuration/models/providers/llamacpp.md index c86c63ddfda..ceeed7e501d 100644 --- a/docs/source/configuration/models/providers/llamacpp.md +++ b/docs/source/configuration/models/providers/llamacpp.md @@ -44,4 +44,4 @@ MODELS=`[
-
\ No newline at end of file + From 1028b8cce6f494ef4cbcfbf047c9e54a46e3294f Mon Sep 17 00:00:00 2001 From: Mishig Davaadorj Date: Wed, 29 May 2024 16:33:18 +0200 Subject: [PATCH 03/11] update readme --- README.md | 74 ++++++++++++++++++- .../models/providers/llamacpp.md | 2 +- 2 files changed, 71 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 2a9ca5b8b55..b677c945c5b 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,69 @@ A chat interface using open source models, eg OpenAssistant or Llama. It is a Sv 7. [Deploying to a HF Space](#deploying-to-a-hf-space) 8. [Building](#building) +## Quickstart Locally + +You can quickly have a locally running chat-ui & LLM text-generation server thanks to chat-ui's [llama.cpp server support](https://huggingface.co/docs/chat-ui/configuration/models/providers/llamacpp). + +**Step 1 (Start llama.cpp server):** + +```bash +# install llama.cpp +brew install llama.cpp +# start llama.cpp server (using hf.co/microsoft/Phi-3-mini-4k-instruct-gguf as an example) +llama-server --hf-repo microsoft/Phi-3-mini-4k-instruct-gguf --hf-file Phi-3-mini-4k-instruct-q4.gguf -c 4096 +``` + +A local LLaMA.cpp HTTP Server will start on `http://localhost:8080` + +read more [here](https://huggingface.co/docs/chat-ui/configuration/models/providers/llamacpp). + +**Step 2 (tell chat-ui to use local llama.cpp server):** + +Add the following to your `.env.local`: + +```ini +MODELS=`[ + { + "name": "Local microsoft/Phi-3-mini-4k-instruct-gguf", + "tokenizer": "microsoft/Phi-3-mini-4k-instruct-gguf", + "preprompt": "", + "chatPromptTemplate": "{{preprompt}}{{#each messages}}{{#ifUser}}<|user|>\n{{content}}<|end|>\n<|assistant|>\n{{/ifUser}}{{#ifAssistant}}{{content}}<|end|>\n{{/ifAssistant}}{{/each}}", + "parameters": { + "stop": ["<|end|>", "<|endoftext|>", "<|assistant|>"], + "temperature": 0.7, + "max_new_tokens": 1024, + "truncate": 3071 + }, + "endpoints": [{ + "type" : "llamacpp", + "baseURL": "http://localhost:8080" + }], + }, +]` +``` + +read more [here](https://huggingface.co/docs/chat-ui/configuration/models/providers/llamacpp). + +**Step 3 (make sure you have MongoDb running locally):** + +```bash +docker run -d -p 27017:27017 --name mongo-chatui mongo:latest +``` + +read more [here](#database). + +**Step 4 (start chat-ui):** + +```bash +git clone https://github.com/huggingface/chat-ui +cd chat-ui +npm install +npm run dev -- --open +``` + +read more [here](#launch). + ## No Setup Deploy If you don't want to configure, setup, and launch your own Chat UI yourself, you can use this option as a fast deploy alternative. @@ -415,11 +478,14 @@ MODELS=`[{ chat-ui also supports the llama.cpp API server directly without the need for an adapter. You can do this using the `llamacpp` endpoint type. -If you want to run chat-ui with llama.cpp, you can do the following, using Zephyr as an example model: +If you want to run Chat UI with llama.cpp, you can do the following, using [microsoft/Phi-3-mini-4k-instruct-gguf](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf) as an example model: -1. Get [the weights](https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/tree/main) from the hub -2. Run the server with the following command: `./server -m models/zephyr-7b-beta.Q4_K_M.gguf -c 2048 -np 3` -3. 
Add the following to your `.env.local`: +```bash +# install llama.cpp +brew install llama.cpp +# start llama.cpp server +llama-server --hf-repo microsoft/Phi-3-mini-4k-instruct-gguf --hf-file Phi-3-mini-4k-instruct-q4.gguf -c 4096 +``` ```env MODELS=`[ diff --git a/docs/source/configuration/models/providers/llamacpp.md b/docs/source/configuration/models/providers/llamacpp.md index ceeed7e501d..c92b507a5f0 100644 --- a/docs/source/configuration/models/providers/llamacpp.md +++ b/docs/source/configuration/models/providers/llamacpp.md @@ -16,7 +16,7 @@ brew install llama.cpp llama-server --hf-repo microsoft/Phi-3-mini-4k-instruct-gguf --hf-file Phi-3-mini-4k-instruct-q4.gguf -c 4096 ``` -A LLaMA.cpp HTTP Server will start on `http://localhost:8080` (to change the port or any other default options, please find [LLaMA.cpp HTTP Server readme](https://github.com/ggerganov/llama.cpp/tree/master/examples/server)). +A local LLaMA.cpp HTTP Server will start on `http://localhost:8080` (to change the port or any other default options, please find [LLaMA.cpp HTTP Server readme](https://github.com/ggerganov/llama.cpp/tree/master/examples/server)). Add the following to your `.env.local`: From 56f0247478c9f6976d3d5800209baac0cb8e893f Mon Sep 17 00:00:00 2001 From: Mishig Davaadorj Date: Wed, 29 May 2024 16:35:19 +0200 Subject: [PATCH 04/11] update index page --- docs/source/index.md | 68 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/docs/source/index.md b/docs/source/index.md index c1f8a602e89..91fd709562a 100644 --- a/docs/source/index.md +++ b/docs/source/index.md @@ -9,3 +9,71 @@ Open source chat interface with support for tools, web search, multimodal and ma 🐙 **Multimodal**: Accepts image file uploads on supported providers 👤 **OpenID**: Optionally setup OpenID for user authentication + +## Quickstart Locally + +You can quickly have a locally running chat-ui & LLM text-generation server thanks to chat-ui's [llama.cpp server support](https://huggingface.co/docs/chat-ui/configuration/models/providers/llamacpp). + +**Step 1 (Start llama.cpp server):** + +```bash +# install llama.cpp +brew install llama.cpp +# start llama.cpp server (using hf.co/microsoft/Phi-3-mini-4k-instruct-gguf as an example) +llama-server --hf-repo microsoft/Phi-3-mini-4k-instruct-gguf --hf-file Phi-3-mini-4k-instruct-q4.gguf -c 4096 +``` + +A local LLaMA.cpp HTTP Server will start on `http://localhost:8080` + +read more [here](https://huggingface.co/docs/chat-ui/configuration/models/providers/llamacpp). + +**Step 2 (tell chat-ui to use local llama.cpp server):** + +Add the following to your `.env.local`: + +```ini +MODELS=`[ + { + "name": "Local microsoft/Phi-3-mini-4k-instruct-gguf", + "tokenizer": "microsoft/Phi-3-mini-4k-instruct-gguf", + "preprompt": "", + "chatPromptTemplate": "{{preprompt}}{{#each messages}}{{#ifUser}}<|user|>\n{{content}}<|end|>\n<|assistant|>\n{{/ifUser}}{{#ifAssistant}}{{content}}<|end|>\n{{/ifAssistant}}{{/each}}", + "parameters": { + "stop": ["<|end|>", "<|endoftext|>", "<|assistant|>"], + "temperature": 0.7, + "max_new_tokens": 1024, + "truncate": 3071 + }, + "endpoints": [{ + "type" : "llamacpp", + "baseURL": "http://localhost:8080" + }], + }, +]` +``` + +read more [here](https://huggingface.co/docs/chat-ui/configuration/models/providers/llamacpp). 
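For reference, the `chatPromptTemplate` above serializes conversations into Phi-3's chat format. The heredoc below is purely illustrative and prints roughly what a single user turn looks like once the template is rendered (empty preprompt assumed):

```bash
# illustration of the rendered prompt for one user message:
# <|user|> ... <|end|>, followed by an open <|assistant|> tag for the model to complete
cat <<'EOF'
<|user|>
How do I run llama.cpp locally?<|end|>
<|assistant|>
EOF
```

The `stop` tokens in `parameters` are what cut generation off once the model emits `<|end|>`.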
+ +**Step 3 (make sure you have MongoDb running locally):** + +```bash +docker run -d -p 27017:27017 --name mongo-chatui mongo:latest +``` + +read more [here](https://github.com/huggingface/chat-ui?tab=readme-ov-file#database). + +**Step 4 (start chat-ui):** + +```bash +git clone https://github.com/huggingface/chat-ui +cd chat-ui +npm install +npm run dev -- --open +``` + +read more [here](https://github.com/huggingface/chat-ui?tab=readme-ov-file#launch). + +
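If Step 3 or Step 4 misbehaves, a couple of optional checks can save a debugging round trip. This is only a sketch: it assumes the `mongo-chatui` container started above (recent `mongo` images bundle `mongosh`) and that your `.env.local` still needs an explicit `MONGODB_URL` entry pointing at it:

```bash
# confirm the MongoDB container is running and answers a ping
docker ps --filter name=mongo-chatui
docker exec mongo-chatui mongosh --quiet --eval 'db.runCommand({ ping: 1 })'

# point chat-ui at the local database if .env.local does not set this already
echo 'MONGODB_URL=mongodb://localhost:27017' >> .env.local
```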
From 37acc3822b9511602c99c557b894422d78ddc90b Mon Sep 17 00:00:00 2001 From: Mishig Davaadorj Date: Wed, 29 May 2024 16:35:39 +0200 Subject: [PATCH 05/11] update readme --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index b677c945c5b..39e7c1d765f 100644 --- a/README.md +++ b/README.md @@ -93,6 +93,8 @@ npm run dev -- --open read more [here](#launch). + + ## No Setup Deploy If you don't want to configure, setup, and launch your own Chat UI yourself, you can use this option as a fast deploy alternative. From aef3d9b1280b0a6d0b3c641c251dbf4b774efd92 Mon Sep 17 00:00:00 2001 From: Mishig Davaadorj Date: Wed, 29 May 2024 16:38:48 +0200 Subject: [PATCH 06/11] bertter fomratting --- README.md | 29 ++++++++++++++--------------- docs/source/index.md | 8 +++----- 2 files changed, 17 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 39e7c1d765f..e593102cf48 100644 --- a/README.md +++ b/README.md @@ -20,15 +20,16 @@ load_balancing_strategy: random A chat interface using open source models, eg OpenAssistant or Llama. It is a SvelteKit app and it powers the [HuggingChat app on hf.co/chat](https://huggingface.co/chat). -0. [No Setup Deploy](#no-setup-deploy) -1. [Setup](#setup) -2. [Launch](#launch) -3. [Web Search](#web-search) -4. [Text Embedding Models](#text-embedding-models) -5. [Extra parameters](#extra-parameters) -6. [Common issues](#common-issues) -7. [Deploying to a HF Space](#deploying-to-a-hf-space) -8. [Building](#building) +0. [Quickstart Locally](#quickstart-locally) +1. [No Setup Deploy](#no-setup-deploy) +2. [Setup](#setup) +3. [Launch](#launch) +4. [Web Search](#web-search) +5. [Text Embedding Models](#text-embedding-models) +6. [Extra parameters](#extra-parameters) +7. [Common issues](#common-issues) +8. [Deploying to a HF Space](#deploying-to-a-hf-space) +9. [Building](#building) ## Quickstart Locally @@ -43,9 +44,7 @@ brew install llama.cpp llama-server --hf-repo microsoft/Phi-3-mini-4k-instruct-gguf --hf-file Phi-3-mini-4k-instruct-q4.gguf -c 4096 ``` -A local LLaMA.cpp HTTP Server will start on `http://localhost:8080` - -read more [here](https://huggingface.co/docs/chat-ui/configuration/models/providers/llamacpp). +A local LLaMA.cpp HTTP Server will start on `http://localhost:8080`. Read more [here](https://huggingface.co/docs/chat-ui/configuration/models/providers/llamacpp). **Step 2 (tell chat-ui to use local llama.cpp server):** @@ -72,7 +71,7 @@ MODELS=`[ ]` ``` -read more [here](https://huggingface.co/docs/chat-ui/configuration/models/providers/llamacpp). +Read more [here](https://huggingface.co/docs/chat-ui/configuration/models/providers/llamacpp). **Step 3 (make sure you have MongoDb running locally):** @@ -80,7 +79,7 @@ read more [here](https://huggingface.co/docs/chat-ui/configuration/models/provid docker run -d -p 27017:27017 --name mongo-chatui mongo:latest ``` -read more [here](#database). +Read more [here](#database). **Step 4 (start chat-ui):** @@ -91,7 +90,7 @@ npm install npm run dev -- --open ``` -read more [here](#launch). +Read more [here](#launch). 
diff --git a/docs/source/index.md b/docs/source/index.md index 91fd709562a..3abe38e280e 100644 --- a/docs/source/index.md +++ b/docs/source/index.md @@ -23,9 +23,7 @@ brew install llama.cpp llama-server --hf-repo microsoft/Phi-3-mini-4k-instruct-gguf --hf-file Phi-3-mini-4k-instruct-q4.gguf -c 4096 ``` -A local LLaMA.cpp HTTP Server will start on `http://localhost:8080` - -read more [here](https://huggingface.co/docs/chat-ui/configuration/models/providers/llamacpp). +A local LLaMA.cpp HTTP Server will start on `http://localhost:8080`. Read more [here](https://huggingface.co/docs/chat-ui/configuration/models/providers/llamacpp). **Step 2 (tell chat-ui to use local llama.cpp server):** @@ -52,7 +50,7 @@ MODELS=`[ ]` ``` -read more [here](https://huggingface.co/docs/chat-ui/configuration/models/providers/llamacpp). +Read more [here](https://huggingface.co/docs/chat-ui/configuration/models/providers/llamacpp). **Step 3 (make sure you have MongoDb running locally):** @@ -60,7 +58,7 @@ read more [here](https://huggingface.co/docs/chat-ui/configuration/models/provid docker run -d -p 27017:27017 --name mongo-chatui mongo:latest ``` -read more [here](https://github.com/huggingface/chat-ui?tab=readme-ov-file#database). +Read more [here](https://github.com/huggingface/chat-ui?tab=Readme-ov-file#database). **Step 4 (start chat-ui):** From 3b73abff0f503ade664a4a45c008962e13c21d7e Mon Sep 17 00:00:00 2001 From: Mishig Date: Wed, 29 May 2024 16:42:20 +0200 Subject: [PATCH 07/11] Update README.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Victor Muštar --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e593102cf48..44d0216ffcb 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ A chat interface using open source models, eg OpenAssistant or Llama. It is a Sv 8. [Deploying to a HF Space](#deploying-to-a-hf-space) 9. [Building](#building) -## Quickstart Locally +## Quickstart You can quickly have a locally running chat-ui & LLM text-generation server thanks to chat-ui's [llama.cpp server support](https://huggingface.co/docs/chat-ui/configuration/models/providers/llamacpp). From f6fd032f814631e07baf59f26bc83296c6b6b435 Mon Sep 17 00:00:00 2001 From: Mishig Date: Wed, 29 May 2024 16:42:27 +0200 Subject: [PATCH 08/11] Update README.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Victor Muštar --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 44d0216ffcb..bed18d5cf93 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ A chat interface using open source models, eg OpenAssistant or Llama. It is a Sv ## Quickstart -You can quickly have a locally running chat-ui & LLM text-generation server thanks to chat-ui's [llama.cpp server support](https://huggingface.co/docs/chat-ui/configuration/models/providers/llamacpp). +You can quickly start a locally running chat-ui & LLM text-generation server thanks to chat-ui's [llama.cpp server support](https://huggingface.co/docs/chat-ui/configuration/models/providers/llamacpp). 
**Step 1 (Start llama.cpp server):** From c6ec6818aa0512d725c2c2aac8028a7a18528828 Mon Sep 17 00:00:00 2001 From: Mishig Davaadorj Date: Wed, 29 May 2024 16:43:01 +0200 Subject: [PATCH 09/11] fix hashlink --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index bed18d5cf93..20741a9daf1 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ load_balancing_strategy: random A chat interface using open source models, eg OpenAssistant or Llama. It is a SvelteKit app and it powers the [HuggingChat app on hf.co/chat](https://huggingface.co/chat). -0. [Quickstart Locally](#quickstart-locally) +0. [Quickstart](#quickstart) 1. [No Setup Deploy](#no-setup-deploy) 2. [Setup](#setup) 3. [Launch](#launch) From 50edca76928a1fc6d3b131f07ae4315165d18b7f Mon Sep 17 00:00:00 2001 From: Mishig Davaadorj Date: Wed, 29 May 2024 16:50:33 +0200 Subject: [PATCH 10/11] document llama hf args --- docs/source/configuration/models/providers/llamacpp.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/source/configuration/models/providers/llamacpp.md b/docs/source/configuration/models/providers/llamacpp.md index c92b507a5f0..10b59772654 100644 --- a/docs/source/configuration/models/providers/llamacpp.md +++ b/docs/source/configuration/models/providers/llamacpp.md @@ -16,6 +16,8 @@ brew install llama.cpp llama-server --hf-repo microsoft/Phi-3-mini-4k-instruct-gguf --hf-file Phi-3-mini-4k-instruct-q4.gguf -c 4096 ``` +*note: you can swap the `hf-repo` and `hf-file` with your fav GGUF on the [Hub](https://huggingface.co/models?library=gguf). For example: `--hf-repo TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF` for [this repo](https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF) & `--hf-file tinyllama-1.1b-chat-v1.0.Q4_0.gguf` for [this file](https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/blob/main/tinyllama-1.1b-chat-v1.0.Q4_0.gguf).* + A local LLaMA.cpp HTTP Server will start on `http://localhost:8080` (to change the port or any other default options, please find [LLaMA.cpp HTTP Server readme](https://github.com/ggerganov/llama.cpp/tree/master/examples/server)). Add the following to your `.env.local`: From 6f27fb71bfccf323c09b6c16f20aaa6c6d28a5ff Mon Sep 17 00:00:00 2001 From: Mishig Davaadorj Date: Wed, 29 May 2024 16:52:38 +0200 Subject: [PATCH 11/11] format --- docs/source/configuration/models/providers/llamacpp.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/configuration/models/providers/llamacpp.md b/docs/source/configuration/models/providers/llamacpp.md index 10b59772654..5dfcc175ec9 100644 --- a/docs/source/configuration/models/providers/llamacpp.md +++ b/docs/source/configuration/models/providers/llamacpp.md @@ -16,7 +16,7 @@ brew install llama.cpp llama-server --hf-repo microsoft/Phi-3-mini-4k-instruct-gguf --hf-file Phi-3-mini-4k-instruct-q4.gguf -c 4096 ``` -*note: you can swap the `hf-repo` and `hf-file` with your fav GGUF on the [Hub](https://huggingface.co/models?library=gguf). For example: `--hf-repo TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF` for [this repo](https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF) & `--hf-file tinyllama-1.1b-chat-v1.0.Q4_0.gguf` for [this file](https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/blob/main/tinyllama-1.1b-chat-v1.0.Q4_0.gguf).* +_note: you can swap the `hf-repo` and `hf-file` with your fav GGUF on the [Hub](https://huggingface.co/models?library=gguf). 
For example: `--hf-repo TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF` for [this repo](https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF) & `--hf-file tinyllama-1.1b-chat-v1.0.Q4_0.gguf` for [this file](https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/blob/main/tinyllama-1.1b-chat-v1.0.Q4_0.gguf)._

 A local LLaMA.cpp HTTP Server will start on `http://localhost:8080` (to change the port or any other default options, please find [LLaMA.cpp HTTP Server readme](https://github.com/ggerganov/llama.cpp/tree/master/examples/server)).
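As a concrete (hypothetical) variation on that note, serving the TinyLlama file it links on a non-default port could look like the sketch below. Whatever port you pick, the `baseURL` in your `MODELS` entry has to follow, and a different model will generally need its own `chatPromptTemplate` and `stop` tokens as well:

```bash
# sketch: serve another GGUF from the Hub on port 8081 instead of the default 8080
llama-server \
  --hf-repo TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF \
  --hf-file tinyllama-1.1b-chat-v1.0.Q4_0.gguf \
  -c 2048 \
  --port 8081
# then set "baseURL": "http://localhost:8081" in the corresponding MODELS entry
```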