Merge branch 'upstream' into concedo_experimental
# Conflicts:
#	.devops/llama-cli-intel.Dockerfile
#	.devops/llama-server-intel.Dockerfile
#	README.md
#	ggml/src/CMakeLists.txt
#	tests/test-chat-template.cpp
LostRuins committed Jul 24, 2024
2 parents e28c42d + f19bf99 commit cca2fa9
Showing 4 changed files with 11 additions and 2 deletions.
2 changes: 1 addition & 1 deletion common/common.cpp
@@ -2724,7 +2724,7 @@ std::string llama_chat_format_single(const struct llama_model * model,
         const llama_chat_msg & new_msg,
         bool add_ass) {
     std::ostringstream ss;
-    auto fmt_past_msg = llama_chat_apply_template(model, tmpl, past_msg, false);
+    auto fmt_past_msg = past_msg.empty() ? "" : llama_chat_apply_template(model, tmpl, past_msg, false);
     std::vector<llama_chat_msg> chat_new(past_msg);
     // if the past_msg ends with a newline, we must preserve it in the formatted version
     if (add_ass && !fmt_past_msg.empty() && fmt_past_msg.back() == '\n') {
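Why this change matters: on the very first chat turn past_msg is empty, and the old code still called llama_chat_apply_template on the empty history. A minimal sketch of the fixed behavior, assuming the llama_chat_msg role/content struct from common.h and an already-loaded model (the format_first_turn helper is illustrative, not part of this commit):

#include <string>
#include <vector>
#include "common.h"  // llama_chat_msg, llama_chat_format_single

// Hypothetical helper: format the opening user turn of a conversation.
static std::string format_first_turn(llama_model * model, const std::string & tmpl) {
    std::vector<llama_chat_msg> history;        // empty on the first turn
    llama_chat_msg user_msg{"user", "Hello!"};
    // past_msg.empty() now short-circuits, so fmt_past_msg is "" instead of
    // whatever the template would render for an empty message list.
    return llama_chat_format_single(model, tmpl, history, user_msg, /*add_ass=*/true);
}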
1 change: 1 addition & 0 deletions examples/main/main.cpp
@@ -125,6 +125,7 @@ static std::string chat_add_and_format(struct llama_model * model, std::vector<l
     auto formatted = llama_chat_format_single(
             model, g_params->chat_template, chat_msgs, new_msg, role == "user");
     chat_msgs.push_back({role, content});
+    LOG("formatted: %s\n", formatted.c_str());
     return formatted;
 }

6 changes: 5 additions & 1 deletion include/llama.h
@@ -529,12 +529,16 @@ extern "C" {
             struct llama_lora_adapter * adapter,
             float scale);
 
-    // Remove a LoRA adapter from given context
+    // Remove a specific LoRA adapter from given context
+    // Return -1 if the adapter is not present in the context
     LLAMA_API int32_t llama_lora_adapter_remove(
             struct llama_context * ctx,
             struct llama_lora_adapter * adapter);
 
+    // Remove all LoRA adapters from given context
+    LLAMA_API void llama_lora_adapter_clear(
+            struct llama_context * ctx);
+
     // Manually free a LoRA adapter
     // Note: loaded adapters will be free when the associated model is deleted
     LLAMA_API void llama_lora_adapter_free(struct llama_lora_adapter * adapter);
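A hedged usage sketch of the new entry point: swapping adapters becomes a clear-then-set sequence rather than per-adapter removal. The swap_adapter helper and the 1.0f scale are illustrative; llama_lora_adapter_set is the function whose parameter list is visible at the top of this hunk:

#include "llama.h"

// Hypothetical helper: detach everything, then attach one adapter at full scale.
static void swap_adapter(llama_context * ctx, llama_lora_adapter * next) {
    llama_lora_adapter_clear(ctx);            // new in this commit: drops all attached adapters
    llama_lora_adapter_set(ctx, next, 1.0f);  // re-attach the desired adapter
}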
4 changes: 4 additions & 0 deletions src/llama.cpp
@@ -16246,6 +16246,10 @@ int32_t llama_lora_adapter_remove(
     return -1;
 }
 
+void llama_lora_adapter_clear(struct llama_context * ctx) {
+    ctx->lora_adapters.clear();
+}
+
 void llama_lora_adapter_free(struct llama_lora_adapter * adapter) {
     delete adapter;
 }
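Note the ownership semantics of the one-line implementation: clear() only empties the context's adapter container; the adapters themselves are not freed. Per the header comments above, a teardown would still look roughly like:

llama_lora_adapter_clear(ctx);     // detach from the context only
llama_lora_adapter_free(adapter);  // release it explicitly (or let model deletion do so)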
