From 2ebe7b71db42753a72fbab8224518159cfcbbe91 Mon Sep 17 00:00:00 2001
From: Edgar Ruiz
Date: Tue, 10 Sep 2024 17:07:16 -0500
Subject: [PATCH 01/15] Attempts json output for extract

---
 DESCRIPTION          | 1 +
 NAMESPACE            | 1 +
 R/llm-extract.R      | 6 ++++--
 R/m-backend-prompt.R | 2 +-
 R/mall.R             | 1 +
 5 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 40ec43e..b3ff088 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -16,6 +16,7 @@ Imports:
     cli,
     dplyr,
     glue,
+    jsonlite,
    ollamar,
    rlang
 Suggests:
diff --git a/NAMESPACE b/NAMESPACE
index 9330444..072095d 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -33,6 +33,7 @@ import(rlang)
 importFrom(dplyr,bind_cols)
 importFrom(dplyr,mutate)
 importFrom(dplyr,tibble)
+importFrom(jsonlite,fromJSON)
 importFrom(ollamar,generate)
 importFrom(ollamar,list_models)
 importFrom(ollamar,test_connection)
diff --git a/R/llm-extract.R b/R/llm-extract.R
index 3f043ce..77cd6c2 100644
--- a/R/llm-extract.R
+++ b/R/llm-extract.R
@@ -39,7 +39,7 @@ llm_extract.data.frame <- function(.data,
   resp <- map(
     resp,
     \(x) ({
-      x <- trimws(strsplit(x, "\\|")[[1]])
+      x <- strsplit(x, "\\|")[[1]]
       names(x) <- clean_names(labels)
       x
     })
@@ -76,10 +76,12 @@ llm_extract.data.frame <- function(.data,
 llm_vec_extract <- function(x,
                             labels = c(),
                             additional_prompt = "") {
-  llm_vec_prompt(
+  resp <- llm_vec_prompt(
     x = x,
     prompt_label = "extract",
     labels = labels,
     additional_prompt = additional_prompt
   )
+  print(resp)
+  map_chr(resp, \(x) paste0(as.character(fromJSON(x)), collapse = "|"))
 }
diff --git a/R/m-backend-prompt.R b/R/m-backend-prompt.R
index 7223a97..51aef8b 100644
--- a/R/m-backend-prompt.R
+++ b/R/m-backend-prompt.R
@@ -44,7 +44,7 @@ m_backend_prompt.mall_defaults <- function(backend, additional = "") {
         "Extract the {labels} being referred to on the text.",
         "I expect {no_labels} item(s) exactly.",
         "No capitalization. No explanations.",
-        "Return the response in a simple pipe separated list, no headers.",
+        "Return the response in pure JSON format, no code decorators.",
         additional,
         "The answer is based on the following text:"
       ))
diff --git a/R/mall.R b/R/mall.R
index a468176..68c7389 100644
--- a/R/mall.R
+++ b/R/mall.R
@@ -1,6 +1,7 @@
 #' @importFrom ollamar generate test_connection list_models
 #' @importFrom dplyr mutate tibble bind_cols
 #' @importFrom utils menu
+#' @importFrom jsonlite fromJSON
 #' @import rlang
 #' @import glue
 #' @import cli

From 3e8bdb7919dba3a76307d0a8d4d730ce68a43531 Mon Sep 17 00:00:00 2001
From: Edgar Ruiz
Date: Wed, 11 Sep 2024 07:03:22 -0500
Subject: [PATCH 02/15] Moves JSON blurb to system call

---
 R/m-backend-generate.R | 1 +
 R/m-backend-prompt.R   | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/m-backend-generate.R b/R/m-backend-generate.R
index a9741fb..9824360 100644
--- a/R/m-backend-generate.R
+++ b/R/m-backend-generate.R
@@ -24,6 +24,7 @@ m_backend_generate.mall_ollama <- function(backend, x, base_prompt) {
       .args <- c(
         prompt = glue("{base_prompt}\n{x}"),
         output = "text",
+        system = "You are an assistant that only speak JSON. Do not write normal text",
         args
       )
       exec("generate", !!!.args)
diff --git a/R/m-backend-prompt.R b/R/m-backend-prompt.R
index 51aef8b..c50b57f 100644
--- a/R/m-backend-prompt.R
+++ b/R/m-backend-prompt.R
@@ -44,7 +44,6 @@ m_backend_prompt.mall_defaults <- function(backend, additional = "") {
         "Extract the {labels} being referred to on the text.",
         "I expect {no_labels} item(s) exactly.",
         "No capitalization. No explanations.",
-        "Return the response in pure JSON format, no code decorators.",
         additional,
         "The answer is based on the following text:"
       ))

From c4818e11ea1baac4aee90eebf66e00519eb28def Mon Sep 17 00:00:00 2001
From: Edgar Ruiz
Date: Wed, 11 Sep 2024 07:14:11 -0500
Subject: [PATCH 03/15] Changes name from generate to submit

---
 NAMESPACE                                        |  6 +++---
 R/llm-custom.R                                   |  2 +-
 R/llm-extract.R                                  |  1 -
 R/m-backend-prompt.R                             |  2 +-
 R/{m-backend-generate.R => m-backend-submit.R}   | 10 +++++-----
 ...m_backend_generate.Rd => m_backend_submit.Rd} | 16 ++++++++--------
 6 files changed, 18 insertions(+), 19 deletions(-)
 rename R/{m-backend-generate.R => m-backend-submit.R} (77%)
 rename man/{m_backend_generate.Rd => m_backend_submit.Rd} (70%)

diff --git a/NAMESPACE b/NAMESPACE
index 072095d..c397156 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -8,9 +8,9 @@ S3method(llm_sentiment,data.frame)
 S3method(llm_summarize,"tbl_Spark SQL")
 S3method(llm_summarize,data.frame)
 S3method(llm_translate,data.frame)
-S3method(m_backend_generate,mall_ollama)
-S3method(m_backend_generate,mall_simulate_llm)
 S3method(m_backend_prompt,mall_defaults)
+S3method(m_backend_submit,mall_ollama)
+S3method(m_backend_submit,mall_simulate_llm)
 S3method(print,mall_defaults)
 export(llm_classify)
 export(llm_custom)
@@ -25,8 +25,8 @@ export(llm_vec_extract)
 export(llm_vec_sentiment)
 export(llm_vec_summarize)
 export(llm_vec_translate)
-export(m_backend_generate)
 export(m_backend_prompt)
+export(m_backend_submit)
 import(cli)
 import(glue)
 import(rlang)
diff --git a/R/llm-custom.R b/R/llm-custom.R
index 77c2bc2..22363c0 100644
--- a/R/llm-custom.R
+++ b/R/llm-custom.R
@@ -41,7 +41,7 @@ llm_custom.data.frame <- function(.data,
 #' @export
 llm_vec_custom <- function(x, prompt, valid_resps = NULL) {
   llm_use(.silent = TRUE, force = FALSE)
-  resp <- m_backend_generate(defaults_get(), x, prompt)
+  resp <- m_backend_submit(defaults_get(), x, prompt)
   if (!is.null(valid_resps)) {
     errors <- !resp %in% valid_resps
     resp[errors] <- NA
diff --git a/R/llm-extract.R b/R/llm-extract.R
index 77cd6c2..b8f0212 100644
--- a/R/llm-extract.R
+++ b/R/llm-extract.R
@@ -82,6 +82,5 @@ llm_vec_extract <- function(x,
     labels = labels,
     additional_prompt = additional_prompt
   )
-  print(resp)
   map_chr(resp, \(x) paste0(as.character(fromJSON(x)), collapse = "|"))
 }
diff --git a/R/m-backend-prompt.R b/R/m-backend-prompt.R
index c50b57f..803c699 100644
--- a/R/m-backend-prompt.R
+++ b/R/m-backend-prompt.R
@@ -1,4 +1,4 @@
-#' @rdname m_backend_generate
+#' @rdname m_backend_submit
 #' @export
 m_backend_prompt <- function(backend, additional) {
   UseMethod("m_backend_prompt")
 }
diff --git a/R/m-backend-generate.R b/R/m-backend-submit.R
similarity index 77%
rename from R/m-backend-generate.R
rename to R/m-backend-submit.R
index 9824360..cca2ac1 100644
--- a/R/m-backend-generate.R
+++ b/R/m-backend-submit.R
@@ -5,17 +5,17 @@
 #' @param base_prompt The instructions to the LLM about what to do with `x`
 #' @param additional Additional text to insert to the `base_prompt`
 #'
-#' @returns `m_backend_generate` does not return an object. `m_backend_prompt`
+#' @returns `m_backend_submit` does not return an object. `m_backend_prompt`
 #' returns a list of functions that contain the base prompts.
#' #' @keywords internal #' @export -m_backend_generate <- function(backend, x, base_prompt) { - UseMethod("m_backend_generate") +m_backend_submit <- function(backend, x, base_prompt) { + UseMethod("m_backend_submit") } #' @export -m_backend_generate.mall_ollama <- function(backend, x, base_prompt) { +m_backend_submit.mall_ollama <- function(backend, x, base_prompt) { args <- as.list(backend) args$backend <- NULL map_chr( @@ -33,7 +33,7 @@ m_backend_generate.mall_ollama <- function(backend, x, base_prompt) { } #' @export -m_backend_generate.mall_simulate_llm <- function(backend, x, base_prompt) { +m_backend_submit.mall_simulate_llm <- function(backend, x, base_prompt) { args <- backend class(args) <- "list" if (args$model == "pipe") { diff --git a/man/m_backend_generate.Rd b/man/m_backend_submit.Rd similarity index 70% rename from man/m_backend_generate.Rd rename to man/m_backend_submit.Rd index 61878e1..f7fe8fa 100644 --- a/man/m_backend_generate.Rd +++ b/man/m_backend_submit.Rd @@ -1,25 +1,25 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/m-backend-generate.R, R/m-backend-prompt.R -\name{m_backend_generate} -\alias{m_backend_generate} +% Please edit documentation in R/m-backend-prompt.R, R/m-backend-submit.R +\name{m_backend_prompt} \alias{m_backend_prompt} +\alias{m_backend_submit} \title{Functions to integrate different back-ends} \usage{ -m_backend_generate(backend, x, base_prompt) - m_backend_prompt(backend, additional) + +m_backend_submit(backend, x, base_prompt) } \arguments{ \item{backend}{An \code{mall_defaults} object} +\item{additional}{Additional text to insert to the \code{base_prompt}} + \item{x}{The body of the text to be submitted to the LLM} \item{base_prompt}{The instructions to the LLM about what to do with \code{x}} - -\item{additional}{Additional text to insert to the \code{base_prompt}} } \value{ -\code{m_backend_generate} does not return an object. \code{m_backend_prompt} +\code{m_backend_submit} does not return an object. \code{m_backend_prompt} returns a list of functions that contain the base prompts. } \description{ From e75dafa650d238709dc749564a8acbb2a17e40e7 Mon Sep 17 00:00:00 2001 From: Edgar Ruiz Date: Wed, 11 Sep 2024 08:16:02 -0500 Subject: [PATCH 04/15] Starts switch of ollama call to chat, extends prompt to list --- NAMESPACE | 2 +- R/m-backend-prompt.R | 26 ++++++++++++++++++-------- R/m-backend-submit.R | 2 +- R/mall.R | 2 +- 4 files changed, 21 insertions(+), 11 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index c397156..a79e90c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -34,7 +34,7 @@ importFrom(dplyr,bind_cols) importFrom(dplyr,mutate) importFrom(dplyr,tibble) importFrom(jsonlite,fromJSON) -importFrom(ollamar,generate) +importFrom(ollamar,chat) importFrom(ollamar,list_models) importFrom(ollamar,test_connection) importFrom(utils,menu) diff --git a/R/m-backend-prompt.R b/R/m-backend-prompt.R index 803c699..f8cebd1 100644 --- a/R/m-backend-prompt.R +++ b/R/m-backend-prompt.R @@ -39,14 +39,24 @@ m_backend_prompt.mall_defaults <- function(backend, additional = "") { extract = function(labels) { no_labels <- length(labels) labels <- paste0(labels, collapse = ", ") - glue(paste( - "You are a helpful text extraction engine.", - "Extract the {labels} being referred to on the text.", - "I expect {no_labels} item(s) exactly.", - "No capitalization. 
No explanations.", - additional, - "The answer is based on the following text:" - )) + list( + list( + role = "system", + content = paste("You are an assistant that only speak JSON.", + "Do not write normal text") + ), + list( + role = "user", + content = glue(paste( + "You are a helpful text extraction engine.", + "Extract the {labels} being referred to on the text.", + "I expect {no_labels} item(s) exactly.", + "No capitalization. No explanations.", + "{additional}", + "The answer is based on the following text:" + )) + ) + ) }, translate = function(language) { glue(paste( diff --git a/R/m-backend-submit.R b/R/m-backend-submit.R index cca2ac1..0c5291e 100644 --- a/R/m-backend-submit.R +++ b/R/m-backend-submit.R @@ -27,7 +27,7 @@ m_backend_submit.mall_ollama <- function(backend, x, base_prompt) { system = "You are an assistant that only speak JSON. Do not write normal text", args ) - exec("generate", !!!.args) + exec("chat", !!!.args) } ) } diff --git a/R/mall.R b/R/mall.R index 68c7389..051f5b9 100644 --- a/R/mall.R +++ b/R/mall.R @@ -1,4 +1,4 @@ -#' @importFrom ollamar generate test_connection list_models +#' @importFrom ollamar chat test_connection list_models #' @importFrom dplyr mutate tibble bind_cols #' @importFrom utils menu #' @importFrom jsonlite fromJSON From 3f0a275dc7eb1e775585590f0b0228e813f630cc Mon Sep 17 00:00:00 2001 From: Edgar Ruiz Date: Wed, 11 Sep 2024 09:52:07 -0500 Subject: [PATCH 05/15] Adds x to backend prompt --- R/llm-classify.R | 3 ++- R/m-backend-prompt.R | 22 +++++++++++----------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/R/llm-classify.R b/R/llm-classify.R index 81cfa4e..75e95f9 100644 --- a/R/llm-classify.R +++ b/R/llm-classify.R @@ -45,7 +45,8 @@ llm_vec_classify <- function(x, labels, additional_prompt = "") { llm_vec_prompt( - x = x, prompt_label = "classify", + x = x, + prompt_label = "classify", additional_prompt = additional_prompt, labels = labels, valid_resps = labels diff --git a/R/m-backend-prompt.R b/R/m-backend-prompt.R index f8cebd1..0ed06df 100644 --- a/R/m-backend-prompt.R +++ b/R/m-backend-prompt.R @@ -5,16 +5,16 @@ m_backend_prompt <- function(backend, additional) { } #' @export -m_backend_prompt.mall_defaults <- function(backend, additional = "") { +m_backend_prompt.mall_defaults <- function(backend, x = "", additional = "") { list( sentiment = function(options) { options <- paste0(options, collapse = ", ") - x <- glue(paste( + glue(paste( "You are a helpful sentiment engine.", "Return only one of the following answers: {options}.", "No capitalization. No explanations.", - additional, - "The answer is based on the following text:" + "{additional}", + "The answer is based on the following text:\n{x}" )) }, summarize = function(max_words) { @@ -22,8 +22,8 @@ m_backend_prompt.mall_defaults <- function(backend, additional = "") { "You are a helpful summarization engine.", "Your answer will contain no no capitalization and no explanations.", "Return no more than {max_words} words.", - additional, - "The answer is the summary of the following text:" + "{additional}", + "The answer is the summary of the following text:\n{x}" )) }, classify = function(labels) { @@ -32,8 +32,8 @@ m_backend_prompt.mall_defaults <- function(backend, additional = "") { "You are a helpful classification engine.", "Determine if the text refers to one of the following: {labels}.", "No capitalization. 
No explanations.", - additional, - "The answer is based on the following text:" + "{additional}", + "The answer is based on the following text:\n{x}" )) }, extract = function(labels) { @@ -53,7 +53,7 @@ m_backend_prompt.mall_defaults <- function(backend, additional = "") { "I expect {no_labels} item(s) exactly.", "No capitalization. No explanations.", "{additional}", - "The answer is based on the following text:" + "The answer is based on the following text:\n{x}" )) ) ) @@ -63,8 +63,8 @@ m_backend_prompt.mall_defaults <- function(backend, additional = "") { "You are a helpful translation engine.", "You will return only the translation text, no explanations.", "The target language to translate to is: {language}.", - additional, - "The answer is the summary of the following text:" + "{additional}", + "The answer is the summary of the following text:\n{x}" )) } ) From f66b5ce8e5da6d26e008fae32331853aaa07e940 Mon Sep 17 00:00:00 2001 From: Edgar Ruiz Date: Wed, 11 Sep 2024 11:37:29 -0500 Subject: [PATCH 06/15] Expands selection between character and list --- R/llm-custom.R | 16 ++++++++++++---- R/m-backend-prompt.R | 10 +++++----- R/m-backend-submit.R | 14 ++++++-------- man/llm_custom.Rd | 4 ++-- man/m_backend_submit.Rd | 8 +++----- 5 files changed, 28 insertions(+), 24 deletions(-) diff --git a/R/llm-custom.R b/R/llm-custom.R index 22363c0..5508324 100644 --- a/R/llm-custom.R +++ b/R/llm-custom.R @@ -15,7 +15,7 @@ llm_custom <- function( .data, col, - prompt, + prompt = "", pred_name = ".pred", valid_resps = "") { UseMethod("llm_custom") @@ -24,7 +24,7 @@ llm_custom <- function( #' @export llm_custom.data.frame <- function(.data, col, - prompt, + prompt = "", pred_name = ".pred", valid_resps = NULL) { mutate( @@ -39,9 +39,17 @@ llm_custom.data.frame <- function(.data, #' @rdname llm_custom #' @export -llm_vec_custom <- function(x, prompt, valid_resps = NULL) { +llm_vec_custom <- function(x, prompt = "", valid_resps = NULL) { llm_use(.silent = TRUE, force = FALSE) - resp <- m_backend_submit(defaults_get(), x, prompt) + if(inherits(x, "list")) { + to_llm <- x + } else { + to_llm <- list( + role = "user", + content = glue("{prompt}\n{x}") + ) + } + resp <- m_backend_submit(defaults_get(), to_llm) if (!is.null(valid_resps)) { errors <- !resp %in% valid_resps resp[errors] <- NA diff --git a/R/m-backend-prompt.R b/R/m-backend-prompt.R index 0ed06df..b34b388 100644 --- a/R/m-backend-prompt.R +++ b/R/m-backend-prompt.R @@ -1,6 +1,6 @@ #' @rdname m_backend_submit #' @export -m_backend_prompt <- function(backend, additional) { +m_backend_prompt <- function(backend, x, additional) { UseMethod("m_backend_prompt") } @@ -70,8 +70,8 @@ m_backend_prompt.mall_defaults <- function(backend, x = "", additional = "") { ) } -get_prompt <- function(label, ..., .additional = "") { - defaults <- m_backend_prompt(defaults_get(), .additional) +get_prompt <- function(label, x, ..., .additional = "") { + defaults <- m_backend_prompt(defaults_get(), x = x, additional = .additional) fn <- defaults[[label]] fn(...) } @@ -83,6 +83,6 @@ llm_vec_prompt <- function(x, valid_resps = NULL, ...) 
{ llm_use(.silent = TRUE, force = FALSE) - prompt <- get_prompt(prompt_label, ..., .additional = additional_prompt) - llm_vec_custom(x, prompt, valid_resps) + x <- get_prompt(prompt_label, x = x, ..., .additional = additional_prompt) + llm_vec_custom(x = x, valid_resps = valid_resps) } diff --git a/R/m-backend-submit.R b/R/m-backend-submit.R index 0c5291e..74a9053 100644 --- a/R/m-backend-submit.R +++ b/R/m-backend-submit.R @@ -2,7 +2,6 @@ #' #' @param backend An `mall_defaults` object #' @param x The body of the text to be submitted to the LLM -#' @param base_prompt The instructions to the LLM about what to do with `x` #' @param additional Additional text to insert to the `base_prompt` #' #' @returns `m_backend_submit` does not return an object. `m_backend_prompt` @@ -10,21 +9,22 @@ #' #' @keywords internal #' @export -m_backend_submit <- function(backend, x, base_prompt) { +m_backend_submit <- function(backend, x) { UseMethod("m_backend_submit") } #' @export -m_backend_submit.mall_ollama <- function(backend, x, base_prompt) { +m_backend_submit.mall_ollama <- function(backend, x) { args <- as.list(backend) args$backend <- NULL + args <- + print(x) map_chr( x, \(x) { .args <- c( - prompt = glue("{base_prompt}\n{x}"), + messages = x, output = "text", - system = "You are an assistant that only speak JSON. Do not write normal text", args ) exec("chat", !!!.args) @@ -33,14 +33,12 @@ m_backend_submit.mall_ollama <- function(backend, x, base_prompt) { } #' @export -m_backend_submit.mall_simulate_llm <- function(backend, x, base_prompt) { +m_backend_submit.mall_simulate_llm <- function(backend, x) { args <- backend class(args) <- "list" if (args$model == "pipe") { out <- trimws(strsplit(x, "\\|")[[1]][[2]]) } else if (args$model == "prompt") { - out <- glue("{base_prompt}\n{x}") - } else if (args$model == "echo") { out <- x } else { out <- list( diff --git a/man/llm_custom.Rd b/man/llm_custom.Rd index c723271..d6cce66 100644 --- a/man/llm_custom.Rd +++ b/man/llm_custom.Rd @@ -5,9 +5,9 @@ \alias{llm_vec_custom} \title{Send a custom prompt to the LLM} \usage{ -llm_custom(.data, col, prompt, pred_name = ".pred", valid_resps = "") +llm_custom(.data, col, prompt = "", pred_name = ".pred", valid_resps = "") -llm_vec_custom(x, prompt, valid_resps = NULL) +llm_vec_custom(x, prompt = "", valid_resps = NULL) } \arguments{ \item{.data}{A \code{data.frame} or \code{tbl} object that contains the text to be analyzed} diff --git a/man/m_backend_submit.Rd b/man/m_backend_submit.Rd index f7fe8fa..6f745d4 100644 --- a/man/m_backend_submit.Rd +++ b/man/m_backend_submit.Rd @@ -5,18 +5,16 @@ \alias{m_backend_submit} \title{Functions to integrate different back-ends} \usage{ -m_backend_prompt(backend, additional) +m_backend_prompt(backend, x, additional) -m_backend_submit(backend, x, base_prompt) +m_backend_submit(backend, x) } \arguments{ \item{backend}{An \code{mall_defaults} object} -\item{additional}{Additional text to insert to the \code{base_prompt}} - \item{x}{The body of the text to be submitted to the LLM} -\item{base_prompt}{The instructions to the LLM about what to do with \code{x}} +\item{additional}{Additional text to insert to the \code{base_prompt}} } \value{ \code{m_backend_submit} does not return an object. 
\code{m_backend_prompt} From b4f4f27e16eb63689db52986193036dd7670eaab Mon Sep 17 00:00:00 2001 From: Edgar Ruiz Date: Wed, 11 Sep 2024 14:16:23 -0500 Subject: [PATCH 07/15] Starts to get shape --- R/llm-custom.R | 10 +--------- R/m-backend-prompt.R | 22 +++++++++++----------- R/m-backend-submit.R | 14 ++++++++------ man/m_backend_submit.Rd | 8 +++++--- 4 files changed, 25 insertions(+), 29 deletions(-) diff --git a/R/llm-custom.R b/R/llm-custom.R index 5508324..0e0fad4 100644 --- a/R/llm-custom.R +++ b/R/llm-custom.R @@ -41,15 +41,7 @@ llm_custom.data.frame <- function(.data, #' @export llm_vec_custom <- function(x, prompt = "", valid_resps = NULL) { llm_use(.silent = TRUE, force = FALSE) - if(inherits(x, "list")) { - to_llm <- x - } else { - to_llm <- list( - role = "user", - content = glue("{prompt}\n{x}") - ) - } - resp <- m_backend_submit(defaults_get(), to_llm) + resp <- m_backend_submit(defaults_get(), x, prompt) if (!is.null(valid_resps)) { errors <- !resp %in% valid_resps resp[errors] <- NA diff --git a/R/m-backend-prompt.R b/R/m-backend-prompt.R index b34b388..1e70f85 100644 --- a/R/m-backend-prompt.R +++ b/R/m-backend-prompt.R @@ -1,11 +1,11 @@ #' @rdname m_backend_submit #' @export -m_backend_prompt <- function(backend, x, additional) { +m_backend_prompt <- function(backend, additional) { UseMethod("m_backend_prompt") } #' @export -m_backend_prompt.mall_defaults <- function(backend, x = "", additional = "") { +m_backend_prompt.mall_defaults <- function(backend, additional = "") { list( sentiment = function(options) { options <- paste0(options, collapse = ", ") @@ -14,7 +14,7 @@ m_backend_prompt.mall_defaults <- function(backend, x = "", additional = "") { "Return only one of the following answers: {options}.", "No capitalization. No explanations.", "{additional}", - "The answer is based on the following text:\n{x}" + "The answer is based on the following text:\n{{x}}" )) }, summarize = function(max_words) { @@ -23,7 +23,7 @@ m_backend_prompt.mall_defaults <- function(backend, x = "", additional = "") { "Your answer will contain no no capitalization and no explanations.", "Return no more than {max_words} words.", "{additional}", - "The answer is the summary of the following text:\n{x}" + "The answer is the summary of the following text:\n{{x}}" )) }, classify = function(labels) { @@ -33,7 +33,7 @@ m_backend_prompt.mall_defaults <- function(backend, x = "", additional = "") { "Determine if the text refers to one of the following: {labels}.", "No capitalization. No explanations.", "{additional}", - "The answer is based on the following text:\n{x}" + "The answer is based on the following text:\n{{x}}" )) }, extract = function(labels) { @@ -53,7 +53,7 @@ m_backend_prompt.mall_defaults <- function(backend, x = "", additional = "") { "I expect {no_labels} item(s) exactly.", "No capitalization. 
No explanations.", "{additional}", - "The answer is based on the following text:\n{x}" + "The answer is based on the following text:\n{{x}}" )) ) ) @@ -64,14 +64,14 @@ m_backend_prompt.mall_defaults <- function(backend, x = "", additional = "") { "You will return only the translation text, no explanations.", "The target language to translate to is: {language}.", "{additional}", - "The answer is the summary of the following text:\n{x}" + "The answer is the summary of the following text:\n{{x}}" )) } ) } -get_prompt <- function(label, x, ..., .additional = "") { - defaults <- m_backend_prompt(defaults_get(), x = x, additional = .additional) +get_prompt <- function(label, ..., .additional = "") { + defaults <- m_backend_prompt(defaults_get(), additional = .additional) fn <- defaults[[label]] fn(...) } @@ -83,6 +83,6 @@ llm_vec_prompt <- function(x, valid_resps = NULL, ...) { llm_use(.silent = TRUE, force = FALSE) - x <- get_prompt(prompt_label, x = x, ..., .additional = additional_prompt) - llm_vec_custom(x = x, valid_resps = valid_resps) + prompt <- get_prompt(prompt_label, ..., .additional = additional_prompt) + llm_vec_custom(x, prompt, valid_resps = valid_resps) } diff --git a/R/m-backend-submit.R b/R/m-backend-submit.R index 74a9053..0cae3ff 100644 --- a/R/m-backend-submit.R +++ b/R/m-backend-submit.R @@ -2,6 +2,7 @@ #' #' @param backend An `mall_defaults` object #' @param x The body of the text to be submitted to the LLM +#' @param prompt The additional information to add to the submission #' @param additional Additional text to insert to the `base_prompt` #' #' @returns `m_backend_submit` does not return an object. `m_backend_prompt` @@ -9,31 +10,32 @@ #' #' @keywords internal #' @export -m_backend_submit <- function(backend, x) { +m_backend_submit <- function(backend, x, prompt) { UseMethod("m_backend_submit") } #' @export -m_backend_submit.mall_ollama <- function(backend, x) { +m_backend_submit.mall_ollama <- function(backend, x, prompt) { args <- as.list(backend) args$backend <- NULL - args <- - print(x) + map_chr( x, \(x) { .args <- c( - messages = x, + #messages = map(prompt, \(i) map(i, \(j) glue(j, x = x))), + messages = prompt, output = "text", args ) + print(.args) exec("chat", !!!.args) } ) } #' @export -m_backend_submit.mall_simulate_llm <- function(backend, x) { +m_backend_submit.mall_simulate_llm <- function(backend, x, prompt) { args <- backend class(args) <- "list" if (args$model == "pipe") { diff --git a/man/m_backend_submit.Rd b/man/m_backend_submit.Rd index 6f745d4..d517182 100644 --- a/man/m_backend_submit.Rd +++ b/man/m_backend_submit.Rd @@ -5,16 +5,18 @@ \alias{m_backend_submit} \title{Functions to integrate different back-ends} \usage{ -m_backend_prompt(backend, x, additional) +m_backend_prompt(backend, additional) -m_backend_submit(backend, x) +m_backend_submit(backend, x, prompt) } \arguments{ \item{backend}{An \code{mall_defaults} object} +\item{additional}{Additional text to insert to the \code{base_prompt}} + \item{x}{The body of the text to be submitted to the LLM} -\item{additional}{Additional text to insert to the \code{base_prompt}} +\item{prompt}{The additional information to add to the submission} } \value{ \code{m_backend_submit} does not return an object. 
\code{m_backend_prompt} From 9976b7b7345ed3822b843f871edc882fb12fcaf1 Mon Sep 17 00:00:00 2001 From: Edgar Ruiz Date: Wed, 11 Sep 2024 14:18:51 -0500 Subject: [PATCH 08/15] No errors, bad output --- R/m-backend-submit.R | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/R/m-backend-submit.R b/R/m-backend-submit.R index 0cae3ff..cc8ec80 100644 --- a/R/m-backend-submit.R +++ b/R/m-backend-submit.R @@ -23,12 +23,10 @@ m_backend_submit.mall_ollama <- function(backend, x, prompt) { x, \(x) { .args <- c( - #messages = map(prompt, \(i) map(i, \(j) glue(j, x = x))), - messages = prompt, + messages = list(map(prompt, \(i) map(i, \(j) glue(j, x = x)))), output = "text", args ) - print(.args) exec("chat", !!!.args) } ) From 64376b3b60c0c08b7a128b7944889d0a0c3cc157 Mon Sep 17 00:00:00 2001 From: Edgar Ruiz Date: Wed, 11 Sep 2024 16:30:52 -0500 Subject: [PATCH 09/15] Finally gaining stability --- R/llm-extract.R | 4 +++- R/m-backend-prompt.R | 10 ++++++---- R/m-backend-submit.R | 1 - 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/R/llm-extract.R b/R/llm-extract.R index b8f0212..03c9e90 100644 --- a/R/llm-extract.R +++ b/R/llm-extract.R @@ -82,5 +82,7 @@ llm_vec_extract <- function(x, labels = labels, additional_prompt = additional_prompt ) - map_chr(resp, \(x) paste0(as.character(fromJSON(x)), collapse = "|")) + print(resp) + map(resp, fromJSON) + #map_chr(resp, \(x) paste0(as.character(fromJSON(x, flatten = TRUE)), collapse = "|")) } diff --git a/R/m-backend-prompt.R b/R/m-backend-prompt.R index 1e70f85..25e2624 100644 --- a/R/m-backend-prompt.R +++ b/R/m-backend-prompt.R @@ -38,20 +38,22 @@ m_backend_prompt.mall_defaults <- function(backend, additional = "") { }, extract = function(labels) { no_labels <- length(labels) - labels <- paste0(labels, collapse = ", ") + col_labels <- paste0(labels, collapse = ", ") + json_labels <- paste0("\"", labels,"\":your answer", collapse = ",") + json_labels <- paste0("{{", json_labels, "}}") list( list( role = "system", - content = paste("You are an assistant that only speak JSON.", - "Do not write normal text") + content = "You only speak simple JSON. Do not write normal text. You will avoid extraneous white spaces " ), list( role = "user", content = glue(paste( "You are a helpful text extraction engine.", - "Extract the {labels} being referred to on the text.", + "Extract the {col_labels} being referred to on the text.", "I expect {no_labels} item(s) exactly.", "No capitalization. 
No explanations.", + "You will use this JSON this format exclusively: {json_labels} .", "{additional}", "The answer is based on the following text:\n{{x}}" )) diff --git a/R/m-backend-submit.R b/R/m-backend-submit.R index cc8ec80..0132c5b 100644 --- a/R/m-backend-submit.R +++ b/R/m-backend-submit.R @@ -18,7 +18,6 @@ m_backend_submit <- function(backend, x, prompt) { m_backend_submit.mall_ollama <- function(backend, x, prompt) { args <- as.list(backend) args$backend <- NULL - map_chr( x, \(x) { From fe6ae1485810c44547b98e3d787fb37c9bebfdc4 Mon Sep 17 00:00:00 2001 From: Edgar Ruiz Date: Wed, 11 Sep 2024 16:38:50 -0500 Subject: [PATCH 10/15] Some cleanup of the code --- R/llm-extract.R | 7 ++++--- R/m-backend-prompt.R | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/R/llm-extract.R b/R/llm-extract.R index 03c9e90..bbdb288 100644 --- a/R/llm-extract.R +++ b/R/llm-extract.R @@ -82,7 +82,8 @@ llm_vec_extract <- function(x, labels = labels, additional_prompt = additional_prompt ) - print(resp) - map(resp, fromJSON) - #map_chr(resp, \(x) paste0(as.character(fromJSON(x, flatten = TRUE)), collapse = "|")) + map_chr( + resp, + \(x) paste0(as.character(fromJSON(x, flatten = TRUE)), collapse = "|") + ) } diff --git a/R/m-backend-prompt.R b/R/m-backend-prompt.R index 25e2624..45a8c4b 100644 --- a/R/m-backend-prompt.R +++ b/R/m-backend-prompt.R @@ -44,14 +44,14 @@ m_backend_prompt.mall_defaults <- function(backend, additional = "") { list( list( role = "system", - content = "You only speak simple JSON. Do not write normal text. You will avoid extraneous white spaces " + content = "You only speak simple JSON. Do not write normal text." ), list( role = "user", content = glue(paste( "You are a helpful text extraction engine.", "Extract the {col_labels} being referred to on the text.", - "I expect {no_labels} item(s) exactly.", + "I expect {no_labels} item{ifelse(no_labels > 1, 's', '')} exactly.", "No capitalization. No explanations.", "You will use this JSON this format exclusively: {json_labels} .", "{additional}", From 354eb10ce21496aa87ab31a87c0661871c001b49 Mon Sep 17 00:00:00 2001 From: Edgar Ruiz Date: Wed, 11 Sep 2024 16:48:17 -0500 Subject: [PATCH 11/15] Updates format for the other prompts --- R/m-backend-prompt.R | 87 +++++++++++++++++++++++++++----------------- 1 file changed, 54 insertions(+), 33 deletions(-) diff --git a/R/m-backend-prompt.R b/R/m-backend-prompt.R index 45a8c4b..d34c34b 100644 --- a/R/m-backend-prompt.R +++ b/R/m-backend-prompt.R @@ -9,65 +9,86 @@ m_backend_prompt.mall_defaults <- function(backend, additional = "") { list( sentiment = function(options) { options <- paste0(options, collapse = ", ") - glue(paste( - "You are a helpful sentiment engine.", - "Return only one of the following answers: {options}.", - "No capitalization. No explanations.", - "{additional}", - "The answer is based on the following text:\n{{x}}" - )) + list( + list( + role = "user", + content = glue(paste( + "You are a helpful sentiment engine.", + "Return only one of the following answers: {options}.", + "No capitalization. 
No explanations.", + "{additional}", + "The answer is based on the following text:\n{{x}}" + )) + ) + ) }, summarize = function(max_words) { - glue(paste( - "You are a helpful summarization engine.", - "Your answer will contain no no capitalization and no explanations.", - "Return no more than {max_words} words.", - "{additional}", - "The answer is the summary of the following text:\n{{x}}" - )) + list( + list( + role = "user", + content = glue(paste( + "You are a helpful summarization engine.", + "Your answer will contain no no capitalization and no explanations.", + "Return no more than {max_words} words.", + "{additional}", + "The answer is the summary of the following text:\n{{x}}" + )) + ) + ) }, classify = function(labels) { labels <- paste0(labels, collapse = ", ") - glue(paste( - "You are a helpful classification engine.", - "Determine if the text refers to one of the following: {labels}.", - "No capitalization. No explanations.", - "{additional}", - "The answer is based on the following text:\n{{x}}" - )) + list( + list( + role = "user", + content = glue(paste( + "You are a helpful classification engine.", + "Determine if the text refers to one of the following: {labels}.", + "No capitalization. No explanations.", + "{additional}", + "The answer is based on the following text:\n{{x}}" + )) + ) + ) }, extract = function(labels) { no_labels <- length(labels) col_labels <- paste0(labels, collapse = ", ") - json_labels <- paste0("\"", labels,"\":your answer", collapse = ",") - json_labels <- paste0("{{", json_labels, "}}") + json_labels <- paste0("\"", labels, "\":your answer", collapse = ",") + json_labels <- paste0("{{", json_labels, "}}") + plural <- ifelse(no_labels > 1, "s", "") list( list( role = "system", content = "You only speak simple JSON. Do not write normal text." ), list( - role = "user", + role = "user", content = glue(paste( "You are a helpful text extraction engine.", "Extract the {col_labels} being referred to on the text.", - "I expect {no_labels} item{ifelse(no_labels > 1, 's', '')} exactly.", + "I expect {no_labels} item{plural} exactly.", "No capitalization. 
No explanations.", "You will use this JSON this format exclusively: {json_labels} .", "{additional}", "The answer is based on the following text:\n{{x}}" - )) + )) ) ) }, translate = function(language) { - glue(paste( - "You are a helpful translation engine.", - "You will return only the translation text, no explanations.", - "The target language to translate to is: {language}.", - "{additional}", - "The answer is the summary of the following text:\n{{x}}" - )) + list( + list( + role = "user", + content = glue(paste( + "You are a helpful translation engine.", + "You will return only the translation text, no explanations.", + "The target language to translate to is: {language}.", + "{additional}", + "The answer is the summary of the following text:\n{{x}}" + )) + ) + ) } ) } From 4af042d6d1c6ab6bab7029582f5a46145b5717cc Mon Sep 17 00:00:00 2001 From: Edgar Ruiz Date: Wed, 11 Sep 2024 17:27:38 -0500 Subject: [PATCH 12/15] Encapsulates in lists if prompt is text only --- R/llm-custom.R | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/R/llm-custom.R b/R/llm-custom.R index 0e0fad4..f9064c0 100644 --- a/R/llm-custom.R +++ b/R/llm-custom.R @@ -41,6 +41,15 @@ llm_custom.data.frame <- function(.data, #' @export llm_vec_custom <- function(x, prompt = "", valid_resps = NULL) { llm_use(.silent = TRUE, force = FALSE) + if(!inherits(prompt, "list")) { + p_split <- strsplit(prompt, "\\{\\{x\\}\\}")[[1]] + if(length(p_split) == 1 && p_split == prompt) { + content <- glue("{prompt}\n{{x}}") + } else { + content <- prompt + } + prompt <- list(list(role = "user", content = content)) + } resp <- m_backend_submit(defaults_get(), x, prompt) if (!is.null(valid_resps)) { errors <- !resp %in% valid_resps From 23ca066930f07ca7b26c45c0c11fc9474e38bf94 Mon Sep 17 00:00:00 2001 From: Edgar Ruiz Date: Wed, 11 Sep 2024 17:30:04 -0500 Subject: [PATCH 13/15] styler updates --- R/llm-custom.R | 4 ++-- R/llm-extract.R | 4 ++-- R/m-backend-submit.R | 2 +- tests/testthat/test-llm-classify.R | 12 ++++++------ 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/R/llm-custom.R b/R/llm-custom.R index f9064c0..c5d627b 100644 --- a/R/llm-custom.R +++ b/R/llm-custom.R @@ -41,9 +41,9 @@ llm_custom.data.frame <- function(.data, #' @export llm_vec_custom <- function(x, prompt = "", valid_resps = NULL) { llm_use(.silent = TRUE, force = FALSE) - if(!inherits(prompt, "list")) { + if (!inherits(prompt, "list")) { p_split <- strsplit(prompt, "\\{\\{x\\}\\}")[[1]] - if(length(p_split) == 1 && p_split == prompt) { + if (length(p_split) == 1 && p_split == prompt) { content <- glue("{prompt}\n{{x}}") } else { content <- prompt diff --git a/R/llm-extract.R b/R/llm-extract.R index bbdb288..e467a5b 100644 --- a/R/llm-extract.R +++ b/R/llm-extract.R @@ -83,7 +83,7 @@ llm_vec_extract <- function(x, additional_prompt = additional_prompt ) map_chr( - resp, + resp, \(x) paste0(as.character(fromJSON(x, flatten = TRUE)), collapse = "|") - ) + ) } diff --git a/R/m-backend-submit.R b/R/m-backend-submit.R index 0132c5b..dfb57dd 100644 --- a/R/m-backend-submit.R +++ b/R/m-backend-submit.R @@ -2,7 +2,7 @@ #' #' @param backend An `mall_defaults` object #' @param x The body of the text to be submitted to the LLM -#' @param prompt The additional information to add to the submission +#' @param prompt The additional information to add to the submission #' @param additional Additional text to insert to the `base_prompt` #' #' @returns `m_backend_submit` does not return an object. 
`m_backend_prompt` diff --git a/tests/testthat/test-llm-classify.R b/tests/testthat/test-llm-classify.R index acafa2a..7c44c5e 100644 --- a/tests/testthat/test-llm-classify.R +++ b/tests/testthat/test-llm-classify.R @@ -1,6 +1,6 @@ test_that("Classify works", { test_text <- "this is a test" - llm_use("simulate_llm", "echo", .silent = TRUE) + llm_use("simulate_llm", "echo", .silent = TRUE) expect_equal( llm_vec_classify(test_text, labels = test_text), test_text @@ -14,14 +14,14 @@ test_that("Classify works", { llm_classify(data.frame(x = test_text), x, labels = test_text), data.frame(x = test_text, .classify = test_text) ) - + expect_equal( llm_classify( - data.frame(x = test_text), - x, - labels = test_text, + data.frame(x = test_text), + x, + labels = test_text, pred_name = "new" - ), + ), data.frame(x = test_text, new = test_text) ) }) From ca3d0900b7562af23fa512fdcec5d05be42ca58d Mon Sep 17 00:00:00 2001 From: Edgar Ruiz Date: Wed, 11 Sep 2024 17:42:56 -0500 Subject: [PATCH 14/15] Fixes tests --- R/m-backend-submit.R | 4 +++- tests/testthat/test-llm-custom.R | 8 ++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/R/m-backend-submit.R b/R/m-backend-submit.R index dfb57dd..944e462 100644 --- a/R/m-backend-submit.R +++ b/R/m-backend-submit.R @@ -32,12 +32,14 @@ m_backend_submit.mall_ollama <- function(backend, x, prompt) { } #' @export -m_backend_submit.mall_simulate_llm <- function(backend, x, prompt) { +m_backend_submit.mall_simulate_llm <- function(backend, x, base_prompt) { args <- backend class(args) <- "list" if (args$model == "pipe") { out <- trimws(strsplit(x, "\\|")[[1]][[2]]) } else if (args$model == "prompt") { + out <- glue("{base_prompt}\n{x}") + } else if (args$model == "echo") { out <- x } else { out <- list( diff --git a/tests/testthat/test-llm-custom.R b/tests/testthat/test-llm-custom.R index a930dc4..fdfc9a5 100644 --- a/tests/testthat/test-llm-custom.R +++ b/tests/testthat/test-llm-custom.R @@ -1,9 +1,9 @@ test_that("Custom works", { test_text <- "this is a test" - llm_use("simulate_llm", "prompt", .silent = TRUE) + llm_use("simulate_llm", "echo", .silent = TRUE) expect_equal( llm_vec_custom(test_text, "this is a test: "), - paste0("this is a test: \n", test_text) + test_text ) expect_message( x <- llm_vec_custom(test_text, "this is a test: ", valid_resps = "not valid") @@ -12,11 +12,11 @@ test_that("Custom works", { expect_equal( llm_custom(data.frame(x = test_text), x, "this is a test: "), - data.frame(x = test_text, .pred = paste0("this is a test: \n", test_text)) + data.frame(x = test_text, .pred = test_text) ) expect_equal( llm_custom(data.frame(x = test_text), x, "this is a test: ", pred_name = "new"), - data.frame(x = test_text, new = paste0("this is a test: \n", test_text)) + data.frame(x = test_text, new = test_text) ) }) From bab67892a2d6e54e27f49f3a26f15df919321dda Mon Sep 17 00:00:00 2001 From: Edgar Ruiz Date: Wed, 11 Sep 2024 17:53:45 -0500 Subject: [PATCH 15/15] Version bump --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index b3ff088..0cb08b7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: mall Title: Run multiple 'Large Language Model' predictions against a table, or vectors -Version: 0.0.0.9003 +Version: 0.0.0.9004 Authors@R: person("Edgar", "Ruiz", , "first.last@example.com", role = c("aut", "cre")) Description: Run multiple 'Large Language Model' predictions against a table. The
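
Taken together, the series replaces ollamar::generate() with ollamar::chat(), turns each prompt into a list of chat messages, and has llm_vec_extract() parse a JSON reply instead of splitting raw text on "|". The sketch below, which is not part of the patches, traces that flow for a single input. It assumes a local Ollama service is running; the model name "llama3.1" and the "product" label are illustrations only, not anything the patches pin down.

library(glue)
library(jsonlite)

# A prompt is now a list of chat messages. `{x}` marks where each input
# lands; doubled braces survive glue() as literal JSON braces.
prompt <- list(
  list(
    role = "system",
    content = "You only speak simple JSON. Do not write normal text."
  ),
  list(
    role = "user",
    content = paste(
      "You are a helpful text extraction engine.",
      "Extract the product being referred to on the text.",
      "You will use this JSON format exclusively: {{\"product\":your answer}}.",
      "The answer is based on the following text:\n{x}"
    )
  )
)

x <- "I am not happy with the new washing machine"

# m_backend_submit.mall_ollama() glues the input into every message field
# before handing the list to ollamar::chat():
messages <- lapply(prompt, \(m) lapply(m, \(part) glue(part, x = x)))
resp <- ollamar::chat("llama3.1", messages = messages, output = "text")

# llm_vec_extract() then flattens the JSON reply into a pipe-separated
# string, one field per requested label:
paste0(as.character(fromJSON(resp, flatten = TRUE)), collapse = "|")
# e.g. "washing machine"

The division of labor mirrors patches 02 and 04: the JSON-only instruction rides in a system-role message so the user message stays task-specific, and the explicit {"label": ...} schema introduced in patch 09 is what makes fromJSON() reliable enough to retire the pipe-splitting that patch 01 started from.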