From a5bb6a0b2055cbb67eb4661177549c534689a6ed Mon Sep 17 00:00:00 2001 From: Edgar Ruiz Date: Sat, 28 Sep 2024 17:49:25 -0500 Subject: [PATCH 1/8] Switches README to llama3.2 --- README.Rmd | 28 ++++++++--------- README.md | 88 ++++++++++++++++++++++++++---------------------------- 2 files changed, 56 insertions(+), 60 deletions(-) diff --git a/README.Rmd b/README.Rmd index 3d31700..8e9da8f 100644 --- a/README.Rmd +++ b/README.Rmd @@ -16,7 +16,7 @@ library(dbplyr) library(tictoc) library(DBI) source("utils/knitr-print.R") -mall::llm_use("ollama", "llama3.1", seed = 100, .cache = "_readme_cache") +mall::llm_use("ollama", "llama3.2", seed = 100, .cache = "_readme_cache") ``` # mall @@ -78,9 +78,9 @@ This saves the data scientist the need to write and tune an NLP model. [Installation guide](https://hauselin.github.io/ollama-r/#installation) - Download an LLM model. For example, I have been developing this package using -Llama 3.1 to test. To get that model you can run: +Llama 3.2 to test. To get that model you can run: ```r - ollamar::pull("llama3.1") + ollamar::pull("llama3.2") ``` ### With Databricks @@ -221,7 +221,7 @@ function that actually runs the prediction. In the case of Ollama, that function is [`generate()`](https://hauselin.github.io/ollama-r/reference/generate.html). ```{r, eval = FALSE} -llm_use("ollama", "llama3.1", seed = 100, temperature = 0.2) +llm_use("ollama", "llama3.2", seed = 100, temperature = 0) ``` ## Key considerations @@ -232,7 +232,7 @@ If using this method with an LLM locally available, the cost will be a long running time. Unless using a very specialized LLM, a given LLM is a general model. It was fitted using a vast amount of data. So determining a response for each row, takes longer than if using a manually created NLP model. The default model -used in Ollama is Llama 3.1, which was fitted using 8B parameters. +used in Ollama is Llama 3.2, which was fitted using 8B parameters. If using an external LLM service, the consideration will need to be for the billing costs of using such service. Keep in mind that you will be sending a lot @@ -260,27 +260,25 @@ library(classmap) data(data_bookReviews) -book_reviews <- data_bookReviews |> - head(100) |> - as_tibble() - -glimpse(book_reviews) +data_bookReviews |> + glimpse() ``` As per the docs, `sentiment` is a factor indicating the sentiment of the review: negative (1) or positive (2) ```{r} -length(strsplit(paste(book_reviews, collapse = " "), " ")[[1]]) +length(strsplit(paste(head(data_bookReviews$review, 100), collapse = " "), " ")[[1]]) ``` Just to get an idea of how much data we're processing, I'm using a very, very simple word count. So we're analyzing a bit over 20 thousand words. ```{r} -reviews_llm <- book_reviews |> +reviews_llm <- data_bookReviews |> + head(100) |> llm_sentiment( col = review, - options = c("positive", "negative"), + options = c("positive" ~ 2, "negative" ~ 1), pred_name = "predicted" ) ``` @@ -314,8 +312,8 @@ will not be of the "truth", but rather the package's results recorded in library(forcats) reviews_llm |> - mutate(fct_pred = as.factor(ifelse(predicted == "positive", 2, 1))) |> - yardstick::accuracy(sentiment, fct_pred) + mutate(predicted = as.factor(predicted)) |> + yardstick::accuracy(sentiment, predicted) ``` ## Vector functions diff --git a/README.md b/README.md index 684b87a..9300556 100644 --- a/README.md +++ b/README.md @@ -61,10 +61,10 @@ scientist the need to write and tune an NLP model. guide](https://hauselin.github.io/ollama-r/#installation) - Download an LLM model. 
For example, I have been developing this - package using Llama 3.1 to test. To get that model you can run: + package using Llama 3.2 to test. To get that model you can run: ``` r - ollamar::pull("llama3.1") + ollamar::pull("llama3.2") ``` ### With Databricks @@ -153,11 +153,11 @@ number of words to output (`max_words`): reviews |> llm_summarize(review, max_words = 5) #> # A tibble: 3 × 2 -#> review .summary -#> -#> 1 This has been the best TV I've ever used. Gr… very good tv experience overall -#> 2 I regret buying this laptop. It is too slow … slow and noisy laptop purchase -#> 3 Not sure how to feel about my new washing ma… mixed feelings about new washer +#> review .summary +#> +#> 1 This has been the best TV I've ever used. Gr… it's a great tv +#> 2 I regret buying this laptop. It is too slow … laptop purchase was a mistake +#> 3 Not sure how to feel about my new washing ma… having mixed feelings about it ``` To control the name of the prediction field, you can change `pred_name` @@ -167,11 +167,11 @@ argument. This works with the other `llm_` functions as well. reviews |> llm_summarize(review, max_words = 5, pred_name = "review_summary") #> # A tibble: 3 × 2 -#> review review_summary -#> -#> 1 This has been the best TV I've ever used. Gr… very good tv experience overall -#> 2 I regret buying this laptop. It is too slow … slow and noisy laptop purchase -#> 3 Not sure how to feel about my new washing ma… mixed feelings about new washer +#> review review_summary +#> +#> 1 This has been the best TV I've ever used. Gr… it's a great tv +#> 2 I regret buying this laptop. It is too slow … laptop purchase was a mistake +#> 3 Not sure how to feel about my new washing ma… having mixed feelings about it ``` ### Classify @@ -184,7 +184,7 @@ reviews |> #> # A tibble: 3 × 2 #> review .classify #> -#> 1 This has been the best TV I've ever used. Gr… appliance +#> 1 This has been the best TV I've ever used. Gr… computer #> 2 I regret buying this laptop. It is too slow … computer #> 3 Not sure how to feel about my new washing ma… appliance ``` @@ -221,9 +221,9 @@ reviews |> #> # A tibble: 3 × 2 #> review .translation #> -#> 1 This has been the best TV I've ever used. Gr… Este ha sido el mejor televisor… -#> 2 I regret buying this laptop. It is too slow … Lamento haber comprado esta lap… -#> 3 Not sure how to feel about my new washing ma… No estoy seguro de cómo sentirm… +#> 1 This has been the best TV I've ever used. Gr… Esta ha sido la mejor televisió… +#> 2 I regret buying this laptop. It is too slow … Me arrepiento de comprar este p… +#> 3 Not sure how to feel about my new washing ma… No estoy seguro de cómo me sien… ``` ### Custom prompt @@ -265,7 +265,7 @@ Ollama, that function is [`generate()`](https://hauselin.github.io/ollama-r/reference/generate.html). ``` r -llm_use("ollama", "llama3.1", seed = 100, temperature = 0.2) +llm_use("ollama", "llama3.2", seed = 100, temperature = 0) ``` ## Key considerations @@ -276,7 +276,7 @@ If using this method with an LLM locally available, the cost will be a long running time. Unless using a very specialized LLM, a given LLM is a general model. It was fitted using a vast amount of data. So determining a response for each row, takes longer than if using a manually created -NLP model. The default model used in Ollama is Llama 3.1, which was +NLP model. The default model used in Ollama is Llama 3.2, which was fitted using 8B parameters. 
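One way to gauge that cost before committing to a full run is to time a small
sample first. This is only a sketch, not one of the package's own examples: it
assumes the `tictoc` package (loaded in the README setup chunk) and the
`data_bookReviews` data from `classmap` used later in the Performance section.

``` r
library(tictoc)

# Time a 10-row sample, then scale the elapsed time up to the full table
# for a rough estimate of the total run time.
tic()
data_bookReviews |>
  head(10) |>
  llm_sentiment(col = review)
toc()
```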
If using an external LLM service, the consideration will need to be for @@ -306,12 +306,9 @@ library(classmap) data(data_bookReviews) -book_reviews <- data_bookReviews |> - head(100) |> - as_tibble() - -glimpse(book_reviews) -#> Rows: 100 +data_bookReviews |> + glimpse() +#> Rows: 1,000 #> Columns: 2 #> $ review "i got this as both a book and an audio file. i had waited t… #> $ sentiment 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 2, 1, … @@ -321,21 +318,22 @@ As per the docs, `sentiment` is a factor indicating the sentiment of the review: negative (1) or positive (2) ``` r -length(strsplit(paste(book_reviews, collapse = " "), " ")[[1]]) -#> [1] 20571 +length(strsplit(paste(head(data_bookReviews$review, 100), collapse = " "), " ")[[1]]) +#> [1] 20470 ``` Just to get an idea of how much data we’re processing, I’m using a very, very simple word count. So we’re analyzing a bit over 20 thousand words. ``` r -reviews_llm <- book_reviews |> +reviews_llm <- data_bookReviews |> + head(100) |> llm_sentiment( col = review, - options = c("positive", "negative"), + options = c("positive" ~ 2, "negative" ~ 1), pred_name = "predicted" ) -#> ! There were 1 predictions with invalid output, they were coerced to NA +#> ! There were 2 predictions with invalid output, they were coerced to NA ``` As far as **time**, on my Apple M3 machine, it took about 3 minutes to @@ -359,18 +357,18 @@ This is what the new table looks like: ``` r reviews_llm #> # A tibble: 100 × 3 -#> review sentiment predicted -#> -#> 1 "i got this as both a book and an audio file… 1 negative -#> 2 "this book places too much emphasis on spend… 1 negative -#> 3 "remember the hollywood blacklist? the holly… 2 negative -#> 4 "while i appreciate what tipler was attempti… 1 negative -#> 5 "the others in the series were great, and i … 1 negative -#> 6 "a few good things, but she's lost her edge … 1 negative -#> 7 "words cannot describe how ripped off and di… 1 negative -#> 8 "1. the persective of most writers is shaped… 1 negative -#> 9 "i have been a huge fan of michael crichton … 1 negative -#> 10 "i saw dr. polk on c-span a month or two ago… 2 positive +#> review sentiment predicted +#> +#> 1 "i got this as both a book and an audio file… 1 1 +#> 2 "this book places too much emphasis on spend… 1 1 +#> 3 "remember the hollywood blacklist? the holly… 2 2 +#> 4 "while i appreciate what tipler was attempti… 1 1 +#> 5 "the others in the series were great, and i … 1 1 +#> 6 "a few good things, but she's lost her edge … 1 1 +#> 7 "words cannot describe how ripped off and di… 1 1 +#> 8 "1. the persective of most writers is shaped… 1 NA +#> 9 "i have been a huge fan of michael crichton … 1 1 +#> 10 "i saw dr. polk on c-span a month or two ago… 2 2 #> # ℹ 90 more rows ``` @@ -382,12 +380,12 @@ recorded in `sentiment`. library(forcats) reviews_llm |> - mutate(fct_pred = as.factor(ifelse(predicted == "positive", 2, 1))) |> - yardstick::accuracy(sentiment, fct_pred) + mutate(predicted = as.factor(predicted)) |> + yardstick::accuracy(sentiment, predicted) #> # A tibble: 1 × 3 #> .metric .estimator .estimate #> -#> 1 accuracy binary 0.939 +#> 1 accuracy binary 0.980 ``` ## Vector functions @@ -404,5 +402,5 @@ llm_vec_sentiment("I am happy") ``` r llm_vec_translate("Este es el mejor dia!", "english") -#> [1] "This is the best day!" +#> [1] "It's the best day!" 
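
# A further vector example (a sketch, not original README output):
# classification follows the same pattern. The call and its llama3.2 result
# below are taken from the llm_classify() reference examples in this patch
# series.
llm_vec_classify(
  c("this is important!", "just whenever"),
  c("urgent", "not urgent")
)
#> [1] "urgent" "urgent"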
``` From 601da7fa4744a2cfc1189e47c5263971661b64af Mon Sep 17 00:00:00 2001 From: Edgar Ruiz Date: Sat, 28 Sep 2024 17:53:24 -0500 Subject: [PATCH 2/8] Switches rest of package to llama 3.2 --- R/llm-classify.R | 2 +- R/llm-custom.R | 2 +- R/llm-extract.R | 2 +- R/llm-sentiment.R | 2 +- R/llm-summarize.R | 2 +- R/llm-translate.R | 2 +- R/llm-use.R | 4 ++-- man/llm_classify.Rd | 2 +- man/llm_custom.Rd | 2 +- man/llm_extract.Rd | 2 +- man/llm_sentiment.Rd | 2 +- man/llm_summarize.Rd | 2 +- man/llm_translate.Rd | 2 +- man/llm_use.Rd | 4 ++-- tests/testthat/_snaps/llm-classify.md | 8 ++++---- tests/testthat/_snaps/llm-summarize.md | 8 ++++---- tests/testthat/_snaps/llm-translate.md | 8 ++++---- tests/testthat/_snaps/zzz-cache.md | 4 ++-- tests/testthat/helper-ollama.R | 2 +- tests/testthat/test-llm-classify.R | 2 +- tests/testthat/test-m-backend-submit.R | 2 +- tests/testthat/test-zzz-cache.R | 2 +- 22 files changed, 34 insertions(+), 34 deletions(-) diff --git a/R/llm-classify.R b/R/llm-classify.R index f6a4ae1..0d12fc5 100644 --- a/R/llm-classify.R +++ b/R/llm-classify.R @@ -24,7 +24,7 @@ #' #' data("reviews") #' -#' llm_use("ollama", "llama3.1", seed = 100, .silent = TRUE) +#' llm_use("ollama", "llama3.2", seed = 100, .silent = TRUE) #' #' llm_classify(reviews, review, c("appliance", "computer")) #' diff --git a/R/llm-custom.R b/R/llm-custom.R index 6f7a9b3..aa6be55 100644 --- a/R/llm-custom.R +++ b/R/llm-custom.R @@ -15,7 +15,7 @@ #' #' data("reviews") #' -#' llm_use("ollama", "llama3.1", seed = 100, .silent = TRUE) +#' llm_use("ollama", "llama3.2", seed = 100, .silent = TRUE) #' #' my_prompt <- paste( #' "Answer a question.", diff --git a/R/llm-extract.R b/R/llm-extract.R index 36e8031..8992187 100644 --- a/R/llm-extract.R +++ b/R/llm-extract.R @@ -17,7 +17,7 @@ #' #' data("reviews") #' -#' llm_use("ollama", "llama3.1", seed = 100, .silent = TRUE) +#' llm_use("ollama", "llama3.2", seed = 100, .silent = TRUE) #' #' # Use 'labels' to let the function know what to extract #' llm_extract(reviews, review, labels = "product") diff --git a/R/llm-sentiment.R b/R/llm-sentiment.R index 010df67..f1b377a 100644 --- a/R/llm-sentiment.R +++ b/R/llm-sentiment.R @@ -16,7 +16,7 @@ #' #' data("reviews") #' -#' llm_use("ollama", "llama3.1", seed = 100, .silent = TRUE) +#' llm_use("ollama", "llama3.2", seed = 100, .silent = TRUE) #' #' llm_sentiment(reviews, review) #' diff --git a/R/llm-summarize.R b/R/llm-summarize.R index 3efe78b..000d955 100644 --- a/R/llm-summarize.R +++ b/R/llm-summarize.R @@ -12,7 +12,7 @@ #' #' data("reviews") #' -#' llm_use("ollama", "llama3.1", seed = 100, .silent = TRUE) +#' llm_use("ollama", "llama3.2", seed = 100, .silent = TRUE) #' #' # Use max_words to set the maximum number of words to use for the summary #' llm_summarize(reviews, review, max_words = 5) diff --git a/R/llm-translate.R b/R/llm-translate.R index 69f8470..83ed133 100644 --- a/R/llm-translate.R +++ b/R/llm-translate.R @@ -12,7 +12,7 @@ #' #' data("reviews") #' -#' llm_use("ollama", "llama3.1", seed = 100, .silent = TRUE) +#' llm_use("ollama", "llama3.2", seed = 100, .silent = TRUE) #' #' # Pass the desired language to translate to #' llm_translate(reviews, review, "spanish") diff --git a/R/llm-use.R b/R/llm-use.R index 87acef6..d024119 100644 --- a/R/llm-use.R +++ b/R/llm-use.R @@ -19,11 +19,11 @@ #' \dontrun{ #' library(mall) #' -#' llm_use("ollama", "llama3.1") +#' llm_use("ollama", "llama3.2") #' #' # Additional arguments will be passed 'as-is' to the #' # downstream R function in this example, to ollama::chat() 
-#' llm_use("ollama", "llama3.1", seed = 100, temp = 0.1) +#' llm_use("ollama", "llama3.2", seed = 100, temp = 0.1) #' #' # During the R session, you can change any argument #' # individually and it will retain all of previous diff --git a/man/llm_classify.Rd b/man/llm_classify.Rd index 1ba7072..7ea2dc2 100644 --- a/man/llm_classify.Rd +++ b/man/llm_classify.Rd @@ -49,7 +49,7 @@ library(mall) data("reviews") -llm_use("ollama", "llama3.1", seed = 100, .silent = TRUE) +llm_use("ollama", "llama3.2", seed = 100, .silent = TRUE) llm_classify(reviews, review, c("appliance", "computer")) diff --git a/man/llm_custom.Rd b/man/llm_custom.Rd index 5a5cfc9..71e1343 100644 --- a/man/llm_custom.Rd +++ b/man/llm_custom.Rd @@ -40,7 +40,7 @@ library(mall) data("reviews") -llm_use("ollama", "llama3.1", seed = 100, .silent = TRUE) +llm_use("ollama", "llama3.2", seed = 100, .silent = TRUE) my_prompt <- paste( "Answer a question.", diff --git a/man/llm_extract.Rd b/man/llm_extract.Rd index 65c8c50..5d7ff25 100644 --- a/man/llm_extract.Rd +++ b/man/llm_extract.Rd @@ -55,7 +55,7 @@ library(mall) data("reviews") -llm_use("ollama", "llama3.1", seed = 100, .silent = TRUE) +llm_use("ollama", "llama3.2", seed = 100, .silent = TRUE) # Use 'labels' to let the function know what to extract llm_extract(reviews, review, labels = "product") diff --git a/man/llm_sentiment.Rd b/man/llm_sentiment.Rd index 5f15183..943b484 100644 --- a/man/llm_sentiment.Rd +++ b/man/llm_sentiment.Rd @@ -54,7 +54,7 @@ library(mall) data("reviews") -llm_use("ollama", "llama3.1", seed = 100, .silent = TRUE) +llm_use("ollama", "llama3.2", seed = 100, .silent = TRUE) llm_sentiment(reviews, review) diff --git a/man/llm_summarize.Rd b/man/llm_summarize.Rd index c2e6c19..5ba3b7c 100644 --- a/man/llm_summarize.Rd +++ b/man/llm_summarize.Rd @@ -48,7 +48,7 @@ library(mall) data("reviews") -llm_use("ollama", "llama3.1", seed = 100, .silent = TRUE) +llm_use("ollama", "llama3.2", seed = 100, .silent = TRUE) # Use max_words to set the maximum number of words to use for the summary llm_summarize(reviews, review, max_words = 5) diff --git a/man/llm_translate.Rd b/man/llm_translate.Rd index a61eb9a..9ffc563 100644 --- a/man/llm_translate.Rd +++ b/man/llm_translate.Rd @@ -48,7 +48,7 @@ library(mall) data("reviews") -llm_use("ollama", "llama3.1", seed = 100, .silent = TRUE) +llm_use("ollama", "llama3.2", seed = 100, .silent = TRUE) # Pass the desired language to translate to llm_translate(reviews, review, "spanish") diff --git a/man/llm_use.Rd b/man/llm_use.Rd index cc7087c..15720b3 100644 --- a/man/llm_use.Rd +++ b/man/llm_use.Rd @@ -44,11 +44,11 @@ R session \dontrun{ library(mall) -llm_use("ollama", "llama3.1") +llm_use("ollama", "llama3.2") # Additional arguments will be passed 'as-is' to the # downstream R function in this example, to ollama::chat() -llm_use("ollama", "llama3.1", seed = 100, temp = 0.1) +llm_use("ollama", "llama3.2", seed = 100, temp = 0.1) # During the R session, you can change any argument # individually and it will retain all of previous diff --git a/tests/testthat/_snaps/llm-classify.md b/tests/testthat/_snaps/llm-classify.md index 6584816..980521d 100644 --- a/tests/testthat/_snaps/llm-classify.md +++ b/tests/testthat/_snaps/llm-classify.md @@ -13,7 +13,7 @@ llm_vec_classify("this is a test", c("a", "b"), preview = TRUE) Output ollamar::chat(messages = list(list(role = "user", content = "You are a helpful classification engine. Determine if the text refers to one of the following: a, b. No capitalization. No explanations. 
The answer is based on the following text:\nthis is a test")), - output = "text", model = "llama3.1", seed = 100) + output = "text", model = "llama3.2", seed = 100) # Classify on Ollama works @@ -25,7 +25,7 @@ 2 I regret buying this laptop. It is too slow and the keyboard is too noisy 3 Not sure how to feel about my new washing machine. Great color, but hard to figure .classify - 1 appliance + 1 computer 2 computer 3 appliance @@ -40,7 +40,7 @@ 2 I regret buying this laptop. It is too slow and the keyboard is too noisy 3 Not sure how to feel about my new washing machine. Great color, but hard to figure new - 1 appliance + 1 computer 2 computer 3 appliance @@ -56,6 +56,6 @@ 3 Not sure how to feel about my new washing machine. Great color, but hard to figure new 1 appliance - 2 appliance + 2 computer 3 appliance diff --git a/tests/testthat/_snaps/llm-summarize.md b/tests/testthat/_snaps/llm-summarize.md index 3a7ae57..b254919 100644 --- a/tests/testthat/_snaps/llm-summarize.md +++ b/tests/testthat/_snaps/llm-summarize.md @@ -25,8 +25,8 @@ 1 This has been the best TV I've ever used. Great screen, and sound. 2 I regret buying this laptop. It is too slow and the keyboard is too noisy 3 Not sure how to feel about my new washing machine. Great color, but hard to figure - .summary - 1 very good tv experience overall - 2 slow and noisy laptop purchase - 3 mixed feelings about new washer + .summary + 1 it's a great tv + 2 laptop purchase was a mistake + 3 having mixed feelings about it diff --git a/tests/testthat/_snaps/llm-translate.md b/tests/testthat/_snaps/llm-translate.md index b319411..e589ae2 100644 --- a/tests/testthat/_snaps/llm-translate.md +++ b/tests/testthat/_snaps/llm-translate.md @@ -7,8 +7,8 @@ 1 This has been the best TV I've ever used. Great screen, and sound. 2 I regret buying this laptop. It is too slow and the keyboard is too noisy 3 Not sure how to feel about my new washing machine. Great color, but hard to figure - .translation - 1 Este ha sido el mejor televisor que he utilizado. Gran pantalla y sonido. - 2 Lamento haber comprado esta laptop. Está demasiado lenta y el teclado es demasiado ruidoso. - 3 No estoy seguro de cómo sentirme sobre mi nueva lavadora. Color genial, pero difícil de manejar. + .translation + 1 Esta ha sido la mejor televisión que he utilizado. Gran pantalla y sonido. + 2 Me arrepiento de comprar este portátil. Es demasiado lento y la tecla es demasiado ruidosa. + 3 No estoy seguro de cómo me siento sobre mi nueva lavadora. 
Un gran color, pero difícil de entender diff --git a/tests/testthat/_snaps/zzz-cache.md b/tests/testthat/_snaps/zzz-cache.md index b6d85ca..c5111c7 100644 --- a/tests/testthat/_snaps/zzz-cache.md +++ b/tests/testthat/_snaps/zzz-cache.md @@ -13,6 +13,8 @@ _mall_cache/29/296f3116c07dab7f3ecb4a71776e3b64.json _mall_cache/2c _mall_cache/2c/2cbb57fd4a7e7178c489d068db063433.json + _mall_cache/42 + _mall_cache/42/425e0dc8e9dcadd3482b98fdfa127f30.json _mall_cache/44 _mall_cache/44/44fd00c39a9697e24e93943ef5f2ad1b.json _mall_cache/57 @@ -27,6 +29,4 @@ _mall_cache/b0/b02d0fab954e183a98787fa897b47d59.json _mall_cache/b7 _mall_cache/b7/b7c613386c94b2500b2b733632fedd1a.json - _mall_cache/c2 - _mall_cache/c2/c2e2ca95eaaa64b8926b185d6eeec18f.json diff --git a/tests/testthat/helper-ollama.R b/tests/testthat/helper-ollama.R index dff0d65..85bdb5f 100644 --- a/tests/testthat/helper-ollama.R +++ b/tests/testthat/helper-ollama.R @@ -21,7 +21,7 @@ skip_if_no_ollama <- function() { .mall_test$ollama_present <- TRUE llm_use( backend = "ollama", - model = "llama3.1", + model = "llama3.2", seed = 100, .silent = TRUE, .force = TRUE, diff --git a/tests/testthat/test-llm-classify.R b/tests/testthat/test-llm-classify.R index ab48270..c63e01b 100644 --- a/tests/testthat/test-llm-classify.R +++ b/tests/testthat/test-llm-classify.R @@ -34,7 +34,7 @@ test_that("Classify translates expected Spark SQL", { }) test_that("Preview works", { - llm_use("ollama", "llama3.1", seed = 100, .silent = FALSE) + llm_use("ollama", "llama3.2", seed = 100, .silent = FALSE) expect_snapshot( llm_vec_classify("this is a test", c("a", "b"), preview = TRUE) ) diff --git a/tests/testthat/test-m-backend-submit.R b/tests/testthat/test-m-backend-submit.R index bff26ee..e116eef 100644 --- a/tests/testthat/test-m-backend-submit.R +++ b/tests/testthat/test-m-backend-submit.R @@ -2,7 +2,7 @@ test_that("Ollama code is covered", { local_mocked_bindings( chat = function(...) 
"positive" ) - llm_use("ollama", "llama3.1", .silent = TRUE, .force = TRUE) + llm_use("ollama", "llama3.2", .silent = TRUE, .force = TRUE) expect_equal( llm_vec_sentiment("I am happy"), "positive" diff --git a/tests/testthat/test-zzz-cache.R b/tests/testthat/test-zzz-cache.R index 29adf18..1ca913d 100644 --- a/tests/testthat/test-zzz-cache.R +++ b/tests/testthat/test-zzz-cache.R @@ -8,7 +8,7 @@ test_that("Ollama cache exists and delete", { skip_if_no_ollama() expect_equal( length(fs::dir_ls("_ollama_cache", recurse = TRUE)), - 55 + 53 ) fs::dir_delete("_ollama_cache") }) From 5a77b89192909f78a846a7d97bc4dce129764940 Mon Sep 17 00:00:00 2001 From: Edgar Ruiz Date: Sun, 29 Sep 2024 09:43:18 -0500 Subject: [PATCH 3/8] First full run using llama3.2 --- NAMESPACE | 1 + R/m-backend-prompt.R | 30 +++++++++++++++++++ R/m-defaults.R | 10 ++++++- .../llm_classify/execute-results/html.json | 4 +-- .../llm_custom/execute-results/html.json | 4 +-- .../llm_extract/execute-results/html.json | 4 +-- .../llm_sentiment/execute-results/html.json | 4 +-- .../llm_summarize/execute-results/html.json | 4 +-- .../llm_translate/execute-results/html.json | 4 +-- .../llm_use/execute-results/html.json | 4 +-- reference/llm_classify.qmd | 2 +- reference/llm_custom.qmd | 2 +- reference/llm_extract.qmd | 2 +- reference/llm_sentiment.qmd | 2 +- reference/llm_summarize.qmd | 2 +- reference/llm_translate.qmd | 2 +- reference/llm_use.qmd | 4 +-- 17 files changed, 62 insertions(+), 23 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 33a9ea7..93c2220 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,6 +9,7 @@ S3method(llm_sentiment,data.frame) S3method(llm_summarize,"tbl_Spark SQL") S3method(llm_summarize,data.frame) S3method(llm_translate,data.frame) +S3method(m_backend_prompt,mall_llama3.2) S3method(m_backend_prompt,mall_session) S3method(m_backend_submit,mall_ollama) S3method(m_backend_submit,mall_simulate_llm) diff --git a/R/m-backend-prompt.R b/R/m-backend-prompt.R index 8d87fd6..b1acdd0 100644 --- a/R/m-backend-prompt.R +++ b/R/m-backend-prompt.R @@ -4,6 +4,36 @@ m_backend_prompt <- function(backend, additional) { UseMethod("m_backend_prompt") } +#' @export +m_backend_prompt.mall_llama3.2<- function(backend, additional = "") { + base_method <- NextMethod() + base_method$extract = function(labels) { + no_labels <- length(labels) + col_labels <- paste0(labels, collapse = ", ") + plural <- ifelse(no_labels > 1, "s", "") + text_multi <- ifelse( + no_labels > 1, + "Return the response exclusively in a pipe separated list, and no headers. ", + "" + ) + list( + list( + role = "user", + content = glue(paste( + "You are a helpful text extraction engine.", + "Extract the {col_labels} being referred to on the text.", + "I expect {no_labels} item{plural} exactly.", + "No capitalization. No explanations.", + "{text_multi}", + "{additional}", + "The answer is based on the following text:\n{{x}}" + )) + ) + ) + } + base_method +} + #' @export m_backend_prompt.mall_session <- function(backend, additional = "") { list( diff --git a/R/m-defaults.R b/R/m-defaults.R index 3c0b860..78be805 100644 --- a/R/m-defaults.R +++ b/R/m-defaults.R @@ -5,8 +5,16 @@ m_defaults_set <- function(...) 
{ nm <- names(new_args[i]) defaults[[nm]] <- new_args[[i]] } + model <- defaults[["model"]] + split_model <- strsplit(model, "\\:")[[1]] + if(length(split_model > 1)) { + sub_model <- split_model[[1]] + } else { + sub_model <- NULL + } obj_class <- clean_names(c( - defaults[["model"]], + model, + sub_model, defaults[["backend"]], "session" )) diff --git a/_freeze/reference/llm_classify/execute-results/html.json b/_freeze/reference/llm_classify/execute-results/html.json index 1859982..f0ef55c 100644 --- a/_freeze/reference/llm_classify/execute-results/html.json +++ b/_freeze/reference/llm_classify/execute-results/html.json @@ -1,8 +1,8 @@ { - "hash": "a7d53cb4734c6342a45fdb1a54cb0bbb", + "hash": "2654553ad72a6ca1b62748f913913568", "result": { "engine": "knitr", - "markdown": "---\ntitle: \"Categorize data as one of options given\"\nexecute:\n eval: true\n freeze: true\n---\n\n\n\n\n\n[R/llm-classify.R](https://github.com/edgararuiz/mall/blob/main/R/llm-classify.R)\n\n## llm_classify\n\n## Description\n Use a Large Language Model (LLM) to classify the provided text as one of the options provided via the `labels` argument. \n\n\n## Usage\n```r\n \nllm_classify( \n .data, \n col, \n labels, \n pred_name = \".classify\", \n additional_prompt = \"\" \n) \n \nllm_vec_classify(x, labels, additional_prompt = \"\", preview = FALSE) \n```\n\n## Arguments\n|Arguments|Description|\n|---|---|\n| .data | A `data.frame` or `tbl` object that contains the text to be analyzed |\n| col | The name of the field to analyze, supports `tidy-eval` |\n| labels | A character vector with at least 2 labels to classify the text as |\n| pred_name | A character vector with the name of the new column where the prediction will be placed |\n| additional_prompt | Inserts this text into the prompt sent to the LLM |\n| x | A vector that contains the text to be analyzed |\n| preview | It returns the R call that would have been used to run the prediction. It only returns the first record in `x`. Defaults to `FALSE` Applies to vector function only. |\n\n\n\n## Value\n `llm_classify` returns a `data.frame` or `tbl` object. `llm_vec_classify` returns a vector that is the same length as `x`. \n\n\n## Examples\n\n\n::: {.cell}\n\n```{.r .cell-code}\n \nlibrary(mall) \n \ndata(\"reviews\") \n \nllm_use(\"ollama\", \"llama3.1\", seed = 100, .silent = TRUE) \n \nllm_classify(reviews, review, c(\"appliance\", \"computer\")) \n#> # A tibble: 3 × 2\n#> review .classify\n#> \n#> 1 This has been the best TV I've ever used. Gr… appliance\n#> 2 I regret buying this laptop. It is too slow … computer \n#> 3 Not sure how to feel about my new washing ma… appliance\n \n# Use 'pred_name' to customize the new column's name \nllm_classify( \n reviews, \n review, \n c(\"appliance\", \"computer\"), \n pred_name = \"prod_type\" \n) \n#> # A tibble: 3 × 2\n#> review prod_type\n#> \n#> 1 This has been the best TV I've ever used. Gr… appliance\n#> 2 I regret buying this laptop. It is too slow … computer \n#> 3 Not sure how to feel about my new washing ma… appliance\n \n# Pass custom values for each classification \nllm_classify(reviews, review, c(\"appliance\" ~ 1, \"computer\" ~ 2)) \n#> # A tibble: 3 × 2\n#> review .classify\n#> \n#> 1 This has been the best TV I've ever used. Great screen, and sound. 1\n#> 2 I regret buying this laptop. It is too slow and the keyboard is too… 2\n#> 3 Not sure how to feel about my new washing machine. 
Great color, but… 1\n \n# For character vectors, instead of a data frame, use this function \nllm_vec_classify( \n c(\"this is important!\", \"just whenever\"), \n c(\"urgent\", \"not urgent\") \n) \n#> [1] \"urgent\" \"not urgent\"\n \n# To preview the first call that will be made to the downstream R function \nllm_vec_classify( \n c(\"this is important!\", \"just whenever\"), \n c(\"urgent\", \"not urgent\"), \n preview = TRUE \n) \n#> ollamar::chat(messages = list(list(role = \"user\", content = \"You are a helpful classification engine. Determine if the text refers to one of the following: urgent, not urgent. No capitalization. No explanations. The answer is based on the following text:\\nthis is important!\")), \n#> output = \"text\", model = \"llama3.1\", seed = 100)\n```\n:::\n", + "markdown": "---\ntitle: \"Categorize data as one of options given\"\nexecute:\n eval: true\n freeze: true\n---\n\n\n\n\n\n[R/llm-classify.R](https://github.com/edgararuiz/mall/blob/main/R/llm-classify.R)\n\n## llm_classify\n\n## Description\n Use a Large Language Model (LLM) to classify the provided text as one of the options provided via the `labels` argument. \n\n\n## Usage\n```r\n \nllm_classify( \n .data, \n col, \n labels, \n pred_name = \".classify\", \n additional_prompt = \"\" \n) \n \nllm_vec_classify(x, labels, additional_prompt = \"\", preview = FALSE) \n```\n\n## Arguments\n|Arguments|Description|\n|---|---|\n| .data | A `data.frame` or `tbl` object that contains the text to be analyzed |\n| col | The name of the field to analyze, supports `tidy-eval` |\n| labels | A character vector with at least 2 labels to classify the text as |\n| pred_name | A character vector with the name of the new column where the prediction will be placed |\n| additional_prompt | Inserts this text into the prompt sent to the LLM |\n| x | A vector that contains the text to be analyzed |\n| preview | It returns the R call that would have been used to run the prediction. It only returns the first record in `x`. Defaults to `FALSE` Applies to vector function only. |\n\n\n\n## Value\n `llm_classify` returns a `data.frame` or `tbl` object. `llm_vec_classify` returns a vector that is the same length as `x`. \n\n\n## Examples\n\n\n::: {.cell}\n\n```{.r .cell-code}\n \nlibrary(mall) \n \ndata(\"reviews\") \n \nllm_use(\"ollama\", \"llama3.2\", seed = 100, .silent = TRUE) \n \nllm_classify(reviews, review, c(\"appliance\", \"computer\")) \n#> # A tibble: 3 × 2\n#> review .classify\n#> \n#> 1 This has been the best TV I've ever used. Gr… computer \n#> 2 I regret buying this laptop. It is too slow … computer \n#> 3 Not sure how to feel about my new washing ma… appliance\n \n# Use 'pred_name' to customize the new column's name \nllm_classify( \n reviews, \n review, \n c(\"appliance\", \"computer\"), \n pred_name = \"prod_type\" \n) \n#> # A tibble: 3 × 2\n#> review prod_type\n#> \n#> 1 This has been the best TV I've ever used. Gr… computer \n#> 2 I regret buying this laptop. It is too slow … computer \n#> 3 Not sure how to feel about my new washing ma… appliance\n \n# Pass custom values for each classification \nllm_classify(reviews, review, c(\"appliance\" ~ 1, \"computer\" ~ 2)) \n#> ! There were 2 predictions with invalid output, they were coerced to NA\n#> # A tibble: 3 × 2\n#> review .classify\n#> \n#> 1 This has been the best TV I've ever used. Great screen, and sound. NA\n#> 2 I regret buying this laptop. It is too slow and the keyboard is too… 2\n#> 3 Not sure how to feel about my new washing machine. 
Great color, but… NA\n \n# For character vectors, instead of a data frame, use this function \nllm_vec_classify( \n c(\"this is important!\", \"just whenever\"), \n c(\"urgent\", \"not urgent\") \n) \n#> [1] \"urgent\" \"urgent\"\n \n# To preview the first call that will be made to the downstream R function \nllm_vec_classify( \n c(\"this is important!\", \"just whenever\"), \n c(\"urgent\", \"not urgent\"), \n preview = TRUE \n) \n#> ollamar::chat(messages = list(list(role = \"user\", content = \"You are a helpful classification engine. Determine if the text refers to one of the following: urgent, not urgent. No capitalization. No explanations. The answer is based on the following text:\\nthis is important!\")), \n#> output = \"text\", model = \"llama3.2\", seed = 100)\n```\n:::\n", "supporting": [], "filters": [ "rmarkdown/pagebreak.lua" diff --git a/_freeze/reference/llm_custom/execute-results/html.json b/_freeze/reference/llm_custom/execute-results/html.json index 3923de1..e746465 100644 --- a/_freeze/reference/llm_custom/execute-results/html.json +++ b/_freeze/reference/llm_custom/execute-results/html.json @@ -1,8 +1,8 @@ { - "hash": "857e69734ecad598efa862f9087c9899", + "hash": "9f9fb9cfdaebdc5ea55c78df85881f4f", "result": { "engine": "knitr", - "markdown": "---\ntitle: \"Send a custom prompt to the LLM\"\nexecute:\n eval: true\n freeze: true\n---\n\n\n\n\n\n[R/llm-custom.R](https://github.com/edgararuiz/mall/blob/main/R/llm-custom.R)\n\n## llm_custom\n\n## Description\n Use a Large Language Model (LLM) to process the provided text using the instructions from `prompt` \n\n\n## Usage\n```r\n \nllm_custom(.data, col, prompt = \"\", pred_name = \".pred\", valid_resps = \"\") \n \nllm_vec_custom(x, prompt = \"\", valid_resps = NULL) \n```\n\n## Arguments\n|Arguments|Description|\n|---|---|\n| .data | A `data.frame` or `tbl` object that contains the text to be analyzed |\n| col | The name of the field to analyze, supports `tidy-eval` |\n| prompt | The prompt to append to each record sent to the LLM |\n| pred_name | A character vector with the name of the new column where the prediction will be placed |\n| valid_resps | If the response from the LLM is not open, but deterministic, provide the options in a vector. This function will set to `NA` any response not in the options |\n| x | A vector that contains the text to be analyzed |\n\n\n\n## Value\n `llm_custom` returns a `data.frame` or `tbl` object. `llm_vec_custom` returns a vector that is the same length as `x`. \n\n\n## Examples\n\n\n::: {.cell}\n\n```{.r .cell-code}\n \nlibrary(mall) \n \ndata(\"reviews\") \n \nllm_use(\"ollama\", \"llama3.1\", seed = 100, .silent = TRUE) \n \nmy_prompt <- paste( \n \"Answer a question.\", \n \"Return only the answer, no explanation\", \n \"Acceptable answers are 'yes', 'no'\", \n \"Answer this about the following text, is this a happy customer?:\" \n) \n \nreviews |> \n llm_custom(review, my_prompt) \n#> # A tibble: 3 × 2\n#> review .pred\n#> \n#> 1 This has been the best TV I've ever used. Great screen, and sound. Yes \n#> 2 I regret buying this laptop. It is too slow and the keyboard is too noi… No \n#> 3 Not sure how to feel about my new washing machine. 
Great color, but har… No\n```\n:::\n", + "markdown": "---\ntitle: \"Send a custom prompt to the LLM\"\nexecute:\n eval: true\n freeze: true\n---\n\n\n\n\n\n[R/llm-custom.R](https://github.com/edgararuiz/mall/blob/main/R/llm-custom.R)\n\n## llm_custom\n\n## Description\n Use a Large Language Model (LLM) to process the provided text using the instructions from `prompt` \n\n\n## Usage\n```r\n \nllm_custom(.data, col, prompt = \"\", pred_name = \".pred\", valid_resps = \"\") \n \nllm_vec_custom(x, prompt = \"\", valid_resps = NULL) \n```\n\n## Arguments\n|Arguments|Description|\n|---|---|\n| .data | A `data.frame` or `tbl` object that contains the text to be analyzed |\n| col | The name of the field to analyze, supports `tidy-eval` |\n| prompt | The prompt to append to each record sent to the LLM |\n| pred_name | A character vector with the name of the new column where the prediction will be placed |\n| valid_resps | If the response from the LLM is not open, but deterministic, provide the options in a vector. This function will set to `NA` any response not in the options |\n| x | A vector that contains the text to be analyzed |\n\n\n\n## Value\n `llm_custom` returns a `data.frame` or `tbl` object. `llm_vec_custom` returns a vector that is the same length as `x`. \n\n\n## Examples\n\n\n::: {.cell}\n\n```{.r .cell-code}\n \nlibrary(mall) \n \ndata(\"reviews\") \n \nllm_use(\"ollama\", \"llama3.2\", seed = 100, .silent = TRUE) \n \nmy_prompt <- paste( \n \"Answer a question.\", \n \"Return only the answer, no explanation\", \n \"Acceptable answers are 'yes', 'no'\", \n \"Answer this about the following text, is this a happy customer?:\" \n) \n \nreviews |> \n llm_custom(review, my_prompt) \n#> # A tibble: 3 × 2\n#> review .pred\n#> \n#> 1 This has been the best TV I've ever used. Great screen, and sound. Yes \n#> 2 I regret buying this laptop. It is too slow and the keyboard is too noi… No \n#> 3 Not sure how to feel about my new washing machine. Great color, but har… No\n```\n:::\n", "supporting": [], "filters": [ "rmarkdown/pagebreak.lua" diff --git a/_freeze/reference/llm_extract/execute-results/html.json b/_freeze/reference/llm_extract/execute-results/html.json index a5428a1..5ff0d5e 100644 --- a/_freeze/reference/llm_extract/execute-results/html.json +++ b/_freeze/reference/llm_extract/execute-results/html.json @@ -1,8 +1,8 @@ { - "hash": "b6e0106b71d0c66eacc9e6fc7cd5542c", + "hash": "fa18360fb7c78438dcd9a82a136ef52a", "result": { "engine": "knitr", - "markdown": "---\ntitle: \"Extract entities from text\"\nexecute:\n eval: true\n freeze: true\n---\n\n\n\n\n\n[R/llm-extract.R](https://github.com/edgararuiz/mall/blob/main/R/llm-extract.R)\n\n## llm_extract\n\n## Description\n Use a Large Language Model (LLM) to extract specific entity, or entities, from the provided text \n\n\n## Usage\n```r\n \nllm_extract( \n .data, \n col, \n labels, \n expand_cols = FALSE, \n additional_prompt = \"\", \n pred_name = \".extract\" \n) \n \nllm_vec_extract(x, labels = c(), additional_prompt = \"\", preview = FALSE) \n```\n\n## Arguments\n|Arguments|Description|\n|---|---|\n| .data | A `data.frame` or `tbl` object that contains the text to be analyzed |\n| col | The name of the field to analyze, supports `tidy-eval` |\n| labels | A vector with the entities to extract from the text |\n| expand_cols | If multiple `labels` are passed, this is a flag that tells the function to create a new column per item in `labels`. 
If `labels` is a named vector, this function will use those names as the new column names, if not, the function will use a sanitized version of the content as the name. |\n| additional_prompt | Inserts this text into the prompt sent to the LLM |\n| pred_name | A character vector with the name of the new column where the prediction will be placed |\n| x | A vector that contains the text to be analyzed |\n| preview | It returns the R call that would have been used to run the prediction. It only returns the first record in `x`. Defaults to `FALSE` Applies to vector function only. |\n\n\n\n## Value\n `llm_extract` returns a `data.frame` or `tbl` object. `llm_vec_extract` returns a vector that is the same length as `x`. \n\n\n## Examples\n\n\n::: {.cell}\n\n```{.r .cell-code}\n \nlibrary(mall) \n \ndata(\"reviews\") \n \nllm_use(\"ollama\", \"llama3.1\", seed = 100, .silent = TRUE) \n \n# Use 'labels' to let the function know what to extract \nllm_extract(reviews, review, labels = \"product\") \n#> # A tibble: 3 × 2\n#> review .extract \n#> \n#> 1 This has been the best TV I've ever used. Gr… tv \n#> 2 I regret buying this laptop. It is too slow … laptop \n#> 3 Not sure how to feel about my new washing ma… washing machine\n \n# Use 'pred_name' to customize the new column's name \nllm_extract(reviews, review, \"product\", pred_name = \"prod\") \n#> # A tibble: 3 × 2\n#> review prod \n#> \n#> 1 This has been the best TV I've ever used. Gr… tv \n#> 2 I regret buying this laptop. It is too slow … laptop \n#> 3 Not sure how to feel about my new washing ma… washing machine\n \n# Pass a vector to request multiple things, the results will be pipe delimeted \n# in a single column \nllm_extract(reviews, review, c(\"product\", \"feelings\")) \n#> # A tibble: 3 × 2\n#> review .extract \n#> \n#> 1 This has been the best TV I've ever used. Gr… tv | happiness \n#> 2 I regret buying this laptop. It is too slow … laptop | disappointment \n#> 3 Not sure how to feel about my new washing ma… washing machine | disappointment\n \n# To get multiple columns, use 'expand_cols' \nllm_extract(reviews, review, c(\"product\", \"feelings\"), expand_cols = TRUE) \n#> # A tibble: 3 × 3\n#> review product feelings \n#> \n#> 1 This has been the best TV I've ever used. Gr… \"tv \" \" happiness\" \n#> 2 I regret buying this laptop. It is too slow … \"laptop \" \" disappoint…\n#> 3 Not sure how to feel about my new washing ma… \"washing machine \" \" disappoint…\n \n# Pass a named vector to set the resulting column names \nllm_extract( \n .data = reviews, \n col = review, \n labels = c(prod = \"product\", feels = \"feelings\"), \n expand_cols = TRUE \n) \n#> # A tibble: 3 × 3\n#> review prod feels \n#> \n#> 1 This has been the best TV I've ever used. Gr… \"tv \" \" happiness\" \n#> 2 I regret buying this laptop. It is too slow … \"laptop \" \" disappoint…\n#> 3 Not sure how to feel about my new washing ma… \"washing machine \" \" disappoint…\n \n# For character vectors, instead of a data frame, use this function \nllm_vec_extract(\"bob smith, 123 3rd street\", c(\"name\", \"address\")) \n#> [1] \"bob smith | 123 3rd street\"\n \n# To preview the first call that will be made to the downstream R function \nllm_vec_extract( \n \"bob smith, 123 3rd street\", \n c(\"name\", \"address\"), \n preview = TRUE \n) \n#> ollamar::chat(messages = list(list(role = \"user\", content = \"You are a helpful text extraction engine. Extract the name, address being referred to on the text. I expect 2 items exactly. No capitalization. No explanations. 
Return the response in a simple list, pipe separated, and no headers. The answer is based on the following text:\\nbob smith, 123 3rd street\")), \n#> output = \"text\", model = \"llama3.1\", seed = 100)\n```\n:::\n", + "markdown": "---\ntitle: \"Extract entities from text\"\nexecute:\n eval: true\n freeze: true\n---\n\n\n\n\n\n[R/llm-extract.R](https://github.com/edgararuiz/mall/blob/main/R/llm-extract.R)\n\n## llm_extract\n\n## Description\n Use a Large Language Model (LLM) to extract specific entity, or entities, from the provided text \n\n\n## Usage\n```r\n \nllm_extract( \n .data, \n col, \n labels, \n expand_cols = FALSE, \n additional_prompt = \"\", \n pred_name = \".extract\" \n) \n \nllm_vec_extract(x, labels = c(), additional_prompt = \"\", preview = FALSE) \n```\n\n## Arguments\n|Arguments|Description|\n|---|---|\n| .data | A `data.frame` or `tbl` object that contains the text to be analyzed |\n| col | The name of the field to analyze, supports `tidy-eval` |\n| labels | A vector with the entities to extract from the text |\n| expand_cols | If multiple `labels` are passed, this is a flag that tells the function to create a new column per item in `labels`. If `labels` is a named vector, this function will use those names as the new column names, if not, the function will use a sanitized version of the content as the name. |\n| additional_prompt | Inserts this text into the prompt sent to the LLM |\n| pred_name | A character vector with the name of the new column where the prediction will be placed |\n| x | A vector that contains the text to be analyzed |\n| preview | It returns the R call that would have been used to run the prediction. It only returns the first record in `x`. Defaults to `FALSE` Applies to vector function only. |\n\n\n\n## Value\n `llm_extract` returns a `data.frame` or `tbl` object. `llm_vec_extract` returns a vector that is the same length as `x`. \n\n\n## Examples\n\n\n::: {.cell}\n\n```{.r .cell-code}\n \nlibrary(mall) \n \ndata(\"reviews\") \n \nllm_use(\"ollama\", \"llama3.2\", seed = 100, .silent = TRUE) \n \n# Use 'labels' to let the function know what to extract \nllm_extract(reviews, review, labels = \"product\") \n#> # A tibble: 3 × 2\n#> review .extract \n#> \n#> 1 This has been the best TV I've ever used. Gr… tv \n#> 2 I regret buying this laptop. It is too slow … laptop \n#> 3 Not sure how to feel about my new washing ma… washing machine\n \n# Use 'pred_name' to customize the new column's name \nllm_extract(reviews, review, \"product\", pred_name = \"prod\") \n#> # A tibble: 3 × 2\n#> review prod \n#> \n#> 1 This has been the best TV I've ever used. Gr… tv \n#> 2 I regret buying this laptop. It is too slow … laptop \n#> 3 Not sure how to feel about my new washing ma… washing machine\n \n# Pass a vector to request multiple things, the results will be pipe delimeted \n# in a single column \nllm_extract(reviews, review, c(\"product\", \"feelings\")) \n#> # A tibble: 3 × 2\n#> review .extract \n#> \n#> 1 This has been the best TV I've ever used. Gr… tv | great \n#> 2 I regret buying this laptop. It is too slow … laptop|frustration \n#> 3 Not sure how to feel about my new washing ma… washing machine | confusion\n \n# To get multiple columns, use 'expand_cols' \nllm_extract(reviews, review, c(\"product\", \"feelings\"), expand_cols = TRUE) \n#> # A tibble: 3 × 3\n#> review product feelings \n#> \n#> 1 This has been the best TV I've ever used. Gr… \"tv \" \" great\" \n#> 2 I regret buying this laptop. 
It is too slow … \"laptop\" \"frustration\"\n#> 3 Not sure how to feel about my new washing ma… \"washing machine \" \" confusion\"\n \n# Pass a named vector to set the resulting column names \nllm_extract( \n .data = reviews, \n col = review, \n labels = c(prod = \"product\", feels = \"feelings\"), \n expand_cols = TRUE \n) \n#> # A tibble: 3 × 3\n#> review prod feels \n#> \n#> 1 This has been the best TV I've ever used. Gr… \"tv \" \" great\" \n#> 2 I regret buying this laptop. It is too slow … \"laptop\" \"frustration\"\n#> 3 Not sure how to feel about my new washing ma… \"washing machine \" \" confusion\"\n \n# For character vectors, instead of a data frame, use this function \nllm_vec_extract(\"bob smith, 123 3rd street\", c(\"name\", \"address\")) \n#> [1] \"bob smith | 123 3rd street\"\n \n# To preview the first call that will be made to the downstream R function \nllm_vec_extract( \n \"bob smith, 123 3rd street\", \n c(\"name\", \"address\"), \n preview = TRUE \n) \n#> ollamar::chat(messages = list(list(role = \"user\", content = \"You are a helpful text extraction engine. Extract the name, address being referred to on the text. I expect 2 items exactly. No capitalization. No explanations. Return the response exclusively in a pipe separated list, and no headers. The answer is based on the following text:\\nbob smith, 123 3rd street\")), \n#> output = \"text\", model = \"llama3.2\", seed = 100)\n```\n:::\n", "supporting": [], "filters": [ "rmarkdown/pagebreak.lua" diff --git a/_freeze/reference/llm_sentiment/execute-results/html.json b/_freeze/reference/llm_sentiment/execute-results/html.json index 5e91981..bff4d77 100644 --- a/_freeze/reference/llm_sentiment/execute-results/html.json +++ b/_freeze/reference/llm_sentiment/execute-results/html.json @@ -1,8 +1,8 @@ { - "hash": "f7731cf8df59e5c46acb3036056235d7", + "hash": "ce5d3cf8515ce8aee247eeb4715bcbc0", "result": { "engine": "knitr", - "markdown": "---\ntitle: \"Sentiment analysis\"\nexecute:\n eval: true\n freeze: true\n---\n\n\n\n\n\n[R/llm-sentiment.R](https://github.com/edgararuiz/mall/blob/main/R/llm-sentiment.R)\n\n## llm_sentiment\n\n## Description\n Use a Large Language Model (LLM) to perform sentiment analysis from the provided text \n\n\n## Usage\n```r\n \nllm_sentiment( \n .data, \n col, \n options = c(\"positive\", \"negative\", \"neutral\"), \n pred_name = \".sentiment\", \n additional_prompt = \"\" \n) \n \nllm_vec_sentiment( \n x, \n options = c(\"positive\", \"negative\", \"neutral\"), \n additional_prompt = \"\", \n preview = FALSE \n) \n```\n\n## Arguments\n|Arguments|Description|\n|---|---|\n| .data | A `data.frame` or `tbl` object that contains the text to be analyzed |\n| col | The name of the field to analyze, supports `tidy-eval` |\n| options | A vector with the options that the LLM should use to assign a sentiment to the text. Defaults to: 'positive', 'negative', 'neutral' |\n| pred_name | A character vector with the name of the new column where the prediction will be placed |\n| additional_prompt | Inserts this text into the prompt sent to the LLM |\n| x | A vector that contains the text to be analyzed |\n| preview | It returns the R call that would have been used to run the prediction. It only returns the first record in `x`. Defaults to `FALSE` Applies to vector function only. |\n\n\n\n## Value\n `llm_sentiment` returns a `data.frame` or `tbl` object. `llm_vec_sentiment` returns a vector that is the same length as `x`. 
\n\n\n## Examples\n\n\n::: {.cell}\n\n```{.r .cell-code}\n \nlibrary(mall) \n \ndata(\"reviews\") \n \nllm_use(\"ollama\", \"llama3.1\", seed = 100, .silent = TRUE) \n \nllm_sentiment(reviews, review) \n#> # A tibble: 3 × 2\n#> review .sentiment\n#> \n#> 1 This has been the best TV I've ever used. Great screen, and sound. positive \n#> 2 I regret buying this laptop. It is too slow and the keyboard is to… negative \n#> 3 Not sure how to feel about my new washing machine. Great color, bu… neutral\n \n# Use 'pred_name' to customize the new column's name \nllm_sentiment(reviews, review, pred_name = \"review_sentiment\") \n#> # A tibble: 3 × 2\n#> review review_sentiment\n#> \n#> 1 This has been the best TV I've ever used. Great screen, and … positive \n#> 2 I regret buying this laptop. It is too slow and the keyboard… negative \n#> 3 Not sure how to feel about my new washing machine. Great col… neutral\n \n# Pass custom sentiment options \nllm_sentiment(reviews, review, c(\"positive\", \"negative\")) \n#> # A tibble: 3 × 2\n#> review .sentiment\n#> \n#> 1 This has been the best TV I've ever used. Great screen, and sound. positive \n#> 2 I regret buying this laptop. It is too slow and the keyboard is to… negative \n#> 3 Not sure how to feel about my new washing machine. Great color, bu… negative\n \n# Specify values to return per sentiment \nllm_sentiment(reviews, review, c(\"positive\" ~ 1, \"negative\" ~ 0)) \n#> # A tibble: 3 × 2\n#> review .sentiment\n#> \n#> 1 This has been the best TV I've ever used. Great screen, and sound. 1\n#> 2 I regret buying this laptop. It is too slow and the keyboard is to… 0\n#> 3 Not sure how to feel about my new washing machine. Great color, bu… 0\n \n# For character vectors, instead of a data frame, use this function \nllm_vec_sentiment(c(\"I am happy\", \"I am sad\")) \n#> [1] \"positive\" \"negative\"\n \n# To preview the first call that will be made to the downstream R function \nllm_vec_sentiment(c(\"I am happy\", \"I am sad\"), preview = TRUE) \n#> ollamar::chat(messages = list(list(role = \"user\", content = \"You are a helpful sentiment engine. Return only one of the following answers: positive, negative, neutral. No capitalization. No explanations. The answer is based on the following text:\\nI am happy\")), \n#> output = \"text\", model = \"llama3.1\", seed = 100)\n```\n:::\n", + "markdown": "---\ntitle: \"Sentiment analysis\"\nexecute:\n eval: true\n freeze: true\n---\n\n\n\n\n\n[R/llm-sentiment.R](https://github.com/edgararuiz/mall/blob/main/R/llm-sentiment.R)\n\n## llm_sentiment\n\n## Description\n Use a Large Language Model (LLM) to perform sentiment analysis from the provided text \n\n\n## Usage\n```r\n \nllm_sentiment( \n .data, \n col, \n options = c(\"positive\", \"negative\", \"neutral\"), \n pred_name = \".sentiment\", \n additional_prompt = \"\" \n) \n \nllm_vec_sentiment( \n x, \n options = c(\"positive\", \"negative\", \"neutral\"), \n additional_prompt = \"\", \n preview = FALSE \n) \n```\n\n## Arguments\n|Arguments|Description|\n|---|---|\n| .data | A `data.frame` or `tbl` object that contains the text to be analyzed |\n| col | The name of the field to analyze, supports `tidy-eval` |\n| options | A vector with the options that the LLM should use to assign a sentiment to the text. 
Defaults to: 'positive', 'negative', 'neutral' |\n| pred_name | A character vector with the name of the new column where the prediction will be placed |\n| additional_prompt | Inserts this text into the prompt sent to the LLM |\n| x | A vector that contains the text to be analyzed |\n| preview | It returns the R call that would have been used to run the prediction. It only returns the first record in `x`. Defaults to `FALSE` Applies to vector function only. |\n\n\n\n## Value\n `llm_sentiment` returns a `data.frame` or `tbl` object. `llm_vec_sentiment` returns a vector that is the same length as `x`. \n\n\n## Examples\n\n\n::: {.cell}\n\n```{.r .cell-code}\n \nlibrary(mall) \n \ndata(\"reviews\") \n \nllm_use(\"ollama\", \"llama3.2\", seed = 100, .silent = TRUE) \n \nllm_sentiment(reviews, review) \n#> # A tibble: 3 × 2\n#> review .sentiment\n#> \n#> 1 This has been the best TV I've ever used. Great screen, and sound. positive \n#> 2 I regret buying this laptop. It is too slow and the keyboard is to… negative \n#> 3 Not sure how to feel about my new washing machine. Great color, bu… neutral\n \n# Use 'pred_name' to customize the new column's name \nllm_sentiment(reviews, review, pred_name = \"review_sentiment\") \n#> # A tibble: 3 × 2\n#> review review_sentiment\n#> \n#> 1 This has been the best TV I've ever used. Great screen, and … positive \n#> 2 I regret buying this laptop. It is too slow and the keyboard… negative \n#> 3 Not sure how to feel about my new washing machine. Great col… neutral\n \n# Pass custom sentiment options \nllm_sentiment(reviews, review, c(\"positive\", \"negative\")) \n#> # A tibble: 3 × 2\n#> review .sentiment\n#> \n#> 1 This has been the best TV I've ever used. Great screen, and sound. positive \n#> 2 I regret buying this laptop. It is too slow and the keyboard is to… negative \n#> 3 Not sure how to feel about my new washing machine. Great color, bu… negative\n \n# Specify values to return per sentiment \nllm_sentiment(reviews, review, c(\"positive\" ~ 1, \"negative\" ~ 0)) \n#> # A tibble: 3 × 2\n#> review .sentiment\n#> \n#> 1 This has been the best TV I've ever used. Great screen, and sound. 1\n#> 2 I regret buying this laptop. It is too slow and the keyboard is to… 0\n#> 3 Not sure how to feel about my new washing machine. Great color, bu… 0\n \n# For character vectors, instead of a data frame, use this function \nllm_vec_sentiment(c(\"I am happy\", \"I am sad\")) \n#> [1] \"positive\" \"negative\"\n \n# To preview the first call that will be made to the downstream R function \nllm_vec_sentiment(c(\"I am happy\", \"I am sad\"), preview = TRUE) \n#> ollamar::chat(messages = list(list(role = \"user\", content = \"You are a helpful sentiment engine. Return only one of the following answers: positive, negative, neutral. No capitalization. No explanations. 
The answer is based on the following text:\\nI am happy\")), \n#> output = \"text\", model = \"llama3.2\", seed = 100)\n```\n:::\n", "supporting": [], "filters": [ "rmarkdown/pagebreak.lua" diff --git a/_freeze/reference/llm_summarize/execute-results/html.json b/_freeze/reference/llm_summarize/execute-results/html.json index 0f279e1..97d568e 100644 --- a/_freeze/reference/llm_summarize/execute-results/html.json +++ b/_freeze/reference/llm_summarize/execute-results/html.json @@ -1,8 +1,8 @@ { - "hash": "4410868db92aa67748b56c5b97777052", + "hash": "7dcf1326b18f3451fa4dc840a052e68e", "result": { "engine": "knitr", - "markdown": "---\ntitle: \"Summarize text\"\nexecute:\n eval: true\n freeze: true\n---\n\n\n\n\n\n[R/llm-summarize.R](https://github.com/edgararuiz/mall/blob/main/R/llm-summarize.R)\n\n## llm_summarize\n\n## Description\n Use a Large Language Model (LLM) to summarize text \n\n\n## Usage\n```r\n \nllm_summarize( \n .data, \n col, \n max_words = 10, \n pred_name = \".summary\", \n additional_prompt = \"\" \n) \n \nllm_vec_summarize(x, max_words = 10, additional_prompt = \"\", preview = FALSE) \n```\n\n## Arguments\n|Arguments|Description|\n|---|---|\n| .data | A `data.frame` or `tbl` object that contains the text to be analyzed |\n| col | The name of the field to analyze, supports `tidy-eval` |\n| max_words | The maximum number of words that the LLM should use in the summary. Defaults to 10. |\n| pred_name | A character vector with the name of the new column where the prediction will be placed |\n| additional_prompt | Inserts this text into the prompt sent to the LLM |\n| x | A vector that contains the text to be analyzed |\n| preview | It returns the R call that would have been used to run the prediction. It only returns the first record in `x`. Defaults to `FALSE` Applies to vector function only. |\n\n\n\n## Value\n `llm_summarize` returns a `data.frame` or `tbl` object. `llm_vec_summarize` returns a vector that is the same length as `x`. \n\n\n## Examples\n\n\n::: {.cell}\n\n```{.r .cell-code}\n \nlibrary(mall) \n \ndata(\"reviews\") \n \nllm_use(\"ollama\", \"llama3.1\", seed = 100, .silent = TRUE) \n \n# Use max_words to set the maximum number of words to use for the summary \nllm_summarize(reviews, review, max_words = 5) \n#> # A tibble: 3 × 2\n#> review .summary \n#> \n#> 1 This has been the best TV I've ever used. Gr… excellent tv with great features\n#> 2 I regret buying this laptop. It is too slow … laptop is too slow noisy \n#> 3 Not sure how to feel about my new washing ma… mixed feelings about new washer\n \n# Use 'pred_name' to customize the new column's name \nllm_summarize(reviews, review, 5, pred_name = \"review_summary\") \n#> # A tibble: 3 × 2\n#> review review_summary \n#> \n#> 1 This has been the best TV I've ever used. Gr… excellent tv with great features\n#> 2 I regret buying this laptop. It is too slow … laptop is too slow noisy \n#> 3 Not sure how to feel about my new washing ma… mixed feelings about new washer\n \n# For character vectors, instead of a data frame, use this function \nllm_vec_summarize( \n \"This has been the best TV I've ever used. Great screen, and sound.\", \n max_words = 5 \n) \n#> [1] \"excellent tv with great features\"\n \n# To preview the first call that will be made to the downstream R function \nllm_vec_summarize( \n \"This has been the best TV I've ever used. 
Great screen, and sound.\", \n max_words = 5, \n preview = TRUE \n) \n#> ollamar::chat(messages = list(list(role = \"user\", content = \"You are a helpful summarization engine. Your answer will contain no no capitalization and no explanations. Return no more than 5 words. The answer is the summary of the following text:\\nThis has been the best TV I've ever used. Great screen, and sound.\")), \n#> output = \"text\", model = \"llama3.1\", seed = 100)\n```\n:::\n", + "markdown": "---\ntitle: \"Summarize text\"\nexecute:\n eval: true\n freeze: true\n---\n\n\n\n\n\n[R/llm-summarize.R](https://github.com/edgararuiz/mall/blob/main/R/llm-summarize.R)\n\n## llm_summarize\n\n## Description\n Use a Large Language Model (LLM) to summarize text \n\n\n## Usage\n```r\n \nllm_summarize( \n .data, \n col, \n max_words = 10, \n pred_name = \".summary\", \n additional_prompt = \"\" \n) \n \nllm_vec_summarize(x, max_words = 10, additional_prompt = \"\", preview = FALSE) \n```\n\n## Arguments\n|Arguments|Description|\n|---|---|\n| .data | A `data.frame` or `tbl` object that contains the text to be analyzed |\n| col | The name of the field to analyze, supports `tidy-eval` |\n| max_words | The maximum number of words that the LLM should use in the summary. Defaults to 10. |\n| pred_name | A character vector with the name of the new column where the prediction will be placed |\n| additional_prompt | Inserts this text into the prompt sent to the LLM |\n| x | A vector that contains the text to be analyzed |\n| preview | It returns the R call that would have been used to run the prediction. It only returns the first record in `x`. Defaults to `FALSE` Applies to vector function only. |\n\n\n\n## Value\n `llm_summarize` returns a `data.frame` or `tbl` object. `llm_vec_summarize` returns a vector that is the same length as `x`. \n\n\n## Examples\n\n\n::: {.cell}\n\n```{.r .cell-code}\n \nlibrary(mall) \n \ndata(\"reviews\") \n \nllm_use(\"ollama\", \"llama3.2\", seed = 100, .silent = TRUE) \n \n# Use max_words to set the maximum number of words to use for the summary \nllm_summarize(reviews, review, max_words = 5) \n#> # A tibble: 3 × 2\n#> review .summary \n#> \n#> 1 This has been the best TV I've ever used. Gr… it's a great tv \n#> 2 I regret buying this laptop. It is too slow … laptop purchase was a mistake \n#> 3 Not sure how to feel about my new washing ma… having mixed feelings about it\n \n# Use 'pred_name' to customize the new column's name \nllm_summarize(reviews, review, 5, pred_name = \"review_summary\") \n#> # A tibble: 3 × 2\n#> review review_summary \n#> \n#> 1 This has been the best TV I've ever used. Gr… it's a great tv \n#> 2 I regret buying this laptop. It is too slow … laptop purchase was a mistake \n#> 3 Not sure how to feel about my new washing ma… having mixed feelings about it\n \n# For character vectors, instead of a data frame, use this function \nllm_vec_summarize( \n \"This has been the best TV I've ever used. Great screen, and sound.\", \n max_words = 5 \n) \n#> [1] \"it's a great tv\"\n \n# To preview the first call that will be made to the downstream R function \nllm_vec_summarize( \n \"This has been the best TV I've ever used. Great screen, and sound.\", \n max_words = 5, \n preview = TRUE \n) \n#> ollamar::chat(messages = list(list(role = \"user\", content = \"You are a helpful summarization engine. Your answer will contain no no capitalization and no explanations. Return no more than 5 words. The answer is the summary of the following text:\\nThis has been the best TV I've ever used. 
Great screen, and sound.\")), \n#> output = \"text\", model = \"llama3.2\", seed = 100)\n```\n:::\n", "supporting": [], "filters": [ "rmarkdown/pagebreak.lua" diff --git a/_freeze/reference/llm_translate/execute-results/html.json b/_freeze/reference/llm_translate/execute-results/html.json index cca1ef5..fd5b557 100644 --- a/_freeze/reference/llm_translate/execute-results/html.json +++ b/_freeze/reference/llm_translate/execute-results/html.json @@ -1,8 +1,8 @@ { - "hash": "9ee109d2749405387f08fdb5dbae52a6", + "hash": "446270788110e4132cda33c384ad9125", "result": { "engine": "knitr", - "markdown": "---\ntitle: \"Translates text to a specific language\"\nexecute:\n eval: true\n freeze: true\n---\n\n\n\n\n\n[R/llm-translate.R](https://github.com/edgararuiz/mall/blob/main/R/llm-translate.R)\n\n## llm_translate\n\n## Description\n Use a Large Language Model (LLM) to translate a text to a specific language \n\n\n## Usage\n```r\n \nllm_translate( \n .data, \n col, \n language, \n pred_name = \".translation\", \n additional_prompt = \"\" \n) \n \nllm_vec_translate(x, language, additional_prompt = \"\", preview = FALSE) \n```\n\n## Arguments\n|Arguments|Description|\n|---|---|\n| .data | A `data.frame` or `tbl` object that contains the text to be analyzed |\n| col | The name of the field to analyze, supports `tidy-eval` |\n| language | Target language to translate the text to |\n| pred_name | A character vector with the name of the new column where the prediction will be placed |\n| additional_prompt | Inserts this text into the prompt sent to the LLM |\n| x | A vector that contains the text to be analyzed |\n| preview | It returns the R call that would have been used to run the prediction. It only returns the first record in `x`. Defaults to `FALSE` Applies to vector function only. |\n\n\n\n## Value\n `llm_translate` returns a `data.frame` or `tbl` object. `llm_vec_translate` returns a vector that is the same length as `x`. \n\n\n## Examples\n\n\n::: {.cell}\n\n```{.r .cell-code}\n \nlibrary(mall) \n \ndata(\"reviews\") \n \nllm_use(\"ollama\", \"llama3.1\", seed = 100, .silent = TRUE) \n \n# Pass the desired language to translate to \nllm_translate(reviews, review, \"spanish\") \n#> # A tibble: 3 × 2\n#> review .translation \n#> \n#> 1 This has been the best TV I've ever used. Gr… Ha sido la mejor televisión que…\n#> 2 I regret buying this laptop. 
It is too slow … Me arrepiento de haber comprado…\n#> 3 Not sure how to feel about my new washing ma… No estoy seguro de cómo sentirm…\n```\n:::\n", + "markdown": "---\ntitle: \"Translates text to a specific language\"\nexecute:\n eval: true\n freeze: true\n---\n\n\n\n\n\n[R/llm-translate.R](https://github.com/edgararuiz/mall/blob/main/R/llm-translate.R)\n\n## llm_translate\n\n## Description\n Use a Large Language Model (LLM) to translate a text to a specific language \n\n\n## Usage\n```r\n \nllm_translate( \n .data, \n col, \n language, \n pred_name = \".translation\", \n additional_prompt = \"\" \n) \n \nllm_vec_translate(x, language, additional_prompt = \"\", preview = FALSE) \n```\n\n## Arguments\n|Arguments|Description|\n|---|---|\n| .data | A `data.frame` or `tbl` object that contains the text to be analyzed |\n| col | The name of the field to analyze, supports `tidy-eval` |\n| language | Target language to translate the text to |\n| pred_name | A character vector with the name of the new column where the prediction will be placed |\n| additional_prompt | Inserts this text into the prompt sent to the LLM |\n| x | A vector that contains the text to be analyzed |\n| preview | It returns the R call that would have been used to run the prediction. It only returns the first record in `x`. Defaults to `FALSE` Applies to vector function only. |\n\n\n\n## Value\n `llm_translate` returns a `data.frame` or `tbl` object. `llm_vec_translate` returns a vector that is the same length as `x`. \n\n\n## Examples\n\n\n::: {.cell}\n\n```{.r .cell-code}\n \nlibrary(mall) \n \ndata(\"reviews\") \n \nllm_use(\"ollama\", \"llama3.2\", seed = 100, .silent = TRUE) \n \n# Pass the desired language to translate to \nllm_translate(reviews, review, \"spanish\") \n#> # A tibble: 3 × 2\n#> review .translation \n#> \n#> 1 This has been the best TV I've ever used. Gr… Esta ha sido la mejor televisió…\n#> 2 I regret buying this laptop. It is too slow … Me arrepiento de comprar este p…\n#> 3 Not sure how to feel about my new washing ma… No estoy seguro de cómo me sien…\n```\n:::\n", "supporting": [], "filters": [ "rmarkdown/pagebreak.lua" diff --git a/_freeze/reference/llm_use/execute-results/html.json b/_freeze/reference/llm_use/execute-results/html.json index 4c1592f..71920e3 100644 --- a/_freeze/reference/llm_use/execute-results/html.json +++ b/_freeze/reference/llm_use/execute-results/html.json @@ -1,8 +1,8 @@ { - "hash": "b9e7cd8226754f77d77478f5ac5b4d71", + "hash": "84eedf7eec066709f406e09aee9d91c6", "result": { "engine": "knitr", - "markdown": "---\ntitle: \"Specify the model to use\"\nexecute:\n eval: true\n freeze: true\n---\n\n\n\n\n\n[R/llm-use.R](https://github.com/edgararuiz/mall/blob/main/R/llm-use.R)\n\n## llm_use\n\n## Description\n Allows us to specify the back-end provider, model to use during the current R session \n\n\n## Usage\n```r\n \nllm_use( \n backend = NULL, \n model = NULL, \n ..., \n .silent = FALSE, \n .cache = NULL, \n .force = FALSE \n) \n```\n\n## Arguments\n|Arguments|Description|\n|---|---|\n| backend | The name of an supported back-end provider. Currently only 'ollama' is supported. |\n| model | The name of model supported by the back-end provider |\n| ... | Additional arguments that this function will pass down to the integrating function. In the case of Ollama, it will pass those arguments to `ollamar::chat()`. |\n| .silent | Avoids console output |\n| .cache | The path to save model results, so they can be re-used if the same operation is ran again. 
To turn off, set this argument to an empty character: `\"\"`. 'It defaults to '_mall_cache'. If this argument is left `NULL` when calling this function, no changes to the path will be made. |\n| .force | Flag that tell the function to reset all of the settings in the R session |\n\n\n\n## Value\n A `mall_session` object \n\n\n## Examples\n\n\n::: {.cell}\n\n```{.r .cell-code}\n \nlibrary(mall) \n \nllm_use(\"ollama\", \"llama3.1\") \n#> \n#> ── mall session object\n#> Backend: ollama\n#> LLM session: model:llama3.1\n#> R session: cache_folder:_mall_cache\n \n# Additional arguments will be passed 'as-is' to the \n# downstream R function in this example, to ollama::chat() \nllm_use(\"ollama\", \"llama3.1\", seed = 100, temp = 0.1) \n#> \n#> ── mall session object \n#> Backend: ollamaLLM session: model:llama3.1\n#> seed:100\n#> temp:0.1\n#> R session: cache_folder:_mall_cache\n \n# During the R session, you can change any argument \n# individually and it will retain all of previous \n# arguments used \nllm_use(temp = 0.3) \n#> \n#> ── mall session object \n#> Backend: ollamaLLM session: model:llama3.1\n#> seed:100\n#> temp:0.3\n#> R session: cache_folder:_mall_cache\n \n# Use .cache to modify the target folder for caching \nllm_use(.cache = \"_my_cache\") \n#> \n#> ── mall session object \n#> Backend: ollamaLLM session: model:llama3.1\n#> seed:100\n#> temp:0.3\n#> R session: cache_folder:_my_cache\n \n# Leave .cache empty to turn off this functionality \nllm_use(.cache = \"\") \n#> \n#> ── mall session object \n#> Backend: ollamaLLM session: model:llama3.1\n#> seed:100\n#> temp:0.3\n \n# Use .silent to avoid the print out \nllm_use(.silent = TRUE) \n \n```\n:::\n", + "markdown": "---\ntitle: \"Specify the model to use\"\nexecute:\n eval: true\n freeze: true\n---\n\n\n\n\n\n[R/llm-use.R](https://github.com/edgararuiz/mall/blob/main/R/llm-use.R)\n\n## llm_use\n\n## Description\n Allows us to specify the back-end provider, model to use during the current R session \n\n\n## Usage\n```r\n \nllm_use( \n backend = NULL, \n model = NULL, \n ..., \n .silent = FALSE, \n .cache = NULL, \n .force = FALSE \n) \n```\n\n## Arguments\n|Arguments|Description|\n|---|---|\n| backend | The name of an supported back-end provider. Currently only 'ollama' is supported. |\n| model | The name of model supported by the back-end provider |\n| ... | Additional arguments that this function will pass down to the integrating function. In the case of Ollama, it will pass those arguments to `ollamar::chat()`. |\n| .silent | Avoids console output |\n| .cache | The path to save model results, so they can be re-used if the same operation is ran again. To turn off, set this argument to an empty character: `\"\"`. 'It defaults to '_mall_cache'. If this argument is left `NULL` when calling this function, no changes to the path will be made. 
|\n| .force | Flag that tell the function to reset all of the settings in the R session |\n\n\n\n## Value\n A `mall_session` object \n\n\n## Examples\n\n\n::: {.cell}\n\n```{.r .cell-code}\n \nlibrary(mall) \n \nllm_use(\"ollama\", \"llama3.2\") \n#> \n#> ── mall session object\n#> Backend: ollama\n#> LLM session: model:llama3.2\n#> R session: cache_folder:_mall_cache\n \n# Additional arguments will be passed 'as-is' to the \n# downstream R function in this example, to ollama::chat() \nllm_use(\"ollama\", \"llama3.2\", seed = 100, temp = 0.1) \n#> \n#> ── mall session object \n#> Backend: ollamaLLM session: model:llama3.2\n#> seed:100\n#> temp:0.1\n#> R session: cache_folder:_mall_cache\n \n# During the R session, you can change any argument \n# individually and it will retain all of previous \n# arguments used \nllm_use(temp = 0.3) \n#> \n#> ── mall session object \n#> Backend: ollamaLLM session: model:llama3.2\n#> seed:100\n#> temp:0.3\n#> R session: cache_folder:_mall_cache\n \n# Use .cache to modify the target folder for caching \nllm_use(.cache = \"_my_cache\") \n#> \n#> ── mall session object \n#> Backend: ollamaLLM session: model:llama3.2\n#> seed:100\n#> temp:0.3\n#> R session: cache_folder:_my_cache\n \n# Leave .cache empty to turn off this functionality \nllm_use(.cache = \"\") \n#> \n#> ── mall session object \n#> Backend: ollamaLLM session: model:llama3.2\n#> seed:100\n#> temp:0.3\n \n# Use .silent to avoid the print out \nllm_use(.silent = TRUE) \n \n```\n:::\n", "supporting": [], "filters": [ "rmarkdown/pagebreak.lua" diff --git a/reference/llm_classify.qmd b/reference/llm_classify.qmd index 72dd397..cad27c9 100644 --- a/reference/llm_classify.qmd +++ b/reference/llm_classify.qmd @@ -56,7 +56,7 @@ library(mall) data("reviews") -llm_use("ollama", "llama3.1", seed = 100, .silent = TRUE) +llm_use("ollama", "llama3.2", seed = 100, .silent = TRUE) llm_classify(reviews, review, c("appliance", "computer")) diff --git a/reference/llm_custom.qmd b/reference/llm_custom.qmd index bea3702..c645a9e 100644 --- a/reference/llm_custom.qmd +++ b/reference/llm_custom.qmd @@ -49,7 +49,7 @@ library(mall) data("reviews") -llm_use("ollama", "llama3.1", seed = 100, .silent = TRUE) +llm_use("ollama", "llama3.2", seed = 100, .silent = TRUE) my_prompt <- paste( "Answer a question.", diff --git a/reference/llm_extract.qmd b/reference/llm_extract.qmd index 2344ba2..af36446 100644 --- a/reference/llm_extract.qmd +++ b/reference/llm_extract.qmd @@ -58,7 +58,7 @@ library(mall) data("reviews") -llm_use("ollama", "llama3.1", seed = 100, .silent = TRUE) +llm_use("ollama", "llama3.2", seed = 100, .silent = TRUE) # Use 'labels' to let the function know what to extract llm_extract(reviews, review, labels = "product") diff --git a/reference/llm_sentiment.qmd b/reference/llm_sentiment.qmd index e96be4f..3b34d89 100644 --- a/reference/llm_sentiment.qmd +++ b/reference/llm_sentiment.qmd @@ -61,7 +61,7 @@ library(mall) data("reviews") -llm_use("ollama", "llama3.1", seed = 100, .silent = TRUE) +llm_use("ollama", "llama3.2", seed = 100, .silent = TRUE) llm_sentiment(reviews, review) diff --git a/reference/llm_summarize.qmd b/reference/llm_summarize.qmd index c83343d..fe3557e 100644 --- a/reference/llm_summarize.qmd +++ b/reference/llm_summarize.qmd @@ -56,7 +56,7 @@ library(mall) data("reviews") -llm_use("ollama", "llama3.1", seed = 100, .silent = TRUE) +llm_use("ollama", "llama3.2", seed = 100, .silent = TRUE) # Use max_words to set the maximum number of words to use for the summary llm_summarize(reviews, review, 
max_words = 5) diff --git a/reference/llm_translate.qmd b/reference/llm_translate.qmd index 934a892..3930faa 100644 --- a/reference/llm_translate.qmd +++ b/reference/llm_translate.qmd @@ -56,7 +56,7 @@ library(mall) data("reviews") -llm_use("ollama", "llama3.1", seed = 100, .silent = TRUE) +llm_use("ollama", "llama3.2", seed = 100, .silent = TRUE) # Pass the desired language to translate to llm_translate(reviews, review, "spanish") diff --git a/reference/llm_use.qmd b/reference/llm_use.qmd index c987f6f..c6d07a7 100644 --- a/reference/llm_use.qmd +++ b/reference/llm_use.qmd @@ -52,11 +52,11 @@ llm_use( library(mall) -llm_use("ollama", "llama3.1") +llm_use("ollama", "llama3.2") # Additional arguments will be passed 'as-is' to the # downstream R function in this example, to ollama::chat() -llm_use("ollama", "llama3.1", seed = 100, temp = 0.1) +llm_use("ollama", "llama3.2", seed = 100, temp = 0.1) # During the R session, you can change any argument # individually and it will retain all of previous From 802e8de7f412491d7fcdc2c39f73c9b9e278d8f2 Mon Sep 17 00:00:00 2001 From: Edgar Ruiz Date: Sun, 29 Sep 2024 10:02:34 -0500 Subject: [PATCH 4/8] Custom classify for llama 3.2 --- R/m-backend-prompt.R | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/R/m-backend-prompt.R b/R/m-backend-prompt.R index b1acdd0..2634312 100644 --- a/R/m-backend-prompt.R +++ b/R/m-backend-prompt.R @@ -31,6 +31,25 @@ m_backend_prompt.mall_llama3.2<- function(backend, additional = "") { ) ) } + base_method$classify <- function(labels) { + labels <- process_labels( + x = labels, + if_character = "Determine if the text refers to one of the following: {x}", + if_formula = "If it classifies as {f_lhs(x)} then return {f_rhs(x)}" + ) + list( + list( + role = "user", + content = glue(paste( + "You are a helpful classification engine.", + "{labels}.", + "No capitalization. 
No explanations.", + "{additional}", + "The answer is based on the following text:\n{{x}}" + )) + ) + ) + } base_method } From bbfe4f2a9ca169c041b596fb46213e18c1f85f0a Mon Sep 17 00:00:00 2001 From: Edgar Ruiz Date: Sun, 29 Sep 2024 10:02:44 -0500 Subject: [PATCH 5/8] Updates test snapshots --- tests/testthat/_snaps/llm-extract.md | 2 +- tests/testthat/_snaps/zzz-cache.md | 48 ++++++++++++++-------------- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/tests/testthat/_snaps/llm-extract.md b/tests/testthat/_snaps/llm-extract.md index da82dcf..1ce3ad0 100644 --- a/tests/testthat/_snaps/llm-extract.md +++ b/tests/testthat/_snaps/llm-extract.md @@ -32,5 +32,5 @@ Code llm_vec_extract("bob smith, 105 2nd street", c("name", "address")) Output - [1] "bob smith | 105 2nd street" + [1] "| bob smith | 105 2nd street |" diff --git a/tests/testthat/_snaps/zzz-cache.md b/tests/testthat/_snaps/zzz-cache.md index c5111c7..d1da860 100644 --- a/tests/testthat/_snaps/zzz-cache.md +++ b/tests/testthat/_snaps/zzz-cache.md @@ -3,30 +3,30 @@ Code fs::dir_ls("_mall_cache", recurse = TRUE) Output - _mall_cache/00 - _mall_cache/00/004088f786ed0f6a3abc08f2aa55ae2b.json - _mall_cache/14 - _mall_cache/14/14afc26cb4f76497b80b5552b2b1e217.json - _mall_cache/18 - _mall_cache/18/18560280fe5b5a85f2d66fa2dc89aa00.json - _mall_cache/29 - _mall_cache/29/296f3116c07dab7f3ecb4a71776e3b64.json - _mall_cache/2c - _mall_cache/2c/2cbb57fd4a7e7178c489d068db063433.json + _mall_cache/0b + _mall_cache/0b/0b18bbcac64aff6c9121b7b6d08e6458.json + _mall_cache/1a + _mall_cache/1a/1a1434659a6539a64dafc4dc1adf5503.json + _mall_cache/1c + _mall_cache/1c/1c74876f58e4060900dc2c3711777cc7.json + _mall_cache/32 + _mall_cache/32/32f10622095aad218080f2ec26382b2a.json + _mall_cache/3b + _mall_cache/3b/3b3111d4036392f8e32a6dc4457d515e.json _mall_cache/42 _mall_cache/42/425e0dc8e9dcadd3482b98fdfa127f30.json - _mall_cache/44 - _mall_cache/44/44fd00c39a9697e24e93943ef5f2ad1b.json - _mall_cache/57 - _mall_cache/57/5702ff773afb880c746037a5d8254019.json - _mall_cache/65 - _mall_cache/65/65c76a53ebea14a6695adf433fb2faa6.json - _mall_cache/98 - _mall_cache/98/98a43dc690b06455d6b0a5046db31d84.json - _mall_cache/9c - _mall_cache/9c/9c4ed89921994aa00c712bada91ef941.json - _mall_cache/b0 - _mall_cache/b0/b02d0fab954e183a98787fa897b47d59.json - _mall_cache/b7 - _mall_cache/b7/b7c613386c94b2500b2b733632fedd1a.json + _mall_cache/83 + _mall_cache/83/837cf64a31cf9d16de22d95feaafd72b.json + _mall_cache/84 + _mall_cache/84/84609d770fdd4eb65d2f232e0c93f15c.json + _mall_cache/a5 + _mall_cache/a5/a5ae06127c321290a08d42be969db936.json + _mall_cache/bf + _mall_cache/bf/bf5790d2673fe4e32e382f282ae2a095.json + _mall_cache/ca + _mall_cache/ca/cac76c2359dbefe94ff9007c014996bf.json + _mall_cache/db + _mall_cache/db/db9fde654fc2c2066010bba4733dcc87.json + _mall_cache/ff + _mall_cache/ff/ffea2bb0bc69b4de643fadd6e1d9b0fb.json From 4f5a12cfe33adcba6782467d2f6b15cdcbe9b0bc Mon Sep 17 00:00:00 2001 From: Edgar Ruiz Date: Sun, 29 Sep 2024 10:03:06 -0500 Subject: [PATCH 6/8] Updates README --- README.Rmd | 11 ++++++----- README.md | 13 +++++++------ 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/README.Rmd b/README.Rmd index 8e9da8f..8ee327d 100644 --- a/README.Rmd +++ b/README.Rmd @@ -218,7 +218,7 @@ use. Calling `llm_use()` directly will let you specify the model and backend to use. You can also setup additional arguments that will be passed down to the function that actually runs the prediction. 
In the case of Ollama, that function -is [`generate()`](https://hauselin.github.io/ollama-r/reference/generate.html). +is [`chat()`](https://hauselin.github.io/ollama-r/reference/chat.html). ```{r, eval = FALSE} llm_use("ollama", "llama3.2", seed = 100, temperature = 0) @@ -232,7 +232,8 @@ If using this method with an LLM locally available, the cost will be a long running time. Unless using a very specialized LLM, a given LLM is a general model. It was fitted using a vast amount of data. So determining a response for each row, takes longer than if using a manually created NLP model. The default model -used in Ollama is Llama 3.2, which was fitted using 8B parameters. +used in Ollama is [Llama 3.2](https://ollama.com/library/llama3.2), +which was fitted using 3B parameters. If using an external LLM service, the consideration will need to be for the billing costs of using such service. Keep in mind that you will be sending a lot @@ -283,9 +284,9 @@ reviews_llm <- data_bookReviews |> ) ``` -As far as **time**, on my Apple M3 machine, it took about 3 minutes to process, -100 rows, containing 20 thousand words. Setting `temp` to 0.2 in `llm_use()`, -made the model run a bit faster. +As far as **time**, on my Apple M3 machine, it took about 1.5 minutes to process, +100 rows, containing 20 thousand words. Setting `temp` to 0 in `llm_use()`, +made the model run faster. The package uses `purrr` to send each prompt individually to the LLM. But, I did try a few different ways to speed up the process, unsuccessfully: diff --git a/README.md b/README.md index 9300556..24dddc1 100644 --- a/README.md +++ b/README.md @@ -262,7 +262,7 @@ Calling `llm_use()` directly will let you specify the model and backend to use. You can also setup additional arguments that will be passed down to the function that actually runs the prediction. In the case of Ollama, that function is -[`generate()`](https://hauselin.github.io/ollama-r/reference/generate.html). +[`chat()`](https://hauselin.github.io/ollama-r/reference/chat.html). ``` r llm_use("ollama", "llama3.2", seed = 100, temperature = 0) @@ -276,8 +276,9 @@ If using this method with an LLM locally available, the cost will be a long running time. Unless using a very specialized LLM, a given LLM is a general model. It was fitted using a vast amount of data. So determining a response for each row, takes longer than if using a manually created -NLP model. The default model used in Ollama is Llama 3.2, which was -fitted using 8B parameters. +NLP model. The default model used in Ollama is [Llama +3.2](https://ollama.com/library/llama3.2), which was fitted using 3B +parameters. If using an external LLM service, the consideration will need to be for the billing costs of using such service. Keep in mind that you will be @@ -336,9 +337,9 @@ reviews_llm <- data_bookReviews |> #> ! There were 2 predictions with invalid output, they were coerced to NA ``` -As far as **time**, on my Apple M3 machine, it took about 3 minutes to -process, 100 rows, containing 20 thousand words. Setting `temp` to 0.2 -in `llm_use()`, made the model run a bit faster. +As far as **time**, on my Apple M3 machine, it took about 1.5 minutes to +process, 100 rows, containing 20 thousand words. Setting `temp` to 0 in +`llm_use()`, made the model run faster. The package uses `purrr` to send each prompt individually to the LLM. 
But, I did try a few different ways to speed up the process, From dc6095b1cfb4f57d1541ab703e975fa2f7b3da75 Mon Sep 17 00:00:00 2001 From: Edgar Ruiz Date: Sun, 29 Sep 2024 10:06:23 -0500 Subject: [PATCH 7/8] styler updates --- R/m-backend-prompt.R | 4 ++-- R/m-defaults.R | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/R/m-backend-prompt.R b/R/m-backend-prompt.R index 2634312..915e7db 100644 --- a/R/m-backend-prompt.R +++ b/R/m-backend-prompt.R @@ -5,9 +5,9 @@ m_backend_prompt <- function(backend, additional) { } #' @export -m_backend_prompt.mall_llama3.2<- function(backend, additional = "") { +m_backend_prompt.mall_llama3.2 <- function(backend, additional = "") { base_method <- NextMethod() - base_method$extract = function(labels) { + base_method$extract <- function(labels) { no_labels <- length(labels) col_labels <- paste0(labels, collapse = ", ") plural <- ifelse(no_labels > 1, "s", "") diff --git a/R/m-defaults.R b/R/m-defaults.R index 78be805..107074a 100644 --- a/R/m-defaults.R +++ b/R/m-defaults.R @@ -7,14 +7,14 @@ m_defaults_set <- function(...) { } model <- defaults[["model"]] split_model <- strsplit(model, "\\:")[[1]] - if(length(split_model > 1)) { + if (length(split_model > 1)) { sub_model <- split_model[[1]] } else { sub_model <- NULL } obj_class <- clean_names(c( - model, - sub_model, + model, + sub_model, defaults[["backend"]], "session" )) From dd8fdf01b4414fd47e8cdebb885f65c4f280c5ca Mon Sep 17 00:00:00 2001 From: Edgar Ruiz Date: Sun, 29 Sep 2024 12:52:35 -0500 Subject: [PATCH 8/8] Tests for new llama3.2 customized prompt --- R/m-defaults.R | 2 +- tests/testthat/_snaps/zzz-cache.md | 48 +++++++++++++------------- tests/testthat/test-m-backend-prompt.R | 13 +++++++ 3 files changed, 38 insertions(+), 25 deletions(-) diff --git a/R/m-defaults.R b/R/m-defaults.R index 107074a..3827dba 100644 --- a/R/m-defaults.R +++ b/R/m-defaults.R @@ -7,7 +7,7 @@ m_defaults_set <- function(...) 
{ } model <- defaults[["model"]] split_model <- strsplit(model, "\\:")[[1]] - if (length(split_model > 1)) { + if (length(split_model) > 1) { sub_model <- split_model[[1]] } else { sub_model <- NULL diff --git a/tests/testthat/_snaps/zzz-cache.md b/tests/testthat/_snaps/zzz-cache.md index d1da860..c5111c7 100644 --- a/tests/testthat/_snaps/zzz-cache.md +++ b/tests/testthat/_snaps/zzz-cache.md @@ -3,30 +3,30 @@ Code fs::dir_ls("_mall_cache", recurse = TRUE) Output - _mall_cache/0b - _mall_cache/0b/0b18bbcac64aff6c9121b7b6d08e6458.json - _mall_cache/1a - _mall_cache/1a/1a1434659a6539a64dafc4dc1adf5503.json - _mall_cache/1c - _mall_cache/1c/1c74876f58e4060900dc2c3711777cc7.json - _mall_cache/32 - _mall_cache/32/32f10622095aad218080f2ec26382b2a.json - _mall_cache/3b - _mall_cache/3b/3b3111d4036392f8e32a6dc4457d515e.json + _mall_cache/00 + _mall_cache/00/004088f786ed0f6a3abc08f2aa55ae2b.json + _mall_cache/14 + _mall_cache/14/14afc26cb4f76497b80b5552b2b1e217.json + _mall_cache/18 + _mall_cache/18/18560280fe5b5a85f2d66fa2dc89aa00.json + _mall_cache/29 + _mall_cache/29/296f3116c07dab7f3ecb4a71776e3b64.json + _mall_cache/2c + _mall_cache/2c/2cbb57fd4a7e7178c489d068db063433.json _mall_cache/42 _mall_cache/42/425e0dc8e9dcadd3482b98fdfa127f30.json - _mall_cache/83 - _mall_cache/83/837cf64a31cf9d16de22d95feaafd72b.json - _mall_cache/84 - _mall_cache/84/84609d770fdd4eb65d2f232e0c93f15c.json - _mall_cache/a5 - _mall_cache/a5/a5ae06127c321290a08d42be969db936.json - _mall_cache/bf - _mall_cache/bf/bf5790d2673fe4e32e382f282ae2a095.json - _mall_cache/ca - _mall_cache/ca/cac76c2359dbefe94ff9007c014996bf.json - _mall_cache/db - _mall_cache/db/db9fde654fc2c2066010bba4733dcc87.json - _mall_cache/ff - _mall_cache/ff/ffea2bb0bc69b4de643fadd6e1d9b0fb.json + _mall_cache/44 + _mall_cache/44/44fd00c39a9697e24e93943ef5f2ad1b.json + _mall_cache/57 + _mall_cache/57/5702ff773afb880c746037a5d8254019.json + _mall_cache/65 + _mall_cache/65/65c76a53ebea14a6695adf433fb2faa6.json + _mall_cache/98 + _mall_cache/98/98a43dc690b06455d6b0a5046db31d84.json + _mall_cache/9c + _mall_cache/9c/9c4ed89921994aa00c712bada91ef941.json + _mall_cache/b0 + _mall_cache/b0/b02d0fab954e183a98787fa897b47d59.json + _mall_cache/b7 + _mall_cache/b7/b7c613386c94b2500b2b733632fedd1a.json diff --git a/tests/testthat/test-m-backend-prompt.R b/tests/testthat/test-m-backend-prompt.R index ccd497b..f939921 100644 --- a/tests/testthat/test-m-backend-prompt.R +++ b/tests/testthat/test-m-backend-prompt.R @@ -12,3 +12,16 @@ test_that("Prompt handles list()", { list(list(role = "user", content = test_text)) ) }) + +test_that("Prompt handles list()", { + backend <- llm_use("ollama", "llama3.2:latest", .silent = TRUE) + x <- m_backend_prompt(backend) + x_extract <- x$extract(labels = c("a", "b")) + x_classify <- x$classify(labels = c("a" ~ 1, "b" ~ 2)) + backend <- llm_use("ollama", "llama1", .silent = TRUE) + y <- m_backend_prompt(backend) + y_extract <- y$extract(labels = c("a", "b")) + y_classify <- y$classify(labels = c("a" ~ 1, "b" ~ 2)) + expect_false(x_extract[[1]]$content == y_extract[[1]]$content) + expect_false(x_classify[[1]]$content == y_classify[[1]]$content) +})
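
The substantive changes in this series are PATCH 4/8, which adds a Llama 3.2-specific `classify` prompt alongside the existing `extract` override in `m_backend_prompt.mall_llama3.2`, and PATCH 8/8, which corrects the tag check in `m_defaults_set()`: `length(split_model > 1)` takes the length of a logical vector, so it is non-zero for any non-empty model name, while `length(split_model) > 1` only fires when the model string really carries a `:tag`. A minimal, self-contained R sketch of both ideas follows; the `prompt_for()` helper and the example class vector are illustrative assumptions, not the package's actual implementation.

``` r
# Self-contained sketch (not the package's internals): how the model string is
# split on ":" and why the condition changed in PATCH 8/8, plus the S3 dispatch
# that PATCH 4/8 relies on to pick Llama 3.2-specific prompts.

# Old condition: length() of a logical vector -- 1 here, so if() always took
# the TRUE branch for any non-empty model name.
length(strsplit("llama3.2", "\\:")[[1]] > 1)
#> [1] 1

# Fixed condition: count the pieces, strip the ":tag" only when one exists.
sub_model <- function(model) {
  parts <- strsplit(model, "\\:")[[1]]
  if (length(parts) > 1) parts[[1]] else NULL
}
sub_model("llama3.2")
#> NULL
sub_model("llama3.2:latest")
#> [1] "llama3.2"

# A session object classed by the (tag-less) model name dispatches to the
# llama3.2 method; any other model falls back to the default prompts.
# The class vector and prompt_for() helper are illustrative only.
backend <- structure(list(), class = c("mall_llama3.2", "mall_ollama", "mall_session"))

prompt_for <- function(x) UseMethod("prompt_for")
prompt_for.default       <- function(x) "generic prompt"
prompt_for.mall_llama3.2 <- function(x) "llama3.2-tuned classify prompt"

prompt_for(backend)
#> [1] "llama3.2-tuned classify prompt"
```

Dispatching on a class derived from the model name keeps the generic prompts as the fallback, so only the verbs a given model needs re-worded (here `classify` and `extract`) are overridden — which is what the new `test-m-backend-prompt.R` test asserts by comparing the `llama3.2:latest` prompts against `llama1`.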