Merge pull request #16 from edgararuiz/updates

Updates
mlverse · Sep 24, 2024 · d768496 · d768496
2 parents 4712cb7 + f04606f
commit d768496
Show file tree

Hide file tree

Showing 41 changed files with 328 additions and 291 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -25,3 +25,6 @@ Suggests:
     testthat (>= 3.0.0)
 Config/testthat/edition: 3
 URL: https://edgararuiz.github.io/mall/
+Depends: 
+    R (>= 2.10)
+LazyData: true
diff --git a/R/data-reviews.R b/R/data-reviews.R
@@ -0,0 +1,10 @@
+#' Mini reviews data set
+#'
+#' @format A data frame that contains 3 records. The records are of fictitious
+#' product reviews.
+#' @examples
+#' library(mall)
+#' data(reviews)
+#' reviews
+#'
+"reviews"
diff --git a/R/llm-classify.R b/R/llm-classify.R
@@ -22,41 +22,36 @@
 #' \dontrun{
 #' library(mall)
 #'
-#' llm_use("ollama", "llama3.1", seed = 100, .silent = TRUE)
+#' data("reviews")
 #'
-#' reviews <- data.frame(review = c(
-#'   "This has been the best TV I've ever used. Great screen, and sound.",
-#'   "I regret buying this laptop. It is too slow and the keyboard is too noisy",
-#'   "Not sure how to feel about my new washing machine. Great color, but hard to figure"
-#' ))
+#' llm_use("ollama", "llama3.1", seed = 100, .silent = TRUE)
 #'
 #' llm_classify(reviews, review, c("appliance", "computer"))
 #'
 #' # Use 'pred_name' to customize the new column's name
 #' llm_classify(
-#'   reviews, 
+#'   reviews,
 #'   review,
-#'   c("appliance", "computer"), 
+#'   c("appliance", "computer"),
 #'   pred_name = "prod_type"
-#'   )
+#' )
 #'
-#' # Pass custom values for each classification 
+#' # Pass custom values for each classification
 #' llm_classify(reviews, review, c("appliance" ~ 1, "computer" ~ 2))
 #'
 #' # For character vectors, instead of a data frame, use this function
 #' llm_vec_classify(
-#'   c("this is important!", "just whenever"), 
+#'   c("this is important!", "just whenever"),
 #'   c("urgent", "not urgent")
-#'   )
-#' 
+#' )
+#'
 #' # To preview the first call that will be made to the downstream R function
 #' llm_vec_classify(
-#'   c("this is important!", "just whenever"), 
-#'   c("urgent", "not urgent"), 
+#'   c("this is important!", "just whenever"),
+#'   c("urgent", "not urgent"),
 #'   preview = TRUE
-#'   )
-#' 
-#' } 
+#' )
+#' }
 #' @export
 llm_classify <- function(.data,
                          col,

diff --git a/R/llm-custom.R b/R/llm-custom.R
@@ -13,25 +13,20 @@
 #' \dontrun{
 #' library(mall)
 #'
+#' data("reviews")
+#'
 #' llm_use("ollama", "llama3.1", seed = 100, .silent = TRUE)
 #'
-#' reviews <- data.frame(review = c(
-#'   "This has been the best TV I've ever used. Great screen, and sound.",
-#'   "I regret buying this laptop. It is too slow and the keyboard is too noisy",
-#'   "Not sure how to feel about my new washing machine. Great color, but hard to figure"
-#' ))
-#' 
 #' my_prompt <- paste(
 #'   "Answer a question.",
 #'   "Return only the answer, no explanation",
 #'   "Acceptable answers are 'yes', 'no'",
 #'   "Answer this about the following text, is this a happy customer?:"
 #' )
-#' 
+#'
 #' reviews |>
 #'   llm_custom(review, my_prompt)
-#' 
-#' } 
+#' }
 #' @returns `llm_custom` returns a `data.frame` or `tbl` object.
 #' `llm_vec_custom` returns a vector that is the same length as `x`.
 #' @export

diff --git a/R/llm-extract.R b/R/llm-extract.R
@@ -13,46 +13,42 @@
 #' the content as the name.
 #' @examples
 #' \dontrun{
-#' library(mall) 
-#' 
-#' llm_use("ollama", "llama3.1", seed = 100, .silent = TRUE) 
-#' 
-#' reviews <- data.frame(review = c( 
-#'   "This has been the best TV I've ever used. Great screen, and sound.", 
-#'   "I regret buying this laptop. It is too slow and the keyboard is too noisy", 
-#'   "Not sure how to feel about my new washing machine. Great color, but hard to figure" 
-#' )) 
-#' 
+#' library(mall)
+#'
+#' data("reviews")
+#'
+#' llm_use("ollama", "llama3.1", seed = 100, .silent = TRUE)
+#'
 #' # Use 'labels' to let the function know what to extract
 #' llm_extract(reviews, review, labels = "product")
-#' 
-#' # Use 'pred_name' to customize the new column's name 
-#' llm_extract(reviews, review, "product", pred_name = "prod") 
-#' 
+#'
+#' # Use 'pred_name' to customize the new column's name
+#' llm_extract(reviews, review, "product", pred_name = "prod")
+#'
 #' # Pass a vector to request multiple things, the results will be pipe delimited
 #' # in a single column
 #' llm_extract(reviews, review, c("product", "feelings"))
-#' 
+#'
 #' # To get multiple columns, use 'expand_cols'
 #' llm_extract(reviews, review, c("product", "feelings"), expand_cols = TRUE)
-#' 
+#'
 #' # Pass a named vector to set the resulting column names
 #' llm_extract(
 #'   .data = reviews,
-#'   col =  review,
-#'   labels =  c(prod = "product", feels = "feelings"),
+#'   col = review,
+#'   labels = c(prod = "product", feels = "feelings"),
 #'   expand_cols = TRUE
-#'   )
-#' 
-#' # For character vectors, instead of a data frame, use this function 
-#' llm_vec_extract("bob smith, 123 3rd street", c("name", "address")) 
-#' 
-#' # To preview the first call that will be made to the downstream R function 
+#' )
+#'
+#' # For character vectors, instead of a data frame, use this function
+#' llm_vec_extract("bob smith, 123 3rd street", c("name", "address"))
+#'
+#' # To preview the first call that will be made to the downstream R function
 #' llm_vec_extract(
-#'   "bob smith, 123 3rd street", 
-#'   c("name", "address"), 
+#'   "bob smith, 123 3rd street",
+#'   c("name", "address"),
 #'   preview = TRUE
-#'   ) 
+#' )
 #' }
 #' @returns `llm_extract` returns a `data.frame` or `tbl` object.
 #' `llm_vec_extract` returns a vector that is the same length as `x`.

diff --git a/R/llm-sentiment.R b/R/llm-sentiment.R
@@ -14,13 +14,9 @@
 #' \dontrun{
 #' library(mall)
 #'
-#' llm_use("ollama", "llama3.1", seed = 100, .silent = TRUE)
+#' data("reviews")
 #'
-#' reviews <- data.frame(review = c(
-#'   "This has been the best TV I've ever used. Great screen, and sound.",
-#'   "I regret buying this laptop. It is too slow and the keyboard is too noisy",
-#'   "Not sure how to feel about my new washing machine. Great color, but hard to figure"
-#' ))
+#' llm_use("ollama", "llama3.1", seed = 100, .silent = TRUE)
 #'
 #' llm_sentiment(reviews, review)
 #'
@@ -35,10 +31,9 @@
 #'
 #' # For character vectors, instead of a data frame, use this function
 #' llm_vec_sentiment(c("I am happy", "I am sad"))
-#' 
+#'
 #' # To preview the first call that will be made to the downstream R function
 #' llm_vec_sentiment(c("I am happy", "I am sad"), preview = TRUE)
-#' 
 #' }
 #' @export
 llm_sentiment <- function(.data,

diff --git a/R/llm-summarize.R b/R/llm-summarize.R
@@ -8,34 +8,30 @@
 #' summary. Defaults to 10.
 #' @examples
 #' \dontrun{
-#' library(mall) 
-#' 
-#' llm_use("ollama", "llama3.1", seed = 100, .silent = TRUE) 
-#' 
-#' reviews <- data.frame(review = c( 
-#'   "This has been the best TV I've ever used. Great screen, and sound.", 
-#'   "I regret buying this laptop. It is too slow and the keyboard is too noisy", 
-#'   "Not sure how to feel about my new washing machine. Great color, but hard to figure" 
-#' )) 
-#' 
+#' library(mall)
+#'
+#' data("reviews")
+#'
+#' llm_use("ollama", "llama3.1", seed = 100, .silent = TRUE)
+#'
 #' # Use max_words to set the maximum number of words to use for the summary
-#' llm_summarize(reviews, review, max_words = 5) 
-#' 
-#' # Use 'pred_name' to customize the new column's name 
-#' llm_summarize(reviews, review, 5, pred_name = "review_summary") 
-#' 
-#' # For character vectors, instead of a data frame, use this function 
+#' llm_summarize(reviews, review, max_words = 5)
+#'
+#' # Use 'pred_name' to customize the new column's name
+#' llm_summarize(reviews, review, 5, pred_name = "review_summary")
+#'
+#' # For character vectors, instead of a data frame, use this function
 #' llm_vec_summarize(
 #'   "This has been the best TV I've ever used. Great screen, and sound.",
 #'   max_words = 5
-#'   ) 
-#' 
+#' )
+#'
 #' # To preview the first call that will be made to the downstream R function
 #' llm_vec_summarize(
 #'   "This has been the best TV I've ever used. Great screen, and sound.",
-#'   max_words = 5, 
+#'   max_words = 5,
 #'   preview = TRUE
-#' ) 
+#' )
 #' }
 #' @returns `llm_summarize` returns a `data.frame` or `tbl` object.
 #' `llm_vec_summarize` returns a vector that is the same length as `x`.

diff --git a/R/llm-translate.R b/R/llm-translate.R
@@ -10,17 +10,12 @@
 #' \dontrun{
 #' library(mall)
 #'
-#' llm_use("ollama", "llama3.1", seed = 100, .silent = TRUE)
+#' data("reviews")
 #'
-#' reviews <- data.frame(review = c(
-#'   "This has been the best TV I've ever used. Great screen, and sound.",
-#'   "I regret buying this laptop. It is too slow and the keyboard is too noisy",
-#'   "Not sure how to feel about my new washing machine. Great color, but hard to figure"
-#' ))
+#' llm_use("ollama", "llama3.1", seed = 100, .silent = TRUE)
 #'
 #' # Pass the desired language to translate to
 #' llm_translate(reviews, review, "spanish")
-#' 
 #' }
 #' @returns `llm_translate` returns a `data.frame` or `tbl` object.
 #' `llm_vec_translate` returns a vector that is the same length as `x`.

diff --git a/R/llm-use.R b/R/llm-use.R
@@ -18,24 +18,24 @@
 #' @examples
 #' \dontrun{
 #' library(mall)
-#' 
+#'
 #' llm_use("ollama", "llama3.1")
-#' 
-#' # Additional arguments will be passed 'as-is' to the 
+#'
+#' # Additional arguments will be passed 'as-is' to the
 #' # downstream R function; in this example, to ollama::chat()
 #' llm_use("ollama", "llama3.1", seed = 100, temp = 0.1)
-#' 
-#' # During the R session, you can change any argument 
+#'
+#' # During the R session, you can change any argument
 #' # individually and it will retain all of the previous
 #' # arguments used
 #' llm_use(temp = 0.3)
-#' 
+#'
 #' # Use .cache to modify the target folder for caching
 #' llm_use(.cache = "_my_cache")
-#' 
-#' # Leave .cache empty to turn off this functionality 
+#'
+#' # Leave .cache empty to turn off this functionality
 #' llm_use(.cache = "")
-#' 
+#'
 #' # Use .silent to avoid the print out
 #' llm_use(.silent = TRUE)
 #' }

diff --git a/R/m-defaults.R b/R/m-defaults.R
@@ -63,7 +63,13 @@ print.mall_session <- function(x, ...) {
     args <- set_names(args, " ")
     cli_bullets(args)
   }
-  session <- imap(x$session, \(x, y) glue("{col_yellow({paste0(y, ':')})}{x}"))
-  label_argument <- "{col_green('R session:')}"
-  cli_inform(paste(label_argument, session[[1]]))
+  session <- x$session
+  if (session$cache_folder == "") {
+    session$cache_folder <- NULL
+  }
+  if (length(session) > 0) {
+    session <- imap(session, \(x, y) glue("{col_yellow({paste0(y, ':')})}{x}"))
+    label_argument <- "{col_green('R session:')}"
+    cli_inform(paste(label_argument, session[[1]]))
+  }
 }
diff --git a/README.Rmd b/README.Rmd
@@ -94,30 +94,26 @@ installed if you are using Databricks only.*
 
 ## LLM functions
 
-We will start with a very small table with product reviews:
-
-```{r}
-library(dplyr)
-
-reviews <- tribble(
-  ~review,
-  "This has been the best TV I've ever used. Great screen, and sound.",
-  "I regret buying this laptop. It is too slow and the keyboard is too noisy",
-  "Not sure how to feel about my new washing machine. Great color, but hard to figure"
-)
-```
-
 ### Sentiment
 
 Primarily,  `mall` provides verb-like functions that expect a `tbl` as 
 their first argument. This allows us to use them in piped operations. 
 
-For the first example, we'll asses the sentiment of each review. In order to 
-do this we will call `llm_sentiment()`:
+We will start with loading a very small data set contained in `mall`. It has
+3 product reviews that we will use as the source of our examples.
 
 ```{r}
 library(mall)
 
+data("reviews")
+
+reviews
+```
+
+For the first example, we'll assess the sentiment of each review. In order to 
+do this we will call `llm_sentiment()`:
+
+```{r}
 reviews |>
   llm_sentiment(review)
 ```