Merge branch 'master' of github.com:luckinet/tabshiftr

luckinet · Feb 16, 2024 · d4ff383 · d4ff383
2 parents b3f51ae + 11af0b2
commit d4ff383
Show file tree

Hide file tree

Showing 14 changed files with 163 additions and 13 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: tabshiftr
 Title: Reshape Disorganised Messy Data
-Version: 0.4.2
+Version: 0.5.0
 Authors@R: 
     c(person(given = "Steffen",
              family = "Ehrmann",
@@ -59,7 +59,8 @@ Imports:
     crayon,
     methods,
     purrr,
-    stringr
+    stringr,
+    lubridate
 RoxygenNote: 7.2.3
 Suggests: 
     knitr,

diff --git a/NAMESPACE b/NAMESPACE
@@ -61,6 +61,7 @@ importFrom(dplyr,row_number)
 importFrom(dplyr,select)
 importFrom(dplyr,summarise)
 importFrom(dplyr,ungroup)
+importFrom(lubridate,is.Date)
 importFrom(magrittr,"%>%")
 importFrom(methods,new)
 importFrom(purrr,map)
@@ -80,6 +81,7 @@ importFrom(rlang,is_quosure)
 importFrom(rlang,prim_name)
 importFrom(stats,na.omit)
 importFrom(stringr,coll)
+importFrom(stringr,str_c)
 importFrom(stringr,str_count)
 importFrom(stringr,str_detect)
 importFrom(stringr,str_extract_all)
@@ -88,6 +90,7 @@ importFrom(stringr,str_split)
 importFrom(stringr,str_sub)
 importFrom(testthat,expect_identical)
 importFrom(tibble,as_tibble)
+importFrom(tibble,as_tibble_row)
 importFrom(tibble,rownames_to_column)
 importFrom(tibble,tibble)
 importFrom(tidyr,everything)

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,8 @@
+# tabshiftr 0.5.0 - variable types
+
+- include the possibility to specify variable data type, which will result in a column of that type
+- re-introduce the `header` argument in `setFormat()`, which allows providing tables whose column names are in the header; these names are then spliced into the table as its first row.
+
 # tabshiftr 0.4.2
 
 - include split and merge functionality for cluster ID.

diff --git a/R/helpers.R b/R/helpers.R
@@ -216,6 +216,87 @@
 
 }
 
+
+#' Get the column types of a tibble
+#'
+#' Maps the first class of each column to a one-letter code (\code{c}
+#' character, \code{i} integer, \code{n} numeric, \code{d} double, \code{l}
+#' logical, \code{D} Date, \code{u} units, \code{g} sfc_POLYGON, \code{a}
+#' arrow_binary).
+#'
+#' @param input [\code{data.frame(1)}]\cr table of which to get the column
+#'   types.
+#' @param collapse [\code{logical(1)}]\cr whether or not to paste all column
+#'   types into one string.
+#' @return If \code{collapse = TRUE}, the codes of all columns pasted into one
+#'   string, otherwise a character vector with one code per column. A column
+#'   whose first class is not part of the lookup table gets \code{NA}.
+#' @importFrom checkmate assertDataFrame assertLogical
+#' @importFrom tibble tibble
+#' @importFrom dplyr left_join pull
+#' @importFrom stringr str_c
+
+.getColTypes <- function(input = NULL, collapse = TRUE){
+
+  assertDataFrame(x = input)
+  assertLogical(x = collapse, len = 1)
+
+  # lookup table from class name to one-letter code
+  # NOTE(review): class() reports "numeric" for double vectors, so the
+  # "double"/"d" entry is presumably never matched here - confirm.
+  types <- tibble(col_type = c("character", "integer", "numeric", "double", "logical", "Date", "units", "sfc_POLYGON", "arrow_binary"),
+                  code = c("c", "i", "n", "d", "l", "D", "u", "g", "a"))
+
+  # iterate over the columns themselves instead of 1:ncol, which would count
+  # down (1, 0) for a table without columns and fail on input[[1]]
+  col_classes <- vapply(input, function(ix) class(ix)[1], character(1), USE.NAMES = FALSE)
+
+  # the left join keeps one row per column, in column order
+  out <- tibble(col_type = col_classes) %>%
+    left_join(y = types, by = "col_type") %>%
+    pull("code")
+
+  if(collapse){
+    out <- str_c(out, collapse = "")
+  }
+
+  return(out)
+
+}
+
+
+#' Splice the header into the table
+#'
+#' Converts double, integer, logical and Date columns to character and, when
+#' one header row is requested, moves the column names into a new first row and
+#' renames the columns to \code{X1}, \code{X2}, ...
+#'
+#' @param input [\code{data.frame(1)}]\cr table of which the header should be
+#'   shifted into the table.
+#' @param rows [\code{integerish(1)}]\cr the number of rows to shift into the
+#'   table. \code{NULL} is treated like \code{0}; values larger than \code{1}
+#'   are not implemented yet and raise an error.
+#' @return The table with the converted columns and, for \code{rows = 1}, the
+#'   former column names as first row. In that row, columns that are still not
+#'   of class character carry \code{NA} instead of their name.
+#' @importFrom checkmate assertDataFrame assertIntegerish
+#' @importFrom dplyr mutate across bind_rows
+#' @importFrom tidyselect where
+#' @importFrom lubridate is.Date
+#' @importFrom tibble as_tibble_row
+
+.spliceHeader <- function(input, rows = NULL){
+
+  assertDataFrame(x = input)
+  assertIntegerish(x = rows, len = 1, lower = 0, upper = dim(input)[1], any.missing = FALSE, null.ok = TRUE)
+
+  # the default (and a format slot that has no 'header' entry) is NULL, which
+  # means that there is nothing to splice
+  if(is.null(rows)){
+    rows <- 0L
+  }
+
+  input <- input %>%
+    mutate(across(where(is.double) | where(is.integer) |  where(is.logical) | where(is.Date), as.character))
+
+  if(rows != 0L){
+
+    if(rows != 1){
+      stop("! implement case where more than one rows need to be shifted !")
+    }
+
+    # columns that are still not character cannot take up their name. The
+    # classes are compared directly, because the type codes are NA for classes
+    # without a code (e.g. factors) and NA is not a valid column index below.
+    non_char <- vapply(input, function(ix) class(ix)[1], character(1), USE.NAMES = FALSE) != "character"
+
+    # turn the column names into a one-row table with generic names
+    vec <- colnames(input)
+    names(vec) <- paste0("X", seq_along(vec))
+    vec <- as_tibble_row(vec)
+    vec[, non_char] <- NA
+
+    colnames(input) <- paste0("X", seq_along(vec))
+
+    input <- bind_rows(vec, input)
+
+  }
+
+  return(input)
+}
+
+
 #' Match variables
 #'
 #' This function matches id and observed variables and reshapes them accordingly
@@ -540,7 +621,7 @@
 
 #' Evaluate .sum constructs
 #'
-#' @param input [\code{character(1)}]\cr table to reorganise.
+#' @param input [\code{data.frame(1)}]\cr table to reorganise.
 #' @param groups [\code{list(3)}]\cr the groups-slot from a schema.
 #' @param data [\code{integerish(.)}]\cr the cell column or row that should be
 #'   adapted to groupings.
@@ -595,7 +676,7 @@
 
 #' Evaluate .find constructs
 #'
-#' @param input [\code{character(1)}]\cr table to reorganise.
+#' @param input [\code{data.frame(1)}]\cr table to reorganise.
 #' @param col [\code{list(2)}]\cr the output of the respective .find construct
 #'   used to match in columns.
 #' @param row [\code{list(2)}]\cr the output of the respective .find construct

diff --git a/R/reorganise.R b/R/reorganise.R
@@ -45,8 +45,7 @@ reorganise <- function(input = NULL, schema = NULL){
   # check validity of arguments
   assertDataFrame(x = input)
 
-  input <- input %>%
-    mutate_all(as.character)
+  input <- .spliceHeader(input = input, rows = schema@format$header)
 
   # 1. add missing information in schema ----
   schema <- validateSchema(input = input, schema = schema)

diff --git a/R/schema.R b/R/schema.R
@@ -158,8 +158,13 @@ setValidity(Class = "schema", function(object){
     if(length(object@format) == 0){
       errors <- c(errors, "the slot 'format' does not contain any entries.")
     }
-    if(!all(names(object@format) %in% c("del", "dec", "na", "flags"))){
-      errors <- c(errors, "'names(schema$format)' must be a permutation of set {del,dec,na,flags}")
+    if(!all(names(object@format) %in% c("header", "del", "dec", "na", "flags"))){
+      errors <- c(errors, "'names(schema$format)' must be a permutation of set {header,del,dec,na,flags}")
+    }
+    if(!is.null(object@format$header)){
+      if(!is.integer(object@format$header)){
+        errors <- c(errors, "'schema$format$header' must must have a integer value.")
+      }
     }
     if(!is.null(object@format$del)){
       if(!is.character(object@format$del)){

diff --git a/R/setFormat.R b/R/setFormat.R
@@ -7,6 +7,12 @@
 #' @param schema [\code{schema(1)}]\cr In case this information is added to an
 #'   already existing schema, provide that schema here (overwrites previous
 #'   information).
+#' @param header [\code{integerish(1)}]\cr The number of header rows. Optimally,
+#'   a table is read so that column names are ignored (for example
+#'   \code{readr::read_csv(file = ..., col_names = FALSE)}). If relatively well
+#'   defined tables are processed, where the header is always only one row, the
+#'   table can be read in with the default and the header can be spliced into
+#'   the table by specifying the number of rows here.
 #' @param decimal [\code{character(1)}]\cr The symbols that should be
 #'   interpreted as decimal separator.
 #' @param thousand [\code{character(1)}]\cr The symbols that should be
@@ -27,11 +33,12 @@
 #' @importFrom dplyr bind_rows
 #' @export
 
-setFormat <- function(schema = NULL, decimal = NULL, thousand = NULL,
-                      na_values = NULL, flags = NULL){
+setFormat <- function(schema = NULL, header = 0L, decimal = NULL,
+                      thousand = NULL, na_values = NULL, flags = NULL){
 
   # assertions ----
   assertClass(x = schema, classes = "schema", null.ok = TRUE)
+  assertIntegerish(x = header, len = 1, lower = 0L, any.missing = FALSE)
   assertCharacter(x = decimal, len = 1, any.missing = FALSE, null.ok = TRUE)
   assertCharacter(x = thousand, len = 1, any.missing = FALSE, null.ok = TRUE)
   assertCharacter(x = na_values, any.missing = FALSE, null.ok = TRUE)
@@ -45,6 +52,10 @@ setFormat <- function(schema = NULL, decimal = NULL, thousand = NULL,
     schema <- schema_default
   }
 
+  if(!is.null(header)){
+    schema@format$header <- header
+  }
+
   if(!is.null(decimal)){
     schema@format$dec <- decimal
   }

diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -32,11 +32,13 @@ reference:
   - title: reorganise tables
     contents:
       - reorganise
-  - title: other helpers
+  - title: other helper functions
     contents:
       - .eval_find
       - .eval_sum
       - .expect_valid_table
+      - .getColTypes
+      - .spliceHeader
       - .tidyVars
       - .updateFormat
       - show,schema-method

diff --git a/data/schema_default.rda b/data/schema_default.rda
diff --git a/man/dot-eval_find.Rd b/man/dot-eval_find.Rd
diff --git a/man/dot-eval_sum.Rd b/man/dot-eval_sum.Rd
diff --git a/man/dot-getColTypes.Rd b/man/dot-getColTypes.Rd
diff --git a/man/dot-spliceHeader.Rd b/man/dot-spliceHeader.Rd
diff --git a/man/setFormat.Rd b/man/setFormat.Rd