incl. header row number in schema and a new function to splice the he…

…ader into the table, in case the table has colnames in the header
luckinet · Feb 16, 2024 · 8a6a067 · 8a6a067
1 parent 4e4ca60
commit 8a6a067
Show file tree

Hide file tree

Showing 5 changed files with 104 additions and 8 deletions.
diff --git a/R/helpers.R b/R/helpers.R
@@ -135,6 +135,87 @@
 
 }
 
+
+#' Get the column types of a tibble
+#'
+#' @param input [\code{data.frame(1)}]\cr table of which to get the column
+#'   types.
+#' @param collapse [\code{logical(1)}]\cr whether or not to paste all column
+#'   types into one string.
+#' @importFrom checkmate assertDataFrame assertLogical
+#' @importFrom tibble tibble
+#' @importFrom purrr map
+#' @importFrom dplyr left_join pull
+#' @importFrom stringr str_c
+
+.getColTypes <- function(input = NULL, collapse = TRUE){
+
+  assertDataFrame(x = input)
+  assertLogical(x = collapse, len = 1)
+
+  types <- tibble(col_type = c("character", "integer", "numeric", "double", "logical", "Date", "units", "sfc_POLYGON", "arrow_binary"),
+                  code = c("c", "i", "n", "d", "l", "D", "u", "g", "a"))
+
+  out <- map(1:dim(input)[2], function(ix){
+    class(input[[ix]])[1]
+  }) %>%
+    unlist() %>%
+    tibble(col_type = .) %>%
+    left_join(y = types, by = "col_type") %>%
+    pull("code")
+
+  if(collapse){
+    out <- out %>%
+      str_c(collapse = "")
+  }
+
+  return(out)
+
+}
+
+
+#' Splice the header into the table
+#'
+#' @param input [\code{data.frame(1)}]\cr table of which the header should be
+#'   shifted into the table.
+#' @param rows [\{integeris(1)]\cr the number of rows to shift into the table.
+#' @importFrom checkmate assertDataFrame assertIntegerish
+#' @importFrom dplyr mutate across bind_rows
+#' @importFrom tidyselect where
+#' @importFrom lubridate is.Date
+#' @importFrom tibble as_tibble_row
+
+.spliceHeader <- function(input, rows = NULL){
+
+  assertDataFrame(x = input)
+  assertIntegerish(x = rows, len = 1, lower = 0, upper = dim(input)[1], any.missing = FALSE)
+
+  input <- input %>%
+    mutate(across(where(is.double) | where(is.integer) |  where(is.logical) | where(is.Date), as.character))
+
+  if(rows != 0L){
+
+    non_char <- .getColTypes(input = input, collapse = FALSE) != "c"
+
+    if(rows != 1){
+      stop("! implement case where more than one rows need to be shifted !")
+    } else {
+      vec <- colnames(input)
+      names(vec) <- paste0("X", seq_along(vec))
+      vec <- as_tibble_row(vec)
+      vec[, non_char] <- NA
+
+      colnames(input) <- paste0("X", seq_along(vec))
+
+      input <- bind_rows(vec, input)
+    }
+
+  }
+
+  return(input)
+}
+
+
 #' Match variables
 #'
 #' This function matches id and observed variables and reshapes them accordingly
@@ -454,7 +535,7 @@
 
 #' Evaluate .sum constructs
 #'
-#' @param input [\code{character(1)}]\cr table to reorganise.
+#' @param input [\code{data.frame(1)}]\cr table to reorganise.
 #' @param groups [\code{list(3)}]\cr the groups-slot from a schema.
 #' @param data [\code{integerish(.)}]\cr the cell column or row that should be
 #'   adapted to groupings.
@@ -509,7 +590,7 @@
 
 #' Evaluate .find constructs
 #'
-#' @param input [\code{character(1)}]\cr table to reorganise.
+#' @param input [\code{data.frame(1)}]\cr table to reorganise.
 #' @param col [\code{list(2)}]\cr the output of the respective .find construct
 #'   used to match in columns.
 #' @param row [\code{list(2)}]\cr the output of the respective .find construct

diff --git a/R/reorganise.R b/R/reorganise.R
@@ -45,8 +45,7 @@ reorganise <- function(input = NULL, schema = NULL){
   # check validity of arguments
   assertDataFrame(x = input)
 
-  input <- input %>%
-    mutate_all(as.character)
+  input <- .spliceHeader(input = input, rows = schema@format$header)
 
   # 1. add missing information in schema ----
   schema <- validateSchema(input = input, schema = schema)

diff --git a/R/schema.R b/R/schema.R
@@ -158,8 +158,13 @@ setValidity(Class = "schema", function(object){
     if(length(object@format) == 0){
       errors <- c(errors, "the slot 'format' does not contain any entries.")
     }
-    if(!all(names(object@format) %in% c("del", "dec", "na", "flags"))){
-      errors <- c(errors, "'names(schema$format)' must be a permutation of set {del,dec,na,flags}")
+    if(!all(names(object@format) %in% c("header", "del", "dec", "na", "flags"))){
+      errors <- c(errors, "'names(schema$format)' must be a permutation of set {header,del,dec,na,flags}")
+    }
+    if(!is.null(object@format$header)){
+      if(!is.integer(object@format$header)){
+        errors <- c(errors, "'schema$format$header' must must have a integer value.")
+      }
     }
     if(!is.null(object@format$del)){
       if(!is.character(object@format$del)){

diff --git a/R/setFormat.R b/R/setFormat.R
@@ -7,6 +7,12 @@
 #' @param schema [\code{schema(1)}]\cr In case this information is added to an
 #'   already existing schema, provide that schema here (overwrites previous
 #'   information).
+#' @param header [\code{integerish(1)}]\cr The number of header rows. Optimally,
+#'   a table is read so that column names are ignored (for example
+#'   \code{readr::read_csv(file = ..., col_names = FALSE)}). If relatively well
+#'   defined tables are processed, where the header is always only one row, the
+#'   table can be read in with the default and the header can be spliced into
+#'   the table by specifying the number of rows here.
 #' @param decimal [\code{character(1)}]\cr The symbols that should be
 #'   interpreted as decimal separator.
 #' @param thousand [\code{character(1)}]\cr The symbols that should be
@@ -27,10 +33,11 @@
 #' @importFrom dplyr bind_rows
 #' @export
 
-setFormat <- function(schema = NULL, decimal = NULL, thousand = NULL,
-                      na_values = NULL, flags = NULL){
+setFormat <- function(schema = NULL, header = 0L, decimal = NULL,
+                      thousand = NULL, na_values = NULL, flags = NULL){
 
   assertClass(x = schema, classes = "schema", null.ok = TRUE)
+  assertIntegerish(x = header, len = 1, lower = 0L, any.missing = FALSE)
   assertCharacter(x = decimal, len = 1, any.missing = FALSE, null.ok = TRUE)
   assertCharacter(x = thousand, len = 1, any.missing = FALSE, null.ok = TRUE)
   assertCharacter(x = na_values, any.missing = FALSE, null.ok = TRUE)
@@ -43,6 +50,10 @@ setFormat <- function(schema = NULL, decimal = NULL, thousand = NULL,
     schema <- schema_default
   }
 
+  if(!is.null(header)){
+    schema@format$header <- header
+  }
+
   if(!is.null(decimal)){
     schema@format$dec <- decimal
   }

diff --git a/data/schema_default.rda b/data/schema_default.rda