Skip to content

Commit

Permalink
incl. header row number in schema and a new function to splice the he…
Browse files Browse the repository at this point in the history
…ader into the table, in case the table has colnames in the header
  • Loading branch information
EhrmannS committed Feb 16, 2024
1 parent 4e4ca60 commit 8a6a067
Show file tree
Hide file tree
Showing 5 changed files with 104 additions and 8 deletions.
85 changes: 83 additions & 2 deletions R/helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,87 @@

}


#' Get the column types of a tibble
#'
#' @param input [\code{data.frame(1)}]\cr table of which to get the column
#' types.
#' @param collapse [\code{logical(1)}]\cr whether or not to paste all column
#' types into one string.
#' @importFrom checkmate assertDataFrame assertLogical
#' @importFrom tibble tibble
#' @importFrom purrr map
#' @importFrom dplyr left_join pull
#' @importFrom stringr str_c

.getColTypes <- function(input = NULL, collapse = TRUE){

assertDataFrame(x = input)
assertLogical(x = collapse, len = 1)

types <- tibble(col_type = c("character", "integer", "numeric", "double", "logical", "Date", "units", "sfc_POLYGON", "arrow_binary"),
code = c("c", "i", "n", "d", "l", "D", "u", "g", "a"))

out <- map(1:dim(input)[2], function(ix){
class(input[[ix]])[1]
}) %>%
unlist() %>%
tibble(col_type = .) %>%
left_join(y = types, by = "col_type") %>%
pull("code")

if(collapse){
out <- out %>%
str_c(collapse = "")
}

return(out)

}


#' Splice the header into the table
#'
#' @param input [\code{data.frame(1)}]\cr table of which the header should be
#' shifted into the table.
#' @param rows [\{integeris(1)]\cr the number of rows to shift into the table.
#' @importFrom checkmate assertDataFrame assertIntegerish
#' @importFrom dplyr mutate across bind_rows
#' @importFrom tidyselect where
#' @importFrom lubridate is.Date
#' @importFrom tibble as_tibble_row

.spliceHeader <- function(input, rows = NULL){

assertDataFrame(x = input)
assertIntegerish(x = rows, len = 1, lower = 0, upper = dim(input)[1], any.missing = FALSE)

input <- input %>%
mutate(across(where(is.double) | where(is.integer) | where(is.logical) | where(is.Date), as.character))

if(rows != 0L){

non_char <- .getColTypes(input = input, collapse = FALSE) != "c"

if(rows != 1){
stop("! implement case where more than one rows need to be shifted !")
} else {
vec <- colnames(input)
names(vec) <- paste0("X", seq_along(vec))
vec <- as_tibble_row(vec)
vec[, non_char] <- NA

colnames(input) <- paste0("X", seq_along(vec))

input <- bind_rows(vec, input)
}

}

return(input)
}


#' Match variables
#'
#' This function matches id and observed variables and reshapes them accordingly
Expand Down Expand Up @@ -454,7 +535,7 @@

#' Evaluate .sum constructs
#'
#' @param input [\code{character(1)}]\cr table to reorganise.
#' @param input [\code{data.frame(1)}]\cr table to reorganise.
#' @param groups [\code{list(3)}]\cr the groups-slot from a schema.
#' @param data [\code{integerish(.)}]\cr the cell column or row that should be
#' adapted to groupings.
Expand Down Expand Up @@ -509,7 +590,7 @@

#' Evaluate .find constructs
#'
#' @param input [\code{character(1)}]\cr table to reorganise.
#' @param input [\code{data.frame(1)}]\cr table to reorganise.
#' @param col [\code{list(2)}]\cr the output of the respective .find construct
#' used to match in columns.
#' @param row [\code{list(2)}]\cr the output of the respective .find construct
Expand Down
3 changes: 1 addition & 2 deletions R/reorganise.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,7 @@ reorganise <- function(input = NULL, schema = NULL){
# check validity of arguments
assertDataFrame(x = input)

input <- input %>%
mutate_all(as.character)
input <- .spliceHeader(input = input, rows = schema@format$header)

# 1. add missing information in schema ----
schema <- validateSchema(input = input, schema = schema)
Expand Down
9 changes: 7 additions & 2 deletions R/schema.R
Original file line number Diff line number Diff line change
Expand Up @@ -158,8 +158,13 @@ setValidity(Class = "schema", function(object){
if(length(object@format) == 0){
errors <- c(errors, "the slot 'format' does not contain any entries.")
}
if(!all(names(object@format) %in% c("del", "dec", "na", "flags"))){
errors <- c(errors, "'names(schema$format)' must be a permutation of set {del,dec,na,flags}")
if(!all(names(object@format) %in% c("header", "del", "dec", "na", "flags"))){
errors <- c(errors, "'names(schema$format)' must be a permutation of set {header,del,dec,na,flags}")
}
if(!is.null(object@format$header)){
if(!is.integer(object@format$header)){
errors <- c(errors, "'schema$format$header' must must have a integer value.")
}
}
if(!is.null(object@format$del)){
if(!is.character(object@format$del)){
Expand Down
15 changes: 13 additions & 2 deletions R/setFormat.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@
#' @param schema [\code{schema(1)}]\cr In case this information is added to an
#' already existing schema, provide that schema here (overwrites previous
#' information).
#' @param header [\code{integerish(1)}]\cr The number of header rows. Optimally,
#' a table is read so that column names are ignored (for example
#' \code{readr::read_csv(file = ..., col_names = FALSE)}). If relatively well
#' defined tables are processed, where the header is always only one row, the
#' table can be read in with the default and the header can be spliced into
#' the table by specifying the number of rows here.
#' @param decimal [\code{character(1)}]\cr The symbols that should be
#' interpreted as decimal separator.
#' @param thousand [\code{character(1)}]\cr The symbols that should be
Expand All @@ -27,10 +33,11 @@
#' @importFrom dplyr bind_rows
#' @export

setFormat <- function(schema = NULL, decimal = NULL, thousand = NULL,
na_values = NULL, flags = NULL){
setFormat <- function(schema = NULL, header = 0L, decimal = NULL,
thousand = NULL, na_values = NULL, flags = NULL){

assertClass(x = schema, classes = "schema", null.ok = TRUE)
assertIntegerish(x = header, len = 1, lower = 0L, any.missing = FALSE)
assertCharacter(x = decimal, len = 1, any.missing = FALSE, null.ok = TRUE)
assertCharacter(x = thousand, len = 1, any.missing = FALSE, null.ok = TRUE)
assertCharacter(x = na_values, any.missing = FALSE, null.ok = TRUE)
Expand All @@ -43,6 +50,10 @@ setFormat <- function(schema = NULL, decimal = NULL, thousand = NULL,
schema <- schema_default
}

if(!is.null(header)){
schema@format$header <- header
}

if(!is.null(decimal)){
schema@format$dec <- decimal
}
Expand Down
Binary file modified data/schema_default.rda
Binary file not shown.

0 comments on commit 8a6a067

Please sign in to comment.