Skip to content

Commit

Permalink
Merge branch 'master' of github.com:luckinet/tabshiftr
Browse files Browse the repository at this point in the history
  • Loading branch information
EhrmannS committed Feb 16, 2024
2 parents b3f51ae + 11af0b2 commit d4ff383
Show file tree
Hide file tree
Showing 14 changed files with 163 additions and 13 deletions.
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: tabshiftr
Title: Reshape Disorganised Messy Data
Version: 0.4.2
Version: 0.5.0
Authors@R:
c(person(given = "Steffen",
family = "Ehrmann",
Expand Down Expand Up @@ -59,7 +59,8 @@ Imports:
crayon,
methods,
purrr,
stringr
stringr,
lubridate
RoxygenNote: 7.2.3
Suggests:
knitr,
Expand Down
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ importFrom(dplyr,row_number)
importFrom(dplyr,select)
importFrom(dplyr,summarise)
importFrom(dplyr,ungroup)
importFrom(lubridate,is.Date)
importFrom(magrittr,"%>%")
importFrom(methods,new)
importFrom(purrr,map)
Expand All @@ -80,6 +81,7 @@ importFrom(rlang,is_quosure)
importFrom(rlang,prim_name)
importFrom(stats,na.omit)
importFrom(stringr,coll)
importFrom(stringr,str_c)
importFrom(stringr,str_count)
importFrom(stringr,str_detect)
importFrom(stringr,str_extract_all)
Expand All @@ -88,6 +90,7 @@ importFrom(stringr,str_split)
importFrom(stringr,str_sub)
importFrom(testthat,expect_identical)
importFrom(tibble,as_tibble)
importFrom(tibble,as_tibble_row)
importFrom(tibble,rownames_to_column)
importFrom(tibble,tibble)
importFrom(tidyr,everything)
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# tabshiftr 0.5.0 - variable types

- include the possibility to specify variable data type, which will result in a column of that type
- include header into `setFormat()` again, which enables providing tables where the column names are in the header, where they will be spliced into the table.

# tabshiftr 0.4.2

- include split and merge functionality for cluster ID.
Expand Down
85 changes: 83 additions & 2 deletions R/helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,87 @@

}


#' Get the column types of a tibble
#'
#' @param input [\code{data.frame(1)}]\cr table of which to get the column
#' types.
#' @param collapse [\code{logical(1)}]\cr whether or not to paste all column
#' types into one string.
#' @importFrom checkmate assertDataFrame assertLogical
#' @importFrom tibble tibble
#' @importFrom purrr map
#' @importFrom dplyr left_join pull
#' @importFrom stringr str_c

.getColTypes <- function(input = NULL, collapse = TRUE){

assertDataFrame(x = input)
assertLogical(x = collapse, len = 1)

types <- tibble(col_type = c("character", "integer", "numeric", "double", "logical", "Date", "units", "sfc_POLYGON", "arrow_binary"),
code = c("c", "i", "n", "d", "l", "D", "u", "g", "a"))

out <- map(1:dim(input)[2], function(ix){
class(input[[ix]])[1]
}) %>%
unlist() %>%
tibble(col_type = .) %>%
left_join(y = types, by = "col_type") %>%
pull("code")

if(collapse){
out <- out %>%
str_c(collapse = "")
}

return(out)

}


#' Splice the header into the table
#'
#' @param input [\code{data.frame(1)}]\cr table of which the header should be
#' shifted into the table.
#' @param rows [\{integeris(1)]\cr the number of rows to shift into the table.
#' @importFrom checkmate assertDataFrame assertIntegerish
#' @importFrom dplyr mutate across bind_rows
#' @importFrom tidyselect where
#' @importFrom lubridate is.Date
#' @importFrom tibble as_tibble_row

.spliceHeader <- function(input, rows = NULL){

assertDataFrame(x = input)
assertIntegerish(x = rows, len = 1, lower = 0, upper = dim(input)[1], any.missing = FALSE)

input <- input %>%
mutate(across(where(is.double) | where(is.integer) | where(is.logical) | where(is.Date), as.character))

if(rows != 0L){

non_char <- .getColTypes(input = input, collapse = FALSE) != "c"

if(rows != 1){
stop("! implement case where more than one rows need to be shifted !")
} else {
vec <- colnames(input)
names(vec) <- paste0("X", seq_along(vec))
vec <- as_tibble_row(vec)
vec[, non_char] <- NA

colnames(input) <- paste0("X", seq_along(vec))

input <- bind_rows(vec, input)
}

}

return(input)
}


#' Match variables
#'
#' This function matches id and observed variables and reshapes them accordingly
Expand Down Expand Up @@ -540,7 +621,7 @@

#' Evaluate .sum constructs
#'
#' @param input [\code{character(1)}]\cr table to reorganise.
#' @param input [\code{data.frame(1)}]\cr table to reorganise.
#' @param groups [\code{list(3)}]\cr the groups-slot from a schema.
#' @param data [\code{integerish(.)}]\cr the cell column or row that should be
#' adapted to groupings.
Expand Down Expand Up @@ -595,7 +676,7 @@

#' Evaluate .find constructs
#'
#' @param input [\code{character(1)}]\cr table to reorganise.
#' @param input [\code{data.frame(1)}]\cr table to reorganise.
#' @param col [\code{list(2)}]\cr the output of the respective .find construct
#' used to match in columns.
#' @param row [\code{list(2)}]\cr the output of the respective .find construct
Expand Down
3 changes: 1 addition & 2 deletions R/reorganise.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,7 @@ reorganise <- function(input = NULL, schema = NULL){
# check validity of arguments
assertDataFrame(x = input)

input <- input %>%
mutate_all(as.character)
input <- .spliceHeader(input = input, rows = schema@format$header)

# 1. add missing information in schema ----
schema <- validateSchema(input = input, schema = schema)
Expand Down
9 changes: 7 additions & 2 deletions R/schema.R
Original file line number Diff line number Diff line change
Expand Up @@ -158,8 +158,13 @@ setValidity(Class = "schema", function(object){
if(length(object@format) == 0){
errors <- c(errors, "the slot 'format' does not contain any entries.")
}
if(!all(names(object@format) %in% c("del", "dec", "na", "flags"))){
errors <- c(errors, "'names(schema$format)' must be a permutation of set {del,dec,na,flags}")
if(!all(names(object@format) %in% c("header", "del", "dec", "na", "flags"))){
errors <- c(errors, "'names(schema$format)' must be a permutation of set {header,del,dec,na,flags}")
}
if(!is.null(object@format$header)){
if(!is.integer(object@format$header)){
errors <- c(errors, "'schema$format$header' must must have a integer value.")
}
}
if(!is.null(object@format$del)){
if(!is.character(object@format$del)){
Expand Down
15 changes: 13 additions & 2 deletions R/setFormat.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@
#' @param schema [\code{schema(1)}]\cr In case this information is added to an
#' already existing schema, provide that schema here (overwrites previous
#' information).
#' @param header [\code{integerish(1)}]\cr The number of header rows. Optimally,
#' a table is read so that column names are ignored (for example
#' \code{readr::read_csv(file = ..., col_names = FALSE)}). If relatively well
#' defined tables are processed, where the header is always only one row, the
#' table can be read in with the default and the header can be spliced into
#' the table by specifying the number of rows here.
#' @param decimal [\code{character(1)}]\cr The symbols that should be
#' interpreted as decimal separator.
#' @param thousand [\code{character(1)}]\cr The symbols that should be
Expand All @@ -27,11 +33,12 @@
#' @importFrom dplyr bind_rows
#' @export

setFormat <- function(schema = NULL, decimal = NULL, thousand = NULL,
na_values = NULL, flags = NULL){
setFormat <- function(schema = NULL, header = 0L, decimal = NULL,
thousand = NULL, na_values = NULL, flags = NULL){

# assertions ----
assertClass(x = schema, classes = "schema", null.ok = TRUE)
assertIntegerish(x = header, len = 1, lower = 0L, any.missing = FALSE)
assertCharacter(x = decimal, len = 1, any.missing = FALSE, null.ok = TRUE)
assertCharacter(x = thousand, len = 1, any.missing = FALSE, null.ok = TRUE)
assertCharacter(x = na_values, any.missing = FALSE, null.ok = TRUE)
Expand All @@ -45,6 +52,10 @@ setFormat <- function(schema = NULL, decimal = NULL, thousand = NULL,
schema <- schema_default
}

if(!is.null(header)){
schema@format$header <- header
}

if(!is.null(decimal)){
schema@format$dec <- decimal
}
Expand Down
4 changes: 3 additions & 1 deletion _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,13 @@ reference:
- title: reorganise tables
contents:
- reorganise
- title: other helpers
- title: other helper functions
contents:
- .eval_find
- .eval_sum
- .expect_valid_table
- .getColTypes
- .shiftHeader
- .tidyVars
- .updateFormat
- show,schema-method
Expand Down
Binary file modified data/schema_default.rda
Binary file not shown.
2 changes: 1 addition & 1 deletion man/dot-eval_find.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/dot-eval_sum.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 18 additions & 0 deletions man/dot-getColTypes.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 17 additions & 0 deletions man/dot-spliceHeader.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions man/setFormat.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit d4ff383

Please sign in to comment.