From feb054b7356f362fa84fe8b73d8b1f323b5aa936 Mon Sep 17 00:00:00 2001
From: EhrmannS <steffen.ehrmann@posteo.de>
Date: Fri, 16 Feb 2024 01:54:15 +0100
Subject: [PATCH 1/4] update docs

---
 man/dot-eval_find.Rd    |  2 +-
 man/dot-eval_sum.Rd     |  2 +-
 man/dot-getColTypes.Rd  | 18 ++++++++++++++++++
 man/dot-spliceHeader.Rd | 17 +++++++++++++++++
 man/setFormat.Rd        |  8 ++++++++
 5 files changed, 45 insertions(+), 2 deletions(-)
 create mode 100644 man/dot-getColTypes.Rd
 create mode 100644 man/dot-spliceHeader.Rd

diff --git a/man/dot-eval_find.Rd b/man/dot-eval_find.Rd
index 54dda85..b90cf3b 100644
--- a/man/dot-eval_find.Rd
+++ b/man/dot-eval_find.Rd
@@ -7,7 +7,7 @@
 .eval_find(input = NULL, col = NULL, row = NULL, clusters = NULL)
 }
 \arguments{
-\item{input}{[\code{character(1)}]\cr table to reorganise.}
+\item{input}{[\code{data.frame(1)}]\cr table to reorganise.}
 
 \item{col}{[\code{list(2)}]\cr the output of the respective .find construct
 used to match in columns.}
diff --git a/man/dot-eval_sum.Rd b/man/dot-eval_sum.Rd
index ffb226d..d634980 100644
--- a/man/dot-eval_sum.Rd
+++ b/man/dot-eval_sum.Rd
@@ -7,7 +7,7 @@
 .eval_sum(input = NULL, groups = NULL, data = NULL)
 }
 \arguments{
-\item{input}{[\code{character(1)}]\cr table to reorganise.}
+\item{input}{[\code{data.frame(1)}]\cr table to reorganise.}
 
 \item{groups}{[\code{list(3)}]\cr the groups-slot from a schema.}
 
diff --git a/man/dot-getColTypes.Rd b/man/dot-getColTypes.Rd
new file mode 100644
index 0000000..fb6d738
--- /dev/null
+++ b/man/dot-getColTypes.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/helpers.R
+\name{.getColTypes}
+\alias{.getColTypes}
+\title{Get the column types of a tibble}
+\usage{
+.getColTypes(input = NULL, collapse = TRUE)
+}
+\arguments{
+\item{input}{[\code{data.frame(1)}]\cr table of which to get the column
+types.}
+
+\item{collapse}{[\code{logical(1)}]\cr whether or not to paste all column
+types into one string.}
+}
+\description{
+Get the column types of a tibble
+}
diff --git a/man/dot-spliceHeader.Rd b/man/dot-spliceHeader.Rd
new file mode 100644
index 0000000..55661e0
--- /dev/null
+++ b/man/dot-spliceHeader.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/helpers.R
+\name{.spliceHeader}
+\alias{.spliceHeader}
+\title{Splice the header into the table}
+\usage{
+.spliceHeader(input, rows = NULL)
+}
+\arguments{
+\item{input}{[\code{data.frame(1)}]\cr table of which the header should be
+shifted into the table.}
+
+\item{rows}{[\code{integerish(1)}]\cr the number of rows to shift into the table.}
+}
+\description{
+Splice the header into the table
+}
diff --git a/man/setFormat.Rd b/man/setFormat.Rd
index 0f58479..f8ba549 100644
--- a/man/setFormat.Rd
+++ b/man/setFormat.Rd
@@ -6,6 +6,7 @@
 \usage{
 setFormat(
   schema = NULL,
+  header = 0L,
   decimal = NULL,
   thousand = NULL,
   na_values = NULL,
@@ -17,6 +18,13 @@ setFormat(
 already existing schema, provide that schema here (overwrites previous
 information).}
 
+\item{header}{[\code{integerish(1)}]\cr The number of header rows. Optimally,
+a table is read so that column names are ignored (for example
+\code{readr::read_csv(file = ..., col_names = FALSE)}). If relatively well
+defined tables are processed, where the header is always only one row, the
+table can be read in with the default and the header can be spliced into
+the table by specifying the number of rows here.}
+
 \item{decimal}{[\code{character(1)}]\cr The symbols that should be
 interpreted as decimal separator.}
 

From 4e4ca601edc48528e7c4c271b9d9edde275f876f Mon Sep 17 00:00:00 2001
From: EhrmannS <steffen.ehrmann@posteo.de>
Date: Fri, 16 Feb 2024 01:54:28 +0100
Subject: [PATCH 2/4] update package

---
 DESCRIPTION  | 3 ++-
 NAMESPACE    | 3 +++
 _pkgdown.yml | 4 +++-
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index fdd452f..50e7e10 100755
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -59,7 +59,8 @@ Imports:
     crayon,
     methods,
     purrr,
-    stringr
+    stringr,
+    lubridate
 RoxygenNote: 7.2.3
 Suggests: 
     knitr,
diff --git a/NAMESPACE b/NAMESPACE
index 050d428..f77b6d2 100755
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -61,6 +61,7 @@ importFrom(dplyr,row_number)
 importFrom(dplyr,select)
 importFrom(dplyr,summarise)
 importFrom(dplyr,ungroup)
+importFrom(lubridate,is.Date)
 importFrom(magrittr,"%>%")
 importFrom(methods,new)
 importFrom(purrr,map)
@@ -80,6 +81,7 @@ importFrom(rlang,is_quosure)
 importFrom(rlang,prim_name)
 importFrom(stats,na.omit)
 importFrom(stringr,coll)
+importFrom(stringr,str_c)
 importFrom(stringr,str_count)
 importFrom(stringr,str_detect)
 importFrom(stringr,str_extract_all)
@@ -88,6 +90,7 @@ importFrom(stringr,str_split)
 importFrom(stringr,str_sub)
 importFrom(testthat,expect_identical)
 importFrom(tibble,as_tibble)
+importFrom(tibble,as_tibble_row)
 importFrom(tibble,rownames_to_column)
 importFrom(tibble,tibble)
 importFrom(tidyr,everything)
diff --git a/_pkgdown.yml b/_pkgdown.yml
index 9c16e9d..1cd6422 100755
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -32,11 +32,13 @@ reference:
   - title: reorganise tables
     contents:
       - reorganise
-  - title: other helpers
+  - title: other helper functions
     contents:
       - .eval_find
       - .eval_sum
       - .expect_valid_table
+      - .getColTypes
+      - .spliceHeader
       - .tidyVars
       - .updateFormat
       - show,schema-method

From 8a6a0670264cd0494ddc82c406840e51b12006db Mon Sep 17 00:00:00 2001
From: EhrmannS <steffen.ehrmann@posteo.de>
Date: Fri, 16 Feb 2024 01:55:33 +0100
Subject: [PATCH 3/4] incl. header row number in schema and a new function to
 splice the header into the table, in case the table has colnames in the
 header

---
 R/helpers.R             |  85 +++++++++++++++++++++++++++++++++++++++-
 R/reorganise.R          |   3 +-
 R/schema.R              |   9 ++++-
 R/setFormat.R           |  15 ++++++-
 data/schema_default.rda | Bin 335 -> 342 bytes
 5 files changed, 104 insertions(+), 8 deletions(-)

diff --git a/R/helpers.R b/R/helpers.R
index 3a2596c..b4bb8a6 100644
--- a/R/helpers.R
+++ b/R/helpers.R
@@ -135,6 +135,87 @@
 
 }
 
+
+#' Get the column types of a tibble
+#'
+#' @param input [\code{data.frame(1)}]\cr table of which to get the column
+#'   types.
+#' @param collapse [\code{logical(1)}]\cr whether or not to paste all column
+#'   types into one string.
+#' @importFrom checkmate assertDataFrame assertLogical
+#' @importFrom tibble tibble
+#' @importFrom purrr map
+#' @importFrom dplyr left_join pull
+#' @importFrom stringr str_c
+
+.getColTypes <- function(input = NULL, collapse = TRUE){
+
+  assertDataFrame(x = input)
+  assertLogical(x = collapse, len = 1)
+
+  types <- tibble(col_type = c("character", "integer", "numeric", "double", "logical", "Date", "units", "sfc_POLYGON", "arrow_binary"),
+                  code = c("c", "i", "n", "d", "l", "D", "u", "g", "a"))
+
+  out <- map(1:dim(input)[2], function(ix){
+    class(input[[ix]])[1]
+  }) %>%
+    unlist() %>%
+    tibble(col_type = .) %>%
+    left_join(y = types, by = "col_type") %>%
+    pull("code")
+
+  if(collapse){
+    out <- out %>%
+      str_c(collapse = "")
+  }
+
+  return(out)
+
+}
+
+
+#' Splice the header into the table
+#'
+#' @param input [\code{data.frame(1)}]\cr table of which the header should be
+#'   shifted into the table.
+#' @param rows [\code{integerish(1)}]\cr the number of rows to shift into the table.
+#' @importFrom checkmate assertDataFrame assertIntegerish
+#' @importFrom dplyr mutate across bind_rows
+#' @importFrom tidyselect where
+#' @importFrom lubridate is.Date
+#' @importFrom tibble as_tibble_row
+
+.spliceHeader <- function(input, rows = NULL){
+
+  assertDataFrame(x = input)
+  assertIntegerish(x = rows, len = 1, lower = 0, upper = dim(input)[1], any.missing = FALSE)
+
+  input <- input %>%
+    mutate(across(where(is.double) | where(is.integer) |  where(is.logical) | where(is.Date), as.character))
+
+  if(rows != 0L){
+
+    non_char <- .getColTypes(input = input, collapse = FALSE) != "c"
+
+    if(rows != 1){
+      stop("! implement case where more than one rows need to be shifted !")
+    } else {
+      vec <- colnames(input)
+      names(vec) <- paste0("X", seq_along(vec))
+      vec <- as_tibble_row(vec)
+      vec[, non_char] <- NA
+
+      colnames(input) <- paste0("X", seq_along(vec))
+
+      input <- bind_rows(vec, input)
+    }
+
+  }
+
+  return(input)
+}
+
+
 #' Match variables
 #'
 #' This function matches id and observed variables and reshapes them accordingly
@@ -454,7 +535,7 @@
 
 #' Evaluate .sum constructs
 #'
-#' @param input [\code{character(1)}]\cr table to reorganise.
+#' @param input [\code{data.frame(1)}]\cr table to reorganise.
 #' @param groups [\code{list(3)}]\cr the groups-slot from a schema.
 #' @param data [\code{integerish(.)}]\cr the cell column or row that should be
 #'   adapted to groupings.
@@ -509,7 +590,7 @@
 
 #' Evaluate .find constructs
 #'
-#' @param input [\code{character(1)}]\cr table to reorganise.
+#' @param input [\code{data.frame(1)}]\cr table to reorganise.
 #' @param col [\code{list(2)}]\cr the output of the respective .find construct
 #'   used to match in columns.
 #' @param row [\code{list(2)}]\cr the output of the respective .find construct
diff --git a/R/reorganise.R b/R/reorganise.R
index 62c69ef..46b8094 100755
--- a/R/reorganise.R
+++ b/R/reorganise.R
@@ -45,8 +45,7 @@ reorganise <- function(input = NULL, schema = NULL){
   # check validity of arguments
   assertDataFrame(x = input)
 
-  input <- input %>%
-    mutate_all(as.character)
+  input <- .spliceHeader(input = input, rows = schema@format$header)
 
   # 1. add missing information in schema ----
   schema <- validateSchema(input = input, schema = schema)
diff --git a/R/schema.R b/R/schema.R
index 74fc2a5..aad046c 100755
--- a/R/schema.R
+++ b/R/schema.R
@@ -158,8 +158,13 @@ setValidity(Class = "schema", function(object){
     if(length(object@format) == 0){
       errors <- c(errors, "the slot 'format' does not contain any entries.")
     }
-    if(!all(names(object@format) %in% c("del", "dec", "na", "flags"))){
-      errors <- c(errors, "'names(schema$format)' must be a permutation of set {del,dec,na,flags}")
+    if(!all(names(object@format) %in% c("header", "del", "dec", "na", "flags"))){
+      errors <- c(errors, "'names(schema$format)' must be a permutation of set {header,del,dec,na,flags}")
+    }
+    if(!is.null(object@format$header)){
+      if(!is.integer(object@format$header)){
+        errors <- c(errors, "'schema$format$header' must must have a integer value.")
+      }
     }
     if(!is.null(object@format$del)){
       if(!is.character(object@format$del)){
diff --git a/R/setFormat.R b/R/setFormat.R
index 31d2e2c..23af1f2 100644
--- a/R/setFormat.R
+++ b/R/setFormat.R
@@ -7,6 +7,12 @@
 #' @param schema [\code{schema(1)}]\cr In case this information is added to an
 #'   already existing schema, provide that schema here (overwrites previous
 #'   information).
+#' @param header [\code{integerish(1)}]\cr The number of header rows. Optimally,
+#'   a table is read so that column names are ignored (for example
+#'   \code{readr::read_csv(file = ..., col_names = FALSE)}). If relatively well
+#'   defined tables are processed, where the header is always only one row, the
+#'   table can be read in with the default and the header can be spliced into
+#'   the table by specifying the number of rows here.
 #' @param decimal [\code{character(1)}]\cr The symbols that should be
 #'   interpreted as decimal separator.
 #' @param thousand [\code{character(1)}]\cr The symbols that should be
@@ -27,10 +33,11 @@
 #' @importFrom dplyr bind_rows
 #' @export
 
-setFormat <- function(schema = NULL, decimal = NULL, thousand = NULL,
-                      na_values = NULL, flags = NULL){
+setFormat <- function(schema = NULL, header = 0L, decimal = NULL,
+                      thousand = NULL, na_values = NULL, flags = NULL){
 
   assertClass(x = schema, classes = "schema", null.ok = TRUE)
+  assertIntegerish(x = header, len = 1, lower = 0L, any.missing = FALSE)
   assertCharacter(x = decimal, len = 1, any.missing = FALSE, null.ok = TRUE)
   assertCharacter(x = thousand, len = 1, any.missing = FALSE, null.ok = TRUE)
   assertCharacter(x = na_values, any.missing = FALSE, null.ok = TRUE)
@@ -43,6 +50,10 @@ setFormat <- function(schema = NULL, decimal = NULL, thousand = NULL,
     schema <- schema_default
   }
 
+  if(!is.null(header)){
+    schema@format$header <- header
+  }
+
   if(!is.null(decimal)){
     schema@format$dec <- decimal
   }
diff --git a/data/schema_default.rda b/data/schema_default.rda
index d5e3b61fd9dfb5ce505666622b13a20ddcac5c48..a5579926451b17a1fe6de033598ab1788f5fda74 100755
GIT binary patch
delta 331
zcmV-R0kr<l0@eZ%LRx4!F+o`-Q(3rBT$lg>LXi;`e^VGkRQ(YKsq9nAYJR7wJd-2T
z9#d)zNur*Sw15K;02&CCQK_e-!$L3uU`-85>}L?9h;&jh3?U;x?jZtDn-_KjvoSl@
zn%b*yF-T^>42Ve=_7`{ws|g~hOX6^1h!YA24PNyx#wmU%2eeHP7Z)${9xNZdGFI$4
zbW^Rge|~#iToTxpOOeA6cBl~qvI|8a5(`+;7oY=Jq=bS2)9!&}y#S>EFrB*7wzj`*
z<eMdQw{yVL0E>oWL~z7As7(I_26oGtX6E!?ejneIN)i{#7($7w_l%Rv;tLqAL8!}>
zd_%<JTsTvZU}8a@vQ~?RC1OM8JFLd>W4_C|F_70jVxwwZWe9FP%{pbrNJtjdAsf}O
d9cHYUL469?hp<})MIcwuyOJrwgoVO_<iMIwjFA8U

delta 324
zcmV-K0lWUz0?z^wLRx4!F+o`-Q(49o&s6{cIgt?-e^??7OqxNdvKne>kZGeqwK5oJ
zWHia37>1f?(9wcKM4q6@sp@HjGz>s`m+ZnReaM!ZR3Mv`<tR*~=50U;m`juScJj?p
zvmDmft(qFI*!sz(;3R6g@5ez^fna0@ryVj;rh=^i8X7y=62sdw9d`ZcgypX-(bi#h
z#x8bff2+j{OA;>-3<N_GgoZRU+C(%<08r8!8xl*Wyem2|A_48PZ6nu7ovf`rPm?5D
zjHm{>F&xg3z;?tRuY&Q?Yi=Zw5#_t9d3PcU*%V?7s!y=?bO9nQr=X3PX8hq^6z;N!
zK!)_=YS<?g66+x7LUmO@Q+<<eCN<C)4R4)SDv@2DuYV2M@WZ%u;}~tU*8`SQOo%PL
W%7@t_Q%GSy;_gVN3KAH?nd+bkFq0Ah


From 11af0b252f1bcf8254d7912e81f9acf0b7959a75 Mon Sep 17 00:00:00 2001
From: EhrmannS <steffen.ehrmann@posteo.de>
Date: Fri, 16 Feb 2024 01:58:09 +0100
Subject: [PATCH 4/4] increase version

---
 DESCRIPTION | 2 +-
 NEWS.md     | 5 +++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 50e7e10..3aa6d25 100755
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: tabshiftr
 Title: Reshape Disorganised Messy Data
-Version: 0.4.2
+Version: 0.5.0
 Authors@R: 
     c(person(given = "Steffen",
              family = "Ehrmann",
diff --git a/NEWS.md b/NEWS.md
index d926078..3dd46af 100755
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,8 @@
+# tabshiftr 0.5.0 - variable types
+
+- include the possibility to specify variable data type, which will result in a column of that type
+- re-introduce the `header` argument in `setFormat()`, so that tables whose column names were read into the header can be provided; the header rows are then spliced into the table.
+
 # tabshiftr 0.4.2
 
 - include split and merge functionality for cluster ID.