diff --git a/DESCRIPTION b/DESCRIPTION index d2d7aa5..95c20b0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -10,6 +10,7 @@ Maintainer: Edouard Legoupil Description: This package facilitates the data crunching & exploration for dataset collected using xlsform. License: GPL-3 LazyData: TRUE +Encoding: UTF-8 Depends: utils, data.table (>= 1.9.4), diff --git a/NAMESPACE b/NAMESPACE index 119d6ff..755aac8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -8,10 +8,12 @@ export(kobo_arrange_variablename) export(kobo_atlas_report) export(kobo_bar_multi) export(kobo_bar_multi_facet) +export(kobo_bar_multi_indiv) export(kobo_bar_multi_print) export(kobo_bar_one) export(kobo_bar_one_facet) export(kobo_bar_one_facet_print) +export(kobo_bar_one_indiv) export(kobo_bar_one_print) export(kobo_boxplot_facet) export(kobo_clean) @@ -19,6 +21,7 @@ export(kobo_cluster_report) export(kobo_consolidateone) export(kobo_correlation) export(kobo_corrplot) +export(kobo_crunching) export(kobo_data_downloader) export(kobo_datasets) export(kobo_datasets2) @@ -29,6 +32,7 @@ export(kobo_form) export(kobo_forminfo) export(kobo_get_begin_repeat) export(kobo_histo) +export(kobo_histo_indiv) export(kobo_histo_print) export(kobo_indicator) export(kobo_label) diff --git a/R/kobo_crunch.R b/R/kobo_crunch.R new file mode 100644 index 0000000..b8129f6 --- /dev/null +++ b/R/kobo_crunch.R @@ -0,0 +1,548 @@ +#' @name kobo_crunching +#' @rdname kobo_crunching +#' @title Pick the right function for each question +#' @description This function choose and run the right type of koboloadeR function for data crunching. +#' +#' @param question Question to be treated by the function. It should be the "fullname" (as a string) of the question as generated by kobo_dico(). +#' @param mainDir Path to the project's working directory: mainly for shiny app +#' +#' @return Return the result of the function. +#' +#' @author Elliott Messeiller +#' +#' @examples +#' kobo_crunching("question_name") +#' @export kobo_crunching +#' @examples +#' \dontrun{ +#' kobo_crunching("question_name") +#' } +#' +kobo_crunching <- function(question, mainDir = "") { + if (mainDir == "") { + mainDir <- gsub("/code/shiny_app", "", getwd()) + mainDir <- gsub("/inst/shiny_app", "", mainDir) + } + if (is.na(question) == TRUE | is.character(question) == FALSE) stop("Please enter a question.") + + source(paste0(mainDir, "/code/0-config.R"), local = TRUE) + # Select question in dico + selectquestion <- dico[dico$fullname == question, ] + qtype <- as.character(selectquestion$type) + + if (qtype == "select_one") { + result <- kobo_bar_one_indiv(question, mainDir) + } + if (qtype == "select_multiple") { + result <- kobo_bar_multi_indiv(question, mainDir) + } + if (qtype == "integer" | qtype == "decimal" | qtype == "calculate") { + result <- kobo_histo_indiv(question, mainDir) + } + if (qtype == "geopoint") { + result <- (paste0(question, ": Sorry, ", qtype, " questions cannot be treated at the moment \n")) + } + + return(result) +} + +NULL + +#' @name kobo_bar_one_indiv +#' @rdname kobo_bar_one_indiv +#' @title Generate bar Chart - frequency - for select_one question +#' @description Generate basic data exploration analysis for kobo select_one question. Returns a list containing the question fullname, ggplot object of the plot, and a frequency table, weighted if this is included in the 0-config file. If disaggregation is set into the form, then the graph are "faceted" per disaggregation. +#' +#' +#' @param question Question to be treated by the function. It should be the "fullname" (as a string) of the question as generated by kobo_dico(). +#' @param mainDir Path to the project's working directory: mainly for shiny app +#' +#' @return Returns a list containing the question fullname, ggplot object of the plot, and a frequency table, weighted if this is included in the 0-config file. +#' +#' @author Edouard Legoupil, Elliott Messeiller +#' +#' @examples +#' kobo_bar_one_indiv("question_name") +#' @export kobo_bar_one_indiv +#' @examples +#' \dontrun{ +#' kobo_bar_one_indiv("question_name") +#' } +#' +kobo_bar_one_indiv <- function(question, mainDir = "") { + if (mainDir == "") { + mainDir <- gsub("/code/shiny_app", "", getwd()) + mainDir <- gsub("/inst/shiny_app", "", mainDir) + } + + source(paste0(mainDir, "/code/0-config.R"), local = TRUE) + # Select question in dico + selectquestion <- dico[dico$fullname == question, ] + + # Checks + + ## Check that variable are in the dataset + if (nrow(selectquestion) != 1) stop("Question not in dictionnary", call. = FALSE) + ## Check that it is a select_one question + if ((selectquestion$type %in% c("select_one")) == FALSE) stop("Not a select_one question") + + ## Select disaggregation if exists + selectfacet <- as.character(selectquestion$disaggregation) + + ## Getting choices + selectlistname <- as.character(selectquestion[, "listname"]) + selectchoices <- dico[dico$type == "select_one_d" & dico$listname == selectlistname, c("listname", "name", "labelchoice")] + + ## Matching data from question + dataquestion <- data[question] + + ## Checking that question was answered + + if (sum(is.na(dataquestion)) == nrow(dataquestion)) { + return(paste0("No answer to question:", selectquestion$label, "\n")) + } + + ## Labeling dataframe + dataquestion <- kobo_label(dataquestion, dico) + + ## Replacing empty cells by NAs + # dataquestion[,dataquestion==""]<-NA + + ### Now let's create bar graphs + variablename <- as.character(names(dataquestion)) + title <- attributes(dataquestion)$variable.labels + ordinal <- as.character(dico[dico$fullname == variablename, c("ordinal")]) + + if (is.na(selectfacet) == F && selectfacet != "") { + dataquestion <- cbind(dataquestion, data[selectfacet]) + + } + + if (usedweight == "sampling_frame") { + frequ <- data.frame(svytable(~dataquestion, surveydesign)) + } else { + frequ <- data.frame(table(dataquestion)) + } + + if (ncol(frequ) == 2) { + names(frequ) <- c("Var1", "Freq") + } else { + names(frequ) <- c("Var1", "facet", "Freq") + listname_f <- as.character(dico[dico$fullname == selectfacet, "listname" ]) + choices_f <- unique(dico[dico$listname == listname_f & dico$formpart == "answers", c("name", "label")]) + choices_f <- droplevels(choices_f) + frequ <- frequ %>% + left_join(choices_f, by = c("facet"="name"))%>% + select(Var1, label, Freq) + frequ <- droplevels(frequ) + names(frequ) <- c("Var1", "facet", "Freq") + } + + frequ$freqper <- as.numeric(frequ$Freq / (sum(!is.na(dataquestion[1])))) + frequ$Var1 <- str_wrap(frequ$Var1, width = 15) + + totalanswer <- nrow(dataquestion) + + count_replied <- (sum(!is.na(dataquestion[1]))) + + percentresponse <- paste(round((count_replied / totalanswer * 100), digits = 2), "%", sep = "") + + frequ$Var1 <- as.factor(frequ$Var1) + + if (is.na(ordinal) == T) { + frequ$Var1 <- factor(frequ$Var1, levels = unique(frequ$Var1[order(frequ$freqper)])) + } else { + ordinal_choices <- as.character(selectchoices_questions[selectchoices_questions$qname == variablename, c("labelchoice")]) + frequ$Var1 <- reorder.factor(frequ$Var1, new.order = ordinal_choices) + frequ %>% arrange(Var1) + } + + color <- "#2a87c8" + background_rect <- data.frame(unique(frequ[, c("Var1")])) + names(background_rect) <- c("Var1") + background_rect$freqper <- 1 + + theme_set(theme_gray(base_size = 10)) + + + ## and now the graph + plotfreq <- ggplot(frequ, aes(x = Var1, y = freqper)) + if ("facet" %in% colnames(frequ)) { + plotfreq <- plotfreq + + geom_bar(data = background_rect, aes(x = Var1), stat = "identity", alpha = 0.2) + + geom_bar(stat = "identity", position = "dodge", aes(fill = facet)) + + geom_text(aes(label = paste(round(freqper * 100), "%", sep = ""), fill = facet, hjust = -0.5), position = position_dodge(width = 0.8)) + } else { + plotfreq <- plotfreq + + geom_bar(fill = color, colour = color, stat = "identity") + + geom_text(aes(label = paste(round(frequ$freqper * 100), "%", sep = ""), hjust = -0.5)) + } + plotfreq <- plotfreq + + scale_y_continuous(labels = percent, limits = c(0, 1.05)) + + scale_fill_brewer(palette = "PuBu", name = "Disaggregation") + + labs(x = "", y = "")+ + coord_flip() + + labs(x = "", y = "")+ + ggtitle(str_wrap(title, width = 50)) + + theme( + plot.title = element_text(face = "bold", size = 20), + plot.background = element_rect(fill = "transparent", colour = NA) + ) + + + + ## Formating the frequ table + frequ_nice <- frequ + frequ_nice$freqper <- round(frequ$freqper * 100, 2) + + if(ncol(frequ_nice) == 4){ + names(frequ_nice) <- c("Choices", "Disaggregation", "# answered", "% answered") + }else{ + names(frequ_nice) <- c("Choices", "# answered", "% answered") + } + frequ_nice$Choices <- gsub(pattern = "\\n", " ", frequ_nice$Choices) + + other_info <- data.frame( + title = c("# Answered form", "# Answered question", "% Answered question"), + info = c(totalanswer, count_replied, percentresponse) + ) + + ## Saving outputs to a list + out_indiv <- list(variable = variablename, plot = plotfreq, freq_tbl = frequ_nice, other = other_info) + + return(out_indiv) +} +NULL + +#' @name kobo_bar_multi_indiv +#' @rdname kobo_bar_multi_indiv +#' @title Generate bar Chart - frequency - for select_one question + +#' @description Generate basic data exploration analysis for kobo select_one question. Returns a list containing the question fullname, ggplot object of the plot, and a frequency table, weighted if this is included in the 0-config file. If disaggregation is set into the form, then the graph are "faceted" per disaggregation. +#' +#' +#' @param question Question to be treated by the function. It should be the "fullname" (as a string) of the question as generated by kobo_dico(). +#' @param mainDir Path to the project's working directory: mainly for shiny app +#' +#' @return Returns a list containing the question fullname, ggplot object of the plot, and a frequency table, weighted if this is included in the 0-config file. +#' +#' @author Edouard Legoupil, Elliott Messeiller +#' +#' @examples +#' kobo_bar_multi_indiv("S5_FOOD_ASSESS.q3_food_coping_strat") +#' @export kobo_bar_multi_indiv +#' @examples +#' \donttest{ +#' kobo_bar_multi_indiv("S5_FOOD_ASSESS.q3_food_coping_strat") +#' } +#' +kobo_bar_multi_indiv <- function(question, mainDir = "") { + if (mainDir == "") { + mainDir <- gsub("/code/shiny_app", "", getwd()) + mainDir <- gsub("/inst/shiny_app", "", mainDir) + } + + source(paste0(mainDir, "/code/0-config.R"), local = TRUE) + # Select question in dico + selectquestion <- dico[dico$fullname == question, ] + + # Checks + + ## Check that variable are in the dataset + if (nrow(selectquestion) != 1) stop("Question not in dictionnary", call. = FALSE) + ## Check that it is a select_one question + if (selectquestion$type != "select_multiple") stop(paste0("Not a select_multiple question. Question type in dico for this question : ", selectquestion$type)) + + ## Select disaggregation if exists + selectfacet <- as.character(selectquestion$disaggregation) + + ## Getting choices + selectlistname <- as.character(selectquestion[, "listname"]) + selectchoices <- dico[dico$type == "select_multiple_d" & dico$listname == selectlistname, c("listname", "name", "labelchoice")] + + ## Matching data from question and formatting + dataquestion <- data[, grep(question, names(data))] + dataquestion <- dataquestion %>% + select(-question) %>% + mutate_all(as.numeric) + + + ## Checking that question was answered + + if (sum(is.na(dataquestion)) == nrow(dataquestion) * ncol(dataquestion)) { + return("No answer to question:", selectquestion$label, "\n") + } + + ### Now let's create bar graphs + variablename <- as.character(selectquestion$fullname) + title <- selectquestion$label + ordinal <- as.character(dico[dico$fullname == variablename, c("ordinal")]) + if (usedweight == "sampling_frame") { + ### TO BE FIXED ### + frequ <- data.frame(svytable(~dataquestion, surveydesign)) + } else { + if (is.na(selectfacet) == F && selectfacet != "") { + ### With a disaggregation + dataquestion <- cbind(dataquestion, data[selectfacet]) + names(dataquestion)[ncol(dataquestion)] <- "facet" + + labelschoices <- selectchoices %>% select(-listname) + + frequ <- dataquestion %>% + gather(name, freq, -facet) %>% + dplyr::group_by(facet, name) %>% + dplyr::summarise(freq = sum(freq, na.rm = TRUE)) + + ### Relabel CHOICES + + frequ$name <- gsub(paste0(question, "."), "", frequ$name) + frequ <- frequ %>% + left_join(labelschoices, by = "name") %>% + select(labelchoice, facet, freq) + + names(frequ) <- c("Var1", "facet", "Freq") + listname_f <- as.character(dico[dico$fullname == selectfacet, "listname" ]) + choices_f <- unique(dico[dico$listname == listname_f & dico$formpart == "answers", c("name", "label")]) + + frequ <- frequ %>% + left_join(choices_f, by = c("facet"="name"))%>% + ungroup()%>% + select(Var1, label, Freq) + frequ <- droplevels(frequ) + names(frequ) <- c("Var1", "facet", "Freq") + + + } else { + ### Without disaggregation + labelschoices <- selectchoices %>% + select(-listname) %>% + mutate_all(as.character) + + + frequ <- dataquestion %>% + gather(name, freq) %>% + dplyr::group_by(name) %>% + dplyr::summarise(freq = sum(freq, na.rm = TRUE)) + + frequ$name <- gsub(paste0(question, "."), "", frequ$name) + + #### Relabel CHOICES + frequ <- frequ %>% + left_join(labelschoices, by = "name") %>% + select(labelchoice, freq) + } + } + + if (ncol(frequ) == 2) { + names(frequ) <- c("Var1", "Freq") + } else { + names(frequ) <- c("Var1", "facet", "Freq") + } + + frequ$freqper <- as.numeric(frequ$Freq / (sum(!is.na(dataquestion[1])))) + frequ$Var1 <- str_wrap(frequ$Var1, width = 15) + + totalanswer <- nrow(dataquestion) + + count_replied <- (sum(!is.na(dataquestion[1]))) + + percentresponse <- paste(round((count_replied / totalanswer * 100), digits = 2), "%", sep = "") + + frequ$Var1 <- as.factor(frequ$Var1) + + if (is.na(ordinal) == T) { + frequ$Var1 <- factor(frequ$Var1, levels = unique(frequ$Var1[order(frequ$freqper)])) + } else { + ordinal_choices <- as.character(selectchoices_questions[selectchoices_questions$qname == variablename, c("labelchoice")]) + frequ$Var1 <- reorder.factor(frequ$Var1, new.order = ordinal_choices) + frequ %>% arrange(Var1) + } + + theme_set(theme_gray(base_size = 15)) + color <- "#2a87c8" + + background_rect <- data.frame(unique(frequ[, c("Var1")])) + names(background_rect) <- c("Var1") + background_rect$freqper <- 1 + + + ## and now the graph + plotfreq <- ggplot(frequ, aes(x = Var1, y = freqper)) + if ("facet" %in% colnames(frequ)) { + plotfreq <- plotfreq + + geom_bar(data = background_rect, aes(x = Var1), stat = "identity", alpha = 0.2) + + geom_bar(stat = "identity", position = "dodge", aes(fill = facet)) + + geom_text(aes(label = paste(round(freqper * 100), "%", sep = ""), fill = facet, hjust = -0.5), position = position_dodge(width = 0.8)) + } else { + plotfreq <- plotfreq + + geom_bar(fill = color, colour = color, stat = "identity") + + geom_text(aes(label = paste(round(frequ$freqper * 100), "%", sep = ""), hjust = -0.5)) + } + plotfreq <- plotfreq + + scale_y_continuous(labels = percent, limits = c(0, 1.05)) + + scale_fill_brewer(palette = "PuBu", name = "Disaggregation") + + labs(x = "", y = "")+ + coord_flip() + + ggtitle(str_wrap(title, width = 50)) + + theme( + plot.title = element_text(face = "bold", size = 20), + plot.background = element_rect(fill = "transparent", colour = NA) + ) + + ## Formating the frequ table + frequ_nice <- frequ + frequ_nice$freqper <- round(frequ$freqper * 100, 2) + if(ncol(frequ_nice) == 4){ + names(frequ_nice) <- c("Choices", "Disaggregation", "# answered", "% answered") + }else{ + names(frequ_nice) <- c("Choices", "# answered", "% answered") + } + frequ_nice$Choices <- gsub(pattern = "\\n", " ", frequ_nice$Choices) + + other_info <- data.frame( + title = c("# Answered form", "# Answered question", "% Answered question"), + info = c(totalanswer, count_replied, percentresponse) + ) + + ## Saving outputs to a list + out_indiv <- list(variable = variablename, plot = plotfreq, freq_tbl = frequ_nice, other = other_info) + return(out_indiv) +} +NULL +#' @name kobo_histo_indiv +#' @rdname kobo_histo_indiv +#' @title Generate histograme for individual integer questions +#' +#' @description Automatically generate histogrammes for each of the integer questions in the dataset. ggplot2 is used. +#' +#' @param mainDir Path to the project's working directory: mainly for proper shiny app path +#' +#' @author Edouard Legoupil, Elliott Messeiller +#' +#' @examples +#' kobo_histo_indiv() +#' @export kobo_histo_indiv +#' +#' @examples +#' \dontrun{ +#' kobo_histo_indiv() +#' } +#' +kobo_histo_indiv <- function(question, mainDir = "") { + if (mainDir == "") { + mainDir <- gsub("/code/shiny_app", "", getwd()) + mainDir <- gsub("/inst/shiny_app", "", mainDir) + } + + source(paste0(mainDir, "/code/0-config.R"), local = TRUE) + + selectquestion <- dico[dico$fullname == question, ] + + # Checks + + ## Check that variable are in the dataset + if (nrow(selectquestion) != 1) stop("Question not in dictionnary", call. = FALSE) + ## Check that it is a select_one question + if ((selectquestion$type %in% c("integer", "decimal", "calculate")) == FALSE) stop("Not a question") + + ## Select disaggregation if exists + selectfacet <- as.character(selectquestion$disaggregation) + + ## Getting choices + ## Matching data from question + dataquestion <- data[question] + + ## Checking that question was answered + + if (sum(is.na(dataquestion)) == nrow(dataquestion)) { + return(paste0("No answer to question:", selectquestion$label, "\n")) + } + + ## Labeling dataframe + dataquestion <- kobo_label(dataquestion, dico) + + ## Replacing empty cells by NAs + # dataquestion[,dataquestion==""]<-NA + + ### Now let's create bar graphs + variablename <- as.character(names(dataquestion)) + title <- attributes(dataquestion)$variable.labels + ordinal <- as.character(dico[dico$fullname == variablename, c("ordinal")]) + + if (is.na(selectfacet) == F && selectfacet != "") { + dataquestion <- cbind(dataquestion, data[selectfacet]) + } + + if (usedweight == "sampling_frame") { + ### TO BE FIXED ### + frequ <- data.frame(svytable(~dataquestion, surveydesign)) + } else { + frequ <- dataquestion + } + + if (ncol(frequ) == 1) { + names(frequ) <- "Var1" + } else { + names(frequ) <- c("Var1", "name") + listname_f <- as.character(dico[dico$fullname == selectfacet, "listname" ]) + choices_f <- unique(dico[dico$listname == listname_f & dico$formpart == "answers", c("name", "label")]) + frequ <- frequ %>% + left_join(choices_f, by = "name")%>% + select(Var1, label) + frequ <- droplevels(frequ) + names(frequ) <- c("Var1", "facet") + + } + + totalanswer <- nrow(dataquestion) + + count_replied <- (sum(!is.na(dataquestion[1]))) + + percentresponse <- paste(round((count_replied / totalanswer * 100), digits = 2), "%", sep = "") + + color <- "#2a87c8" + + theme_set(theme_gray(base_size = 10)) + + ## and now the graph + plotfreq <- ggplot(frequ, aes(x = Var1)) + if ("facet" %in% colnames(frequ)) { + plotfreq <- plotfreq + + geom_density(aes(fill = facet), adjust = 2, alpha = 0.2) + } else { + plotfreq <- plotfreq + + geom_density(adjust = 2, alpha = 0.2) + } + plotfreq <- plotfreq + + scale_x_continuous(expand = c(0, 0)) + + scale_fill_brewer(palette = "PuBu", name = "Disaggregation") + + labs(x = "", y = "")+ + ggtitle(str_wrap(title, width = 50)) + + theme( + plot.title = element_text(face = "bold", size = 20), + plot.background = element_rect(fill = "transparent", colour = NA) + ) + + ## Formating the frequ table + frequ_nice <- frequ %>% + table() %>% + data.frame() %>% + mutate(freqper = paste0(round(Freq / sum(Freq) * 100), "%")) + + + if(ncol(frequ_nice) == 4){ + names(frequ_nice) <- c("Variable", "Disaggregation", "# answered", "% answered") + }else{ + names(frequ_nice) <- c("Variable", "# answered", "% answered") + } + + other_info <- data.frame( + title = c("# Answered form", "# Answered question", "% Answered question"), + info = c(totalanswer, count_replied, percentresponse) + ) + + ## Saving outputs to a list + out_indiv <- list(variable = variablename, plot = plotfreq, freq_tbl = frequ_nice, summary = summary(frequ), other = other_info) + return(out_indiv) +} +NULL \ No newline at end of file diff --git a/R/kobo_dico.R b/R/kobo_dico.R index 738c227..53a37fc 100644 --- a/R/kobo_dico.R +++ b/R/kobo_dico.R @@ -24,81 +24,83 @@ #' kobo_dico <- function(form) { - + #kobo_form(formid, user = user, api = api) cat("\n Your form should be placed within the `data` folder. \n \n") # read the survey tab of ODK from - form_tmp <- paste0("data/",form) - + mainDir <- gsub("/code/shiny_app", "", getwd()) + mainDir <- gsub("/inst/shiny_app", "", mainDir) + form_tmp <- paste(mainDir, "data", form, sep = "/", collapse = "/") + ### First review all questions from survey sheet ################################################# survey <- read_excel(form_tmp, sheet = "survey") - + ## Rename the variable label names(survey)[names(survey) == "label::English"] <- "label" names(survey)[names(survey) == "label::english"] <- "label" cat("Checking now for additional information within your xlsform. Note that you can insert them in the xls and re-run the function! \n \n ") - - - + + + ### add column if not present ################################################# if ("disaggregation" %in% colnames(survey)) { - cat("1- Good: You have a column `disaggregation` in your survey worksheet.\n"); + cat("1- Good: You have a column `disaggregation` in your survey worksheet.\n"); } else {cat("1- No column `disaggregation` in your survey worksheet. Creating a dummy one for the moment...\n"); survey$disaggregation <- ""} - + if ("correlate" %in% colnames(survey)) { cat("2- Good: You have a column `correlate` in your survey worksheet. This will be used to define the variables that should be checked for correlation between each others.\n"); } else {cat("2- No column `correlate` in your survey worksheet. Creating a dummy one for the moment...\n"); survey$correlate <- ""} - + if ("chapter" %in% colnames(survey)) { cat("3- Good: You have a column `chapter` in your survey worksheet. This will be used to breakdown the generated report\n"); } else {cat("3- No column `chapter` in your survey worksheet. Creating a dummy one for the moment ...\n"); survey$chapter <- ""} - + if ("structuralequation.risk" %in% colnames(survey)) { cat("4- Good: You have a column `structuralequation.risk` in your survey worksheet. This will be used to configure the vulnerability structural equation model\n"); } else {cat("4- No column `structuralequation.risk` in your survey worksheet. Creating a dummy one for the moment...\n"); survey$structuralequation.risk <- ""} - + if ("structuralequation.coping" %in% colnames(survey)) { cat("4- Good: You have a column `structuralequation.coping` in your survey worksheet. This will be used to configure the vulnerability structural equation model\n"); } else {cat("4- No column `structuralequation.coping` in your survey worksheet. Creating a dummy one for the moment...\n"); survey$structuralequation.coping <- ""} - + if ("structuralequation.resilience" %in% colnames(survey)) { cat("4- Good: You have a column `structuralequation.resilience` in your survey worksheet. This will be used to configure the vulnerability structural equation model\n"); } else {cat("4- No column `structuralequation.resilience` in your survey worksheet. Creating a dummy one for the moment...\n"); survey$structuralequation.resilience <- ""} - - + + if ("anonymise" %in% colnames(survey)) { cat("5- Good: You have a column `anonymise` in your survey worksheet. This will be used to anonymise the dataset.\n"); } else {cat("5- No column `anonymise` in your survey worksheet. Creating a dummy one for the moment filled as `non-anonymised`. Other options to record are `Remove`, `Reference`, `Mask`, `Generalise` (see readme file) ...\n"); survey$anonymise <- "default-non-anonymised"} - + if ("variable" %in% colnames(survey)) { cat("6- Good: You have a column `variable` in your survey worksheet. This will be used to flag ordinal variable.\n"); } else {cat("6- No column `variable` in your survey worksheet. Creating a dummy one for the moment (see readme file). ...\n"); survey$variable <- ""} - + ## Adding clean cluster predict if ("clean" %in% colnames(survey)) { @@ -106,41 +108,41 @@ kobo_dico <- function(form) { } else {cat("7- No column `clean` in your survey worksheet. Creating a dummy one for the moment (see readme file). ...\n"); survey$clean <- "no"} - + if ("cluster" %in% colnames(survey)) { cat("8- Good: You have a column `cluster` in your survey worksheet. This will be used to flag variables to be used for clustering exploration.\n"); } else {cat("8- No column `cluster` in your survey worksheet. Creating a dummy one for the moment (see readme file). ...\n"); survey$cluster <- ""} - + if ("predict" %in% colnames(survey)) { cat("9- Good: You have a column `predict` in your survey worksheet. This will be used to flag variables to be used for clustering exploration.\n"); } else {cat("9- No column `predict` in your survey worksheet. Creating a dummy one for the moment (see readme file). ...\n"); survey$predict <- ""} - + if ("mappoint" %in% colnames(survey)) { cat("10- Good: You have a column `mappoint` in your survey worksheet. This will be used to flag variables to be used for clustering exploration.\n"); } else {cat("10- No column `mappoint` in your survey worksheet. Creating a dummy one for the moment (see readme file). ...\n"); survey$mappoint <- ""} - + if ("mappoly" %in% colnames(survey)) { cat("11- Good: You have a column `mappoly` in your survey worksheet. This will be used to flag variables to be used for clustering exploration.\n"); } else {cat("11- No column `mappoly` in your survey worksheet. Creating a dummy one for the moment (see readme file). ...\n"); survey$mappoly <- ""} - + ## Avoid columns without names survey <- survey[ ,c("type", "name" , "label", #"repeatsummarize", "variable","disaggregation", "chapter", "structuralequation.risk","structuralequation.coping","structuralequation.resilience","anonymise","correlate","clean","cluster","predict","mappoint","mappoly" - # "indicator","indicatorgroup","indicatortype", - # "indicatorlevel","dataexternal","indicatorcalculation","indicatornormalisation" + # "indicator","indicatorgroup","indicatortype", + # "indicatorlevel","dataexternal","indicatorcalculation","indicatornormalisation" #"indicator","select", "Comment", "indicatordirect", "indicatorgroup" ## This indicator reference # "label::English", #"label::Arabic" ,"hint::Arabic", @@ -148,63 +150,63 @@ kobo_dico <- function(form) { # "constraint_message::English", "default", "appearance", "calculation", "read_only" , # "repeat_count" )] - + ## need to delete empty rows from the form survey <- as.data.frame(survey[!is.na(survey$type), ]) - + #str(survey) #levels(as.factor(survey$type)) - + ### We can now extract the id of the list name to reconstruct the full label fo rthe question cat(" \n Now extracting list name from questions type.\n \n") survey$listname <- "" - - + + ## Extract for select_one survey$listname <- with(survey, ifelse(grepl("select_one", ignore.case = TRUE, fixed = FALSE, useBytes = FALSE, survey$type) , paste0( substr(survey$type , (regexpr("select_one", survey$type , ignore.case=FALSE, fixed=TRUE))+10,250)),survey$listname)) - + survey$type <- with(survey, ifelse(grepl("select_one", ignore.case = TRUE, fixed = FALSE, useBytes = FALSE, survey$type), paste0("select_one"),survey$type)) - + ## Extract for select multiple & clean type field survey$listname <- with(survey, ifelse(grepl("select_multiple", ignore.case = TRUE, fixed = FALSE, useBytes = FALSE, survey$type), paste0( substr(survey$type , (regexpr("select_multiple", survey$type , ignore.case=FALSE, fixed=TRUE))+16,250)),survey$listname )) - - - survey$type <- with(survey, ifelse(grepl("select_multiple", ignore.case = TRUE, fixed = FALSE, useBytes = FALSE, survey$type), paste0("select_multiple_d"),survey$type)) - + + + survey$type <- with(survey, ifelse(grepl("select_multiple", ignore.case = TRUE, fixed = FALSE, useBytes = FALSE, survey$type), paste0("select_multiple"),survey$type)) + ## handle case where we have "or_other" #survey$listname <- with(survey, ifelse(grepl("or_other", ignore.case = TRUE, fixed = FALSE, useBytes = FALSE, survey$listname) , # paste0( substr(survey$listname , 1, (nchar(survey$listname)-8 ))),survey$listname)) - + ## handle case where we have "or_other" survey$listname <- with(survey, ifelse(grepl("or_other", ignore.case = TRUE, fixed = FALSE, useBytes = FALSE, survey$listname) , paste0( substr(survey$listname , 1, (nchar(survey$listname)-8 ))), survey$listname)) - + ## Remove trailing space survey$listname <- trim(survey$listname) survey$label <- trim(survey$label) #str(survey) - + ## Now creating full name in order to match with data variables name - + ### identify Repeat questions with nest levels cat("\n Be careful! The current function only support 2 levels of nested repeat - for instance household / Case / Individual. \n \n") survey$qrepeat <- "" for(i in 2:nrow(survey)) { #Check based on repeat type - if (survey[ i, c("type")] %in% c("begin repeat","begin_repeat") && survey[ i - 1, c("qrepeat")] == "") {survey[ i, c("qrepeat")] <- "repeatnest1"} + if (survey[ i, c("type")] %in% c("begin repeat","begin_repeat") && survey[ i - 1, c("qrepeat")] == "") {survey[ i, c("qrepeat")] <- "repeatnest1"} else if (survey[ i, c("type")] %in% c("begin repeat","begin_repeat") && survey[ i - 1, c("qrepeat")] == "repeatnest1") {survey[ i, c("qrepeat")] <- "repeatnest2"} else if (!(survey[ i, c("type")] %in% c("end repeat","end_repeat")) && survey[ i - 1, c("qrepeat")] == "repeatnest1") {survey[ i, c("qrepeat")] <- "repeatnest1"} else if (!(survey[ i, c("type")] %in% c("end repeat","end_repeat")) && survey[ i - 1, c("qrepeat")] == "repeatnest2") {survey[ i, c("qrepeat")] <- "repeatnest2"} else if (survey[ i, c("type")] %in% c("end repeat","end_repeat") && survey[ i - 1, c("qrepeat")] == "repeatnest1" ) {survey[ i, c("qrepeat")] <- ""} else if (survey[ i, c("type")] %in% c("end repeat","end_repeat") && survey[ i - 1, c("qrepeat")] == "repeatnest2" ) {survey[ i, c("qrepeat")] <- "repeatnest1"} - + else {survey[ i, c("qrepeat")] <- ""} } - + ### identify Repeat questions survey$qrepeatlabel <- "household" nestable <- survey[survey$type %in% c("begin_repeat","begin repeat") , c("name","qrepeat","type")] @@ -213,72 +215,72 @@ kobo_dico <- function(form) { { # Now insert the repeat label based on name # i <-230 - if ( survey[ i, c("type")] == "begin repeat" ) {survey[ i, c("qrepeatlabel")] <- survey[ i, c("name")]} + if ( survey[ i, c("type")] == "begin repeat" ) {survey[ i, c("qrepeatlabel")] <- survey[ i, c("name")]} else if ( survey[ i, c("type")] !="end repeat" && survey[ i - 1, c("qrepeat")] == "repeatnest1" ) {survey[ i, c("qrepeatlabel")] <- survey[ i - 1, c("qrepeatlabel")] } else if ( survey[ i, c("type")] !="end repeat" && survey[ i - 1, c("qrepeat")] == "repeatnest2" ) {survey[ i, c("qrepeatlabel")] <- survey[ i - 1, c("qrepeatlabel")] } else if ( survey[ i, c("type")] == "end repeat" && survey[ i - 1, c("qrepeat")] == "repeatnest1") {survey[ i, c("qrepeatlabel")] <- "household"} else if ( survey[ i, c("type")] == "end repeat" && survey[ i - 1, c("qrepeat")] == "repeatnest2") { nestabove <- as.character(survey[ i - 1, c("qrepeatlabel")]) - nestabovenum <- as.integer(which(nestable$name == nestabove ) -1) - survey[ i, c("qrepeatlabel")] <- as.character( nestable[ nestabovenum , 1] ) } - + nestabovenum <- as.integer(which(nestable$name == nestabove ) -1) + survey[ i, c("qrepeatlabel")] <- as.character( nestable[ nestabovenum , 1] ) } + ## Sometimes it seems that we get an underscore for type else if ( survey[ i, c("type")] == "begin_repeat" ) {survey[ i, c("qrepeatlabel")] <- survey[ i, c("name")]} else if ( survey[ i, c("type")] !="end_repeat" && survey[ i - 1, c("qrepeat")] == "repeatnest1" ) {survey[ i, c("qrepeatlabel")] <- survey[ i - 1, c("qrepeatlabel")] } else if ( survey[ i, c("type")] !="end_repeat" && survey[ i - 1, c("qrepeat")] == "repeatnest2" ) {survey[ i, c("qrepeatlabel")] <- survey[ i - 1, c("qrepeatlabel")] } else if ( survey[ i, c("type")] == "end_repeat" && survey[ i - 1, c("qrepeat")] == "repeatnest1") {survey[ i, c("qrepeatlabel")] <- "household"} else if ( survey[ i, c("type")] == "end_repeat" && survey[ i - 1, c("qrepeat")] == "repeatnest2") { nestabove <- as.character(survey[ i - 1, c("qrepeatlabel")]) - nestabovenum <- as.integer(which(nestable$name == nestabove ) -1) - survey[ i, c("qrepeatlabel")] <- as.character( nestable[ nestabovenum , 1] ) } - + nestabovenum <- as.integer(which(nestable$name == nestabove ) -1) + survey[ i, c("qrepeatlabel")] <- as.character( nestable[ nestabovenum , 1] ) } + else {survey[ i, c("qrepeatlabel")] <- "household"} } - + ### Get question levels in order to match the variable name survey$qlevel <- "" for(i in 2:nrow(survey)) { if (survey[ i, c("type")] == "begin group" && survey[ i - 1, c("qlevel")] == "" ) {survey[ i, c("qlevel")] <- "level1"} else if (survey[ i, c("type")] == "begin_group" && survey[ i - 1, c("qlevel")] == "" ) {survey[ i, c("qlevel")] <- "level1"} - + else if (survey[ i, c("type")] == "begin group" && survey[ i - 1, c("qlevel")] == "level1") {survey[ i, c("qlevel")] <- "level2"} else if (survey[ i, c("type")] == "begin_group" && survey[ i - 1, c("qlevel")] == "level1") {survey[ i, c("qlevel")] <- "level2"} - + else if (survey[ i, c("type")] == "begin group" && survey[ i - 1, c("qlevel")] == "level2") {survey[ i, c("qlevel")] <- "level3"} else if (survey[ i, c("type")] == "begin_group" && survey[ i - 1, c("qlevel")] == "level2") {survey[ i, c("qlevel")] <- "level3"} - + else if (survey[ i, c("type")] == "begin group" && survey[ i - 1, c("qlevel")] == "level3") {survey[ i, c("qlevel")] <- "level4"} else if (survey[ i, c("type")] == "begin_group" && survey[ i - 1, c("qlevel")] == "level3") {survey[ i, c("qlevel")] <- "level4"} - + else if (survey[ i, c("type")] == "begin group" && survey[ i - 1, c("qlevel")] == "level4") {survey[ i, c("qlevel")] <- "level5"} else if (survey[ i, c("type")] == "begin_group" && survey[ i - 1, c("qlevel")] == "level4") {survey[ i, c("qlevel")] <- "level5"} - + ## Now end of group - + else if (survey[ i, c("type")] == "end group" && survey[ i - 1, c("qlevel")] == "level1") {survey[ i, c("qlevel")] <- "" } else if (survey[ i, c("type")] == "end_group" && survey[ i - 1, c("qlevel")] == "level1") {survey[ i, c("qlevel")] <- "" } - + else if (survey[ i, c("type")] == "end group" && survey[ i - 1, c("qlevel")] == "level2") {survey[ i, c("qlevel")] <- "level1"} else if (survey[ i, c("type")] == "end_group" && survey[ i - 1, c("qlevel")] == "level2") {survey[ i, c("qlevel")] <- "level1"} - + else if (survey[ i, c("type")] == "end group" && survey[ i - 1, c("qlevel")] == "level3") {survey[ i, c("qlevel")] <- "level2"} else if (survey[ i, c("type")] == "end_group" && survey[ i - 1, c("qlevel")] == "level3") {survey[ i, c("qlevel")] <- "level2"} - + else if (survey[ i, c("type")] == "end group" && survey[ i - 1, c("qlevel")] == "level4") {survey[ i, c("qlevel")] <- "level3"} else if (survey[ i, c("type")] == "end_group" && survey[ i - 1, c("qlevel")] == "level4") {survey[ i, c("qlevel")] <- "level3"} - + else if (survey[ i, c("type")] == "end group" && survey[ i - 1, c("qlevel")] == "level5") {survey[ i, c("qlevel")] <- "level4"} else if (survey[ i, c("type")] == "end_group" && survey[ i - 1, c("qlevel")] == "level5") {survey[ i, c("qlevel")] <- "level4"} - + else {survey[ i, c("qlevel")] <- survey[ i - 1, c("qlevel")]} } - + ### Get question groups in order to match the variable name ## Concatenation ofqlevel & qrepeat & type - survey$type2 <- survey$type - survey$type2[survey$type2 %in% c("begin_group","begin group","end_group","end group")] + survey$type2 <- survey$type + survey$type2[survey$type2 %in% c("begin_group","begin group","end_group","end group")] ## We need to handle situation with both repeat & group ## set <- as.data.frame(unique(dico[c("qlevel","qrepeat", "type")])) ## So 12 cases to handle - + cat(" \n Now rebuilding the variable full path in order to match with variable name from the exported dataset. \n Note that there should not be any dots in the orginal variables. \n Double Check as well there's no duplicate for the name column in the survey worksheet\n \n") @@ -288,41 +290,41 @@ kobo_dico <- function(form) { #i <- 54 #i <- 20 #survey[ 113, c("qgroup")] - if (survey[ i, c("qlevel")] %in% c("level1","level2","level3","level4","level5") && - survey[ i, c("qrepeat")] %in% c("", "repeatnest1", "repeatnest2") && - !(survey[ i, c("type")] %in% c("begin_group","begin group","end_group","end group","begin_repeat","begin repeat","end_repeat","end repeat")) ) - - {survey[ i, c("qgroup")] <- survey[ i - 1, c("qgroup")] - - + if (survey[ i, c("qlevel")] %in% c("level1","level2","level3","level4","level5") && + survey[ i, c("qrepeat")] %in% c("", "repeatnest1", "repeatnest2") && + !(survey[ i, c("type")] %in% c("begin_group","begin group","end_group","end group","begin_repeat","begin repeat","end_repeat","end repeat")) ) + + {survey[ i, c("qgroup")] <- survey[ i - 1, c("qgroup")] + + } else if (survey[ i, c("qlevel")] %in% c("level1") && - survey[ i, c("qrepeat")] %in% c("", "repeatnest1", "repeatnest2") && - survey[ i, c("type")] %in% c("begin_group","begin group") ) - - {survey[ i, c("qgroup")] <- survey[ i, c("name")] - + survey[ i, c("qrepeat")] %in% c("", "repeatnest1", "repeatnest2") && + survey[ i, c("type")] %in% c("begin_group","begin group") ) + + {survey[ i, c("qgroup")] <- survey[ i, c("name")] + } else if (survey[ i, c("qlevel")] %in% c("level2","level3","level4","level5") && - survey[ i, c("qrepeat")] %in% c("", "repeatnest1", "repeatnest2") && - survey[ i, c("type")] %in% c("begin_group","begin group") ) - - {survey[ i, c("qgroup")] <- paste(survey[ i - 1, c("qgroup")], survey[ i, c("name")],sep = ".") - + survey[ i, c("qrepeat")] %in% c("", "repeatnest1", "repeatnest2") && + survey[ i, c("type")] %in% c("begin_group","begin group") ) + + {survey[ i, c("qgroup")] <- paste(survey[ i - 1, c("qgroup")], survey[ i, c("name")],sep = ".") + } else if (survey[ i, c("qlevel")] %in% c("level1","level2","level3","level4","level5") && - survey[ i, c("qrepeat")] %in% c("repeatnest1", "repeatnest2") && - survey[ i, c("type")] %in% c("begin_repeat","begin repeat") ) - - {survey[ i, c("qgroup")] <- paste(survey[ i - 1, c("qgroup")], survey[ i, c("qrepeatlabel")], sep = ".") - + survey[ i, c("qrepeat")] %in% c("repeatnest1", "repeatnest2") && + survey[ i, c("type")] %in% c("begin_repeat","begin repeat") ) + + {survey[ i, c("qgroup")] <- paste(survey[ i - 1, c("qgroup")], survey[ i, c("qrepeatlabel")], sep = ".") + } else if (survey[ i, c("qlevel")] %in% c("level1","level2","level3","level4","level5") && - survey[ i, c("qrepeat")] %in% c("", "repeatnest1", "repeatnest2") && - survey[ i, c("type")] %in% c("end_group","end group","end_repeat","end repeat") ) - - {survey[ i, c("qgroup")] <- substr(survey[ i - 1, c("qgroup")] ,0, regexpr("\\.[^\\.]*$", survey[ i - 1, c("qgroup")] ) - 1) - + survey[ i, c("qrepeat")] %in% c("", "repeatnest1", "repeatnest2") && + survey[ i, c("type")] %in% c("end_group","end group","end_repeat","end repeat") ) + + {survey[ i, c("qgroup")] <- substr(survey[ i - 1, c("qgroup")] ,0, regexpr("\\.[^\\.]*$", survey[ i - 1, c("qgroup")] ) - 1) + } else {survey[ i, c("qgroup")] <- ""} } - - + + survey$fullname <- "" ## levels(as.factor(survey$type)) ## Need to loop around the data frame in order to concatenate full name as observed in data dump @@ -332,17 +334,30 @@ kobo_dico <- function(form) { if (survey[ i, c("qlevel")] == "") {survey[ i, c("fullname")] <- survey[ i, c("name")]} else {survey[ i, c("fullname")] <- paste(survey[ i, c("qgroup")],survey[ i, c("name")],sep = ".") } } - + ## a few colummns to adjust to match questions & choices survey$labelchoice <- survey$label - survey$order <- "" + survey$ordinal <- "" survey$weight <- "" survey$score <- "" survey$recategorise <- "" + + ## Relabelling disaggregation with fullname + + for (i in 1:nrow(survey)){ + name <- as.character(survey[i,"disaggregation"]) + if(name != "" && is.na(name)== FALSE){ + if(name %in% survey[,"fullname"]){ + survey[i, "disaggregation"] <- name + }else{ + survey[i, "disaggregation"] <- as.character(survey[survey$name == name & is.na(survey$name) == FALSE, "fullname"]) + } + }else{ + survey[i, "disaggregation"] <- NA + } + } - - - #### + #### #### Now looking at choices --######################################################################################################### #rm(choices) choices <- read_excel(form_tmp, sheet = "choices") @@ -350,132 +365,134 @@ kobo_dico <- function(form) { names(choices)[names(choices) == "label::english"] <- "label" names(choices)[names(choices) == "list name"] <- "listname" names(choices)[names(choices) == "list_name"] <- "listname" - + ## Remove trailing space choices$listname <- trim(choices$listname) choices$label <- trim(choices$label) - - if ("order" %in% colnames(choices)) + + if ("ordinal" %in% colnames(choices)) { - cat("12 - Good: You have a column `order` in your `choices` worksheet.\n"); + cat("12 - Good: You have a column `ordinal` in your `choices` worksheet.\n"); } else - {cat("12 - No column `order` in your `choices` worksheet. Creating a dummy one for the moment...\n"); - choices$order <- ""} - + {cat("12 - No column `ordinal` in your `choices` worksheet. Creating a dummy one for the moment...\n"); + choices$ordinal <- ""} + if ("weight" %in% colnames(choices)) { cat("13 - Good: You have a column `weight` in your `choices` worksheet.\n"); } else {cat("13 - No column `weight` in your `choices` worksheet. Creating a dummy one for the moment...\n"); choices$weight <- ""} - + if ("recategorise" %in% colnames(choices)) { cat("14 - Good: You have a column `recategorise` in your `choices` worksheet.\n"); } else {cat("14 - No column `recategorise` in your `choices` worksheet. Creating a dummy one for the moment...\n"); choices$recategorise <- ""} - + if ("score" %in% colnames(choices)) { cat("13 - Good: You have a column `score` in your `choices` worksheet.\n"); } else {cat("13 - No column `score` in your `choices` worksheet. Creating a dummy one for the moment...\n"); choices$score <- ""} - - choices <- choices[,c("listname", "name", "label", "order", "weight","score","recategorise")] + + choices <- choices[,c("listname", "name", "label", "ordinal", "weight","score","recategorise")] names(choices)[names(choices) == "label"] <- "labelchoice" #rm(choices) choices <- join(x = choices, y = survey, by = "listname", type = "left") - + choices$type <- with(choices, ifelse(grepl("select_one", ignore.case = TRUE, fixed = FALSE, useBytes = FALSE, choices$type), paste0("select_one_d"),choices$type)) - - choices$type <- with(choices, ifelse(grepl("select_multiple_d", ignore.case = TRUE, fixed = FALSE, useBytes = FALSE, choices$type), - paste0("select_multiple"),choices$type)) - - + + choices$type <- with(choices, ifelse(grepl("select_multiple", ignore.case = TRUE, fixed = FALSE, useBytes = FALSE, choices$type), + paste0("select_multiple_d"),choices$type)) + + names(choices)[9] <- "nameq" names(choices)[10] <- "labelq" - choices$labelfull <- paste0(choices$labelq, sep = ": ", choices$labelchoice) + choices$labelfull <- paste0(choices$labelchoice) choices$namefull <- paste0(choices$fullname, sep = ".", choices$name) - - - + + + #### Now Row bing questions & choices######################################################################################################## # - #names(choices) -"type", "name", "namefull", "labelfull", "listname", "qrepeat", "qlevel", "qgroup" - ## not kept: "nameq" "labelq" ,"fullname", "label", - #names(survey) - "type" "name", "fullname", "label", "listname", "qrepeat"m "qlevel", "qgroup" + #names(choices) -"type", "name", "namefull", "labelfull", "listname", "qrepeat", "qlevel", "qgroup" + ## not kept: "nameq" "labelq" ,"fullname", "label", + #names(survey) - "type" "name", "fullname", "label", "listname", "qrepeat"m "qlevel", "qgroup" choices2 <- choices[ ,c("type", "name", "namefull", "labelfull", "chapter","disaggregation","correlate", "structuralequation.risk","structuralequation.coping","structuralequation.resilience","anonymise", "clean","cluster","predict","mappoint","mappoly", "listname", "qrepeat","qrepeatlabel", "qlevel", "qgroup", "labelchoice", - #"repeatsummarize", - "variable", - #"indicator","indicatorgroup","indicatortype", "indicatorlevel","dataexternal","indicatorcalculation","indicatornormalisation", - "order", "weight","score", "recategorise")] - - + #"repeatsummarize", + "variable", + #"indicator","indicatorgroup","indicatortype", "indicatorlevel","dataexternal","indicatorcalculation","indicatornormalisation", + "ordinal", "weight","score", "recategorise")] + + names(choices2)[names(choices2) == "namefull"] <- "fullname" names(choices2)[names(choices2) == "labelfull"] <- "label" - - + + survey2 <- survey[,c("type", "name", "fullname", "label", "chapter", "disaggregation","correlate", "structuralequation.risk","structuralequation.coping","structuralequation.resilience","anonymise", "clean","cluster","predict","mappoint","mappoly", "listname", "qrepeat","qrepeatlabel", "qlevel", "qgroup", "labelchoice", #"repeatsummarize", "variable", - + #"indicator","indicatorgroup","indicatortype", "indicatorlevel","dataexternal","indicatorcalculation","indicatornormalisation", - "order", "weight","score", "recategorise")] - + "ordinal", "weight","score", "recategorise")] + ### Check -- normally there should not be duplicate #choices3 <- choices2[!duplicated(choices2$fullname), ] - + # names(choices2) # names(survey2) - + survey2$formpart <- "questions" choices2$formpart <- "answers" - + dico <- rbind(survey2,choices2) - - + + ## Remove trailing space dico$fullname <- trim(dico$fullname) dico$listname <- trim(dico$listname) - - + + ## Trim long label... dico$label <- substring(dico$label, 0, 85) - - + + ## A few fix on the dico dico <- dico[ !is.na(dico$name), ] dico <- dico[ !is.na(dico$type), ] - + ## Exclude repeat questions -- still need more work #levels(as.factor(dico$qrepeat)) ## Changing type for flatenned repeat questions - + #dico$type[dico$qrepeat== "repeat" & dico$type %in% c("integer")] <- "integerlist" #dico$type[dico$qrepeat== "repeat" & dico$type %in% c("text")] <- "textlist" #dico$type[dico$qrepeat== "repeat" & dico$type %in% c("select_one")] <- "select_onelist" - + #dico$type[dico$qrepeat== "repeat" & dico$type %in% c("select_one_d")] <- "integer" #dico$type[dico$qrepeat== "repeat" & dico$type %in% c("select_multiple")] <- "integer" - + #dico[dico$qrepeat== "repeat" & dico$type %in% c("select_multiple")] - + #if (dico$qrepeat== "repeat" && dico$type %in% c("select_one_d", "select_multiple")) {dico$type <- "integer" # cat("Note that select_one & select_multiple questions within REPEAT part are converted to integer (results are summed up).\n") #} else { dico$type <- dico$type - # cat("Note that select_one & select_multiple questions within REPEAT part are converted to integer (results are summed up).\n") - - write.csv(dico, paste0("data/dico_",form,".csv"), row.names = FALSE, na = "") - - # f_csv(dico) -# return(dico) + # cat("Note that select_one & select_multiple questions within REPEAT part are converted to integer (results are summed up).\n") + + write.csv(dico, paste0(mainDir,"/data/dico_",form,".csv"), row.names = FALSE, na = "") + write("\ndico <- read.csv(paste0(mainDir,'/data/dico_',form,'.csv'))\n",paste0(mainDir,"/code/0-config.R"), append=TRUE ) + + + # f_csv(dico) + # return(dico) } NULL diff --git a/R/kobo_label.R b/R/kobo_label.R index f70c070..65d7df9 100644 --- a/R/kobo_label.R +++ b/R/kobo_label.R @@ -9,7 +9,7 @@ #' @param dico ( generated from kobo_dico) #' #' -#' @return A "data.table" with the full data.label. To be used for graphs generation. +#' @return A "data.table" with the full data.label, and choices labels. To be used for graphs generation. #' #' @author Edouard Legoupil #' @@ -33,12 +33,23 @@ kobo_label <- function(datalabel, dico) { names(data.label)[1] <- "fullname" data.label <- join (x=data.label, y=dico, by="fullname", type="left" ) # write.csv(data.label, "out/datalabel.csv") - for (i in 1:nrow(data.label)) { attributes(datalabel)$variable.labels[ i] <- as.character(data.label[ i, c("label")]) } + for (i in 1:nrow(data.label)) { + attributes(datalabel)$variable.labels[ i] <- as.character(data.label[ i, c("label")]) + + if(data.label$type[i] %in% c("select_one", "select_multiple_d")){ + variablename <- data.label$fullname[i] + variableLabel <- as.character(data.label[ i, c("label")]) + listName <- as.character(data.label[data.label$fullname == variablename, "listname"]) + choicesLabel <- unlist(dico[dico$listname == listName & dico$formpart == "answers", "label"]) + choicesName <- unlist(dico[dico$listname == listName & dico$formpart == "answers", "name"]) + datalabel[[variablename]] <- mapvalues(datalabel[[variablename]], from = as.character(choicesName), to = as.character(choicesLabel), warn_missing = FALSE) + } + } test <- data.label[ !(is.na(data.label$name)), ] if (nrow(data.label) > nrow(test)) { cat (paste0("you have ",nrow(data.label), " variables in you frame but only ",nrow(test) ," were relabeled.\n")) cat(" You may double check that the form and the data are matching \n") cat("Double check as well that you did download the data with the correct header (i.e. full path with point delimiters) \n") - } else { cat ("All variables were mapped. great \n")} + } return(datalabel) } diff --git a/R/kobo_projectinit.R b/R/kobo_projectinit.R index 965da64..77f828b 100644 --- a/R/kobo_projectinit.R +++ b/R/kobo_projectinit.R @@ -145,6 +145,14 @@ kobo_projectinit <- function() { if (!file.exists(destfile)) { file.copy(paste(path_correct,"/koboloadeR/script/XLSform_template.xlsx", sep = ""), destfile) } + destfile = paste0(mainDir,"/code/data_koboloadeR.xlsx") + if (!file.exists(destfile)) { + file.copy(paste(path_correct,"/koboloadeR/script/data_koboloadeR.xlsx", sep = ""), destfile) + } + destfile = paste0(mainDir,"/code/form_koboloadeR.xls") + if (!file.exists(destfile)) { + file.copy(paste(path_correct,"/koboloadeR/script/form_koboloadeR.xls", sep = ""), destfile) + } ## shiny_app Subfolder creation #### diff --git a/R/kobo_question.R b/R/kobo_question.R index 7fc69e2..460b034 100644 --- a/R/kobo_question.R +++ b/R/kobo_question.R @@ -23,6 +23,7 @@ #' kobo_question <- function(question,mainDir='') { + library(stringr) # Source project config parameters if (mainDir==''){ mainDir <- getwd() @@ -91,8 +92,7 @@ kobo_question <- function(question,mainDir='') { frequ[,1] <- selectchoices_questions$labelchoice[match(frequ[,1], selectchoices_questions$name)] frequ[,1] <- factor(frequ[,1]) - } - else{ + }else{ frequ<-data.frame(table(data.single[1])) } names(frequ)<- c("Var1","Freq") @@ -671,7 +671,7 @@ kobo_question <- function(question,mainDir='') { } else{ selectinteger <- as.character(select_question[, c("fullname")]) - data.integer <- data [selectinteger ] + data.integer <- data [selectinteger] selectfacet <- as.character(select_question[select_question$disaggregation!="" , c("fullname")]) selectfacet <- selectfacet[!is.na(selectfacet)] @@ -685,7 +685,7 @@ kobo_question <- function(question,mainDir='') { title <- select_question$label ## Ensure that the variable is recognised as numeric - select.data.integer <- data.frame(as.numeric(na.omit(data.integer[ ,1]))) + data.integer[,question] <- as.numeric(data.integer[,question]) #str(data.integer[ , i]) totalanswer <- nrow(data.integer) @@ -698,7 +698,7 @@ kobo_question <- function(question,mainDir='') { # trendline on histogram by adding geom_density - histograms <- ggplot(data=select.data.integer, aes(select.data.integer)) + + histograms <- ggplot(data=data.integer, aes(x = data.integer[,question])) + geom_histogram(aes(y =..density..), fill="#2a87c8", alpha = .6, binwidth=0.5) + geom_density(adjust=2) + scale_x_continuous(expand = c(0,0)) + @@ -708,9 +708,9 @@ kobo_question <- function(question,mainDir='') { print(histograms) cat("\n") cat("\n") - names(select.data.integer) <- select_question$label + names(data.integer) <- select_question$label cat("\n") - print(summary(select.data.integer)) + print(summary(data.integer)) cat("\n") cat(paste0("Out of ", totalanswer," respondents, ", count_replied," (",percentresponse,")"," answered to this question.")) cat("\n") diff --git a/R/kobo_to_xlsform.R b/R/kobo_to_xlsform.R index f016503..df560a3 100644 --- a/R/kobo_to_xlsform.R +++ b/R/kobo_to_xlsform.R @@ -8,6 +8,9 @@ #' #' Note that this function only works with \code{data.frames}. The function #' will throw an error for any other object types. +#' +#' @param df The dataframe object to be processed. For groups of questions to be processed, +#' they must have been exported from an ODK plateform with dots (".") as separator. #' #' @param form The full filename of the form to be accessed (xls or xlsx file). #' It is assumed that the form is stored in the data folder. @@ -46,16 +49,6 @@ kobo_to_xlsform <- function(df,form = "form.xls", anonymise = rep(as.character(NA), ncol(df)), stringsAsFactors = FALSE) - ## Fill survey type - for(i in seq_along(df)) { - #i <-12 - #cat(i) - if(is.factor(df[,i])) { - survey[i,]$type <- paste0('select_one ', as.character(names(df[i])), '_choices') - } else { - survey[i,]$type <- class(df[,i])[1] - } - } ## build choices sheet choices <- data.frame(list_name = as.character(NA), name = as.character(NA), @@ -63,35 +56,134 @@ kobo_to_xlsform <- function(df,form = "form.xls", order = as.integer(NA), stringsAsFactors = FALSE) - ## Loop around variables to build choices based on factor levels - for(i in seq_along(df)) { - #i <-2 - if(is.factor(df[,i])) { + ## Fill survey type + for(i in 1:ncol(df)) { + classcol <- class(df[,i][[1]])[1] + + if(classcol == "character"){ + if(ncol(df) != i){ + if(grepl(paste0(names(df[,i]),"."), names(df[,i+1])) && (is.logical (df[,i+1][[1]]) || + (is.numeric(df[i+1][[1]]) && sum(df[,i+1]<=nrow(df))) || + (sum(as.numeric(df[,i+1][[1]]), na.rm = TRUE) <= nrow(df)) + )){ + survey[i,]$type <- paste0('select_multiple ', as.character(names(df[i])), '_choices') + for (j in 1:length(grep(paste0(names(df[, i]),"."), names(df)))){ + labelChoice <- stringr::str_remove(names(df[,i +j]),paste0(names(df[,i]),".")) + choice <- c(names(df[i]), + labelChoice, + labelChoice, + NA) + choices <- rbind(choices, choice) + } + }else if(sum(is.na(as.numeric(as.character(df[,i][[1]])))) <= nrow(df)*0.999){ + df[,i] <- as.numeric(df[,i][[1]]) + survey[i,]$type <- "decimal" + }else if(nlevels(as.factor(df[,i][[1]])[[1]]) < n){ + vect_fact <- lapply(df[,i], factor) + df[,i] <- vect_fact + survey[i,]$type <- paste0('select_one ', as.character(names(df[i])), '_choices') + + labelChoice <- levels(df[,i][[1]]) + for (j in 1:length(labelChoice)){ + choice <- c(names(df[i]), + labelChoice[j], + labelChoice[j], + NA) + choices <- rbind(choices, choice) + } + }else{ + survey[i,]$type <- "text" + } + }else{ - cat(paste0("Factor: ",i,"\n")) - frame <- as.data.frame((levels(df[,i]))) - if (nrow(frame)!=0 & nrow(frame)<100 ){ - for(j in 1:nrow(frame)) { - # j <- 1 - choices1 <- data.frame(list_name = as.character(NA), - name = as.character(NA), - label = as.character(NA), - order = as.integer(NA), - stringsAsFactors = FALSE) + if(sum(is.na(as.numeric(as.character(df[,i][[1]])))) <= nrow(df)*0.999){ + df[,i] <- as.numeric(df[,i][[1]]) + survey[i,]$type <- "decimal" + }else if(nlevels(lapply(df[,i], factor)[[1]]) < n){ + vect_fact <- lapply(df[,i], factor) + df[,i] <- vect_fact + survey[i,]$type <- paste0('select_one ', as.character(names(df[i])), '_choices') + + labelChoice <- levels(df[,i][[1]]) + for (j in 1:length(labelChoice)){ + choice <- c(names(df[i]), + labelChoice[j], + labelChoice[j], + NA) + choices <- rbind(choices, choice) + } + }else if(nlevels(lapply(df[,i], factor)[[1]]) < n){ + vect_fact <- lapply(df[,i], factor) + df[,i] <- vect_fact + survey[i,]$type <- paste0('select_one ', as.character(names(df[i])), '_choices') - cat(paste0("Inserting level: ",j,"\n")) - choices1[j,]$list_name <- paste0( as.character(names(df[i])), '_choices') - choices1[j,]$name <- as.character(frame[j, ]) - choices1[j,]$label <- as.character(frame[j,]) - choices1[j,]$order <- j - choices <- rbind(choices, choices1) + + labelChoice <- levels(df[,i][[1]]) + for (j in 1:length(labelChoice)){ + choice <- c(names(df[i]), + labelChoice[j], + labelChoice[j], + NA) + choices <- rbind(choices, choice) + } + }else{ + survey[i,]$type <- "text" } - rm(choices1) - } else {cat("Too many choices to consider it as a factor\n")} - ### - } else {cat("This is not a factor \n")} + } + }else if(classcol == "numeric"){ + survey[i,]$type <- "decimal" + + }else if(classcol == "POSIXct"){ + survey[i, ]$type <- "date" + }else if (classcol == "logical"){ + survey[i, ] <- NA + }else if (classcol == "factor"){ + survey[i,]$type <- paste0('select_one ', as.character(names(df[i])), '_choices') + + labelChoice <- levels(df[,i][[1]]) + for (j in 1:length(labelChoice)){ + choice <- c(names(df[i]), + labelChoice[j], + labelChoice[j], + NA) + choices <- rbind(choices, choice) + } + } } - + survey <- survey[complete.cases(survey[1:3]), ] + + #create begin_groups + + form_str <- data.frame(col = names(df)) + n_vars <- form_str$col %>% stringr::str_split("\\.") %>% lapply(function(z) length(z)) %>% unlist() %>% max() + + form_str <- form_str %>% + separate(col, into = as.character(paste0("X",1:n_vars)), sep = "\\.", fill = "right", remove = FALSE) + + insertRow <- function(existingDF, newrow, r) { + existingDF[seq(r+1,nrow(existingDF)+1),] <- existingDF[seq(r,nrow(existingDF)),] + existingDF[r,] <- newrow + existingDF + } + + uc <- list() + for (i in 2:ncol(form_str)){ + uc[i] <- unique(form_str[i]) + for(j in 1:length(uc[[i]])){ + if(!is.na(sum(uc[[i]][j] == form_str[,i]))){ + if(sum(uc[[i]][j] == form_str[,i]) > 1){ + q_name <- as.character(uc[[i]][j]) + begin_g <- c("begin group",q_name,q_name,rep(NA, 6)) + w_begin <- match(survey$name[grep(q_name, survey$name)[1]],survey$name) + survey <- insertRow(survey, begin_g, w_begin) + w_end <- match(survey$name[tail(grep(q_name, survey$name), n =1)],survey$name)+1 + end_g <- c("end group",q_name,q_name,rep(NA, 6)) + survey <- insertRow(survey, end_g, w_end) + } + } + } + } + wb <- createWorkbook(type = "xls") sheetname <- "survey" surveySheet <- createSheet(wb, sheetname) @@ -107,6 +199,5 @@ kobo_to_xlsform <- function(df,form = "form.xls", if (file.exists(form_tmp)) file.remove(form_tmp) saveWorkbook(wb, form_tmp) - cat("XLS form has been successfully generated") } NULL \ No newline at end of file diff --git a/inst/script/data_koboloadeR.xlsx b/inst/script/data_koboloadeR.xlsx new file mode 100644 index 0000000..7ead5bd Binary files /dev/null and b/inst/script/data_koboloadeR.xlsx differ diff --git a/inst/script/form_koboloadeR.xls b/inst/script/form_koboloadeR.xls new file mode 100644 index 0000000..1cc37cd Binary files /dev/null and b/inst/script/form_koboloadeR.xls differ diff --git a/inst/shiny_app/app_koboloadeR.R b/inst/shiny_app/app_koboloadeR.R index 0472b0a..8787a09 100644 --- a/inst/shiny_app/app_koboloadeR.R +++ b/inst/shiny_app/app_koboloadeR.R @@ -160,7 +160,7 @@ server <- function(input, output,session) { cat("\n") cat(paste('sheet <- "',inFile_sheet,'"', sep = "")) cat("\n") - cat(paste0('data <- read_excel("', inFile_data$name,'", sheet = "',inFile_sheet,'")\n\n') ) + cat(paste0('data <- read_excel("', mainDir,'/data/',inFile_data$name,'", sheet = "',inFile_sheet,'")\n\n') ) cat("\n") cat("### 1. Weighting system ###\n") @@ -194,7 +194,7 @@ server <- function(input, output,session) { observeEvent(input$dico,{ isolate({source(paste0(mainDir,"/code/0-config.R"), local = TRUE)}) - kobo_dico(mainDir) + kobo_dico(form) if (input$analysis_plan == 'y') { kobo_indicator(mainDir) } diff --git a/man/kobo_bar_multi_indiv.Rd b/man/kobo_bar_multi_indiv.Rd new file mode 100644 index 0000000..de7a887 --- /dev/null +++ b/man/kobo_bar_multi_indiv.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/kobo_crunch.R +\name{kobo_bar_multi_indiv} +\alias{kobo_bar_multi_indiv} +\title{Generate bar Chart - frequency - for select_one question} +\usage{ +kobo_bar_multi_indiv(question, mainDir = "") +} +\arguments{ +\item{question}{Question to be treated by the function. It should be the "fullname" (as a string) of the question as generated by kobo_dico().} + +\item{mainDir}{Path to the project's working directory: mainly for shiny app} +} +\value{ +Returns a list containing the question fullname, ggplot object of the plot, and a frequency table, weighted if this is included in the 0-config file. +} +\description{ +Generate basic data exploration analysis for kobo select_one question. Returns a list containing the question fullname, ggplot object of the plot, and a frequency table, weighted if this is included in the 0-config file. If disaggregation is set into the form, then the graph are "faceted" per disaggregation. +} +\examples{ +kobo_bar_multi_indiv("S5_FOOD_ASSESS.q3_food_coping_strat") +\donttest{ +kobo_bar_multi_indiv("S5_FOOD_ASSESS.q3_food_coping_strat") +} + +} +\author{ +Edouard Legoupil, Elliott Messeiller +} diff --git a/man/kobo_bar_one_indiv.Rd b/man/kobo_bar_one_indiv.Rd new file mode 100644 index 0000000..e30943e --- /dev/null +++ b/man/kobo_bar_one_indiv.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/kobo_crunch.R +\name{kobo_bar_one_indiv} +\alias{kobo_bar_one_indiv} +\title{Generate bar Chart - frequency - for select_one question} +\usage{ +kobo_bar_one_indiv(question, mainDir = "") +} +\arguments{ +\item{question}{Question to be treated by the function. It should be the "fullname" (as a string) of the question as generated by kobo_dico().} + +\item{mainDir}{Path to the project's working directory: mainly for shiny app} +} +\value{ +Returns a list containing the question fullname, ggplot object of the plot, and a frequency table, weighted if this is included in the 0-config file. +} +\description{ +Generate basic data exploration analysis for kobo select_one question. Returns a list containing the question fullname, ggplot object of the plot, and a frequency table, weighted if this is included in the 0-config file. If disaggregation is set into the form, then the graph are "faceted" per disaggregation. +} +\examples{ +kobo_bar_one_indiv("question_name") +\dontrun{ +kobo_bar_one_indiv("question_name") +} + +} +\author{ +Edouard Legoupil, Elliott Messeiller +} diff --git a/man/kobo_crunching.Rd b/man/kobo_crunching.Rd new file mode 100644 index 0000000..893840c --- /dev/null +++ b/man/kobo_crunching.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/kobo_crunch.R +\name{kobo_crunching} +\alias{kobo_crunching} +\title{Pick the right function for each question} +\usage{ +kobo_crunching(question, mainDir = "") +} +\arguments{ +\item{question}{Question to be treated by the function. It should be the "fullname" (as a string) of the question as generated by kobo_dico().} + +\item{mainDir}{Path to the project's working directory: mainly for shiny app} +} +\value{ +Return the result of the function. +} +\description{ +This function choose and run the right type of koboloadeR function for data crunching. +} +\examples{ +kobo_crunching("question_name") +\dontrun{ +kobo_crunching("question_name") +} + +} +\author{ +Elliott Messeiller +} diff --git a/man/kobo_histo_indiv.Rd b/man/kobo_histo_indiv.Rd new file mode 100644 index 0000000..4f0587a --- /dev/null +++ b/man/kobo_histo_indiv.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/kobo_crunch.R +\name{kobo_histo_indiv} +\alias{kobo_histo_indiv} +\title{Generate histograme for individual integer questions} +\usage{ +kobo_histo_indiv(question, mainDir = "") +} +\arguments{ +\item{mainDir}{Path to the project's working directory: mainly for proper shiny app path} +} +\description{ +Automatically generate histogrammes for each of the integer questions in the dataset. ggplot2 is used. +} +\examples{ +kobo_histo_indiv() +\dontrun{ +kobo_histo_indiv() +} + +} +\author{ +Edouard Legoupil, Elliott Messeiller +} diff --git a/man/kobo_label.Rd b/man/kobo_label.Rd index d8fe293..a985061 100644 --- a/man/kobo_label.Rd +++ b/man/kobo_label.Rd @@ -12,7 +12,7 @@ kobo_label(datalabel, dico) \item{data}{.} } \value{ -A "data.table" with the full data.label. To be used for graphs generation. +A "data.table" with the full data.label, and choices labels. To be used for graphs generation. } \description{ Insert the full label in data frame based on dictionnary diff --git a/man/kobo_to_xlsform.Rd b/man/kobo_to_xlsform.Rd index 73a30af..fcc1abf 100644 --- a/man/kobo_to_xlsform.Rd +++ b/man/kobo_to_xlsform.Rd @@ -7,6 +7,9 @@ kobo_to_xlsform(df, form = "form.xls", n = 100) } \arguments{ +\item{df}{The dataframe object to be processed. For groups of questions to be processed, +they must have been exported from an ODK plateform with dots (".") as separator.} + \item{form}{The full filename of the form to be accessed (xls or xlsx file). It is assumed that the form is stored in the data folder.}