library(readxl)
library(dplyr)
library(lubridate)
library(DT)

# Contact migration script ----
# Reads the legacy contact workbook, renames its columns to the template
# headers, drops rows with missing mandatory values, replaces code-listed
# descriptions with their codes, coerces column types, trims over-long
# strings, and writes the result to "Contact.csv".

#' Create one empty data frame per template sheet.
#'
#' For every sheet name in the global `snames`, builds an empty data frame
#' whose columns are the unique `Header` values that the global `saptemplate`
#' lists for that sheet, and assigns it under the sheet's name. Both globals
#' are looked up when the function is called, not when it is defined.
#'
#' @param envir Environment that receives the data frames. Defaults to the
#'   caller's environment; without it the objects would be created in this
#'   function's own frame and discarded on return.
#' @return `NULL`, invisibly; called for its side effect.
mutlstxlrdr <- function(envir = parent.frame()) {
  for (sheet in snames) {
    headers <- unique(saptemplate[saptemplate$`Sheet Name` == sheet, ]$Header)
    # read.table() on an empty source with explicit col.names is used here to
    # create an empty data frame with those columns.
    assign(sheet, read.table("", col.names = headers), envir = envir)
  }
  invisible(NULL)
}

#' Stop with a readable message when expected columns are absent.
#'
#' @param cols Character vector of column names that must exist.
#' @param available Character vector of column names that do exist.
#' @param what Short label used at the start of the error message.
#' @return `cols`, invisibly.
require_columns <- function(cols, available, what) {
  absent <- setdiff(cols, available)
  if (length(absent) > 0) {
    stop(what, " not found: ", paste(absent, collapse = ", "), call. = FALSE)
  }
  invisible(cols)
}

# Code lists ----
# We can avoid creating a separate directory for code list. But organizing may
# be difficult. However, this can be explored further if we want transform all
# the data in one go i.e. not by functions (contacts, accounts etc.).

# File paths. `pattern` is a regular expression, so anchor on the extension.
filenames <- list.files(
  "./contacts/CodeList",
  pattern = "\\.xlsx$",
  full.names = TRUE
)

# List of the sheet names, one element per workbook
sheet_names <- lapply(filenames, excel_sheets)

# Read every sheet of every workbook into one list keyed by sheet name. The
# list is accumulated across workbooks; when two workbooks share a sheet name
# the later workbook wins.
codelist_files <- list()
for (i in seq_along(filenames)) {
  sheets <- lapply(sheet_names[[i]], read_excel, path = filenames[[i]])
  codelist_files[sheet_names[[i]]] <- sheets
}

# Legacy data ----
oldfilepath <- "./contacts/olddummy.xlsx"
old_sheet_names <- excel_sheets(oldfilepath)
old.data <- lapply(old_sheet_names, read_excel, path = oldfilepath)
names(old.data) <- old_sheet_names

# Names of the sheets imported
names(old.data)

# Template ----
saptemplate <- read_excel(
  "./contacts/Contact.xlsx",
  sheet = "Field_Definitions"
)
snames <- unique(saptemplate$`Sheet Name`)

# Creates an empty data frame for each sheet in snames, named after the sheet
for (sheet in snames) {
  headers <- saptemplate[saptemplate$`Sheet Name` == sheet, ]$Header
  assign(sheet, read.table("", col.names = headers))
}

# Template rows that describe the "Contact" sheet
contact_def <- saptemplate[saptemplate$`Sheet Name` == "Contact", ]

# Rename legacy columns ----
# Selecting only one table as sample. `$` is kept on purpose: on a plain list
# it partially matches names, which the original lookup may rely on.
old.copy <- old.data$Contact_o
if (is.null(old.copy)) {
  stop("Sheet 'Contact_o' not found in ", oldfilepath, call. = FALSE)
}

mapped <- read.csv("./contacts/contact_map.csv", sep = ";")

# Look up the new header for every legacy column (one lookup per column of
# old.copy, independent of how many rows the mapping file has).
old_names <- colnames(old.copy)
x <- mapped$Header[match(old_names, mapped$oldkey)]
if (anyNA(x)) {
  stop(
    "No target header in contact_map.csv for column(s): ",
    paste(old_names[is.na(x)], collapse = ", "),
    call. = FALSE
  )
}
colnames(old.copy) <- x # Changing column names

# Mandatory columns ----
# List of mandatory columns
essential.rows <- contact_def |>
  filter(Mandatory == "Yes") |>
  pull(Header)
require_columns(essential.rows, colnames(old.copy), "Mandatory column(s)")

# Number of missing values per mandatory column
essen.rows.table <- data.frame(
  Item = essential.rows,
  Missing = vapply(
    essential.rows,
    function(col) sum(is.na(old.copy[[col]])),
    integer(1),
    USE.NAMES = FALSE
  )
)

# Remove the rows with missing mandatory values
for (col in essential.rows) {
  old.copy <- old.copy[!is.na(old.copy[[col]]), ]
}

# Code-listed columns ----
# List of columns that have a codelist
codelistcols <- contact_def |>
  filter(!is.na(`CodeList File Path`)) |>
  pull(Header)
require_columns(codelistcols, colnames(old.copy), "Code-listed column(s)")
require_columns(codelistcols, names(codelist_files), "Code list sheet(s)")

# Per column: missing values and values that are not in the code list.
# NA is added to the allowed set, else empty cells also get counted.
codelisted.rows.table <- data.frame(
  Item = codelistcols,
  Missing = vapply(
    codelistcols,
    function(col) sum(is.na(old.copy[[col]])),
    integer(1),
    USE.NAMES = FALSE
  ),
  Not_from_code = vapply(
    codelistcols,
    function(col) {
      allowed <- c(pull(codelist_files[[col]], Description), NA)
      sum(!old.copy[[col]] %in% allowed)
    },
    integer(1),
    USE.NAMES = FALSE
  )
)
codelisted.rows.table

# Matches each column with the corresponding code list and returns the value
# from the code list's second column. Values without a matching Description
# come back as NA from match(), so no separate blanking pass is needed.
for (col in codelistcols) {
  code_list <- codelist_files[[col]]
  hits <- match(old.copy[[col]], pull(code_list, Description))
  old.copy[[col]] <- pull(code_list, 2)[hits]
}

# Data types ----
# List of data types. Non Exhaustive ATM
dtype <- contact_def$`Data Type`

# Align template attributes with old.copy by header name rather than by
# position, so column order and count do not have to match the template.
template_row <- match(colnames(old.copy), contact_def$Header)
if (anyNA(template_row)) {
  warning(
    "No template definition for column(s): ",
    paste(colnames(old.copy)[is.na(template_row)], collapse = ", "),
    call. = FALSE
  )
}
col_dtype <- dtype[template_row]

# This list will increase and also change based on input date and time formats
for (i in seq_along(old.copy)) {
  type_i <- col_dtype[i]
  if (is.na(type_i)) {
    next # no data type known for this column; leave it untouched
  }
  values <- old.copy[[i]]
  if (type_i == "String") {
    old.copy[[i]] <- as.character(values)
  } else if (type_i == "Boolean") {
    old.copy[[i]] <- as.logical(values)
  } else if (type_i == "DateTime") {
    old.copy[[i]] <- lubridate::ymd_hms(values)
  } else if (type_i == "Time") {
    old.copy[[i]] <- lubridate::hms(values)
  }
}

# Maximum lengths ----
# List of max lengths mentioned
max.length <- contact_def$`Max Length`
col_max_length <- as.numeric(max.length)[template_row]

colclasses <- lapply(old.copy, class) # getting column classes

# If string length is more than mentioned, trim it to the mentioned length.
# Columns without a max length are left as they are. is.character() is used
# because class() can return more than one element (e.g. date-time columns).
for (i in seq_along(old.copy)) {
  limit <- col_max_length[i]
  if (is.character(old.copy[[i]]) && !is.na(limit)) {
    old.copy[[i]] <- substr(old.copy[[i]], 1L, limit)
  }
}

write.csv(old.copy, "Contact.csv", row.names = FALSE)