You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

100 lines
4.8 KiB

library(readxl)
library(dplyr)
library(lubridate)
library(DT)
mutlstxlrdr <- function() {
for (i in seq_along(sheet.na)) {
colnames <-
unique(saptemplate[saptemplate$`Sheet Name` == snames[i], ]$Header)
df <- read.table("", col.names = colnames)
assign(snames[i], df)
}
}
filenames <- list.files("./contacts/CodeList", pattern = "*.xlsx", full.names = T) # We can avoid creating a separate directory for code list. But organizing may be difficult. However, this can be explored further if we want transform all the data in one go i.e. not by functions (contacts, accounts etc.).
# File paths
sheet_names <- lapply(filenames, excel_sheets) # Creates a list of the sheet names
for (i in seq_along(filenames)) {
codelist_files <-
lapply(excel_sheets(filenames[[i]]), read_excel, path = filenames[[i]]) # Reads the sheets of the excel files
names(codelist_files) <-
c(sheet_names[[i]]) # Renames them according to the sheet names extracted above
}
# Names of the files imported
oldfilepath <- ("./contacts/olddummy.xlsx")
old.data <-
lapply(excel_sheets(oldfilepath), read_excel, path = oldfilepath)
names(old.data) <- excel_sheets(oldfilepath)
# Names of the files imported
names(old.data)
saptemplate <-
read_excel("./contacts/Contact.xlsx", sheet = "Field_Definitions")
snames <- unique(saptemplate$`Sheet Name`)
# Creates data frame for each sheet in snames
for (i in seq_along(snames)) {
colnames <-
saptemplate[saptemplate$`Sheet Name` == snames[i], ]$Header # Defines the column names
df <- read.table("", col.names = colnames) # Creates an empty data frame using the column names
assign(snames[i], df) # Assigns value of df to a data frame named in sname
}
old.copy <- old.data$Contact_o # Selecting only one table as sample
mapped <- read.csv("./contacts/contact_map.csv", sep = ";")
x = NULL
for (i in 1:nrow(mapped)) {
x[i] = mapped[mapped$oldkey == colnames(old.copy[i]), ]$Header
}
colnames(old.copy) <- x # Changing column names
saptemplate[saptemplate$`Sheet Name` == "Contact", ] |>
filter(Mandatory == "Yes") |>
pull(Header) -> essential.rows # List of mandatory columns
essen.rows.table = read.table("", col.names = c("Item", "Missing"))
for (i in seq_along(essential.rows)) {
essen.rows.table[i, 2] <- sum(is.na(old.copy[, essential.rows[i]]))
essen.rows.table[i, 1] <- essential.rows[i]
} # Creates the table below
for (i in seq_along(essential.rows)) {
old.copy <- old.copy[!is.na(old.copy[, essential.rows[i]]), ]
} # Remove the rows with missing mandatory values
codelistcols <- saptemplate[saptemplate$`Sheet Name` == "Contact", ] |>
filter(!is.na(`CodeList File Path`)) |> pull(Header) # List of columns that have a codelist
codelisted.rows.table = read.table("", col.names = c("Item", "Missing", "Not_from_code"))
for (i in seq_along(codelistcols)) {
codelisted.rows.table[i, 3] <-
sum(!pull(old.copy[, codelistcols[i]], 1) %in% c(pull(codelist_files[codelistcols[i]][[1]], Description), NA)) # Added NA else empty columns also get counted
codelisted.rows.table[i, 2] <-
sum(is.na(old.copy[, codelistcols[i]]))
codelisted.rows.table[i, 1] <- codelistcols[i]
} # Creates the table below
codelisted.rows.table
for (i in seq_along(codelistcols)) {
old.copy[!pull(old.copy[, codelistcols[i]], 1) %in% c(pull(codelist_files[codelistcols[i]][[1]], Description), NA), codelistcols[i]] <-
NA
} # Removes the value in case of mismatch
for (i in seq_along(codelistcols)) {
old.copy[, codelistcols[i]] <-
pull(codelist_files[codelistcols[i]][[1]], 2)[match(pull(old.copy, codelistcols[i]),
pull(codelist_files[codelistcols[i]][[1]], Description))]
} # Matches each column with the corresponding code list and returns the value
dtype <-
saptemplate[saptemplate$`Sheet Name` == "Contact", ]$`Data Type` # List of data types. Non Exhaustive ATM
for (i in 1:ncol(old.copy)) {
if (dtype[i] == "String") {old.copy[, i] <- as.character(pull(old.copy, i))}
if (dtype[i] == "Boolean") {old.copy[, i] <- as.logical(pull(old.copy, i))}
if (dtype[i] == "DateTime") {old.copy[, i] <- lubridate::ymd_hms(pull(old.copy, i))}
if (dtype[i] == "Time") { old.copy[, i] <- lubridate::hms(pull(old.copy, i))} # This list will increase and also change based on input date and time formats
}
max.length <-
saptemplate[saptemplate$`Sheet Name` == "Contact", ]$`Max Length` # List of max lengths mentioned
colclasses <- lapply(old.copy, class) # getting column classes
for (i in 1:ncol(old.copy)) {
if (colclasses[[i]] == "character") {
old.copy[, i] <-
ifelse(nchar(pull(old.copy, i)) > max.length[i],
substring(pull(old.copy, i), 1, max.length[i]),
pull(old.copy, i))
} # If string length is more than mentioned, trim it to the mentioned
}
write.csv(old.copy, "Contact.csv", row.names = FALSE)