---
title: "Projects"
author: "Scary Scarecrow"
date: "1/12/2022"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(readxl)
library(dplyr)
library(lubridate)
library(DT)
library(tidyr)
3 years ago
library(stringr)
4 years ago
3 years ago
#' Build one empty data frame per template sheet.
#'
#' For every sheet name, a zero-row data frame is created whose columns are the
#' unique `Header` values the template lists for that sheet.
#'
#' The previous version iterated over an undefined object (`sheet.na`) and
#' `assign()`ed into the function's own frame, so nothing ever reached the
#' caller. The data frames are now returned as a named list; use
#' `list2env(mutlstxlrdr(), envir = environment())` to get one variable per
#' sheet.
#'
#' @param template Field-definition table with the columns `Sheet Name` and
#'   `Header`. Defaults to the global `saptemplate`.
#' @param sheets Character vector of sheet names to build. Defaults to every
#'   distinct `Sheet Name` in `template`.
#' @return Invisibly, a list of zero-row data frames named after `sheets`.
mutlstxlrdr <- function(template = saptemplate,
                        sheets = unique(template$`Sheet Name`)) {
  empty_frames <- lapply(sheets, function(sheet) {
    headers <- unique(template$Header[template$`Sheet Name` == sheet])
    # read.table() on empty text yields a zero-row frame with the given columns
    read.table(text = "", col.names = headers)
  })
  names(empty_frames) <- sheets
  invisible(empty_frames)
}
# Start every run from a clean slate: delete the files a previous run left in
# the error, summary and output folders.
generated_dirs <- c(
  "./projects/errors/mandatory/",
  "./projects/errors/codelist/",
  "./projects/errors/length/",
  "./projects/summary/",
  "./projects/output/"
)
for (generated_dir in generated_dirs) {
  stale_files <- list.files(generated_dir, full.names = TRUE)
  # list.files() also returns sub-directories (e.g. ./projects/output/combined,
  # which a later chunk writes into). Only regular files may be handed to
  # file.remove(), otherwise it warns or deletes an empty folder.
  stale_files <- stale_files[!dir.exists(stale_files)]
  file.remove(stale_files)
}
```
## Data transformation workflow
Following is the proposed preliminary workflow for the data transformation project.
>All files of a segment (contacts, accounts, etc.) should be inside the relevant folder. Each folder should have one sub-folder for all codelist files. All legacy data (one file per country) should be inside the raw-data folder, with each file named after its country. A file with the field definitions, including the name of the matching column in the legacy file, should also be there.
>*Make sure that there are no hidden files inside the directory.*
3 years ago
### Employees
```{r}
# Employee lookup table: only the first two columns of emp.csv are kept.
# Later chunks join on `Name` and read `Employee.ID`, so these are expected to
# be the two columns retained here.
employeecodes <- read.csv("emp.csv") |>
  select(c(1, 2))
```
4 years ago
### Relationship files
```{r echo=TRUE, message=FALSE, warning=FALSE}
# Locate the relationship workbooks. `pattern` is a regular expression, not a
# glob: "*.xls" matched any name containing "xls" after some character. The
# pattern below matches the .xls / .xlsx extension only.
relfilenames <-
  list.files("./projects/relationship",
             pattern = "\\.xlsx?$",
             full.names = TRUE)
# Skip the "~$<name>" lock files Excel keeps next to an open workbook
relfilenames <- relfilenames[!startsWith(basename(relfilenames), "~$")]
print(relfilenames)

# Read the first sheet of every workbook and replace the owner's name by the
# employee id; owners without a match get the dummy id "99999".
rel_files <- lapply(relfilenames, function(path) {
  read_excel(path = path, sheet = 1) |>
    left_join(employeecodes, by = c("Owner" = "Name")) |>
    select(-Owner) |>
    mutate(Employee.ID = ifelse(is.na(Employee.ID), "99999", Employee.ID)) |>
    rename(Owner = Employee.ID)
})

# Key the list by bare file name; the main loop looks relationship data up by
# the name of the matching legacy file.
names(rel_files) <- basename(relfilenames)
# Names of the files imported
names(rel_files)
```
4 years ago
### Code Lists
```{r Create List of Files, echo=TRUE, message=FALSE, warning=FALSE}
# We can avoid creating a separate directory for code list. But organizing may
# be difficult. However, this can be explored further if we want transform all
# the data in one go i.e. not by functions (contacts, accounts etc.).
#
# `pattern` is a regular expression, not a glob, so the extension is anchored
# explicitly instead of using "*.xlsx".
filenames <-
  list.files("./projects/CodeList",
             pattern = "\\.xlsx$",
             full.names = TRUE)
# Skip the "~$<name>" lock files Excel keeps next to an open workbook
filenames <- filenames[!startsWith(basename(filenames), "~$")]
# File paths
print(filenames)
```
Check manually that the list above includes all the codelist files.
If it is correct, read the files.
```{r codelistreader, echo=TRUE, message=FALSE, warning=FALSE}
# Sheet names of every codelist workbook, in the order of `filenames`
sheet_names <- lapply(filenames, excel_sheets)

# Read every sheet of every workbook as text and collect all of them in one
# flat list whose elements are named after their sheet.
codelist_files <- do.call(
  c,
  unname(Map(
    function(workbook, sheets) {
      tables <- lapply(
        excel_sheets(workbook),
        read_excel,
        path = workbook,
        col_types = "text"
      )
      names(tables) <- c(sheets)
      tables
    },
    filenames,
    sheet_names
  ))
)
# Names of the files imported
names(codelist_files)
# codelist_files<-unique(codelist_files)
```
### Templates
Let us now extract the data. Below we are reading only one file having all data related to `Contacts` from the legacy system.
```{r readlegacyfilepath, echo=TRUE, message=FALSE, warning=FALSE}
# Change the path, check pattern.
# `pattern` is a regular expression, not a glob: "*.xls" matched any name
# containing "xls" after some character. The pattern below matches the
# .xls / .xlsx extension only.
oldfilepath <-
  list.files("./projects/raw-data",
             pattern = "\\.xlsx?$",
             full.names = TRUE)
# Skip the "~$<name>" lock files Excel keeps next to an open workbook
oldfilepath <- oldfilepath[!startsWith(basename(oldfilepath), "~$")]
print(oldfilepath)
```
Manually check that the list matches the actual files.
```{r readlegacyfiles, echo=TRUE}
# Replace the employee name stored in `column` by the matching employee id.
# The name column is dropped and the id column takes over its name (it ends up
# as the last column). Names without a match get the dummy id "99999".
replace_name_with_employee_id <- function(data, column) {
  data <- data |>
    left_join(employeecodes, by = setNames("Name", column)) |>
    select(-all_of(column)) |>
    mutate(Employee.ID = ifelse(is.na(Employee.ID), "99999", Employee.ID))
  names(data)[names(data) == "Employee.ID"] <- column
  data
}

# Legacy columns that hold employee names, processed in this order
employee_columns <- c(
  "Responsible",
  "Back office",
  "Presales",
  "application technology"
)

# Read the first sheet of every legacy workbook and translate all employee
# name columns to employee ids.
old_files <- lapply(oldfilepath, function(path) {
  Reduce(
    replace_name_with_employee_id,
    employee_columns,
    read_excel(path = path, sheet = 1)
  )
})

# Key the list by bare file name; the first two characters of that name are
# used as the country code further down.
names(old_files) <- basename(oldfilepath)
```
*Some errors were noticed in the legacy file: columns with similar or identical names exist.*
```{r readSAPtemplate, echo=TRUE, message=FALSE, warning=FALSE}
# Field definitions of the target template: one row per template column,
# taken from the "Field_Definitions" sheet.
saptemplate <- "./projects/template.xlsx" |>
  read_excel(sheet = "Field_Definitions")
# First few rows of the imported data
head(saptemplate)
```
*Please note that the format of the tables (sheet) has been slightly changed. Earlier the corresponding sheet name was mentioned in a row before the actual table. Now, all the rows mention the corresponding sheet name. This was done manually for convenience of data extraction*
3 years ago
```{r createmptySAPfiles, message=FALSE, warning=FALSE, include=FALSE}
4 years ago
#orilo<-"en_US.UTF-8"
#Sys.setlocale(locale="en_US.UTF-8")
3 years ago
strt <- Sys.time()
4 years ago
snames <- unique(saptemplate$`Sheet Name`)
for (h in seq_along(old_files)) {
# Copy original data
old.copy <- old_files[[h]]
3 years ago
print(paste0(names(old_files[h]), " imported"))
3 years ago
err.summ <-
data.frame(
Country = NULL,
Name = NULL,
Expected = NULL,
Actual = NULL
) #Error Cal
4 years ago
# Creates data frame for each sheet in snames
for (i in seq_along(snames)) {
3 years ago
print(paste0("Processing ..", snames[i]))
4 years ago
# Select the column names from the field description sheet
print("Creating template")
sel.template.desc <-
3 years ago
saptemplate[saptemplate$`Sheet Name` == snames[i],]
4 years ago
print("Creating column names")
sel.template.desc.colnames <- sel.template.desc$Header
# Create a list by adding values from corresponding legacy data
temp <- NULL
print("adding values to template ")
# Sheets that are deliberately not generated by this workflow
skipped_sheets <- c(
  "Opportunity_Competitor_Party_In",
  "Opportunity_EndBuyer_Contact_Pa",
  "Opportunity_External_Party_Info",
  "Opportunity_Installed_Object",
  "Opportunity_Product",
  "Opportunity_Other_Party_Informa",
  "Opportunity_Payer_Contact_Party",
  "Opportunity_Product_Recipient_C",
  "Opportunity_Prospect_Contact_Pa",
  "Opportunity_Revenue_Splits",
  "Opportunity_Sales_Employee_Part",
  "Opportunity_Sales_Partner_Party",
  "Opportunity_Notes",
  "Contact_Party_Information",
  "Opportunity_Competitor_Product",
  "Opportunity_Item_Party_Informat",
  "Opportunity_Product_Quantity_Pl",
  "Opportunity_Product_Revenue_Pla",
  "Opportunity_Product_Notes",
  "Opportunity_Header_Revenue_Plan",
  "Opportunity_Account_Team_Party_"
)
# Sheets for which a builder exists further down in this loop
handled_sheets <- c(
  "Opportunity",
  "Opportunity_Preceding_and_Follo",
  "Opportunity_Party_Information",
  "Opportunity_Sales_Team_Party_In"
)
if (snames[i] %in% skipped_sheets) {
  next
}
if (!snames[i] %in% handled_sheets) {
  # Without this guard, the validation and export steps below would run on the
  # `df` left over from the previous sheet and write it under this sheet's name.
  message("No builder defined for sheet '", snames[i], "'; skipping")
  next
}
3 years ago
if (snames[i] == "Opportunity") {
# Build one list element per template column of the Opportunity sheet.
#
# `temp` has to be a list filled with `[[<-`. The previous `temp[j] <- <vector>`
# kept only the FIRST value of a computed column (R warns "number of items to
# replace is not a multiple of replacement length", but warnings are switched
# off for this chunk), so every row received the first row's value for
# Expected_Value, International_project, LEVIAT_specified, Project_Country and
# BIM_designed.
temp <- vector("list", length(sel.template.desc.colnames))
country_code <- substr(names(old_files[h]), 1, 2)
for (j in seq_along(sel.template.desc.colnames)) {
  header <- sel.template.desc.colnames[j]
  print(paste("Processing ", header))
  if (header == "Expected_Value") {
    # User-provided value first, potential-customer value as fallback
    temp[[j]] <-
      ifelse(
        !is.na(old.copy$`User Provided`),
        old.copy$`User Provided`,
        old.copy$`Potential Customer`
      )
    next
  }
  if (header == "Sales_Unit" || header == "Sales_Organization") {
    temp[[j]] <- paste0(country_code, "01")
    next
  }
  if (header == "International_project") {
    # TRUE wherever the mapped legacy column holds a value
    temp[[j]] <- !is.na(old.copy[[sel.template.desc$oldkey[j]]])
    next
  }
  if (header == "LEVIAT_specified") {
    temp[[j]] <-
      ifelse(
        !is.na(old.copy$halfenspecified),
        old.copy$halfenspecified,
        old.copy$competitor
      )
    next
  }
  if (header == "Project_Country") {
    temp[[j]] <- ifelse(is.na(old.copy$Country), NA, country_code)
    next
  }
  if (header == "BIM_designed") {
    temp[[j]] <-
      ifelse(
        is.na(old.copy$`BIM designed`),
        "Software Unknown",
        old.copy$`BIM designed`
      )
    next
  }
  # Generic columns: template default > mapped legacy column > NA
  default_value <- sel.template.desc$default[j]
  legacy_key <- sel.template.desc$oldkey[j]
  if (!is.na(default_value)) {
    temp[[j]] <- as.character(default_value)
  } else if (is.na(legacy_key) || legacy_key == "NA") {
    temp[[j]] <- NA
  } else {
    if (!legacy_key %in% colnames(old.copy)) {
      stop("Legacy column '", legacy_key, "' not found in ",
           names(old_files[h]), call. = FALSE)
    }
    temp[[j]] <- old.copy[[legacy_key]]
  }
}
3 years ago
# Rename the columns according to field description
print("renaming template ")
names(temp) <- sel.template.desc.colnames
4 years ago
3 years ago
# Create data frame from the list
df <- as.data.frame(temp)
print("Converted to data frame")
4 years ago
3 years ago
# if(names(old_files)=="CN.xlsx"){
# df$Owner<-"226"
# }
# if(names(old_files)=="CZ.xlsx"){
# df$Owner<-"390"
# }
# if(names(old_files)=="FI.xlsx"){
# df$Owner<-"325"
# }
# if(names(old_files)=="DE.xlsx"){
# df$Owner<-"289"
# }
# if(names(old_files)=="IT.xlsx"){
# df$Owner<-"182"
# }
# if(names(old_files)=="PL.xlsx"){
# df$Owner<-"368"
# }
# if(names(old_files)=="ES.xlsx"){
# df$Owner<-"447"
# }
# if(names(old_files)=="SE.xlsx"){
# df$Owner<-"351"
# }
# if(names(old_files)=="NL.xlsx"){
# df$Owner<-"90052"
# }
# if(names(old_files)=="NO.xlsx"){
# df$Owner<-"000"
# }
4 years ago
}
# Opportunity_Preceding_and_Follo: built from the legacy rows whose
# `Project hierarchy` is "Opportunity".
if (snames[i] == "Opportunity_Preceding_and_Follo") {
  old.copy.f <- old.copy |> filter(`Project hierarchy` == "Opportunity")
  if (nrow(old.copy.f) == 0) {
    next
  } # If no opportunity is found in the data go to the next sheet

  # One list element per template column:
  # template default > mapped legacy column > NA.
  # Explicit branches replace the nested ifelse(), which only worked because
  # ifelse() happened to coerce its result to a list.
  temp <- vector("list", length(sel.template.desc.colnames))
  for (j in seq_along(sel.template.desc.colnames)) {
    default_value <- sel.template.desc$default[j]
    legacy_key <- sel.template.desc$oldkey[j]
    if (!is.na(default_value)) {
      temp[[j]] <- as.character(default_value)
    } else if (is.na(legacy_key) || legacy_key == "NA") {
      temp[[j]] <- NA
    } else {
      if (!legacy_key %in% colnames(old.copy.f)) {
        stop("Legacy column '", legacy_key, "' not found in ",
             names(old_files[h]), call. = FALSE)
      }
      temp[[j]] <- old.copy.f[[legacy_key]]
    }
  }

  # Rename the columns according to field description
  print("renaming template ")
  names(temp) <- sel.template.desc.colnames
  # Create data frame from the list
  df <- as.data.frame(temp)
  print("Converted to data frame")
  # Remember the template column order so it can be restored afterwards
  corr.seq <- colnames(df)
  df <- df |>
    mutate(
      # Reference key = opportunity key without its last four characters
      Reference_Doc_External_Key = str_sub(
        Opportunity_External_Key,
        1,
        str_length(Opportunity_External_Key) - 4
      ),
      External_Key = paste(
        "OPF",
        Reference_Doc_External_Key,
        Opportunity_External_Key,
        sep = "_"
      )
    ) |>
    # all_of(): selecting with a bare external vector is deprecated
    select(all_of(corr.seq))
}
# Opportunity_Party_Information: built from the relationship file that carries
# the same file name as the current legacy file.
if (snames[i] == "Opportunity_Party_Information") {
  rdf <- rel_files[[paste0("", names(old_files[h]))]]
  print("party info processing")
  if (is.null(rdf)) {
    next
  } # If no relationship data is found go to the next sheet

  # One list element per template column:
  # template default > mapped relationship column > NA.
  # Explicit branches replace the nested ifelse(), which only worked because
  # ifelse() happened to coerce its result to a list.
  temp <- vector("list", length(sel.template.desc.colnames))
  for (j in seq_along(sel.template.desc.colnames)) {
    default_value <- sel.template.desc$default[j]
    legacy_key <- sel.template.desc$oldkey[j]
    if (!is.na(default_value)) {
      temp[[j]] <- as.character(default_value)
    } else if (is.na(legacy_key) || legacy_key == "NA") {
      temp[[j]] <- NA
    } else {
      if (!legacy_key %in% colnames(rdf)) {
        stop("Relationship column '", legacy_key, "' not found for ",
             names(old_files[h]), call. = FALSE)
      }
      temp[[j]] <- rdf[[legacy_key]]
    }
  }

  # Rename the columns according to field description
  print("renaming template ")
  names(temp) <- sel.template.desc.colnames
  # Create data frame from the list
  df <- as.data.frame(temp)
  print("Converted to data frame")
  # Remember the template column order so it can be restored afterwards
  corr.seq <- colnames(df)
  # NOTE: the commented-out per-country dummy Party_ID overrides that used to
  # sit here ("Must be removed later") were dropped; see version control.
  df <- df |>
    mutate(
      External_Key = paste(
        "INV",
        Opportunity_External_Key,
        Party_ID,
        Role,
        Party_External_Key,
        sep = "_"
      )
    ) |>
    # all_of(): selecting with a bare external vector is deprecated
    select(all_of(corr.seq))
}
# Opportunity_Sales_Team_Party_In: one output row per legacy row and per
# filled team member column.
if (snames[i] == "Opportunity_Sales_Team_Party_In") {
  # One list element per template column:
  # template default > mapped legacy column > NA.
  # Explicit branches replace the nested ifelse(), which only worked because
  # ifelse() happened to coerce its result to a list.
  temp <- vector("list", length(sel.template.desc.colnames))
  for (j in seq_along(sel.template.desc.colnames)) {
    default_value <- sel.template.desc$default[j]
    legacy_key <- sel.template.desc$oldkey[j]
    if (!is.na(default_value)) {
      temp[[j]] <- as.character(default_value)
    } else if (is.na(legacy_key) || legacy_key == "NA") {
      temp[[j]] <- NA
    } else {
      if (!legacy_key %in% colnames(old.copy)) {
        stop("Legacy column '", legacy_key, "' not found in ",
             names(old_files[h]), call. = FALSE)
      }
      temp[[j]] <- old.copy[[legacy_key]]
    }
  }

  # Rename the columns according to field description
  print("renaming template ")
  names(temp) <- sel.template.desc.colnames
  # Create data frame from the list
  df <- as.data.frame(temp)
  print("Converted to data frame")
  # Remember the template column order so it can be restored afterwards
  corr.seq <- colnames(df)

  # Role code written for each legacy team column
  role_codes <- c(
    resp = "ZT",
    apptech = "ZIN016",
    backoff = "ZIN002",
    pres = "ZIN011"
  )
  # NOTE: the commented-out per-country dummy Party_ID overrides that used to
  # sit here ("Must be removed later") were dropped; see version control.
  df <- df |>
    mutate(
      resp = as.character(old.copy$Responsible),
      apptech = as.character(old.copy$`application technology`),
      backoff = as.character(old.copy$`Back office`),
      pres = as.character(old.copy$Presales)
    ) |>
    # One row per (legacy row, team column); empty team members are dropped
    pivot_longer(cols = c(resp, apptech, backoff, pres)) |>
    filter(!is.na(value)) |>
    # The template's own Party_ID / Role are replaced by the pivoted values
    select(-c(Party_ID, Role)) |>
    rename(Party_ID = value, Role = name) |>
    mutate(
      Role = unname(role_codes[Role]),
      External_Key = paste(
        "PAR",
        Opportunity_External_Key,
        Party_ID,
        Role,
        sep = "_"
      )
    ) |>
    # Rows for the responsible employee (role "ZT") are not exported here
    filter(Role != "ZT") |>
    # all_of(): selecting with a bare external vector is deprecated
    select(all_of(corr.seq))
}
4 years ago
# Error summary file
3 years ago
Expected <- nrow(df)
4 years ago
#Select essential rows
print("Identifying essential rows")
sel.template.desc |>
filter(Mandatory == "Yes") |>
pull(Header) -> essential.columns
error.mandatory <- NULL
3 years ago
error.df <-
data.frame(
Country = NULL,
Name = NULL,
Rows = NULL,
Expected = NULL
)
# Operate on essential columns including creation of error file.
#
# For every mandatory column: export the rows where it is missing, record the
# number of such rows in `error.df`, then drop those rows from `df`.
#
# Country and Name are read again by the codelist, length and summary steps
# below, so they are set before the loop. Previously they were only assigned
# inside it and were stale or undefined for a sheet without mandatory columns.
Country <- substr(names(old_files[h]), 1, 2)
Name <- snames[i]
for (k in seq_along(essential.columns)) {
  print("Creating and writing data with missing mandatory values")
  mandatory_col <- essential.columns[k]
  if (!mandatory_col %in% colnames(df)) {
    stop("Mandatory column '", mandatory_col, "' is missing from sheet ",
         Name, call. = FALSE)
  }
  is_missing <- is.na(df[[mandatory_col]])
  manerrdt <- df[is_missing, , drop = FALSE]
  # Was `nrow(manerrdt > 0)`: the parenthesis was misplaced
  if (nrow(manerrdt) > 0) {
    manerrdt <-
      manerrdt |> mutate(error = paste0(mandatory_col, " missing"))
  }
  # Keep the error rows available as a variable. The country code is
  # characters 1-2 of the file name (was 2-3, unlike everywhere else).
  assign(
    paste0(
      "error_mandatory_",
      Country,
      "_",
      snames[i],
      "_",
      mandatory_col
    ),
    manerrdt
  )
  # To be saved in error files
  if (nrow(manerrdt) > 0) {
    write.csv(
      manerrdt,
      paste0(
        "./projects/errors/mandatory/",
        Country,
        "_",
        snames[i],
        "_",
        mandatory_col,
        "_error_mandatory.csv"
      ),
      row.names = FALSE,
      na = "",
      fileEncoding = "UTF-8"
    )
  }
  # Error summary file
  err.type <- paste0("Missing ", mandatory_col)
  err.count <- sum(is_missing)

  print("Removing rows with empty essential columns")
  df <- df[!is_missing, , drop = FALSE]
  if (err.count > 0) {
    error.df <-
      rbind(
        error.df,
        data.frame(
          Country = Country,
          Name = Name,
          err.type = err.type,
          err.count = err.count
        )
      ) # Error cal
  }
}
print("Identifying columns associated with codelists")
# List of columns that have a codelist
codelistcols <- sel.template.desc |>
  filter(!is.na(`CodeList File Path`)) |> pull(Header)

# For every codelist column: export the values that are not a known
# description, record their number in `error.df`, then replace every
# description by its code (second codelist column). Unknown values become NA.
for (k in seq_along(codelistcols)) {
  code_col <- codelistcols[k]
  print(paste0("Identifying errors ", code_col))

  # Look the codelist up once instead of once per use
  codelist <- codelist_files[code_col][[1]]
  if (is.null(codelist)) {
    stop("No codelist sheet named '", code_col, "' was imported",
         call. = FALSE)
  }
  descriptions <- pull(codelist, Description)
  codes <- pull(codelist, 2)

  # pull() returns a plain vector for data frames and tibbles alike;
  # `df[, col] %in% ...` compared a whole one-column tibble as a single item.
  values <- pull(df, code_col)
  is_mismatch <- !values %in% c(descriptions, NA)

  def.rows <- which(is_mismatch)
  def.n <- df[[1]][def.rows] # first column identifies the row
  def.rows.val <- values[is_mismatch]
  def.colname <- rep(code_col, length.out = length(def.rows))
  def <- data.frame(def.rows, def.n, def.rows.val, def.colname)

  # Was `nrow(def > 0)`: the parenthesis was misplaced
  if (nrow(def) > 0) {
    # Keep the mismatches available as a variable. The country code comes from
    # the current file `h` (was hard-coded to the first file).
    assign(paste0(
      "error_codematch_",
      substr(names(old_files[h]), 1, 2),
      "_",
      snames[i],
      "_",
      code_col
    ),
    def) # To be saved
    write.csv(
      def,
      paste0(
        "./projects/errors/codelist/",
        substr(names(old_files[h]), 1, 2),
        "_",
        snames[i],
        "_",
        code_col,
        "_error_codematch_.csv"
      ),
      row.names = FALSE,
      na = "",
      fileEncoding = "UTF-8"
    )
  }

  err.type <- paste0("Codelist Mismatch ", code_col) # Error cal
  err.count <- nrow(def) # Error cal
  if (err.count > 0) {
    error.df <-
      rbind(
        error.df,
        data.frame(
          Country = Country,
          Name = Name,
          err.type = err.type,
          err.count = err.count
        )
      ) # Error cal
  }

  print(paste0("Removing errors ", code_col))
  # Matches each value with the corresponding code list and returns the code.
  # A value without a matching description yields NA from match(), so the
  # mismatches need no separate blanking step.
  df[[code_col]] <- codes[match(values, descriptions)]
}
max.length <- as.numeric(sel.template.desc$`Max Length`)
dtype <- sel.template.desc$`Data Type`
rowval <- NULL
ival <- NULL
rval <- NULL
lenght.issue.df <- NULL
# Changing the data class: convert every column to the type the template
# declares in `Data Type`. Columns with another or a missing data type are
# left as they are.
# seq_len() instead of 1:ncol(df), which would iterate over c(1, 0) for a
# data frame without columns.
for (k in seq_len(ncol(df))) {
  target_type <- dtype[k]
  if (is.na(target_type)) {
    # A missing data type used to abort the run inside `if (NA == "String")`
    next
  }
  if (target_type == "String") {
    df[[k]] <- as.character(df[[k]])
  } else if (target_type == "Boolean") {
    df[[k]] <- as.logical(df[[k]])
  } else if (target_type == "DateTime") {
    df[[k]] <- lubridate::ymd(df[[k]])
  } else if (target_type == "Time") {
    df[[k]] <- lubridate::hms(df[[k]])
  } # This list will increase and also change based on input date and time formats
}
4 years ago
# print("Rectifying streetname")
# # Street and House Number
# if (any(colnames(df) == "Street")) {
# print("found steet")
# # stop()
3 years ago
#
4 years ago
# df$Streetname<-NA
# df$HouseNumber<-NA
# #df |> extract("Street", "(\\D+)(\\d.*)")
# df<-tidyr::extract(df,
# "Street",
# c("Streetname", "HouseNumber"),
# "(\\D+)(\\d.*)")
# df <- df |>
# select(-c("House_Number")) |>
# rename(Street = Streetname, House_Number = HouseNumber) |>
# select(all_of(sel.template.desc.colnames))
# }
# Length Rectification: truncate every character column to the template's
# `Max Length`, log the original values in `lenght.issue.df` and count the
# over-long values per column in `error.df`.
colclasses <- lapply(df, class)
print("Rectifying Length")
cntr <- substr(names(old_files[h]), 1, 2)
for (k in seq_len(ncol(df))) {
  # Logging and counting now happen for character columns only. They used to
  # sit outside this check, so every other column re-recorded the values of
  # the previous character column under its own name.
  if (!is.character(df[[k]])) {
    next
  }
  print("found character column ")
  rowval <- df[[1]] # first column identifies the row
  colval <- df[[k]] # values before truncation
  n_chars <- nchar(colval)
  # Empty and missing values are logged with length 1
  ival <- ifelse(n_chars == 0 | is.na(n_chars), 1, n_chars)
  rval <- max.length[k]
  colnm <- colnames(df)[k]

  # rectifying data length: only values known to be too long are touched.
  # The previous ifelse() turned the whole column into NA whenever the
  # template had no max length for it.
  too_long <- !is.na(n_chars) & !is.na(rval) & n_chars > rval
  df[[k]][too_long] <- substring(colval[too_long], 1, rval)

  # data.frame() cannot combine zero-length columns with scalars
  if (length(colval) > 0) {
    lenght.issue.df <-
      rbind(lenght.issue.df,
            data.frame(rowval, ival, rval, colnm, colval, cntr))
  }

  err.type <- paste0("Length error ", colnm) # Error cal
  err.count <- sum(ival > rval, na.rm = TRUE) # Error cal
  if (err.count > 0) {
    error.df <-
      rbind(
        error.df,
        data.frame(
          Country = Country,
          Name = Name,
          err.type = err.type,
          err.count = err.count
        )
      ) # Error cal
  }
}
# The filter that follows needs a data frame even when nothing was logged
if (is.null(lenght.issue.df)) {
  lenght.issue.df <- data.frame(
    rowval = character(0),
    ival = numeric(0),
    rval = numeric(0),
    colnm = character(0),
    colval = character(0),
    cntr = character(0)
  )
}
3 years ago
lenght.issue.df <- dplyr::filter(lenght.issue.df, ival > rval)
4 years ago
3 years ago
if (nrow(lenght.issue.df) > 0) {
write.csv(
lenght.issue.df,
paste0(
"./projects/errors/length/",
substr(names(old_files[h]), 1, 2),
"_",
snames[i],
"_length_error.csv"
),
row.names = F,
na = ""
)
4 years ago
}
# Keep the finished sheet available as a variable named after the sheet
assign(snames[i], df)
# Export the sheet as <country>_<sheet>.csv.
# write.csv() always writes comma-separated files and ignores `sep` with a
# warning, so the former `sep = ";"` had no effect and was removed. The
# combining chunk reads these files back with read.csv(), i.e. as
# comma-separated.
write.csv(
  df,
  paste0(
    "./projects/output/",
    substr(names(old_files[h]), 1, 2),
    "_",
    snames[i],
    ".csv"
  ),
  row.names = FALSE,
  na = "",
  fileEncoding = "UTF-8"
)
if (nrow(error.df) > 0) {
write.csv(
error.df,
paste0(
"./projects/summary/",
substr(names(old_files[h]), 1, 2),
"_",
snames[i],
"_error",
".csv"
),
row.names = F,
na = "",
fileEncoding = "UTF-8"
) # Error write
4 years ago
}
3 years ago
err.summ <-
rbind(
err.summ,
data.frame(
Country = Country,
Name = Name,
Expected = Expected,
Actual = nrow(df)
)
) #Error Cal
4 years ago
}
3 years ago
write.csv(
err.summ,
paste0(
"./projects/summary/" ,
substr(names(old_files[h]), 1, 2),
"_",
snames[i],
"_sumerror",
".csv"
),
row.names = F,
na = ""
) # Error Write
4 years ago
}
3 years ago
end <- Sys.time()
4 years ago
3 years ago
end - strt
4 years ago
```
*The code failed because the Department column appears several times in the data and, while importing, R renamed the copies to `Department..xx`.*
*Manually verify if these are the required templates*
```{r}
# Concatenate the per-country exports of one sheet and write the result as
# csv and xlsx into ./projects/output/combined.
#
# sheet            Sheet name; the inputs are the files "<country>_<sheet>.csv"
#                  in ./projects/output.
# target_stem      File name (without extension) of the combined files.
# drop_dummy_party If TRUE, rows with the dummy Party_ID "99999" are removed.
#
# Returns the combined data frame invisibly, or NULL when no input exists.
combine_outputs <- function(sheet, target_stem, drop_dummy_party = FALSE) {
  # `pattern` is a regular expression, so the file name is anchored explicitly
  paths <- list.files(
    "./projects/output",
    pattern = paste0("_", sheet, "\\.csv$"),
    full.names = TRUE
  )
  if (length(paths) == 0) {
    message("No output files found for ", sheet, "; nothing combined")
    return(invisible(NULL))
  }
  tables <- lapply(
    paths,
    read.csv,
    colClasses = "character",
    header = TRUE,
    row.names = NULL
  )
  combined <- do.call(rbind.data.frame, tables)
  if (drop_dummy_party) {
    combined <- combined |> filter(Party_ID != "99999")
  }
  target_dir <- "./projects/output/combined"
  dir.create(target_dir, recursive = TRUE, showWarnings = FALSE)
  # write.csv() always writes comma-separated files and ignores `sep` with a
  # warning, so the argument is no longer passed.
  write.csv(
    combined,
    paste0(target_dir, "/", target_stem, ".csv"),
    row.names = FALSE,
    na = "",
    fileEncoding = "UTF-8"
  )
  openxlsx::write.xlsx(combined, paste0(target_dir, "/", target_stem, ".xlsx"))
  invisible(combined)
}

opdf <- combine_outputs("Opportunity", "combinedopportunity")
opdf <- combine_outputs(
  "Opportunity_Party_Information",
  "combinedopportunitypartyinfo"
)
opdf <- combine_outputs(
  "Opportunity_Preceding_and_Follo",
  "combinedopportunityprecedingfollo"
)
# Removed dummy employees as per Alba's req.
opdf <- combine_outputs(
  "Opportunity_Sales_Team_Party_In",
  "combinedopportunitysalesteampartyin",
  drop_dummy_party = TRUE
)
```