---
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								title: "Accounts"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								author: "Scary Scarecrow"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								date: "1/10/2022"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								output: html_document
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								---
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								```{r setup, include=FALSE}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								knitr::opts_chunk$set(echo = TRUE)
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								library(readxl)
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								library(dplyr)
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								library(lubridate)
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								library(DT)
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								library(tidyr)
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								library(stringr)
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								# Build one empty data frame per sheet listed in `snames`.
								#
								# Reads two objects from the calling (global) environment:
								#   saptemplate - field-definition table with `Sheet Name` and `Header` columns
								#   snames      - character vector of sheet names
								#
								# Returns (invisibly) a list named by `snames`; each element is a zero-row
								# data frame whose columns are the unique `Header` values of that sheet.
								#
								# NOTE(review): the earlier version looped over `sheet.na` (not defined in
								# the visible part of this file) while indexing `snames`, and stored every
								# frame with assign() inside the function's own environment, so nothing
								# reached the caller. Confirm that `snames` is the intended driver.
								mutlstxlrdr <- function() {
								  empty_sheets <- lapply(snames, function(sheet_name) {
								    # %in% never yields NA, so rows with a missing sheet name cannot add
								    # an NA entry to the header list (== would).
								    headers <- unique(
								      saptemplate$Header[saptemplate$`Sheet Name` %in% sheet_name]
								    )
								    # Empty input plus explicit col.names gives a zero-row data frame.
								    read.table(text = "", col.names = headers)
								  })
								  names(empty_sheets) <- snames
								  invisible(empty_sheets)
								}
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								# Start each run from a clean slate: delete the files that a previous run
								# left in the error, summary and output folders.
								# list.files() also returns sub-directory names, so directories are filtered
								# out before file.remove(); this keeps ./accounts/output/combined (and its
								# content) in place instead of handing a directory to file.remove().
								invisible(lapply(
								  c(
								    "./accounts/errors/mandatory/",
								    "./accounts/errors/codelist/",
								    "./accounts/errors/length/",
								    "./accounts/summary/",
								    "./accounts/output/"
								  ),
								  function(dir_path) {
								    entries <- list.files(dir_path, full.names = TRUE)
								    file.remove(entries[!dir.exists(entries)])
								  }
								))

								# Make sure the folder for combined output exists; stay silent when it is
								# already there (re-runs) and create missing parent folders if needed.
								dir.create(
								  "./accounts/output/combined",
								  showWarnings = FALSE,
								  recursive = TRUE
								)
								#do.call(file.remove, list(list.files("./accounts/output/combined/", full.names = TRUE)))
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								## Data transformation workflow
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								Following is the proposed preliminary workflow for the data transformation project.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								>All files of a segment (contacts, accounts, etc.) should be inside the relevant folder. Each folder should have one folder for all codelist files. All legacy data (one file for each country) should be inside the raw-data folder, named after each country. Another file containing the field definitions, including the name of the matching column from the legacy file, should also be there.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								>*Make sure that there are no hidden files inside the directory.*
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								### Employees
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								```{r}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								# employeecodes<-read.csv("emp.csv")
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								# employeecodes<-employeecodes |> select(c(1,2))
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								# employeecodesnew<-read.csv("./employees/empoct.csv") |> 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								#   select(c(Employee_ID,First_Name,Last_Name)) |> 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								#   mutate(Name=paste(First_Name, Last_Name)) |> 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								#   select(Employee_ID,Name) |> 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								#   rename(Employee.ID=Employee_ID)
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								# employeecodes<-rbind(employeecodes,employeecodesnew) |> 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								#   unique()
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								# Employee lookup table: keep the employee ID and build the full name
								# ("First_Name Last_Name") from the October employee export.
								employeecodes <-
								  read.csv("./employees/empoct.csv") |>
								  transmute(Employee_ID, Name = paste(First_Name, Last_Name))
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								### Code Lists
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								```{r Create List of Files, echo=TRUE, message=FALSE, warning=FALSE}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								# Full paths of all code-list workbooks (.xls, .xlsx, .xlsm).
								# list.files() expects a regular expression, not a shell glob: the pattern
								# is anchored on the extension and skips names starting with "~" (the lock
								# files Excel creates while a workbook is open).
								# A separate directory for the code lists could be avoided, but organizing
								# the files may then be difficult. This can be explored further if all data
								# is to be transformed in one go, i.e. not by function (contacts, accounts
								# etc.).
								filenames <- list.files(
								  "./accounts/CodeList",
								  pattern = "^[^~].*\\.xls[xm]?$",
								  full.names = TRUE
								)

								# File paths
								print(filenames)
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								Check manually that the above list includes all the codelist files.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								If correct, then read the files.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								```{r codelistreader, echo=TRUE, message=FALSE, warning=FALSE}
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								# Sheet names of every code-list workbook, in the order of `filenames`.
								sheet_names <- lapply(filenames, excel_sheets)

								# Flat list holding every sheet of every workbook, filled file by file.
								codelist_files <- NULL
								for (i in seq_along(filenames)) {
								  # Read each sheet of workbook i with all columns as text.
								  a <- lapply(
								    excel_sheets(filenames[[i]]),
								    function(sheet) {
								      read_excel(path = filenames[[i]], sheet = sheet, col_types = "text")
								    }
								  )
								  # Label the tables with the sheet names collected above.
								  a <- setNames(a, c(sheet_names[[i]]))
								  codelist_files <- c(codelist_files, a)
								}

								# Names of the files imported
								names(codelist_files)
								#codelist_files<-unique(codelist_files)
								codelist_files$Customer_type_I
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								### Templates
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								Let us now extract the data. Below we list the legacy-system files holding the `Accounts` data (one file for each country) found in the raw-data folder.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								```{r readlegacyfilepath, echo=TRUE, message=FALSE, warning=FALSE}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								# Full paths of the legacy workbooks (.xls, .xlsx, .xlsm) in the raw-data
								# folder. Change the path and check the pattern when reusing this chunk.
								# The pattern is a regular expression (list.files() does not take globs);
								# it is anchored on the extension and skips Excel "~" lock files.
								oldfilepath <- list.files(
								  "./accounts/raw-data",
								  pattern = "^[^~].*\\.xls[xm]?$",
								  full.names = TRUE
								)
								print(oldfilepath)
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								Manually check that the list matches the actual files.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								```{r readlegacyfiles, echo=TRUE}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								# Read the first sheet of every legacy workbook and replace the employee
								# names in `Owner` and `Sales rep` with the matching Employee_ID from
								# `employeecodes`. Names without a match get the placeholder ID "99999".
								# The list is preallocated so that it is a (possibly empty) list even when
								# no legacy file was found.
								old_files <- vector("list", length(oldfilepath))

								for (i in seq_along(oldfilepath)) {
								  a <- read_excel(path = oldfilepath[[i]], sheet = 1)
								  # NOTE(review): if a full name occurs more than once in `employeecodes`,
								  # each join duplicates the matching legacy rows - confirm names are unique.
								  a <- a |>
								    # Owner: name -> Employee_ID
								    left_join(employeecodes, by = c(Owner = "Name")) |>
								    select(-Owner) |>
								    mutate(Employee_ID = ifelse(is.na(Employee_ID), "99999", Employee_ID)) |>
								    rename(Owner = Employee_ID) |>
								    # Sales rep: name -> Employee_ID
								    left_join(employeecodes, by = c(`Sales rep` = "Name")) |>
								    select(-`Sales rep`) |>
								    mutate(Employee_ID = ifelse(is.na(Employee_ID), "99999", Employee_ID)) |>
								    rename(`Sales rep` = Employee_ID)
								  old_files[[i]] <- a
								}
								old_files

								# Name each table after its file. basename() strips the directory part
								# literally; the former gsub() pattern was an unanchored regular expression
								# whose dots matched any character.
								names(old_files) <- basename(oldfilepath)
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								*Some errors were noticed in the legacy file: columns with similar or identical names exist.*
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								```{r readSAPtemplate, echo=TRUE, message=FALSE, warning=FALSE}
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								# Load the SAP field definitions from the template workbook.
								saptemplate <- read_excel(
								  path = "./accounts/template.xlsx",
								  sheet = "Field_Definitions"
								)

								# Preview the first rows of the imported definitions
								saptemplate |> head()
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								*Please note that the format of the tables (sheets) has been slightly changed. Earlier, the corresponding sheet name was mentioned in a row before the actual table. Now, every row mentions the corresponding sheet name. This was done manually for convenience of data extraction.*
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								## Don't have Status column defined
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								## There could be issue in line of business
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								```{r createmptySAPfiles, message=FALSE, warning=FALSE, include=FALSE}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								#orilo<-"en_US.UTF-8"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								#Sys.setlocale(locale="en_US.UTF-8")
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								strt <- Sys.time()
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								snames <- unique(saptemplate$`Sheet Name`)
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								for (h in seq_along(old_files)) {
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								  print("Importing new")
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								  # Copy original data
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								  old.copy <- old_files[[h]]
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								  print(paste0(names(old_files[h]), " imported"))
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								  err.summ <-
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								    data.frame(
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								      Country = NULL,
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								      Name = NULL,
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								      Expected = NULL,
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								      Actual = NULL
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								    ) #Error Cal
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								  # Creates data frame for each sheet in snames
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								  for (i in seq_along(snames)) {
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								    print(paste0("Processing ..", snames[i]))
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								    if (snames[i] %in% c("Account",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								                         "Account_Identification",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								                         "Account_Sales_Data",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								                         "Account_Team")) {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								      # Select the column names from the field description sheet
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								      print("Creating template")
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								      sel.template.desc <-
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								        saptemplate[saptemplate$`Sheet Name` == snames[i],]
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								      print("Creating column names")
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								      sel.template.desc.colnames <- sel.template.desc$Header
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								      
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								      # Create a list by adding values from corresponding legacy data
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								      temp <- NULL
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								      print("adding values to template ")
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								      
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								      if (snames[i] %in% c(
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								        "Account_Addresses",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								        "Account_Contact_Persons",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								        #"Account_Identification",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								        "Account_International_Version",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								        "Account_Skills",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								        "Account_Tax_Numbers",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								        "Account_Notes",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								        "Account_Visiting_Hours",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								        "Account_Visits_Details",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								        "Account_Visiting_Hours_Weekly_R",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								        "Account_Visiting_Times"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								      )) {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								        next
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								      }
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								      
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								      if (snames[i] == "Account") {
								        # Build the "Account" sheet from the template description and the
								        # legacy data, apply the line-of-business recodes, and split names
								        # longer than 40 characters into the additional name fields.
								        #
								        # Per template column j the value is, in order of precedence:
								        #   1. the template default (as character) when one is given,
								        #   2. NA when no legacy key is given (missing or the text "NA"),
								        #   3. otherwise the legacy column named by `oldkey`.
								        # NOTE(review): the test passed to ifelse() is a single value, so
								        # ifelse() returns a single element. A whole column only survives if
								        # `old.copy[, key]` is a one-column data frame/tibble (as.vector()
								        # then yields a one-element list) -- confirm `old.copy` is a tibble.
								        for (j in seq_along(sel.template.desc.colnames)) {
								          temp[j] <-
								            ifelse(
								              !is.na(sel.template.desc$default[j]),
								              as.character(as.vector(sel.template.desc$default[j])),
								              ifelse(
								                sel.template.desc$oldkey[j] == "NA" |
								                  is.na(sel.template.desc$oldkey[j]),
								                NA,
								                as.vector(old.copy[, sel.template.desc$oldkey[j]])
								              )
								            )
								        }
								        # Rename the columns according to field description
								        print("renaming template ")
								        names(temp) <- sel.template.desc.colnames

								        # Create data frame from the list
								        df <- as.data.frame(temp)
								        print("Converted to data frame")

								        print("Implementing Line of Business transformations")

								        # Snapshot External_Key with the RAW customer-type code before the
								        # recode below. The Account_Sales_Data branch joins against this
								        # table and compares Customer_type_I with raw codes such as
								        # "03 Building Contractor"; taking the snapshot after the recode
								        # (as before) meant none of those comparisons could ever match.
								        dfforsalesdatause <- df |>
								          select(c(External_Key, Customer_type_I)) |>
								          filter(!is.na(External_Key))

								        # mutate() evaluates its expressions in order, so Industry is derived
								        # from the raw codes first and only then is Customer_type_I replaced
								        # by its recoded label. Previously Industry was computed after the
								        # overwrite and therefore was always NA.
								        # Codes without a mapping become NA in both columns.
								        df <- df |>
								          mutate(
								            Industry = case_when(
								              Customer_type_I == "01 Trader Building Constructions" ~ "Construction",
								              Customer_type_I == "02 Trader Steel Constructions" ~ "Construction",
								              Customer_type_I == "12 Engineering construction" ~ "Construction",
								              Customer_type_I == "13 Machine construction" ~ "Construction",
								              Customer_type_I == "14 Energy and power plants" ~ "Utilities",
								              Customer_type_I == "21 Universities, public institutions and associations" ~ "Educational services",
								              Customer_type_I == "23 Transport" ~ "Transportation and warehousing"
								            ),
								            Customer_type_I = case_when(
								              Customer_type_I == "03 Building Contractor" ~ "General Contractor",
								              Customer_type_I == "12 Engineering construction" ~ "Engineer",
								              Customer_type_I == "15 Steel contractors" ~ "Specialist Sub Contractor",
								              Customer_type_I == "16 Timber contractors" ~ "Specialist Sub Contractor",
								              Customer_type_I == "18 Engineering office - civil eng." ~ "Engineer",
								              Customer_type_I == "19 Engineering office - steel/framing constructions" ~ "Engineer",
								              Customer_type_I == "20 Architects" ~ "Architect"
								            )
								          )

								        print("Now splitting names")
								        # Characters 41-80 go to Additional_Name, 81+ to Additional_Name_2,
								        # and Name keeps the first 40. substr() already stops at the end of
								        # each string, so a fixed stop of 80 is correct per row. The former
								        # `min(nchar(df$Name), 80)` was a single minimum over the whole
								        # column, which blanked or truncated Additional_Name for every row
								        # as soon as one name was short (or NA).
								        df$Additional_Name <- ifelse(nchar(df$Name) > 40,
								                                     substr(df$Name, 41, 80),
								                                     "")
								        df$Additional_Name_2 <- ifelse(nchar(df$Name) > 80,
								                                       substr(df$Name, 81, nchar(df$Name)),
								                                       "")
								        df$Name <- substr(df$Name, 1, 40)
								        # Drop exact duplicate rows.
								        df <- df |> unique()

								        # Disabled per-file Owner_ID overrides, kept for reference.
								        # NOTE(review): `names(old_files) == "CN.xlsx"` compares every file
								        # name at once; if this is re-enabled it probably needs
								        # `names(old_files[h])` as used elsewhere in this loop -- confirm.
								        # if(names(old_files)=="CN.xlsx"){
								        #   df$Owner_ID<-"226"
								        # }
								        # if(names(old_files)=="CZ.xlsx"){
								        #   df$Owner_ID<-"390"
								        # }
								        # if(names(old_files)=="FI.xlsx"){
								        #   df$Owner_ID<-"325"
								        # }
								        # if(names(old_files)=="DE.xlsx"){
								        #   df$Owner_ID<-"289"
								        # }
								        # if(names(old_files)=="IT.xlsx"){
								        #   df$Owner_ID<-"182"
								        # }
								        # if(names(old_files)=="PL.xlsx"){
								        #   df$Owner_ID<-"368"
								        # }
								        # if(names(old_files)=="ES.xlsx"){
								        #   df$Owner_ID<-"447"
								        # }
								        # if(names(old_files)=="SE.xlsx"){
								        #   df$Owner_ID<-"351"
								        # }
								        # if(names(old_files)=="NL.xlsx"){
								        #   df$Owner_ID<-"90052"
								        # }
								        # if(names(old_files)=="NO.xlsx"){
								        #   df$Owner_ID<-"000"
								        # }
								      }
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								      
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								      if (snames[i] == "Account_Sales_Data") {
								        # Build the "Account_Sales_Data" sheet: fill the template columns,
								        # derive the keys, and look up a Customer_Group from the customer
								        # type recorded in `dfforsalesdatause`.
								        #
								        # Per template column j the value is, in order of precedence:
								        #   1. the template default (as character) when one is given,
								        #   2. NA when no legacy key is given (missing or the text "NA"),
								        #   3. otherwise the legacy column named by `oldkey`.
								        # NOTE(review): the test passed to ifelse() is a single value, so
								        # ifelse() returns a single element. A whole column only survives if
								        # `old.copy[, key]` is a one-column data frame/tibble (as.vector()
								        # then yields a one-element list) -- confirm `old.copy` is a tibble.
								        for (j in seq_along(sel.template.desc.colnames)) {
								          temp[j] <-
								            ifelse(
								              !is.na(sel.template.desc$default[j]),
								              as.character(as.vector(sel.template.desc$default[j])),
								              ifelse(
								                sel.template.desc$oldkey[j] == "NA" |
								                  is.na(sel.template.desc$oldkey[j]),
								                NA,
								                as.vector(old.copy[, sel.template.desc$oldkey[j]])
								              )
								            )
								        }

								        # Rename the columns according to field description
								        print("renaming template ")
								        names(temp) <- sel.template.desc.colnames

								        # Create data frame from the list
								        df <- as.data.frame(temp)
								        print("Converted to data frame")

								        df$Currency <- str_to_title(df$Currency)

								        #if(substr(names(old_files[h]), 1, 2)=="DE"){stop()}
								        # External key is the corporate account key prefixed with "SD".
								        df$External_Key <- paste0("SD", df$Corporate_Account_External_Key)
								        # Sales organisation is the upper-cased first two characters of the
								        # current file's name followed by "01".
								        df$Sales_Organization_ID <-
								          paste0(toupper(substr(names(old_files[h]), 1, 2)), "01")
								        df <- df |> filter(!is.na(Corporate_Account_External_Key))

								        # Remember the template column set and order; the join below adds
								        # Customer_type_I only as a helper and it is dropped again at the end.
								        corr.seq <- colnames(df)

								        # Keep only rows whose corporate account key has an entry in the
								        # lookup. Duplicate keys in the lookup can fan rows out; the final
								        # unique() removes exact duplicates again.
								        df <-
								          df |> inner_join(dfforsalesdatause,
								                           by = c("Corporate_Account_External_Key" = "External_Key"))

								        # Customer types without a mapping get NA.
								        # NOTE(review): the comparison strings are raw source codes; confirm
								        # that `dfforsalesdatause$Customer_type_I` still holds raw codes at
								        # this point, otherwise no branch can match.
								        df <- df |>  mutate(
								          Customer_Group = case_when(
								            Customer_type_I == "03 Building Contractor" ~ "Industrial customer",
								            Customer_type_I == "05 Precast" ~ "Wholly-owned subsidiary",
								            Customer_type_I == "15 Steel contractors" ~ "Industrial customer",
								            Customer_type_I == "16 Timber contractors" ~ "Industrial customer",
								            Customer_type_I == "23 Transport" ~ "Trading company"
								          )
								        )

								        # all_of() marks corr.seq as an external character vector of column
								        # names; passing it bare to select() is deprecated in tidyselect and
								        # would silently pick a column if one were ever named `corr.seq`.
								        df <- df |> select(all_of(corr.seq)) |> unique()
								      }
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								      
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								      if (snames[i] == "Account_Team") {
								        # Build the "Account_Team" sheet: fill the template columns, then
								        # derive External_Key and Sales_Organization_ID and drop duplicates.
								        #
								        # Per template column j the value is, in order of precedence:
								        #   1. the template default (as character) when one is given,
								        #   2. NA when no legacy key is given (missing or the text "NA"),
								        #   3. otherwise the legacy column named by `oldkey`.
								        # NOTE(review): the test passed to ifelse() is a single value, so
								        # ifelse() returns a single element. A whole column only survives if
								        # `old.copy[, key]` is a one-column data frame/tibble (as.vector()
								        # then yields a one-element list) -- confirm `old.copy` is a tibble.
								        for (j in seq_along(sel.template.desc.colnames)) {
								          temp[j] <-
								            ifelse(
								              !is.na(sel.template.desc$default[j]),
								              as.character(as.vector(sel.template.desc$default[j])),
								              ifelse(
								                sel.template.desc$oldkey[j] == "NA" |
								                  is.na(sel.template.desc$oldkey[j]),
								                NA,
								                as.vector(old.copy[, sel.template.desc$oldkey[j]])
								              )
								            )
								          
								        }
								        
								        # Rename the columns according to field description
								        print("renaming template ")
								        names(temp) <- sel.template.desc.colnames
								        
								        # Create data frame from the list
								        df <- as.data.frame(temp)
								        print("Converted to data frame")
								        #if(substr(names(old_files[h]), 1, 2)=="DE"){stop()}
								        # External key is the corporate account key prefixed with "AT".
								        df$External_Key <- paste0("AT", df$Corporate_Account_External_Key)
								        # Sales organisation is the upper-cased first two characters of the
								        # current file's name followed by "01" (presumably the country code
								        # of the source workbook -- confirm against how old_files is named).
								        df$Sales_Organization_ID <-
								          paste0(toupper(substr(names(old_files[h]), 1, 2)), "01")
								        # Drop exact duplicate rows.
								        df<-df |> unique()
								        
								        # Disabled per-file Employee_ID overrides, kept for reference.
								        # NOTE(review): `names(old_files) == "CN.xlsx"` compares every file
								        # name at once; if this is re-enabled it probably needs
								        # `names(old_files[h])` as used above -- confirm.
								        # if(names(old_files)=="CN.xlsx"){
								        #   df$Employee_ID<-"226"
								        # }
								        # if(names(old_files)=="CZ.xlsx"){
								        #   df$Employee_ID<-"390"
								        # }
								        # if(names(old_files)=="FI.xlsx"){
								        #   df$Employee_ID<-"325"
								        # }
								        # if(names(old_files)=="DE.xlsx"){
								        #   df$Employee_ID<-"289"
								        # }
								        # if(names(old_files)=="IT.xlsx"){
								        #   df$Employee_ID<-"182"
								        # }
								        # if(names(old_files)=="PL.xlsx"){
								        #   df$Employee_ID<-"368"
								        # }
								        # if(names(old_files)=="ES.xlsx"){
								        #   df$Employee_ID<-"447"
								        # }
								        # if(names(old_files)=="SE.xlsx"){
								        #   df$Employee_ID<-"351"
								        # }
								        # if(names(old_files)=="NL.xlsx"){
								        #   df$Employee_ID<-"90052"
								        # }
								        # if(names(old_files)=="NO.xlsx"){
								        #   df$Employee_ID<-"000"
								        # }
								        
								      }
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								      
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								        # Build `df` for the "Account_Identification" sheet only: fill one entry
								        # of `temp` per template column, name the entries, convert to a data
								        # frame, de-duplicate, and add the External_Key column.
								        if (snames[i] == "Account_Identification") {
								        # For template column j the value is chosen in this order:
								        #   1. the template default, as character, when it is not NA;
								        #   2. NA when oldkey is the literal string "NA" or is missing;
								        #   3. otherwise the column of old.copy named by oldkey.
								        # NOTE(review): ifelse() shapes its result after `test`, which is a
								        # scalar here. Case 3 therefore only yields a whole column if
								        # as.vector(old.copy[, key]) is a one-element list (as it would be for a
								        # tibble); with a plain vector only the first value would survive.
								        # Presumably old.copy is a tibble read by readxl -- TODO confirm.
								        for (j in seq_along(sel.template.desc.colnames)) {
								          temp[j] <-
								            ifelse(
								              !is.na(sel.template.desc$default[j]),
								              as.character(as.vector(sel.template.desc$default[j])),
								              ifelse(
								                sel.template.desc$oldkey[j] == "NA" |
								                  is.na(sel.template.desc$oldkey[j]),
								                NA,
								                as.vector(old.copy[, sel.template.desc$oldkey[j]])
								              )
								            )
								          
								        }
								        
								        # Rename the columns according to field description
								        print("renaming template ")
								        names(temp) <- sel.template.desc.colnames
								        
								        # Create data frame from the list
								        # unique() drops fully duplicated rows.
								        df <- as.data.frame(temp) |> unique()
								        print("Converted to data frame")
								        #if(substr(names(old_files[h]), 1, 2)=="DE"){stop()}
								        # External key is "ID_" followed by the CorporateAccountExternalKey value.
								        df$External_Key <- paste0("ID_", df$CorporateAccountExternalKey)
								        
								      }
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								      
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								      # Error summary: remember how many rows df holds before validation.
								      Expected <- nrow(df)

								      # Headers of the template fields whose Mandatory flag is "Yes".
								      print("Identifying essential rows")
								      essential.columns <- pull(
								        filter(sel.template.desc, Mandatory == "Yes"),
								        Header
								      )

								      # Empty containers that the validation loops append to.
								      error.mandatory <- NULL
								      error.df <- data.frame(
								        Country = NULL, Name = NULL, Rows = NULL, Expected = NULL
								      )
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								      
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								      # Operate on essential columns including creation of error file.
								      #
								      # For every mandatory column: collect the rows of df where it is NA,
								      # keep them as a variable named error_mandatory_<CC>_<sheet>_<column>,
								      # write them to ./accounts/errors/mandatory/ when there are any, append
								      # a summary row to error.df, and drop those rows from df before the
								      # next column is checked.
								      #
								      # Country and Name are set before the loop so they exist even when
								      # there are no mandatory columns.
								      Country <- substr(names(old_files[h]), 1, 2)
								      Name <- snames[i]

								      for (k in seq_along(essential.columns)) {
								        # In case there are any default values (of mandatory) they need to be added here
								        #   if(essential.columns[k]=="International_Version"){
								        #     print("Found International Version. Adding 0.")
								        #   df$International_Version<-"0"
								        #   }
								        #   if(essential.columns[k]=="Status"){
								        #     print("Found Status")
								        #   df$Status<-"2"
								        # }

								        mandatory.col <- essential.columns[k]

								        print("Creating and writing data with missing mandatory values")
								        # Compute the NA mask once; it is reused to split df below.
								        is.missing <- is.na(df[, mandatory.col])
								        manerrdt <- df[is.missing, , drop = FALSE]
								        err.count <- nrow(manerrdt)
								        has.errors <- err.count > 0

								        if (has.errors) {
								          manerrdt <- manerrdt |>
								            mutate(error = paste0(mandatory.col, " missing"))
								        }

								        # Always keep the (possibly empty) error rows as a variable. The
								        # country code is characters 1-2 of the file name, the same as in
								        # the CSV path and in error.df.
								        assign(
								          paste0("error_mandatory_", Country, "_", Name, "_", mandatory.col),
								          manerrdt
								        )

								        # TO be saved in error files
								        if (has.errors) {
								          write.csv(
								            manerrdt,
								            paste0(
								              "./accounts/errors/mandatory/",
								              #Change path
								              Country,
								              "_",
								              Name,
								              "_",
								              mandatory.col,
								              "_error_mandatory.csv"
								            ),
								            row.names = FALSE,
								            na = ""
								          )
								        }

								        # Error summary file
								        err.type <- paste0("Missing ", mandatory.col)

								        print("Removing rows with empty essetial columns")
								        df <- df[!is.missing, , drop = FALSE]
								        if (has.errors) {
								          error.df <-
								            rbind(
								              error.df,
								              data.frame(
								                Country = Country,
								                Name = Name,
								                err.type = err.type,
								                err.count = err.count
								              )
								            ) #Error cal
								        }
								      }
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								      
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								      print("Identifying columns associated with codelists")
								      # List of columns that have a codelist
								      codelistcols <- sel.template.desc |>
								        filter(!is.na(`CodeList File Path`)) |> pull(Header)

								      # Set here so the summary rows do not depend on values left behind by
								      # earlier code.
								      Country <- substr(names(old_files[h]), 1, 2)
								      Name <- snames[i]

								      # For every codelist column: report the values that are not in the
								      # codelist's Description column (NA is not treated as a mismatch),
								      # blank them out, then replace each remaining description with the
								      # value from the codelist's second column.
								      for (k in seq_along(codelistcols)) {
								        code.col <- codelistcols[k]
								        print(paste0("Identifying errors ", code.col))

								        # Look the codelist up once per column instead of four times.
								        codelist <- codelist_files[code.col][[1]]
								        valid.desc <- pull(codelist, Description)
								        is.mismatch <- !df[, code.col] %in% c(valid.desc, NA)

								        # The names below become the column names of `def` and of the CSV.
								        def.rows <- which(is.mismatch)
								        def.n <- df[def.rows, 1]
								        def.rows.val <- df[is.mismatch, code.col]
								        def.colname <- rep(code.col, length.out = length(def.rows))
								        def <- data.frame(def.rows, def.n, def.rows.val, def.colname)

								        err.count <- nrow(def) #Error cal
								        if (err.count > 0) {
								          # Named after the file being processed (index h), so results for
								          # different files do not overwrite each other.
								          assign(
								            paste0("error_codematch_", Country, "_", Name, "_", code.col),
								            def
								          ) # TO be saved
								          write.csv(
								            def,
								            paste0(
								              "./accounts/errors/codelist/",
								              #Change path
								              Country,
								              "_",
								              Name,
								              "_",
								              code.col,
								              "_error_codematch_.csv"
								            ),
								            row.names = FALSE,
								            na = ""
								          )
								        }

								        err.type <- paste0("Codelist Mismatch ", code.col) #Error cal
								        if (err.count > 0) {
								          error.df <-
								            rbind(
								              error.df,
								              data.frame(
								                Country = Country,
								                Name = Name,
								                err.type = err.type,
								                err.count = err.count
								              )
								            ) #Error cal
								        }

								        print(paste0("Removing errors ", code.col))
								        # Removes any mismatch
								        df[is.mismatch, code.col] <- NA

								        # Matches each column with the corresponding code list and returns the value
								        df[, code.col] <-
								          pull(codelist, 2)[match(df[[code.col]], valid.desc)]

								        if (code.col == "Party_Role") {
								          # Extend the external key with the employee id and the mapped role.
								          df$External_Key <-
								            paste0(df$External_Key,
								                   "_",
								                   df$Employee_ID,
								                   "_",
								                   df$Party_Role)
								        }
								      }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								      # Per-field metadata taken from the template description.
								      # `Max Length` is coerced to numeric; how it is used is not visible in
								      # this part of the file.
								      max.length <- as.numeric(sel.template.desc$`Max Length`)
								      # `dtype` is indexed by column position in the type-conversion loop
								      # that follows.
								      dtype <- sel.template.desc$`Data Type`
								      # Reset the accumulators to NULL. NOTE(review): presumably filled by a
								      # later length check -- confirm. `lenght.issue.df` is spelled this way
								      # in the file.
								      rowval <- NULL
								      ival <- NULL
								      rval <- NULL
								      lenght.issue.df <- NULL
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								      # fname<- NULL
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								      # lname<- NULL
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								      # Changing the data class
								      # Coerce every column of `df` to the class named at the same position
								      # in `dtype` ("String", "Boolean", "DateTime" or "Time"). Columns whose
								      # declared type is missing, NA, or not one of these are left untouched.
								      # `seq_len()` keeps the loop from running when `df` has no columns, and
								      # `[[k]]` indexes strictly by position.
								      for (k in seq_len(ncol(df))) {
								        col_type <- dtype[k]
								        # A missing/NA declared type would make the `if` below fail.
								        if (length(col_type) != 1L || is.na(col_type)) {
								          next
								        }
								        if (col_type == "String") {
								          df[[k]] <- as.character(df[[k]])
								        } else if (col_type == "Boolean") {
								          df[[k]] <- as.logical(df[[k]])
								        } else if (col_type == "DateTime") {
								          df[[k]] <- lubridate::ymd_hms(df[[k]])
								        } else if (col_type == "Time") {
								          df[[k]] <- lubridate::hms(df[[k]])
								        }
								        # This list will increase and also change based on input date and
								        # time formats
								      }
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								      
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								      print("Rectifying streetname")
								      # Street and House Number
								      # Split the combined "Street" value into the leading non-digit part
								      # (street name) and the remainder starting at the first digit (house
								      # number), then store them as `Street` and `House_Number` and restore
								      # the template column order.
								      # NOTE(review): per tidyr's documentation, rows whose value does not
								      # match the pattern (no digit, or a digit as first character) get NA
								      # in both new columns, and the captured street name keeps any trailing
								      # space. Confirm this is acceptable for the input data.
								      if ("Street" %in% colnames(df)) {
								        print("found street")
								        df <- tidyr::extract(
								          df,
								          "Street",
								          c("Streetname", "HouseNumber"),
								          "(\\D+)(\\d.*)"
								        )
								        df <- df |>
								          # any_of(): dropping the old column must not fail when the
								          # input has no House_Number column to begin with.
								          select(-any_of("House_Number")) |>
								          rename(Street = Streetname, House_Number = HouseNumber) |>
								          select(all_of(sel.template.desc.colnames))
								      }
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								      
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								      # Rectifying Phone, Mobile and Fax numbers
								      # Replace every "+" with "00" in each of these columns that is present
								      # in `df`. The original code handled "Mobile" twice and never touched
								      # "Fax"; all three columns named above are now covered once each.
								      for (number_col in c("Phone", "Mobile", "Fax")) {
								        if (number_col %in% colnames(df)) {
								          print(paste0("Found ", number_col))
								          df[[number_col]] <- gsub("[+]", "00", df[[number_col]])
								        }
								      }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								      
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								      # Length Rectification
								      # For every character column of `df`:
								      #   * record the rows whose value is longer than `max.length[k]`
								      #     (original value, its length, the limit, the column name, the
								      #     value of the first column and the two-letter file prefix) and
								      #     append them to `lenght.issue.df`;
								      #   * truncate those values to the limit;
								      #   * append one row to `error.df` when at least one value was too long.
								      # Non-character columns are skipped entirely, so they can no longer
								      # re-report the values left over from the previous character column.
								      colclasses <- lapply(df, class)
								      print("Rectifying Length")
								      issue_pieces <- list()
								      for (k in seq_len(ncol(df))) {
								        colval <- df[[k]]
								        # Test the column itself: class() may have length > 1 (e.g. for
								        # POSIXct), and a length > 1 `if` condition is an error in R >= 4.2.
								        if (!is.character(colval)) {
								          next
								        }
								        print("found character column ")
								        rowval <- df[[1]]
								        n_chars <- nchar(colval)
								        # Empty and missing values are reported with length 1.
								        ival <- ifelse(n_chars == 0 | is.na(n_chars), 1, n_chars)
								        rval <- max.length[k]
								        colnm <- colnames(df)[k]
								        cntr <- substr(names(old_files[h]), 1, 2)
								        # fname<-pull(df, 8)
								        # lname<-pull(df, 9)
								        print(" Values identified")

								        # rectifying data length
								        # Only touch values that are really too long. With a missing limit
								        # the comparison is NA, which previously turned the whole column
								        # into NA; such columns are now left unchanged.
								        too_long <- !is.na(rval) & !is.na(n_chars) & n_chars > rval
								        trimmed <- colval
								        trimmed[too_long] <- substring(colval[too_long], 1, rval)
								        df[[k]] <- trimmed
								        print("Trimmed")

								        # Keep only the offending rows; pieces are bound once after the
								        # loop instead of growing the data frame on every iteration.
								        over_limit <- which(ival > rval)
								        n_over <- length(over_limit)
								        issue_pieces[[length(issue_pieces) + 1L]] <- data.frame(
								          rowval = rowval[over_limit],
								          ival = ival[over_limit],
								          rval = rep(rval, n_over),
								          colnm = rep(colnm, n_over),
								          colval = colval[over_limit],
								          cntr = rep(cntr, n_over)
								        )

								        err.type <- paste0("Length error ", colnm) # Error cal
								        err.count <- n_over # Error cal
								        if (err.count > 0) {
								          error.df <- rbind(
								            error.df,
								            data.frame(
								              Country = Country,
								              Name = Name,
								              err.type = err.type,
								              err.count = err.count
								            )
								          ) # Error cal
								        }
								      }
								      if (length(issue_pieces) > 0) {
								        lenght.issue.df <-
								          do.call(rbind, c(list(lenght.issue.df), issue_pieces))
								      } else if (is.null(lenght.issue.df)) {
								        # No character column at all: provide an empty frame with the
								        # expected columns so later filtering on ival/rval still works.
								        lenght.issue.df <- data.frame(
								          rowval = character(0),
								          ival = numeric(0),
								          rval = numeric(0),
								          colnm = character(0),
								          colval = character(0),
								          cntr = character(0)
								        )
								      }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								      
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								      # Keep only the rows whose recorded length exceeds the limit and, if
								      # any remain, write them to the length-error folder. The file name is
								      # the two-letter prefix of the current input file name plus the
								      # current `snames` entry.
								      lenght.issue.df <- lenght.issue.df |>
								        dplyr::filter(ival > rval)
								      if (nrow(lenght.issue.df) > 0) {
								        length_error_path <- paste0(
								          "./accounts/errors/length/",
								          # Change path
								          substr(names(old_files[h]), 1, 2),
								          "_",
								          snames[i],
								          "_length_error.csv"
								        )
								        write.csv(
								          lenght.issue.df,
								          file = length_error_path,
								          row.names = FALSE,
								          na = ""
								        )
								      }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								      
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								      # Bind the cleaned data to a variable named after `snames[i]` and
								      # write it to the output folder as UTF-8 CSV with empty strings for NA.
								      # write.csv() always writes comma-separated files and ignores `sep`
								      # with a warning, so the former `sep = ";"` argument was dropped; the
								      # file content is unchanged (still comma-separated).
								      assign(snames[i], df)
								      write.csv(
								        df,
								        paste0(
								          "./accounts/output/",
								          substr(names(old_files[h]), 1, 2),
								          "_",
								          snames[i],
								          ".csv"
								        ),
								        row.names = FALSE,
								        na = "",
								        fileEncoding = "UTF-8"
								      ) # Change path
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								      # Write the accumulated per-column error counts to the summary
								      # folder, but only when there is at least one row to report.
								      if (nrow(error.df) > 0) {
								        error_path <- paste0(
								          "./accounts/summary/",
								          substr(names(old_files[h]), 1, 2),
								          "_",
								          snames[i],
								          "_error",
								          ".csv"
								        )
								        write.csv(
								          error.df,
								          file = error_path,
								          row.names = FALSE,
								          na = ""
								        ) # Error write
								      }
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								      # Append one summary row (expected vs. actual row count of `df`) to
								      # `err.summ`.
								      summary_row <- data.frame(
								        Country = Country,
								        Name = Name,
								        Expected = Expected,
								        Actual = nrow(df)
								      )
								      err.summ <- rbind(err.summ, summary_row) # Error Cal
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								    }
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								    # Write the row-count summary collected in `err.summ` to the summary
								    # folder.
								    # NOTE(review): this statement sits after the closing brace of the
								    # block that works with `snames[i]`, so `i` presumably still holds its
								    # last value here -- confirm the file name is the intended one.
								    summary_path <- paste0(
								      "./accounts/summary/",
								      substr(names(old_files[h]), 1, 2),
								      "_",
								      snames[i],
								      "_sumerror",
								      ".csv"
								    )
								    write.csv(
								      err.summ,
								      file = summary_path,
								      row.names = FALSE,
								      na = ""
								    ) # Error Write
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								    
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								  }
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								}
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								# Record the time at which this chunk finished.
								end <- Sys.time()

								# Show the elapsed time; `strt` is expected to have been set earlier in
								# the document (not visible here).
								end - strt
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								*The code failed because the Department column appears several times in the data, and R renamed the duplicates to Department..xx on import.*
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								*Manually verify if these are the required templates*
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								```{r}
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								# Collect the per-sheet CSV files from the output folder and read each one
								# with every column as character.
								# `pattern` is a regular expression, not a glob: "\\.csv$" matches only
								# names ending in ".csv" (the former "*.csv" matched "csv" anywhere in
								# the name). The files are read as UTF-8, the encoding this document
								# declares when writing them.
								opfilepath <- list.files(
								  "./accounts/output",
								  pattern = "\\.csv$",
								  full.names = TRUE
								)
								opfiles <- lapply(
								  opfilepath,
								  read.csv,
								  colClasses = "character",
								  header = TRUE,
								  row.names = NULL,
								  fileEncoding = "UTF-8"
								)
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								# Stack all files whose path contains "Account_Sales_Data", drop
								# duplicate rows and write the result as CSV and as XLSX.
								# write.csv() ignores `sep` with a warning (it always writes commas), so
								# the redundant `sep = ","` argument was removed.
								opdf <- do.call(
								  rbind.data.frame,
								  opfiles[grepl("Account_Sales_Data", opfilepath)]
								) |>
								  unique()
								write.csv(
								  opdf,
								  "./accounts/output/combined/combinedsalesdata.csv",
								  row.names = FALSE,
								  na = "",
								  fileEncoding = "UTF-8"
								)
								openxlsx::write.xlsx(
								  opdf,
								  "./accounts/output/combined/combinedsalesdata.xlsx"
								)
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								# Stack all files whose path contains "Account_Team", drop duplicate
								# rows and write the result as CSV and as XLSX.
								# write.csv() ignores `sep` with a warning (it always writes commas), so
								# the redundant `sep = ","` argument was removed.
								opdf <- do.call(
								  rbind.data.frame,
								  opfiles[grepl("Account_Team", opfilepath)]
								) |>
								  unique()
								write.csv(
								  opdf,
								  "./accounts/output/combined/combinedaccountteam.csv",
								  row.names = FALSE,
								  na = "",
								  fileEncoding = "UTF-8"
								)

								openxlsx::write.xlsx(
								  opdf,
								  "./accounts/output/combined/combinedaccountteam.xlsx"
								)
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								# Stack every fourth file of `opfiles` (positions 4, 8, ..., 40), drop
								# duplicate rows and write the result as CSV and as XLSX.
								# NOTE(review): unlike the neighbouring blocks, which pick files by name
								# with grepl(), this selection depends on the number and sort order of
								# the files in the output folder -- confirm the positions still point at
								# the account files, or switch to a name-based filter.
								# write.csv() ignores `sep` with a warning (it always writes commas), so
								# the redundant `sep = ","` argument was removed.
								opdf <- do.call(
								  rbind.data.frame,
								  opfiles[c(4, 8, 12, 16, 20, 24, 28, 32, 36, 40)]
								) |>
								  unique()
								write.csv(
								  opdf,
								  "./accounts/output/combined/combinedaccount.csv",
								  row.names = FALSE,
								  na = "",
								  fileEncoding = "UTF-8"
								)

								openxlsx::write.xlsx(
								  opdf,
								  "./accounts/output/combined/combinedaccount.xlsx"
								)
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								# Combine all entries of `opfiles` whose path in `opfilepath` contains
								# "Account_Identification", drop exact duplicate rows, and export the result
								# as CSV and XLSX.
								opdf <-
								  do.call(
								    rbind.data.frame,
								    opfiles[grepl("Account_Identification", opfilepath)]
								  ) |>
								  unique()

								# `write.csv()` always writes comma-separated output; passing `sep` is ignored
								# with a warning, so it is not supplied here.
								write.csv(
								  opdf,
								  file = "./accounts/output/combined/combinedaccountidentification.csv",
								  row.names = FALSE,
								  na = "",
								  fileEncoding = "UTF-8"
								)

								openxlsx::write.xlsx(
								  opdf,
								  "./accounts/output/combined/combinedaccountidentification.xlsx"
								)
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								# Duplicate check
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								```{r}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								# Load the combined account export written earlier and the `accoct.csv`
								# extract (assigned to `sapacc`).
								accwav2 <- read.csv(file = "./accounts/output/combined/combinedaccount.csv")
								sapacc <- read.csv(file = "accoct.csv")

								# Write every row of `accwav2` whose `Name` occurs more than once in that
								# table. Base indexing is used so the original row names end up in the file.
								write.csv(
								  accwav2[accwav2$Name %in% accwav2$Name[duplicated(accwav2$Name)], ],
								  file = "./accounts/errors/duplicateaccountsinsource.csv"
								)
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								# Bring an account table into the common layout used for the duplicate
								# comparison.
								#
								# accounts:       data frame containing at least the columns selected below.
								# source_label:   value stored in the new `source` column.
								# employee_codes: lookup table joined on `Employee_ID`; the code relies on it
								#                 carrying a `Name` column (the join is expected to produce
								#                 `Name.x` / `Name.y`).
								#
								# Returns the reduced table without AT/CH rows, with blank or missing
								# external keys replaced by a placeholder and `Owner_ID` replaced by the
								# employee name wherever the lookup has a match.
								prepare_accounts <- function(accounts, source_label, employee_codes) {
								  accounts |>
								    select(
								      External_Key, Account_ID, Former_CRM_reference, Name, CountryRegion,
								      House_Number, Street, City, Postal_Code, EMail, Owner_ID
								    ) |>
								    mutate(source = source_label) |>
								    filter(!CountryRegion %in% c("AT", "CH")) |>
								    # `== ""` alone yields NA for missing keys, which would leave them
								    # unlabelled; treat NA the same as an empty string.
								    mutate(
								      External_Key = ifelse(
								        is.na(External_Key) | External_Key == "",
								        "EMPTY IN SAP",
								        External_Key
								      )
								    ) |>
								    left_join(employee_codes, by = c("Owner_ID" = "Employee_ID")) |>
								    # Keep the raw owner id when the lookup has no matching employee.
								    mutate(Owner_ID = ifelse(is.na(Name.y), Owner_ID, Name.y)) |>
								    rename(Name = Name.x) |>
								    select(-Name.y)
								}

								sapacc <- prepare_accounts(sapacc, "S4-CAA200", employeecodes)

								accwav2 <- prepare_accounts(accwav2, "Legacy CRM", employeecodes)
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								# Stack both prepared account tables and report every row whose `Name`
								# occurs more than once in the combined data.
								fullacc <- rbind(accwav2, sapacc)

								# `errorsource` is the row's own source when all rows sharing that name come
								# from a single source, and "Both" when the name appears in more than one.
								fullacc[
								  duplicated(fullacc$Name) | duplicated(fullacc$Name, fromLast = TRUE),
								] |>
								  rename(Source = source) |>
								  arrange(Name) |>
								  group_by(Name) |>
								  mutate(errorsource = if (n_distinct(Source) == 1) Source else "Both") |>
								  ungroup() |>
								  # Key columns first, all remaining columns after them in their existing order.
								  select(External_Key, Name, Source, errorsource, everything()) |>
								  # TODO: check if we need to send all, because several are same names in SAP.
								  # Optional: narrow the report to one origin before exporting, e.g.
								  #   filter(errorsource == "Legacy CRM") |>
								  #   filter(errorsource == "S4-CAA200") |>
								  #   filter(errorsource == "Both") |>
								  write.csv("./accounts/errors/duplicateaccounts.csv", row.names = FALSE)
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								```