---
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								title: "Accounts"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								author: "Scary Scarecrow"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								date: "1/10/2022"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								output: html_document
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								---
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								```{r setup, include=FALSE}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								knitr::opts_chunk$set(echo = TRUE)
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								library(readxl)
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								library(dplyr)
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								library(lubridate)
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								library(DT)
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								library(tidyr)
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								library(stringr)
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								# Build one empty data frame per template sheet.
								#
								# Reads two globals that must exist when the function is called:
								#   saptemplate - field-definition table with `Sheet Name` and `Header` columns
								#   snames      - character vector of sheet names to build
								#
								# Returns (invisibly) a list named after `snames`. Each element is a zero-row
								# data frame whose columns come from the unique `Header` values of that sheet
								# (read.table() may sanitise the names, as it did before).
								mutlstxlrdr <- function() {
								  empty_sheets <- lapply(snames, function(sheet) {
								    headers <- unique(saptemplate[saptemplate$`Sheet Name` == sheet, ]$Header)
								    # text = "" gives a zero-row frame without opening an anonymous file
								    # connection, which read.table("") did.
								    read.table(text = "", col.names = headers)
								  })
								  names(empty_sheets) <- snames
								  invisible(empty_sheets)
								}
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								# Remove the files currently sitting in the error, summary and output
								# folders. Only entries directly inside each folder are passed to
								# file.remove(); the folders themselves are kept.
								invisible(lapply(
								  c(
								    "./accounts/errors/mandatory/",
								    "./accounts/errors/codelist/",
								    "./accounts/errors/length/",
								    "./accounts/summary/",
								    "./accounts/output/"
								  ),
								  function(folder) file.remove(list.files(folder, full.names = TRUE))
								))
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								## Data transformation workflow
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								Following is the proposed preliminary workflow for the data transformation project.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								>All files of a segment (contacts, accounts, etc.) should be inside the relevant folder. Each segment folder should contain one folder for all code-list files. All legacy data (one file per country, each file named after its country) should be inside the raw-data folder. Another file holding the field definitions, including the name of the matching column in the legacy file, should also be present.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								>*Make sure that there are no hidden files inside the directory.*
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								### Code Lists
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								```{r Create List of Files, echo=TRUE, message=FALSE, warning=FALSE}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								# Paths of the Excel code-list workbooks.
								# We can avoid creating a separate directory for code list. But organizing
								# may be difficult. However, this can be explored further if we want to
								# transform all the data in one go, i.e. not by functions (contacts,
								# accounts etc.).
								#
								# `pattern` is a regular expression, not a shell glob: anchor it on the
								# extension so that only names ending in .xls, .xlsx or .xlsm are listed.
								filenames <- list.files(
								  "./accounts/CodeList",
								  pattern = "\\.xls[xm]?$",
								  full.names = TRUE
								)

								# File paths
								print(filenames)
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								Check manually whether the above list includes all the code-list files.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								If correct, then read the files.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								```{r codelistreader, echo=TRUE, message=FALSE, warning=FALSE}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								# Sheet names of every code-list workbook, one character vector per file.
								sheet_names <- lapply(filenames, excel_sheets)

								# Read every sheet of every workbook as text and collect them in one flat
								# list whose elements are named after their sheet.
								codelist_files <- NULL
								for (i in seq_along(filenames)) {
								  a <- setNames(
								    lapply(
								      sheet_names[[i]],
								      function(sheet) {
								        read_excel(path = filenames[[i]], sheet = sheet, col_types = "text")
								      }
								    ),
								    sheet_names[[i]]
								  )
								  codelist_files <- c(codelist_files, a)
								}

								# Names of the sheets imported
								names(codelist_files)
								#codelist_files<-unique(codelist_files)
								codelist_files$Customer_type_I
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								### Templates
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								Let us now extract the data. Below we are reading only one file having all data related to `Contacts` from the legacy system.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								```{r readlegacyfilepath, echo=TRUE, message=FALSE, warning=FALSE}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								# Paths of the legacy workbooks (change the path and check the pattern when
								# reusing this for another segment).
								# `pattern` is a regular expression, not a shell glob: anchor it on the
								# extension so that only names ending in .xls, .xlsx or .xlsm are listed.
								oldfilepath <- list.files(
								  "./accounts/raw-data",
								  pattern = "\\.xls[xm]?$",
								  full.names = TRUE
								)
								print(oldfilepath)
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								Check manually whether the list matches the actual files.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								```{r readlegacyfiles, echo=TRUE}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								# Read the first sheet of every legacy workbook into a list.
								# lapply() returns an empty list when no workbook was found, so the
								# names<- call below stays valid in that case.
								old_files <- lapply(
								  oldfilepath,
								  function(path) read_excel(path = path, sheet = 1)
								)
								old_files

								# Name each element after its file. basename() drops the directory part
								# without treating the path as a regular expression.
								names(old_files) <- basename(oldfilepath)
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								*Some errors were noticed in the legacy file: columns with similar or identical names exist.*
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								```{r readSAPtemplate, echo=TRUE, message=FALSE, warning=FALSE}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								# Load the field definitions from the "Field_Definitions" sheet of the
								# template workbook.
								saptemplate <- read_excel(
								  path = "./accounts/template.xlsx",
								  sheet = "Field_Definitions"
								)

								# Show the first few rows of the imported definitions
								head(saptemplate)
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								*Please note that the format of the tables (sheet) has been slightly changed. Earlier the corresponding sheet name was mentioned in a row before the actual table. Now, all the rows mention the corresponding sheet name. This was done manually for convenience of data extraction*
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								## Don't have Status column defined
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								## There could be an issue in line of business
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								```{r createmptySAPfiles, echo=TRUE, message=FALSE, warning=FALSE}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								#orilo<-"en_US.UTF-8"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								#Sys.setlocale(locale="en_US.UTF-8")
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								strt<-Sys.time()
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								snames <- unique(saptemplate$`Sheet Name`)
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								for (h in seq_along(old_files)) {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								  
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								  print("Importing new")
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								  # Copy original data
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								  old.copy <- old_files[[h]]
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								  print(paste0(names(old_files[h])," imported"))
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								  err.summ<-data.frame(Country=NULL, Name=NULL, Expected=NULL, Actual=NULL) #Error Cal
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								  # Creates data frame for each sheet in snames
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								  for (i in seq_along(snames)) {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								    print(paste0("Processing ..",snames[i]))
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								    if(snames[i] %in% c("Account","Account_Identification","Account_Sales_Data","Account_Team")){
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								    # Select the column names from the field description sheet
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								    print("Creating template")
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								    sel.template.desc <-
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								      saptemplate[saptemplate$`Sheet Name` == snames[i], ]
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								    print("Creating column names")
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								    sel.template.desc.colnames <- sel.template.desc$Header
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								    
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								    # Create a list by adding values from corresponding legacy data
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								    temp <- NULL
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								    print("adding values to template ")
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								    
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								    if(snames[i] %in% c("Account_Addresses", "Account_Contact_Persons",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								                        "Account_Identification","Account_International_Version",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								                        "Account_Skills","Account_Tax_Numbers","Account_Notes",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								                        "Account_Visiting_Hours","Account_Visits_Details",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								                        "Account_Visiting_Hours_Weekly_R","Account_Visiting_Times")){
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								      next
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								    }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								    
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								    if(snames[i] == "Account"){
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								      for (j in seq_along(sel.template.desc.colnames)) {
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								      
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								      temp[j] <- ifelse(!is.na(sel.template.desc$default[j]), as.character(as.vector(sel.template.desc$default[j])),
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								                        ifelse(
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								                          sel.template.desc$oldkey[j]=="NA" | is.na(sel.template.desc$oldkey[j]), NA, 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								                          as.vector(old.copy[, sel.template.desc$oldkey[j]])
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								                        )
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								      )
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								      }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								       # Rename the columns according to field description
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								    print("renaming template ")
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								    names(temp) <- sel.template.desc.colnames
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								    
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								    # Create data frame from the list
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								    df <- as.data.frame(temp)
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								    print("Converted to data frame")
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								      
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								    
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								    print("Implementing Line of Business transformations")  
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								    
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								      df<- df |> 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								      mutate(Customer_type_I= case_when(
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								        Customer_type_I == "03 Building Contractor" ~ "General Contractor",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								        Customer_type_I == "12 Engineering construction" ~ "Engineer",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								        Customer_type_I == "15 Steel contractors" ~ "Specialist Sub Contractor",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								        Customer_type_I == "16 Timber contractors" ~ "Specialist Sub Contractor",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								        Customer_type_I == "18 Engineering office - civil eng." ~ "Engineer",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								        Customer_type_I == "19 Engineering office - steel/framing constructions" ~ "Engineer",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								        Customer_type_I == "20 Architects" ~ "Architect"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								        
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								      ))
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								    
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								      df <- df |> 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								      mutate(Industry= case_when(
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								        Customer_type_I == "01 Trader Building Constructions" ~ "Construction",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								        Customer_type_I == "02 Trader Steel Constructions" ~ "Construction",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								        Customer_type_I == "12 Engineering construction" ~ "Construction",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								        Customer_type_I == "13 Machine construction" ~ "Construction",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								        Customer_type_I == "14 Energy and power plants" ~ "Utilities",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								        Customer_type_I == "21 Universities, public institutions and associations" ~ "Educational services",
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								        Customer_type_I == "23 Transport" ~ "Transportation and warehousing"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								        
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								      ))
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								      
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								      dfforsalesdatause<-df |> select(c(External_Key, Customer_type_I)) |> filter(!is.na(External_Key))
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								    }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								    
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								    # Account_Sales_Data sheet: fill the template columns from the old export,
								    # derive the keys, and set Customer_Group from the customer type collected
								    # in `dfforsalesdatause`.
								    if (snames[i] == "Account_Sales_Data") {

								      # One entry of `temp` per template column: a non-missing template
								      # default wins; otherwise the mapped column of `old.copy` is used, or NA
								      # when no old column is mapped ("NA" string or missing `oldkey`).
								      # NOTE(review): the scalar ifelse() looks like it relies on `old.copy`
								      # being a tibble, so the whole column survives as a single list
								      # element -- confirm before simplifying this expression.
								      for (j in seq_along(sel.template.desc.colnames)) {
								        temp[j] <- ifelse(
								          !is.na(sel.template.desc$default[j]),
								          as.character(as.vector(sel.template.desc$default[j])),
								          ifelse(
								            sel.template.desc$oldkey[j] == "NA" | is.na(sel.template.desc$oldkey[j]),
								            NA,
								            as.vector(old.copy[, sel.template.desc$oldkey[j]])
								          )
								        )
								      }

								      # Rename the columns according to field description
								      print("renaming template ")
								      names(temp) <- sel.template.desc.colnames

								      # Create data frame from the list
								      df <- as.data.frame(temp)
								      print("Converted to data frame")

								      df$Currency <- str_to_title(df$Currency)

								      #if(substr(names(old_files[h]), 1, 2)=="DE"){stop()}
								      # Keys: "SD" + corporate account key; sales organisation is the first
								      # two characters of the source file name (upper-cased) + "01".
								      df$External_Key <- paste0("SD", df$Corporate_Account_External_Key)
								      df$Sales_Organization_ID <- paste0(
								        toupper(substr(names(old_files[h]), 1, 2)),
								        "01"
								      )
								      df <- df |> filter(!is.na(Corporate_Account_External_Key))

								      # Remember the column set and order so the helper column brought in by
								      # the join can be dropped again below.
								      corr.seq <- colnames(df)

								      # Keep only rows whose corporate account key is present in
								      # `dfforsalesdatause`; this brings in Customer_type_I.
								      df <- df |>
								        inner_join(
								          dfforsalesdatause,
								          by = c("Corporate_Account_External_Key" = "External_Key")
								        )

								      # Map the customer type to a customer group; types not listed here
								      # end up as NA (no default branch in case_when()).
								      df <- df |>
								        mutate(Customer_Group = case_when(
								          Customer_type_I == "03 Building Contractor" ~ "Industrial customer",
								          Customer_type_I == "05 Precast" ~ "Wholly-owned subsidiary",
								          Customer_type_I == "15 Steel contractors" ~ "Industrial customer",
								          Customer_type_I == "16 Timber contractors" ~ "Industrial customer",
								          Customer_type_I == "23 Transport" ~ "Trading company"
								        ))

								      # Restore the column set captured before the join. all_of() makes it
								      # explicit that `corr.seq` is an external character vector and not a
								      # column name (bare external vectors are deprecated in tidyselect).
								      df <- df |> select(all_of(corr.seq))
								    }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								    
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								    # Account_Team sheet: fill the template columns from the old export and
								    # derive the key columns.
								    if (snames[i] == "Account_Team") {

								      # One entry of `temp` per template column: a non-missing template
								      # default wins; otherwise the mapped column of `old.copy` is used, or NA
								      # when no old column is mapped ("NA" string or missing `oldkey`).
								      # NOTE(review): the scalar ifelse() looks like it relies on `old.copy`
								      # being a tibble, so the whole column survives as a single list
								      # element -- confirm before simplifying this expression.
								      for (j in seq_along(sel.template.desc.colnames)) {
								        temp[j] <- ifelse(
								          !is.na(sel.template.desc$default[j]),
								          as.character(as.vector(sel.template.desc$default[j])),
								          ifelse(
								            sel.template.desc$oldkey[j] == "NA" | is.na(sel.template.desc$oldkey[j]),
								            NA,
								            as.vector(old.copy[, sel.template.desc$oldkey[j]])
								          )
								        )
								      }

								      # Name the list entries after the template headers
								      print("renaming template ")
								      names(temp) <- sel.template.desc.colnames

								      # Turn the named list into a data frame
								      df <- as.data.frame(temp)
								      print("Converted to data frame")

								      #if(substr(names(old_files[h]), 1, 2)=="DE"){stop()}
								      # Keys: "AT" + corporate account key; sales organisation is the first
								      # two characters of the source file name (upper-cased) + "01".
								      df$External_Key <- paste0("AT", df$Corporate_Account_External_Key)
								      df$Sales_Organization_ID <- paste0(
								        toupper(substr(names(old_files[h]), 1, 2)),
								        "01"
								      )
								    }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								    
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								    # Error summary: row count of the sheet before any validation step
								    Expected <- nrow(df)

								    # Headers of the template fields flagged as mandatory
								    print("Identifying essential rows")
								    essential.columns <- sel.template.desc |>
								      filter(Mandatory == "Yes") |>
								      pull(Header)

								    # Fresh accumulators for this sheet; `error.df` starts as an empty data
								    # frame (all-NULL columns) and is filled by rbind() in the checks below.
								    error.mandatory <- NULL
								    error.df <- data.frame(Country = NULL, Name = NULL, Rows = NULL, Expected = NULL)
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								    
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								    # Mandatory-field check. For every mandatory column: collect the rows where
								    # it is missing, keep them as an in-memory object and as a CSV error file,
								    # add a line to the error summary, then drop those rows from `df`.
								    for (k in seq_along(essential.columns)) { # In case there are any default values (of mandatory) they need to be added here
								      #   if(essential.columns[k]=="International_Version"){
								      #     print("Found International Version. Adding 0.")
								      #   df$International_Version<-"0"
								      #   }
								      #   if(essential.columns[k]=="Status"){
								      #     print("Found Status")
								      #   df$Status<-"2"
								      # }

								      print("Creating and writing data with missing mandatory values")
								      manerrdt <- df[is.na(df[, essential.columns[k]]), ]

								      # Was `nrow(manerrdt>0)`: the comparison belongs outside nrow().
								      if (nrow(manerrdt) > 0) {
								        manerrdt <- manerrdt |>
								          mutate(error = paste0(essential.columns[k], " missing"))
								      }

								      # Keep the offending rows as error_mandatory_<CC>_<sheet>_<column>.
								      # The country prefix is characters 1-2 of the file name, as everywhere
								      # else in this section (it used to be taken from characters 2-3 here).
								      assign(
								        paste0(
								          "error_mandatory_",
								          substr(names(old_files[h]), 1, 2),
								          "_",
								          snames[i],
								          "_",
								          essential.columns[k]
								        ),
								        manerrdt
								      )

								      # TO be saved in error files
								      if (nrow(manerrdt) > 0) {
								        write.csv(
								          manerrdt,
								          paste0(
								            "./accounts/errors/mandatory/", # Change path
								            substr(names(old_files[h]), 1, 2),
								            "_",
								            snames[i],
								            "_",
								            essential.columns[k],
								            "_error_mandatory.csv"
								          ),
								          row.names = FALSE,
								          na = ""
								        )
								      }

								      # Error summary file
								      Country <- substr(names(old_files[h]), 1, 2)
								      Name <- snames[i]
								      err.type <- paste0("Missing ", essential.columns[k])
								      # Same rows as selected above, so no second NA scan is needed.
								      err.count <- nrow(manerrdt)

								      print("Removing rows with empty essetial columns")
								      df <- df[!is.na(df[, essential.columns[k]]), ]

								      if (err.count > 0) {
								        error.df <- rbind(
								          error.df,
								          data.frame(
								            Country = Country,
								            Name = Name,
								            err.type = err.type,
								            err.count = err.count
								          )
								        ) # Error cal
								      }
								    }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								    
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								    print("Identifying columns associated with codelists")
								    # Template fields that have a code list attached
								    codelistcols <- sel.template.desc |>
								      filter(!is.na(`CodeList File Path`)) |>
								      pull(Header)

								    # Set here as well, so the summary rows below do not depend on the
								    # mandatory-column loop having executed at least once.
								    Country <- substr(names(old_files[h]), 1, 2)
								    Name <- snames[i]

								    # Code list check. For every such column: report the values that are not
								    # a known Description, blank them, then replace the remaining
								    # descriptions by the second column of the code list.
								    for (k in seq_along(codelistcols)) {

								      print(paste0("Identifying errors ", codelistcols[k]))

								      # Look the code list up once per column instead of once per use.
								      codelist.tbl <- codelist_files[codelistcols[k]][[1]]
								      codelist.desc <- pull(codelist.tbl, Description)
								      # TRUE for values that are neither a known description nor NA
								      mismatch.mask <- !df[, codelistcols[k]] %in% c(codelist.desc, NA)

								      def.rows <- which(mismatch.mask)
								      def.n <- df[def.rows, 1]
								      def.rows.val <- df[mismatch.mask, codelistcols[k]]
								      def.colname <- rep(codelistcols[k], length.out = length(def.rows))
								      def <- data.frame(def.rows, def.n, def.rows.val, def.colname)

								      # Was `nrow(def>0)`: the comparison belongs outside nrow().
								      if (nrow(def) > 0) {
								        # Keep the mismatches as error_codematch_<CC>_<sheet>_<column>.
								        # The prefix comes from the current file `h` (it used to be taken
								        # from the first file, so results of different files overwrote
								        # each other).
								        assign(
								          paste0(
								            "error_codematch_",
								            substr(names(old_files[h]), 1, 2),
								            "_",
								            snames[i],
								            "_",
								            codelistcols[k]
								          ),
								          def
								        ) # TO be saved
								        write.csv(
								          def,
								          paste0(
								            "./accounts/errors/codelist/", # Change path
								            substr(names(old_files[h]), 1, 2),
								            "_",
								            snames[i],
								            "_",
								            codelistcols[k],
								            "_error_codematch_.csv"
								          ),
								          row.names = FALSE,
								          na = ""
								        )
								      }

								      err.type <- paste0("Codelist Mismatch ", codelistcols[k]) # Error cal
								      err.count <- nrow(def) # Error cal
								      if (err.count > 0) {
								        error.df <- rbind(
								          error.df,
								          data.frame(
								            Country = Country,
								            Name = Name,
								            err.type = err.type,
								            err.count = err.count
								          )
								        ) # Error cal
								      }

								      print(paste0("Removing errors ", codelistcols[k]))
								      # Removes any mismatch
								      df[mismatch.mask, codelistcols[k]] <- NA

								      # Matches each column with the corresponding code list and returns the value
								      df[, codelistcols[k]] <-
								        pull(codelist.tbl, 2)[match(pull(df, codelistcols[k]), codelist.desc)]

								      # Once Party_Role holds its mapped value, extend the key with the
								      # employee and the role.
								      if (codelistcols[k] == "Party_Role") {
								        df$External_Key <- paste0(
								          df$External_Key, "_", df$Employee_ID, "_", df$Party_Role
								        )
								      }
								    }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								    # Per-field metadata from the template description, plus the (typo'd but
								    # kept as-is) accumulators that are initialised here.
								    max.length <- as.numeric(sel.template.desc$`Max Length`)
								    dtype <- sel.template.desc$`Data Type`
								    rowval <- NULL
								    ival <- NULL
								    rval <- NULL
								    lenght.issue.df <- NULL

								    # Changing the data class: column k of `df` is converted according to the
								    # k-th `Data Type` entry of the template description.
								    # seq_len() keeps the loop from running over c(1, 0) when `df` has no
								    # columns.
								    for (k in seq_len(ncol(df))) {

								      # No declared type for this position (missing entry, or `df` has more
								      # columns than the template has rows): leave the column untouched
								      # instead of failing on `if (NA == ...)`.
								      if (is.na(dtype[k])) {
								        next
								      }

								      if (dtype[k] == "String") {
								        df[, k] <- as.character(pull(df, k))
								      } else if (dtype[k] == "Boolean") {
								        df[, k] <- as.logical(pull(df, k))
								      } else if (dtype[k] == "DateTime") {
								        df[, k] <- lubridate::ymd_hms(pull(df, k))
								      } else if (dtype[k] == "Time") {
								        df[, k] <- lubridate::hms(pull(df, k))
								      } # This list will increase and also change based on input date and time formats
								    }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								    
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								    print("Rectifying streetname")
								    # Street and House Number
								    # Split a combined "Street" value into the leading non-digit text
								    # (street name) and everything from the first following digit onwards
								    # (house number). The split parts replace Street / House_Number and the
								    # columns are put back into template order.
								    if ("Street" %in% colnames(df)) {
								      print("found street")
								      street.raw <- as.character(df[["Street"]])
								      df <- tidyr::extract(
								        df,
								        "Street",
								        c("Streetname", "HouseNumber"),
								        "(\\D+)(\\d.*)"
								      )
								      # extract() returns NA for both parts when the pattern does not match
								      # (no digit in the address, or the address starts with the number).
								      # Keep the original text as the street name in that case so the
								      # address is not silently dropped.
								      unsplit <- is.na(df$Streetname) & !is.na(street.raw)
								      df$Streetname[unsplit] <- street.raw[unsplit]
								      df <- df |>
								        # any_of(): tolerate input that has no House_Number column yet
								        select(-any_of("House_Number")) |>
								        rename(Street = Streetname, House_Number = HouseNumber) |>
								        select(all_of(sel.template.desc.colnames))
								    }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								    
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								    # Rectifying Phone, Mobile and Fax numbers
								    # Replace every "+" with "00" in each of these columns that is
								    # present in `df`. The original handled "Mobile" twice and never
								    # touched "Fax"; all three are now processed exactly once.
								    for (number.col in c("Phone", "Mobile", "Fax")) {
								      if (number.col %in% colnames(df)) {
								        print(paste("Found", number.col))
								        df[[number.col]] <- gsub("[+]", "00", df[[number.col]])
								      }
								    }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								    
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								    # Length Rectification
								    # For every character column of `df`:
								    #   * record the values whose length exceeds the template maximum
								    #     (`max.length[k]`) together with the first column's value as row
								    #     identifier,
								    #   * truncate those values to the maximum length,
								    #   * append a per-column error count to `error.df`.
								    # The recorded values are written to ./accounts/errors/length/ when
								    # there are any.
								    # Kept from the original; not used below.
								    # NOTE(review): drop if nothing outside this section reads it.
								    colclasses <- lapply(df, class)
								    print("Rectifying Length")
								    cntr <- substr(names(old_files[h]), 1, 2)
								    length.issues <- list()
								    for (k in seq_len(ncol(df))) {
								      colval <- pull(df, k)
								      # is.character() gives a single TRUE/FALSE; comparing class() with
								      # "character" yields a length-2 condition for POSIXct/Period columns,
								      # which `if` rejects in R >= 4.2.
								      if (!is.character(colval)) {
								        # Nothing is recorded for non-character columns (the original
								        # re-appended the previous column's stale values here).
								        next
								      }
								      print("found character column ")
								      rowval <- pull(df, 1)
								      colnm <- colnames(df)[k]
								      rval <- max.length[k]
								      len <- nchar(colval)
								      # Empty and missing values are counted as length 1.
								      ival <- ifelse(len == 0 | is.na(len), 1, len)
								      print(" Values identified")

								      # rectifying data length
								      # Only touch values that are really too long; an NA maximum length
								      # leaves the column unchanged instead of turning it into NA.
								      trim <- !is.na(len) & !is.na(rval) & len > rval
								      if (any(trim)) {
								        trimmed <- colval
								        trimmed[trim] <- substring(colval[trim], 1, rval)
								        df[, k] <- trimmed
								      }
								      print("Trimmed")

								      # Rows to report: same `ival > rval` rule the original applied with
								      # dplyr::filter() after the loop, applied before binding so that
								      # only offending rows are kept in memory.
								      report <- !is.na(rval) & ival > rval
								      if (any(report)) {
								        length.issues[[length(length.issues) + 1L]] <- data.frame(
								          rowval = rowval[report],
								          ival = ival[report],
								          rval = rval,
								          colnm = colnm,
								          colval = colval[report], # value before truncation
								          cntr = cntr
								        )
								      }

								      err.type <- paste0("Length error ", colnm) # Error cal
								      err.count <- sum(report) # Error cal
								      if (err.count > 0) {
								        error.df <- rbind(error.df, data.frame(Country = Country, Name = Name, err.type = err.type, err.count = err.count)) # Error cal
								      }
								    }

								    lenght.issue.df <- if (length(length.issues) > 0) {
								      do.call(rbind, length.issues)
								    } else {
								      data.frame()
								    }
								    if (nrow(lenght.issue.df) > 0) {
								      write.csv(
								        lenght.issue.df,
								        paste0(
								          "./accounts/errors/length/", # Change path
								          cntr,
								          "_",
								          snames[i],
								          "_length_error.csv"
								        ),
								        row.names = FALSE,
								        na = ""
								      )
								    }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								    # Publish the cleaned sheet under its sheet name and write the
								    # per-sheet output and error files.
								    assign(snames[i], df)

								    # "<first two characters of the workbook name>_<sheet name>", shared by
								    # both file names below.
								    file.stem <- paste0(substr(names(old_files[h]), 1, 2), "_", snames[i])
								    write.csv(df, paste0("./accounts/output/", file.stem, ".csv"), row.names = FALSE, na = "") # Change path
								    # is.null() guard: nrow(NULL) is NULL, which `if` cannot evaluate.
								    if (!is.null(error.df) && nrow(error.df) > 0) {
								      write.csv(error.df, paste0("./accounts/summary/", file.stem, "_error", ".csv"), row.names = FALSE, na = "") # Error write
								    }
								    # Expected vs. actual row count for the summary report.
								    err.summ <- rbind(err.summ, data.frame(Country = Country, Name = Name, Expected = Expected, Actual = nrow(df))) # Error Cal
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								  }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								  # Write the accumulated expected/actual row-count summary.
								  summary.path <- paste0(
								    "./accounts/summary/",
								    substr(names(old_files[h]), 1, 2),
								    "_",
								    snames[i],
								    "_sumerror",
								    ".csv"
								  )
								  write.csv(err.summ, summary.path, row.names = FALSE, na = "") # Error Write
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								  }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								  }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								# Timestamp taken after the processing loops have finished.
								end<-Sys.time()
								# Auto-prints the difference between `end` and `strt`.
								# NOTE(review): `strt` is not assigned in this part of the file;
								# presumably the start timestamp -- confirm.
								end-strt
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								*The code failed because the Department column appears several times in the data, and while importing R renamed the duplicates (to Department..xx).*
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								*Manually verify if these are the required templates*
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								```{r}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								# Combine the per-sheet CSV files in ./accounts/output into three
								# combined files. Files are taken in the order list.files() returns
								# them: positions 1, 4, 7, ... go to the first target, 2, 5, 8, ... to
								# the second and 3, 6, 9, ... to the third.
								# NOTE(review): this relies on every workbook contributing exactly
								# three files that sort in the same relative order -- confirm.

								# `pattern` is a regular expression, not a glob: "\\.csv$" matches
								# only names that end in ".csv".
								opfilepath <- list.files("./accounts/output", pattern = "\\.csv$", full.names = TRUE)
								opfiles <- lapply(opfilepath, read.csv)

								if (length(opfiles) %% 3L != 0L) {
								  warning(
								    "Expected a multiple of 3 output files, found ", length(opfiles),
								    call. = FALSE
								  )
								}

								combined.targets <- c(
								  "./accounts/output/combined/combinedsalesdata.csv",
								  "./accounts/output/combined/combinedaccountteam.csv",
								  "./accounts/output/combined/combinedaccount.csv"
								)

								# Every third file starting at `offset`; derived from the number of
								# files found instead of a hard-coded index list that stops at 33.
								for (offset in seq_along(combined.targets)) {
								  picked <- seq_along(opfiles) %% 3L == offset %% 3L
								  opdf <- do.call(rbind.data.frame, opfiles[picked])
								  write.csv(opdf, combined.targets[offset], row.names = FALSE, na = "")
								}
							 
						 
					
						
							
								
							 
							
								
									
										 
								
							 
							
								 
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								```