---
title: "Task"
author: "Scary Scarecrow"
date: "1/12/2022"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(readxl)
library(dplyr)
library(lubridate)
library(DT)
library(tidyr)
library(stringr)
# Helper (currently unused): creates one empty data frame per sheet name in
# `snames`, with the Header values of the matching template rows as columns.
mutlstxlrdr <- function() {
for (i in seq_along(snames)) {
cnames <-
unique(saptemplate[saptemplate$`Sheet Name` == snames[i], ]$Header)
df <- setNames(data.frame(matrix(ncol = length(cnames), nrow = 0)), cnames)
assign(snames[i], df, envir = .GlobalEnv)
}
}
# Clear previous error, summary and output files before a fresh run
do.call(file.remove, list(list.files(
"./tasks2/errors/mandatory/", full.names = TRUE
)))
do.call(file.remove, list(list.files(
"./tasks2/errors/codelist/", full.names = TRUE
)))
do.call(file.remove, list(list.files(
"./tasks2/errors/length/", full.names = TRUE
)))
do.call(file.remove, list(list.files("./tasks2/summary/", full.names = TRUE)))
do.call(file.remove, list(list.files("./tasks2/output/", full.names = TRUE)))
```
## Data transformation workflow
The following is the proposed preliminary workflow for the data transformation project.
>All files of a segment (contacts, accounts, etc.) should be inside the relevant folder. Each folder should contain one sub-folder for all codelist files. All legacy data (one file per country) should be inside the raw-data folder, with each file named after its country. A file with the field definitions, including the name of the matching column from the legacy file, should also be present.
>*Make sure that there are no hidden files inside the directory.*
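Before running the transformation, the layout can be checked quickly. Below is a minimal, optional sketch (assuming the `./tasks2` folder used throughout this document; the sub-folder names mirror the paths referenced later):
```{r folder-check, eval=FALSE}
# Optional sanity check of the expected folder layout.
expected <- c("CodeList", "raw-data", "errors", "summary", "output")
present <- list.dirs("./tasks2", recursive = FALSE, full.names = FALSE)
setdiff(expected, present) # folders still missing
# Hidden files (names starting with a dot) should be removed before processing.
all.paths <- list.files("./tasks2", all.files = TRUE, recursive = TRUE, full.names = TRUE)
all.paths[grepl("/\\.[^/]+$", all.paths)]
```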
### Employees
```{r}
employeecodes <- read.csv("emp.csv")
# employeecodes <- employeecodes |> select(c(2, 3))
contacts <- read.csv("./tasks2/contacts.csv")
contacts <-
contacts |>
mutate(Name = paste(First_Name, Last_Name, sep = " ")) |>
rename(cont.ext.key = External_Key)
quotes <- read.csv("./tasks2/quotes.csv") |>
select(Quote.ID, Account.Number..Customer.) |>
# Prefix purely numeric account numbers with "H" (same convention applied to Main_Account_ID later)
mutate(Account.Number..Customer. = ifelse(
grepl("[A-Za-z*]", Account.Number..Customer.),
Account.Number..Customer.,
paste0("H", Account.Number..Customer.)
))
```
### Relationship files
```{r echo=TRUE, message=FALSE, warning=FALSE}
# relfilenames <- #Not required in tasks2
# list.files("./projects/relationship",
# pattern = "*.xls",
# full.names = T)
# print(relfilenames)
# rel_files <- NULL
# for (i in seq_along(relfilenames)) {
# b <- read_excel(path = relfilenames[[i]], sheet = 1)
# b<-b |> left_join(employeecodes, by=c("Owner"="Name")) |>
# select(-Owner) |>
# mutate(Employee.ID=ifelse(is.na(Employee.ID),"99999",Employee.ID)) |>
# rename(Owner=Employee.ID)
# rel_files[[i]]<-b
# }
# names(rel_files) <- gsub("./projects/relationship/", "", relfilenames)
# # Names of the files imported
# names(rel_files)
```
### Code Lists
```{r Create List of Files, echo=TRUE, message=FALSE, warning=FALSE}
filenames <-
list.files("./tasks2/CodeList",
pattern = "*.xlsx",
full.names = T)
# We could avoid a separate directory for the code lists, but organising them would be
# harder. This can be explored further if we want to transform all the data in one go,
# i.e. not by separate functions (contacts, accounts, etc.).
# File paths
print(filenames)
```
Check manually that the above list includes all the codelist files.
If it does, read the files.
```{r codelistreader, echo=TRUE, message=FALSE, warning=FALSE}
sheet_names <-
lapply(filenames, excel_sheets) # Creates a list of the sheet names
codelist_files <- NULL
for (i in seq_along(filenames)) {
a <-
lapply(excel_sheets(filenames[[i]]),
read_excel,
path = filenames[[i]],
col_types = "text") # Reads the sheets of the excel files
names(a) <-
c(sheet_names[[i]]) # Renames them according to the sheet names extracted above
codelist_files <- c(codelist_files, a)
}
# Names of the files imported
names(codelist_files)
# codelist_files<-unique(codelist_files)
```
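Each imported codelist can now be looked up by its sheet name, which is expected to match the `Header` of the column it validates (this is how the codelist-matching step later retrieves it). A small illustration; `"Priority"` is only an example, use any name returned by `names(codelist_files)`:
```{r codelist-lookup, eval=FALSE}
# Inspect one codelist: the Description column holds the legacy wording,
# the second column holds the code it is mapped to.
head(codelist_files[["Priority"]])
```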
### Templates
Let us now extract the data. Below we read the legacy file(s), one per country, containing all data related to `Contacts` from the legacy system.
```{r readlegacyfilepath, echo=TRUE, message=FALSE, warning=FALSE}
oldfilepath <-
list.files("./tasks2/raw-data",
pattern = "*.xls", # Adjust the pattern to match the legacy file extension (.xls/.xlsx/.csv)
full.names = T) # Change the path and check the pattern before running
print(oldfilepath)
```
Check manually that the list matches the actual files.
```{r readlegacyfiles, echo=TRUE}
old_files <- NULL
# Returns the unique non-missing value(s) of x, or NA when every value is missing.
nona <- function(x) {
x <- x[!is.na(x)]
if (length(x) == 0) {
return(NA)
} else {
return(unique(x))
}
}
for (i in seq_along(oldfilepath)) {
a<- read_excel(path = oldfilepath[[i]], sheet = 1)
a<-a |>
select(External_Key, Subject, `Quote ID (Regarding)`,`Start Date`, Due, `External_Key (Regarding)`,
Recipient, Owner, Category, Priority, `Modified On`, `Project/Opp Number (Regarding)`)
a<-a |>
fill(`Quote ID (Regarding)`, .direction = 'up') |>
group_by(`Quote ID (Regarding)`) |>
mutate(Recipient = str_c(Recipient, sep = "_")) |> # NOTE: with a single column, sep has no effect; use collapse = "_" to join all recipients of a quote
ungroup() |>
group_by(`Quote ID (Regarding)`, Recipient) |>
summarise(External_Key=nona(External_Key),
Subject=nona(Subject),
`Start Date`=nona(`Start Date`),
Due=nona(Due),
`External_Key (Regarding)`=nona(`External_Key (Regarding)`),
Owner=nona(Owner),
Category=nona(Category),
Priority=nona(Priority),
`Modified On`=nona(`Modified On`),
`Project/Opp Number (Regarding)`=nona(`Project/Opp Number (Regarding)`))
a<-a |>
ungroup() |>
left_join(employeecodes, by=c(Owner = "Name")) |>
rename(owner.emp=Owner) |>
mutate(Employee.ID=ifelse(is.na(Employee.ID),"90112",Employee.ID)) |> #Changed to Axel's in case of missing employee
rename(Owner=Employee.ID) |>
left_join(contacts, by=c(Recipient = "Name")) |>
mutate(cont.ext.key=ifelse(is.na(cont.ext.key),Recipient,cont.ext.key)) |>
rename(recip.con=Recipient) |>
rename(Recipient=cont.ext.key) |>
left_join(quotes, by=c("Quote ID (Regarding)"="Quote.ID"))
old_files[[i]]<-a
}
names(old_files) <- gsub("./tasks2/raw-data/", "", oldfilepath)
```
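For reference, `nona()` returns the unique non-missing value(s) of a vector, or `NA` if everything is missing:
```{r nona-example, eval=FALSE}
nona(c(NA, "A", NA, "A")) # "A"
nona(c(NA, NA))           # NA
nona(c("A", "B", NA))     # c("A", "B") - more than one value is possible
```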
```{r readSAPtemplate, echo=TRUE, message=FALSE, warning=FALSE}
saptemplate <-
read_excel("./tasks2/template.xlsx", sheet = "Field_Definitions")
# First few rows of the imported data
head(saptemplate)
```
*Please note that the format of the table (sheet) has been slightly changed. Earlier, the corresponding sheet name was mentioned in a row above the actual table; now every row mentions the corresponding sheet name. This was done manually for the convenience of data extraction.*
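The processing below relies on the following columns of the `Field_Definitions` sheet: `Sheet Name`, `Header`, `default`, `oldkey`, `Mandatory`, `CodeList File Path`, `Max Length` and `Data Type`. An optional sanity check (a sketch, not part of the pipeline):
```{r template-columns-check, eval=FALSE}
needed <- c("Sheet Name", "Header", "default", "oldkey", "Mandatory",
"CodeList File Path", "Max Length", "Data Type")
setdiff(needed, names(saptemplate)) # should return character(0)
```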
```{r createmptySAPfiles, message=FALSE, warning=FALSE, include=FALSE}
#orilo<-"en_US.UTF-8"
#Sys.setlocale(locale="en_US.UTF-8")
strt <- Sys.time()
snames <- unique(saptemplate$`Sheet Name`)
for (h in seq_along(old_files)) {
# Copy original data
old.copy <- old_files[[h]]
print(paste0(names(old_files[h]), " imported"))
err.summ <-
data.frame(
Country = NULL,
Name = NULL,
Expected = NULL,
Actual = NULL
) #Error Cal
# Creates data frame for each sheet in snames
for (i in seq_along(snames)) {
print(paste0("Processing ..", snames[i]))
# Select the column names from the field description sheet
print("Creating template")
sel.template.desc <-
saptemplate[saptemplate$`Sheet Name` == snames[i],]
print("Creating column names")
sel.template.desc.colnames <- sel.template.desc$Header
# Create a list by adding values from corresponding legacy data
temp <- NULL
print("adding values to template ")
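# For every template column the value is chosen in this order:
#   1. the template's 'default' value, if present;
#   2. otherwise the legacy column named in 'oldkey';
#   3. otherwise NA (no default and no legacy source).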
if(snames[i] == "Task"){
for (j in seq_along(sel.template.desc.colnames)) {
print(paste("Processing ", sel.template.desc.colnames[j]))
# if (sel.template.desc.colnames[j] == "Main_Account_ID") {
# temp[j] <-
# ifelse(
# !is.na(old.copy$`Account Number (Regarding)`),
# old.copy$`Account Number (Regarding)`,
# old.copy$`Full Name (Regarding)`
# )
# next
# }
temp[j] <-
ifelse(
!is.na(sel.template.desc$default[j]),
as.character(as.vector(sel.template.desc$default[j])),
ifelse(
sel.template.desc$oldkey[j] == "NA" |
is.na(sel.template.desc$oldkey[j]),
NA,
as.vector(old.copy[, sel.template.desc$oldkey[j]])
)
)
}
}
if(snames[i] %in% c("Task_Follow_Up_and_Related_Item", "Task_Involved_Parties", "Tasks_Notes", "Task_Attachment")){
for (j in seq_along(sel.template.desc.colnames)) {
print(paste("Processing ", sel.template.desc.colnames[j]))
temp[j] <-
ifelse(
!is.na(sel.template.desc$default[j]),
as.character(as.vector(sel.template.desc$default[j])),
ifelse(
sel.template.desc$oldkey[j] == "NA" |
is.na(sel.template.desc$oldkey[j]),
NA,
as.vector(old.copy[, sel.template.desc$oldkey[j]])
)
)
}
}
# Rename the columns according to field description
print("renaming template ")
names(temp) <- sel.template.desc.colnames
# Create data frame from the list
df <- as.data.frame(temp)
print("Converted to data frame")
if(c("Subject") %in% sel.template.desc.colnames){
print("Found Subject")
df$Subject <- gsub(",","/",df$Subject)
df$Start_DateTime_Time_Zone_Code <- ifelse(is.na(df$Start_DateTime), NA, df$Start_DateTime_Time_Zone_Code)
df$Due_Date_Time_Time_Zone_Code <- ifelse(is.na(df$Due_Date_Time), NA, df$Due_Date_Time_Time_Zone_Code)
df$Last_Changed<-NA
acc<-read.csv("accounts.csv") |> select(-1)
fulldf <-
df |>
# Apply the same "H" prefix convention before joining to the accounts file
mutate(Main_Account_ID = ifelse(
grepl("[A-Za-z*]", Main_Account_ID),
Main_Account_ID,
paste0("H", Main_Account_ID)
)) |>
left_join(acc, by = c("Main_Account_ID" = "Former_CRM_reference"))
df<-fulldf |>
# filter(!is.na(Account_ID)) |>
mutate(Main_Account_External_Key=NA) |>
select(-Main_Account_ID) |>
rename(Main_Account_ID=Account_ID) |>
mutate(Main_Employee_Responsible_ID=Processor_ID) |>
#mutate(External_Key=paste("NEW",External_Key,sep = "_")) |> # TEMPORARY. TO BE DELETED IN PROD
select(External_Key,
Document_Type,
Subject,
Status,
Start_DateTime,
Start_DateTime_Time_Zone_Code,
Due_Date_Time,
Due_Date_Time_Time_Zone_Code,
Planned_Duration,
Actual_Duration,
Completion_Date_Time,
Main_Employee_Responsible_ID,
MainEmployeeResponsiblePartyExternalKey,
Main_Account_External_Key,
Main_Account_ID,
Main_Contact_ID_External_Key,
Main_Contact_ID,
Processor_ID,
ProcessorPartyExternalKey,
Completion_Percent,
Category,
Priority,
Sales_Territory_ID,
Sales_Organization_External_Key,
Sales_Organization_ID,
Distribution_Channel,
Division,
Data_Origin,
Processor_Email,
Processor_Name,
Last_Changed)
missaccdf<- df |>
filter(is.na(Main_Account_ID))
if (nrow(missaccdf) > 0) {
write.csv(
missaccdf,
paste0(
"./tasks2/errors/missingacc/",
substr(names(old_files[h]), 1, 2),
"_",
"_missing_account.csv"
),
row.names = F,
na = "",
fileEncoding = "UTF-8"
)
}
}
if(c("Text") %in% sel.template.desc.colnames){
print("Found Text")
df$Text<-paste("des",old.copy$Description,"recip",old.copy$recip.con, "own",old.copy$owner.emp, sep = "_")
df$Text<-gsub(",",".",df$Text)
#df$Task_External_Key<-paste("NEW", df$Task_External_Key, sep = "_")# TEMPORARY. TO BE DELETED IN PROD
df$External_Key<-paste(df$Task_External_Key,"TA01", sep = "_")
}
if(c("BTD_ID") %in% sel.template.desc.colnames | c("Party_ID") %in% sel.template.desc.colnames){
print("Found BTD_ID")
df$External_Key<-paste(df$Task_External_Key,"TA01", sep = "_")
}
# Error summary file
Expected <- nrow(df)
#Select essential rows
print("Identifying essential rows")
sel.template.desc |>
filter(Mandatory == "Yes") |>
pull(Header) -> essential.columns
error.mandatory <- NULL
error.df <-
data.frame(
Country = NULL,
Name = NULL,
Rows = NULL,
Expected = NULL
)
# Operate on essential columns including creation of error file
for (k in seq_along(essential.columns)) {
print("Creating and writing data with missing mandatory values")
manerrdt <- df[is.na(df[, essential.columns[k]]),]
if (nrow(manerrdt) > 0) {
manerrdt <-
manerrdt |> mutate(error = paste0(essential.columns[k], " missing"))
}
assign(
paste0(
"error_mandatory_",
substr(names(old_files[h]), 1, 2),
"_",
snames[i],
"_",
essential.columns[k]
),
manerrdt
)
# TO be saved in error files
if (nrow(manerrdt) > 0) {
write.csv(
manerrdt,
paste0(
"./tasks2/errors/mandatory/",
substr(names(old_files[h]), 1, 2),
"_",
snames[i],
"_",
essential.columns[k],
"_error_mandatory.csv"
),
row.names = F,
na = "",
fileEncoding = "UTF-8"
)
}
# Error summary file
Country <- substr(names(old_files[h]), 1, 2)
Name <- snames[i]
err.type <- paste0("Missing ", essential.columns[k])
err.count <- nrow(df[is.na(df[, essential.columns[k]]),])
print("Removing rows with empty essential columns")
df <- df[!is.na(df[, essential.columns[k]]),]
if (err.count > 0) {
error.df <-
rbind(
error.df,
data.frame(
Country = Country,
Name = Name,
err.type = err.type,
err.count = err.count
)
) #Error cal
}
}
print("Identifying columns associated with codelists")
# List of columns that have a codelist
codelistcols <- sel.template.desc |>
filter(!is.na(`CodeList File Path`)) |> pull(Header)
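# For every codelist-backed column:
#   1. flag values not found in the codelist's Description column,
#   2. write them to ./tasks2/errors/codelist/ and count them in error.df,
#   3. blank the mismatches, and
#   4. replace the remaining descriptions with the code from the second
#      column of the matching codelist sheet.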
for (k in seq_along(codelistcols)) {
print(paste0("Identifying errors ", codelistcols[k]))
def.rows <-
which(!df[, codelistcols[k]] %in% c(pull(codelist_files[codelistcols[k]][[1]], Description), NA))
def.n <- df[def.rows, 1]
def.rows.val <-
df[!df[, codelistcols[k]] %in% c(pull(codelist_files[codelistcols[k]][[1]], Description), NA), codelistcols[k]]
def.colname <-
rep(codelistcols[k], length.out = length(def.rows))
def <- data.frame(def.rows, def.n, def.rows.val, def.colname)
if (nrow(def) > 0) {
assign(paste0(
"error_codematch_",
substr(names(old_files[h]), 1, 2),
"_",
snames[i],
"_",
codelistcols[k]
),
def) # TO be saved
write.csv(
def,
paste0(
"./tasks2/errors/codelist/",
substr(names(old_files[h]), 1, 2),
"_",
snames[i],
"_",
codelistcols[k],
"_error_codematch_.csv"
),
row.names = F,
na = "",
fileEncoding = "UTF-8"
)
}
err.type <-
paste0("Codelist Mismatch ", codelistcols[k]) #Error cal
err.count <- nrow(def) #Error cal
if (err.count > 0) {
error.df <-
rbind(
error.df,
data.frame(
Country = Country,
Name = Name,
err.type = err.type,
err.count = err.count
)
) #Error cal
}
print(paste0("Removing errors ", codelistcols[k]))
# Removes any mismatch
df[!df[, codelistcols[k]] %in% c(pull(codelist_files[codelistcols[k]][[1]], Description), NA), codelistcols[k]] <-
NA
# Matches each column with the corresponding code list and returns the value
df[, codelistcols[k]] <-
pull(codelist_files[codelistcols[k]][[1]], 2)[match(pull(df, codelistcols[k]),
pull(codelist_files[codelistcols[k]][[1]], Description))]
}
max.length <- as.numeric(sel.template.desc$`Max Length`)
dtype <- sel.template.desc$`Data Type`
rowval <- NULL
ival <- NULL
rval <- NULL
length.issue.df <- NULL # collects character values that exceed the allowed maximum length
# Changing the data class
for (k in 1:ncol(df)) {
if (dtype[k] == "String") {
df[, k] <- as.character(pull(df, k))
}
if (dtype[k] == "Boolean") {
df[, k] <- as.logical(pull(df, k))
}
if (dtype[k] == "DateTime") {
df[, k] <-
strftime(pull(df, k), format = "%Y-%m-%d")
#strftime(pull(df, k), format = "%Y-%m-%d %H:%M:%S", tz = "CET")
#pull(df, k)
#paste0(sub(" ","T",as.character(pull(df, k))),"Z")
print(df[, k] )
}
if (dtype[k] == "Time") {
df[, k] <- lubridate::hms(pull(df, k))
} # This list will increase and also change based on input date and time formats
}
# Length Rectification
colclasses <- lapply(df, class)
print("Rectifying Length")
for (k in 1:ncol(df)) {
if(colnames(df)[k] %in% c("Start_DateTime","Due_Date_Time","Completion_Date_Time","Last_Changed","Updated_On")){
next
}
if (colclasses[[k]] == "character") {
print("found character column ")
rowval <- pull(df, 1)
ival <-
ifelse(nchar(pull(df, k)) == 0 |
is.na(nchar(pull(df, k))), 1, nchar(pull(df, k)))
rval <- max.length[k]
colval <- pull(df, k)
colnm <- colnames(df)[k]
cntr <- substr(names(old_files[h]), 1, 2)
# rectifying data length
df[, k] <-
ifelse(nchar(pull(df, k)) > max.length[k],
substring(pull(df, k), 1, max.length[k]),
pull(df, k))
# The length bookkeeping applies only to character columns, so it stays inside the block above
length.issue.df <-
rbind(length.issue.df,
data.frame(rowval, ival, rval, colnm, colval, cntr))
err.type <-
paste0("Length error ", colnames(df)[k]) # Error cal
err.count <- sum(ival > rval, na.rm = T) # Error cal
if (err.count > 0) {
error.df <-
rbind(
error.df,
data.frame(
Country = Country,
Name = Name,
err.type = err.type,
err.count = err.count
)
) #Error cal
}
}
}
# Keep only the rows whose actual length exceeded the allowed maximum
if (!is.null(length.issue.df)) {
length.issue.df <- dplyr::filter(length.issue.df, ival > rval)
}
if (!is.null(length.issue.df) && nrow(length.issue.df) > 0) {
write.csv(
length.issue.df,
paste0(
"./tasks2/errors/length/",
substr(names(old_files[h]), 1, 2),
"_",
snames[i],
"_length_error.csv"
),
row.names = F,
na = ""
)
}
assign(snames[i], df)
write.csv(
df,
paste0(
"./tasks2/output/",
substr(names(old_files[h]), 1, 2),
"_",
snames[i],
".csv"
),
# write.csv() always uses "," as the separator; use write.csv2() or write.table(sep = ";") for semicolon-separated output
row.names = F,
na = "",
fileEncoding = "UTF-8"
)
if (nrow(error.df) > 0) {
write.csv(
error.df,
paste0(
"./tasks2/summary/",
substr(names(old_files[h]), 1, 2),
"_",
snames[i],
"_error",
".csv"
),
row.names = F,
na = "",
fileEncoding = "UTF-8"
) # Error write
}
err.summ <-
rbind(
err.summ,
data.frame(
Country = Country,
Name = Name,
Expected = Expected,
Actual = nrow(df)
)
) #Error Cal
}
write.csv(
err.summ,
paste0(
"./tasks2/summary/" ,
substr(names(old_files[h]), 1, 2),
"_",
snames[i],
"_sumerror",
".csv"
),
row.names = F,
na = ""
) # Error Write
}
end <- Sys.time()
end - strt
```
*The code failed because the Department column appears several times in the data; while importing, R renamed the duplicates to Department..xx.*
*Manually verify that these are the required templates.*
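One possible workaround (a sketch, assuming the duplicated Department columns carry the same information) is to let `read_excel()` repair the names and keep only the first occurrence of each base name before further processing:
```{r dedupe-department, eval=FALSE}
# Read with unique repaired names (Department...2, Department...3, ...) and
# drop all but the first occurrence of each duplicated column name.
x <- read_excel(oldfilepath[[1]], sheet = 1, .name_repair = "unique")
x <- x[, !duplicated(sub("\\.\\.\\.[0-9]+$", "", names(x)))]
```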
```{r}
opfilepath <-
list.files("./tasks2/output",
pattern = "*_Task.csv",
full.names = T)
opfiles <- lapply(opfilepath, read.csv, colClasses = "character", header=TRUE, row.names=NULL)
opdf <- do.call(rbind.data.frame, opfiles)
write.csv(
opdf,
"./tasks2/output/combined/combinedtask.csv",
row.names = F,
na = "",
fileEncoding = "UTF-8",
quote = FALSE
)
openxlsx::write.xlsx(opdf,"./tasks2/output/combined/combinedtask.xlsx")
opfilepath <-
list.files("./tasks2/output",
pattern = "*_Task_Involved_Parties.csv",
full.names = T)
opfiles <- lapply(opfilepath, read.csv, colClasses = "character")
opdf <- do.call(rbind.data.frame, opfiles)
write.csv(
opdf,
"./tasks2/output/combined/combinedDTaskInvolvedParties.csv",
row.names = F,
na = "",
fileEncoding = "UTF-8",
quote = FALSE
)
openxlsx::write.xlsx(opdf,"./tasks2/output/combined/combinedDTaskInvolvedParties.xlsx")
opfilepath <-
list.files("./tasks2/output",
pattern = "*_Task_Follow_Up_and_Related_Item.csv",
full.names = T)
opfiles <- lapply(opfilepath, read.csv, colClasses = "character")
opdf <- do.call(rbind.data.frame, opfiles)
write.csv(
opdf,
"./tasks2/output/combined/combinedTaskFollowUpandRelatedItem.csv",
row.names = F,
na = "",
fileEncoding = "UTF-8",
quote = FALSE
)
openxlsx::write.xlsx(opdf,"./tasks2/output/combined/combinedTaskFollowUpandRelatedItem.xlsx")
opfilepath <-
list.files("./tasks2/output",
pattern = "*_Tasks_Notes.csv",
full.names = T)
opfiles <- lapply(opfilepath, read.csv, colClasses = "character")
opdf <- do.call(rbind.data.frame, opfiles)
write.csv(
opdf,
"./tasks2/output/combined/combinedTaskNotes.csv",
row.names = F,
na = "",
fileEncoding = "UTF-8",
quote = FALSE
)
openxlsx::write.xlsx(opdf,"./tasks2/output/combined/combinedTaskNotes.xlsx")
```
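The four blocks above differ only in the file pattern and the combined file name, so they could also be driven by a small lookup table; a sketch under that assumption (the patterns mirror the sheet names used earlier):
```{r combine-loop, eval=FALSE}
segments <- data.frame(
pattern = c("_Task\\.csv$", "_Task_Involved_Parties\\.csv$",
"_Task_Follow_Up_and_Related_Item\\.csv$", "_Tasks_Notes\\.csv$"),
outfile = c("combinedtask", "combinedDTaskInvolvedParties",
"combinedTaskFollowUpandRelatedItem", "combinedTaskNotes")
)
for (s in seq_len(nrow(segments))) {
paths <- list.files("./tasks2/output", pattern = segments$pattern[s], full.names = TRUE)
if (length(paths) == 0) next
combined <- do.call(rbind, lapply(paths, read.csv, colClasses = "character"))
write.csv(combined,
paste0("./tasks2/output/combined/", segments$outfile[s], ".csv"),
row.names = FALSE, na = "", fileEncoding = "UTF-8", quote = FALSE)
openxlsx::write.xlsx(combined, paste0("./tasks2/output/combined/", segments$outfile[s], ".xlsx"))
}
```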