You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
280 lines
7.5 KiB
280 lines
7.5 KiB
---
|
|
title: "Prerunner"
|
|
author: "Scary Scarecrow"
|
|
date: "2022-09-05"
|
|
output: html_document
|
|
---
|
|
|
|
```{r setup, include=FALSE}
|
|
knitr::opts_chunk$set(echo = TRUE)
|
|
library(shinymanager)
|
|
library(tidystringdist)
|
|
library(tidyverse)
|
|
```
|
|
|
|
## Credentials
|
|
|
|
|
|
|
|
```{r cars}
|
|
credentials <- data.frame(
|
|
user = c("lanubia", "admin" ,"german","austrian","swiss","chinese","spanish","finnish","italian","dutch","polish","swedish","norwegian"),
|
|
password = c("lanubia@2021", "adminadmin","german","austrian","swiss","chinese","spanish","finnish","italian","dutch","polish","swedish","norwegian"),
|
|
country = c("All","All","DE","AT","CZ","CN","ES","FI","IT","NL","PL","SE","NO"),
|
|
admin = c(TRUE, TRUE, rep(FALSE,11)),
|
|
stringsAsFactors = FALSE
|
|
)
|
|
|
|
data.frame(user, password)
|
|
|
|
dat<-c("1", "4545", "1", "6", "4000", "Europe", "2", "3565", "5", "8", "5450", "Asia", "3", "4646", "1", "5", "3455", "Europe")
|
|
str(dat)
|
|
number_of_columns<-6
|
|
|
|
number_of_rows<-length(dat)/number_of_columns
|
|
data.frame(matrix(dat, nrow = number_of_rows))
|
|
```
|
|
|
|
## Including Plots
|
|
|
|
You can also embed plots, for example:
|
|
|
|
```{r pressure, echo=FALSE}
|
|
create_db(
|
|
credentials_data = credentials,
|
|
sqlite_path = "credentials.sqlite", # will be created
|
|
# passphrase = key_get("R-shinymanager-key", "KjnsduYgrajidi(87368()*7726Gdjicjs")
|
|
passphrase = "KjnsduYgrajidi(87368()*7726Gdjicjs"
|
|
)
|
|
|
|
iris |>
|
|
filter(Species %in% c("setosa","versicolor"))
|
|
|
|
```
|
|
|
|
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.
|
|
|
|
```{r}
|
|
# Duplicates
|
|
accfilepath <-
|
|
list.files("./accounts/output",
|
|
pattern = "*_Account.csv",
|
|
full.names = T)
|
|
accfiles <- lapply(accfilepath, read.csv)
|
|
|
|
accdf <- unique(do.call(ltodf, accfiles))
|
|
accdf$source<-"legacy"
|
|
|
|
accdfn<-read.csv("account.csv")
|
|
|
|
|
|
```
|
|
|
|
|
|
|
|
```{r}
|
|
colnames(accdf)
|
|
accdf<-
|
|
accdf |>
|
|
select(External_Key, Name, source, CountryRegion)
|
|
```
|
|
|
|
|
|
|
|
```{r}
|
|
colnames(accdfn)
|
|
accdf<-
|
|
accdfn |>
|
|
select(External_ID, Name, CountryRegion) |>
|
|
mutate(source="ERP") |>
|
|
rename(External_Key=External_ID) |>
|
|
rbind(accdf)
|
|
mat<-accdf |>
|
|
tidy_comb_all(Name) |>
|
|
tidy_stringdist()
|
|
mat<- mat |>
|
|
filter(cosine < 0.05) |> # Set a threshold
|
|
pivot_longer(starts_with("V"), names_to = "x", values_to = "match") |>
|
|
pull(match)
|
|
|
|
|
|
accdf %>%
|
|
filter(Name %in% mat) %>%
|
|
arrange(Name) |>
|
|
write.csv("./accounts/errors/accdup.csv", row.names = FALSE)
|
|
|
|
read.csv("./accounts/errors/accdup.csv")
|
|
|
|
```
|
|
|
|
|
|
|
|
```{r}
|
|
contfilepath <-
|
|
list.files("./contacts/output",
|
|
pattern = "*_Contact.csv",
|
|
full.names = T)
|
|
contfiles <- lapply(contfilepath, read.csv)
|
|
|
|
contdf <- unique(do.call(ltodf, contfiles))
|
|
contdf<-contdf |>
|
|
mutate(Name=paste0(First_Name," ", Last_Name)) |>
|
|
mutate(source="legacy") |>
|
|
select(External_Key, Name, EMail, source, CountryRegion)
|
|
|
|
condfn<-read.csv("contact.csv")
|
|
|
|
contdf<-
|
|
condfn |>
|
|
mutate(Name=paste0(First_Name," ", Last_Name)) |>
|
|
mutate(source="ERP") |>
|
|
select(External_ID,Name, EMail, source, CountryRegion) |>
|
|
rename(External_Key=External_ID) |>
|
|
rbind(contdf)
|
|
|
|
|
|
mat<-contdf |>
|
|
tidy_comb_all(Name) |>
|
|
tidy_stringdist()
|
|
mat<- mat |>
|
|
filter(cosine < 0.03) |> # Set a threshold
|
|
pivot_longer(starts_with("V"), names_to = "x", values_to = "match") |>
|
|
pull(match)
|
|
|
|
cinddupname<-
|
|
contdf %>%
|
|
filter(Name %in% mat) %>%
|
|
arrange(Name)
|
|
|
|
mat<-contdf |>
|
|
tidy_comb_all(EMail) |>
|
|
tidy_stringdist()
|
|
mat<- mat |>
|
|
filter(cosine < 0.02) |> # Set a threshold
|
|
pivot_longer(starts_with("V"), names_to = "x", values_to = "match") |>
|
|
pull(match)
|
|
cinddupemail<-
|
|
contdf %>%
|
|
filter(EMail %in% mat) %>%
|
|
arrange(EMail)
|
|
rbind(cinddupname, cinddupemail) |>
|
|
unique() |>
|
|
write.csv("./contacts/errors/condup.csv", row.names = FALSE)
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
```{r}
|
|
# cdecaded520d57cd
|
|
|
|
dsn_database<-"postgres"
|
|
dsn_hostname<-"localhost"
|
|
dsn_port<-"5432"
|
|
dsn_uid<- "postgres"
|
|
dsn_pwd<-"julley09"
|
|
library(RPostgres)
|
|
connec <- dbConnect(
|
|
RPostgres::Postgres(),
|
|
dbname = dsn_database,
|
|
host = dsn_hostname,
|
|
port = dsn_port,
|
|
user = dsn_uid,
|
|
password = dsn_pwd
|
|
)
|
|
|
|
emp<-read.csv("empmissinginsap.csv") |>
|
|
mutate(action=NA)
|
|
emp_cn<-emp |> filter(CountryRegion=="CN")
|
|
emp_cz<-emp |> filter(CountryRegion=="CZ")
|
|
emp_de<-emp |> filter(CountryRegion=="DE")
|
|
emp_es<-emp |> filter(CountryRegion=="ES")
|
|
emp_fi<-emp |> filter(CountryRegion=="FI")
|
|
emp_it<-emp |> filter(CountryRegion=="IT")
|
|
emp_nl<-emp |> filter(CountryRegion=="NL")
|
|
emp_no<-emp |> filter(CountryRegion=="NO")
|
|
emp_pl<-emp |> filter(CountryRegion=="PL")
|
|
emp_se<-emp |> filter(CountryRegion=="SE")
|
|
|
|
dbWriteTable(connec, "crh_emp_cn", emp_cn, append=TRUE)
|
|
dbWriteTable(connec, "crh_emp_cz", emp_cz, append=TRUE)
|
|
dbWriteTable(connec, "crh_emp_de", emp_de, append=TRUE)
|
|
dbWriteTable(connec, "crh_emp_es", emp_es, append=TRUE)
|
|
dbWriteTable(connec, "crh_emp_fi", emp_fi, append=TRUE)
|
|
dbWriteTable(connec, "crh_emp_it", emp_it, append=TRUE)
|
|
dbWriteTable(connec, "crh_emp_nl", emp_nl, append=TRUE)
|
|
dbWriteTable(connec, "crh_emp_no", emp_no, append=TRUE)
|
|
dbWriteTable(connec, "crh_emp_pl", emp_pl, append=TRUE)
|
|
dbWriteTable(connec, "crh_emp_se", emp_se, append=TRUE)
|
|
dbWriteTable(connec, "crh_emp", emp, append=TRUE)
|
|
|
|
|
|
dupacc<-read.csv("duplicateaccounts.csv") |>
|
|
mutate(action=NA)
|
|
|
|
dupacc_cn<-emp |> filter(CountryRegion=="CN")
|
|
dupacc_cz<-emp |> filter(CountryRegion=="CZ")
|
|
dupacc_de<-emp |> filter(CountryRegion=="DE")
|
|
dupacc_es<-emp |> filter(CountryRegion=="ES")
|
|
dupacc_fi<-emp |> filter(CountryRegion=="FI")
|
|
dupacc_it<-emp |> filter(CountryRegion=="IT")
|
|
dupacc_nl<-emp |> filter(CountryRegion=="NL")
|
|
dupacc_no<-emp |> filter(CountryRegion=="NO")
|
|
dupacc_pl<-emp |> filter(CountryRegion=="PL")
|
|
dupacc_se<-emp |> filter(CountryRegion=="SE")
|
|
|
|
dbWriteTable(connec, "crh_dupacc_cn", dupacc_cn, append=TRUE)
|
|
dbWriteTable(connec, "crh_dupacc_cz", dupacc_cz, append=TRUE)
|
|
dbWriteTable(connec, "crh_dupacc_de", dupacc_de, append=TRUE)
|
|
dbWriteTable(connec, "crh_dupacc_es", dupacc_es, append=TRUE)
|
|
dbWriteTable(connec, "crh_dupacc_fi", dupacc_fi, append=TRUE)
|
|
dbWriteTable(connec, "crh_dupacc_it", dupacc_it, append=TRUE)
|
|
dbWriteTable(connec, "crh_dupacc_nl", dupacc_nl, append=TRUE)
|
|
dbWriteTable(connec, "crh_dupacc_no", dupacc_no, append=TRUE)
|
|
dbWriteTable(connec, "crh_dupacc_pl", dupacc_pl, append=TRUE)
|
|
dbWriteTable(connec, "crh_dupacc_se", dupacc_se, append=TRUE)
|
|
|
|
dupcon<-read.csv("duplicatecontacts.csv") |>
|
|
mutate(action=NA)
|
|
|
|
dupcon_cn<-emp |> filter(CountryRegion=="CN")
|
|
dupcon_cz<-emp |> filter(CountryRegion=="CZ")
|
|
dupcon_de<-emp |> filter(CountryRegion=="DE")
|
|
dupcon_es<-emp |> filter(CountryRegion=="ES")
|
|
dupcon_fi<-emp |> filter(CountryRegion=="FI")
|
|
dupcon_it<-emp |> filter(CountryRegion=="IT")
|
|
dupcon_nl<-emp |> filter(CountryRegion=="NL")
|
|
dupcon_no<-emp |> filter(CountryRegion=="NO")
|
|
dupcon_pl<-emp |> filter(CountryRegion=="PL")
|
|
dupcon_se<-emp |> filter(CountryRegion=="SE")
|
|
|
|
dbWriteTable(connec, "crh_dupcon_cn", dupcon_cn, append=TRUE)
|
|
dbWriteTable(connec, "crh_dupcon_cz", dupcon_cz, append=TRUE)
|
|
dbWriteTable(connec, "crh_dupcon_de", dupcon_de, append=TRUE)
|
|
dbWriteTable(connec, "crh_dupcon_es", dupcon_es, append=TRUE)
|
|
dbWriteTable(connec, "crh_dupcon_fi", dupcon_fi, append=TRUE)
|
|
dbWriteTable(connec, "crh_dupcon_it", dupcon_it, append=TRUE)
|
|
dbWriteTable(connec, "crh_dupcon_nl", dupcon_nl, append=TRUE)
|
|
dbWriteTable(connec, "crh_dupcon_no", dupcon_no, append=TRUE)
|
|
dbWriteTable(connec, "crh_dupcon_pl", dupcon_pl, append=TRUE)
|
|
dbWriteTable(connec, "crh_dupcon_se", dupcon_se, append=TRUE)
|
|
|
|
unique(dupcon$CountryRegion)
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
```{r}
|
|
dupcon |>
|
|
filter(FullName =="Mireille Ramez")
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|