You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

280 lines
7.5 KiB

---
title: "Prerunner"
author: "Scary Scarecrow"
date: "2022-09-05"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(shinymanager)
library(tidystringdist)
library(tidyverse)
```
## Credentials
```{r cars}
credentials <- data.frame(
user = c("lanubia", "admin" ,"german","austrian","swiss","chinese","spanish","finnish","italian","dutch","polish","swedish","norwegian"),
password = c("lanubia@2021", "adminadmin","german","austrian","swiss","chinese","spanish","finnish","italian","dutch","polish","swedish","norwegian"),
country = c("All","All","DE","AT","CZ","CN","ES","FI","IT","NL","PL","SE","NO"),
admin = c(TRUE, TRUE, rep(FALSE,11)),
stringsAsFactors = FALSE
)
data.frame(user, password)
dat<-c("1", "4545", "1", "6", "4000", "Europe", "2", "3565", "5", "8", "5450", "Asia", "3", "4646", "1", "5", "3455", "Europe")
str(dat)
number_of_columns<-6
number_of_rows<-length(dat)/number_of_columns
data.frame(matrix(dat, nrow = number_of_rows))
```
## Including Plots
You can also embed plots, for example:
```{r pressure, echo=FALSE}
create_db(
credentials_data = credentials,
sqlite_path = "credentials.sqlite", # will be created
# passphrase = key_get("R-shinymanager-key", "KjnsduYgrajidi(87368()*7726Gdjicjs")
passphrase = "KjnsduYgrajidi(87368()*7726Gdjicjs"
)
iris |>
filter(Species %in% c("setosa","versicolor"))
```
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.
```{r}
# Duplicates
accfilepath <-
list.files("./accounts/output",
pattern = "*_Account.csv",
full.names = T)
accfiles <- lapply(accfilepath, read.csv)
accdf <- unique(do.call(ltodf, accfiles))
accdf$source<-"legacy"
accdfn<-read.csv("account.csv")
```
```{r}
colnames(accdf)
accdf<-
accdf |>
select(External_Key, Name, source, CountryRegion)
```
```{r}
colnames(accdfn)
accdf<-
accdfn |>
select(External_ID, Name, CountryRegion) |>
mutate(source="ERP") |>
rename(External_Key=External_ID) |>
rbind(accdf)
mat<-accdf |>
tidy_comb_all(Name) |>
tidy_stringdist()
mat<- mat |>
filter(cosine < 0.05) |> # Set a threshold
pivot_longer(starts_with("V"), names_to = "x", values_to = "match") |>
pull(match)
accdf %>%
filter(Name %in% mat) %>%
arrange(Name) |>
write.csv("./accounts/errors/accdup.csv", row.names = FALSE)
read.csv("./accounts/errors/accdup.csv")
```
```{r}
contfilepath <-
list.files("./contacts/output",
pattern = "*_Contact.csv",
full.names = T)
contfiles <- lapply(contfilepath, read.csv)
contdf <- unique(do.call(ltodf, contfiles))
contdf<-contdf |>
mutate(Name=paste0(First_Name," ", Last_Name)) |>
mutate(source="legacy") |>
select(External_Key, Name, EMail, source, CountryRegion)
condfn<-read.csv("contact.csv")
contdf<-
condfn |>
mutate(Name=paste0(First_Name," ", Last_Name)) |>
mutate(source="ERP") |>
select(External_ID,Name, EMail, source, CountryRegion) |>
rename(External_Key=External_ID) |>
rbind(contdf)
mat<-contdf |>
tidy_comb_all(Name) |>
tidy_stringdist()
mat<- mat |>
filter(cosine < 0.03) |> # Set a threshold
pivot_longer(starts_with("V"), names_to = "x", values_to = "match") |>
pull(match)
cinddupname<-
contdf %>%
filter(Name %in% mat) %>%
arrange(Name)
mat<-contdf |>
tidy_comb_all(EMail) |>
tidy_stringdist()
mat<- mat |>
filter(cosine < 0.02) |> # Set a threshold
pivot_longer(starts_with("V"), names_to = "x", values_to = "match") |>
pull(match)
cinddupemail<-
contdf %>%
filter(EMail %in% mat) %>%
arrange(EMail)
rbind(cinddupname, cinddupemail) |>
unique() |>
write.csv("./contacts/errors/condup.csv", row.names = FALSE)
```
```{r}
# cdecaded520d57cd
dsn_database<-"postgres"
dsn_hostname<-"localhost"
dsn_port<-"5432"
dsn_uid<- "postgres"
dsn_pwd<-"julley09"
library(RPostgres)
connec <- dbConnect(
RPostgres::Postgres(),
dbname = dsn_database,
host = dsn_hostname,
port = dsn_port,
user = dsn_uid,
password = dsn_pwd
)
emp<-read.csv("empmissinginsap.csv") |>
mutate(action=NA)
emp_cn<-emp |> filter(CountryRegion=="CN")
emp_cz<-emp |> filter(CountryRegion=="CZ")
emp_de<-emp |> filter(CountryRegion=="DE")
emp_es<-emp |> filter(CountryRegion=="ES")
emp_fi<-emp |> filter(CountryRegion=="FI")
emp_it<-emp |> filter(CountryRegion=="IT")
emp_nl<-emp |> filter(CountryRegion=="NL")
emp_no<-emp |> filter(CountryRegion=="NO")
emp_pl<-emp |> filter(CountryRegion=="PL")
emp_se<-emp |> filter(CountryRegion=="SE")
dbWriteTable(connec, "crh_emp_cn", emp_cn, append=TRUE)
dbWriteTable(connec, "crh_emp_cz", emp_cz, append=TRUE)
dbWriteTable(connec, "crh_emp_de", emp_de, append=TRUE)
dbWriteTable(connec, "crh_emp_es", emp_es, append=TRUE)
dbWriteTable(connec, "crh_emp_fi", emp_fi, append=TRUE)
dbWriteTable(connec, "crh_emp_it", emp_it, append=TRUE)
dbWriteTable(connec, "crh_emp_nl", emp_nl, append=TRUE)
dbWriteTable(connec, "crh_emp_no", emp_no, append=TRUE)
dbWriteTable(connec, "crh_emp_pl", emp_pl, append=TRUE)
dbWriteTable(connec, "crh_emp_se", emp_se, append=TRUE)
dbWriteTable(connec, "crh_emp", emp, append=TRUE)
dupacc<-read.csv("duplicateaccounts.csv") |>
mutate(action=NA)
dupacc_cn<-emp |> filter(CountryRegion=="CN")
dupacc_cz<-emp |> filter(CountryRegion=="CZ")
dupacc_de<-emp |> filter(CountryRegion=="DE")
dupacc_es<-emp |> filter(CountryRegion=="ES")
dupacc_fi<-emp |> filter(CountryRegion=="FI")
dupacc_it<-emp |> filter(CountryRegion=="IT")
dupacc_nl<-emp |> filter(CountryRegion=="NL")
dupacc_no<-emp |> filter(CountryRegion=="NO")
dupacc_pl<-emp |> filter(CountryRegion=="PL")
dupacc_se<-emp |> filter(CountryRegion=="SE")
dbWriteTable(connec, "crh_dupacc_cn", dupacc_cn, append=TRUE)
dbWriteTable(connec, "crh_dupacc_cz", dupacc_cz, append=TRUE)
dbWriteTable(connec, "crh_dupacc_de", dupacc_de, append=TRUE)
dbWriteTable(connec, "crh_dupacc_es", dupacc_es, append=TRUE)
dbWriteTable(connec, "crh_dupacc_fi", dupacc_fi, append=TRUE)
dbWriteTable(connec, "crh_dupacc_it", dupacc_it, append=TRUE)
dbWriteTable(connec, "crh_dupacc_nl", dupacc_nl, append=TRUE)
dbWriteTable(connec, "crh_dupacc_no", dupacc_no, append=TRUE)
dbWriteTable(connec, "crh_dupacc_pl", dupacc_pl, append=TRUE)
dbWriteTable(connec, "crh_dupacc_se", dupacc_se, append=TRUE)
dupcon<-read.csv("duplicatecontacts.csv") |>
mutate(action=NA)
dupcon_cn<-emp |> filter(CountryRegion=="CN")
dupcon_cz<-emp |> filter(CountryRegion=="CZ")
dupcon_de<-emp |> filter(CountryRegion=="DE")
dupcon_es<-emp |> filter(CountryRegion=="ES")
dupcon_fi<-emp |> filter(CountryRegion=="FI")
dupcon_it<-emp |> filter(CountryRegion=="IT")
dupcon_nl<-emp |> filter(CountryRegion=="NL")
dupcon_no<-emp |> filter(CountryRegion=="NO")
dupcon_pl<-emp |> filter(CountryRegion=="PL")
dupcon_se<-emp |> filter(CountryRegion=="SE")
dbWriteTable(connec, "crh_dupcon_cn", dupcon_cn, append=TRUE)
dbWriteTable(connec, "crh_dupcon_cz", dupcon_cz, append=TRUE)
dbWriteTable(connec, "crh_dupcon_de", dupcon_de, append=TRUE)
dbWriteTable(connec, "crh_dupcon_es", dupcon_es, append=TRUE)
dbWriteTable(connec, "crh_dupcon_fi", dupcon_fi, append=TRUE)
dbWriteTable(connec, "crh_dupcon_it", dupcon_it, append=TRUE)
dbWriteTable(connec, "crh_dupcon_nl", dupcon_nl, append=TRUE)
dbWriteTable(connec, "crh_dupcon_no", dupcon_no, append=TRUE)
dbWriteTable(connec, "crh_dupcon_pl", dupcon_pl, append=TRUE)
dbWriteTable(connec, "crh_dupcon_se", dupcon_se, append=TRUE)
unique(dupcon$CountryRegion)
```
```{r}
dupcon |>
filter(FullName =="Mireille Ramez")
```