--- title: "Untitled" output: html_document --- ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) library(tidyverse) library(lubridate) library(fst) library(DT) library(data.table) library(scales) # get_png <- function(filename) { # grid::rasterGrob(png::readPNG(filename), interpolate = TRUE) # } # l <- get_png("logo.png") # sum.na<-function(x){sum(is.na(x))} # id<-read.csv("./data/id20210921.csv", sep=";", header = F) # alive<-read.csv("active.csv",sep=",") # dead<-read.csv("inactive.csv",sep=",") oi<-read.fst("./data/open_items_new.fst") # oi |> filter(o.type =="Installments") |> filter(due_date>"2021-07-31") # indi<-id %>% # filter(V3=="Identiteitsbewijs") %>% # select(V2,V4) dunn<-read.fst("./data/dunning.fst") last.dunn<-dunn %>% group_by(DUNN_GPART, DUNN_VKONT) %>% filter(DUNN_DATE==max(DUNN_DATE)) %>% mutate(DUNN_GPART=as.character(DUNN_GPART)) #dead.org<-read.fst("./data/dead.fst") #org.status<-dead.org %>% rename(business_partner=V2) %>% mutate(type="org", e.status="discontinued") %>% select(business_partner,e.status,type) #ind.status<-ind.status %>% select(business_partner, e.status) %>% mutate(type="indi") #bp_operating_status<-rbind(org.status,ind.status) #write.fst(bp_operating_status,"./data/bp_operating_status.fst") operating_status<-read.fst("./data/bp_operating_status.fst") # ind.status<-indi %>% # left_join(ind.status, by=c("V4"="Id.Nummer")) %>% # filter(!is.na(e.status)) # ind.status %>% # rename(business_partner=V2, Id.Number=V4) %>% # mutate(business_partner=as.character(business_partner)) %>% # write.fst("./data/indestatus.fst") # dead.indi<-indi %>% unique() %>% inner_join(dead.indi, by=c("V4"="Id.Nummer")) %>% rename(business_partner=V2, id_number=V4) oi<- oi%>% mutate(o.type = case_when( deposit == T ~ "Deposit", documen_desc == "Installments" ~ "Installments", contract_type=="Waste" | grepl("Waste",trandesc) ~ "Waste", TRUE ~ "Real" ) ) oi<-oi %>% select(-c(sap_client,main_tran,sub_tran,division,dunn_exclu,payment_meth,clearing_date,clearing_post_date,clearing_reason,document_type,X,trantype, OUT_AGE)) %>% left_join(operating_status, by="business_partner") %>% mutate(e.status=ifelse(is.na(e.status),"active",e.status)) %>% select(-type) oi %>% write.fst("./data/open_items.fst") # c.status<- # oi %>% # mutate(AGE_BUCKET=ifelse(AGE_BUCKET!="5+ years","5- years", "5+ years")) %>% # select(contract_account,AGE_BUCKET) %>% # count(contract_account,AGE_BUCKET) %>% # pivot_wider(names_from = AGE_BUCKET,values_from = n) %>% # mutate(spread=ifelse(is.na(`5+ years`),"Only less than 5",ifelse(is.na(`5- years`)," Only more than 5","Both"))) %>% # select(contract_account,spread) oi<-oi %>% left_join(c.status, by="contract_account") unique(oi$contract_type) ``` Credit amounts: aggregated per contract account but older than 5 years ```{r} oi |> filter(o.type %in% c("Real", "Installments")) |> filter(negative) |> filter(AGE_BUCKET=="5+ years") |> group_by(contract_account) |> summarise(amount=sum(amount)) |> write.csv("creditagrrolderthan5.csv") ``` Credit amounts: aggregated per contract account but older than 3 years ```{r} oi |> filter(o.type %in% c("Real", "Installments")) |> filter(negative) |> filter(AGE_BUCKET %in% c("5+ years","4-5 years","3-4 years")) |> group_by(contract_account, active) |> summarise(amount=sum(amount)) |> #pull(amount) |> sum() write.csv("creditagrrolderthan3.csv") ``` Open amounts for Deceased individuals: aggregated per contract account for only less than 60 days past their Date of Death ```{r} oi |> filter(e.status=="Passed") |> filter(date.event <= due_date) |> filter(due_date < date.relaxed) |> group_by(contract_account, negative) |> summarise(amount=sum(amount)) |> pivot_wider(names_from = negative, values_from = amount) |> rename(Open=`FALSE`, Credit=`TRUE`) |> write.csv("deceasedaggrlessthan60butnotbeforevent.csv") ``` Open amounts for migrated persons: aggregated per contract account for only less than 60 days past their migration date ```{r} oi |> filter(e.status=="Left") |> filter(date.event <= due_date) |> filter(due_date < date.relaxed) |> group_by(contract_account, negative) |> summarise(amount=sum(amount)) |> pivot_wider(names_from = negative, values_from = amount) |> rename(Open=`FALSE`, Credit=`TRUE`) |> write.csv("leftaggrlessthan60butnotbeforevent.csv") ``` Open amounts for Deceased individuals: aggregated per contract account for only less than 60 days past their Date of Death ```{r} oi |> filter(e.status=="Passed") |> filter(due_date < date.relaxed) |> group_by(contract_account, negative) |> summarise(amount=sum(amount)) |> pivot_wider(names_from = negative, values_from = amount) |> rename(Open=`FALSE`, Credit=`TRUE`) |> write.csv("deceasedaggrlessthan60inclbeforevent.csv") oi |> filter(o.type %in% c("Real", "Installments")) |> filter(e.status == "Left") |> group_by(contract_account, rel.status, active) |> summarise(amount=sum(amount)) |> ungroup() |> pivot_wider(names_from = rel.status, values_from = amount) |> rename(duedatebeforedaterelaxed=`long past`, duedateafterrelaxeddate=`within limit`) |> mutate(statu= ifelse(is.na(duedatebeforedaterelaxed),"Only duedateafterrelaxeddate", ifelse(is.na(duedateafterrelaxeddate), "Only duedatebeforedaterelaxed", "Both"))) |> inner_join(uniquebpcont, by="contract_account") |> filter(statu=="Only duedatebeforedaterelaxed") |> filter(duedatebeforedaterelaxed>10000) |> write.csv("leftaggr.csv") uniquebpcont<-oi |> select(contract_account, business_partner) |> unique() ``` ```{r} oi |> filter(e.status=="Left") |> filter(due_date < date.relaxed) |> group_by(contract_account, negative) |> summarise(amount=sum(amount)) |> pivot_wider(names_from = negative, values_from = amount) |> rename(Open=`FALSE`, Credit=`TRUE`) |> write.csv("leftaggrlessthan60inclbeforevent.csv") ``` ## Dump Dec 29 ```{r} passed<-read.csv("./data-raw/passed.csv", sep = ";") |> mutate(contract_account=as.character(contract_account)) |> select(-c(BP,Name)) migrated<-read.csv("./data-raw/migrated.csv", sep = ";") |> mutate(contract_account=as.character(contract_account)) |> select(-c(BP,Name)) inactived<-read.csv("./data-raw/inactived.csv", sep = ";") |> mutate(contract_account=as.character(contract_account)) |> select(-c(BP,NAME)) discontinued<-read.csv("./data-raw/discontinued.csv", sep = ";") |> mutate(contract_account=as.character(contract_account)) |> select(-c(BP,NAME)) bpnames<-read.csv("./data-raw/bpnames.csv", sep = ";") |> select(c(PARTNER,X)) |> unique() bpnames<-uniquebpcont |> full_join(bpnames, by=c("business_partner"="PARTNER")) |> select(contract_account, business_partner, X) |> rename(Name=X) |> filter(!is.na(contract_account)) bpnames |> count(contract_account) |> arrange(desc(n)) bpnames |> filter(PARTNER=="1301041926") ``` ```{r} bpnames |> select(business_partner, Name) |> unique() right_join(discontinued, by="contract_account") |> arrange(contract_account) |> filter(amount!="") |> mutate(amount=as.numeric(amount)) pull(amount) |> sum() #|> write.csv("disco.csv") ``` ```{r} oi |> filter(contract_account == "11460001") ``` ## New inactive and kvk list post discussion with Aldo on Dec 30, 2021 1. Compare our inactive list and inactive list of Betza 2. If they match, proceed with aggregation 3. Create a list of aggregation of KvK ```{r} betza_inactive<-read.csv("betzainactive.csv", sep=";", dec = ",") ``` ```{r} our.inactive<-oi |> filter(o.type %in% c("Real", "Installments")) |> filter(bp_category=="Organization") |> filter(!active) our.inactive<-our.inactive |> filter(!negative) |> group_by(business_partner, contract_account) |> summarise(amount=sum(amount)) bpnames<-bpnames |> select(business_partner, Name) |> unique() our.inactive |> left_join(bpnames, by="business_partner") |> filter(contract_account=="11493258") write.csv(our.inactive,"ourinactive.csv") ``` None of them match. That means, Betza's list contain inactive businesses that are falsely marked as active in our data. ```{r} bi<-betza_inactive |> group_by(contract_account, business_partner) |> summarise(amount=sum(amount)) |> mutate(contract_account=as.character(contract_account)) |> left_join(bpnames, by="contract_account") |> arrange(business_partner.x) |> select(-business_partner.y) |> rename(business_partner=business_partner.x) |> group_by(business_partner, Name) |> summarise(amount=sum(amount)) |> mutate(business_partner=as.character(business_partner)) #|> write.csv("Betza_Inactive_Named_aggregated.csv") betzaaldokvk<-read.csv("betzaaldokvk.csv",sep=";",dec=",") ba<-betzaaldokvk |> mutate(BP=as.character(BP)) |> rename(business_partner=BP) |> left_join(oi, by="business_partner") |> filter(!negative) |> group_by(contract_account, business_partner, Name.x, Chamber.of.commerce) |> summarise(amount=sum(amount)) |> rename(Name=Name.x) |> arrange(business_partner) write.csv(ba,"Betza_Aldo_Named_kvk_aggregated.csv") our.inactive |> group_by(business_partner) |> summarise(amount=sum(amount)) |> full_join(bi, by="business_partner") |> rename(out.amount=amount.x, bi.amount=amount.y) |> full_join(ba, by="business_partner") |> rename(ba.amount=amount) |> filter(!is.na(out.amount)) |> filter(!is.na(ba.amount)) ``` ```{r} bi |> full_join(ba, by="business_partner") |> ``` ```{r} bpnames ``` #Dec312021 Adding Names ```{r} broke<-read.csv("./dec312021/broke.csv", sep=";", dec=",") dead<-read.csv("./dec312021/dead.csv", sep=";", dec=",") gone<-read.csv("./dec312021/gone.csv", sep=";", dec=",") smallamount<-read.csv("./dec312021/smallamount.csv", sep=";", dec=",") timebarred<-read.csv("./dec312021/timebarred.csv", sep=";", dec=",") broke |> mutate(contract_account=as.character(contract_account)) |> left_join(bpnames, by=c("contract_account")) |> rename(`status (1)`=status..1.) write.csv2("./dec312021/named/broke.csv",row.names = F) dead |> left_join(bpnames, by=c("Row.Labels"="contract_account")) |> write.csv2("./dec312021/named/dead.csv",row.names = F) gone |> mutate(Row.Labels=as.character(Row.Labels)) |> left_join(bpnames, by=c("Row.Labels"="contract_account")) |> write.csv2("./dec312021/named/gone.csv",row.names = F) smallamount |> mutate(contract_account=as.character(contract_account)) |> left_join(bpnames, by=c("contract_account")) |> rename(business_partner=business_partner.x) |> select(-business_partner.y) |> write.csv2("./dec312021/named/smallamount.csv",row.names = F) timebarred |> left_join(bpnames, by=c("Row.Labels"="contract_account")) write.csv2("./dec312021/named/timebarred.csv",row.names = F) ``` ## Jan 05, 2021 Task is to add year wise amount to the list of writeoff contract accounts inactive ```{r} deceased<-read.csv("./data-raw/jan052021/inactive/deceased.csv") discontinued<-read.csv("./data-raw/jan052021/inactive/discontinued.csv") migrated<-read.csv("./data-raw/jan052021/inactive/migrated.csv") small<-read.csv("./data-raw/jan052021/inactive/small.csv") timebarred<-read.csv("./data-raw/jan052021/inactive/timebarred.csv") ``` ```{r} real.pos<-oi |> filter(o.type %in% c("Real", "Installments")) |> filter(!negative) |> mutate(age=ifelse(AGE_BUCKET=="5+ years", "More than 5","Less than 5")) deceased |> #pull(Grand.Total) |> sum() # They have not used the 60 days filter here. SO, I'm not using as well select(c(Row.Labels,business_partner,Name)) |> rename(contract_account=Row.Labels) |> mutate(contract_account=as.character(contract_account)) |> left_join(real.pos, by="contract_account") |> select(-business_partner.y) |> rename(business_partner=business_partner.x) |> mutate(year.due=lubridate::year(due_date)) |> select(-c(status.x,AGE_BUCKET,o.type,spread,dunn_status, new_contract_type, date.event, Reasonunregistered,e.status,date.relaxed, rel.status, age, year.due )) |> write.csv("./data-raw/jan052021/inactive/deceasedtrans.csv") group_by(contract_account, business_partner, Name, year.due) |> summarise(amount=sum(amount)) |> pivot_wider(names_from = year.due, values_from = amount) |> rename(name=Name) |> write.csv("./data-raw/jan052021/inactive/deceasedbyyear.csv") ## Number not matching discontinued |> #pull(amount) |> sum() # 1108257 select(c(contract_account)) |> unique() |> mutate(contract_account=as.character(contract_account)) |> left_join(oi, by=c("contract_account"="contract_account")) |> filter(AGE_BUCKET %in% c("4-5 years","3-4 years","2-3 years","1-2 years", "181-365 days","91-180 days","61-90 days", "0-30 days")) |> filter(o.type %in% c("Real", "Installments") ) |> mutate(year.due=lubridate::year(due_date)) |> select(-c(status.x,AGE_BUCKET,o.type,spread,dunn_status, new_contract_type, date.event, Reasonunregistered,e.status,date.relaxed, rel.status, year.due )) |> write.csv("./data-raw/jan052021/inactive/discontinuedtrans.csv", na="", row.names = F) group_by(contract_account,year.due) |> summarise(amount=sum(amount)) |> #pull(amount) |> sum() pivot_wider(names_from = year.due, values_from = amount) |> left_join(bpnames, by="contract_account") |> select(c(contract_account,business_partner, Name, `2016`,`2017`,`2018`)) |> write.csv("./data-raw/jan052021/inactive/discontinuedbyyear.csv", na="", row.names = F) ## Number not matching migrated |> #pull(Grand.Total) |> sum()#130502 select(c(contract_account)) |> unique() |> mutate(contract_account=as.character(contract_account)) |> left_join(real.pos, by="contract_account") |> #filter(o.type %in% c("Real", "Installments") ) |> filter(AGE_BUCKET %in% c("4-5 years","3-4 years","2-3 years","1-2 years", "181-365 days","91-180 days","61-90 days","31-60 days")) |> mutate(year.due=lubridate::year(due_date)) |> select(-c(status.x,AGE_BUCKET,o.type,spread,dunn_status, new_contract_type, date.event, Reasonunregistered,e.status,date.relaxed, rel.status, age, year.due )) |> write.csv("./data-raw/jan052021/inactive/migratedtrans.csv", na="", row.names = F) group_by(contract_account,year.due) |> summarise(amount=sum(amount)) |> #pull(amount) |> sum() pivot_wider(names_from = year.due, values_from = amount) |> left_join(bpnames, by="contract_account") |> select(c(contract_account,business_partner,Name, `2016`, `2017`, `2018`, `2021`)) |> write.csv("./data-raw/jan052021/inactive/migratedbyyear.csv", na="", row.names = F) small |> #pull(Open.Items) |> sum() #123207.9 select(c(contract_account,business_partner,Name)) |> mutate(contract_account=as.character(contract_account)) |> left_join(real.pos, by="contract_account") |> select(-business_partner.y) |> rename(business_partner=business_partner.x) |> #pull(amount) |> sum() mutate(year.due=lubridate::year(due_date)) |> select(-c(status.x,AGE_BUCKET,o.type,spread,dunn_status, new_contract_type, date.event, Reasonunregistered,e.status,date.relaxed, rel.status, age, year.due )) |> write.csv("./data-raw/jan052021/inactive/smalltrans.csv", na="") group_by(contract_account, business_partner, Name.x, year.due) |> summarise(amount=sum(amount)) |> pivot_wider(names_from = year.due, values_from = amount) |> rename(name=Name.x) |> write.csv("./data-raw/jan052021/inactive/smallbyyear.csv", na="") real.pos.uni<-real.pos |> unique() # not required timebarred |> #pull(Sum.of.amount) |> sum() #25943341 rename(contract_account=Row.Labels) |> mutate(contract_account=as.character(contract_account)) |> left_join(real.pos.uni, by="contract_account") |> select(-Reasonunregistered) |> filter(AGE_BUCKET=="5+ years") |> unique() |> select(-c(status.x,AGE_BUCKET,o.type,spread,dunn_status, new_contract_type, date.event, e.status,date.relaxed, rel.status, age)) |> write.csv("./data-raw/jan052021/inactive/timebarredtrans.csv", na="") group_by(contract_account,business_partner.x, Name, Sum.of.amount) |> summarise(amount=sum(amount)) |> #select(-business_partner.y) |> #write.csv("./data-raw/jan052021/inactive/timebarredtrans.csv", na="") filter(amount!=Sum.of.amount) pull(amount) |> sum() real.pos |> filter(contract_account=="10043660") |> arrange(amount) |> pull(amount) |> sum() ``` active ```{r} act.inact<-read.csv("./data-raw/jan052021/active/inactive.csv") act.disc<-read.csv("./data-raw/jan052021/active/discontinued.csv") act.migrated<-read.csv("./data-raw/jan052021/active/migrated.csv") act.passed<-read.csv("./data-raw/jan052021/active/passed.csv") #Not matching act.inact |> #pull(amount) |> sum() #6830028 select(c(contract_account)) |> unique() |> mutate(contract_account=as.character(contract_account)) |> left_join(real.pos, by="contract_account") |> #pull(amount) |> sum() mutate(year.due=lubridate::year(due_date)) |> select(-c(status.x,AGE_BUCKET,o.type,spread,dunn_status, new_contract_type, date.event, Reasonunregistered,e.status,date.relaxed, rel.status, age, year.due )) |> write.csv("./data-raw/jan052021/active/inactivetrans.csv", na="", row.names = F) group_by(contract_account,year.due) |> summarise(amount=sum(amount)) |> #pull(amount) |> sum() pivot_wider(names_from = year.due, values_from = amount) |> left_join(bpnames, by="contract_account") |> select(c(contract_account,business_partner, Name, `2004`,`2005`,`2006`,`2007`,`2008`, `2009`,`2010`,`2011`,`2012`,`2013`,`2014`,`2015`,`2016`,`2017`,`2018`,`2019`,`2020`,`2021`)) |> write.csv("./data-raw/jan052021/active/inactivebyyear.csv", na="", row.names = F) act.disc |> #pull(amount) |> sum() #1214182 select(c(contract_account, rel.status, age)) |> unique() |> mutate(rel.status=ifelse(rel.status=="More than 60 days", "long past","within limit")) |> mutate(contract_account=as.character(contract_account)) |> left_join(real.pos, by=c("contract_account"="contract_account", "rel.status"="rel.status","age"="age")) |> mutate(year.due=lubridate::year(due_date)) |> select(-c(status.x,AGE_BUCKET,o.type,spread,dunn_status, new_contract_type, date.event, Reasonunregistered,e.status,date.relaxed, rel.status, age, year.due )) |> write.csv("./data-raw/jan052021/active/discontinuedtrans.csv", na="", row.names = F) group_by(contract_account,year.due) |> summarise(amount=sum(amount)) |> #pull(amount) |> sum() pivot_wider(names_from = year.due, values_from = amount) |> left_join(bpnames, by="contract_account") |> select(c(contract_account,business_partner, Name,`2006`,`2007`,`2008`, `2009`,`2010`,`2011`,`2012`,`2013`,`2014`,`2015`,`2016`,`2017`,`2018`,`2019`,`2020`,`2021`)) |> write.csv("./data-raw/jan052021/active/discontinuedbyyear.csv", na="", row.names = F) # Matching act.migrated |> #1401989 select(c(contract_account, Name)) |> mutate(contract_account=as.character(contract_account)) |> left_join(real.pos, by="contract_account") |> mutate(year.due=lubridate::year(due_date)) |> select(-c(status.x,AGE_BUCKET,o.type,spread,dunn_status, new_contract_type, date.event, Reasonunregistered,e.status,date.relaxed, rel.status, age, year.due )) |> write.csv("./data-raw/jan052021/active/migratedtrans.csv", na="", row.names = F) group_by(contract_account,business_partner, Name, year.due) |> summarise(amount=sum(amount)) |> #pull(amount) |> sum(na.rm = T) pivot_wider(names_from = year.due, values_from = amount) |> select(c(contract_account,business_partner, Name,`1994`,`1995`,`1996`,`1997`,`1998`,`1999`,`2000`,`2001`,`2002`,`2003`,`2004`,`2005`,`2006`,`2007`,`2008`, `2009`,`2010`,`2011`,`2012`,`2013`,`2014`,`2015`,`2016`,`2017`)) |> write.csv("./data-raw/jan052021/active/migratedbyyear.csv", na="", row.names = F) # Matching act.passed |> #pull(duedatebeforedaterelaxed) |> sum(na.rm=T) #929285.6 select(c(contract_account, Name)) |> mutate(contract_account=as.character(contract_account)) |> left_join(real.pos, by="contract_account") |> mutate(year.due=lubridate::year(due_date)) |> select(-c(status.x,AGE_BUCKET,o.type,spread,dunn_status, new_contract_type, date.event, Reasonunregistered,e.status,date.relaxed, rel.status, age, year.due )) |> write.csv("./data-raw/jan052021/active/passedtrans.csv", na="", row.names = F) group_by(contract_account,business_partner, Name, year.due) |> summarise(amount=sum(amount)) |> #pull(amount) |> sum(na.rm = T) pivot_wider(names_from = year.due, values_from = amount) |> select(c(contract_account,business_partner, Name,`1900`,`1994`,`1995`,`1996`,`1997`,`1998`,`1999`,`2000`,`2001`,`2002`,`2003`,`2004`,`2005`,`2006`,`2007`,`2008`, `2009`,`2010`,`2011`,`2012`,`2013`,`2014`,`2015`,`2016`,`2017`)) |> write.csv("./data-raw/jan052021/active/passedbyyear.csv", na="", row.names = F) ``` ## R Markdown This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see . When you click the **Knit** button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this: ```{r cars} summary(cars) ``` ## Including Plots You can also embed plots, for example: ```{r pressure, echo=FALSE} plot(pressure) ``` Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.