--- title: "Untitled" output: html_document --- ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) library(tidyverse) library(lubridate) library(fst) library(DT) library(data.table) library(scales) # get_png <- function(filename) { # grid::rasterGrob(png::readPNG(filename), interpolate = TRUE) # } # l <- get_png("logo.png") # sum.na<-function(x){sum(is.na(x))} id<-read.csv("./data/id20210921.csv", sep=";", header = F) # alive<-read.csv("active.csv",sep=",") # dead<-read.csv("inactive.csv",sep=",") oi<-read.fst("./data/open_items_new.fst") # oi |> filter(o.type =="Installments") |> filter(due_date>"2021-07-31") indi<-id %>% filter(V3=="Identiteitsbewijs") %>% select(V2,V4) dunn<-read.fst("./data/dunning.fst") last.dunn<-dunn %>% group_by(DUNN_GPART, DUNN_VKONT) %>% filter(DUNN_DATE==max(DUNN_DATE)) %>% mutate(DUNN_GPART=as.character(DUNN_GPART)) #dead.org<-read.fst("./data/dead.fst") #org.status<-dead.org %>% rename(business_partner=V2) %>% mutate(type="org", e.status="discontinued") %>% select(business_partner,e.status,type) #ind.status<-ind.status %>% select(business_partner, e.status) %>% mutate(type="indi") #bp_operating_status<-rbind(org.status,ind.status) #write.fst(bp_operating_status,"./data/bp_operating_status.fst") operating_status<-read.fst("./data/bp_operating_status.fst") # ind.status<-indi %>% # left_join(ind.status, by=c("V4"="Id.Nummer")) %>% # filter(!is.na(e.status)) # ind.status %>% # rename(business_partner=V2, Id.Number=V4) %>% # mutate(business_partner=as.character(business_partner)) %>% # write.fst("./data/indestatus.fst") # dead.indi<-indi %>% unique() %>% inner_join(dead.indi, by=c("V4"="Id.Nummer")) %>% rename(business_partner=V2, id_number=V4) oi<- oi%>% mutate(o.type = case_when( deposit == T ~ "Deposit", documen_desc == "Installments" ~ "Installments", contract_type=="Waste" | grepl("Waste",trandesc) ~ "Waste", TRUE ~ "Real" ) ) oi<-oi %>% select(-c(sap_client,main_tran,sub_tran,division,dunn_exclu,payment_meth,clearing_date,clearing_post_date,clearing_reason,document_type,X,trantype, OUT_AGE)) %>% left_join(operating_status, by="business_partner") %>% mutate(e.status=ifelse(is.na(e.status),"active",e.status)) %>% select(-type) oi %>% write.fst("./data/open_items.fst") # c.status<- # oi %>% # mutate(AGE_BUCKET=ifelse(AGE_BUCKET!="5+ years","5- years", "5+ years")) %>% # select(contract_account,AGE_BUCKET) %>% # count(contract_account,AGE_BUCKET) %>% # pivot_wider(names_from = AGE_BUCKET,values_from = n) %>% # mutate(spread=ifelse(is.na(`5+ years`),"Only less than 5",ifelse(is.na(`5- years`)," Only more than 5","Both"))) %>% # select(contract_account,spread) oi<-oi %>% left_join(c.status, by="contract_account") unique(oi$contract_type) ``` Credit amounts: aggregated per contract account but older than 5 years ```{r} oi |> filter(o.type %in% c("Real", "Installments")) |> filter(negative) |> filter(AGE_BUCKET=="5+ years") |> group_by(contract_account) |> summarise(amount=sum(amount)) |> write.csv("creditagrrolderthan5.csv") ``` Credit amounts: aggregated per contract account but older than 3 years ```{r} oi |> filter(o.type %in% c("Real", "Installments")) |> filter(negative) |> filter(AGE_BUCKET %in% c("5+ years","4-5 years","3-4 years")) |> group_by(contract_account, active) |> summarise(amount=sum(amount)) |> #pull(amount) |> sum() write.csv("creditagrrolderthan3.csv") ``` Open amounts for Deceased individuals: aggregated per contract account for only less than 60 days past their Date of Death ```{r} oi |> filter(e.status=="Passed") |> filter(date.event <= due_date) |> filter(due_date < date.relaxed) |> group_by(contract_account, negative) |> summarise(amount=sum(amount)) |> pivot_wider(names_from = negative, values_from = amount) |> rename(Open=`FALSE`, Credit=`TRUE`) |> write.csv("deceasedaggrlessthan60butnotbeforevent.csv") ``` Open amounts for migrated persons: aggregated per contract account for only less than 60 days past their migration date ```{r} oi |> filter(e.status=="Left") |> filter(date.event <= due_date) |> filter(due_date < date.relaxed) |> group_by(contract_account, negative) |> summarise(amount=sum(amount)) |> pivot_wider(names_from = negative, values_from = amount) |> rename(Open=`FALSE`, Credit=`TRUE`) |> write.csv("leftaggrlessthan60butnotbeforevent.csv") ``` Open amounts for Deceased individuals: aggregated per contract account for only less than 60 days past their Date of Death ```{r} oi |> filter(e.status=="Passed") |> filter(due_date < date.relaxed) |> group_by(contract_account, negative) |> summarise(amount=sum(amount)) |> pivot_wider(names_from = negative, values_from = amount) |> rename(Open=`FALSE`, Credit=`TRUE`) |> write.csv("deceasedaggrlessthan60inclbeforevent.csv") oi |> filter(o.type %in% c("Real", "Installments")) |> filter(e.status == "Left") |> group_by(contract_account, rel.status, active) |> summarise(amount=sum(amount)) |> ungroup() |> pivot_wider(names_from = rel.status, values_from = amount) |> rename(duedatebeforedaterelaxed=`long past`, duedateafterrelaxeddate=`within limit`) |> mutate(statu= ifelse(is.na(duedatebeforedaterelaxed),"Only duedateafterrelaxeddate", ifelse(is.na(duedateafterrelaxeddate), "Only duedatebeforedaterelaxed", "Both"))) |> inner_join(uniquebpcont, by="contract_account") |> filter(statu=="Only duedatebeforedaterelaxed") |> filter(duedatebeforedaterelaxed>10000) |> write.csv("leftaggr.csv") uniquebpcont<-oi |> select(contract_account, business_partner) |> unique() ``` ```{r} oi |> filter(e.status=="Left") |> filter(due_date < date.relaxed) |> group_by(contract_account, negative) |> summarise(amount=sum(amount)) |> pivot_wider(names_from = negative, values_from = amount) |> rename(Open=`FALSE`, Credit=`TRUE`) |> write.csv("leftaggrlessthan60inclbeforevent.csv") ``` ## Dump Dec 29 ```{r} passed<-read.csv("./data-raw/passed.csv", sep = ";") |> mutate(contract_account=as.character(contract_account)) |> select(-c(BP,Name)) migrated<-read.csv("./data-raw/migrated.csv", sep = ";") |> mutate(contract_account=as.character(contract_account)) |> select(-c(BP,Name)) inactived<-read.csv("./data-raw/inactived.csv", sep = ";") |> mutate(contract_account=as.character(contract_account)) |> select(-c(BP,NAME)) discontinued<-read.csv("./data-raw/discontinued.csv", sep = ";") |> mutate(contract_account=as.character(contract_account)) |> select(-c(BP,NAME)) bpnames<-read.csv("./data-raw/bpnames.csv", sep = ";") |> select(c(PARTNER,X)) |> unique() bpnames<-uniquebpcont |> full_join(bpnames, by=c("business_partner"="PARTNER")) |> select(contract_account, business_partner, X) |> rename(Name=X) |> filter(!is.na(contract_account)) bpnames |> count(contract_account) |> arrange(desc(n)) bpnames |> filter(PARTNER=="1301041926") ``` ```{r} bpnames |> select(business_partner, Name) |> unique() right_join(discontinued, by="contract_account") |> arrange(contract_account) |> filter(amount!="") |> mutate(amount=as.numeric(amount)) pull(amount) |> sum() #|> write.csv("disco.csv") ``` ```{r} oi |> filter(contract_account == "11460001") ``` ## New inactive and kvk list post discussion with Aldo on Dec 30, 2021 1. Compare our inactive list and inactive list of Betza 2. If they match, proceed with aggregation 3. Create a list of aggregation of KvK ```{r} betza_inactive<-read.csv("betzainactive.csv", sep=";", dec = ",") ``` ```{r} our.inactive<-oi |> filter(o.type %in% c("Real", "Installments")) |> filter(bp_category=="Organization") |> filter(!active) our.inactive<-our.inactive |> filter(!negative) |> group_by(business_partner, contract_account) |> summarise(amount=sum(amount)) bpnames<-bpnames |> select(business_partner, Name) |> unique() our.inactive |> left_join(bpnames, by="business_partner") |> filter(contract_account=="11493258") write.csv(our.inactive,"ourinactive.csv") ``` None of them match. That means, Betza's list contain inactive businesses that are falsely marked as active in our data. ```{r} bi<-betza_inactive |> group_by(contract_account, business_partner) |> summarise(amount=sum(amount)) |> mutate(contract_account=as.character(contract_account)) |> left_join(bpnames, by="contract_account") |> arrange(business_partner.x) |> select(-business_partner.y) |> rename(business_partner=business_partner.x) |> group_by(business_partner, Name) |> summarise(amount=sum(amount)) |> mutate(business_partner=as.character(business_partner)) #|> write.csv("Betza_Inactive_Named_aggregated.csv") betzaaldokvk<-read.csv("betzaaldokvk.csv",sep=";",dec=",") ba<-betzaaldokvk |> mutate(BP=as.character(BP)) |> rename(business_partner=BP) |> left_join(oi, by="business_partner") |> filter(!negative) |> group_by(contract_account, business_partner, Name.x, Chamber.of.commerce) |> summarise(amount=sum(amount)) |> rename(Name=Name.x) |> arrange(business_partner) write.csv(ba,"Betza_Aldo_Named_kvk_aggregated.csv") our.inactive |> group_by(business_partner) |> summarise(amount=sum(amount)) |> full_join(bi, by="business_partner") |> rename(out.amount=amount.x, bi.amount=amount.y) |> full_join(ba, by="business_partner") |> rename(ba.amount=amount) |> filter(!is.na(out.amount)) |> filter(!is.na(ba.amount)) ``` ```{r} bi |> full_join(ba, by="business_partner") |> ``` ```{r} bpnames ``` #Dec312021 Adding Names ```{r} broke<-read.csv("./dec312021/broke.csv", sep=";", dec=",") dead<-read.csv("./dec312021/dead.csv", sep=";", dec=",") gone<-read.csv("./dec312021/gone.csv", sep=";", dec=",") smallamount<-read.csv("./dec312021/smallamount.csv", sep=";", dec=",") timebarred<-read.csv("./dec312021/timebarred.csv", sep=";", dec=",") broke |> mutate(contract_account=as.character(contract_account)) |> left_join(bpnames, by=c("contract_account")) |> rename(`status (1)`=status..1.) write.csv2("./dec312021/named/broke.csv",row.names = F) dead |> left_join(bpnames, by=c("Row.Labels"="contract_account")) |> write.csv2("./dec312021/named/dead.csv",row.names = F) gone |> mutate(Row.Labels=as.character(Row.Labels)) |> left_join(bpnames, by=c("Row.Labels"="contract_account")) |> write.csv2("./dec312021/named/gone.csv",row.names = F) smallamount |> mutate(contract_account=as.character(contract_account)) |> left_join(bpnames, by=c("contract_account")) |> rename(business_partner=business_partner.x) |> select(-business_partner.y) |> write.csv2("./dec312021/named/smallamount.csv",row.names = F) timebarred |> left_join(bpnames, by=c("Row.Labels"="contract_account")) write.csv2("./dec312021/named/timebarred.csv",row.names = F) ``` ## Jan 05, 2021 Task is to add year wise amount to the list of writeoff contract accounts inactive ```{r} deceased<-read.csv("./data-raw/jan052021/inactive/deceased.csv") discontinued<-read.csv("./data-raw/jan052021/inactive/discontinued.csv") migrated<-read.csv("./data-raw/jan052021/inactive/migrated.csv") small<-read.csv("./data-raw/jan052021/inactive/small.csv") timebarred<-read.csv("./data-raw/jan052021/inactive/timebarred.csv") ``` ```{r} real.pos<-oi |> filter(o.type %in% c("Real", "Installments")) |> filter(!negative) |> mutate(age=ifelse(AGE_BUCKET=="5+ years", "More than 5","Less than 5")) deceased |> #pull(Grand.Total) |> sum() # They have not used the 60 days filter here. SO, I'm not using as well select(c(Row.Labels,business_partner,Name)) |> rename(contract_account=Row.Labels) |> mutate(contract_account=as.character(contract_account)) |> left_join(real.pos, by="contract_account") |> select(-business_partner.y) |> rename(business_partner=business_partner.x) |> mutate(year.due=lubridate::year(due_date)) |> group_by(contract_account, business_partner, Name, year.due) |> summarise(amount=sum(amount)) |> pivot_wider(names_from = year.due, values_from = amount) |> rename(name=Name) |> write.csv("./data-raw/jan052021/inactive/deceasedbyyear.csv") ## Number not matching discontinued |> #pull(amount) |> sum() # 1108257 select(c(contract_account)) |> unique() |> mutate(contract_account=as.character(contract_account)) |> left_join(oi, by=c("contract_account"="contract_account")) |> filter(AGE_BUCKET %in% c("4-5 years","3-4 years","2-3 years","1-2 years", "181-365 days","91-180 days","61-90 days", "0-30 days")) |> filter(o.type %in% c("Real", "Installments") ) |> mutate(year.due=lubridate::year(due_date)) |> group_by(contract_account,year.due) |> summarise(amount=sum(amount)) |> #pull(amount) |> sum() pivot_wider(names_from = year.due, values_from = amount) |> left_join(bpnames, by="contract_account") |> select(c(contract_account,business_partner, Name, `2016`,`2017`,`2018`)) |> write.csv("./data-raw/jan052021/inactive/discontinuedbyyear.csv", na="", row.names = F) ## Number not matching migrated |> #pull(Grand.Total) |> sum()#130502 select(c(contract_account)) |> unique() |> mutate(contract_account=as.character(contract_account)) |> left_join(real.pos, by="contract_account") |> #filter(o.type %in% c("Real", "Installments") ) |> filter(AGE_BUCKET %in% c("4-5 years","3-4 years","2-3 years","1-2 years", "181-365 days","91-180 days","61-90 days","31-60 days")) |> mutate(year.due=lubridate::year(due_date)) |> group_by(contract_account,year.due) |> summarise(amount=sum(amount)) |> #pull(amount) |> sum() pivot_wider(names_from = year.due, values_from = amount) |> left_join(bpnames, by="contract_account") |> select(c(contract_account,business_partner,Name, `2016`, `2017`, `2018`, `2021`)) |> write.csv("./data-raw/jan052021/inactive/migratedbyyear.csv", na="", row.names = F) small |> #pull(Open.Items) |> sum() #123207.9 select(c(contract_account,business_partner,Name)) |> mutate(contract_account=as.character(contract_account)) |> left_join(real.pos, by="contract_account") |> select(-business_partner.y) |> rename(business_partner=business_partner.x) |> #pull(amount) |> sum() mutate(year.due=lubridate::year(due_date)) |> group_by(contract_account, business_partner, Name.x, year.due) |> summarise(amount=sum(amount)) |> pivot_wider(names_from = year.due, values_from = amount) |> rename(name=Name.x) |> write.csv("./data-raw/jan052021/inactive/smallbyyear.csv", na="") # not required timebarred |> #pull(Sum.of.amount) |> sum() #25943341 rename(contract_account=Row.Labels) |> mutate(contract_account=as.character(contract_account)) |> left_join(real.pos, by="contract_account") |> select(-business_partner.y) |> rename(business_partner=business_partner.x) |> pull(amount) |> sum() real.pos |> filter(contract_account=="15276925") |> pull(amount) |> sum() ``` active ```{r} act.inact<-read.csv("./data-raw/jan052021/active/inactive.csv") act.disc<-read.csv("./data-raw/jan052021/active/discontinued.csv") act.migrated<-read.csv("./data-raw/jan052021/active/migrated.csv") act.passed<-read.csv("./data-raw/jan052021/active/passed.csv") #Not matching act.inact |> #pull(amount) |> sum() #6830028 select(c(contract_account)) |> unique() |> mutate(contract_account=as.character(contract_account)) |> left_join(real.pos, by="contract_account") |> #pull(amount) |> sum() mutate(year.due=lubridate::year(due_date)) |> group_by(contract_account,year.due) |> summarise(amount=sum(amount)) |> #pull(amount) |> sum() pivot_wider(names_from = year.due, values_from = amount) |> left_join(bpnames, by="contract_account") |> select(c(contract_account,business_partner, Name, `2004`,`2005`,`2006`,`2007`,`2008`, `2009`,`2010`,`2011`,`2012`,`2013`,`2014`,`2015`,`2016`,`2017`,`2018`,`2019`,`2020`,`2021`)) |> write.csv("./data-raw/jan052021/active/inactivebyyear.csv", na="", row.names = F) act.disc |> #pull(amount) |> sum() #1214182 select(c(contract_account, rel.status, age)) |> unique() |> mutate(rel.status=ifelse(rel.status=="More than 60 days", "long past","within limit")) |> mutate(contract_account=as.character(contract_account)) |> left_join(real.pos, by=c("contract_account"="contract_account", "rel.status"="rel.status","age"="age")) |> mutate(year.due=lubridate::year(due_date)) |> group_by(contract_account,year.due) |> summarise(amount=sum(amount)) |> #pull(amount) |> sum() pivot_wider(names_from = year.due, values_from = amount) |> left_join(bpnames, by="contract_account") |> select(c(contract_account,business_partner, Name,`2006`,`2007`,`2008`, `2009`,`2010`,`2011`,`2012`,`2013`,`2014`,`2015`,`2016`,`2017`,`2018`,`2019`,`2020`,`2021`)) |> write.csv("./data-raw/jan052021/active/discontinuedbyyear.csv", na="", row.names = F) # Matching act.migrated |> #1401989 select(c(contract_account, Name)) |> mutate(contract_account=as.character(contract_account)) |> left_join(real.pos, by="contract_account") |> mutate(year.due=lubridate::year(due_date)) |> group_by(contract_account,business_partner, Name, year.due) |> summarise(amount=sum(amount)) |> #pull(amount) |> sum(na.rm = T) pivot_wider(names_from = year.due, values_from = amount) |> select(c(contract_account,business_partner, Name,`1994`,`1995`,`1996`,`1997`,`1998`,`1999`,`2000`,`2001`,`2002`,`2003`,`2004`,`2005`,`2006`,`2007`,`2008`, `2009`,`2010`,`2011`,`2012`,`2013`,`2014`,`2015`,`2016`,`2017`)) |> write.csv("./data-raw/jan052021/active/migratedbyyear.csv", na="", row.names = F) # Matching act.passed |> #pull(duedatebeforedaterelaxed) |> sum(na.rm=T) #929285.6 select(c(contract_account, Name)) |> mutate(contract_account=as.character(contract_account)) |> left_join(real.pos, by="contract_account") |> mutate(year.due=lubridate::year(due_date)) |> group_by(contract_account,business_partner, Name, year.due) |> summarise(amount=sum(amount)) |> #pull(amount) |> sum(na.rm = T) pivot_wider(names_from = year.due, values_from = amount) |> select(c(contract_account,business_partner, Name,`1900`,`1994`,`1995`,`1996`,`1997`,`1998`,`1999`,`2000`,`2001`,`2002`,`2003`,`2004`,`2005`,`2006`,`2007`,`2008`, `2009`,`2010`,`2011`,`2012`,`2013`,`2014`,`2015`,`2016`,`2017`)) |> write.csv("./data-raw/jan052021/active/passedbyyear.csv", na="", row.names = F) ``` ## R Markdown This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see . When you click the **Knit** button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this: ```{r cars} summary(cars) ``` ## Including Plots You can also embed plots, for example: ```{r pressure, echo=FALSE} plot(pressure) ``` Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.