You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

575 lines
21 KiB

---
title: "Untitled"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
library(lubridate)
library(fst)
library(DT)
library(data.table)
library(scales)
# get_png <- function(filename) {
# grid::rasterGrob(png::readPNG(filename), interpolate = TRUE)
# }
# l <- get_png("logo.png")
# sum.na<-function(x){sum(is.na(x))}
# id<-read.csv("./data/id20210921.csv", sep=";", header = F)
# alive<-read.csv("active.csv",sep=",")
# dead<-read.csv("inactive.csv",sep=",")
oi<-read.fst("./data/open_items_new.fst")
# oi |> filter(o.type =="Installments") |> filter(due_date>"2021-07-31")
# indi<-id %>%
# filter(V3=="Identiteitsbewijs") %>%
# select(V2,V4)
dunn<-read.fst("./data/dunning.fst")
last.dunn<-dunn %>%
group_by(DUNN_GPART, DUNN_VKONT) %>%
filter(DUNN_DATE==max(DUNN_DATE)) %>%
mutate(DUNN_GPART=as.character(DUNN_GPART))
#dead.org<-read.fst("./data/dead.fst")
#org.status<-dead.org %>%
rename(business_partner=V2) %>%
mutate(type="org", e.status="discontinued") %>%
select(business_partner,e.status,type)
#ind.status<-ind.status %>%
select(business_partner, e.status) %>%
mutate(type="indi")
#bp_operating_status<-rbind(org.status,ind.status)
#write.fst(bp_operating_status,"./data/bp_operating_status.fst")
operating_status<-read.fst("./data/bp_operating_status.fst")
# ind.status<-indi %>%
# left_join(ind.status, by=c("V4"="Id.Nummer")) %>%
# filter(!is.na(e.status))
# ind.status %>%
# rename(business_partner=V2, Id.Number=V4) %>%
# mutate(business_partner=as.character(business_partner)) %>%
# write.fst("./data/indestatus.fst")
# dead.indi<-indi %>% unique() %>% inner_join(dead.indi, by=c("V4"="Id.Nummer")) %>% rename(business_partner=V2, id_number=V4)
oi<-
oi%>%
mutate(o.type = case_when(
deposit == T ~ "Deposit",
documen_desc == "Installments" ~ "Installments",
contract_type=="Waste" | grepl("Waste",trandesc) ~ "Waste",
TRUE ~ "Real"
)
)
oi<-oi %>%
select(-c(sap_client,main_tran,sub_tran,division,dunn_exclu,payment_meth,clearing_date,clearing_post_date,clearing_reason,document_type,X,trantype, OUT_AGE)) %>%
left_join(operating_status, by="business_partner") %>%
mutate(e.status=ifelse(is.na(e.status),"active",e.status)) %>%
select(-type)
oi %>%
write.fst("./data/open_items.fst")
# c.status<-
# oi %>%
# mutate(AGE_BUCKET=ifelse(AGE_BUCKET!="5+ years","5- years", "5+ years")) %>%
# select(contract_account,AGE_BUCKET) %>%
# count(contract_account,AGE_BUCKET) %>%
# pivot_wider(names_from = AGE_BUCKET,values_from = n) %>%
# mutate(spread=ifelse(is.na(`5+ years`),"Only less than 5",ifelse(is.na(`5- years`)," Only more than 5","Both"))) %>%
# select(contract_account,spread)
oi<-oi %>%
left_join(c.status, by="contract_account")
unique(oi$contract_type)
```
Credit amounts: aggregated per contract account but older than 5 years
```{r}
oi |>
filter(o.type %in% c("Real", "Installments")) |>
filter(negative) |>
filter(AGE_BUCKET=="5+ years") |>
group_by(contract_account) |>
summarise(amount=sum(amount)) |> write.csv("creditagrrolderthan5.csv")
```
Credit amounts: aggregated per contract account but older than 3 years
```{r}
oi |>
filter(o.type %in% c("Real", "Installments")) |>
filter(negative) |>
filter(AGE_BUCKET %in% c("5+ years","4-5 years","3-4 years")) |>
group_by(contract_account, active) |>
summarise(amount=sum(amount)) |> #pull(amount) |> sum()
write.csv("creditagrrolderthan3.csv")
```
Open amounts for Deceased individuals: aggregated per contract account for only less than 60 days past their Date of Death
```{r}
oi |>
filter(e.status=="Passed") |>
filter(date.event <= due_date) |>
filter(due_date < date.relaxed) |>
group_by(contract_account, negative) |>
summarise(amount=sum(amount)) |>
pivot_wider(names_from = negative, values_from = amount) |>
rename(Open=`FALSE`, Credit=`TRUE`) |> write.csv("deceasedaggrlessthan60butnotbeforevent.csv")
```
Open amounts for migrated persons: aggregated per contract account for only less than 60 days past their migration date
```{r}
oi |>
filter(e.status=="Left") |>
filter(date.event <= due_date) |>
filter(due_date < date.relaxed) |>
group_by(contract_account, negative) |>
summarise(amount=sum(amount)) |>
pivot_wider(names_from = negative, values_from = amount) |>
rename(Open=`FALSE`, Credit=`TRUE`) |> write.csv("leftaggrlessthan60butnotbeforevent.csv")
```
Open amounts for Deceased individuals: aggregated per contract account for only less than 60 days past their Date of Death
```{r}
oi |>
filter(e.status=="Passed") |>
filter(due_date < date.relaxed) |>
group_by(contract_account, negative) |>
summarise(amount=sum(amount)) |>
pivot_wider(names_from = negative, values_from = amount) |>
rename(Open=`FALSE`, Credit=`TRUE`) |> write.csv("deceasedaggrlessthan60inclbeforevent.csv")
oi |>
filter(o.type %in% c("Real", "Installments")) |>
filter(e.status == "Left") |>
group_by(contract_account, rel.status, active) |>
summarise(amount=sum(amount)) |>
ungroup() |>
pivot_wider(names_from = rel.status, values_from = amount) |>
rename(duedatebeforedaterelaxed=`long past`, duedateafterrelaxeddate=`within limit`) |>
mutate(statu= ifelse(is.na(duedatebeforedaterelaxed),"Only duedateafterrelaxeddate",
ifelse(is.na(duedateafterrelaxeddate), "Only duedatebeforedaterelaxed", "Both"))) |>
inner_join(uniquebpcont, by="contract_account") |>
filter(statu=="Only duedatebeforedaterelaxed") |>
filter(duedatebeforedaterelaxed>10000) |>
write.csv("leftaggr.csv")
uniquebpcont<-oi |>
select(contract_account, business_partner) |> unique()
```
```{r}
oi |>
filter(e.status=="Left") |>
filter(due_date < date.relaxed) |>
group_by(contract_account, negative) |>
summarise(amount=sum(amount)) |>
pivot_wider(names_from = negative, values_from = amount) |>
rename(Open=`FALSE`, Credit=`TRUE`) |> write.csv("leftaggrlessthan60inclbeforevent.csv")
```
## Dump Dec 29
```{r}
passed<-read.csv("./data-raw/passed.csv", sep = ";") |> mutate(contract_account=as.character(contract_account)) |> select(-c(BP,Name))
migrated<-read.csv("./data-raw/migrated.csv", sep = ";") |> mutate(contract_account=as.character(contract_account)) |> select(-c(BP,Name))
inactived<-read.csv("./data-raw/inactived.csv", sep = ";") |> mutate(contract_account=as.character(contract_account)) |> select(-c(BP,NAME))
discontinued<-read.csv("./data-raw/discontinued.csv", sep = ";") |> mutate(contract_account=as.character(contract_account)) |>
select(-c(BP,NAME))
bpnames<-read.csv("./data-raw/bpnames.csv", sep = ";") |> select(c(PARTNER,X)) |> unique()
bpnames<-uniquebpcont |>
full_join(bpnames, by=c("business_partner"="PARTNER")) |>
select(contract_account, business_partner, X) |>
rename(Name=X) |> filter(!is.na(contract_account))
bpnames |> count(contract_account) |> arrange(desc(n))
bpnames |> filter(PARTNER=="1301041926")
```
```{r}
bpnames |>
select(business_partner, Name) |>
unique()
right_join(discontinued, by="contract_account") |>
arrange(contract_account) |>
filter(amount!="") |> mutate(amount=as.numeric(amount))
pull(amount) |> sum() #|> write.csv("disco.csv")
```
```{r}
oi |> filter(contract_account == "11460001")
```
## New inactive and kvk list post discussion with Aldo on Dec 30, 2021
1. Compare our inactive list and inactive list of Betza
2. If they match, proceed with aggregation
3. Create a list of aggregation of KvK
```{r}
betza_inactive<-read.csv("betzainactive.csv", sep=";", dec = ",")
```
```{r}
our.inactive<-oi |>
filter(o.type %in% c("Real", "Installments")) |>
filter(bp_category=="Organization") |>
filter(!active)
our.inactive<-our.inactive |>
filter(!negative) |>
group_by(business_partner, contract_account) |>
summarise(amount=sum(amount))
bpnames<-bpnames |>
select(business_partner, Name) |>
unique()
our.inactive |>
left_join(bpnames, by="business_partner") |> filter(contract_account=="11493258")
write.csv(our.inactive,"ourinactive.csv")
```
None of them match. That means, Betza's list contain inactive businesses that are falsely marked as active in our data.
```{r}
bi<-betza_inactive |>
group_by(contract_account, business_partner) |>
summarise(amount=sum(amount)) |>
mutate(contract_account=as.character(contract_account)) |>
left_join(bpnames, by="contract_account") |> arrange(business_partner.x) |>
select(-business_partner.y) |>
rename(business_partner=business_partner.x) |>
group_by(business_partner, Name) |>
summarise(amount=sum(amount)) |>
mutate(business_partner=as.character(business_partner)) #|> write.csv("Betza_Inactive_Named_aggregated.csv")
betzaaldokvk<-read.csv("betzaaldokvk.csv",sep=";",dec=",")
ba<-betzaaldokvk |>
mutate(BP=as.character(BP)) |>
rename(business_partner=BP) |>
left_join(oi, by="business_partner") |>
filter(!negative) |>
group_by(contract_account, business_partner, Name.x, Chamber.of.commerce) |>
summarise(amount=sum(amount)) |>
rename(Name=Name.x) |>
arrange(business_partner)
write.csv(ba,"Betza_Aldo_Named_kvk_aggregated.csv")
our.inactive |>
group_by(business_partner) |>
summarise(amount=sum(amount)) |>
full_join(bi, by="business_partner") |>
rename(out.amount=amount.x, bi.amount=amount.y) |>
full_join(ba, by="business_partner") |>
rename(ba.amount=amount) |>
filter(!is.na(out.amount)) |>
filter(!is.na(ba.amount))
```
```{r}
bi |>
full_join(ba, by="business_partner") |>
```
```{r}
bpnames
```
#Dec312021 Adding Names
```{r}
broke<-read.csv("./dec312021/broke.csv", sep=";", dec=",")
dead<-read.csv("./dec312021/dead.csv", sep=";", dec=",")
gone<-read.csv("./dec312021/gone.csv", sep=";", dec=",")
smallamount<-read.csv("./dec312021/smallamount.csv", sep=";", dec=",")
timebarred<-read.csv("./dec312021/timebarred.csv", sep=";", dec=",")
broke |>
mutate(contract_account=as.character(contract_account)) |>
left_join(bpnames, by=c("contract_account")) |>
rename(`status (1)`=status..1.)
write.csv2("./dec312021/named/broke.csv",row.names = F)
dead |>
left_join(bpnames, by=c("Row.Labels"="contract_account")) |>
write.csv2("./dec312021/named/dead.csv",row.names = F)
gone |>
mutate(Row.Labels=as.character(Row.Labels)) |>
left_join(bpnames, by=c("Row.Labels"="contract_account")) |>
write.csv2("./dec312021/named/gone.csv",row.names = F)
smallamount |>
mutate(contract_account=as.character(contract_account)) |>
left_join(bpnames, by=c("contract_account")) |>
rename(business_partner=business_partner.x) |>
select(-business_partner.y) |>
write.csv2("./dec312021/named/smallamount.csv",row.names = F)
timebarred |>
left_join(bpnames, by=c("Row.Labels"="contract_account"))
write.csv2("./dec312021/named/timebarred.csv",row.names = F)
```
## Jan 05, 2021
Task is to add year wise amount to the list of writeoff contract accounts
inactive
```{r}
deceased<-read.csv("./data-raw/jan052021/inactive/deceased.csv")
discontinued<-read.csv("./data-raw/jan052021/inactive/discontinued.csv")
migrated<-read.csv("./data-raw/jan052021/inactive/migrated.csv")
small<-read.csv("./data-raw/jan052021/inactive/small.csv")
timebarred<-read.csv("./data-raw/jan052021/inactive/timebarred.csv")
```
```{r}
real.pos<-oi |>
filter(o.type %in% c("Real", "Installments")) |>
filter(!negative) |> mutate(age=ifelse(AGE_BUCKET=="5+ years", "More than 5","Less than 5"))
deceased |> #pull(Grand.Total) |> sum() # They have not used the 60 days filter here. SO, I'm not using as well
select(c(Row.Labels,business_partner,Name)) |>
rename(contract_account=Row.Labels) |>
mutate(contract_account=as.character(contract_account)) |>
left_join(real.pos, by="contract_account") |>
select(-business_partner.y) |>
rename(business_partner=business_partner.x) |>
mutate(year.due=lubridate::year(due_date)) |> select(-c(status.x,AGE_BUCKET,o.type,spread,dunn_status, new_contract_type, date.event, Reasonunregistered,e.status,date.relaxed, rel.status, age, year.due )) |> write.csv("./data-raw/jan052021/inactive/deceasedtrans.csv")
group_by(contract_account, business_partner, Name, year.due) |>
summarise(amount=sum(amount)) |>
pivot_wider(names_from = year.due, values_from = amount) |>
rename(name=Name) |> write.csv("./data-raw/jan052021/inactive/deceasedbyyear.csv")
## Number not matching
discontinued |> #pull(amount) |> sum() # 1108257
select(c(contract_account)) |> unique() |>
mutate(contract_account=as.character(contract_account)) |>
left_join(oi, by=c("contract_account"="contract_account")) |>
filter(AGE_BUCKET %in% c("4-5 years","3-4 years","2-3 years","1-2 years",
"181-365 days","91-180 days","61-90 days", "0-30 days")) |>
filter(o.type %in% c("Real", "Installments") ) |>
mutate(year.due=lubridate::year(due_date)) |> select(-c(status.x,AGE_BUCKET,o.type,spread,dunn_status, new_contract_type, date.event, Reasonunregistered,e.status,date.relaxed, rel.status, year.due )) |> write.csv("./data-raw/jan052021/inactive/discontinuedtrans.csv", na="", row.names = F)
group_by(contract_account,year.due) |>
summarise(amount=sum(amount)) |> #pull(amount) |> sum()
pivot_wider(names_from = year.due, values_from = amount) |>
left_join(bpnames, by="contract_account") |>
select(c(contract_account,business_partner, Name, `2016`,`2017`,`2018`)) |> write.csv("./data-raw/jan052021/inactive/discontinuedbyyear.csv", na="", row.names = F)
## Number not matching
migrated |> #pull(Grand.Total) |> sum()#130502
select(c(contract_account)) |> unique() |>
mutate(contract_account=as.character(contract_account)) |>
left_join(real.pos, by="contract_account") |>
#filter(o.type %in% c("Real", "Installments") ) |>
filter(AGE_BUCKET %in% c("4-5 years","3-4 years","2-3 years","1-2 years",
"181-365 days","91-180 days","61-90 days","31-60 days")) |>
mutate(year.due=lubridate::year(due_date)) |> select(-c(status.x,AGE_BUCKET,o.type,spread,dunn_status, new_contract_type, date.event, Reasonunregistered,e.status,date.relaxed, rel.status, age, year.due )) |> write.csv("./data-raw/jan052021/inactive/migratedtrans.csv", na="", row.names = F)
group_by(contract_account,year.due) |>
summarise(amount=sum(amount)) |> #pull(amount) |> sum()
pivot_wider(names_from = year.due, values_from = amount) |>
left_join(bpnames, by="contract_account") |>
select(c(contract_account,business_partner,Name, `2016`, `2017`, `2018`, `2021`)) |>
write.csv("./data-raw/jan052021/inactive/migratedbyyear.csv", na="", row.names = F)
small |> #pull(Open.Items) |> sum() #123207.9
select(c(contract_account,business_partner,Name)) |>
mutate(contract_account=as.character(contract_account)) |>
left_join(real.pos, by="contract_account") |>
select(-business_partner.y) |>
rename(business_partner=business_partner.x) |> #pull(amount) |> sum()
mutate(year.due=lubridate::year(due_date)) |> select(-c(status.x,AGE_BUCKET,o.type,spread,dunn_status, new_contract_type, date.event, Reasonunregistered,e.status,date.relaxed, rel.status, age, year.due )) |> write.csv("./data-raw/jan052021/inactive/smalltrans.csv", na="")
group_by(contract_account, business_partner, Name.x, year.due) |>
summarise(amount=sum(amount)) |>
pivot_wider(names_from = year.due, values_from = amount) |>
rename(name=Name.x) |> write.csv("./data-raw/jan052021/inactive/smallbyyear.csv", na="")
real.pos.uni<-real.pos |> unique()
# not required
timebarred |> #pull(Sum.of.amount) |> sum() #25943341
rename(contract_account=Row.Labels) |>
mutate(contract_account=as.character(contract_account)) |>
left_join(real.pos.uni, by="contract_account") |> select(-Reasonunregistered) |>
filter(AGE_BUCKET=="5+ years") |>
unique() |> select(-c(status.x,AGE_BUCKET,o.type,spread,dunn_status, new_contract_type, date.event, e.status,date.relaxed, rel.status, age)) |>
write.csv("./data-raw/jan052021/inactive/timebarredtrans.csv", na="")
group_by(contract_account,business_partner.x, Name, Sum.of.amount) |>
summarise(amount=sum(amount)) |>
#select(-business_partner.y) |> #write.csv("./data-raw/jan052021/inactive/timebarredtrans.csv", na="")
filter(amount!=Sum.of.amount)
pull(amount) |> sum()
real.pos |>
filter(contract_account=="10043660") |> arrange(amount) |> pull(amount) |> sum()
```
active
```{r}
act.inact<-read.csv("./data-raw/jan052021/active/inactive.csv")
act.disc<-read.csv("./data-raw/jan052021/active/discontinued.csv")
act.migrated<-read.csv("./data-raw/jan052021/active/migrated.csv")
act.passed<-read.csv("./data-raw/jan052021/active/passed.csv")
#Not matching
act.inact |> #pull(amount) |> sum() #6830028
select(c(contract_account)) |> unique() |>
mutate(contract_account=as.character(contract_account)) |>
left_join(real.pos, by="contract_account") |> #pull(amount) |> sum()
mutate(year.due=lubridate::year(due_date)) |> select(-c(status.x,AGE_BUCKET,o.type,spread,dunn_status, new_contract_type, date.event, Reasonunregistered,e.status,date.relaxed, rel.status, age, year.due )) |> write.csv("./data-raw/jan052021/active/inactivetrans.csv", na="", row.names = F)
group_by(contract_account,year.due) |>
summarise(amount=sum(amount)) |> #pull(amount) |> sum()
pivot_wider(names_from = year.due, values_from = amount) |>
left_join(bpnames, by="contract_account") |>
select(c(contract_account,business_partner, Name, `2004`,`2005`,`2006`,`2007`,`2008`,
`2009`,`2010`,`2011`,`2012`,`2013`,`2014`,`2015`,`2016`,`2017`,`2018`,`2019`,`2020`,`2021`)) |>
write.csv("./data-raw/jan052021/active/inactivebyyear.csv", na="", row.names = F)
act.disc |> #pull(amount) |> sum() #1214182
select(c(contract_account, rel.status, age)) |> unique() |>
mutate(rel.status=ifelse(rel.status=="More than 60 days", "long past","within limit")) |>
mutate(contract_account=as.character(contract_account)) |>
left_join(real.pos, by=c("contract_account"="contract_account", "rel.status"="rel.status","age"="age")) |>
mutate(year.due=lubridate::year(due_date)) |> select(-c(status.x,AGE_BUCKET,o.type,spread,dunn_status, new_contract_type, date.event, Reasonunregistered,e.status,date.relaxed, rel.status, age, year.due )) |> write.csv("./data-raw/jan052021/active/discontinuedtrans.csv", na="", row.names = F)
group_by(contract_account,year.due) |>
summarise(amount=sum(amount)) |> #pull(amount) |> sum()
pivot_wider(names_from = year.due, values_from = amount) |>
left_join(bpnames, by="contract_account") |>
select(c(contract_account,business_partner, Name,`2006`,`2007`,`2008`,
`2009`,`2010`,`2011`,`2012`,`2013`,`2014`,`2015`,`2016`,`2017`,`2018`,`2019`,`2020`,`2021`)) |>
write.csv("./data-raw/jan052021/active/discontinuedbyyear.csv", na="", row.names = F)
# Matching
act.migrated |> #1401989
select(c(contract_account, Name)) |>
mutate(contract_account=as.character(contract_account)) |>
left_join(real.pos, by="contract_account") |>
mutate(year.due=lubridate::year(due_date)) |> select(-c(status.x,AGE_BUCKET,o.type,spread,dunn_status, new_contract_type, date.event, Reasonunregistered,e.status,date.relaxed, rel.status, age, year.due )) |> write.csv("./data-raw/jan052021/active/migratedtrans.csv", na="", row.names = F)
group_by(contract_account,business_partner, Name, year.due) |>
summarise(amount=sum(amount)) |> #pull(amount) |> sum(na.rm = T)
pivot_wider(names_from = year.due, values_from = amount) |>
select(c(contract_account,business_partner, Name,`1994`,`1995`,`1996`,`1997`,`1998`,`1999`,`2000`,`2001`,`2002`,`2003`,`2004`,`2005`,`2006`,`2007`,`2008`,
`2009`,`2010`,`2011`,`2012`,`2013`,`2014`,`2015`,`2016`,`2017`)) |>
write.csv("./data-raw/jan052021/active/migratedbyyear.csv", na="", row.names = F)
# Matching
act.passed |> #pull(duedatebeforedaterelaxed) |> sum(na.rm=T) #929285.6
select(c(contract_account, Name)) |>
mutate(contract_account=as.character(contract_account)) |>
left_join(real.pos, by="contract_account") |>
mutate(year.due=lubridate::year(due_date)) |> select(-c(status.x,AGE_BUCKET,o.type,spread,dunn_status, new_contract_type, date.event, Reasonunregistered,e.status,date.relaxed, rel.status, age, year.due )) |> write.csv("./data-raw/jan052021/active/passedtrans.csv", na="", row.names = F)
group_by(contract_account,business_partner, Name, year.due) |>
summarise(amount=sum(amount)) |> #pull(amount) |> sum(na.rm = T)
pivot_wider(names_from = year.due, values_from = amount) |>
select(c(contract_account,business_partner, Name,`1900`,`1994`,`1995`,`1996`,`1997`,`1998`,`1999`,`2000`,`2001`,`2002`,`2003`,`2004`,`2005`,`2006`,`2007`,`2008`,
`2009`,`2010`,`2011`,`2012`,`2013`,`2014`,`2015`,`2016`,`2017`)) |>
write.csv("./data-raw/jan052021/active/passedbyyear.csv", na="", row.names = F)
```
## R Markdown
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see <http://rmarkdown.rstudio.com>.
When you click the **Knit** button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
```{r cars}
summary(cars)
```
## Including Plots
You can also embed plots, for example:
```{r pressure, echo=FALSE}
plot(pressure)
```
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.