You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

566 lines
19 KiB

---
title: "Untitled"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
library(lubridate)
library(fst)
library(DT)
library(data.table)
library(scales)
# get_png <- function(filename) {
# grid::rasterGrob(png::readPNG(filename), interpolate = TRUE)
# }
# l <- get_png("logo.png")
# sum.na<-function(x){sum(is.na(x))}
id<-read.csv("./data/id20210921.csv", sep=";", header = F)
# alive<-read.csv("active.csv",sep=",")
# dead<-read.csv("inactive.csv",sep=",")
oi<-read.fst("./data/open_items_new.fst")
# oi |> filter(o.type =="Installments") |> filter(due_date>"2021-07-31")
indi<-id %>%
filter(V3=="Identiteitsbewijs") %>%
select(V2,V4)
dunn<-read.fst("./data/dunning.fst")
last.dunn<-dunn %>%
group_by(DUNN_GPART, DUNN_VKONT) %>%
filter(DUNN_DATE==max(DUNN_DATE)) %>%
mutate(DUNN_GPART=as.character(DUNN_GPART))
#dead.org<-read.fst("./data/dead.fst")
#org.status<-dead.org %>%
rename(business_partner=V2) %>%
mutate(type="org", e.status="discontinued") %>%
select(business_partner,e.status,type)
#ind.status<-ind.status %>%
select(business_partner, e.status) %>%
mutate(type="indi")
#bp_operating_status<-rbind(org.status,ind.status)
#write.fst(bp_operating_status,"./data/bp_operating_status.fst")
operating_status<-read.fst("./data/bp_operating_status.fst")
# ind.status<-indi %>%
# left_join(ind.status, by=c("V4"="Id.Nummer")) %>%
# filter(!is.na(e.status))
# ind.status %>%
# rename(business_partner=V2, Id.Number=V4) %>%
# mutate(business_partner=as.character(business_partner)) %>%
# write.fst("./data/indestatus.fst")
# dead.indi<-indi %>% unique() %>% inner_join(dead.indi, by=c("V4"="Id.Nummer")) %>% rename(business_partner=V2, id_number=V4)
oi<-
oi%>%
mutate(o.type = case_when(
deposit == T ~ "Deposit",
documen_desc == "Installments" ~ "Installments",
contract_type=="Waste" | grepl("Waste",trandesc) ~ "Waste",
TRUE ~ "Real"
)
)
oi<-oi %>%
select(-c(sap_client,main_tran,sub_tran,division,dunn_exclu,payment_meth,clearing_date,clearing_post_date,clearing_reason,document_type,X,trantype, OUT_AGE)) %>%
left_join(operating_status, by="business_partner") %>%
mutate(e.status=ifelse(is.na(e.status),"active",e.status)) %>%
select(-type)
oi %>%
write.fst("./data/open_items.fst")
# c.status<-
# oi %>%
# mutate(AGE_BUCKET=ifelse(AGE_BUCKET!="5+ years","5- years", "5+ years")) %>%
# select(contract_account,AGE_BUCKET) %>%
# count(contract_account,AGE_BUCKET) %>%
# pivot_wider(names_from = AGE_BUCKET,values_from = n) %>%
# mutate(spread=ifelse(is.na(`5+ years`),"Only less than 5",ifelse(is.na(`5- years`)," Only more than 5","Both"))) %>%
# select(contract_account,spread)
oi<-oi %>%
left_join(c.status, by="contract_account")
unique(oi$contract_type)
```
Credit amounts: aggregated per contract account but older than 5 years
```{r}
oi |>
filter(o.type %in% c("Real", "Installments")) |>
filter(negative) |>
filter(AGE_BUCKET=="5+ years") |>
group_by(contract_account) |>
summarise(amount=sum(amount)) |> write.csv("creditagrrolderthan5.csv")
```
Credit amounts: aggregated per contract account but older than 3 years
```{r}
oi |>
filter(o.type %in% c("Real", "Installments")) |>
filter(negative) |>
filter(AGE_BUCKET %in% c("5+ years","4-5 years","3-4 years")) |>
group_by(contract_account, active) |>
summarise(amount=sum(amount)) |> #pull(amount) |> sum()
write.csv("creditagrrolderthan3.csv")
```
Open amounts for Deceased individuals: aggregated per contract account for only less than 60 days past their Date of Death
```{r}
oi |>
filter(e.status=="Passed") |>
filter(date.event <= due_date) |>
filter(due_date < date.relaxed) |>
group_by(contract_account, negative) |>
summarise(amount=sum(amount)) |>
pivot_wider(names_from = negative, values_from = amount) |>
rename(Open=`FALSE`, Credit=`TRUE`) |> write.csv("deceasedaggrlessthan60butnotbeforevent.csv")
```
Open amounts for migrated persons: aggregated per contract account for only less than 60 days past their migration date
```{r}
oi |>
filter(e.status=="Left") |>
filter(date.event <= due_date) |>
filter(due_date < date.relaxed) |>
group_by(contract_account, negative) |>
summarise(amount=sum(amount)) |>
pivot_wider(names_from = negative, values_from = amount) |>
rename(Open=`FALSE`, Credit=`TRUE`) |> write.csv("leftaggrlessthan60butnotbeforevent.csv")
```
Open amounts for Deceased individuals: aggregated per contract account for only less than 60 days past their Date of Death
```{r}
oi |>
filter(e.status=="Passed") |>
filter(due_date < date.relaxed) |>
group_by(contract_account, negative) |>
summarise(amount=sum(amount)) |>
pivot_wider(names_from = negative, values_from = amount) |>
rename(Open=`FALSE`, Credit=`TRUE`) |> write.csv("deceasedaggrlessthan60inclbeforevent.csv")
oi |>
filter(o.type %in% c("Real", "Installments")) |>
filter(e.status == "Left") |>
group_by(contract_account, rel.status, active) |>
summarise(amount=sum(amount)) |>
ungroup() |>
pivot_wider(names_from = rel.status, values_from = amount) |>
rename(duedatebeforedaterelaxed=`long past`, duedateafterrelaxeddate=`within limit`) |>
mutate(statu= ifelse(is.na(duedatebeforedaterelaxed),"Only duedateafterrelaxeddate",
ifelse(is.na(duedateafterrelaxeddate), "Only duedatebeforedaterelaxed", "Both"))) |>
inner_join(uniquebpcont, by="contract_account") |>
filter(statu=="Only duedatebeforedaterelaxed") |>
filter(duedatebeforedaterelaxed>10000) |>
write.csv("leftaggr.csv")
uniquebpcont<-oi |>
select(contract_account, business_partner) |> unique()
```
```{r}
oi |>
filter(e.status=="Left") |>
filter(due_date < date.relaxed) |>
group_by(contract_account, negative) |>
summarise(amount=sum(amount)) |>
pivot_wider(names_from = negative, values_from = amount) |>
rename(Open=`FALSE`, Credit=`TRUE`) |> write.csv("leftaggrlessthan60inclbeforevent.csv")
```
## Dump Dec 29
```{r}
passed<-read.csv("./data-raw/passed.csv", sep = ";") |> mutate(contract_account=as.character(contract_account)) |> select(-c(BP,Name))
migrated<-read.csv("./data-raw/migrated.csv", sep = ";") |> mutate(contract_account=as.character(contract_account)) |> select(-c(BP,Name))
inactived<-read.csv("./data-raw/inactived.csv", sep = ";") |> mutate(contract_account=as.character(contract_account)) |> select(-c(BP,NAME))
discontinued<-read.csv("./data-raw/discontinued.csv", sep = ";") |> mutate(contract_account=as.character(contract_account)) |>
select(-c(BP,NAME))
bpnames<-read.csv("./data-raw/bpnames.csv", sep = ";") |> select(c(PARTNER,X)) |> unique()
bpnames<-uniquebpcont |>
full_join(bpnames, by=c("business_partner"="PARTNER")) |>
select(contract_account, business_partner, X) |>
rename(Name=X) |> filter(!is.na(contract_account))
bpnames |> count(contract_account) |> arrange(desc(n))
bpnames |> filter(PARTNER=="1301041926")
```
```{r}
bpnames |>
select(business_partner, Name) |>
unique()
right_join(discontinued, by="contract_account") |>
arrange(contract_account) |>
filter(amount!="") |> mutate(amount=as.numeric(amount))
pull(amount) |> sum() #|> write.csv("disco.csv")
```
```{r}
oi |> filter(contract_account == "11460001")
```
## New inactive and kvk list post discussion with Aldo on Dec 30, 2021
1. Compare our inactive list and inactive list of Betza
2. If they match, proceed with aggregation
3. Create a list of aggregation of KvK
```{r}
betza_inactive<-read.csv("betzainactive.csv", sep=";", dec = ",")
```
```{r}
our.inactive<-oi |>
filter(o.type %in% c("Real", "Installments")) |>
filter(bp_category=="Organization") |>
filter(!active)
our.inactive<-our.inactive |>
filter(!negative) |>
group_by(business_partner, contract_account) |>
summarise(amount=sum(amount))
bpnames<-bpnames |>
select(business_partner, Name) |>
unique()
our.inactive |>
left_join(bpnames, by="business_partner") |> filter(contract_account=="11493258")
write.csv(our.inactive,"ourinactive.csv")
```
None of them match. That means, Betza's list contain inactive businesses that are falsely marked as active in our data.
```{r}
bi<-betza_inactive |>
group_by(contract_account, business_partner) |>
summarise(amount=sum(amount)) |>
mutate(contract_account=as.character(contract_account)) |>
left_join(bpnames, by="contract_account") |> arrange(business_partner.x) |>
select(-business_partner.y) |>
rename(business_partner=business_partner.x) |>
group_by(business_partner, Name) |>
summarise(amount=sum(amount)) |>
mutate(business_partner=as.character(business_partner)) #|> write.csv("Betza_Inactive_Named_aggregated.csv")
betzaaldokvk<-read.csv("betzaaldokvk.csv",sep=";",dec=",")
ba<-betzaaldokvk |>
mutate(BP=as.character(BP)) |>
rename(business_partner=BP) |>
left_join(oi, by="business_partner") |>
filter(!negative) |>
group_by(contract_account, business_partner, Name.x, Chamber.of.commerce) |>
summarise(amount=sum(amount)) |>
rename(Name=Name.x) |>
arrange(business_partner)
write.csv(ba,"Betza_Aldo_Named_kvk_aggregated.csv")
our.inactive |>
group_by(business_partner) |>
summarise(amount=sum(amount)) |>
full_join(bi, by="business_partner") |>
rename(out.amount=amount.x, bi.amount=amount.y) |>
full_join(ba, by="business_partner") |>
rename(ba.amount=amount) |>
filter(!is.na(out.amount)) |>
filter(!is.na(ba.amount))
```
```{r}
bi |>
full_join(ba, by="business_partner") |>
```
```{r}
bpnames
```
#Dec312021 Adding Names
```{r}
broke<-read.csv("./dec312021/broke.csv", sep=";", dec=",")
dead<-read.csv("./dec312021/dead.csv", sep=";", dec=",")
gone<-read.csv("./dec312021/gone.csv", sep=";", dec=",")
smallamount<-read.csv("./dec312021/smallamount.csv", sep=";", dec=",")
timebarred<-read.csv("./dec312021/timebarred.csv", sep=";", dec=",")
broke |>
mutate(contract_account=as.character(contract_account)) |>
left_join(bpnames, by=c("contract_account")) |>
rename(`status (1)`=status..1.)
write.csv2("./dec312021/named/broke.csv",row.names = F)
dead |>
left_join(bpnames, by=c("Row.Labels"="contract_account")) |>
write.csv2("./dec312021/named/dead.csv",row.names = F)
gone |>
mutate(Row.Labels=as.character(Row.Labels)) |>
left_join(bpnames, by=c("Row.Labels"="contract_account")) |>
write.csv2("./dec312021/named/gone.csv",row.names = F)
smallamount |>
mutate(contract_account=as.character(contract_account)) |>
left_join(bpnames, by=c("contract_account")) |>
rename(business_partner=business_partner.x) |>
select(-business_partner.y) |>
write.csv2("./dec312021/named/smallamount.csv",row.names = F)
timebarred |>
left_join(bpnames, by=c("Row.Labels"="contract_account"))
write.csv2("./dec312021/named/timebarred.csv",row.names = F)
```
## Jan 05, 2021
Task is to add year wise amount to the list of writeoff contract accounts
inactive
```{r}
deceased<-read.csv("./data-raw/jan052021/inactive/deceased.csv")
discontinued<-read.csv("./data-raw/jan052021/inactive/discontinued.csv")
migrated<-read.csv("./data-raw/jan052021/inactive/migrated.csv")
small<-read.csv("./data-raw/jan052021/inactive/small.csv")
timebarred<-read.csv("./data-raw/jan052021/inactive/timebarred.csv")
```
```{r}
real.pos<-oi |>
filter(o.type %in% c("Real", "Installments")) |>
filter(!negative) |> mutate(age=ifelse(AGE_BUCKET=="5+ years", "More than 5","Less than 5"))
deceased |> #pull(Grand.Total) |> sum() # They have not used the 60 days filter here. SO, I'm not using as well
select(c(Row.Labels,business_partner,Name)) |>
rename(contract_account=Row.Labels) |>
mutate(contract_account=as.character(contract_account)) |>
left_join(real.pos, by="contract_account") |>
select(-business_partner.y) |>
rename(business_partner=business_partner.x) |>
mutate(year.due=lubridate::year(due_date)) |>
group_by(contract_account, business_partner, Name, year.due) |>
summarise(amount=sum(amount)) |>
pivot_wider(names_from = year.due, values_from = amount) |>
rename(name=Name) |> write.csv("./data-raw/jan052021/inactive/deceasedbyyear.csv")
## Number not matching
discontinued |> #pull(amount) |> sum() # 1108257
select(c(contract_account)) |> unique() |>
mutate(contract_account=as.character(contract_account)) |>
left_join(oi, by=c("contract_account"="contract_account")) |>
filter(AGE_BUCKET %in% c("4-5 years","3-4 years","2-3 years","1-2 years",
"181-365 days","91-180 days","61-90 days", "0-30 days")) |>
filter(o.type %in% c("Real", "Installments") ) |>
mutate(year.due=lubridate::year(due_date)) |>
group_by(contract_account,year.due) |>
summarise(amount=sum(amount)) |> #pull(amount) |> sum()
pivot_wider(names_from = year.due, values_from = amount) |>
left_join(bpnames, by="contract_account") |>
select(c(contract_account,business_partner, Name, `2016`,`2017`,`2018`)) |> write.csv("./data-raw/jan052021/inactive/discontinuedbyyear.csv", na="", row.names = F)
## Number not matching
migrated |> #pull(Grand.Total) |> sum()#130502
select(c(contract_account)) |> unique() |>
mutate(contract_account=as.character(contract_account)) |>
left_join(real.pos, by="contract_account") |>
#filter(o.type %in% c("Real", "Installments") ) |>
filter(AGE_BUCKET %in% c("4-5 years","3-4 years","2-3 years","1-2 years",
"181-365 days","91-180 days","61-90 days","31-60 days")) |>
mutate(year.due=lubridate::year(due_date)) |>
group_by(contract_account,year.due) |>
summarise(amount=sum(amount)) |> #pull(amount) |> sum()
pivot_wider(names_from = year.due, values_from = amount) |>
left_join(bpnames, by="contract_account") |>
select(c(contract_account,business_partner,Name, `2016`, `2017`, `2018`, `2021`)) |>
write.csv("./data-raw/jan052021/inactive/migratedbyyear.csv", na="", row.names = F)
small |> #pull(Open.Items) |> sum() #123207.9
select(c(contract_account,business_partner,Name)) |>
mutate(contract_account=as.character(contract_account)) |>
left_join(real.pos, by="contract_account") |>
select(-business_partner.y) |>
rename(business_partner=business_partner.x) |> #pull(amount) |> sum()
mutate(year.due=lubridate::year(due_date)) |>
group_by(contract_account, business_partner, Name.x, year.due) |>
summarise(amount=sum(amount)) |>
pivot_wider(names_from = year.due, values_from = amount) |>
rename(name=Name.x) |> write.csv("./data-raw/jan052021/inactive/smallbyyear.csv", na="")
# not required
timebarred |> #pull(Sum.of.amount) |> sum() #25943341
rename(contract_account=Row.Labels) |>
mutate(contract_account=as.character(contract_account)) |>
left_join(real.pos, by="contract_account") |>
select(-business_partner.y) |>
rename(business_partner=business_partner.x) |> pull(amount) |> sum()
real.pos |>
filter(contract_account=="15276925") |>
pull(amount) |> sum()
```
active
```{r}
act.inact<-read.csv("./data-raw/jan052021/active/inactive.csv")
act.disc<-read.csv("./data-raw/jan052021/active/discontinued.csv")
act.migrated<-read.csv("./data-raw/jan052021/active/migrated.csv")
act.passed<-read.csv("./data-raw/jan052021/active/passed.csv")
#Not matching
act.inact |> #pull(amount) |> sum() #6830028
select(c(contract_account)) |> unique() |>
mutate(contract_account=as.character(contract_account)) |>
left_join(real.pos, by="contract_account") |> #pull(amount) |> sum()
mutate(year.due=lubridate::year(due_date)) |>
group_by(contract_account,year.due) |>
summarise(amount=sum(amount)) |> #pull(amount) |> sum()
pivot_wider(names_from = year.due, values_from = amount) |>
left_join(bpnames, by="contract_account") |>
select(c(contract_account,business_partner, Name, `2004`,`2005`,`2006`,`2007`,`2008`,
`2009`,`2010`,`2011`,`2012`,`2013`,`2014`,`2015`,`2016`,`2017`,`2018`,`2019`,`2020`,`2021`)) |>
write.csv("./data-raw/jan052021/active/inactivebyyear.csv", na="", row.names = F)
act.disc |> #pull(amount) |> sum() #1214182
select(c(contract_account, rel.status, age)) |> unique() |>
mutate(rel.status=ifelse(rel.status=="More than 60 days", "long past","within limit")) |>
mutate(contract_account=as.character(contract_account)) |>
left_join(real.pos, by=c("contract_account"="contract_account", "rel.status"="rel.status","age"="age")) |>
mutate(year.due=lubridate::year(due_date)) |>
group_by(contract_account,year.due) |>
summarise(amount=sum(amount)) |> #pull(amount) |> sum()
pivot_wider(names_from = year.due, values_from = amount) |>
left_join(bpnames, by="contract_account") |>
select(c(contract_account,business_partner, Name,`2006`,`2007`,`2008`,
`2009`,`2010`,`2011`,`2012`,`2013`,`2014`,`2015`,`2016`,`2017`,`2018`,`2019`,`2020`,`2021`)) |>
write.csv("./data-raw/jan052021/active/discontinuedbyyear.csv", na="", row.names = F)
# Matching
act.migrated |> #1401989
select(c(contract_account, Name)) |>
mutate(contract_account=as.character(contract_account)) |>
left_join(real.pos, by="contract_account") |>
mutate(year.due=lubridate::year(due_date)) |>
group_by(contract_account,business_partner, Name, year.due) |>
summarise(amount=sum(amount)) |> #pull(amount) |> sum(na.rm = T)
pivot_wider(names_from = year.due, values_from = amount) |>
select(c(contract_account,business_partner, Name,`1994`,`1995`,`1996`,`1997`,`1998`,`1999`,`2000`,`2001`,`2002`,`2003`,`2004`,`2005`,`2006`,`2007`,`2008`,
`2009`,`2010`,`2011`,`2012`,`2013`,`2014`,`2015`,`2016`,`2017`)) |>
write.csv("./data-raw/jan052021/active/migratedbyyear.csv", na="", row.names = F)
# Matching
act.passed |> #pull(duedatebeforedaterelaxed) |> sum(na.rm=T) #929285.6
select(c(contract_account, Name)) |>
mutate(contract_account=as.character(contract_account)) |>
left_join(real.pos, by="contract_account") |>
mutate(year.due=lubridate::year(due_date)) |>
group_by(contract_account,business_partner, Name, year.due) |>
summarise(amount=sum(amount)) |> #pull(amount) |> sum(na.rm = T)
pivot_wider(names_from = year.due, values_from = amount) |>
select(c(contract_account,business_partner, Name,`1900`,`1994`,`1995`,`1996`,`1997`,`1998`,`1999`,`2000`,`2001`,`2002`,`2003`,`2004`,`2005`,`2006`,`2007`,`2008`,
`2009`,`2010`,`2011`,`2012`,`2013`,`2014`,`2015`,`2016`,`2017`)) |>
write.csv("./data-raw/jan052021/active/passedbyyear.csv", na="", row.names = F)
```
## R Markdown
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see <http://rmarkdown.rstudio.com>.
When you click the **Knit** button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
```{r cars}
summary(cars)
```
## Including Plots
You can also embed plots, for example:
```{r pressure, echo=FALSE}
plot(pressure)
```
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.