You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

576 lines
21 KiB

4 years ago
---
title: "Untitled"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
library(lubridate)
library(fst)
library(DT)
library(data.table)
library(scales)
# get_png <- function(filename) {
# grid::rasterGrob(png::readPNG(filename), interpolate = TRUE)
# }
# l <- get_png("logo.png")
# sum.na<-function(x){sum(is.na(x))}
# id<-read.csv("./data/id20210921.csv", sep=";", header = F)
4 years ago
# alive<-read.csv("active.csv",sep=",")
# dead<-read.csv("inactive.csv",sep=",")
oi<-read.fst("./data/open_items_new.fst")
# oi |> filter(o.type =="Installments") |> filter(due_date>"2021-07-31")
# indi<-id %>%
# filter(V3=="Identiteitsbewijs") %>%
# select(V2,V4)
4 years ago
dunn<-read.fst("./data/dunning.fst")
last.dunn<-dunn %>%
group_by(DUNN_GPART, DUNN_VKONT) %>%
filter(DUNN_DATE==max(DUNN_DATE)) %>%
mutate(DUNN_GPART=as.character(DUNN_GPART))
#dead.org<-read.fst("./data/dead.fst")
#org.status<-dead.org %>%
rename(business_partner=V2) %>%
mutate(type="org", e.status="discontinued") %>%
select(business_partner,e.status,type)
#ind.status<-ind.status %>%
select(business_partner, e.status) %>%
mutate(type="indi")
#bp_operating_status<-rbind(org.status,ind.status)
#write.fst(bp_operating_status,"./data/bp_operating_status.fst")
operating_status<-read.fst("./data/bp_operating_status.fst")
# ind.status<-indi %>%
# left_join(ind.status, by=c("V4"="Id.Nummer")) %>%
# filter(!is.na(e.status))
# ind.status %>%
# rename(business_partner=V2, Id.Number=V4) %>%
# mutate(business_partner=as.character(business_partner)) %>%
# write.fst("./data/indestatus.fst")
# dead.indi<-indi %>% unique() %>% inner_join(dead.indi, by=c("V4"="Id.Nummer")) %>% rename(business_partner=V2, id_number=V4)
oi<-
oi%>%
mutate(o.type = case_when(
deposit == T ~ "Deposit",
documen_desc == "Installments" ~ "Installments",
contract_type=="Waste" | grepl("Waste",trandesc) ~ "Waste",
TRUE ~ "Real"
)
)
oi<-oi %>%
select(-c(sap_client,main_tran,sub_tran,division,dunn_exclu,payment_meth,clearing_date,clearing_post_date,clearing_reason,document_type,X,trantype, OUT_AGE)) %>%
left_join(operating_status, by="business_partner") %>%
mutate(e.status=ifelse(is.na(e.status),"active",e.status)) %>%
select(-type)
oi %>%
write.fst("./data/open_items.fst")
# c.status<-
# oi %>%
# mutate(AGE_BUCKET=ifelse(AGE_BUCKET!="5+ years","5- years", "5+ years")) %>%
# select(contract_account,AGE_BUCKET) %>%
# count(contract_account,AGE_BUCKET) %>%
# pivot_wider(names_from = AGE_BUCKET,values_from = n) %>%
# mutate(spread=ifelse(is.na(`5+ years`),"Only less than 5",ifelse(is.na(`5- years`)," Only more than 5","Both"))) %>%
# select(contract_account,spread)
oi<-oi %>%
left_join(c.status, by="contract_account")
unique(oi$contract_type)
```
Credit amounts: aggregated per contract account but older than 5 years
```{r}
oi |>
filter(o.type %in% c("Real", "Installments")) |>
filter(negative) |>
filter(AGE_BUCKET=="5+ years") |>
group_by(contract_account) |>
summarise(amount=sum(amount)) |> write.csv("creditagrrolderthan5.csv")
```
Credit amounts: aggregated per contract account but older than 3 years
```{r}
oi |>
filter(o.type %in% c("Real", "Installments")) |>
filter(negative) |>
filter(AGE_BUCKET %in% c("5+ years","4-5 years","3-4 years")) |>
group_by(contract_account, active) |>
summarise(amount=sum(amount)) |> #pull(amount) |> sum()
write.csv("creditagrrolderthan3.csv")
```
Open amounts for Deceased individuals: aggregated per contract account for only less than 60 days past their Date of Death
```{r}
oi |>
filter(e.status=="Passed") |>
filter(date.event <= due_date) |>
filter(due_date < date.relaxed) |>
group_by(contract_account, negative) |>
summarise(amount=sum(amount)) |>
pivot_wider(names_from = negative, values_from = amount) |>
rename(Open=`FALSE`, Credit=`TRUE`) |> write.csv("deceasedaggrlessthan60butnotbeforevent.csv")
```
Open amounts for migrated persons: aggregated per contract account for only less than 60 days past their migration date
```{r}
oi |>
filter(e.status=="Left") |>
filter(date.event <= due_date) |>
filter(due_date < date.relaxed) |>
group_by(contract_account, negative) |>
summarise(amount=sum(amount)) |>
pivot_wider(names_from = negative, values_from = amount) |>
rename(Open=`FALSE`, Credit=`TRUE`) |> write.csv("leftaggrlessthan60butnotbeforevent.csv")
```
Open amounts for Deceased individuals: aggregated per contract account for only less than 60 days past their Date of Death
```{r}
oi |>
filter(e.status=="Passed") |>
filter(due_date < date.relaxed) |>
group_by(contract_account, negative) |>
summarise(amount=sum(amount)) |>
pivot_wider(names_from = negative, values_from = amount) |>
rename(Open=`FALSE`, Credit=`TRUE`) |> write.csv("deceasedaggrlessthan60inclbeforevent.csv")
oi |>
filter(o.type %in% c("Real", "Installments")) |>
filter(e.status == "Left") |>
group_by(contract_account, rel.status, active) |>
summarise(amount=sum(amount)) |>
ungroup() |>
pivot_wider(names_from = rel.status, values_from = amount) |>
rename(duedatebeforedaterelaxed=`long past`, duedateafterrelaxeddate=`within limit`) |>
mutate(statu= ifelse(is.na(duedatebeforedaterelaxed),"Only duedateafterrelaxeddate",
ifelse(is.na(duedateafterrelaxeddate), "Only duedatebeforedaterelaxed", "Both"))) |>
inner_join(uniquebpcont, by="contract_account") |>
filter(statu=="Only duedatebeforedaterelaxed") |>
filter(duedatebeforedaterelaxed>10000) |>
write.csv("leftaggr.csv")
uniquebpcont<-oi |>
select(contract_account, business_partner) |> unique()
```
```{r}
oi |>
filter(e.status=="Left") |>
filter(due_date < date.relaxed) |>
group_by(contract_account, negative) |>
summarise(amount=sum(amount)) |>
pivot_wider(names_from = negative, values_from = amount) |>
rename(Open=`FALSE`, Credit=`TRUE`) |> write.csv("leftaggrlessthan60inclbeforevent.csv")
```
## Dump Dec 29
```{r}
passed<-read.csv("./data-raw/passed.csv", sep = ";") |> mutate(contract_account=as.character(contract_account)) |> select(-c(BP,Name))
migrated<-read.csv("./data-raw/migrated.csv", sep = ";") |> mutate(contract_account=as.character(contract_account)) |> select(-c(BP,Name))
inactived<-read.csv("./data-raw/inactived.csv", sep = ";") |> mutate(contract_account=as.character(contract_account)) |> select(-c(BP,NAME))
discontinued<-read.csv("./data-raw/discontinued.csv", sep = ";") |> mutate(contract_account=as.character(contract_account)) |>
select(-c(BP,NAME))
bpnames<-read.csv("./data-raw/bpnames.csv", sep = ";") |> select(c(PARTNER,X)) |> unique()
bpnames<-uniquebpcont |>
full_join(bpnames, by=c("business_partner"="PARTNER")) |>
select(contract_account, business_partner, X) |>
rename(Name=X) |> filter(!is.na(contract_account))
bpnames |> count(contract_account) |> arrange(desc(n))
bpnames |> filter(PARTNER=="1301041926")
```
```{r}
bpnames |>
select(business_partner, Name) |>
unique()
right_join(discontinued, by="contract_account") |>
arrange(contract_account) |>
filter(amount!="") |> mutate(amount=as.numeric(amount))
pull(amount) |> sum() #|> write.csv("disco.csv")
```
```{r}
oi |> filter(contract_account == "11460001")
```
## New inactive and kvk list post discussion with Aldo on Dec 30, 2021
1. Compare our inactive list and inactive list of Betza
2. If they match, proceed with aggregation
3. Create a list of aggregation of KvK
```{r}
betza_inactive<-read.csv("betzainactive.csv", sep=";", dec = ",")
```
```{r}
our.inactive<-oi |>
filter(o.type %in% c("Real", "Installments")) |>
filter(bp_category=="Organization") |>
filter(!active)
our.inactive<-our.inactive |>
filter(!negative) |>
group_by(business_partner, contract_account) |>
summarise(amount=sum(amount))
bpnames<-bpnames |>
select(business_partner, Name) |>
unique()
our.inactive |>
left_join(bpnames, by="business_partner") |> filter(contract_account=="11493258")
write.csv(our.inactive,"ourinactive.csv")
```
None of them match. That means, Betza's list contain inactive businesses that are falsely marked as active in our data.
```{r}
bi<-betza_inactive |>
group_by(contract_account, business_partner) |>
summarise(amount=sum(amount)) |>
mutate(contract_account=as.character(contract_account)) |>
left_join(bpnames, by="contract_account") |> arrange(business_partner.x) |>
select(-business_partner.y) |>
rename(business_partner=business_partner.x) |>
group_by(business_partner, Name) |>
summarise(amount=sum(amount)) |>
mutate(business_partner=as.character(business_partner)) #|> write.csv("Betza_Inactive_Named_aggregated.csv")
betzaaldokvk<-read.csv("betzaaldokvk.csv",sep=";",dec=",")
ba<-betzaaldokvk |>
mutate(BP=as.character(BP)) |>
rename(business_partner=BP) |>
left_join(oi, by="business_partner") |>
filter(!negative) |>
group_by(contract_account, business_partner, Name.x, Chamber.of.commerce) |>
summarise(amount=sum(amount)) |>
rename(Name=Name.x) |>
arrange(business_partner)
write.csv(ba,"Betza_Aldo_Named_kvk_aggregated.csv")
our.inactive |>
group_by(business_partner) |>
summarise(amount=sum(amount)) |>
full_join(bi, by="business_partner") |>
rename(out.amount=amount.x, bi.amount=amount.y) |>
full_join(ba, by="business_partner") |>
rename(ba.amount=amount) |>
filter(!is.na(out.amount)) |>
filter(!is.na(ba.amount))
```
```{r}
bi |>
full_join(ba, by="business_partner") |>
```
```{r}
bpnames
```
#Dec312021 Adding Names
```{r}
broke<-read.csv("./dec312021/broke.csv", sep=";", dec=",")
dead<-read.csv("./dec312021/dead.csv", sep=";", dec=",")
gone<-read.csv("./dec312021/gone.csv", sep=";", dec=",")
smallamount<-read.csv("./dec312021/smallamount.csv", sep=";", dec=",")
timebarred<-read.csv("./dec312021/timebarred.csv", sep=";", dec=",")
broke |>
mutate(contract_account=as.character(contract_account)) |>
left_join(bpnames, by=c("contract_account")) |>
rename(`status (1)`=status..1.)
write.csv2("./dec312021/named/broke.csv",row.names = F)
dead |>
left_join(bpnames, by=c("Row.Labels"="contract_account")) |>
write.csv2("./dec312021/named/dead.csv",row.names = F)
gone |>
mutate(Row.Labels=as.character(Row.Labels)) |>
left_join(bpnames, by=c("Row.Labels"="contract_account")) |>
write.csv2("./dec312021/named/gone.csv",row.names = F)
smallamount |>
mutate(contract_account=as.character(contract_account)) |>
left_join(bpnames, by=c("contract_account")) |>
rename(business_partner=business_partner.x) |>
select(-business_partner.y) |>
write.csv2("./dec312021/named/smallamount.csv",row.names = F)
timebarred |>
left_join(bpnames, by=c("Row.Labels"="contract_account"))
write.csv2("./dec312021/named/timebarred.csv",row.names = F)
```
## Jan 05, 2021
Task is to add year wise amount to the list of writeoff contract accounts
inactive
```{r}
deceased<-read.csv("./data-raw/jan052021/inactive/deceased.csv")
discontinued<-read.csv("./data-raw/jan052021/inactive/discontinued.csv")
migrated<-read.csv("./data-raw/jan052021/inactive/migrated.csv")
small<-read.csv("./data-raw/jan052021/inactive/small.csv")
timebarred<-read.csv("./data-raw/jan052021/inactive/timebarred.csv")
```
```{r}
real.pos<-oi |>
filter(o.type %in% c("Real", "Installments")) |>
filter(!negative) |> mutate(age=ifelse(AGE_BUCKET=="5+ years", "More than 5","Less than 5"))
deceased |> #pull(Grand.Total) |> sum() # They have not used the 60 days filter here. SO, I'm not using as well
select(c(Row.Labels,business_partner,Name)) |>
rename(contract_account=Row.Labels) |>
mutate(contract_account=as.character(contract_account)) |>
left_join(real.pos, by="contract_account") |>
select(-business_partner.y) |>
rename(business_partner=business_partner.x) |>
mutate(year.due=lubridate::year(due_date)) |> select(-c(status.x,AGE_BUCKET,o.type,spread,dunn_status, new_contract_type, date.event, Reasonunregistered,e.status,date.relaxed, rel.status, age, year.due )) |> write.csv("./data-raw/jan052021/inactive/deceasedtrans.csv")
4 years ago
group_by(contract_account, business_partner, Name, year.due) |>
summarise(amount=sum(amount)) |>
pivot_wider(names_from = year.due, values_from = amount) |>
rename(name=Name) |> write.csv("./data-raw/jan052021/inactive/deceasedbyyear.csv")
## Number not matching
discontinued |> #pull(amount) |> sum() # 1108257
select(c(contract_account)) |> unique() |>
mutate(contract_account=as.character(contract_account)) |>
left_join(oi, by=c("contract_account"="contract_account")) |>
filter(AGE_BUCKET %in% c("4-5 years","3-4 years","2-3 years","1-2 years",
"181-365 days","91-180 days","61-90 days", "0-30 days")) |>
filter(o.type %in% c("Real", "Installments") ) |>
mutate(year.due=lubridate::year(due_date)) |> select(-c(status.x,AGE_BUCKET,o.type,spread,dunn_status, new_contract_type, date.event, Reasonunregistered,e.status,date.relaxed, rel.status, year.due )) |> write.csv("./data-raw/jan052021/inactive/discontinuedtrans.csv", na="", row.names = F)
4 years ago
group_by(contract_account,year.due) |>
summarise(amount=sum(amount)) |> #pull(amount) |> sum()
pivot_wider(names_from = year.due, values_from = amount) |>
left_join(bpnames, by="contract_account") |>
select(c(contract_account,business_partner, Name, `2016`,`2017`,`2018`)) |> write.csv("./data-raw/jan052021/inactive/discontinuedbyyear.csv", na="", row.names = F)
## Number not matching
migrated |> #pull(Grand.Total) |> sum()#130502
select(c(contract_account)) |> unique() |>
mutate(contract_account=as.character(contract_account)) |>
left_join(real.pos, by="contract_account") |>
#filter(o.type %in% c("Real", "Installments") ) |>
filter(AGE_BUCKET %in% c("4-5 years","3-4 years","2-3 years","1-2 years",
"181-365 days","91-180 days","61-90 days","31-60 days")) |>
mutate(year.due=lubridate::year(due_date)) |> select(-c(status.x,AGE_BUCKET,o.type,spread,dunn_status, new_contract_type, date.event, Reasonunregistered,e.status,date.relaxed, rel.status, age, year.due )) |> write.csv("./data-raw/jan052021/inactive/migratedtrans.csv", na="", row.names = F)
4 years ago
group_by(contract_account,year.due) |>
summarise(amount=sum(amount)) |> #pull(amount) |> sum()
pivot_wider(names_from = year.due, values_from = amount) |>
left_join(bpnames, by="contract_account") |>
select(c(contract_account,business_partner,Name, `2016`, `2017`, `2018`, `2021`)) |>
write.csv("./data-raw/jan052021/inactive/migratedbyyear.csv", na="", row.names = F)
small |> #pull(Open.Items) |> sum() #123207.9
select(c(contract_account,business_partner,Name)) |>
mutate(contract_account=as.character(contract_account)) |>
left_join(real.pos, by="contract_account") |>
select(-business_partner.y) |>
rename(business_partner=business_partner.x) |> #pull(amount) |> sum()
mutate(year.due=lubridate::year(due_date)) |> select(-c(status.x,AGE_BUCKET,o.type,spread,dunn_status, new_contract_type, date.event, Reasonunregistered,e.status,date.relaxed, rel.status, age, year.due )) |> write.csv("./data-raw/jan052021/inactive/smalltrans.csv", na="")
4 years ago
group_by(contract_account, business_partner, Name.x, year.due) |>
summarise(amount=sum(amount)) |>
pivot_wider(names_from = year.due, values_from = amount) |>
rename(name=Name.x) |> write.csv("./data-raw/jan052021/inactive/smallbyyear.csv", na="")
real.pos.uni<-real.pos |> unique()
4 years ago
# not required
timebarred |> #pull(Sum.of.amount) |> sum() #25943341
rename(contract_account=Row.Labels) |>
mutate(contract_account=as.character(contract_account)) |>
left_join(real.pos.uni, by="contract_account") |> select(-Reasonunregistered) |>
filter(AGE_BUCKET=="5+ years") |>
unique() |> select(-c(status.x,AGE_BUCKET,o.type,spread,dunn_status, new_contract_type, date.event, e.status,date.relaxed, rel.status, age)) |>
write.csv("./data-raw/jan052021/inactive/timebarredtrans.csv", na="")
group_by(contract_account,business_partner.x, Name, Sum.of.amount) |>
summarise(amount=sum(amount)) |>
#select(-business_partner.y) |> #write.csv("./data-raw/jan052021/inactive/timebarredtrans.csv", na="")
filter(amount!=Sum.of.amount)
pull(amount) |> sum()
4 years ago
real.pos |>
filter(contract_account=="10043660") |> arrange(amount) |> pull(amount) |> sum()
4 years ago
```
active
```{r}
act.inact<-read.csv("./data-raw/jan052021/active/inactive.csv")
act.disc<-read.csv("./data-raw/jan052021/active/discontinued.csv")
act.migrated<-read.csv("./data-raw/jan052021/active/migrated.csv")
act.passed<-read.csv("./data-raw/jan052021/active/passed.csv")
#Not matching
act.inact |> #pull(amount) |> sum() #6830028
select(c(contract_account)) |> unique() |>
mutate(contract_account=as.character(contract_account)) |>
left_join(real.pos, by="contract_account") |> #pull(amount) |> sum()
mutate(year.due=lubridate::year(due_date)) |> select(-c(status.x,AGE_BUCKET,o.type,spread,dunn_status, new_contract_type, date.event, Reasonunregistered,e.status,date.relaxed, rel.status, age, year.due )) |> write.csv("./data-raw/jan052021/active/inactivetrans.csv", na="", row.names = F)
4 years ago
group_by(contract_account,year.due) |>
summarise(amount=sum(amount)) |> #pull(amount) |> sum()
pivot_wider(names_from = year.due, values_from = amount) |>
left_join(bpnames, by="contract_account") |>
select(c(contract_account,business_partner, Name, `2004`,`2005`,`2006`,`2007`,`2008`,
`2009`,`2010`,`2011`,`2012`,`2013`,`2014`,`2015`,`2016`,`2017`,`2018`,`2019`,`2020`,`2021`)) |>
write.csv("./data-raw/jan052021/active/inactivebyyear.csv", na="", row.names = F)
act.disc |> #pull(amount) |> sum() #1214182
select(c(contract_account, rel.status, age)) |> unique() |>
mutate(rel.status=ifelse(rel.status=="More than 60 days", "long past","within limit")) |>
mutate(contract_account=as.character(contract_account)) |>
left_join(real.pos, by=c("contract_account"="contract_account", "rel.status"="rel.status","age"="age")) |>
mutate(year.due=lubridate::year(due_date)) |> select(-c(status.x,AGE_BUCKET,o.type,spread,dunn_status, new_contract_type, date.event, Reasonunregistered,e.status,date.relaxed, rel.status, age, year.due )) |> write.csv("./data-raw/jan052021/active/discontinuedtrans.csv", na="", row.names = F)
4 years ago
group_by(contract_account,year.due) |>
summarise(amount=sum(amount)) |> #pull(amount) |> sum()
pivot_wider(names_from = year.due, values_from = amount) |>
left_join(bpnames, by="contract_account") |>
select(c(contract_account,business_partner, Name,`2006`,`2007`,`2008`,
`2009`,`2010`,`2011`,`2012`,`2013`,`2014`,`2015`,`2016`,`2017`,`2018`,`2019`,`2020`,`2021`)) |>
write.csv("./data-raw/jan052021/active/discontinuedbyyear.csv", na="", row.names = F)
# Matching
act.migrated |> #1401989
select(c(contract_account, Name)) |>
mutate(contract_account=as.character(contract_account)) |>
left_join(real.pos, by="contract_account") |>
mutate(year.due=lubridate::year(due_date)) |> select(-c(status.x,AGE_BUCKET,o.type,spread,dunn_status, new_contract_type, date.event, Reasonunregistered,e.status,date.relaxed, rel.status, age, year.due )) |> write.csv("./data-raw/jan052021/active/migratedtrans.csv", na="", row.names = F)
4 years ago
group_by(contract_account,business_partner, Name, year.due) |>
summarise(amount=sum(amount)) |> #pull(amount) |> sum(na.rm = T)
pivot_wider(names_from = year.due, values_from = amount) |>
select(c(contract_account,business_partner, Name,`1994`,`1995`,`1996`,`1997`,`1998`,`1999`,`2000`,`2001`,`2002`,`2003`,`2004`,`2005`,`2006`,`2007`,`2008`,
`2009`,`2010`,`2011`,`2012`,`2013`,`2014`,`2015`,`2016`,`2017`)) |>
write.csv("./data-raw/jan052021/active/migratedbyyear.csv", na="", row.names = F)
# Matching
act.passed |> #pull(duedatebeforedaterelaxed) |> sum(na.rm=T) #929285.6
select(c(contract_account, Name)) |>
mutate(contract_account=as.character(contract_account)) |>
left_join(real.pos, by="contract_account") |>
mutate(year.due=lubridate::year(due_date)) |> select(-c(status.x,AGE_BUCKET,o.type,spread,dunn_status, new_contract_type, date.event, Reasonunregistered,e.status,date.relaxed, rel.status, age, year.due )) |> write.csv("./data-raw/jan052021/active/passedtrans.csv", na="", row.names = F)
4 years ago
group_by(contract_account,business_partner, Name, year.due) |>
summarise(amount=sum(amount)) |> #pull(amount) |> sum(na.rm = T)
pivot_wider(names_from = year.due, values_from = amount) |>
select(c(contract_account,business_partner, Name,`1900`,`1994`,`1995`,`1996`,`1997`,`1998`,`1999`,`2000`,`2001`,`2002`,`2003`,`2004`,`2005`,`2006`,`2007`,`2008`,
`2009`,`2010`,`2011`,`2012`,`2013`,`2014`,`2015`,`2016`,`2017`)) |>
write.csv("./data-raw/jan052021/active/passedbyyear.csv", na="", row.names = F)
```
## R Markdown
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see <http://rmarkdown.rstudio.com>.
When you click the **Knit** button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
```{r cars}
summary(cars)
```
## Including Plots
You can also embed plots, for example:
```{r pressure, echo=FALSE}
plot(pressure)
```
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.