You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							130 lines
						
					
					
						
							3.4 KiB
						
					
					
				
			
		
		
		
			
			
			
				
					
				
				
					
				
			
		
		
	
	
							130 lines
						
					
					
						
							3.4 KiB
						
					
					
---
title: "Analysis"
format: html
editor: visual
---

# Setup

```{r}
# Attach the packages used by the chunks below.
# NOTE(review): kmeans() lives in base R's stats package, and no chunk in this
# document appears to call a cluster or ggplot2 function directly -- confirm
# before dropping either library() call.
library(dplyr)
library(cluster)
library(tidyr)
library(highcharter)
library(ggplot2)
```

# Data

```{r}
# Read the purchase records. Column types are fixed up front: one character
# column, two factor columns, then six numeric columns.
dat <- read.csv(
  "Data.csv",
  colClasses = c(
    "character", "factor", "factor",
    "numeric", "numeric", "numeric",
    "numeric", "numeric", "numeric"
  )
)

# Treat a missing numeric value as zero. Only numeric columns are zero-filled:
# assigning 0 across the whole data frame would also write "0" into the
# character column and raise an invalid-factor-level warning for any missing
# factor value.
is_num <- vapply(dat, is.numeric, logical(1))
dat[is_num] <- lapply(dat[is_num], \(x) replace(x, is.na(x), 0))
```

```{r}
# Build the clustering matrix (scaled purchase amounts plus dummy-coded Role
# and Location) and draw an elbow plot for k = 1..10.

# Fixed seed so the random k-means starts, and therefore the elbow curve, are
# the same on every render.
set.seed(42)

purchase_cols <- c(
  "Self.Purchase1", "Self.Purchase2", "Self.Purchase3",
  "Other.Purchase1", "Other.Purchase2", "Other.Purchase3"
)

# Select the columns for clustering (including "Role" and "Location")
selected_cols <- c(purchase_cols, "Role", "Location")

# Subset the data frame to include only the selected columns
df_subset <- dat[selected_cols]

# Convert "Role" and "Location" to factors (a no-op if they already are)
df_subset$Role <- as.factor(df_subset$Role)
df_subset$Location <- as.factor(df_subset$Location)

# Dummy-code Role and Location. Dropping the intercept (-1) gives Role one
# indicator column per level; under R's default treatment contrasts Location
# still omits its first level.
df_encoded <- model.matrix(~ Role + Location - 1, data = df_subset)

# Centre and scale the purchase columns, then join them to the indicators.
numeric_cols <- dat[, purchase_cols]
scaled_data <- scale(numeric_cols)
final_data <- cbind(scaled_data, df_encoded)

# Total within-cluster sum of squares for each candidate number of clusters.
k_values <- 1:10
wss <- vapply(
  k_values,
  \(k) kmeans(final_data, centers = k, nstart = 10)$tot.withinss,
  numeric(1)
)

plot(
  k_values, wss,
  type = "b",
  xlab = "Number of Clusters",
  ylab = "Within-cluster Sum of Squares"
)
```

```{r}
# Number of clusters used for the final segmentation.
optimal_k <- 4

# k-means starts from randomly chosen centres. Fixing the seed keeps the
# cluster labels stable between renders, so the cluster-1 summary and the rows
# written to DataSegmented.csv refer to the same segment on every run.
set.seed(42)
kmeans_result <- kmeans(final_data, centers = optimal_k, nstart = 10)

# Attach each row's cluster label to the original data.
dat$Cluster <- kmeans_result$cluster
```

```{r}
# Summary statistics for the rows assigned to cluster 1.
summary(dat[dat$Cluster == 1, ])

# Total revenue (self + other purchases) per year for Curacao, reshaped to one
# row per year for charting.
filter.curr <- dat |>
  filter(Location == "Curacao") |>
  summarize(
    yr.1 = sum(Self.Purchase1) + sum(Other.Purchase1),
    yr.2 = sum(Self.Purchase2) + sum(Other.Purchase2),
    yr.3 = sum(Self.Purchase3) + sum(Other.Purchase3)
  ) |>
  pivot_longer(c(yr.1, yr.2, yr.3)) |>
  rename(Year = name, Revenue = value)

# The plotted values combine self and other purchases for Curacao only, and
# the title states exactly that.
hchart(filter.curr, "column", hcaes(x = Year, y = Revenue)) |>
  hc_title(text = "Curacao revenue by year (self + other purchases)")

unique(dat$Role)

# Export the data with the cluster label attached.
write.csv(dat, "DataSegmented.csv", row.names = FALSE)

dat |>
  summarise(
    current.year = sum(Self.Purchase3),
    last.year = sum(Self.Purchase2)
  )

# Scatter of self vs. other revenue, coloured by cluster.
# NOTE(review): self/other are per-cluster totals repeated on every row, so
# each cluster collapses onto a single point that is only spread out by the
# jitter setting below. If one point per row was intended, compute row-wise
# totals instead -- confirm.
summ.dat <- dat |>
  group_by(Cluster) |>
  mutate(
    self = sum(Self.Purchase1) + sum(Self.Purchase2) + sum(Self.Purchase3),
    other = sum(Other.Purchase1) + sum(Other.Purchase2) + sum(Other.Purchase3)
  ) |>
  # Drop the grouping so later operations on summ.dat are not silently
  # evaluated per cluster.
  ungroup()

hchart(
  summ.dat,
  type = "scatter",
  hcaes(x = self, y = other, color = factor(Cluster))
) |>
  hc_plotOptions(
    scatter = list(jitter = list(x = 10000000, y = 10000000))
  ) |>
  hc_title(text = "K-means Clustering Visualization") |>
  hc_xAxis(title = list(text = "Self")) |>
  hc_yAxis(title = list(text = "Other"))
```

```{r}
# Stacked column chart of total self and other revenue per cluster.
dat |>
  group_by(Cluster) |>
  summarise(
    self = sum(Self.Purchase1) + sum(Self.Purchase2) + sum(Self.Purchase3),
    other = sum(Other.Purchase1) + sum(Other.Purchase2) + sum(Other.Purchase3)
  ) |>
  # Long format: one row per (Cluster, name) pair, where name is "self" or
  # "other"; name then drives the stacked series.
  pivot_longer(c(self, other)) |>
  hchart(
    "column",
    hcaes(x = Cluster, y = value, group = name),
    stacking = "normal"
  ) |>
  hc_colors(c("#0073C2FF", "#EFC000FF")) |>
  hc_title(text = "Revenue by segment") |>
  hc_xAxis(title = list(text = "Segment")) |>
  hc_yAxis(title = list(text = "Revenue"))
```

```{r}

```