# Self-organizing map (SOM) of the training set, built with the kohonen package
# Install the kohonen package only when it is not already available.
# requireNamespace() checks for the package without attaching it, so the
# install decision and the actual loading stay two separate steps.
if (!requireNamespace("kohonen", quietly = TRUE)) {
  install.packages("kohonen", repos = "https://cran.us.r-project.org")
}

# Attach kohonen; library() stops with an error if the package is still missing
library(kohonen)

# Restrict the training set to its numeric feature columns; date columns are
# left out for now
train_num <- select(
  train_set,
  length, weight, count, looped, neighbors, income
)

# som() works on matrices: standardise the columns with scale(), then make
# sure the result is handed over as a matrix
train_SOM <- train_num %>%
  scale() %>%
  as.matrix()

# Calculate the ideal grid size according to:
# https://www.researchgate.net/post/How-many-nodes-for-self-organizing-maps

# Method 1 (formula based, currently disabled)
# grid_size <- round(sqrt(5 * sqrt(nrow(train_set))))

# Method 2 (in use): number of distinct categories in the `label` column
grid_size <- length(unique(train_set$label))

# Print explicitly so the value is also shown when the script is source()d
print(grid_size)

# Lay out the map units on a grid_size x grid_size hexagonal grid whose
# edges do not wrap around
train_grid <- somgrid(
  xdim = grid_size,
  ydim = grid_size,
  topo = "hexagonal",
  toroidal = FALSE
)

# Fix the random seed so repeated runs are reproducible
set.seed(5)

## Now build the ESOM!

# Derive the worker count from the machine instead of hard-coding it: use all
# cores but one, and fall back to a single core when detectCores() returns NA.
# NOTE(review): a different core count may change floating-point summation
# order in the parallel batch update - confirm results stay acceptably stable.
som_cores <- max(1L, parallel::detectCores() - 1L, na.rm = TRUE)

som.model <- som(
  train_SOM,
  grid = train_grid,
  rlen = 100,         # number of times the full data set is presented
  mode = "pbatch",    # or: alpha = c(0.05, 0.01)
  cores = som_cores,  # worker count for the "pbatch" mode
  keep.data = TRUE
)

# Draw the standard diagnostic plots one after another:
#   mapping - where the observations are mapped on the grid (clusters)
#   quality - distance of mapped observations to their unit's codebook vector
#   counts  - number of observations mapped to each unit
#   codes   - codebook vectors (fan diagram)
for (plot_type in c("mapping", "quality", "counts", "codes")) {
  plot(som.model, type = plot_type, pch = 19, palette.name = topo.colors)
}

# Visualize one heatmap per input variable.
# Looping over the columns replaces the copy-pasted per-variable calls and
# follows the number of columns in train_num automatically, so no call has to
# be added or commented out when the set of variables changes.
som_codes <- getCodes(som.model)  # codebook values, indexed by variable column
for (var_idx in seq_len(ncol(train_num))) {
  plot(
    som.model,
    type = "property",
    property = som_codes[, var_idx],
    main = colnames(train_num)[var_idx],
    pch = 19,
    palette.name = topo.colors
  )
}

## Different cluster methods branch off here...