ransomware/SOM_test.R

# Self-organizing map (SOM) exploration of the numeric training features.
# Expects `train_set` (with a `label` column and the numeric columns selected
# below) to already exist in the calling environment.

# Install kohonen package if needed. requireNamespace() only checks that the
# package is available without attaching it, so the library() call below is
# the single place where it is loaded (and it fails loudly if the install
# did not succeed).
if (!requireNamespace("kohonen", quietly = TRUE)) {
  install.packages("kohonen", repos = "https://cran.us.r-project.org")
}

# Load kohonen library
library(kohonen)

# Keep only numeric columns, ignoring dates for now.
# select() is namespace-qualified so it cannot be masked by another attached
# package that also exports a `select` (e.g. MASS).
train_num <- dplyr::select(
  train_set,
  length, weight, count, looped, neighbors, income
)

# SOM function can only work on matrices; scale() centers each column and
# divides it by its standard deviation so no variable dominates the distances.
train_SOM <- as.matrix(scale(train_num))

# Calculate ideal grid size according to:
# https://www.researchgate.net/post/How-many-nodes-for-self-organizing-maps

# Formulaic method 1
#grid_size <- round(sqrt(5*sqrt(nrow(train_set))))
# Based on categorical number, method 2: one grid row/column per distinct label
grid_size <- length(unique(train_set$label))
grid_size

# Create SOM grid (square, grid_size x grid_size units)
train_grid <- somgrid(
  xdim = grid_size,
  ydim = grid_size,
  topo = "hexagonal",
  toroidal = FALSE
)

# Use at most 4 workers, but never more than the machine reports.
# detectCores() may return NA, which na.rm = TRUE ignores; max() guarantees
# at least one core.
n_cores <- max(1L, min(4L, parallel::detectCores(), na.rm = TRUE))

# Set magic seed for reproducibility
set.seed(5)

## Now build the SOM!
som.model <- som(
  train_SOM,
  grid = train_grid,
  rlen = 100,
  mode = "pbatch", # or: alpha = c(0.05,0.01),
  cores = n_cores,
  keep.data = TRUE
)

# Visualize clusters (which unit each observation is mapped to)
plot(som.model, type = "mapping", pch = 19, palette.name = topo.colors)

# Quality map: mean distance of the objects mapped to a unit to that unit's
# codebook vector (use type = "dist.neighbours" for a neighbour-distance map)
plot(som.model, type = "quality", pch = 19, palette.name = topo.colors)

# Visualize counts (number of observations per unit)
plot(som.model, type = "counts", pch = 19, palette.name = topo.colors)

# Visualize fan diagram (codebook vectors)
plot(som.model, type = "codes", pch = 19, palette.name = topo.colors)

# Visualize one heatmap per input variable. Looping over the columns keeps
# this in sync with whatever variables were selected into train_num above.
som_codes <- getCodes(som.model)
for (i in seq_len(ncol(train_num))) {
  plot(
    som.model,
    type = "property",
    property = som_codes[, i],
    main = colnames(train_num)[i],
    pch = 19,
    palette.name = topo.colors
  )
}

##Different cluster methods branch off here...
Self Organizing Maps scripts added 2021-09-27 11:12:00 +02:00			`# Install kohonen package if needed`
			`if(!require(kohonen)) install.packages("kohonen", repos = "http://cran.us.r-project.org")`

			`# Load kohonen library`
			`library(kohonen)`

			`# Keep only numeric columns, ignoring dates for now`
			`train_num <- train_set %>% select(length, weight, count, looped, neighbors, income)`

			`# SOM function can only work on matrices`
			`train_SOM <- as.matrix(scale(train_num))`

			`# Calculate ideal grid size according to:`
			`# https://www.researchgate.net/post/How-many-nodes-for-self-organizing-maps`

			`# Formulaic method 1`
			`#grid_size <- round(sqrt(5*sqrt(nrow(train_set))))`
			`# Based on categorical number, method 2`
			`grid_size = length(unique(train_set$label))`
			`grid_size`

			`# Create SOM grid`
			`train_grid <- somgrid(xdim=grid_size, ydim=grid_size, topo="hexagonal", toroidal = FALSE)`

			`# Set magic seed for reproducibility`
			`set.seed(5)`

			`## Now build the SOM!`
			`som.model <- som(train_SOM,`
			`grid = train_grid,`
			`rlen = 100,`
			`mode="pbatch", # or: alpha = c(0.05,0.01),`
			`cores = 4, # how to dynamically set number of cores?`
			`keep.data = TRUE`
			`)`

			`# Visualize clusters`
			`plot(som.model, type = 'mapping', pch = 19, palette.name = topo.colors)`

			`# Quality map (mean distance of mapped objects to their unit's codebook vector)`
			`plot(som.model, type = 'quality', pch = 19, palette.name = topo.colors)`

			`# Visualize counts`
			`plot(som.model, type = 'counts', pch = 19, palette.name = topo.colors)`

			`# Visualize fan diagram`
			`plot(som.model, type = 'codes', pch = 19, palette.name = topo.colors)`

			`# Visualize heatmap for variable 1`
			`plot(som.model, type = 'property', property = getCodes(som.model)[,1], main=colnames(train_num)[1], pch = 19, palette.name = topo.colors)`

			`# Visualize heatmap for variable 2`
			`plot(som.model, type = 'property', property = getCodes(som.model)[,2], main=colnames(train_num)[2], pch = 19, palette.name = topo.colors)`

			`# Visualize heatmap for variable 3`
			`plot(som.model, type = 'property', property = getCodes(som.model)[,3], main=colnames(train_num)[3], pch = 19, palette.name = topo.colors)`

			`# Visualize heatmap for variable 4`
			`plot(som.model, type = 'property', property = getCodes(som.model)[,4], main=colnames(train_num)[4], pch = 19, palette.name = topo.colors)`

			`# Visualize heatmap for variable 5`
			`plot(som.model, type = 'property', property = getCodes(som.model)[,5], main=colnames(train_num)[5], pch = 19, palette.name = topo.colors)`

			`# Visualize heatmap for variable 6`
			`plot(som.model, type = 'property', property = getCodes(som.model)[,6], main=colnames(train_num)[6], pch = 19, palette.name = topo.colors)`

			`# Visualize heatmap for variable 7`
			`#plot(som.model, type = 'property', property = getCodes(som.model)[,7], main=colnames(train_num)[7], pch = 19, palette.name = topo.colors)`

			`# Visualize heatmap for variable 8`
			`#plot(som.model, type = 'property', property = getCodes(som.model)[,8], main=colnames(train_num)[8], pch = 19, palette.name = topo.colors)`

			`##Different cluster methods branch off here...`