Visualizations are finally in place and looking good enough to show others. Now all that is needed is to fill in the text and clean up the flow. One or two more sessions, and then I can send out reading drafts.
This commit is contained in:
parent
310cb800f8
commit
1c4ef5bb15
|
@ -303,18 +303,268 @@ Now we can compare the relative spread of each feature by calculating the coeffi
|
|||
|
||||
```{r cv-results, echo=FALSE, fig.align="center"}
|
||||
|
||||
# Summarize results in a table and a plot
|
||||
# Summarize results in a table
|
||||
# Render the coefficient-of-variation results (coeff_vars) as a table in the
# knitted report.
knitr::kable(coeff_vars)
|
||||
|
||||
# NOTE(review): this diff view shows both the earlier active plot call (next
# line) and its commented-out replacement further down.
plot(coeff_vars)
|
||||
# Scatterplot of the same values; judged not very interesting, so disabled.
|
||||
# plot(coeff_vars)
|
||||
|
||||
```
|
||||
|
||||
From this, it appears that `r selected_features[1]` has the widest range of variability, followed by `r selected_features[2]`. These are also the features that are most strongly skewed to the right, meaning that a few addresses have really high values for each of these features while the bulk of the data set has very low values for these numbers.
|
||||
|
||||
Now do the following (after filling in methods, results, and conclusions, since those are done already):
|
||||
|
||||
!) Break into groups somehow. Graph variables per group? Show how the variables are distributed for each ransomware group? Percent ransomware per each day of the week, for example. Is ransomware more prevalent on a particular day of the week? Break other numerical values into bins, and graph percentage per bin. Look for trends and correlations between groups/variables, and display them here. MORE OF THIS....
|
||||
Taking the feature with the highest variation, `r selected_features[1]`, let us look at its distribution within individual ransomware families. Perhaps there is a similarity across families.
|
||||
|
||||
|
||||
```{r variation histograms, echo=FALSE, fig.show="hold", out.width='35%', warning=FALSE}
|
||||
|
||||
# Density plots of the feature with the highest variation, one per family.
selected_feature1 <- selected_features[1]

# Coerce the selected column to numeric. `selected_feature1` holds a column
# *name*, so the column is addressed through the `.data` pronoun and the
# result is written back under the same name with `:=`. Calling
# `as.numeric(selected_feature1)` directly would try to convert the name
# string itself (NA for a name such as "income") and store it in a new
# column, leaving the real feature untouched.
# NOTE(review): assumes selected_features is a character vector of column
# names of `ransomware` -- confirm where it is built.
ransomware_big_families <- ransomware %>%
  mutate(!!selected_feature1 := as.numeric(.data[[selected_feature1]]))

# Family names, looked up once instead of once per plot.
family_labels <- levels(ransomware_big_families$label)

# Build the density plot of the selected feature for one ransomware family.
#
# family_index: position of the family in levels(ransomware_big_families$label).
# Returns a ggplot object; calling this at top level lets knitr print it.
plot_family_density <- function(family_index) {
  family_name <- family_labels[family_index]

  ransomware_big_families %>%
    filter(label == family_name) %>%
    ggplot(aes(x = .data[[selected_feature1]], y = ..density..)) +
    geom_density(col = "green") +
    ggtitle(family_name) +
    # Label the axis with the feature name rather than the `.data[[...]]`
    # expression, which some ggplot2 versions would print verbatim.
    xlab(selected_feature1) +
    # The feature is strongly right-skewed, hence the log2 axis.
    scale_x_continuous(trans = "log2")
}

# Note: Putting these graphs into a for loop breaks some of the formatting,
# so every plot is requested by an explicit top-level call.
# Low membership makes some of the graphs not very informative.
# Relatively boring graphs have been commented out to save time and space.
# These can be uncommented if one wishes.
plot_family_density(1)
# plot_family_density(2)
# plot_family_density(3)
plot_family_density(4)
plot_family_density(5)
plot_family_density(6)
plot_family_density(7)
plot_family_density(8)
# plot_family_density(9)
plot_family_density(10)
plot_family_density(11)
plot_family_density(12)
plot_family_density(13)
plot_family_density(14)
plot_family_density(15)
plot_family_density(16)
# plot_family_density(17)
plot_family_density(18)
# plot_family_density(19)
plot_family_density(20)
# plot_family_density(21)
plot_family_density(22)
plot_family_density(23)
plot_family_density(24)
# plot_family_density(25)
# plot_family_density(26)
plot_family_density(27)
plot_family_density(28)
plot_family_density(29)
|
||||
|
||||
|
||||
```
|
||||
|
||||
|
||||
|
||||
```{r shrimp-percentage, echo=FALSE, include=FALSE}
|
||||
|
||||
|
@ -681,55 +931,55 @@ cm_labels$byClass %>% knitr::kable()
|
|||
|
||||
Here are some graphs, tell a bit more about them.
|
||||
|
||||
```{r binary som graphs, echo=FALSE, fig.align="center"}
|
||||
```{r binary som graphs, echo=FALSE, fig.show="hold", out.width='35%'}
|
||||
|
||||
# Be careful with these, some are really large and take a long time to produce.

# Overview plots of the trained SOM, all drawn with the same point style and
# palette. In order: the mapping of observations onto the grid, the "quality"
# plot (the distance map), the counts per node, and the codes (fan) diagram.
som_overview_types <- c("mapping", "quality", "counts", "codes")
for (plot_type in som_overview_types) {
  plot(som_model2, type = plot_type, pch = 19, palette.name = topo.colors)
}

# Property heatmaps, one per codebook column. The chunk options
# (fig.show="hold", out.width='35%') place the figures side by side, so the
# blank-line spacers formerly printed with cat() between plots are not needed.
som_codes <- som_model2$codes[[1]]
heatmap_vars <- 1:6
for (var_index in heatmap_vars) {
  plot(som_model2, type = "property", property = som_codes[, var_index],
       main = colnames(train_num)[var_index], pch = 19,
       palette.name = topo.colors)
}
|
||||
|
||||
```
|
||||
|
||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -1,8 +1,24 @@
|
|||
# Scratch script: draw the income density for every ransomware family and
# print each family's index and name, to see which plots are worth keeping.

# Keep only families with more than one member.
ransomware_big_families <- ransomware %>%
  group_by(label) %>%
  filter(n() > 1) %>%
  ungroup()

ransomware_big_families <- ransomware_big_families %>%
  mutate(income = as.numeric(income))

# The factor levels are deliberately left as they are (no droplevels()), so
# the printed index keeps referring to the same position in levels(label).
family_labels <- levels(ransomware_big_families$label)

# seq_along() instead of 1:length(): the loop body is skipped entirely when
# there are no levels, rather than running for i = 1 and i = 0.
for (i in seq_along(family_labels)) {
  ggp <- ransomware_big_families %>%
    filter(label == family_labels[i]) %>%
    select(income) %>%
    ggplot(aes(x = income, y = ..density..)) +
    # geom_histogram(bins = 30) +
    geom_density(col = "green", size = .5) +
    ggtitle(family_labels[i]) +
    scale_x_continuous(trans = "log2")

  # ggplot objects are not auto-printed inside a loop.
  print(ggp)
  print(i)
  print(family_labels[i])
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue