# ransomware/Ransomware-Bitcoin-Addresses.R
#
# Repository listing metadata: 47 lines, 1.5 KiB, R

# Install necessary packages
# requireNamespace() only checks whether a package is available; unlike
# require() it does not attach the package or warn when it is missing.
# Packages are fetched from an HTTPS mirror rather than over plaintext HTTP.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse", repos = "https://cloud.r-project.org")
}
if (!requireNamespace("caret", quietly = TRUE)) {
  install.packages("caret", repos = "https://cloud.r-project.org")
}

# Load libraries
# library() stops with an error if a package is still unavailable, so a failed
# installation is reported here instead of further down the script.
library(tidyverse)
library(caret)
# Download data
# The archive is cached at data/data.zip and only fetched when it is absent.
url <- "https://archive.ics.uci.edu/ml/machine-learning-databases/00526/data.zip"
dest_file <- "data/data.zip"
if (!dir.exists("data")) {
  dir.create("data")
}
if (!file.exists(dest_file)) {
  # Binary mode keeps the zip archive byte-exact. If the transfer fails, the
  # partial file is removed so the next run retries instead of treating a
  # truncated archive as a valid cache.
  tryCatch(
    download.file(url, destfile = dest_file, mode = "wb"),
    error = function(e) {
      unlink(dest_file)
      stop("Could not download ", url, ": ", conditionMessage(e), call. = FALSE)
    }
  )
}

# Unzip
# Only the CSV is extracted, and only when it is not already present.
if (!file.exists("data/BitcoinHeistData.csv")) {
  unzip(dest_file, files = "BitcoinHeistData.csv", exdir = "data")
}
# Fail here with a clear message rather than letting read_csv() report a
# missing file when the extraction did not produce the CSV.
if (!file.exists("data/BitcoinHeistData.csv")) {
  stop(
    "Could not extract BitcoinHeistData.csv from ", dest_file,
    "; delete the archive and run the script again.",
    call. = FALSE
  )
}

# Import data from CSV
ransomware <- read_csv("data/BitcoinHeistData.csv")
# Fix the RNG seed so the random partitions below are the same on every run
set.seed(1)

# Validation set made from 10% of BitcoinHeist data
test_index <- createDataPartition(
  y = ransomware$label, times = 1, p = 0.1, list = FALSE
)
workset <- ransomware[-test_index, ]
temp <- ransomware[test_index, ]

# Make sure addresses in validation set are also in working set
validation <- temp %>%
  semi_join(workset, by = "address")

# Add rows removed from validation set back into working set.
# These are exactly the held-out rows whose address does not occur in the
# working set, so the join is keyed on "address" alone instead of comparing
# every column of every row.
removed <- anti_join(temp, workset, by = "address")
workset <- bind_rows(workset, removed)

# Split the working set into a training set (90%) and a test set (10%)
test_index <- createDataPartition(
  y = workset$label, times = 1, p = 0.1, list = FALSE
)
train_set <- workset[-test_index, ]
test_set <- workset[test_index, ]
# Drop the intermediate objects that are no longer needed
rm(list = c("dest_file", "url", "temp", "removed", "ransomware", "test_index"))

# Show the structure and the first rows of the test and training sets
str(test_set)
head(test_set)
str(train_set)
head(train_set)