Cleaning the data

This commit is contained in:
Jose 2022-10-25 20:32:51 -03:00
parent 498071b36e
commit d7ddf39326
1 changed files with 108 additions and 0 deletions

108
script/cleaning.R Normal file
View File

@ -0,0 +1,108 @@
#' ---
#' title: Emacs survey 2020
#' date: "2022-10-25"
#' author: "Jose - https://ajuda.multifarm.top"
#' output:
#'   html_document:
#'     code_folding: show
#'     toc: yes
#'     toc_float:
#'       smooth_scroll: true
#'     df_print: paged
#'     highlight: zenburn
#' ---
# NOTE: the former `rm(list = ls())` was dropped. It only deletes user-created
# objects (attached packages and options are left untouched), so it never gave
# a truly clean session. Run or render this script in a fresh R session.
# renv::init() #' For reproducibility
library(data.table)
library(DT)
#' ## Emacs survey 2020:
#'
#' Cleaned-up data:
#'
#' https://emacs-survey.netlify.app/2020/Emacs-User-Survey-2020-clean.csv
#'
#' Raw data:
#'
#' https://emacs-survey.netlify.app/2020/Emacs-User-Survey-2020-raw.csv

# Download the cleaned-up survey export straight from the survey site.
# `check.names = TRUE` asks fread() to turn the question headers into
# syntactically valid R names, which is why every column below is referenced
# with dots in place of spaces and punctuation.
# (`url` masks base::url() for the rest of the script; the name is kept so that
# any later code relying on it keeps working.)
url <- "https://emacs-survey.netlify.app/2020/Emacs-User-Survey-2020-clean.csv"
d <- fread(url, check.names = TRUE)

# Quick look at the parsed column types and the generated column names.
str(d)
names(d)
#' ## Date
#'
#' TODO: arrange (sort) the rows by submission date.
# Show how the submission timestamp was parsed, then count rows per value.
class(d[["Submission.Date"]])
d[, .N, by = Submission.Date]
#' ## Favorite packages
# Frequency of each distinct answer, most common first, as an interactive table.
d[, .N, by = Can.you.list.some.of.your.favorite.packages.][order(-N)] |>
  DT::datatable()
#' ## Difficulties faced
# Same tabulation for the "initial difficulties" question.
datatable(
  d[
    , .N, by = Can.you.recall.any.difficulties.you.faced.initially.learning.Emacs.
  ][order(-N)]
)
#' ## Describe org-mode usage
# Counts per distinct answer; this table is not sorted by frequency.
d[, .N, by = Describe.your.org.mode.usage] |>
  datatable()
#' ## Years using Emacs
# The answers are free text, so tabulate them first to spot the entries that
# are not plain numbers.
d[, .N, by = For.how.many.years.have.you.been.using.Emacs.][order(-N)] |>
  knitr::kable()

# Two free-text answers are recoded by hand. The sentences are assembled with
# paste() so that no line break ends up inside the string: a literal wrapped
# across source lines embeds a newline and would only match an answer that
# contains that exact newline.
sent1 <- paste(
  "I installed it first circa 2013, I guess. Hard to remember; it was on",
  "Windows. I was not a regular user until 2016, and a confident user until",
  "2017."
)
sent2 <- paste(
  "I've used Emacs for nearly one decade, but will be purposefully",
  "vague beyond this."
)
sent1
sent2

# Collapse every run of whitespace (spaces, tabs, newlines) into one space and
# trim the ends, so the comparison below does not depend on how the answer was
# wrapped in the CSV.
normalize_space <- function(x) trimws(gsub("[[:space:]]+", " ", x))

# Recode in place with `:=`. The column holds text, so the replacement values
# are written as strings; as.numeric() below converts them with the rest.
# NOTE(review): 7 and 10 are the author's manual readings of the two sentences.
d[
  normalize_space(For.how.many.years.have.you.been.using.Emacs.) == sent1,
  For.how.many.years.have.you.been.using.Emacs. := "7"
]
d[
  normalize_space(For.how.many.years.have.you.been.using.Emacs.) == sent2,
  For.how.many.years.have.you.been.using.Emacs. := "10"
]
d[, .N, by = For.how.many.years.have.you.been.using.Emacs.]

# Numeric version of the answer. Anything that still is not a number becomes
# NA (as.numeric() warns about the coercion), which makes leftovers easy to see
# in the tabulation that follows.
d[, Years.using.emacs :=
    as.numeric(For.how.many.years.have.you.been.using.Emacs.)]
d[, .N, by = Years.using.emacs]
library(ggplot2)
# Histogram of years of Emacs use. An explicit one-year bin width replaces
# stat_bin's default of 30 bins (and the message it prints about that default).
# NOTE(review): assumes answers are (mostly) whole years -- adjust if not.
ggplot(d, aes(Years.using.emacs)) +
  geom_histogram(binwidth = 1) +
  theme_minimal()
# Density view of the same variable. Colour and fill are constants, so they are
# set as layer parameters. Inside aes() the string "red" is treated as a data
# value: the layer is drawn in the default palette colour and gains a legend
# with the single entry "red".
ggplot(d, aes(Years.using.emacs)) +
  geom_density(color = "red", fill = "red", alpha = 0.3) +
  # geom_histogram(alpha = 0.1) +
  theme_minimal()
#' ## OS you primarily use with Emacs
# Respondents per operating system, most common first, as an interactive table.
d[, .N, by = Which.OS.do.you.primarily.use.Emacs.on.][order(-N)] |>
  datatable()