#' ---
#' title: Emacs survey 2020
#' date: "2022-10-25"
#' author: "Jose - https://ajuda.multifarm.top"
#' output:
#'   html_document:
#'     code_folding: show
#'     toc: yes
#'     toc_float:
#'       smooth_scroll: true
#'     df_print: paged
#'     highlight: zenburn
#' ---

# Provenance: contents of script/cleaning.R as created by the git patch
# "Cleaning the data" (commit d7ddf39326eda15785ad58b6ffc2c496b0ce31db,
# author Jose, Tue, 25 Oct 2022 20:32:51 -0300).
#
# The script is written for knitr::spin(): lines starting with #' are report
# text, everything else is R code whose output is shown in the report.
#
# The workspace is intentionally NOT cleared with rm(list = ls()); run the
# script in a fresh R session (or render it) instead.

# renv::init() #' For reproducibility

library(data.table)
library(DT)
library(ggplot2)

#' ## Emacs survey 2020:
#'
#' cleaned up data:
#'
#' https://emacs-survey.netlify.app/2020/Emacs-User-Survey-2020-clean.csv
#'
#' Raw data:
#'
#' https://emacs-survey.netlify.app/2020/Emacs-User-Survey-2020-raw.csv

url <- "https://emacs-survey.netlify.app/2020/Emacs-User-Survey-2020-clean.csv"

# check.names = TRUE turns the survey questions into syntactic column names
# (e.g. "For.how.many.years.have.you.been.using.Emacs."), which is what the
# rest of the script relies on.
d <- fread(url, check.names = TRUE)

str(d)

names(d)

#' ## Date
#'
#' TODO: should arrange (not done yet).

class(d$Submission.Date)

d[, .N, Submission.Date]

#' ## Favorite packages

datatable(d[, .N, Can.you.list.some.of.your.favorite.packages.][order(-N)])

#' ## Difficulties faced

d[, .N, Can.you.recall.any.difficulties.you.faced.initially.learning.Emacs.][
  order(-N)
] |>
  datatable()

#' ## Describe org-mode usage

datatable(d[, .N, Describe.your.org.mode.usage])

#' ## Years using Emacs

d[, .N, For.how.many.years.have.you.been.using.Emacs.][order(-N)] |>
  knitr::kable()

# Two free-text answers are recoded by hand to a number of years. They are
# matched by exact equality, so the literals below must stay character-for-
# character identical to the survey responses.
sent1 <- "I installed it first circa 2013, I guess. Hard to remember; it was on Windows. I was not a regular user until 2016, and a confident user until 2017."

# NOTE(review): the line break inside this literal is part of the string as it
# appears in the source; presumably it mirrors the raw response -- confirm
# against the CSV before reflowing it.
sent2 <- "I've used Emacs for nearly one decade, but will be purposefully
vague beyond this."

sent1

sent2

# Update by reference with `:=`. The replacement values are character because
# the column still holds text at this point; it is converted to numeric below.
d[
  For.how.many.years.have.you.been.using.Emacs. == sent1,
  For.how.many.years.have.you.been.using.Emacs. := "7"
]

d[
  For.how.many.years.have.you.been.using.Emacs. == sent2,
  For.how.many.years.have.you.been.using.Emacs. := "10"
]

d[, .N, For.how.many.years.have.you.been.using.Emacs.]

# Any answer that is still not a plain number becomes NA here (as.numeric()
# signals a coercion warning in that case).
d[, Years.using.emacs :=
  as.numeric(For.how.many.years.have.you.been.using.Emacs.)]

d[, .N, Years.using.emacs]

# bins = 30 is the geom_histogram() default, spelled out explicitly.
ggplot(d, aes(Years.using.emacs)) +
  geom_histogram(bins = 30) +
  theme_minimal()

# Colour and fill are constants, so they are set outside aes(); inside aes()
# the string "red" would be treated as a data value and add a legend.
ggplot(d, aes(Years.using.emacs)) +
  geom_density(color = "red", fill = "red", alpha = 0.3) +
  # geom_histogram(alpha = 0.1) +
  theme_minimal()

#' ## OS you primarily use with emacs

datatable(
  d[, .N, Which.OS.do.you.primarily.use.Emacs.on.][order(-N)]
)