Adding {data.table} tutorial
parent
372a0896a5
commit
d46e60c05c
@ -0,0 +1,90 @@
|
||||
#+TITLE: Datatable package
|
||||
#+DATE: 2021-10-26
|
||||
#+OPTIONS: creator:nil timestamp:nil todo:nil num:nil
|
||||
#+PROPERTY: header-args:R :results output :session *Rc* :cmdline :tangle yes
|
||||
#+PROPERTY: header-args:R+ :exports both
|
||||
#+SETUPFILE: https://fniessen.github.io/org-html-themes/org/theme-readtheorg.setup
|
||||
#+HTML_HEAD: <style> #content{max-width:1800px;}</style>
|
||||
#+HTML_HEAD: <style> p{max-width:800px;}</style>
|
||||
#+HTML_HEAD: <style> li{max-width:800px;}</style>
|
||||
#+HTML_HEAD: <style> pre.src{background: #f5f5f5;color:black;}</style>
|
||||
#+HTML_HEAD: <style> .example{background: #fefefa;color:black;}</style>
|
||||
|
||||
|
||||
#+begin_export html
|
||||
<div id="subtitle" style="float: center; text-align: center;">
|
||||
<p>
|
||||
Org document with
|
||||
<a href="http://www.r-project.org/">R</a> code
|
||||
</p>
|
||||
<p>
|
||||
<a href="http://www.r-project.org/">
|
||||
<img src="http://www.r-project.org/Rlogo.jpg"/>
|
||||
</a>
|
||||
</p>
|
||||
</div>
|
||||
#+end_export
|
||||
|
||||
#+begin_src R :exports code
|
||||
# Remove every object returned by ls() so the tutorial starts from an empty workspace.
rm(list = ls())
|
||||
#+end_src
|
||||
|
||||
** Introduction
|
||||
|
||||
#+begin_src R
|
||||
library(data.table)
|
||||
#+end_src
|
||||
|
||||
|
||||
Reading data into R using ~{data.table}~
|
||||
|
||||
#+begin_src R :exports code
|
||||
# CSV file from the data.table repository (vignettes/flights14.csv); read by fread() below.
url <- "https://raw.githubusercontent.com/Rdatatable/data.table/master/vignettes/flights14.csv"
|
||||
flights <- fread(url)
|
||||
#+end_src
|
||||
|
||||
#+RESULTS:
|
||||
|
||||
#+begin_src R
|
||||
names(flights)
|
||||
#+end_src
|
||||
|
||||
#+begin_src R
|
||||
flights
|
||||
#+end_src
|
||||
|
||||
#+begin_src R
|
||||
nrow(flights)
|
||||
#+end_src
|
||||
|
||||
#+RESULTS:
|
||||
: [1] 253316
|
||||
|
||||
Creating a data.table:
|
||||
|
||||
#+begin_src R
|
||||
# Small example table: an integer column, a character column and a
# length-one value that data.table recycles to all six rows.
dt <- data.table(
  col1 = 1:6,
  col2 = LETTERS[1:6],
  col3 = 1
)
|
||||
#+end_src
|
||||
|
||||
|
||||
#+begin_src R
|
||||
class(dt)
|
||||
#+end_src
|
||||
|
||||
#+begin_src R
|
||||
class(dt$col2)
|
||||
#+end_src
|
||||
|
||||
** Some references to learn more about {data.table}
|
||||
|
||||
* https://cran.r-project.org/web/packages/data.table/vignettes/datatable-intro.html
|
||||
* See vignettes in R:
|
||||
: > vignette(package="data.table")
|
||||
* https://atrebas.github.io/post/2019-03-03-datatable-dplyr/
|
||||
* https://rdatatable.gitlab.io/data.table/
|
||||
* https://s3.amazonaws.com/assets.datacamp.com/blog_assets/datatable_Cheat_Sheet_R.pdf
|
||||
* https://stackoverflow.com/questions/21435339/data-table-vs-dplyr-can-one-do-something-well-the-other-cant-or-does-poorly
|
||||
* ~dtplyr~ package:
|
||||
* https://github.com/tidyverse/dtplyr
|
@ -0,0 +1,67 @@
|
||||
#+TITLE: Datatable package
|
||||
#+DATE: 2021-10-26
|
||||
#+OPTIONS: creator:nil timestamp:nil todo:nil num:nil
|
||||
#+PROPERTY: header-args:R :results output :session *Rc* :cmdline :tangle yes
|
||||
#+PROPERTY: header-args:R+ :exports both
|
||||
#+SETUPFILE: https://fniessen.github.io/org-html-themes/org/theme-readtheorg.setup
|
||||
#+HTML_HEAD: <style> #content{max-width:1800px;}</style>
|
||||
#+HTML_HEAD: <style> p{max-width:800px;}</style>
|
||||
#+HTML_HEAD: <style> li{max-width:800px;}</style>
|
||||
#+HTML_HEAD: <style> pre.src{background: #f5f5f5;color:black;}</style>
|
||||
#+HTML_HEAD: <style> .example{background: #fefefa;color:black;}</style>
|
||||
|
||||
|
||||
#+begin_export html
|
||||
<div id="subtitle" style="float: center; text-align: center;">
|
||||
<p>
|
||||
Org document with
|
||||
<a href="http://www.r-project.org/">R</a> code
|
||||
</p>
|
||||
<p>
|
||||
<a href="http://www.r-project.org/">
|
||||
<img src="http://www.r-project.org/Rlogo.jpg"/>
|
||||
</a>
|
||||
</p>
|
||||
</div>
|
||||
#+end_export
|
||||
|
||||
#+begin_src R :exports code
|
||||
# Start from an empty workspace. This also removes any `flights` object left
# over in the shared R session, so the package and the data set used by the
# examples below are loaded again to keep this file runnable on its own.
rm(list = ls())
library(data.table)
flights_url <- "https://raw.githubusercontent.com/Rdatatable/data.table/master/vignettes/flights14.csv"
flights <- fread(flights_url)
|
||||
#+end_src
|
||||
|
||||
|
||||
** Querying data.table
|
||||
|
||||
Syntax:
|
||||
|
||||
* ~DT[i, j, by]~
|
||||
|
||||
Subsetting by "i" and "j"
|
||||
|
||||
** Subset rows in "i"
|
||||
|
||||
#+begin_src R
|
||||
dt1 <- flights[origin == "JFK" & month == 6L]
|
||||
dt1
|
||||
#+end_src
|
||||
|
||||
Get the first two rows
|
||||
|
||||
#+begin_src R
|
||||
flights[1:2]
|
||||
#+end_src
|
||||
|
||||
#+RESULTS:
|
||||
: year month day dep_delay arr_delay carrier origin dest air_time distance
|
||||
: 1: 2014 1 1 14 13 AA JFK LAX 359 2475
|
||||
: 2: 2014 1 1 -3 13 AA JFK LAX 363 2475
|
||||
: hour
|
||||
: 1: 9
|
||||
: 2: 11
|
||||
|
||||
Ordering the results of a query
|
||||
|
||||
#+begin_src R
|
||||
dt_filt <- flights[order(origin, -dest)]
|
||||
dt_filt
|
||||
#+end_src
|
@ -0,0 +1,108 @@
|
||||
#+TITLE: Datatable package
|
||||
#+DATE: 2021-10-26
|
||||
#+OPTIONS: creator:nil timestamp:nil todo:nil num:nil
|
||||
#+PROPERTY: header-args:R :results output :session *Rc* :cmdline :tangle yes
|
||||
#+PROPERTY: header-args:R+ :exports both
|
||||
#+SETUPFILE: https://fniessen.github.io/org-html-themes/org/theme-readtheorg.setup
|
||||
#+HTML_HEAD: <style> #content{max-width:1800px;}</style>
|
||||
#+HTML_HEAD: <style> p{max-width:800px;}</style>
|
||||
#+HTML_HEAD: <style> li{max-width:800px;}</style>
|
||||
#+HTML_HEAD: <style> pre.src{background: #f5f5f5;color:black;}</style>
|
||||
#+HTML_HEAD: <style> .example{background: #fefefa;color:black;}</style>
|
||||
|
||||
|
||||
#+begin_export html
|
||||
<div id="subtitle" style="float: center; text-align: center;">
|
||||
<p>
|
||||
Org document with
|
||||
<a href="http://www.r-project.org/">R</a> code
|
||||
</p>
|
||||
<p>
|
||||
<a href="http://www.r-project.org/">
|
||||
<img src="http://www.r-project.org/Rlogo.jpg"/>
|
||||
</a>
|
||||
</p>
|
||||
</div>
|
||||
#+end_export
|
||||
|
||||
#+begin_src R :exports code
|
||||
# Start from an empty workspace. This also removes any `flights` object left
# over in the shared R session, so the package and the data set used by the
# examples below are loaded again to keep this file runnable on its own.
rm(list = ls())
library(data.table)
flights_url <- "https://raw.githubusercontent.com/Rdatatable/data.table/master/vignettes/flights14.csv"
flights <- fread(flights_url)
|
||||
#+end_src
|
||||
|
||||
|
||||
** Select columns in "j"
|
||||
|
||||
This creates a vector
|
||||
|
||||
#+begin_src R
|
||||
dt_c <- flights[, arr_delay]
|
||||
head(dt_c)
|
||||
#+end_src
|
||||
|
||||
To create a data.table, use the function "~list~" or dot "~.~"
|
||||
|
||||
#+begin_src R
|
||||
dt_c1 <- flights[, list(arr_delay)]
|
||||
head(dt_c1)
|
||||
#+end_src
|
||||
|
||||
Because the selection is wrapped in "~list~" (or its alias "~.~"), the columns can be
|
||||
renamed
|
||||
|
||||
#+begin_src R
|
||||
dt_c2 <- flights[, .(delay2arriv = arr_delay, dist = distance)]
|
||||
head(dt_c2)
|
||||
#+end_src
|
||||
|
||||
#+begin_src R
|
||||
dt_c2a <- flights[, c("arr_delay", "distance")]
|
||||
head(dt_c2a)
|
||||
#+end_src
|
||||
|
||||
|
||||
#+begin_src R
|
||||
dt_c2b <- flights[, year:day]
|
||||
head(dt_c2b)
|
||||
#+end_src
|
||||
|
||||
Building new columns
|
||||
|
||||
#+begin_src R
|
||||
head(dt_c2b[, `:=`(time = year -1, # name = value,
|
||||
pumpkin = month,
|
||||
tomatoe = day +1)])
|
||||
#+end_src
|
||||
|
||||
Renaming columns
|
||||
|
||||
#+begin_src R
|
||||
setnames(dt_c2b,
|
||||
c("time", "pumpkin", "tomatoe"),# old names
|
||||
c("time_minus1", "pumpkin_month", "day_plus1")) # new names
|
||||
|
||||
names(dt_c2b)
|
||||
#+end_src
|
||||
|
||||
#+begin_src R
|
||||
setnames(dt_c2b, toupper) # new names
|
||||
names(dt_c2b)
|
||||
#+end_src
|
||||
|
||||
Renaming columns
|
||||
|
||||
#+begin_src R
|
||||
setnames(dt_c2b, tolower) # new names
|
||||
names(dt_c2b)
|
||||
#+end_src
|
||||
|
||||
#+begin_src R
|
||||
dt_c2c <- flights[, -(year:day)]
|
||||
head(dt_c2c)
|
||||
#+end_src
|
||||
|
||||
Select columns by pattern, e.g., selecting the first 5 rows of the columns having "del"
|
||||
characters in the name:
|
||||
|
||||
#+begin_src R
|
||||
flights[1:5, names(flights) %like% "del", with = FALSE]
|
||||
#+end_src
|
Loading…
Reference in New Issue