Updating script SQLite-including references
This commit is contained in:
parent
5db701e4a3
commit
7110478863
|
@ -3,19 +3,6 @@
|
|||
|
||||
* mort_geral
|
||||
|
||||
** TODO [#A] update code :noexport:
|
||||
:LOGBOOK:
|
||||
CLOCK: [2022-07-14 jue 08:12]--[2022-07-14 jue 08:22] => 0:10
|
||||
CLOCK: [2022-07-13 mié 16:13]--[2022-07-13 mié 16:43] => 0:30
|
||||
CLOCK: [2022-07-13 mié 08:44]--[2022-07-13 mié 09:14] => 0:30
|
||||
CLOCK: [2022-07-11 lun 15:21]--[2022-07-11 lun 15:25] => 0:04
|
||||
CLOCK: [2022-07-11 lun 14:09]--[2022-07-11 lun 14:39] => 0:30
|
||||
CLOCK: [2022-07-08 vie 14:56]--[2022-07-08 vie 15:26] => 0:30
|
||||
:END:
|
||||
|
||||
Análise de indicadores de saúde: mortalidade geral da população brasileira.
|
||||
|
||||
|
||||
** Scripts
|
||||
|
||||
- [[./script/db.R][Usando RSQlite para salvar dados de maior tamanho: exemplo]]
|
||||
|
|
|
@ -0,0 +1,341 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<style xmlns="http://purl.org/net/xbiblio/csl" class="in-text" version="1.0" demote-non-dropping-particle="sort-only">
|
||||
<info>
|
||||
<title>American Psychological Association 5th edition</title>
|
||||
<title-short>APA (5th ed.)</title-short>
|
||||
<id>http://www.zotero.org/styles/apa-5th-edition</id>
|
||||
<link href="http://www.zotero.org/styles/apa-5th-edition" rel="self"/>
|
||||
<link href="http://rdc.libguides.com/content.php?pid=63487" rel="documentation"/>
|
||||
<author>
|
||||
<name>Simon Kornblith</name>
|
||||
<email>simon@simonster.com</email>
|
||||
</author>
|
||||
<contributor>
|
||||
<name>Bruce D'Arcus</name>
|
||||
</contributor>
|
||||
<contributor>
|
||||
<name>Curtis M. Humphrey</name>
|
||||
</contributor>
|
||||
<contributor>
|
||||
<name>Richard Karnesky</name>
|
||||
<email>karnesky+zotero@gmail.com</email>
|
||||
<uri>http://arc.nucapt.northwestern.edu/Richard_Karnesky</uri>
|
||||
</contributor>
|
||||
<contributor>
|
||||
<name>Sebastian Karcher</name>
|
||||
</contributor>
|
||||
<category citation-format="author-date"/>
|
||||
<category field="psychology"/>
|
||||
<category field="generic-base"/>
|
||||
<updated>2020-03-23T14:28:18+00:00</updated>
|
||||
<rights license="http://creativecommons.org/licenses/by-sa/3.0/">This work is licensed under a Creative Commons Attribution-ShareAlike 3.0 License</rights>
|
||||
</info>
|
||||
<locale xml:lang="fr">
|
||||
<terms>
|
||||
<term name="editor" form="short">
|
||||
<single>éd.</single>
|
||||
<multiple>éds.</multiple>
|
||||
</term>
|
||||
</terms>
|
||||
</locale>
|
||||
<macro name="container-contributors">
|
||||
<choose>
|
||||
<if type="chapter paper-conference" match="any">
|
||||
<text term="in" text-case="capitalize-first" suffix=" "/>
|
||||
<names variable="editor" delimiter=", " suffix=", ">
|
||||
<name and="symbol" initialize-with=". " delimiter=", "/>
|
||||
<label form="short" prefix=" (" text-case="capitalize-first" suffix=")"/>
|
||||
<substitute>
|
||||
<names variable="translator"/>
|
||||
</substitute>
|
||||
</names>
|
||||
</if>
|
||||
</choose>
|
||||
</macro>
|
||||
<macro name="secondary-contributors">
|
||||
<choose>
|
||||
<if type="chapter paper-conference" match="none">
|
||||
<names variable="translator" delimiter=", " prefix=" (" suffix=")">
|
||||
<name and="symbol" initialize-with=". " delimiter=", "/>
|
||||
<label form="short" prefix=", " text-case="capitalize-first"/>
|
||||
<substitute>
|
||||
<names variable="editor"/>
|
||||
</substitute>
|
||||
</names>
|
||||
</if>
|
||||
</choose>
|
||||
</macro>
|
||||
<macro name="author">
|
||||
<names variable="author">
|
||||
<name name-as-sort-order="all" and="symbol" sort-separator=", " initialize-with=". " delimiter=", " delimiter-precedes-last="always"/>
|
||||
<label form="short" prefix=" (" suffix=")" text-case="capitalize-first"/>
|
||||
<substitute>
|
||||
<names variable="editor"/>
|
||||
<names variable="translator"/>
|
||||
<text macro="title"/>
|
||||
</substitute>
|
||||
</names>
|
||||
</macro>
|
||||
<macro name="author-short">
|
||||
<names variable="author">
|
||||
<name form="short" and="symbol" delimiter=", " initialize-with=". "/>
|
||||
<substitute>
|
||||
<names variable="editor"/>
|
||||
<names variable="translator"/>
|
||||
<choose>
|
||||
<if type="bill book graphic legal_case legislation motion_picture report song" match="any">
|
||||
<text variable="title" form="short" font-style="italic"/>
|
||||
</if>
|
||||
<else>
|
||||
<text variable="title" form="short" quotes="true"/>
|
||||
</else>
|
||||
</choose>
|
||||
</substitute>
|
||||
</names>
|
||||
</macro>
|
||||
<macro name="access">
|
||||
<choose>
|
||||
<if type="thesis">
|
||||
<choose>
|
||||
<if variable="archive" match="any">
|
||||
<group>
|
||||
<text term="retrieved" text-case="capitalize-first" suffix=" "/>
|
||||
<text term="from" suffix=" "/>
|
||||
<text variable="archive" suffix="."/>
|
||||
<text variable="archive_location" prefix=" (" suffix=")"/>
|
||||
</group>
|
||||
</if>
|
||||
<else>
|
||||
<group>
|
||||
<text term="retrieved" text-case="capitalize-first" suffix=" "/>
|
||||
<date variable="accessed" suffix=", ">
|
||||
<date-part name="month" suffix=" "/>
|
||||
<date-part name="day" suffix=", "/>
|
||||
<date-part name="year"/>
|
||||
</date>
|
||||
<text term="from" suffix=" "/>
|
||||
<text variable="URL"/>
|
||||
</group>
|
||||
</else>
|
||||
</choose>
|
||||
</if>
|
||||
<else>
|
||||
<choose>
|
||||
<if variable="URL">
|
||||
<choose>
|
||||
<if variable="archive">
|
||||
<group>
|
||||
<text term="retrieved" text-case="capitalize-first" suffix=" "/>
|
||||
<text term="from" suffix=" "/>
|
||||
<text variable="archive" suffix="."/>
|
||||
</group>
|
||||
</if>
|
||||
<else>
|
||||
<group>
|
||||
<text term="retrieved" text-case="capitalize-first" suffix=" "/>
|
||||
<date variable="accessed" suffix=", ">
|
||||
<date-part name="month" suffix=" "/>
|
||||
<date-part name="day" suffix=", "/>
|
||||
<date-part name="year"/>
|
||||
</date>
|
||||
<group>
|
||||
<text term="from" suffix=" "/>
|
||||
<text variable="URL"/>
|
||||
</group>
|
||||
</group>
|
||||
</else>
|
||||
</choose>
|
||||
</if>
|
||||
</choose>
|
||||
</else>
|
||||
</choose>
|
||||
</macro>
|
||||
<macro name="title">
|
||||
<choose>
|
||||
<if type="report thesis" match="any">
|
||||
<text variable="title" font-style="italic"/>
|
||||
<group prefix=" (" suffix=")">
|
||||
<text variable="genre"/>
|
||||
<text variable="number" prefix=" No. "/>
|
||||
</group>
|
||||
</if>
|
||||
<else-if type="bill book graphic legal_case legislation manuscript motion_picture report song speech" match="any">
|
||||
<text variable="title" font-style="italic"/>
|
||||
</else-if>
|
||||
<else>
|
||||
<text variable="title"/>
|
||||
</else>
|
||||
</choose>
|
||||
</macro>
|
||||
<macro name="publisher">
|
||||
<choose>
|
||||
<if type="report" match="any">
|
||||
<group delimiter=": ">
|
||||
<text variable="publisher-place"/>
|
||||
<text variable="publisher"/>
|
||||
</group>
|
||||
</if>
|
||||
<else-if type="thesis" match="any">
|
||||
<group delimiter=", ">
|
||||
<text variable="publisher"/>
|
||||
<text variable="publisher-place"/>
|
||||
</group>
|
||||
</else-if>
|
||||
<else>
|
||||
<choose>
|
||||
<if variable="event" match="none">
|
||||
<text variable="genre" suffix=", "/>
|
||||
</if>
|
||||
</choose>
|
||||
<group delimiter=": ">
|
||||
<text variable="publisher-place"/>
|
||||
<text variable="publisher"/>
|
||||
</group>
|
||||
</else>
|
||||
</choose>
|
||||
</macro>
|
||||
<macro name="event">
|
||||
<choose>
|
||||
<if variable="event">
|
||||
<choose>
|
||||
<if variable="genre" match="none">
|
||||
<text term="presented at" text-case="capitalize-first" suffix=" "/>
|
||||
<text variable="event"/>
|
||||
</if>
|
||||
<else>
|
||||
<group delimiter=" ">
|
||||
<text variable="genre" text-case="capitalize-first"/>
|
||||
<text term="presented at"/>
|
||||
<text variable="event"/>
|
||||
</group>
|
||||
</else>
|
||||
</choose>
|
||||
</if>
|
||||
</choose>
|
||||
</macro>
|
||||
<macro name="issued">
|
||||
<choose>
|
||||
<if variable="issued">
|
||||
<group prefix=" (" suffix=").">
|
||||
<date variable="issued">
|
||||
<date-part name="year"/>
|
||||
</date>
|
||||
<choose>
|
||||
<if type="article-journal bill book chapter graphic legal_case legislation motion_picture paper-conference report song" match="none">
|
||||
<date variable="issued">
|
||||
<date-part prefix=", " name="month"/>
|
||||
<date-part prefix=" " name="day"/>
|
||||
</date>
|
||||
</if>
|
||||
</choose>
|
||||
</group>
|
||||
</if>
|
||||
<else>
|
||||
<text prefix=" (" term="no date" suffix=")." form="short"/>
|
||||
</else>
|
||||
</choose>
|
||||
</macro>
|
||||
<macro name="issued-year">
|
||||
<choose>
|
||||
<if variable="issued">
|
||||
<date variable="issued">
|
||||
<date-part name="year"/>
|
||||
</date>
|
||||
</if>
|
||||
<else>
|
||||
<text term="no date" form="short"/>
|
||||
</else>
|
||||
</choose>
|
||||
</macro>
|
||||
<macro name="edition">
|
||||
<choose>
|
||||
<if is-numeric="edition">
|
||||
<group delimiter=" ">
|
||||
<number variable="edition" form="ordinal"/>
|
||||
<text term="edition" form="short"/>
|
||||
</group>
|
||||
</if>
|
||||
<else>
|
||||
<text variable="edition" suffix="."/>
|
||||
</else>
|
||||
</choose>
|
||||
</macro>
|
||||
<macro name="locators">
|
||||
<choose>
|
||||
<if type="article-journal article-magazine article-newspaper" match="any">
|
||||
<group prefix=", " delimiter=", ">
|
||||
<group>
|
||||
<text variable="volume" font-style="italic"/>
|
||||
<text variable="issue" prefix="(" suffix=")"/>
|
||||
</group>
|
||||
<text variable="page"/>
|
||||
</group>
|
||||
</if>
|
||||
<else-if type="bill book chapter graphic legal_case legislation motion_picture paper-conference report song" match="any">
|
||||
<group prefix=" (" suffix=")" delimiter=", ">
|
||||
<text macro="edition"/>
|
||||
<group>
|
||||
<text term="volume" form="short" plural="true" text-case="capitalize-first" suffix=" "/>
|
||||
<number variable="number-of-volumes" form="numeric" prefix="1-"/>
|
||||
</group>
|
||||
<group>
|
||||
<text term="volume" form="short" text-case="capitalize-first" suffix=" "/>
|
||||
<number variable="volume" form="numeric"/>
|
||||
</group>
|
||||
<group>
|
||||
<label variable="page" form="short" suffix=" "/>
|
||||
<text variable="page"/>
|
||||
</group>
|
||||
</group>
|
||||
</else-if>
|
||||
</choose>
|
||||
</macro>
|
||||
<macro name="citation-locator">
|
||||
<group>
|
||||
<label variable="locator" form="short"/>
|
||||
<text variable="locator" prefix=" "/>
|
||||
</group>
|
||||
</macro>
|
||||
<citation et-al-min="6" et-al-use-first="1" et-al-subsequent-min="3" et-al-subsequent-use-first="1" disambiguate-add-year-suffix="true" disambiguate-add-names="true" disambiguate-add-givenname="true" givenname-disambiguation-rule="primary-name" collapse="year">
|
||||
<sort>
|
||||
<key macro="author"/>
|
||||
<key macro="issued-year"/>
|
||||
</sort>
|
||||
<layout prefix="(" suffix=")" delimiter="; ">
|
||||
<group delimiter=", ">
|
||||
<text macro="author-short"/>
|
||||
<text macro="issued-year"/>
|
||||
<text macro="citation-locator"/>
|
||||
</group>
|
||||
</layout>
|
||||
</citation>
|
||||
<bibliography hanging-indent="true" et-al-min="8" et-al-use-first="7" entry-spacing="0" line-spacing="2">
|
||||
<sort>
|
||||
<key macro="author"/>
|
||||
<key macro="issued-year" sort="ascending"/>
|
||||
</sort>
|
||||
<layout>
|
||||
<group suffix=".">
|
||||
<text macro="author" suffix="."/>
|
||||
<text macro="issued" suffix=" "/>
|
||||
<group delimiter=". ">
|
||||
<text macro="title"/>
|
||||
<group>
|
||||
<text macro="container-contributors"/>
|
||||
<text macro="secondary-contributors"/>
|
||||
<group delimiter=", ">
|
||||
<text variable="container-title" font-style="italic"/>
|
||||
<text variable="collection-title"/>
|
||||
</group>
|
||||
</group>
|
||||
</group>
|
||||
<text macro="locators"/>
|
||||
<group delimiter=", " prefix=". ">
|
||||
<text macro="event"/>
|
||||
<text macro="publisher"/>
|
||||
</group>
|
||||
</group>
|
||||
<text macro="access" prefix=". "/>
|
||||
</layout>
|
||||
</bibliography>
|
||||
</style>
|
|
@ -0,0 +1,20 @@
|
|||
@book{wiley2020advanced,
|
||||
title = {Advanced R 4 Data Programming and the Cloud: Using PostgreSQL, AWS, and Shiny},
|
||||
author = {Matt Wiley and Joshua F. Wiley},
|
||||
year = {2020},
|
||||
doi = {10.1007/978-1-4842-5973-3},
|
||||
language = {English},
|
||||
isbn = {9781484259726},
|
||||
publisher = {Apress},
|
||||
address = {United States of America},
|
||||
edition = {2nd},
|
||||
}
|
||||
|
||||
@Manual{R,
|
||||
title = {R: A Language and Environment for Statistical Computing},
|
||||
author = {{R Core Team}},
|
||||
organization = {R Foundation for Statistical Computing},
|
||||
address = {Vienna, Austria},
|
||||
year = {2022},
|
||||
url = {https://www.R-project.org/},
|
||||
}
|
|
@ -1,23 +1,21 @@
|
|||
#' ---
|
||||
#' title: "Create database fo mortality indicator Brazil population"
|
||||
#' title: "Create a SQLite database for mortality data - Brazil population"
|
||||
#' author: "José A Bran - https://ayuda.onecluster.org/"
|
||||
#' date: "2021-04-22"
|
||||
#' output:
|
||||
#' html_document:
|
||||
#' df_print: paged
|
||||
#' toc: yes
|
||||
#' toc_float: yes
|
||||
#' code_folding: hide
|
||||
#' toc: yes
|
||||
#' toc_float:
|
||||
#' smooth_scroll: true
|
||||
#' highlight: zenburn
|
||||
#' bibliography: ../bib/references.bib # References
|
||||
#' csl: ../bib/apa-5th-edition.csl # Citation style language
|
||||
#' ---
|
||||
|
||||
#'+ r setup, include=FALSE
|
||||
#+ setup, include=FALSE
|
||||
knitr::opts_chunk$set(echo = TRUE)
|
||||
|
||||
############################################################################
|
||||
## From: ##
|
||||
## ##
|
||||
## https://cran.r-project.org/web/packages/RSQLite/vignettes/RSQLite.html ##
|
||||
############################################################################
|
||||
|
||||
rm(list = ls())
|
||||
|
||||
library(DBI)
|
||||
|
@ -26,70 +24,140 @@ library(data.table)
|
|||
library(ggplot2)
|
||||
theme_set(theme_bw())
|
||||
|
||||
#' ## How to deal with some big data for your machine memory ("data bigger than ram")
|
||||
#'------------------------------------------------------------------------------
|
||||
#'
|
||||
#' ## Dealing with large data
|
||||
#'
|
||||
#' ------------------------------------------------------------------------------
|
||||
#'
|
||||
#'
|
||||
#' R cannot handle data larger than RAM. Therefore, how "big" some data is, is
|
||||
#' relative to the memory and processing capacity of the machines.
|
||||
#'
|
||||
#' These population mortality data are larger than the RAM of most common
|
||||
#' personal computers (4 to 16 Gigabytes).
|
||||
#'
|
||||
#' Thus, it would be appropriate to look for smart strategies to deal with this data.
|
||||
|
||||
|
||||
#' We may consider the following options:
|
||||
#'
|
||||
#'
|
||||
#' - Download the data in csv format, and reduce the size of each file
|
||||
#' - You may use 'rds' files in R for this purpose
|
||||
#' - Check "?saveRDS" help for more information
|
||||
#' - Work with pieces of data, extracting only the columns or rows you are interested
|
||||
#' in
|
||||
#' - Explore a database solution
|
||||
#' - There are multiple resources to work with SQL and NoSQL databases in R
|
||||
#' - Take a look at the RPostgreSQL package for SQL integration
|
||||
#' - Check the "mongolite" package for NoSQL integration
|
||||
#'
|
||||
#' Learning about SQLite databases can be a good way to make a first approach to
|
||||
#' basic concepts about databases and connections in R.
|
||||
#'
|
||||
|
||||
#' This database is large, thus to work with it in a local machine, a Sqlite database can be an option
|
||||
|
||||
|
||||
#' ## How to create a new database
|
||||
#' A great discussion about this topic can be found in this book
|
||||
#' [@wiley2020advanced]
|
||||
#'
|
||||
#' Check the function for more information:
|
||||
#' > ?dbConnect
|
||||
#'
|
||||
#' Also, take a look at this:
|
||||
|
||||
?saveRDS
|
||||
|
||||
|
||||
#'------------------------------------------------------------------------------
|
||||
#'
|
||||
#' ## How to create a new SQLite database
|
||||
#'
|
||||
#' ------------------------------------------------------------------------------
|
||||
|
||||
############################################################################
|
||||
## From: ##
|
||||
## ##
|
||||
## https://cran.r-project.org/web/packages/RSQLite/vignettes/RSQLite.html ##
|
||||
############################################################################
|
||||
|
||||
#' Check the function help in R for more information:
|
||||
|
||||
?dbConnect
|
||||
|
||||
mortdb <- dbConnect(RSQLite::SQLite(), "mort_db.sqlite")
|
||||
|
||||
|
||||
#' Disconnect:
|
||||
#' When you are done with the process of writing or extracting data, just
|
||||
#' stop the connection with the database:
|
||||
#'
|
||||
#' dbDisconnect(mortdb)
|
||||
|
||||
#'
|
||||
#' unlink("mort_db.sqlite")
|
||||
|
||||
#' ## Hoe to include a table in the Sqlite database
|
||||
?dbDisconnect
|
||||
|
||||
?unlink
|
||||
|
||||
#'------------------------------------------------------------------------------
|
||||
#'
|
||||
#' ## Including tables in the SQLite database
|
||||
#'
|
||||
#'------------------------------------------------------------------------------
|
||||
#'
|
||||
#' Download the data, then load the table to be written in the workspace and use the following to include it in the database.
|
||||
#'
|
||||
#' I saved the data as 'rds' to reduce the object weight.
|
||||
#'
|
||||
#' Download the data, then load the table to be written in the workspace and use
|
||||
#' the following to include it in the database
|
||||
|
||||
d <- readRDS("../data/ETLSIM.DORES_2010.rds") # I saved the data as 'rds' to reduce the object weight
|
||||
setDT(d)
|
||||
setnames(d, tolower)
|
||||
names(d)
|
||||
|
||||
#' You may also access the data from the cloud:
|
||||
#'
|
||||
#' url = 'https://diaad.s3.sa-east-1.amazonaws.com/sim/Mortalidade_Geral_2020.csv'
|
||||
#'
|
||||
#' d <- fread(url)
|
||||
#' setnames(d, tolower)
|
||||
|
||||
|
||||
d <- readRDS("../data/ETLSIM.DORES_2010.rds")
|
||||
|
||||
setDT(d)
|
||||
|
||||
setnames(d, tolower)
|
||||
|
||||
head(names(d))
|
||||
|
||||
#' ------------------------------------------------------------------------------
|
||||
#'
|
||||
#' ## Update 2022:
|
||||
#'
|
||||
#'------------------------------------------------------------------------------
|
||||
#'
|
||||
#' Please note that the data has been updated since the build of this script, thus
|
||||
#' some column names and types differ between tables
|
||||
|
||||
dbWriteTable(mortdb, "Mortalidade_Br_2010", d)
|
||||
|
||||
|
||||
#'------------------------------------------------------------------------------
|
||||
#'
|
||||
#' ## List the tables
|
||||
#'
|
||||
#' ------------------------------------------------------------------------------
|
||||
|
||||
dbListTables(mortdb)
|
||||
|
||||
#' ## Reading again as data.table:
|
||||
#'------------------------------------------------------------------------------
|
||||
#'
|
||||
#' ## Reading again as data.table
|
||||
#'
|
||||
#' ------------------------------------------------------------------------------
|
||||
|
||||
dt = dbReadTable(mortdb, "Mortalidade_Br_2010")
|
||||
|
||||
setDT(dt)
|
||||
setnames(dt, tolower)
|
||||
names(dt)
|
||||
head(names(dt))
|
||||
|
||||
#' ## Disconnect
|
||||
#'
|
||||
#' Disconnect and remove duplicated information
|
||||
|
||||
dbDisconnect(mortdb)
|
||||
rm(d)
|
||||
rm(mortdb, d)
|
||||
|
||||
#' Then, you can select the columns or lines you want to use
|
||||
#'
|
||||
|
||||
dt = dt[, .(idade_obito_anos, def_sexo, dtobito, dtnasc)]
|
||||
|
||||
|
@ -97,7 +165,11 @@ str(dt)
|
|||
|
||||
dt[, .N, .(idade_obito_anos, def_sexo)]
|
||||
|
||||
#' ## Recoding dates: not reading date as date
|
||||
#'------------------------------------------------------------------------------
|
||||
#'
|
||||
#' ## Recoding dates from integers to "IDate"
|
||||
#'
|
||||
#' ------------------------------------------------------------------------------
|
||||
|
||||
class(dt$dtobito)
|
||||
|
||||
|
@ -113,7 +185,11 @@ dt[, .N, .(year(idtnasc))]
|
|||
|
||||
dt[, .N, (age)]
|
||||
|
||||
#'------------------------------------------------------------------------------
|
||||
#'
|
||||
#' ## Visualizing data distribution
|
||||
#'
|
||||
#' ------------------------------------------------------------------------------
|
||||
|
||||
ggplot(dt, aes(age, fill = def_sexo)) +
|
||||
geom_histogram(bins = 200) +
|
||||
|
@ -122,3 +198,9 @@ ggplot(dt, aes(age, fill = def_sexo)) +
|
|||
|
||||
|
||||
#' The end
|
||||
|
||||
#'------------------------------------------------------------------------------
|
||||
#'
|
||||
#' ## References
|
||||
#'
|
||||
#' ------------------------------------------------------------------------------
|
||||
|
|
Loading…
Reference in New Issue