Updating script SQLite-including references

This commit is contained in:
Jose 2022-07-15 14:54:28 -03:00
parent 5db701e4a3
commit 7110478863
6 changed files with 481 additions and 51 deletions

0
.gitignore vendored Normal file → Executable file
View File

0
LICENSE Normal file → Executable file
View File

13
README.org Normal file → Executable file
View File

@ -3,19 +3,6 @@
* mort_geral
** TODO [#A] update code :noexport:
:LOGBOOK:
CLOCK: [2022-07-14 jue 08:12]--[2022-07-14 jue 08:22] => 0:10
CLOCK: [2022-07-13 mié 16:13]--[2022-07-13 mié 16:43] => 0:30
CLOCK: [2022-07-13 mié 08:44]--[2022-07-13 mié 09:14] => 0:30
CLOCK: [2022-07-11 lun 15:21]--[2022-07-11 lun 15:25] => 0:04
CLOCK: [2022-07-11 lun 14:09]--[2022-07-11 lun 14:39] => 0:30
CLOCK: [2022-07-08 vie 14:56]--[2022-07-08 vie 15:26] => 0:30
:END:
Análise de indicadores de saúde: mortalidade geral da população brasileira.
** Scripts
- [[./script/db.R][Usando RSQLite para salvar dados de maior tamanho: exemplo]]

341
bib/apa-5th-edition.csl Executable file
View File

@ -0,0 +1,341 @@
<?xml version="1.0" encoding="utf-8"?>
<style xmlns="http://purl.org/net/xbiblio/csl" class="in-text" version="1.0" demote-non-dropping-particle="sort-only">
<info>
<title>American Psychological Association 5th edition</title>
<title-short>APA (5th ed.)</title-short>
<id>http://www.zotero.org/styles/apa-5th-edition</id>
<link href="http://www.zotero.org/styles/apa-5th-edition" rel="self"/>
<link href="http://rdc.libguides.com/content.php?pid=63487" rel="documentation"/>
<author>
<name>Simon Kornblith</name>
<email>simon@simonster.com</email>
</author>
<contributor>
<name>Bruce D'Arcus</name>
</contributor>
<contributor>
<name>Curtis M. Humphrey</name>
</contributor>
<contributor>
<name>Richard Karnesky</name>
<email>karnesky+zotero@gmail.com</email>
<uri>http://arc.nucapt.northwestern.edu/Richard_Karnesky</uri>
</contributor>
<contributor>
<name>Sebastian Karcher</name>
</contributor>
<category citation-format="author-date"/>
<category field="psychology"/>
<category field="generic-base"/>
<updated>2020-03-23T14:28:18+00:00</updated>
<rights license="http://creativecommons.org/licenses/by-sa/3.0/">This work is licensed under a Creative Commons Attribution-ShareAlike 3.0 License</rights>
</info>
<locale xml:lang="fr">
<terms>
<term name="editor" form="short">
<single>éd.</single>
<multiple>éds.</multiple>
</term>
</terms>
</locale>
<macro name="container-contributors">
<choose>
<if type="chapter paper-conference" match="any">
<text term="in" text-case="capitalize-first" suffix=" "/>
<names variable="editor" delimiter=", " suffix=", ">
<name and="symbol" initialize-with=". " delimiter=", "/>
<label form="short" prefix=" (" text-case="capitalize-first" suffix=")"/>
<substitute>
<names variable="translator"/>
</substitute>
</names>
</if>
</choose>
</macro>
<macro name="secondary-contributors">
<choose>
<if type="chapter paper-conference" match="none">
<names variable="translator" delimiter=", " prefix=" (" suffix=")">
<name and="symbol" initialize-with=". " delimiter=", "/>
<label form="short" prefix=", " text-case="capitalize-first"/>
<substitute>
<names variable="editor"/>
</substitute>
</names>
</if>
</choose>
</macro>
<macro name="author">
<names variable="author">
<name name-as-sort-order="all" and="symbol" sort-separator=", " initialize-with=". " delimiter=", " delimiter-precedes-last="always"/>
<label form="short" prefix=" (" suffix=")" text-case="capitalize-first"/>
<substitute>
<names variable="editor"/>
<names variable="translator"/>
<text macro="title"/>
</substitute>
</names>
</macro>
<macro name="author-short">
<names variable="author">
<name form="short" and="symbol" delimiter=", " initialize-with=". "/>
<substitute>
<names variable="editor"/>
<names variable="translator"/>
<choose>
<if type="bill book graphic legal_case legislation motion_picture report song" match="any">
<text variable="title" form="short" font-style="italic"/>
</if>
<else>
<text variable="title" form="short" quotes="true"/>
</else>
</choose>
</substitute>
</names>
</macro>
<macro name="access">
<choose>
<if type="thesis">
<choose>
<if variable="archive" match="any">
<group>
<text term="retrieved" text-case="capitalize-first" suffix=" "/>
<text term="from" suffix=" "/>
<text variable="archive" suffix="."/>
<text variable="archive_location" prefix=" (" suffix=")"/>
</group>
</if>
<else>
<group>
<text term="retrieved" text-case="capitalize-first" suffix=" "/>
<date variable="accessed" suffix=", ">
<date-part name="month" suffix=" "/>
<date-part name="day" suffix=", "/>
<date-part name="year"/>
</date>
<text term="from" suffix=" "/>
<text variable="URL"/>
</group>
</else>
</choose>
</if>
<else>
<choose>
<if variable="URL">
<choose>
<if variable="archive">
<group>
<text term="retrieved" text-case="capitalize-first" suffix=" "/>
<text term="from" suffix=" "/>
<text variable="archive" suffix="."/>
</group>
</if>
<else>
<group>
<text term="retrieved" text-case="capitalize-first" suffix=" "/>
<date variable="accessed" suffix=", ">
<date-part name="month" suffix=" "/>
<date-part name="day" suffix=", "/>
<date-part name="year"/>
</date>
<group>
<text term="from" suffix=" "/>
<text variable="URL"/>
</group>
</group>
</else>
</choose>
</if>
</choose>
</else>
</choose>
</macro>
<macro name="title">
<choose>
<if type="report thesis" match="any">
<text variable="title" font-style="italic"/>
<group prefix=" (" suffix=")">
<text variable="genre"/>
<text variable="number" prefix=" No. "/>
</group>
</if>
<else-if type="bill book graphic legal_case legislation manuscript motion_picture report song speech" match="any">
<text variable="title" font-style="italic"/>
</else-if>
<else>
<text variable="title"/>
</else>
</choose>
</macro>
<macro name="publisher">
<choose>
<if type="report" match="any">
<group delimiter=": ">
<text variable="publisher-place"/>
<text variable="publisher"/>
</group>
</if>
<else-if type="thesis" match="any">
<group delimiter=", ">
<text variable="publisher"/>
<text variable="publisher-place"/>
</group>
</else-if>
<else>
<choose>
<if variable="event" match="none">
<text variable="genre" suffix=", "/>
</if>
</choose>
<group delimiter=": ">
<text variable="publisher-place"/>
<text variable="publisher"/>
</group>
</else>
</choose>
</macro>
<macro name="event">
<choose>
<if variable="event">
<choose>
<if variable="genre" match="none">
<text term="presented at" text-case="capitalize-first" suffix=" "/>
<text variable="event"/>
</if>
<else>
<group delimiter=" ">
<text variable="genre" text-case="capitalize-first"/>
<text term="presented at"/>
<text variable="event"/>
</group>
</else>
</choose>
</if>
</choose>
</macro>
<macro name="issued">
<choose>
<if variable="issued">
<group prefix=" (" suffix=").">
<date variable="issued">
<date-part name="year"/>
</date>
<choose>
<if type="article-journal bill book chapter graphic legal_case legislation motion_picture paper-conference report song" match="none">
<date variable="issued">
<date-part prefix=", " name="month"/>
<date-part prefix=" " name="day"/>
</date>
</if>
</choose>
</group>
</if>
<else>
<text prefix=" (" term="no date" suffix=")." form="short"/>
</else>
</choose>
</macro>
<macro name="issued-year">
<choose>
<if variable="issued">
<date variable="issued">
<date-part name="year"/>
</date>
</if>
<else>
<text term="no date" form="short"/>
</else>
</choose>
</macro>
<macro name="edition">
<choose>
<if is-numeric="edition">
<group delimiter=" ">
<number variable="edition" form="ordinal"/>
<text term="edition" form="short"/>
</group>
</if>
<else>
<text variable="edition" suffix="."/>
</else>
</choose>
</macro>
<macro name="locators">
<choose>
<if type="article-journal article-magazine article-newspaper" match="any">
<group prefix=", " delimiter=", ">
<group>
<text variable="volume" font-style="italic"/>
<text variable="issue" prefix="(" suffix=")"/>
</group>
<text variable="page"/>
</group>
</if>
<else-if type="bill book chapter graphic legal_case legislation motion_picture paper-conference report song" match="any">
<group prefix=" (" suffix=")" delimiter=", ">
<text macro="edition"/>
<group>
<text term="volume" form="short" plural="true" text-case="capitalize-first" suffix=" "/>
<number variable="number-of-volumes" form="numeric" prefix="1-"/>
</group>
<group>
<text term="volume" form="short" text-case="capitalize-first" suffix=" "/>
<number variable="volume" form="numeric"/>
</group>
<group>
<label variable="page" form="short" suffix=" "/>
<text variable="page"/>
</group>
</group>
</else-if>
</choose>
</macro>
<macro name="citation-locator">
<group>
<label variable="locator" form="short"/>
<text variable="locator" prefix=" "/>
</group>
</macro>
<citation et-al-min="6" et-al-use-first="1" et-al-subsequent-min="3" et-al-subsequent-use-first="1" disambiguate-add-year-suffix="true" disambiguate-add-names="true" disambiguate-add-givenname="true" givenname-disambiguation-rule="primary-name" collapse="year">
<sort>
<key macro="author"/>
<key macro="issued-year"/>
</sort>
<layout prefix="(" suffix=")" delimiter="; ">
<group delimiter=", ">
<text macro="author-short"/>
<text macro="issued-year"/>
<text macro="citation-locator"/>
</group>
</layout>
</citation>
<bibliography hanging-indent="true" et-al-min="8" et-al-use-first="7" entry-spacing="0" line-spacing="2">
<sort>
<key macro="author"/>
<key macro="issued-year" sort="ascending"/>
</sort>
<layout>
<group suffix=".">
<text macro="author" suffix="."/>
<text macro="issued" suffix=" "/>
<group delimiter=". ">
<text macro="title"/>
<group>
<text macro="container-contributors"/>
<text macro="secondary-contributors"/>
<group delimiter=", ">
<text variable="container-title" font-style="italic"/>
<text variable="collection-title"/>
</group>
</group>
</group>
<text macro="locators"/>
<group delimiter=", " prefix=". ">
<text macro="event"/>
<text macro="publisher"/>
</group>
</group>
<text macro="access" prefix=". "/>
</layout>
</bibliography>
</style>

20
bib/references.bib Normal file
View File

@ -0,0 +1,20 @@
@book{wiley2020advanced,
title = {Advanced R 4 Data Programming and the Cloud: Using PostgreSQL, AWS, and Shiny},
author = {Matt Wiley and Joshua F. Wiley},
year = {2020},
doi = {10.1007/978-1-4842-5973-3},
language = {English},
isbn = {9781484259726},
publisher = {Apress},
address = {United States of America},
edition = {2nd},
}
@Manual{R,
title = {R: A Language and Environment for Statistical Computing},
author = {{R Core Team}},
organization = {R Foundation for Statistical Computing},
address = {Vienna, Austria},
year = {2022},
url = {https://www.R-project.org/},
}

158
script/db.R Normal file → Executable file
View File

@ -1,23 +1,21 @@
#' ---
#' title: "Create database fo mortality indicator Brazil population"
#' title: "Create a SQLite database for mortality data - Brazil population"
#' author: "José A Bran - https://ayuda.onecluster.org/"
#' date: "2021-04-22"
#' output:
#' html_document:
#' df_print: paged
#' toc: yes
#' toc_float: yes
#' code_folding: hide
#' toc: yes
#' toc_float:
#' smooth_scroll: true
#' highlight: zenburn
#' bibliography: ../bib/references.bib # References
#' csl: ../bib/apa-5th-edition.csl # Citation style language
#' ---
#'+ r setup, include=FALSE
#+ setup, include=FALSE
knitr::opts_chunk$set(echo = TRUE)
############################################################################
## From: ##
## ##
## https://cran.r-project.org/web/packages/RSQLite/vignettes/RSQLite.html ##
############################################################################
rm(list = ls())
library(DBI)
@ -26,70 +24,140 @@ library(data.table)
library(ggplot2)
theme_set(theme_bw())
#' ## How to deal with some big data for your machine memory ("data bigger than ram")
#'------------------------------------------------------------------------------
#'
#' ## Dealing with large data
#'
#' ------------------------------------------------------------------------------
#'
#'
#' By default R loads data into memory, so it cannot handle data larger than the
#' available RAM. How "big" a dataset is therefore depends on the memory and
#' processing capacity of the machine.
#'
#' These population mortality data are larger than the RAM of most common
#' personal computers (4 to 16 gigabytes).
#'
#' Thus, it would be appropriate to look for smart strategies to deal with this data.
#' We may consider the following options:
#'
#'
#' - Download the data in csv format, and reduce the size of each file
#' - You may use 'rds' files in R for this purpose
#' - Check "?saveRDS" help for more information
#' - Work with pieces of data, extracting only the columns or rows you are interested
#' in
#' - Explore a database solution
#' - There are multiple resources to work with SQL and NoSQL databases in R
#' - Take a look at the RPostgreSQL package for SQL integration
#' - Check the "mongolite" package for NoSQL integration
#'
#' Learning about SQLite can be a good first approach to the basic concepts of
#' databases and connections in R.
#'
#' This database is large, thus to work with it in a local machine, a Sqlite database can be an option
#' ## How to create a new database
#' A great discussion about this topic can be found in this book
#' [@wiley2020advanced]
#'
#' Check the function for more information:
#' > ?dbConnect
#'
#' Also, take a look at this:
?saveRDS
#'------------------------------------------------------------------------------
#'
#' ## How to create a new SQLite database
#'
#' ------------------------------------------------------------------------------
############################################################################
## From: ##
## ##
## https://cran.r-project.org/web/packages/RSQLite/vignettes/RSQLite.html ##
############################################################################
#' Check the function help in R for more information:
?dbConnect
mortdb <- dbConnect(RSQLite::SQLite(), "mort_db.sqlite")
#' Disconnect:
#' When you are done writing or extracting data, just close the connection to
#' the database:
#'
#' dbDisconnect(mortdb)
#'
#' unlink("mort_db.sqlite")
#' ## Hoe to include a table in the Sqlite database
?dbDisconnect
?unlink
#'------------------------------------------------------------------------------
#'
#' ## Including tables in the SQLite database
#'
#'------------------------------------------------------------------------------
#'
#' Download the data, then load the table to be written into the workspace and use
#' the following to include it in the database.
#'
#' I saved the data as 'rds' to reduce the object weight.
#'
#' Download the data, then load the table to be written in the workspace and use
#' the following to include it in the database
d <- readRDS("../data/ETLSIM.DORES_2010.rds") # I saved the data as 'rds' to reduce the object weight
setDT(d)
setnames(d, tolower)
names(d)
#' You may also access the data from the cloud:
#'
#' url = 'https://diaad.s3.sa-east-1.amazonaws.com/sim/Mortalidade_Geral_2020.csv'
#'
#' d <- fread(url)
#' setnames(d, tolower)
d <- readRDS("../data/ETLSIM.DORES_2010.rds")
setDT(d)
setnames(d, tolower)
head(names(d))
#' ------------------------------------------------------------------------------
#'
#' ## Update 2022:
#'
#'------------------------------------------------------------------------------
#'
#' Please note that the data have been updated since this script was written, so
#' some column names and types differ between tables
dbWriteTable(mortdb, "Mortalidade_Br_2010", d)
#'------------------------------------------------------------------------------
#'
#' ## List the tables
#'
#' ------------------------------------------------------------------------------
dbListTables(mortdb)
#' ## Reading again as data.table:
#'------------------------------------------------------------------------------
#'
#' ## Reading again as data.table
#'
#' ------------------------------------------------------------------------------
dt = dbReadTable(mortdb, "Mortalidade_Br_2010")
setDT(dt)
setnames(dt, tolower)
names(dt)
head(names(dt))
#' ## Disconnect
#'
#' Disconnect and remove duplicated information
dbDisconnect(mortdb)
rm(d)
rm(mortdb, d)
#' Then, you can select the columns or rows you want to use
#'
dt = dt[, .(idade_obito_anos, def_sexo, dtobito, dtnasc)]
@ -97,7 +165,11 @@ str(dt)
dt[, .N, .(idade_obito_anos, def_sexo)]
#' ## Recoding dates: not reading date as date
#'------------------------------------------------------------------------------
#'
#' ## Recoding dates from integers to "IDate"
#'
#' ------------------------------------------------------------------------------
class(dt$dtobito)
@ -113,7 +185,11 @@ dt[, .N, .(year(idtnasc))]
dt[, .N, (age)]
#'------------------------------------------------------------------------------
#'
#' ## Visualizing data distribution
#'
#' ------------------------------------------------------------------------------
ggplot(dt, aes(age, fill = def_sexo)) +
geom_histogram(bins = 200) +
@ -122,3 +198,9 @@ ggplot(dt, aes(age, fill = def_sexo)) +
#' The end
#'------------------------------------------------------------------------------
#'
#' ## References
#'
#' ------------------------------------------------------------------------------