197 lines
3.9 KiB
R
197 lines
3.9 KiB
R
#' ---
|
||
#' title: "Exploring data on mortality, Brazil - 2010"
|
||
#' author: "José A Bran jose.alfredo@posgrad.ufsc.br"
|
||
#' date: "2021-04-15"
|
||
#' output:
|
||
#' html_document:
|
||
#' df_print: paged
|
||
#' toc: yes
|
||
#' toc_float: yes
|
||
#' ---
|
||
|
||
#'+ setup, include=FALSE
|
||
knitr::opts_chunk$set(echo = TRUE)
|
||
|
||
library(httr)
|
||
library(read.dbc)
|
||
library(DT)
|
||
library(dygraphs)
|
||
library(knitr)
|
||
library(plotly)
|
||
library(data.table)
|
||
library(ggplot2)
|
||
theme_set(theme_bw())
|
||
|
||
|
||
#' Previously, the data was saved as an RDS file to reduce its size on disk:
|
||
#'
|
||
#' data <- fread("./data/ETLSIM.DORES_2010.csv")
|
||
#' saveRDS(data, "ETLSIM.DORES_2010.rds")
|
||
|
||
## Data for 2020 (the URL below points to the 2020 file; the document title
## and the RDS file names in this script still mention 2010)

## NOTE: `rm(list = ls())` was removed on purpose. It only deletes objects in
## the global environment (attached packages and options are untouched), so it
## does not give a clean session, and it wipes the workspace of anyone who
## sources this script interactively. Restart R to get a fresh session.

#' 2021 database: url = 'https://s3.sa-east-1.amazonaws.com/ckan.saude.gov.br/SIM/DO21OPEN.csv'

url <- 'https://diaad.s3.sa-east-1.amazonaws.com/sim/Mortalidade_Geral_2020.csv'

## Only the first 500 rows are read, which keeps the exploration fast.
## Drop `nrows` to download the whole file; fread's `select` argument can be
## used to read only some of the columns.
d <- fread(url, nrows = 500)
|
||
|
||
|
||
###################################################
|
||
## d <- readRDS("../data/ETLSIM.DORES_2010.rds") ##
|
||
## setDT(d) ##
|
||
###################################################
|
||
|
||
## Lower-case every column name so the rest of the script can refer to the
## columns without depending on the capitalisation used in the file.
setnames(d, tolower(names(d)))

names(d)

#' Selecting the 9 columns used in this exploration
#'
cols <- c(
  'dtobito', 'dtnasc', 'sexo', 'idade_obito_anos', 'racacor',
  'causabas_categoria', 'causabas_capitulo', 'res_sigla_uf', 'ocor_regiao'
)

## Keep only the columns named in `cols` (same result as `d[, ..cols]`).
d <- d[, cols, with = FALSE]

str(d)
|
||
|
||
#' ## Data óbito - Date of death

d[, .N, by = dtobito]

## Parse a date stored as ddmmyyyy digits into an IDate.
## The value is zero-padded to 8 digits before parsing. If the column was read
## as an integer, the leading zero of days 01-09 is gone ("01012020" becomes
## 1012020) and "%d%m%Y" then mis-reads day and month and leaves a truncated
## year (e.g. 20 or 950).
## NOTE(review): assumes both columns hold plain ddmmyyyy digits, as the
## "%d%m%Y" format already implies -- confirm against the data.
parse_ddmmyyyy <- function(x) {
  as.IDate(sprintf("%08d", as.integer(x)), format = "%d%m%Y")
}

d[, `:=`(idtobito = parse_ddmmyyyy(dtobito),
         idtnasc = parse_ddmmyyyy(dtnasc))]

d[, .N, by = idtobito]

d[, .N, by = year(idtobito)]
d[, .N, by = month(idtobito)]
d[, .N, by = mday(idtobito)]

testing <- grep("(^20)-", d$idtobito, value = TRUE) # Values with incomplete year

## Substitute abnormal values in year. gsub() returns character, so the result
## is converted back to IDate; a character column would make the histogram at
## the end of this section fail (it needs a continuous x).
d[, idtobito := as.IDate(gsub("^20-", "2020-", idtobito), format = "%Y-%m-%d")]

d[, .N, by = year(idtnasc)]
d[, .N, by = month(idtnasc)]
d[, .N, by = mday(idtnasc)]

d[year(idtnasc) < 1900, .(idtnasc)]

testable <- grep("^(9)", d$dtnasc, value = TRUE) # abnormal values in year

table(testable)

## Substitute abnormal values in year, again restoring the IDate class that
## gsub() drops.
d[, idtnasc := as.IDate(gsub("^(9)", "19", idtnasc), format = "%Y-%m-%d")]

d[, .N, by = year(idtnasc)]

d[, sum(is.na(idtobito))]

class(d$idtobito)

ggplot(d, aes(idtobito)) +
  geom_histogram(bins = 100)
|
||
|
||
|
||
#' ## Data nascimento - Date of birth

d[, .N, by = dtnasc]

sum(is.na(d$dtnasc))

## Plot the parsed birth date (`idtnasc`) instead of the raw `dtnasc` code:
## `dtnasc` holds ddmmyyyy digits, so binning it as a number groups records by
## day of month rather than by date. as.IDate() leaves an IDate column as it
## is and converts it back if the column is held as "YYYY-MM-DD" text.
ggplot(d, aes(as.IDate(idtnasc))) +
  geom_histogram(bins = 100) +
  labs(x = "idtnasc")
|
||
|
||
#' ## Sexo - Sex

d[, .N, by = sexo]

## Age distribution by sex. The age column kept in `d` is `idade_obito_anos`;
## there is no `idade` column once `d` has been reduced to `cols`.
ggplot(d, aes(factor(sexo), idade_obito_anos)) +
  geom_boxplot()
|
||
|
||
|
||
#' ## Idade - Age

## The age column kept in `d` is `idade_obito_anos`; there is no `idade`
## column once `d` has been reduced to `cols`.
d[, .N, by = idade_obito_anos]

#' Missing data

d[, sum(is.na(idade_obito_anos))]

ggplot(d, aes(idade_obito_anos)) +
  geom_histogram(bins = 200)
|
||
|
||
|
||
## Age histogram filled by sex, shared by the two faceted views below.
## Rows with sexo == 'Ignorado' are left out. Age is read from
## `idade_obito_anos`, the age column kept in `d` (there is no `idade` column
## once `d` has been reduced to `cols`).
age_by_sex <- ggplot(d[sexo != 'Ignorado', ],
                     aes(idade_obito_anos, fill = sexo)) +
  geom_histogram(bins = 200, alpha = 0.7) +
  labs(fill = '') +
  theme(legend.position = c(.9, .9))

## One panel per sex, stacked in a single column.
p1 <- age_by_sex + facet_wrap(~ sexo, ncol = 1)

p1

## Sex in rows, `racacor` in columns.
p2 <- age_by_sex + facet_grid(sexo ~ racacor)

p2
|
||
|
||
#' ## Def Raça cor - Ethnic social representation
#'
#' Colour reported by the person responsible for the information about the
#' deceased (1 – Branca/White; 2 – Preta/Black; 3 – Amarela/Yellow;
#' 4 – Parda/Brown; 5 – Indígena/Indigenous).

d[, .N, by = racacor]
|
||
|
||
|
||
#' ## Causa básica - Cause of death

## Only `causabas_categoria` and `causabas_capitulo` are kept in `d`.
## `causabas` and `causabas_o` are not in `cols`, so grouping by them raises
## an error; the category column is counted instead.
d[, .N, by = causabas_categoria]

## d[, .N, causabas_o]   # add 'causabas_o' to `cols` to re-enable

d[, .N, by = causabas_capitulo] # cid chapter

## Counts per ICD chapter and sex, leaving out rows with sexo == 'Ignorado'.
cap <- d[sexo != 'Ignorado', .N, by = .(causabas_capitulo, sexo)]

names(cap)

## Left-over dplyr fragment, disabled: the pipe starts with `count()` called
## without any data, and `cap` above already holds the counts that the plot
## below uses.
## count() %>%
##     mutate(Sexo = as.factor(sexo))

## Horizontal bars, chapters ordered by count, one panel per sex.
p3 <- ggplot(cap, aes(reorder(causabas_capitulo, N), N, fill = sexo)) +
  geom_col() +
  coord_flip() +
  theme_bw() +
  labs(y = "", x = "", fill = '') +
  facet_wrap(~ sexo)

p3
|
||
|
||
#' ## Região de ocorrência - Region of occurrence
|
||
|
||
|
||
## Row count per value of `ocor_regiao`.
d[, .N, by = ocor_regiao]

## Earlier dplyr version of the same count, drawn as horizontal bars
## (kept for reference, not run):
## d %>%
##     count(ocor_REGIAO) %>%
##     ggplot(aes(reorder(ocor_REGIAO, +n), n)) +
##     geom_col() +
##     coord_flip()


#' ## Mortalidade por estado

## Row count per value of `res_sigla_uf`.
d[, .N, by = res_sigla_uf]
|