diff --git a/README.org b/README.org index d1abceb..c78af24 100755 --- a/README.org +++ b/README.org @@ -7,6 +7,8 @@ - [[./script/db.R][Usando RSQlite para salvar dados de maior tamanho: exemplo]] - [[./script/mortal_anos.R][Descrição básica do indicador "mortalidade geral" da população brasileira 2010]] +- [[./script/save_rds.R][Comprimindo tabela em formato rds no R]] +- [[./script/mort_CID10.Rmd][Compilado mortalidade por cem mil habitantes CID 10]] ** Dados @@ -16,4 +18,7 @@ Saúde do Brasil. - [[https://opendatasus.saude.gov.br/dataset/sim-1979-2019][Dados de mortalidade 1979-2019]] - [[https://opendatasus.saude.gov.br/dataset/sim-2020-2021][Dados de mortalidade 2021]] +- [[./data/mortalidade_CID_10.csv][Compilado dados mortalidade por causas - CID 10]] +- [[./data/pop_reg_2000_2020.csv][Compilado dados população brasileira]] +- [[https://diaad.s3.sa-east-1.amazonaws.com/sim/Mortalidade_Geral+-+Estrutura.pdf][Dicionário de dados mortalidade SIM]] - [[https://databank.worldbank.org/metadataglossary/world-development-indicators/series/SP.DYN.CDRT.IN][Definição do indicador "mortalidade geral" - Banco Mundial]] diff --git a/data/mortalidade_CID_10.csv b/data/mortalidade_CID_10.csv new file mode 100755 index 0000000..8a6dc26 --- /dev/null +++ b/data/mortalidade_CID_10.csv @@ -0,0 +1,20 @@ +CID 10,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019 +I. Infecciosas e parasitarias,44515,45032,45175,46533,46067,46628,46508,45945,47295,47010,48823,49175,49608,52058,52174,55022,57188,54874,54679,56666 +II. Neoplasias,120517,125348,129923,134691,140801,147418,155796,161491,167677,172255,178990,184384,191577,196954,201968,209780,215217,221821,227920,235301 +III. Sangue hemat e transt imunit,4800,5240,5217,5354,4978,4999,5496,5719,5825,6011,6284,6344,6358,6388,6108,6506,6878,6622,6601,7068 +IV. 
Endocrinas nutricionais e metabolicas,47281,47800,49222,51190,53134,53983,58904,61860,64631,66984,70276,73929,72495,74726,73972,76235,78075,79662,81365,83485 +V. Transtornos mentais e comportamentais,6139,6655,7011,7356,8158,8931,10256,10948,11852,11861,12759,13725,12641,13052,12480,12558,12674,12858,13697,14526 +VI. Sistema nervoso,11575,12296,12857,13750,15156,16384,19166,20413,21609,23018,25303,26948,28712,30300,32381,34721,36870,38786,41035,45235 +VII. Olho e anexos,10,12,11,21,21,13,28,26,39,23,31,23,38,15,18,21,20,19,21,23 +VIII. Ouvido e da apofise mastoide,133,129,115,120,119,112,145,118,125,125,125,150,139,143,157,147,173,179,169,206 +IX. Aparelho circulatorio,260603,263417,267496,274068,285543,283927,302817,308466,317797,320074,326371,335213,333295,339672,340284,349642,362091,358882,357770,364132 +X. Aparelho respiratorio,88370,90288,94754,97656,102168,97397,102866,104498,104989,114539,119114,126693,127204,137832,139045,149541,158041,155620,155191,162005 +XI. Aparelho digestivo,43029,44393,45797,46894,48661,50097,51924,53724,55272,56202,58061,59707,60509,61934,62763,64202,66044,66052,67316,68770 +XII. Pele e tecido subcutaneo,1652,1825,1932,1977,1886,2014,2466,2475,2642,2979,3225,3395,3722,3919,4300,4970,5874,6100,6273,7152 +XIII. Sist osteomuscular e tec conjuntivo,2478,2606,2885,3001,3002,3084,3597,3789,4094,4216,4541,4488,4607,5001,5325,5385,5787,5912,6153,6506 +XIV. Aparelho geniturinario,13370,14350,15167,15858,17094,18365,17421,18301,19790,22489,24519,26317,27975,29709,32510,36549,39367,40470,43428,47566 +XV. Gravidez parto e puerperio,1646,1587,1650,1597,1672,1661,1637,1615,1691,1884,1728,1680,1647,1787,1889,1896,1814,1874,1862,1726 +XVI. Algumas afec originadas no periodo perinatal,36618,34274,33136,32040,31011,29799,28336,26898,26080,25367,23723,23579,23069,22745,22482,22162,21049,21458,20764,20354 +XVII. 
Malf cong deformid e anomalias cromossomicas,9804,9520,9733,10143,10210,9927,10397,10262,10502,10360,10196,10543,10622,10752,11050,10989,10882,10995,11156,11308 +XVIII. Sint sinais e achad anorm ex clin e laborat,135749,135766,134176,133434,126922,104455,85543,80244,79161,78994,79622,78363,74935,71804,71191,71713,75869,71822,70505,74972 +XX. Causas externas,118397,120954,126550,126657,127470,127633,128388,131032,135936,138697,143256,145842,152013,151683,156942,152136,155861,158657,150814,142800 diff --git a/data/pop_reg_2000_2020.csv b/data/pop_reg_2000_2020.csv new file mode 100755 index 0000000..b672930 --- /dev/null +++ b/data/pop_reg_2000_2020.csv @@ -0,0 +1,6 @@ +Regiao,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019 +Norte,12399633,13245084,13504599,13784881,14373260,14698878,15022060,14648122,15142684,15385707,15865678,16095187,16347807,17013559,17261983,17504446,17740418,17936201,18182253,18430980 +Nordeste,46768451,48331186,48845112,49352225,50427274,51019091,51609027,51535782,53088499,53591197,53078137,53501859,53907144,55794707,56186190,56560081,56915936,57254159,56760780,57071654 +Sudeste,70758097,73470763,74447456,75391969,77374720,78472017,79561095,77873342,80187717,80915332,80353724,80975616,81565983,84465570,85115623,85745520,86356952,86949714,87711946,88371433 +Sul,24738865,25453264,25734253,26025091,26635629,26973511,27308863,26733877,27497970,27719118,27384815,27562433,27731644,28795762,29016114,29230180,29439773,29644948,29754036,29975984 +Centro-Oeste,11447472,11885529,12101540,12317271,12770141,13020767,13269517,13223393,13695944,13895375,14050340,14244192,14423952,14993191,15219608,15442232,15660988,15875907,16085885,16297074 diff --git a/script/db.R b/script/db.R index c40cf43..d94bc05 100755 --- a/script/db.R +++ b/script/db.R @@ -34,7 +34,7 @@ theme_set(theme_bw()) #' R canot handle data larger than RAM. 
Therefore, how "big" some data is, is #' relative to the memory and processing capacity of the machines. #' -#' These popultaion mortality data are larger than the RAM memory of most commom +#' These population mortality data are larger than the RAM memory of most common #' personal computers (4 to 16 Gigabytes). #' #' Thus, it would be appropriate to look for smart strategies to deal with this data. @@ -44,13 +44,13 @@ theme_set(theme_bw()) #' #' #' - Download the data in csv format, and reduce the size of each file -#' - You may use 'rds' files in R for this purposes +#' - You may use 'rds' files in R for this purpose #' - Check "?saveRDS" help for more information -#' - Work with pieces of data, extracting only columns or rows you are intrested +#' - Work with pieces of data, extracting only columns or rows you are interested #' in #' - Explore a database solution -#' - There are multiple resources to word with SQL and NoSQL databases inR -#' - Take a look to RPostgreSQL package for SQL integration +#' - There are multiple resources to work with SQL and NoSQL databases in R +#' - Take a look at RPostgreSQL package for SQL integration #' - Check the "mongolite" package for NoSQL integration #' #' Learn about SQLite database can be an option to make a first approach to @@ -61,7 +61,7 @@ theme_set(theme_bw()) #' [@wiley2020advanced] #' #' -#' And, take a look to this: +#' And, don't forget to take a look at this: ?saveRDS @@ -102,7 +102,7 @@ mortdb <- dbConnect(RSQLite::SQLite(), "mort_db.sqlite") #' #'------------------------------------------------------------------------------ #' -#' Download the data, then load the table to be written in the workspace and use #' the following to include it in the database. +#' Download the data. Then load the table to be written in the workspace. #' #' I saved the data as 'rds' to reduce the object weight. 
#' @@ -130,6 +130,9 @@ head(names(d)) #' Please note that the data has been updated since the build of this script, thus #' some colum names and type differ between tables + +#' The following code is used to include the data on the database: + dbWriteTable(mortdb, "Mortalidade_Br_2010", d) #'------------------------------------------------------------------------------ diff --git a/script/mort_CID10.Rmd b/script/mort_CID10.Rmd new file mode 100755 index 0000000..5357278 --- /dev/null +++ b/script/mort_CID10.Rmd @@ -0,0 +1,129 @@ +--- +title: "Mortalidade população brasileira 2000-2019" +subtitle: "Categorizada por capítulo CID 10" +author: "José" +date: "2021-04-22" +output: + flexdashboard::flex_dashboard: + orientation: rows + vertical_layout: fill + source_code: embed + social: menu +runtime: shiny +--- + +```{r setup, include=FALSE} +library(flexdashboard) +library(WDI) +library(ggplot2) +library(data.table) +library(DT) +library(plotly) +library(dygraphs) +library(knitr) +``` + +Óbitos categorizados por Capítulo CID 10 e por 100000 habitantes + +```{r } +ob <- fread("../data/mortalidade_CID_10.csv", + skip = 0, + header = TRUE) + +names(ob) + +cols <- setdiff(names(ob), "CID 10") # year columns: every column except the cause label + +b <- melt(ob, + id.vars = "CID 10", + measure.vars = cols, + variable.name = "Ano", + value.name = "Obitos") # wide (one column per year) to long (one row per cause and year) + + +b[, Ano := gsub("X", "", Ano)] # melt() returns Ano as a factor; gsub() stores it back as character + +str(b) +``` + +População + +```{r } +p <- fread("../data/pop_reg_2000_2020.csv", + skip = 0, + header = TRUE) + +names(p) + +str(p) + + +po <- melt(p, + id.vars = "Regiao", + measure.vars = cols, + variable.name = "Ano", + value.name = "População") # same year columns as the deaths table + +po <- po[, .("População" = sum(`População`)), by = Ano] # national total: sum over the regions + +str(po) +``` + +Unindo tabelas + +```{r } +ci <- po[b, on = "Ano"] # keeps every row of b and attaches the population of its year + +names(ci) + +ci[, Mortalidade := (Obitos / `População`) * 100000] # deaths per 100000 inhabitants + +str(ci) +``` + +Tabela +======================================================================= + +```{r } +datatable(ci, filter = "top") |> + formatRound("Mortalidade", 1) +``` + +Causas comuns 
+======================================================================= + +```{r } + +ci2 <- ci[`CID 10` %in% c("II. Neoplasias", + "IX. Aparelho circulatorio", + "X. Aparelho respiratorio", + "XX. Causas externas", + "XVIII. Sint sinais e achad anorm ex clin e laborat"), ] # the five chapters charted on this page + + +p1 <- ggplot(ci2, aes(x = Ano, y = Mortalidade, group = `CID 10`)) + + geom_point() + + geom_path(aes(colour = `CID 10`), size = 1) + + theme_minimal() + + theme(legend.position = c(0.85, 0.7)) + + labs(x = "Ano", y = "Mortalidade x 100000 habitantes") +``` + +```{r } +plotly::ggplotly(p1) +``` + +Todas as causas +======================================================================= + +```{r } +p2 <- ggplot(ci, aes(x = Ano, y = Mortalidade, group = `CID 10`)) + + geom_point() + + geom_path(aes(colour = `CID 10`), size = 1) + + theme_minimal() + + labs(x = "Ano", y = "Mortalidade x 100000 habitantes") +``` + +```{r } +plotly::ggplotly(p2) +```