#' ---
#' title: "Web scraping"
#' date: "2022-06-17"
#' author: "Jose https://ajuda.multifarm.top"
#' output:
#'   html_document:
#'     code_folding: show
#'     toc: yes
#'     toc_float:
#'       smooth_scroll: true
#'     df_print: paged
#'     highlight: zenburn
#' ---

# NOTE: the former `rm(list = ls())` call was dropped on purpose. It clears
# the caller's workspace as a side effect and is unnecessary when the script
# is rendered or sourced in a fresh R session.

#' Libraries
library(rvest)

#' Data
# Page to scrape (AMP-cached copy of the blog post).
url <- "https://www-remessaonline-com-br.cdn.ampproject.org/c/s/www.remessaonline.com.br/blog/agtech/amp/"

#'------------------------------------------------------------------------------
#'
#' ## Scraping a url
#'
#' ------------------------------------------------------------------------------

# Headings ----

# Download and parse the page once; every query below reuses `u`.
u <- read_html(url)

# Inspect the parsed object: its class, its internal structure, and the
# second component of the underlying list.
class(u)
str(u)
u[[2]]

# Select heading elements level by level with CSS tag selectors.
html_elements(u, "h1")
html_elements(u, "h2")

# Extract the text of the <h3> headings and show it as a one-column table.
html_elements(u, "h3") |>
  html_text2() |>
  data.table::data.table()

html_elements(u, "h4")
html_elements(u, "h5")

# Direct children of the document root.
html_children(u)

# Tag names "h1" ... "h6". Not used in the visible part of the script; kept
# in case later code relies on it.
nodes <- paste0("h", 1:6)

# For the selector documentation, run `?html_elements` interactively. The
# help lookup is not executed here because it is meaningless when rendering.

# Paragraphs ----

# Text of every <p> element, as a character vector ...
pel <- html_elements(u, "p") |>
  html_text()

# ... and as a list, so single paragraphs can be pulled out with `[[`.
peli <- as.list(pel)
peli[[2]]

# List elements ----

# Text of whole unordered lists, then of the individual list items.
html_elements(u, "ul") |>
  html_text()

html_elements(u, "li") |>
  html_text()