From 8b24f381a0c6289d5c399affba9a7aedf902abda Mon Sep 17 00:00:00 2001
From: Jose
Date: Thu, 27 Oct 2022 12:27:40 -0300
Subject: [PATCH] A web scraping example

---
 README.org         |  4 ++++
 script/scraping2.R | 49 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 53 insertions(+)
 create mode 100755 script/scraping2.R

diff --git a/README.org b/README.org
index 8363d39..c680466 100755
--- a/README.org
+++ b/README.org
@@ -21,6 +21,10 @@ data using the R programming environment
 
  * [[./script/iteration.R][Lapply, apply and for loop: brief introduction]]
 
+** Web scraping
+
+ * [[./script/scraping2.R][Web scraping example]]
+
 ** Interesting packages
 
  * [[./script/roadoi_package.R]['roadoi']]
diff --git a/script/scraping2.R b/script/scraping2.R
new file mode 100755
index 0000000..380ac49
--- /dev/null
+++ b/script/scraping2.R
@@ -0,0 +1,49 @@
+#' ---
+#' title: "Web scraping example"
+#' date: "2022-06-24"
+#' author: "Jose"
+#' output:
+#'   html_document:
+#'     code_folding: show
+#'     toc: yes
+#'     toc_float:
+#'       smooth_scroll: true
+#'     df_print: paged
+#'     highlight: zenburn
+#' ---
+
+#' Run in a fresh R session rather than calling `rm(list = ls())`, which
+#' deletes the caller's objects but leaves loaded packages and options as is.
+
+#' Libraries
+
+library(rvest)
+# NOTE(review): no crul function is called below; confirm it is still needed.
+library(crul)
+
+#' MDPI search URL (query "pasture", journal "remotesensing")
+
+url <- "https://www.mdpi.com/search?q=pasture&journal=remotesensing"
+
+#' Download and parse the page (needs network access)
+
+page <- read_html(url)
+
+#' Text of the first child node, then the structure of the parsed object
+
+html_children(page)[1] |> html_text()
+str(page)
+
+#' Heading elements, levels 1 to 4, then the level-2 headings as plain text
+
+html_elements(page, "h1")
+html_elements(page, "h2")
+html_elements(page, "h3")
+html_elements(page, "h4")
+html_elements(page, "h2") |> html_text2()
+
+#' Paragraph elements. HTML has a single paragraph tag, `p`; it has no
+#' numbered counterparts to `h1`..`h4` such as `p1`, `p2` or `p3`.
+
+html_elements(page, "p")
+