diff --git a/README.org b/README.org
index 8363d39..c680466 100755
--- a/README.org
+++ b/README.org
@@ -21,6 +21,10 @@ data using the R programming environment
 
   * [[./script/iteration.R][Lapply, apply and for loop: brief introduction]]
 
+** Web scraping
+
+  * [[./script/scraping2.R][Web scraping example]]
+
 ** Interesting packages
 
   * [[./script/roadoi_package.R]['roadoi']]
diff --git a/script/scraping2.R b/script/scraping2.R
new file mode 100755
index 0000000..380ac49
--- /dev/null
+++ b/script/scraping2.R
@@ -0,0 +1,52 @@
+#' ---
+#' title: "Web scraping example"
+#' date: "2022-06-24"
+#' author: "Jose"
+#' output:
+#'   html_document:
+#'     code_folding: show
+#'     toc: yes
+#'     toc_float:
+#'       smooth_scroll: true
+#'     df_print: paged
+#'     highlight: zenburn
+#' ---
+
+#' Libraries
+
+library(rvest)
+library(crul)
+
+#' Page to scrape: an MDPI search for "pasture" in the journal "remotesensing".
+
+url <- "https://www.mdpi.com/search?q=pasture&journal=remotesensing"
+
+#' Download and parse the page (this step needs network access).
+
+a <- read_html(url)
+
+#' Text of the first child node of the parsed document.
+
+html_children(a)[1] |> html_text()
+
+#' Internal structure of the parsed document object.
+
+str(a)
+
+#' Heading elements, one query per heading level.
+
+html_elements(a, "h1")
+
+html_elements(a, "h2")
+
+html_elements(a, "h3")
+
+html_elements(a, "h4")
+
+#' Level-2 headings as plain text.
+
+html_elements(a, "h2") |> html_text2()
+
+#' Paragraph elements; `p` is the HTML paragraph tag.
+
+html_elements(a, "p")