## ----include = FALSE----------------------------------------------------------
# Document-wide knitr chunk options: collapse source and output into a single
# block, and prefix every line of printed output with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## ----eval=FALSE---------------------------------------------------------------
#  install.packages("ralger")
#  

## ----example------------------------------------------------------------------
library(ralger)

# Page to scrape: the 2021 ARWU ranking on shanghairanking.com.
my_link <- "http://www.shanghairanking.com/rankings/arwu/2021"

# CSS selector of the elements to extract (here: every <span> nested inside
# an <a>). SelectorGadget is a handy tool if you're not familiar with CSS
# selectors.
my_node <- "a span"

# Should the function clean the extracted vector or not? Default is FALSE.
clean <- TRUE

best_uni <- scrap(
  link = my_link,
  node = my_node,
  clean = clean
)

# Show the first 10 extracted values.
head(best_uni, n = 10)


## -----------------------------------------------------------------------------
# Build the URLs of pages 1 to 3 by appending the page number to the base link.
base_link <- "http://quotes.toscrape.com/page/"
links <- paste0(base_link, seq_len(3))

# CSS class selector passed to scrap() as the node to extract.
node <- ".text"

# scrap() receives the whole vector of links here; show the first 10 results.
head(
  scrap(link = links, node = node),
  n = 10
)

## -----------------------------------------------------------------------------
# Getting the `class` attribute of every anchor element
# on the rOpenSci website.
#
# The result is stored as `link_classes` rather than `attributes` so that the
# script does not reuse the name of base::attributes().
link_classes <- attribute_scrap(
  link = "https://ropensci.org/",
  node = "a", # the <a> tag
  attr = "class" # getting the class attribute
)

head(link_classes, 10) # NA values are <a> tags without a class attribute

## -----------------------------------------------------------------------------

# Extract the `src` attribute of the <script> elements found on the rOpenSci
# homepage.
js_depend <- attribute_scrap(
  link = "https://ropensci.org/",
  node = "script",
  attr = "src"
)

# Print the full result.
js_depend


## -----------------------------------------------------------------------------


# Scrape the table from Box Office Mojo's "top lifetime gross" chart page.
#
# The result is stored as `gross_table` rather than `data` so that the script
# does not reuse the name of utils::data().
gross_table <- table_scrap(
  link = "https://www.boxofficemojo.com/chart/top_lifetime_gross/?area=XWW"
)

# Show the first rows of the scraped table.
head(gross_table)


## ----example3, message=FALSE, warning=FALSE-----------------------------------

# First catalogue page of books.toscrape.com.
my_link <- "http://books.toscrape.com/catalogue/page-1.html"

# One CSS selector per column to extract.
my_nodes <- c(
  "h3 > a",            # Title
  ".price_color",      # Price
  ".availability"      # Availability
)

# Column names, in the same order as the selectors in `my_nodes`.
# Named `col_names` rather than `names` so that the script does not reuse the
# name of base::names().
col_names <- c("title", "price", "availability")

tidy_scrap(link = my_link, nodes = my_nodes, colnames = col_names)


## ----example4-----------------------------------------------------------------

# Run titles_scrap() on the New York Times homepage.
titles <- titles_scrap(
  link = "https://www.nytimes.com/"
)

# Show the first few results.
head(titles)


## -----------------------------------------------------------------------------

# Same page as before, now passing a `contain` pattern.
# NOTE(review): the oddly-cased pattern presumably demonstrates that matching
# ignores case when case_sensitive = FALSE -- confirm against the ralger docs.
titles <- titles_scrap(
  link = "https://www.nytimes.com/",
  contain = "TrUMp",
  case_sensitive = FALSE
)

# Show the first few results.
head(titles)


## -----------------------------------------------------------------------------

# Run paragraphs_scrap() on the rOpenSci homepage.
pgs <- paragraphs_scrap(
  link = "https://ropensci.org/"
)

# Show the first few results.
head(pgs)

## -----------------------------------------------------------------------------

# Same page, this time with collapse = TRUE; the result is printed directly.
paragraphs_scrap(
  link = "https://ropensci.org/",
  collapse = TRUE
)


## -----------------------------------------------------------------------------

# Run weblink_scrap() on a World Bank reports page, passing "PDF" as the
# `contain` pattern with case_sensitive = FALSE.
links <- weblink_scrap(
  link = "https://www.worldbank.org/en/access-to-information/reports/",
  contain = "PDF",
  case_sensitive = FALSE
)

# Show the first few results.
head(links)

## -----------------------------------------------------------------------------

# Run images_preview() on the Posit homepage.
imgs <- images_preview(
  link = "https://posit.co/"
)

# Show the first few results.
head(imgs)

## ----eval=FALSE---------------------------------------------------------------
#  # Suppose we're in a project which has a folder called my_images:
#  images_scrap(
#    link = "http://books.toscrape.com/",
#    imgpath = here::here("my_images"),
#    extn = "jpg"  # images here use .jpg
#  )

## ----eval=FALSE---------------------------------------------------------------
#  pdf_scrap(
#    link = "https://www.make-it-in-germany.com/en/visa-residence/types/eu-blue-card",
#    path = here::here("my_pdfs")
#  )

## ----eval=FALSE---------------------------------------------------------------
#  csv_scrap(
#    link = "https://sample-files.com/data/csv/",
#    path = here::here("my_csvs")
#  )

## ----eval=FALSE---------------------------------------------------------------
#  xlsx_scrap(
#    link = "https://file-examples.com/index.php/sample-documents-download/sample-xls-download/",
#    path = here::here("my_xlsx")
#  )

## ----eval=FALSE---------------------------------------------------------------
#  xls_scrap(
#    link = "https://file-examples.com/index.php/sample-documents-download/sample-xls-download/",
#    path = here::here("my_xls")
#  )

## -----------------------------------------------------------------------------

# Run images_noalt_scrap() on the R Consortium homepage and print the result.
images_noalt_scrap(
  link = "https://www.r-consortium.org/"
)


## -----------------------------------------------------------------------------
# WebAIM is the reference website for web accessibility, so the same call is
# repeated on one of its pages.
images_noalt_scrap(
  link = "https://webaim.org/techniques/forms/controls"
)

## -----------------------------------------------------------------------------
# Run comments_scrap() on the Posit homepage and show the first few results.
head(
  comments_scrap("https://posit.co")
)