I am trying to scrape basic data from Auto Trader and I can't get it to work. The outcome always depends on luck. I don't understand the error message because I didn't use summarise at all. Even when it works, it only scrapes a portion of the data I wanted.
Error in UseMethod("summarise_") : no applicable method for 'summarise_' applied to an object of class "factor"
library(rvest)
library(tidyverse)
# Empty accumulator for the scraped listings. Every column is character;
# stringsAsFactors = FALSE is spelled out because the data.frame() default
# was TRUE before R 4.0, which would turn these columns into factors.
df_base <- data.frame(
  title = character(),
  price = character(),
  mileage = character(),
  stringsAsFactors = FALSE
)

# The search URL is split around the value of the `rcs` query parameter;
# the scraping loop pastes a number between the two halves for each request.
url1 <- "https://www.autotrader.ca/cars/ab/?rcp=100&rcs="
url2 <- "&srt=3&pRng=1%2C&oRng=1000%2C&prx=-2&prv=Alberta&loc=alberta&hprc=True&wcp=True&sts=Used&showcpo=1&inMarket=advancedSearch"
# Pause execution for a randomly chosen number of seconds.
#
# call.period: numeric vector whose first and second elements are the lower
#   and upper bounds (in seconds) of the uniform distribution the delay is
#   drawn from.
#
# Prints the chosen delay (rounded to two decimals) with cat(), then sleeps
# for the unrounded delay. Returns the value of Sys.sleep().
scrape.sleep <- function(call.period = c(0.5, 1)) {
  lower <- call.period[1]
  upper <- call.period[2]
  wait_secs <- runif(1, min = lower, max = upper)

  cat(" delay of ", as.character(round(wait_secs, 2)), " seconds\n", sep = "")

  Sys.sleep(wait_secs)
}
# Scrape the result pages one by one and append all listings to `df_base`.
#
# Each page's rows are stored in a pre-allocated list and combined once after
# the loop, instead of growing `df_base` with rbind() on every iteration.
n_pages <- 50L
page_results <- vector("list", n_pages)

for (i in seq_len(n_pages)) {
  scrape.sleep(c(0.2, 0.5))

  # NOTE(review): the number sent as `rcs` starts at 100, so a request with
  # rcs=0 is never made -- confirm whether the first page should be included.
  url_string <- paste0(url1, 100L * i, url2)

  # A failed request is reported and skipped so that one bad page does not
  # abort the whole run.
  tpage <- tryCatch(
    read_html(url_string),
    error = function(e) {
      warning(
        "Page ", i, " could not be read: ", conditionMessage(e),
        call. = FALSE
      )
      NULL
    }
  )
  if (is.null(tpage)) {
    next
  }

  # Query each selector once and reuse the extracted text.
  titles <- tpage %>%
    html_nodes(".result-title span") %>%
    html_text()
  prices <- tpage %>%
    html_nodes(".price-delta .price-amount") %>%
    html_text()
  mileages <- tpage %>%
    html_nodes(".dealer-badges .kms") %>%
    html_text()

  # The three vectors are matched by position, so a page is only usable when
  # every selector returned the same number of nodes.
  counts_match <- length(titles) == length(prices) &&
    length(titles) == length(mileages)

  if (counts_match) {
    page_results[[i]] <- data.frame(
      title = titles,
      price = prices,
      mileage = mileages,
      stringsAsFactors = FALSE
    )
  } else {
    # Make the dropped page visible instead of skipping it silently.
    warning(
      "Page ", i, " skipped: found ", length(titles), " titles, ",
      length(prices), " prices and ", length(mileages), " mileages",
      call. = FALSE
    )
  }
}

# Pages that failed or were skipped left NULL slots; drop them, then bind
# everything onto the existing accumulator in a single step.
scraped_pages <- Filter(Negate(is.null), page_results)
df_base <- dplyr::bind_rows(c(list(df_base), scraped_pages))