I'm trying to scrape some text from a website using a `for` loop, but the loop doesn't move on to the next item in my vector. I'd appreciate any helpful advice. Thanks!
library(rvest)
library(xml2)
# Lookup table for the two test countries: numeric id, display name, URL slug.
ID <- c(1:2)
Land <- c('Afghanistan','Ägypten')
url <- c('afghanistan', 'aegypten')
# A single NA; data.frame() recycles it, so every row starts with Text = NA.
Text <- (NA)
data <- data.frame(ID, Land, Text)
# Attempted scrape: `i` takes each slug in `url` in turn.
for(i in url) {
# Full page address, e.g. "https://www.reporter-ohne-grenzen.de/afghanistan".
nam <- paste("https://www.reporter-ohne-grenzen.de", i, sep = "/")
# Creates a variable whose *name* is the URL string and whose value is the
# slug; nothing later in the loop reads it.
assign(nam, i)
# NOTE(review): `nam` already ends with the slug, so paste0(nam, i) requests
# ".../afghanistanafghanistan" -- presumably not the intended page.
webpage <- read_html(paste0(nam, i))
# NOTE(review): this assigns the scraped string to the whole Text column
# (recycled to every row) rather than to the row for this slug, so each
# iteration overwrites the result of the previous one. The chained `i <-`
# additionally stores the scraped text in `i`.
data$Text <- i <- webpage %>% html_nodes('div.text') %>% .[[1]] %>% html_text()
}
Hmm, I'm not sure I made my problem clear. Here's an example that produces my desired data output:
library(rvest)
library(xml2)
# Two-country fixture: ids, display names, URL slugs and a placeholder Text
# column (a single NA that data.frame() recycles to both rows).
ID <- 1:2
Land <- c('Afghanistan', 'Ägypten')
url <- c('afghanistan', 'aegypten')
Text <- NA
data <- data.frame(ID, Land, Text)

# Fetch each country page by hand and keep the text of its first `div.text`
# node.
afghanistan <- read_html('https://www.reporter-ohne-grenzen.de/afghanistan') %>%
  html_nodes('div.text') %>%
  html_text() %>%
  .[[1]]

aegypten <- read_html('https://www.reporter-ohne-grenzen.de/aegypten') %>%
  html_nodes('div.text') %>%
  html_text() %>%
  .[[1]]

# Desired data output: one scraped text per row, in the same order as `Land`.
data$Text <- c(afghanistan, aegypten)
I don't want to repeat the following lines for each of the 180 countries:
# Per-country boilerplate: download the page, select the `div.text` nodes and
# keep the text of the first one.
aegypten <- read_html('https://www.reporter-ohne-grenzen.de/aegypten') %>%
  html_nodes('div.text') %>%
  html_text() %>%
  .[[1]]
Here's the solution:
library(rvest)
library(xml2)
# Scrape the first `div.text` block of every country page into `data$Text`.
#
# `Land` holds the display names and `Url` the matching URL slugs; both must be
# in the same order. `ID` is derived from `Url`, so adding a country only means
# extending those two vectors.
Land <- c('Afghanistan', 'Ägypten', 'Deutschland', 'Italien')
Url <- c('afghanistan', 'aegypten', 'deutschland', 'italien')
ID <- seq_along(Url)

# Start from a character NA so the column already has its final type;
# stringsAsFactors = FALSE keeps it a plain character column on R < 4.0 too.
Text <- NA_character_
data <- data.frame(ID, Land, Text, stringsAsFactors = FALSE)

website <- 'https://www.reporter-ohne-grenzen.de'

for (i in seq_along(Url)) {
  country <- Url[i]
  html_url <- paste(website, country, sep = '/')

  # A failed download or a page without any `div.text` node must not abort the
  # whole run: warn, leave NA in that row and continue with the next country.
  output <- tryCatch(
    {
      nodes <- html_nodes(read_html(html_url), 'div.text')
      if (length(nodes) > 0) {
        html_text(nodes[[1]])
      } else {
        warning('No div.text node found at ', html_url, call. = FALSE)
        NA_character_
      }
    },
    error = function(e) {
      warning(
        'Failed to scrape ', html_url, ': ', conditionMessage(e),
        call. = FALSE
      )
      NA_character_
    }
  )

  # Write only row i, so results from earlier iterations are kept.
  data$Text[i] <- output
}