Previous chapter
Case Studies: A dynamic price comparison workbook from willhaben.at

A dynamic price comparison workbook from willhaben.at

In this exercise we want to create a dynamic price comparison workbook based on current data from the popular Austrian website [willhaben.at](https://www.willhaben.at).

  1. Use the code in the next section to extract all relevant data to a data.frame
  2. Create a new Excel workbook including:
     2.1. A sheet with a nicely formatted table. Add a column containing the price per square meter.
     2.2. A sheet with a price histogram/density plot.
  3. Optional: Make sure that the sheet gets updated incrementally if the script is run multiple times.

Extract first ten pages from result set in willhaben

library(rvest)
library(dplyr)

# Result pages to fetch: one URL per page number in `pages`.
pages <- 1:10
links <- sprintf(
  "https://www.willhaben.at/iad/immobilien/immobilien/angebote?areaId=900&page=%s",
  pages
)

# XPath matching any element whose class list contains "isRealestate".
# Not referenced by the scraping code visible in this script.
html_select <- '//*[contains(concat( " ", @class, " " ), concat( " ", "isRealestate", " " ))]'

# Every selector below is built on top of `path_result`, which matches a
# <section> whose class attribute is exactly "content-section isRealestate".
path_result <- "//section[@class='content-section isRealestate']"
path_link <- paste0(path_result, "/div[@class='header w-brk']/a")
path_size <- paste0(path_result, "/div[@class='info']/span[@class='desc-left']")
path_rooms <- paste0(path_result, "/div[@class='info']/span[@class='wh-pipe']")

# `path_price` was previously assigned twice in a row; only the second value
# ever took effect, so the dead first assignment
#   paste0(path_result, "//div[@class='info']")
# is kept here as a comment only.
# NOTE(review): the suffix below looks like a document-level path copied from
# browser dev tools and appended to `path_result`; confirm it matches anything
# before enabling price extraction.
path_price <- paste0(
  path_result,
  "//*[@id='resultlist']/article[1]/section[3]/div[2]/span[2]"
)
path_address <- paste0(path_result, "/div[@class='addressLine']")
path_bottom <- paste0(path_result, "/div[@class='bottom']")


# Scrape every result page and collect one row per listing.
#
# Reads `links`, `path_result`, `path_link` and `path_size` defined earlier in
# the script and produces:
#   items - list of per-page data frames, keyed by page URL
#   df    - all pages combined, with columns `link`, `link_text` and `size`
base_url <- "https://www.willhaben.at"

# `path_link` and `path_size` are anchored at the document root. Selecting
# each field across the whole page gives vectors whose lengths differ as soon
# as one listing lacks a field, and data.frame() then fails. Re-anchor the
# selectors at a single listing instead: html_node() returns exactly one match
# (or a missing node, read back as NA) per listing.
stopifnot(
  startsWith(path_link, path_result),
  startsWith(path_size, path_result)
)
rel_link <- sub(path_result, ".", path_link, fixed = TRUE)
rel_size <- sub(path_result, ".", path_size, fixed = TRUE)

items <- list()
for (l in links) {
  nodes_page <- xml2::read_html(l)
  listings <- nodes_page %>% html_nodes(xpath = path_result)
  if (length(listings) == 0) {
    # No listing on this page. Skip it: paste0() on a zero-length href would
    # still return one string and produce a bogus row / length mismatch.
    next
  }

  anchors <- listings %>% html_node(xpath = rel_link)
  href <- anchors %>% html_attr("href")
  # Keep NA for listings without a link instead of pasting the text "NA".
  link <- ifelse(is.na(href), NA_character_, paste0(base_url, href))
  link_text <- anchors %>% html_text(trim = TRUE)
  size <- listings %>%
    html_node(xpath = rel_size) %>%
    html_text(trim = TRUE)

  # Fields that are not extracted yet (disabled in the original script; they
  # still use page-level selection and would need the same per-listing
  # treatment as above before being enabled):
  #rooms <- nodes_page %>% html_nodes(xpath = path_rooms) %>% html_text(trim = TRUE)
  #price <- nodes_page %>% html_nodes(xpath = path_price) %>% html_text(trim = TRUE)
  #price <- ifelse( length(price) == 0, "", price)
  #address <- nodes_page %>% html_nodes(xpath = path_address) %>% html_text(trim = TRUE)
  #bottom <- nodes_page %>% html_nodes(xpath = path_bottom) %>% html_text(trim = TRUE)
  #bottom <- ifelse( length(bottom) == 0, "", bottom)

  # stringsAsFactors = FALSE keeps the columns as character on R < 4.0 too.
  items[[l]] <- data.frame(link, link_text, size, stringsAsFactors = FALSE)
}

# Assign first, then view: View() returns NULL invisibly, so piping into it
# on the assignment line left `df` empty.
df <- bind_rows(items)
if (interactive()) {
  # The data viewer is only useful in an interactive session.
  View(df)
}