I'm able to scrape data from basic HTML pages, but I'm having trouble scraping the site below. It looks like the data is rendered via JavaScript, and I'm not sure how to approach it.
Using Relenium:
# library() fails immediately if a package is missing; require() would only
# return FALSE and let the script break later with a less helpful error.
library(relenium) # More info: https://github.com/LluisRamon/relenium
library(XML)

# Listing URL; the %d placeholder receives the page number.
url_template <- paste0(
  "http://www.machinerytrader.com/list/list.aspx?pg=%d&bcatid=4&DidSearch=1",
  "&EID=1&LP=MAT&ETID=5&catid=1015&mdlx=Contains&Cond=All&SO=26",
  "&btnSearch=Search&units=imperial"
)

# Build the XPath selecting the `col`-th <td> of every row in each table whose
# id ends with `suffix`. XPath 1.0 has no ends-with(), so the suffix test is
# written as substring(@id, string-length(@id) - (nchar(suffix) - 1)).
cell_xpath <- function(suffix, col) {
  sprintf(
    '//table[substring(@id, string-length(@id)-%d) = "%s"]/tbody/tr/td[%d]',
    nchar(suffix) - 1L, suffix, col
  )
}

# One XPath per output column; the names become the column names of `res`.
xpaths <- c(
  year_manu_model = cell_xpath("tblListingHeader", 1L),
  sn              = cell_xpath("tblListingHeader", 2L),
  price           = cell_xpath("tblListingHeader", 3L),
  loc             = cell_xpath("tblListingHeader", 4L),
  auc             = cell_xpath("tblContent", 2L)
)

# Load `url` in `browser`, parse the rendered page source and return a
# character matrix with one column per entry of `xpaths`.
# Stops if the queries return different numbers of nodes, because cbind()
# would otherwise recycle the shorter columns and misalign the rows.
scrape_page <- function(browser, url, xpaths) {
  browser$get(url)
  doc <- htmlParse(browser$getPageSource())
  cells <- lapply(xpaths, function(xp) {
    # as.character() keeps a no-match result as character(0) rather than list()
    as.character(xpathSApply(doc, xp, xmlValue))
  })
  counts <- vapply(cells, length, integer(1))
  if (length(unique(counts)) != 1L) {
    stop(
      "Column lengths differ for ", url, ": ",
      paste(names(counts), counts, sep = "=", collapse = ", "),
      call. = FALSE
    )
  }
  do.call(cbind, cells)
}

firefox <- firefoxClass$new() # init browser
# NOTE(review): the browser started above is never shut down by this script;
# check the relenium documentation for the appropriate teardown call.
pages <- 1:2

# Collect one matrix per page and bind once, instead of growing `res` with
# rbind() on every iteration.
page_tables <- lapply(pages, function(page) {
  scrape_page(firefox, sprintf(url_template, page), xpaths)
})
res <- do.call(rbind, page_tables)

# Preview: first 30 characters of every cell (substr() keeps the matrix shape).
substr(res, 1L, 30L)
# year_manu_model sn price loc auc
# [1,] " 1972 AMERICAN 5530" "GS14745W" "US $50,100" "MI " "\n\t\t\t\t\tAuction: 1/9/2013; 4,796"
# [2,] " AUSTIN-WESTERN 307" "307" "US $3,400" "MT " "\n\t\t\t\t\tDetails & Photo(s)Video("
# ...