· 1 min read
Screencast: Screen Scraping mit Nokogiri
Download (43 MB, 13:34) Alternativer Download für iPod & Apple TV (21.9 MB, 13:34)
Ressourcen:
Quellcode:
[bash]
# Install the nokogiri gem; everything after the bare "--" is handed to the
# native-extension build so it uses the libxml2 headers/libs under /usr/local.
sudo gem install nokogiri -- --with-xml2-include=/usr/local/include/libxml2 --with-xml2-lib=/usr/local/lib
[/bash]
[ruby]
# nokogiri_test.rb
#
# Fetches the Walmart search results page for "batman", prints the page
# title, and then prints the title, price and link of every ".item" element.
require 'rubygems'
require 'nokogiri'
require 'open-uri'

url = "http://www.walmart.com/search/search-ng.do?search_constraint=0&ic=48_0&search_query=batman&Find.x=0&Find.y=0&Find=Find"

# open-uri lets Kernel#open read from an HTTP URL.
# NOTE: Ruby 3.0+ no longer accepts URLs here; use URI.open(url) on those versions.
doc = Nokogiri::HTML(open(url))
puts doc.at_css("title").text

doc.css(".item").each do |item|
  title = item.at_css(".prodLink").text
  # The "$" must be escaped: unescaped it is an end-of-line anchor and the
  # pattern could never match a price such as "$19.99".
  price = item.at_css(".PriceCompare .BodyS, .PriceXLBold").text[/\$[0-9.]+/]
  puts "#{title} - #{price}"
  puts item.at_css(".prodLink")[:href]
end
# lib/tasks/product_prices.rake desc “Fetch product prices” task :fetch_prices => :environment do require ‘nokogiri’ require ‘open-uri’ Product.find_all_by_price(nil).each do |product| url = “http://www.walmart.com/search/search-ng.do?search\_constraint=0&ic=48\_0&search\_query=#{CGI.escape(product.name)}&Find.x=0&Find.y=0&Find=Find” doc = Nokogiri::HTML(open(url)) price = doc.at_css(“.PriceCompare .BodyS, .PriceXLBold”).text[/[0-9.]+/] product.update_attribute(:price, price) end end [/ruby]