# Scrape a council web page into a podcast RSS feed.
# Source page: https://www.sutton.gov.uk/index.aspx?articleid=4332
| require 'nokogiri' | |||||
| require 'open-uri' | |||||
| require 'time' | |||||
| require 'pp' | |||||
# Channel-level metadata written into the generated feed.
# Frozen so the shared strings cannot be mutated by accident.
FEED_TITLE  = "Cheam North and Worcester Park Local Committee".freeze
FEED_IMAGE  = "https://dl.dropbox.com/u/300783/logo.png".freeze
FEED_AUTHOR = "London Borough of Sutton".freeze
FEED_LINK   = "https://www.sutton.gov.uk/index.aspx?articleid=4332".freeze
# Scrape the page: walk the children of #bodytext, remember the most recent
# meeting date seen, and collect every MP3 link found under that date.
#
# Leaves behind:
#   doc     - the parsed document
#   meeting - text of the last meeting date that parsed ('' if none)
#   items   - array of { :d => Time, :href => String, :title => String }
url = "cnwp.html"

# Block form closes the handle as soon as the contents have been read.
# NOTE: on Ruby 3.0+ Kernel#open no longer fetches remote URLs even with
# open-uri loaded; use URI.open if `url` is changed to an http(s) address.
doc = Nokogiri.parse(open(url) { |io| io.read })

body = doc.at("#bodytext")
abort "No #bodytext element found in #{url}" if body.nil?

date_pattern = /\d{1,2}\s+\w+\s+\d{4}/ # eg 10 December 2012

meeting = ''
meeting_time = nil # parsed form of `meeting`; nil until a date has been seen
items = []

body.children.each do |node|
  # Parse only the matched date, not the whole node text, so surrounding
  # words or link titles in the same node cannot confuse Time.parse.
  date_text = node.inner_text[date_pattern]
  if date_text
    begin
      meeting_time = Time.parse(date_text)
      meeting = date_text
    rescue ArgumentError
      # Looked like a date but is not one; keep the previous meeting date.
      warn "Ignoring unparseable date #{date_text.inspect}"
    end
  end

  node.children.each do |subnode|
    next unless subnode.name == 'a'

    # Anchors without an href (e.g. named anchors) yield nil; to_s makes
    # them an empty string that simply fails the .mp3 check below.
    href = subnode['href'].to_s.strip
    next unless href =~ /\.mp3\z/i

    if meeting_time.nil?
      # No date precedes this link, so there is nothing to use as pubDate.
      warn "Skipping #{href}: no meeting date found before this link"
      next
    end

    items << {
      :d => meeting_time,
      :href => href,
      :title => subnode.inner_text.strip
    }
  end
end
# Render the collected items as an RSS 2.0 feed with iTunes podcast tags and
# print the XML to stdout.
builder = Nokogiri::XML::Builder.new do |xml|
  xml.rss('xmlns:itunes' => "http://www.itunes.com/dtds/podcast-1.0.dtd",
          :version => "2.0") {
    xml.channel {
      xml.title FEED_TITLE
      xml.link FEED_LINK
      # RSS 2.0 requires a channel <description>; no separate summary text is
      # defined in this script, so the feed title is reused.
      xml.description FEED_TITLE
      xml['itunes'].image(:href => FEED_IMAGE)
      xml['itunes'].author FEED_AUTHOR
      items.each do |i|
        xml.item {
          xml.title i[:title]
          xml['itunes'].author FEED_AUTHOR
          # `length` is a required enclosure attribute. The file size is not
          # known without fetching each MP3, so 0 is given as "unknown".
          xml.enclosure(
            :url => i[:href],
            :length => "0",
            :type => "audio/mpeg"
          )
          xml.guid i[:href]
          xml.pubDate i[:d].rfc822
        }
      end
    }
  }
end
puts builder.to_xml