|
- # Scrape webpage into a podcast RSS feed
- # https://www.sutton.gov.uk/index.aspx?articleid=4332
-
- require 'nokogiri'
- require 'open-uri'
- require 'time'
- require 'pp'
-
# Feed-level metadata written into the RSS <channel> element.
FEED_TITLE = "Cheam North and Worcester Park Local Committee"
FEED_IMAGE = "https://dl.dropbox.com/u/300783/logo.png"
FEED_AUTHOR = "London Borough of Sutton"
FEED_LINK = "https://www.sutton.gov.uk/index.aspx?articleid=4332"

# Page to scrape. NOTE(review): presumably a locally saved copy of FEED_LINK - confirm.
url = "cnwp.html"

# Block form of open closes the handle as soon as the page has been read.
doc = Nokogiri.parse(open(url) { |io| io.read })

meeting = '' # stripped text of the most recent node that contained a date
items = []   # one hash (:d, :href, :title) per MP3 link, in document order

doc.at("#bodytext").children.each do |node|
  text = node.inner_text
  meeting = text.strip if text =~ /\d{1,2}\s+\w+\s+\d{4}/ # eg 10 December 2012

  node.children.each do |subnode|
    next unless subnode.name == 'a'

    # Anchors without an href attribute (e.g. <a name="...">) yield nil here;
    # to_s turns that into "" so they are skipped instead of raising.
    # Stripping before the test keeps the match and the stored value consistent.
    href = subnode['href'].to_s.strip
    next unless href =~ /\.mp3$/i

    items << {
      # Time.parse raises ArgumentError if no dated node has been seen yet
      # (meeting is still the empty string).
      :d => Time.parse(meeting),
      :href => href,
      :title => subnode.inner_text.strip
    }
  end
end
-
# Attributes for the root <rss> element: RSS 2.0 plus the iTunes podcast namespace.
rss_attributes = {
  'xmlns:itunes' => "http://www.itunes.com/dtds/podcast-1.0.dtd",
  :version => "2.0"
}

# Assemble the feed document: channel metadata followed by one <item> per scraped link.
builder = Nokogiri::XML::Builder.new do |xml|
  xml.rss(rss_attributes) do
    xml.channel do
      # Channel-level metadata.
      xml.title FEED_TITLE
      xml.link FEED_LINK
      xml['itunes'].image(:href => FEED_IMAGE)
      xml['itunes'].author FEED_AUTHOR

      # Each entry is a hash with :title, :href and :d (a Time).
      items.each do |entry|
        enclosure_url = entry[:href]

        xml.item do
          xml.title entry[:title]
          xml['itunes'].author FEED_AUTHOR
          xml.enclosure(:url => enclosure_url, :type => "audio/mpeg")
          # The enclosure URL doubles as the item's guid.
          xml.guid enclosure_url
          xml.pubDate entry[:d].rfc822
        end
      end
    end
  end
end

# Write the serialized feed to standard output.
puts builder.to_xml
|