|
|
@@ -0,0 +1,64 @@ |
|
|
|
# Scrape webpage into a podcast RSS feed |
|
|
|
# https://www.sutton.gov.uk/index.aspx?articleid=4332 |
|
|
|
|
|
|
|
require 'nokogiri' |
|
|
|
require 'open-uri' |
|
|
|
require 'time' |
|
|
|
require 'pp' |
|
|
|
|
|
|
|
# Channel-level metadata for the generated podcast feed. The strings are
# frozen so that no later code can mutate these shared constants in place.

# Text of the channel's <title> element.
FEED_TITLE = "Cheam North and Worcester Park Local Committee".freeze

# Artwork URL, emitted as the href of the channel's <itunes:image>.
FEED_IMAGE = "https://dl.dropbox.com/u/300783/logo.png".freeze

# Emitted as <itunes:author> on the channel and on every item.
FEED_AUTHOR = "London Borough of Sutton".freeze

# Text of the channel's <link> element (the page the recordings come from).
FEED_LINK = "https://www.sutton.gov.uk/index.aspx?articleid=4332".freeze
|
|
|
|
|
|
|
# Location of the page to scrape: a local file path, or an http(s) URL.
url = "cnwp.html"

# Load the HTML without leaking a file handle. Remote pages go through
# open-uri's URI#open (block form closes the connection), because Kernel#open
# no longer fetches URLs on Ruby 3.0+. Local paths are read with File.read.
html = if url =~ %r{\Ahttps?://}i
         URI.parse(url).open(&:read)
       else
         File.read(url)
       end
doc = Nokogiri.parse(html)

# Fail with a clear message instead of a NoMethodError on nil when the page
# does not contain the expected container element.
body = doc.at("#bodytext")
raise "No #bodytext element found in #{url}" if body.nil?

date_pattern = /\d{1,2}\s+\w+\s+\d{4}/ # eg 10 December 2012
mp3_pattern = /\.mp3\z/i

# Text of the most recent child node that contained a date; it supplies the
# publication date for every mp3 link found from that node onwards.
meeting = ''

# Collected recordings, each a hash with :d (Time), :href and :title.
items = []

body.children.each do |node|
  text = node.inner_text
  meeting = text.strip if text =~ date_pattern

  node.children.each do |subnode|
    next unless subnode.name == 'a'

    # <a> elements without an href (e.g. named anchors) yield nil here;
    # to_s turns that into '' so they are skipped instead of raising.
    href = subnode['href'].to_s.strip
    next unless href =~ mp3_pattern

    # A link that appears before any date text has nothing to parse;
    # Time.parse('') would raise and abort the whole feed.
    if meeting.empty?
      warn "Skipping #{href}: no meeting date found before this link"
      next
    end

    items << {
      :d => Time.parse(meeting),
      :href => href,
      :title => subnode.inner_text.strip
    }
  end
end
|
|
|
|
|
|
|
# Render the scraped recordings as an RSS 2.0 document that declares the
# iTunes podcast namespace, then print the XML to standard output.
# NOTE(review): RSS 2.0 also expects a channel <description> and a `length`
# attribute on <enclosure>; neither is emitted here -- confirm whether the
# consuming podcast clients need them.
itunes_ns = "http://www.itunes.com/dtds/podcast-1.0.dtd"

builder = Nokogiri::XML::Builder.new do |xml|
  xml.rss('xmlns:itunes' => itunes_ns, version: "2.0") do
    xml.channel do
      # Channel-level metadata.
      xml.title FEED_TITLE
      xml.link FEED_LINK
      xml['itunes'].image(href: FEED_IMAGE)
      xml['itunes'].author FEED_AUTHOR

      # One <item> per recording, in the order the links were collected.
      items.each do |recording|
        xml.item do
          xml.title recording[:title]
          xml['itunes'].author FEED_AUTHOR
          xml.enclosure(url: recording[:href], type: "audio/mpeg")
          # The mp3 URL doubles as the item's unique identifier.
          xml.guid recording[:href]
          xml.pubDate recording[:d].rfc822
        end
      end
    end
  end
end

puts builder.to_xml