# adrianshort/gist-4444819
# Mirror of https://gist.github.com/4444819.git


			
				
					
						
						
							
							# Scrape webpage into a podcast RSS feed
# https://www.sutton.gov.uk/index.aspx?articleid=4332

require 'nokogiri'
require 'open-uri'
require 'time'
require 'pp'

# Feed-level metadata written into the generated RSS channel.
# The strings are frozen so these shared constants cannot be mutated by accident.

# Channel <title>.
FEED_TITLE = "Cheam North and Worcester Park Local Committee".freeze
# Artwork URL, emitted as the href of <itunes:image>.
FEED_IMAGE = "https://dl.dropbox.com/u/300783/logo.png".freeze
# Emitted as <itunes:author> on the channel and on every item.
FEED_AUTHOR = "London Borough of Sutton".freeze
# Channel <link>; the same address as the page named in the header comment.
FEED_LINK = "https://www.sutton.gov.uk/index.aspx?articleid=4332".freeze

# Location of the page to scrape: a path to a saved copy of the page, or an
# http(s) URL.
url = "cnwp.html"

# Fetch the markup. One-shot/block reads are used so the underlying handle is
# always closed (a bare `open(url).read` leaves it open), and remote URLs go
# through URI.open because Kernel#open no longer fetches them on Ruby 3+.
html = if url.match?(%r{\Ahttps?://}i)
         URI.open(url, &:read)
       else
         File.read(url)
       end

doc = Nokogiri.parse(html)

# Fail with a clear message rather than a NoMethodError on nil when the page
# layout is not the expected one.
body = doc.at("#bodytext")
raise "No #bodytext element found in #{url}" unless body

# Matches a day, a word and a four-digit year, eg 10 December 2012.
date_pattern = /\d{1,2}\s+\w+\s+\d{4}/

meeting = ''        # text of the most recent node that carried a meeting date
meeting_time = nil  # that date parsed into a Time; nil until one is found
items = []          # one hash per MP3 link: :d (Time), :href, :title

body.children.each do |node|
  text = node.inner_text
  date = text[date_pattern]

  if date
    begin
      # Parse only the matched date rather than the whole node text, so any
      # surrounding wording cannot confuse Time.parse.
      meeting_time = Time.parse(date)
      meeting = text.strip
    rescue ArgumentError
      # The text looked like a date but could not be parsed as one; keep the
      # previously seen meeting date.
      warn "Ignoring unparseable date #{date.inspect}"
    end
  end

  node.children.each do |subnode|
    next unless subnode.name == 'a'

    # Anchors without an href attribute return nil here; to_s makes that an
    # empty string so they are simply skipped instead of raising.
    href = subnode['href'].to_s.strip
    next unless href.match?(/\.mp3\z/i)

    # Every item needs a publication date; a link that appears before any
    # meeting date has none, so report it and move on instead of crashing.
    if meeting_time.nil?
      warn "Skipping #{href}: no meeting date found before this link"
      next
    end

    items << {
      :d => meeting_time,
      :href => href,
      :title => subnode.inner_text.strip
    }
  end
end

# Channel description. RSS 2.0 requires <description> on the channel
# alongside <title> and <link>.
feed_description = "Audio recordings of #{FEED_TITLE} meetings"

# Build the podcast feed: an RSS 2.0 channel carrying the iTunes namespace,
# with one <item> per entry in `items` (hashes with :title, :href and :d).
builder = Nokogiri::XML::Builder.new do |xml|
  xml.rss('xmlns:itunes' => "http://www.itunes.com/dtds/podcast-1.0.dtd",
          :version => "2.0") do
    xml.channel do
      xml.title FEED_TITLE
      xml.link FEED_LINK
      xml.description feed_description
      xml['itunes'].image(:href => FEED_IMAGE)
      xml['itunes'].author FEED_AUTHOR

      items.each do |episode|
        xml.item do
          xml.title episode[:title]
          xml['itunes'].author FEED_AUTHOR
          xml.enclosure(
            :url => episode[:href],
            # The enclosure length attribute is required by RSS 2.0; the file
            # size is not known at this point, so 0 is emitted as a placeholder.
            :length => 0,
            :type => "audio/mpeg"
          )
          # The MP3 address doubles as the item's unique identifier.
          xml.guid episode[:href]
          xml.pubDate episode[:d].rfc822
        end
      end
    end
  end
end

# Write the finished feed to standard output.
puts builder.to_xml