Cheam North and Worcester Park local committee podcast feed creator. Scrapes the webpage and outputs an iTunes-friendly podcast RSS feed.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

68 lines
1.6 KiB

  1. # Scrape webpage into a podcast RSS feed
  2. # https://www.sutton.gov.uk/index.aspx?articleid=4332
  3. require 'nokogiri'
  4. require 'open-uri'
  5. require 'time'
  6. require 'pp'
  7. FEED_TITLE = "Cheam North and Worcester Park Local Committee"
  8. FEED_IMAGE = "https://dl.dropbox.com/u/300783/logo.png"
  9. FEED_AUTHOR = "London Borough of Sutton"
  10. FEED_LINK = "https://www.sutton.gov.uk/index.aspx?articleid=4332"
  11. url = "cnwp.html"
  12. doc = Nokogiri.parse(open(url).read)
  13. meeting = ''
  14. items = []
  15. items_this_meeting = 0
  16. doc.at("#bodytext").children.each do |node|
  17. if node.inner_text.match(/\d{1,2}\s+\w+\s+\d{4}/) # eg 10 December 2012
  18. meeting = node.inner_text.strip
  19. items_this_meeting = 0
  20. end
  21. node.children.each do |subnode|
  22. if subnode.name == 'a' && subnode['href'].match(/\.mp3$/i)
  23. items_this_meeting += 1
  24. items << {
  25. :d => Time.parse(meeting) + ((items_this_meeting - 1) * 30 * 60),
  26. :href => subnode['href'].strip,
  27. :title => subnode.inner_text.strip
  28. }
  29. end
  30. end
  31. end
  32. builder = Nokogiri::XML::Builder.new do |xml|
  33. xml.rss('xmlns:itunes' => "http://www.itunes.com/dtds/podcast-1.0.dtd",
  34. :version => "2.0") {
  35. xml.channel {
  36. xml.title FEED_TITLE
  37. xml.link FEED_LINK
  38. xml['itunes'].image(:href => FEED_IMAGE)
  39. xml['itunes'].author FEED_AUTHOR
  40. items.each do |i|
  41. xml.item {
  42. xml.title i[:title]
  43. xml['itunes'].author FEED_AUTHOR
  44. xml.enclosure(
  45. :url => i[:href],
  46. :type => "audio/mpeg"
  47. )
  48. xml.guid i[:href]
  49. xml.pubDate i[:d].rfc822
  50. }
  51. end
  52. }
  53. }
  54. end
  55. puts builder.to_xml