RB Kingston upon Thames planning applications
Вы не можете выбрать более 25 тем Темы должны начинаться с буквы или цифры, могут содержать дефисы(-) и должны содержать не более 35 символов.

93 строки
2.7 KiB

  1. require 'bundler'
  2. Bundler.setup
  3. require 'scraperwiki'
  4. require 'mechanize'
  5. require 'pp'
  6. require 'time'
  7. require 'date'
  8. # Use the column names from planningalerts.org.au:
  9. # https://www.planningalerts.org.au/how_to_write_a_scraper
  # Local authority name; parse copies it into every record's 'la_name' column.
  LA_NAME = "Kingston upon Thames".freeze
  # Local authority code; parse copies it into every record's 'la_gss' column.
  LA_GSS = "E09000021".freeze # https://mapit.mysociety.org/area/2480.html
  # Base of the planning site; relative link hrefs and the search query string
  # are appended to it, so it must keep its trailing slash.
  BASEURL = "https://maps.kingston.gov.uk/propertyServices/planning/".freeze
  # Parse a single planning application and save it.
  #
  # Builds one record from the application's <h4> heading, its links, its
  # <span> elements (by position: 0 description, 1 address, 2 ward, 3 date
  # valid, 4 decision) and its <dd> elements, then hands the record to
  # ScraperWiki.save_sqlite with 'council_reference' as the key column.
  #
  # @param app [#at, #search] element wrapping one application; the main
  #   script passes each node matched by "#planningApplication"
  # @return the result of ScraperWiki.save_sqlite
  # @raise [NoMethodError] if the application has no <h4> heading
  # @raise [ArgumentError] if a matched decision or closing date is not a
  #   real calendar date
  def parse(app)
    record = {}
    record['la_name'] = LA_NAME
    record['la_gss'] = LA_GSS

    # Limit the split to two parts so an application type that itself
    # contains " - " is stored whole instead of being truncated.
    reference, type = app.at("h4").inner_text.split(' - ', 2)
    record['council_reference'] = reference
    record['type'] = type

    app.search("a").each do |link|
      href = link['href']
      next if href.nil? # anchors without an href carry no URL to record

      href = href.strip
      record['info_url'] = BASEURL + href if href =~ /Details/
      # The map link is stored exactly as found; it is not prefixed with BASEURL.
      record['map_url'] = href if href =~ /\?map=/
      record['images_url'] = BASEURL + href if href =~ /ImageMenu/
      record['comment_url'] = BASEURL + href if href =~ /PlanningComments/
    end

    # Collect the span texts once; a missing span then reads as nil rather
    # than raising NoMethodError.
    texts = app.search("span").map { |span| span.inner_text }
    record['description'] = texts[0]
    record['address'] = texts[1]
    record['ward'] = texts[2]

    # Decision and decision date ("<decision> dd/mm/yyyy")
    if (matches = texts[4].to_s.match(/(.+?)\s+(\d{1,2}\/\d{1,2}\/\d{4})/))
      record['decision'] = matches[1]
      # The regex guarantees day/month/year order, so parse with that format
      # explicitly instead of relying on Date.parse's guess.
      record['date_decision'] = Date.strptime(matches[2], '%d/%m/%Y')
    end

    # Comments/consultation - consultation end date can change during lifetime of application
    app.search("dd").each do |dd|
      if (matches = dd.inner_text.match(/The current closing date for comments on this application is (\d{1,2}-[A-Z][a-z]{2}-\d{4})/))
        record['on_notice_to'] = Date.strptime(matches[1], '%d-%b-%Y')
      end
    end

    # Date valid: keep the raw text when it is not a parseable date.
    begin
      record['date_valid'] = Date.parse(texts[3])
      record['date_valid_text'] = nil
    rescue ArgumentError, TypeError # TypeError: the span is missing (nil)
      record['date_valid'] = nil
      record['date_valid_text'] = texts[3]
    end

    # Scraper timestamps
    record['updated_at'] = Time.now
    record['date_scraped'] = Date.today.to_s

    ScraperWiki.save_sqlite(['council_reference'], record)
  end
  # Main script: fetch the search results for the last 12 consecutive 30-day
  # windows (newest first) and parse every application found in each.
  agent = Mechanize.new
  # NOTE(review): certificate verification is switched off, so the HTTPS
  # connection is not authenticated. Presumably a workaround for the site's
  # certificate chain - confirm it is still needed before keeping it.
  agent.verify_mode = OpenSSL::SSL::VERIFY_NONE

  # When SCRAPER_LOCAL is set, a saved copy of a results page (page.html) is
  # parsed instead of hitting the live site.
  local_mode = ENV['SCRAPER_LOCAL']

  # Get all valid applications for the last 12 * 30 days
  d = Date.today
  12.times do
    # Each window covers 30 days inclusive: (d - 29)..d
    d_start = (d - 29).strftime("%d/%m/%Y")
    d_end = d.strftime("%d/%m/%Y")

    if local_mode
      # Block form closes the file handle as soon as the document is parsed.
      page = File.open("page.html") { |file| Nokogiri::HTML(file) }
    else
      url = "#{BASEURL}Summary?weekListType=SRCH&recFrom=#{d_start}&recTo=#{d_end}&ward=ALL&appTyp=ALL&wardTxt=All%20Wards&appTypTxt=All%20Application%20Types&limit=500"
      # Log before requesting so a failed request still shows which URL it was.
      puts url
      page = agent.get(url)
    end

    apps = page.search("#planningApplication")
    puts apps.size, ''
    apps.each { |app| parse(app) }

    d -= 30
    # Pause between requests to the live site; pointless for a local file.
    sleep 5 unless local_mode
  end
  73. # page = Nokogiri::HTML(open("page.html"))