RB Kingston upon Thames planning applications
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

107 lines
3.2 KiB

  1. require 'bundler'
  2. Bundler.setup
  3. require 'scraperwiki'
  4. require 'mechanize'
  5. require 'pp'
  6. require 'time'
  7. require 'date'
  8. require 'breasal'
  9. # Use the column names from planningalerts.org.au:
  10. # https://www.planningalerts.org.au/how_to_write_a_scraper
  11. LA_NAME = "Kingston upon Thames"
  12. LA_GSS = "E09000021" # https://mapit.mysociety.org/area/2480.html
  13. LA_ONS = "00AX"
  14. BASEURL = "https://maps.kingston.gov.uk/propertyServices/planning/"
  15. # Parse and save a single planning application
  16. def parse(app)
  17. record = {}
  18. record['la_name'] = LA_NAME
  19. record['la_gss'] = LA_GSS
  20. record['la_ons'] = LA_ONS
  21. record['council_reference'], record['type'] = app.at("h4").inner_text.split(' - ')
  22. # Links
  23. app.search("a").each do |link|
  24. record['info_url'] = BASEURL + link['href'].strip if link['href'].match(/Details/)
  25. record['map_url'] = link['href'].strip if link['href'].match(/\?map=/)
  26. record['images_url'] = BASEURL + link['href'].strip if link['href'].match(/ImageMenu/)
  27. record['comment_url'] = BASEURL + link['href'].strip if link['href'].match(/PlanningComments/)
  28. end
  29. # Coordinates
  30. if record['map_url']
  31. matches = record['map_url'].match(/x=(\d+)&y=(\d+)/)
  32. record['easting'] = matches[1].to_i
  33. record['northing'] = matches[2].to_i
  34. en = Breasal::EastingNorthing.new(easting: record['easting'], northing: record['northing'], type: :gb)
  35. record['latitude'] = en.to_wgs84[:latitude]
  36. record['longitude'] = en.to_wgs84[:longitude]
  37. end
  38. spans = app.search("span")
  39. record['description'] = spans[0].inner_text
  40. record['address'] = spans[1].inner_text
  41. record['ward'] = spans[2].inner_text
  42. # Decision and decision date
  43. if matches = spans[4].inner_text.match(/(.+?)\s+(\d{1,2}\/\d{1,2}\/\d{4})/)
  44. record['decision'] = matches[1]
  45. record['date_decision'] = Date.parse(matches[2])
  46. end
  47. # Comments/consultation - consultation end date can change during lifetime of application
  48. app.search("dd").each do |dd|
  49. if matches = dd.inner_text.match(/The current closing date for comments on this application is (\d{1,2}-[A-Z][a-z]{2}-\d{4})/)
  50. record['on_notice_to'] = Date.parse(matches[1])
  51. end
  52. end
  53. # Date valid
  54. begin
  55. record['date_valid'] = Date.parse(spans[3].inner_text)
  56. record['date_valid_text'] = nil
  57. rescue ArgumentError
  58. record['date_valid'] = nil
  59. record['date_valid_text'] = spans[3].inner_text
  60. end
  61. # Scraper timestamps
  62. record['updated_at'] = Time.now
  63. record['date_scraped'] = Date.today.to_s
  64. ScraperWiki.save_sqlite(['council_reference'], record)
  65. end
  66. agent = Mechanize.new
  67. agent.verify_mode = OpenSSL::SSL::VERIFY_NONE
  68. # Get all valid applications for the last 12 * 30 days
  69. d = Date.today
  70. 36.times do
  71. d_start = (d - 29).strftime("%d/%m/%Y")
  72. d_end = d.strftime("%d/%m/%Y")
  73. if ENV['SCRAPER_LOCAL']
  74. page = Nokogiri::HTML(open("page.html"))
  75. else
  76. url = "#{BASEURL}Summary?weekListType=SRCH&recFrom=#{d_start}&recTo=#{d_end}&ward=ALL&appTyp=ALL&wardTxt=All%20Wards&appTypTxt=All%20Application%20Types&limit=500"
  77. page = agent.get(url)
  78. puts url
  79. end
  80. apps = page.search("#planningApplication")
  81. puts apps.size, ''
  82. apps.each { |app| parse(app) }
  83. d -= 30
  84. sleep 5
  85. end
  86. # page = Nokogiri::HTML(open("page.html"))