RB Kingston upon Thames planning applications
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

scraper.rb 3.1 KiB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102
  1. require 'bundler'
  2. Bundler.setup
  3. require 'scraperwiki'
  4. require 'mechanize'
  5. require 'pp'
  6. require 'time'
  7. require 'date'
  8. require 'breasal'
  9. # Use the column names from planningalerts.org.au:
  10. # https://www.planningalerts.org.au/how_to_write_a_scraper
  11. LA_NAME = "Kingston upon Thames"
  12. LA_GSS = "E09000021" # https://mapit.mysociety.org/area/2480.html
  13. BASEURL = "https://maps.kingston.gov.uk/propertyServices/planning/"
  14. # Parse and save a single planning application
  # Parse and save a single planning application.
  #
  # Builds one record (column names follow planningalerts.org.au) from the
  # markup of one application and upserts it via ScraperWiki.save_sqlite,
  # keyed on 'council_reference'.
  #
  # The markup is still assumed to contain an <h4> heading and at least five
  # <span> elements; links and map coordinates are optional.
  #
  # @param app [#at, #search] node for one application (the driver passes each
  #   element matched by "#planningApplication")
  # @return [Object] whatever ScraperWiki.save_sqlite returns
  def parse(app)
    record = {}
    record['la_name'] = LA_NAME
    record['la_gss'] = LA_GSS

    # The heading reads "<reference> - <type>". Splitting at the first
    # separator only keeps a type that itself contains " - " intact.
    record['council_reference'], record['type'] = app.at("h4").inner_text.split(' - ', 2)

    app.search("a").each do |link|
      href = link['href']
      next unless href # anchors without an href carry no URL to classify

      href = href.strip
      record['info_url'] = BASEURL + href if href =~ /Details/
      record['map_url'] = href if href =~ /\?map=/ # already absolute, stored as-is
      record['images_url'] = BASEURL + href if href =~ /ImageMenu/
      record['comment_url'] = BASEURL + href if href =~ /PlanningComments/
    end

    # Location: the map link carries an easting/northing pair in its query
    # string. Skip quietly when there is no map link or no coordinates in it.
    coords = record['map_url'] && record['map_url'].match(/x=(\d+)&y=(\d+)/)
    if coords
      record['easting'] = coords[1].to_i
      record['northing'] = coords[2].to_i
      point = Breasal::EastingNorthing.new(easting: record['easting'], northing: record['northing'], type: :gb)
      wgs84 = point.to_wgs84 # convert once, read both values from the result
      record['latitude'] = wgs84[:latitude]
      record['longitude'] = wgs84[:longitude]
    end

    spans = app.search("span")
    record['description'] = spans[0].inner_text
    record['address'] = spans[1].inner_text
    record['ward'] = spans[2].inner_text

    # Decision and decision date ("<decision> <d/m/yyyy>"); left unset when the
    # span does not match that shape.
    if (decided = spans[4].inner_text.match(/(.+?)\s+(\d{1,2}\/\d{1,2}\/\d{4})/))
      record['decision'] = decided[1]
      record['date_decision'] = Date.parse(decided[2])
    end

    # Comments/consultation - consultation end date can change during lifetime of application
    app.search("dd").each do |dd|
      if (closing = dd.inner_text.match(/The current closing date for comments on this application is (\d{1,2}-[A-Z][a-z]{2}-\d{4})/))
        record['on_notice_to'] = Date.parse(closing[1])
      end
    end

    # Date valid: store the parsed date, or keep the raw text when the span
    # does not hold a parseable date.
    valid_text = spans[3].inner_text
    begin
      record['date_valid'] = Date.parse(valid_text)
      record['date_valid_text'] = nil
    rescue ArgumentError
      record['date_valid'] = nil
      record['date_valid_text'] = valid_text
    end

    # Scraper timestamps
    record['updated_at'] = Time.now
    record['date_scraped'] = Date.today.to_s

    ScraperWiki.save_sqlite(['council_reference'], record)
  end
  # Driver: fetch the council's search results and store every application found.
  agent = Mechanize.new
  # Certificate verification is switched off for every request made by this agent.
  # NOTE(review): presumably a workaround for the council site's certificate -
  # confirm it is still required.
  agent.verify_mode = OpenSSL::SSL::VERIFY_NONE

  # Get all valid applications for the last 12 * 30 days, walking backwards
  # from today in consecutive 30-day windows (both end dates inclusive).
  window_end = Date.today
  12.times do
    d_start = (window_end - 29).strftime("%d/%m/%Y")
    d_end = window_end.strftime("%d/%m/%Y")

    if ENV['SCRAPER_LOCAL']
      # Offline mode: parse a saved results page instead of hitting the site.
      # The block form closes the file once it has been parsed.
      page = File.open("page.html") { |file| Nokogiri::HTML(file) }
    else
      url = "#{BASEURL}Summary?weekListType=SRCH&recFrom=#{d_start}&recTo=#{d_end}&ward=ALL&appTyp=ALL&wardTxt=All%20Wards&appTypTxt=All%20Application%20Types&limit=500"
      page = agent.get(url)
      puts url
    end

    apps = page.search("#planningApplication")
    puts apps.size, ''
    apps.each { |app| parse(app) }

    window_end -= 30
    sleep 5 # pause between requests
  end
  82. # page = Nokogiri::HTML(open("page.html"))