RB Kingston upon Thames planning applications
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

scraper.rb 2.6 KiB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586
  1. require 'bundler'
  2. Bundler.setup
  3. require 'scraperwiki'
  4. require 'mechanize'
  5. require 'pp'
  6. require 'time'
  7. require 'date'
  # Use the column names from planningalerts.org.au:
  # https://www.planningalerts.org.au/how_to_write_a_scraper

  # Base URL of the Kingston planning service. Relative links scraped from the
  # result pages are appended to it, and the search URL is built from it.
  # Frozen so the shared constant cannot be mutated in place by accident.
  BASEURL = "https://maps.kingston.gov.uk/propertyServices/planning/".freeze
  # Return the inner text of +node+, or nil when the node is absent.
  def node_text(node)
    node && node.inner_text
  end

  # Parse and save a single planning application.
  #
  # Reads the title (h4), the links (a), the detail fields (span) and the
  # consultation notes (dd) from +app+, builds a record hash and stores it with
  # ScraperWiki.save_sqlite keyed on 'council_reference'.
  #
  # Missing markup no longer aborts the run: absent spans are stored as nil,
  # anchors without an href are ignored, and an application whose title does
  # not contain a recognisable reference is skipped with a warning (it has no
  # key to be saved under).
  #
  # @param app [#at, #search] element wrapping one application
  # @return the result of ScraperWiki.save_sqlite, or nil when the application
  #   was skipped
  def parse(app)
    record = {}

    record['title'] = node_text(app.at("h4")).to_s
    title_match = record['title'].match(/(\d+\/\d+\/\w+)\s+-\s+(.+)/)
    unless title_match
      warn "Skipping application with unrecognised title: #{record['title'].inspect}"
      return nil
    end
    record['council_reference'] = title_match[1]
    record['type'] = title_match[2]

    app.search("a").each do |link|
      # Anchors without an href yield "" here and simply match nothing below.
      href = link['href'].to_s.strip
      record['info_url'] = BASEURL + href if href =~ /Details/
      # The map link is stored as found; the other links are made absolute.
      record['map_url'] = href if href =~ /\?map=/
      record['images_url'] = BASEURL + href if href =~ /ImageMenu/
      record['comment_url'] = BASEURL + href if href =~ /PlanningComments/
    end

    spans = app.search("span")
    record['description'] = node_text(spans[0])
    record['address'] = node_text(spans[1])
    record['ward'] = node_text(spans[2])

    # Decision and decision date
    decision_match = node_text(spans[4]).to_s.match(/(.+?)\s+(\d{1,2}\/\d{1,2}\/\d{4})/)
    if decision_match
      record['decision'] = decision_match[1]
      record['date_decision'] = Date.parse(decision_match[2])
    end

    # Comments/consultation - consultation end date can change during lifetime of application
    app.search("dd").each do |dd|
      closing_match = dd.inner_text.to_s.match(/The current closing date for comments on this application is (\d{1,2}-[A-Z][a-z]{2}-\d{4})/)
      record['on_notice_to'] = Date.parse(closing_match[1]) if closing_match
    end

    # Date valid: keep the raw text only when it cannot be parsed as a date.
    date_valid_text = node_text(spans[3])
    begin
      # to_s turns a missing span into "", which Date.parse rejects with ArgumentError.
      record['date_valid'] = Date.parse(date_valid_text.to_s)
      record['date_valid_text'] = nil
    rescue ArgumentError
      record['date_valid'] = nil
      record['date_valid_text'] = date_valid_text
    end

    # Scraper timestamps
    record['updated_at'] = Time.now
    record['date_scraped'] = Date.today.to_s

    ScraperWiki.save_sqlite(['council_reference'], record)
  end
  # Crawl the search results and hand every application found to parse.
  agent = Mechanize.new
  # NOTE(review): this turns off TLS certificate verification — presumably a
  # workaround for the council site's certificate; confirm it is still needed.
  agent.verify_mode = OpenSSL::SSL::VERIFY_NONE

  # Get all valid applications for the last 12 * 30 days, one 30-day window
  # (inclusive of both ends) per request, newest window first.
  d = Date.today
  12.times do
    received_from, received_to = [d - 29, d].map { |day| day.strftime("%d/%m/%Y") }

    query = [
      "weekListType=SRCH",
      "recFrom=#{received_from}",
      "recTo=#{received_to}",
      "ward=ALL",
      "appTyp=ALL",
      "wardTxt=All%20Wards",
      "appTypTxt=All%20Application%20Types",
      "limit=500"
    ].join("&")
    url = "#{BASEURL}Summary?#{query}"
    puts url

    apps = agent.get(url).search("#planningApplication")
    puts apps.size, ''
    apps.each { |app| parse(app) }

    # Step back to the window immediately before this one, pausing between requests.
    d -= 30
    sleep 5
  end
  68. # page = Nokogiri::HTML(open("page.html"))