A Ruby gem to get planning application data from UK council websites.
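A quick usage sketch, assuming an Idox-style planning search page (the URL below is made up; point it at a real council's search form). The search method defined in the source below returns an array of hashes with keys such as :council_reference, :address, :description, :status and :info_url.

require 'uk_planning_scraper'
require 'date'

# Illustrative URL only - substitute a real council's planning search page
url = 'https://planning.example.gov.uk/online-applications/search.do?action=advanced'

apps = UKPlanningScraper.search(
  url,
  {
    received_from: Date.new(2018, 1, 1),
    received_to:   Date.new(2018, 1, 31),
    description:   'extension'
  },
  { delay: 5 } # seconds to wait between result pages (default is 10)
)

apps.each { |app| puts "#{app[:council_reference]}: #{app[:address]}" }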
require "uk_planning_scraper/version"
require 'mechanize'
require 'time'
require 'date' # Date.parse is used when parsing result metadata
require 'logger'
require 'pp'
module UKPlanningScraper
  def self.search(search_url, params, options = {})
    default_options = {
      delay: 10,
    }
    @options = default_options.merge(options) # The user-supplied options override the defaults

    @search_url = search_url
    @base_url = search_url.match(/(https?:\/\/.+?)\//)[1]

    apps = []
    agent = Mechanize.new
    puts "Getting: #{@search_url}"
    page = agent.get(@search_url) # load the search form page

    # Fill out and submit search form
    form = page.form('searchCriteriaForm')
    # form.action = form.action + '&searchCriteria.resultsPerPage=100'

    # Some councils don't have the received from/to dates on their form, eg Newham
    form.send(:"date(applicationReceivedStart)", params[:received_from].strftime("%d/%m/%Y")) if params[:received_from]
    form.send(:"date(applicationReceivedEnd)", params[:received_to].strftime("%d/%m/%Y")) if params[:received_to]

    form.send(:"date(applicationValidatedStart)", params[:validated_from].strftime("%d/%m/%Y")) if params[:validated_from]
    form.send(:"date(applicationValidatedEnd)", params[:validated_to].strftime("%d/%m/%Y")) if params[:validated_to]

    form.send(:"searchCriteria.description", params[:description])

    # Some councils don't have the applicant name on their form, eg Bexley
    form.send(:"searchCriteria.applicantName", params[:applicant_name]) if form.has_field? 'searchCriteria.applicantName'

    form.send(:"searchCriteria.caseType", params[:application_type])

    page = form.submit

    loop do
      # Parse search results
      items = page.search('li.searchresult')
      puts "Found #{items.size} apps on this page."

      items.each do |app|
        data = {}

        # Parse info line
        info_line = app.at("p.metaInfo").inner_text.strip
        bits = info_line.split('|').map { |e| e.strip.delete("\r\n") }

        bits.each do |bit|
          if matches = bit.match(/Ref\. No:\s+(.+)/)
            data[:council_reference] = matches[1]
          end

          if matches = bit.match(/(Received|Registered):\s+(.+)/)
            data[:date_received] = Date.parse(matches[2])
          end

          if matches = bit.match(/Validated:\s+(.+)/)
            data[:date_validated] = Date.parse(matches[1])
          end

          if matches = bit.match(/Status:\s+(.+)/)
            data[:status] = matches[1]
          end
        end

        data.merge!({
          scraped_at: Time.now,
          info_url: @base_url + app.at('a')['href'],
          address: app.at('p.address').inner_text.strip,
          description: app.at('a').inner_text.strip,
        })

        apps << data
      end

      # Get the Next button from the pager, if there is one
      if next_button = page.at('a.next')
        next_url = @base_url + next_button[:href] # + '&searchCriteria.resultsPerPage=100'
        sleep @options[:delay]
        puts "Getting: #{next_url}"
        page = agent.get(next_url)
      else
        break
      end
    end

    apps
  end
end