A Ruby gem to get planning applications data from UK council websites.
Du kan inte välja fler än 25 ämnen Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.
 
 
 

151 rader
3.7 KiB

  1. require 'csv'
  2. module UKPlanningScraper
  3. class Authority
  4. # eg "Camden"
  5. attr_reader :name
  6. # URL of the advanced search page
  7. attr_reader :url
  8. # eg "idox", "northgate"
  9. attr_reader :system
  10. @@authorities = []
  11. def initialize(name, url)
  12. @name = name.strip
  13. @url = url.strip
  14. @tags = [] # Strings in arbitrary order
  15. @applications = [] # Application objects
  16. @scrape_params = {}
  17. # Determine @system when Authority is created
  18. if @url.match(/search\.do\?action=advanced/i)
  19. @system = 'idox'
  20. elsif @url.match(/generalsearch\.aspx/i)
  21. @system = 'northgate'
  22. elsif @url.match(/ocellaweb/i)
  23. @system = 'ocellaweb'
  24. elsif @url.match(/\/apas\//)
  25. @system = 'agileplanning'
  26. else
  27. @system = 'unknownsystem'
  28. end
  29. end
  30. # Scrape this authority's website for applications
  31. def scrape(options = {})
  32. default_options = {
  33. delay: 10,
  34. }
  35. # The user-supplied options override the defaults
  36. options = default_options.merge(options)
  37. # Select which scraper to use
  38. case system
  39. when 'idox'
  40. @applications = scrape_idox(@scrape_params, options)
  41. when 'northgate'
  42. @applications = scrape_northgate(@scrape_params, options)
  43. else
  44. raise SystemNotSupported.new("Planning system not supported for \
  45. #{@name} at URL: #{@url}")
  46. end
  47. # Post processing
  48. @applications.each do |app|
  49. app.authority_name = @name
  50. end
  51. # Output as an array of hashes
  52. output = []
  53. # FIXME - silently ignores invalid apps. How should we handle them?
  54. @applications.each { |app| output << app.to_hash if app.valid? }
  55. # Reset so that old params don't get used for new scrapes
  56. clear_scrape_params
  57. output # Single point of successful exit
  58. end
  59. # Return a sorted list of tags for this authority
  60. def tags
  61. @tags.sort
  62. end
  63. # Add multiple tags to existing tags
  64. def add_tags(tags)
  65. tags.each { |t| add_tag(t) }
  66. end
  67. # Add a single tag to existing tags
  68. def add_tag(tag)
  69. clean_tag = tag.strip.downcase.gsub(' ', '')
  70. @tags << clean_tag unless tagged?(clean_tag) # prevent duplicates
  71. end
  72. def tagged?(tag)
  73. @tags.include?(tag)
  74. end
  75. def self.all
  76. @@authorities
  77. end
  78. # List all the tags in use
  79. def self.tags
  80. tags = []
  81. @@authorities.each { |a| tags << a.tags }
  82. tags.flatten.uniq.sort
  83. end
  84. def self.named(name)
  85. authority = @@authorities.find { |a| name == a.name }
  86. raise AuthorityNotFound if authority.nil?
  87. authority
  88. end
  89. # Tagged x
  90. def self.tagged(tag)
  91. found = []
  92. @@authorities.each { |a| found << a if a.tagged?(tag) }
  93. found
  94. end
  95. # Not tagged x
  96. def self.not_tagged(tag)
  97. found = []
  98. @@authorities.each { |a| found << a unless a.tagged?(tag) }
  99. found
  100. end
  101. # Authorities with no tags
  102. def self.untagged
  103. found = []
  104. @@authorities.each { |a| found << a if a.tags.empty? }
  105. found
  106. end
  107. def self.load
  108. # Don't run this method more than once
  109. return unless @@authorities.empty?
  110. CSV.foreach(File.join(File.dirname(__dir__), 'uk_planning_scraper', \
  111. 'authorities.csv'), :headers => true) do |line|
  112. auth = Authority.new(line['authority_name'], line['url'])
  113. if line['tags']
  114. auth.add_tags(line['tags'].split(/\s+/))
  115. end
  116. auth.add_tag(auth.system)
  117. @@authorities << auth
  118. end
  119. end
  120. end
  121. end
  # Populate the authority registry as soon as this file is loaded.
  # Authority.load returns early when the registry is already non-empty,
  # so loading the file again does not add duplicate entries.
  122. UKPlanningScraper::Authority.load