A Ruby gem to get planning applications data from UK council websites.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

151 lines
3.7 KiB

  1. require 'csv'
  module UKPlanningScraper
    # A planning authority (council) whose website can be scraped for planning
    # applications. Each authority has a name, the URL of its advanced search
    # page, a detected back-end system and a set of normalised tags.
    class Authority
      # eg "Camden"
      attr_reader :name

      # URL of the advanced search page
      attr_reader :url

      # eg "idox", "northgate"
      attr_reader :system

      # Every authority registered by Authority.load, shared class-wide.
      @@authorities = []

      # Create an authority and work out which planning system it runs.
      #
      # name - authority name; surrounding whitespace is stripped.
      # url  - URL of the advanced search page; surrounding whitespace is
      #        stripped before it is inspected.
      def initialize(name, url)
        @name = name.strip
        @url = url.strip
        @tags = [] # Strings in arbitrary order
        @applications = [] # Application objects
        @scrape_params = {}

        # Determine @system when Authority is created
        @system =
          case @url
          when /search\.do\?action=advanced/i then 'idox'
          when /generalsearch\.aspx/i then 'northgate'
          when /ocellaweb/i then 'ocellaweb'
          when %r{/apas/} then 'agileplanning' # the only case-sensitive pattern
          else 'unknownsystem'
          end
      end

      # Scrape this authority's website for applications.
      #
      # options - Hash of scraper options merged over the defaults
      #           (delay: 10); user-supplied values win.
      #
      # Returns an Array of hashes, one per valid application.
      # Raises SystemNotSupported unless the system is 'idox' or 'northgate'.
      def scrape(options = {})
        default_options = { delay: 10 }
        # The user-supplied options override the defaults
        options = default_options.merge(options)

        # Select which scraper to use. scrape_idox / scrape_northgate are not
        # defined in this section - presumably mixed in from elsewhere.
        @applications =
          case system
          when 'idox' then scrape_idox(@scrape_params, options)
          when 'northgate' then scrape_northgate(@scrape_params, options)
          else
            raise SystemNotSupported,
                  "Planning system not supported for #{@name} at URL: #{@url}"
          end

        # Post processing
        @applications.each { |app| app.authority_name = @name }

        # Output as an array of hashes
        # FIXME - silently ignores invalid apps. How should we handle them?
        output = @applications.select(&:valid?).map(&:to_hash)

        # Reset so that old params don't get used for new scrapes
        clear_scrape_params
        output # Single point of successful exit
      end

      # Return a sorted list of tags for this authority
      def tags
        @tags.sort
      end

      # Add multiple tags to existing tags
      def add_tags(tags)
        tags.each { |t| add_tag(t) }
      end

      # Add a single tag to existing tags. The tag is normalised first
      # (stripped, lower-cased, spaces removed); blank tags and duplicates
      # are ignored.
      def add_tag(tag)
        clean_tag = normalize_tag(tag)
        return if clean_tag.empty? || @tags.include?(clean_tag)

        @tags << clean_tag
      end

      # Is this authority tagged with `tag`? The argument is normalised the
      # same way add_tag normalises stored tags, so "London" matches "london".
      def tagged?(tag)
        @tags.include?(normalize_tag(tag))
      end

      # All registered authorities (the live array, not a copy).
      def self.all
        @@authorities
      end

      # List all the tags in use, de-duplicated and sorted
      def self.tags
        @@authorities.flat_map(&:tags).uniq.sort
      end

      # Find the authority whose name equals `name` exactly.
      # Raises AuthorityNotFound when there is no such authority.
      def self.named(name)
        authority = @@authorities.find { |a| name == a.name }
        raise AuthorityNotFound if authority.nil?

        authority
      end

      # Tagged x
      def self.tagged(tag)
        @@authorities.select { |a| a.tagged?(tag) }
      end

      # Not tagged x
      def self.not_tagged(tag)
        @@authorities.reject { |a| a.tagged?(tag) }
      end

      # Authorities with no tags
      def self.untagged
        @@authorities.select { |a| a.tags.empty? }
      end

      # Build the authority list from authorities.csv. Each row supplies
      # 'authority_name', 'url' and an optional whitespace-separated 'tags'
      # column; the detected system name is added as a tag as well.
      def self.load
        # Don't run this method more than once
        return unless @@authorities.empty?

        csv_path = File.join(File.dirname(__dir__), 'uk_planning_scraper',
                             'authorities.csv')
        CSV.foreach(csv_path, headers: true) do |line|
          auth = Authority.new(line['authority_name'], line['url'])
          auth.add_tags(line['tags'].split) if line['tags']
          auth.add_tag(auth.system)
          @@authorities << auth
        end
      end

      private

      # Canonical form of a tag: stripped, lower-cased, with spaces removed.
      def normalize_tag(tag)
        tag.to_s.strip.downcase.delete(' ')
      end
    end
  end
  # Populate the authority list as soon as this file is loaded.
  # Authority.load returns early when the list is already populated.
  122. UKPlanningScraper::Authority.load