A Ruby gem to get planning applications data from UK council websites.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

authority.rb 3.3 KiB

5 jaren geleden
5 jaren geleden
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
  1. require 'csv'
  module UKPlanningScraper
    # A planning authority identified by a name and a search URL, together with
    # a class-level registry of every authority read from authorities.csv.
    class Authority
      attr_reader :name, :url

      # Registry backing the class-level finders (all, named, tagged, ...).
      # It is only ever appended to by Authority.load.
      @@authorities = []

      # name - authority name; surrounding whitespace is stripped.
      # url  - search URL; surrounding whitespace is stripped. #system inspects
      #        it to decide which planning system the authority runs.
      def initialize(name, url)
        @name = name.strip
        @url = url.strip
        @tags = [] # Strings in arbitrary order
        @applications = [] # Application objects
        @scrape_params = {}
      end

      # Scrapes this authority and returns an array of hashes, one per valid
      # application.
      #
      # options - Hash merged over the defaults ({ delay: 10 }); user-supplied
      #           keys win. NOTE(review): delay is presumably a pause in
      #           seconds between requests - confirm in the scraper methods.
      #
      # Raises SystemNotSupported when #system is neither 'idox' nor
      # 'northgate'. scrape_idox, scrape_northgate and clear_scrape_params are
      # not defined in this file; they are expected to be provided elsewhere.
      def scrape(options = {})
        default_options = { delay: 10 }
        # The user-supplied options override the defaults
        options = default_options.merge(options)

        # Select which scraper to use. The raise fires before the assignment,
        # so a failed scrape leaves the previous @applications untouched.
        @applications =
          case system
          when 'idox'
            scrape_idox(@scrape_params, options)
          when 'northgate'
            scrape_northgate(@scrape_params, options)
          else
            raise SystemNotSupported,
                  "Planning system not supported for #{@name} at URL: #{@url}"
          end

        # Post processing: stamp every application with this authority's name
        @applications.each { |app| app.authority_name = @name }

        # Output as an array of hashes
        # FIXME - silently ignores invalid apps. How should we handle them?
        output = @applications.select(&:valid?).map(&:to_hash)

        # Reset so that old params don't get used for new scrapes
        clear_scrape_params
        output # Single point of successful exit
      end

      # Returns a new, alphabetically sorted array of this authority's tags.
      def tags
        @tags.sort
      end

      # Add multiple tags to existing tags
      def add_tags(tags)
        tags.each { |t| add_tag(t) }
      end

      # Add a single tag to existing tags. The tag is normalised first
      # (stripped, downcased, spaces removed). Blank tags are ignored so that
      # stray whitespace in the source data cannot create an empty-string tag,
      # and tags that are already present are not added twice.
      def add_tag(tag)
        clean_tag = normalize_tag(tag)
        return if clean_tag.empty?

        @tags << clean_tag unless @tags.include?(clean_tag) # prevent duplicates
      end

      # True when this authority carries the tag. The argument is normalised
      # the same way add_tag normalises stored tags, so 'London', ' london '
      # and 'LONDON' all match a stored 'london'.
      def tagged?(tag)
        @tags.include?(normalize_tag(tag))
      end

      # Name of the planning system, inferred purely from the URL pattern.
      # Returns 'unknownsystem' when no pattern matches.
      def system
        case @url
        when /search\.do\?action=advanced/i then 'idox'
        when /generalsearch\.aspx/i then 'northgate'
        when /ocellaweb/i then 'ocellaweb'
        when %r{/apas/} then 'agileplanning'
        else 'unknownsystem'
        end
      end

      # The registry array itself (not a copy).
      def self.all
        @@authorities
      end

      # List all the tags in use, de-duplicated and sorted
      def self.tags
        @@authorities.flat_map(&:tags).uniq.sort
      end

      # Finds the authority whose name equals +name+ exactly.
      # Raises AuthorityNotFound when there is no such authority.
      def self.named(name)
        authority = @@authorities.find { |a| name == a.name }
        raise AuthorityNotFound unless authority

        authority
      end

      # Tagged x
      def self.tagged(tag)
        @@authorities.select { |a| a.tagged?(tag) }
      end

      # Not tagged x
      def self.not_tagged(tag)
        @@authorities.reject { |a| a.tagged?(tag) }
      end

      # Authorities with no tags
      def self.untagged
        @@authorities.select { |a| a.tags.empty? }
      end

      # Fills the registry from uk_planning_scraper/authorities.csv, reading the
      # 'authority_name', 'url' and optional whitespace-separated 'tags'
      # columns. Each authority is also tagged with its detected system name.
      def self.load
        # Don't run this method more than once
        return unless @@authorities.empty?

        csv_path = File.join(File.dirname(__dir__), 'uk_planning_scraper',
                             'authorities.csv')

        CSV.foreach(csv_path, headers: true) do |line|
          auth = Authority.new(line['authority_name'], line['url'])
          # Argument-less split drops leading whitespace instead of yielding ''
          auth.add_tags(line['tags'].split) if line['tags']
          auth.add_tag(auth.system)
          @@authorities << auth
        end
      end

      private

      # Canonical form shared by add_tag and tagged?: surrounding whitespace
      # stripped, lower-cased, spaces removed. to_s lets nil and symbols be
      # looked up without raising.
      def normalize_tag(tag)
        tag.to_s.strip.downcase.gsub(' ', '')
      end
    end
  end
  # Populate the authority registry as soon as this file is required.
  # Authority.load returns early when the registry is already filled, so
  # requiring the file again does not duplicate entries.
  UKPlanningScraper::Authority.load