A Ruby gem to get planning applications data from UK council websites.
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.
 
 
 

141 lignes
3.3 KiB

  1. require 'csv'
  2. module UKPlanningScraper
  3. class Authority
  4. attr_reader :name, :url
  5. @@authorities = []
  6. def initialize(name, url)
  7. @name = name.strip
  8. @url = url.strip
  9. @tags = [] # Strings in arbitrary order
  10. @applications = [] # Application objects
  11. @scrape_params = {}
  12. end
  13. def scrape(options = {})
  14. default_options = {
  15. delay: 10,
  16. }
  17. # The user-supplied options override the defaults
  18. options = default_options.merge(options)
  19. # Select which scraper to use
  20. case system
  21. when 'idox'
  22. @applications = scrape_idox(@scrape_params, options)
  23. when 'northgate'
  24. @applications = scrape_northgate(@scrape_params, options)
  25. else
  26. raise SystemNotSupported.new("Planning system not supported for \
  27. #{@name} at URL: #{@url}")
  28. end
  29. # Post processing
  30. @applications.each do |app|
  31. app.authority_name = @name
  32. end
  33. # Output as an array of hashes
  34. output = []
  35. # FIXME - silently ignores invalid apps. How should we handle them?
  36. @applications.each { |app| output << app.to_hash if app.valid? }
  37. # Reset so that old params don't get used for new scrapes
  38. clear_scrape_params
  39. output # Single point of successful exit
  40. end
  41. def tags
  42. @tags.sort
  43. end
  44. # Add multiple tags to existing tags
  45. def add_tags(tags)
  46. tags.each { |t| add_tag(t) }
  47. end
  48. # Add a single tag to existing tags
  49. def add_tag(tag)
  50. clean_tag = tag.strip.downcase.gsub(' ', '')
  51. @tags << clean_tag unless tagged?(clean_tag) # prevent duplicates
  52. end
  53. def tagged?(tag)
  54. @tags.include?(tag)
  55. end
  56. def system
  57. if @url.match(/search\.do\?action=advanced/i)
  58. 'idox'
  59. elsif @url.match(/generalsearch\.aspx/i)
  60. 'northgate'
  61. elsif @url.match(/ocellaweb/i)
  62. 'ocellaweb'
  63. elsif @url.match(/\/apas\//)
  64. 'agileplanning'
  65. else
  66. 'unknownsystem'
  67. end
  68. end
  69. def self.all
  70. @@authorities
  71. end
  72. # List all the tags in use
  73. def self.tags
  74. tags = []
  75. @@authorities.each { |a| tags << a.tags }
  76. tags.flatten.uniq.sort
  77. end
  78. def self.named(name)
  79. authority = @@authorities.find { |a| name == a.name }
  80. raise AuthorityNotFound if authority.nil?
  81. authority
  82. end
  83. # Tagged x
  84. def self.tagged(tag)
  85. found = []
  86. @@authorities.each { |a| found << a if a.tagged?(tag) }
  87. found
  88. end
  89. # Not tagged x
  90. def self.not_tagged(tag)
  91. found = []
  92. @@authorities.each { |a| found << a unless a.tagged?(tag) }
  93. found
  94. end
  95. # Authorities with no tags
  96. def self.untagged
  97. found = []
  98. @@authorities.each { |a| found << a if a.tags.empty? }
  99. found
  100. end
  101. def self.load
  102. # Don't run this method more than once
  103. return unless @@authorities.empty?
  104. CSV.foreach(File.join(File.dirname(__dir__), 'uk_planning_scraper', \
  105. 'authorities.csv'), :headers => true) do |line|
  106. auth = Authority.new(line['authority_name'], line['url'])
  107. if line['tags']
  108. auth.add_tags(line['tags'].split(/\s+/))
  109. end
  110. auth.add_tag(auth.system)
  111. @@authorities << auth
  112. end
  113. end
  114. end
  115. end
  # Fill the authority registry from authorities.csv as soon as this file is
  # loaded. Authority.load returns early when the registry already has
  # entries, so this call does nothing if the file is loaded again.
  116. UKPlanningScraper::Authority.load