A Ruby gem to get planning applications data from UK council websites.
Não pode escolher mais do que 25 tópicos. Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.
 
 
 

149 linhas
3.9 KiB

  1. require 'csv'
  module UKPlanningScraper
    # A planning authority: a named council plus the URL of its planning
    # applications search page.
    #
    # Each instance carries a set of normalised tags. The class also keeps a
    # registry of authorities (filled by Authority.load from authorities.csv)
    # and offers lookups by name and by tag.
    class Authority
      attr_reader :name, :url

      # Registry of every authority created by Authority.load.
      @@authorities = []

      # Search criteria that accept a "within the last n days" shorthand:
      # params[:<kind>_days] is expanded into :<kind>_from and :<kind>_to.
      DATE_RANGE_KINDS = %i[validated received decided].freeze

      # URL patterns used to recognise the planning system behind a URL.
      # Checked in this order; the first match wins. Note that the /apas/
      # pattern is case-sensitive while the others are not.
      SYSTEM_PATTERNS = {
        'idox' => /search\.do\?action=advanced/i,
        'northgate' => /generalsearch\.aspx/i,
        'ocellaweb' => /ocellaweb/i,
        'agileplanning' => %r{/apas/}
      }.freeze

      # @param name [String] authority name; surrounding whitespace is removed
      # @param url [String] search page URL; surrounding whitespace is removed
      def initialize(name, url)
        @name = name.strip
        @url = url.strip
        @tags = [] # Strings in arbitrary order
        @applications = [] # Application objects
      end

      # Runs the scraper that matches this authority's planning system.
      #
      # The caller's params hash is not modified: the "last n days"
      # shorthands are expanded on a copy, and that copy is what the
      # scraper receives.
      #
      # @param params [Hash] search criteria. :validated_days, :received_days
      #   and :decided_days are expanded into a from/to date pair ending today.
      #   Assumes that every scraper/system can do a date range search.
      # @param options [Hash] overrides for the defaults ({ delay: 10 });
      #   the unit of :delay is not visible here - presumably seconds, confirm
      #   in the scraper implementations.
      # @return [Array<Hash>] one hash (app.to_hash) per valid application
      # @raise [SystemNotSupported] when no scraper exists for this system
      def scrape(params, options = {})
        options = { delay: 10 }.merge(options) # user-supplied options win
        params = expand_day_ranges(params)

        # scrape_idox / scrape_northgate are not defined in this file.
        @applications =
          case system
          when 'idox' then scrape_idox(params, options)
          when 'northgate' then scrape_northgate(params, options)
          else
            raise SystemNotSupported, "Planning system not supported for #{@name} at URL: #{@url}"
          end

        # Post processing
        @applications.each { |app| app.authority_name = @name }

        # FIXME - silently ignores invalid apps. How should we handle them?
        @applications.select(&:valid?).map(&:to_hash)
      end

      # @return [Array<String>] this authority's tags, sorted
      def tags
        @tags.sort
      end

      # Adds several tags to the existing tags.
      #
      # @param tags [Array<String, nil>, String, nil] tags to add; nil is
      #   treated as "no tags" and a single value as a one-element list
      def add_tags(tags)
        Array(tags).each { |t| add_tag(t) }
      end

      # Adds a single tag to the existing tags. The tag is stripped,
      # lower-cased and has its spaces removed first. Blank or nil tags (for
      # example an empty trailing CSV cell) and duplicates are ignored.
      def add_tag(tag)
        clean_tag = normalise_tag(tag)
        return if clean_tag.empty?

        @tags << clean_tag unless @tags.include?(clean_tag) # prevent duplicates
      end

      # @param tag [String] tag to look for; normalised the same way as
      #   add_tag does, so 'London' matches a stored 'london'
      # @return [Boolean] whether this authority carries the tag
      def tagged?(tag)
        @tags.include?(normalise_tag(tag))
      end

      # Identifies the planning system from the URL.
      #
      # @return [String] 'idox', 'northgate', 'ocellaweb', 'agileplanning',
      #   or 'unknownsystem' when no pattern matches
      def system
        found = SYSTEM_PATTERNS.find { |_system_name, pattern| @url =~ pattern }
        found ? found.first : 'unknownsystem'
      end

      # @return [Array<Authority>] the registry itself (not a copy)
      def self.all
        @@authorities
      end

      # List all the tags in use
      #
      # @return [Array<String>] unique tags across all authorities, sorted
      def self.tags
        @@authorities.flat_map(&:tags).uniq.sort
      end

      # @param name [String] exact authority name
      # @return [Authority] the first authority with that name
      # @raise [AuthorityNotFound] when no authority has that name
      def self.named(name)
        authority = @@authorities.find { |a| name == a.name }
        raise AuthorityNotFound if authority.nil?

        authority
      end

      # Tagged x
      def self.tagged(tag)
        @@authorities.select { |a| a.tagged?(tag) }
      end

      # Not tagged x
      def self.not_tagged(tag)
        @@authorities.reject { |a| a.tagged?(tag) }
      end

      # Authorities with no tags
      def self.untagged
        @@authorities.select { |a| a.tags.empty? }
      end

      # Fills the registry from authorities.csv. Each row is read as: name,
      # URL, then any number of tags. The detected system name is added as an
      # extra tag. Blank lines in the CSV are skipped.
      def self.load
        # Don't run this method more than once
        return unless @@authorities.empty?

        csv_path = File.join(File.dirname(__dir__), 'uk_planning_scraper', 'authorities.csv')
        CSV.foreach(csv_path, skip_blanks: true) do |line|
          auth = Authority.new(line[0], line[1])
          auth.add_tags(line[2..-1])
          auth.add_tag(auth.system)
          @@authorities << auth
        end
      end

      private

      # Returns a copy of params in which every :<kind>_days entry has been
      # expanded into :<kind>_to (today) and :<kind>_from (today - (n - 1)).
      def expand_day_ranges(params)
        expanded = params.dup
        today = nil
        DATE_RANGE_KINDS.each do |kind|
          days = expanded[:"#{kind}_days"]
          next unless days

          today ||= Date.today # read once so every range ends on the same day
          expanded[:"#{kind}_to"] = today
          expanded[:"#{kind}_from"] = today - (days - 1)
        end
        expanded
      end

      # Canonical tag form: no surrounding whitespace, lower case, no spaces.
      # nil becomes the empty string.
      def normalise_tag(tag)
        tag.to_s.strip.downcase.delete(' ')
      end
    end
  end
  # Fill the authority registry as soon as this file is loaded. The call does
  # nothing if the registry already holds authorities.
  UKPlanningScraper::Authority.load