A Ruby gem to get planning applications data from UK council websites.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

138 regels
3.7 KiB

  1. require 'csv'
  module UKPlanningScraper
    # A planning authority: a name, the URL of its planning search site and a
    # list of tags. Instances created by {Authority.load} are kept in a
    # class-level registry that the class-level finder methods query.
    class Authority
      attr_reader :name, :tags, :url

      # Registry filled by {Authority.load}. Kept as a class variable because
      # this class may be reopened in other files (the scrape_* methods used
      # by #scrape are not defined in this file).
      @@authorities = []

      # Search criteria that accept a "<criterion>_days" shortcut in #scrape.
      DAY_RANGE_CRITERIA = %i[validated received decided].freeze

      # @param name [String] name of the authority
      # @param url [String] URL of the authority's planning search page
      # @param tags [Array<String>] tags attached to this authority
      def initialize(name, url, tags)
        @name = name
        @url = url
        @tags = tags
        @applications = [] # Application objects
      end

      # Scrapes planning applications using the scraper that matches #system.
      #
      # The caller's +params+ hash is never modified: the "<criterion>_days"
      # shortcuts are expanded into a copy, which is what the scraper receives.
      #
      # @param params [Hash] search parameters. For each of :validated_days,
      #   :received_days and :decided_days that is set, the matching *_from and
      #   *_to dates are derived so that the range ends today and spans that
      #   many days.
      # @param options [Hash] merged over the defaults ({ delay: 10 }); the
      #   meaning and unit of :delay are defined by the scraper implementations,
      #   which are not visible here.
      # @return [Array<Hash>] `to_hash` of every scraped application that
      #   reports itself as `valid?`
      # @raise [SystemNotSupported] if #system is neither 'idox' nor 'northgate'
      def scrape(params, options = {})
        options = { delay: 10 }.merge(options) # The user-supplied options override the defaults
        params = expand_day_ranges(params)

        # Select which scraper to use. scrape_idox / scrape_northgate are
        # expected to be defined elsewhere on this class.
        @applications =
          case system
          when 'idox' then scrape_idox(params, options)
          when 'northgate' then scrape_northgate(params, options)
          else
            raise SystemNotSupported, "Planning system not supported for #{@name} at URL: #{@url}"
          end

        # Post processing
        @applications.each { |app| app.authority_name = @name }

        # Output as an array of hashes
        # FIXME - silently ignores invalid apps. How should we handle them?
        @applications.select(&:valid?).map(&:to_hash)
      end

      # @param tag [String]
      # @return [Boolean] whether this authority carries +tag+
      def tagged?(tag)
        @tags.include?(tag)
      end

      # Identifies the planning system from the shape of the URL.
      # Note: this public method shadows Kernel#system on Authority instances.
      #
      # @return [String] 'idox', 'northgate', 'ocellaweb', 'agileplanning' or
      #   'unknownsystem'
      def system
        case @url
        when /search\.do\?action=advanced/i then 'idox'
        when /generalsearch\.aspx/i then 'northgate'
        when /ocellaweb/i then 'ocellaweb'
        when %r{/apas/} then 'agileplanning' # case-sensitive, unlike the others
        else 'unknownsystem'
        end
      end

      # @return [Array<Authority>] the registry itself (not a copy)
      def self.all
        @@authorities
      end

      # List all the tags in use
      #
      # @return [Array<String>] unique tags across all authorities, sorted
      def self.tags
        @@authorities.flat_map(&:tags).uniq.sort
      end

      # @param name [String] exact authority name
      # @return [Authority]
      # @raise [AuthorityNotFound] if no authority has that name
      def self.named(name)
        authority = @@authorities.find { |a| name == a.name }
        raise AuthorityNotFound if authority.nil?

        authority
      end

      # Tagged x
      #
      # @return [Array<Authority>] authorities whose tags include +tag+
      def self.tagged(tag)
        @@authorities.select { |a| a.tags.include?(tag) }
      end

      # Not tagged x
      #
      # @return [Array<Authority>] authorities whose tags do not include +tag+
      def self.not_tagged(tag)
        @@authorities.reject { |a| a.tags.include?(tag) }
      end

      # Authorities with no tags
      #
      # @return [Array<Authority>]
      def self.untagged
        @@authorities.select { |a| a.tags.empty? }
      end

      # Fills the registry from a CSV file whose rows are
      # "name, url, tag, tag, ...". Does nothing if the registry already holds
      # authorities. Each authority is also tagged with its #system, and its
      # tags are sorted.
      #
      # Blank lines and empty tag cells are skipped instead of raising
      # NoMethodError on nil.
      #
      # @param path [String, nil] CSV file to read; defaults to
      #   uk_planning_scraper/authorities.csv under the parent of this file's
      #   directory
      def self.load(path = nil)
        # Don't run this method more than once
        return unless @@authorities.empty?

        path ||= File.join(File.dirname(__dir__), 'uk_planning_scraper', 'authorities.csv')

        CSV.foreach(path) do |row|
          next if row.compact.empty? # blank line or a row of empty cells

          name, url, *raw_tags = row
          tags = raw_tags.compact.map(&:strip).reject(&:empty?)

          auth = Authority.new(name.strip, url.strip, tags)
          auth.tags << auth.system unless auth.tagged?(auth.system)
          auth.tags.sort!
          @@authorities << auth
        end
      end

      private

      # Returns a copy of +params+ in which every "<criterion>_days" shortcut
      # has been expanded into "<criterion>_from"/"<criterion>_to" dates.
      # Assumes that every scraper/system can do a date range search.
      def expand_day_ranges(params)
        today = Date.today # one reference date so all ranges agree

        DAY_RANGE_CRITERIA.each_with_object(params.dup) do |criterion, expanded|
          days = expanded[:"#{criterion}_days"]
          next unless days

          expanded[:"#{criterion}_to"] = today
          expanded[:"#{criterion}_from"] = today - (days - 1)
        end
      end
    end
  end
  # Populate the authority registry as soon as this file is evaluated, so the
  # class-level finders (all, named, tagged, ...) have data to work with.
  118. UKPlanningScraper::Authority.load