A Ruby gem to get planning applications data from UK council websites.
No puede seleccionar más de 25 temas Los temas deben comenzar con una letra o número, pueden incluir guiones ('-') y pueden tener hasta 35 caracteres de largo.
 
 
 

133 líneas
3.4 KiB

  1. require 'csv'
  module UKPlanningScraper
    # A planning authority: a name, the URL of its planning search page and a
    # list of free-form tags. The class also keeps a registry of every
    # authority read by {.load} and offers finders over that registry.
    class Authority
      # Prefixes for which {#scrape} accepts a `<prefix>_days` shorthand.
      DAY_RANGE_PREFIXES = %w[validated received decided].freeze

      attr_reader :name, :tags, :url

      # Registry filled by {.load} and read by the class-level finders.
      @@authorities = []

      # @param name [String] display name of the authority
      # @param url [String] URL used both for scraping and for detecting the
      #   planning system (see {#system})
      # @param tags [Array<String>] tags attached to this authority
      def initialize(name, url, tags)
        @name = name
        @url = url
        @tags = tags
      end

      # Runs the scraper that matches this authority's planning system.
      #
      # `params` is never modified: the `*_days` shorthands are expanded on a
      # copy, so the caller can safely reuse the same hash for other
      # authorities.
      #
      # @param params [Hash] search parameters. `:validated_days`,
      #   `:received_days` and `:decided_days` (n) are expanded into matching
      #   `_from`/`_to` dates covering the last n days including today.
      #   Assumes that every scraper/system can do a date range search.
      # @param options [Hash] passed on to the scraper; user-supplied values
      #   override the defaults (`delay: 10`)
      # @return [Array<Hash>] the applications returned by the scraper, each
      #   with `:authority_name` set to this authority's name
      # @raise [SystemNotSupportedError] when {#system} is neither 'idox' nor
      #   'northgate'
      def scrape(params, options = {})
        options = { delay: 10 }.merge(options)
        params = expand_day_ranges(params)

        # Select which scraper to use. The scrape_* methods are not defined in
        # this file; they are expected to be provided elsewhere.
        apps =
          case system
          when 'idox' then scrape_idox(params, options)
          when 'northgate' then scrape_northgate(params, options)
          else
            raise SystemNotSupportedError,
                  "Planning system not supported for #{@name} at URL: #{@url}"
          end

        # Post processing
        apps.each { |app| app[:authority_name] = @name }
        apps # Single point of successful exit
      end

      # @param tag [String]
      # @return [Boolean] whether this authority carries the given tag
      def tagged?(tag)
        @tags.include?(tag)
      end

      # Guesses the planning system from recognisable fragments of the URL.
      # NOTE: within this class the method shadows Kernel#system.
      #
      # @return [String] 'idox', 'northgate', 'ocellaweb', 'agileplanning' or
      #   'unknownsystem' when nothing matches
      def system
        case @url
        when /search\.do\?action=advanced/i then 'idox'
        when /generalsearch\.aspx/i then 'northgate'
        when /ocellaweb/i then 'ocellaweb'
        when %r{/apas/} then 'agileplanning' # case-sensitive, unlike the others
        else 'unknownsystem'
        end
      end

      # @return [Array<Authority>] every loaded authority
      def self.all
        @@authorities
      end

      # List all the tags in use.
      #
      # @return [Array<String>] sorted, de-duplicated tags
      def self.tags
        @@authorities.flat_map(&:tags).uniq.sort
      end

      # @param name [String] exact authority name
      # @return [Authority]
      # @raise [AuthorityNotFound] when no loaded authority has that name
      def self.named(name)
        authority = @@authorities.find { |a| name == a.name }
        raise AuthorityNotFound unless authority

        authority
      end

      # @return [Array<Authority>] authorities tagged `tag`
      def self.tagged(tag)
        @@authorities.select { |a| a.tagged?(tag) }
      end

      # @return [Array<Authority>] authorities not tagged `tag`
      def self.not_tagged(tag)
        @@authorities.reject { |a| a.tagged?(tag) }
      end

      # @return [Array<Authority>] authorities with no tags
      def self.untagged
        @@authorities.select { |a| a.tags.empty? }
      end

      # Fills the registry from a CSV file whose rows are
      # `name, url, tag, tag, ...`. Blank rows and empty tag cells are ignored,
      # the detected system is added as a tag, and each tag list is sorted.
      # Does nothing when the registry already holds authorities.
      #
      # @param path [String] CSV file to read; defaults to the
      #   `authorities.csv` that sits in the `uk_planning_scraper` directory
      def self.load(path = File.join(File.dirname(__dir__), 'uk_planning_scraper', 'authorities.csv'))
        # Don't run this method more than once
        return unless @@authorities.empty?

        CSV.foreach(path, skip_blanks: true) do |row|
          name, url, *raw_tags = row
          # CSV yields nil for empty cells (e.g. a trailing comma); drop them
          # instead of crashing on nil.strip.
          tags = raw_tags.compact.map(&:strip).reject(&:empty?)

          auth = Authority.new(name.strip, url.strip, tags)
          auth.tags << auth.system unless auth.tagged?(auth.system)
          auth.tags.sort!
          @@authorities << auth
        end
      end

      private

      # Returns a copy of `params` in which each `<prefix>_days` value n has
      # been expanded to `<prefix>_to` (today) and `<prefix>_from`
      # (today - (n - 1)). Today's date is read once so all ranges agree.
      def expand_day_ranges(params)
        expanded = params.dup
        today = Date.today

        DAY_RANGE_PREFIXES.each do |prefix|
          days = expanded[:"#{prefix}_days"]
          next unless days

          expanded[:"#{prefix}_to"] = today
          expanded[:"#{prefix}_from"] = today - (days - 1)
        end

        expanded
      end
    end
  end
  # Fill the authority registry as soon as this file is evaluated, so the
  # class-level finders have data without an explicit load call.
  113. UKPlanningScraper::Authority.load