Currently we only parse:
- application_type
- location_easting
- location_northing

northgate-dates
| @@ -74,6 +74,16 @@ module UKPlanningScraper | |||||
| # This may change if there are subsequent extensions. | # This may change if there are subsequent extensions. | ||||
| attr_accessor :extended_expiry_date | attr_accessor :extended_expiry_date | ||||
| # Application type: Full planning permission, advertisement, | |||||
| # LDC, prior approval etc. | |||||
| # Codes are specific to each local planning authority although there will | |||||
| # be a high degree of overlap between LPAs | |||||
| attr_accessor :application_type | |||||
| attr_accessor :location_easting | |||||
| attr_accessor :location_northing | |||||
| def to_hash | def to_hash | ||||
| { | { | ||||
| scraped_at: @scraped_at, | scraped_at: @scraped_at, | ||||
| @@ -94,7 +104,10 @@ module UKPlanningScraper | |||||
| appeal_decision: @appeal_decision, | appeal_decision: @appeal_decision, | ||||
| consultation_end_date: @consultation_end_date, | consultation_end_date: @consultation_end_date, | ||||
| statutory_due_date: @statutory_due_date, | statutory_due_date: @statutory_due_date, | ||||
| extended_expiry_date: @extended_expiry_date | |||||
| extended_expiry_date: @extended_expiry_date, | |||||
| application_type: @application_type, | |||||
| location_easting: @location_easting, | |||||
| location_northing: @location_northing | |||||
| } | } | ||||
| end | end | ||||
| @@ -73,6 +73,15 @@ module UKPlanningScraper | |||||
| self | self | ||||
| end | end | ||||
| def include_details | |||||
| unless system == 'northgate' | |||||
| raise NoMethodError.new("include_details is only implemented for Northgate. This authority (#{@name}) is #{system.capitalize}.") | |||||
| end | |||||
| @scrape_params[:include_details] = true | |||||
| self | |||||
| end | |||||
| def include_dates | def include_dates | ||||
| unless system == 'northgate' | unless system == 'northgate' | ||||
| raise NoMethodError.new("include_dates is only implemented for Northgate. This authority (#{@name}) is #{system.capitalize}.") | raise NoMethodError.new("include_dates is only implemented for Northgate. This authority (#{@name}) is #{system.capitalize}.") | ||||
| @@ -10,6 +10,7 @@ module UKPlanningScraper | |||||
| logger.level = Logger::DEBUG | logger.level = Logger::DEBUG | ||||
| logger.info "Using Northgate scraper." | logger.info "Using Northgate scraper." | ||||
| logger.info "Will also scrape details page." if params[:include_details] | |||||
| logger.info "Will also scrape dates page." if params[:include_dates] | logger.info "Will also scrape dates page." if params[:include_dates] | ||||
| base_url = @url.match(/(https?:\/\/.+?)\//)[1] | base_url = @url.match(/(https?:\/\/.+?)\//)[1] | ||||
| @@ -137,6 +138,7 @@ module UKPlanningScraper | |||||
| end | end | ||||
| end | end | ||||
| # Scrape dates page if required | |||||
| if params[:include_dates] | if params[:include_dates] | ||||
| apps.each do |app| | apps.each do |app| | ||||
| sleep options[:delay] | sleep options[:delay] | ||||
| @@ -175,7 +177,37 @@ module UKPlanningScraper | |||||
| end | end | ||||
| end | end | ||||
| end | end | ||||
| # Scrape details page if required | |||||
| if params[:include_details] | |||||
| apps.each do |app| | |||||
| sleep options[:delay] | |||||
| agent = Mechanize.new | |||||
| # agent.agent.http.verify_mode = OpenSSL::SSL::VERIFY_NONE | |||||
| logger.info "Getting details page for application #{app.council_reference}: #{app.info_url}" | |||||
| page = agent.get(app.info_url) # load the application's details page | |||||
| if page.code == '200' | |||||
| page.search(".dataview")[2].search(".list li").each do |element| | |||||
| if bits = element.inner_html.match(/<span>(.+)<\/span>(.+)</) | |||||
| # Some labels have tab characters (\t) in them | |||||
| label = bits[1].downcase.gsub(/[[:space:]]+/, ' ').strip | |||||
| value = bits[2].gsub(/[[:space:]]+/, ' ').strip | |||||
| case label | |||||
| when 'application type' | |||||
| app.application_type = value | |||||
| when 'location co ordinates' | |||||
| coords = value.match(/Easting.+?(\d+).+?(\d+)/) | |||||
| app.location_easting = coords[1].to_i | |||||
| app.location_northing = coords[2].to_i | |||||
| end | |||||
| end | |||||
| end | |||||
| end | |||||
| end | |||||
| end | |||||
| apps | apps | ||||
| end | end | ||||
| end | end | ||||