From 580b88770850fe637f11ed21a5b90a3d16b0afa8 Mon Sep 17 00:00:00 2001 From: Adrian Short Date: Fri, 21 Sep 2018 12:10:23 +0100 Subject: [PATCH] Automatically add tags for software systems --- README.md | 6 ++-- lib/uk_planning_scraper/authorities.csv | 40 ++++++++++++------------- lib/uk_planning_scraper/authority.rb | 29 ++++++++++++++---- 3 files changed, 46 insertions(+), 29 deletions(-) diff --git a/README.md b/README.md index f13c263..5940266 100644 --- a/README.md +++ b/README.md @@ -118,13 +118,13 @@ We've got tags for areas: - surrey - wales -and software systems: +We also automatically add tags for software systems: - idox - northgate - ocellaweb - agileplanning -- unknownsystem -- use when you can't identify the system +- unknownsystem -- for when we can't identify the system and whatever you'd like to add that would be useful to others. @@ -185,7 +185,7 @@ The file format is one line per authority, with comma-separated: - URL of the search form (use the advanced search URL if there is one) - Tags (use as many comma-separated tags as is reasonable, lowercase and all one word.) -Currently only `idox` and `northgate` scrapers work but feel free to add authorities that use other systems, along with appropriate system tags like `ocellaweb` and `agileplanning`. Use `unknownsystem` if you can't identify the system. This gem selects the appropriate scraper by examining the URL not by looking at the tags, so it doesn't matter what you use as long as it's consistent with others. +There's no need to manually add tags to the `authorities.csv` file for the software systems like `idox`, `northgate` etc as these are added automatically. Please check the tag list before you change anything: diff --git a/lib/uk_planning_scraper/authorities.csv b/lib/uk_planning_scraper/authorities.csv index eb8c31b..1393f46 100644 --- a/lib/uk_planning_scraper/authorities.csv +++ b/lib/uk_planning_scraper/authorities.csv @@ -1,31 +1,31 @@ -Barking and Dagenham,http://paplan.lbbd.gov.uk/online-applications/search.do?action=advanced,london,outerlondon,northlondon,england,idox,londonboroughs +Barking and Dagenham,http://paplan.lbbd.gov.uk/online-applications/search.do?action=advanced,london,outerlondon,northlondon,england,londonboroughs Barnet,https://publicaccess.barnet.gov.uk/online-applications/search.do?action=advanced,londonboroughs,london Bexley,http://pa.bexley.gov.uk/online-applications/search.do?action=advanced,londonboroughs,london Brent,https://pa.brent.gov.uk/online-applications/search.do?action=advanced&searchType=Application,londonboroughs,london Bromley,https://searchapplications.bromley.gov.uk/online-applications/search.do?action=advanced,londonboroughs,london Camden,http://planningrecords.camden.gov.uk/Northgate/PlanningExplorer17/GeneralSearch.aspx,londonboroughs,london -City of London,http://www.planning2.cityoflondon.gov.uk/online-applications/search.do?action=advanced,london,innerlondon,northlondon,england,idox +City of London,http://www.planning2.cityoflondon.gov.uk/online-applications/search.do?action=advanced,london,innerlondon,northlondon,england Croydon,http://publicaccess2.croydon.gov.uk/online-applications/search.do?action=advanced,londonboroughs,london Ealing,https://pam.ealing.gov.uk/online-applications/search.do?action=advanced,londonboroughs,london Enfield,https://planningandbuildingcontrol.enfield.gov.uk/online-applications/search.do?action=advanced,londonboroughs,london -Greenwich,https://planning.royalgreenwich.gov.uk/online-applications/search.do?action=advanced,london,innerlondon,southlondon,england,idox,londonboroughs +Greenwich,https://planning.royalgreenwich.gov.uk/online-applications/search.do?action=advanced,london,innerlondon,southlondon,england,londonboroughs Hackney,http://planning.hackney.gov.uk/Northgate/PlanningExplorer/generalsearch.aspx,londonboroughs,london -Hammersmith and Fulham,http://public-access.lbhf.gov.uk/online-applications/search.do?action=advanced,london,innerlondon,northlondon,england,idox,londonboroughs +Hammersmith and Fulham,http://public-access.lbhf.gov.uk/online-applications/search.do?action=advanced,london,innerlondon,northlondon,england,londonboroughs Haringey,http://www.planningservices.haringey.gov.uk/portal/servlets/ApplicationSearchServlet,unknownsystem,london,londonboroughs Harrow,http://www.harrow.gov.uk/planningsearch/lg/plansearch.page?org.apache.shale.dialog.DIALOG_NAME=planningsearch&Param=lg.Planning&searchType=detailed,unknownsystem,london,londonboroughs -Havering,http://development.havering.gov.uk/OcellaWeb/planningSearch,london,ocellaweb,londonboroughs,eastlondon,outerlondon -Hillingdon,http://planning.hillingdon.gov.uk/OcellaWeb/planningSearch,london,ocellaweb,londonboroughs,westlondon,outerlondon +Havering,http://development.havering.gov.uk/OcellaWeb/planningSearch,london,londonboroughs,eastlondon,outerlondon +Hillingdon,http://planning.hillingdon.gov.uk/OcellaWeb/planningSearch,london,londonboroughs,westlondon,outerlondon Hounslow,http://planning.hounslow.gov.uk/planning_search.aspx,unknownsystem,london,londonboroughs Islington,http://planning.islington.gov.uk/northgate/planningexplorer/generalsearch.aspx,londonboroughs,london Kensington and Chelsea,https://www.rbkc.gov.uk/planning/searches/default.aspx?adv=1#advancedSearch,unknownsystem,london,londonboroughs Kingston upon Thames,https://maps.kingston.gov.uk/propertyServices/planning/Search,unknownsystem,london,londonboroughs Lambeth,https://planning.lambeth.gov.uk/online-applications/search.do?action=advanced,londonboroughs,london Lewisham,http://planning.lewisham.gov.uk/online-applications/search.do?action=advanced,londonboroughs,london -London Legacy Development Corporation,http://planningregister.londonlegacy.co.uk/swift/apas/run/wphappcriteria.display,london,agileplanning,developmentcorporations,londondevelopmentcorporations -Merton,http://planning.merton.gov.uk/Northgate/PlanningExplorerAA/GeneralSearch.aspx,london,outerlondon,southlondon,england,northgate,londonboroughs -Newham,https://pa.newham.gov.uk/online-applications/search.do?action=advanced,londonboroughs,london,idox,londonboroughs -Old Oak and Park Royal Development Corporation,http://planningregister.opdc.london.gov.uk/swift/apas/run/wphappcriteria.display,london,agileplanning,developmentcorporations,londondevelopmentcorporations -Redbridge,http://planning.redbridge.gov.uk/swiftlg/apas/run/wphappcriteria.display,agileplanning,london,londonboroughs +London Legacy Development Corporation,http://planningregister.londonlegacy.co.uk/swift/apas/run/wphappcriteria.display,london,developmentcorporations,londondevelopmentcorporations +Merton,http://planning.merton.gov.uk/Northgate/PlanningExplorerAA/GeneralSearch.aspx,london,outerlondon,southlondon,england,londonboroughs +Newham,https://pa.newham.gov.uk/online-applications/search.do?action=advanced,londonboroughs,london,londonboroughs +Old Oak and Park Royal Development Corporation,http://planningregister.opdc.london.gov.uk/swift/apas/run/wphappcriteria.display,london,developmentcorporations,londondevelopmentcorporations +Redbridge,http://planning.redbridge.gov.uk/swiftlg/apas/run/wphappcriteria.display,london,londonboroughs Richmond,http://www2.richmond.gov.uk/PlanData2/Planning_Report.aspx,unknownsystem,london,londonboroughs Southwark,https://planning.southwark.gov.uk/online-applications/search.do?action=advanced,londonboroughs,london Sutton,https://planningregister.sutton.gov.uk/online-applications/search.do?action=advanced,london,londonboroughs,outerlondon,southlondon,england @@ -33,15 +33,15 @@ Tower Hamlets,https://development.towerhamlets.gov.uk/online-applications/search Waltham Forest,http://planning.walthamforest.gov.uk/application-search?civica.query.ReceivedDateFrom=20%2F05%2F2018&civica.query.ReceivedDateTo=26%2F05%2F2018,unknownsystem,london,londonboroughs Wandsworth,https://planning1.wandsworth.gov.uk/Northgate/PlanningExplorer/GeneralSearch.aspx,london,innerlondon,southlondon,england,londonboroughs Westminster,http://idoxpa.westminster.gov.uk/online-applications/search.do?action=advanced,londonboroughs,london -Bolton,https://www.planningpa.bolton.gov.uk/online-applications-17/search.do?action=advanced,idox,england,greatermanchester -Bury,https://planning.bury.gov.uk/online-applications/search.do??action=advanced,idox,england,greatermanchester -Manchester,https://pa.manchester.gov.uk/online-applications/search.do?action=advanced,idox,england,greatermanchester -Oldham,http://planningpa.oldham.gov.uk/online-applications/search.do?action=advanced,idox,england,greatermanchester -Rochdale,http://publicaccess.rochdale.gov.uk/online-applications/search.do?action=advanced,idox,england,greatermanchester -Salford,http://publicaccess.salford.gov.uk/publicaccess/search.do?action=advanced,idox,england,greatermanchester -Stockport,http://planning.stockport.gov.uk/PlanningData-live/search.do?action=advanced,idox,england,greatermanchester +Bolton,https://www.planningpa.bolton.gov.uk/online-applications-17/search.do?action=advanced,england,greatermanchester +Bury,https://planning.bury.gov.uk/online-applications/search.do?action=advanced,england,greatermanchester +Manchester,https://pa.manchester.gov.uk/online-applications/search.do?action=advanced,england,greatermanchester +Oldham,http://planningpa.oldham.gov.uk/online-applications/search.do?action=advanced,england,greatermanchester +Rochdale,http://publicaccess.rochdale.gov.uk/online-applications/search.do?action=advanced,england,greatermanchester +Salford,http://publicaccess.salford.gov.uk/publicaccess/search.do?action=advanced,england,greatermanchester +Stockport,http://planning.stockport.gov.uk/PlanningData-live/search.do?action=advanced,england,greatermanchester Tameside,https://public.tameside.gov.uk/plan/f422planapp.asp,unknownsystem,england,greatermanchester -Trafford,https://publicaccess.trafford.gov.uk/online-applications/search.do?action=advanced,idox,england,greatermanchester +Trafford,https://publicaccess.trafford.gov.uk/online-applications/search.do?action=advanced,england,greatermanchester Wigan,https://apps.wigan.gov.uk/planapps/PlanAppsAppSearch.asp,unknownsystem,england,greatermanchester Bristol,https://planningonline.bristol.gov.uk/online-applications/search.do?action=advanced Leeds,https://publicaccess.leeds.gov.uk/online-applications/search.do?action=advanced @@ -53,4 +53,4 @@ Wakefield,https://planning.wakefield.gov.uk/online-applications/search.do?action Cardiff,https://planningonline.cardiff.gov.uk/online-applications/search.do?action=advanced,wales Birmingham,https://eplanning.birmingham.gov.uk/Northgate/PlanningExplorer/GeneralSearch.aspx Liverpool,http://northgate.liverpool.gov.uk/PlanningExplorer17/GeneralSearch.aspx -Epsom and Ewell,http://eplanning.epsom-ewell.gov.uk/online-applications/search.do?action=advanced,surrey,england,idox +Epsom and Ewell,http://eplanning.epsom-ewell.gov.uk/online-applications/search.do?action=advanced,surrey,england diff --git a/lib/uk_planning_scraper/authority.rb b/lib/uk_planning_scraper/authority.rb index cde35e9..56ad8ac 100644 --- a/lib/uk_planning_scraper/authority.rb +++ b/lib/uk_planning_scraper/authority.rb @@ -38,13 +38,13 @@ module UKPlanningScraper params[:decided_from] = Date.today - (params[:decided_days] - 1) end - # Select which scraper to use based on the URL - if @url.match(/search\.do\?action=advanced/i) + # Select which scraper to use + case system + when 'idox' apps = scrape_idox(params, options) - elsif @url.match(/generalsearch\.aspx/i) + when 'northgate' apps = scrape_northgate(params, options) else - # Not supported raise SystemNotSupportedError.new("Planning system not supported for #{@name} at URL: #{@url}") end @@ -59,6 +59,20 @@ module UKPlanningScraper def tagged?(tag) @tags.include?(tag) end + + def system + if @url.match(/search\.do\?action=advanced/i) + s = 'idox' + elsif @url.match(/generalsearch\.aspx/i) + s = 'northgate' + elsif @url.match(/ocellaweb/i) + s = 'ocellaweb' + elsif @url.match(/\/apas\//) + s = 'agileplanning' + else + s = 'unknownsystem' + end + end def self.all @@authorities @@ -103,10 +117,13 @@ module UKPlanningScraper return unless @@authorities.empty? # FIXME hardcoded file path CSV.foreach(File.join(File.dirname(__dir__), 'uk_planning_scraper', 'authorities.csv')) do |line| - @@authorities << Authority.new( + auth = Authority.new( line[0].strip, line[1].strip, - line[2..-1].map { |e| e.strip }.sort) + line[2..-1].map { |e| e.strip }) + auth.tags << auth.system unless auth.tagged?(auth.system) + auth.tags.sort! + @@authorities << auth end end end