diff --git a/lib/uk_planning_scraper/authorities.csv b/lib/uk_planning_scraper/authorities.csv index 095d9c3..790acc3 100644 --- a/lib/uk_planning_scraper/authorities.csv +++ b/lib/uk_planning_scraper/authorities.csv @@ -1,8 +1,10 @@ authority_name,url,tags Aberdeen,https://publicaccess.aberdeencity.gov.uk/online-applications/search.do?action=advanced,scotland +Aberdeenshire,https://upa.aberdeenshire.gov.uk/online-applications/search.do?action=advanced,scotland Adur and Worthing,https://planning.adur-worthing.gov.uk/online-applications/search.do?action=advanced,england Allerdale,https://planning.allerdale.gov.uk/portal/servlets/ApplicationSearchServlet,england Amber Valley,https://www.ambervalley.gov.uk/environment-and-planning/planning/development-management/planning-applications/view-a-planning-application.aspx,england +Argyll and Bute,https://publicaccess.argyll-bute.gov.uk/online-applications/search.do?action=advanced,scotland Arun,https://www.arun.gov.uk/weekly-lists,england Ashfield,https://www2.ashfield.gov.uk/cfusion/Planning/plan_findfile.cfm,england Ashford,http://planning.ashford.gov.uk/planning/Default.aspx?new=true,england @@ -32,6 +34,7 @@ Cornwall,http://planning.cornwall.gov.uk/online-applications/search.do?action=ad County Durham,https://publicaccess.durham.gov.uk/online-applications/search.do?action=advanced,england Darlington,https://publicaccess.darlington.gov.uk/online-applications/search.do?action=advanced,england Doncaster,https://planning.doncaster.gov.uk/online-applications/search.do?action=advanced,england southyorkshire +Dumfries and Galloway,https://eaccess.dumgal.gov.uk/online-applications/search.do?action=advanced,scotland Ealing,https://pam.ealing.gov.uk/online-applications/search.do?action=advanced,londonboroughs london East Riding of Yorkshire,https://newplanningaccess.eastriding.gov.uk/newplanningaccess/search.do?action=advanced,england Edinburgh,https://citydev-portal.edinburgh.gov.uk/idoxpa-web/search.do?action=advanced,scotland @@ -46,6 +49,7 @@ Haringey,http://www.planningservices.haringey.gov.uk/portal/servlets/Application Harrow,http://www.harrow.gov.uk/planningsearch/lg/plansearch.page?org.apache.shale.dialog.DIALOG_NAME=planningsearch&Param=lg.Planning&searchType=detailed,london londonboroughs Havering,http://development.havering.gov.uk/OcellaWeb/planningSearch,london londonboroughs eastlondon outerlondon Hillingdon,http://planning.hillingdon.gov.uk/OcellaWeb/planningSearch,london londonboroughs westlondon outerlondon +Highland,https://wam.highland.gov.uk/wam/search.do?action=advanced,scotland Hounslow,http://planning.hounslow.gov.uk/planning_search.aspx,london londonboroughs Hull,https://www.hullcc.gov.uk/padcbc/publicaccess-live/search.do?action=advanced,england Islington,http://planning.islington.gov.uk/northgate/planningexplorer/generalsearch.aspx,londonboroughs london @@ -62,6 +66,7 @@ Merton,http://planning.merton.gov.uk/Northgate/PlanningExplorerAA/GeneralSearch. Milton Keynes,https://publicaccess2.milton-keynes.gov.uk/online-applications/search.do?action=advanced,england Newcastle upon Tyne,https://publicaccessapplications.newcastle.gov.uk/online-applications/search.do?action=advanced,england tyneandwear Newham,https://pa.newham.gov.uk/online-applications/search.do?action=advanced,londonboroughs london londonboroughs +North Ayrshire,https://www.eplanning.north-ayrshire.gov.uk/OnlinePlanning/search.do?action=advanced,scotland North East Lincolnshire,http://planninganddevelopment.nelincs.gov.uk/online-applications/search.do?action=advanced,england North Lincolnshire,http://www.planning.northlincs.gov.uk/plan/search/,england North Somerset,https://planning.n-somerset.gov.uk/online-applications/search.do?action=advanced,england @@ -75,6 +80,7 @@ Peterborough,https://planpa.peterborough.gov.uk/online-applications/search.do?ac Plymouth,https://planning.plymouth.gov.uk/online-applications/search.do?action=advanced,england Poole,https://boppa.poole.gov.uk/online-applications/search.do?action=advanced,england Portsmouth,http://publicaccess.portsmouth.gov.uk/online-applications/search.do?action=advanced,england +Purbeck,https://planningsearch.purbeck-dc.gov.uk/PlanAppSrch.aspx,england Redbridge,http://planning.redbridge.gov.uk/swiftlg/apas/run/wphappcriteria.display,london londonboroughs Richmond,http://www2.richmond.gov.uk/PlanData2/Planning_Report.aspx,london londonboroughs Rochdale,http://publicaccess.rochdale.gov.uk/online-applications/search.do?action=advanced,england greatermanchester diff --git a/lib/uk_planning_scraper/authority_scrape_params.rb b/lib/uk_planning_scraper/authority_scrape_params.rb index 819c6e2..6ca27d6 100644 --- a/lib/uk_planning_scraper/authority_scrape_params.rb +++ b/lib/uk_planning_scraper/authority_scrape_params.rb @@ -51,6 +51,12 @@ module UKPlanningScraper self end + def reference(s) + check_class(s, String) + @scrape_params[:reference] = s.strip + self + end + def applicant_name(s) unless system == 'idox' raise NoMethodError.new("applicant_name is only implemented for Idox. \ diff --git a/lib/uk_planning_scraper/idox.rb b/lib/uk_planning_scraper/idox.rb index 20cd434..86d0390 100644 --- a/lib/uk_planning_scraper/idox.rb +++ b/lib/uk_planning_scraper/idox.rb @@ -41,6 +41,7 @@ module UKPlanningScraper form.send(:"date(applicationDecisionStart)", params[:decided_from].strftime(date_format)) if params[:decided_from] form.send(:"date(applicationDecisionEnd)", params[:decided_to].strftime(date_format)) if params[:decided_to] + form.send(:"searchCriteria\.reference", params[:reference]) form.send(:"searchCriteria\.description", params[:keywords]) # Some councils don't have the applicant name on their form, eg Bexley @@ -115,68 +116,84 @@ module UKPlanningScraper if res.code == '200' # That's a String not an Integer, ffs # Parse the summary tab for this app - - app.scraped_at = Time.now - - # The Documents tab doesn't show if there are no documents (we get li.nodocuments instead) - # Bradford has #tab_documents but without the document count on it - app.documents_count = 0 - - if documents_link = res.at('.associateddocument a') - if documents_link.inner_text.match(/\d+/) - app.documents_count = documents_link.inner_text.match(/\d+/)[0].to_i - app.documents_url = base_url + documents_link[:href] - end - elsif documents_link = res.at('#tab_documents') - if documents_link.inner_text.match(/\d+/) - app.documents_count = documents_link.inner_text.match(/\d+/)[0].to_i - app.documents_url = base_url + documents_link[:href] - end - end - - # We need to find values in the table by using the th labels. - # The row indexes/positions change from site to site (or even app to app) so we can't rely on that. - - res.search('#simpleDetailsTable tr').each do |row| - key = row.at('th').inner_text.strip - value = row.at('td').inner_text.strip - - case key - when 'Reference' - app.council_reference = value - when 'Alternative Reference' - app.alternative_reference = value unless value.empty? - when 'Planning Portal Reference' - app.alternative_reference = value unless value.empty? - when 'Application Received' - app.date_received = Date.parse(value) if value.match(/\d/) - when 'Application Registered' - app.date_received = Date.parse(value) if value.match(/\d/) - when 'Application Validated' - app.date_validated = Date.parse(value) if value.match(/\d/) - when 'Address' - app.address = value unless value.empty? - when 'Proposal' - app.description = value unless value.empty? - when 'Status' - app.status = value unless value.empty? - when 'Decision' - app.decision = value unless value.empty? - when 'Decision Issued Date' - app.date_decision = Date.parse(value) if value.match(/\d/) - when 'Appeal Status' - app.appeal_status = value unless value.empty? - when 'Appeal Decision' - app.appeal_decision = value unless value.empty? - else - puts "Error: key '#{key}' not found" - end # case - end # each row + parse_summary(app, res) else puts "Error: HTTP #{res.code}" end # if end # scrape summary tab for apps + + if apps == [] && page.search('pa') + puts "# direct hit!" + app = Application.new + parse_summary(app, page) + apps << app + end # direct hit apps end # scrape_idox + + def parse_summary(app, res) + base_url = @url.match(/(https?:\/\/.+?)\//)[1] + + app.scraped_at = Time.now + + unless app.info_url + key_val = res.link_with(id: 'tab_summary')&.href + app.info_url = "#{base_url}#{key_val}" + end + + # The Documents tab doesn't show if there are no documents (we get li.nodocuments instead) + # Bradford has #tab_documents but without the document count on it + app.documents_count = 0 + + if documents_link = res.at('.associateddocument a') + if documents_link.inner_text.match(/\d+/) + app.documents_count = documents_link.inner_text.match(/\d+/)[0].to_i + app.documents_url = base_url + documents_link[:href] + end + elsif documents_link = res.at('#tab_documents') + if documents_link.inner_text.match(/\d+/) + app.documents_count = documents_link.inner_text.match(/\d+/)[0].to_i + app.documents_url = base_url + documents_link[:href] + end + end + + # We need to find values in the table by using the th labels. + # The row indexes/positions change from site to site (or even app to app) so we can't rely on that. + res.search('#simpleDetailsTable tr').each do |row| + key = row.at('th').inner_text.strip + value = row.at('td').inner_text.strip + + case key + when 'Reference' + app.council_reference = value + when 'Alternative Reference' + app.alternative_reference = value unless value.empty? + when 'Planning Portal Reference' + app.alternative_reference = value unless value.empty? + when 'Application Received' + app.date_received = Date.parse(value) if value.match(/\d/) + when 'Application Registered' + app.date_received = Date.parse(value) if value.match(/\d/) + when 'Application Validated' + app.date_validated = Date.parse(value) if value.match(/\d/) + when 'Address' + app.address = value unless value.empty? + when 'Proposal' + app.description = value unless value.empty? + when 'Status' + app.status = value unless value.empty? + when 'Decision' + app.decision = value unless value.empty? + when 'Decision Issued Date' + app.date_decision = Date.parse(value) if value.match(/\d/) + when 'Appeal Status' + app.appeal_status = value unless value.empty? + when 'Appeal Decision' + app.appeal_decision = value unless value.empty? + else + puts "Error: key '#{key}' not found" + end # case + end + end end # class end