浏览代码

get details by planning reference (Idox)

UKPlanningScraper::Authority.named('xyz').reference('2016/nnn/abc').scrape
pull/37/head
Jez Nicholson 5 年前
父节点
当前提交
1c9287e6c9
共有 3 个文件被更改,包括 86 次插入57 次删除
  1. +6
    -0
      lib/uk_planning_scraper/authorities.csv
  2. +6
    -0
      lib/uk_planning_scraper/authority_scrape_params.rb
  3. +74
    -57
      lib/uk_planning_scraper/idox.rb

+ 6
- 0
lib/uk_planning_scraper/authorities.csv 查看文件

@@ -1,8 +1,10 @@
authority_name,url,tags
Aberdeen,https://publicaccess.aberdeencity.gov.uk/online-applications/search.do?action=advanced,scotland
Aberdeenshire,https://upa.aberdeenshire.gov.uk/online-applications/search.do?action=advanced,scotland
Adur and Worthing,https://planning.adur-worthing.gov.uk/online-applications/search.do?action=advanced,england
Allerdale,https://planning.allerdale.gov.uk/portal/servlets/ApplicationSearchServlet,england
Amber Valley,https://www.ambervalley.gov.uk/environment-and-planning/planning/development-management/planning-applications/view-a-planning-application.aspx,england
Argyll and Bute,https://publicaccess.argyll-bute.gov.uk/online-applications/search.do?action=advanced,scotland
Arun,https://www.arun.gov.uk/weekly-lists,england
Ashfield,https://www2.ashfield.gov.uk/cfusion/Planning/plan_findfile.cfm,england
Ashford,http://planning.ashford.gov.uk/planning/Default.aspx?new=true,england
@@ -32,6 +34,7 @@ Cornwall,http://planning.cornwall.gov.uk/online-applications/search.do?action=ad
County Durham,https://publicaccess.durham.gov.uk/online-applications/search.do?action=advanced,england
Darlington,https://publicaccess.darlington.gov.uk/online-applications/search.do?action=advanced,england
Doncaster,https://planning.doncaster.gov.uk/online-applications/search.do?action=advanced,england southyorkshire
Dumfries and Galloway,https://eaccess.dumgal.gov.uk/online-applications/search.do?action=advanced,scotland
Ealing,https://pam.ealing.gov.uk/online-applications/search.do?action=advanced,londonboroughs london
East Riding of Yorkshire,https://newplanningaccess.eastriding.gov.uk/newplanningaccess/search.do?action=advanced,england
Edinburgh,https://citydev-portal.edinburgh.gov.uk/idoxpa-web/search.do?action=advanced,scotland
@@ -46,6 +49,7 @@ Haringey,http://www.planningservices.haringey.gov.uk/portal/servlets/Application
Harrow,http://www.harrow.gov.uk/planningsearch/lg/plansearch.page?org.apache.shale.dialog.DIALOG_NAME=planningsearch&Param=lg.Planning&searchType=detailed,london londonboroughs
Havering,http://development.havering.gov.uk/OcellaWeb/planningSearch,london londonboroughs eastlondon outerlondon
Hillingdon,http://planning.hillingdon.gov.uk/OcellaWeb/planningSearch,london londonboroughs westlondon outerlondon
Highland,https://wam.highland.gov.uk/wam/search.do?action=advanced,scotland
Hounslow,http://planning.hounslow.gov.uk/planning_search.aspx,london londonboroughs
Hull,https://www.hullcc.gov.uk/padcbc/publicaccess-live/search.do?action=advanced,england
Islington,http://planning.islington.gov.uk/northgate/planningexplorer/generalsearch.aspx,londonboroughs london
@@ -62,6 +66,7 @@ Merton,http://planning.merton.gov.uk/Northgate/PlanningExplorerAA/GeneralSearch.
Milton Keynes,https://publicaccess2.milton-keynes.gov.uk/online-applications/search.do?action=advanced,england
Newcastle upon Tyne,https://publicaccessapplications.newcastle.gov.uk/online-applications/search.do?action=advanced,england tyneandwear
Newham,https://pa.newham.gov.uk/online-applications/search.do?action=advanced,londonboroughs london
North Ayrshire,https://www.eplanning.north-ayrshire.gov.uk/OnlinePlanning/search.do?action=advanced,scotland
North East Lincolnshire,http://planninganddevelopment.nelincs.gov.uk/online-applications/search.do?action=advanced,england
North Lincolnshire,http://www.planning.northlincs.gov.uk/plan/search/,england
North Somerset,https://planning.n-somerset.gov.uk/online-applications/search.do?action=advanced,england
@@ -75,6 +80,7 @@ Peterborough,https://planpa.peterborough.gov.uk/online-applications/search.do?ac
Plymouth,https://planning.plymouth.gov.uk/online-applications/search.do?action=advanced,england
Poole,https://boppa.poole.gov.uk/online-applications/search.do?action=advanced,england
Portsmouth,http://publicaccess.portsmouth.gov.uk/online-applications/search.do?action=advanced,england
Purbeck,https://planningsearch.purbeck-dc.gov.uk/PlanAppSrch.aspx,england
Redbridge,http://planning.redbridge.gov.uk/swiftlg/apas/run/wphappcriteria.display,london londonboroughs
Richmond,http://www2.richmond.gov.uk/PlanData2/Planning_Report.aspx,london londonboroughs
Rochdale,http://publicaccess.rochdale.gov.uk/online-applications/search.do?action=advanced,england greatermanchester


+ 6
- 0
lib/uk_planning_scraper/authority_scrape_params.rb 查看文件

@@ -51,6 +51,12 @@ module UKPlanningScraper
self
end
# Records the planning reference to search for, with surrounding whitespace
# removed, under the :reference scrape parameter.
#
# s - the reference to look up; handed to check_class together with String
#     (presumably a type guard that raises for non-Strings - confirm where
#     check_class is defined).
#
# Returns self so further calls can be chained onto the result.
def reference(s)
  check_class(s, String)
  trimmed = s.strip
  @scrape_params[:reference] = trimmed
  self
end
def applicant_name(s)
unless system == 'idox'
raise NoMethodError.new("applicant_name is only implemented for Idox. \


+ 74
- 57
lib/uk_planning_scraper/idox.rb 查看文件

@@ -41,6 +41,7 @@ module UKPlanningScraper
form.send(:"date(applicationDecisionStart)", params[:decided_from].strftime(date_format)) if params[:decided_from]
form.send(:"date(applicationDecisionEnd)", params[:decided_to].strftime(date_format)) if params[:decided_to]

form.send(:"searchCriteria\.reference", params[:reference])
form.send(:"searchCriteria\.description", params[:keywords])
# Some councils don't have the applicant name on their form, eg Bexley
@@ -115,68 +116,84 @@ module UKPlanningScraper
if res.code == '200' # That's a String not an Integer, ffs
# Parse the summary tab for this app

app.scraped_at = Time.now

# The Documents tab doesn't show if there are no documents (we get li.nodocuments instead)
# Bradford has #tab_documents but without the document count on it
app.documents_count = 0

if documents_link = res.at('.associateddocument a')
if documents_link.inner_text.match(/\d+/)
app.documents_count = documents_link.inner_text.match(/\d+/)[0].to_i
app.documents_url = base_url + documents_link[:href]
end
elsif documents_link = res.at('#tab_documents')
if documents_link.inner_text.match(/\d+/)
app.documents_count = documents_link.inner_text.match(/\d+/)[0].to_i
app.documents_url = base_url + documents_link[:href]
end
end
# We need to find values in the table by using the th labels.
# The row indexes/positions change from site to site (or even app to app) so we can't rely on that.

res.search('#simpleDetailsTable tr').each do |row|
key = row.at('th').inner_text.strip
value = row.at('td').inner_text.strip
case key
when 'Reference'
app.council_reference = value
when 'Alternative Reference'
app.alternative_reference = value unless value.empty?
when 'Planning Portal Reference'
app.alternative_reference = value unless value.empty?
when 'Application Received'
app.date_received = Date.parse(value) if value.match(/\d/)
when 'Application Registered'
app.date_received = Date.parse(value) if value.match(/\d/)
when 'Application Validated'
app.date_validated = Date.parse(value) if value.match(/\d/)
when 'Address'
app.address = value unless value.empty?
when 'Proposal'
app.description = value unless value.empty?
when 'Status'
app.status = value unless value.empty?
when 'Decision'
app.decision = value unless value.empty?
when 'Decision Issued Date'
app.date_decision = Date.parse(value) if value.match(/\d/)
when 'Appeal Status'
app.appeal_status = value unless value.empty?
when 'Appeal Decision'
app.appeal_decision = value unless value.empty?
else
puts "Error: key '#{key}' not found"
end # case
end # each row
parse_summary(app, res)
else
puts "Error: HTTP #{res.code}"
end # if
end # scrape summary tab for apps

if apps == [] && page.search('pa')
puts "# direct hit!"
app = Application.new
parse_summary(app, page)
apps << app
end # direct hit
apps
end # scrape_idox

# Fills in +app+ from an Idox application summary page.
#
# Stamps scraped_at, records info_url from the summary tab link (only when the
# app has none yet), reads the documents count/URL, and copies each labelled
# row of #simpleDetailsTable onto the matching attribute of +app+.
#
# app - application object exposing the attribute writers used below, plus an
#       info_url reader.
# res - parsed summary page; must respond to link_with, at and search
#       (presumably a Mechanize::Page - confirm against the caller).
#
# Raises NoMethodError if @url does not contain a scheme://host/ prefix.
# The return value is not meaningful to callers visible in this file.
def parse_summary(app, res)
  base_url = @url.match(%r{(https?://.+?)/})[1]

  app.scraped_at = Time.now

  unless app.info_url
    # Only record a URL when the summary tab link is actually present;
    # otherwise info_url would end up as the bare host, which is misleading.
    summary_href = res.link_with(id: 'tab_summary')&.href
    app.info_url = "#{base_url}#{summary_href}" if summary_href
  end

  # The Documents tab doesn't show if there are no documents (we get li.nodocuments instead)
  # Bradford has #tab_documents but without the document count on it
  app.documents_count = 0

  # Prefer the associated-documents link; fall back to the documents tab.
  # A link without a number in its text leaves the count at 0 and the URL unset.
  documents_link = res.at('.associateddocument a') || res.at('#tab_documents')
  documents_total = documents_link && documents_link.inner_text[/\d+/]
  if documents_total
    app.documents_count = documents_total.to_i
    app.documents_url = base_url + documents_link[:href]
  end

  # We need to find values in the table by using the th labels.
  # The row indexes/positions change from site to site (or even app to app) so we can't rely on that.
  res.search('#simpleDetailsTable tr').each do |row|
    label_cell = row.at('th')
    value_cell = row.at('td')
    # Rows without both a label and a value cell carry no data; skip them
    # rather than calling inner_text on nil.
    next unless label_cell && value_cell

    key = label_cell.inner_text.strip
    value = value_cell.inner_text.strip

    case key
    when 'Reference'
      app.council_reference = value
    when 'Alternative Reference', 'Planning Portal Reference'
      app.alternative_reference = value unless value.empty?
    when 'Application Received', 'Application Registered'
      # Dates are only parsed when the cell contains at least one digit.
      app.date_received = Date.parse(value) if value.match(/\d/)
    when 'Application Validated'
      app.date_validated = Date.parse(value) if value.match(/\d/)
    when 'Address'
      app.address = value unless value.empty?
    when 'Proposal'
      app.description = value unless value.empty?
    when 'Status'
      app.status = value unless value.empty?
    when 'Decision'
      app.decision = value unless value.empty?
    when 'Decision Issued Date'
      app.date_decision = Date.parse(value) if value.match(/\d/)
    when 'Appeal Status'
      app.appeal_status = value unless value.empty?
    when 'Appeal Decision'
      app.appeal_decision = value unless value.empty?
    else
      puts "Error: key '#{key}' not found"
    end
  end
end
end # class
end

正在加载...
取消
保存