
Add scraper for Crawley from Andre.

import/raw
duncan.parkes@gmail.com 15 years ago
parent commit ee425b23b5
3 changed files with 70 additions and 0 deletions
  1. trunk/OtherFilesToCopy.csv  +1 -0
  2. trunk/SitesToGenerate.csv  +1 -0
  3. trunk/python_scrapers/Crawley.py  +68 -0

trunk/OtherFilesToCopy.csv  (+1 -0)

@@ -70,3 +70,4 @@
 "Weymouth.py", "420"
 "Solihull.py", "420"
 "Wychavon.py", "420"
+"Crawley.py", "420"

trunk/SitesToGenerate.csv  (+1 -0)

@@ -72,6 +72,7 @@
 "Cornwall County Council","Cornwall",,,,,,"http://planapps.cornwall.gov.uk/publicaccess/tdc/","PublicAccess","PublicAccessParser",
 "Coventry City Council","Coventry",,,,,,"http://planning.coventry.gov.uk/","ApplicationSearchServletParser","CoventrySearchParser",
 "Craven District Council","Craven",,,,,,"http://www.planning.cravendc.gov.uk/fastweb/","FastWeb","FastWeb",
+"Crawley Borough Council","Crawley",,,,,,,"Crawley","CrawleyParser",
 "Crewe and Nantwich Borough Council","Crewe and Nantwich",,,,,,"http://portal.crewe-nantwich.gov.uk/","PlanningExplorer","CreweParser",
 "London Borough of Croydon","Croydon",,,,,,"http://planning.croydon.gov.uk/DCWebPages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch","AcolnetParser","AcolnetParser",
 "Cumbria County Council","Cumbria",,,,,,"http://217.114.50.149:7778/ePlanningOPS/loadResults.do","AtriumePlanning","AtriumePlanningParser",


trunk/python_scrapers/Crawley.py  (+68 -0)

@@ -0,0 +1,68 @@
import urllib2
import urlparse
import datetime, time
import BeautifulSoup
from PlanningUtils import PlanningApplication, PlanningAuthorityResults

date_format = "%d/%m/%Y"

class CrawleyParser:
    comment_url_template = "http://www.crawley.gov.uk/stellent/idcplg?IdcService=SS_GET_PAGE&nodeId=561&pageCSS=&pAppNo=%(pAppNo)s&pAppDocName=%(pAppDocName)s"

    def __init__(self, *args):
        self.authority_name = "Crawley Borough Council"
        self.authority_short_name = "Crawley"
        self.base_url = "http://www.crawley.gov.uk/stellent/idcplg?IdcService=SS_GET_PAGE&nodeId=560&is_NextRow=1&accept=yes&strCSS=null&pApplicationNo=&pProposal=&pLocation=&pPostcode=&pWard=&pDateType=received&pDayFrom=%(dayFrom)s&pMonthFrom=%(monthFrom)s&pYearFrom=%(yearFrom)s&pDayTo=%(dayTo)s&pMonthTo=%(monthTo)s&pYearTo=%(yearTo)s&submit=Search"

        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

    def getResultsByDayMonthYear(self, day, month, year):
        search_day = datetime.date(year, month, day)

        #- Crawley only allows searches from-to, so:
        next = self.base_url % {
            "dayFrom": day,
            "monthFrom": month,
            "yearFrom": year,
            "dayTo": day,
            "monthTo": month,
            "yearTo": year,
            }

        # Now get the search page
        response = urllib2.urlopen(next)
        soup = BeautifulSoup.BeautifulSoup(response.read())

        if soup.table: #- Empty result set has no table
            trs = soup.table.findAll("tr")[1:] # First one is just headers
            for tr in trs:
                tds = tr.findAll("td")
                application = PlanningApplication()
                # The reference arrives with an HTML-encoded slash
                application.council_reference = tds[0].a.contents[0].strip().replace("&#47;", "/")
                application.info_url = urlparse.urljoin(self.base_url, tds[0].a['href'])

                # urlsplit()[3] is the query string; pull ssDocName out of it
                # to build the comment-page URL.
                info_qs = urlparse.parse_qs(urlparse.urlsplit(application.info_url)[3])

                comment_qs = {
                    "pAppNo": application.council_reference,
                    "pAppDocName": info_qs["ssDocName"][0],
                    }
                application.comment_url = self.comment_url_template % comment_qs

                application.address = tds[1].string.strip()
                if tds[2].string: #- if postcode present, append it to the address too
                    application.postcode = tds[2].string.replace("&nbsp;", " ").strip()
                    application.address += ", " + application.postcode
                application.description = tds[3].string.strip()
                application.date_received = datetime.datetime(*(time.strptime(tds[4].string.strip(), date_format)[0:6]))
                self._results.addApplication(application)
        return self._results

    def getResults(self, day, month, year):
        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()

if __name__ == '__main__':
    parser = CrawleyParser()
    print parser.getResults(12, 6, 2008)
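
Crawley.py imports PlanningApplication and PlanningAuthorityResults from PlanningUtils, which this commit does not touch. A minimal stand-in, enough to run the scraper standalone, might look like the following; the attribute names mirror what CrawleyParser assigns, but the displayXML output shape is an assumption, not the real module's format:

# PlanningUtils stub -- NOT the real module, just enough for Crawley.py to run.
import cgi

class PlanningApplication:
    def __init__(self):
        # Fields CrawleyParser fills in.
        self.council_reference = None
        self.info_url = None
        self.comment_url = None
        self.address = None
        self.postcode = None
        self.description = None
        self.date_received = None

class PlanningAuthorityResults:
    def __init__(self, authority_name, authority_short_name):
        self.authority_name = authority_name
        self.authority_short_name = authority_short_name
        self.applications = []

    def addApplication(self, application):
        self.applications.append(application)

    def displayXML(self):
        # Assumed output shape; the real format lives in PlanningUtils.
        lines = ['<planning authority="%s">' % cgi.escape(self.authority_name, True)]
        for app in self.applications:
            lines.append('  <application reference="%s" date_received="%s"/>' %
                         (cgi.escape(app.council_reference, True),
                          app.date_received.strftime("%d/%m/%Y")))
        lines.append('</planning>')
        return "\n".join(lines)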
