# Crawley.py -- planning application scraper for Crawley Borough Council.
#
# Provenance: reconstructed from the patch for commit
# ee425b23b5babf2f487cc9c928ae7056e8349103
#   From:    "duncan.parkes@gmail.com"
#   Date:    Sat, 20 Jun 2009 16:06:59 +0000
#   Subject: [PATCH] Add scraper for Crawley from Andre.
# which created trunk/python_scrapers/Crawley.py (this file).
#
# The same commit registered the scraper in two CSV files:
#   trunk/OtherFilesToCopy.csv (appended after "Wychavon.py"):
#     "Crawley.py", "420"
#   trunk/SitesToGenerate.csv (inserted between Craven and Crewe and Nantwich):
#     "Crawley Borough Council","Crawley",,,,,,,"Crawley","CrawleyParser",
"""Scraper for planning applications received by Crawley Borough Council.

This module targets Python 2: it relies on ``urllib2``, ``urlparse`` and the
BeautifulSoup 3 package.
"""

import datetime
import time
import urllib2
import urlparse

import BeautifulSoup

from PlanningUtils import PlanningApplication, PlanningAuthorityResults

# Format of the "date received" column in the search results table.
date_format = "%d/%m/%Y"


def _cell_text(cell):
    """Return the stripped text of a table cell, or "" if it has no string."""
    text = cell.string
    if text is None:
        # .string is None when the cell is empty or holds nested markup.
        return ""
    return text.strip()


class CrawleyParser(object):
    """Fetch and parse one day's worth of Crawley planning applications."""

    # Comment page for a single application; filled in per result row.
    comment_url_template = (
        "http://www.crawley.gov.uk/stellent/idcplg?IdcService=SS_GET_PAGE"
        "&nodeId=561&pageCSS=&pAppNo=%(pAppNo)s&pAppDocName=%(pAppDocName)s"
    )

    def __init__(self, *args):
        """Set up authority details and an empty result container.

        Positional arguments are accepted and ignored.
        """
        self.authority_name = "Crawley Borough Council"
        self.authority_short_name = "Crawley"

        # Search URL template: searches by received date over a from/to range.
        self.base_url = (
            "http://www.crawley.gov.uk/stellent/idcplg?IdcService=SS_GET_PAGE"
            "&nodeId=560&is_NextRow=1&accept=yes&strCSS=null&pApplicationNo="
            "&pProposal=&pLocation=&pPostcode=&pWard=&pDateType=received"
            "&pDayFrom=%(dayFrom)s&pMonthFrom=%(monthFrom)s"
            "&pYearFrom=%(yearFrom)s&pDayTo=%(dayTo)s&pMonthTo=%(monthTo)s"
            "&pYearTo=%(yearTo)s&submit=Search"
        )

        self._results = PlanningAuthorityResults(
            self.authority_name, self.authority_short_name
        )

    def _parse_row(self, tds):
        """Build a PlanningApplication from the <td> cells of one result row.

        Expects at least five cells (reference link, address, postcode,
        description, date received) and a link in the first cell.

        Raises:
            KeyError: if the info link has no ``ssDocName`` query parameter.
            ValueError: if the date cell does not match ``date_format``.
        """
        application = PlanningApplication()

        link = tds[0].a
        # NOTE(review): the original replace() calls here and on the postcode
        # were no-ops ("/" -> "/", " " -> " "); they look like the entities
        # "&#47;" and "&nbsp;" decoded in transit. Restored on that assumption;
        # both are harmless when the entity is absent. Confirm against the
        # live markup.
        application.council_reference = (
            link.contents[0].strip().replace("&#47;", "/")
        )
        application.info_url = urlparse.urljoin(self.base_url, link["href"])

        # Element 3 of urlsplit()'s result is the query string.
        info_qs = urlparse.parse_qs(urlparse.urlsplit(application.info_url)[3])
        application.comment_url = self.comment_url_template % {
            "pAppNo": application.council_reference,
            "pAppDocName": info_qs["ssDocName"][0],
        }

        application.address = _cell_text(tds[1])

        # Append the postcode to the address only when one is really present,
        # so a cell holding just "&nbsp;" does not leave a dangling ", ".
        postcode = _cell_text(tds[2]).replace("&nbsp;", " ").strip()
        if postcode:
            application.postcode = postcode
            application.address += ", " + postcode

        application.description = _cell_text(tds[3])
        application.date_received = datetime.datetime(
            *(time.strptime(_cell_text(tds[4]), date_format)[0:6])
        )
        return application

    def getResultsByDayMonthYear(self, day, month, year):
        """Scrape the applications received on the given date.

        Args:
            day, month, year: integers identifying the date to search.

        Returns:
            The parser's PlanningAuthorityResults, with one application added
            per usable result row. Results accumulate on the instance across
            calls.

        Raises:
            ValueError: if day/month/year do not form a valid date.
        """
        # Validate the date before making any request.
        datetime.date(year, month, day)

        # Crawley only allows from-to searches, so use the same day for both.
        search_url = self.base_url % {
            "dayFrom": day,
            "monthFrom": month,
            "yearFrom": year,
            "dayTo": day,
            "monthTo": month,
            "yearTo": year,
        }

        response = urllib2.urlopen(search_url)
        try:
            soup = BeautifulSoup.BeautifulSoup(response.read())
        finally:
            # Always release the connection, even if reading/parsing fails.
            response.close()

        table = soup.table
        if not table:
            # An empty result set has no table.
            return self._results

        # The first row is just headers.
        for tr in table.findAll("tr")[1:]:
            tds = tr.findAll("td")
            if len(tds) < 5 or tds[0].a is None:
                # Not a result row (too few cells or no reference link).
                continue
            self._results.addApplication(self._parse_row(tds))

        return self._results

    def getResults(self, day, month, year):
        """Return displayXML() of the results for the given date.

        ``day``, ``month`` and ``year`` may be ints or numeric strings; they
        are converted with ``int()`` before searching.
        """
        results = self.getResultsByDayMonthYear(int(day), int(month), int(year))
        return results.displayXML()


if __name__ == '__main__':
    parser = CrawleyParser()
    # Parenthesised single argument: same output under the Python 2 print
    # statement, and valid syntax for tooling that parses as Python 3.
    print(parser.getResults(12, 6, 2008))