From 6a2ed311a407bd5e49774cd7f547a905d3af6d4e Mon Sep 17 00:00:00 2001
From: "duncan.parkes"
Date: Sun, 7 Jun 2009 23:07:19 +0000
Subject: [PATCH] Add Wychavon scraper from Thomas.

---
 OtherFilesToCopy.csv        |   3 +-
 SitesToGenerate.csv         |   1 +
 python_scrapers/Wychavon.py | 167 ++++++++++++++++++++++++++++++++++++
 3 files changed, 170 insertions(+), 1 deletion(-)
 create mode 100644 python_scrapers/Wychavon.py

diff --git a/OtherFilesToCopy.csv b/OtherFilesToCopy.csv
index d8e2a79..6f8fb30 100644
--- a/OtherFilesToCopy.csv
+++ b/OtherFilesToCopy.csv
@@ -68,4 +68,5 @@
 "Broxtowe.py", "420"
 "Mendip.py", "420"
 "Weymouth.py", "420"
-"Solihull.py", "420"
\ No newline at end of file
+"Solihull.py", "420"
+"Wychavon.py", "420"
diff --git a/SitesToGenerate.csv b/SitesToGenerate.csv
index 081094b..e6052e7 100644
--- a/SitesToGenerate.csv
+++ b/SitesToGenerate.csv
@@ -317,6 +317,7 @@
 "Wolverhampton City Council","Wolverhampton",,,,,,"http://planningonline.wolverhampton.gov.uk/PublicAccess/tdc/","PublicAccess","PublicAccessParser",
 "Worcester City Council","Worcester",,,,,,"http://www.worcester.gov.uk:8080/planet/ispforms.asp?serviceKey=SysDoc-PlanetApplicationEnquiry","Planet","PlanetParser",
 "Worthing Borough Council","Worthing",,,,,,"http://planning.worthing.gov.uk/publicaccess/tdc/","PublicAccess","PublicAccessParser",
+"Wychavon District Council","Wychavon",,,,,,,"Wychavon","WychavonParser",
 "Wycombe District Council","Wycombe",,,,,,"http://planningpa.wycombe.gov.uk/publicaccess/tdc/","PublicAccess","PublicAccessParser",
 "Wyre Forest District Council","Wyre Forest",,,,,,"http://www.wyreforest.gov.uk/fastweb/","FastWeb","FastWeb",
 "City of York Council","York",,,,,,"http://planning.york.gov.uk/PublicAccess/tdc/","PublicAccess","PublicAccessParser",
diff --git a/python_scrapers/Wychavon.py b/python_scrapers/Wychavon.py
new file mode 100644
index 0000000..43c7693
--- /dev/null
+++ b/python_scrapers/Wychavon.py
@@ -0,0 +1,167 @@
+"""
+This is the screenscraper for planning apps from
+Wychavon District Council.
+
+This appears to be an Acolnet variant, and is searched by a block of months.
+"""
+
+import urllib
+import urlparse
+
+import datetime
+
+from BeautifulSoup import BeautifulSoup
+
+from PlanningUtils import PlanningApplication, \
+    PlanningAuthorityResults, \
+    getPostcodeFromText
+
+
+class WychavonParser:
+    """Screenscraper for the Wychavon District Council planning search.
+
+    The search form is submitted with the requested month as both the start
+    and the end month; the day of the month is never sent to the site.
+    """
+
+    # The comment form lives on a naked IP rather than on the same site as
+    # the search pages.  Don't know why, but there it is.
+    _comment_url_base = ("http://81.171.139.151/WAM/createComment.do"
+                         "?action=CreateApplicationComment"
+                         "&applicationType=PLANNING&appNumber=")
+
+    # Suffix of the full application number used by the comment form, keyed
+    # by the application type shown in the search results.  These are all
+    # the types that were found, except "Advice - Pre-app/Householder", the
+    # suffix for which is inconsistent.  The suffix for that one could be
+    # obtained by scraping the description page for each application.
+    # NOTE(review): "VOC" looks unusual for reserved matters -- confirm it
+    # against the council's site.
+    _comment_suffixes = {
+        "Householder planning application": "PP",
+        "Non-householder planning application": "PN",
+        "Outline applications": "OU",
+        "Change of use": "CU",
+        "Listed Building consent": "LB",
+        "Advertisement application": "AA",
+        "Certificate of Lawfulness Existing": "LUE",
+        "Approval of reserved matters": "VOC",
+    }
+
+    def __init__(self, *args):
+        """Set up the authority names, search URL and results container.
+
+        Positional arguments are accepted but ignored.
+        """
+        self.authority_name = "Wychavon"
+        self.authority_short_name = "Wychavon"
+        # Currently hard coded--if this address updates, we'll need to scrape
+        # the search form to get it each time.
+        self.base_url = ("http://www.e-wychavon.org.uk/scripts/plan2005/"
+                         "acolnetcgi.exe?ACTION=UNWRAP"
+                         "&WhereDescription=General%20Search"
+                         "&Whereclause3=%27%30%31%2F%7BEdtMonthEnd%7D%2F"
+                         "%7BEdtYearEnd%7D%27"
+                         "&RIPNAME=Root%2EPages%2EPgeDC%2EPgeListCases")
+
+        self._results = PlanningAuthorityResults(self.authority_name,
+                                                 self.authority_short_name)
+
+    def _comment_url(self, council_reference, apptype):
+        """Build the comment form URL for one application.
+
+        Is all this really necessary?  I don't know, but I've assumed that
+        it is.  The form will appear without the suffix; I don't know if the
+        council's backend would accept it or not.  Current behaviour is to
+        degrade silently to no suffix if the application type (which may be
+        None) can't be matched.
+        """
+        if apptype == "Telecommunications":
+            return self._comment_url_base + "T3/" + council_reference + "/TC"
+
+        suffix = self._comment_suffixes.get(apptype, "")
+        return self._comment_url_base + "W/" + council_reference + "/" + suffix
+
+    def getResultsByDayMonthYear(self, day, month, year):
+        """Fetch and parse the applications for the given month.
+
+        day is accepted for interface compatibility but is not used: only
+        month and year are sent to the search form.  Applications are added
+        to this parser's results object, which is returned.
+        """
+        form_data = urllib.urlencode([
+            ("EdtYearNo", ""),
+            ("EdtCaseNo", ""),
+            ("EdtApplicant", ""),
+            ("EdtAgent", ""),
+            ("EdtLocation", ""),
+            ("EdtWard", ""),
+            ("EdtMonthStart1", month),
+            ("EdtYearStart", year),
+            ("EdtMonthEnd", month),
+            ("EdtYearEnd", year),
+            ("submit", "Search"),
+        ])
+
+        # Fetch the results, making sure the connection is closed again.
+        response = urllib.urlopen(self.base_url, form_data)
+        try:
+            soup = BeautifulSoup(response.read())
+        finally:
+            response.close()
+
+        # Each set of results has its own table
+        for table in soup.findAll("table", cellpadding="2", cols="4"):
+            application = PlanningApplication()
+
+            trs = table.findAll("tr")
+            # Look the cells of each row up once rather than per field.
+            top_tds = trs[0].findAll("td")
+            middle_tds = trs[1].findAll("td")
+
+            application.council_reference = \
+                top_tds[1].font.font.font.string.strip()
+
+            application.info_url = urlparse.urljoin(self.base_url,
+                                                    top_tds[1].a['href'])
+
+            application.address = middle_tds[1].font.string.strip()
+            application.postcode = getPostcodeFromText(application.address)
+
+            # This avoids an error if there's no description given.
+            descrip = trs[2].findAll("td")[1].font.string
+            if descrip is None:
+                application.description = ""
+            else:
+                application.description = descrip.strip()
+
+            # The date string is unpacked as month/day/year.
+            # NOTE(review): confirm this matches the site's date format.
+            rec_m, rec_d, rec_y = middle_tds[3].font.string.strip().split("/")
+            application.date_received = datetime.date(int(rec_y), int(rec_m),
+                                                      int(rec_d))
+
+            # Avoids throwing an error if no apptype is given (this can
+            # happen)
+            apptype = top_tds[3].font.string
+            if apptype is not None:
+                apptype = apptype.strip()
+
+            application.comment_url = self._comment_url(
+                application.council_reference, apptype)
+
+            self._results.addApplication(application)
+
+        return self._results
+
+    def getResults(self, day, month, year):
+        """Return the displayXML() output of the results for the date."""
+        return self.getResultsByDayMonthYear(int(day), int(month),
+                                             int(year)).displayXML()
+
+
+if __name__ == '__main__':
+    parser = WychavonParser()
+    # Constant date for testing, copying the Barnsley example.  Should a
+    # real run take the date from the arguments instead?
+    print parser.getResults(16, 3, 2009)