From c0e96f15e553aff13cd88154271f113078993f6b Mon Sep 17 00:00:00 2001 From: "duncan.parkes" Date: Sun, 21 Jun 2009 14:51:57 +0000 Subject: [PATCH] Fix Mendip - now using PlanningExplorer rather than a bespoke scraper. --- SitesToGenerate.csv | 4 +- python_scrapers/Maldon.py | 2 +- python_scrapers/Mendip.py | 71 ----------------------------- python_scrapers/PlanningExplorer.py | 10 +++- python_scrapers/Shropshire.py | 6 +-- 5 files changed, 15 insertions(+), 78 deletions(-) delete mode 100644 python_scrapers/Mendip.py diff --git a/SitesToGenerate.csv b/SitesToGenerate.csv index 4f8140e..ac32296 100644 --- a/SitesToGenerate.csv +++ b/SitesToGenerate.csv @@ -179,7 +179,7 @@ "Mansfield District Council","Mansfield",,,,,,"http://www.mansfield.gov.uk/Fastweb/","FastWeb","FastWeb", "Medway Council","Medway",,,,,,,"Medway","MedwayParser", "Melton Borough Council","Melton",,,,,,"http://publicaccess.melton.gov.uk/publicaccess/tdc/","PublicAccess","PublicAccessParser", -"Mendip District Council","Mendip",,,,,,,"Mendip","MendipParser", +"Mendip District Council","Mendip",,,,,"Now using planningexplorer rather than bespoke","http://planning.mendip.gov.uk/","PlanningExplorer","MendipParser", "London Borough of Merton","Merton",,,,,,"http://planning.merton.gov.uk/","PlanningExplorer","MertonParser", "Mid Bedfordshire District Council","Mid Beds",,,,,,"http://www.midbeds.gov.uk/acolnetDC/DCpages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch","AcolnetParser","MidBedsParser", "Mid Devon District Council","Mid Devon",,,,,,"http://planning.middevon.gov.uk/publicaccess/tdc/","PublicAccess","PublicAccessParser", @@ -314,7 +314,7 @@ "West Sussex County Council","West Sussex",,,,,,"http://eplanning.westsussex.gov.uk/ePlanningOPS/loadResults.do","AtriumePlanning","AtriumePlanningParser", "West Wiltshire District Council","West Wiltshire",,,,,,"http://planning.westwiltshire.gov.uk/PublicAccess/tdc/","PublicAccess","PublicAccessParser", "Wiltshire County Council","Wiltshire",,,,,,,,,"wiltshire" -"Winchester City Council","Winchester",,,,,,"http://win2padmz.winchester.gov.uk/publicaccess/tdc/","PublicAccess","PublicAccessParser", +"Winchester City Council","Winchester",,,,1,"Now using a bespoke system","http://win2padmz.winchester.gov.uk/publicaccess/tdc/","PublicAccess","PublicAccessParser", "Woking Borough Council","Woking",,,,,,"http://caps.woking.gov.uk/publicaccess/tdc/","PublicAccess","PublicAccessParser", "Wolverhampton City Council","Wolverhampton",,,,,,"http://planningonline.wolverhampton.gov.uk/PublicAccess/tdc/","PublicAccess","PublicAccessParser", "Worcester City Council","Worcester",,,,,,"http://www.worcester.gov.uk:8080/planet/ispforms.asp?serviceKey=SysDoc-PlanetApplicationEnquiry","Planet","PlanetParser", diff --git a/python_scrapers/Maldon.py b/python_scrapers/Maldon.py index e1703c8..f80a491 100644 --- a/python_scrapers/Maldon.py +++ b/python_scrapers/Maldon.py @@ -106,7 +106,7 @@ class PendleParser(MaldonParser): if __name__ == '__main__': #parser = MaldonParser("Maldon District Council", "Maldon", "http://forms.maldon.gov.uk:8080/PlanApp/jsp/searchPlanApp-action.do") parser = PendleParser("Pendle Borough Council", "Pendle", "http://bopdoccip.pendle.gov.uk/PlanApp/jsp/searchPlanApp-action.do") - print parser.getResults(21,5,2008) + print parser.getResults(12,6,2009) # TODO diff --git a/python_scrapers/Mendip.py b/python_scrapers/Mendip.py deleted file mode 100644 index 4a0c52f..0000000 --- a/python_scrapers/Mendip.py +++ /dev/null @@ -1,71 +0,0 @@ -import urllib2 -import urllib -import urlparse - -import datetime - -from BeautifulSoup import BeautifulSoup - -from PlanningUtils import PlanningApplication, \ - PlanningAuthorityResults, \ - getPostcodeFromText - -date_format = "%d%%2F%m%%2F%Y" - -class MendipParser: - def __init__(self, *args): - self.authority_name = "Mendip District Council" - self.authority_short_name = "Mendip" - - # The site itelf uses a search by validated date, but received date seems - # to be there too, and to work... - # self.base_url = "http://www.mendip.gov.uk/PODS/ApplicationSearchResults.asp?DateRecvFrom=&DateRecvTo=&DateValidFrom=%(date)s&DateValidTo=%(date)s&Search=Search" - self.base_url = "http://www.mendip.gov.uk/PODS/ApplicationSearchResults.asp?DateRecvFrom=%(date)s&DateRecvTo=%(date)s&Search=Search" - self.comment_url = "http://www.mendip.gov.uk/ShowForm.asp?fm_fid=107&AppNo=%(reference)s&SiteAddress=%(address)s" - - self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name) - - - def getResultsByDayMonthYear(self, day, month, year): - search_date = datetime.date(year, month, day) - - search_url = self.base_url %{"date": search_date.strftime(date_format)} - - while search_url: - response = urllib2.urlopen(search_url) - soup = BeautifulSoup(response.read()) - - if soup.find(text="No applications matched the search criteria"): - break - - for tr in soup.find("table", summary="Application Results").tbody.findAll("tr"): - application = PlanningApplication() - application.date_received = search_date - - tds = tr.findAll("td") - - application.council_reference = tds[0].a.string.strip() - application.info_url = urlparse.urljoin(self.base_url, tds[0].a['href']) - application.description = tds[1].p.string.strip() - application.address = tds[2].p.string.strip() - - application.comment_url = self.comment_url %{ - "reference": application.council_reference, - "address": urllib.quote_plus(application.address), - } - - self._results.addApplication(application) - - next_link = soup.find("a", title="Go to the next page") - search_url = urlparse.urljoin(self.base_url, next_link['href']) if next_link else None - - return self._results - - - def getResults(self, day, month, year): - return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML() - -if __name__ == '__main__': - parser = MendipParser() - print parser.getResults(1,10,2008) - diff --git a/python_scrapers/PlanningExplorer.py b/python_scrapers/PlanningExplorer.py index 3b4a080..7cbe40a 100644 --- a/python_scrapers/PlanningExplorer.py +++ b/python_scrapers/PlanningExplorer.py @@ -681,6 +681,13 @@ class ConwyParser(BroadlandLike, PlanningExplorerParser): use_firefox_user_agent = True +class MendipParser(BroadlandLike, PlanningExplorerParser): + comments_email_address = "customerservices@mendip.gov.uk" + + +# search_url_path = "northgate/planningexplorer/generalsearch.aspx" + +#&first=1&quick=1&search=&txtApplicationNumber=&txtApplicantName=&txtAgentName=&txtProposal=&txtSiteAddress=&txtStreetName=&cboWardCode=&cboParishCode=&cboApplicationTypeCode=&cboDevelopmentTypeCode=&cboStatusCode=&cboSelectDateValue=DATE_RECEIVED&cboMonths=1&cboDays=1&rbGroup=rbRange&dateStart=12%2F06%2F2009&dateEnd=12%2F06%2F2009&edrDateSelection=&csbtnSearch=Search #&txtApplicationNumber=&txtProposal=&txtSiteAddress=&cboWardCode=&cboParishCode=&cboApplicationTypeCode=&cboDevelopmentTypeCode=&cboStatusCode=&cboSelectDateValue=DATE_RECEIVED&cboMonths=1&cboDays=1&rbGroup=rbRange&dateStart=10%2F07%2F2008&dateEnd=20%2F07%2F2008&edrDateSelection=&csbtnSearch=Search @@ -731,7 +738,8 @@ if __name__ == '__main__': # parser = WalthamForestParser("Waltham Forest", "Waltham Forest", "http://planning.walthamforest.gov.uk/") # parser = ConwyParser("Conwy County Borough Council", "Conwy", "http://www.conwy.gov.uk/") # parser = MertonParser("London Borough of Merton", "Merton", "http://planning.merton.gov.uk") - print parser.getResults(30, 3, 2009) + parser = MendipParser("Mendip District Council", "Mendip", "http://planning.mendip.gov.uk/") + print parser.getResults(12, 6, 2009) # To Do diff --git a/python_scrapers/Shropshire.py b/python_scrapers/Shropshire.py index 64eedba..853b97c 100644 --- a/python_scrapers/Shropshire.py +++ b/python_scrapers/Shropshire.py @@ -149,12 +149,12 @@ class SouthNorthamptonshireParser(ShropshireParser): if __name__ == '__main__': # parser = ShropshireParser("Shropshire County Council", "Shropshire", "http://planning.shropshire.gov.uk/PlanAppList.asp") # print parser.getResults(6,6,2008) -# parser = NorthYorkshireParser("North Yorkshire County Council", "North Yorkshire", "https://onlineplanningregister.northyorks.gov.uk/Online%20Register/PlanAppList.asp") + parser = NorthYorkshireParser("North Yorkshire County Council", "North Yorkshire", "https://onlineplanningregister.northyorks.gov.uk/Online%20Register/PlanAppList.asp") # print parser.getResults(10,6,2008) # parser = SouthNorthamptonshireParser("South Northamptonshire Council", "South Northamptonshire", "http://snc.planning-register.co.uk/PlanAppList.asp") # print parser.getResults(5,6,2008) - parser = ShropshireParser("Purbeck District Council", "Purbeck", "http://www.purbeck.gov.uk/planning/PlanAppList.asp") - print parser.getResults(5,6,2008) +# parser = ShropshireParser("Purbeck District Council", "Purbeck", "http://www.purbeck.gov.uk/planning/PlanAppList.asp") + print parser.getResults(12,6,2009) # TODO