From 4e824c66284306a51e4ace12ecf82d7a8e1f14ac Mon Sep 17 00:00:00 2001 From: "duncan.parkes" Date: Sun, 15 Jun 2008 14:21:08 +0000 Subject: [PATCH] Add Pendle scraper (this is basically the same as Maldon). --- python_scrapers/Maldon.py | 34 ++++++++++++++++++++--------- python_scrapers/SitesToGenerate.csv | 3 ++- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/python_scrapers/Maldon.py b/python_scrapers/Maldon.py index 0be19c3..e1703c8 100644 --- a/python_scrapers/Maldon.py +++ b/python_scrapers/Maldon.py @@ -17,12 +17,17 @@ date_format = "%d/%m/%Y" class MaldonParser: comment_email_address = "dc.planning@maldon.gov.uk" - info_url = "http://forms.maldon.gov.uk:8080/PlanApp/jsp/searchPlan.jsp" - def __init__(self, *args): - self.authority_name = "Maldon District Council" - self.authority_short_name = "Maldon" - self.base_url = "http://forms.maldon.gov.uk:8080/PlanApp/jsp/searchPlanApp-action.do" + def __init__(self, authority_name, authority_short_name, base_url, debug=False): + + self.debug = debug + + self.authority_name = authority_name + self.authority_short_name = authority_short_name + self.base_url = base_url + + self.info_url = urlparse.urljoin(base_url, "searchPlan.jsp") + self._split_base_url = urlparse.urlsplit(self.base_url) self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name) @@ -44,6 +49,11 @@ class MaldonParser: response = urllib2.urlopen(search_url) soup = BeautifulSoup(response.read()) + # First check if we have the no apps found page + + if soup.find(text="No Applications Found"): + return self._results + # Not a very good way of finding the table, but it works for the moment. results_table = soup.find("table", cellpadding="5px") @@ -89,12 +99,16 @@ class MaldonParser: def getResults(self, day, month, year): return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML() + +class PendleParser(MaldonParser): + comment_email_address = "planning@pendle.gov.uk" + if __name__ == '__main__': - parser = MaldonParser() - print parser.getResults(02,6,2008) + #parser = MaldonParser("Maldon District Council", "Maldon", "http://forms.maldon.gov.uk:8080/PlanApp/jsp/searchPlanApp-action.do") + parser = PendleParser("Pendle Borough Council", "Pendle", "http://bopdoccip.pendle.gov.uk/PlanApp/jsp/searchPlanApp-action.do") + print parser.getResults(21,5,2008) # TODO -# 1) Check that it works ok on a no results page. -# 2) Email the council about non-linkable info page. -# 3) Email the council about missing descriptions? +# 1) Email the council about non-linkable info page. +# 2) Email the council about missing descriptions? diff --git a/python_scrapers/SitesToGenerate.csv b/python_scrapers/SitesToGenerate.csv index fe671ee..c349638 100644 --- a/python_scrapers/SitesToGenerate.csv +++ b/python_scrapers/SitesToGenerate.csv @@ -228,5 +228,6 @@ "Fife Council", "Fife", "", "Fife", "FifeParser" "Forest of Dean District Council", "Forest of Dean", "", "ForestOfDean", "ForestOfDeanParser" "Flintshire County Council", "Flintshire", "", "Flintshire", "FlintshireParser" -"Maldon District Council", "Maldon", "", "Maldon", "MaldonParser" +"Maldon District Council", "Maldon", "http://forms.maldon.gov.uk:8080/PlanApp/jsp/searchPlanApp-action.do", "Maldon", "MaldonParser" "Medway Council", "Medway", "", "Medway", "MedwayParser" +"Pendle Borough Council", "Pendle", "http://bopdoccip.pendle.gov.uk/PlanApp/jsp/searchPlanApp-action.do", "Maldon", "PendleParser"