diff --git a/trunk/python_scrapers/Gosport.py b/trunk/python_scrapers/Gosport.py
new file mode 100644
index 0000000..14d48a0
--- /dev/null
+++ b/trunk/python_scrapers/Gosport.py
@@ -0,0 +1,96 @@
+import urllib2
+import urllib
+
+import datetime, time
+
+import cookielib
+
+from BeautifulSoup import BeautifulSoup
+
+from PlanningUtils import PlanningApplication, \
+    PlanningAuthorityResults, \
+    getPostcodeFromText
+
+from HTTPHandlers import CookieAddingHTTPRedirectHandler
+
+cookie_jar = cookielib.CookieJar()
+cookie_handling_opener = urllib2.build_opener(CookieAddingHTTPRedirectHandler(cookie_jar))
+
+search_date_format = "%m/%d/%Y" # That's right, the search date is US style...
+info_page_date_format = "%d/%m/%Y" # ...and the info page is UK style.
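+# e.g. 1 October 2008 is "10/01/2008" to the search form, but "01/10/2008" on the info page.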
+
+class GosportParser:
+ def __init__(self, *args):
+
+ self.authority_name = "Gosport Borough Council"
+ self.authority_short_name = "Gosport"
+
+ self.base_url = "http://www.gosport.gov.uk/gbcplanning/ApplicationSearch2.aspx"
+ self.info_url = "http://www.gosport.gov.uk/gbcplanning/ApplicationDetails.aspx?ID=%s"
+
+ self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
+
+
+ def getResultsByDayMonthYear(self, day, month, year):
+ search_date = datetime.date(year, month, day)
+
+ get_request = urllib2.Request(self.base_url)
+ get_response = urllib2.urlopen(get_request)
+ cookie_jar.extract_cookies(get_response, get_request)
+
+ get_soup = BeautifulSoup(get_response.read())
+
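+        # ASP.NET postback: the search POST must echo the page's hidden
+        # __VIEWSTATE and pgid fields back to the server.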
+ post_data = (
+ ("__VIEWSTATE", get_soup.find("input", {"name": "__VIEWSTATE"})["value"]),
+ ("pgid", get_soup.find("input", {"name": "pgid"})["value"]),
+ ("action", "Search"),
+# ("ApplicationSearch21%3AtbDevAddress", ""),
+# ("ApplicationSearch21%3AtbApplicantName", ""),
+# ("ApplicationSearch21%3AtbAgentName", ""),
+            ("ApplicationSearch21:tbDateSubmitted", search_date.strftime(search_date_format)),
+ ("ApplicationSearch21:btnDateSubmitted", "Search"),
+# ("ApplicationSearch21%3AtbDateDetermined", ""),
+ )
+
+
+ post_request = urllib2.Request(self.base_url, urllib.urlencode(post_data))
+ cookie_jar.add_cookie_header(post_request)
+ post_response = cookie_handling_opener.open(post_request)
+
+ post_soup = BeautifulSoup(post_response.read())
+
+        # Discard the first <tr>, which contains headers.
+ trs = post_soup.find("table", id="SearchResults1_dgSearchResults").findAll("tr")[1:]
+
+ for tr in trs:
+ application = PlanningApplication()
+
+ tds = tr.findAll("td")
+
+ application.council_reference = tds[0].string.strip()
+ application.address = tds[1].string.strip()
+ application.postcode = getPostcodeFromText(application.address)
+ application.description = tds[2].string.strip()
+
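+            # time.strptime gives a struct_time; its first six fields feed datetime.datetime.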
+ application.date_received = datetime.datetime(*(time.strptime(tds[3].string.strip(), info_page_date_format)[0:6]))
+ application.info_url = self.info_url %(application.council_reference)
+
+ # The comment url must be accessed by a POST, so we'll just use the info url for that as well
+
+ application.comment_url = application.info_url
+
+ self._results.addApplication(application)
+
+ return self._results
+
+ def getResults(self, day, month, year):
+ return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
+
+if __name__ == '__main__':
+ parser = GosportParser()
+ print parser.getResults(1,10,2008)
+
diff --git a/trunk/python_scrapers/HTTPHandlers.py b/trunk/python_scrapers/HTTPHandlers.py
new file mode 100644
index 0000000..0015c5e
--- /dev/null
+++ b/trunk/python_scrapers/HTTPHandlers.py
@@ -0,0 +1,25 @@
+
+from urllib2 import HTTPRedirectHandler
+
+class CookieAddingHTTPRedirectHandler(HTTPRedirectHandler):
+    """The standard Python HTTPRedirectHandler doesn't add cookies to the new request it builds after a 302. This handler does."""
+
+ def __init__(self, cookie_jar):
+ self.cookie_jar = cookie_jar
+
+        # This really ought to call the superclass's __init__ method, but HTTPRedirectHandler doesn't seem to have one.
+
+
+ def redirect_request(self, *args):
+ new_request = HTTPRedirectHandler.redirect_request(self, *args)
+ # We need to add a cookie from the cookie_jar
+ self.cookie_jar.add_cookie_header(new_request)
+
+ return new_request
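+
+# Usage sketch (assumes a cookielib.CookieJar shared between this handler and
+# whatever code builds the requests, as the scrapers in this repo do):
+#
+#     cookie_jar = cookielib.CookieJar()
+#     opener = urllib2.build_opener(CookieAddingHTTPRedirectHandler(cookie_jar))
+#     opener.open(request)  # the Cookie header now survives 302 redirects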
diff --git a/trunk/python_scrapers/Ocella.py b/trunk/python_scrapers/Ocella.py
index 924a349..61ffa99 100644
--- a/trunk/python_scrapers/Ocella.py
+++ b/trunk/python_scrapers/Ocella.py
@@ -18,16 +18,9 @@ search_date_format = "%d-%m-%Y" # Format used for the accepted date when searchi
possible_date_formats = [search_date_format, "%d/%m/%Y"]
-class CookieAddingHTTPRedirectHandler(urllib2.HTTPRedirectHandler):
- """The standard python HttpRedirectHandler doesn't add a cookie to the new request after a 302. This handler does."""
- def redirect_request(self, req, fp, code, msg, headers, newurl):
- new_request = urllib2.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl)
- # We need to add a cookie from the cookie_jar
- cookie_jar.add_cookie_header(new_request)
+from HTTPHandlers import CookieAddingHTTPRedirectHandler
- return new_request
-
-cookie_handling_opener = urllib2.build_opener(CookieAddingHTTPRedirectHandler())
+cookie_handling_opener = urllib2.build_opener(CookieAddingHTTPRedirectHandler(cookie_jar))
class OcellaParser:
@@ -206,7 +199,7 @@ if __name__ == '__main__':
# parser = OcellaParser("North East Lincolnshire", "North East Lincolnshire", "http://planning.nelincs.gov.uk/portal/page?_pageid=33,64104&_dad=portal&_schema=PORTAL")
# parser = OcellaParser("Uttlesford", "Uttlesford", "http://planning.uttlesford.gov.uk/portal/page/portal/plan/weekly")
# parser = OcellaParser("Bridgend", "Bridgend", "http://eplan.bridgend.gov.uk:7778/portal/page?_pageid=55,31779&_dad=portal&_schema=PORTAL")
- parser = OcellaParser("Havering", "Havering", "http://planning.havering.gov.uk/portal/page?_pageid=33,1026&_dad=portal&_schema=PORTAL")
+# parser = OcellaParser("Havering", "Havering", "http://planning.havering.gov.uk/portal/page?_pageid=33,1026&_dad=portal&_schema=PORTAL")
# parser = OcellaParser("Castle Point", "Castle Point", "http://planning.castlepoint.gov.uk/portal/page?_pageid=35,38205&_dad=portal&_schema=PORTAL")
parser = OcellaParser("Great Yarmouth", "Great Yarmouth", "http://planning.great-yarmouth.gov.uk/portal/page/portal/plan/weekly")
diff --git a/trunk/python_scrapers/OtherFilesToCopy.csv b/trunk/python_scrapers/OtherFilesToCopy.csv
index f5819a5..eae8963 100644
--- a/trunk/python_scrapers/OtherFilesToCopy.csv
+++ b/trunk/python_scrapers/OtherFilesToCopy.csv
@@ -1,6 +1,7 @@
"filename", "permissions"
"PublicAccess.py", "420"
"PlanningUtils.py", "420"
+"HTTPHandlers.py", "420"
"SouthOxfordshireParser.py", "420"
"SouthOxfordshire.cgi", "493"
"ApplicationSearchServletParser.py", "420"
@@ -58,3 +59,4 @@
"Herefordshire.py", "420"
"Exmoor.py", "420"
"Eastbourne.py", "420"
+"Gosport.py", "420"
diff --git a/trunk/python_scrapers/SitesToGenerate.csv b/trunk/python_scrapers/SitesToGenerate.csv
index 95bac87..0661ca7 100644
--- a/trunk/python_scrapers/SitesToGenerate.csv
+++ b/trunk/python_scrapers/SitesToGenerate.csv
@@ -264,3 +264,4 @@
"Eastbourne Borough Council", "Eastbourne", "", "Eastbourne", "EastbourneParser"
"Waltham Forest Council", "Waltham Forest", "http://planning.walthamforest.gov.uk/", "PlanningExplorer", "WalthamForestParser"
"Caerphilly County Borough Council", "Caerphilly", "http://publicaccess.caerphilly.gov.uk/PublicAccess/tdc/", "PublicAccess", "PublicAccessParser"
+"Gosport Borough Council", "Gosport", "", "Gosport", "GosportParser"