Make display method on a planningapplication work out the postcode if it isn't set.master
@@ -0,0 +1,71 @@ | |||||
import urllib2 | |||||
import urllib | |||||
import urlparse | |||||
import datetime | |||||
from BeautifulSoup import BeautifulSoup | |||||
from PlanningUtils import PlanningApplication, \ | |||||
PlanningAuthorityResults, \ | |||||
getPostcodeFromText | |||||
# strftime pattern for the search URL: "%%" yields a literal "%", so a date
# renders as DD%2FMM%2FYYYY, i.e. DD/MM/YYYY with the slashes URL-encoded.
date_format = "%d%%2F%m%%2F%Y"


class MendipParser:
    """Scraper for the Mendip District Council planning application search.

    Applications found by the search are collected into a single
    PlanningAuthorityResults object created in __init__, so repeated
    searches on the same instance keep adding to that one result set.
    """

    def __init__(self, *args):
        """Set up the authority names, URL templates and the results holder.

        Positional arguments are accepted and ignored.
        """
        self.authority_name = "Mendip District Council"
        self.authority_short_name = "Mendip"

        # The site itself uses a search by validated date, but received date seems
        # to be there too, and to work...
        # self.base_url = "http://www.mendip.gov.uk/PODS/ApplicationSearchResults.asp?DateRecvFrom=&DateRecvTo=&DateValidFrom=%(date)s&DateValidTo=%(date)s&Search=Search"
        self.base_url = "http://www.mendip.gov.uk/PODS/ApplicationSearchResults.asp?DateRecvFrom=%(date)s&DateRecvTo=%(date)s&Search=Search"
        self.comment_url = "http://www.mendip.gov.uk/ShowForm.asp?fm_fid=107&AppNo=%(reference)s&SiteAddress=%(address)s"

        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

    def getResultsByDayMonthYear(self, day, month, year):
        """Fetch every application received on the given date.

        Follows the "next page" link until there is none, or until a page
        reports that no applications matched.

        day, month, year -- integers identifying the date to search for.

        Returns the PlanningAuthorityResults object held by this parser.
        Raises ValueError (from datetime.date) for an invalid date, and
        whatever urllib2.urlopen raises if a page cannot be fetched.
        """
        search_date = datetime.date(year, month, day)
        search_url = self.base_url % {"date": search_date.strftime(date_format)}

        while search_url:
            response = urllib2.urlopen(search_url)
            try:
                soup = BeautifulSoup(response.read())
            finally:
                # Release the connection even if reading or parsing fails;
                # otherwise one socket is leaked per results page.
                response.close()

            if soup.find(text="No applications matched the search criteria"):
                break

            results_table = soup.find("table", summary="Application Results")
            for tr in results_table.tbody.findAll("tr"):
                application = PlanningApplication()
                # The search is restricted to a single received date, so
                # every row on the page shares it.
                application.date_received = search_date

                tds = tr.findAll("td")

                application.council_reference = tds[0].a.string.strip()
                application.info_url = urlparse.urljoin(self.base_url, tds[0].a['href'])
                application.description = tds[1].p.string.strip()
                application.address = tds[2].p.string.strip()

                application.comment_url = self.comment_url % {
                    "reference": application.council_reference,
                    "address": urllib.quote_plus(application.address),
                }

                self._results.addApplication(application)

            # Keep paging while the site offers a "next page" link.
            next_link = soup.find("a", title="Go to the next page")
            search_url = urlparse.urljoin(self.base_url, next_link['href']) if next_link else None

        return self._results

    def getResults(self, day, month, year):
        """Run the search for the given date and return displayXML() of the results.

        day, month and year may be anything int() accepts (e.g. strings).
        """
        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
if __name__ == '__main__':
    # Manual smoke test: fetch and print the results for day 1, month 10, 2008.
    mendip_parser = MendipParser()
    print(mendip_parser.getResults(1, 10, 2008))
@@ -66,3 +66,4 @@ | |||||
"Cairngorms.py", "420" | "Cairngorms.py", "420" | ||||
"Calderdale.py", "420" | "Calderdale.py", "420" | ||||
"Broxtowe.py", "420" | "Broxtowe.py", "420" | ||||
"Mendip.py", "420" |
@@ -19,15 +19,14 @@ def fixNewlines(text): | |||||
# Raw string so that "\d" reaches the regex engine untouched rather than
# being treated as a (invalid) string escape.
postcode_regex = re.compile(r"[A-Z][A-Z]?\d(\d|[A-Z])? ?\d[A-Z][A-Z]")


def getPostcodeFromText(text, default_postcode="No Postcode"):
    """This function takes a piece of text and returns the first
    bit of it that looks like a postcode.

    text -- the string to search; may be None or empty.
    default_postcode -- value returned when no postcode-like substring
    is found (or when there is no text to search at all).
    """
    # Callers may pass an address that was never set (None); searching
    # that would raise TypeError, so treat it like "nothing found".
    if not text:
        return default_postcode

    postcode_match = postcode_regex.search(text)

    return postcode_match.group() if postcode_match else default_postcode
class PlanningAuthorityResults: | class PlanningAuthorityResults: | ||||
"""This class represents a set of results of a planning search. | """This class represents a set of results of a planning search. | ||||
@@ -69,10 +68,10 @@ class PlanningAuthorityResults: | |||||
class PlanningApplication: | class PlanningApplication: | ||||
def __init__(self, no_postcode_default='No postcode'): | |||||
def __init__(self): | |||||
self.council_reference = None | self.council_reference = None | ||||
self.address = None | self.address = None | ||||
self.postcode = no_postcode_default | |||||
self.postcode = None | |||||
self.description = None | self.description = None | ||||
self.info_url = None | self.info_url = None | ||||
self.comment_url = None | self.comment_url = None | ||||
@@ -104,6 +103,9 @@ class PlanningApplication: | |||||
def displayXML(self): | def displayXML(self): | ||||
#print self.council_reference, self.address, self.postcode, self.description, self.info_url, self.comment_url, self.date_received | #print self.council_reference, self.address, self.postcode, self.description, self.info_url, self.comment_url, self.date_received | ||||
if not self.postcode: | |||||
self.postcode = getPostcodeFromText(self.address) | |||||
contents = [ | contents = [ | ||||
u"<council_reference><![CDATA[%s]]></council_reference>" %(self.council_reference), | u"<council_reference><![CDATA[%s]]></council_reference>" %(self.council_reference), | ||||
u"<address><![CDATA[%s]]></address>" %(self.address), | u"<address><![CDATA[%s]]></address>" %(self.address), | ||||
@@ -272,3 +272,4 @@ | |||||
"Cairngorms National Park", "Cairngorms", "", "Cairngorms", "CairngormsParser" | "Cairngorms National Park", "Cairngorms", "", "Cairngorms", "CairngormsParser" | ||||
"Calderdale Council", "Calderdale", "", "Calderdale", "CalderdaleParser" | "Calderdale Council", "Calderdale", "", "Calderdale", "CalderdaleParser" | ||||
"Broxtowe Borough Council", "Broxtowe", "", "Broxtowe", "BroxtoweParser" | "Broxtowe Borough Council", "Broxtowe", "", "Broxtowe", "BroxtoweParser" | ||||
"Mendip District Council", "Mendip", "", "Mendip", "MendipParser" |