|
1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374 |
-
- import urllib2
- import urllib
- import urlparse
-
- import datetime, time
- import cgi
-
- from BeautifulSoup import BeautifulSoup
-
- from PlanningUtils import PlanningApplication, \
- PlanningAuthorityResults, \
- getPostcodeFromText
-
- import re
-
- location_re = re.compile("Location:")
- date_received_re = re.compile("Date first received:")
-
- date_format = "%d %b %Y"
-
- class HarrowParser:
- def __init__(self, *args):
-
- self.authority_name = "London Borough of Harrow"
- self.authority_short_name = "Harrow"
-
- # This is a link to the last seven days applications
- # The day, month, and year arguments will be ignored.
- self.base_url = "http://www.harrow.gov.uk/www4/planning/dcweek1.asp"
-
- self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
-
-
- def getResultsByDayMonthYear(self, day, month, year):
- # Now get the search page
- response = urllib2.urlopen(self.base_url)
-
- soup = BeautifulSoup(response.read())
-
- # Each application contains the nav string "Application: "
- nav_strings = soup.findAll(text="Application: ")
-
- for nav_string in nav_strings:
- application = PlanningApplication()
-
- application.council_reference = nav_string.findPrevious("tr").findAll("td", limit=2)[1].string.strip()
-
- application.address = nav_string.findNext(text=location_re).split(":")[1].strip()
- application.postcode = getPostcodeFromText(application.address)
-
- application.description = nav_string.findNext(text="Proposal: ").findNext("td").string.strip()
-
- application.comment_url = urlparse.urljoin(self.base_url, nav_string.findNext(text="Proposal: ").findNext("a")['href'])
-
- application.date_received = datetime.datetime.strptime(nav_string.findNext(text=date_received_re).split(": ")[1], date_format).date()
-
- # FIXME: There is no appropriate info_url for the Harrow apps.
- # I'll put the base url for the moment, but as that is
- # a list of apps from the last 7 days that will quickly be out of date.
-
- application.info_url = self.base_url
-
- self._results.addApplication(application)
-
- return self._results
-
- def getResults(self, day, month, year):
- return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
-
- if __name__ == '__main__':
- parser = HarrowParser()
- print parser.getResults(21,5,2008)
-
|