|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114 |
- import urllib2
- import urllib
- import urlparse
-
- import datetime, time
- import cgi
-
- import re
-
- from BeautifulSoup import BeautifulSoup
-
- from PlanningUtils import PlanningApplication, \
- PlanningAuthorityResults, \
- getPostcodeFromText
-
# strftime pattern (day/month/year) used to render the search date that is
# sent as the RegisteredDateFrom / RegisteredDateTo query parameters.
- date_format = "%d/%m/%Y"
-
class MaldonParser:
    """Scrape planning applications registered on a single day.

    The search page at ``base_url`` is queried with the same date as both the
    "registered from" and "registered to" bound, and every application found
    in the results table is added to a ``PlanningAuthorityResults`` object.

    Subclasses for other councils running the same software only need to
    override ``comment_email_address``.
    """

    # Stored as each application's comment_url.
    comment_email_address = "dc.planning@maldon.gov.uk"

    def __init__(self, authority_name, authority_short_name, base_url, debug=False):
        """Remember the authority details and precompute the URLs we need.

        authority_name       -- full name, passed to PlanningAuthorityResults.
        authority_short_name -- short name, passed to PlanningAuthorityResults.
        base_url             -- URL of the search action; its scheme, host and
                                path are reused for every search.
        debug                -- stored on the instance; not otherwise used by
                                this class.
        """
        self.debug = debug

        self.authority_name = authority_name
        self.authority_short_name = authority_short_name
        self.base_url = base_url

        # Every application links to the search front page (see the note in
        # getResultsByDayMonthYear for why).
        self.info_url = urlparse.urljoin(base_url, "searchPlan.jsp")

        self._split_base_url = urlparse.urlsplit(self.base_url)

        # NOTE: one results object per parser instance, so applications
        # accumulate if the parser is queried more than once.
        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

    @staticmethod
    def _pair_rows(rows):
        """Return ``rows`` grouped as (summary_row, description_row) tuples.

        The results table alternates a summary row with a description row.
        A trailing summary row with no description row is dropped.
        """
        return list(zip(rows[0::2], rows[1::2]))

    @staticmethod
    def _clean_description(text):
        """Return ``text`` stripped, or a placeholder when it is None."""
        if text is None:
            return "Description Missing"
        return text.strip()

    def getResultsByDayMonthYear(self, day, month, year):
        """Fetch the applications registered on the given date.

        day, month, year -- integers identifying the date to search for.

        Returns the parser's PlanningAuthorityResults object with one
        application added per (summary, description) row pair found.

        Raises ValueError (from datetime.date) for an invalid date, whatever
        urllib2.urlopen raises on network failure, and AttributeError if the
        page contains neither the "No Applications Found" text nor the
        expected results table.
        """
        search_date = datetime.date(year, month, day)
        search_date_string = search_date.strftime(date_format)

        search_data = urllib.urlencode(
            [("RegisteredDateFrom", search_date_string),
             ("RegisteredDateTo", search_date_string),
             ]
            )

        # Keep scheme, host and path of the base URL; replace the query
        # string with our search parameters and drop any fragment.
        split_search_url = self._split_base_url[:3] + (search_data, '')
        search_url = urlparse.urlunsplit(split_search_url)

        response = urllib2.urlopen(search_url)
        try:
            soup = BeautifulSoup(response.read())
        finally:
            # Always release the connection, even if reading/parsing fails.
            response.close()

        # First check if we have the no apps found page
        if soup.find(text="No Applications Found"):
            return self._results

        # Not a very good way of finding the table, but it works for the moment.
        results_table = soup.find("table", cellpadding="5px")

        # Skip the first row of the table (it is not an application row).
        trs = results_table.findAll("tr")[1:]

        for summary_tr, description_tr in self._pair_rows(trs):
            application = PlanningApplication()
            application.date_received = search_date
            application.comment_url = self.comment_email_address

            tds = summary_tr.findAll("td")

            application.council_reference = tds[0].b.string.strip()
            # Collapse runs of whitespace in the address into single spaces.
            application.address = ' '.join(tds[2].string.split())
            application.postcode = getPostcodeFromText(application.address)

            # The per-application details link cannot be reached without a
            # session cookie, so send people to the search front page instead.
            application.info_url = self.info_url

            application.description = self._clean_description(description_tr.td.string)

            self._results.addApplication(application)

        return self._results

    def getResults(self, day, month, year):
        """Return displayXML() of the results for the given date.

        day, month and year may be ints or numeric strings; they are passed
        through int() before searching.
        """
        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
-
-
class PendleParser(MaldonParser):
    """MaldonParser variant for Pendle; only the comment address differs."""

    # Overrides the address inherited from MaldonParser.
    comment_email_address = "planning@pendle.gov.uk"
-
if __name__ == '__main__':
    # Manual check: fetch and print the results for 21 May 2008 from Pendle.
    # To try Maldon instead, use:
    #parser = MaldonParser("Maldon District Council", "Maldon", "http://forms.maldon.gov.uk:8080/PlanApp/jsp/searchPlanApp-action.do")
    parser = PendleParser(
        "Pendle Borough Council",
        "Pendle",
        "http://bopdoccip.pendle.gov.uk/PlanApp/jsp/searchPlanApp-action.do"
    )
    print(parser.getResults(21, 5, 2008))
-
- # TODO
-
- # 1) Email the council about the application info page not being directly linkable (it cannot be reached without a cookie).
- # 2) Email the council about applications with missing descriptions?
|