|
|
@@ -0,0 +1,97 @@ |
|
|
|
import urllib2 |
|
|
|
import urllib |
|
|
|
import urlparse |
|
|
|
|
|
|
|
import datetime |
|
|
|
#import re |
|
|
|
|
|
|
|
from BeautifulSoup import BeautifulSoup |
|
|
|
|
|
|
|
from PlanningUtils import PlanningApplication, \ |
|
|
|
PlanningAuthorityResults, \ |
|
|
|
getPostcodeFromText |
|
|
|
|
|
|
|
# strftime pattern (day/month/year) used to format the search date that is
# sent in the "dateFrom" and "dateTo" form fields.
date_format = '%d/%m/%Y'

# Relative URL of the comment page. The %s placeholder is filled in with the
# application's council reference.
comment_url_end = 'comment.asp?%s'
|
|
|
|
|
|
|
#comment_regex = re.compile("Comment on this ") |
|
|
|
|
|
|
|
|
|
|
|
class RutlandLikeParser:
    """Scraper for planning-application search pages laid out like Rutland's.

    The search form at ``base_url`` is submitted with a one-day date range
    and the first ``<table style="width:auto;">`` in the reply is read as
    the list of matching applications.
    """

    def __init__(self,
                 authority_name,
                 authority_short_name,
                 base_url,
                 debug=False):
        """Store the authority details and create the results container.

        authority_name -- passed to PlanningAuthorityResults as its first
            argument.
        authority_short_name -- passed to PlanningAuthorityResults as its
            second argument.
        base_url -- URL the search data is sent to; relative info and
            comment links are resolved against it.
        debug -- stored on the instance; nothing in this class reads it.
        """
        self.authority_name = authority_name
        self.authority_short_name = authority_short_name
        self.base_url = base_url

        self.debug = debug

        # A single results object is created per parser instance, so every
        # search run on this instance adds to the same object.
        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

    def getResultsByDayMonthYear(self, day, month, year):
        """Search for applications on one date and return the results object.

        day, month, year -- integers identifying the date to search for.

        Returns self._results, with one application added for each result
        row that has exactly four cells.  If the reply contains no matching
        table, self._results is returned unchanged.

        datetime.date raises ValueError for an invalid date; errors raised
        by urllib2 while fetching the page are not caught here.
        """
        search_date = datetime.date(year, month, day)
        date_string = search_date.strftime(date_format)

        # The same date is used for both ends of the range, so only that
        # single day is searched.
        search_data = urllib.urlencode({"reference": "",
                                        "undecided": "yes",
                                        "dateFrom": date_string,
                                        "dateTo": date_string,
                                        "Address": "",
                                        "validate": "true",
                                        })

        # Supplying a data argument makes urllib2 send the request as a POST.
        request = urllib2.Request(self.base_url, search_data)
        response = urllib2.urlopen(request)

        # Always release the connection, even if reading the body fails.
        try:
            html = response.read()
        finally:
            response.close()

        soup = BeautifulSoup(html)

        tables = soup.findAll("table", {"style": "width:auto;"})

        if not tables:
            return self._results

        # We don't want the first or last tr
        trs = tables[0].findAll("tr")[1:-1]

        for tr in trs:
            tds = tr.findAll("td")

            # Only rows with exactly four cells are treated as applications.
            if len(tds) != 4:
                continue

            app = PlanningApplication()

            # First cell: link to the details page, with the council
            # reference as the link text.
            link = tds[0].a
            app.info_url = urlparse.urljoin(self.base_url, link['href'])
            app.council_reference = link.string

            # Second cell: address, from which the postcode is extracted.
            app.address = tds[1].string
            app.postcode = getPostcodeFromText(app.address)

            # Third cell: description of the application.
            app.description = tds[2].string

            app.comment_url = urlparse.urljoin(self.base_url, comment_url_end % app.council_reference)

            # The page is not consulted for a date: the date searched for
            # is recorded as the date received.
            app.date_received = search_date

            self._results.addApplication(app)

        return self._results

    def getResults(self, day, month, year):
        """Run the search for the given date and return displayXML()'s result.

        day, month, year -- values accepted by int() (e.g. numeric strings).
        """
        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Manual check when run as a script: fetch the results for
    # 15 November 2007 and print whatever getResults returns.
    parser = RutlandLikeParser(
        "Rutland long",
        "Rutland",
        "http://www.meltononline.co.uk/planning/searchparam.asp"
    )

    print(parser.getResults(15, 11, 2007))
|
|
|
|