#!/usr/local/bin/python
# Module prelude for the Acolnet scraper: imports, the shared cookie jar and
# the module-level regexes and date constants.
#
# NOTE(review): this prelude had been collapsed onto the shebang line, which
# made every statement part of a single comment so that nothing executed.
# The statements are restored to one per physical line here.

import urllib
import urllib2
import HTMLParser
#from BeautifulSoup import BeautifulSoup

# Adding this to try to help Surrey Heath - Duncan 14/9/2007
import cookielib
cookie_jar = cookielib.CookieJar()
################

import urlparse

import re

# We allow the optional > for Bridgenorth, which doesn't have broken html
# NOTE(review): the pattern text had been reduced to a bare "?", which is not
# a valid regex (re.compile raises "nothing to repeat"). It is reconstructed
# here from the comment above as a closing head tag whose final ">" is
# optional - confirm against the upstream copy of this file.
end_head_regex = re.compile("</head>?", re.IGNORECASE)

import MultipartPostHandler
# this is not mine, or part of standard python (though it should be!)
# it comes from http://pipe.scs.fsu.edu/PostHandler/MultipartPostHandler.py

from PlanningUtils import getPostcodeFromText, PlanningAuthorityResults, PlanningApplication

from datetime import date
from time import strptime

# Day/month/year format string, e.g. 25/04/2007.
date_format = "%d/%m/%Y"
our_date = date(2007,4,25)

#This is to get the system key out of the info url
# Raw string so that \d is unambiguously a regex escape; the pattern value is
# unchanged.
system_key_regex = re.compile(r"TheSystemkey=(\d*)", re.IGNORECASE)

# NOTE(review): the AcolnetParser class definition is cut off mid-statement in
# this copy of the file (the action_regex pattern is missing), so it cannot be
# restored as runnable code without guessing. Its text is preserved below as
# comments; recover the full class from the upstream source.
#
# class AcolnetParser(HTMLParser.HTMLParser):
#     case_number_tr = None # this one can be got by the td class attribute
#     reg_date_tr = None
#     location_tr = None
#     proposal_tr = None
#
#     # There is no online comment facility in these, so we provide an
#     # appropriate email address instead
#     comments_email_address = None
#
#     # The optional amp; is to cope with Oldham, which seems to have started
#     # quoting this url.
#     action_regex = re.compile("