#!/usr/local/bin/python import urllib2 import urlparse from datetime import date import datetime import re from BeautifulSoup import BeautifulSoup # Adding this to try to help Surrey Heath - Duncan 14/9/2007 import cookielib cookie_jar = cookielib.CookieJar() ################ import MultipartPostHandler # this is not mine, or part of standard python (though it should be!) # it comes from http://pipe.scs.fsu.edu/PostHandler/MultipartPostHandler.py from PlanningUtils import getPostcodeFromText, PlanningAuthorityResults, PlanningApplication date_format = "%d/%m/%Y" #This is to get the system key out of the info url system_key_regex = re.compile("TheSystemkey=(\d*)", re.IGNORECASE) # We allow the optional > for Bridgnorth, which doesn't have broken html end_head_regex = re.compile("?", re.IGNORECASE) class AcolnetParser(HTMLParser.HTMLParser): received_date_format = "%d/%m/%Y" comment_qs_template = "ACTION=UNWRAP&RIPNAME=Root.PgeCommentForm&TheSystemkey=%s" # There is no online comment facility in these, so we provide an # appropriate email address instead comments_email_address = None # The optional amp; is to cope with Oldham, which seems to have started # quoting this url. action_regex = re.compile("
# NOTE(review): the line above holds the module prelude (imports, the shared
# cookie_jar, date_format, system_key_regex, end_head_regex) and the start of
# class AcolnetParser, all on one physical line. Because that line begins
# with "#", Python reads the whole of it as a single comment, so none of
# those statements run as written. The line breaks need restoring from the
# original file before this module can be used.
#
# NOTE(review): problems visible in the text of that line, to be fixed when
# the line structure is restored:
#   * end_head_regex is compiled from the pattern "?", which is not a valid
#     regular expression on its own (a quantifier with nothing to repeat).
#     The comment beside it talks about an optional ">", so the pattern has
#     presumably lost characters - TODO recover the original pattern.
#   * action_regex stops right after the opening quote of its pattern, so
#     the AcolnetParser class body is cut off at that point and the rest of
#     the class is not shown here.
#   * AcolnetParser derives from HTMLParser.HTMLParser, but no import of
#     HTMLParser appears among the imports on that line - TODO confirm
#     whether it was dropped.
#   * system_key_regex uses "\d" inside a non-raw string literal; a raw
#     string (r"...") is the conventional spelling for regex patterns.