diff --git a/cgi-bin/AcolnetParser.py b/cgi-bin/AcolnetParser.py index 36e916e..2da86db 100644 --- a/cgi-bin/AcolnetParser.py +++ b/cgi-bin/AcolnetParser.py @@ -23,6 +23,8 @@ from time import strptime date_format = "%d/%m/%Y" our_date = date(2007,4,25) +#This is to get the system key out of the info url +system_key_regex = re.compile("TheSystemkey=(\d*)", re.IGNORECASE) class AcolnetParser(HTMLParser.HTMLParser): case_number_tr = None # this one can be got by the td class attribute @@ -34,6 +36,7 @@ class AcolnetParser(HTMLParser.HTMLParser): # appropriate email address instead comments_email_address = None + action_regex = re.compile("]*action=\"([^\"]*ACTION=UNWRAP&RIPSESSION=[^\"]*)\"[^>]*>", re.IGNORECASE) def __init__(self, authority_name, @@ -96,23 +99,18 @@ class AcolnetParser(HTMLParser.HTMLParser): self._tr_number += 1 if tag == "td": self._in_td = True - if self._tr_number == self.case_number_tr: - #get the reference and the info link here - pass - elif self._tr_number == self.reg_date_tr: - #get the registration date here - pass - elif self._tr_number == self.location_tr: - #get the address and postcode here - pass - elif self._tr_number == self.proposal_tr: - #get the description here - pass if tag == "a" and self._tr_number == self.case_number_tr: # this is where we get the info link and the case number for key, value in attrs: if key == "href": self._current_application.info_url = value + + system_key = system_key_regex.search(value).groups()[0] + + if self.comments_email_address is not None: + self._current_application.comment_url = self.comments_email_address + else: + self._current_application.comment_url = value.replace("PgeResultDetail", "PgeCommentForm") def handle_data(self, data): # If we are in the tr which contains the case number, @@ -206,52 +204,31 @@ class AcolnetParser(HTMLParser.HTMLParser): class BaberghParser(AcolnetParser): - #search_url = "http://planning.babergh.gov.uk/dataOnlinePlanning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch" - case_number_tr = 1 # this one can be got by the td class attribute reg_date_tr = 2 location_tr = 4 proposal_tr = 5 - #authority_name = "Babergh District Council" - #authority_short_name = "Babergh" - # It would be nice to scrape this... comments_email_address = "planning.reception@babergh.gov.uk" - action_regex = re.compile("
") - class BasingstokeParser(AcolnetParser): - #search_url = "http://planning.basingstoke.gov.uk/DCOnline2/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch" - case_number_tr = 1 # this one can be got by the td class attribute reg_date_tr = 3 location_tr = 6 proposal_tr = 8 - #authority_name = "Basingstoke and Deane Borough Council" - #authority_short_name = "Basingstoke and Deane" - # It would be nice to scrape this... comments_email_address = "development.control@basingstoke.gov.uk" - action_regex = re.compile("") - class BassetlawParser(AcolnetParser): - #search_url = "http://www.bassetlaw.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch" - case_number_tr = 1 # this one can be got by the td class attribute reg_date_tr = 2 location_tr = 5 proposal_tr = 6 - #authority_name = "Bassetlaw District Council" - #authority_short_name = "Bassetlaw" - comments_email_address = "planning@bassetlaw.gov.uk" - action_regex = re.compile("", re.IGNORECASE) - def _cleanupHTML(self, html): """There is a broken div in this page. We don't need any divs, so let's get rid of them all.""" @@ -260,34 +237,26 @@ class BassetlawParser(AcolnetParser): return div_regex.sub('', html) -class BridgenorthParser(AcolnetParser): +class BridgnorthParser(AcolnetParser): + # This site is currently down... #search_url = "http://www2.bridgnorth-dc.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch" + #authority_name = "Bridgenorth District Council" + #authority_short_name = "Bridgenorth" case_number_tr = 1 # this one can be got by the td class attribute reg_date_tr = 2 location_tr = 4 proposal_tr = 5 - #authority_name = "Bridgenorth District Council" - #authority_short_name = "Bridgenorth" - comments_email_address = "contactus@bridgnorth-dc.gov.uk" - action_regex = re.compile("", re.IGNORECASE) - class BuryParser(AcolnetParser): - #search_url = "http://e-planning.bury.gov.uk/ePlanning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch" - case_number_tr = 1 # this one can be got by the td class attribute reg_date_tr = 2 location_tr = 4 proposal_tr = 5 - #authority_name = "Bury Metropolitan Borough Council" - #authority_short_name = "Bury" - comments_email_address = "development.control@bury.gov.uk" - action_regex = re.compile("", re.IGNORECASE) ## class CanterburyParser(AcolnetParser): ## search_url = "http://planning.canterbury.gov.uk/scripts/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch" @@ -301,36 +270,22 @@ class BuryParser(AcolnetParser): ## authority_short_name = "Canterbury" ## comments_email_address = "" -## action_regex = re.compile("") class CarlisleParser(AcolnetParser): - #search_url = "http://planning.carlisle.gov.uk/acolnet/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch" - case_number_tr = 1 # this one can be got by the td class attribute reg_date_tr = 2 location_tr = 5 proposal_tr = 6 - #authority_name = "Carlisle City Council" - #authority_short_name = "Carlisle" - comments_email_address = "dc@carlisle.gov.uk" - action_regex = re.compile("", re.IGNORECASE) - class DerbyParser(AcolnetParser): - #search_url = "http://195.224.106.204/scripts/planningpages02%5CXSLPagesDC_DERBY%5CDCWebPages/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch" - case_number_tr = 1 # this one can be got by the td class attribute reg_date_tr = 3 location_tr = 4 proposal_tr = 5 - #authority_name = "Derby City Council" - #authority_short_name = "Derby" - comments_email_address = "developmentcontrol@derby.gov.uk" - action_regex = re.compile("", re.IGNORECASE) class CroydonParser(AcolnetParser): case_number_tr = 1 # this one can be got by the td class attribute @@ -339,7 +294,6 @@ class CroydonParser(AcolnetParser): proposal_tr = 6 comments_email_address = "planning.control@croydon.gov.uk" - action_regex = re.compile("", re.IGNORECASE) class EastLindseyParser(AcolnetParser): case_number_tr = 1 # this one can be got by the td class attribute @@ -348,7 +302,6 @@ class EastLindseyParser(AcolnetParser): proposal_tr = 6 comments_email_address = "development.control@e-lindsey.gov.uk" - action_regex = re.compile("", re.IGNORECASE) class FyldeParser(AcolnetParser): case_number_tr = 1 # this one can be got by the td class attribute @@ -357,8 +310,6 @@ class FyldeParser(AcolnetParser): proposal_tr = 5 comments_email_address = "planning@fylde.gov.uk" - action_regex = re.compile("") - class HarlowParser(AcolnetParser): case_number_tr = 1 # this one can be got by the td class attribute @@ -367,8 +318,6 @@ class HarlowParser(AcolnetParser): proposal_tr = 5 comments_email_address = "Planning.services@harlow.gov.uk" - action_regex = re.compile("", re.IGNORECASE) - class HavantParser(AcolnetParser): case_number_tr = 1 # this one can be got by the td class attribute @@ -377,7 +326,6 @@ class HavantParser(AcolnetParser): proposal_tr = 5 comments_email_address = "representations@havant.gov.uk" - action_regex = re.compile("", re.IGNORECASE) class HertsmereParser(AcolnetParser): case_number_tr = 1 # this one can be got by the td class attribute @@ -386,7 +334,6 @@ class HertsmereParser(AcolnetParser): proposal_tr = 5 comments_email_address = "planning@hertsmere.gov.uk" - action_regex = re.compile("", re.IGNORECASE) class LewishamParser(AcolnetParser): case_number_tr = 1 # this one can be got by the td class attribute @@ -394,50 +341,46 @@ class LewishamParser(AcolnetParser): location_tr = 4 proposal_tr = 5 - comments_email_address = "planning@hertsmere.gov.uk" - action_regex = re.compile("", re.IGNORECASE) + comments_email_address = "planning@lewisham.gov.uk" -class NorthHertfordshireParser(AcolnetParser): +## class NorthHertfordshireParser(AcolnetParser): +## case_number_tr = 1 # this one can be got by the td class attribute +## reg_date_tr = 2 +## location_tr = 4 +## proposal_tr = 5 + +## comments_email_address = "planning@lewisham.gov.uk" + +## class MidSuffolkParser(AcolnetParser): +## case_number_tr = 1 # this one can be got by the td class attribute +## reg_date_tr = 2 +## location_tr = 4 +## proposal_tr = 5 + +## comments_email_address = "planning@lewisham.gov.uk" +## #action_regex = re.compile("]*>", re.IGNORECASE) + +class NewForestParser(AcolnetParser): + # In this case there is an online comment facility at the + # bottom of each view app page... case_number_tr = 1 # this one can be got by the td class attribute reg_date_tr = 2 location_tr = 4 proposal_tr = 5 - - comments_email_address = "planning@lewisham.gov.uk" - action_regex = re.compile("", re.IGNORECASE) - - if __name__ == '__main__': day = 15 month = 3 year = 2007 - # working - # parser = BasingstokeParser() - #parser = BaberghParser("Babergh District Council", "Babergh", "http://planning.babergh.gov.uk/dataOnlinePlanning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") - - # works with the divs stripped out - #parser = BassetlawParser() - # returns error 400 - bad request #parser = BridgenorthParser() - # working - #parser = BuryParser() - # cambridgeshire is a bit different... # no advanced search page # canterbury # results as columns of one table - # returns error 400 - bad request - #parser = CarlisleParser() - - # working - #parser = DerbyParser() - - parser = HavantParser("HavantBC", "Havant", "http://www3.havant.gov.uk/scripts/planningpages/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch") print parser.getResults(day, month, year) diff --git a/cgi-bin/Allerdale.cgi b/cgi-bin/Allerdale.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Alnwick.cgi b/cgi-bin/Alnwick.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Angus.cgi b/cgi-bin/Angus.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Aylesbury Vale.cgi b/cgi-bin/Aylesbury Vale.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Babergh.cgi b/cgi-bin/Babergh.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Barrow.cgi b/cgi-bin/Barrow.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Basildon.cgi b/cgi-bin/Basildon.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Basingstoke and Deane.cgi b/cgi-bin/Basingstoke and Deane.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Bassetlaw.cgi b/cgi-bin/Bassetlaw.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Bath.cgi b/cgi-bin/Bath.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Bexley.cgi b/cgi-bin/Bexley.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Blaby.cgi b/cgi-bin/Blaby.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Bolsover.cgi b/cgi-bin/Bolsover.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Bridgnorth.cgi b/cgi-bin/Bridgnorth.cgi new file mode 100755 index 0000000..95f1734 --- /dev/null +++ b/cgi-bin/Bridgnorth.cgi @@ -0,0 +1,29 @@ +#!/usr/local/bin/python + +# This is the parser for Bridgnorth District Council. +# it is generated from the file CGITemplate + +import cgi +import cgitb +#cgitb.enable(display=0, logdir="/tmp") + + +form = cgi.FieldStorage() +day = form.getfirst('day') +month = form.getfirst('month') +year = form.getfirst('year') + + +authority_name = "Bridgnorth District Council" +authority_short_name = "Bridgnorth" +base_url = "http://www2.bridgnorth-dc.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch" + +import AcolnetParser + +parser = AcolnetParser.BridgnorthParser(authority_name, authority_short_name, base_url) + +xml = parser.getResults(day, month, year) + +print "Content-Type: text/xml" # XML is following +print +print xml # print the xml diff --git a/cgi-bin/Bristol.cgi b/cgi-bin/Bristol.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Buckinghamshire.cgi b/cgi-bin/Buckinghamshire.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Bury.cgi b/cgi-bin/Bury.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Carlisle.cgi b/cgi-bin/Carlisle.cgi new file mode 100755 index 0000000..5c9b09f --- /dev/null +++ b/cgi-bin/Carlisle.cgi @@ -0,0 +1,29 @@ +#!/usr/local/bin/python + +# This is the parser for Carlisle City Council. +# it is generated from the file CGITemplate + +import cgi +import cgitb +#cgitb.enable(display=0, logdir="/tmp") + + +form = cgi.FieldStorage() +day = form.getfirst('day') +month = form.getfirst('month') +year = form.getfirst('year') + + +authority_name = "Carlisle City Council" +authority_short_name = "Carlisle" +base_url = "http://planning.carlisle.gov.uk/acolnet/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch" + +import AcolnetParser + +parser = AcolnetParser.CarlisleParser(authority_name, authority_short_name, base_url) + +xml = parser.getResults(day, month, year) + +print "Content-Type: text/xml" # XML is following +print +print xml # print the xml diff --git a/cgi-bin/Chelmsford.cgi b/cgi-bin/Chelmsford.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Cherwell.cgi b/cgi-bin/Cherwell.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Chorley.cgi b/cgi-bin/Chorley.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/City of London.cgi b/cgi-bin/City of London.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Cornwall.cgi b/cgi-bin/Cornwall.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Coventry.cgi b/cgi-bin/Coventry.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Croydon.cgi b/cgi-bin/Croydon.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Denbighshire.cgi b/cgi-bin/Denbighshire.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Derby.cgi b/cgi-bin/Derby.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Doncaster.cgi b/cgi-bin/Doncaster.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Dundee.cgi b/cgi-bin/Dundee.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Durham.cgi b/cgi-bin/Durham.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Ealing.cgi b/cgi-bin/Ealing.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Easington.cgi b/cgi-bin/Easington.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/East Devon.cgi b/cgi-bin/East Devon.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/East Dorset.cgi b/cgi-bin/East Dorset.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/East Lindsey.cgi b/cgi-bin/East Lindsey.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Edinburgh.cgi b/cgi-bin/Edinburgh.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Epsom and Ewell.cgi b/cgi-bin/Epsom and Ewell.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Fenland.cgi b/cgi-bin/Fenland.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Fylde.cgi b/cgi-bin/Fylde.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Gateshead.cgi b/cgi-bin/Gateshead.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Gedling.cgi b/cgi-bin/Gedling.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Gloucestershire.cgi b/cgi-bin/Gloucestershire.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Gravesham.cgi b/cgi-bin/Gravesham.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Hammersmith and Fulham.cgi b/cgi-bin/Hammersmith and Fulham.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Haringey.cgi b/cgi-bin/Haringey.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Harlow.cgi b/cgi-bin/Harlow.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Harrogate.cgi b/cgi-bin/Harrogate.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Hart.cgi b/cgi-bin/Hart.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Hartlepool.cgi b/cgi-bin/Hartlepool.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Hertsmere.cgi b/cgi-bin/Hertsmere.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/High Peak.cgi b/cgi-bin/High Peak.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Huntingdonshire.cgi b/cgi-bin/Huntingdonshire.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Kerrier.cgi b/cgi-bin/Kerrier.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Knowsley.cgi b/cgi-bin/Knowsley.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Lancaster.cgi b/cgi-bin/Lancaster.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Lewisham.cgi b/cgi-bin/Lewisham.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Luton.cgi b/cgi-bin/Luton.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Malvern Hills.cgi b/cgi-bin/Malvern Hills.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Manchester.cgi b/cgi-bin/Manchester.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Mid Devon.cgi b/cgi-bin/Mid Devon.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Milton Keynes.cgi b/cgi-bin/Milton Keynes.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/NW Leicestershire.cgi b/cgi-bin/NW Leicestershire.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/New Forest.cgi b/cgi-bin/New Forest.cgi new file mode 100755 index 0000000..39e34b2 --- /dev/null +++ b/cgi-bin/New Forest.cgi @@ -0,0 +1,29 @@ +#!/usr/local/bin/python + +# This is the parser for New Forest National Park. +# it is generated from the file CGITemplate + +import cgi +import cgitb +#cgitb.enable(display=0, logdir="/tmp") + + +form = cgi.FieldStorage() +day = form.getfirst('day') +month = form.getfirst('month') +year = form.getfirst('year') + + +authority_name = "New Forest National Park" +authority_short_name = "New Forest" +base_url = "http://web01.newforestnpa.gov.uk/planningpages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch" + +import AcolnetParser + +parser = AcolnetParser.NewForestParser(authority_name, authority_short_name, base_url) + +xml = parser.getResults(day, month, year) + +print "Content-Type: text/xml" # XML is following +print +print xml # print the xml diff --git a/cgi-bin/Newcastle-under-Lyme.cgi b/cgi-bin/Newcastle-under-Lyme.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Newham.cgi b/cgi-bin/Newham.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/North Hertfordshire.cgi b/cgi-bin/North Hertfordshire.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/North Tyneside.cgi b/cgi-bin/North Tyneside.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/North Warwickshire.cgi b/cgi-bin/North Warwickshire.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Northumberland.cgi b/cgi-bin/Northumberland.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Oadby and Wigston.cgi b/cgi-bin/Oadby and Wigston.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Oswestry.cgi b/cgi-bin/Oswestry.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Peterborough.cgi b/cgi-bin/Peterborough.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Portsmouth.cgi b/cgi-bin/Portsmouth.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Redditch.cgi b/cgi-bin/Redditch.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Rushmoor.cgi b/cgi-bin/Rushmoor.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Scarborough.cgi b/cgi-bin/Scarborough.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Sevenoaks.cgi b/cgi-bin/Sevenoaks.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/South Bucks.cgi b/cgi-bin/South Bucks.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/South Ribble.cgi b/cgi-bin/South Ribble.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/South Staffordshire.cgi b/cgi-bin/South Staffordshire.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/SouthOxfordshire.cgi b/cgi-bin/SouthOxfordshire.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Southampton.cgi b/cgi-bin/Southampton.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Spelthorne.cgi b/cgi-bin/Spelthorne.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/St Helens.cgi b/cgi-bin/St Helens.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Stevenage.cgi b/cgi-bin/Stevenage.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Stirling.cgi b/cgi-bin/Stirling.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Stockton-On-Tees.cgi b/cgi-bin/Stockton-On-Tees.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Stratford.cgi b/cgi-bin/Stratford.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Sunderland.cgi b/cgi-bin/Sunderland.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Teignbridge.cgi b/cgi-bin/Teignbridge.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Test Valley.cgi b/cgi-bin/Test Valley.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Tonbridge.cgi b/cgi-bin/Tonbridge.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Torbay.cgi b/cgi-bin/Torbay.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Vale Royal.cgi b/cgi-bin/Vale Royal.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Waveney.cgi b/cgi-bin/Waveney.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Wear Valley.cgi b/cgi-bin/Wear Valley.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Wellingborough.cgi b/cgi-bin/Wellingborough.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/West Berkshire.cgi b/cgi-bin/West Berkshire.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/West Lancashire.cgi b/cgi-bin/West Lancashire.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/West Norfolk.cgi b/cgi-bin/West Norfolk.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Winchester.cgi b/cgi-bin/Winchester.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Woking.cgi b/cgi-bin/Woking.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Wolverhampton.cgi b/cgi-bin/Wolverhampton.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/York.cgi b/cgi-bin/York.cgi old mode 100755 new mode 100644 diff --git a/python_scrapers/AcolnetParser.py b/python_scrapers/AcolnetParser.py index 36e916e..2da86db 100644 --- a/python_scrapers/AcolnetParser.py +++ b/python_scrapers/AcolnetParser.py @@ -23,6 +23,8 @@ from time import strptime date_format = "%d/%m/%Y" our_date = date(2007,4,25) +#This is to get the system key out of the info url +system_key_regex = re.compile("TheSystemkey=(\d*)", re.IGNORECASE) class AcolnetParser(HTMLParser.HTMLParser): case_number_tr = None # this one can be got by the td class attribute @@ -34,6 +36,7 @@ class AcolnetParser(HTMLParser.HTMLParser): # appropriate email address instead comments_email_address = None + action_regex = re.compile("]*action=\"([^\"]*ACTION=UNWRAP&RIPSESSION=[^\"]*)\"[^>]*>", re.IGNORECASE) def __init__(self, authority_name, @@ -96,23 +99,18 @@ class AcolnetParser(HTMLParser.HTMLParser): self._tr_number += 1 if tag == "td": self._in_td = True - if self._tr_number == self.case_number_tr: - #get the reference and the info link here - pass - elif self._tr_number == self.reg_date_tr: - #get the registration date here - pass - elif self._tr_number == self.location_tr: - #get the address and postcode here - pass - elif self._tr_number == self.proposal_tr: - #get the description here - pass if tag == "a" and self._tr_number == self.case_number_tr: # this is where we get the info link and the case number for key, value in attrs: if key == "href": self._current_application.info_url = value + + system_key = system_key_regex.search(value).groups()[0] + + if self.comments_email_address is not None: + self._current_application.comment_url = self.comments_email_address + else: + self._current_application.comment_url = value.replace("PgeResultDetail", "PgeCommentForm") def handle_data(self, data): # If we are in the tr which contains the case number, @@ -206,52 +204,31 @@ class AcolnetParser(HTMLParser.HTMLParser): class BaberghParser(AcolnetParser): - #search_url = "http://planning.babergh.gov.uk/dataOnlinePlanning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch" - case_number_tr = 1 # this one can be got by the td class attribute reg_date_tr = 2 location_tr = 4 proposal_tr = 5 - #authority_name = "Babergh District Council" - #authority_short_name = "Babergh" - # It would be nice to scrape this... comments_email_address = "planning.reception@babergh.gov.uk" - action_regex = re.compile("") - class BasingstokeParser(AcolnetParser): - #search_url = "http://planning.basingstoke.gov.uk/DCOnline2/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch" - case_number_tr = 1 # this one can be got by the td class attribute reg_date_tr = 3 location_tr = 6 proposal_tr = 8 - #authority_name = "Basingstoke and Deane Borough Council" - #authority_short_name = "Basingstoke and Deane" - # It would be nice to scrape this... comments_email_address = "development.control@basingstoke.gov.uk" - action_regex = re.compile("") - class BassetlawParser(AcolnetParser): - #search_url = "http://www.bassetlaw.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch" - case_number_tr = 1 # this one can be got by the td class attribute reg_date_tr = 2 location_tr = 5 proposal_tr = 6 - #authority_name = "Bassetlaw District Council" - #authority_short_name = "Bassetlaw" - comments_email_address = "planning@bassetlaw.gov.uk" - action_regex = re.compile("", re.IGNORECASE) - def _cleanupHTML(self, html): """There is a broken div in this page. We don't need any divs, so let's get rid of them all.""" @@ -260,34 +237,26 @@ class BassetlawParser(AcolnetParser): return div_regex.sub('', html) -class BridgenorthParser(AcolnetParser): +class BridgnorthParser(AcolnetParser): + # This site is currently down... #search_url = "http://www2.bridgnorth-dc.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch" + #authority_name = "Bridgenorth District Council" + #authority_short_name = "Bridgenorth" case_number_tr = 1 # this one can be got by the td class attribute reg_date_tr = 2 location_tr = 4 proposal_tr = 5 - #authority_name = "Bridgenorth District Council" - #authority_short_name = "Bridgenorth" - comments_email_address = "contactus@bridgnorth-dc.gov.uk" - action_regex = re.compile("", re.IGNORECASE) - class BuryParser(AcolnetParser): - #search_url = "http://e-planning.bury.gov.uk/ePlanning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch" - case_number_tr = 1 # this one can be got by the td class attribute reg_date_tr = 2 location_tr = 4 proposal_tr = 5 - #authority_name = "Bury Metropolitan Borough Council" - #authority_short_name = "Bury" - comments_email_address = "development.control@bury.gov.uk" - action_regex = re.compile("", re.IGNORECASE) ## class CanterburyParser(AcolnetParser): ## search_url = "http://planning.canterbury.gov.uk/scripts/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch" @@ -301,36 +270,22 @@ class BuryParser(AcolnetParser): ## authority_short_name = "Canterbury" ## comments_email_address = "" -## action_regex = re.compile("") class CarlisleParser(AcolnetParser): - #search_url = "http://planning.carlisle.gov.uk/acolnet/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch" - case_number_tr = 1 # this one can be got by the td class attribute reg_date_tr = 2 location_tr = 5 proposal_tr = 6 - #authority_name = "Carlisle City Council" - #authority_short_name = "Carlisle" - comments_email_address = "dc@carlisle.gov.uk" - action_regex = re.compile("", re.IGNORECASE) - class DerbyParser(AcolnetParser): - #search_url = "http://195.224.106.204/scripts/planningpages02%5CXSLPagesDC_DERBY%5CDCWebPages/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch" - case_number_tr = 1 # this one can be got by the td class attribute reg_date_tr = 3 location_tr = 4 proposal_tr = 5 - #authority_name = "Derby City Council" - #authority_short_name = "Derby" - comments_email_address = "developmentcontrol@derby.gov.uk" - action_regex = re.compile("", re.IGNORECASE) class CroydonParser(AcolnetParser): case_number_tr = 1 # this one can be got by the td class attribute @@ -339,7 +294,6 @@ class CroydonParser(AcolnetParser): proposal_tr = 6 comments_email_address = "planning.control@croydon.gov.uk" - action_regex = re.compile("", re.IGNORECASE) class EastLindseyParser(AcolnetParser): case_number_tr = 1 # this one can be got by the td class attribute @@ -348,7 +302,6 @@ class EastLindseyParser(AcolnetParser): proposal_tr = 6 comments_email_address = "development.control@e-lindsey.gov.uk" - action_regex = re.compile("", re.IGNORECASE) class FyldeParser(AcolnetParser): case_number_tr = 1 # this one can be got by the td class attribute @@ -357,8 +310,6 @@ class FyldeParser(AcolnetParser): proposal_tr = 5 comments_email_address = "planning@fylde.gov.uk" - action_regex = re.compile("") - class HarlowParser(AcolnetParser): case_number_tr = 1 # this one can be got by the td class attribute @@ -367,8 +318,6 @@ class HarlowParser(AcolnetParser): proposal_tr = 5 comments_email_address = "Planning.services@harlow.gov.uk" - action_regex = re.compile("", re.IGNORECASE) - class HavantParser(AcolnetParser): case_number_tr = 1 # this one can be got by the td class attribute @@ -377,7 +326,6 @@ class HavantParser(AcolnetParser): proposal_tr = 5 comments_email_address = "representations@havant.gov.uk" - action_regex = re.compile("", re.IGNORECASE) class HertsmereParser(AcolnetParser): case_number_tr = 1 # this one can be got by the td class attribute @@ -386,7 +334,6 @@ class HertsmereParser(AcolnetParser): proposal_tr = 5 comments_email_address = "planning@hertsmere.gov.uk" - action_regex = re.compile("", re.IGNORECASE) class LewishamParser(AcolnetParser): case_number_tr = 1 # this one can be got by the td class attribute @@ -394,50 +341,46 @@ class LewishamParser(AcolnetParser): location_tr = 4 proposal_tr = 5 - comments_email_address = "planning@hertsmere.gov.uk" - action_regex = re.compile("", re.IGNORECASE) + comments_email_address = "planning@lewisham.gov.uk" -class NorthHertfordshireParser(AcolnetParser): +## class NorthHertfordshireParser(AcolnetParser): +## case_number_tr = 1 # this one can be got by the td class attribute +## reg_date_tr = 2 +## location_tr = 4 +## proposal_tr = 5 + +## comments_email_address = "planning@lewisham.gov.uk" + +## class MidSuffolkParser(AcolnetParser): +## case_number_tr = 1 # this one can be got by the td class attribute +## reg_date_tr = 2 +## location_tr = 4 +## proposal_tr = 5 + +## comments_email_address = "planning@lewisham.gov.uk" +## #action_regex = re.compile("]*>", re.IGNORECASE) + +class NewForestParser(AcolnetParser): + # In this case there is an online comment facility at the + # bottom of each view app page... case_number_tr = 1 # this one can be got by the td class attribute reg_date_tr = 2 location_tr = 4 proposal_tr = 5 - - comments_email_address = "planning@lewisham.gov.uk" - action_regex = re.compile("", re.IGNORECASE) - - if __name__ == '__main__': day = 15 month = 3 year = 2007 - # working - # parser = BasingstokeParser() - #parser = BaberghParser("Babergh District Council", "Babergh", "http://planning.babergh.gov.uk/dataOnlinePlanning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") - - # works with the divs stripped out - #parser = BassetlawParser() - # returns error 400 - bad request #parser = BridgenorthParser() - # working - #parser = BuryParser() - # cambridgeshire is a bit different... # no advanced search page # canterbury # results as columns of one table - # returns error 400 - bad request - #parser = CarlisleParser() - - # working - #parser = DerbyParser() - - parser = HavantParser("HavantBC", "Havant", "http://www3.havant.gov.uk/scripts/planningpages/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch") print parser.getResults(day, month, year) diff --git a/python_scrapers/PublicAccessSites.csv b/python_scrapers/PublicAccessSites.csv index 34f18ff..c974ca9 100644 --- a/python_scrapers/PublicAccessSites.csv +++ b/python_scrapers/PublicAccessSites.csv @@ -100,4 +100,7 @@ "Harlow Council", "Harlow", "http://planning.harlow.gov.uk/PlanningSearch/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "HarlowParser" "Hertsmere Borough Council", "Hertsmere", "http://www2.hertsmere.gov.uk/ACOLNET/DCOnline//acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "HertsmereParser" "London Borough of Lewisham", "Lewisham", "http://acolnet.lewisham.gov.uk/lewis-xslpagesdc/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.PgeSearch", "AcolnetParser", "LewishamParser" -"North Hertfordshire District Council", "North Hertfordshire", "http://www.north-herts.gov.uk/dcdataonline/Pages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch", "AcolnetParser", "NorthHertfordshireParser" \ No newline at end of file +"North Hertfordshire District Council", "North Hertfordshire", "http://www.north-herts.gov.uk/dcdataonline/Pages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch", "AcolnetParser", "NorthHertfordshireParser" +"New Forest National Park", "New Forest", "http://web01.newforestnpa.gov.uk/planningpages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "NewForestParser" +"Bridgnorth District Council", "Bridgnorth", "http://www2.bridgnorth-dc.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch", "AcolnetParser", "BridgnorthParser" +"Carlisle City Council", "Carlisle", "http://planning.carlisle.gov.uk/acolnet/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "CarlisleParser"