diff --git a/cgi-bin/AcolnetParser.py b/cgi-bin/AcolnetParser.py index d916221..78ab7de 100644 --- a/cgi-bin/AcolnetParser.py +++ b/cgi-bin/AcolnetParser.py @@ -159,8 +159,8 @@ class AcolnetParser(HTMLParser.HTMLParser): search_form_response = urllib2.urlopen(self.base_url) search_form_contents = search_form_response.read() - #outfile = open("tmpfile", "w") - #outfile.write(search_form_contents) + outfile = open("tmpfile", "w") + outfile.write(search_form_contents) # This sometimes causes a problem in HTMLParser, so let's just get the link # out with a regex... @@ -410,6 +410,14 @@ class SuffolkCoastalParser(AcolnetParser): proposal_tr = 5 comments_email_address = "d.c.admin@suffolkcoastal.gov.uk" + +class SurreyHeathParser(AcolnetParser): + case_number_tr = 1 # this one can be got by the td class attribute + reg_date_tr = 2 + location_tr = 4 + proposal_tr = 5 + + comments_email_address = "development-control@surreyheath.gov.uk" if __name__ == '__main__': day = 15 @@ -425,7 +433,7 @@ if __name__ == '__main__': # canterbury # results as columns of one table - parser = OldhamParser("Oldham", "Oldham", "http://planning.oldham.gov.uk/planning//acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") + parser = SurreyHeathParser("Surrey Heath", "Surrey Heath", "https://www.public.surreyheath-online.gov.uk/whalecom60b1ef305f59f921/whalecom0/Scripts/PlanningPagesOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") print parser.getResults(day, month, year) diff --git a/cgi-bin/Allerdale.cgi b/cgi-bin/Allerdale.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Alnwick.cgi b/cgi-bin/Alnwick.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Angus.cgi b/cgi-bin/Angus.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Aylesbury Vale.cgi b/cgi-bin/Aylesbury Vale.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Babergh.cgi b/cgi-bin/Babergh.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Barrow.cgi b/cgi-bin/Barrow.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Basildon.cgi b/cgi-bin/Basildon.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Basingstoke and Deane.cgi b/cgi-bin/Basingstoke and Deane.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Bassetlaw.cgi b/cgi-bin/Bassetlaw.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Bath.cgi b/cgi-bin/Bath.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Bexley.cgi b/cgi-bin/Bexley.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Blaby.cgi b/cgi-bin/Blaby.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Bolsover.cgi b/cgi-bin/Bolsover.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Bridgnorth.cgi b/cgi-bin/Bridgnorth.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Bristol.cgi b/cgi-bin/Bristol.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Buckinghamshire.cgi b/cgi-bin/Buckinghamshire.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Bury.cgi b/cgi-bin/Bury.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Carlisle.cgi b/cgi-bin/Carlisle.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Chelmsford.cgi b/cgi-bin/Chelmsford.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Cherwell.cgi b/cgi-bin/Cherwell.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Chorley.cgi b/cgi-bin/Chorley.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/City of London.cgi b/cgi-bin/City of London.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Cornwall.cgi b/cgi-bin/Cornwall.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Coventry.cgi b/cgi-bin/Coventry.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Croydon.cgi b/cgi-bin/Croydon.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Denbighshire.cgi b/cgi-bin/Denbighshire.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Derby.cgi b/cgi-bin/Derby.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Doncaster.cgi b/cgi-bin/Doncaster.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Dundee.cgi b/cgi-bin/Dundee.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Durham.cgi b/cgi-bin/Durham.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Ealing.cgi b/cgi-bin/Ealing.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Easington.cgi b/cgi-bin/Easington.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/East Devon.cgi b/cgi-bin/East Devon.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/East Dorset.cgi b/cgi-bin/East Dorset.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/East Lindsey.cgi b/cgi-bin/East Lindsey.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Edinburgh.cgi b/cgi-bin/Edinburgh.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Epsom and Ewell.cgi b/cgi-bin/Epsom and Ewell.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Fenland.cgi b/cgi-bin/Fenland.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Fylde.cgi b/cgi-bin/Fylde.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Gateshead.cgi b/cgi-bin/Gateshead.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Gedling.cgi b/cgi-bin/Gedling.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Gloucestershire.cgi b/cgi-bin/Gloucestershire.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Gravesham.cgi b/cgi-bin/Gravesham.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Hammersmith and Fulham.cgi b/cgi-bin/Hammersmith and Fulham.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Haringey.cgi b/cgi-bin/Haringey.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Harlow.cgi b/cgi-bin/Harlow.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Harrogate.cgi b/cgi-bin/Harrogate.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Hart.cgi b/cgi-bin/Hart.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Hartlepool.cgi b/cgi-bin/Hartlepool.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Hertsmere.cgi b/cgi-bin/Hertsmere.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/High Peak.cgi b/cgi-bin/High Peak.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Huntingdonshire.cgi b/cgi-bin/Huntingdonshire.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Kerrier.cgi b/cgi-bin/Kerrier.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Knowsley.cgi b/cgi-bin/Knowsley.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Lancaster.cgi b/cgi-bin/Lancaster.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Lewisham.cgi b/cgi-bin/Lewisham.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Luton.cgi b/cgi-bin/Luton.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Malvern Hills.cgi b/cgi-bin/Malvern Hills.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Manchester.cgi b/cgi-bin/Manchester.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Mid Devon.cgi b/cgi-bin/Mid Devon.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Milton Keynes.cgi b/cgi-bin/Milton Keynes.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/NW Leicestershire.cgi b/cgi-bin/NW Leicestershire.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/New Forest.cgi b/cgi-bin/New Forest.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Newcastle-under-Lyme.cgi b/cgi-bin/Newcastle-under-Lyme.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Newcastle.cgi b/cgi-bin/Newcastle.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Newham.cgi b/cgi-bin/Newham.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/North Hertfordshire.cgi b/cgi-bin/North Hertfordshire.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/North Tyneside.cgi b/cgi-bin/North Tyneside.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/North Warwickshire.cgi b/cgi-bin/North Warwickshire.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/North Wiltshire.cgi b/cgi-bin/North Wiltshire.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Northumberland.cgi b/cgi-bin/Northumberland.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Oadby and Wigston.cgi b/cgi-bin/Oadby and Wigston.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Oldham.cgi b/cgi-bin/Oldham.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Oswestry.cgi b/cgi-bin/Oswestry.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Peterborough.cgi b/cgi-bin/Peterborough.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Portsmouth.cgi b/cgi-bin/Portsmouth.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/PublicAccess.py b/cgi-bin/PublicAccess.py index 1dc7341..5ff7baf 100644 --- a/cgi-bin/PublicAccess.py +++ b/cgi-bin/PublicAccess.py @@ -10,7 +10,7 @@ import cookielib cookie_jar = cookielib.CookieJar() -from PlanningUtils import fixNewlines, PlanningAuthorityResults, PlanningApplication +from PlanningUtils import fixNewlines, getPostcodeFromText, PlanningAuthorityResults, PlanningApplication search_form_url_end = "DcApplication/application_searchform.aspx" @@ -167,6 +167,9 @@ class PublicAccessParser(HTMLParser.HTMLParser): # one found on the property page if property_file_parser.postcode is not None: self._current_application.postcode = property_file_parser.postcode + else: + # If there is no postcode in here, then we'll have to make do with regexing one out of the address. + self._current_application.postcode = getPostcodeFromText(self._current_application.address) # There is no need for us to look at any more attributes. break diff --git a/cgi-bin/Redditch.cgi b/cgi-bin/Redditch.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Renfrewshire.cgi b/cgi-bin/Renfrewshire.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Rushmoor.cgi b/cgi-bin/Rushmoor.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Scarborough.cgi b/cgi-bin/Scarborough.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Selby.cgi b/cgi-bin/Selby.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Sevenoaks.cgi b/cgi-bin/Sevenoaks.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/South Bedfordshire.cgi b/cgi-bin/South Bedfordshire.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/South Bucks.cgi b/cgi-bin/South Bucks.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/South Ribble.cgi b/cgi-bin/South Ribble.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/South Staffordshire.cgi b/cgi-bin/South Staffordshire.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/SouthOxfordshire.cgi b/cgi-bin/SouthOxfordshire.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Southampton.cgi b/cgi-bin/Southampton.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Spelthorne.cgi b/cgi-bin/Spelthorne.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/St Helens.cgi b/cgi-bin/St Helens.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Stevenage.cgi b/cgi-bin/Stevenage.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Stirling.cgi b/cgi-bin/Stirling.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Stockton-On-Tees.cgi b/cgi-bin/Stockton-On-Tees.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Stratford.cgi b/cgi-bin/Stratford.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Suffolk Coastal.cgi b/cgi-bin/Suffolk Coastal.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Sunderland.cgi b/cgi-bin/Sunderland.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Surrey Heath.cgi b/cgi-bin/Surrey Heath.cgi new file mode 100755 index 0000000..9537585 --- /dev/null +++ b/cgi-bin/Surrey Heath.cgi @@ -0,0 +1,29 @@ +#!/usr/local/bin/python + +# This is the parser for Surrey Heath Borough Council. +# it is generated from the file CGITemplate + +import cgi +import cgitb +#cgitb.enable(display=0, logdir="/tmp") + + +form = cgi.FieldStorage() +day = form.getfirst('day') +month = form.getfirst('month') +year = form.getfirst('year') + + +authority_name = "Surrey Heath Borough Council" +authority_short_name = "Surrey Heath" +base_url = "https://www.public.surreyheath-online.gov.uk/whalecom60b1ef305f59f921/whalecom0/Scripts/PlanningPagesOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch" + +import AcolnetParser + +parser = AcolnetParser.SurreyHeathParser(authority_name, authority_short_name, base_url) + +xml = parser.getResults(day, month, year) + +print "Content-Type: text/xml" # XML is following +print +print xml # print the xml diff --git a/cgi-bin/Teignbridge.cgi b/cgi-bin/Teignbridge.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Test Valley.cgi b/cgi-bin/Test Valley.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Tonbridge.cgi b/cgi-bin/Tonbridge.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Torbay.cgi b/cgi-bin/Torbay.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Vale Royal.cgi b/cgi-bin/Vale Royal.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Waveney.cgi b/cgi-bin/Waveney.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Wear Valley.cgi b/cgi-bin/Wear Valley.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Wellingborough.cgi b/cgi-bin/Wellingborough.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/West Berkshire.cgi b/cgi-bin/West Berkshire.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/West Lancashire.cgi b/cgi-bin/West Lancashire.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/West Norfolk.cgi b/cgi-bin/West Norfolk.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Winchester.cgi b/cgi-bin/Winchester.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Woking.cgi b/cgi-bin/Woking.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Wolverhampton.cgi b/cgi-bin/Wolverhampton.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/York.cgi b/cgi-bin/York.cgi old mode 100755 new mode 100644 diff --git a/python_scrapers/AcolnetParser.py b/python_scrapers/AcolnetParser.py index d916221..78ab7de 100644 --- a/python_scrapers/AcolnetParser.py +++ b/python_scrapers/AcolnetParser.py @@ -159,8 +159,8 @@ class AcolnetParser(HTMLParser.HTMLParser): search_form_response = urllib2.urlopen(self.base_url) search_form_contents = search_form_response.read() - #outfile = open("tmpfile", "w") - #outfile.write(search_form_contents) + outfile = open("tmpfile", "w") + outfile.write(search_form_contents) # This sometimes causes a problem in HTMLParser, so let's just get the link # out with a regex... @@ -410,6 +410,14 @@ class SuffolkCoastalParser(AcolnetParser): proposal_tr = 5 comments_email_address = "d.c.admin@suffolkcoastal.gov.uk" + +class SurreyHeathParser(AcolnetParser): + case_number_tr = 1 # this one can be got by the td class attribute + reg_date_tr = 2 + location_tr = 4 + proposal_tr = 5 + + comments_email_address = "development-control@surreyheath.gov.uk" if __name__ == '__main__': day = 15 @@ -425,7 +433,7 @@ if __name__ == '__main__': # canterbury # results as columns of one table - parser = OldhamParser("Oldham", "Oldham", "http://planning.oldham.gov.uk/planning//acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") + parser = SurreyHeathParser("Surrey Heath", "Surrey Heath", "https://www.public.surreyheath-online.gov.uk/whalecom60b1ef305f59f921/whalecom0/Scripts/PlanningPagesOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") print parser.getResults(day, month, year) diff --git a/python_scrapers/PublicAccess.py b/python_scrapers/PublicAccess.py index 1dc7341..5ff7baf 100644 --- a/python_scrapers/PublicAccess.py +++ b/python_scrapers/PublicAccess.py @@ -10,7 +10,7 @@ import cookielib cookie_jar = cookielib.CookieJar() -from PlanningUtils import fixNewlines, PlanningAuthorityResults, PlanningApplication +from PlanningUtils import fixNewlines, getPostcodeFromText, PlanningAuthorityResults, PlanningApplication search_form_url_end = "DcApplication/application_searchform.aspx" @@ -167,6 +167,9 @@ class PublicAccessParser(HTMLParser.HTMLParser): # one found on the property page if property_file_parser.postcode is not None: self._current_application.postcode = property_file_parser.postcode + else: + # If there is no postcode in here, then we'll have to make do with regexing one out of the address. + self._current_application.postcode = getPostcodeFromText(self._current_application.address) # There is no need for us to look at any more attributes. break diff --git a/python_scrapers/PublicAccessSites.csv b/python_scrapers/PublicAccessSites.csv index 5f9cd72..e5e6eb8 100644 --- a/python_scrapers/PublicAccessSites.csv +++ b/python_scrapers/PublicAccessSites.csv @@ -110,4 +110,5 @@ "Renfrewshire Council", "Renfrewshire", "http://planning.renfrewshire.gov.uk/acolnetDCpages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch", "AcolnetParser", "RenfrewshireParser" "Selby District Council", "Selby", "http://publicaccess.selby.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" "South Bedfordshire District Council", "South Bedfordshire", "http://planning.southbeds.gov.uk/plantech/DCWebPages/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.PgeSearch", "AcolnetParser", "SouthBedfordshireParser" -"Suffolk Coastal District Council", "Suffolk Coastal", "https://apps3.suffolkcoastal.gov.uk/planningonline/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "SuffolkCoastalParser" \ No newline at end of file +"Suffolk Coastal District Council", "Suffolk Coastal", "https://apps3.suffolkcoastal.gov.uk/planningonline/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "SuffolkCoastalParser" +"Surrey Heath Borough Council", "Surrey Heath", "https://www.public.surreyheath-online.gov.uk/whalecom60b1ef305f59f921/whalecom0/Scripts/PlanningPagesOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "SurreyHeathParser"