diff --git a/cgi-bin/Bracknell Forest.cgi b/cgi-bin/Bracknell Forest.cgi
new file mode 100755
index 0000000..197754a
--- /dev/null
+++ b/cgi-bin/Bracknell Forest.cgi
@@ -0,0 +1,29 @@
+#!/usr/local/bin/python
+
+# This is the parser for Bracknell Forest Borough Council.
+# It is generated from the file CGITemplate.
+
+import cgi
+import cgitb
+#cgitb.enable(display=0, logdir="/tmp")
+
+
+form = cgi.FieldStorage()
+day = form.getfirst('day')
+month = form.getfirst('month')
+year = form.getfirst('year')
+
+
+authority_name = "Bracknell Forest Borough Council"
+authority_short_name = "Bracknell Forest"
+base_url = "https://my.bracknell-forest.gov.uk/publicaccess/tdc/"
+
+import PublicAccess
+
+parser = PublicAccess.PublicAccessParser(authority_name, authority_short_name, base_url)
+
+xml = parser.getResults(day, month, year)
+
+print "Content-Type: text/xml" # XML is following
+print
+print xml # print the xml
diff --git a/cgi-bin/Broads Authority.cgi b/cgi-bin/Broads Authority.cgi
new file mode 100755
index 0000000..efc4aa2
--- /dev/null
+++ b/cgi-bin/Broads Authority.cgi
@@ -0,0 +1,29 @@
+#!/usr/local/bin/python
+
+# This is the parser for Broads Authority.
+# It is generated from the file CGITemplate.
+
+import cgi
+import cgitb
+#cgitb.enable(display=0, logdir="/tmp")
+
+
+form = cgi.FieldStorage()
+day = form.getfirst('day')
+month = form.getfirst('month')
+year = form.getfirst('year')
+
+
+authority_name = "Broads Authority"
+authority_short_name = "Broads Authority"
+base_url = "https://planning.broads-authority.gov.uk/PublicAccess/tdc/"
+
+import PublicAccess
+
+parser = PublicAccess.PublicAccessParser(authority_name, authority_short_name, base_url)
+
+xml = parser.getResults(day, month, year)
+
+print "Content-Type: text/xml" # XML is following
+print
+print xml # print the xml
diff --git a/cgi-bin/Broads.cgi b/cgi-bin/Broads.cgi
new file mode 100755
index 0000000..fabe99d
--- /dev/null
+++ b/cgi-bin/Broads.cgi
@@ -0,0 +1,29 @@
+#!/usr/local/bin/python
+
+# This is the parser for Broads Authority.
+# It is generated from the file CGITemplate.
+
+import cgi
+import cgitb
+#cgitb.enable(display=0, logdir="/tmp")
+
+
+form = cgi.FieldStorage()
+day = form.getfirst('day')
+month = form.getfirst('month')
+year = form.getfirst('year')
+
+
+authority_name = "Broads Authority"
+authority_short_name = "Broads"
+base_url = "https://planning.broads-authority.gov.uk/PublicAccess/tdc/"
+
+import PublicAccess
+
+parser = PublicAccess.PublicAccessParser(authority_name, authority_short_name, base_url)
+
+xml = parser.getResults(day, month, year)
+
+print "Content-Type: text/xml" # XML is following
+print
+print xml # print the xml
diff --git a/cgi-bin/Chiltern.cgi b/cgi-bin/Chiltern.cgi
new file mode 100755
index 0000000..44b1ce9
--- /dev/null
+++ b/cgi-bin/Chiltern.cgi
@@ -0,0 +1,29 @@
+#!/usr/local/bin/python
+
+# This is the parser for Chiltern District Council.
+# It is generated from the file CGITemplate.
+
+import cgi
+import cgitb
+#cgitb.enable(display=0, logdir="/tmp")
+
+
+form = cgi.FieldStorage()
+day = form.getfirst('day')
+month = form.getfirst('month')
+year = form.getfirst('year')
+
+
+authority_name = "Chiltern District Council"
+authority_short_name = "Chiltern"
+base_url = "https://isa.chiltern.gov.uk/publicaccess/tdc/"
+
+import PublicAccess
+
+parser = PublicAccess.PublicAccessParser(authority_name, authority_short_name, base_url)
+
+xml = parser.getResults(day, month, year)
+
+print "Content-Type: text/xml" # XML is following
+print
+print xml # print the xml
diff --git a/cgi-bin/Hinkley and Bosworth.cgi b/cgi-bin/Hinkley and Bosworth.cgi
new file mode 100755
index 0000000..b9be750
--- /dev/null
+++ b/cgi-bin/Hinkley and Bosworth.cgi
@@ -0,0 +1,29 @@
+#!/usr/local/bin/python
+
+# This is the parser for Hinckley and Bosworth Borough Council.
+# It is generated from the file CGITemplate.
+
+import cgi
+import cgitb
+#cgitb.enable(display=0, logdir="/tmp")
+
+
+form = cgi.FieldStorage()
+day = form.getfirst('day')
+month = form.getfirst('month')
+year = form.getfirst('year')
+
+
+authority_name = "Hinkley and Bosworth Borough Council"
+authority_short_name = "Hinkley and Bosworth"
+base_url = "https://cx.hinckley-bosworth.gov.uk/PublicAccess/tdc/"
+
+import PublicAccess
+
+parser = PublicAccess.PublicAccessParser(authority_name, authority_short_name, base_url)
+
+xml = parser.getResults(day, month, year)
+
+print "Content-Type: text/xml" # XML is following
+print
+print xml # print the xml
diff --git a/cgi-bin/Perthshire.cgi~ b/cgi-bin/Perthshire.cgi~
new file mode 100755
index 0000000..79907b8
--- /dev/null
+++ b/cgi-bin/Perthshire.cgi~
@@ -0,0 +1,29 @@
+#!/usr/local/bin/python
+
+# This is the parser for Perth and Kinross Council.
+# it is generated from the file CGITemplate
+
+import cgi
+import cgitb
+#cgitb.enable(display=0, logdir="/tmp")
+
+
+form = cgi.FieldStorage()
+day = form.getfirst('day')
+month = form.getfirst('month')
+year = form.getfirst('year')
+
+
+authority_name = "Perth and Kinross Council"
+authority_short_name = "Perthshire"
+base_url = "http://193.63.61.22/publicaccess/tdc/"
+
+import PublicAccess
+
+parser = PublicAccess.PublicAccessParser(authority_name, authority_short_name, base_url)
+
+xml = parser.getResults(day, month, year)
+
+print "Content-Type: text/xml" # XML is following
+print
+print xml # print the xml
diff --git a/cgi-bin/PublicAccess.py b/cgi-bin/PublicAccess.py
index 5ff7baf..22621e8 100644
--- a/cgi-bin/PublicAccess.py
+++ b/cgi-bin/PublicAccess.py
@@ -144,7 +144,7 @@ class PublicAccessParser(HTMLParser.HTMLParser):
# Join this query string to the comments URL, and store this as
# the comments URL of the current planning application
comments_url = urlparse.urljoin(self.base_url, comments_url_end)
- self._current_application.comment_url = urlparse.urljoin(comments_url, query_string)
+ self._current_application.comment_url = "?".join([comments_url, query_string])
# while we're here, let's follow some links to find the postcode...
# the postcode is in an input tag in the property page. This page
@@ -300,20 +300,24 @@ class PublicAccessInfoPageParser(HTMLParser.HTMLParser):
Once we have got the URL, there is no need for us to look at any more tags.
"""
if tag == "a" and self.property_page_url is None:
+
+ #print attrs
if attrs.count(("id","A_btnPropertyDetails")) > 0:
for attr,value in attrs:
if attr == "href":
the_link = value
- # this has some garbage on either side of it...
+ # this may have some garbage on either side of it...
# let's strip that off
+ # If the stripping fails, take the whole link
+
# the garbage on the left is separated by whitespace.
# the garbage on the right is separated by a "'".
-
- self.property_page_url = the_link.split()[1].split("'")[0]
-
-
+ try:
+ self.property_page_url = the_link.split()[1].split("'")[0]
+ except IndexError:
+ self.property_page_url = the_link
class PublicAccessPropertyPageParser(HTMLParser.HTMLParser):
diff --git a/python_scrapers/PublicAccess.py b/python_scrapers/PublicAccess.py
index 5ff7baf..22621e8 100644
--- a/python_scrapers/PublicAccess.py
+++ b/python_scrapers/PublicAccess.py
@@ -144,7 +144,7 @@ class PublicAccessParser(HTMLParser.HTMLParser):
# Join this query string to the comments URL, and store this as
# the comments URL of the current planning application
comments_url = urlparse.urljoin(self.base_url, comments_url_end)
- self._current_application.comment_url = urlparse.urljoin(comments_url, query_string)
+ self._current_application.comment_url = "?".join([comments_url, query_string])
# while we're here, let's follow some links to find the postcode...
# the postcode is in an input tag in the property page. This page
@@ -300,20 +300,24 @@ class PublicAccessInfoPageParser(HTMLParser.HTMLParser):
Once we have got the URL, there is no need for us to look at any more tags.
"""
if tag == "a" and self.property_page_url is None:
+
+ #print attrs
if attrs.count(("id","A_btnPropertyDetails")) > 0:
for attr,value in attrs:
if attr == "href":
the_link = value
- # this has some garbage on either side of it...
+ # this may have some garbage on either side of it...
# let's strip that off
+ # If the stripping fails, take the whole link
+
# the garbage on the left is separated by whitespace.
# the garbage on the right is separated by a "'".
-
- self.property_page_url = the_link.split()[1].split("'")[0]
-
-
+ try:
+ self.property_page_url = the_link.split()[1].split("'")[0]
+ except IndexError:
+ self.property_page_url = the_link
class PublicAccessPropertyPageParser(HTMLParser.HTMLParser):
diff --git a/python_scrapers/SitesToGenerate.csv b/python_scrapers/SitesToGenerate.csv
index 141dbc3..426eefb 100644
--- a/python_scrapers/SitesToGenerate.csv
+++ b/python_scrapers/SitesToGenerate.csv
@@ -126,4 +126,8 @@
"Caradon District Council", "Caradon", "http://publicaccess.caradon.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser"
"Hambleton District Council", "Hambleton", "http://planning.hambleton.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser"
"Moray Council", "Moray", "http://public.moray.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser"
-"Perth and Kinross Council", "Perthshire", "http://193.63.61.22/publicaccess/tdc/", "PublicAccess", "PublicAccessParser"
\ No newline at end of file
+"Perth and Kinross Council", "Perthshire", "http://193.63.61.22/publicaccess/tdc/", "PublicAccess", "PublicAccessParser"
+"Broads Authority", "Broads", "https://planning.broads-authority.gov.uk/PublicAccess/tdc/", "PublicAccess", "PublicAccessParser"
+"Bracknell Forest Borough Council", "Bracknell Forest", "https://my.bracknell-forest.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser"
+"Chiltern District Council", "Chiltern", "https://isa.chiltern.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser"
+"Hinckley and Bosworth Borough Council", "Hinkley and Bosworth", "https://cx.hinckley-bosworth.gov.uk/PublicAccess/tdc/", "PublicAccess", "PublicAccessParser"
\ No newline at end of file