diff --git a/trunk/python_scrapers/AmberValley.py b/trunk/python_scrapers/AmberValley.py
new file mode 100644
index 0000000..64fa382
--- /dev/null
+++ b/trunk/python_scrapers/AmberValley.py
@@ -0,0 +1,218 @@
+"""
+This is the screenscraper for the planning applications in Amber Valley.
+
+We have to get the initial search page so that we can use the __VIEWSTATE
+parameter.
+
+The start and end dates have to be separated by 1 day - I presume they are
+interpreting dates as a datetime at midnight...
+
+BeautifulSoup doesn't seem to be able to cope with what comes back from the
+post, so we'll use HTMLParser.
+
+The info reference link uses javascript (typical). As far as I can see there
+is no way to link directly to the info page for an application, so we'll just
+have to link to the search page.
+
+Bizarrely, the comment url is fine. e.g.
+
+http://www.ambervalley.gov.uk/services/environment/landandpremises/planningtownandcountry/planningapplications/planappcommentform.htm?frm_AppNum=AVA-2008-0955&frm_SiteAddress=147+Derby+Road%0dDuffield%0dBelper%0dDerbyshire%0dDE56+4FQ%0d&frm_Proposal=Rear+single+storey+extension+and+loft+conversion
+
+"""
+
+import urllib2
+import urllib
+import urlparse
+
+import HTMLParser
+
+import datetime
+
+from BeautifulSoup import BeautifulSoup
+
+from PlanningUtils import PlanningApplication, \
+    PlanningAuthorityResults, \
+    getPostcodeFromText
+
+#date_format = "%d/%m/%Y"
+
+class AmberValleyParser(HTMLParser.HTMLParser):
+    """Scrape one day of planning applications from the Amber Valley register.
+
+    The search results are parsed with HTMLParser callbacks. Each
+    <table class="test"> in the response is treated as one application: the
+    text of a link in its first <td> is the council reference, and its second
+    <td> holds the address, a <br/>, and then the description.
+    """
+
+    # Every search field in the form shares this name prefix.
+    _field_prefix = "MainControl:CustomFunctionality_ZoneMain:EmbeddedUserControlPlaceholderControl1:_ctl0:myFilter:"
+
+    # The month fields are posted as English abbreviations (e.g. "Jul").
+    # Listing them here avoids strftime("%b"), which follows the locale.
+    _month_names = ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
+                    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
+
+    def __init__(self, *args):
+        """Set up the parsing state and an empty results container.
+
+        Positional arguments are accepted but not used.
+        """
+        HTMLParser.HTMLParser.__init__(self)
+
+        # Parsing state, updated by the handle_* callbacks.
+        self._in_result_table = False
+        self._td_count = None  # <td> tags seen so far in the result table
+        self._get_ref = False  # True while inside the reference link
+        self._get_description = False  # True once the <br/> has been passed
+
+        self.authority_name = "Amber Valley Borough Council"
+        self.authority_short_name = "Amber Valley"
+        self.base_url = "http://www.ambervalley.gov.uk/AVBC/Core/TemplateHandler.aspx?NRMODE=Published&NRNODEGUID=%7bAF862CF0-5C6D-4115-9979-5956B24D12DF%7d&NRORIGINALURL=%2fservices%2fenvironment%2flandandpremises%2fplanningtownandcountry%2fplanningapplications%2fPlanningApplicationRegister%2ehtm&NRCACHEHINT=Guest#filterbottom"
+        self.comment_url_template = "http://www.ambervalley.gov.uk/services/environment/landandpremises/planningtownandcountry/planningapplications/planappcommentform.htm?frm_AppNum=%(reference)s&frm_SiteAddress=%(address)s&frm_Proposal=%(description)s"
+
+        self._current_application = None
+        self._search_date = None
+
+        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
+
+    def handle_starttag(self, tag, attrs):
+        """Track result tables, their cells and the reference link."""
+        if tag == "table":
+            if ("class", "test") in attrs:
+                self._current_application = PlanningApplication()
+
+                # We can set the date_received immediately
+                self._current_application.date_received = self._search_date
+
+                self._in_result_table = True
+                self._td_count = 0
+
+        elif tag == "td":
+            if self._in_result_table:
+                self._td_count += 1
+                self._get_description = False
+
+        elif tag == "a" and self._td_count == 1:
+            self._get_ref = True
+
+    def handle_endtag(self, tag):
+        """Finish the current application when its result table closes."""
+        if tag == "table" and self._in_result_table:
+            application = self._current_application
+
+            # handle_data only fills these in when it sees text, so fall back
+            # to "" instead of calling a string method on an unset value.
+            application.description = (application.description or "").strip()
+            application.address = ' '.join((application.address or "").split())
+            application.postcode = getPostcodeFromText(application.address)
+            application.info_url = self.base_url # Can't link to the info page, due to javascript idiocy.
+            application.comment_url = self.comment_url_template % {
+                "reference": urllib.quote_plus(application.council_reference),
+                "address": urllib.quote_plus(application.address),
+                "description": urllib.quote_plus(application.description),
+            }
+
+            self._results.addApplication(application)
+
+            self._in_result_table = False
+            self._td_count = None
+
+        if tag == "a":
+            self._get_ref = False
+
+    def handle_startendtag(self, tag, attrs):
+        """Note the <br/> that separates the address from the description."""
+        if tag == "br" and self._td_count == 2:
+            self._get_description = True
+
+    def handle_data(self, data):
+        """Store text as the reference, the address or the description."""
+        if self._get_ref:
+            self._current_application.council_reference = data
+
+        elif self._td_count == 2:
+            # This td contains the address (including postcode)
+            # and the description
+            application = self._current_application
+
+            if self._get_description:
+                # We have passed the <br/>, and are looking for the description
+                application.description = (application.description or "") + data
+            else:
+                # We have not yet passed the <br/> and are looking for the address and postcode.
+                application.address = (application.address or "") + data
+
+    def getResultsByDayMonthYear(self, day, month, year):
+        """Fetch and parse the applications for the given date.
+
+        day, month and year are integers. Returns this parser's
+        PlanningAuthorityResults, to which every application found in the
+        response has been added.
+        """
+        self._search_date = search_start_date = datetime.date(year, month, day)
+        search_end_date = search_start_date + datetime.timedelta(1)
+
+        # Now get the search page. The #fragment only means something to a
+        # browser, so it is left out of the request.
+        get_response = urllib2.urlopen(urlparse.urldefrag(self.base_url)[0])
+        try:
+            soup = BeautifulSoup(get_response.read())
+        finally:
+            get_response.close()
+
+        form = soup.find("form", id="__aspnetForm")
+
+        # We're going to need __VIEWSTATE for our post
+        viewstate = form.find("input", {"name":"__VIEWSTATE"})['value']
+        action = form['action']
+
+        # Now we have what we need to do a POST
+        post_url = urlparse.urljoin(self.base_url, action)
+
+        # The field names and their order follow a POST captured from the
+        # search form, e.g. lstDayStart=30, lstMonthStart=Jul,
+        # lstYearStart=2008, lstOrderBy=RegisterDate+DESC.
+        filter_fields = [
+            ("txbAppNumber", ""),
+            ("txbAddressKeyword", ""),
+            ("lstDayStart", search_start_date.day), # Using the attribute directly to avoid the leading 0
+            ("lstMonthStart", self._month_names[search_start_date.month - 1]),
+            ("lstYearStart", search_start_date.strftime("%Y")),
+            ("lstDayEnd", search_end_date.day), # Using the attribute directly to avoid the leading 0
+            ("lstMonthEnd", self._month_names[search_end_date.month - 1]),
+            ("lstYearEnd", search_end_date.strftime("%Y")),
+            ("rblDateType", "0"),
+            ("lstDistance", ""),
+            ("txbPostcode", ""),
+            ("lstWards", ""),
+            ("lstParishes", ""),
+            ("lstOrderBy", "RegisterDate DESC"),
+            ("rblViewType", "List"),
+            ("btnQueryPlanApps", "Lookup"),
+        ]
+
+        post_data = urllib.urlencode(
+            [("__VIEWSTATE", viewstate)] +
+            [(self._field_prefix + name, value) for name, value in filter_fields]
+        )
+
+        post_response = urllib2.urlopen(post_url, post_data)
+        try:
+            self.feed(post_response.read())
+        finally:
+            post_response.close()
+
+        return self._results
+
+    def getResults(self, day, month, year):
+        """Return the results for the given date as produced by displayXML().
+
+        day, month and year are passed through int() first.
+        """
+        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
+
+if __name__ == '__main__':
+    parser = AmberValleyParser()
+    print parser.getResults(4,8,2008)
+
diff --git a/trunk/python_scrapers/OtherFilesToCopy.csv b/trunk/python_scrapers/OtherFilesToCopy.csv
index acd5cb7..c84570e 100644
--- a/trunk/python_scrapers/OtherFilesToCopy.csv
+++ b/trunk/python_scrapers/OtherFilesToCopy.csv
@@ -42,3 +42,4 @@
"NorthAyrshire.cgi", "493"
"Redbridge.pl", "493"
"Redbridge.cgi", "493"
+"AmberValley.py", "420"
diff --git a/trunk/python_scrapers/SitesToGenerate.csv b/trunk/python_scrapers/SitesToGenerate.csv
index 2c5a2aa..b1c4021 100644
--- a/trunk/python_scrapers/SitesToGenerate.csv
+++ b/trunk/python_scrapers/SitesToGenerate.csv
@@ -246,3 +246,4 @@
"Conwy County Borough Council", "Conwy", "http://www.conwy.gov.uk/", "PlanningExplorer", "ConwyParser"
"London Borough of Merton", "Merton", "http://planning.merton.gov.uk", "PlanningExplorer", "MertonParser"
"London Borough of Enfield", "Enfield", "http://forms.enfield.gov.uk/swiftlg/apas/run/wphappcriteria.display", "SwiftLG", "SwiftLGParser"
+"Amber Valley Borough Council", "Amber Valley", "", "AmberValley", "AmberValleyParser"
diff --git a/trunk/python_scrapers/WAM.py b/trunk/python_scrapers/WAM.py
index b605689..082574e 100644
--- a/trunk/python_scrapers/WAM.py
+++ b/trunk/python_scrapers/WAM.py
@@ -165,7 +165,6 @@ class BraintreeParser(WAMParser):
if __name__ == '__main__':
- #parser = WAMParser("Barking and Dagenham", "Barking and Dagenham", "http://idoxwam.lbbd.gov.uk:8081/WAM/pas/searchApplications.do", debug=True)
#parser = BraintreeParser("Braintree", "Braintree", "http://planningapp.braintree.gov.uk/WAM1/weeklyApplications.do", debug=True)
# Camden
parser = WAMParser("Castle Point", "Castle Point", "http://wam.castlepoint.gov.uk/WAM/pas/searchApplications.do")#, debug=True)