| @@ -16,7 +16,6 @@ | |||
| "SwiftLG.py", "420" | |||
| "Dacorum.cgi", "493" | |||
| "SouthSomerset.cgi", "493" | |||
| "WestDorset.cgi", "493" | |||
| "Christchurch.cgi", "493" | |||
| "WAM.py", "420" | |||
| "Planet.py", "420" | |||
| @@ -60,3 +59,4 @@ | |||
| "Exmoor.py", "420" | |||
| "Eastbourne.py", "420" | |||
| "Gosport.py", "420" | |||
| "WestDorset.py", "420" | |||
| @@ -265,3 +265,4 @@ | |||
| "Waltham Forest Council", "Waltham Forest", "http://planning.walthamforest.gov.uk/", "PlanningExplorer", "WalthamForestParser" | |||
| "Caerphilly County Borough Council", "Caerphilly", "http://publicaccess.caerphilly.gov.uk/PublicAccess/tdc/", "PublicAccess", "PublicAccessParser" | |||
| "Gosport Borough Council", "Gosport", "", "Gosport", "GosportParser" | |||
| "West Dorset District Council", "West Dorset", "", "WestDorset", "WestDorsetParser" | |||
| @@ -1,149 +0,0 @@ | |||
| #!/usr/bin/perl | |||
| use strict; | |||
| use warnings; | |||
| use CGI qw(:cgi); | |||
| use HTML::TreeBuilder; | |||
| use LWP::UserAgent; | |||
| use XML::Writer; | |||
# The master URLs for the West Dorset planning search
our $SearchURL = "http://webapps.westdorset-dc.gov.uk/planningapplications/pages/applicationsearch.aspx";
our $InfoURL = "http://webapps.westdorset-dc.gov.uk/planningapplications/pages/ApplicationDetails.aspx?Authority=West%20Dorset%20District%20Council&Application=";

# We're a CGI script...
my $query = CGI->new();

# Get the date to fetch, as day/month/year built from the request parameters
my $date = $query->param("day") . "/" . $query->param("month") . "/" . $query->param("year");

# Construct an LWP user agent
our $UA = LWP::UserAgent->new(env_proxy => 1);

# Post the URL to get an initial blank form
my $page = do_post();

# Do the search, then release the blank form's parse tree now that its
# hidden fields have been copied into the request
my $results = do_post($page,
                      {"DetailedSearch\$TextBox_DateRaisedFrom" => $date,
                       "DetailedSearch\$TextBox_DateRaisedTo" => $date,
                       "DetailedSearch\$Button_DetailedSearch" => "Search"});
$page->delete;
$page = $results;

# Output an HTTP response header
print $query->header(-type => "text/xml");

# Create an XML output stream
my $Writer = XML::Writer->new(DATA_MODE => 1);

# Output the XML header data
$Writer->xmlDecl("UTF-8");
$Writer->startTag("planning");
$Writer->dataElement("authority_name", "West Dorset District Council");
$Writer->dataElement("authority_short_name", "West Dorset");
$Writer->startTag("applications");

# Output any applications on the first page
output_applications($page);

# Loop over any additional results pages; a page that has a "next" link
# is followed by another page of results
while (defined($page->look_down("_tag" => "a", "id" => "MatchingApplications_ResultsNavigationTop_LinkButton_Next")))
{
    # Fetch the next page...
    my $next_page = do_post_back($page, 'MatchingApplications$ResultsNavigationTop$LinkButton_Next', '');

    # ...free the tree we have finished with...
    $page->delete;
    $page = $next_page;

    # ...and output the applications from the new page
    output_applications($page);
}

# Release the last parse tree
$page->delete;

# Finish off XML output
$Writer->endTag("applications");
$Writer->endTag("planning");
$Writer->end();

exit 0;
# Stand in for the page's doPostBack javascript function.
#
# Arguments:
#   $previous - parsed tree of the page the post back originates from
#   $target   - name of the control raising the event; every "$" in it is
#               replaced with ":" before it is sent
#   $argument - value sent as the event argument
#
# Returns whatever do_post() returns for the resulting request.
sub do_post_back
{
    my ($previous, $target, $argument) = @_;

    # Swap each "$" for ":" in the control name
    (my $event_target = $target) =~ tr/$/:/;

    my %form = (
        "__EVENTTARGET"   => $event_target,
        "__EVENTARGUMENT" => $argument,
    );

    return do_post($previous, \%form);
}
# Make a POST request to the search URL and parse the response.
#
# Arguments:
#   $previous - optional parsed tree of the previously fetched page. When
#               defined, the values of its __VIEWSTATE and
#               __EVENTVALIDATION inputs are added to the form data.
#   $args     - optional hash reference of form fields to post (defaults
#               to an empty hash). It is modified in place.
#
# Returns a new HTML::TreeBuilder tree built from the response body.
# Dies with the HTTP status line if the request is not successful.
sub do_post
{
    my ($previous, $args) = @_;
    $args ||= {};

    if (defined($previous))
    {
        foreach my $field ("__VIEWSTATE", "__EVENTVALIDATION")
        {
            my $input = $previous->look_down("_tag" => "input", "name" => $field);

            # Skip a field the page does not carry rather than dying on
            # a method call against an undefined value
            next unless defined($input);

            $args->{$field} = $input->attr("value");
        }
    }

    my $response = $UA->post($SearchURL, $args);

    die $response->status_line unless $response->is_success;

    return HTML::TreeBuilder->new_from_content($response->content);
}
# Output applications from a results page.
#
# Arguments:
#   $page - parsed tree of a search results page
#
# Writes one <application> element to the XML writer for every data row
# of the table with class "searchresults". Does nothing when the page has
# no such table. Returns nothing.
sub output_applications
{
    my $page = shift;

    # Find the result table
    my $table = $page->look_down("_tag" => "table", "class" => "searchresults");

    # No results means no results table
    return unless defined($table);

    # Process each row of the results
    foreach my $row ($table->look_down("_tag" => "tr"))
    {
        my $class = $row->attr("class") || "";

        next if $class eq "searchresultsheader";

        my @cells = $row->look_down("_tag" => "td");

        # Ignore rows that do not have the four expected data cells,
        # which would otherwise be a fatal method call on undef
        next if @cells < 4;

        my $reference = $cells[0]->as_trimmed_text;
        my $date = $cells[1]->as_trimmed_text;
        my $address = $cells[2]->as_trimmed_text;
        my $description = $cells[3]->as_trimmed_text;

        # Take the postcode from the end of the address when one is there;
        # default to an empty string so the element is still written
        # without an "uninitialized value" warning
        my $postcode = "";
        if ($address =~ /\s+([A-Z]+\d+\s+\d+[A-Z]+)$/)
        {
            $postcode = $1;
        }

        $Writer->startTag("application");
        $Writer->dataElement("council_reference", $reference);
        $Writer->dataElement("address", $address);
        $Writer->dataElement("postcode", $postcode);
        $Writer->dataElement("description", $description);
        # The same details URL is used for both info and comment links
        $Writer->dataElement("info_url", $InfoURL . $reference);
        $Writer->dataElement("comment_url", $InfoURL . $reference);
        $Writer->dataElement("date_received", $date);
        $Writer->endTag("application");
    }

    return;
}
| @@ -0,0 +1,88 @@ | |||
| import urllib2 | |||
| import urllib | |||
| import datetime | |||
| import re | |||
| from BeautifulSoup import BeautifulSoup | |||
| from PlanningUtils import PlanningApplication, \ | |||
| PlanningAuthorityResults, \ | |||
| getPostcodeFromText | |||
| date_format = "%d/%m/%Y" | |||
class WestDorsetParser:
    """Scraper for the West Dorset District Council planning search.

    Fetches the search form, posts a date-raised search for a single day
    and collects each result row into a PlanningAuthorityResults object.
    """

    def __init__(self, *args):
        # Extra positional arguments are accepted and ignored.
        self.authority_name = "West Dorset District Council"
        self.authority_short_name = "West Dorset"
        self.base_url = "http://webapps.westdorset-dc.gov.uk/planningapplications/pages/applicationsearch.aspx"

        # %s is filled with the council reference of the application.
        self.info_url = "http://webapps.westdorset-dc.gov.uk/planningapplications/pages/ApplicationDetails.aspx?Application=%s&Authority=West+Dorset+District+Council+"

        # NOTE(review): created once here, so repeated calls to
        # getResultsByDayMonthYear on the same parser add to the same
        # results object - confirm callers use a fresh parser per search.
        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

    def getResultsByDayMonthYear(self, day, month, year):
        """Search for applications raised on the given date.

        day, month and year are integers. Returns the parser's
        PlanningAuthorityResults with one PlanningApplication added per
        result row found.
        """
        search_date = datetime.date(year, month, day)
        formatted_date = search_date.strftime(date_format)

        get_response = urllib2.urlopen(self.base_url)
        get_soup = BeautifulSoup(get_response.read())

        post_data = (
            ("__VIEWSTATE", get_soup.find("input", id="__VIEWSTATE")["value"]),
#            ("QuickSearchApplicationNumber$TextBox_ApplicationNumber", ""),
#            ("QuickSearchThisWeek$DropDownList_PastWeek", ""),
#            ("DetailedSearch$TextBox_PropertyNameNumber", ""),
#            ("DetailedSearch$Textbox_StreetName", ""),
#            ("DetailedSearch$Textbox_TownVillage", ""),
#            ("DetailedSearch$Textbox_Postcode", ""),
#            ("DetailedSearch$Textbox_Parish", ""),
#            ("DetailedSearch$Textbox_ApplicantSurname", ""),
#            ("DetailedSearch$TextBox_AgentName", ""),
            ("DetailedSearch$TextBox_DateRaisedFrom", formatted_date),
            ("DetailedSearch$TextBox_DateRaisedTo", formatted_date),
#            ("DetailedSearch$TextBox_DecisionFrom", "dd%2Fmm%2Fyyyy"),
#            ("DetailedSearch$TextBox_DecisionTo", "dd%2Fmm%2Fyyyy"),
            ("DetailedSearch$Button_DetailedSearch", "Search"),
            ("__EVENTVALIDATION", get_soup.find("input", id="__EVENTVALIDATION")["value"]),
            )

        # The response to the GET is a redirect. We'll need to post to the new url.
        post_response = urllib2.urlopen(get_response.url, urllib.urlencode(post_data))
        post_soup = BeautifulSoup(post_response.read())

        results_table = None
        if not post_soup.find(text = re.compile("No matching record")):
            results_table = post_soup.find("table", {"class": "searchresults"})

        # A page with neither the "No matching record" text nor a results
        # table is treated as having no results instead of raising
        # AttributeError on None.
        if results_table is not None:
            # The first row contains headers.
            for tr in results_table.findAll("tr")[1:]:
                tds = tr.findAll("td")

                # Skip rows that lack the four cells read below.
                if len(tds) < 4:
                    continue

                application = PlanningApplication()

                # We can fill the date received in straight away from the date we searched for.
                application.date_received = search_date

                application.council_reference = tds[0].font.string.strip()
                application.address = tds[2].font.string.strip()
                application.postcode = getPostcodeFromText(application.address)
                application.description = tds[3].font.string.strip()

                # Set the info url and the comment url to be the same - can't get to the comment
                # one directly without javascript.
                application.info_url = self.info_url %(application.council_reference)
                application.comment_url = application.info_url

                self._results.addApplication(application)

        return self._results

    def getResults(self, day, month, year):
        """Run the search for the given date and return displayXML() of the results.

        day, month and year may be strings; each is converted with int().
        """
        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
if __name__ == '__main__':
    # Manual run: print the getResults output for day 1, month 10, year 2008.
    west_dorset = WestDorsetParser()
    print(west_dorset.getResults(1, 10, 2008))