@@ -16,7 +16,6 @@ | |||||
"SwiftLG.py", "420" | "SwiftLG.py", "420" | ||||
"Dacorum.cgi", "493" | "Dacorum.cgi", "493" | ||||
"SouthSomerset.cgi", "493" | "SouthSomerset.cgi", "493" | ||||
"WestDorset.cgi", "493" | |||||
"Christchurch.cgi", "493" | "Christchurch.cgi", "493" | ||||
"WAM.py", "420" | "WAM.py", "420" | ||||
"Planet.py", "420" | "Planet.py", "420" | ||||
@@ -60,3 +59,4 @@ | |||||
"Exmoor.py", "420" | "Exmoor.py", "420" | ||||
"Eastbourne.py", "420" | "Eastbourne.py", "420" | ||||
"Gosport.py", "420" | "Gosport.py", "420" | ||||
"WestDorset.py", "420" |
@@ -265,3 +265,4 @@ | |||||
"Waltham Forest Council", "Waltham Forest", "http://planning.walthamforest.gov.uk/", "PlanningExplorer", "WalthamForestParser" | "Waltham Forest Council", "Waltham Forest", "http://planning.walthamforest.gov.uk/", "PlanningExplorer", "WalthamForestParser" | ||||
"Caerphilly County Borough Council", "Caerphilly", "http://publicaccess.caerphilly.gov.uk/PublicAccess/tdc/", "PublicAccess", "PublicAccessParser" | "Caerphilly County Borough Council", "Caerphilly", "http://publicaccess.caerphilly.gov.uk/PublicAccess/tdc/", "PublicAccess", "PublicAccessParser" | ||||
"Gosport Borough Council", "Gosport", "", "Gosport", "GosportParser" | "Gosport Borough Council", "Gosport", "", "Gosport", "GosportParser" | ||||
"West Dorset District Council", "West Dorset", "", "WestDorset", "WestDorsetParser" |
@@ -1,149 +0,0 @@ | |||||
#!/usr/bin/perl | |||||
use strict; | |||||
use warnings; | |||||
use CGI qw(:cgi); | |||||
use HTML::TreeBuilder; | |||||
use LWP::UserAgent; | |||||
use XML::Writer; | |||||
# Endpoints of the West Dorset planning search: the search form itself and
# the application details page (the application reference gets appended).
our $SearchURL = "http://webapps.westdorset-dc.gov.uk/planningapplications/pages/applicationsearch.aspx";
our $InfoURL = "http://webapps.westdorset-dc.gov.uk/planningapplications/pages/ApplicationDetails.aspx?Authority=West%20Dorset%20District%20Council&Application=";

# This runs as a CGI script. The date to fetch arrives in the "day", "month"
# and "year" request parameters and is sent to the site as day/month/year.
my $cgi = CGI->new();
my $day = $cgi->param("day");
my $month = $cgi->param("month");
my $year = $cgi->param("year");
my $search_date = "$day/$month/$year";

# User agent shared by every request (proxy settings come from the environment)
our $UA = LWP::UserAgent->new(env_proxy => 1);

# Form fields for a detailed search restricted to the single requested day
my %search_fields = (
    'DetailedSearch$TextBox_DateRaisedFrom' => $search_date,
    'DetailedSearch$TextBox_DateRaisedTo'   => $search_date,
    'DetailedSearch$Button_DetailedSearch'  => "Search",
);

# An argument-less POST fetches the blank form; the search is then posted
# on top of that page so do_post can carry its hidden state fields across.
my $page = do_post();
$page = do_post($page, \%search_fields);

# HTTP response header first, then the XML document itself
print $cgi->header(-type => "text/xml");

# $Writer is file-scoped because output_applications writes through it
my $Writer = XML::Writer->new(DATA_MODE => 1);
$Writer->xmlDecl("UTF-8");
$Writer->startTag("planning");
$Writer->dataElement("authority_name", "West Dorset District Council");
$Writer->dataElement("authority_short_name", "West Dorset");
$Writer->startTag("applications");

# Emit the applications on the current page, then keep following the
# "Next" link for as long as the results page offers one.
while (1) {
    output_applications($page);
    last unless $page->look_down("_tag" => "a", "id" => "MatchingApplications_ResultsNavigationTop_LinkButton_Next");
    $page = do_post_back($page, 'MatchingApplications$ResultsNavigationTop$LinkButton_Next', '');
}

# Close the document
$Writer->endTag("applications");
$Writer->endTag("planning");
$Writer->end();

exit 0;
# Emulate the page's doPostBack javascript function.
#
# Arguments: the previous page (parsed tree), the event target and the event
# argument. Every "$" in the target is replaced by ":" before it is sent as
# __EVENTTARGET; the argument is sent unchanged as __EVENTARGUMENT.
# Returns whatever do_post returns for the resulting request.
sub do_post_back {
    my ($previous, $target, $argument) = @_;

    # Work on a copy so the caller's string is left untouched
    (my $event_target = $target) =~ s/\$/:/g;

    return do_post($previous, {
        "__EVENTTARGET"   => $event_target,
        "__EVENTARGUMENT" => $argument,
    });
}
# Make a POST request to $SearchURL and return the parsed response.
#
# Arguments:
#   $previous - optional parsed tree of the page previously returned by the
#               site; when given, its hidden __VIEWSTATE and
#               __EVENTVALIDATION inputs are copied into the request
#   $args     - optional hash reference of form fields (modified in place
#               when $previous is given)
#
# Returns an HTML::TreeBuilder tree built from the response body.
# Dies if a required hidden input is missing from $previous, or with the
# HTTP status line if the request does not succeed.
sub do_post
{
    my $previous = shift;
    my $args = shift || {};

    if (defined($previous))
    {
        foreach my $field ("__VIEWSTATE", "__EVENTVALIDATION")
        {
            my $input = $previous->look_down("_tag" => "input", "name" => $field);

            # Fail with a message that names the missing field instead of
            # the opaque "Can't call method "attr" on an undefined value"
            die "Previous page has no $field input - cannot post back\n"
                unless defined($input);

            $args->{$field} = $input->attr("value");
        }
    }

    my $response = $UA->post($SearchURL, $args);

    die $response->status_line unless $response->is_success;

    return HTML::TreeBuilder->new_from_content($response->content);
}
# Output applications from a results page.
#
# Takes the parsed tree of a results page and writes one <application>
# element per data row of its "searchresults" table to the file-level
# $Writer. A page without that table produces no output. Returns nothing.
sub output_applications
{
    my $page = shift;

    # No results means no results table
    my $table = $page->look_down("_tag" => "table", "class" => "searchresults");
    return unless defined($table);

    foreach my $row ($table->look_down("_tag" => "tr"))
    {
        my $class = $row->attr("class") || "";

        next if $class eq "searchresultsheader";

        my @cells = $row->look_down("_tag" => "td");

        # A row without the four data cells cannot describe an application;
        # skip it rather than die calling a method on a missing cell
        next if @cells < 4;

        my ($reference, $date, $address, $description) =
            map { $_->as_trimmed_text } @cells[0 .. 3];

        # Default to an empty string so that an address without a trailing
        # postcode still yields an empty <postcode> element, but without
        # handing undef to the XML writer (uninitialized-value warning)
        my $postcode = "";

        if ($address =~ /\s+([A-Z]+\d+\s+\d+[A-Z]+)$/)
        {
            $postcode = $1;
        }

        $Writer->startTag("application");
        $Writer->dataElement("council_reference", $reference);
        $Writer->dataElement("address", $address);
        $Writer->dataElement("postcode", $postcode);
        $Writer->dataElement("description", $description);
        # The details page doubles as the comment page
        $Writer->dataElement("info_url", $InfoURL . $reference);
        $Writer->dataElement("comment_url", $InfoURL . $reference);
        $Writer->dataElement("date_received", $date);
        $Writer->endTag("application");
    }

    return;
}
@@ -0,0 +1,88 @@ | |||||
import urllib2 | |||||
import urllib | |||||
import datetime | |||||
import re | |||||
from BeautifulSoup import BeautifulSoup | |||||
from PlanningUtils import PlanningApplication, \ | |||||
PlanningAuthorityResults, \ | |||||
getPostcodeFromText | |||||
# strftime pattern for the day/month/year dates sent in the search form.
date_format = "%d/%m/%Y"


class WestDorsetParser:
    """Scraper for the West Dorset District Council planning search.

    Results accumulate in a single PlanningAuthorityResults object created
    in the constructor, so every search on the same instance adds to it.
    """

    def __init__(self, *args):
        # Extra positional arguments are accepted and ignored.
        self.authority_name = "West Dorset District Council"
        self.authority_short_name = "West Dorset"
        self.base_url = "http://webapps.westdorset-dc.gov.uk/planningapplications/pages/applicationsearch.aspx"
        # The application reference is substituted for %s.
        self.info_url = "http://webapps.westdorset-dc.gov.uk/planningapplications/pages/ApplicationDetails.aspx?Application=%s&Authority=West+Dorset+District+Council+"

        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

    def _cell_text(self, td):
        """Return the whitespace-stripped text of one results-table cell.

        The text is taken from the cell's <font> element when there is one,
        otherwise from the cell itself. All text nodes are collected, so an
        empty cell gives "" and a cell with nested markup gives its pieces
        joined by single spaces, where reading .string would give None.
        """
        container = td.font
        if container is None:
            container = td

        pieces = [text.strip() for text in container.findAll(text=True)]
        return " ".join(piece for piece in pieces if piece)

    def getResultsByDayMonthYear(self, day, month, year):
        """Search for applications raised on the given date.

        day, month and year are integers. Returns the parser's
        PlanningAuthorityResults with one PlanningApplication added per
        data row of the results table. Errors raised while fetching or
        parsing the pages are not caught here.
        """
        search_date = datetime.date(year, month, day)
        formatted_date = search_date.strftime(date_format)

        # Fetch the blank form first to pick up its hidden state fields.
        get_response = urllib2.urlopen(self.base_url)
        get_soup = BeautifulSoup(get_response.read())

        # Form fields deliberately left out of the request:
        #   QuickSearchApplicationNumber$TextBox_ApplicationNumber
        #   QuickSearchThisWeek$DropDownList_PastWeek
        #   DetailedSearch$TextBox_PropertyNameNumber
        #   DetailedSearch$Textbox_StreetName
        #   DetailedSearch$Textbox_TownVillage
        #   DetailedSearch$Textbox_Postcode
        #   DetailedSearch$Textbox_Parish
        #   DetailedSearch$Textbox_ApplicantSurname
        #   DetailedSearch$TextBox_AgentName
        #   DetailedSearch$TextBox_DecisionFrom ("dd%2Fmm%2Fyyyy")
        #   DetailedSearch$TextBox_DecisionTo ("dd%2Fmm%2Fyyyy")
        post_data = (
            ("__VIEWSTATE", get_soup.find("input", id="__VIEWSTATE")["value"]),
            ("DetailedSearch$TextBox_DateRaisedFrom", formatted_date),
            ("DetailedSearch$TextBox_DateRaisedTo", formatted_date),
            ("DetailedSearch$Button_DetailedSearch", "Search"),
            ("__EVENTVALIDATION", get_soup.find("input", id="__EVENTVALIDATION")["value"]),
        )

        # The response to the GET is a redirect. We'll need to post to the new url.
        post_response = urllib2.urlopen(get_response.url, urllib.urlencode(post_data))
        post_soup = BeautifulSoup(post_response.read())

        if post_soup.find(text=re.compile("No matching record")):
            return self._results

        # The first row contains headers.
        trs = post_soup.find("table", {"class": "searchresults"}).findAll("tr")[1:]

        for tr in trs:
            tds = tr.findAll("td")

            # A row without the four data cells cannot describe an
            # application; skip it instead of failing with IndexError.
            if len(tds) < 4:
                continue

            application = PlanningApplication()

            # We can fill the date received in straight away from the date we searched for.
            application.date_received = search_date

            application.council_reference = self._cell_text(tds[0])
            application.address = self._cell_text(tds[2])
            application.postcode = getPostcodeFromText(application.address)
            application.description = self._cell_text(tds[3])

            # Set the info url and the comment url to be the same - can't get to the comment
            # one directly without javascript.
            application.info_url = self.info_url % (application.council_reference)
            application.comment_url = application.info_url

            self._results.addApplication(application)

        return self._results

    def getResults(self, day, month, year):
        """Run the search for the given date and return displayXML() of the results.

        day, month and year may be anything int() accepts.
        """
        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
if __name__ == '__main__':
    # Manual smoke test: fetch and print the results for 1 October 2008.
    west_dorset = WestDorsetParser()
    print(west_dorset.getResults(1, 10, 2008))