#!/usr/bin/perl

use strict;
use warnings;

use CGI qw(:cgi);
use DateTime;
use HTML::TreeBuilder;
use LWP::UserAgent;
use XML::Writer;

# The master URL for the Broxbourne planning search
our $SearchURL = "http://www2.broxbourne.gov.uk/planningsearch/webform1.aspx";

# We're a CGI script...
my $query = CGI->new();

# Read the date parameters one at a time in scalar context. Calling
# param() directly inside the DateTime->new() argument list evaluates it
# in list context, so a missing or repeated parameter would shift or
# inject arbitrary key/value pairs into the constructor call.
my %date_part;
foreach my $name (qw(year month day)) {
    my $value = $query->param($name);
    die "Missing or non-numeric '$name' parameter"
        unless defined $value && $value =~ /^\d+$/;
    $date_part{$name} = $value;
}

# Get the date as an offset (in days) from 2000-01-01, which is the
# form in which the date is posted back to the calendar controls below
my $epoch = DateTime->new(year => 2000, month => 1, day => 1);
my $querydate = DateTime->new(year  => $date_part{year},
                              month => $date_part{month},
                              day   => $date_part{day});
$querydate = $querydate->delta_days($epoch)->delta_days;

# Construct an LWP user agent
our $UA = LWP::UserAgent->new(env_proxy => 1);

# Post the URL to get an initial blank form
my $state = get_state(do_post());

# Post each date in turn to build up the state - you can thank
# Microsoft and ASP.NET for the horrible way we have to do this
# by posting each argument in turn to build up the state
$state = get_state(do_post_back($state, 'DateSelector1$Calendar1', $querydate));
$state = get_state(do_post_back($state, 'DateSelector2$Calendar1', $querydate));

# Output an HTTP response header
print $query->header(-type => "text/xml");

# Create an XML output stream
my $Writer = XML::Writer->new(DATA_MODE => 1);

# Output the XML header data
$Writer->xmlDecl("UTF-8");
$Writer->startTag("planning");
$Writer->dataElement("authority_name", "Borough of Broxbourne");
$Writer->dataElement("authority_short_name", "Broxbourne");
$Writer->startTag("applications");

# Get the arguments for the search...
my $args = {
    "Srch"        => "rb1",
    "__VIEWSTATE" => $state,
    "btnSearch"   => "Search",
    "tbReference" => "",
    "tbRef2"      => ""
};

# ...and then (at last) we can do the search!
my $page = do_post($args);

# Loop processing pages of results
while ($page) {
    my $table = $page->look_down("_tag" => "table", "id" => "DataGrid1");

    # Remember the state
    $state = get_state($page);

    # Postback target/argument of the "next page" link, if we find one.
    # It is only recorded here and posted once after the whole table has
    # been processed, so a grid that shows the pager more than once does
    # not trigger duplicate requests.
    my @next_page;

    # Check that we found a table - searches that find no results
    # produce a page with no table in it
    if ($table) {
        # Process each row of the results
        foreach my $row ($table->look_down("_tag" => "tr")) {
            my @cells = $row->look_down("_tag" => "td");

            # Skip rows with no <td> cells at all rather than dying
            # on a method call against an undefined value
            next unless @cells;

            if ($cells[0]->look_down("_tag" => "input")) {
                # A result row: the first cell holds an input control and
                # the following cells hold the application details
                my $reference   = $cells[1]->as_trimmed_text;
                my $date        = $cells[2]->as_trimmed_text;
                my $address     = $cells[3]->as_trimmed_text;
                my $description = $cells[4]->as_trimmed_text;

                # Take the postcode from the tail of the address when it
                # is there; otherwise emit an empty element instead of
                # handing undef to the XML writer
                my $postcode = "";
                if ($address =~ /\s+([A-Z]+\d+\s+\d+[A-Z]+)$/) {
                    $postcode = $1;
                }

                $Writer->startTag("application");
                $Writer->dataElement("council_reference", $reference);
                $Writer->dataElement("address", $address);
                $Writer->dataElement("postcode", $postcode);
                $Writer->dataElement("description", $description);
                $Writer->dataElement("date_received", $date);
                $Writer->endTag("application");
            }
            elsif ($cells[0]->attr("colspan") && $cells[0]->attr("colspan") eq "5") {
                # A pager row: look for the ">" link and pull the
                # postback target and argument out of its href
                foreach my $link ($cells[0]->look_down("_tag" => "a")) {
                    my $href = $link->attr("href");
                    next unless defined $href;
                    next unless $link->as_trimmed_text eq ">";

                    if ($href =~ /^javascript:__doPostBack\('([^\']*)','([^\']*)'\)$/) {
                        @next_page = ($1, $2);
                    }
                }
            }
        }
    }

    # We are finished with this tree, so release it explicitly before
    # (possibly) fetching the next page of results
    $page->delete;
    $page = @next_page ? do_post_back($state, @next_page) : undef;
}

# Finish off XML output
$Writer->endTag("applications");
$Writer->endTag("planning");
$Writer->end();

exit 0;

# Extract the state from a page so we can repost it
#
# Takes a parsed page and returns the value attribute of its
# __VIEWSTATE input. Dies with an explicit message if the page has no
# such input.
sub get_state {
    my $page = shift;

    my $viewstate = $page->look_down("_tag" => "input", "name" => "__VIEWSTATE");

    die "No __VIEWSTATE field found in response from $SearchURL"
        unless $viewstate;

    return $viewstate->attr("value");
}

# Fake up what the doPostBack javascript function in the page does...
# do_post_back($state, $target, $argument)
#
# Builds the three hidden form fields (__EVENTTARGET, __EVENTARGUMENT
# and __VIEWSTATE) from its arguments and submits them through do_post,
# returning whatever do_post returns.
sub do_post_back {
    my ($viewstate, $event_target, $event_argument) = @_;

    # Every '$' in the target name is replaced by ':' before posting
    $event_target =~ s/\$/:/g;

    return do_post({
        "__EVENTTARGET"   => $event_target,
        "__EVENTARGUMENT" => $event_argument,
        "__VIEWSTATE"     => $viewstate,
    });
}

# Post to the planning search page
#
# Any arguments are passed straight through to the user agent's post
# method after the search URL. Dies with the HTTP status line if the
# request was not successful; otherwise returns the response content
# parsed into an HTML::TreeBuilder tree.
sub do_post {
    my $response = $UA->post($SearchURL, @_);

    unless ($response->is_success) {
        die $response->status_line;
    }

    return HTML::TreeBuilder->new_from_content($response->content);
}