| @@ -0,0 +1,162 @@ | |||
| #!/usr/bin/perl | |||
| use strict; | |||
| use warnings; | |||
| use CGI qw(:cgi); | |||
| use DateTime; | |||
| use HTML::TreeBuilder; | |||
| use LWP::UserAgent; | |||
| use XML::Writer; | |||
# The master URL for the Broxbourne planning search
our $SearchURL = "http://www2.broxbourne.gov.uk/planningsearch/webform1.aspx";

# We're a CGI script...
my $query = CGI->new();

# Read the requested date from the "year", "month" and "day" parameters.
# Each one is fetched in scalar context: in list context param() returns
# every submitted value (or nothing at all when the parameter is absent),
# which would shift or inject arguments in the DateTime constructor call.
my %datepart;
foreach my $name ("year", "month", "day")
{
    my $value = scalar $query->param($name);

    die "Missing or non-numeric \"$name\" parameter\n"
        unless defined $value && $value =~ /\A\d+\z/;

    $datepart{$name} = $value;
}

# Get the date as an offset (a count of days) from 2000-01-01
my $epoch = DateTime->new(year => 2000, month => 1, day => 1);
my $querydate = DateTime->new(year  => $datepart{"year"},
                              month => $datepart{"month"},
                              day   => $datepart{"day"});
$querydate = $querydate->delta_days($epoch)->delta_days;
# Construct an LWP user agent (proxy settings come from the environment)
our $UA = LWP::UserAgent->new(env_proxy => 1);

# An argument-less post fetches the initial blank form, from which we
# take the starting ASP.NET view state
my $state = get_state(do_post());

# ASP.NET makes us build the state up one post-back at a time, so the
# query date is submitted to each of the two date selector calendars in
# turn, carrying the view state forward after every response
foreach my $calendar ('DateSelector1$Calendar1', 'DateSelector2$Calendar1')
{
    my $reply = do_post_back($state, $calendar, $querydate);
    $state = get_state($reply);
}
# Output an HTTP response header.  The charset is stated explicitly so
# that it agrees with the UTF-8 XML declaration written below; left to
# its default, CGI.pm labels text/* responses as ISO-8859-1.
print $query->header(-type => "text/xml", -charset => "utf-8");

# Create an XML output stream
my $Writer = XML::Writer->new(DATA_MODE => 1);

# Output the XML header data; the "planning" and "applications"
# elements opened here are closed once all results have been written
$Writer->xmlDecl("UTF-8");
$Writer->startTag("planning");
$Writer->dataElement("authority_name", "Borough of Broxbourne");
$Writer->dataElement("authority_short_name", "Broxbourne");
$Writer->startTag("applications");

# Get the arguments for the search: the form fields of the search page
# together with the view state accumulated by the earlier post-backs
my $args = {
    "Srch"        => "rb1",
    "__VIEWSTATE" => $state,
    "btnSearch"   => "Search",
    "tbReference" => "",
    "tbRef2"      => ""
};

# ...and then (at last) we can do the search!
my $page = do_post($args);
# Loop processing pages of results.  Each pass writes one "application"
# element per result row and, if the page carries a ">" pager link,
# posts it back to fetch the following page.
while ($page)
{
    # Take over the current page and clear $page - it will be set again
    # if we find another page of results to make us go round the loop
    my $current = $page;
    undef $page;

    # Remember the state so that a pager link can be posted back
    $state = get_state($current);

    # Searches that find no results produce a page with no table in it
    my $table = $current->look_down("_tag" => "table", "id" => "DataGrid1");
    my @rows = $table ? $table->look_down("_tag" => "tr") : ();

    # Process each row of the results
    foreach my $row (@rows)
    {
        my @cells = $row->look_down("_tag" => "td");

        # A row without any td cells (for example one built from th
        # elements) has nothing for us; skip it rather than calling a
        # method on an undefined cell
        next unless @cells;

        if ($cells[0]->look_down("_tag" => "input"))
        {
            # A result row is read from its first five cells
            next if @cells < 5;

            my $reference = $cells[1]->as_trimmed_text;
            my $date = $cells[2]->as_trimmed_text;
            my $address = $cells[3]->as_trimmed_text;
            my $description = $cells[4]->as_trimmed_text;

            # Default to an empty postcode so that an address without a
            # recognisable one still yields an (empty) postcode element
            # instead of handing undef to the XML writer
            my $postcode = "";

            if ($address =~ /\s+([A-Z]+\d+\s+\d+[A-Z]+)$/)
            {
                $postcode = $1;
            }

            $Writer->startTag("application");
            $Writer->dataElement("council_reference", $reference);
            $Writer->dataElement("address", $address);
            $Writer->dataElement("postcode", $postcode);
            $Writer->dataElement("description", $description);
            $Writer->dataElement("date_received", $date);
            $Writer->endTag("application");
        }
        elsif (!$page && $cells[0]->attr("colspan") && $cells[0]->attr("colspan") eq "5")
        {
            # Pager row.  The !$page test above means that once the next
            # page has been fetched any further pager row is ignored, so
            # the same page is never requested twice.
            foreach my $link ($cells[0]->look_down("_tag" => "a"))
            {
                my $href = $link->attr("href");

                if ($link->as_trimmed_text eq ">" && defined $href &&
                    $href =~ /^javascript:__doPostBack\('([^\']*)','([^\']*)'\)$/)
                {
                    # Copy the captures before calling out, as the
                    # callee runs regex operations of its own
                    my ($target, $argument) = ($1, $2);

                    $page = do_post_back($state, $target, $argument);
                    last;
                }
            }
        }
    }

    # Release the parsed tree now that this page has been dealt with
    $current->delete;
}
# Finish off XML output: close the two elements that are still open
# (innermost first) and then end the document
foreach my $element ("applications", "planning")
{
    $Writer->endTag($element);
}

$Writer->end();

exit 0;
# Extract the state from a page so we can repost it.
#
# Takes a parsed page (any object providing look_down) and returns the
# "value" attribute of its input element named __VIEWSTATE.  Dies with a
# descriptive message when the page contains no such element, instead of
# failing on a method call against an undefined value.
sub get_state
{
    my $page = shift;

    my $viewstate = $page->look_down("_tag" => "input", "name" => "__VIEWSTATE");

    die "Response page has no __VIEWSTATE input field"
        unless defined $viewstate;

    return $viewstate->attr("value");
}
# Fake up what the doPostBack javascript function in the page does:
# post the form back with the given event target and event argument
# alongside the saved view state, returning whatever do_post returns.
sub do_post_back
{
    my ($state, $target, $argument) = @_;

    # Every "$" in the target name is sent as ":"
    $target =~ tr/$/:/;

    my %form = (
        "__EVENTTARGET"   => $target,
        "__EVENTARGUMENT" => $argument,
        "__VIEWSTATE"     => $state,
    );

    return do_post(\%form);
}
# Post to the planning search page.
#
# Any arguments are handed on unchanged to the user agent's post method
# after the search URL.  Dies with the response status line if the
# request is not successful; otherwise returns the response content
# parsed into an HTML tree.
sub do_post
{
    my @form = @_;

    my $response = $UA->post($SearchURL, @form);

    unless ($response->is_success)
    {
        die $response->status_line;
    }

    my $tree = HTML::TreeBuilder->new_from_content($response->content);

    return $tree;
}