#!/usr/bin/perl
#
# CGI scraper for the South Somerset District Council planning search.
#
# Query parameters:
#   day, month, year - numeric components of the date to search for
#
# Output: a text/xml document with a <planning> root holding the authority
# names and an <applications> list. Each <application> carries
# council_reference, address, postcode, description, info_url, comment_url
# and date_received.

use strict;
use warnings;

use CGI qw(:cgi);
use HTML::TreeBuilder;
use LWP::UserAgent;
use URI;    # URI->new_abs is called below; load it explicitly rather than via LWP
use XML::Writer;

# Month names, keyed by month number (1-12)
our %Months = ( 1 => "Jan",
                2 => "Feb",
                3 => "Mar",
                4 => "Apr",
                5 => "May",
                6 => "Jun",
                7 => "Jul",
                8 => "Aug",
                9 => "Sep",
                10 => "Oct",
                11 => "Nov",
                12 => "Dec" );

# The master URLs for the South Somerset planning search
our $StartURL = "http://www.southsomerset.gov.uk/index.jsp?articleid=1925&page_name=startsearch";
our $SearchURL = "http://www.southsomerset.gov.uk/index.jsp?articleid=1925&page_name=searchresults";
our $InfoURL = "http://www.southsomerset.gov.uk/index.jsp?articleid=1925&page_name=details&p_caseno=";
our $CommentURL = "http://www.southsomerset.gov.uk/index.jsp?articleid=1925&page_name=comments&p_caseno=";

# We're a CGI script...
my $query = CGI->new();

# Get the date to fetch, rejecting missing or non-numeric components up
# front instead of sending a malformed date to the remote search form
my $day = $query->param("day");
my $month = $query->param("month");
my $year = $query->param("year");

foreach my $component ($day, $month, $year) {
    die "day, month and year parameters must all be numeric\n"
        unless defined($component) && $component =~ /^\d+$/;
}

# int() normalises zero-padded months such as "05" to the hash key 5
my $month_name = $Months{int($month)};

die "month parameter must be between 1 and 12\n" unless defined($month_name);

my $date = $day . "-" . $month_name . "-" . $year;

# Construct an LWP user agent
our $UA = LWP::UserAgent->new(env_proxy => 1, cookie_jar => {});

# Post acceptance of terms and conditions to get a cookie
do_post($StartURL, {"acceptTC" => "on"});

# Do the search for the single requested day
my $page = do_post($SearchURL,
                   {"startdate" => $date,
                    "enddate" => $date,
                    "datesearch" => "applications",
                    "timeframe" => "yearonly",
                    "btnsubmit" => "search",
                    "address" => "",
                    "area" => "",
                    "caseno" => "",
                    "decision" => "",
                    "location" => "",
                    "parish" => "",
                    "postcode" => "",
                    "recentweeks" => "",
                    "ward" => ""});

# Output an HTTP response header
print $query->header(-type => "text/xml");

# Create an XML output stream
my $Writer = XML::Writer->new(DATA_MODE => 1);

# Output the XML header data
$Writer->xmlDecl("UTF-8");
$Writer->startTag("planning");
$Writer->dataElement("authority_name", "South Somerset District Council");
$Writer->dataElement("authority_short_name", "South Somerset");
$Writer->startTag("applications");

# Output any applications on the first page
output_applications($page);

# Loop over any additional results pages
while (my $link = $page->look_down("_tag" => "a", sub { $_[0]->as_text eq "Next Page" })) {
    # Fetch this page...
    $page = do_get(URI->new_abs($link->attr("href"), $SearchURL));

    # ...and output the applications from it
    output_applications($page);
}

# Finish off XML output
$Writer->endTag("applications");
$Writer->endTag("planning");
$Writer->end();

exit 0;

# Turn an HTTP response into a parsed HTML tree, dying with the HTTP
# status line if the request was not successful
sub _parse_response {
    my $response = shift;

    die $response->status_line unless $response->is_success;

    return HTML::TreeBuilder->new_from_content($response->content);
}

# Make a GET request; arguments are passed straight to LWP::UserAgent::get.
# Returns the parsed HTML tree, or dies on an unsuccessful response.
sub do_get {
    return _parse_response($UA->get(@_));
}

# Make a POST request; arguments are passed straight to LWP::UserAgent::post.
# Returns the parsed HTML tree, or dies on an unsuccessful response.
sub do_post {
    return _parse_response($UA->post(@_));
}

# Write one <application> element from a hash reference with the keys
# reference, address, postcode, description and date_received
sub _write_application {
    my $application = shift;

    # The detail and comment URLs take the case number without slashes
    (my $linkref = $application->{reference}) =~ s|/||g;

    # Fields that were never found on the page (e.g. no postcode in the
    # address) are written as empty elements rather than passing undef
    my %field = map { $_ => defined($application->{$_}) ? $application->{$_} : "" }
                qw(reference address postcode description date_received);

    $Writer->startTag("application");
    $Writer->dataElement("council_reference", $field{reference});
    $Writer->dataElement("address", $field{address});
    $Writer->dataElement("postcode", $field{postcode});
    $Writer->dataElement("description", $field{description});
    $Writer->dataElement("info_url", $InfoURL . $linkref);
    $Writer->dataElement("comment_url", $CommentURL . $linkref);
    $Writer->dataElement("date_received", $field{date_received});
    $Writer->endTag("application");

    return;
}

# Output applications from a results page.
#
# Takes the parsed HTML tree of a results page and writes one
# <application> element per application found in the first table inside
# the "mainText" div. Writes nothing if that table is not present.
sub output_applications {
    my $page = shift;
    my %application;

    # Find the result table; a page without one has nothing to output
    my $container = $page->look_down("_tag" => "div", "class" => "mainText");
    my $table = $container ? $container->look_down("_tag" => "table") : undef;

    return unless $table;

    # Process each row of the results
    foreach my $row ($table->look_down("_tag" => "tr")) {
        my @cells = $row->look_down("_tag" => "td");

        if (@cells == 1 && $cells[0]->look_down("_tag" => "hr")) {
            # A horizontal rule separates applications: flush the one
            # collected so far and start afresh
            _write_application(\%application) if defined($application{reference});
            %application = ();
        }
        elsif (@cells == 1 && defined($application{reference})) {
            # A single-cell row after the summary row holds the proposal text
            my $description = $cells[0]->as_trimmed_text;

            $description =~ s/^Proposal:\s*//;
            $application{description} = $description;
        }
        elsif (@cells == 5 && $cells[0]->as_trimmed_text =~ /^\d+/) {
            # A five-cell row starting with a digit is an application summary
            my $address = $cells[2]->as_trimmed_text;

            %application = (reference => $cells[0]->as_trimmed_text,
                            date_received => $cells[1]->as_trimmed_text,
                            address => $address);

            # Pick a trailing postcode out of the address when there is one
            if ($address =~ /\s+([A-Z]+\d+\s+\d+[A-Z]+)$/) {
                $application{postcode} = $1;
            }
        }
    }

    # Don't drop a final application that has no separator row after it
    _write_application(\%application) if defined($application{reference});

    return;
}