Automatically exported from code.google.com/p/planningalerts

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167
  1. #!/usr/bin/perl
  2. use strict;
  3. use warnings;
  4. use CGI qw(:cgi);
  5. use HTML::TreeBuilder;
  6. use LWP::UserAgent;
  7. use XML::Writer;
  8. # Month names
  9. our %Months = ( 1 => "Jan", 2 => "Feb", 3 => "Mar", 4 => "Apr",
  10. 5 => "May", 5 => "Jun", 7 => "Jul", 8 => "Aug",
  11. 9 => "Sep", 10 => "Oct", 11 => "Nov", 12 => "Dec" );
  12. # The master URLs for the South Somerset planning search
  13. our $StartURL = "http://www.southsomerset.gov.uk/index.jsp?articleid=1925&page_name=startsearch";
  14. our $SearchURL = "http://www.southsomerset.gov.uk/index.jsp?articleid=1925&page_name=searchresults";
  15. our $InfoURL = "http://www.southsomerset.gov.uk/index.jsp?articleid=1925&page_name=details&p_caseno=";
  16. our $CommentURL = "http://www.southsomerset.gov.uk/index.jsp?articleid=1925&page_name=comments&p_caseno=";
  17. # We're a CGI script...
  18. my $query = CGI->new();
  19. # Get the date to fetch
  20. my $date = $query->param("day") . "-" . $Months{$query->param("month")} . "-" . $query->param("year");
  21. # Construct an LWP user agent
  22. our $UA = LWP::UserAgent->new(env_proxy => 1, cookie_jar => {});
  23. # Post acceptance of terms and conditions to get a cookie
  24. do_post($StartURL, {"acceptTC" => "on"});
  25. # Do the search
  26. my $page = do_post($SearchURL,
  27. {"startdate" => "12-Nov-2007", #$date,
  28. "enddate" => $date,
  29. "datesearch" => "applications",
  30. "timeframe" => "yearonly",
  31. "btnsubmit" => "search",
  32. "address" => "",
  33. "area" => "",
  34. "caseno" => "",
  35. "decision" => "",
  36. "location" => "",
  37. "parish" => "",
  38. "postcode" => "",
  39. "recentweeks" => "",
  40. "ward" => ""});
  41. # Output an HTTP response header
  42. print $query->header(-type => "text/xml");
  43. # Create an XML output stream
  44. my $Writer = XML::Writer->new(DATA_MODE => 1);
  45. # Output the XML header data
  46. $Writer->xmlDecl("UTF-8");
  47. $Writer->startTag("planning");
  48. $Writer->dataElement("authority_name", "South Somerset District Council");
  49. $Writer->dataElement("authority_short_name", "South Somerset");
  50. $Writer->startTag("applications");
  51. # Output any applications on the first page
  52. output_applications($page);
  53. # Loop over any additional results pages
  54. while (my $link = $page->look_down("_tag" => "a", sub { $_[0]->as_text eq "Next Page" }))
  55. {
  56. # Fetch this page...
  57. $page = do_get(URI->new_abs($link->attr("href"), $SearchURL));
  58. # ...and output the applications from it
  59. output_applications($page);
  60. }
  61. # Finish off XML output
  62. $Writer->endTag("applications");
  63. $Writer->endTag("planning");
  64. $Writer->end();
  65. exit 0;
  66. # Make a GET request
  67. sub do_get
  68. {
  69. my $response = $UA->get(@_);
  70. die $response->status_line unless $response->is_success;
  71. return HTML::TreeBuilder->new_from_content($response->content);
  72. }
  73. # Make a POST request
  74. sub do_post
  75. {
  76. my $response = $UA->post(@_);
  77. die $response->status_line unless $response->is_success;
  78. return HTML::TreeBuilder->new_from_content($response->content);
  79. }
  80. # Output applications from a results page
  81. sub output_applications
  82. {
  83. my $page = shift;
  84. my $reference;
  85. my $address;
  86. my $postcode;
  87. my $description;
  88. my $date_received;
  89. # Find the result table
  90. my $table = $page->look_down("_tag" => "div", "class" => "mainText")->look_down("_tag" => "table");
  91. # Process each row of the results
  92. foreach my $row ($table->look_down("_tag" => "tr"))
  93. {
  94. my @cells = $row->look_down("_tag" => "td");
  95. if (@cells == 1 && $cells[0]->look_down("_tag" => "hr"))
  96. {
  97. if (defined($reference))
  98. {
  99. my $linkref = $reference;
  100. $linkref =~ s|/||g;
  101. $Writer->startTag("application");
  102. $Writer->dataElement("council_reference", $reference);
  103. $Writer->dataElement("address", $address);
  104. $Writer->dataElement("postcode", $postcode);
  105. $Writer->dataElement("description", $description);
  106. $Writer->dataElement("info_url", $InfoURL . $linkref);
  107. $Writer->dataElement("comment_url", $CommentURL . $linkref);
  108. $Writer->dataElement("date_received", $date_received);
  109. $Writer->endTag("application");
  110. }
  111. undef $reference;
  112. undef $address;
  113. undef $postcode;
  114. undef $description;
  115. undef $date_received;
  116. }
  117. elsif (@cells == 1 && defined($reference))
  118. {
  119. $description = $cells[0]->as_trimmed_text;
  120. $description =~ s/^Proposal:\s*//;
  121. }
  122. elsif (@cells == 5 && $cells[0]->as_trimmed_text =~ /^\d+/)
  123. {
  124. $reference = $cells[0]->as_trimmed_text;
  125. $date_received = $cells[1]->as_trimmed_text;
  126. $address = $cells[2]->as_trimmed_text;
  127. if ($address =~ /\s+([A-Z]+\d+\s+\d+[A-Z]+)$/)
  128. {
  129. $postcode = $1;
  130. }
  131. }
  132. }
  133. return;
  134. }