Automatically exported from code.google.com/p/planningalerts
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

161 lines
4.6 KiB

  1. #!/usr/bin/perl
  2. use strict;
  3. use warnings;
  4. use CGI qw(:cgi);
  5. use DateTime;
  6. use DateTime::Format::DateParse;
  7. use HTML::TreeBuilder;
  8. use LWP::UserAgent;
  9. use XML::Writer;
  # The master URL for the Broxbourne planning search
  our $SearchURL = "http://www2.broxbourne.gov.uk/planningsearch/webform1.aspx";

  # We're a CGI script...
  # NOTE(review): no HTTP Content-Type header is printed before the XML
  # below - confirm whether whatever invokes this script supplies one.
  my $query = CGI->new();

  # Read the requested date from the "year", "month" and "day" parameters.
  # Each value is fetched in scalar context: CGI's param() returns a list
  # in list context, so calling it directly inside the DateTime->new()
  # argument list would let a missing or repeated parameter shift the
  # name/value pairs.
  my %date_part;
  foreach my $name (qw(year month day))
  {
      my $value = $query->param($name);

      die "Missing or non-numeric '$name' parameter"
          unless defined $value && $value =~ /\A\d+\z/;

      $date_part{$name} = $value;
  }

  # Get the date as an offset from 2000-01-01
  my $epoch = DateTime->new(year => 2000, month => 1, day => 1);
  my $querydate = DateTime->new(%date_part);

  $querydate = $querydate->delta_days($epoch)->delta_days;

  # Construct an LWP user agent
  our $UA = LWP::UserAgent->new(env_proxy => 1);

  # Post the URL to get an initial blank form
  my $state = get_state(do_post());

  # Post each date in turn to build up the state - you can thank
  # Microsoft and ASP.NET for the horrible way we have to do this
  # by posting each argument in turn to build up the state
  $state = get_state(do_post_back($state, 'DateSelector1$Calendar1', $querydate));
  $state = get_state(do_post_back($state, 'DateSelector2$Calendar1', $querydate));

  # Create an XML output stream
  my $Writer = XML::Writer->new(DATA_MODE => 1);

  # Output the XML header data
  $Writer->xmlDecl("UTF-8");
  $Writer->startTag("planning");
  $Writer->dataElement("authority_name", "Borough of Broxbourne");
  $Writer->dataElement("authority_short_name", "Broxbourne");
  $Writer->startTag("applications");

  # Get the arguments for the search...
  my $args = {
      "Srch" => "rb1",
      "__VIEWSTATE" => $state,
      "btnSearch" => "Search",
      "tbReference" => "",
      "tbRef2" => ""
  };

  # ...and then (at last) we can do the search!
  my $page = do_post($args);

  # Loop processing pages of results
  while ($page)
  {
      my $table = $page->look_down("_tag" => "table", "id" => "DataGrid1");

      # Remember the state
      $state = get_state($page);

      # Clear the page for now - this will be reinitialised if we
      # find another page of results to make us go round the loop
      # all over again
      undef $page;

      # Check that we found a table - searches that find no results
      # produce a page with no table in it
      if ($table)
      {
          # Process each row of the results
          foreach my $row ($table->look_down("_tag" => "tr"))
          {
              my @cells = $row->look_down("_tag" => "td");

              # A row with no <td> cells is neither a result nor a pager
              next unless @cells;

              my $colspan = $cells[0]->attr("colspan");

              if (@cells >= 5 && $cells[0]->look_down("_tag" => "input"))
              {
                  # A result row: reference, date, address, description
                  my $reference = $cells[1]->as_trimmed_text;
                  my $date = $cells[2]->as_trimmed_text;
                  my $address = $cells[3]->as_trimmed_text;
                  my $description = $cells[4]->as_trimmed_text;

                  # Take a trailing postcode from the address when there
                  # is one; otherwise emit an empty element rather than
                  # handing an undefined value to the XML writer
                  my $postcode =
                      $address =~ /\s+([A-Z]+\d+\s+\d+[A-Z]+)$/ ? $1 : "";

                  $Writer->startTag("application");
                  $Writer->dataElement("council_reference", $reference);
                  $Writer->dataElement("address", $address);
                  $Writer->dataElement("postcode", $postcode);
                  $Writer->dataElement("description", $description);
                  $Writer->dataElement("date_received", $date);
                  $Writer->endTag("application");
              }
              elsif (defined $colspan && $colspan eq "5")
              {
                  # The pager row: follow the ">" link, if any, by faking
                  # the post back that its javascript href would perform
                  foreach my $link ($cells[0]->look_down("_tag" => "a"))
                  {
                      my $href = $link->attr("href");

                      # Anchors without an href cannot be a post back link
                      next unless defined $href;
                      next unless $link->as_trimmed_text eq ">";

                      if ($href =~ /^javascript:__doPostBack\('([^\']*)','([^\']*)'\)$/)
                      {
                          $page = do_post_back($state, $1, $2);
                      }
                  }
              }
          }
      }
  }

  # Finish off XML output
  $Writer->endTag("applications");
  $Writer->endTag("planning");
  $Writer->end();

  exit 0;
  # Extract the state from a page so we can repost it.
  #
  # Takes a parsed page (any object whose look_down method finds elements
  # by attribute) and returns the "value" attribute of its __VIEWSTATE
  # input element. Dies with a descriptive message if the page has no
  # such input, instead of failing on a method call on an undefined value.
  sub get_state
  {
      my $page = shift;
      my $viewstate = $page->look_down("_tag" => "input", "name" => "__VIEWSTATE");

      die "No __VIEWSTATE input found in the page returned by the planning search"
          unless defined $viewstate;

      return $viewstate->attr("value");
  }
  # Fake up what the doPostBack javascript function in the page does.
  #
  # Arguments are the current view state, the event target and the event
  # argument. Every '$' in the target is replaced by ':' before the three
  # values are posted as __VIEWSTATE, __EVENTTARGET and __EVENTARGUMENT.
  # Returns whatever do_post returns for that form.
  sub do_post_back
  {
      my ($state, $target, $argument) = @_;

      # Work on a copy so the caller-visible name keeps its original value
      my $event_target = $target;
      $event_target =~ s/\$/:/g;

      return do_post({
          "__EVENTTARGET"   => $event_target,
          "__EVENTARGUMENT" => $argument,
          "__VIEWSTATE"     => $state,
      });
  }
  # Post to the planning search page.
  #
  # Any arguments (normally a single hash reference of form fields, or
  # nothing at all) are passed straight through to the user agent's post
  # method after the search URL. Dies with the HTTP status line if the
  # request is not successful; otherwise returns the response content
  # parsed into an HTML::TreeBuilder tree.
  sub do_post
  {
      my @form = @_;
      my $response = $UA->post($SearchURL, @form);

      unless ($response->is_success)
      {
          die $response->status_line;
      }

      my $html = $response->content;

      return HTML::TreeBuilder->new_from_content($html);
  }