Automatically exported from code.google.com/p/planningalerts

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162
  1. #!/usr/bin/perl
  2. use strict;
  3. use warnings;
  4. use CGI qw(:cgi);
  5. use DateTime;
  6. use HTML::TreeBuilder;
  7. use LWP::UserAgent;
  8. use XML::Writer;
  # Scraper for the Borough of Broxbourne planning search, run as a CGI
  # script.  It reads the "year", "month" and "day" request parameters,
  # searches the council site for applications on that date and prints
  # the results to standard output as an XML document.

  # The master URL for the Broxbourne planning search
  our $SearchURL = "http://www2.broxbourne.gov.uk/planningsearch/webform1.aspx";

  # We're a CGI script...
  my $query = CGI->new();

  # Read each date component in scalar context.  param() returns a list
  # when called in list context, so calling it directly inside the
  # DateTime->new() argument list would let a missing or repeated
  # parameter shift or inject constructor arguments.
  my %date_part;
  foreach my $name (qw(year month day))
  {
      my $value = scalar $query->param($name);
      $date_part{$name} = $value if defined $value && $value =~ /\A\d+\z/;
  }

  # Build the requested date - the eval traps the exception DateTime
  # throws for values that do not form a real date (month 13 and so on)
  my $requested;
  if (keys(%date_part) == 3)
  {
      $requested = eval { DateTime->new(%date_part) };
  }

  # Without a usable date there is nothing to search for, so tell the
  # client what went wrong instead of dying before any output
  unless (defined $requested)
  {
      print $query->header(-type => "text/plain", -status => "400 Bad Request");
      print "Bad request: year, month and day must form a valid date\n";
      exit 0;
  }

  # Get the date as an offset in days from 2000-01-01, which is the
  # argument posted back for the calendar controls below
  my $epoch = DateTime->new(year => 2000, month => 1, day => 1);
  my $querydate = $requested->delta_days($epoch)->delta_days;

  # Construct an LWP user agent
  our $UA = LWP::UserAgent->new(env_proxy => 1);

  # Post the URL to get an initial blank form
  my $state = get_state(do_post());

  # Post each date in turn to build up the state - you can thank
  # Microsoft and ASP.NET for the horrible way we have to do this
  # by posting each argument in turn to build up the state
  $state = get_state(do_post_back($state, 'DateSelector1$Calendar1', $querydate));
  $state = get_state(do_post_back($state, 'DateSelector2$Calendar1', $querydate));

  # Output an HTTP response header
  print $query->header(-type => "text/xml");

  # Create an XML output stream
  my $Writer = XML::Writer->new(DATA_MODE => 1);

  # Output the XML header data
  $Writer->xmlDecl("UTF-8");
  $Writer->startTag("planning");
  $Writer->dataElement("authority_name", "Borough of Broxbourne");
  $Writer->dataElement("authority_short_name", "Broxbourne");
  $Writer->startTag("applications");

  # Get the arguments for the search...
  my $args = {
      "Srch" => "rb1",
      "__VIEWSTATE" => $state,
      "btnSearch" => "Search",
      "tbReference" => "",
      "tbRef2" => ""
  };

  # ...and then (at last) we can do the search!
  my $page = do_post($args);

  # Loop processing pages of results
  while ($page)
  {
      my $table = $page->look_down("_tag" => "table", "id" => "DataGrid1");

      # Remember the state
      $state = get_state($page);

      # Clear the page for now - this will be reinitialised if we
      # find another page of results to make us go round the loop
      # all over again
      undef $page;

      # Check that we found a table - searches that find no results
      # produce a page with no table in it
      if ($table)
      {
          # Process each row of the results
          foreach my $row ($table->look_down("_tag" => "tr"))
          {
              my @cells = $row->look_down("_tag" => "td");

              # A row without any <td> cells has nothing we can read
              next unless @cells;

              # Result rows have an input in the first cell; all five
              # cells must be present before we index into them
              if (@cells >= 5 && $cells[0]->look_down("_tag" => "input"))
              {
                  my $reference = $cells[1]->as_trimmed_text;
                  my $date = $cells[2]->as_trimmed_text;
                  my $address = $cells[3]->as_trimmed_text;
                  my $description = $cells[4]->as_trimmed_text;

                  # Default to an empty string so that an address with no
                  # trailing postcode never hands undef to the XML writer
                  my $postcode = "";

                  if ($address =~ /\s+([A-Z]+\d+\s+\d+[A-Z]+)$/)
                  {
                      $postcode = $1;
                  }

                  $Writer->startTag("application");
                  $Writer->dataElement("council_reference", $reference);
                  $Writer->dataElement("address", $address);
                  $Writer->dataElement("postcode", $postcode);
                  $Writer->dataElement("description", $description);
                  $Writer->dataElement("date_received", $date);
                  $Writer->endTag("application");
              }
              # The paging row is a single cell spanning all five columns.
              # Once the next page has been fetched, later paging rows
              # are skipped so the same request is not posted twice.
              elsif (!defined $page &&
                     $cells[0]->attr("colspan") && $cells[0]->attr("colspan") eq "5")
              {
                  foreach my $link ($cells[0]->look_down("_tag" => "a"))
                  {
                      # Anchors without an href cannot be the ">" link
                      my $href = $link->attr("href");

                      next unless defined $href;
                      next unless $link->as_trimmed_text eq ">";

                      if ($href =~ /^javascript:__doPostBack\('([^\']*)','([^\']*)'\)$/)
                      {
                          $page = do_post_back($state, $1, $2);
                          last;
                      }
                  }
              }
          }
      }
  }

  # Finish off XML output
  $Writer->endTag("applications");
  $Writer->endTag("planning");
  $Writer->end();

  exit 0;
  # Extract the state from a page so we can repost it.
  #
  # Takes a parsed page (any object offering look_down, such as the tree
  # returned by do_post) and returns the "value" attribute of the input
  # named "__VIEWSTATE".  Dies with a message naming the missing field
  # when the page contains no such input, rather than failing on a
  # method call against an undefined value.
  sub get_state
  {
      my $page = shift;

      my $viewstate = $page->look_down("_tag" => "input", "name" => "__VIEWSTATE");

      die "No __VIEWSTATE input found in page" unless defined $viewstate;

      return $viewstate->attr("value");
  }
  # Stand in for the page's doPostBack javascript function.
  #
  # Takes the current view state, the event target and the event
  # argument, posts them as __VIEWSTATE, __EVENTTARGET and
  # __EVENTARGUMENT via do_post, and returns whatever do_post returns.
  sub do_post_back
  {
      my ($state, $target, $argument) = @_;

      # Every '$' in the target is posted as ':' instead
      (my $event_target = $target) =~ s/\$/:/g;

      return do_post({
          "__EVENTTARGET" => $event_target,
          "__EVENTARGUMENT" => $argument,
          "__VIEWSTATE" => $state
      });
  }
  # Post to the planning search page.
  #
  # Any arguments are handed straight on to the user agent's post method
  # after the search URL.  Returns the response body parsed into an
  # HTML::TreeBuilder tree, or dies with the response status line when
  # the request was not successful.
  sub do_post
  {
      my @post_args = @_;

      my $response = $UA->post($SearchURL, @post_args);

      if ($response->is_success)
      {
          return HTML::TreeBuilder->new_from_content($response->content);
      }

      die $response->status_line;
  }