Automatically exported from code.google.com/p/planningalerts
 
 
 
 
 
 

86 linhas
2.2 KiB

  1. #!/usr/bin/perl -w
  2. use strict;
  3. use HTML::TreeBuilder;
  4. use File::Temp qw(tempfile);
  5. use LWP::Simple;
  6. use POSIX;
  7. use Encode;
  8. use CGI;
  9. use CGI::Carp;
  10. sub sanity_check {
  11. my ($var) = @_;
  12. defined $var or return 0;
  13. $var =~ /^[0-9]+$/ or return 0;
  14. return 1;
  15. }
  16. sub no_results {
  17. my ($y, $m, $d, $reason) = @_;
  18. print <<NIL;
  19. Content-type: text/xml
  20. <?xml version="1.0" encoding="UTF-8"?>
  21. <planning>
  22. <authority_name>North Ayrshire Council</authority_name>
  23. <authority_short_name>North Ayrshire</authority_short_name>
  24. <applications>
  25. </applications>
  26. </planning>
  27. NIL
  28. die "$y/$m/$d failed: $reason\n";
  29. }
  30. my $cgi = new CGI;
  31. my $year = $cgi->param("year");
  32. my $month = $cgi->param("month");
  33. my $day = $cgi->param("day");
  34. unless (sanity_check($year) && sanity_check($month) && sanity_check($day)) {
  35. print <<ERROR;
  36. Content-type: text/plain
  37. Need year, month, day parameters
  38. ERROR
  39. exit 0;
  40. }
  41. my $tree = HTML::TreeBuilder->new;
  42. $tree->parse(decode_utf8(get('http://www.north-ayrshire.gov.uk/na/Home.nsf/OtherMenuPage?ReadForm&MenuType=Environment-Planning&DocDisplay=NoDoc&CatLevel=2||') or die "couldn't fetch index page"));
  43. $tree->eof;
  44. my $re = strftime("Planning Applications Received week ending 0?$day %B %Y", 0, 0, 0, $day, $month-1, $year-1900);
  45. my ($day_link) = $tree->look_down(
  46. "_tag", "a",
  47. sub { $_[0]->as_text =~ /$re/i }
  48. );
  49. $day_link or no_results($year, $month, $day, "Cannot find day link");
  50. my $day_absurl = 'http://www.north-ayrshire.gov.uk'.$day_link->attr('href');
  51. my $day_tree = HTML::TreeBuilder->new;
  52. $day_tree->parse(decode_utf8(get($day_absurl) or die "couldn't fetch day page"));
  53. $day_tree->eof;
  54. my ($pdf_img) = $day_tree->look_down(
  55. "_tag", "img",
  56. "alt", qr/\.pdf$/i
  57. );
  58. $pdf_img or die "couldn't find pdf image on day page";
  59. my $pdf_link = $pdf_img->parent;
  60. $pdf_link or die "couldn't find pdf link on day page";
  61. my $pdf_absurl = 'http://www.north-ayrshire.gov.uk'.$pdf_link->attr('href');
  62. my ($fh, $filename) = tempfile(SUFFIX => ".pdf");
  63. print $fh get($pdf_absurl);
  64. close($fh);
  65. print "Content-type: text/xml\n\n";
  66. system "./NorthAyrshire.pl", $filename, $pdf_absurl and die "system failed: $|";
  67. unlink $filename or die "cannot unlink temporary file $filename: $!";