Automatically exported from code.google.com/p/planningalerts
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

73 lines
2.1 KiB

  1. #!/usr/bin/perl -w
  use strict;
  use warnings;
  use XML::Writer;

  # Convert a Highland Council planning-application PDF into PlanningAlerts XML.
  #
  # Usage: PROGRAM PDF_FILE [INFO_URL]
  #
  # PDF_FILE is rendered to text with `pdftotext -layout`.  Every record that
  # starts with a "Ref Number" line becomes one <application> element in the
  # XML document printed on STDOUT.  INFO_URL is copied verbatim into each
  # application's <info_url>; it is emitted empty when omitted.
  #
  # Exits non-zero (after finishing the XML document) when pdftotext fails.

  # Labels whose following text is collected, paired with the record key
  # the text is stored under.
  my @COLLECTED_LABELS = (
      [ qr/^Location of Works/,    'address'      ],
      [ qr/^Description of Works/, 'proposal'     ],
      [ qr/^Case Officer/,         'case_officer' ],
  );

  # Labels that end the current field but whose own text is discarded.
  my $DISCARDED_LABEL =
      qr/^(?:Community Council|Applicant Name|Applicant Address)/;

  # Build an empty record for the application with the given reference.
  # 'current' names the field that continuation lines are appended to.
  sub new_record {
      my ($refno) = @_;
      return {
          refno         => $refno,
          address       => "",
          proposal      => "",
          case_officer  => "",
          date_received => "",
          current       => undef,
      };
  }

  # Fold one line of pdftotext output into $record: switch the current field
  # when the line starts with a known label, then append the remaining text
  # to whichever collected field is active.
  sub absorb_line {
      my ($record, $line) = @_;

      chomp $line;
      $line =~ s/^\s+//;
      $line =~ s/\s+$//;

      my $switched = 0;
      for my $label (@COLLECTED_LABELS) {
          my ($pattern, $key) = @{$label};
          if ($line =~ s/$pattern//) {
              $record->{current} = $key;
              $switched = 1;
              last;
          }
      }
      if (!$switched) {
          if ($line =~ $DISCARDED_LABEL) {
              $record->{current} = undef;
          }
          elsif ($line =~ /^Validation Date\s*(\S*)/) {
              # The label always ends the current field, even when the date
              # itself is missing, so it never leaks into the previous field.
              $record->{date_received} = $1 if length $1;
              $record->{current} = undef;
          }
      }

      my $key = $record->{current};
      return unless defined $key;

      $line =~ s/^\s+//;         # whitespace left behind by the removed label
      return if $line eq "";     # blank lines must not add stray spaces

      $record->{$key} .= " " if $record->{$key} ne "";
      $record->{$key} .= $line;
      return;
  }

  # Return the first UK-style postcode found in $address, or "None".
  sub extract_postcode {
      my ($address) = @_;
      return $1 if $address =~ /([A-Z][A-Z]?\d[\dA-Z]? ?\d[A-Z][A-Z])/;
      return "None";
  }

  # Return the first e-mail address found in $text, or "None".  The local
  # part may contain '-', '_' and '+'; the domain never ends in a dot, so a
  # full stop that closes the sentence is not captured.
  sub extract_email {
      my ($text) = @_;
      return $1
          if $text =~ /([A-Za-z0-9._+-]+\@[A-Za-z0-9-]+(?:\.[A-Za-z0-9-]+)*)/;
      return "None";
  }

  # Write one <application> element for $record to $writer.
  sub write_application {
      my ($writer, $record, $info_url) = @_;

      $writer->startTag("application");
      $writer->dataElement("council_reference", $record->{refno});
      $writer->dataElement("address", $record->{address});
      $writer->dataElement("postcode", extract_postcode($record->{address}));
      $writer->dataElement("description", $record->{proposal});
      $writer->dataElement("info_url", $info_url);
      $writer->dataElement("comment_url", extract_email($record->{case_officer}));
      $writer->dataElement("date_received", $record->{date_received});
      $writer->endTag("application");
      return;
  }

  my ($file, $info_url) = @ARGV;
  die "Usage: $0 PDF_FILE [INFO_URL]\n"
      unless defined $file && length $file;
  $info_url = "" unless defined $info_url;

  # Start the converter before emitting any XML so that a failure to launch
  # it does not leave a truncated document on STDOUT.
  open(my $fh, '-|', "pdftotext", "-layout", $file, "-")
      or die "open failed: $!";

  my $writer = XML::Writer->new(DATA_MODE => 1, DATA_INDENT => 2);
  $writer->xmlDecl("UTF-8");
  $writer->startTag("planning");
  $writer->dataElement("authority_name", "Highland Council");
  $writer->dataElement("authority_short_name", "Highland");
  $writer->startTag("applications");

  my $record;          # application currently being collected, if any
  my $near_end = 0;    # true once the record's "Case Officer" line was seen

  while (my $line = <$fh>) {
      if ($line =~ /^\s*Ref Number\s*(\S+)/) {
          # A new record starts here.  Flush the previous one if it never
          # reached its terminating blank line (e.g. no Case Officer block),
          # instead of merging the two records.
          write_application($writer, $record, $info_url) if $record;
          $record   = new_record($1);
          $near_end = 0;
          next;
      }

      next unless $record;    # text between records is ignored

      # The first empty line after the Case Officer block ends the record.
      if ($near_end && length($line) == 1) {
          write_application($writer, $record, $info_url);
          $record = undef;
          next;
      }

      $near_end = 1 if $line =~ /^\s*Case Officer/;
      absorb_line($record, $line);
  }

  # Input ended in the middle of a record: emit what was collected.
  write_application($writer, $record, $info_url) if $record;

  # close() on a pipe waits for the child and fails on a non-zero exit;
  # capture the reason now, before further I/O can overwrite $! and $?.
  my $pipe_error;
  if (!close $fh) {
      $pipe_error = $!
          ? "error closing pdftotext pipe: $!"
          : "pdftotext failed (wait status $?)";
  }

  $writer->endTag("applications");
  $writer->endTag("planning");
  $writer->end;

  die "$pipe_error\n" if defined $pipe_error;