|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172 |
#!/usr/bin/perl

# Convert a Highland Council planning-application PDF into an XML document
# written to standard output.
#
# Usage:  <script> <pdf-file> [info-url]
#
#   pdf-file  PDF to read; it is rendered to text with `pdftotext -layout`.
#   info-url  Value emitted verbatim as <info_url> for every application
#             (empty when omitted).
#
# Each application record starts at a "Ref Number" line and runs up to the
# first empty line that follows its "Case Officer" line.  Within a record
# the labels "Location of Works", "Description of Works", "Case Officer" and
# "Validation Date" select which output field the following text belongs to.
#
# Exits non-zero (after emitting a well-formed document) if pdftotext
# reports a failure.

use strict;
use warnings;
use XML::Writer;

my ($file, $info_url) = @ARGV;
die "Usage: $0 <pdf-file> [info-url]\n"
    unless defined $file && length $file;

# Keep accepting a single-argument invocation, but never hand undef to the
# XML writer.
$info_url = "" unless defined $info_url;

my $writer = XML::Writer->new(DATA_MODE => 1, DATA_INDENT => 2);

$writer->xmlDecl("UTF-8");

$writer->startTag("planning");
$writer->dataElement("authority_name", "Highland Council");
$writer->dataElement("authority_short_name", "Highland");
$writer->startTag("applications");

# List-form pipe open: the file name is never interpreted by a shell.
open(my $fh, '-|', "pdftotext", "-layout", $file, "-")
    or die "open failed: $!";

while (my $line = <$fh>) {
    next unless $line =~ /^\s*Ref Number\s*(\S+)/;
    my $refno = $1;

    my $app = _read_application($fh, $line);

    $writer->startTag("application");
    $writer->dataElement("council_reference", $refno);
    $writer->dataElement("address", $app->{address});
    $writer->dataElement("postcode", _extract_postcode($app->{address}));
    $writer->dataElement("description", $app->{proposal});
    $writer->dataElement("info_url", $info_url);
    $writer->dataElement("comment_url", _extract_email($app->{case_officer}));
    $writer->dataElement("date_received", $app->{date_received});
    $writer->endTag;
}

# close() on a pipe waits for the child and returns false when it exited
# non-zero.  Capture the reason right away ($! and $? are easily clobbered)
# but finish the XML document before reporting it.
my $pipe_error = "";
unless (close $fh) {
    $pipe_error = $!
        ? "error reading from pdftotext: $!"
        : "pdftotext failed (wait status $?)";
}

$writer->endTag;    # applications
$writer->endTag;    # planning
$writer->end;

die "$pipe_error\n" if $pipe_error ne "";

# Read one application record from $fh, starting with the already-read
# "Ref Number" line $line.  Returns a hash reference with the keys
# address, proposal, case_officer and date_received; each is the empty
# string when the record does not supply it.
sub _read_application {
    my ($fh, $line) = @_;

    my %app = (
        address       => "",
        proposal      => "",
        case_officer  => "",
        date_received => "",
    );
    my $cur_field;    # reference to the field that free text is appended to
    my $near_end;     # true once the "Case Officer" line has been read

    while (1) {
        chomp $line;
        $line =~ s/^\s+//;
        $line =~ s/\s+$//;

        # A label at the start of the line switches the current field; the
        # label itself is stripped so only the value remains in $line.
        if ($line =~ s/^Location of Works//) {
            $cur_field = \$app{address};
        }
        elsif ($line =~ s/^Description of Works//) {
            $cur_field = \$app{proposal};
        }
        elsif ($line =~ s/^Case Officer//) {
            $cur_field = \$app{case_officer};
        }
        elsif ($line =~ s/^(?:Community Council|Applicant Name|Applicant Address)//) {
            # Fields that are not exported: discard their text.
            $cur_field = undef;
        }
        elsif ($line =~ /^Validation Date\s*(\S+)/) {
            $app{date_received} = $1;
            $cur_field = undef;
        }

        # Remove the gap that separated the label from its value.
        $line =~ s/^\s+//;

        # Skip empty layout lines so they do not add stray spaces to a
        # multi-line value.
        if (defined $cur_field && $line ne "") {
            $$cur_field .= " " if $$cur_field ne "";
            $$cur_field .= $line;
        }

        last unless defined($line = <$fh>);

        # The record ends at the first empty line (just "\n") seen after
        # the "Case Officer" line.
        last if $near_end && length($line) == 1;
        $near_end = 1 if $line =~ /^\s*Case Officer/;
    }

    return \%app;
}

# Return the first UK-style postcode found in $address, or the string
# "None" when there is none.
sub _extract_postcode {
    my ($address) = @_;
    return $1 if $address =~ /([A-Z][A-Z]?\d(?:\d|[A-Z])? ?\d[A-Z][A-Z])/;
    return "None";
}

# Return the first e-mail address found in $text, or the string "None".
# Hyphens, underscores and plus signs are accepted in the local part and
# hyphens in the domain, so addresses such as anne-marie.smith@example.org
# are not truncated.
sub _extract_email {
    my ($text) = @_;
    return $1 if $text =~ /([A-Za-z0-9._+-]+\@[A-Za-z0-9.-]+)/;
    return "None";
}
|