#!/usr/bin/perl -w
#
# Convert a Newport City Council planning-applications PDF into XML.
#
# Usage: <script> PDF_FILE [INFO_URL]
#
#   PDF_FILE  PDF to read; it is converted to text by running
#             "pdftotext -layout PDF_FILE -" and reading its output.
#   INFO_URL  Value written to every application's <info_url> element
#             (empty when omitted).
#
# Output: one <planning> document on STDOUT containing one <application>
# element per "App No:" record found in the converted text.
#
# Dies if PDF_FILE is missing, if pdftotext cannot be started, or if it
# exits with a non-zero status.

use strict;
use warnings;
use XML::Writer;

my $file     = $ARGV[0];
my $info_url = $ARGV[1];

die "usage: $0 PDF_FILE [INFO_URL]\n" unless defined $file;

# An omitted URL becomes an empty element rather than an
# "uninitialized value" warning.
$info_url = "" unless defined $info_url;

my $writer = XML::Writer->new(DATA_MODE => 1, DATA_INDENT => 2);

$writer->xmlDecl("UTF-8");

$writer->startTag("planning");
$writer->dataElement("authority_name", "Newport City Council");
$writer->dataElement("authority_short_name", "Newport");
$writer->startTag("applications");

# List-form pipe open: $file is handed to pdftotext directly, never
# through a shell.
open(my $fh, '-|', "pdftotext", "-layout", $file, "-")
    or die "open failed: $!";

while (my $line = <$fh>) {
    # Each record starts at a line carrying the application number.
    next unless $line =~ /^\s*App No:\s*(\S+)/;

    my $refno    = $1;
    my $address  = "";
    my $proposal = "";
    my $date_received;
    my $cur_field;    # reference to the field that continuation lines extend
    my $near_end;     # true once the "Case Officer:" line has been read

    while (1) {
        chomp $line;
        $line =~ s/^\s+//;
        $line =~ s/\s+$//;

        if ($line =~ s/^ApplicationSite://) {
            $cur_field = \$address;
        }
        elsif ($line =~ s/^Proposal://) {
            $cur_field = \$proposal;
        }
        elsif ($line =~ /^(?:Applicant|Agent|App Type|Case Officer):/) {
            # Labels whose values are not collected; they only terminate
            # whichever multi-line field was being accumulated.
            $cur_field = undef;
        }
        elsif ($line =~ /^Date Registered:\s*(\S+)/) {
            $date_received = $1;
            $cur_field     = undef;
        }

        # Drop whitespace left behind after a label was stripped.
        $line =~ s/^\s+//;

        if (defined $cur_field) {
            # Join wrapped lines with single spaces.
            $$cur_field .= " " if $$cur_field ne "" and $line ne "";
            $$cur_field .= $line;
        }

        last unless defined($line = <$fh>);

        # The record ends at the first line that is exactly one character
        # long (a bare newline) following the "Case Officer:" line.
        last if $near_end && length($line) == 1;
        $near_end = 1 if $line =~ /^\s*Case Officer:/;
    }

    # Pick the first postcode-shaped token out of the address, if any.
    my $postcode = "None";
    if ($address =~ /([A-Z][A-Z]?\d(?:\d|[A-Z])? ?\d[A-Z][A-Z])/) {
        $postcode = $1;
    }

    $writer->startTag("application");
    $writer->dataElement("council_reference", $refno);
    $writer->dataElement("address", $address);
    $writer->dataElement("postcode", $postcode);
    $writer->dataElement("description", $proposal);
    $writer->dataElement("info_url", $info_url);
    $writer->dataElement("comment_url", 'planning@newport.gov.uk');
    # A record without a "Date Registered:" line yields an empty element.
    $writer->dataElement("date_received",
        defined $date_received ? $date_received : "");
    $writer->endTag("application");
}

# Closing a pipe handle waits for the child and reports its exit status.
# This is checked before the document is finished, so a failed conversion
# leaves visibly truncated XML instead of a plausible-looking empty result.
close($fh)
    or die($!
        ? "error closing pdftotext pipe: $!\n"
        : "pdftotext exited with status " . ($? >> 8) . "\n");

$writer->endTag("applications");
$writer->endTag("planning");
$writer->end;