| @@ -0,0 +1,55 @@ | |||||
| #!/usr/bin/perl -w | |||||
| use strict; | |||||
| use LWP::Simple; | |||||
| use File::Temp qw(tempfile); | |||||
| use POSIX; | |||||
| use CGI; | |||||
# Brentwood.cgi request handler.
#
# Reads the "year", "month" and "day" query parameters, searches the council
# index page for a PDF link that is followed by that date, saves the PDF to a
# temporary file and runs ./Brentwood.pl on it; the child prints the XML body.
# When no matching PDF can be found or fetched, an empty <planning> document
# is returned instead.

# Flush each print immediately so the CGI header is written before the child
# process produces any output.
$| = 1;

# Emit a well-formed result document with no applications and end the request.
sub send_empty_results {
    print "Content-type: text/xml\n\n";
    print <<NIL;
<?xml version="1.0" encoding="UTF-8"?>
<planning>
<authority_name>Brentwood Borough Council</authority_name>
<authority_short_name>Brentwood</authority_short_name>
<applications>
</applications>
</planning>
NIL
    exit 0;
}

my $cgi   = CGI->new;
my $year  = $cgi->param("year");
my $month = $cgi->param("month");
my $day   = $cgi->param("day");

# The values feed strftime/sprintf below, so insist on plain digits rather
# than mere definedness.
for my $value ($year, $month, $day) {
    next if defined $value && $value =~ /\A[0-9]+\z/;
    print "Content-type: text/plain\n\n";
    print "Need year, month, day parameters\n";
    exit 0;
}

# get() returns undef when the request fails.
my $html = get('http://www.brentwood-council.gov.uk/index.php?cid=573');
send_empty_results() unless defined $html;

my $date = strftime("%d %B %Y", 0, 0, 0, $day, $month - 1, $year - 1900);

# quick and dirty: a PDF link, the rest of its text, any run of tags, then
# more text ending in the requested date.  \Q..\E keeps the date literal.
my ($url) = $html =~ m{(http://[^"]*\.pdf)[^<]*(?:<[^>]*>)*[^<]*\Q$date\E};
send_empty_results() unless defined $url;

my $pdf = get($url);
send_empty_results() unless defined $pdf;

my $dmy = sprintf("%02d/%02d/%04d", $day, $month, $year);

my ($fh, $filename) = tempfile(SUFFIX => ".pdf");
binmode $fh;
my $written = print {$fh} $pdf;
$written = close($fh) && $written;
unless ($written) {
    my $error = $!;
    unlink $filename;
    die "cannot write temporary file $filename: $error\n";
}

print "Content-type: text/xml\n\n";

# List form: the arguments are passed to the parser without going via a shell.
system("./Brentwood.pl", $filename, $url, $dmy);
my $status = $?;
unlink $filename;
warn "./Brentwood.pl failed with status $status\n" if $status != 0;
| @@ -0,0 +1,72 @@ | |||||
| #!/usr/bin/perl -w | |||||
| use strict; | |||||
| use XML::Writer; | |||||
# Usage: Brentwood.pl PDF_FILE INFO_URL DATE
#
# Runs "pdftotext -layout" on PDF_FILE and prints a <planning> XML document
# on standard output.  Every text line containing "Address:" starts a record:
# the text left of "Address:" is the reference number, and the text from that
# column onwards (on this and the following lines, up to the first empty
# line) is split into the address and proposal fields.  INFO_URL and DATE are
# copied into every record as info_url and date_received.

@ARGV >= 3 or die "usage: $0 PDF_FILE INFO_URL DATE\n";
my ($file, $info_url, $date) = @ARGV;

my $writer = XML::Writer->new(DATA_MODE => 1, DATA_INDENT => 2);

$writer->xmlDecl("UTF-8");
$writer->startTag("planning");
$writer->dataElement("authority_name", "Brentwood Borough Council");
$writer->dataElement("authority_short_name", "Brentwood");
$writer->startTag("applications");

# List-form pipe open: the file name is never interpreted by a shell.
open(my $fh, '-|', 'pdftotext', '-layout', $file, '-')
    or die "cannot run pdftotext on $file: $!";

while (my $line = <$fh>) {
    chomp $line;
    # The substitution here had an empty pattern, which makes Perl reuse the
    # last successfully matched pattern.  Stripping form feeds is presumably
    # what was intended -- TODO confirm against a real PDF.
    $line =~ tr/\f//d;

    if ($line =~ /Address:/) {
        # Column at which the second (label/value) column starts.
        my $ofs_col2 = $-[0];

        my $refno = substr $line, 0, $ofs_col2;
        $refno =~ s/ +$//;

        my $address  = "";
        my $proposal = "";
        my $cur_field;    # reference to the field being accumulated, if any

        while (1) {
            if (length($line) > $ofs_col2) {
                my $col2 = substr $line, $ofs_col2;
                $col2 =~ s/^ +//;

                # The patterns also accept a label that has lost up to its
                # first three characters.
                if ($col2 =~ s/^(?:(?:A?d)?d)?ress://) {
                    $cur_field = \$address;
                }
                elsif ($col2 =~ s/^(?:(?:P?r)?o)?posal://) {
                    $cur_field = \$proposal;
                }
                elsif ($col2 =~ s/^(?:(?:A?p)?p)?licant://) {
                    $cur_field = undef;
                }
                elsif ($col2 =~ s/^(?:(?:A?g)?e)?nt://) {
                    $cur_field = undef;
                }

                $col2 =~ s/^ +//;
                $col2 =~ s/ +$//;

                if (defined $cur_field) {
                    $$cur_field .= " " if $$cur_field ne "";
                    $$cur_field .= $col2;
                }
            }

            last unless defined($line = <$fh>);
            chomp $line;
            $line =~ tr/\f//d;
            last if length($line) == 0;
        }

        my $postcode = "None";
        if ($address =~ /([A-Z][A-Z]?\d(?:\d|[A-Z])? ?\d[A-Z][A-Z])/) {
            $postcode = $1;
        }

        $writer->startTag("application");
        $writer->dataElement("council_reference", $refno);
        $writer->dataElement("address", $address);
        $writer->dataElement("postcode", $postcode);
        $writer->dataElement("description", $proposal);
        $writer->dataElement("info_url", $info_url);
        $writer->dataElement("comment_url", "planning\@brentwood.gov.uk");
        $writer->dataElement("date_received", $date);
        $writer->endTag;
    }
}

# close() on a pipe reports the child's failure; note it without changing
# the output that has already been produced.
close($fh) or warn "pdftotext failed on $file (status $?)\n";

$writer->endTag;
$writer->endTag;
$writer->end;
| @@ -0,0 +1,54 @@ | |||||
| #!/usr/bin/perl -w | |||||
| use strict; | |||||
| use LWP::Simple; | |||||
| use File::Temp qw(tempfile); | |||||
| use POSIX; | |||||
| use CGI; | |||||
# Glasgow.cgi request handler.
#
# Reads the "year", "month" and "day" query parameters, searches the council
# index page for a site-relative PDF link whose text ends in a
# "dd/mm/yy - dd/mm/yy" range finishing on the requested date, saves the PDF
# to a temporary file and runs ./Glasgow.pl on it; the child prints the XML
# body.  When no matching PDF can be found or fetched, an empty <planning>
# document is returned instead.

# Flush each print immediately so the CGI header is written before the child
# process produces any output.
$| = 1;

# Emit a well-formed result document with no applications and end the request.
sub send_empty_results {
    print "Content-type: text/xml\n\n";
    print <<NIL;
<?xml version="1.0" encoding="UTF-8"?>
<planning>
<authority_name>Glasgow City Council</authority_name>
<authority_short_name>Glasgow</authority_short_name>
<applications>
</applications>
</planning>
NIL
    exit 0;
}

my $cgi   = CGI->new;
my $year  = $cgi->param("year");
my $month = $cgi->param("month");
my $day   = $cgi->param("day");

# The values feed sprintf below, so insist on plain digits rather than mere
# definedness.
for my $value ($year, $month, $day) {
    next if defined $value && $value =~ /\A[0-9]+\z/;
    print "Content-type: text/plain\n\n";
    print "Need year, month, day parameters\n";
    exit 0;
}

# get() returns undef when the request fails.
my $html = get('http://www.glasgow.gov.uk/en/Business/Planning_Development/DevelopmentControl/Sitehistorysearches/');
send_empty_results() unless defined $html;

my $date = sprintf("%02d/%02d/%02d", $day, $month, $year % 100);

# quick and dirty: \Q..\E keeps the date literal inside the pattern.
my ($url) = $html =~ m!href="(/[^"]*\.pdf)[^<]*[0-9]{2}/[0-9]{2}/[0-9]{2} - \Q$date\E!;
send_empty_results() unless defined $url;

my $absurl = "http://www.glasgow.gov.uk$url";

my $pdf = get($absurl);
send_empty_results() unless defined $pdf;

my ($fh, $filename) = tempfile(SUFFIX => ".pdf");
binmode $fh;
my $written = print {$fh} $pdf;
$written = close($fh) && $written;
unless ($written) {
    my $error = $!;
    unlink $filename;
    die "cannot write temporary file $filename: $error\n";
}

print "Content-type: text/xml\n\n";

# List form: the arguments are passed to the parser without going via a shell.
system("./Glasgow.pl", $filename, $absurl);
my $status = $?;
unlink $filename;
warn "./Glasgow.pl failed with status $status\n" if $status != 0;
| @@ -0,0 +1,63 @@ | |||||
| #!/usr/bin/perl -w | |||||
| use strict; | |||||
| use XML::Writer; | |||||
# Usage: Glasgow.pl PDF_FILE INFO_URL
#
# Runs "pdftotext -layout" on PDF_FILE and prints a <planning> XML document
# on standard output.  A line starting with "Reference:" opens a record; the
# following lines are scanned for "Address:", "Proposal:" and
# "Date Received:" labels until a line containing "Map Reference:" (or end of
# input) closes it.  INFO_URL is copied into every record as info_url.

@ARGV >= 2 or die "usage: $0 PDF_FILE INFO_URL\n";
my ($file, $info_url) = @ARGV;

my $writer = XML::Writer->new(DATA_MODE => 1, DATA_INDENT => 2);

$writer->xmlDecl("UTF-8");
$writer->startTag("planning");
$writer->dataElement("authority_name", "Glasgow City Council");
$writer->dataElement("authority_short_name", "Glasgow");
$writer->startTag("applications");

# List-form pipe open: the file name is never interpreted by a shell.
open(my $fh, '-|', 'pdftotext', '-layout', $file, '-')
    or die "cannot run pdftotext on $file: $!";

while (my $line = <$fh>) {
    if ($line =~ /^\s*Reference:\s*(\S+)/) {
        my $refno = $1;

        my $address  = "";
        my $proposal = "";
        my $date_received;
        my $cur_field;    # reference to the field being accumulated, if any

        while (1) {
            chomp $line;
            $line =~ s/^\s+//;
            $line =~ s/\s+$//;

            if ($line =~ s/^Address://) {
                $cur_field = \$address;
            }
            elsif ($line =~ s/^Proposal://) {
                $cur_field = \$proposal;
            }
            elsif ($line =~ /^Date Received:\s*(\S+)/) {
                $date_received = $1;
                # Dots in the date are rewritten as slashes.
                $date_received =~ s#\.#/#g;
                $cur_field = undef;
            }

            $line =~ s/^\s+//;
            if (defined $cur_field) {
                $$cur_field .= " " if $$cur_field ne "";
                $$cur_field .= $line;
            }

            last if $line =~ /Map Reference:/;
            last unless defined($line = <$fh>);
        }

        my $postcode = "None";
        if ($address =~ /([A-Z][A-Z]?\d(?:\d|[A-Z])? ?\d[A-Z][A-Z])/) {
            $postcode = $1;
        }

        # A record without a "Date Received:" line must not hand undef to
        # the writer.
        $date_received = "" unless defined $date_received;

        $writer->startTag("application");
        $writer->dataElement("council_reference", $refno);
        $writer->dataElement("address", $address);
        $writer->dataElement("postcode", $postcode);
        $writer->dataElement("description", $proposal);
        $writer->dataElement("info_url", $info_url);
        $writer->dataElement("comment_url", "planning.representations\@drs.glasgow.gov.uk");
        $writer->dataElement("date_received", $date_received);
        $writer->endTag;
    }
}

# close() on a pipe reports the child's failure; note it without changing
# the output that has already been produced.
close($fh) or warn "pdftotext failed on $file (status $?)\n";

$writer->endTag;
$writer->endTag;
$writer->end;
| @@ -0,0 +1,82 @@ | |||||
| #!/usr/bin/perl -w | |||||
| use strict; | |||||
| use HTML::TreeBuilder; | |||||
| use File::Temp qw(tempfile); | |||||
| use LWP::Simple; | |||||
| use POSIX; | |||||
| use Encode; | |||||
| use CGI; | |||||
| use CGI::Carp; | |||||
# Returns 1 when the argument is defined and consists only of the digits
# 0-9 (at least one), and 0 otherwise.
sub sanity_check {
    my ($var) = @_;
    return 0 unless defined $var;
    # \A and \z anchor to the real ends of the string; with ^ and $ a value
    # carrying a trailing newline would also be accepted.
    return $var =~ /\A[0-9]+\z/ ? 1 : 0;
}
# Prints a complete CGI response holding an empty <planning> document for
# Highland Council, then dies with "$y/$m/$d failed: $reason".
#
# Parameters: year, month, day of the request and a short failure reason.
# Never returns.
sub no_results {
    my ($y, $m, $d, $reason) = @_;
    # The header block must end with an empty line before the body starts.
    print "Content-type: text/xml\n\n";
    print <<NIL;
<?xml version="1.0" encoding="UTF-8"?>
<planning>
<authority_name>Highland Council</authority_name>
<authority_short_name>Highland</authority_short_name>
<applications>
</applications>
</planning>
NIL
    die "$y/$m/$d failed: $reason\n";
}
# Highland.cgi request handler.
#
# Reads the "year", "month" and "day" query parameters, finds the <h2> for
# that month on the council's weekly-list page, looks in the element that
# follows it for a link whose text names the requested day, saves the linked
# PDF to a temporary file and runs ./Highland.pl on it; the child prints the
# XML body.  Lookup failures are reported through no_results().

my $cgi   = CGI->new;
my $year  = $cgi->param("year");
my $month = $cgi->param("month");
my $day   = $cgi->param("day");

unless (sanity_check($year) && sanity_check($month) && sanity_check($day)) {
    print "Content-type: text/plain\n\n";
    print "Need year, month, day parameters\n";
    exit 0;
}

my $index_html = get('http://www.highland.gov.uk/yourenvironment/planning/planningapplications/weekly-planning-bw-lists.htm')
    or die "couldn't fetch index page";

my $tree = HTML::TreeBuilder->new;
# $tree->parse_file('weekly-planning-bw-lists.htm');
$tree->parse(decode_utf8($index_html));
$tree->eof;

# Single quotes on purpose: \xa0 must reach the regex engine unexpanded so
# the heading matches with either a plain or a non-breaking space.
my $monthyear_re = strftime('%B[ \xa0]%Y', 0, 0, 0, 1, $month - 1, $year - 1900);
my ($month_h2) = $tree->look_down(
    "_tag", "h2",
    sub { $_[0]->as_text =~ /$monthyear_re/ }
);
$month_h2 or no_results($year, $month, $day, "Cannot find month header");

# The right sibling may be missing or a bare text node; only an element can
# be searched with look_down.
my $month_list = $month_h2->right;
ref $month_list or no_results($year, $month, $day, "Cannot find month list");

my $day_re = strftime('Planning Applications (?:[A-Za-z0-9 ]*?to )?%b[a-z]* ?%e[a-z]', 0, 0, 0, $day, $month - 1, $year - 1900);
my ($day_link) = $month_list->look_down(
    "_tag", "a",
    sub { $_[0]->as_text =~ /$day_re/ }
);
$day_link or no_results($year, $month, $day, "Cannot find day link");

my $href = $day_link->attr('href');
defined $href or no_results($year, $month, $day, "Day link has no href");
my $day_absurl = "http://www.highland.gov.uk" . $href;

# get() returns undef when the download fails.
my $pdf = get($day_absurl);
defined $pdf or no_results($year, $month, $day, "Cannot fetch $day_absurl");

my ($fh, $filename) = tempfile(SUFFIX => ".pdf");
binmode $fh;
my $written = print {$fh} $pdf;
$written = close($fh) && $written;
unless ($written) {
    my $error = $!;
    unlink $filename;
    die "cannot write temporary file $filename: $error";
}

print "Content-type: text/xml\n\n";

system("./Highland.pl", $filename, $day_absurl);
my $status = $?;    # child status lives in $?, not in $| (the autoflush flag)

# Remove the temporary file before reporting a child failure so it is not
# left behind.
my $unlinked     = unlink $filename;
my $unlink_error = $!;
$status == 0 or die "system failed: $status";
$unlinked or die "cannot unlink temporary file $filename: $unlink_error";
| @@ -0,0 +1,72 @@ | |||||
| #!/usr/bin/perl -w | |||||
| use strict; | |||||
| use XML::Writer; | |||||
# Usage: Highland.pl PDF_FILE INFO_URL
#
# Runs "pdftotext -layout" on PDF_FILE and prints a <planning> XML document
# on standard output.  A line starting with "Ref Number" opens a record; the
# following lines are scanned for the "Location of Works", "Description of
# Works", "Case Officer" and "Validation Date" labels.  The record ends at
# the first line holding only a newline once a "Case Officer" line has been
# read (or at end of input).  INFO_URL is copied into every record as
# info_url.

@ARGV >= 2 or die "usage: $0 PDF_FILE INFO_URL\n";
my ($file, $info_url) = @ARGV;

my $writer = XML::Writer->new(DATA_MODE => 1, DATA_INDENT => 2);

$writer->xmlDecl("UTF-8");
$writer->startTag("planning");
$writer->dataElement("authority_name", "Highland Council");
$writer->dataElement("authority_short_name", "Highland");
$writer->startTag("applications");

open(my $fh, '-|', "pdftotext", "-layout", $file, "-") or die "open failed: $!";

while (my $line = <$fh>) {
    if ($line =~ /^\s*Ref Number\s*(\S+)/) {
        my $refno = $1;

        my $address      = "";
        my $proposal     = "";
        my $case_officer = "";
        my $date_received;
        my $cur_field;    # reference to the field being accumulated, if any
        my $near_end;     # set once the "Case Officer" line has been read

        while (1) {
            chomp $line;
            $line =~ s/^\s+//;
            $line =~ s/\s+$//;

            if ($line =~ s/^Location of Works//) {
                $cur_field = \$address;
            }
            elsif ($line =~ s/^Description of Works//) {
                $cur_field = \$proposal;
            }
            elsif ($line =~ s/^Case Officer//) {
                $cur_field = \$case_officer;
            }
            elsif (   ($line =~ s/^Community Council//)
                   || ($line =~ s/^Applicant Name//)
                   || ($line =~ s/^Applicant Address//)) {
                # Fields that are recognised but not reported.
                $cur_field = undef;
            }
            elsif ($line =~ /^Validation Date\s*(\S+)/) {
                $date_received = $1;
                $cur_field = undef;
            }

            $line =~ s/^\s+//;
            if (defined $cur_field) {
                $$cur_field .= " " if $$cur_field ne "";
                $$cur_field .= $line;
            }

            last unless defined($line = <$fh>);
            # The line is not chomped yet, so length 1 means "newline only".
            last if $near_end && length($line) == 1;
            $near_end = 1 if $line =~ /^\s*Case Officer/;
        }

        my $postcode = "None";
        if ($address =~ /([A-Z][A-Z]?\d(?:\d|[A-Z])? ?\d[A-Z][A-Z])/) {
            $postcode = $1;
        }

        # The first e-mail-like token in the case officer text, if any.
        my $comment_url = "None";
        if ($case_officer =~ /([A-Za-z0-9\.]+\@[A-Za-z0-9\.]+)/) {
            $comment_url = $1;
        }

        # A record without a "Validation Date" line must not hand undef to
        # the writer.
        $date_received = "" unless defined $date_received;

        $writer->startTag("application");
        $writer->dataElement("council_reference", $refno);
        $writer->dataElement("address", $address);
        $writer->dataElement("postcode", $postcode);
        $writer->dataElement("description", $proposal);
        $writer->dataElement("info_url", $info_url);
        $writer->dataElement("comment_url", $comment_url);
        $writer->dataElement("date_received", $date_received);
        $writer->endTag;
    }
}

# close() on a pipe reports the child's failure; note it without changing
# the output that has already been produced.
close($fh) or warn "pdftotext failed on $file (status $?)\n";

$writer->endTag;
$writer->endTag;
$writer->end;
| @@ -208,7 +208,7 @@ if __name__ == '__main__': | |||||
| # parser = OcellaParser("Bridgend", "Bridgend", "http://eplan.bridgend.gov.uk:7778/portal/page?_pageid=55,31779&_dad=portal&_schema=PORTAL") | # parser = OcellaParser("Bridgend", "Bridgend", "http://eplan.bridgend.gov.uk:7778/portal/page?_pageid=55,31779&_dad=portal&_schema=PORTAL") | ||||
| parser = OcellaParser("Havering", "Havering", "http://planning.havering.gov.uk/portal/page?_pageid=33,1026&_dad=portal&_schema=PORTAL") | parser = OcellaParser("Havering", "Havering", "http://planning.havering.gov.uk/portal/page?_pageid=33,1026&_dad=portal&_schema=PORTAL") | ||||
| # parser = OcellaParser("Castle Point", "Castle Point", "http://planning.castlepoint.gov.uk/portal/page?_pageid=35,38205&_dad=portal&_schema=PORTAL") | # parser = OcellaParser("Castle Point", "Castle Point", "http://planning.castlepoint.gov.uk/portal/page?_pageid=35,38205&_dad=portal&_schema=PORTAL") | ||||
| # parser = OcellaParser("Great Yarmouth", "Great Yarmouth", "http://planning.great-yarmouth.gov.uk/portal/page/portal/plan/weekly") | |||||
| parser = OcellaParser("Great Yarmouth", "Great Yarmouth", "http://planning.great-yarmouth.gov.uk/portal/page/portal/plan/weekly") | |||||
| print parser.getResults(21,5,2008) | print parser.getResults(21,5,2008) | ||||
| @@ -31,3 +31,9 @@ | |||||
| "Maldon.py", "420" | "Maldon.py", "420" | ||||
| "Medway.py", "420" | "Medway.py", "420" | ||||
| "Shropshire.py", "420" | "Shropshire.py", "420" | ||||
| "Brentwood.pl", "493" | |||||
| "Brentwood.cgi", "493" | |||||
| "Glasgow.pl", "493" | |||||
| "Glasgow.cgi", "493" | |||||
| "Highland.pl", "493" | |||||
| "Highland.cgi", "493" | |||||