Explorar el Código

Fix postcode to location code.

Delete some files from cgi-bin which are now moved in by the deployment 
script.
master
duncan.parkes hace 17 años
padre
commit
6e8fd50523
Se han modificado 4 ficheros con 14 adiciones y 412 borrados
  1. +0
    -122
      cgi-bin/EastHerts.cgi
  2. +0
    -98
      cgi-bin/NorthHerts.cgi
  3. +0
    -162
      cgi-bin/broxbourne.cgi
  4. +14
    -30
      docs/include/scraper_support.php

+ 0
- 122
cgi-bin/EastHerts.cgi Ver fichero

@@ -1,122 +0,0 @@
#!/usr/local/bin/perl

use strict;
use warnings;

use CGI qw(:cgi);
use HTML::TreeBuilder;
use LWP::UserAgent;
use XML::Writer;

# Endpoints of the East Herts planning search: the search form, the
# per-application detail page and the per-application comment page
our $SearchURL = "http://e-services.eastherts.gov.uk/swiftlg/apas/run/WPHAPPCRITERIA";
our $InfoURL = "http://e-services.eastherts.gov.uk/swiftlg/apas/run/WPHAPPDETAIL.DisplayUrl?theApnID=";
our $CommentURL = "http://e-services.eastherts.gov.uk/swiftlg/apas/run/wphmakerep.displayURL?ApnID=";

# Parse the incoming CGI request
my $query = CGI->new();

# Assemble the day/month/year string for the requested date from the
# three request parameters
my $day = $query->param("day");
my $month = $query->param("month");
my $year = $query->param("year");
my $date = $day . "/" . $month . "/" . $year;

# User agent shared by do_get and do_post
our $UA = LWP::UserAgent->new(env_proxy => 1);

# Run the search with the same date as both ends of the registration range
my %search_form = (
    "REGFROMDATE.MAINBODY.WPACIS.1." => $date,
    "REGTODATE.MAINBODY.WPACIS.1." => $date,
    "SEARCHBUTTON.MAINBODY.WPACIS.1." => "Search",
);
my $page = do_post($SearchURL, \%search_form);

# The search succeeded, so start the HTTP response
print $query->header(-type => "text/xml");

# All XML output goes through this writer (also used by output_applications)
my $Writer = XML::Writer->new(DATA_MODE => 1);

# Document preamble: authority details followed by the application list
$Writer->xmlDecl("UTF-8");
$Writer->startTag("planning");
$Writer->dataElement("authority_name", "East Herts Council");
$Writer->dataElement("authority_short_name", "East Herts");
$Writer->startTag("applications");

# Applications listed on the first results page
output_applications($page);

# The first page links to any further results pages; collect those links
# up front and then fetch and output each page in turn
my @more_links = $page->look_down("_tag" => "a", "href" => qr/^WPHAPPSEARCHRES\.displayResultsURL/);

for my $anchor (@more_links) {
    my $next_url = URI->new_abs($anchor->attr("href"), $SearchURL);

    $page = do_get($next_url);
    output_applications($page);
}

# Close the elements opened above and finish the document
$Writer->endTag("applications");
$Writer->endTag("planning");
$Writer->end();

exit 0;

# Issue a GET request through the shared user agent and parse the body.
#
# All arguments are handed unchanged to the user agent's get method.
# Dies with the HTTP status line if the request was not successful,
# otherwise returns the response content parsed into an HTML tree.
sub do_get {
    my $reply = $UA->get(@_);

    if (!$reply->is_success) {
        die $reply->status_line;
    }

    return HTML::TreeBuilder->new_from_content($reply->content);
}

# Issue a POST request through the shared user agent and parse the body.
#
# All arguments are handed unchanged to the user agent's post method.
# Dies with the HTTP status line if the request was not successful,
# otherwise returns the response content parsed into an HTML tree.
sub do_post {
    my $reply = $UA->post(@_);

    if (!$reply->is_success) {
        die $reply->status_line;
    }

    return HTML::TreeBuilder->new_from_content($reply->content);
}

# Output applications from a results page
#
# Takes the parsed HTML tree of one results page and writes one
# <application> element to the file-level $Writer for every table row
# that has at least three cells (reference, description, address).
# A page that contains no results table contributes nothing.
sub output_applications
{
    my $page = shift;

    # Find the result table
    my $table = $page->look_down("_tag" => "table", "cellspacing" => "2", "cellpadding" => "2");

    # look_down yields nothing when no element matches; treat that as a
    # page without results instead of dying half way through the XML
    return unless defined $table;

    # Process each row of the results
    foreach my $row ($table->look_down("_tag" => "tr"))
    {
        my @cells = $row->look_down("_tag" => "td");

        # Rows with fewer than three cells are not application rows
        next unless @cells >= 3;

        my $reference = $cells[0]->as_trimmed_text;
        my $description = $cells[1]->as_trimmed_text;
        my $address = $cells[2]->as_trimmed_text;

        # Default to an empty string so that the writer is never handed
        # an undefined value when the address carries no postcode
        my $postcode = "";

        # A postcode, when present, is the last thing in the address
        if ($address =~ /\s+([A-Z]+\d+\s+\d+[A-Z]+)$/)
        {
            $postcode = $1;
        }

        $Writer->startTag("application");
        $Writer->dataElement("council_reference", $reference);
        $Writer->dataElement("address", $address);
        $Writer->dataElement("postcode", $postcode);
        $Writer->dataElement("description", $description);
        $Writer->dataElement("info_url", $InfoURL . $reference);
        $Writer->dataElement("comment_url", $CommentURL . $reference);
        # $date is the file-level search date, not a value from the row
        $Writer->dataElement("date_received", $date);
        $Writer->endTag("application");
    }

    return;
}

+ 0
- 98
cgi-bin/NorthHerts.cgi Ver fichero

@@ -1,98 +0,0 @@
#!/usr/bin/perl

use strict;
use warnings;

use CGI qw(:cgi);
use HTML::TreeBuilder;
use LWP::UserAgent;
use XML::Writer;

# The master URLs for the North Hertfordshire planning search
our $SearchURL = "http://www.north-herts.gov.uk/dcdataonline/Pages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch";

# We're a CGI script...
my $query = CGI->new();

# Get the date to fetch, as day/month/year built from the request parameters
my $date = $query->param("day") . "/" . $query->param("month") . "/" . $query->param("year");

# Construct an LWP user agent (shared with do_get and do_post)
our $UA = LWP::UserAgent->new(env_proxy => 1);

# Fetch the search page
my $page = do_get($SearchURL);

# Find the form submission URL; the form's action is resolved against
# the search page URL
my $form = $page->look_down("_tag" => "form", name => "frmSearch");
die "Search form 'frmSearch' not found at $SearchURL" unless defined $form;
my $url = URI->new_abs($form->attr("action"), $SearchURL);

# Do the search, using the requested date as both ends of the range
$page = do_post($url, {"regdate1" => $date, "regdate2" => $date});

# Output an HTTP response header
print $query->header(-type => "text/xml");

# Create an XML output stream
my $Writer = XML::Writer->new(DATA_MODE => 1);

# Output the XML header data
$Writer->xmlDecl("UTF-8");
$Writer->startTag("planning");
$Writer->dataElement("authority_name", "North Hertfordshire District Council");
$Writer->dataElement("authority_short_name", "North Hertfordshire");
$Writer->startTag("applications");

# Process each table of the results - every application has its own table
foreach my $table ($page->look_down("_tag" => "table", "class" => "results-table"))
{
    # Flatten the table into the list of all its cells, row by row
    my @cells = map { $_->look_down("_tag" => "td") } $table->look_down("_tag" => "tr");

    # Cells 0 to 4 are read below, so skip any table that is too short
    # to be an application
    next if @cells < 5;

    my $reference = $cells[0]->as_trimmed_text;
    my $infourl = $cells[0]->look_down("_tag" => "a")->attr("href");
    # The date reported by the page for this application, not the
    # date that was searched for
    my $received = $cells[1]->as_trimmed_text;
    my $address = $cells[3]->as_trimmed_text;
    my $description = $cells[4]->as_trimmed_text;

    # Default to an empty string so that the writer is never handed an
    # undefined value when the address carries no postcode
    my $postcode = "";

    # A postcode, when present, is the last thing in the address
    if ($address =~ /\s+([A-Z]+\d+\s+\d+[A-Z]+)$/)
    {
        $postcode = $1;
    }

    $Writer->startTag("application");
    $Writer->dataElement("council_reference", $reference);
    $Writer->dataElement("address", $address);
    $Writer->dataElement("postcode", $postcode);
    $Writer->dataElement("description", $description);
    $Writer->dataElement("info_url", $infourl);
    $Writer->dataElement("comment_url", "mailto:service\@north-herts.gov.uk?subject=Comment on Planning Application");
    $Writer->dataElement("date_received", $received);
    $Writer->endTag("application");
}

# Finish off XML output
$Writer->endTag("applications");
$Writer->endTag("planning");
$Writer->end();

exit 0;

# Issue a GET request through the shared user agent and parse the body.
#
# All arguments are handed unchanged to the user agent's get method.
# Dies with the HTTP status line if the request was not successful,
# otherwise returns the response content parsed into an HTML tree.
sub do_get {
    my $reply = $UA->get(@_);

    if (!$reply->is_success) {
        die $reply->status_line;
    }

    return HTML::TreeBuilder->new_from_content($reply->content);
}

# Issue a POST request through the shared user agent and parse the body.
#
# The caller's arguments are passed on to the user agent's post method
# followed by a Content_Type of "form-data". Dies with the HTTP status
# line if the request was not successful, otherwise returns the response
# content parsed into an HTML tree.
sub do_post {
    my $reply = $UA->post(@_, Content_Type => "form-data");

    if (!$reply->is_success) {
        die $reply->status_line;
    }

    return HTML::TreeBuilder->new_from_content($reply->content);
}

+ 0
- 162
cgi-bin/broxbourne.cgi Ver fichero

@@ -1,162 +0,0 @@
#!/usr/bin/perl

use strict;
use warnings;

use CGI qw(:cgi);
use DateTime;
use HTML::TreeBuilder;
use LWP::UserAgent;
use XML::Writer;

# The master URL for the Broxbourne planning search
our $SearchURL = "http://www2.broxbourne.gov.uk/planningsearch/webform1.aspx";

# We're a CGI script...
my $query = CGI->new();

# Get the date as an offset from 2000-01-01 - that number of days is what
# gets posted back as the argument for the two calendar controls below.
# Each param call is forced into scalar context: in list context a missing
# or repeated parameter would shift or inject named arguments.
my $epoch = DateTime->new(year => 2000, month => 1, day => 1);
my $querydate = DateTime->new(year => scalar $query->param("year"),
                              month => scalar $query->param("month"),
                              day => scalar $query->param("day"));
$querydate = $querydate->delta_days($epoch)->delta_days;

# Construct an LWP user agent (shared with do_post)
our $UA = LWP::UserAgent->new(env_proxy => 1);

# Post the URL to get an initial blank form
my $state = get_state(do_post());

# Post each date in turn to build up the state - you can thank
# Microsoft and ASP.NET for the horrible way we have to do this
# by posting each argument in turn to build up the state
$state = get_state(do_post_back($state, 'DateSelector1$Calendar1', $querydate));
$state = get_state(do_post_back($state, 'DateSelector2$Calendar1', $querydate));

# Output an HTTP response header
print $query->header(-type => "text/xml");

# Create an XML output stream
my $Writer = XML::Writer->new(DATA_MODE => 1);

# Output the XML header data
$Writer->xmlDecl("UTF-8");
$Writer->startTag("planning");
$Writer->dataElement("authority_name", "Borough of Broxbourne");
$Writer->dataElement("authority_short_name", "Broxbourne");
$Writer->startTag("applications");

# Get the arguments for the search...
my $args = {
    "Srch" => "rb1",
    "__VIEWSTATE" => $state,
    "btnSearch" => "Search",
    "tbReference" => "",
    "tbRef2" => ""
};

# ...and then (at last) we can do the search!
my $page = do_post($args);

# Loop processing pages of results
while ($page)
{
    my $table = $page->look_down("_tag" => "table", "id" => "DataGrid1");

    # Remember the state
    $state = get_state($page);

    # Clear the page for now - this will be reinitialised if we
    # find another page of results to make us go round the loop
    # all over again
    undef $page;

    # Check that we found a table - searches that find no results
    # produce a page with no table in it
    if ($table)
    {
        # Process each row of the results
        foreach my $row ($table->look_down("_tag" => "tr"))
        {
            my @cells = $row->look_down("_tag" => "td");

            # A row without any cells can be neither an application
            # nor the pager row, so there is nothing to look at
            next unless @cells;

            if ($cells[0]->look_down("_tag" => "input"))
            {
                # Application row: the first cell holds an input element
                # and the data follows in cells 1 to 4
                my $reference = $cells[1]->as_trimmed_text;
                my $date = $cells[2]->as_trimmed_text;
                my $address = $cells[3]->as_trimmed_text;
                my $description = $cells[4]->as_trimmed_text;

                # Default to an empty string so that the writer is never
                # handed an undefined value when there is no postcode
                my $postcode = "";

                # A postcode, when present, is the last thing in the address
                if ($address =~ /\s+([A-Z]+\d+\s+\d+[A-Z]+)$/)
                {
                    $postcode = $1;
                }

                $Writer->startTag("application");
                $Writer->dataElement("council_reference", $reference);
                $Writer->dataElement("address", $address);
                $Writer->dataElement("postcode", $postcode);
                $Writer->dataElement("description", $description);
                $Writer->dataElement("date_received", $date);
                $Writer->endTag("application");
            }
            elsif ($cells[0]->attr("colspan") && $cells[0]->attr("colspan") eq "5")
            {
                # Pager row: a single cell spanning all five columns.
                # Follow the ">" link by replaying its __doPostBack call,
                # which sets $page and so keeps the outer loop going.
                foreach my $link ($cells[0]->look_down("_tag" => "a"))
                {
                    if ($link->as_trimmed_text eq ">" &&
                        $link->attr("href") =~ /^javascript:__doPostBack\('([^\']*)','([^\']*)'\)$/)
                    {
                        $page = do_post_back($state, $1, $2);
                    }
                }
            }
        }
    }
}

# Finish off XML output
$Writer->endTag("applications");
$Writer->endTag("planning");
$Writer->end();

exit 0;

# Pull the view state out of a parsed page so it can be sent back with
# the next request.
#
# Looks up the input element named __VIEWSTATE in the given page and
# returns the content of its value attribute.
sub get_state {
    my ($page) = @_;

    my $field = $page->look_down("_tag" => "input", "name" => "__VIEWSTATE");

    return $field->attr("value");
}

# Imitate the page's doPostBack javascript function.
#
# Takes the current view state, the event target and the event argument,
# posts them to the search page via do_post and returns whatever do_post
# returns. Every "$" in the target is replaced by ":" before posting.
sub do_post_back {
    my ($state, $target, $argument) = @_;

    $target =~ s/\$/:/g;

    my %form = (
        "__EVENTTARGET" => $target,
        "__EVENTARGUMENT" => $argument,
        "__VIEWSTATE" => $state,
    );

    return do_post(\%form);
}

# Send a POST request to the planning search page and parse the body.
#
# Any arguments are passed on to the user agent's post method after the
# search URL. Dies with the HTTP status line if the request was not
# successful, otherwise returns the response content parsed into an
# HTML tree.
sub do_post {
    my $reply = $UA->post($SearchURL, @_);

    if (!$reply->is_success) {
        die $reply->status_line;
    }

    return HTML::TreeBuilder->new_from_content($reply->content);
}

+ 14
- 30
docs/include/scraper_support.php Ver fichero

@@ -287,43 +287,27 @@ function scrape_applications_islington ($search_url, $info_url_base, $comment_ur
return "http://maps.google.co.uk/maps?q=$postcode&z=$zoom";
}

/**
 * Convert a postcode to a location using the streetmap.co.uk postcode search.
 *
 * We don't actually need to fetch the page: everything we need is in the
 * x= and y= parameters of the URL the search redirects to, so only the
 * response headers are requested.
 *
 * @param string $postcode Postcode to look up.
 * @return array Two-element array with x at index 0 and y at index 1.
 *               Both are 0 when the lookup fails or no redirect carrying
 *               coordinates is found.
 */
function postcode_to_location($postcode){
    $not_found = array(0, 0);

    // Lower-case the postcode and encode it for use in the query string
    // (urlencode turns spaces into "+", as the search URL expects)
    $clean_postcode = urlencode(strtolower($postcode));

    $url = "http://www.streetmap.co.uk/newsearch.srf?type=Postcode&name=" . $clean_postcode;

    // get_headers returns false when the request itself fails
    $headers = get_headers($url, 1);
    if ($headers === false) {
        return $not_found;
    }

    // Header names are case-insensitive, so normalise before the lookup
    $headers = array_change_key_case($headers, CASE_LOWER);
    if (!isset($headers["location"])) {
        return $not_found;
    }

    // With more than one redirect the Location entry is an array of
    // values; use the first one that carries the coordinates
    $location_regex = '/x=(\d+)&y=(\d+)&/';
    foreach ((array) $headers["location"] as $location) {
        if (preg_match($location_regex, $location, $matches)) {
            return array($matches[1], $matches[2]);
        }
    }

    return $not_found;
}
function valid_email ($string) {
$valid = false;


||||||
xxxxxxxxxx
000:0
Cargando…
Cancelar
Guardar