Переглянути джерело

add 7 new scrapers

master
duncan.parkes 17 роки тому
джерело
коміт
2ea24d8d25
92 змінених файлів з 213 додано та 304 видалено
  1. +0
    -0
      cgi-bin/Allerdale.cgi
  2. +0
    -0
      cgi-bin/Alnwick.cgi
  3. +0
    -0
      cgi-bin/Angus.cgi
  4. +0
    -0
      cgi-bin/Aylesbury Vale.cgi
  5. +0
    -0
      cgi-bin/Barrow.cgi
  6. +0
    -0
      cgi-bin/Basildon.cgi
  7. +0
    -0
      cgi-bin/Bath.cgi
  8. +0
    -0
      cgi-bin/Bexley.cgi
  9. +0
    -0
      cgi-bin/Blaby.cgi
  10. +0
    -0
      cgi-bin/Bolsover.cgi
  11. +0
    -0
      cgi-bin/Bristol.cgi
  12. +0
    -0
      cgi-bin/Buckinghamshire.cgi
  13. +0
    -0
      cgi-bin/Chelmsford.cgi
  14. +0
    -0
      cgi-bin/Cherwell.cgi
  15. +0
    -0
      cgi-bin/Chorley.cgi
  16. +0
    -0
      cgi-bin/City of London.cgi
  17. +0
    -0
      cgi-bin/Cornwall.cgi
  18. +0
    -0
      cgi-bin/Coventry.cgi
  19. +0
    -79
      cgi-bin/DateTime/Format/DateParse.pm
  20. +0
    -0
      cgi-bin/Denbighshire.cgi
  21. +0
    -0
      cgi-bin/Doncaster.cgi
  22. +0
    -0
      cgi-bin/Dundee.cgi
  23. +0
    -0
      cgi-bin/Durham.cgi
  24. +0
    -0
      cgi-bin/Ealing.cgi
  25. +0
    -0
      cgi-bin/Easington.cgi
  26. +0
    -0
      cgi-bin/East Devon.cgi
  27. +29
    -0
      cgi-bin/East Dorset.cgi
  28. +0
    -0
      cgi-bin/Edinburgh.cgi
  29. +0
    -0
      cgi-bin/Epsom and Ewell.cgi
  30. +0
    -0
      cgi-bin/Fenland.cgi
  31. +29
    -0
      cgi-bin/Gateshead.cgi
  32. +0
    -0
      cgi-bin/Gedling.cgi
  33. +29
    -0
      cgi-bin/Gloucestershire.cgi
  34. +0
    -0
      cgi-bin/Gravesham.cgi
  35. +0
    -0
      cgi-bin/Hammersmith and Fulham.cgi
  36. +0
    -0
      cgi-bin/Haringey.cgi
  37. +0
    -0
      cgi-bin/Harrogate.cgi
  38. +0
    -0
      cgi-bin/Hart.cgi
  39. +0
    -0
      cgi-bin/Hartlepool.cgi
  40. +0
    -0
      cgi-bin/High Peak.cgi
  41. +0
    -0
      cgi-bin/Huntingdonshire.cgi
  42. +0
    -0
      cgi-bin/Kerrier.cgi
  43. +0
    -0
      cgi-bin/Knowsley.cgi
  44. +0
    -0
      cgi-bin/Lancaster.cgi
  45. +0
    -0
      cgi-bin/Luton.cgi
  46. +0
    -0
      cgi-bin/Malvern Hills.cgi
  47. +0
    -0
      cgi-bin/Mid Devon.cgi
  48. +0
    -0
      cgi-bin/Milton Keynes.cgi
  49. +0
    -0
      cgi-bin/NW Leicestershire.cgi
  50. +29
    -0
      cgi-bin/Newcastle-under-Lyme.cgi
  51. +0
    -0
      cgi-bin/Newham.cgi
  52. +0
    -0
      cgi-bin/North Tyneside.cgi
  53. +0
    -0
      cgi-bin/North Warwickshire.cgi
  54. +0
    -0
      cgi-bin/Northumberland.cgi
  55. +0
    -0
      cgi-bin/Oadby and Wigston.cgi
  56. +0
    -0
      cgi-bin/Oswestry.cgi
  57. +0
    -0
      cgi-bin/Peterborough.cgi
  58. +0
    -0
      cgi-bin/Portsmouth.cgi
  59. +0
    -0
      cgi-bin/Redditch.cgi
  60. +0
    -0
      cgi-bin/Rushmoor.cgi
  61. +0
    -0
      cgi-bin/Scarborough.cgi
  62. +0
    -0
      cgi-bin/Sevenoaks.cgi
  63. +0
    -0
      cgi-bin/South Bucks.cgi
  64. +0
    -0
      cgi-bin/South Ribble.cgi
  65. +0
    -0
      cgi-bin/South Staffordshire.cgi
  66. +0
    -0
      cgi-bin/SouthOxfordshire.cgi
  67. +0
    -0
      cgi-bin/Southampton.cgi
  68. +0
    -0
      cgi-bin/Spelthorne.cgi
  69. +0
    -0
      cgi-bin/St Helens.cgi
  70. +0
    -0
      cgi-bin/Stevenage.cgi
  71. +0
    -0
      cgi-bin/Stirling.cgi
  72. +0
    -0
      cgi-bin/Stockton-On-Tees.cgi
  73. +0
    -0
      cgi-bin/Stratford.cgi
  74. +0
    -0
      cgi-bin/Sunderland.cgi
  75. +0
    -0
      cgi-bin/Teignbridge.cgi
  76. +0
    -0
      cgi-bin/Test Valley.cgi
  77. +0
    -0
      cgi-bin/Tonbridge.cgi
  78. +0
    -0
      cgi-bin/Torbay.cgi
  79. +29
    -0
      cgi-bin/Vale Royal.cgi
  80. +0
    -0
      cgi-bin/Waveney.cgi
  81. +0
    -0
      cgi-bin/Wear Valley.cgi
  82. +0
    -0
      cgi-bin/Wellingborough.cgi
  83. +0
    -0
      cgi-bin/West Berkshire.cgi
  84. +0
    -0
      cgi-bin/West Lancashire.cgi
  85. +0
    -0
      cgi-bin/West Norfolk.cgi
  86. +29
    -0
      cgi-bin/Winchester.cgi
  87. +0
    -0
      cgi-bin/Woking.cgi
  88. +29
    -0
      cgi-bin/Wolverhampton.cgi
  89. +0
    -0
      cgi-bin/York.cgi
  90. +0
    -160
      cgi-bin/broxbourne.cgi
  91. +0
    -64
      cgi-bin/westminster.rb
  92. +10
    -1
      python_scrapers/PublicAccessSites.csv

+ 0
- 0
cgi-bin/Allerdale.cgi Переглянути файл


+ 0
- 0
cgi-bin/Alnwick.cgi Переглянути файл


+ 0
- 0
cgi-bin/Angus.cgi Переглянути файл


+ 0
- 0
cgi-bin/Aylesbury Vale.cgi Переглянути файл


+ 0
- 0
cgi-bin/Barrow.cgi Переглянути файл


+ 0
- 0
cgi-bin/Basildon.cgi Переглянути файл


+ 0
- 0
cgi-bin/Bath.cgi Переглянути файл


+ 0
- 0
cgi-bin/Bexley.cgi Переглянути файл


+ 0
- 0
cgi-bin/Blaby.cgi Переглянути файл


+ 0
- 0
cgi-bin/Bolsover.cgi Переглянути файл


+ 0
- 0
cgi-bin/Bristol.cgi Переглянути файл


+ 0
- 0
cgi-bin/Buckinghamshire.cgi Переглянути файл


+ 0
- 0
cgi-bin/Chelmsford.cgi Переглянути файл


+ 0
- 0
cgi-bin/Cherwell.cgi Переглянути файл


+ 0
- 0
cgi-bin/Chorley.cgi Переглянути файл


+ 0
- 0
cgi-bin/City of London.cgi Переглянути файл


+ 0
- 0
cgi-bin/Cornwall.cgi Переглянути файл


+ 0
- 0
cgi-bin/Coventry.cgi Переглянути файл


+ 0
- 79
cgi-bin/DateTime/Format/DateParse.pm Переглянути файл

@@ -1,79 +0,0 @@
package DateTime::Format::DateParse;

# Copyright (C) 2005-6 Joshua Hoblitt
#
# $Id: DateParse.pm 3517 2006-09-17 23:10:10Z jhoblitt $

use strict;

use vars qw($VERSION);
$VERSION = '0.04';

use DateTime;
use DateTime::TimeZone;
use Date::Parse qw( strptime );
use Time::Zone qw( tz_offset );

# Parse a date string with Date::Parse and build a DateTime from the result.
#
# Called as a class method:
#   DateTime::Format::DateParse->parse_datetime( $date [, $zone] )
#
#   $date - the string to parse; handed straight to strptime().
#   $zone - optional time zone; handed to strptime() and, when true, also
#           used to pick the time zone of the returned object.
#
# Returns a DateTime object, or undef when strptime() returns nothing.
# Field validation is left to DateTime->new(), which may die.
sub parse_datetime {
    my ($class, $date, $zone) = @_;

    # str2time() calls strptime() internally so it's more efficient to use
    # strptime() directly. However, the extra validation done by using
    # DateTime->new() instead of DateTime->from_epoch() may make it into a net
    # loss. In the end, it turns out that strptime()'s offset information is
    # needed anyways.
    my @t = strptime( $date, $zone );

    # Deliberately an explicit undef (not a bare return) so that existing
    # list-context callers keep receiving exactly one value.
    return undef unless @t;

    my ($ss, $mm, $hh, $day, $month, $year, $offset) = @t;

    my %p;
    if ( $ss ) {
        my $whole = int $ss;

        # Floating-point arithmetic can leave the scaled fraction slightly
        # off a whole number (e.g. 122999999.99999933), which is not a valid
        # integer nanosecond count. Round to the nearest nanosecond and keep
        # the result below one full second.
        my $nanos = int( ( $ss - $whole ) * 1e9 + 0.5 );
        $nanos = 999_999_999 if $nanos > 999_999_999;

        $p{ nanosecond } = $nanos if $nanos;
        $p{ second }     = $whole;
    }
    $p{ minute } = $mm  if $mm;
    $p{ hour }   = $hh  if $hh;
    $p{ day }    = $day if $day;

    # The month from strptime() is shifted by one before use. A zero or
    # missing month is left unset, relying on DateTime->new()'s own default.
    $p{ month } = $month + 1 if $month;

    # NOTE(review): a $year of 0 is treated like a missing year and falls
    # back to the current year - confirm that this is intended.
    $p{ year } = $year ? $year + 1900 : DateTime->now->year;

    # unless there is an explicit ZONE, Date::Parse seems to parse date only
    # formats, eg. "1995-01-24", as being in the 'local' timezone.
    unless ( defined $zone || defined $offset ) {
        return DateTime->new(
            %p,
            time_zone => 'local',
        );
    }

    if ( $zone ) {
        if ( DateTime::TimeZone->is_valid_name( $zone ) ) {
            return DateTime->new(
                %p,
                time_zone => $zone,
            );
        } else {
            # attempt to convert Time::Zone tz's into an offset
            return DateTime->new(
                %p,
                time_zone =>
                    # not an Olson timezone, no DST info
                    DateTime::TimeZone::offset_as_string( tz_offset( $zone ) ),
            );
        }
    }

    # No usable zone name: fall back to the offset reported by strptime().
    return DateTime->new(
        %p,
        time_zone =>
            # not an Olson timezone, no DST info
            DateTime::TimeZone::offset_as_string( $offset ),
    );
}

1;

__END__

+ 0
- 0
cgi-bin/Denbighshire.cgi Переглянути файл


+ 0
- 0
cgi-bin/Doncaster.cgi Переглянути файл


+ 0
- 0
cgi-bin/Dundee.cgi Переглянути файл


+ 0
- 0
cgi-bin/Durham.cgi Переглянути файл


+ 0
- 0
cgi-bin/Ealing.cgi Переглянути файл


+ 0
- 0
cgi-bin/Easington.cgi Переглянути файл


+ 0
- 0
cgi-bin/East Devon.cgi Переглянути файл


+ 29
- 0
cgi-bin/East Dorset.cgi Переглянути файл

@@ -0,0 +1,29 @@
#!/usr/local/bin/python

# This is the parser for East Dorset District Council.
# it is generated from the file CGITemplate

import cgi
import cgitb
#cgitb.enable(display=0, logdir="/tmp")


form = cgi.FieldStorage()
day = form.getfirst('day')
month = form.getfirst('month')
year = form.getfirst('year')


authority_name = "East Dorset District Council"
authority_short_name = "East Dorset"
base_url = "http://193.243.228.16/PublicAccess/dc/"

import PublicAccess

parser = PublicAccess.PublicAccessParser(authority_name, authority_short_name, base_url)

xml = parser.getResults(day, month, year)

print "Content-Type: text/xml" # XML is following
print
print xml # print the xml

+ 0
- 0
cgi-bin/Edinburgh.cgi Переглянути файл


+ 0
- 0
cgi-bin/Epsom and Ewell.cgi Переглянути файл


+ 0
- 0
cgi-bin/Fenland.cgi Переглянути файл


+ 29
- 0
cgi-bin/Gateshead.cgi Переглянути файл

@@ -0,0 +1,29 @@
#!/usr/local/bin/python

# This is the parser for Gateshead Metropolitan Borough Council.
# it is generated from the file CGITemplate

import cgi
import cgitb
#cgitb.enable(display=0, logdir="/tmp")


form = cgi.FieldStorage()
day = form.getfirst('day')
month = form.getfirst('month')
year = form.getfirst('year')


authority_name = "Gateshead Metropolitan Borough Council"
authority_short_name = "Gateshead"
base_url = "http://planning.gateshead.gov.uk/publicaccess/tdc/"

import PublicAccess

parser = PublicAccess.PublicAccessParser(authority_name, authority_short_name, base_url)

xml = parser.getResults(day, month, year)

print "Content-Type: text/xml" # XML is following
print
print xml # print the xml

+ 0
- 0
cgi-bin/Gedling.cgi Переглянути файл


+ 29
- 0
cgi-bin/Gloucestershire.cgi Переглянути файл

@@ -0,0 +1,29 @@
#!/usr/local/bin/python

# This is the parser for Gloucestershire County Council.
# it is generated from the file CGITemplate

import cgi
import cgitb
#cgitb.enable(display=0, logdir="/tmp")


form = cgi.FieldStorage()
day = form.getfirst('day')
month = form.getfirst('month')
year = form.getfirst('year')


authority_name = "Gloucestershire County Council"
authority_short_name = "Gloucestershire"
base_url = "http://planning.gloucestershire.gov.uk/PublicAccess/tdc/"

import PublicAccess

parser = PublicAccess.PublicAccessParser(authority_name, authority_short_name, base_url)

xml = parser.getResults(day, month, year)

print "Content-Type: text/xml" # XML is following
print
print xml # print the xml

+ 0
- 0
cgi-bin/Gravesham.cgi Переглянути файл


+ 0
- 0
cgi-bin/Hammersmith and Fulham.cgi Переглянути файл


+ 0
- 0
cgi-bin/Haringey.cgi Переглянути файл


+ 0
- 0
cgi-bin/Harrogate.cgi Переглянути файл


+ 0
- 0
cgi-bin/Hart.cgi Переглянути файл


+ 0
- 0
cgi-bin/Hartlepool.cgi Переглянути файл


+ 0
- 0
cgi-bin/High Peak.cgi Переглянути файл


+ 0
- 0
cgi-bin/Huntingdonshire.cgi Переглянути файл


+ 0
- 0
cgi-bin/Kerrier.cgi Переглянути файл


+ 0
- 0
cgi-bin/Knowsley.cgi Переглянути файл


+ 0
- 0
cgi-bin/Lancaster.cgi Переглянути файл


+ 0
- 0
cgi-bin/Luton.cgi Переглянути файл


+ 0
- 0
cgi-bin/Malvern Hills.cgi Переглянути файл


+ 0
- 0
cgi-bin/Mid Devon.cgi Переглянути файл


+ 0
- 0
cgi-bin/Milton Keynes.cgi Переглянути файл


+ 0
- 0
cgi-bin/NW Leicestershire.cgi Переглянути файл


+ 29
- 0
cgi-bin/Newcastle-under-Lyme.cgi Переглянути файл

@@ -0,0 +1,29 @@
#!/usr/local/bin/python

# This is the parser for Newcastle-under-Lyme Borough Council.
# it is generated from the file CGITemplate

import cgi
import cgitb
#cgitb.enable(display=0, logdir="/tmp")


form = cgi.FieldStorage()
day = form.getfirst('day')
month = form.getfirst('month')
year = form.getfirst('year')


authority_name = "Newcastle-under-Lyme Borough Council"
authority_short_name = "Newcastle-under-Lyme"
base_url = "http://publicaccess.newcastle-staffs.gov.uk/PublicAccess/tdc/"

import PublicAccess

parser = PublicAccess.PublicAccessParser(authority_name, authority_short_name, base_url)

xml = parser.getResults(day, month, year)

print "Content-Type: text/xml" # XML is following
print
print xml # print the xml

+ 0
- 0
cgi-bin/Newham.cgi Переглянути файл


+ 0
- 0
cgi-bin/North Tyneside.cgi Переглянути файл


+ 0
- 0
cgi-bin/North Warwickshire.cgi Переглянути файл


+ 0
- 0
cgi-bin/Northumberland.cgi Переглянути файл


+ 0
- 0
cgi-bin/Oadby and Wigston.cgi Переглянути файл


+ 0
- 0
cgi-bin/Oswestry.cgi Переглянути файл


+ 0
- 0
cgi-bin/Peterborough.cgi Переглянути файл


+ 0
- 0
cgi-bin/Portsmouth.cgi Переглянути файл


+ 0
- 0
cgi-bin/Redditch.cgi Переглянути файл


+ 0
- 0
cgi-bin/Rushmoor.cgi Переглянути файл


+ 0
- 0
cgi-bin/Scarborough.cgi Переглянути файл


+ 0
- 0
cgi-bin/Sevenoaks.cgi Переглянути файл


+ 0
- 0
cgi-bin/South Bucks.cgi Переглянути файл


+ 0
- 0
cgi-bin/South Ribble.cgi Переглянути файл


+ 0
- 0
cgi-bin/South Staffordshire.cgi Переглянути файл


+ 0
- 0
cgi-bin/SouthOxfordshire.cgi Переглянути файл


+ 0
- 0
cgi-bin/Southampton.cgi Переглянути файл


+ 0
- 0
cgi-bin/Spelthorne.cgi Переглянути файл


+ 0
- 0
cgi-bin/St Helens.cgi Переглянути файл


+ 0
- 0
cgi-bin/Stevenage.cgi Переглянути файл


+ 0
- 0
cgi-bin/Stirling.cgi Переглянути файл


+ 0
- 0
cgi-bin/Stockton-On-Tees.cgi Переглянути файл


+ 0
- 0
cgi-bin/Stratford.cgi Переглянути файл


+ 0
- 0
cgi-bin/Sunderland.cgi Переглянути файл


+ 0
- 0
cgi-bin/Teignbridge.cgi Переглянути файл


+ 0
- 0
cgi-bin/Test Valley.cgi Переглянути файл


+ 0
- 0
cgi-bin/Tonbridge.cgi Переглянути файл


+ 0
- 0
cgi-bin/Torbay.cgi Переглянути файл


+ 29
- 0
cgi-bin/Vale Royal.cgi Переглянути файл

@@ -0,0 +1,29 @@
#!/usr/local/bin/python

# This is the parser for Vale Royal Borough Council.
# it is generated from the file CGITemplate

import cgi
import cgitb
#cgitb.enable(display=0, logdir="/tmp")


form = cgi.FieldStorage()
day = form.getfirst('day')
month = form.getfirst('month')
year = form.getfirst('year')


authority_name = "Vale Royal Borough Council"
authority_short_name = "Vale Royal"
base_url = "http://pa.valeroyal.gov.uk/publicaccess/tdc/"

import PublicAccess

parser = PublicAccess.PublicAccessParser(authority_name, authority_short_name, base_url)

xml = parser.getResults(day, month, year)

print "Content-Type: text/xml" # XML is following
print
print xml # print the xml

+ 0
- 0
cgi-bin/Waveney.cgi Переглянути файл


+ 0
- 0
cgi-bin/Wear Valley.cgi Переглянути файл


+ 0
- 0
cgi-bin/Wellingborough.cgi Переглянути файл


+ 0
- 0
cgi-bin/West Berkshire.cgi Переглянути файл


+ 0
- 0
cgi-bin/West Lancashire.cgi Переглянути файл


+ 0
- 0
cgi-bin/West Norfolk.cgi Переглянути файл


+ 29
- 0
cgi-bin/Winchester.cgi Переглянути файл

@@ -0,0 +1,29 @@
#!/usr/local/bin/python

# This is the parser for Winchester City Council.
# it is generated from the file CGITemplate

import cgi
import cgitb
#cgitb.enable(display=0, logdir="/tmp")


form = cgi.FieldStorage()
day = form.getfirst('day')
month = form.getfirst('month')
year = form.getfirst('year')


authority_name = "Winchester City Council"
authority_short_name = "Winchester"
base_url = "http://win2padmz.winchester.gov.uk/publicaccess/tdc/"

import PublicAccess

parser = PublicAccess.PublicAccessParser(authority_name, authority_short_name, base_url)

xml = parser.getResults(day, month, year)

print "Content-Type: text/xml" # XML is following
print
print xml # print the xml

+ 0
- 0
cgi-bin/Woking.cgi Переглянути файл


+ 29
- 0
cgi-bin/Wolverhampton.cgi Переглянути файл

@@ -0,0 +1,29 @@
#!/usr/local/bin/python

# This is the parser for Wolverhampton City Council.
# it is generated from the file CGITemplate

import cgi
import cgitb
#cgitb.enable(display=0, logdir="/tmp")


form = cgi.FieldStorage()
day = form.getfirst('day')
month = form.getfirst('month')
year = form.getfirst('year')


authority_name = "Wolverhampton City Council"
authority_short_name = "Wolverhampton"
base_url = "http://planningonline.wolverhampton.gov.uk/PublicAccess/dc/"

import PublicAccess

parser = PublicAccess.PublicAccessParser(authority_name, authority_short_name, base_url)

xml = parser.getResults(day, month, year)

print "Content-Type: text/xml" # XML is following
print
print xml # print the xml

+ 0
- 0
cgi-bin/York.cgi Переглянути файл


+ 0
- 160
cgi-bin/broxbourne.cgi Переглянути файл

@@ -1,160 +0,0 @@
#!/usr/bin/perl

use strict;
use warnings;

use CGI qw(:cgi);
use DateTime;
use DateTime::Format::DateParse;
use HTML::TreeBuilder;
use LWP::UserAgent;
use XML::Writer;

# The master URL for the Broxbourne planning search
our $SearchURL = "http://www2.broxbourne.gov.uk/planningsearch/webform1.aspx";

# We're a CGI script...
my $query = CGI->new();

# Get the date as an offset from 2000-01-01
#
# param() is forced into scalar context here: in list context it returns
# every value supplied for a name (or nothing at all when the name is
# missing), which would let a request shift or inject key/value pairs in
# the argument list of DateTime->new().
my $epoch = DateTime->new(year => 2000, month => 1, day => 1);
my $querydate = DateTime->new(year  => scalar $query->param("year"),
                              month => scalar $query->param("month"),
                              day   => scalar $query->param("day"));
$querydate = $querydate->delta_days($epoch)->delta_days;

# Construct an LWP user agent
our $UA = LWP::UserAgent->new(env_proxy => 1);

# Post the URL to get an initial blank form
my $state = get_state(do_post());

# Post each date in turn to build up the state - you can thank
# Microsoft and ASP.NET for the horrible way we have to do this
# by posting each argument in turn to build up the state
$state = get_state(do_post_back($state, 'DateSelector1$Calendar1', $querydate));
$state = get_state(do_post_back($state, 'DateSelector2$Calendar1', $querydate));

# Create an XML output stream
my $Writer = XML::Writer->new(DATA_MODE => 1);

# Output the XML header data
$Writer->xmlDecl("UTF-8");
$Writer->startTag("planning");
$Writer->dataElement("authority_name", "Borough of Broxbourne");
$Writer->dataElement("authority_short_name", "Broxbourne");
$Writer->startTag("applications");

# Get the arguments for the search...
my $args = {
    "Srch" => "rb1",
    "__VIEWSTATE" => $state,
    "btnSearch" => "Search",
    "tbReference" => "",
    "tbRef2" => ""
};

# ...and then (at last) we can do the search!
my $page = do_post($args);

# Loop processing pages of results
while ($page)
{
    my $table = $page->look_down("_tag" => "table", "id" => "DataGrid1");

    # Remember the state
    $state = get_state($page);

    # Clear the page for now - this will be reinitialised if we
    # find another page of results to make us go round the loop
    # all over again
    undef $page;

    # Check that we found a table - searches that find no results
    # produce a page with no table in it
    if ($table)
    {
        # Process each row of the results
        foreach my $row ($table->look_down("_tag" => "tr"))
        {
            my @cells = $row->look_down("_tag" => "td");

            # A row without any <td> cells has nothing to inspect; skip
            # it rather than calling a method on an undefined value
            next unless @cells;

            if ($cells[0]->look_down("_tag" => "input"))
            {
                my $reference = $cells[1]->as_trimmed_text;
                my $date = $cells[2]->as_trimmed_text;
                my $address = $cells[3]->as_trimmed_text;
                my $description = $cells[4]->as_trimmed_text;

                # Default to an empty string so that an address with no
                # recognisable trailing postcode still produces an empty
                # element without handing undef to the XML writer
                my $postcode = "";

                if ($address =~ /\s+([A-Z]+\d+\s+\d+[A-Z]+)$/)
                {
                    $postcode = $1;
                }

                $Writer->startTag("application");
                $Writer->dataElement("council_reference", $reference);
                $Writer->dataElement("address", $address);
                $Writer->dataElement("postcode", $postcode);
                $Writer->dataElement("description", $description);
                $Writer->dataElement("date_received", $date);
                $Writer->endTag("application");
            }
            elsif ($cells[0]->attr("colspan") && $cells[0]->attr("colspan") eq "5")
            {
                # This is the pager row: follow the ">" link, if there is
                # one, to fetch the next page of results
                foreach my $link ($cells[0]->look_down("_tag" => "a"))
                {
                    # Anchors without an href cannot be a pager link
                    my $href = $link->attr("href");
                    next unless defined $href;

                    if ($link->as_trimmed_text eq ">" &&
                        $href =~ /^javascript:__doPostBack\('([^\']*)','([^\']*)'\)$/)
                    {
                        $page = do_post_back($state, $1, $2);
                    }
                }
            }
        }
    }
}

# Finish off XML output
$Writer->endTag("applications");
$Writer->endTag("planning");
$Writer->end();

exit 0;

# Return the value of the __VIEWSTATE input found in a parsed page, so
# that it can be sent back with the next post
sub get_state
{
    my ($tree) = @_;

    return $tree->look_down("_tag" => "input", "name" => "__VIEWSTATE")
                ->attr("value");
}

# Imitate the page's doPostBack javascript function: post the event
# target and argument together with the saved state, and return whatever
# do_post returns
sub do_post_back
{
    my ($state, $target, $argument) = @_;

    # Dollar signs in the target are sent as colons
    $target =~ s/\$/:/g;

    return do_post({
        "__EVENTTARGET" => $target,
        "__EVENTARGUMENT" => $argument,
        "__VIEWSTATE" => $state
    });
}

# Post the given arguments to the planning search page and return the
# response content parsed into an HTML tree; dies with the response's
# status line when the request is not successful
sub do_post
{
    my $reply = $UA->post($SearchURL, @_);

    unless ($reply->is_success)
    {
        die $reply->status_line;
    }

    return HTML::TreeBuilder->new_from_content($reply->content);
}

+ 0
- 64
cgi-bin/westminster.rb Переглянути файл

@@ -1,64 +0,0 @@
#!/opt/local/bin/ruby

# Reads a City of Westminster planning search results page line by line
# (via Kernel#gets) and prints the applications found in it as XML.

# Fixed pieces of markup that introduce or terminate the fields we want.
proposal_open = '<tr><td class="tablefeature">Proposal:</td><td class="tablefeature">'
address_open = '<tr><td class="tablefeature">Address:</td><td class="tablefeature">'
case_link_open = '<tr><td><a href="currentsearch-details.cfm?CASENO='
row_close = '</td></tr>'

# Fields of the application currently being collected.
description = nil
address = nil
ref = nil
url = nil

# True while the lines being read are continuation lines of an address.
in_address = false

puts '<planning>
<authority_name>City of Westminster</authority_name>
<authority_short_name>Westminster</authority_short_name>
<applications>'

while line = gets
  if line.include?('Proposal:')
    description = line.gsub(proposal_open, '').gsub(row_close, '').strip
  end

  if line.include?(case_link_open)
    # The reference is the link text: whatever follows the first '>' once
    # the surrounding markup has been removed.
    ref = line.gsub(case_link_open, '').gsub('</a></td></tr>', '').strip
    ref = ref[ref.index('>')+1..ref.length]

    # The relative URL is everything between the opening quote and '">'.
    url = line.gsub('<tr><td><a href="', '')
    url = url[0, url.index('">')]
  end

  # While inside an address, a 'Proposal:' line ends it; any other line is
  # appended to the address.
  if in_address
    if line.include?('Proposal:')
      in_address = false
    else
      address += line.gsub(row_close, '').strip + ' '
    end
  end

  if line.include?(address_open)
    address = line.gsub(address_open, '').strip + ' '
    in_address = true
  end

  # A closing table tag, once an address has been seen, triggers output of
  # one application record.
  if line.include?('</table>') and address
    tidy_address = address.strip
    postcode = tidy_address.match(/[A-Z]?[A-Z]\d?\d \d[A-Z][A-Z]/)

    puts '<application>'
    puts "<council_reference>#{ref}</council_reference>"
    puts "<address>#{tidy_address}</address>"
    puts(postcode ? "<postcode>#{postcode}</postcode>" : "<postcode></postcode>")
    puts "<description>#{description}</description>"

    puts "<info_url>http://www3.westminster.gov.uk/planningapplications/#{url}</info_url>"
    puts "<comment_url></comment_url>"

    puts '</application>'

  end
end

puts '</applications>'
puts '</planning>'

+ 10
- 1
python_scrapers/PublicAccessSites.csv Переглянути файл

@@ -80,4 +80,13 @@
"Bolsover District Council", "Bolsover", "http://217.158.161.181/publicaccess/tdc/", "PublicAccess", "PublicAccessParser"
"Doncaster Metropolitan Borough Council", "Doncaster", "http://maps.doncaster.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser"
"Dundee City Council", "Dundee", "http://bwarrant.dundeecity.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser"
"Durham City Council", "Durham", "http://publicaccess.durhamcity.gov.uk/publicaccess/dc/", "PublicAccess", "PublicAccessParser"
"Durham City Council", "Durham", "http://publicaccess.durhamcity.gov.uk/publicaccess/dc/", "PublicAccess", "PublicAccessParser"
"East Dorset District Council", "East Dorset", "http://193.243.228.16/PublicAccess/dc/", "PublicAccess", "PublicAccessParser"
"Epsom and Ewell Borough Council", "Epsom and Ewell", "http://eplanning.epsom-ewell.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser"
"Gateshead Metropolitan Borough Council", "Gateshead", "http://planning.gateshead.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser"
"Gedling Borough Council", "Gedling", "http://publicaccess.gedling.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser"
"Gloucestershire County Council", "Gloucestershire", "http://planning.gloucestershire.gov.uk/PublicAccess/tdc/", "PublicAccess", "PublicAccessParser"
"Newcastle-under-Lyme Borough Council", "Newcastle-under-Lyme", "http://publicaccess.newcastle-staffs.gov.uk/PublicAccess/tdc/", "PublicAccess", "PublicAccessParser"
"Vale Royal Borough Council", "Vale Royal", "http://pa.valeroyal.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser"
"Winchester City Council", "Winchester", "http://win2padmz.winchester.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser"
"Wolverhampton City Council", "Wolverhampton", "http://planningonline.wolverhampton.gov.uk/PublicAccess/dc/", "PublicAccess", "PublicAccessParser"

Завантаження…
Відмінити
Зберегти