Ver código fonte

adding new publicaccess scrapers

master
duncan.parkes 17 anos atrás
pai
commit
c9c35aa262
7 arquivos alterados com 168 adições e 2 exclusões
  1. +31
    -0
      CGI/Cornwall.cgi
  2. +31
    -0
      CGI/Malvern Hills.cgi
  3. +20
    -1
      CGI/PublicAccess.py
  4. +31
    -0
      CGI/South Bucks.cgi
  5. +31
    -0
      CGI/West Lancashire.cgi
  6. +20
    -1
      python_scrapers/PublicAccess.py
  7. +4
    -0
      python_scrapers/PublicAccessSites.csv

+ 31
- 0
CGI/Cornwall.cgi Ver arquivo

@@ -0,0 +1,31 @@
#!/usr/bin/python

# This is the parser for Cornwall County Council.
# it is generated from the file CGITemplate

import cgi
import cgitb
#cgitb.enable(display=0, logdir="/tmp")


form = cgi.FieldStorage()
day = form.getfirst('day')
month = form.getfirst('month')
year = form.getfirst('year')


authority_name = "Cornwall County Council"
authority_short_name = "Cornwall"
base_url = "http://planapps.cornwall.gov.uk/publicaccess/tdc/"

import PublicAccess

parser = PublicAccess.PublicAccessParser(authority_name,
authority_short_name,
base_url)

xml = parser.getResults(day, month, year)

print "Content-Type: text/xml" # XML is following
print
print xml # print the xml

+ 31
- 0
CGI/Malvern Hills.cgi Ver arquivo

@@ -0,0 +1,31 @@
#!/usr/bin/python

# This is the parser for Malvern Hills District Council.
# it is generated from the file CGITemplate

import cgi
import cgitb
#cgitb.enable(display=0, logdir="/tmp")


form = cgi.FieldStorage()
day = form.getfirst('day')
month = form.getfirst('month')
year = form.getfirst('year')


authority_name = "Malvern Hills District Council"
authority_short_name = "Malvern Hills"
base_url = "http://public.malvernhills.gov.uk/publicaccess/tdc/"

import PublicAccess

parser = PublicAccess.PublicAccessParser(authority_name,
authority_short_name,
base_url)

xml = parser.getResults(day, month, year)

print "Content-Type: text/xml" # XML is following
print
print xml # print the xml

+ 20
- 1
CGI/PublicAccess.py Ver arquivo

@@ -350,7 +350,10 @@ class PublicAccessPropertyPageParser(HTMLParser.HTMLParser):
# but are always empty for my script...
#http://planning.hambleton.gov.uk/publicaccess/tdc/DcApplication/application_searchform.aspx
#"Hambleton District Council", "Hambleton", "http://planning.hambleton.gov.uk/publicaccess/tdc/"
"Newcastle City Council", "Newcastle", "http://gispublic.newcastle.gov.uk/publicaccess/dc/"
#"Newcastle City Council", "Newcastle", "http://gispublic.newcastle.gov.uk/publicaccess/dc/"
#"City of Durham Council", "Durham", "http://publicaccess.durhamcity.gov.uk/publicaccess/dc/"
#"Selby District Council", "Selby", "http://publicaccess.selby.gov.uk/publicaccess/dc/"


# Bromley
# http://83.244.199.114/publicaccess/
@@ -379,6 +382,22 @@ class PublicAccessPropertyPageParser(HTMLParser.HTMLParser):

# Gravesham seems to be broken
#"Gravesham Borough Council", "Gravesham", "http://195.102.67.4/PublicAccess/tdc/"

# Manchester
# Missing the main menu on the left.
#http://www.publicaccess.manchester.gov.uk/publicaccess/tdc/tdc_home.aspx

#Bracknell Forest - has an error
#https://my.bracknell-forest.gov.uk/publicaccess/tdc/tdc_home.aspx

# Redditch
# Gives an error
# http://access.redditchbc.gov.uk/publicaccess/tdc/tdc_home.aspx

# Stirling
# Gives an error
#http://planpub.stirling.gov.uk/publicaccess/tdc/tdc_home.aspx

# These use https:

# Chiltern


+ 31
- 0
CGI/South Bucks.cgi Ver arquivo

@@ -0,0 +1,31 @@
#!/usr/bin/python

# This is the parser for South Bucks District Council.
# it is generated from the file CGITemplate

import cgi
import cgitb
#cgitb.enable(display=0, logdir="/tmp")


form = cgi.FieldStorage()
day = form.getfirst('day')
month = form.getfirst('month')
year = form.getfirst('year')


authority_name = "South Bucks District Council"
authority_short_name = "South Bucks"
base_url = "http://sbdc-paweb.southbucks.gov.uk/publicaccess/tdc/"

import PublicAccess

parser = PublicAccess.PublicAccessParser(authority_name,
authority_short_name,
base_url)

xml = parser.getResults(day, month, year)

print "Content-Type: text/xml" # XML is following
print
print xml # print the xml

+ 31
- 0
CGI/West Lancashire.cgi Ver arquivo

@@ -0,0 +1,31 @@
#!/usr/bin/python

# This is the parser for West Lancashire District Council.
# it is generated from the file CGITemplate

import cgi
import cgitb
#cgitb.enable(display=0, logdir="/tmp")


form = cgi.FieldStorage()
day = form.getfirst('day')
month = form.getfirst('month')
year = form.getfirst('year')


authority_name = "West Lancashire District Council"
authority_short_name = "West Lancashire"
base_url = "http://publicaccess.westlancsdc.gov.uk/PublicAccess/tdc/"

import PublicAccess

parser = PublicAccess.PublicAccessParser(authority_name,
authority_short_name,
base_url)

xml = parser.getResults(day, month, year)

print "Content-Type: text/xml" # XML is following
print
print xml # print the xml

+ 20
- 1
python_scrapers/PublicAccess.py Ver arquivo

@@ -350,7 +350,10 @@ class PublicAccessPropertyPageParser(HTMLParser.HTMLParser):
# but are always empty for my script...
#http://planning.hambleton.gov.uk/publicaccess/tdc/DcApplication/application_searchform.aspx
#"Hambleton District Council", "Hambleton", "http://planning.hambleton.gov.uk/publicaccess/tdc/"
"Newcastle City Council", "Newcastle", "http://gispublic.newcastle.gov.uk/publicaccess/dc/"
#"Newcastle City Council", "Newcastle", "http://gispublic.newcastle.gov.uk/publicaccess/dc/"
#"City of Durham Council", "Durham", "http://publicaccess.durhamcity.gov.uk/publicaccess/dc/"
#"Selby District Council", "Selby", "http://publicaccess.selby.gov.uk/publicaccess/dc/"


# Bromley
# http://83.244.199.114/publicaccess/
@@ -379,6 +382,22 @@ class PublicAccessPropertyPageParser(HTMLParser.HTMLParser):

# Gravesham seems to be broken
#"Gravesham Borough Council", "Gravesham", "http://195.102.67.4/PublicAccess/tdc/"

# Manchester
# Missing the main menu on the left.
#http://www.publicaccess.manchester.gov.uk/publicaccess/tdc/tdc_home.aspx

#Bracknell Forest - has an error
#https://my.bracknell-forest.gov.uk/publicaccess/tdc/tdc_home.aspx

# Redditch
# Gives an error
# http://access.redditchbc.gov.uk/publicaccess/tdc/tdc_home.aspx

# Stirling
# Gives an error
#http://planpub.stirling.gov.uk/publicaccess/tdc/tdc_home.aspx

# These use https:

# Chiltern


+ 4
- 0
python_scrapers/PublicAccessSites.csv Ver arquivo

@@ -44,3 +44,7 @@
"Aylesbury Vale District Council", "Aylesbury Vale", "http://eplanning.aylesburyvaledc.gov.uk/tdc/"
"Epsom and Ewell Borough Council", "Epsom and Ewell", "http://eplanning.epsom-ewell.gov.uk/publicaccess/tdc/"
"Gedling Borough Council", "Gedling", "http://publicaccess.gedling.gov.uk/publicaccess/tdc/"
"Cornwall County Council", "Cornwall", "http://planapps.cornwall.gov.uk/publicaccess/tdc/"
"South Bucks District Council", "South Bucks", "http://sbdc-paweb.southbucks.gov.uk/publicaccess/tdc/"
"Malvern Hills District Council", "Malvern Hills", "http://public.malvernhills.gov.uk/publicaccess/tdc/"
"West Lancashire District Council", "West Lancashire", "http://publicaccess.westlancsdc.gov.uk/PublicAccess/tdc/"

Carregando…
Cancelar
Salvar