Pārlūkot izejas kodu

add North Hertfordshire

import/raw
duncan.parkes pirms 17 gadiem
vecāks
revīzija
4af42b765b
100 mainīti faili ar 95 papildinājumiem un 16 dzēšanām
  1. +33
    -8
      trunk/cgi-bin/AcolnetParser.py
  2. +0
    -0
      trunk/cgi-bin/Allerdale.cgi
  3. +0
    -0
      trunk/cgi-bin/Alnwick.cgi
  4. +0
    -0
      trunk/cgi-bin/Angus.cgi
  5. +0
    -0
      trunk/cgi-bin/Aylesbury Vale.cgi
  6. +0
    -0
      trunk/cgi-bin/Babergh.cgi
  7. +0
    -0
      trunk/cgi-bin/Barrow.cgi
  8. +0
    -0
      trunk/cgi-bin/Basildon.cgi
  9. +0
    -0
      trunk/cgi-bin/Basingstoke and Deane.cgi
  10. +0
    -0
      trunk/cgi-bin/Bassetlaw.cgi
  11. +0
    -0
      trunk/cgi-bin/Bath.cgi
  12. +0
    -0
      trunk/cgi-bin/Bexley.cgi
  13. +0
    -0
      trunk/cgi-bin/Blaby.cgi
  14. +0
    -0
      trunk/cgi-bin/Bolsover.cgi
  15. +0
    -0
      trunk/cgi-bin/Bristol.cgi
  16. +0
    -0
      trunk/cgi-bin/Buckinghamshire.cgi
  17. +0
    -0
      trunk/cgi-bin/Bury.cgi
  18. +0
    -0
      trunk/cgi-bin/Chelmsford.cgi
  19. +0
    -0
      trunk/cgi-bin/Cherwell.cgi
  20. +0
    -0
      trunk/cgi-bin/Chorley.cgi
  21. +0
    -0
      trunk/cgi-bin/City of London.cgi
  22. +0
    -0
      trunk/cgi-bin/Cornwall.cgi
  23. +0
    -0
      trunk/cgi-bin/Coventry.cgi
  24. +0
    -0
      trunk/cgi-bin/Croydon.cgi
  25. +0
    -0
      trunk/cgi-bin/Denbighshire.cgi
  26. +0
    -0
      trunk/cgi-bin/Derby.cgi
  27. +0
    -0
      trunk/cgi-bin/Doncaster.cgi
  28. +0
    -0
      trunk/cgi-bin/Dundee.cgi
  29. +0
    -0
      trunk/cgi-bin/Durham.cgi
  30. +0
    -0
      trunk/cgi-bin/Ealing.cgi
  31. +0
    -0
      trunk/cgi-bin/Easington.cgi
  32. +0
    -0
      trunk/cgi-bin/East Devon.cgi
  33. +0
    -0
      trunk/cgi-bin/East Dorset.cgi
  34. +0
    -0
      trunk/cgi-bin/East Lindsey.cgi
  35. +0
    -0
      trunk/cgi-bin/Edinburgh.cgi
  36. +0
    -0
      trunk/cgi-bin/Epsom and Ewell.cgi
  37. +0
    -0
      trunk/cgi-bin/Fenland.cgi
  38. +0
    -0
      trunk/cgi-bin/Fylde.cgi
  39. +0
    -0
      trunk/cgi-bin/Gateshead.cgi
  40. +0
    -0
      trunk/cgi-bin/Gedling.cgi
  41. +0
    -0
      trunk/cgi-bin/Gloucestershire.cgi
  42. +0
    -0
      trunk/cgi-bin/Gravesham.cgi
  43. +0
    -0
      trunk/cgi-bin/Hammersmith and Fulham.cgi
  44. +0
    -0
      trunk/cgi-bin/Haringey.cgi
  45. +0
    -0
      trunk/cgi-bin/Harlow.cgi
  46. +0
    -0
      trunk/cgi-bin/Harrogate.cgi
  47. +0
    -0
      trunk/cgi-bin/Hart.cgi
  48. +0
    -0
      trunk/cgi-bin/Hartlepool.cgi
  49. +0
    -0
      trunk/cgi-bin/High Peak.cgi
  50. +0
    -0
      trunk/cgi-bin/Huntingdonshire.cgi
  51. +0
    -0
      trunk/cgi-bin/Kerrier.cgi
  52. +0
    -0
      trunk/cgi-bin/Knowsley.cgi
  53. +0
    -0
      trunk/cgi-bin/Lancaster.cgi
  54. +0
    -0
      trunk/cgi-bin/Luton.cgi
  55. +0
    -0
      trunk/cgi-bin/Malvern Hills.cgi
  56. +0
    -0
      trunk/cgi-bin/Mid Devon.cgi
  57. +0
    -0
      trunk/cgi-bin/Milton Keynes.cgi
  58. +0
    -0
      trunk/cgi-bin/NW Leicestershire.cgi
  59. +0
    -0
      trunk/cgi-bin/Newcastle-under-Lyme.cgi
  60. +0
    -0
      trunk/cgi-bin/Newham.cgi
  61. +29
    -0
      trunk/cgi-bin/North Hertfordshire.cgi
  62. +0
    -0
      trunk/cgi-bin/North Tyneside.cgi
  63. +0
    -0
      trunk/cgi-bin/North Warwickshire.cgi
  64. +0
    -0
      trunk/cgi-bin/Northumberland.cgi
  65. +0
    -0
      trunk/cgi-bin/Oadby and Wigston.cgi
  66. +0
    -0
      trunk/cgi-bin/Oswestry.cgi
  67. +0
    -0
      trunk/cgi-bin/Peterborough.cgi
  68. +0
    -0
      trunk/cgi-bin/Portsmouth.cgi
  69. +0
    -0
      trunk/cgi-bin/Redditch.cgi
  70. +0
    -0
      trunk/cgi-bin/Rushmoor.cgi
  71. +0
    -0
      trunk/cgi-bin/Scarborough.cgi
  72. +0
    -0
      trunk/cgi-bin/Sevenoaks.cgi
  73. +0
    -0
      trunk/cgi-bin/South Bucks.cgi
  74. +0
    -0
      trunk/cgi-bin/South Ribble.cgi
  75. +0
    -0
      trunk/cgi-bin/South Staffordshire.cgi
  76. +0
    -0
      trunk/cgi-bin/SouthOxfordshire.cgi
  77. +0
    -0
      trunk/cgi-bin/Southampton.cgi
  78. +0
    -0
      trunk/cgi-bin/Spelthorne.cgi
  79. +0
    -0
      trunk/cgi-bin/St Helens.cgi
  80. +0
    -0
      trunk/cgi-bin/Stevenage.cgi
  81. +0
    -0
      trunk/cgi-bin/Stirling.cgi
  82. +0
    -0
      trunk/cgi-bin/Stockton-On-Tees.cgi
  83. +0
    -0
      trunk/cgi-bin/Stratford.cgi
  84. +0
    -0
      trunk/cgi-bin/Sunderland.cgi
  85. +0
    -0
      trunk/cgi-bin/Teignbridge.cgi
  86. +0
    -0
      trunk/cgi-bin/Test Valley.cgi
  87. +0
    -0
      trunk/cgi-bin/Tonbridge.cgi
  88. +0
    -0
      trunk/cgi-bin/Torbay.cgi
  89. +0
    -0
      trunk/cgi-bin/Vale Royal.cgi
  90. +0
    -0
      trunk/cgi-bin/Waveney.cgi
  91. +0
    -0
      trunk/cgi-bin/Wear Valley.cgi
  92. +0
    -0
      trunk/cgi-bin/Wellingborough.cgi
  93. +0
    -0
      trunk/cgi-bin/West Berkshire.cgi
  94. +0
    -0
      trunk/cgi-bin/West Lancashire.cgi
  95. +0
    -0
      trunk/cgi-bin/West Norfolk.cgi
  96. +0
    -0
      trunk/cgi-bin/Winchester.cgi
  97. +0
    -0
      trunk/cgi-bin/Woking.cgi
  98. +0
    -0
      trunk/cgi-bin/Wolverhampton.cgi
  99. +0
    -0
      trunk/cgi-bin/York.cgi
  100. +33
    -8
      trunk/python_scrapers/AcolnetParser.py

+ 33
- 8
trunk/cgi-bin/AcolnetParser.py Parādīt failu

@@ -161,6 +161,9 @@ class AcolnetParser(HTMLParser.HTMLParser):
search_form_response = urllib2.urlopen(self.base_url)
search_form_contents = search_form_response.read()

#outfile = open("tmpfile", "w")
#outfile.write(search_form_contents)

# This sometimes causes a problem in HTMLParser, so let's just get the link
# out with a regex...

@@ -270,7 +273,7 @@ class BridgenorthParser(AcolnetParser):

comments_email_address = "contactus@bridgnorth-dc.gov.uk"

action_regex = re.compile("<FORM name=\"frmSearch\" method=\"post\" action=\"([^\"]*)\" onSubmit=\"return ValidateSearch\(\)\" enctype=\"multipart/form-data\">")
action_regex = re.compile("<FORM name=\"frmSearch\" method=\"post\" action=\"([^\"]*)\" onSubmit=\"return ValidateSearch\(\)\" enctype=\"multipart/form-data\">", re.IGNORECASE)

class BuryParser(AcolnetParser):
#search_url = "http://e-planning.bury.gov.uk/ePlanning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch"
@@ -284,7 +287,7 @@ class BuryParser(AcolnetParser):
#authority_short_name = "Bury"

comments_email_address = "development.control@bury.gov.uk"
action_regex = re.compile("<FORM name=\"frmSearch\" method=\"post\" action=\"([^\"]*)\" onSubmit=\"return ValidateSearch\(\)\" enctype=\"multipart/form-data\">")
action_regex = re.compile("<FORM name=\"frmSearch\" method=\"post\" action=\"([^\"]*)\" onSubmit=\"return ValidateSearch\(\)\" enctype=\"multipart/form-data\">", re.IGNORECASE)

## class CanterburyParser(AcolnetParser):
## search_url = "http://planning.canterbury.gov.uk/scripts/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch"
@@ -312,7 +315,7 @@ class CarlisleParser(AcolnetParser):
#authority_short_name = "Carlisle"

comments_email_address = "dc@carlisle.gov.uk"
action_regex = re.compile("<form id=\"frmSearch\" onSubmit=\"\"return ValidateSearch\(\)\"\" name=\"frmSearch\" method=\"post\" action=\"([^\"]*)\" enctype=\"multipart/form-data\">")
action_regex = re.compile("<form id=\"frmSearch\" onSubmit=\"\"return ValidateSearch\(\)\"\" name=\"frmSearch\" method=\"post\" action=\"([^\"]*)\" enctype=\"multipart/form-data\">", re.IGNORECASE)


class DerbyParser(AcolnetParser):
@@ -327,7 +330,7 @@ class DerbyParser(AcolnetParser):
#authority_short_name = "Derby"

comments_email_address = "developmentcontrol@derby.gov.uk"
action_regex = re.compile("<FORM name=\"frmSearch\" method=\"post\" action=\"([^\"]*)\" onSubmit=\"return ValidateSearch\(\)\" enctype=\"multipart/form-data\">")
action_regex = re.compile("<FORM name=\"frmSearch\" method=\"post\" action=\"([^\"]*)\" onSubmit=\"return ValidateSearch\(\)\" enctype=\"multipart/form-data\">", re.IGNORECASE)

class CroydonParser(AcolnetParser):
case_number_tr = 1 # this one can be got by the td class attribute
@@ -336,7 +339,7 @@ class CroydonParser(AcolnetParser):
proposal_tr = 6

comments_email_address = "planning.control@croydon.gov.uk"
action_regex = re.compile("<form id=\"frmSearch\" onSubmit=\"\"return ValidateSearch\(\)\"\" name=\"frmSearch\" method=\"post\" action=\"([^\"]*)\" enctype=\"multipart/form-data\">")
action_regex = re.compile("<form id=\"frmSearch\" onSubmit=\"\"return ValidateSearch\(\)\"\" name=\"frmSearch\" method=\"post\" action=\"([^\"]*)\" enctype=\"multipart/form-data\">", re.IGNORECASE)

class EastLindseyParser(AcolnetParser):
case_number_tr = 1 # this one can be got by the td class attribute
@@ -345,7 +348,7 @@ class EastLindseyParser(AcolnetParser):
proposal_tr = 6

comments_email_address = "development.control@e-lindsey.gov.uk"
action_regex = re.compile("<form id=\"frmSearch\" onSubmit=\"return ValidateSearch\(\)\" name=\"frmSearch\" method=\"post\" action=\"([^\"]*)\" enctype=\"multipart/form-data\">")
action_regex = re.compile("<form id=\"frmSearch\" onSubmit=\"return ValidateSearch\(\)\" name=\"frmSearch\" method=\"post\" action=\"([^\"]*)\" enctype=\"multipart/form-data\">", re.IGNORECASE)

class FyldeParser(AcolnetParser):
case_number_tr = 1 # this one can be got by the td class attribute
@@ -364,7 +367,26 @@ class HarlowParser(AcolnetParser):
proposal_tr = 5

comments_email_address = "Planning.services@harlow.gov.uk"
action_regex = re.compile("<FORM name=\"frmSearch\" method=\"post\" action=\"([^\"]*)\" onSubmit=\"return ValidateSearch\(\)\" enctype=\"multipart/form-data\">")
action_regex = re.compile("<FORM name=\"frmSearch\" method=\"post\" action=\"([^\"]*)\" onSubmit=\"return ValidateSearch\(\)\" enctype=\"multipart/form-data\">", re.IGNORECASE)

class HavantParser(AcolnetParser):
    """Acolnet parser settings for Havant.

    Only class-level configuration is overridden here; all behaviour comes
    from AcolnetParser.  The *_tr values are presumably the table-row
    positions of each field on the results page -- confirm against the
    base class.
    """
    case_number_tr = 1 # this one can be got by the td class attribute
    reg_date_tr = 2
    location_tr = 4
    proposal_tr = 5

    comments_email_address = "representations@havant.gov.uk"

    # Group 1 captures the value of the search form's action attribute.
    # The pattern mirrors the page markup verbatim, including its doubled
    # quotes and the literal "[theme]" placeholder.  The square brackets are
    # escaped so that they match literally; unescaped, "[theme]" is a
    # character class matching a single one of the letters t, h, e, m.
    action_regex = re.compile(
        r'<form id="frmSearch" onSubmit=""return ValidateSearch\(\)""'
        r' theme=""\[theme\]"" name="frmSearch" method="post"'
        r' action="([^"]*)" enctype="multipart/form-data">',
        re.IGNORECASE)

class NorthHertfordshireParser(AcolnetParser):
    """Acolnet parser settings for North Hertfordshire.

    Only class-level configuration is overridden here; all behaviour comes
    from AcolnetParser.
    """
    # Group 1 captures the value of the search form's action attribute.
    action_regex = re.compile(
        r'<FORM name="frmSearch" method="post" action="([^"]*)"'
        r' onSubmit="return ValidateSearch\(\)"'
        r' enctype="multipart/form-data">',
        re.IGNORECASE)

    comments_email_address = "planningcontrol@north-herts.gov.uk"

    # Presumably the table-row positions of each field on the results
    # page -- confirm against AcolnetParser.
    case_number_tr = 1 # this one can be got by the td class attribute
    reg_date_tr = 2
    location_tr = 4
    proposal_tr = 5

if __name__ == '__main__':
@@ -374,7 +396,7 @@ if __name__ == '__main__':

# working
# parser = BasingstokeParser()
parser = BaberghParser("Babergh District Council", "Babergh", "http://planning.babergh.gov.uk/dataOnlinePlanning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch")
#parser = BaberghParser("Babergh District Council", "Babergh", "http://planning.babergh.gov.uk/dataOnlinePlanning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch")

# works with the divs stripped out
#parser = BassetlawParser()
@@ -396,6 +418,9 @@ if __name__ == '__main__':

# working
#parser = DerbyParser()


parser = HavantParser("HavantBC", "Havant", "http://www3.havant.gov.uk/scripts/planningpages/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch")
print parser.getResults(day, month, year)

+ 0
- 0
trunk/cgi-bin/Allerdale.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Alnwick.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Angus.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Aylesbury Vale.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Babergh.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Barrow.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Basildon.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Basingstoke and Deane.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Bassetlaw.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Bath.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Bexley.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Blaby.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Bolsover.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Bristol.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Buckinghamshire.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Bury.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Chelmsford.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Cherwell.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Chorley.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/City of London.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Cornwall.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Coventry.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Croydon.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Denbighshire.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Derby.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Doncaster.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Dundee.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Durham.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Ealing.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Easington.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/East Devon.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/East Dorset.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/East Lindsey.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Edinburgh.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Epsom and Ewell.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Fenland.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Fylde.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Gateshead.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Gedling.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Gloucestershire.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Gravesham.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Hammersmith and Fulham.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Haringey.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Harlow.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Harrogate.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Hart.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Hartlepool.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/High Peak.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Huntingdonshire.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Kerrier.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Knowsley.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Lancaster.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Luton.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Malvern Hills.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Mid Devon.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Milton Keynes.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/NW Leicestershire.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Newcastle-under-Lyme.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Newham.cgi Parādīt failu


+ 29
- 0
trunk/cgi-bin/North Hertfordshire.cgi Parādīt failu

@@ -0,0 +1,29 @@
#!/usr/local/bin/python

# CGI entry point for the North Hertfordshire District Council parser.
# This file is generated from the file CGITemplate.
#
# It reads the 'day', 'month' and 'year' request fields, hands them to
# AcolnetParser.NorthHertfordshireParser.getResults() and prints whatever
# that returns as a text/xml response.

import cgi
import cgitb
# Disabled: when enabled, this would stop tracebacks being displayed in the
# response and write them to files under /tmp instead.
#cgitb.enable(display=0, logdir="/tmp")


# getfirst() returns None for a field that is absent from the request; the
# values are passed to the parser below without any validation here.
form = cgi.FieldStorage()
day = form.getfirst('day')
month = form.getfirst('month')
year = form.getfirst('year')


# Constructor arguments for the parser: full name, short name and search URL.
authority_name = "North Hertfordshire District Council"
authority_short_name = "North Hertfordshire"
base_url = "http://www.north-herts.gov.uk/dcdataonline/Pages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch"

import AcolnetParser

parser = AcolnetParser.NorthHertfordshireParser(authority_name, authority_short_name, base_url)

xml = parser.getResults(day, month, year)

# The header line and the blank line that ends the headers must be printed
# before the body.
print "Content-Type: text/xml" # the response body is XML
print
print xml # print the xml

+ 0
- 0
trunk/cgi-bin/North Tyneside.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/North Warwickshire.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Northumberland.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Oadby and Wigston.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Oswestry.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Peterborough.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Portsmouth.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Redditch.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Rushmoor.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Scarborough.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Sevenoaks.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/South Bucks.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/South Ribble.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/South Staffordshire.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/SouthOxfordshire.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Southampton.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Spelthorne.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/St Helens.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Stevenage.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Stirling.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Stockton-On-Tees.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Stratford.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Sunderland.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Teignbridge.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Test Valley.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Tonbridge.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Torbay.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Vale Royal.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Waveney.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Wear Valley.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Wellingborough.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/West Berkshire.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/West Lancashire.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/West Norfolk.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Winchester.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Woking.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/Wolverhampton.cgi Parādīt failu


+ 0
- 0
trunk/cgi-bin/York.cgi Parādīt failu


+ 33
- 8
trunk/python_scrapers/AcolnetParser.py Parādīt failu

@@ -161,6 +161,9 @@ class AcolnetParser(HTMLParser.HTMLParser):
search_form_response = urllib2.urlopen(self.base_url)
search_form_contents = search_form_response.read()

#outfile = open("tmpfile", "w")
#outfile.write(search_form_contents)

# This sometimes causes a problem in HTMLParser, so let's just get the link
# out with a regex...

@@ -270,7 +273,7 @@ class BridgenorthParser(AcolnetParser):

comments_email_address = "contactus@bridgnorth-dc.gov.uk"

action_regex = re.compile("<FORM name=\"frmSearch\" method=\"post\" action=\"([^\"]*)\" onSubmit=\"return ValidateSearch\(\)\" enctype=\"multipart/form-data\">")
action_regex = re.compile("<FORM name=\"frmSearch\" method=\"post\" action=\"([^\"]*)\" onSubmit=\"return ValidateSearch\(\)\" enctype=\"multipart/form-data\">", re.IGNORECASE)

class BuryParser(AcolnetParser):
#search_url = "http://e-planning.bury.gov.uk/ePlanning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch"
@@ -284,7 +287,7 @@ class BuryParser(AcolnetParser):
#authority_short_name = "Bury"

comments_email_address = "development.control@bury.gov.uk"
action_regex = re.compile("<FORM name=\"frmSearch\" method=\"post\" action=\"([^\"]*)\" onSubmit=\"return ValidateSearch\(\)\" enctype=\"multipart/form-data\">")
action_regex = re.compile("<FORM name=\"frmSearch\" method=\"post\" action=\"([^\"]*)\" onSubmit=\"return ValidateSearch\(\)\" enctype=\"multipart/form-data\">", re.IGNORECASE)

## class CanterburyParser(AcolnetParser):
## search_url = "http://planning.canterbury.gov.uk/scripts/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch"
@@ -312,7 +315,7 @@ class CarlisleParser(AcolnetParser):
#authority_short_name = "Carlisle"

comments_email_address = "dc@carlisle.gov.uk"
action_regex = re.compile("<form id=\"frmSearch\" onSubmit=\"\"return ValidateSearch\(\)\"\" name=\"frmSearch\" method=\"post\" action=\"([^\"]*)\" enctype=\"multipart/form-data\">")
action_regex = re.compile("<form id=\"frmSearch\" onSubmit=\"\"return ValidateSearch\(\)\"\" name=\"frmSearch\" method=\"post\" action=\"([^\"]*)\" enctype=\"multipart/form-data\">", re.IGNORECASE)


class DerbyParser(AcolnetParser):
@@ -327,7 +330,7 @@ class DerbyParser(AcolnetParser):
#authority_short_name = "Derby"

comments_email_address = "developmentcontrol@derby.gov.uk"
action_regex = re.compile("<FORM name=\"frmSearch\" method=\"post\" action=\"([^\"]*)\" onSubmit=\"return ValidateSearch\(\)\" enctype=\"multipart/form-data\">")
action_regex = re.compile("<FORM name=\"frmSearch\" method=\"post\" action=\"([^\"]*)\" onSubmit=\"return ValidateSearch\(\)\" enctype=\"multipart/form-data\">", re.IGNORECASE)

class CroydonParser(AcolnetParser):
case_number_tr = 1 # this one can be got by the td class attribute
@@ -336,7 +339,7 @@ class CroydonParser(AcolnetParser):
proposal_tr = 6

comments_email_address = "planning.control@croydon.gov.uk"
action_regex = re.compile("<form id=\"frmSearch\" onSubmit=\"\"return ValidateSearch\(\)\"\" name=\"frmSearch\" method=\"post\" action=\"([^\"]*)\" enctype=\"multipart/form-data\">")
action_regex = re.compile("<form id=\"frmSearch\" onSubmit=\"\"return ValidateSearch\(\)\"\" name=\"frmSearch\" method=\"post\" action=\"([^\"]*)\" enctype=\"multipart/form-data\">", re.IGNORECASE)

class EastLindseyParser(AcolnetParser):
case_number_tr = 1 # this one can be got by the td class attribute
@@ -345,7 +348,7 @@ class EastLindseyParser(AcolnetParser):
proposal_tr = 6

comments_email_address = "development.control@e-lindsey.gov.uk"
action_regex = re.compile("<form id=\"frmSearch\" onSubmit=\"return ValidateSearch\(\)\" name=\"frmSearch\" method=\"post\" action=\"([^\"]*)\" enctype=\"multipart/form-data\">")
action_regex = re.compile("<form id=\"frmSearch\" onSubmit=\"return ValidateSearch\(\)\" name=\"frmSearch\" method=\"post\" action=\"([^\"]*)\" enctype=\"multipart/form-data\">", re.IGNORECASE)

class FyldeParser(AcolnetParser):
case_number_tr = 1 # this one can be got by the td class attribute
@@ -364,7 +367,26 @@ class HarlowParser(AcolnetParser):
proposal_tr = 5

comments_email_address = "Planning.services@harlow.gov.uk"
action_regex = re.compile("<FORM name=\"frmSearch\" method=\"post\" action=\"([^\"]*)\" onSubmit=\"return ValidateSearch\(\)\" enctype=\"multipart/form-data\">")
action_regex = re.compile("<FORM name=\"frmSearch\" method=\"post\" action=\"([^\"]*)\" onSubmit=\"return ValidateSearch\(\)\" enctype=\"multipart/form-data\">", re.IGNORECASE)

class HavantParser(AcolnetParser):
    """Acolnet parser settings for Havant.

    Only class-level configuration is overridden here; all behaviour comes
    from AcolnetParser.  The *_tr values are presumably the table-row
    positions of each field on the results page -- confirm against the
    base class.
    """
    case_number_tr = 1 # this one can be got by the td class attribute
    reg_date_tr = 2
    location_tr = 4
    proposal_tr = 5

    comments_email_address = "representations@havant.gov.uk"

    # Group 1 captures the value of the search form's action attribute.
    # The pattern mirrors the page markup verbatim, including its doubled
    # quotes and the literal "[theme]" placeholder.  The square brackets are
    # escaped so that they match literally; unescaped, "[theme]" is a
    # character class matching a single one of the letters t, h, e, m.
    action_regex = re.compile(
        r'<form id="frmSearch" onSubmit=""return ValidateSearch\(\)""'
        r' theme=""\[theme\]"" name="frmSearch" method="post"'
        r' action="([^"]*)" enctype="multipart/form-data">',
        re.IGNORECASE)

class NorthHertfordshireParser(AcolnetParser):
    """Acolnet parser settings for North Hertfordshire.

    Only class-level configuration is overridden here; all behaviour comes
    from AcolnetParser.
    """
    # Group 1 captures the value of the search form's action attribute.
    action_regex = re.compile(
        r'<FORM name="frmSearch" method="post" action="([^"]*)"'
        r' onSubmit="return ValidateSearch\(\)"'
        r' enctype="multipart/form-data">',
        re.IGNORECASE)

    comments_email_address = "planningcontrol@north-herts.gov.uk"

    # Presumably the table-row positions of each field on the results
    # page -- confirm against AcolnetParser.
    case_number_tr = 1 # this one can be got by the td class attribute
    reg_date_tr = 2
    location_tr = 4
    proposal_tr = 5

if __name__ == '__main__':
@@ -374,7 +396,7 @@ if __name__ == '__main__':

# working
# parser = BasingstokeParser()
parser = BaberghParser("Babergh District Council", "Babergh", "http://planning.babergh.gov.uk/dataOnlinePlanning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch")
#parser = BaberghParser("Babergh District Council", "Babergh", "http://planning.babergh.gov.uk/dataOnlinePlanning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch")

# works with the divs stripped out
#parser = BassetlawParser()
@@ -396,6 +418,9 @@ if __name__ == '__main__':

# working
#parser = DerbyParser()


parser = HavantParser("HavantBC", "Havant", "http://www3.havant.gov.uk/scripts/planningpages/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch")
print parser.getResults(day, month, year)

Daži faili netika attēloti, jo izmaiņu fails ir pārāk liels

Notiek ielāde…
Atcelt
Saglabāt