Browse Source

Fix Mole Valley scraper.

master
duncan.parkes 16 years ago
parent
commit
886d866adb
4 changed files with 13 additions and 16 deletions
  1. +1
    -1
      python_scrapers/Gosport.py
  2. +1
    -1
      python_scrapers/Planet.py
  3. +2
    -2
      python_scrapers/PublicAccess.py
  4. +9
    -12
      python_scrapers/SwiftLG.py

+ 1
- 1
python_scrapers/Gosport.py View File

@@ -96,5 +96,5 @@ class GosportParser:


if __name__ == '__main__': if __name__ == '__main__':
parser = GosportParser() parser = GosportParser()
print parser.getResults(1,10,2008)
print parser.getResults(20,11,2008)



+ 1
- 1
python_scrapers/Planet.py View File

@@ -151,7 +151,7 @@ if __name__ == '__main__':
# parser = PlanetParser("North Lincolnshire Council", "North Lincolnshire", "http://www.planning.northlincs.gov.uk/planet/ispforms.asp?ServiceKey=SysDoc-PlanetApplicationEnquiry") # parser = PlanetParser("North Lincolnshire Council", "North Lincolnshire", "http://www.planning.northlincs.gov.uk/planet/ispforms.asp?ServiceKey=SysDoc-PlanetApplicationEnquiry")
# parser = PlanetParser("Rydale District Council", "Rydale", "http://www.ryedale.gov.uk/ispforms.asp?serviceKey=SysDoc-PlanetApplicationEnquiry") # parser = PlanetParser("Rydale District Council", "Rydale", "http://www.ryedale.gov.uk/ispforms.asp?serviceKey=SysDoc-PlanetApplicationEnquiry")
parser = PlanetParser("Tewkesbury Borough Council", "Tewkesbury", "http://planning.tewkesbury.gov.uk/Planet/ispforms.asp?serviceKey=07WCC04163103430") parser = PlanetParser("Tewkesbury Borough Council", "Tewkesbury", "http://planning.tewkesbury.gov.uk/Planet/ispforms.asp?serviceKey=07WCC04163103430")
print parser.getResults(21,5,2008)
print parser.getResults(20,11,2008)
# parser = PlanetParser("Worcester City Council", "Worcester", "http://www.worcester.gov.uk:8080/planet/ispforms.asp?serviceKey=SysDoc-PlanetApplicationEnquiry", debug=True) # parser = PlanetParser("Worcester City Council", "Worcester", "http://www.worcester.gov.uk:8080/planet/ispforms.asp?serviceKey=SysDoc-PlanetApplicationEnquiry", debug=True)


# TODO # TODO


+ 2
- 2
python_scrapers/PublicAccess.py View File

@@ -350,8 +350,8 @@ class PublicAccessPropertyPageParser(HTMLParser.HTMLParser):




if __name__ == '__main__': if __name__ == '__main__':
day = 1
month = 8
day = 20
month = 11
year = 2008 year = 2008


#parser = PublicAccessParser("East Northants", "East Northants", "http://publicaccesssrv.east-northamptonshire.gov.uk/PublicAccess/tdc/", True) #parser = PublicAccessParser("East Northants", "East Northants", "http://publicaccesssrv.east-northamptonshire.gov.uk/PublicAccess/tdc/", True)


+ 9
- 12
python_scrapers/SwiftLG.py View File

@@ -6,7 +6,7 @@ import cgi
import re import re
import datetime import datetime


from BeautifulSoup import BeautifulSoup
import BeautifulSoup


from PlanningUtils import getPostcodeFromText, \ from PlanningUtils import getPostcodeFromText, \
PlanningAuthorityResults, \ PlanningAuthorityResults, \
@@ -85,7 +85,7 @@ class SwiftLGParser:


# Check for the no results warning # Check for the no results warning
if not contents.count("No Matching Applications Found"): if not contents.count("No Matching Applications Found"):
soup = BeautifulSoup(contents)
soup = BeautifulSoup.BeautifulSoup(contents)


# Get the links to later pages of results. # Get the links to later pages of results.
later_pages = soup.findAll("a", {"href": re.compile("WPHAPPSEARCHRES\.displayResultsURL.*StartIndex=\d*.*")}) later_pages = soup.findAll("a", {"href": re.compile("WPHAPPSEARCHRES\.displayResultsURL.*StartIndex=\d*.*")})
@@ -118,7 +118,7 @@ class SwiftLGParser:
this_page_url = urlparse.urljoin(self.base_url, url) this_page_url = urlparse.urljoin(self.base_url, url)
response = urllib2.urlopen(this_page_url) response = urllib2.urlopen(this_page_url)
contents = response.read() contents = response.read()
soup = BeautifulSoup(contents)
soup = BeautifulSoup.BeautifulSoup(contents)


results_table = self._findResultsTable(soup)#.body.find("table", {"class": "apas_tbl"}) results_table = self._findResultsTable(soup)#.body.find("table", {"class": "apas_tbl"})


@@ -152,11 +152,7 @@ class SwiftLGParser:
#LAND ADJ. BRAMBLING, HAWKENBURY ROAD, HAWKENBURY, TN120EA #LAND ADJ. BRAMBLING, HAWKENBURY ROAD, HAWKENBURY, TN120EA
#</td> #</td>


# For some reason, this doesn't work:
#address = tds[2].string

# But this does
address = tds[2].input.next.strip()
address = ' '.join([x for x in tds[2].contents if isinstance(x, BeautifulSoup.NavigableString)]).strip()


self._current_application.address = address self._current_application.address = address
self._current_application.postcode = getPostcodeFromText(address) self._current_application.postcode = getPostcodeFromText(address)
@@ -190,7 +186,8 @@ class MacclesfieldParser(SwiftLGParser):


class MoleValleyParser(SwiftLGParser): class MoleValleyParser(SwiftLGParser):
def _findResultsTable(self, soup): def _findResultsTable(self, soup):
return soup.findAll("table")[5]
# import pdb;pdb.set_trace()
return soup.findAll("table")[2]


class SloughParser(SwiftLGParser): class SloughParser(SwiftLGParser):
def _findResultsTable(self, soup): def _findResultsTable(self, soup):
@@ -207,18 +204,18 @@ if __name__ == '__main__':
# parser = IslingtonParser("Islington", "Islington", "https://www.islington.gov.uk/onlineplanning/apas/run/") # parser = IslingtonParser("Islington", "Islington", "https://www.islington.gov.uk/onlineplanning/apas/run/")
# parser = LakeDistrictParser("Lake District", "Lake District", "http://www.lake-district.gov.uk/swiftlg/apas/run/") # parser = LakeDistrictParser("Lake District", "Lake District", "http://www.lake-district.gov.uk/swiftlg/apas/run/")
# parser = SwiftLGParser("Maidstone Borough Council", "Maidstone", "http://digitalmaidstone.co.uk/swiftlg/apas/run/") # parser = SwiftLGParser("Maidstone Borough Council", "Maidstone", "http://digitalmaidstone.co.uk/swiftlg/apas/run/")
# parser = MoleValleyParser("Mole Valley", "Mole Valley", "http://www.molevalley.gov.uk/swiftlg/apas/run/")
parser = MoleValleyParser("Mole Valley", "Mole Valley", "http://www.molevalley.gov.uk/swiftlg/apas/run/")
# parser = SwiftLGParser("Pembrokeshire County Council", "Pembrokeshire", "http://planning.pembrokeshire.gov.uk/swiftlg/apas/run/") # parser = SwiftLGParser("Pembrokeshire County Council", "Pembrokeshire", "http://planning.pembrokeshire.gov.uk/swiftlg/apas/run/")
# parser = SwiftLGParser("Rochdale Metropolitan Borough Council", "Rochdale", "http://www.rochdale.gov.uk/swiftlg/apas/run/") # parser = SwiftLGParser("Rochdale Metropolitan Borough Council", "Rochdale", "http://www.rochdale.gov.uk/swiftlg/apas/run/")
# parser = SloughParser("Slough", "Slough", "http://www2.slough.gov.uk/swiftlg/apas/run/") # parser = SloughParser("Slough", "Slough", "http://www2.slough.gov.uk/swiftlg/apas/run/")
parser = SwiftLGParser("Snowdonia National Park", "Snowdonia", "http://www.snowdonia-npa.gov.uk/swiftlg/apas/run/")
# parser = SwiftLGParser("Snowdonia National Park", "Snowdonia", "http://www.snowdonia-npa.gov.uk/swiftlg/apas/run/")
# parser = SwiftLGParser("St Edmundsbury", "Bury St Edmunds", "http://www.stedmundsbury.gov.uk/swiftlg/apas/run/") # parser = SwiftLGParser("St Edmundsbury", "Bury St Edmunds", "http://www.stedmundsbury.gov.uk/swiftlg/apas/run/")
# parser = MacclesfieldParser("Macclesfield", "Macclesfield", "http://www.planportal.macclesfield.gov.uk/swiftlg/apas/run/") # parser = MacclesfieldParser("Macclesfield", "Macclesfield", "http://www.planportal.macclesfield.gov.uk/swiftlg/apas/run/")
# parser = SwiftLGParser("Daventry District Council", "Daventry", "http://62.231.149.150/swiftlg/apas/run/wphappcriteria.display") # parser = SwiftLGParser("Daventry District Council", "Daventry", "http://62.231.149.150/swiftlg/apas/run/wphappcriteria.display")
# parser = SwiftLGParser("Warrington Borough Council", "Warrington", "http://212.248.237.123:8080/swiftlg/apas/run/wphappcriteria.display") # parser = SwiftLGParser("Warrington Borough Council", "Warrington", "http://212.248.237.123:8080/swiftlg/apas/run/wphappcriteria.display")
# parser = SwiftLGParser("Cannock Chase District Council", "Cannock Chase", "http://planning.cannockchasedc.com/swiftlg/apas/run/wphappcriteria.display") # parser = SwiftLGParser("Cannock Chase District Council", "Cannock Chase", "http://planning.cannockchasedc.com/swiftlg/apas/run/wphappcriteria.display")
# parser = SwiftLGParser("London Borough of Enfield", "Enfield", "http://forms.enfield.gov.uk/swiftlg/apas/run/wphappcriteria.display") # parser = SwiftLGParser("London Borough of Enfield", "Enfield", "http://forms.enfield.gov.uk/swiftlg/apas/run/wphappcriteria.display")
print parser.getResults(1,8,2008)
print parser.getResults(20,11,2008)




# To Do: # To Do:


Loading…
Cancel
Save