浏览代码

Fix Lake District scraper - small change to the HTML reducing the depth of tables.

master
duncan.parkes 15 年前
父节点
当前提交
5369f8e633
共有 2 个文件被更改,包括 5 次插入 和 8 次删除
  1. +1
    -1
      SitesToGenerate.csv
  2. +4
    -7
      python_scrapers/SwiftLG.py

+ 1
- 1
SitesToGenerate.csv 查看文件

@@ -159,7 +159,7 @@ full_name,short_name,planning_email,feed_url,external,disabled,notes,base_url,py
"Royal Borough of Kingston upon Thames","Kingston upon Thames",,,,,,,KingstonUponThames,KingstonParser,
"Kirklees Council",Kirklees,,,,,,,Kirklees,KirkleesParser,
"Knowsley Council",Knowsley,,,,,,http://publicaccess.knowsley.gov.uk/PublicAccess/tdc/,PublicAccess,PublicAccessParser,
"Lake District National Park Authority","Lake District",,,,,,http://www.lake-district.gov.uk/swiftlg/apas/run/,SwiftLG,LakeDistrictParser,
"Lake District National Park Authority","Lake District",,,,,,http://www.lake-district.gov.uk/swiftlg/apas/run/,SwiftLG,SwiftLGParser,
"London Borough of Lambeth",Lambeth,,,,,,http://planning.lambeth.gov.uk/publicaccess/tdc/,PublicAccess,PublicAccessParser,
"Lancaster City Council",Lancaster,,,,,,http://planapps.lancaster.gov.uk/PublicAccess/tdc/,PublicAccess,PublicAccessParser,
"Leeds City Council",Leeds,,,,,,http://planningapplications.leeds.gov.uk/publicaccess/tdc/,PublicAccess,PublicAccessParser,


+ 4
- 7
python_scrapers/SwiftLG.py 查看文件

@@ -50,6 +50,7 @@ class SwiftLGParser:
"""The usual situation is for the results table to contain
one row of headers, followed by a row per app.
If this is not the case, override this in a subclass."""
# import pdb;pdb.set_trace()
return results_table.findAll("tr")[1:]

def __init__(self,
@@ -176,10 +177,6 @@ class IslingtonParser(SwiftLGParser):
def _findResultsTable(self, soup):
return soup.table.table

class LakeDistrictParser(SwiftLGParser):
def _findResultsTable(self, soup):
return soup.table.table

class MacclesfieldParser(SwiftLGParser):
def _findResultsTable(self, soup):
return soup.findAll("table")[6]
@@ -202,9 +199,9 @@ if __name__ == '__main__':
# parser = EastHertsParser("East Hertfordshire", "East Herts", "http://e-services.eastherts.gov.uk/swiftlg/apas/run/")
# parser = GwyneddParser("Gwynedd", "Gwynedd", "http://www.gwynedd.gov.uk/swiftlg/apas/run/")
# parser = IslingtonParser("Islington", "Islington", "https://www.islington.gov.uk/onlineplanning/apas/run/")
# parser = LakeDistrictParser("Lake District", "Lake District", "http://www.lake-district.gov.uk/swiftlg/apas/run/")
parser = SwiftLGParser("Lake District", "Lake District", "http://www.lake-district.gov.uk/swiftlg/apas/run/")
# parser = SwiftLGParser("Maidstone Borough Council", "Maidstone", "http://digitalmaidstone.co.uk/swiftlg/apas/run/")
parser = MoleValleyParser("Mole Valley", "Mole Valley", "http://www.molevalley.gov.uk/swiftlg/apas/run/")
# parser = MoleValleyParser("Mole Valley", "Mole Valley", "http://www.molevalley.gov.uk/swiftlg/apas/run/")
# parser = SwiftLGParser("Pembrokeshire County Council", "Pembrokeshire", "http://planning.pembrokeshire.gov.uk/swiftlg/apas/run/")
# parser = SwiftLGParser("Rochdale Metropolitan Borough Council", "Rochdale", "http://www.rochdale.gov.uk/swiftlg/apas/run/")
# parser = SloughParser("Slough", "Slough", "http://www2.slough.gov.uk/swiftlg/apas/run/")
@@ -215,7 +212,7 @@ if __name__ == '__main__':
# parser = SwiftLGParser("Warrington Borough Council", "Warrington", "http://212.248.237.123:8080/swiftlg/apas/run/wphappcriteria.display")
# parser = SwiftLGParser("Cannock Chase District Council", "Cannock Chase", "http://planning.cannockchasedc.com/swiftlg/apas/run/wphappcriteria.display")
# parser = SwiftLGParser("London Borough of Enfield", "Enfield", "http://forms.enfield.gov.uk/swiftlg/apas/run/wphappcriteria.display")
print parser.getResults(20,11,2008)
print parser.getResults(18,3,2009)


# To Do:


正在加载...
取消
保存