
Fix Mendip - now using PlanningExplorer rather than a bespoke scraper.

import/raw
duncan.parkes@gmail.com, 15 years ago
commit fa9d72f15b
5 changed files with 15 additions and 78 deletions
  1. trunk/SitesToGenerate.csv (+2 -2)
  2. trunk/python_scrapers/Maldon.py (+1 -1)
  3. trunk/python_scrapers/Mendip.py (+0 -71)
  4. trunk/python_scrapers/PlanningExplorer.py (+9 -1)
  5. trunk/python_scrapers/Shropshire.py (+3 -3)
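
The upshot of the change: Mendip is now scraped through the shared Northgate Planning Explorer code path instead of the bespoke screen scraper deleted below. A minimal sketch of driving the new parser, mirroring the __main__ block added to PlanningExplorer.py in this commit (module and class names are taken from the diff; as with the old bespoke scraper, getResults returns the day's applications serialised as XML):

from PlanningExplorer import MendipParser

# Mendip now reuses the generic Planning Explorer scraper; only the council
# name, short name and base URL are Mendip-specific.
parser = MendipParser("Mendip District Council", "Mendip", "http://planning.mendip.gov.uk/")

# Fetch and print applications received on 12 June 2009.
print parser.getResults(12, 6, 2009)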

trunk/SitesToGenerate.csv (+2 -2)

@@ -179,7 +179,7 @@
"Mansfield District Council","Mansfield",,,,,,"http://www.mansfield.gov.uk/Fastweb/","FastWeb","FastWeb",
"Medway Council","Medway",,,,,,,"Medway","MedwayParser",
"Melton Borough Council","Melton",,,,,,"http://publicaccess.melton.gov.uk/publicaccess/tdc/","PublicAccess","PublicAccessParser",
-"Mendip District Council","Mendip",,,,,,"Mendip","MendipParser",
+"Mendip District Council","Mendip",,,,,"Now using planningexplorer rather than bespoke","http://planning.mendip.gov.uk/","PlanningExplorer","MendipParser",
"London Borough of Merton","Merton",,,,,,"http://planning.merton.gov.uk/","PlanningExplorer","MertonParser",
"Mid Bedfordshire District Council","Mid Beds",,,,,,"http://www.midbeds.gov.uk/acolnetDC/DCpages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch","AcolnetParser","MidBedsParser",
"Mid Devon District Council","Mid Devon",,,,,,"http://planning.middevon.gov.uk/publicaccess/tdc/","PublicAccess","PublicAccessParser",
@@ -314,7 +314,7 @@
"West Sussex County Council","West Sussex",,,,,,"http://eplanning.westsussex.gov.uk/ePlanningOPS/loadResults.do","AtriumePlanning","AtriumePlanningParser",
"West Wiltshire District Council","West Wiltshire",,,,,,"http://planning.westwiltshire.gov.uk/PublicAccess/tdc/","PublicAccess","PublicAccessParser",
"Wiltshire County Council","Wiltshire",,,,,,,,,"wiltshire"
-"Winchester City Council","Winchester",,,,,,"http://win2padmz.winchester.gov.uk/publicaccess/tdc/","PublicAccess","PublicAccessParser",
+"Winchester City Council","Winchester",,,,1,"Now using a bespoke system","http://win2padmz.winchester.gov.uk/publicaccess/tdc/","PublicAccess","PublicAccessParser",
"Woking Borough Council","Woking",,,,,,"http://caps.woking.gov.uk/publicaccess/tdc/","PublicAccess","PublicAccessParser",
"Wolverhampton City Council","Wolverhampton",,,,,,"http://planningonline.wolverhampton.gov.uk/PublicAccess/tdc/","PublicAccess","PublicAccessParser",
"Worcester City Council","Worcester",,,,,,"http://www.worcester.gov.uk:8080/planet/ispforms.asp?serviceKey=SysDoc-PlanetApplicationEnquiry","Planet","PlanetParser",

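Each row of SitesToGenerate.csv maps a council to the scraper system and parser class that handle it; the Mendip row now names the PlanningExplorer system and its base URL rather than the bespoke "Mendip" module. A rough sketch of reading the file (the column positions below are inferred from the rows shown and are an assumption, not the project's actual generator code):

import csv

# Assumed column layout, inferred from the visible rows:
# 0 = council name, 1 = short name, 7 = base URL, 8 = system, 9 = parser class.
for row in csv.reader(open("trunk/SitesToGenerate.csv")):
    if len(row) < 10 or not row[9]:
        continue
    print "%s (%s): %s / %s at %s" % (row[0], row[1], row[8], row[9], row[7])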

trunk/python_scrapers/Maldon.py (+1 -1)

@@ -106,7 +106,7 @@ class PendleParser(MaldonParser):
if __name__ == '__main__':
#parser = MaldonParser("Maldon District Council", "Maldon", "http://forms.maldon.gov.uk:8080/PlanApp/jsp/searchPlanApp-action.do")
    parser = PendleParser("Pendle Borough Council", "Pendle", "http://bopdoccip.pendle.gov.uk/PlanApp/jsp/searchPlanApp-action.do")
-    print parser.getResults(21,5,2008)
+    print parser.getResults(12,6,2009)

# TODO



trunk/python_scrapers/Mendip.py (+0 -71)

@@ -1,71 +0,0 @@
import urllib2
import urllib
import urlparse

import datetime

from BeautifulSoup import BeautifulSoup

from PlanningUtils import PlanningApplication, \
    PlanningAuthorityResults, \
    getPostcodeFromText

date_format = "%d%%2F%m%%2F%Y"

class MendipParser:
    def __init__(self, *args):
        self.authority_name = "Mendip District Council"
        self.authority_short_name = "Mendip"

        # The site itself uses a search by validated date, but received date seems
        # to be there too, and to work...
        # self.base_url = "http://www.mendip.gov.uk/PODS/ApplicationSearchResults.asp?DateRecvFrom=&DateRecvTo=&DateValidFrom=%(date)s&DateValidTo=%(date)s&Search=Search"
        self.base_url = "http://www.mendip.gov.uk/PODS/ApplicationSearchResults.asp?DateRecvFrom=%(date)s&DateRecvTo=%(date)s&Search=Search"
        self.comment_url = "http://www.mendip.gov.uk/ShowForm.asp?fm_fid=107&AppNo=%(reference)s&SiteAddress=%(address)s"

        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)


    def getResultsByDayMonthYear(self, day, month, year):
        search_date = datetime.date(year, month, day)

        search_url = self.base_url %{"date": search_date.strftime(date_format)}

        while search_url:
            response = urllib2.urlopen(search_url)
            soup = BeautifulSoup(response.read())

            if soup.find(text="No applications matched the search criteria"):
                break

            for tr in soup.find("table", summary="Application Results").tbody.findAll("tr"):
                application = PlanningApplication()
                application.date_received = search_date

                tds = tr.findAll("td")

                application.council_reference = tds[0].a.string.strip()
                application.info_url = urlparse.urljoin(self.base_url, tds[0].a['href'])
                application.description = tds[1].p.string.strip()
                application.address = tds[2].p.string.strip()

                application.comment_url = self.comment_url %{
                    "reference": application.council_reference,
                    "address": urllib.quote_plus(application.address),
                    }

                self._results.addApplication(application)

            next_link = soup.find("a", title="Go to the next page")
            search_url = urlparse.urljoin(self.base_url, next_link['href']) if next_link else None

        return self._results


    def getResults(self, day, month, year):
        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()

if __name__ == '__main__':
    parser = MendipParser()
    print parser.getResults(1,10,2008)


trunk/python_scrapers/PlanningExplorer.py (+9 -1)

@@ -681,6 +681,13 @@ class ConwyParser(BroadlandLike, PlanningExplorerParser):

    use_firefox_user_agent = True

+class MendipParser(BroadlandLike, PlanningExplorerParser):
+    comments_email_address = "customerservices@mendip.gov.uk"


+# search_url_path = "northgate/planningexplorer/generalsearch.aspx"

+#&first=1&quick=1&search=&txtApplicationNumber=&txtApplicantName=&txtAgentName=&txtProposal=&txtSiteAddress=&txtStreetName=&cboWardCode=&cboParishCode=&cboApplicationTypeCode=&cboDevelopmentTypeCode=&cboStatusCode=&cboSelectDateValue=DATE_RECEIVED&cboMonths=1&cboDays=1&rbGroup=rbRange&dateStart=12%2F06%2F2009&dateEnd=12%2F06%2F2009&edrDateSelection=&csbtnSearch=Search

+#&txtApplicationNumber=&txtProposal=&txtSiteAddress=&cboWardCode=&cboParishCode=&cboApplicationTypeCode=&cboDevelopmentTypeCode=&cboStatusCode=&cboSelectDateValue=DATE_RECEIVED&cboMonths=1&cboDays=1&rbGroup=rbRange&dateStart=10%2F07%2F2008&dateEnd=20%2F07%2F2008&edrDateSelection=&csbtnSearch=Search

@@ -731,7 +738,8 @@ if __name__ == '__main__':
# parser = WalthamForestParser("Waltham Forest", "Waltham Forest", "http://planning.walthamforest.gov.uk/")
# parser = ConwyParser("Conwy County Borough Council", "Conwy", "http://www.conwy.gov.uk/")
# parser = MertonParser("London Borough of Merton", "Merton", "http://planning.merton.gov.uk")
-    print parser.getResults(30, 3, 2009)
+    parser = MendipParser("Mendip District Council", "Mendip", "http://planning.mendip.gov.uk/")
+    print parser.getResults(12, 6, 2009)

# To Do

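For reference, new Planning Explorer councils are added to PlanningExplorer.py by subclassing, exactly as the MendipParser hunk above does. A hypothetical sketch (ExampleCouncilParser and its email address are made up; use_firefox_user_agent and comments_email_address are class attributes already used by parsers in this file, and search_url_path is the override left commented out in the Mendip hunk):

# Hypothetical example only -- not part of this commit.
class ExampleCouncilParser(BroadlandLike, PlanningExplorerParser):
    use_firefox_user_agent = True
    comments_email_address = "planning@example.gov.uk"

The new class would then be wired up through a SitesToGenerate.csv row naming "PlanningExplorer" as the system and "ExampleCouncilParser" as the parser class.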


trunk/python_scrapers/Shropshire.py (+3 -3)

@@ -149,12 +149,12 @@ class SouthNorthamptonshireParser(ShropshireParser):
if __name__ == '__main__':
# parser = ShropshireParser("Shropshire County Council", "Shropshire", "http://planning.shropshire.gov.uk/PlanAppList.asp")
# print parser.getResults(6,6,2008)
-# parser = NorthYorkshireParser("North Yorkshire County Council", "North Yorkshire", "https://onlineplanningregister.northyorks.gov.uk/Online%20Register/PlanAppList.asp")
+    parser = NorthYorkshireParser("North Yorkshire County Council", "North Yorkshire", "https://onlineplanningregister.northyorks.gov.uk/Online%20Register/PlanAppList.asp")
# print parser.getResults(10,6,2008)
# parser = SouthNorthamptonshireParser("South Northamptonshire Council", "South Northamptonshire", "http://snc.planning-register.co.uk/PlanAppList.asp")
# print parser.getResults(5,6,2008)
-    parser = ShropshireParser("Purbeck District Council", "Purbeck", "http://www.purbeck.gov.uk/planning/PlanAppList.asp")
-    print parser.getResults(5,6,2008)
+# parser = ShropshireParser("Purbeck District Council", "Purbeck", "http://www.purbeck.gov.uk/planning/PlanAppList.asp")
+    print parser.getResults(12,6,2009)

# TODO


