ソースを参照

Add Carmarthenshire scraper.

master
duncan.parkes 16年前
コミット
fb7ba977ae
3個のファイルの変更80行の追加0行の削除
  1. +78
    -0
      python_scrapers/Carmarthenshire.py
  2. +1
    -0
      python_scrapers/OtherFilesToCopy.csv
  3. +1
    -0
      python_scrapers/SitesToGenerate.csv

+ 78
- 0
python_scrapers/Carmarthenshire.py ファイルの表示

@@ -0,0 +1,78 @@
import urllib2
import urllib
import urlparse

import datetime, time
import cgi

from BeautifulSoup import BeautifulSoup

from PlanningUtils import PlanningApplication, \
PlanningAuthorityResults, \
getPostcodeFromText

class CarmarthenshireParser:
    """Scraper for the Carmarthenshire County Council planning search.

    Searches the council's results page for a single day, follows each
    application's link to its info page for the description and grid
    reference, and collects the applications in a PlanningAuthorityResults.

    Note: the results object is created once in __init__, so applications
    from repeated searches on the same instance accumulate in it.
    """

    def __init__(self, *args):
        """Set up the authority details, search URL template and results.

        Any positional arguments are accepted and ignored.
        """
        # The site offers no comment form, so this address is handed out
        # as the "comment url" for every application.
        self.comments_email_address = "planning@carmarthenshire.gov.uk"

        self.authority_name = "Carmarthenshire County Council"
        self.authority_short_name = "Carmarthenshire"
        # Template for a date-range search; the same day is substituted for
        # both ends of the range. "%%2F" is a literal URL-encoded "/".
        self.base_url = "http://www.carmarthenshire.gov.uk/CCC_APPS/eng/plannaps/CCC_PlanningApplicationsResults.asp?datemode=range&in_lo_date=%(day)s%%2F%(month)s%%2F%(year)s&in_hi_date=%(day)s%%2F%(month)s%%2F%(year)s&SUBMIT=Search"

        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

    def _fetch_soup(self, url):
        """Download url and return its body parsed by BeautifulSoup.

        The HTTP response is always closed, even if reading or parsing
        raises, so one connection is not leaked per page fetched.
        """
        response = urllib2.urlopen(url)
        try:
            return BeautifulSoup(response.read())
        finally:
            response.close()

    def getResultsByDayMonthYear(self, day, month, year):
        """Scrape the applications listed for the given date.

        day, month and year are integers. Raises ValueError (from
        datetime.date) before any request is made if they do not form a
        valid date. Returns the instance's results object with one
        application added per result row.
        """
        search_day = datetime.date(year, month, day)

        # Now get the search page
        soup = self._fetch_soup(self.base_url % {"day": day,
                                                 "month": month,
                                                 "year": year,
                                                 })

        trs = soup.findAll("tr", valign="middle")

        for index, tr in enumerate(trs):
            # The odd trs are just spacers
            if index % 2 != 0:
                continue

            application = PlanningApplication()

            tds = tr.findAll("td")
            application.date_received = search_day
            application.council_reference = tds[1].a.string
            application.address = tds[3].a.string
            application.postcode = getPostcodeFromText(application.address)
            # All the links in this <tr> go to the same place...
            application.info_url = urlparse.urljoin(self.base_url, tr.a['href'])

            # Still looking for description and comment url
            # For the description, we'll need the info page
            info_soup = self._fetch_soup(application.info_url)

            application.description = info_soup.find(text="Description").findNext("td").findNext("td").font.string

            # While we're here, let's get the OSGB grid ref.
            # The page shows it as a single "x-y" string.
            application.osgb_x, application.osgb_y = info_soup.find(text="Grid Reference").findNext("td").font.string.split("-")

            # We'll have to use an email address for comments
            application.comment_url = self.comments_email_address

            self._results.addApplication(application)

        return self._results

    def getResults(self, day, month, year):
        """Return the day's results as rendered by displayXML().

        day, month and year may be ints or numeric strings; they are
        converted with int() before searching.
        """
        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()

if __name__ == '__main__':
    # Manual run: scrape 8 August 2008 (day, month, year) and print the
    # XML that getResults returns.
    carmarthenshire = CarmarthenshireParser()
    print(carmarthenshire.getResults(8, 8, 2008))


+ 1
- 0
python_scrapers/OtherFilesToCopy.csv ファイルの表示

@@ -45,3 +45,4 @@
"AmberValley.py", "420"
"Aberdeenshire.py", "420"
"Brent.py", "420"
"Carmarthenshire.py", "420"

+ 1
- 0
python_scrapers/SitesToGenerate.csv ファイルの表示

@@ -249,3 +249,4 @@
"Amber Valley Borough Council", "Amber Valley", "", "AmberValley", "AmberValleyParser"
"Aberdeenshire Council", "Aberdeenshire", "", "Aberdeenshire", "AberdeenshireParser"
"London Borough of Brent", "Brent", "", "Brent", "BrentParser"
"Carmarthenshire County Council", "Carmarthenshire", "", "Carmarthenshire", "CarmarthenshireParser"

読み込み中…
キャンセル
保存