Przeglądaj źródła

Add Carmarthenshire scraper.

master
duncan.parkes 16 lat temu
rodzic
commit
fb7ba977ae
3 zmienionych plików z 80 dodań i 0 usunięć
  1. +78
    -0
      python_scrapers/Carmarthenshire.py
  2. +1
    -0
      python_scrapers/OtherFilesToCopy.csv
  3. +1
    -0
      python_scrapers/SitesToGenerate.csv

+ 78
- 0
python_scrapers/Carmarthenshire.py Wyświetl plik

@@ -0,0 +1,78 @@
import urllib2
import urllib
import urlparse

import datetime, time
import cgi

from BeautifulSoup import BeautifulSoup

from PlanningUtils import PlanningApplication, \
PlanningAuthorityResults, \
getPostcodeFromText

class CarmarthenshireParser:
    """Scraper for Carmarthenshire County Council planning applications.

    Runs the council's planning search for a single day, follows every
    result to its information page, and collects the applications in a
    PlanningAuthorityResults object.
    """

    def __init__(self, *args):
        """Set up the authority details and the results container.

        Any positional arguments are accepted and ignored.
        """
        # Used in place of a comment URL on every application.
        self.comments_email_address = "planning@carmarthenshire.gov.uk"

        self.authority_name = "Carmarthenshire County Council"
        self.authority_short_name = "Carmarthenshire"

        # Date-range search with the same day at both ends of the range.
        # "%%2F" is an escaped "%2F" (a URL-encoded "/") once the
        # day/month/year placeholders have been filled in.
        self.base_url = "http://www.carmarthenshire.gov.uk/CCC_APPS/eng/plannaps/CCC_PlanningApplicationsResults.asp?datemode=range&in_lo_date=%(day)s%%2F%(month)s%%2F%(year)s&in_hi_date=%(day)s%%2F%(month)s%%2F%(year)s&SUBMIT=Search"

        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

    def _fetch_soup(self, url):
        """Download url and return its content parsed by BeautifulSoup.

        The HTTP response is always closed, even when reading or parsing
        fails, so sockets are not left open across the many info-page
        requests a single search can trigger.
        """
        response = urllib2.urlopen(url)
        try:
            return BeautifulSoup(response.read())
        finally:
            response.close()

    def getResultsByDayMonthYear(self, day, month, year):
        """Scrape the applications listed for the given day.

        day, month and year are integers. Every application found is added
        to self._results, which is then returned. self._results is created
        once in __init__, so repeated calls on the same instance keep adding
        to the same container.

        Raises ValueError if the date is invalid (before any request is
        made). Request failures and unexpected page layouts are not caught
        here and propagate to the caller.
        """
        search_day = datetime.date(year, month, day)

        # Now get the search page
        search_url = self.base_url % {
            "day": day,
            "month": month,
            "year": year,
        }
        soup = self._fetch_soup(search_url)

        rows = soup.findAll("tr", valign="middle")

        for index, row in enumerate(rows):
            # The odd rows are just spacers
            if index % 2:
                continue

            application = PlanningApplication()

            cells = row.findAll("td")
            application.date_received = search_day
            application.council_reference = cells[1].a.string
            application.address = cells[3].a.string
            application.postcode = getPostcodeFromText(application.address)
            # All the links in this <tr> go to the same place...
            application.info_url = urlparse.urljoin(self.base_url, row.a['href'])

            # Still looking for description and comment url
            # For the description, we'll need the info page
            info_soup = self._fetch_soup(application.info_url)

            description_label = info_soup.find(text="Description")
            application.description = description_label.findNext("td").findNext("td").font.string

            # While we're here, let's get the OSGB grid ref.
            # The unpacking needs exactly two "-"-separated parts; anything
            # else raises ValueError.
            grid_reference = info_soup.find(text="Grid Reference").findNext("td").font.string
            application.osgb_x, application.osgb_y = grid_reference.split("-")

            # We'll have to use an email address for comments
            application.comment_url = self.comments_email_address

            self._results.addApplication(application)

        return self._results

    def getResults(self, day, month, year):
        """Return displayXML() of the results scraped for the given day.

        day, month and year may be anything int() accepts (e.g. strings);
        they are converted before being passed to getResultsByDayMonthYear.
        """
        results = self.getResultsByDayMonthYear(int(day), int(month), int(year))
        return results.displayXML()

if __name__ == '__main__':
    # Manual run: scrape one fixed day (8 August 2008) and print whatever
    # getResults returns for it.
    parser = CarmarthenshireParser()
    output = parser.getResults(8, 8, 2008)
    print(output)


+ 1
- 0
python_scrapers/OtherFilesToCopy.csv Wyświetl plik

@@ -45,3 +45,4 @@
"AmberValley.py", "420"
"Aberdeenshire.py", "420"
"Brent.py", "420"
"Carmarthenshire.py", "420"

+ 1
- 0
python_scrapers/SitesToGenerate.csv Wyświetl plik

@@ -249,3 +249,4 @@
"Amber Valley Borough Council", "Amber Valley", "", "AmberValley", "AmberValleyParser"
"Aberdeenshire Council", "Aberdeenshire", "", "Aberdeenshire", "AberdeenshireParser"
"London Borough of Brent", "Brent", "", "Brent", "BrentParser"
"Carmarthenshire County Council", "Carmarthenshire", "", "Carmarthenshire", "CarmarthenshireParser"

Ładowanie…
Anuluj
Zapisz