Automatically exported from code.google.com/p/planningalerts
 
 
 
 
 
 

86 lines
3.3 KiB

  1. import urllib2
  2. import urllib
  3. import urlparse
  4. import datetime, time
  5. import cgi
  6. import BeautifulSoup
  7. from PlanningUtils import PlanningApplication, \
  8. PlanningAuthorityResults, \
  9. getPostcodeFromText
  # strftime pattern for the SrchDteFr / SrchDteTo query parameters.
  # "%%2F" renders as the literal text "%2F" (a URL-encoded "/"), so
  # 1 October 2008 is formatted as "01%2F10%2F2008".
  search_date_format = "%d%%2F%m%%2F%Y"

  # strptime pattern used to parse the received date shown in the
  # results table (day, abbreviated month name, four-digit year).
  received_date_format = "%d %b %Y"
  class KirkleesParser:
      """Scraper for the Kirklees Council planning application search pages.

      Result pages are fetched from ``base_url`` one page at a time, parsed
      with BeautifulSoup, and every application found is added to a
      PlanningAuthorityResults object held in ``self._results``.

      Note that ``self._results`` is created once in ``__init__``, so
      calling the search methods repeatedly on the same instance keeps
      adding to the same results object.
      """

      def __init__(self, *args):
          """Set up the authority details and an empty results container.

          Positional arguments are accepted but not used.
          """
          self.authority_name = "Kirklees Council"
          self.authority_short_name = "Kirklees"
          # %(date)s is filled in for both ends of the date range and
          # %(pagenum)d selects the page of results to fetch.
          self.base_url = "http://www.kirklees.gov.uk/business/planning/List.asp?SrchApp=&SrchName=&SrchPostCode=&SrchStreet=&SrchDetails=&SrchLocality=&RorD=A&SrchDteFr=%(date)s&SrchDteTo=%(date)s&Submit=Search&pageNum=%(pagenum)d"
          # Comments are sent by email, so this address is stored as each
          # application's comment_url.
          self.comments_email_address = "planning.contactcentre@kirklees.gov.uk"
          self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

      def _find_result_rows(self, soup):
          """Return the first <tr> of each application in a parsed page.

          Returns an empty list when the results table is not present.
          """
          # This is not a nice way to find the results table, but there is
          # nothing better to key on in the page.
          table = soup.find("table", border="0", cellpadding="0", cellspacing="2", width="100%", summary="")
          if table is None:
              return []
          # There are two trs with style attributes per application; the
          # slice keeps only the first one of each pair.
          return table.findAll("tr", style=True)[::2]

      def getResultsByDayMonthYear(self, day, month, year):
          """Collect the applications received on the given date.

          day, month and year are integers. Pages are fetched in turn until
          one of the following happens:

          * an application received before the search date is seen,
          * a page has no results table, or
          * a page has no application rows.

          Returns the PlanningAuthorityResults object held by this parser.
          Errors raised by urllib2.urlopen are not caught here.
          """
          search_date = datetime.date(year, month, day)
          # The formatted date is the same for every page, so build it once.
          formatted_date = search_date.strftime(search_date_format)

          pagenum = 1
          while pagenum:
              url = self.base_url % {"date": formatted_date, "pagenum": pagenum}
              response = urllib2.urlopen(url)
              try:
                  soup = BeautifulSoup.BeautifulSoup(response.read())
              finally:
                  # Always release the connection, even if parsing fails.
                  response.close()

              trs = self._find_result_rows(soup)
              if not trs:
                  # No table or no rows: we have run past the last page.
                  # Without this check the for/else below would keep
                  # incrementing pagenum and request pages forever.
                  break

              for tr in trs:
                  tds = tr.findAll("td")

                  date_received = datetime.datetime.strptime(tds[3].string.strip(), received_date_format).date()

                  # Stop looking through the list if we have found one which
                  # is earlier than the date searched for.
                  if date_received < search_date:
                      # If we break out, then we won't want the next page.
                      pagenum = None
                      break

                  application = PlanningApplication()
                  application.date_received = date_received
                  application.council_reference = tds[0].small.string.strip()

                  # The second <td> contains the address, split up with
                  # <br/>s; keep only the text nodes and join them.
                  application.address = ' '.join([x for x in tds[1].contents if isinstance(x, BeautifulSoup.NavigableString)])
                  application.postcode = getPostcodeFromText(application.address)

                  application.description = tds[2].string.strip()

                  # The link to the application details is the first <a>
                  # found after this row in the document.
                  application.info_url = urlparse.urljoin(self.base_url, tr.findNext("a")['href'])
                  application.comment_url = self.comments_email_address

                  self._results.addApplication(application)
              else:
                  # If we got through the whole list without breaking out,
                  # then we'll want to get the next page.
                  pagenum += 1

          return self._results

      def getResults(self, day, month, year):
          """Run the search for the given date and return displayXML()'s output.

          day, month and year may be given as anything int() accepts.
          """
          return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  if __name__ == '__main__':
      # Manual check when run as a script: fetch the applications for
      # 1 October 2008 and print whatever getResults returns.
      kirklees = KirkleesParser()
      print(kirklees.getResults(1, 10, 2008))
  59. # TODO