Automatically exported from code.google.com/p/planningalerts
import urllib2
import urllib
import urlparse

import datetime, time
import cgi

import BeautifulSoup

from PlanningUtils import PlanningApplication, \
    PlanningAuthorityResults, \
    getPostcodeFromText

# Date format used in the search URL; %% is a literal %, so the formatted
# date comes out already URL-encoded (e.g. 01%2F10%2F2008).
search_date_format = "%d%%2F%m%%2F%Y"
# Date format used in the "received" column of the results table (e.g. "01 Oct 2008").
received_date_format = "%d %b %Y"

class KirkleesParser:
    """Scraper for planning applications listed by Kirklees Council."""

    def __init__(self, *args):
        self.authority_name = "Kirklees Council"
        self.authority_short_name = "Kirklees"
        self.base_url = "http://www.kirklees.gov.uk/business/planning/List.asp?SrchApp=&SrchName=&SrchPostCode=&SrchStreet=&SrchDetails=&SrchLocality=&RorD=A&SrchDteFr=%(date)s&SrchDteTo=%(date)s&Submit=Search&pageNum=%(pagenum)d"
        self.comments_email_address = "planning.contactcentre@kirklees.gov.uk"

        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

    def getResultsByDayMonthYear(self, day, month, year):
        search_date = datetime.date(year, month, day)

        pagenum = 1

        while pagenum:
            response = urllib2.urlopen(self.base_url % {"date": search_date.strftime(search_date_format),
                                                        "pagenum": pagenum})
            soup = BeautifulSoup.BeautifulSoup(response.read())

            # This is not a nice way to find the results table, but I can't
            # see anything good to use, and it works...
            # There are two trs with style attributes per app. This will find
            # all the first ones of the pairs.
            trs = soup.find("table", border="0", cellpadding="0", cellspacing="2", width="100%", summary="").findAll("tr", style=True)[::2]

            for tr in trs:
                tds = tr.findAll("td")
                date_received = datetime.datetime.strptime(tds[3].string.strip(), received_date_format).date()

                # Stop looking through the list if we have found one which is
                # earlier than the date searched for.
                if date_received < search_date:
                    # If we break out, then we won't want the next page.
                    pagenum = None
                    break

                application = PlanningApplication()
                application.date_received = date_received
                application.council_reference = tds[0].small.string.strip()

                # The second <td> contains the address, split up with <br/>s.
                application.address = ' '.join([x for x in tds[1].contents if isinstance(x, BeautifulSoup.NavigableString)])
                application.postcode = getPostcodeFromText(application.address)

                application.description = tds[2].string.strip()
                application.info_url = urlparse.urljoin(self.base_url, tr.findNext("a")['href'])
                application.comment_url = self.comments_email_address

                self._results.addApplication(application)
            else:
                # If we got through the whole list without breaking out,
                # then we'll want to get the next page.
                pagenum += 1

        return self._results

    def getResults(self, day, month, year):
        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()

if __name__ == '__main__':
    parser = KirkleesParser()
    print parser.getResults(1, 10, 2008)

# TODO
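
One quirk worth spelling out is the doubled percent sign in search_date_format: strftime treats %% as a literal %, so the formatted date is already URL-encoded and can be substituted straight into base_url. A small standalone sketch of that substitution (Python 2, reusing the template and format string from the class above; no PlanningUtils needed):

    import datetime

    search_date_format = "%d%%2F%m%%2F%Y"
    base_url = "http://www.kirklees.gov.uk/business/planning/List.asp?SrchApp=&SrchName=&SrchPostCode=&SrchStreet=&SrchDetails=&SrchLocality=&RorD=A&SrchDteFr=%(date)s&SrchDteTo=%(date)s&Submit=Search&pageNum=%(pagenum)d"

    search_date = datetime.date(2008, 10, 1)
    # strftime turns %% into a literal %, so the slashes arrive pre-escaped.
    print search_date.strftime(search_date_format)
    # -> 01%2F10%2F2008

    # The same dictionary substitution the scraper performs for page 1:
    print base_url % {"date": search_date.strftime(search_date_format), "pagenum": 1}

Because the same date is used for both SrchDteFr and SrchDteTo, each request covers exactly one day; pageNum is then incremented until the scraper sees an application received before the search date, at which point it stops paging.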