Automatically exported from code.google.com/p/planningalerts
Du kan inte välja fler än 25 ämnen Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.
 
 
 
 
 
 

86 rader
3.3 KiB

  1. import urllib2
  2. import urllib
  3. import urlparse
  4. import datetime, time
  5. import cgi
  6. import BeautifulSoup
  7. from PlanningUtils import PlanningApplication, \
  8. PlanningAuthorityResults, \
  9. getPostcodeFromText
# strftime format for the SrchDteFr/SrchDteTo query parameters. The doubled
# %% survives strftime as a literal '%', so the output is a URL-encoded
# date such as "01%2F10%2F2008" (%2F being an escaped '/').
search_date_format = "%d%%2F%m%%2F%Y"
# Format of the "date received" column in the results table, e.g. "01 Oct 2008".
received_date_format = "%d %b %Y"
  12. class KirkleesParser:
  13. def __init__(self, *args):
  14. self.authority_name = "Kirklees Council"
  15. self.authority_short_name = "Kirklees"
  16. self.base_url = "http://www.kirklees.gov.uk/business/planning/List.asp?SrchApp=&SrchName=&SrchPostCode=&SrchStreet=&SrchDetails=&SrchLocality=&RorD=A&SrchDteFr=%(date)s&SrchDteTo=%(date)s&Submit=Search&pageNum=%(pagenum)d"
  17. self.comments_email_address = "planning.contactcentre@kirklees.gov.uk"
  18. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  19. def getResultsByDayMonthYear(self, day, month, year):
  20. search_date = datetime.date(year, month, day)
  21. pagenum = 1
  22. while pagenum:
  23. response = urllib2.urlopen(self.base_url %{"date": search_date.strftime(search_date_format),
  24. "pagenum": pagenum}
  25. )
  26. soup = BeautifulSoup.BeautifulSoup(response.read())
  27. # This is not a nice way to find the results table, but I can't
  28. # see anything good to use, and it works...
  29. # There are two trs with style attributes per app. This will find all the first ones of the pairs.
  30. trs = soup.find("table", border="0", cellpadding="0", cellspacing="2", width="100%", summary="").findAll("tr", style=True)[::2]
  31. for tr in trs:
  32. tds = tr.findAll("td")
  33. date_received = datetime.datetime.strptime(tds[3].string.strip(), received_date_format).date()
  34. # Stop looking through the list if we have found one which is earlier than the date searched for.
  35. if date_received < search_date:
  36. # If we break out, then we won't want the next page
  37. pagenum = None
  38. break
  39. application = PlanningApplication()
  40. application.date_received = date_received
  41. application.council_reference = tds[0].small.string.strip()
  42. # The second <td> contains the address, split up with <br/>s
  43. application.address = ' '.join([x for x in tds[1].contents if isinstance(x, BeautifulSoup.NavigableString)])
  44. application.postcode = getPostcodeFromText(application.address)
  45. application.description = tds[2].string.strip()
  46. application.info_url = urlparse.urljoin(self.base_url, tr.findNext("a")['href'])
  47. application.comment_url = self.comments_email_address
  48. self._results.addApplication(application)
  49. else:
  50. # If we got through the whole list without breaking out,
  51. # then we'll want to get the next page.
  52. pagenum += 1
  53. return self._results
  54. def getResults(self, day, month, year):
  55. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  56. if __name__ == '__main__':
  57. parser = KirkleesParser()
  58. print parser.getResults(1,10,2008)
  59. # TODO