Automatically exported from code.google.com/p/planningalerts
Вы не можете выбрать более 25 тем. Темы должны начинаться с буквы или цифры, могут содержать дефисы (-) и должны содержать не более 35 символов.
 
 
 
 
 
 

86 строки
3.3 KiB

  1. import urllib2
  2. import urllib
  3. import urlparse
  4. import datetime, time
  5. import cgi
  6. import BeautifulSoup
  7. from PlanningUtils import PlanningApplication, \
  8. PlanningAuthorityResults, \
  9. getPostcodeFromText
# Date format for the search URL: "/" is URL-encoded as %2F, so the literal
# "%" characters are doubled to survive the "%"-interpolation of base_url.
search_date_format = "%d%%2F%m%%2F%Y"
# Format of the "date received" column in the results table, e.g. "01 Oct 2008".
received_date_format = "%d %b %Y"
  12. class KirkleesParser:
  13. def __init__(self, *args):
  14. self.authority_name = "Kirklees Council"
  15. self.authority_short_name = "Kirklees"
  16. self.base_url = "http://www.kirklees.gov.uk/business/planning/List.asp?SrchApp=&SrchName=&SrchPostCode=&SrchStreet=&SrchDetails=&SrchLocality=&RorD=A&SrchDteFr=%(date)s&SrchDteTo=%(date)s&Submit=Search&pageNum=%(pagenum)d"
  17. self.comments_email_address = "planning.contactcentre@kirklees.gov.uk"
  18. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  19. def getResultsByDayMonthYear(self, day, month, year):
  20. search_date = datetime.date(year, month, day)
  21. pagenum = 1
  22. while pagenum:
  23. response = urllib2.urlopen(self.base_url %{"date": search_date.strftime(search_date_format),
  24. "pagenum": pagenum}
  25. )
  26. soup = BeautifulSoup.BeautifulSoup(response.read())
  27. # This is not a nice way to find the results table, but I can't
  28. # see anything good to use, and it works...
  29. # There are two trs with style attributes per app. This will find all the first ones of the pairs.
  30. trs = soup.find("table", border="0", cellpadding="0", cellspacing="2", width="100%", summary="").findAll("tr", style=True)[::2]
  31. for tr in trs:
  32. tds = tr.findAll("td")
  33. date_received = datetime.datetime.strptime(tds[3].string.strip(), received_date_format).date()
  34. # Stop looking through the list if we have found one which is earlier than the date searched for.
  35. if date_received < search_date:
  36. # If we break out, then we won't want the next page
  37. pagenum = None
  38. break
  39. application = PlanningApplication()
  40. application.date_received = date_received
  41. application.council_reference = tds[0].small.string.strip()
  42. # The second <td> contains the address, split up with <br/>s
  43. application.address = ' '.join([x for x in tds[1].contents if isinstance(x, BeautifulSoup.NavigableString)])
  44. application.postcode = getPostcodeFromText(application.address)
  45. application.description = tds[2].string.strip()
  46. application.info_url = urlparse.urljoin(self.base_url, tr.findNext("a")['href'])
  47. application.comment_url = self.comments_email_address
  48. self._results.addApplication(application)
  49. else:
  50. # If we got through the whole list without breaking out,
  51. # then we'll want to get the next page.
  52. pagenum += 1
  53. return self._results
  54. def getResults(self, day, month, year):
  55. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  56. if __name__ == '__main__':
  57. parser = KirkleesParser()
  58. print parser.getResults(1,10,2008)
  59. # TODO