Automatically exported from code.google.com/p/planningalerts
 
 
 
 
 
 

140 líneas
4.7 KiB

  1. """
  2. This is the screenscraper for Westminster City Council.
  3. I have just noticed that there is a PublicAccess underneath all this, but
  4. it only has the apps in for which they are accepting comments, so I think
  5. we may as well use this url and get the lot...
  6. This is the PublicAccess url:
  7. http://publicaccess.westminster.gov.uk/publicaccess/
  8. """
  9. import urllib2
  10. import urllib
  11. import urlparse
  12. import datetime, time
  13. import cgi
  14. import sys
  15. from BeautifulSoup import BeautifulSoup
  16. from PlanningUtils import PlanningApplication, \
  17. PlanningAuthorityResults, \
  18. getPostcodeFromText
  19. date_format = "%d%%2F%m%%2F%Y"
  20. class WestminsterParser:
  21. def __init__(self, *args):
  22. self.authority_name = "City of Westminster"
  23. self.authority_short_name = "Westminster"
  24. self.base_url = "http://www3.westminster.gov.uk/planningapplications/currentsearch-results.cfm"
  25. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  26. def getResultsByDayMonthYear(self, day, month, year):
  27. search_day = datetime.date(year, month, day)
  28. # post_data = [
  29. # ("EFNO", ""),
  30. # ("STName", ""),
  31. # ("STNUMB", ""),
  32. # ("ADRSNO", ""),
  33. # ("WARD", "AllWards"),
  34. # ("AGT", ""),
  35. # ("ATCDE", "AllApps"),
  36. # ("DECDE", "AllDecs"),
  37. # ("DTErec", search_day.strftime(date_format)),
  38. # ("DTErecTo", search_day.strftime(date_format)),
  39. # ("DTEvalid", ""),
  40. # ("DTEvalidTo", ""),
  41. # ("APDECDE", "AllAppDecs"),
  42. # ("submit", "Start+Search"),
  43. # ]
  44. post_data = "REFNO=&STName=&STNUMB=&ADRSNO=&WARD=AllWards&AGT=&ATCDE=AllApps&DECDE=AllDecs&DTErec=%(date)s&DTErecTo=%(date)s&DTEvalid=&DTEvalidTo=&APDECDE=AllAppDecs&submit=Start+Search" %{"date": search_day.strftime(date_format)}
  45. while post_data:
  46. # Now get the search page
  47. # sys.stderr.write("Fetching: %s\n" %self.base_url)
  48. # sys.stderr.write("post data: %s\n" %post_data)
  49. response = urllib2.urlopen(self.base_url, post_data)
  50. # sys.stderr.write("Got it\n")
  51. soup = BeautifulSoup(response.read())
  52. # sys.stderr.write("Created soup\n")
  53. results_form = soup.find("form", {"name": "currentsearchresultsNext"})
  54. # Sort out the post_data for the next page, if there is one
  55. # If there is no next page then there will be no inputs in the form.
  56. # In this case, post_data will be '', which is false.
  57. # sys.stderr.write("Found form containing results\n")
  58. post_data = urllib.urlencode([(x['name'], x['value']) for x in results_form.findAll("input")])
  59. # sys.stderr.write("Got post data\n")
  60. # Each result has one link, and they are the only links in the form
  61. links = results_form.findAll("a")
  62. # sys.stderr.write("Got list of links\n")
  63. for link in links:
  64. # sys.stderr.write("Working on link: %s\n" %link['href'])
  65. application = PlanningApplication()
  66. application.date_received = search_day
  67. application.info_url = urlparse.urljoin(self.base_url, link['href'])
  68. application.council_reference = link.string.strip()
  69. application.address = link.findNext("td").string.strip()
  70. application.postcode = getPostcodeFromText(application.address)
  71. application.description = link.findNext("tr").findAll("td")[-1].string.strip()
  72. # To get the comment url, we're going to have to go to each info url :-(
  73. # sys.stderr.write("Fetching: %s\n" %application.info_url)
  74. info_response = urllib2.urlopen(application.info_url)
  75. # sys.stderr.write("Got it\n")
  76. info_soup = BeautifulSoup(info_response)
  77. comment_nav_string = info_soup.find(text="Comment on this case")
  78. if comment_nav_string:
  79. application.comment_url = comment_nav_string.parent['href']
  80. else:
  81. application.comment_url = "No Comments"
  82. #http://publicaccess.westminster.gov.uk/publicaccess/tdc/dcapplication/application_comments_entryform.aspx?caseno=K586GHRP03500
  83. self._results.addApplication(application)
  84. # sys.stderr.write("Finished that link\n")
  85. # sys.stderr.write("Finished while loop, returning stuff.\n")
  86. return self._results
  87. def getResults(self, day, month, year):
  88. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  89. if __name__ == '__main__':
  90. parser = WestminsterParser()
  91. print parser.getResults(1,8,2008)