Automatically exported from code.google.com/p/planningalerts
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

128 lines
5.5 KiB

  1. """
  2. This is the screenscraper for planning apps from Wychavon District Council.
  3. This appears to be an Acolnet variant, and is searched by a block of months.
  4. """
  5. import urllib
  6. import urlparse
  7. import datetime
  8. from BeautifulSoup import BeautifulSoup
  9. from PlanningUtils import PlanningApplication, \
  10. PlanningAuthorityResults, \
  11. getPostcodeFromText
  12. class WychavonParser:
  13. def __init__(self, *args):
  14. self.authority_name = "Wychavon"
  15. self.authority_short_name = "Wychavon"
  16. # Currently hard coded--if this address updates, we'll need to scrape
  17. # the search form to get it each time.
  18. self.base_url = "http://www.e-wychavon.org.uk/scripts/plan2005/\
  19. acolnetcgi.exe?ACTION=UNWRAP&WhereDescription=General%20Search&\
  20. Whereclause3=%27%30%31%2F%7BEdtMonthEnd%7D%2F%7BEdtYearEnd%7D%27&\
  21. RIPNAME=Root%2EPages%2EPgeDC%2EPgeListCases"
  22. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  23. def getResultsByDayMonthYear(self, day, month, year):
  24. form_data = "EdtYearNo=&EdtCaseNo=&EdtApplicant=&EdtAgent=&EdtLocation"\
  25. + "=&EdtWard=&EdtMonthStart1=" + str(month) + "&EdtYearStart=" \
  26. + str(year) + "&EdtMonthEnd=" + str(month) + "&EdtYearEnd="\
  27. + str(year) + "&submit=Search"
  28. # Fetch the results
  29. response = urllib.urlopen(self.base_url, form_data)
  30. soup = BeautifulSoup(response.read())
  31. #Each set of results has its own table
  32. results_tables = soup.findAll("table", cellpadding="2", cols="4")
  33. for table in results_tables:
  34. application = PlanningApplication()
  35. trs = table.findAll("tr")
  36. application.council_reference = trs[0].findAll("td")[1].font.font.\
  37. font.string.strip()
  38. relative_info_url = trs[0].findAll("td")[1].a['href']
  39. application.info_url = urlparse.urljoin(self.base_url, relative_info_url)
  40. application.address = trs[1].findAll("td")[1].font.string.strip()
  41. application.postcode = getPostcodeFromText(application.address)
  42. #This code avoids an error if there's no description given.
  43. descrip = trs[2].findAll("td")[1].font.string
  44. if descrip == None:
  45. application.description = ""
  46. else:
  47. application.description = descrip.strip()
  48. date_format = "%d/%m/%y"
  49. date_string = trs[1].findAll("td")[3].font.string.strip()
  50. application.date_received = datetime.datetime.strptime(date_string, date_format)
  51. apptype = trs[0].findAll("td")[3].font.string
  52. # Avoids throwing an error if no apptype is given (this can happen)
  53. if apptype != None:
  54. apptype = apptype.strip()
  55. # Is all this really necessary? I don't know, but I've assumed that
  56. # it is. The form will appear without the suffix, I don't know if
  57. # the council's backend would accept it or not. Current behaviour
  58. # is to degrade silently to no suffix if it can't match an
  59. # application type.
  60. if apptype == "Telecommunications":
  61. # Don't know why it's a naked IP rather than sitting on the
  62. # same site, but there it is.
  63. application.comment_url = "http://81.171.139.151/WAM/createCom"\
  64. +"ment.do?action=CreateApplicationComment&applicationType=PLANNI"\
  65. +"NG&appNumber=T3/" + application.council_reference + "/TC"
  66. else:
  67. comment_url = "http://81.171.139.151/WAM/createComment.do?acti"\
  68. +"on=CreateApplicationComment&applicationType=PLANNING&appNumber"\
  69. +"=W/" + application.council_reference + "/"
  70. suffix = ""
  71. if apptype == "Householder planning application":
  72. suffix = "PP"
  73. elif apptype == "Non-householder planning application":
  74. suffix = "PN"
  75. elif apptype == "Outline applications":
  76. suffix = "OU"
  77. elif apptype == "Change of use":
  78. suffix = "CU"
  79. elif apptype == "Listed Building consent":
  80. suffix = "LB"
  81. elif apptype == "Advertisement application":
  82. suffix = "AA"
  83. elif apptype == "Certificate of Lawfulness Existing":
  84. suffix = "LUE"
  85. elif apptype == "Approval of reserved matters":
  86. suffix = "VOC"
  87. #These are all the ones that I found, except "Advice - Pre-app/
  88. #Householder", the suffix for which is inconsistent. The suffix
  89. #for this could be obtained by scraping the description page for
  90. #each application.
  91. application.comment_url = comment_url + suffix
  92. self._results.addApplication(application)
  93. return self._results
  94. def getResults(self, day, month, year):
  95. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  96. if __name__ == '__main__':
  97. parser = WychavonParser()
  98. #Put this in with constant numbers, copying the Barnsley example. Works for testing, but should it use the arguments for a real run?
  99. print parser.getResults(16,3,2009)