Automatically exported from code.google.com/p/planningalerts
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

Wychavon.py 5.6 KiB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128
  1. """
  2. This is the screenscraper for planning apps from Wychavon District Council.
  3. This appears to be an Acolnet variant, and is searched by a block of months.
  4. """
  5. import urllib
  6. import urlparse
  7. import datetime
  8. from BeautifulSoup import BeautifulSoup
  9. from PlanningUtils import PlanningApplication, \
  10. PlanningAuthorityResults, \
  11. getPostcodeFromText
  12. class WychavonParser:
  13. def __init__(self, *args):
  14. self.authority_name = "Wychavon"
  15. self.authority_short_name = "Wychavon"
  16. # Currently hard coded--if this address updates, we'll need to scrape
  17. # the search form to get it each time.
  18. self.base_url = "http://www.e-wychavon.org.uk/scripts/plan2005/\
  19. acolnetcgi.exe?ACTION=UNWRAP&WhereDescription=General%20Search&\
  20. Whereclause3=%27%30%31%2F%7BEdtMonthEnd%7D%2F%7BEdtYearEnd%7D%27&\
  21. RIPNAME=Root%2EPages%2EPgeDC%2EPgeListCases"
  22. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  23. def getResultsByDayMonthYear(self, day, month, year):
  24. form_data = "EdtYearNo=&EdtCaseNo=&EdtApplicant=&EdtAgent=&EdtLocation"\
  25. + "=&EdtWard=&EdtMonthStart1=" + str(month) + "&EdtYearStart=" \
  26. + str(year) + "&EdtMonthEnd=" + str(month) + "&EdtYearEnd="\
  27. + str(year) + "&submit=Search"
  28. # Fetch the results
  29. response = urllib.urlopen(self.base_url, form_data)
  30. soup = BeautifulSoup(response.read())
  31. #Each set of results has its own table
  32. results_tables = soup.findAll("table", cellpadding="2", cols="4")
  33. for table in results_tables:
  34. application = PlanningApplication()
  35. trs = table.findAll("tr")
  36. application.council_reference = trs[0].findAll("td")[1].font.font.\
  37. font.string.strip()
  38. relative_info_url = trs[0].findAll("td")[1].a['href']
  39. application.info_url = urlparse.urljoin(self.base_url, relative_info_url)
  40. application.address = trs[1].findAll("td")[1].font.string.strip()
  41. application.postcode = getPostcodeFromText(application.address)
  42. #This code avoids an error if there's no description given.
  43. descrip = trs[2].findAll("td")[1].font.string
  44. if descrip == None:
  45. application.description = ""
  46. else:
  47. application.description = descrip.strip()
  48. rec_m, rec_d, rec_y = trs[1].findAll("td")[3].font.string.strip().\
  49. split("/")
  50. application.date_received = datetime.date(int(rec_y), int(rec_m), \
  51. int(rec_d))
  52. apptype = trs[0].findAll("td")[3].font.string
  53. # Avoids throwing an error if no apptype is given (this can happen)
  54. if apptype != None:
  55. apptype = apptype.strip()
  56. # Is all this really necessary? I don't know, but I've assumed that
  57. # it is. The form will appear without the suffix, I don't know if
  58. # the council's backend would accept it or not. Current behaviour
  59. # is to degrade silently to no suffix if it can't match an
  60. # application type.
  61. if apptype == "Telecommunications":
  62. # Don't know why it's a naked IP rather than sitting on the
  63. # same site, but there it is.
  64. application.comment_url = "http://81.171.139.151/WAM/createCom"\
  65. +"ment.do?action=CreateApplicationComment&applicationType=PLANNI"\
  66. +"NG&appNumber=T3/" + application.council_reference + "/TC"
  67. else:
  68. comment_url = "http://81.171.139.151/WAM/createComment.do?acti"\
  69. +"on=CreateApplicationComment&applicationType=PLANNING&appNumber"\
  70. +"=W/" + application.council_reference + "/"
  71. suffix = ""
  72. if apptype == "Householder planning application":
  73. suffix = "PP"
  74. elif apptype == "Non-householder planning application":
  75. suffix = "PN"
  76. elif apptype == "Outline applications":
  77. suffix = "OU"
  78. elif apptype == "Change of use":
  79. suffix = "CU"
  80. elif apptype == "Listed Building consent":
  81. suffix = "LB"
  82. elif apptype == "Advertisement application":
  83. suffix = "AA"
  84. elif apptype == "Certificate of Lawfulness Existing":
  85. suffix = "LUE"
  86. elif apptype == "Approval of reserved matters":
  87. suffix = "VOC"
  88. #These are all the ones that I found, except "Advice - Pre-app/
  89. #Householder", the suffix for which is inconsistent. The suffix
  90. #for this could be obtained by scraping the description page for
  91. #each application.
  92. application.comment_url = comment_url + suffix
  93. self._results.addApplication(application)
  94. return self._results
  95. def getResults(self, day, month, year):
  96. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  97. if __name__ == '__main__':
  98. parser = WychavonParser()
  99. #Put this in with constant numbers, copying the Barnsley example. Works for testing, but should it use the arguments for a real run?
  100. print parser.getResults(16,3,2009)