Automatically exported from code.google.com/p/planningalerts
Nelze vybrat více než 25 témat Téma musí začínat písmenem nebo číslem, může obsahovat pomlčky („-“) a může být dlouhé až 35 znaků.
 
 
 
 
 
 

129 řádky
5.6 KiB

  1. """
  2. This is the screenscraper for planning apps from Wychavon District Council.
  3. This appears to be an Acolnet variant, and is searched by a block of months.
  4. """
  5. import urllib
  6. import urlparse
  7. import datetime
  8. from BeautifulSoup import BeautifulSoup
  9. from PlanningUtils import PlanningApplication, \
  10. PlanningAuthorityResults, \
  11. getPostcodeFromText
  12. class WychavonParser:
  13. def __init__(self, *args):
  14. self.authority_name = "Wychavon"
  15. self.authority_short_name = "Wychavon"
  16. # Currently hard coded--if this address updates, we'll need to scrape
  17. # the search form to get it each time.
  18. self.base_url = "http://www.e-wychavon.org.uk/scripts/plan2005/\
  19. acolnetcgi.exe?ACTION=UNWRAP&WhereDescription=General%20Search&\
  20. Whereclause3=%27%30%31%2F%7BEdtMonthEnd%7D%2F%7BEdtYearEnd%7D%27&\
  21. RIPNAME=Root%2EPages%2EPgeDC%2EPgeListCases"
  22. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  23. def getResultsByDayMonthYear(self, day, month, year):
  24. form_data = "EdtYearNo=&EdtCaseNo=&EdtApplicant=&EdtAgent=&EdtLocation"\
  25. + "=&EdtWard=&EdtMonthStart1=" + str(month) + "&EdtYearStart=" \
  26. + str(year) + "&EdtMonthEnd=" + str(month) + "&EdtYearEnd="\
  27. + str(year) + "&submit=Search"
  28. # Fetch the results
  29. response = urllib.urlopen(self.base_url, form_data)
  30. soup = BeautifulSoup(response.read())
  31. #Each set of results has its own table
  32. results_tables = soup.findAll("table", cellpadding="2", cols="4")
  33. for table in results_tables:
  34. application = PlanningApplication()
  35. trs = table.findAll("tr")
  36. application.council_reference = trs[0].findAll("td")[1].font.font.\
  37. font.string.strip()
  38. relative_info_url = trs[0].findAll("td")[1].a['href']
  39. application.info_url = urlparse.urljoin(self.base_url, relative_info_url)
  40. application.address = trs[1].findAll("td")[1].font.string.strip()
  41. application.postcode = getPostcodeFromText(application.address)
  42. #This code avoids an error if there's no description given.
  43. descrip = trs[2].findAll("td")[1].font.string
  44. if descrip == None:
  45. application.description = ""
  46. else:
  47. application.description = descrip.strip()
  48. rec_m, rec_d, rec_y = trs[1].findAll("td")[3].font.string.strip().\
  49. split("/")
  50. application.date_received = datetime.date(int(rec_y), int(rec_m), \
  51. int(rec_d))
  52. apptype = trs[0].findAll("td")[3].font.string
  53. # Avoids throwing an error if no apptype is given (this can happen)
  54. if apptype != None:
  55. apptype = apptype.strip()
  56. # Is all this really necessary? I don't know, but I've assumed that
  57. # it is. The form will appear without the suffix, I don't know if
  58. # the council's backend would accept it or not. Current behaviour
  59. # is to degrade silently to no suffix if it can't match an
  60. # application type.
  61. if apptype == "Telecommunications":
  62. # Don't know why it's a naked IP rather than sitting on the
  63. # same site, but there it is.
  64. application.comment_url = "http://81.171.139.151/WAM/createCom"\
  65. +"ment.do?action=CreateApplicationComment&applicationType=PLANNI"\
  66. +"NG&appNumber=T3/" + application.council_reference + "/TC"
  67. else:
  68. comment_url = "http://81.171.139.151/WAM/createComment.do?acti"\
  69. +"on=CreateApplicationComment&applicationType=PLANNING&appNumber"\
  70. +"=W/" + application.council_reference + "/"
  71. suffix = ""
  72. if apptype == "Householder planning application":
  73. suffix = "PP"
  74. elif apptype == "Non-householder planning application":
  75. suffix = "PN"
  76. elif apptype == "Outline applications":
  77. suffix = "OU"
  78. elif apptype == "Change of use":
  79. suffix = "CU"
  80. elif apptype == "Listed Building consent":
  81. suffix = "LB"
  82. elif apptype == "Advertisement application":
  83. suffix = "AA"
  84. elif apptype == "Certificate of Lawfulness Existing":
  85. suffix = "LUE"
  86. elif apptype == "Approval of reserved matters":
  87. suffix = "VOC"
  88. #These are all the ones that I found, except "Advice - Pre-app/
  89. #Householder", the suffix for which is inconsistent. The suffix
  90. #for this could be obtained by scraping the description page for
  91. #each application.
  92. application.comment_url = comment_url + suffix
  93. self._results.addApplication(application)
  94. return self._results
  95. def getResults(self, day, month, year):
  96. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  97. if __name__ == '__main__':
  98. parser = WychavonParser()
  99. #Put this in with constant numbers, copying the Barnsley example. Works for testing, but should it use the arguments for a real run?
  100. print parser.getResults(16,3,2009)