adrianshort
/
planningalerts
spegling av https://github.com/adrianshort/planningalerts.git

import urllib2
import urllib
import urlparse

import datetime, time
import cgi

from BeautifulSoup import BeautifulSoup

from PlanningUtils import PlanningApplication, \
    PlanningAuthorityResults, \
    getPostcodeFromText

# strftime pattern for the date substituted into ExmoorParser.base_url
# (day, abbreviated month name and year joined by "+", e.g. "01+Aug+2008").
search_date_format = "%d+%b+%Y"
# strptime pattern used to parse the text of the first column of each row
# in the weekly list (e.g. "01 Aug 2008").
received_date_format = "%d %b %Y"

class ExmoorParser:
    """Scraper for the Exmoor National Park weekly planning list.

    A search fetches the weekly-list page for one date and then follows each
    application's detail link to pick up its description and comment link.
    Applications are added to a single ``PlanningAuthorityResults`` object
    created in ``__init__``, so repeated searches on the same parser
    instance accumulate into the same result set.
    """

    def __init__(self, *args):
        """Set the authority names, the search URL template and the results.

        ``*args`` is accepted but ignored.
        """
        self.authority_name = "Exmoor National Park"
        self.authority_short_name = "Exmoor"
        # The "%s" placeholder is filled with the search date formatted
        # using search_date_format.
        self.base_url = "http://www.exmoor-nationalpark.gov.uk/planning_weekly_list.htm?weeklylist=%s"

        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

    def _fetch_soup(self, url):
        """Download ``url`` and return it parsed with BeautifulSoup.

        The HTTP response is always closed, even if reading or parsing
        fails.  Errors from ``urllib2.urlopen`` propagate to the caller.
        """
        response = urllib2.urlopen(url)
        try:
            return BeautifulSoup(response.read())
        finally:
            response.close()

    def getResultsByDayMonthYear(self, day, month, year):
        """Scrape the weekly list for the given date.

        ``day``, ``month`` and ``year`` are integers passed straight to
        ``datetime.date``, which raises ``ValueError`` for an invalid date.

        Returns the parser's ``PlanningAuthorityResults`` object with one
        ``PlanningApplication`` added per data row of the first table on the
        page.  If the page contains no table, the results are returned
        without any additions.
        """
        search_day = datetime.date(year, month, day)
        soup = self._fetch_soup(self.base_url % search_day.strftime(search_date_format))

        table = soup.table
        if table is None:
            # No table on the page: nothing to add for this date.
            return self._results

        # The first <tr> contains headers
        for tr in table.findAll("tr")[1:]:
            application = PlanningApplication()

            tds = tr.findAll("td")

            application.date_received = datetime.datetime.strptime(tds[0].string, received_date_format).date()

            # NOTE(review): quote_plus followed by unquote appears to leave
            # the URL unchanged apart from turning spaces into "+"; kept
            # exactly as it was.
            application.info_url = urllib.unquote(urllib.quote_plus(urlparse.urljoin(self.base_url, tds[1].a['href'])))
            application.council_reference = tds[1].a.string.strip()
            application.address = tds[2].a.string.strip()
            application.postcode = getPostcodeFromText(application.address)

            # Now fetch the info url
            info_soup = self._fetch_soup(application.info_url)

            application.description = info_soup.find(text="Proposal:").findNext("td").string.strip()

            try:
                application.comment_url = urlparse.urljoin(self.base_url, info_soup.find(text="Comment").parent['href'])
            except (AttributeError, KeyError, TypeError):
                # No "Comment" text on the page (find() returned None), or
                # its parent has no href attribute.
                application.comment_url = "No Comments"

            self._results.addApplication(application)

        return self._results

    def getResults(self, day, month, year):
        """Run the search and return ``displayXML()`` of the results.

        ``day``, ``month`` and ``year`` are converted with ``int()``, so
        numeric strings are accepted as well as integers.
        """
        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()

if __name__ == "__main__":
    # Manual run: print the output of getResults for 1 August 2008.
    exmoor = ExmoorParser()
    print(exmoor.getResults(1, 8, 2008))