adrianshort
/
planningalerts
espelhamento de https://github.com/adrianshort/planningalerts.git

import urllib2
import urllib
import urlparse

import datetime, time
import cgi

import BeautifulSoup

import cookielib
cookie_jar = cookielib.CookieJar()

from PlanningUtils import PlanningApplication, \
    PlanningAuthorityResults, \
    getPostcodeFromText

date_format = "%d/%m/%Y"

class EastbourneParser:
    def __init__(self, *args):

        self.authority_name = "Eastbourne Borough Council"
        self.authority_short_name = "Eastbourne"
#        self.base_url = "http://www.eastbourne.gov.uk/planningapplications/search.asp"
        self.first_url = "http://www.eastbourne.gov.uk/planningapplications/index.asp"
        self.base_url = "http://www.eastbourne.gov.uk/planningapplications/results.asp"

        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)


    def getResultsByDayMonthYear(self, day, month, year):
        search_day = datetime.date(year, month, day)

        # There's going to be some faffing around here. We need a cookie to say we have agreed to some T&Cs.

        # First get the search page - we'll be redirected somewhere else for not having the cookie

        first_request = urllib2.Request(self.first_url)
        first_response = urllib2.urlopen(first_request)
        cookie_jar.extract_cookies(first_response, first_request)

        first_page_soup = BeautifulSoup.BeautifulSoup(first_response.read())

        first_page_action = urlparse.urljoin(self.first_url, first_page_soup.form['action'])
        
        the_input = first_page_soup.form.input

        second_page_post_data = urllib.urlencode(
            (
                (the_input['name'], the_input['value']),
                )
            )
        
        second_request = urllib2.Request(first_page_action, second_page_post_data)
        cookie_jar.add_cookie_header(second_request)
        second_response = urllib2.urlopen(second_request)
        cookie_jar.extract_cookies(second_response, second_request)

        # Now (finally) get the search page

#ApplicationNumber=&AddressPrefix=&Postcode=&CaseOfficer=&WardMember=&DateReceivedStart=31%2F08%2F2008&DateReceivedEnd=31%2F08%2F2008&DateDecidedStart=&DateDecidedEnd=&Locality=&AgentName=&ApplicantName=&ShowDecided=&DecisionLevel=&Sort1=FullAddressPrefix&Sort2=DateReceived+DESC&Submit=Search

        post_data = urllib.urlencode(
            (
                ("ApplicationNumber", ""),
                ("AddressPrefix", ""),
                ("Postcode", ""),
                ("CaseOfficer", ""),
                ("WardMember", ""),
                ("DateReceivedStart", search_day.strftime(date_format)),
                ("DateReceivedEnd", search_day.strftime(date_format)),
                ("DateDecidedStart", ""),
                ("DateDecidedEnd", ""),
                ("Locality", ""),
                ("AgentName", ""),
                ("ApplicantName", ""),
                ("ShowDecided", ""),
                ("DecisionLevel", ""),
                ("Sort1", "FullAddressPrefix"),
                ("Sort2", "DateReceived DESC"),
                ("Submit", "Search"),
                )
            )

        search_request = urllib2.Request(self.base_url)
        cookie_jar.add_cookie_header(search_request)
        search_response = urllib2.urlopen(search_request, post_data)

        soup = BeautifulSoup.BeautifulSoup(search_response.read())

        app_no_strings = soup.findAll(text="App. No.:")

        for app_no_string in app_no_strings:
            application = PlanningApplication()
            application.date_received = search_day

            application.council_reference = app_no_string.findNext("a").string.strip()
            application.info_url = urlparse.urljoin(self.base_url, app_no_string.findNext("a")['href'])

            application.address = ' '.join([x.strip() for x in app_no_string.findNext(text="Site Address:").findNext("td").contents if type(x) == BeautifulSoup.NavigableString])
            application.postcode = getPostcodeFromText(application.address)

            application.comment_url = urlparse.urljoin(self.base_url, app_no_string.findNext(text="Comment on application").parent['href'])

            application.description = app_no_string.findNext(text="Description:").findNext("td").string.strip()

            self._results.addApplication(application)
        
        return self._results

    def getResults(self, day, month, year):
        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()

if __name__ == '__main__':
    parser = EastbourneParser()
    print parser.getResults(1,9,2008)


# TODO - currently paginates at 20