Add scraper for Eastbourne. The info and comment links won't work on their own, since they require you to have a cookie. If you go back to them once you have the cookie, you're fine.
há 17 anos · 92e757b8dd
--- a/python_scrapers/Eastbourne.py
+++ b/python_scrapers/Eastbourne.py
@@ -0,0 +1,120 @@
 import urllib2
 import urllib
 import urlparse

 import datetime, time
 import cgi

 import BeautifulSoup

 import cookielib
 cookie_jar = cookielib.CookieJar()

 from PlanningUtils import PlanningApplication, \
    PlanningAuthorityResults, \
    getPostcodeFromText

 date_format = "%d/%m/%Y"

 class EastbourneParser:
    """Scraper for planning applications received by Eastbourne Borough Council.

    The council's search is behind a terms-and-conditions form, so every
    scrape first submits that form to obtain a cookie and only then posts
    the actual search. Only the single results page returned by the search
    is parsed; further pages of a paginated result set are not followed.
    """

    def __init__(self, *args):
        """Set up the authority names, site URLs and the results container.

        Any positional arguments are accepted and ignored.
        """
        self.authority_name = "Eastbourne Borough Council"
        self.authority_short_name = "Eastbourne"

        # Entry page; fetched first to reach the terms-and-conditions form.
        self.first_url = "http://www.eastbourne.gov.uk/planningapplications/index.asp"
        # Target of the search POST. The search form itself lives at
        # http://www.eastbourne.gov.uk/planningapplications/search.asp
        self.base_url = "http://www.eastbourne.gov.uk/planningapplications/results.asp"

        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

    def _acceptTermsAndConditions(self):
        """Submit the form on the entry page so the site hands out its cookie.

        Cookies from both responses are stored in the module-level
        ``cookie_jar``. The first request is deliberately sent without any
        cookie header, exactly as before.
        """
        # First get the search page - we'll be redirected somewhere else
        # for not having the cookie.
        first_request = urllib2.Request(self.first_url)
        first_response = urllib2.urlopen(first_request)
        try:
            cookie_jar.extract_cookies(first_response, first_request)
            first_page_soup = BeautifulSoup.BeautifulSoup(first_response.read())
        finally:
            first_response.close()

        first_page_action = urlparse.urljoin(self.first_url, first_page_soup.form['action'])

        # Only the first <input> of the form is posted back.
        the_input = first_page_soup.form.input
        second_page_post_data = urllib.urlencode(
            (
                (the_input['name'], the_input['value']),
                )
            )

        second_request = urllib2.Request(first_page_action, second_page_post_data)
        cookie_jar.add_cookie_header(second_request)
        second_response = urllib2.urlopen(second_request)
        try:
            cookie_jar.extract_cookies(second_response, second_request)
        finally:
            # The body is never needed; only the cookies are.
            second_response.close()

    def _buildSearchPostData(self, search_day):
        """Return the url-encoded search form restricted to ``search_day``.

        Example of what the site's own form sends:
        ApplicationNumber=&AddressPrefix=&Postcode=&CaseOfficer=&WardMember=&DateReceivedStart=31%2F08%2F2008&DateReceivedEnd=31%2F08%2F2008&DateDecidedStart=&DateDecidedEnd=&Locality=&AgentName=&ApplicantName=&ShowDecided=&DecisionLevel=&Sort1=FullAddressPrefix&Sort2=DateReceived+DESC&Submit=Search
        """
        formatted_day = search_day.strftime(date_format)

        return urllib.urlencode(
            (
                ("ApplicationNumber", ""),
                ("AddressPrefix", ""),
                ("Postcode", ""),
                ("CaseOfficer", ""),
                ("WardMember", ""),
                ("DateReceivedStart", formatted_day),
                ("DateReceivedEnd", formatted_day),
                ("DateDecidedStart", ""),
                ("DateDecidedEnd", ""),
                ("Locality", ""),
                ("AgentName", ""),
                ("ApplicantName", ""),
                ("ShowDecided", ""),
                ("DecisionLevel", ""),
                ("Sort1", "FullAddressPrefix"),
                ("Sort2", "DateReceived DESC"),
                ("Submit", "Search"),
                )
            )

    def _joinTextNodes(self, tag):
        """Join the stripped direct text children of ``tag`` with spaces.

        The exact type comparison (rather than isinstance) is kept on
        purpose, so child tags and NavigableString subclasses are skipped.
        """
        return ' '.join([x.strip() for x in tag.contents if type(x) == BeautifulSoup.NavigableString])

    def _parseApplication(self, app_no_string, search_day):
        """Build a PlanningApplication from one "App. No.:" label in the results.

        Every field is located relative to ``app_no_string`` with findNext.
        """
        application = PlanningApplication()
        application.date_received = search_day

        reference_link = app_no_string.findNext("a")
        application.council_reference = reference_link.string.strip()
        application.info_url = urlparse.urljoin(self.base_url, reference_link['href'])

        address_cell = app_no_string.findNext(text="Site Address:").findNext("td")
        application.address = self._joinTextNodes(address_cell)
        application.postcode = getPostcodeFromText(application.address)

        comment_link = app_no_string.findNext(text="Comment on application").parent
        application.comment_url = urlparse.urljoin(self.base_url, comment_link['href'])

        description_cell = app_no_string.findNext(text="Description:").findNext("td")
        description = description_cell.string
        if description is None:
            # .string is None when the cell is empty or holds nested markup;
            # fall back to the same text-node join used for the address
            # instead of crashing the whole scrape on one row.
            description = self._joinTextNodes(description_cell)
        application.description = description.strip()

        return application

    def getResultsByDayMonthYear(self, day, month, year):
        """Scrape the applications received on the given date.

        ``day``, ``month`` and ``year`` are integers; an invalid date raises
        ValueError from datetime.date. Each application found is added to
        this instance's results object, which is then returned, so repeated
        calls on one instance keep adding to the same object.
        """
        search_day = datetime.date(year, month, day)

        # There's going to be some faffing around here. We need a cookie to
        # say we have agreed to some T&Cs.
        self._acceptTermsAndConditions()

        # Now (finally) get the search page
        post_data = self._buildSearchPostData(search_day)

        search_request = urllib2.Request(self.base_url)
        cookie_jar.add_cookie_header(search_request)
        search_response = urllib2.urlopen(search_request, post_data)
        try:
            soup = BeautifulSoup.BeautifulSoup(search_response.read())
        finally:
            search_response.close()

        # Each application in the results is introduced by an "App. No.:" label.
        for app_no_string in soup.findAll(text="App. No.:"):
            self._results.addApplication(self._parseApplication(app_no_string, search_day))

        return self._results

    def getResults(self, day, month, year):
        """Return the results for the given date as XML.

        The arguments may be anything int() accepts, e.g. strings.
        """
        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()

 if __name__ == '__main__':
    # Manual smoke test: scrape 1 September 2008 and dump the XML to stdout.
    eastbourne_parser = EastbourneParser()
    print(eastbourne_parser.getResults(1, 9, 2008))



 # TODO - the results page paginates at 20 applications; only the first page is currently scraped.
--- a/python_scrapers/OtherFilesToCopy.csv
+++ b/python_scrapers/OtherFilesToCopy.csv
@@ -57,3 +57,4 @@
 "Hastings.py", "420"
 "Herefordshire.py", "420"
 "Exmoor.py", "420"
 "Eastbourne.py", "420"
--- a/python_scrapers/SitesToGenerate.csv
+++ b/python_scrapers/SitesToGenerate.csv
@@ -261,3 +261,4 @@
 "Hastings Borough Council", "Hastings", "", "Hastings", "HastingsParser"
 "Herefordshire Council", "Herefordshire", "", "Herefordshire", "HerefordshireParser"
 "Exmoor National Park", "Exmoor", "", "Exmoor", "ExmoorParser"
 "Eastbourne Borough Council", "Eastbourne", "", "Eastbourne", "EastbourneParser"
--- a/python_scrapers/WAM.py
+++ b/python_scrapers/WAM.py
@@ -172,15 +172,15 @@ if __name__ == '__main__':
    #parser = BraintreeParser("Colchester", "Colchester", "http://www.planning.colchester.gov.uk/WAM/weeklyApplications.do", debug=True)
    #parser = WAMParser("East Lothian", "East Lothian", "http://www.planning.eastlothian.gov.uk/WAM/pas/searchApplications.do", debug=True)
    #parser = BraintreeParser("North Somerset", "North Somerset", "http://wam.n-somerset.gov.uk/MULTIWAM/weeklyApplications.do", debug=True)
    #parser = WAMParser("Nottingham", "Nottingham", "http://plan4.nottinghamcity.gov.uk/WAM/pas/searchApplications.do", debug=True)
    parser = WAMParser("Nottingham", "Nottingham", "http://plan4.nottinghamcity.gov.uk/WAM/pas/searchApplications.do", debug=True)
    #parser = PooleParser("Poole long", "Poole", "http://wam.boroughofpoole.com/WAM/pas/searchApplications.do", debug=True)
    #parser = WAMParser("Rother long", "Rother", "http://www.planning.rother.gov.uk/WAM/pas/searchApplications.do", debug=True)
    #parser = BraintreeParser("South Gloucestershire", "South Gloucestershire", "http://planning.southglos.gov.uk/WAM/pas/WeeklyApplications.do", debug=True)
    #parser = WAMParser("South Norfolk", "South Norfolk", "http://wam.south-norfolk.gov.uk/WAM/pas/searchApplications.do", debug=True)
    #parser = BraintreeParser("Tower Hamlets", "Tower Hamlets", "http://194.201.98.213/WAM/weeklyApplications.do", debug=True)
    parser = WAMParser("Westminster", "Westminster", "http://idocs.westminster.gov.uk:8080/WAM/search/pas/index.htm", debug=True)
    #parser = WAMParser("Westminster", "Westminster", "http://idocs.westminster.gov.uk:8080/WAM/search/pas/index.htm", debug=True)

    print parser.getResults(1,8,2008)
    print parser.getResults(31,8,2008)

 # Left to fix