Add Medway scraper.

18 年前 · 442f48ab30
--- a/python_scrapers/Maldon.py
+++ b/python_scrapers/Maldon.py
@@ -97,3 +97,4 @@ if __name__ == '__main__':

 # 1) Check that it works ok on a no results page.
 # 2) Email the council about non-linkable info page.
 # 3) Email the council about missing descriptions?
--- a/python_scrapers/Medway.py
+++ b/python_scrapers/Medway.py
@@ -0,0 +1,90 @@
 import urllib2
 import urllib
 import urlparse

 import datetime, time

 from BeautifulSoup import BeautifulSoup

 from PlanningUtils import PlanningApplication, \
    PlanningAuthorityResults, \
    getPostcodeFromText

 date_format = "%d/%m/%Y"

 class MedwayParser:
    comment_email_address = "planning.representations@medway.gov.uk"

    def __init__(self, *args):
        self.authority_name = "Medway Council"
        self.authority_short_name = "Medway"

        self.base_url = "http://www.medway.gov.uk/index/environment/planning/planapp/planonline.htm"
        self._split_base_url = urlparse.urlsplit(self.base_url)

        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)


    def getResultsByDayMonthYear(self, day, month, year):
        search_date = datetime.date(year, month, day)
        search_date_string = search_date.strftime(date_format)

        "appstat=&decision=&appdec=&ward=&parish=&dadfrom=&dadto=&davfrom=01%2F06%2F2008&davto=02%2F06%2F2008&searchbut=Search"
        search_data = urllib.urlencode(
            [("searchtype", "1"),
             ("appstat", ""),
             ("decision", ""),
             ("appdec", ""),
             ("ward", ""),
             ("parish", ""),
             ("dadfrom", ""),
             ("dadto", ""),
             ("davfrom", search_date_string),
             ("davto", search_date_string),
             ("searchbut", "Search"),
                ]
            )

        split_search_url = self._split_base_url[:3] + (search_data, '')
        search_url = urlparse.urlunsplit(split_search_url)

        response = urllib2.urlopen(search_url)
        soup = BeautifulSoup(response.read())

        results_table = soup.find(text="Application No").parent.parent.parent
        trs = results_table.findAll("tr")[1:]

        tr_counter = 0
        
        while tr_counter < len(trs):
            tr = trs[tr_counter]

            if tr_counter % 2 == 0:
                application = PlanningApplication()
                application.date_received = search_date
                application.comment_url = self.comment_email_address

                tds = tr.findAll("td")

                application.info_url = urlparse.urljoin(self.base_url, tr.a['href'])
                application.council_reference = tr.a.string.strip()

                application.address = tds[1].string.strip()
                application.postcode = getPostcodeFromText(application.address)

                application.description = tds[2].string.strip()

                self._results.addApplication(application)

            tr_counter += 1

        return self._results


    def getResults(self, day, month, year):
        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()

 if __name__ == '__main__':
    parser = MedwayParser()
    print parser.getResults(02,6,2008)

--- a/python_scrapers/OtherFilesToCopy.csv
+++ b/python_scrapers/OtherFilesToCopy.csv
@@ -29,3 +29,4 @@
 "ForestOfDean.py", "420"
 "Flintshire.py", "420"
 "Maldon.py", "420"
 "Medway.py", "420"
--- a/python_scrapers/SitesToGenerate.csv
+++ b/python_scrapers/SitesToGenerate.csv
@@ -229,3 +229,4 @@
 "Forest of Dean District Council", "Forest of Dean", "", "ForestOfDean", "ForestOfDeanParser"
 "Flintshire County Council", "Flintshire", "", "Flintshire", "FlintshireParser"
 "Maldon District Council", "Maldon", "", "Maldon", "MaldonParser"
 "Medway Council", "Medway", "", "Medway", "MedwayParser"