Fix incorrect info and comment URLs in the Acolnet scrapers.

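In Python 2.5, urlparse.urljoin does not fully implement RFC 3986, so a relative URL that consists only of a query string is not joined to the base URL correctly. This commit works around the bug by splitting the base URL and substituting the query string by hand. Once we are on Python 2.6, the workaround can be removed and the code can go back to calling urlparse.urljoin directly.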
14 years ago · 0710aac91c
--- a/python_scrapers/AcolnetParser.py
+++ b/python_scrapers/AcolnetParser.py
@@ -78,7 +78,20 @@ class AcolnetParser:
        """
        url = app_table.a['href']
        self._current_application.system_key = system_key_regex.search(url).groups()[0]
        return urlparse.urljoin(self.base_url, url)

        # This is the right way to do this, but it doesn't work in Python 2.5 as
        # it doesn't quite implement RFC 3986. This will work fine when we are on
        # Python 2.6
        # info_url = urlparse.urljoin(self.base_url, url)

        # In the meantime, we'll have to work around it. Let's assume url
        # is a query string

        split_base_url = urlparse.urlsplit(self.base_url)
        split_info_url = urlparse.urlsplit(url)
        info_url = urlparse.urlunsplit(split_base_url[:3] + (split_info_url.query,) + split_base_url[4:])

        return info_url

    def _getCommentUrl(self, app_table):
        """This must be run after _getInfoUrl"""
@@ -341,9 +354,9 @@ class HarlowParser(AcolnetParser):
        return self._current_application.info_url.replace("PgeResultDetail", "PgeCommentNeighbourForm&amp;hasreference=no")

 if __name__ == '__main__':
    day = 12
    month = 6
    year = 2009
    day = 6
    month = 8
    year = 2010

    #parser = AcolnetParser("Babergh", "Babergh", "http://planning.babergh.gov.uk/dcdatav2//acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch")
    #parser = AcolnetParser("Barnet", "Barnet", "http://194.75.183.100/planning-cases/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch")
@@ -357,8 +370,8 @@ if __name__ == '__main__':
    #parser = AcolnetParser("Croydon", "Croydon", "http://planning.croydon.gov.uk/DCWebPages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch")
    #parser = AcolnetParser("Derby", "Derby", "http://eplanning.derby.gov.uk/acolnet/planningpages02/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch")
    #parser = AcolnetParser("East Lindsey", "East Lindsey", "http://www.e-lindsey.gov.uk/planning/AcolnetCGI.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser")
    # parser = AcolnetParser("Exeter City Council", "Exeter", "http://pub.exeter.gov.uk/scripts/Acolnet/dataonlineplanning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch")
    parser = AcolnetParser("Stoke on Trent City Council", "Stoke", "http://www.planning.stoke.gov.uk/dataonlineplanning/AcolNetCGI.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch")
    parser = AcolnetParser("Exeter City Council", "Exeter", "http://pub.exeter.gov.uk/scripts/Acolnet/dataonlineplanning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch")
    # parser = AcolnetParser("Stoke on Trent City Council", "Stoke", "http://www.planning.stoke.gov.uk/dataonlineplanning/AcolNetCGI.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch")
    #parser = BoltonParser("Fylde", "Fylde", "http://www2.fylde.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch")
    #parser = AcolnetParser("Guildford", "Guildford", "http://www.guildford.gov.uk/DLDC_Version_2/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch")
    #parser = AcolnetParser("Harlow", "Harlow", "http://planning.harlow.gov.uk/DLDC_Version_2/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch")