~launchpad-pqm/launchpad/devel

« back to all changes in this revision

Viewing changes to lib/lp/services/googlesearch/__init__.py

  • Committer: Launchpad Patch Queue Manager
  • Date: 2011-05-23 18:43:31 UTC
  • mfrom: (13084.2.6 page-match-rewrite-url)
  • Revision ID: launchpad@pqm.canonical.com-20110523184331-dhd2c7cgfuu49epw
[r=sinzui][bug=784273] Adds facility to the PageMatch to handle bad
        URIs

Show diffs side-by-side

added added

removed removed

Lines of Context:
16
16
import xml.etree.cElementTree as ET
17
17
import urllib
18
18
import urllib2
19
 
from urlparse import urlunparse
 
19
from urlparse import (
 
20
    urlunparse,
 
21
    parse_qsl,
 
22
    )
20
23
 
21
24
from lazr.restful.utils import get_current_browser_request
22
25
from lazr.uri import URI
80
83
        self.summary = summary
81
84
        self.url = self._rewrite_url(url)
82
85
 
 
86
    def _sanitize_query_string(self, url):
 
87
        """Escapes invalid urls."""
 
88
        parts = urlparse(url)
 
89
        querydata = parse_qsl(parts.query)
 
90
        querystring = urllib.urlencode(querydata)
 
91
        urldata = list(parts)
 
92
        urldata[-2] = querystring
 
93
        return urlunparse(urldata)
 
94
 
83
95
    def _strip_trailing_slash(self, url):
84
96
        """Return the url without a trailing slash."""
85
97
        uri = URI(url).ensureNoSlash()
96
108
            launchpad environment.
97
109
        :return: A URL str.
98
110
        """
 
111
        url = self._sanitize_query_string(url)
99
112
        if self.url_rewrite_hostname == 'launchpad.net':
100
113
            # Do not rewrite the url if the hostname is the public hostname.
101
114
            return self._strip_trailing_slash(url)