~launchpad-pqm/launchpad/devel

8687.15.17 by Karl Fogel
Add the copyright header block to the rest of the files under lib/lp/.
1
# Copyright 2009 Canonical Ltd.  This software is licensed under the
2
# GNU Affero General Public License version 3 (see the file LICENSE).
3
7459.5.14 by Michael Hudson
move stuff out of script file, test it, including one that shows we haven't got
4
"""Implementation of the dynamic RewriteMap used to serve branches over HTTP.
5
"""
6
7
import time
8
7459.5.22 by Michael Hudson
review comments
9
from bzrlib import urlutils
9719.2.6 by Michael Hudson
use getIdAndTrailingPath in rewrite.py, will have to monitor performance impact of this
10
from zope.component import getUtility
11
14612.2.1 by William Grant
format-imports on lib/. So many imports.
12
from lp.code.interfaces.branchlookup import IBranchLookup
13
from lp.code.interfaces.codehosting import BRANCH_ID_ALIAS_PREFIX
14
from lp.codehosting.vfs import branch_id_to_path
14605.1.1 by Curtis Hovey
Moved canonical.config to lp.services.
15
from lp.services.config import config
14612.2.1 by William Grant
format-imports on lib/. So many imports.
16
from lp.services.utils import iter_split
14600.2.2 by Curtis Hovey
Moved webapp to lp.services.
17
from lp.services.webapp.adapter import (
12827.1.3 by Robert Collins
Formatting.
18
    clear_request_started,
19
    set_request_started,
20
    )
8971.10.2 by Michael Hudson
version that does direct database access, no caching though!
21
7459.5.14 by Michael Hudson
move stuff out of script file, test it, including one that shows we haven't got
22
23
__all__ = ['BranchRewriter']
24
7459.5.26 by Michael Hudson
fix non-textual conflict
25
7459.5.14 by Michael Hudson
move stuff out of script file, test it, including one that shows we haven't got
26
class BranchRewriter:
    """Rewrite request paths for branches into concrete internal URLs.

    Resolved branch ids are kept in an in-memory cache keyed by the leading
    part of the request path that named the branch; entries are honoured
    for `config.codehosting.branch_rewrite_cache_lifetime` seconds.
    """

    def __init__(self, logger, _now=None):
        """
        :param logger: Logger that messages about what the rewriter is doing
            will be sent to.
        :param _now: Optional zero-argument callable returning the current
            time in seconds, used to timestamp and expire cache entries.
            Defaults to `time.time`.
        """
        if _now is None:
            self._now = time.time
        else:
            self._now = _now
        self.logger = logger
        # Maps a branch's path prefix to (branch id, time of insertion).
        self._cache = {}

    def _codebrowse_url(self, path):
        """Return the internal codebrowse URL for 'path'."""
        return urlutils.join(
            config.codehosting.internal_codebrowse_root,
            path)

    @staticmethod
    def _unique_name(location, trailing):
        """Return 'location' without its leading '/' and without 'trailing'.

        'trailing' is assumed to be a suffix of 'location'.  The end index
        is computed explicitly rather than as -len(trailing): a negative
        index of zero would make the slice empty when 'trailing' is ''.
        """
        return location[1:len(location) - len(trailing)]

    def _getBranchIdAndTrailingPath(self, location):
        """Return the branch id and trailing path for 'location'.

        In addition this method returns whether the answer came from the
        cache ("HIT") or from the database ("MISS").

        :return: A tuple (branch_id, trailing_path, "HIT" or "MISS").  If no
            branch is found, this is (None, None, "MISS").
        """
        lifetime = config.codehosting.branch_rewrite_cache_lifetime
        for first, second in iter_split(location[1:], '/'):
            if first in self._cache:
                branch_id, inserted_time = self._cache[first]
                # Entries older than the configured lifetime are ignored.
                if self._now() < inserted_time + lifetime:
                    return branch_id, '/' + second, "HIT"
        branch_id, trailing = getUtility(IBranchLookup).getIdAndTrailingPath(
            location, from_slave=True)
        if branch_id is None:
            return None, None, "MISS"
        unique_name = self._unique_name(location, trailing)
        self._cache[unique_name] = (branch_id, self._now())
        return branch_id, trailing, "MISS"

    def rewriteLine(self, resource_location):
        """Rewrite 'resource_location' to a more concrete location.

        There are four cases:

         (1) The request is for something under '/static/'.

             Codebrowse generates references to its images and stylesheets
             starting with "/static", so these are passed on to codebrowse
             without any branch lookup.

         (2) The request is for something within the .bzr directory of a
             branch.

             In this case we rewrite the request to the location from which
             branches are served by ID.

         (3) The request is for something within a branch, but not the .bzr
             directory.

             In this case, we hand the request off to codebrowse.

         (4) The branch is not found.

             If the path starts with the branch id alias prefix we return
             'NULL', which indicates to Apache that we don't know how to
             rewrite the request (and so it should go on to generate a 404
             response).  Any other path is handed off to codebrowse.

        Other errors are allowed to propagate, on the assumption that the
        caller will catch and log them.

        :param resource_location: The request path, starting with '/'.
        :return: The rewritten location, or the string 'NULL'.
        """
        started = time.time()
        # Tell the webapp adapter that we are in a request, so that DB
        # statement timeouts will be applied.
        set_request_started()
        try:
            cached = None
            if resource_location.startswith('/static/'):
                # Codebrowse's own static resources: pass on unthinkingly.
                r = self._codebrowse_url(resource_location)
                cached = 'N/A'
            else:
                branch_id, trailing, cached = (
                    self._getBranchIdAndTrailingPath(resource_location))
                if branch_id is None:
                    if resource_location.startswith(
                            '/' + BRANCH_ID_ALIAS_PREFIX):
                        r = 'NULL'
                    else:
                        r = self._codebrowse_url(resource_location)
                elif trailing.startswith('/.bzr'):
                    r = urlutils.join(
                        config.codehosting.internal_branch_by_id_root,
                        branch_id_to_path(branch_id), trailing[1:])
                else:
                    r = self._codebrowse_url(resource_location)
        finally:
            clear_request_started()
        self.logger.info(
            "%r -> %r (%fs, cache: %s)",
            resource_location, r, time.time() - started, cached)
        return r