~launchpad-pqm/launchpad/devel

14213.4.2 by Gavin Panella
Move check_teamparticipation() into sys.path.
1
# Copyright 2011 Canonical Ltd.  This software is licensed under the
2
# GNU Affero General Public License version 3 (see the file LICENSE).
3
14213.4.15 by Gavin Panella
Update/add docstrings.
4
"""Script code relating to team participations."""
14213.4.2 by Gavin Panella
Move check_teamparticipation() into sys.path.
5
6
# Make every class defined in this module new-style (Python 2 idiom).
__metaclass__ = type

# Names exported by `from <this module> import *`.
__all__ = [
    "check_teamparticipation_circular",
    "check_teamparticipation_consistency",
    "fetch_team_participation_info",
    "fix_teamparticipation_consistency",
    ]
13
14213.4.7 by Gavin Panella
Make check_teamparticipation_consistency super efficient.
14
from collections import (
15
    defaultdict,
16
    namedtuple,
17
    )
14213.4.21 by Gavin Panella
Report on query progress.
18
from functools import partial
14213.4.7 by Gavin Panella
Make check_teamparticipation_consistency super efficient.
19
from itertools import (
20
    chain,
14213.4.21 by Gavin Panella
Report on query progress.
21
    count,
14213.4.7 by Gavin Panella
Make check_teamparticipation_consistency super efficient.
22
    imap,
14213.4.21 by Gavin Panella
Report on query progress.
23
    izip,
14213.4.7 by Gavin Panella
Make check_teamparticipation_consistency super efficient.
24
    )
14213.4.3 by Gavin Panella
New test case TestCheckTeamParticipationScriptPerformance that runs check_teamparticipation() in-process, so we can monitor database activity more closely.
25
14213.4.2 by Gavin Panella
Move check_teamparticipation() into sys.path.
26
import transaction
14213.4.10 by Gavin Panella
Convert check_teamparticipation_self() to use Store.execute().
27
from zope.component import getUtility
14213.4.2 by Gavin Panella
Move check_teamparticipation() into sys.path.
28
14464.1.6 by Gavin Panella
Implement fix_teamparticipation_consistency().
29
from canonical.database.sqlbase import (
30
    quote,
31
    sqlvalues,
32
    )
14213.4.10 by Gavin Panella
Convert check_teamparticipation_self() to use Store.execute().
33
from canonical.launchpad.webapp.interfaces import (
34
    IStoreSelector,
35
    MAIN_STORE,
14464.1.6 by Gavin Panella
Implement fix_teamparticipation_consistency().
36
    MASTER_FLAVOR,
14213.4.10 by Gavin Panella
Convert check_teamparticipation_self() to use Store.execute().
37
    SLAVE_FLAVOR,
38
    )
14213.4.7 by Gavin Panella
Make check_teamparticipation_consistency super efficient.
39
from lp.registry.interfaces.teammembership import ACTIVE_STATES
14213.4.2 by Gavin Panella
Move check_teamparticipation() into sys.path.
40
from lp.services.scripts.base import LaunchpadScriptFailure
41
42
14464.1.6 by Gavin Panella
Implement fix_teamparticipation_consistency().
43
def get_master_store():
    """Return the main store in its master flavor.

    Errors in `TeamParticipation` must be fixed in the master.
    """
    selector = getUtility(IStoreSelector)
    return selector.get(MAIN_STORE, MASTER_FLAVOR)
49
50
51
def get_slave_store():
    """Return the main store in its slave flavor.

    Errors in `TeamParticipation` can be detected using a replicated copy.
    """
    selector = getUtility(IStoreSelector)
    return selector.get(MAIN_STORE, SLAVE_FLAVOR)
57
58
14213.4.4 by Gavin Panella
Break up check_teamparticipation into its component checks.
59
def check_teamparticipation_circular(log):
    """Check for circular references between teams.

    There can be no mutual participation between teams: two distinct
    `TeamParticipation` rows must never mirror each other's team and
    person.  The check runs against the slave store.

    :param log: A logger.  Accepted for a uniform signature; this check
        does not currently log anything.
    :raises LaunchpadScriptFailure: If any mutual participation is found.
    """
    query = """
        SELECT tp.team, tp2.team
          FROM TeamParticipation AS tp,
               TeamParticipation AS tp2
         WHERE tp.team = tp2.person
           AND tp.person = tp2.team
           AND tp.id != tp2.id;
        """
    store = get_slave_store()
    circular_references = list(store.execute(query))
    if circular_references:
        raise LaunchpadScriptFailure(
            "Circular references found: %s" % circular_references)
76
14213.4.4 by Gavin Panella
Break up check_teamparticipation into its component checks.
77
14213.4.9 by Gavin Panella
Tidy up, and add some TODOs.
78
# One inconsistency found for `team`: `type` is the kind of problem
# ("spurious" or "missing" in this module) and `people` the IDs concerned.
ConsistencyError = namedtuple(
    "ConsistencyError", ["type", "team", "people"])
80
81
14213.4.23 by Gavin Panella
Report progress of queries and of checks.
82
def report_progress(log, interval, results, what):
    """Iterate through `results`, reporting on progress.

    A debug message with the running count is logged after every
    `interval` results, and a final message with the total count is logged
    once `results` is exhausted.  An empty `results` reports a total of
    zero.

    :param log: A logger.
    :param interval: How many results to report progress about.
    :param results: An iterable of things.
    :param what: A string describing what the results are.
    :return: A generator yielding each item of `results` unchanged.
    """
    # Bind the counter up front so that the closing report is well-defined
    # even when `results` yields nothing at all.
    num = 0
    for num, result in enumerate(results, 1):
        if num % interval == 0:
            log.debug("%d %s", num, what)
        yield result
    log.debug("%d %s", num, what)
95
96
14213.4.22 by Gavin Panella
Use a more descriptive function name for query: execute_long_query.
97
def execute_long_query(store, log, interval, query):
    """Execute the given query, reporting as results are fetched.

    The query is logged, then every `interval` rows a message is logged with
    the total number of rows fetched thus far.

    :param store: The store on which to execute `query`.
    :param log: A logger.
    :param interval: How many rows to fetch between progress reports; also
        used as the raw cursor's `arraysize`.
    :param query: The SQL to execute.
    :return: An iterator over the result rows, wrapped by `report_progress`.
    """
    log.debug(query)
    rows = store.execute(query)
    # Hackish: reach into the result's raw cursor to raise its arraysize;
    # the default is 10 which seems fairly low.
    raw_cursor = rows._raw_cursor
    raw_cursor.arraysize = interval
    return report_progress(log, interval, rows, "rows")
14213.4.21 by Gavin Panella
Report on query progress.
108
109
14213.4.25 by Gavin Panella
Pass the team participation info into check_teamparticipation_consistency.
110
def fetch_team_participation_info(log):
    """Fetch people, teams, memberships and participations.

    Everything is read from the slave store, after which the transaction
    is committed so that no locks are held.

    :param log: A logger, used to report query progress.
    :return: A `(people, teams, team_memberships, team_participations)`
        tuple.  `people` and `teams` are dicts built from the `(id, name)`
        rows of unmerged `Person` records without and with a team owner
        respectively.  `team_memberships` (active memberships only) and
        `team_participations` are `defaultdict(set)`s mapping each team
        to the set of its `person` values.
    """
    store = get_slave_store()

    def slurp(query):
        # Run `query`, reporting progress every 10000 rows.
        return execute_long_query(store, log, 10000, query)

    def group_by_team(rows):
        # Collect (team, person) rows into a team -> set-of-persons map.
        grouped = defaultdict(set)
        for team, person in rows:
            grouped[team].add(person)
        return grouped

    people_query = (
        "SELECT id, name FROM Person"
        " WHERE teamowner IS NULL"
        "   AND merged IS NULL")
    people = dict(slurp(people_query))

    teams_query = (
        "SELECT id, name FROM Person"
        " WHERE teamowner IS NOT NULL"
        "   AND merged IS NULL")
    teams = dict(slurp(teams_query))

    memberships_query = (
        "SELECT team, person FROM TeamMembership"
        " WHERE status in %s" % quote(ACTIVE_STATES))
    team_memberships = group_by_team(slurp(memberships_query))

    participations_query = "SELECT team, person FROM TeamParticipation"
    team_participations = group_by_team(slurp(participations_query))

    # Don't hold any locks.
    transaction.commit()

    return people, teams, team_memberships, team_participations
140
141
14213.4.37 by Gavin Panella
Decouple check_teamparticipation_consistency() from fetch_team_participation_info().
142
def check_teamparticipation_consistency(log, info):
    """Check for missing or spurious participations.

    For example, participations for people who are not members, or missing
    participations for people who are members.

    The mappings in `info` are only read, never modified, so plain dicts
    work as well as `defaultdict`s.

    :param log: A logger; progress is reported at debug level and every
        inconsistency found is logged as a warning.
    :param info: A `(people, teams, team_memberships, team_participations)`
        tuple.  `people` and `teams` map IDs to names, while
        `team_memberships` and `team_participations` map a team ID to the
        set of IDs that are direct members of / participants in it.
    :return: A list of `ConsistencyError` tuples.
    """
    people, teams, team_memberships, team_participations = info

    # set.intersection() with a dict is slow.
    people_set = frozenset(people)
    teams_set = frozenset(teams)
    # Shared stand-in for absent entries.  Looking entries up with .get()
    # rather than indexing avoids inserting empty sets into the caller's
    # defaultdicts, and avoids KeyError for plain dicts.
    nobody = frozenset()

    def get_participants(team):
        """Walk membership records to get the expected participants."""
        # Walk iteratively, remembering the teams already visited, so that
        # neither deep nesting nor circular memberships cause unbounded
        # recursion.
        visited = set((team,))
        pending = [team]
        participants = set()
        while pending:
            members = team_memberships.get(pending.pop(), nobody)
            participants.update(people_set.intersection(members))
            for member_team in teams_set.intersection(members):
                if member_team not in visited:
                    visited.add(member_team)
                    pending.append(member_team)
        # Teams always participate in themselves, and every nested team
        # participates in the teams above it.
        participants.update(visited)
        return participants

    def check_participants(person, expected, observed):
        """Yield a `ConsistencyError` for each kind of discrepancy."""
        spurious = observed - expected
        missing = expected - observed
        if spurious:
            yield ConsistencyError("spurious", person, sorted(spurious))
        if missing:
            yield ConsistencyError("missing", person, sorted(missing))

    errors = []

    log.debug("Checking consistency of %d people", len(people))
    for person in report_progress(log, 50000, people, "people"):
        # The only participant of a person is the person.
        participants_expected = set((person,))
        participants_observed = team_participations.get(person, nobody)
        errors.extend(
            check_participants(
                person, participants_expected, participants_observed))

    log.debug("Checking consistency of %d teams", len(teams))
    for team in report_progress(log, 1000, teams, "teams"):
        participants_expected = get_participants(team)
        participants_observed = team_participations.get(team, nobody)
        errors.extend(
            check_participants(
                team, participants_expected, participants_observed))

    def get_repr(ident):
        """Return a "name (id)" description of a person or team ID."""
        if ident in people:
            name = people[ident]
        elif ident in teams:
            name = teams[ident]
        else:
            name = "<unknown>"
        return "%s (%d)" % (name, ident)

    for error in errors:
        people_repr = ", ".join(get_repr(ident) for ident in error.people)
        log.warn(
            "%s: %s TeamParticipation entries for %s.",
            get_repr(error.team), error.type, people_repr)

    return errors
14464.1.2 by Gavin Panella
Test for new function fix_teamparticipation_consistency().
204
205
206
def fix_teamparticipation_consistency(log, errors):
    """Fix missing or spurious participations.

    Each "missing" error inserts one `TeamParticipation` row per listed
    person (unless that row already exists); each "spurious" error deletes
    the listed people's rows for the team.  Every statement is logged at
    debug level, executed on the master store and committed on its own.
    Errors of any other type are logged as warnings and skipped.

    This function does not consult `TeamMembership` at all, so it /may/
    introduce another participation inconsistency if the records that are the
    subject of the given errors have been modified since being checked.

    :param log: A logger.
    :param errors: An iterable of `ConsistencyError` tuples.
    """
    sql_missing = """
        INSERT INTO TeamParticipation (team, person)
        SELECT %(team)s, %(person)s
        EXCEPT
        SELECT team, person
          FROM TeamParticipation
         WHERE team = %(team)s
           AND person = %(person)s
        """
    sql_spurious = """
        DELETE FROM TeamParticipation
         WHERE team = %(team)s
           AND person IN %(people)s
        """
    store = get_master_store()
    for error in errors:
        if error.type == "missing":
            # Built lazily: each statement is rendered only just before it
            # is executed, one per missing person.
            statements = (
                sql_missing % sqlvalues(team=error.team, person=person)
                for person in error.people)
        elif error.type == "spurious":
            statements = [
                sql_spurious % sqlvalues(
                    team=error.team, people=error.people)]
        else:
            log.warn("Unrecognized error: %r", error)
            continue
        for statement in statements:
            log.debug(statement)
            store.execute(statement)
            transaction.commit()