~launchpad-pqm/launchpad/devel

14513.3.10 by Raphael Badin
Precache more data.
1
# Copyright 2009-2012 Canonical Ltd.  This software is licensed under the
8687.15.16 by Karl Fogel
Add the copyright header block to files under lib/lp/buildmaster/.
2
# GNU Affero General Public License version 3 (see the file LICENSE).
3
7813.2.1 by Celso Providelo
re-basing slave-scanner-ng changes.
4
"""Soyuz buildd slave manager logic."""
5
7813.2.14 by Celso Providelo
applying review comments, r=bigjools.
6
# On Python 2 this makes classes declared in this module without explicit
# bases new-style classes.
__metaclass__ = type


# Names exported by `from <this module> import *`.
__all__ = [
    'BuilddManager',
    'BUILDD_MANAGER_LOG_NAME',
    ]
12
7813.2.1 by Celso Providelo
re-basing slave-scanner-ng changes.
13
import logging
11403.1.4 by Henning Eggers
Reformatted imports using format-imports script r32.
14
8137.17.24 by Barry Warsaw
thread merge
15
import transaction
7813.2.1 by Celso Providelo
re-basing slave-scanner-ng changes.
16
from twisted.application import service
11403.1.4 by Henning Eggers
Reformatted imports using format-imports script r32.
17
from twisted.internet import (
18
    defer,
19
    reactor,
20
    )
11593.3.120 by Julian Edwards
re-add revno 11801 which was backed out in devel due to test failures resulting from a twisted bug
21
from twisted.internet.task import LoopingCall
7856.1.1 by Celso Providelo
Fixing remaining issues with BuilddManager, specially the dispatch-error-handler part.
22
from twisted.python import log
7813.2.1 by Celso Providelo
re-basing slave-scanner-ng changes.
23
from zope.component import getUtility
24
7675.805.8 by Jelmer Vernooij
Fix up some tests.
25
from lp.buildmaster.enums import BuildStatus
11593.3.120 by Julian Edwards
re-add revno 11801 which was backed out in devel due to test failures resulting from a twisted bug
26
from lp.buildmaster.interfaces.builder import (
27
    BuildDaemonError,
28
    BuildSlaveFailure,
29
    CannotBuild,
30
    CannotFetchFile,
31
    CannotResumeHost,
32
    )
14557.2.1 by Gavin Panella
Revert r14552, thus unreverting r14499 and r14459, to bring back read-only transactions in buildmaster.
33
from lp.buildmaster.interfaces.buildfarmjobbehavior import (
34
    BuildBehaviorMismatch,
35
    )
36
from lp.buildmaster.model.builder import Builder
14513.3.10 by Raphael Badin
Precache more data.
37
from lp.services.propertycache import get_property_cache
14557.2.1 by Gavin Panella
Revert r14552, thus unreverting r14499 and r14459, to bring back read-only transactions in buildmaster.
38
from lp.services.database.transaction_policy import DatabaseTransactionPolicy
10548.1.1 by Jonathan Lange
Move twistedsupport to lp.services
39
7813.2.1 by Celso Providelo
re-basing slave-scanner-ng changes.
40
11458.1.1 by Jelmer Vernooij
Move enums of buildmaster.
41
# Name of the `logging` logger that BuilddManager._setupLogger configures.
BUILDD_MANAGER_LOG_NAME = "slave-scanner"
42
43
10888.7.23 by Julian Edwards
jml's review comments
44
def get_builder(name):
    """Look up and return the builder registered under `name`.

    The lookup is done by indexing the `IBuilderSet` utility with `name`.
    """
    # Imported locally rather than at module level to avoid a circular
    # import.
    from lp.buildmaster.interfaces.builder import IBuilderSet
    builder_set = getUtility(IBuilderSet)
    return builder_set[name]
49
50
51
def assessFailureCounts(builder, fail_notes):
    """Compare builder and job failure counts and penalise the culprit.

    :param builder: The builder whose failure counts are being assessed.
    :param fail_notes: Text handed to `builder.failBuilder` if the builder
        ends up being failed.

    `builder.failure_count` is compared with the `failure_count` of the
    build attached to the builder's current job (taken as 0 when there is
    no current job):

     * Equal, with a job attached: the job is reset and nothing else is
       changed.
     * Builder count higher: any attached job is reset, and the builder
       is failed once its count reaches `Builder.FAILURE_THRESHOLD`.
     * Job count higher: the builder's failure count is reset, the build
       is marked FAILEDTOBUILD and its queue entry is destroyed.
     * No job attached and the builder count is not above zero: there is
       nothing to penalise, so nothing is changed.

    The cached `currentjob` property of the builder is dropped on entry
    and again before returning.
    """
    # builder.currentjob hides a complicated query, don't run it twice.
    # See bug 623281 (Note that currentjob is a cachedproperty).
    del get_property_cache(builder).currentjob
    current_job = builder.currentjob
    if current_job is None:
        job_failure_count = 0
    else:
        job_failure_count = current_job.specific_job.build.failure_count

    if current_job is not None and builder.failure_count == job_failure_count:
        # If the failure count for the builder is the same as the
        # failure count for the job being built, then we cannot
        # tell whether the job or the builder is at fault.  The best
        # we can do is try them both again, and hope that the job
        # runs against a different builder.
        current_job.reset()
        del get_property_cache(builder).currentjob
        return

    if builder.failure_count > job_failure_count:
        # The builder has failed more than the jobs it's been
        # running.

        # Re-schedule the build if there is one.
        if current_job is not None:
            current_job.reset()

        # We are a little more tolerant with failing builders than
        # failing jobs because sometimes they get unresponsive due to
        # human error, flaky networks etc.  We expect the builder to get
        # better, whereas jobs are very unlikely to get better.
        if builder.failure_count >= Builder.FAILURE_THRESHOLD:
            # It's also gone over the threshold so let's disable it.
            builder.failBuilder(fail_notes)
    elif current_job is not None:
        # The job is the culprit!  Override its status to 'failed'
        # to make sure it won't get automatically dispatched again,
        # and remove the buildqueue request.  The failure should
        # have already caused any relevant slave data to be stored
        # on the build record so don't worry about that here.
        builder.resetFailureCount()
        build = current_job.specific_job.build
        build.status = BuildStatus.FAILEDTOBUILD
        # Use the job fetched above instead of going through
        # builder.currentjob again.
        current_job.destroySelf()

        # N.B. We could try and call _handleStatus_PACKAGEFAIL here
        # but that would cause us to query the slave for its status
        # again, and if the slave is non-responsive it holds up the
        # next buildd scan.
    # Otherwise there is no job attached and the builder's count is not
    # above zero: nobody to blame.  Without the guard above this case
    # would dereference a None job.
    del get_property_cache(builder).currentjob
10888.7.23 by Julian Edwards
jml's review comments
104
105
10888.6.8 by Julian Edwards
move the .old file to the .py file so the diff is not massive
106
class SlaveScanner:
    """A manager for a single builder.

    Once `startCycle` is called, a `LoopingCall` runs `singleCycle` every
    `SCAN_INTERVAL` seconds.  Each cycle runs `scan`; any failure raised
    by the scan is handled by `_scanFailed`.
    """

    # The interval between each poll cycle, in seconds.  We'd ideally
    # like this to be lower but 15 seems a reasonable compromise between
    # responsivity and load on the database server, since in each cycle
    # we can run quite a few queries.
    #
    # NB. This used to be as low as 5 but as more builders are added to
    # the farm this rapidly increases the query count, PG load and this
    # process's load.  It's backed off until we come up with a better
    # algorithm for polling.
    SCAN_INTERVAL = 15

    def __init__(self, builder_name, logger, clock=None):
        """Remember which builder to scan and how to log and schedule.

        :param builder_name: Name used to look the builder up on each scan.
        :param logger: Logger that receives scan messages.
        :param clock: Clock assigned to the `LoopingCall`; the Twisted
            reactor is used when None.
        """
        self.builder_name = builder_name
        self.logger = logger
        if clock is None:
            clock = reactor
        self._clock = clock

    def startCycle(self):
        """Scan the builder and dispatch to it or deal with failures.

        :return: The Deferred returned by `LoopingCall.start`; it is also
            kept as `self.stopping_deferred`.
        """
        self.loop = LoopingCall(self.singleCycle)
        self.loop.clock = self._clock
        self.stopping_deferred = self.loop.start(self.SCAN_INTERVAL)
        return self.stopping_deferred

    def stopCycle(self):
        """Terminate the LoopingCall."""
        self.loop.stop()

    def singleCycle(self):
        """Run one scan, routing any failure to `_scanFailed`.

        :return: The Deferred returned by `scan`, with the errback added.
        """
        self.logger.debug("Scanning builder: %s", self.builder_name)
        d = self.scan()

        d.addErrback(self._scanFailed)
        return d

    def _scanFailed(self, failure):
        """Deal with failures encountered during the scan cycle.

        1. Print the error in the log
        2. Increment and assess failure counts on the builder and job.

        :param failure: The failure object handed to the errback.
        """
        # Since this is a failure path, we could be in a broken
        # transaction.  Get us a fresh one.
        transaction.abort()

        # If we don't recognise the exception include a stack trace with
        # the error.
        error_message = failure.getErrorMessage()
        familiar_error = failure.check(
            BuildSlaveFailure, CannotBuild, BuildBehaviorMismatch,
            CannotResumeHost, BuildDaemonError, CannotFetchFile)
        if familiar_error:
            self.logger.info(
                "Scanning %s failed with: %s",
                self.builder_name, error_message)
        else:
            self.logger.info(
                "Scanning %s failed with: %s\n%s",
                self.builder_name, error_message,
                failure.getTraceback())

        # Decide if we need to terminate the job or fail the
        # builder.
        try:
            builder = get_builder(self.builder_name)
            transaction.commit()

            with DatabaseTransactionPolicy(read_only=False):
                builder.gotFailure()

                if builder.currentjob is None:
                    self.logger.info(
                        "Builder %s failed a probe, count: %s",
                        self.builder_name, builder.failure_count)
                else:
                    build_farm_job = builder.getCurrentBuildFarmJob()
                    build_farm_job.gotFailure()
                    self.logger.info(
                        "builder %s failure count: %s, "
                        "job '%s' failure count: %s",
                        self.builder_name,
                        builder.failure_count,
                        build_farm_job.title,
                        build_farm_job.failure_count)

                assessFailureCounts(builder, error_message)
                transaction.commit()
        except:
            # Catastrophic code failure! Not much we can do.
            # The catch-all is deliberate: whatever went wrong, abort the
            # transaction and log it rather than let the errback raise.
            transaction.abort()
            self.logger.error(
                "Miserable failure when trying to examine failure counts:\n",
                exc_info=True)

    def checkCancellation(self, builder):
        """See if there is a pending cancellation request.

        If the current build is in status CANCELLING then terminate it
        immediately.

        :param builder: The builder to check.  Only this argument is
            consulted, so the method does not depend on `scan` having
            set `self.builder` first.
        :return: A deferred whose value is True if we cancelled the build.
        """
        if not builder.virtualized:
            return defer.succeed(False)
        buildqueue = builder.getBuildQueue()
        if not buildqueue:
            return defer.succeed(False)
        build = buildqueue.specific_job.build
        if build.status != BuildStatus.CANCELLING:
            return defer.succeed(False)

        def resume_done(ignored):
            # Whatever the resume produced, report that we cancelled.
            return True

        self.logger.info("Cancelling build '%s'", build.title)
        with DatabaseTransactionPolicy(read_only=False):
            buildqueue.cancel()
            transaction.commit()
        d = builder.resumeSlaveHost()
        d.addCallback(resume_done)
        return d

    def scan(self):
        """Probe the builder and update/dispatch/collect as appropriate.

        There are several steps to scanning:

        1. If the builder is marked as "ok" then probe it to see what state
            it's in.  This is where lost jobs are rescued if we think the
            builder is doing something that it later tells us it's not,
            and also where the multi-phase abort procedure happens.
            See IBuilder.rescueIfLost, which is called by
            IBuilder.updateStatus().
        2. If the builder is still happy, we ask it if it has an active build
            and then either update the build in Launchpad or collect the
            completed build. (builder.updateBuild)
        3. If the builder is not happy or it was marked as unavailable
            mid-build, we need to reset the job that we thought it had, so
            that the job is dispatched elsewhere.
        4. If the builder is idle and we have another build ready, dispatch
            it.

        :return: A Deferred that fires when the scan is complete, whose
            value is A `BuilderSlave` if we dispatched a job to it, or None.
        """
        # We need to re-fetch the builder object on each cycle as the
        # Storm store is invalidated over transaction boundaries.
        self.builder = get_builder(self.builder_name)

        def status_updated(ignored):
            # See if we think there's an active build on the builder.
            buildqueue = self.builder.getBuildQueue()

            # Scan the slave and get the logtail, or collect the build if
            # it's ready.  Yes, "updateBuild" is a bad name.
            if buildqueue is not None:
                return self.builder.updateBuild(buildqueue)

        def build_updated(ignored):
            # If the builder is in manual mode, don't dispatch anything.
            if self.builder.manual:
                self.logger.debug(
                    '%s is in manual mode, not dispatching.',
                    self.builder.name)
                return

            # If the builder is marked unavailable, don't dispatch anything.
            # Additionally, because builders can be removed from the pool at
            # any time, we need to see if we think there was a build running
            # on it before it was marked unavailable. In this case we reset
            # the build thusly forcing it to get re-dispatched to another
            # builder.

            return self.builder.isAvailable().addCallback(got_available)

        def got_available(available):
            if not available:
                job = self.builder.currentjob
                if job is not None and not self.builder.builderok:
                    self.logger.info(
                        "%s was made unavailable; resetting attached job.",
                        self.builder.name)
                    # Close the current transaction before switching to
                    # a read-write policy for the reset.
                    transaction.commit()
                    with DatabaseTransactionPolicy(read_only=False):
                        job.reset()
                        transaction.commit()
                return

            # See if there is a job we can dispatch to the builder slave.

            # XXX JeroenVermeulen 2011-10-11, bug=872112: The job's
            # failure count will be reset once the job has started
            # successfully.  Because of intervening commits, you may see
            # a build with a nonzero failure count that's actually going
            # to succeed later (and have a failure count of zero).  Or
            # it may fail yet end up with a lower failure count than you
            # saw earlier.
            d = self.builder.findAndStartJob()

            def job_started(candidate):
                if self.builder.currentjob is not None:
                    # After a successful dispatch we can reset the
                    # failure_count.
                    transaction.commit()
                    with DatabaseTransactionPolicy(read_only=False):
                        self.builder.resetFailureCount()
                        transaction.commit()
                    return self.builder.slave
                else:
                    return None
            return d.addCallback(job_started)

        def cancellation_checked(cancelled):
            # A cancelled build ends this scan; otherwise carry on with
            # the status probe and the rest of the chain.
            if cancelled:
                return defer.succeed(None)
            d = self.builder.updateStatus(self.logger)
            d.addCallback(status_updated)
            d.addCallback(build_updated)
            return d

        if self.builder.builderok:
            d = self.checkCancellation(self.builder)
            d.addCallback(cancellation_checked)
        else:
            # Builder is not ok: skip the cancellation check and the
            # status probe.
            d = defer.succeed(None)
            d.addCallback(status_updated)
            d.addCallback(build_updated)

        return d
10888.6.8 by Julian Edwards
move the .old file to the .py file so the diff is not massive
339
340
10888.6.31 by Julian Edwards
more review comments from jml
341
class NewBuildersScanner:
    """Watch for newly added builders and hand them to the manager."""

    # Seconds between two checks for new builders.
    SCAN_INTERVAL = 300

    def __init__(self, manager, clock=None):
        """Record the manager, the clock and the builders known so far.

        :param manager: Object whose `addScanForBuilders` is called with
            the names of newly found builders.
        :param clock: Clock assigned to the `LoopingCall`; tests can pass
            one they are able to advance.  The reactor is used when None.
        """
        self.manager = manager
        self._clock = reactor if clock is None else clock
        # Local import to avoid a circular import.
        from lp.buildmaster.interfaces.builder import IBuilderSet
        self.current_builders = [
            known.name for known in getUtility(IBuilderSet)]

    def stop(self):
        """Stop the `LoopingCall` created by `scheduleScan`."""
        self.loop.stop()

    def scheduleScan(self):
        """Start calling `scan` every SCAN_INTERVAL seconds.

        :return: The Deferred returned by `LoopingCall.start`, which is
            also stored as `self.stopping_deferred`.
        """
        looping_call = LoopingCall(self.scan)
        self.loop = looping_call
        looping_call.clock = self._clock
        self.stopping_deferred = looping_call.start(self.SCAN_INTERVAL)
        return self.stopping_deferred

    def scan(self):
        """Pass the names of any new builders to the manager."""
        self.manager.addScanForBuilders(self.checkForNewBuilders())

    def checkForNewBuilders(self):
        """Return the names of builders not seen before.

        Names returned here are appended to `self.current_builders`, so
        each new builder is reported only once.
        """
        # Local import to avoid a circular import.
        from lp.buildmaster.interfaces.builder import IBuilderSet
        names_now = set(
            found.name for found in getUtility(IBuilderSet))
        names_before = set(self.current_builders)
        newly_added = list(names_now - names_before)
        self.current_builders.extend(newly_added)
        return newly_added
10888.6.14 by Julian Edwards
checkForNewBuilders returns builders if they're new
385
10888.6.11 by Julian Edwards
First part of detecting new builders
386
10888.6.8 by Julian Edwards
move the .old file to the .py file so the diff is not massive
387
class BuilddManager(service.Service):
    """Main Buildd Manager service class."""

    def __init__(self, clock=None):
        """Set up logging, the new-builder scanner and the DB policy.

        :param clock: Passed on to the `NewBuildersScanner`.
        """
        # SlaveScanner instances, one per builder being scanned.
        self.builder_slaves = []
        self.logger = self._setupLogger()
        self.new_builders_scanner = NewBuildersScanner(
            manager=self, clock=clock)
        self.transaction_policy = DatabaseTransactionPolicy(read_only=True)

    def _setupLogger(self):
        """Set up a 'slave-scanner' logger that redirects to twisted.

        Make it less verbose to avoid messing too much with the old code.

        :return: The configured logger.
        """
        verbosity = logging.INFO
        scanner_logger = logging.getLogger(BUILDD_MANAGER_LOG_NAME)

        # Send the output to the twisted log module through a stream
        # handler.
        twisted_handler = logging.StreamHandler(log.StdioOnnaStick())
        twisted_handler.setLevel(verbosity)
        twisted_handler.setFormatter(logging.Formatter('%(message)s'))

        scanner_logger.addHandler(twisted_handler)
        scanner_logger.setLevel(verbosity)
        return scanner_logger

    def enterReadOnlyDatabasePolicy(self):
        """Set the database transaction policy to read-only.

        Any previously pending changes are committed first.
        """
        transaction.commit()
        self.transaction_policy.__enter__()

    def exitReadOnlyDatabasePolicy(self, *args):
        """Reset database transaction policy to the default read-write.

        Extra positional arguments are accepted and ignored, so this can
        be used directly as a Deferred callback.
        """
        self.transaction_policy.__exit__(None, None, None)

    def startService(self):
        """Service entry point, called when the application starts."""
        # Local import to avoid a circular import.
        from lp.buildmaster.interfaces.builder import IBuilderSet

        self.enterReadOnlyDatabasePolicy()

        # Start a scanner for every builder that exists right now, then
        # begin watching for builders added later.
        existing_names = [
            existing.name for existing in getUtility(IBuilderSet)]
        self.addScanForBuilders(existing_names)
        self.new_builders_scanner.scheduleScan()

        # Events will now fire in the SlaveScanner objects to scan each
        # builder.

    def stopService(self):
        """Callback for when we need to shut down.

        :return: A Deferred that fires once every scanning loop has
            stopped and the read-only database policy has been exited.
        """
        # XXX: lacks unit tests
        # All the SlaveScanner objects need to be halted gracefully.
        pending = [
            scanner.stopping_deferred for scanner in self.builder_slaves]
        pending.append(self.new_builders_scanner.stopping_deferred)

        self.new_builders_scanner.stop()
        for scanner in self.builder_slaves:
            scanner.stopCycle()

        # The 'stopping_deferred's are called back when the loops are
        # stopped, so we can wait on them all at once here before
        # exiting.
        all_stopped = defer.DeferredList(pending, consumeErrors=True)
        all_stopped.addCallback(self.exitReadOnlyDatabasePolicy)
        return all_stopped

    def addScanForBuilders(self, builders):
        """Set up scanner objects for the builders specified.

        :param builders: Iterable of builder names.
        :return: The list of all scanners created so far.
        """
        for builder_name in builders:
            scanner = SlaveScanner(builder_name, self.logger)
            self.builder_slaves.append(scanner)
            scanner.startCycle()

        # Return the slave list for the benefit of tests.
        return self.builder_slaves