14513.3.10
by Raphael Badin
Precache more data. |
1 |
# Copyright 2009-2012 Canonical Ltd. This software is licensed under the
|
8687.15.16
by Karl Fogel
Add the copyright header block to files under lib/lp/buildmaster/. |
2 |
# GNU Affero General Public License version 3 (see the file LICENSE).
|
3 |
||
7813.2.1
by Celso Providelo
re-basing slave-scanner-ng changes. |
4 |
"""Soyuz buildd slave manager logic."""
|
5 |
||
7813.2.14
by Celso Providelo
applying review comments, r=bigjools. |
6 |
# Make every class defined in this module a new-style class (Python 2).
__metaclass__ = type

# Names exported by ``from <this module> import *``.
__all__ = ['BuilddManager', 'BUILDD_MANAGER_LOG_NAME']
|
12 |
||
7813.2.1
by Celso Providelo
re-basing slave-scanner-ng changes. |
13 |
import logging |
11403.1.4
by Henning Eggers
Reformatted imports using format-imports script r32. |
14 |
|
8137.17.24
by Barry Warsaw
thread merge |
15 |
import transaction |
7813.2.1
by Celso Providelo
re-basing slave-scanner-ng changes. |
16 |
from twisted.application import service |
11403.1.4
by Henning Eggers
Reformatted imports using format-imports script r32. |
17 |
from twisted.internet import ( |
18 |
defer, |
|
19 |
reactor, |
|
20 |
)
|
|
11593.3.120
by Julian Edwards
re-add revno 11801 which was backed out in devel due to test failures resulting from a twisted bug |
21 |
from twisted.internet.task import LoopingCall |
7856.1.1
by Celso Providelo
Fixing remaining issues with BuilddManager, specially the dispatch-error-handler part. |
22 |
from twisted.python import log |
7813.2.1
by Celso Providelo
re-basing slave-scanner-ng changes. |
23 |
from zope.component import getUtility |
24 |
||
7675.805.8
by Jelmer Vernooij
Fix up some tests. |
25 |
from lp.buildmaster.enums import BuildStatus |
11593.3.120
by Julian Edwards
re-add revno 11801 which was backed out in devel due to test failures resulting from a twisted bug |
26 |
from lp.buildmaster.interfaces.builder import ( |
27 |
BuildDaemonError, |
|
28 |
BuildSlaveFailure, |
|
29 |
CannotBuild, |
|
30 |
CannotFetchFile, |
|
31 |
CannotResumeHost, |
|
32 |
)
|
|
14557.2.1
by Gavin Panella
Revert r14552, thus unreverting r14499 and r14459, to bring back read-only transactions in buildmaster. |
33 |
from lp.buildmaster.interfaces.buildfarmjobbehavior import ( |
34 |
BuildBehaviorMismatch, |
|
35 |
)
|
|
36 |
from lp.buildmaster.model.builder import Builder |
|
14513.3.10
by Raphael Badin
Precache more data. |
37 |
from lp.services.propertycache import get_property_cache |
14557.2.1
by Gavin Panella
Revert r14552, thus unreverting r14499 and r14459, to bring back read-only transactions in buildmaster. |
38 |
from lp.services.database.transaction_policy import DatabaseTransactionPolicy |
10548.1.1
by Jonathan Lange
Move twistedsupport to lp.services |
39 |
|
7813.2.1
by Celso Providelo
re-basing slave-scanner-ng changes. |
40 |
|
11458.1.1
by Jelmer Vernooij
Move enums of buildmaster. |
41 |
# Name of the `logging` logger that BuilddManager._setupLogger configures.
BUILDD_MANAGER_LOG_NAME = 'slave-scanner'
42 |
||
43 |
||
10888.7.23
by Julian Edwards
jml's review comments |
44 |
def get_builder(name):
    """Look up the builder called `name` in the registered `IBuilderSet`.

    :param name: Key used to index the `IBuilderSet` utility.
    :return: Whatever the `IBuilderSet` utility yields for that key.
    """
    # Imported at call time: a module-level import would be circular.
    from lp.buildmaster.interfaces.builder import IBuilderSet

    builder_set = getUtility(IBuilderSet)
    return builder_set[name]
|
49 |
||
50 |
||
51 |
def assessFailureCounts(builder, fail_notes):
    """View builder/job failure_count and work out which needs to die.

    The builder's failure count is compared with the failure count of the
    build attached to its current job (taken as 0 when there is no job):

    * Equal counts, with a job attached: we cannot tell which of the two
      is at fault, so the job is reset and nothing else happens.
    * The builder has failed more often: any attached job is reset and,
      once the builder reaches `Builder.FAILURE_THRESHOLD`, the builder
      is failed with `fail_notes`.
    * The job has failed more often: the builder's failure count is
      reset, the build is marked FAILEDTOBUILD and the job is destroyed.
    * No job attached and the builder has not out-failed anything: there
      is nothing to blame, so nothing is changed.

    The cached `currentjob` property of `builder` is invalidated on entry
    and again before returning.

    :param builder: The builder whose failure count is being assessed.
    :param fail_notes: Passed to `builder.failBuilder()` if the builder
        gets disabled.
    """
    # builder.currentjob hides a complicated query, don't run it twice.
    # See bug 623281 (Note that currentjob is a cachedproperty).
    del get_property_cache(builder).currentjob
    current_job = builder.currentjob
    if current_job is None:
        job_failure_count = 0
    else:
        job_failure_count = current_job.specific_job.build.failure_count

    if builder.failure_count == job_failure_count and current_job is not None:
        # If the failure count for the builder is the same as the
        # failure count for the job being built, then we cannot
        # tell whether the job or the builder is at fault. The best
        # we can do is try them both again, and hope that the job
        # runs against a different builder.
        current_job.reset()
        del get_property_cache(builder).currentjob
        return

    if builder.failure_count > job_failure_count:
        # The builder has failed more than the jobs it's been
        # running.

        # Re-schedule the build if there is one.
        if current_job is not None:
            current_job.reset()

        # We are a little more tolerant with failing builders than
        # failing jobs because sometimes they get unresponsive due to
        # human error, flaky networks etc. We expect the builder to get
        # better, whereas jobs are very unlikely to get better.
        if builder.failure_count >= Builder.FAILURE_THRESHOLD:
            # It's also gone over the threshold so let's disable it.
            builder.failBuilder(fail_notes)
    elif current_job is not None:
        # The job is the culprit! Override its status to 'failed'
        # to make sure it won't get automatically dispatched again,
        # and remove the buildqueue request. The failure should
        # have already caused any relevant slave data to be stored
        # on the build record so don't worry about that here.
        builder.resetFailureCount()
        build_job = current_job.specific_job.build
        build_job.status = BuildStatus.FAILEDTOBUILD
        # Reuse the job fetched above rather than reading
        # builder.currentjob a second time.
        current_job.destroySelf()

        # N.B. We could try and call _handleStatus_PACKAGEFAIL here
        # but that would cause us to query the slave for its status
        # again, and if the slave is non-responsive it holds up the
        # next buildd scan.
    # Otherwise there is no job attached and the builder has not failed
    # more often than "no job" (count 0): there is no culprit to punish,
    # and dereferencing the missing job would raise AttributeError.

    del get_property_cache(builder).currentjob
10888.7.23
by Julian Edwards
jml's review comments |
104 |
|
105 |
||
10888.6.8
by Julian Edwards
move the .old file to the .py file so the diff is not massive |
106 |
class SlaveScanner:
    """A manager for a single builder.

    Each instance owns a `LoopingCall` that runs `singleCycle` every
    `SCAN_INTERVAL` seconds for the builder named `builder_name`.
    """

    # The interval between each poll cycle, in seconds. We'd ideally
    # like this to be lower but 15 seems a reasonable compromise between
    # responsiveness and load on the database server, since in each cycle
    # we can run quite a few queries.
    #
    # NB. This used to be as low as 5 but as more builders are added to
    # the farm this rapidly increases the query count, PG load and this
    # process's load. It's backed off until we come up with a better
    # algorithm for polling.
    SCAN_INTERVAL = 15

    def __init__(self, builder_name, logger, clock=None):
        """Remember which builder to scan and how to log and schedule.

        :param builder_name: Name passed to `get_builder` on every scan.
        :param logger: Logger used for all messages from this scanner.
        :param clock: Object installed as the `LoopingCall` clock; the
            Twisted reactor is used when None.
        """
        self.builder_name = builder_name
        self.logger = logger
        if clock is None:
            clock = reactor
        self._clock = clock

    def startCycle(self):
        """Scan the builder and dispatch to it or deal with failures.

        :return: The Deferred returned by `LoopingCall.start`; it is also
            kept as `self.stopping_deferred`.
        """
        self.loop = LoopingCall(self.singleCycle)
        self.loop.clock = self._clock
        self.stopping_deferred = self.loop.start(self.SCAN_INTERVAL)
        return self.stopping_deferred

    def stopCycle(self):
        """Terminate the LoopingCall."""
        self.loop.stop()

    def singleCycle(self):
        """Run one scan, routing any failure to `_scanFailed`.

        :return: The Deferred of the scan with the errback attached.
        """
        self.logger.debug("Scanning builder: %s", self.builder_name)
        d = self.scan()

        d.addErrback(self._scanFailed)
        return d

    def _scanFailed(self, failure):
        """Deal with failures encountered during the scan cycle.

        1. Print the error in the log
        2. Increment and assess failure counts on the builder and job.

        :param failure: The Twisted `Failure` raised by the scan.
        """
        # Since this is a failure path, we could be in a broken
        # transaction. Get us a fresh one.
        transaction.abort()

        # If we don't recognise the exception include a stack trace with
        # the error.
        error_message = failure.getErrorMessage()
        familiar_error = failure.check(
            BuildSlaveFailure, CannotBuild, BuildBehaviorMismatch,
            CannotResumeHost, BuildDaemonError, CannotFetchFile)
        if familiar_error:
            self.logger.info(
                "Scanning %s failed with: %s",
                self.builder_name, error_message)
        else:
            self.logger.info(
                "Scanning %s failed with: %s\n%s",
                self.builder_name, error_message,
                failure.getTraceback())

        # Decide if we need to terminate the job or fail the
        # builder.
        try:
            builder = get_builder(self.builder_name)
            transaction.commit()

            with DatabaseTransactionPolicy(read_only=False):
                builder.gotFailure()

                if builder.currentjob is None:
                    self.logger.info(
                        "Builder %s failed a probe, count: %s",
                        self.builder_name, builder.failure_count)
                else:
                    build_farm_job = builder.getCurrentBuildFarmJob()
                    build_farm_job.gotFailure()
                    self.logger.info(
                        "builder %s failure count: %s, "
                        "job '%s' failure count: %s",
                        self.builder_name,
                        builder.failure_count,
                        build_farm_job.title,
                        build_farm_job.failure_count)

                assessFailureCounts(builder, error_message)
                transaction.commit()
        except Exception:
            # Catastrophic code failure! Not much we can do.
            # Only ordinary errors are swallowed here; interpreter-exit
            # signals (SystemExit, KeyboardInterrupt) are not.
            transaction.abort()
            self.logger.error(
                "Miserable failure when trying to examine failure counts:\n",
                exc_info=True)

    def checkCancellation(self, builder):
        """See if there is a pending cancellation request.

        If the current build is in status CANCELLING then terminate it
        immediately.

        :param builder: The builder to check; its build queue entry is
            cancelled and its slave host resumed when a cancellation is
            pending.
        :return: A deferred whose value is True if we cancelled the build.
        """
        if not builder.virtualized:
            return defer.succeed(False)
        # Use the builder we were handed, not self.builder: the latter is
        # only set once scan() has run.
        buildqueue = builder.getBuildQueue()
        if not buildqueue:
            return defer.succeed(False)
        build = buildqueue.specific_job.build
        if build.status != BuildStatus.CANCELLING:
            return defer.succeed(False)

        def resume_done(ignored):
            # Replace the resume result with the "we cancelled" flag.
            return True

        self.logger.info("Cancelling build '%s'", build.title)
        with DatabaseTransactionPolicy(read_only=False):
            buildqueue.cancel()
            transaction.commit()
        d = builder.resumeSlaveHost()
        d.addCallback(resume_done)
        return d

    def scan(self):
        """Probe the builder and update/dispatch/collect as appropriate.

        There are several steps to scanning:

        1. If the builder is marked as "ok" then probe it to see what state
            it's in. This is where lost jobs are rescued if we think the
            builder is doing something that it later tells us it's not,
            and also where the multi-phase abort procedure happens.
            See IBuilder.rescueIfLost, which is called by
            IBuilder.updateStatus().
        2. If the builder is still happy, we ask it if it has an active build
            and then either update the build in Launchpad or collect the
            completed build. (builder.updateBuild)
        3. If the builder is not happy or it was marked as unavailable
            mid-build, we need to reset the job that we thought it had, so
            that the job is dispatched elsewhere.
        4. If the builder is idle and we have another build ready, dispatch
            it.

        :return: A Deferred that fires when the scan is complete, whose
            value is A `BuilderSlave` if we dispatched a job to it, or None.
        """
        # We need to re-fetch the builder object on each cycle as the
        # Storm store is invalidated over transaction boundaries.
        self.builder = get_builder(self.builder_name)

        def status_updated(ignored):
            # See if we think there's an active build on the builder.
            buildqueue = self.builder.getBuildQueue()

            # Scan the slave and get the logtail, or collect the build if
            # it's ready. Yes, "updateBuild" is a bad name.
            if buildqueue is not None:
                return self.builder.updateBuild(buildqueue)

        def build_updated(ignored):
            # If the builder is in manual mode, don't dispatch anything.
            if self.builder.manual:
                self.logger.debug(
                    '%s is in manual mode, not dispatching.',
                    self.builder.name)
                return

            # If the builder is marked unavailable, don't dispatch anything.
            # Additionally, because builders can be removed from the pool at
            # any time, we need to see if we think there was a build running
            # on it before it was marked unavailable. In this case we reset
            # the build thusly forcing it to get re-dispatched to another
            # builder.

            return self.builder.isAvailable().addCallback(got_available)

        def got_available(available):
            if not available:
                job = self.builder.currentjob
                if job is not None and not self.builder.builderok:
                    self.logger.info(
                        "%s was made unavailable; resetting attached job.",
                        self.builder.name)
                    transaction.commit()
                    with DatabaseTransactionPolicy(read_only=False):
                        job.reset()
                        transaction.commit()
                return

            # See if there is a job we can dispatch to the builder slave.

            # XXX JeroenVermeulen 2011-10-11, bug=872112: The job's
            # failure count will be reset once the job has started
            # successfully. Because of intervening commits, you may see
            # a build with a nonzero failure count that's actually going
            # to succeed later (and have a failure count of zero). Or
            # it may fail yet end up with a lower failure count than you
            # saw earlier.
            d = self.builder.findAndStartJob()

            def job_started(candidate):
                if self.builder.currentjob is not None:
                    # After a successful dispatch we can reset the
                    # failure_count.
                    transaction.commit()
                    with DatabaseTransactionPolicy(read_only=False):
                        self.builder.resetFailureCount()
                        transaction.commit()
                    return self.builder.slave
                else:
                    return None
            return d.addCallback(job_started)

        def cancellation_checked(cancelled):
            # A cancelled build ends this scan; otherwise carry on with
            # the normal status/update/dispatch chain.
            if cancelled:
                return defer.succeed(None)
            d = self.builder.updateStatus(self.logger)
            d.addCallback(status_updated)
            d.addCallback(build_updated)
            return d

        if self.builder.builderok:
            d = self.checkCancellation(self.builder)
            d.addCallback(cancellation_checked)
        else:
            # Builder not marked ok: skip the cancellation check and the
            # status probe.
            d = defer.succeed(None)
            d.addCallback(status_updated)
            d.addCallback(build_updated)

        return d
10888.6.8
by Julian Edwards
move the .old file to the .py file so the diff is not massive |
339 |
|
340 |
||
10888.6.31
by Julian Edwards
more review comments from jml |
341 |
class NewBuildersScanner:
    """Periodically look for builders we have not seen before.

    Every newly discovered builder name is handed to the manager's
    `addScanForBuilders`.
    """

    # Seconds between two checks for new builders.
    SCAN_INTERVAL = 300

    def __init__(self, manager, clock=None):
        """Record the manager, the clock and the builder names known now.

        :param manager: Object whose `addScanForBuilders` is called with
            the names of newly found builders.
        :param clock: Clock for the `LoopingCall`, so that tests can
            advance it; the reactor is used when None.
        """
        self.manager = manager
        self._clock = reactor if clock is None else clock
        # Imported here to avoid a circular import.
        from lp.buildmaster.interfaces.builder import IBuilderSet
        known_names = []
        for builder in getUtility(IBuilderSet):
            known_names.append(builder.name)
        self.current_builders = known_names

    def stop(self):
        """Stop the LoopingCall created by `scheduleScan`."""
        self.loop.stop()

    def scheduleScan(self):
        """Start calling `scan` every SCAN_INTERVAL seconds.

        :return: The Deferred returned by `LoopingCall.start`, also kept
            as `self.stopping_deferred`.
        """
        looping_call = LoopingCall(self.scan)
        looping_call.clock = self._clock
        self.loop = looping_call
        self.stopping_deferred = looping_call.start(self.SCAN_INTERVAL)
        return self.stopping_deferred

    def scan(self):
        """Pass any newly appeared builders to the manager."""
        self.manager.addScanForBuilders(self.checkForNewBuilders())

    def checkForNewBuilders(self):
        """Return the builder names that were not known before this call.

        The returned names are also appended to `self.current_builders`.
        """
        # Imported here to avoid a circular import.
        from lp.buildmaster.interfaces.builder import IBuilderSet
        present = set(
            builder.name for builder in getUtility(IBuilderSet))
        already_known = set(self.current_builders)
        newcomers = list(present - already_known)
        self.current_builders.extend(newcomers)
        return newcomers
10888.6.14
by Julian Edwards
checkForNewBuilders returns builders if they're new |
385 |
|
10888.6.11
by Julian Edwards
First part of detecting new builders |
386 |
|
10888.6.8
by Julian Edwards
move the .old file to the .py file so the diff is not massive |
387 |
class BuilddManager(service.Service):
    """The Twisted service that drives all builder scanners."""

    def __init__(self, clock=None):
        """Create the logger, the new-builder scanner and the DB policy.

        :param clock: Passed through to `NewBuildersScanner`.
        """
        self.builder_slaves = []
        self.logger = self._setupLogger()
        self.new_builders_scanner = NewBuildersScanner(
            manager=self, clock=clock)
        self.transaction_policy = DatabaseTransactionPolicy(read_only=True)

    def _setupLogger(self):
        """Set up a 'slave-scanner' logger that redirects to twisted.

        Both the logger and its handler are set to INFO, and records are
        formatted as the bare message.

        :return: The configured logger.
        """
        verbosity = logging.INFO
        scanner_logger = logging.getLogger(BUILDD_MANAGER_LOG_NAME)

        # The handler writes into the twisted log module.
        twisted_handler = logging.StreamHandler(log.StdioOnnaStick())
        twisted_handler.setLevel(verbosity)
        twisted_handler.setFormatter(logging.Formatter('%(message)s'))

        scanner_logger.addHandler(twisted_handler)
        scanner_logger.setLevel(verbosity)
        return scanner_logger

    def enterReadOnlyDatabasePolicy(self):
        """Set the database transaction policy to read-only.

        Any previously pending changes are committed first.
        """
        transaction.commit()
        self.transaction_policy.__enter__()

    def exitReadOnlyDatabasePolicy(self, *args):
        """Leave the read-only database transaction policy.

        Extra positional arguments are accepted and ignored, so this can
        be used directly as a Deferred callback.
        """
        self.transaction_policy.__exit__(None, None, None)

    def startService(self):
        """Service entry point, called when the application starts."""
        # Imported here to avoid a circular import.
        from lp.buildmaster.interfaces.builder import IBuilderSet

        self.enterReadOnlyDatabasePolicy()

        # One scanner per builder that exists right now ...
        builder_names = [
            builder.name for builder in getUtility(IBuilderSet)]
        self.addScanForBuilders(builder_names)
        # ... plus a periodic check for builders added later.
        self.new_builders_scanner.scheduleScan()

        # Events will now fire in the SlaveScanner objects to scan each
        # builder.

    def stopService(self):
        """Callback for when we need to shut down.

        :return: A Deferred built from every scanner's
            `stopping_deferred`, with `exitReadOnlyDatabasePolicy`
            attached as a callback.
        """
        # XXX: lacks unit tests
        # Collect the deferreds first, then halt every loop gracefully.
        scanners = self.builder_slaves
        pending = [scanner.stopping_deferred for scanner in scanners]
        pending.append(self.new_builders_scanner.stopping_deferred)

        self.new_builders_scanner.stop()
        for scanner in scanners:
            scanner.stopCycle()

        # The 'stopping_deferred's are called back when the loops are
        # stopped, so we can wait on them all at once here before
        # exiting.
        all_stopped = defer.DeferredList(pending, consumeErrors=True)
        all_stopped.addCallback(self.exitReadOnlyDatabasePolicy)
        return all_stopped

    def addScanForBuilders(self, builders):
        """Create, register and start a `SlaveScanner` per builder name.

        :param builders: Iterable of builder names.
        :return: `self.builder_slaves`, for the benefit of tests.
        """
        for builder_name in builders:
            scanner = SlaveScanner(builder_name, self.logger)
            self.builder_slaves.append(scanner)
            scanner.startCycle()

        return self.builder_slaves