112
111
# algorithm for polling.
113
112
SCAN_INTERVAL = 15
115
def __init__(self, builder_name, logger, clock=None):
114
def __init__(self, builder_name, logger):
116
115
self.builder_name = builder_name
117
116
self.logger = logger
122
118
def startCycle(self):
123
119
"""Scan the builder and dispatch to it or deal with failures."""
124
120
self.loop = LoopingCall(self.singleCycle)
125
self.loop.clock = self._clock
126
121
self.stopping_deferred = self.loop.start(self.SCAN_INTERVAL)
127
122
return self.stopping_deferred
143
138
1. Print the error in the log
144
139
2. Increment and assess failure counts on the builder and job.
146
# Since this is a failure path, we could be in a broken
147
# transaction. Get us a fresh one.
141
# Make sure that pending database updates are removed as it
142
# could leave the database in an inconsistent state (e.g. The
143
# job says it's running but the buildqueue has no builder set).
148
144
transaction.abort()
150
146
# If we don't recognise the exception include a stack trace with
152
148
error_message = failure.getErrorMessage()
153
familiar_error = failure.check(
154
150
BuildSlaveFailure, CannotBuild, BuildBehaviorMismatch,
155
CannotResumeHost, BuildDaemonError, CannotFetchFile)
158
"Scanning %s failed with: %s",
159
self.builder_name, error_message)
151
CannotResumeHost, BuildDaemonError, CannotFetchFile):
152
self.logger.info("Scanning %s failed with: %s" % (
153
self.builder_name, error_message))
162
"Scanning %s failed with: %s\n%s",
155
self.logger.info("Scanning %s failed with: %s\n%s" % (
163
156
self.builder_name, failure.getErrorMessage(),
164
failure.getTraceback())
157
failure.getTraceback()))
166
159
# Decide if we need to terminate the job or fail the
169
162
builder = get_builder(self.builder_name)
172
with DatabaseTransactionPolicy(read_only=False):
175
if builder.currentjob is None:
177
"Builder %s failed a probe, count: %s",
178
self.builder_name, builder.failure_count)
180
build_farm_job = builder.getCurrentBuildFarmJob()
181
build_farm_job.gotFailure()
183
"builder %s failure count: %s, "
184
"job '%s' failure count: %s",
164
if builder.currentjob is not None:
165
build_farm_job = builder.getCurrentBuildFarmJob()
166
build_farm_job.gotFailure()
168
"builder %s failure count: %s, "
169
"job '%s' failure count: %s" % (
185
170
self.builder_name,
186
171
builder.failure_count,
187
172
build_farm_job.title,
188
build_farm_job.failure_count)
190
assessFailureCounts(builder, failure.getErrorMessage())
173
build_farm_job.failure_count))
176
"Builder %s failed a probe, count: %s" % (
177
self.builder_name, builder.failure_count))
178
assessFailureCounts(builder, failure.getErrorMessage())
193
181
# Catastrophic code failure! Not much we can do.
195
182
self.logger.error(
196
183
"Miserable failure when trying to examine failure counts:\n",
199
187
def checkCancellation(self, builder):
200
188
"""See if there is a pending cancellation request.
250
237
# We need to re-fetch the builder object on each cycle as the
251
238
# Storm store is invalidated over transaction boundaries.
252
240
self.builder = get_builder(self.builder_name)
254
242
def status_updated(ignored):
243
# Commit the changes done while possibly rescuing jobs, to
244
# avoid holding table locks.
255
247
# See if we think there's an active build on the builder.
256
248
buildqueue = self.builder.getBuildQueue()
261
253
return self.builder.updateBuild(buildqueue)
263
255
def build_updated(ignored):
256
# Commit changes done while updating the build, to avoid
257
# holding table locks.
264
260
# If the builder is in manual mode, don't dispatch anything.
265
261
if self.builder.manual:
266
262
self.logger.debug(
267
'%s is in manual mode, not dispatching.',
263
'%s is in manual mode, not dispatching.' %
268
264
self.builder.name)
282
278
job = self.builder.currentjob
283
279
if job is not None and not self.builder.builderok:
284
280
self.logger.info(
285
"%s was made unavailable; resetting attached job.",
281
"%s was made unavailable, resetting attached "
282
"job" % self.builder.name)
287
284
transaction.commit()
288
with DatabaseTransactionPolicy(read_only=False):
293
287
# See if there is a job we can dispatch to the builder slave.
295
# XXX JeroenVermeulen 2011-10-11, bug=872112: The job's
296
# failure count will be reset once the job has started
297
# successfully. Because of intervening commits, you may see
298
# a build with a nonzero failure count that's actually going
299
# to succeed later (and have a failure count of zero). Or
300
# it may fail yet end up with a lower failure count than you
302
289
d = self.builder.findAndStartJob()
304
291
def job_started(candidate):
305
292
if self.builder.currentjob is not None:
306
293
# After a successful dispatch we can reset the
295
self.builder.resetFailureCount()
308
296
transaction.commit()
309
with DatabaseTransactionPolicy(read_only=False):
310
self.builder.resetFailureCount()
312
297
return self.builder.slave
406
390
logger.setLevel(level)
409
def enterReadOnlyDatabasePolicy(self):
410
"""Set the database transaction policy to read-only.
412
Any previously pending changes are committed first.
415
self.transaction_policy.__enter__()
417
def exitReadOnlyDatabasePolicy(self, *args):
418
"""Reset database transaction policy to the default read-write."""
419
self.transaction_policy.__exit__(None, None, None)
421
393
def startService(self):
422
394
"""Service entry point, called when the application starts."""
396
# Get a list of builders and set up scanners on each one.
423
398
# Avoiding circular imports.
424
399
from lp.buildmaster.interfaces.builder import IBuilderSet
426
self.enterReadOnlyDatabasePolicy()
428
# Get a list of builders and set up scanners on each one.
429
self.addScanForBuilders(
430
[builder.name for builder in getUtility(IBuilderSet)])
400
builder_set = getUtility(IBuilderSet)
401
builders = [builder.name for builder in builder_set]
402
self.addScanForBuilders(builders)
431
403
self.new_builders_scanner.scheduleScan()
433
405
# Events will now fire in the SlaveScanner objects to scan each