175
158
self.logger.warning(
176
159
'Parsing submission %s: %s' % (self.submission_key, message))
178
def fixFrequentErrors(self, submission):
179
"""Fixes for frequent formal errors in the submissions.
181
# A considerable number of reports for Lucid have ESC characters
182
# in comment nodes. We don't need the comment nodes at all, so
183
# we can simply empty them.
184
submission = _broken_comment_nodes_re.sub('<comment/>', submission)
186
# Submissions from Natty don't have the nodes <dmi> and <udev>
187
# as children of the <hardware> node. Fortunately, they provide
191
# <info command="grep -r . /sys/class/dmi/id/ 2>/dev/null">
194
# <info command="udevadm info --export-db">
199
# We can try to find the two relevant <info> nodes inside <context>
200
# and move their content into the proper subnodes of <hardware>.
201
if _udev_node_exists.search(submission) is None:
202
mo = _missing_udev_node_data.search(submission)
204
missing_data = mo.group(1)
205
missing_data = '<udev>%s</udev>\n</hardware>' % missing_data
206
submission = submission.replace('</hardware>', missing_data)
207
if _dmi_node_exists.search(submission) is None:
208
mo = _missing_dmi_node_data.search(submission)
210
missing_data = mo.group(1)
211
missing_data = '<dmi>%s</dmi>\n</hardware>' % missing_data
212
submission = submission.replace('</hardware>', missing_data)
215
161
def _getValidatedEtree(self, submission, submission_key):
216
162
"""Create an etree doc from the XML string submission and validate it.
218
164
:return: an `lxml.etree` instance representation of a valid
219
165
submission or None for invalid submissions.
221
submission = self.fixFrequentErrors(submission)
223
168
tree = etree.parse(StringIO(submission), parser=self.doc_parser)
224
169
except SyntaxError, error_value:
1523
1456
dmi_data = parsed_data['hardware']['dmi']
1524
1457
for udev_data in parsed_data['hardware']['udev']:
1525
1458
device_path = udev_data['P']
1526
if sysfs_data is not None:
1527
sysfs_data_for_device = sysfs_data.get(device_path)
1529
# broken Lucid, Maverick and Natty submissions.
1530
# See also bug 835103.
1531
sysfs_data_for_device = None
1532
1459
if device_path == UDEV_ROOT_PATH:
1533
1460
device = UdevDevice(
1534
self, udev_data, sysfs_data=sysfs_data_for_device,
1461
self, udev_data, sysfs_data=sysfs_data.get(device_path),
1535
1462
dmi_data=dmi_data)
1537
1464
device = UdevDevice(
1538
self, udev_data, sysfs_data=sysfs_data_for_device)
1465
self, udev_data, sysfs_data=sysfs_data.get(device_path))
1539
1466
self.devices[device_path] = device
1541
1468
# The parent-child relations are derived from the path names of
3013
2934
error_utility.raising(info, request)
3014
2935
self.logger.error('%s (%s)' % (error_explanation, request.oopsid))
3016
def getUnprocessedSubmissions(self, chunk_size):
    """Return the next batch of submissions to process.

    Abstract hook: this base implementation always raises
    NotImplementedError, so each concrete processing loop has to
    provide its own implementation.

    :param chunk_size: The batch size requested by the caller; it is
        not used by this base implementation.
    :raises NotImplementedError: always.
    """
    raise NotImplementedError
3019
2937
def __call__(self, chunk_size):
3020
2938
"""Process a batch of yet unprocessed HWDB submissions."""
3021
2939
# chunk_size is a float; we compare it below with an int value,
3022
2940
# which can lead to unexpected results. Since it is also used as
3023
2941
# a limit for an SQL query, convert it into an integer.
3024
2942
chunk_size = int(chunk_size)
3025
submissions = self.getUnprocessedSubmissions(chunk_size)
2943
submissions = getUtility(IHWSubmissionSet).getByStatus(
2944
HWSubmissionProcessingStatus.SUBMITTED,
3026
2947
# Listify the submissions, since we'll have to loop over each
3027
2948
# one anyway. This saves a COUNT query for getting the number of
3087
3007
self.transaction.commit()
3090
class ProcessingLoopForPendingSubmissions(ProcessingLoopBase):
3092
def getUnprocessedSubmissions(self, chunk_size):
3093
submissions = getUtility(IHWSubmissionSet).getByStatus(
3094
HWSubmissionProcessingStatus.SUBMITTED,
3097
submissions = list(submissions)
3101
class ProcessingLoopForReprocessingBadSubmissions(ProcessingLoopBase):
3103
def __init__(self, start, transaction, logger,
3104
max_submissions, record_warnings):
3105
super(ProcessingLoopForReprocessingBadSubmissions, self).__init__(
3106
transaction, logger, max_submissions, record_warnings)
3109
def getUnprocessedSubmissions(self, chunk_size):
3110
submissions = getUtility(IHWSubmissionSet).getByStatus(
3111
HWSubmissionProcessingStatus.INVALID, user=self.janitor)
3112
submissions = removeSecurityProxy(submissions).find(
3113
HWSubmission.id >= self.start)
3114
submissions = list(submissions[:chunk_size])
3118
def process_pending_submissions(transaction, logger, max_submissions=None,
3119
record_warnings=True):
3009
def process_pending_submissions(transaction, logger, max_submissions=None):
3120
3010
"""Process pending submissions.
3122
3012
Parse pending submissions, store extracted data in HWDB tables and
3123
3013
mark them as either PROCESSED or INVALID.
3125
loop = ProcessingLoopForPendingSubmissions(
3126
transaction, logger, max_submissions, record_warnings)
3127
# It is hard to predict how long it will take to parse a submission.
3128
# We don't want a DB transaction to last too long, but we also
3129
# don't want to commit more often than necessary. The LoopTuner
3130
# handles this for us. The loop's run time will be approximated to
3131
# 2 seconds, but will never handle more than 50 submissions.
3132
loop_tuner = LoopTuner(
3133
loop, 2, minimum_chunk_size=1, maximum_chunk_size=50)
3136
'Processed %i valid and %i invalid HWDB submissions'
3137
% (loop.valid_submissions, loop.invalid_submissions))
3140
def reprocess_invalid_submissions(start, transaction, logger,
3141
max_submissions=None, record_warnings=True):
3142
"""Reprocess invalid submissions.
3144
Parse submissions that have been marked as invalid. A newer
3145
variant of the parser might be able to process them.
3147
loop = ProcessingLoopForReprocessingBadSubmissions(
3148
start, transaction, logger, max_submissions, record_warnings)
3149
# It is hard to predict how long it will take to parse a submission.
3150
# We don't want a DB transaction to last too long, but we also
3151
# don't want to commit more often than necessary. The LoopTuner
3152
# handles this for us. The loop's run time will be approximated to
3153
# 2 seconds, but will never handle more than 50 submissions.
3154
loop_tuner = LoopTuner(
3155
loop, 2, minimum_chunk_size=1, maximum_chunk_size=50)
3158
'Processed %i valid and %i invalid HWDB submissions'
3159
% (loop.valid_submissions, loop.invalid_submissions))
3160
logger.info('last processed: %i' % loop.start)
3015
loop = ProcessingLoop(transaction, logger, max_submissions)
3016
# It is hard to predict how long it will take to parse a submission.
3017
# We don't want a DB transaction to last too long, but we also
3018
# don't want to commit more often than necessary. The LoopTuner
3019
# handles this for us. The loop's run time will be approximated to
3020
# 2 seconds, but will never handle more than 50 submissions.
3021
loop_tuner = LoopTuner(
3022
loop, 2, minimum_chunk_size=1, maximum_chunk_size=50)
3025
'Processed %i valid and %i invalid HWDB submissions'
3026
% (loop.valid_submissions, loop.invalid_submissions))