8687.15.18
by Karl Fogel
Add the copyright header block to files under lib/canonical/. |
1 |
# Copyright 2009 Canonical Ltd. This software is licensed under the
|
2 |
# GNU Affero General Public License version 3 (see the file LICENSE).
|
|
6374.15.13
by Barry Warsaw
mergeRF |
3 |
|
4 |
"""Validate XML documents against a schema."""
|
|
5 |
||
6 |
__all__ = [ |
|
7 |
'XMLValidator', |
|
8 |
'RelaxNGValidator', |
|
9 |
]
|
|
10 |
||
11 |
||
12 |
import os |
|
13 |
from tempfile import NamedTemporaryFile |
|
14606.4.18
by William Grant
format-imports |
14 |
|
14593.2.15
by Curtis Hovey
Moved helpers to lp.services. |
15 |
from lp.services.helpers import simple_popen2 |
6374.15.13
by Barry Warsaw
mergeRF |
16 |
|
14606.4.18
by William Grant
format-imports |
17 |
|
6374.15.13
by Barry Warsaw
mergeRF |
18 |
class XMLValidator:
    """A validator for XML files against a schema.

    Validation is delegated to the external `xmllint` program. The kind
    of schema is selected by `SCHEMA_ARGUMENT`, which subclasses may
    override.
    """

    # Name of the xmllint command line option (without the leading "--")
    # that is followed by the schema file name.
    SCHEMA_ARGUMENT = 'schema'

    def __init__(self, schema_filename):
        """Create a validator instance.

        :param schema_filename: The name of a file containing the schema.
        """
        self.schema_filename = schema_filename
        self._errors = ''

    def validate(self, xml):
        """Validate the string xml.

        The error messages printed by xmllint, if any, are available
        from `error_log` afterwards.

        :param xml: The XML document to validate. It is written unchanged
            to a temporary file which is then passed to xmllint.
        :return: True, if xml is valid, else False.
        :raises AssertionError: if the last line of the xmllint output is
            neither "(file) validates" nor "(file) fails to validate".
        """
        # XXX Abel Deuring, 2008-03-20
        # The original implementation of the validation used the lxml
        # package. Unfortunately, running lxml's Relax NG validator
        # caused segfaults during PQM test runs, hence this class uses
        # an external validator.

        # Performance penalty of the external validator:
        # Using the lxml validator, the tests in this module need ca.
        # 3 seconds on a 2GHz Core2Duo laptop.
        # If the xml data to be validated is passed to xmllint via
        # lp.services.helpers.simple_popen2, the run time
        # of the tests is 38..40 seconds; if the validation input
        # is not passed via stdin but saved in a temporary file,
        # the tests need 28..30 seconds.

        xml_file = NamedTemporaryFile()
        try:
            xml_file.write(xml)
            # Make sure the data is on disk before xmllint reads the file.
            xml_file.flush()
            xml_filename = xml_file.name
            command = ['xmllint', '--noout', '--nonet',
                       '--%s' % self.SCHEMA_ARGUMENT,
                       self.schema_filename, xml_filename]
            # Look up the catalog shipped next to this module first, then
            # the system-wide catalog.
            local_catalog_path = os.path.abspath(
                os.path.join(os.path.dirname(__file__), "tests", "testfiles",
                             "catalog", "catalog"))
            catalogs = " ".join(
                [local_catalog_path, "/etc/xml/catalog"])
            env = {"XML_CATALOG_FILES": catalogs}
            result = simple_popen2(command, '', env=env).strip()
        finally:
            # Closing the file removes it; do this even if writing the
            # data or running xmllint failed, so that no temporary file
            # is left behind.
            xml_file.close()

        # The output consists of lines describing possible errors; the
        # last line is either "(file) fails to validate" or
        # "(file) validates".
        parts = result.rsplit('\n', 1)
        if len(parts) > 1:
            self._errors = parts[0]
            status = parts[1]
        else:
            self._errors = ''
            status = parts[0]
        if status == xml_filename + ' fails to validate':
            return False
        elif status == xml_filename + ' validates':
            return True
        else:
            raise AssertionError(
                'Unexpected result of running xmllint: %s' % result)

    @property
    def error_log(self):
        """A string with the errors detected by the validator.

        Each line contains one error; if the validation was successful,
        error_log is the empty string.
        """
        return self._errors
|
92 |
||
93 |
||
94 |
class RelaxNGValidator(XMLValidator):
    """An `XMLValidator` for schemas written in Relax NG.

    Only the schema option differs from the base class; everything else
    is inherited unchanged.
    """

    # Replaces the base class value, so the schema option becomes
    # "--relaxng".
    SCHEMA_ARGUMENT = 'relaxng'