Skip to content

Commit 1533164

Browse files
committed
Document strict flag and also use it to enable recover xml parsing
1 parent 07393c4 commit 1533164

File tree

6 files changed

+55
-13
lines changed

6 files changed

+55
-13
lines changed

CHANGES

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
1.2.0 (unreleased)
22
------------------
3+
- Add flag to disable strict mode in the Client. This allows zeep to better
4+
work with non-standards-compliant SOAP servers. See the documentation for
5+
usage and potential downsides.
6+
- Minor refactor of resolving of elements for improved performance
37
- Support the SOAP 1.2 'http://www.w3.org/2003/05/soap/bindings/HTTP/'
48
transport uri (#355)
59
- Fallback to matching wsdl lookups to matching when the target namespace is
@@ -11,6 +15,7 @@
1115
- Filter out duplicate types and elements when dumping the wsdl schema (#360)
1216
- Add ``zeep.CachingClient()`` which enables the SqliteCache by default
1317

18+
1419
1.1.0 (2017-02-18)
1520
------------------
1621
- Fix an attribute error when a complexType used xsd:anyType as base

docs/client.rst

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,20 @@ Otherwise the first service and first port within that service are used as the
1212
default.
1313

1414

15+
Strict mode
16+
-----------
17+
By default zeep will operate in 'strict' mode. This can be disabled if you are
18+
working with a SOAP server which is not standards compliant by passing the
19+
kwarg ``strict=False`` to the ``Client``. Disabling strict mode will change
20+
the following behaviour:
21+
22+
- The XML is parsed with the recover mode enabled
23+
- Non-optional elements are allowed to be missing in xsd:sequences
24+
25+
Note that disabling strict mode should be considered a last resort since it
26+
might result in data loss between the XML and the returned response.
27+
28+
1529
The ServiceProxy object
1630
-----------------------
1731

src/zeep/__main__.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@ def parse_arguments(args=None):
2727
'--verbose', action='store_true', help='Enable verbose output')
2828
parser.add_argument(
2929
'--profile', help="Enable profiling and save output to given file")
30+
parser.add_argument(
31+
'--no-strict', action='store_true', default=False,
32+
help="Disable strict mode")
3033
return parser.parse_args(args)
3134

3235

@@ -72,7 +75,9 @@ def main(args):
7275

7376
transport = Transport(cache=cache, session=session)
7477
st = time.time()
75-
client = Client(args.wsdl_file, transport=transport)
78+
79+
strict = not args.no_strict
80+
client = Client(args.wsdl_file, transport=transport, strict=strict)
7681
logger.debug("Loading WSDL took %sms", (time.time() - st) * 1000)
7782

7883
if args.profile:

src/zeep/loader.py

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ def resolve(self, url, pubid, context):
1818
return self.resolve_string(content, context)
1919

2020

21-
def parse_xml(content, transport, base_url=None, recover=False):
21+
def parse_xml(content, transport, base_url=None, strict=False):
2222
"""Parse an XML string and return the root Element.
2323
2424
:param content: The XML string
@@ -28,27 +28,34 @@ def parse_xml(content, transport, base_url=None, recover=False):
2828
:param base_url: The base url of the document, used to make relative
2929
lookups absolute.
3030
:type base_url: str
31-
:param recover: boolean to indicate if the lxml recover mode should be
32-
enabled. Recover mode tries to parse invalid XML as best as it can.
33-
:type recover: boolean
31+
:param strict: boolean to indicate if the XML should be parsed in 'strict' mode.
32+
If false then the recover mode is enabled which tries to parse invalid
33+
XML as best as it can.
34+
:type strict: boolean
3435
:returns: The document root
3536
:rtype: lxml.etree._Element
3637
3738
"""
38-
parser = etree.XMLParser(remove_comments=True, resolve_entities=False)
39+
recover = not strict
40+
parser = etree.XMLParser(
41+
remove_comments=True, resolve_entities=False, recover=recover)
3942
parser.resolvers.add(ImportResolver(transport))
4043
try:
4144
return fromstring(content, parser=parser, base_url=base_url)
4245
except etree.XMLSyntaxError as exc:
4346
raise XMLSyntaxError("Invalid XML content received (%s)" % exc.message)
4447

4548

46-
def load_external(url, transport, base_url=None):
49+
def load_external(url, transport, base_url=None, strict=True):
4750
"""Load an external XML document.
4851
4952
:param url:
5053
:param transport:
5154
:param base_url:
55+
:param strict: boolean to indicate if the XML should be parsed in 'strict' mode.
56+
If false then the recover mode is enabled which tries to parse invalid
57+
XML as best as it can.
58+
:type strict: boolean
5259
5360
"""
5461
if hasattr(url, 'read'):
@@ -57,7 +64,7 @@ def load_external(url, transport, base_url=None):
5764
if base_url:
5865
url = absolute_location(url, base_url)
5966
content = transport.load(url)
60-
return parse_xml(content, transport, base_url)
67+
return parse_xml(content, transport, base_url, strict=strict)
6168

6269

6370
def absolute_location(location, base):

src/zeep/wsdl/wsdl.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,7 @@
88
import six
99
from lxml import etree
1010

11-
from zeep.loader import (
12-
absolute_location, is_relative_path, load_external, parse_xml)
11+
from zeep.loader import absolute_location, is_relative_path, load_external
1312
from zeep.utils import findall_multiple_ns
1413
from zeep.wsdl import parse
1514
from zeep.xsd import Schema
@@ -125,7 +124,8 @@ def _get_xml_document(self, location):
125124
:type location: string
126125
127126
"""
128-
return load_external(location, self.transport, self.location)
127+
return load_external(
128+
location, self.transport, self.location, strict=self.strict)
129129

130130
def _add_definition(self, definition):
131131
key = (definition.target_namespace, definition.location)

src/zeep/xsd/visitor.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,13 @@ class SchemaVisitor(object):
3535
"""Visitor which processes XSD files and registers global elements and
3636
types in the given schema.
3737
38+
:param schema:
39+
:type schema: zeep.xsd.schema.Schema
40+
:param document:
41+
:type document: zeep.xsd.schema.SchemaDocument
42+
3843
"""
44+
3945
def __init__(self, schema, document):
4046
self.document = document
4147
self.schema = schema
@@ -183,7 +189,10 @@ def visit_import(self, node, parent):
183189
return
184190

185191
# Load the XML
186-
schema_node = load_external(location, self.schema._transport)
192+
schema_node = load_external(
193+
location,
194+
self.schema._transport,
195+
strict=self.schema.strict)
187196

188197
# Check if the xsd:import namespace matches the targetNamespace. If
189198
# the xsd:import statement didn't specify a namespace then make sure
@@ -227,7 +236,9 @@ def visit_include(self, node, parent):
227236
return
228237

229238
schema_node = load_external(
230-
location, self.schema._transport, base_url=self.document._base_url)
239+
location, self.schema._transport,
240+
base_url=self.document._base_url,
241+
strict=self.schema.strict)
231242
self._includes.add(location)
232243

233244
# When the included document has no default namespace defined but the

0 commit comments

Comments
 (0)