diff --git a/CHANGES.md b/CHANGES.md index 0dcdbba3..6faac1b8 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,6 +1,6 @@ ### 0.12.0 (2016-xx-xx xx:xx:xx UTC) -* +* Update xmltodict library 0.9.2 (579a005) to 0.9.2 (eac0031) ### 0.11.0 (2016-01-10 22:30:00 UTC) diff --git a/lib/xmltodict.py b/lib/xmltodict.py index 746a4bcd..310130b0 100644 --- a/lib/xmltodict.py +++ b/lib/xmltodict.py @@ -50,10 +50,11 @@ class _DictSAXHandler(object): dict_constructor=OrderedDict, strip_whitespace=True, namespace_separator=':', - namespaces=None): + namespaces=None, + force_list=()): self.path = [] self.stack = [] - self.data = None + self.data = [] self.item = None self.item_depth = item_depth self.xml_attribs = xml_attribs @@ -67,6 +68,7 @@ class _DictSAXHandler(object): self.strip_whitespace = strip_whitespace self.namespace_separator = namespace_separator self.namespaces = namespaces + self.force_list = force_list def _build_name(self, full_name): if not self.namespaces: @@ -99,21 +101,25 @@ class _DictSAXHandler(object): else: attrs = None self.item = attrs or None - self.data = None + self.data = [] def endElement(self, full_name): name = self._build_name(full_name) if len(self.path) == self.item_depth: item = self.item if item is None: - item = self.data + item = (None if not self.data + else self.cdata_separator.join(self.data)) + should_continue = self.item_callback(self.path, item) if not should_continue: raise ParsingInterrupted() if len(self.stack): - item, data = self.item, self.data + data = (None if not self.data + else self.cdata_separator.join(self.data)) + item = self.item self.item, self.data = self.stack.pop() - if self.strip_whitespace and data is not None: + if self.strip_whitespace and data: data = data.strip() or None if data and self.force_cdata and item is None: item = self.dict_constructor() @@ -124,14 +130,15 @@ class _DictSAXHandler(object): else: self.item = self.push_data(self.item, name, data) else: - self.item = self.data = None + self.item = None + 
self.data = [] self.path.pop() def characters(self, data): if not self.data: - self.data = data + self.data = [data] else: - self.data += self.cdata_separator + data + self.data.append(data) def push_data(self, item, key, data): if self.postprocessor is not None: @@ -148,7 +155,10 @@ class _DictSAXHandler(object): else: item[key] = [value, data] except KeyError: - item[key] = data + if key in self.force_list: + item[key] = [data] + else: + item[key] = data return item @@ -220,6 +230,37 @@ def parse(xml_input, encoding=None, expat=expat, process_namespaces=False, >>> xmltodict.parse('<a>hello</a>', expat=defusedexpat.pyexpat) OrderedDict([(u'a', u'hello')]) + You can use the force_list argument to force lists to be created even + when there is only a single child of a given level of hierarchy. The + force_list argument is a tuple of keys. If the key for a given level + of hierarchy is in the force_list argument, that level of hierarchy + will have a list as a child (even if there is only one sub-element). + The index_keys operation takes precedence over this. This is applied + after any user-supplied postprocessor has already run. + + For example, given this input: + <servers> + <server> + <name>host1</name> + <os>Linux</os> + <interfaces> + <interface> + <name>em0</name> + <ip_address>10.0.0.1</ip_address> + </interface> + </interfaces> + </server> + </servers> + + If called with force_list=('interface',), it will produce + this dictionary: + {'servers': + {'server': + {'name': 'host1', + 'os': 'Linux'}, + 'interfaces': + {'interface': + [ {'name': 'em0', 'ip_address': '10.0.0.1' } ] } } } """ handler = _DictSAXHandler(namespace_separator=namespace_separator, **kwargs)