diff --git a/AUTHORS.rst b/AUTHORS.rst index 029aca1..19af02b 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -10,4 +10,4 @@ Development Lead Contributors ------------ -None yet. Why not be the first? +* Dag Wieers diff --git a/README.rst b/README.rst index cace635..1a21896 100644 --- a/README.rst +++ b/README.rst @@ -39,12 +39,16 @@ can be converted into this data structure (which also a valid JSON object):: This uses the `BadgerFish`_ convention that prefixes attributes with ``@``. The conventions supported by this library are: +* `Abdera`_: Use ``"attributes"`` for attributes, ``"children"`` for nodes * `BadgerFish`_: Use ``"$"`` for text content, ``@`` to prefix attributes +* `Cobra`_: Use ``"attributes"`` for attributes (even when empty), ``"children"`` for nodes, values are strings * `GData`_: Use ``"$t"`` for text content, attributes added as-is * `Yahoo`_ Use ``"content"`` for text content, attributes added as-is * `Parker`_: Use tail nodes for text content, ignore attributes +.. _Abdera: http://wiki.open311.org/JSON_and_XML_Conversion/#the-abdera-convention .. _BadgerFish: http://www.sklar.com/badgerfish/ +.. _Cobra: http://wiki.open311.org/JSON_and_XML_Conversion/#the-cobra-convention .. _GData: http://wiki.open311.org/JSON_and_XML_Conversion/#the-gdata-convention .. _Parker: https://developer.mozilla.org/en-US/docs/JXON#The_Parker_Convention .. _Yahoo: https://developer.yahoo.com/javascript/json.html#xml @@ -148,7 +152,9 @@ Conventions To use a different conversion method, replace ``BadgerFish`` with one of the other classes. 
Currently, these are supported:: + >>> from xmljson import abdera # == xmljson.Abdera() >>> from xmljson import badgerfish # == xmljson.BadgerFish() + >>> from xmljson import cobra # == xmljson.Cobra() >>> from xmljson import gdata # == xmljson.GData() >>> from xmljson import parker # == xmljson.Parker() >>> from xmljson import yahoo # == xmljson.Yahoo() diff --git a/tests/test_xmljson.py b/tests/test_xmljson.py index c81541c..2f1e991 100644 --- a/tests/test_xmljson.py +++ b/tests/test_xmljson.py @@ -537,3 +537,99 @@ def test_xml_fromstring(self): j2x_strings({"x": True}, 'True') j2x_convert({"x": False}, 'false') j2x_strings({"x": False}, 'False') + + +class TestAbdera(TestXmlJson): + @unittest.skip('To be written') + def test_etree(self, converter=None): + 'Abdera conversion from data to etree' + pass + + @unittest.skip('To be written') + def test_html(self): + 'Abdera conversion from data to HTML' + pass + + def test_data(self): + 'Abdera conversion from etree to data' + eq = self.check_data(xmljson.abdera) + + # Dicts + eq('{"x": {"a": {}}}', + '') + eq('{"x": {"attributes": {"x": 1}}}', + '') + eq('{"root": {"children": [{"x": {"attributes": {"x": 1}}}, {"y": {"z": {}}}]}}', + '') + + # Attributes + eq('{"p": {"attributes": {"id": 1}, "children": ["text"]}}', + '

text

') + eq('{"div": {"attributes": {"id": 2}, "children": ["parent-text", {"p": "text"}]}}', + '
parent-text

text

') + + # Text content of elements + eq('{"alice": "bob"}', + 'bob') + + # Nested elements become nested properties + eq('{"alice": {"children": [{"bob": "charlie"}, {"david": "edgar"}]}}', + 'charlieedgar') + + # Multiple elements at the same level become array elements. + eq('{"alice": {"bob": "charlie"}}', + 'charlie') + eq('{"alice": {"children": [{"bob": "charlie"}, {"bob": "david"}]}}', + 'charliedavid') + + # Attributes go in specific "attributes" dictionary + eq('{"alice": {"attributes": {"charlie": "david"}, "children": ["bob"]}}', + 'bob') + + +class TestCobra(TestXmlJson): + @unittest.skip('To be written') + def test_etree(self, converter=None): + 'Cobra conversion from data to etree' + pass + + @unittest.skip('To be written') + def test_html(self): + 'Cobra conversion from data to HTML' + pass + + def test_data(self): + 'Cobra conversion from etree to data' + eq = self.check_data(xmljson.cobra) + + # Dicts + eq('{"x": {"attributes": {}, "children": [{"a": {"attributes": {}}}]}}', + '
') + eq('{"x": {"attributes": {"x": "1"}}}', + '') + eq('{"root": {"attributes": {}, "children": [{"x": {"attributes": {"x": "1"}}}, {"y": {"attributes": {}, "children": [{"z": {"attributes": {}}}]}}]}}', + '') + + # Attributes + eq('{"p": {"attributes": {"id": "1"}, "children": ["text"]}}', + '

text

') + eq('{"div": {"attributes": {"id": "2"}, "children": ["parent-text", {"p": "text"}]}}', + '
parent-text

text

') + + # Text content of elements + eq('{"alice": "bob"}', + 'bob') + + # Nested elements become nested properties + eq('{"alice": {"attributes": {}, "children": [{"bob": "charlie"}, {"david": "edgar"}]}}', + 'charlieedgar') + + # Multiple elements at the same level become array elements. + eq('{"alice": {"attributes": {}, "children": [{"bob": "charlie"}]}}', + 'charlie') + eq('{"alice": {"attributes": {}, "children": [{"bob": "charlie"}, {"bob": "david"}]}}', + 'charliedavid') + + # Attributes go in specific "attributes" dictionary + eq('{"alice": {"attributes": {"charlie": "david"}, "children": ["bob"]}}', + 'bob') diff --git a/xmljson/__init__.py b/xmljson/__init__.py index 37a83ae..327650b 100644 --- a/xmljson/__init__.py +++ b/xmljson/__init__.py @@ -193,7 +193,106 @@ def data(self, root, preserve_root=False): return result +class Abdera(XMLData): + '''Converts between XML and data using the Abdera convention''' + def __init__(self, **kwargs): + super(Abdera, self).__init__(simple_text=True, text_content=True, **kwargs) + + def data(self, root): + '''Convert etree.Element into a dictionary''' + + value = self.dict() + + # Add attributes specific 'attributes' key + if root.attrib: + value[u'attributes'] = self.dict() + for attr, attrval in root.attrib.items(): + value[u'attributes'][unicode(attr)] = self._fromstring(attrval) + + # Add children to specific 'children' key + children_list = self.list() + children = [node for node in root if isinstance(node.tag, basestring)] + + # Add root text + if root.text and self.text_content is not None: + text = root.text.strip() + if text: + if self.simple_text and len(children) == len(root.attrib) == 0: + value = self._fromstring(text) + else: + children_list = [ self._fromstring(text), ] + + count = Counter(child.tag for child in children) + for child in children: + child_data = self.data(child) + if count[child.tag] == 1 and len(children_list) > 1 and isinstance(children_list[-1], types.DictType): + # Merge keys to 
existing dictionary + children_list[-1].update(child_data) + else: + # Add additional text + children_list.append(self.data(child)) + + # Flatten children + if len(root.attrib) == 0 and len(children_list) == 1: + value = children_list[0] + + elif len(children_list) > 0: + value[u'children'] = children_list + + return self.dict([(unicode(root.tag), value)]) + + +# The difference between Cobra and Abdera is that Cobra _always_ has 'attributes' keys, +# the 'children' key is removed when there is only one child, and everything is a string. +# https://github.com/datacenter/cobra/blob/master/cobra/internal/codec/jsoncodec.py +class Cobra(XMLData): + '''Converts between XML and data using the Cobra convention''' + def __init__(self, **kwargs): + super(Cobra, self).__init__(simple_text=True, text_content=True, xml_fromstring=False, **kwargs) + + def data(self, root): + '''Convert etree.Element into a dictionary''' + + value = self.dict() + + # Add attributes to 'attributes' key (sorted!) even when empty + value[u'attributes'] = self.dict() + if root.attrib: + for attr in sorted(root.attrib): + value[u'attributes'][unicode(attr)] = root.attrib[attr] + + # Add children to specific 'children' key + children_list = self.list() + children = [node for node in root if isinstance(node.tag, basestring)] + + # Add root text + if root.text and self.text_content is not None: + text = root.text.strip() + if text: + if self.simple_text and len(children) == len(root.attrib) == 0: + value = self._fromstring(text) + else: + children_list = [ self._fromstring(text), ] + + count = Counter(child.tag for child in children) + for child in children: + child_data = self.data(child) + if count[child.tag] == 1 and len(children_list) > 1 and isinstance(children_list[-1], types.DictType): + # Merge keys to existing dictionary + children_list[-1].update(child_data) + else: + # Add additional text + children_list.append(self.data(child)) + + if len(children_list) > 0: + value[u'children'] = children_list + + return 
self.dict([(unicode(root.tag), value)]) + + +abdera = Abdera() badgerfish = BadgerFish() +cobra = Cobra() gdata = GData() parker = Parker() yahoo = Yahoo()