Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add (Apache) Abdera and (Cisco) Cobra convention #17

Merged
merged 11 commits into from
May 9, 2017
2 changes: 1 addition & 1 deletion AUTHORS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@ Development Lead
Contributors
------------

None yet. Why not be the first?
* Dag Wieers <dag@wieers.com>
6 changes: 6 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,16 @@ can be converted into this data structure (which is also a valid JSON object)::
This uses the `BadgerFish`_ convention that prefixes attributes with ``@``.
The conventions supported by this library are:

* `Abdera`_: Use ``"attributes"`` for attributes, ``"children"`` for nodes
* `BadgerFish`_: Use ``"$"`` for text content, ``@`` to prefix attributes
* `Cobra`_: Use ``"attributes"`` for attributes (even when empty), ``"children"`` for nodes, values are strings
* `GData`_: Use ``"$t"`` for text content, attributes added as-is
* `Yahoo`_: Use ``"content"`` for text content, attributes added as-is
* `Parker`_: Use tail nodes for text content, ignore attributes

.. _Abdera: http://wiki.open311.org/JSON_and_XML_Conversion/#the-abdera-convention
.. _BadgerFish: http://www.sklar.com/badgerfish/
.. _Cobra: http://wiki.open311.org/JSON_and_XML_Conversion/#the-cobra-convention
.. _GData: http://wiki.open311.org/JSON_and_XML_Conversion/#the-gdata-convention
.. _Parker: https://developer.mozilla.org/en-US/docs/JXON#The_Parker_Convention
.. _Yahoo: https://developer.yahoo.com/javascript/json.html#xml
Expand Down Expand Up @@ -148,7 +152,9 @@ Conventions
To use a different conversion method, replace ``BadgerFish`` with one of the
other classes. Currently, these are supported::

>>> from xmljson import abdera # == xmljson.Abdera()
>>> from xmljson import badgerfish # == xmljson.BadgerFish()
>>> from xmljson import cobra # == xmljson.Cobra()
>>> from xmljson import gdata # == xmljson.GData()
>>> from xmljson import parker # == xmljson.Parker()
>>> from xmljson import yahoo # == xmljson.Yahoo()
Expand Down
96 changes: 96 additions & 0 deletions tests/test_xmljson.py
Original file line number Diff line number Diff line change
Expand Up @@ -537,3 +537,99 @@ def test_xml_fromstring(self):
j2x_strings({"x": True}, '<x>True</x>')
j2x_convert({"x": False}, '<x>false</x>')
j2x_strings({"x": False}, '<x>False</x>')


class TestAbdera(TestXmlJson):
    # Only the etree -> data direction is exercised; the reverse directions
    # are placeholders that unittest reports as skipped.

    @unittest.skip('To be written')
    def test_etree(self, converter=None):
        'Abdera conversion from data to etree'

    @unittest.skip('To be written')
    def test_html(self):
        'Abdera conversion from data to HTML'

    def test_data(self):
        'Abdera conversion from etree to data'
        check = self.check_data(xmljson.abdera)

        # Each entry is (JSON text, XML text); entries are checked in order
        # and the first failing check aborts the test.
        cases = [
            # Dicts
            ('{"x": {"a": {}}}',
             '<x><a/></x>'),
            ('{"x": {"attributes": {"x": 1}}}',
             '<x x="1"/>'),
            ('{"root": {"children": [{"x": {"attributes": {"x": 1}}}, {"y": {"z": {}}}]}}',
             '<root><x x="1"/><y><z/></y></root>'),

            # Attributes
            ('{"p": {"attributes": {"id": 1}, "children": ["text"]}}',
             '<p id="1">text</p>'),
            ('{"div": {"attributes": {"id": 2}, "children": ["parent-text", {"p": "text"}]}}',
             '<div id="2">parent-text<p>text</p></div>'),

            # Text content of elements
            ('{"alice": "bob"}',
             '<alice>bob</alice>'),

            # Nested elements become nested properties
            ('{"alice": {"children": [{"bob": "charlie"}, {"david": "edgar"}]}}',
             '<alice><bob>charlie</bob><david>edgar</david></alice>'),

            # Multiple elements at the same level become array elements.
            ('{"alice": {"bob": "charlie"}}',
             '<alice><bob>charlie</bob></alice>'),
            ('{"alice": {"children": [{"bob": "charlie"}, {"bob": "david"}]}}',
             '<alice><bob>charlie</bob><bob>david</bob></alice>'),

            # Attributes go in specific "attributes" dictionary
            ('{"alice": {"attributes": {"charlie": "david"}, "children": ["bob"]}}',
             '<alice charlie="david">bob</alice>'),
        ]
        for json_text, xml_text in cases:
            check(json_text, xml_text)


class TestCobra(TestXmlJson):
    # Only the etree -> data direction is exercised; the reverse directions
    # are placeholders that unittest reports as skipped.

    @unittest.skip('To be written')
    def test_etree(self, converter=None):
        'Cobra conversion from data to etree'

    @unittest.skip('To be written')
    def test_html(self):
        'Cobra conversion from data to HTML'

    def test_data(self):
        'Cobra conversion from etree to data'
        check = self.check_data(xmljson.cobra)

        # Each entry is (JSON text, XML text); entries are checked in order
        # and the first failing check aborts the test.
        cases = [
            # Dicts
            ('{"x": {"attributes": {}, "children": [{"a": {"attributes": {}}}]}}',
             '<x><a/></x>'),
            ('{"x": {"attributes": {"x": "1"}}}',
             '<x x="1"/>'),
            ('{"root": {"attributes": {}, "children": [{"x": {"attributes": {"x": "1"}}}, {"y": {"attributes": {}, "children": [{"z": {"attributes": {}}}]}}]}}',
             '<root><x x="1"/><y><z/></y></root>'),

            # Attributes
            ('{"p": {"attributes": {"id": "1"}, "children": ["text"]}}',
             '<p id="1">text</p>'),
            ('{"div": {"attributes": {"id": "2"}, "children": ["parent-text", {"p": "text"}]}}',
             '<div id="2">parent-text<p>text</p></div>'),

            # Text content of elements
            ('{"alice": "bob"}',
             '<alice>bob</alice>'),

            # Nested elements become nested properties
            ('{"alice": {"attributes": {}, "children": [{"bob": "charlie"}, {"david": "edgar"}]}}',
             '<alice><bob>charlie</bob><david>edgar</david></alice>'),

            # Multiple elements at the same level become array elements.
            ('{"alice": {"attributes": {}, "children": [{"bob": "charlie"}]}}',
             '<alice><bob>charlie</bob></alice>'),
            ('{"alice": {"attributes": {}, "children": [{"bob": "charlie"}, {"bob": "david"}]}}',
             '<alice><bob>charlie</bob><bob>david</bob></alice>'),

            # Attributes go in specific "attributes" dictionary
            ('{"alice": {"attributes": {"charlie": "david"}, "children": ["bob"]}}',
             '<alice charlie="david">bob</alice>'),
        ]
        for json_text, xml_text in cases:
            check(json_text, xml_text)
99 changes: 99 additions & 0 deletions xmljson/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,106 @@ def data(self, root, preserve_root=False):
return result


class Abdera(XMLData):
    '''Converts between XML and data using the Abdera convention'''

    def __init__(self, **kwargs):
        # simple_text / text_content are fixed for this convention; all other
        # options are forwarded unchanged to the base class.
        super(Abdera, self).__init__(simple_text=True, text_content=True, **kwargs)

    def data(self, root):
        '''Convert etree.Element into a dictionary

        Returns a single-key mapping ``{tag: value}`` built with ``self.dict``.
        ``value`` is one of:

        - the converted text, when the element has text but neither
          attributes nor child elements;
        - the only entry of the children list, when the element has no
          attributes and exactly one child entry;
        - otherwise a mapping with an ``attributes`` key (only when the
          element has attributes) and a ``children`` key (only when there is
          at least one child entry).

        Attribute values and text are passed through ``self._fromstring``.
        Only ``root.text`` is read; tail text of child nodes is not used.
        '''

        value = self.dict()

        # Add attributes to a specific 'attributes' key (only when present)
        if root.attrib:
            value[u'attributes'] = self.dict()
            for attr, attrval in root.attrib.items():
                value[u'attributes'][unicode(attr)] = self._fromstring(attrval)

        # Add children to specific 'children' key
        children_list = self.list()
        # Keep only nodes whose tag is a string (presumably this filters out
        # comments and processing instructions -- TODO confirm).
        children = [node for node in root if isinstance(node.tag, basestring)]

        # Add root text
        if root.text and self.text_content is not None:
            text = root.text.strip()
            if text:
                if self.simple_text and not children and not root.attrib:
                    # Text-only element: the value is the bare text
                    value = self._fromstring(text)
                else:
                    # Text becomes the first entry of the children list
                    children_list = [self._fromstring(text)]

        count = Counter(child.tag for child in children)
        for child in children:
            # Convert each child exactly once; calling self.data() a second
            # time here would double the work at every nesting level.
            child_data = self.data(child)
            # `dict` instead of the Python-2-only types.DictType, so the
            # check also works on Python 3.
            if (count[child.tag] == 1
                    and len(children_list) > 1
                    and isinstance(children_list[-1], dict)):
                # Merge keys to existing dictionary
                children_list[-1].update(child_data)
            else:
                # Add as a new entry
                children_list.append(child_data)

        # Flatten children
        if not root.attrib and len(children_list) == 1:
            value = children_list[0]

        elif len(children_list) > 0:
            value[u'children'] = children_list

        return self.dict([(unicode(root.tag), value)])


# The difference between Cobra and Abdera is that Cobra emits an 'attributes' key even
# when it is empty, never removes the 'children' key when there is only one child,
# and keeps every value as a string.
# https://github.com/datacenter/cobra/blob/master/cobra/internal/codec/jsoncodec.py
class Cobra(XMLData):
    '''Converts between XML and data using the Cobra convention'''

    def __init__(self, **kwargs):
        # simple_text / text_content / xml_fromstring are fixed for this
        # convention; all other options are forwarded to the base class.
        super(Cobra, self).__init__(simple_text=True, text_content=True, xml_fromstring=False, **kwargs)

    def data(self, root):
        '''Convert etree.Element into a dictionary

        Returns a single-key mapping ``{tag: value}`` built with ``self.dict``.
        ``value`` is one of:

        - the text (passed through ``self._fromstring``), when the element
          has text but neither attributes nor child elements;
        - otherwise a mapping that always has an ``attributes`` key (possibly
          empty, filled in sorted attribute-name order with the raw attribute
          values) and a ``children`` key when there is at least one child
          entry.

        Only ``root.text`` is read; tail text of child nodes is not used.
        '''

        value = self.dict()

        # Add attributes to 'attributes' key (sorted!) even when empty
        value[u'attributes'] = self.dict()
        for attr in sorted(root.attrib):
            value[u'attributes'][unicode(attr)] = root.attrib[attr]

        # Add children to specific 'children' key
        children_list = self.list()
        # Keep only nodes whose tag is a string (presumably this filters out
        # comments and processing instructions -- TODO confirm).
        children = [node for node in root if isinstance(node.tag, basestring)]

        # Add root text
        if root.text and self.text_content is not None:
            text = root.text.strip()
            if text:
                if self.simple_text and not children and not root.attrib:
                    # Text-only element: the value is the bare text
                    value = self._fromstring(text)
                else:
                    # Text becomes the first entry of the children list
                    children_list = [self._fromstring(text)]

        count = Counter(child.tag for child in children)
        for child in children:
            # Convert each child exactly once; calling self.data() a second
            # time here would double the work at every nesting level.
            child_data = self.data(child)
            # `dict` instead of the Python-2-only types.DictType, so the
            # check also works on Python 3.
            if (count[child.tag] == 1
                    and len(children_list) > 1
                    and isinstance(children_list[-1], dict)):
                # Merge keys to existing dictionary
                children_list[-1].update(child_data)
            else:
                # Add as a new entry
                children_list.append(child_data)

        if len(children_list) > 0:
            value[u'children'] = children_list

        return self.dict([(unicode(root.tag), value)])


# Ready-to-use converter instances, one per supported convention, each
# constructed with no arguments (e.g. ``xmljson.abdera`` == ``xmljson.Abdera()``).
abdera = Abdera()
badgerfish = BadgerFish()
cobra = Cobra()
gdata = GData()
parker = Parker()
yahoo = Yahoo()