Compare XML snippets?

后端 未结 10 865
名媛妹妹
名媛妹妹 2020-11-30 03:42

Building on another SO question, how can one check whether two well-formed XML snippets are semantically equal? All I need is "equal" or not, since I'm using this for un…

相关标签:
10条回答
  • 2020-11-30 04:35

    I had the same problem: two documents I wanted to compare that had the same attributes but in different orders.

    It seems that XML Canonicalization (C14N) in lxml works well for this, but I'm definitely not an XML expert. I'm curious to know if somebody else can point out drawbacks to this approach.

    from io import BytesIO


    def canonicalize(element):
        """Return the C14N serialisation of the tree that *element* belongs to."""
        # write_c14n produces bytes, so the sink has to be a bytes buffer
        # (a text StringIO rejects bytes on Python 3).
        sink = BytesIO()
        element.getroottree().write_c14n(sink)
        return sink.getvalue()


    parser = etree.XMLParser(remove_blank_text=True)

    xml1 = etree.fromstring(xml_string1, parser)
    xml2 = etree.fromstring(xml_string2, parser)

    # 1) compare the parsed element objects directly
    print("xml1 == xml2: " + str(xml1 == xml2))

    # 2) compare the pretty-printed serialisations
    ppxml1 = etree.tostring(xml1, pretty_print=True)
    ppxml2 = etree.tostring(xml2, pretty_print=True)

    print("pretty(xml1) == pretty(xml2): " + str(ppxml1 == ppxml2))

    # 3) compare the canonical (C14N) serialisations
    cxml1 = canonicalize(xml1)
    cxml2 = canonicalize(xml2)

    print("canonicalize(xml1) == canonicalize(xml2): " + str(cxml1 == cxml2))
    

    Running this gives me:

    $ python test.py 
    xml1 == xml2: False
    pretty(xml1) == pretty(xml2): False
    canonicalize(xml1) == canonicalize(xml2): True
    
    0 讨论(0)
  • 2020-11-30 04:37

    Adapting Anentropic's great answer to Python 3 (basically, change iteritems() to items(), and basestring to str):

    from lxml import etree
    import xmltodict  # pip install xmltodict
    
    def normalise_dict(d):
        """
        Recursively convert dict-like object (eg OrderedDict) into plain dict.

        List values are returned sorted, so two inputs that differ only in the
        order of their list items normalise to equal results.  Mappings found
        inside a list are normalised first and then ordered by their canonical
        JSON text, because dicts cannot be compared with ``<`` on Python 3.

        :param d: mapping (or anything ``dict()`` accepts) to normalise
        :return: a new plain ``dict``; the input is left untouched
        """
        import json  # local import: only used to build sort keys

        def sort_key(item):
            # Rank by kind first so None, strings and mappings never get
            # compared with each other directly (TypeError on Python 3).
            if item is None:
                return (0, '')
            if isinstance(item, str):
                return (1, item)
            # sort_keys=True makes the text independent of key insertion order.
            return (2, json.dumps(item, sort_keys=True, default=str))

        out = {}
        for k, v in dict(d).items():
            # Python 3 mappings expose items(); iteritems() no longer exists.
            if hasattr(v, 'items'):
                out[k] = normalise_dict(v)
            elif isinstance(v, list):
                normalised = [
                    normalise_dict(item) if hasattr(item, 'items') else item
                    for item in v
                ]
                out[k] = sorted(normalised, key=sort_key)
            else:
                out[k] = v
        return out
    
    
    def xml_compare(a, b):
        """
        Compares two XML documents (as string, bytes or etree)

        Serialised input (``str`` or ``bytes``) is parsed as-is; anything else
        is serialised with ``etree.tostring`` first.  Both documents are parsed
        with xmltodict and passed through ``normalise_dict`` before comparison.

        Does not care about element order

        :return: ``True`` when the normalised documents are equal
        """
        def serialised(doc):
            # bytes is already serialised XML, so it must not be handed to
            # etree.tostring, which expects an element or tree.
            if isinstance(doc, (str, bytes)):
                return doc
            return etree.tostring(doc)

        left = normalise_dict(xmltodict.parse(serialised(a)))
        right = normalise_dict(xmltodict.parse(serialised(b)))
        return left == right
    
    0 讨论(0)
  • 2020-11-30 04:38

    Here is a simple solution: convert the XML into dictionaries (with xmltodict) and compare the dictionaries with each other.

    import json
    import xmltodict
    
    class XmlDiff(object):
        """Holds two XML documents in parsed form and reports whether they match."""

        def __init__(self, xml1, xml2):
            # Each document is parsed with xmltodict and then round-tripped
            # through JSON before being stored for comparison.
            first = xmltodict.parse(xml1)
            self.dict1 = json.loads(json.dumps(first))
            second = xmltodict.parse(xml2)
            self.dict2 = json.loads(json.dumps(second))

        def equal(self):
            """Return True when the two stored documents compare equal."""
            left, right = self.dict1, self.dict2
            return left == right
    

    unit test

    import unittest
    
    class XMLDiffTestCase(unittest.TestCase):
        """Exercises XmlDiff.equal() on matching and non-matching documents."""

        def test_xml_equal(self):
            """Same attributes written in a different order compare equal."""
            first = """<?xml version='1.0' encoding='utf-8' standalone='yes'?>
            <Stats start="1275955200" end="1276041599">
            </Stats>"""
            second = """<?xml version='1.0' encoding='utf-8' standalone='yes'?>
            <Stats end="1276041599" start="1275955200" >
            </Stats>"""
            diff = XmlDiff(first, second)
            self.assertTrue(diff.equal())

        def test_xml_not_equal(self):
            """A document lacking one attribute does not compare equal."""
            first = """<?xml version='1.0' encoding='utf-8' standalone='yes'?>
            <Stats start="1275955200">
            </Stats>"""
            second = """<?xml version='1.0' encoding='utf-8' standalone='yes'?>
            <Stats end="1276041599" start="1275955200" >
            </Stats>"""
            diff = XmlDiff(first, second)
            self.assertFalse(diff.equal())
    

    Or as a simple Python function:

    import json
    import xmltodict
    
    def xml_equal(a, b):
        """
        Tell whether two XML documents parse to the same structure.

        Each argument is handed to ``xmltodict.parse``; the result is
        round-tripped through JSON and the two resulting objects are compared
        with ``==``.
        """
        parsed_a = xmltodict.parse(a)
        plain_a = json.loads(json.dumps(parsed_a))
        parsed_b = xmltodict.parse(b)
        plain_b = json.loads(json.dumps(parsed_b))
        return plain_a == plain_b
    
    0 讨论(0)
  • 2020-11-30 04:43

    Thinking about this problem, I came up with the following solution that renders XML elements comparable and sortable:

    import xml.etree.ElementTree as ET
    def cmpElement(x, y):
        """
        Three-way comparison of two XML elements.

        Elements are compared, in this order, by type, tag, attributes,
        stripped text content and finally by their children, which are sorted
        first so that the order of child elements does not matter.  Tail text
        is not looked at.

        Works without the ``cmp()`` builtin, dict ordering or
        ``Element.getchildren()``, none of which exist on current Python 3.

        :param x: first element
        :param y: second element
        :return: -1, 0 or 1 when *x* sorts before, equal to or after *y*
        """
        import functools  # local import: provides cmp_to_key for sorting children

        def three_way(a, b):
            # None sorts before any other value, as it did under Python 2 cmp().
            if a is None or b is None:
                return (a is not None) - (b is not None)
            return (a > b) - (a < b)

        # compare type (by qualified name; type objects are not orderable)
        r = three_way((type(x).__module__, type(x).__name__),
                      (type(y).__module__, type(y).__name__))
        if r:
            return r
        # compare tag; str() keeps non-string tags from raising when ordered
        r = three_way(str(x.tag), str(y.tag))
        if r:
            return r
        # compare tag attributes, independent of the order they were written in
        r = three_way(sorted(x.attrib.items()), sorted(y.attrib.items()))
        if r:
            return r
        # compare stripped text content (empty or whitespace-only counts as None)
        xtext = (x.text and x.text.strip()) or None
        ytext = (y.text and y.text.strip()) or None
        r = three_way(xtext, ytext)
        if r:
            return r
        # compare sorted children pairwise, then by count (list-style ordering)
        child_key = functools.cmp_to_key(cmpElement)
        xchildren = sorted(x, key=child_key)
        ychildren = sorted(y, key=child_key)
        for xchild, ychild in zip(xchildren, ychildren):
            r = cmpElement(xchild, ychild)
            if r:
                return r
        return three_way(len(xchildren), len(ychildren))
    
    # Install rich-comparison operators on ET._ElementInterface so that elements
    # can be compared and sorted; every operator delegates to cmpElement() and
    # interprets its -1 / 0 / 1 result.
    # NOTE(review): _ElementInterface is a private ElementTree name -- confirm it
    # exists (and accepts attribute assignment) in the Python version in use.
    ET._ElementInterface.__lt__ = lambda self, other: cmpElement(self, other) == -1
    ET._ElementInterface.__gt__ = lambda self, other: cmpElement(self, other) == 1
    ET._ElementInterface.__le__ = lambda self, other: cmpElement(self, other) <= 0
    ET._ElementInterface.__ge__ = lambda self, other: cmpElement(self, other) >= 0
    ET._ElementInterface.__eq__ = lambda self, other: cmpElement(self, other) == 0
    ET._ElementInterface.__ne__ = lambda self, other: cmpElement(self, other) != 0
    
    0 讨论(0)
提交回复
热议问题