XMLDiff fails to recognize differences correctly?

自闭症网瘾萝莉.ら 提交于 2020-01-14 04:21:11

问题


What am I missing here? Is there an option that makes XMLDiff take element names into account and look for the best match, so that the following changes are recognized correctly?

Here is a helper class for making comparisons between two XML files:

/// <summary>
/// Helper around the XmlDiff / XmlDiffView types that compares two XML strings
/// and exposes the result either as a diffgram document or as rendered HTML.
/// </summary>
public class XMLDiffer
{
    /// <summary>
    /// Compares two XML strings and returns the diffgram written by <c>XmlDiff.Compare</c>.
    /// </summary>
    /// <param name="originalXML">XML text used as the baseline of the comparison.</param>
    /// <param name="changedXML">XML text compared against the baseline.</param>
    /// <returns>The diffgram, parsed into an <see cref="XDocument"/>.</returns>
    /// <exception cref="System.ArgumentNullException">
    /// <paramref name="originalXML"/> or <paramref name="changedXML"/> is null.
    /// </exception>
    public XDocument Diff(string originalXML, string changedXML)
    {
        // Fail early with the caller's own parameter name instead of the
        // generic one StringReader would report for a null string.
        if (originalXML == null)
        {
            throw new System.ArgumentNullException("originalXML");
        }
        if (changedXML == null)
        {
            throw new System.ArgumentNullException("changedXML");
        }

        //http://msdn2.microsoft.com/en-us/library/aa302294.aspx
        XmlDiff xmlDiff = new XmlDiff(XmlDiffOptions.IgnoreChildOrder | XmlDiffOptions.IgnoreComments | XmlDiffOptions.IgnoreWhitespace);
        xmlDiff.Algorithm = XmlDiffAlgorithm.Precise;

        StringBuilder diffgramStringBuilder = new StringBuilder();
        using (StringReader originalText = new StringReader(originalXML))
        using (StringReader changedText = new StringReader(changedXML))
        using (XmlReader originalReader = XmlReader.Create(originalText))
        using (XmlReader changedReader = XmlReader.Create(changedText))
        using (StringWriter diffgramText = new StringWriter(diffgramStringBuilder))
        using (XmlWriter diffgramWriter = XmlWriter.Create(diffgramText))
        {
            // The boolean returned by Compare is deliberately not kept:
            // callers of this method only consume the diffgram itself.
            xmlDiff.Compare(originalReader, changedReader, diffgramWriter);
        }

        // The writers are disposed (and therefore flushed) at this point,
        // so the builder holds the complete diffgram.
        return XDocument.Parse(diffgramStringBuilder.ToString());
    }

    /// <summary>
    /// Builds the diffgram for the two XML strings, loads it together with the
    /// original XML into an <c>XmlDiffView</c> and returns what <c>GetHtml</c> writes.
    /// </summary>
    /// <param name="originalXML">XML text used as the baseline of the comparison.</param>
    /// <param name="changedXML">XML text compared against the baseline.</param>
    /// <returns>The text produced by <c>XmlDiffView.GetHtml</c>.</returns>
    /// <exception cref="System.ArgumentNullException">
    /// Either argument is null (raised by <see cref="Diff"/>).
    /// </exception>
    public string GetChangeHtml(string originalXML, string changedXML)
    {
        XmlDiffView view = new XmlDiffView();
        XDocument diffgram = Diff(originalXML, changedXML);

        using (StringReader originalText = new StringReader(originalXML))
        using (StringReader diffgramText = new StringReader(diffgram.ToString()))
        using (XmlReader originalReader = XmlReader.Create(originalText))
        using (XmlReader diffgramReader = XmlReader.Create(diffgramText))
        using (StringWriter html = new StringWriter())
        {
            view.Load(originalReader, diffgramReader);
            view.GetHtml(html);
            return html.ToString();
        }
    }

}

With the following test:

/// <summary>
/// Renders the HTML change view for two documents that differ and checks
/// that a result is returned.
/// </summary>
[TestMethod]
public void XMLDiff_AreNotSame_GetChangeHtmlAll()
{
    // Arrange: compared with the original, the changed document alters the text
    // of the first <child>, drops <child1> and gains a third <child>.
    const string originalXml = "<root><child>some text</child><child>more text</child><child1>REMOVED</child1></root>";
    // The generic <child> nodes keep their relative order here, although they might not.
    const string changedXml = "<root><child>some text CHANGE</child><child>more text</child><child>ADDITION</child></root>";

    // Act
    string html = new XMLDiffer().GetChangeHtml(originalXml, changedXml);

    // Assert
    Assert.IsNotNull(html);
}

It produces the following output (html and table elements added): https://pste.eu/p/Fm7Z.html

More info about library: http://msdn2.microsoft.com/en-us/library/aa302294.aspx

Nuget link for references: https://www.nuget.org/packages/XMLDiffPatch/


回答1:


I ended up implementing the following classes to get the changes:

/// <summary>
/// Equality comparer for XML nodes that considers two elements equal when they
/// have the same name and the same <see cref="XElement.Value"/>.
/// Nodes that are not elements (including null) are only equal to each other.
/// </summary>
public class XMLComparer : IEqualityComparer<XNode>
{
    /// <summary>
    /// Determines whether two nodes are equal under this comparer's rules.
    /// </summary>
    /// <param name="e1">First node to compare; may be null.</param>
    /// <param name="e2">Second node to compare; may be null.</param>
    /// <returns>
    /// True when both nodes are elements with identical name and value, or when
    /// neither node is an element; false otherwise.
    /// </returns>
    public bool Equals(XNode e1, XNode e2)
    {
        var el1 = e1 as XElement;
        var el2 = e2 as XElement;

        if (el1 == null || el2 == null)
        {
            // A non-element must never match an element: the result has to be
            // symmetric and has to agree with GetHashCode, which returns 0 for
            // every non-element but a tuple hash for elements.
            return el1 == null && el2 == null;
        }

        return Tuple.Create(el1.Name, el1.Value).Equals(Tuple.Create(el2.Name, el2.Value));
    }

    /// <summary>
    /// Returns a hash code consistent with <see cref="Equals(XNode, XNode)"/>.
    /// </summary>
    /// <param name="n">Node to hash; may be null.</param>
    /// <returns>
    /// The hash of the (name, value) pair for an element; 0 for any other node.
    /// </returns>
    public int GetHashCode(XNode n)
    {
        var el = n as XElement;
        if (el == null)
        {
            return 0;
        }

        return Tuple.Create(el.Name, el.Value).GetHashCode();
    }

}

/// <summary>
/// A single entry in the list returned by <c>XMLDifferenceComparer.GetDifferences</c>:
/// an element plus a flag telling which document it was taken from.
/// </summary>
public class XMLDifference
{
    /// <summary>
    /// The element this entry refers to.
    /// </summary>
    public XElement Node { get; set; }

    /// <summary>
    /// True when the element comes from the changed document, false when it
    /// comes from the original document.
    /// </summary>
    public bool IsNew { get; set; }
}

/// <summary>
/// Lists the elements that are present in only one of two XML documents,
/// using <c>XMLComparer</c> to decide whether two elements match.
/// </summary>
public class XMLDifferenceComparer
{
    /// <summary>
    /// Parses both XML strings and collects the descendant elements of each root
    /// that have no match among the descendants of the other root.
    /// </summary>
    /// <param name="original">XML text of the old version.</param>
    /// <param name="changed">XML text of the new version.</param>
    /// <returns>
    /// First the unmatched elements of <paramref name="changed"/> (IsNew = true),
    /// then the unmatched elements of <paramref name="original"/> (IsNew = false).
    /// </returns>
    public List<XMLDifference> GetDifferences(string original, string changed)
    {
        var before = XDocument.Parse(original);
        var after = XDocument.Parse(changed);

        var result = new List<XMLDifference>();

        // Elements that only the new version contains.
        var onlyInChanged = after.Root.Descendants().Except(before.Root.Descendants(), new XMLComparer());
        result.AddRange(GetList(onlyInChanged, true));

        // Elements that only the old version contains.
        var onlyInOriginal = before.Root.Descendants().Except(after.Root.Descendants(), new XMLComparer());
        result.AddRange(GetList(onlyInOriginal, false));

        return result;
    }

    /// <summary>
    /// Wraps every node in an <c>XMLDifference</c> carrying the given flag.
    /// </summary>
    /// <param name="nodes">Nodes to wrap.</param>
    /// <param name="isNew">Value assigned to <c>IsNew</c> on every entry.</param>
    /// <returns>One entry per node, in enumeration order.</returns>
    private List<XMLDifference> GetList(IEnumerable<XNode> nodes, bool isNew)
    {
        // A node that is not an element yields an entry whose Node is null.
        return nodes
            .Select(node => new XMLDifference { IsNew = isNew, Node = node as XElement })
            .ToList();
    }
}

This can recognize changes, but it is not element-specific: because the elements have no unique identifiers, it cannot map exactly which element was changed and how.

The main idea for this solution came from here: https://gist.github.com/krcourville/6933451



来源:https://stackoverflow.com/questions/43499264/xmldiff-fails-to-recognize-differences-correcly

标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!