How to get xpath from an XmlNode instance

后端 未结 14 988
难免孤独
难免孤独 2020-11-30 18:17

Could someone supply some code that would get the xpath of a System.Xml.XmlNode instance?

Thanks!

相关标签:
14条回答
  • 2020-11-30 18:57

    I found that none of the above worked with XDocument, so I wrote my own code to support XDocument, using recursion. I think this code handles multiple identical nodes better than some of the other code here because it first tries to go as deep into the XML path as it can and then backs up to build only what is needed. So if you have /home/white/bob and /home/white/mike and you want to create /home/white/bob/garage, the code will know how to create that. However, I didn't want to mess with predicates or wildcards, so I explicitly disallowed those; it would be easy to add support for them, though.

    Private Sub NodeItterate(XDoc As XElement, XPath As String)
        'Ensures that exactly one element exists at XPath (evaluated against XDoc).
        'If nothing matches, the parent path is ensured first (recursively) and a new,
        'empty element named after the last path segment is appended to that parent.
        '
        'Throws ArgumentOutOfRangeException when XPath matches more than one element.
        'Throws ArgumentException when no leading part of XPath matches an existing element,
        'so there is nothing to attach the new elements to.

        'Evaluate the (lazy) query once and remember the result
        Dim matchCount As Integer = XDoc.XPathSelectElements(XPath).Count()

        'more than one element at the deepest path we have access to: we can't proceed
        If matchCount > 1 Then
            'Use the (paramName, message) overload; the single-string overload would
            'treat the message text as the parameter name.
            Throw New ArgumentOutOfRangeException("XPath", "There are too many paths that match your expression.")
        End If

        'exactly one element: nothing to create
        If matchCount = 1 Then
            Return
        End If

        'nothing found: try the next shallower path
        Dim lastSlash As Integer = XPath.LastIndexOf("/"c)
        If lastSlash <= 0 Then
            'There is no parent segment left to fall back to.
            Throw New ArgumentException("No existing element matches any leading part of the path.", "XPath")
        End If

        Dim ParentPath As String = XPath.Substring(0, lastSlash)
        NodeItterate(XDoc, ParentPath)

        'by this time all the required parent elements will have been constructed
        Dim ParentNode As XElement = XDoc.XPathSelectElement(ParentPath)
        Dim NewElementName As String = XPath.Substring(lastSlash + 1)
        ParentNode.Add(New XElement(NewElementName))
    End Sub
    
    Public Sub CreateXPath(ByVal XDoc As XElement, ByVal XPath As String)
        'Creates whatever elements are missing so that XPath resolves to an element under XDoc.
        'Only plain slash-separated element names are accepted.
        '
        'Throws ArgumentException when XPath contains "//", "*" or "." (searches, wildcards,
        'relative paths) or any predicate/operator character: [ ] ( ) @ = ' < > |

        If XPath.Contains("//") OrElse XPath.Contains("*") OrElse XPath.Contains(".") Then
            Throw New ArgumentException("Can't create a path based on searches, wildcards, or relative paths.")
        End If

        'A character class is required here: without the surrounding [...] the pattern
        'only matches that exact run of characters, so predicates were never rejected.
        If Regex.IsMatch(XPath, "[\[\]()@='<>|]") Then
            Throw New ArgumentException("Can't create a path based on predicates.")
        End If

        'we will process this recursively.
        NodeItterate(XDoc, XPath)

    End Sub
    
    0 讨论(0)
  • 2020-11-30 18:58

    I produced VBA for Excel to do this for a work project. It outputs tuples of an XPath and the associated text from an element or attribute. The purpose was to allow business analysts to identify and map some XML. I appreciate that this is a C# forum, but thought this may be of interest.

    Sub Parse2(oSh As Long, inode As IXMLDOMNode, Optional iXstring As String = "", Optional indexes)
    'Recursively walks inode and its descendants, building an XPath-like string for each node
    'and passing (oSh, path, value) to oSheet for every attribute and text node it meets.
    '  oSh      - passed through unchanged to oSheet (presumably identifies the output sheet - confirm)
    '  inode    - the node to process on this call
    '  iXstring - the path built so far for inode's parent
    '  indexes  - state handed to getIndex and on to every recursive call
    
    
    Dim chnode As IXMLDOMNode
    Dim attr As IXMLDOMAttribute
    Dim oXString As String
    Dim chld As Long
    Dim idx As Variant
    Dim addindex As Boolean
    chld = 0
    idx = 0
    addindex = False
    
    
    'determine the node type:
    Select Case inode.NodeType
    
        Case NODE_ELEMENT
            If inode.ParentNode.NodeType = NODE_DOCUMENT Then 'Root element: prefixed with "//", never indexed, and its attributes are not output (the attribute loop is only in the Else branch)
                'fp is defined elsewhere; presumably it cleans up the node name (e.g. a namespace prefix) - confirm
                oXString = iXstring & "//" & fp(inode.nodename)
            Else
    
                'Need to deal with indexing. Where an element has siblings with the same nodeName, it needs to be indexed using [index], e.g. swapstreams or schedules
    
                'Count the element children of the parent that share this node's name (the count includes inode itself)
                For Each chnode In inode.ParentNode.ChildNodes
                    If chnode.NodeType = NODE_ELEMENT And chnode.nodename = inode.nodename Then chld = chld + 1
                Next chnode
    
                If chld > 1 Then 'inode has siblings of the same nodeName, so needs to be indexed
                    'Look up the index from the indexes array (getIndex also updates that array)
                    idx = getIndex(inode.nodename, indexes)
                    addindex = True
                Else
                End If
    
                'build the XString: parent path + "/" + name, plus "[idx]" when indexed
                oXString = iXstring & "/" & fp(inode.nodename)
                If addindex Then oXString = oXString & "[" & idx & "]"
    
                'Output one row per attribute of this element
                For Each attr In inode.Attributes
                    'The pair written is (element path & "/@" & attribute name, attribute value)
                    Call oSheet(oSh, oXString & "/@" & attr.Name, attr.Value)
                Next attr
    
            End If
    
        Case NODE_TEXT
            'A text node reuses its parent's path and outputs its own value against it
            oXString = iXstring
            Call oSheet(oSh, oXString, inode.NodeValue)
    
        'For every node type below nothing is output and oXString is left unassigned,
        'so any children are visited with an empty path.
        Case NODE_ATTRIBUTE
        'Do nothing
        Case NODE_CDATA_SECTION
        'Do nothing
        Case NODE_COMMENT
        'Do nothing
        Case NODE_DOCUMENT
        'Do nothing
        Case NODE_DOCUMENT_FRAGMENT
        'Do nothing
        Case NODE_DOCUMENT_TYPE
        'Do nothing
        Case NODE_ENTITY
        'Do nothing
        Case NODE_ENTITY_REFERENCE
        'Do nothing
        Case NODE_INVALID
        'do nothing
        Case NODE_NOTATION
        'do nothing
        Case NODE_PROCESSING_INSTRUCTION
        'do nothing
    End Select
    
    'Now call Parse2 on each of inode's children, passing down the path built for inode.
    If inode.HasChildNodes Then
        For Each chnode In inode.ChildNodes
            Call Parse2(oSh, chnode, oXString, indexes)
        Next chnode
    Set chnode = Nothing
    Else
    End If
    
    End Sub
    

    Manages the counting of elements using:

    Function getIndex(tag As Variant, indexes) As Variant
    'Returns the next running index for an xml tag and records it in the indexes array.
    'indexes is a 2-row array (row 0 = tag name, row 1 = last index handed out) that the
    'parser passes from one call to the next up and down the tree.
    
    Dim pos As Integer
    Dim lastCol As Integer
    
    'First use: seed the array with a header column
    If IsArrayEmpty(indexes) Then
        ReDim indexes(1, 0)
        indexes(0, 0) = "Tag"
        indexes(1, 0) = "Index"
    End If
    
    'Scan the recorded tags from the start
    pos = 0
    Do While pos <= UBound(indexes, 2)
        If indexes(0, pos) = tag Then
            'Known tag: bump its counter and hand the new value back
            indexes(1, pos) = indexes(1, pos) + 1
            getIndex = indexes(1, pos)
            'Truncate the array after this column, dropping every tag recorded later
            ReDim Preserve indexes(1, pos)
            Exit Function
        End If
        pos = pos + 1
    Loop
    
    'Unknown tag: append a new column for it, starting at index 1
    lastCol = UBound(indexes, 2) + 1
    ReDim Preserve indexes(1, lastCol)
    indexes(0, lastCol) = tag
    indexes(1, lastCol) = 1
    getIndex = 1
    
    End Function
    
    0 讨论(0)
  • 2020-11-30 18:59

    Another solution to your problem might be to 'mark' the xmlnodes which you will want to later identify with a custom attribute:

    // Create a "some_id" attribute in the node's owning document, give it a fresh GUID
    // as its value, and attach it to the node so the node can be found again by that value.
    var id = _currentNode.OwnerDocument.CreateAttribute("some_id");
    id.Value = Guid.NewGuid().ToString();
    _currentNode.Attributes.Append(id);
    

    which you can store in a Dictionary for example. And you can later identify the node with an xpath query:

    // Finds the first element anywhere in the document whose some_id attribute contains the given id text.
    // NOTE(review): if `id` is still the XmlAttribute from the marking snippet, the GUID string (id.Value) is presumably what should be formatted in here - confirm.
    newOrOldDocument.SelectSingleNode(string.Format("//*[contains(@some_id,'{0}')]", id));
    

    I know this is not a direct answer to your question, but it can help if the reason you wish to know the xpath of a node is to have a way of 'reaching' the node later after you have lost the reference to it in code.

    This also overcomes problems when the document gets elements added/moved, which can mess up the xpath (or indexes, as suggested in other answers).

    0 讨论(0)
  • 2020-11-30 19:03

    This is even easier

     ''' <summary>
        ''' Gets the full XPath of a single node, with a 1-based position on every step.
        ''' </summary>
        ''' <param name="node">The node to build the path for.</param>
        ''' <returns>
        ''' A path of the form "/root[1]/item[2]"; for an attribute, the owner element's
        ''' path followed by "/@name"; an empty string for the document node itself.
        ''' </returns>
        ''' <remarks>
        ''' The position counts preceding siblings that have the same Name as the node.
        ''' </remarks>
        Private Function GetXPath(ByVal node As Xml.XmlNode) As String
            'I dont want to know that it was a generic document
            If node.Name = "#document" Then Return ""

            'Attributes have neither siblings nor a ParentNode: address them by name
            'underneath their owner element instead of by position.
            Dim attr As Xml.XmlAttribute = TryCast(node, Xml.XmlAttribute)
            If attr IsNot Nothing Then
                If attr.OwnerElement Is Nothing Then Return "@" + attr.Name
                Return GetXPath(attr.OwnerElement) + "/@" + attr.Name
            End If

            'Walk backwards over the siblings, counting those with the same name as this node.
            'Starting at 1 makes the result the node's 1-based position.
            Dim position As Integer = 1
            Dim sibling As Xml.XmlNode = node.PreviousSibling
            While sibling IsNot Nothing
                If sibling.Name = node.Name Then
                    position += 1
                End If
                sibling = sibling.PreviousSibling
            End While

            'The position is always written, even when the node has no same-named siblings
            Dim temp As String = node.Name + "[" + position.ToString() + "]"

            If node.ParentNode IsNot Nothing Then
                Return GetXPath(node.ParentNode) + "/" + temp
            End If

            Return temp
        End Function
    
    0 讨论(0)
  • 2020-11-30 19:04

    I know, old post but the version I liked the most (the one with names) was flawed: When a parent node has nodes with different names, it stopped counting the index after it found the first non-matching node-name.

    Here is my fixed version of it:

    /// <summary>
    /// Builds an XPath expression that leads from the document to the given node.
    /// </summary>
    /// <param name="node">The node whose location should be described</param>
    /// <returns>
    /// The path as a string; empty for a node without a parent. Attributes are
    /// addressed as "owner-path/@name", every other node as "parent-path/name[n]".
    /// </returns>
    public string GetXPathToNode(XmlNode node)
    {
        if (node.NodeType == XmlNodeType.Attribute)
        {
            // An attribute hangs off its OwnerElement (it has no ParentNode) and is
            // selected by name rather than by position.
            XmlElement owner = ((XmlAttribute)node).OwnerElement;
            return GetXPathToNode(owner) + "/@" + node.Name;
        }

        XmlNode parent = node.ParentNode;
        if (parent == null)
        {
            // A parentless node is the top of the tree and contributes no path step.
            return "";
        }

        // 1-based position among the preceding siblings that share this node's name;
        // siblings with other names are skipped, not treated as the end of the run.
        int position = 1;
        for (XmlNode cursor = node.PreviousSibling; cursor != null; cursor = cursor.PreviousSibling)
        {
            if (cursor.Name == node.Name)
            {
                position++;
            }
        }

        // Parent path first, then this node's own "name[position]" step.
        return GetXPathToNode(parent) + "/" + node.Name + "[" + position + "]";
    }
    
    0 讨论(0)
  • 2020-11-30 19:11

    Okay, I couldn't resist having a go at it. It'll only work for attributes and elements, but hey... what can you expect in 15 minutes :) Likewise there may very well be a cleaner way of doing it.

    It is superfluous to include the index on every element (particularly the root one!) but it's easier than trying to work out whether there's any ambiguity otherwise.

    using System;
    using System.Text;
    using System.Xml;
    
    /// <summary>
    /// Small demo: computes an indexed XPath for an element or attribute node and
    /// checks that the path selects the same node again.
    /// </summary>
    class Test
    {
        /// <summary>
        /// Loads a sample document, prints the XPath of its "attr" attribute, then prints
        /// whether selecting that path returns the very same node.
        /// </summary>
        static void Main()
        {
            string xml = @"
    <root>
      <foo />
      <foo>
         <bar attr='value'/>
         <bar other='va' />
      </foo>
      <foo><bar /></foo>
    </root>";
            XmlDocument doc = new XmlDocument();
            doc.LoadXml(xml);
            XmlNode node = doc.SelectSingleNode("//@attr");
            Console.WriteLine(FindXPath(node));
            Console.WriteLine(doc.SelectSingleNode(FindXPath(node)) == node);
        }
    
        /// <summary>
        /// Builds the XPath of <paramref name="node"/> by walking up to the document,
        /// prepending one step per level ("/name[n]" for elements, "/@name" for attributes).
        /// </summary>
        /// <param name="node">An element or attribute that belongs to a document.</param>
        /// <returns>The absolute path, e.g. "/root[1]/foo[2]/bar[1]/@attr".</returns>
        /// <exception cref="ArgumentException">
        /// The walk meets a node that is neither element, attribute nor document, or it
        /// runs out of ancestors before reaching a document (including a null argument).
        /// </exception>
        static string FindXPath(XmlNode node)
        {
            StringBuilder builder = new StringBuilder();
            while (node != null)
            {
                switch (node.NodeType)
                {
                    case XmlNodeType.Attribute:
                        builder.Insert(0, "/@" + node.Name);
                        // Attributes have no ParentNode; continue from the owning element.
                        node = ((XmlAttribute) node).OwnerElement;
                        break;
                    case XmlNodeType.Element:
                        int index = FindElementIndex((XmlElement) node);
                        builder.Insert(0, "/" + node.Name + "[" + index + "]");
                        node = node.ParentNode;
                        break;
                    case XmlNodeType.Document:
                        return builder.ToString();
                    default:
                        throw new ArgumentException("Only elements and attributes are supported");
                }
            }
            throw new ArgumentException("Node was not in a document");
        }
    
        /// <summary>
        /// Returns the 1-based position of <paramref name="element"/> among the element
        /// children of its parent that have the same name.
        /// </summary>
        /// <param name="element">The element to locate.</param>
        /// <returns>
        /// The position; 1 when the parent is the document or when there is no parent.
        /// </returns>
        /// <exception cref="ArgumentException">The element is not among its parent's children.</exception>
        static int FindElementIndex(XmlElement element)
        {
            XmlNode parentNode = element.ParentNode;
            // A detached element has no siblings to count. Returning 1 lets FindXPath
            // carry on and report "Node was not in a document" instead of failing here
            // with a NullReferenceException.
            if (parentNode == null || parentNode is XmlDocument)
            {
                return 1;
            }
            int index = 1;
            // Enumerate through XmlNode.ChildNodes rather than casting the parent to
            // XmlElement: a parent of another kind (e.g. a document fragment) would make
            // the cast throw before FindXPath can reject it with its own message.
            foreach (XmlNode candidate in parentNode.ChildNodes)
            {
                if (candidate is XmlElement && candidate.Name == element.Name)
                {
                    if (candidate == element)
                    {
                        return index;
                    }
                    index++;
                }
            }
            throw new ArgumentException("Couldn't find element within parent");
        }
    }
    
    0 讨论(0)
提交回复
热议问题