How to get xpath from an XmlNode instance

后端未结

关注

 14  988

难免孤独

Could someone supply some code that would get the xpath of a System.Xml.XmlNode instance?

Thanks!

相关标签:

14条回答

独厮守ぢ

2020-11-30 18:57

I found that none of the above worked with XDocument, so I wrote my own code to support XDocument and used recursion. I think this code handles multiple identical nodes better than some of the other code here because it first tries to go as deep into the XML path as it can and then backs up to build only what is needed. So if you have /home/white/bob and /home/white/mike and you want to create /home/white/bob/garage, the code will know how to create that. However, I didn't want to mess with predicates or wildcards, so I explicitly disallowed those; but it would be easy to add support for them.

''' <summary>
''' Ensures that the element addressed by <paramref name="XPath"/> exists below
''' <paramref name="XDoc"/>. Missing elements are created from the shallowest
''' missing ancestor downwards by recursing on the parent path first.
''' </summary>
''' <param name="XDoc">Element the XPath expression is evaluated against.</param>
''' <param name="XPath">Slash-separated element path to find or create.</param>
''' <exception cref="ArgumentOutOfRangeException">
''' More than one element matches the path, so the place to build on is ambiguous.
''' </exception>
Private Sub NodeItterate(XDoc As XElement, XPath As String)
    'Run the query once and reuse the count for every decision below
    Dim matchCount As Integer = XDoc.XPathSelectElements(XPath).Count()

    'More than one match at the deepest reachable path: we can't decide where to build
    If matchCount > 1 Then
        'Two-argument overload: the first argument is the parameter name, the second the message
        Throw New ArgumentOutOfRangeException("XPath", "There are too many paths that match your expression.")
    End If

    'Exactly one match: the path already exists, nothing to create
    If matchCount = 1 Then
        Return
    End If

    'No match: make sure the next shallower path exists first...
    Dim lastSlash As Integer = XPath.LastIndexOf("/")
    Dim ParentPath As String = XPath.Substring(0, lastSlash)
    NodeItterate(XDoc, ParentPath)

    '...by this time all the required parent elements will have been constructed
    Dim ParentNode As XElement = XDoc.XPathSelectElement(ParentPath)
    Dim NewElementName As String = XPath.Substring(lastSlash + 1)
    ParentNode.Add(New XElement(NewElementName))
End Sub

''' <summary>
''' Creates every element of a plain, slash-separated path that does not yet
''' exist below <paramref name="XDoc"/>.
''' </summary>
''' <param name="XDoc">Element the path is evaluated against.</param>
''' <param name="XPath">Path to create; searches, wildcards, dots and predicates are rejected.</param>
''' <exception cref="ArgumentException">
''' The path contains "//", "*", "." or any predicate/attribute/operator character.
''' </exception>
Public Sub CreateXPath(ByVal XDoc As XElement, ByVal XPath As String)

    If XPath.Contains("//") OrElse XPath.Contains("*") OrElse XPath.Contains(".") Then
        Throw New ArgumentException("Can't create a path based on searches, wildcards, or relative paths.")
    End If

    'Character class: any ONE of [ ] ( ) @ = ' < > | is enough to reject the path.
    'Without the enclosing brackets the pattern only matched the literal text "[]@='<>|".
    If Regex.IsMatch(XPath, "[\[\]()@='<>|]") Then
        Throw New ArgumentException("Can't create a path based on predicates.")
    End If

    'we will process this recursively.
    NodeItterate(XDoc, XPath)

End Sub

0 讨论(0)

鱼传尺愫

2020-11-30 18:58

I produced VBA for Excel to do this for a work project. It outputs tuples of an XPath and the associated text from an element or attribute. The purpose was to allow business analysts to identify and map some XML. I appreciate that this is a C# forum, but thought this may be of interest.

Sub Parse2(oSh As Long, inode As IXMLDOMNode, Optional iXstring As String = "", Optional indexes)
'Walks the DOM tree below inode depth-first, building an XPath-like string for each
'element and handing (path, value) pairs for attributes and text nodes to oSheet.
'  oSh      - passed through unchanged to oSheet
'  inode    - node currently being visited
'  iXstring - path accumulated for the parent of inode
'  indexes  - tag/index table shared with getIndex across the whole walk

Dim child As IXMLDOMNode
Dim attrNode As IXMLDOMAttribute
Dim oXString As String
Dim sameNameCount As Long
Dim position As Variant

sameNameCount = 0
position = 0

Select Case inode.NodeType

    Case NODE_ELEMENT
        If inode.ParentNode.NodeType = NODE_DOCUMENT Then
            'Root element: only its name is recorded, its (namespace) attributes are ignored
            oXString = iXstring & "//" & fp(inode.nodename)
        Else
            'Count the element children of the parent that share this node's name
            For Each child In inode.ParentNode.ChildNodes
                If child.NodeType = NODE_ELEMENT And child.nodename = inode.nodename Then
                    sameNameCount = sameNameCount + 1
                End If
            Next child

            'Same-named siblings exist, so fetch the running index for this tag
            If sameNameCount > 1 Then
                position = getIndex(inode.nodename, indexes)
            End If

            'Append this step (plus [index] when the name is ambiguous) to the parent path
            oXString = iXstring & "/" & fp(inode.nodename)
            If sameNameCount > 1 Then
                oXString = oXString & "[" & position & "]"
            End If

            'Emit one pair per attribute: path/@name and the attribute value
            For Each attrNode In inode.Attributes
                oSheet oSh, oXString & "/@" & attrNode.Name, attrNode.Value
            Next attrNode
        End If

    Case NODE_TEXT
        'Text is reported under the path of its parent element
        oXString = iXstring
        oSheet oSh, oXString, inode.NodeValue

    Case Else
        'Attribute, CDATA, comment, document, fragment, doctype, entity, entity reference,
        'invalid, notation and processing-instruction nodes produce no output of their own

End Select

'Recurse into every child, handing down the path built for this node
If inode.HasChildNodes Then
    For Each child In inode.ChildNodes
        Parse2 oSh, child, oXString, indexes
    Next child
    Set child = Nothing
End If

End Sub

The counting of elements is managed by the following function:

Function getIndex(tag As Variant, indexes) As Variant
'Returns the next running index for an xml tag, tracked in the indexes array.
'The same indexes array is handed from one parser call to the next, up and down the tree.
'Row 0 of the array holds tag names, row 1 the last index given out for that tag;
'column 0 is a header column ("Tag" / "Index").

Dim col As Integer
Dim lastCol As Integer

'Seed an empty table with its header column
If IsArrayEmpty(indexes) Then
    ReDim indexes(1, 0)
    indexes(0, 0) = "Tag"
    indexes(1, 0) = "Index"
End If

'Known tag: bump its counter, return it, and drop every column recorded after it
For col = 0 To UBound(indexes, 2)
    If indexes(0, col) = tag Then
        indexes(1, col) = indexes(1, col) + 1
        getIndex = indexes(1, col)
        ReDim Preserve indexes(1, col) 'keeps columns 0..col, discards the rest
        Exit Function
    End If
Next col

'Unknown tag: append a new column for it, starting at index 1
lastCol = UBound(indexes, 2)
ReDim Preserve indexes(1, lastCol + 1)
indexes(0, lastCol + 1) = tag
indexes(1, lastCol + 1) = 1
getIndex = 1

End Function

0 讨论(0)

你的背包

2020-11-30 18:59
Another solution to your problem might be to 'mark' the xmlnodes which you will want to later identify with a custom attribute:
```
// Create a "some_id" attribute via the node's owner document, set its value to a
// freshly generated GUID string, and append it to the node's attribute collection.
var id = _currentNode.OwnerDocument.CreateAttribute("some_id");
id.Value = Guid.NewGuid().ToString();
_currentNode.Attributes.Append(id);
```
which you can store in a Dictionary for example. And you can later identify the node with an xpath query:
```
// Select the first node anywhere in the document whose some_id attribute contains the formatted id.
// NOTE(review): presumably `id` here is the stored GUID string rather than an XmlAttribute object — confirm against the caller.
newOrOldDocument.SelectSingleNode(string.Format("//*[contains(@some_id,'{0}')]", id));
```
I know this is not a direct answer to your question, but it can help if the reason you wish to know the xpath of a node is to have a way of 'reaching' the node later after you have lost the reference to it in code.

This also overcomes problems when the document gets elements added/moved, which can mess up the xpath (or indexes, as suggested in other answers).
0 讨论(0)
发布评论:

提交评论
- 加载中...

春和景丽

2020-11-30 19:03

This is even easier

 ''' <summary>
    ''' Gets the full XPath of a single node, indexing every step by its
    ''' position among the preceding siblings that share its name.
    ''' </summary>
    ''' <param name="node">The node to locate; elements, attributes and the document node are handled.</param>
    ''' <returns>
    ''' A path such as "/root[1]/item[2]" for an element, the owner element's path followed by
    ''' "/@name" for an attribute, or an empty string for the document node.
    ''' </returns>
    ''' <exception cref="ArgumentNullException"><paramref name="node"/> is Nothing.</exception>
    Private Function GetXPath(ByVal node As Xml.XmlNode) As String
        If node Is Nothing Then Throw New ArgumentNullException("node")

        'I dont want to know that it was a generic document
        If node.Name = "#document" Then Return ""

        'Attributes have an OwnerElement rather than a ParentNode and are addressed by name, not position
        Dim attr As Xml.XmlAttribute = TryCast(node, Xml.XmlAttribute)
        If attr IsNot Nothing Then
            If attr.OwnerElement Is Nothing Then Return "@" & attr.Name
            Return GetXPath(attr.OwnerElement) & "/@" & attr.Name
        End If

        'Walk backwards over the siblings; XPath positions are 1-based, so start at 1
        Dim position As Integer = 1
        Dim sibling As Xml.XmlNode = node.PreviousSibling
        While sibling IsNot Nothing
            'Only count if the sibling has the same name as this node
            If sibling.Name = node.Name Then
                position += 1
            End If
            sibling = sibling.PreviousSibling
        End While

        'The index is always emitted: position is never below 1, so there is no un-indexed case
        Dim stepText As String = node.Name & "[" & position.ToString() & "]"

        If node.ParentNode IsNot Nothing Then
            Return GetXPath(node.ParentNode) & "/" & stepText
        End If

        Return stepText
    End Function

0 讨论(0)

无人共我

2020-11-30 19:04

I know, old post but the version I liked the most (the one with names) was flawed: When a parent node has nodes with different names, it stopped counting the index after it found the first non-matching node-name.

Here is my fixed version of it:

/// <summary>
/// Builds the XPath that leads from the document to the given node.
/// </summary>
/// <param name="node">Node whose location is wanted.</param>
/// <returns>
/// The parent's path followed by "/name[n]", where n counts this node among the
/// preceding siblings with the same name; "/@name" is appended for attributes;
/// an empty string is returned for a node without a parent.
/// </returns>
public string GetXPathToNode(XmlNode node)
{
    // Attributes are reached through their OwnerElement and addressed by name only.
    if (node.NodeType == XmlNodeType.Attribute)
    {
        XmlElement owner = ((XmlAttribute)node).OwnerElement;
        return GetXPathToNode(owner) + "/@" + node.Name;
    }

    // A parentless node is treated as the root, which contributes no path of its own.
    XmlNode parent = node.ParentNode;
    if (parent == null)
    {
        return "";
    }

    // 1-based position among the same-named siblings that come before this node;
    // siblings with other names are skipped, not treated as the end of the run.
    int position = 1;
    for (XmlNode sibling = node.PreviousSibling; sibling != null; sibling = sibling.PreviousSibling)
    {
        if (sibling.Name == node.Name)
        {
            position++;
        }
    }

    return GetXPathToNode(parent) + "/" + node.Name + "[" + position + "]";
}

0 讨论(0)

暖寄归人

2020-11-30 19:11

Okay, I couldn't resist having a go at it. It'll only work for attributes and elements, but hey... what can you expect in 15 minutes :) Likewise there may very well be a cleaner way of doing it.

It is superfluous to include the index on every element (particularly the root one!) but it's easier than trying to work out whether there's any ambiguity otherwise.

using System;
using System.Text;
using System.Xml;

/// <summary>
/// Small demo: computes an indexed XPath for an attribute or element node and
/// checks that evaluating the path selects the very same node again.
/// </summary>
class Test
{
    static void Main()
    {
        string xml = @"
<root>
  <foo />
  <foo>
     <bar attr='value'/>
     <bar other='va' />
  </foo>
  <foo><bar /></foo>
</root>";
        XmlDocument doc = new XmlDocument();
        doc.LoadXml(xml);
        XmlNode node = doc.SelectSingleNode("//@attr");
        string path = FindXPath(node);
        Console.WriteLine(path);
        // Round-trip: the generated path must select the node it was built from.
        Console.WriteLine(doc.SelectSingleNode(path) == node);
    }

    /// <summary>
    /// Walks from <paramref name="node"/> up to the document, prepending one step per level.
    /// </summary>
    /// <param name="node">An attribute or element that belongs to a document.</param>
    /// <returns>A path of the form /a[1]/b[2]/@attr.</returns>
    /// <exception cref="ArgumentException">
    /// The walk meets a node that is neither element, attribute nor document,
    /// or runs out of parents before reaching a document.
    /// </exception>
    static string FindXPath(XmlNode node)
    {
        StringBuilder builder = new StringBuilder();
        while (node != null)
        {
            switch (node.NodeType)
            {
                case XmlNodeType.Attribute:
                    // Attributes are addressed by name and hang off their owner element.
                    builder.Insert(0, "/@" + node.Name);
                    node = ((XmlAttribute) node).OwnerElement;
                    break;
                case XmlNodeType.Element:
                    int index = FindElementIndex((XmlElement) node);
                    builder.Insert(0, "/" + node.Name + "[" + index + "]");
                    node = node.ParentNode;
                    break;
                case XmlNodeType.Document:
                    return builder.ToString();
                default:
                    throw new ArgumentException("Only elements and attributes are supported");
            }
        }
        throw new ArgumentException("Node was not in a document");
    }

    /// <summary>
    /// Returns the 1-based position of <paramref name="element"/> among the
    /// element children of its parent that carry the same name.
    /// </summary>
    /// <exception cref="ArgumentException">The element is not listed among its parent's children.</exception>
    static int FindElementIndex(XmlElement element)
    {
        XmlNode parentNode = element.ParentNode;
        // A detached element (no parent) and the document element have no same-named
        // siblings to count. Returning 1 for the detached case lets FindXPath report
        // "Node was not in a document" instead of failing here on a null parent.
        if (parentNode == null || parentNode is XmlDocument)
        {
            return 1;
        }
        // Iterate the parent as a plain XmlNode: a hard cast to XmlElement would throw
        // InvalidCastException for other container types before FindXPath can reject them.
        int index = 1;
        foreach (XmlNode candidate in parentNode.ChildNodes)
        {
            if (candidate is XmlElement && candidate.Name == element.Name)
            {
                if (candidate == element)
                {
                    return index;
                }
                index++;
            }
        }
        throw new ArgumentException("Couldn't find element within parent");
    }
}

0 讨论(0)