I am looking for the clean, elegant and smart solution to remove namespacees from all XML elements? How would function to do that look like?
Defined interface:
user892217's answer is almost correct. It won't compile as is, so needs a slight correction to the recursive call:
private static XElement RemoveAllNamespaces(XElement xmlDocument)
{
XElement xElement;
if (!xmlDocument.HasElements)
{
xElement = new XElement(xmlDocument.Name.LocalName) { Value = xmlDocument.Value };
}
else
{
xElement = new XElement(xmlDocument.Name.LocalName, xmlDocument.Elements().Select(x => RemoveAllNamespaces(x)));
}
foreach (var attribute in xmlDocument.Attributes())
{
if (!attribute.IsNamespaceDeclaration)
{
xElement.Add(attribute);
}
}
return xElement;
}
After much searching for a solution to this very issue, this particular page seemed to have the most beef...however, nothing quite fit exactly, so I took the old-fashioned way and just parsed the stuff out I wanted out. Hope this helps someone. (Note: this also removes the SOAP or similar envelope stuff.)
public static string RemoveNamespaces(string psXml)
{
//
// parse through the passed XML, and remove any and all namespace references...also
// removes soap envelope/header(s)/body, or any other references via ":" entities,
// leaving all data intact
//
string xsXml = "", xsCurrQtChr = "";
int xiPos = 0, xiLastPos = psXml.Length - 1;
bool xbInNode = false;
while (xiPos <= xiLastPos)
{
string xsCurrChr = psXml.Substring(xiPos, 1);
xiPos++;
if (xbInNode)
{
if (xsCurrChr == ":")
{
// soap envelope or body (or some such)
// we'll strip these node wrappers completely
// need to first strip the beginning of it off (i.e. "<soap" or "<s")
int xi = xsXml.Length;
string xsChr = "";
do
{
xi--;
xsChr = xsXml.Substring(xi, 1);
xsXml = xsXml.Substring(0, xi);
} while (xsChr != "<");
// next, find end of node
string xsQt = "";
do
{
xiPos++;
if (xiPos <= xiLastPos)
{
xsChr = psXml.Substring(xiPos, 1);
if (xsQt.Length == 0)
{
if (xsChr == "'" || xsChr == "\"")
{
xsQt = xsChr;
}
}
else
{
if (xsChr == xsQt)
{
xsQt = ""; // end of quote
}
else
{
if (xsChr == ">") xsChr = "x"; // stay in loop...this is not end of node
}
}
}
} while (xsChr != ">" && xiPos <= xiLastPos);
xiPos++; // skip over closing ">"
xbInNode = false;
}
else
{
if (xsCurrChr == ">")
{
xbInNode = false;
xsXml += xsCurrChr;
}
else
{
if (xsCurrChr == " " || xsCurrChr == "\t")
{
// potential namespace...let's check...next character must be "/"
// or more white space, and if not, skip until we find such
string xsChr = "";
int xiOrgLen = xsXml.Length;
xsXml += xsCurrChr;
do
{
if (xiPos <= xiLastPos)
{
xsChr = psXml.Substring(xiPos, 1);
xiPos++;
if (xsChr == " " || xsChr == "\r" || xsChr == "\n" || xsChr == "\t")
{
// carry on..white space
xsXml += xsChr;
}
else
{
if (xsChr == "/" || xsChr == ">")
{
xsXml += xsChr;
}
else
{
// namespace! - get rid of it
xsXml = xsXml.Substring(0, xiOrgLen - 0); // first, truncate any added whitespace
// next, peek forward until we find "/" or ">"
string xsQt = "";
do
{
if (xiPos <= xiLastPos)
{
xsChr = psXml.Substring(xiPos, 1);
xiPos++;
if (xsQt.Length > 0)
{
if (xsChr == xsQt) xsQt = ""; else xsChr = "x";
}
else
{
if (xsChr == "'" || xsChr == "\"") xsQt = xsChr;
}
}
} while (xsChr != ">" && xsChr != "/" && xiPos <= xiLastPos);
if (xsChr == ">" || xsChr == "/") xsXml += xsChr;
xbInNode = false;
}
}
}
} while (xsChr != ">" && xsChr != "/" && xiPos <= xiLastPos);
}
else
{
xsXml += xsCurrChr;
}
}
}
}
else
{
//
// if not currently inside a node, then we are in a value (or about to enter a new node)
//
xsXml += xsCurrChr;
if (xsCurrQtChr.Length == 0)
{
if (xsCurrChr == "<")
{
xbInNode = true;
}
}
else
{
//
// currently inside a quoted string
//
if (xsCurrQtChr == xsCurrChr)
{
// finishing quoted string
xsCurrQtChr = "";
}
}
}
}
return (xsXml);
}
I tried the first few solutions and didn't work for me. Mainly the problem with attributes being removed like the other have already mentioned. I would say my approach is very similar to Jimmy by using the XElement constructors that taking object as parameters.
public static XElement RemoveAllNamespaces(this XElement element)
{
return new XElement(element.Name.LocalName,
element.HasAttributes ? element.Attributes().Select(a => new XAttribute(a.Name.LocalName, a.Value)) : null,
element.HasElements ? element.Elements().Select(e => RemoveAllNamespaces(e)) : null,
element.Value);
}
Adding my that also cleans out the name of nodes that have namespace prefixes:
public static string RemoveAllNamespaces(XElement element)
{
string tex = element.ToString();
var nsitems = element.DescendantsAndSelf().Select(n => n.ToString().Split(' ', '>')[0].Split('<')[1]).Where(n => n.Contains(":")).DistinctBy(n => n).ToArray();
//Namespace prefix on nodes: <a:nodename/>
tex = nsitems.Aggregate(tex, (current, nsnode) => current.Replace("<"+nsnode + "", "<" + nsnode.Split(':')[1] + ""));
tex = nsitems.Aggregate(tex, (current, nsnode) => current.Replace("</" + nsnode + "", "</" + nsnode.Split(':')[1] + ""));
//Namespace attribs
var items = element.DescendantsAndSelf().SelectMany(d => d.Attributes().Where(a => a.IsNamespaceDeclaration || a.ToString().Contains(":"))).DistinctBy(o => o.Value);
tex = items.Aggregate(tex, (current, xAttribute) => current.Replace(xAttribute.ToString(), ""));
return tex;
}
You can do that using Linq:
public static string RemoveAllNamespaces(string xmlDocument)
{
var xml = XElement.Parse(xmlDocument);
xml.Descendants().Select(o => o.Name = o.Name.LocalName).ToArray();
return xml.ToString();
}
The reply's by Jimmy and Peter were a great help, but they actually removed all attributes, so I made a slight modification:
Imports System.Runtime.CompilerServices
Friend Module XElementExtensions
<Extension()> _
Public Function RemoveAllNamespaces(ByVal element As XElement) As XElement
If element.HasElements Then
Dim cleanElement = RemoveAllNamespaces(New XElement(element.Name.LocalName, element.Attributes))
cleanElement.Add(element.Elements.Select(Function(el) RemoveAllNamespaces(el)))
Return cleanElement
Else
Dim allAttributesExceptNamespaces = element.Attributes.Where(Function(attr) Not attr.IsNamespaceDeclaration)
element.ReplaceAttributes(allAttributesExceptNamespaces)
Return element
End If
End Function
End Module