I'm trying to replace multiple tables from a large (~300 MB) XML file with external XML files.
There are roughly 30,000 tables, and there are 23,000 XML files because s
What you can do is take the basic logic for streaming an XmlReader to an XmlWriter from Mark Fussell's article "Combining the XmlReader and XmlWriter classes for simple streaming transformations" and use it to patch the contents of one XML file into another:
/// <summary>
/// Base class for a streaming XML editor. Nodes are pulled one at a time from an
/// <see cref="XmlReader"/> and copied to an <see cref="XmlWriter"/>; start elements
/// accepted by a caller-supplied predicate are handed to the subclass instead of
/// being copied.
/// </summary>
public abstract class XmlStreamingEditorBase
{
    readonly XmlReader reader;
    readonly XmlWriter writer;
    readonly Predicate<XmlReader> shouldTransform;

    /// <summary>
    /// Creates an editor over the given reader/writer pair.
    /// </summary>
    /// <param name="reader">Source of the XML nodes.</param>
    /// <param name="writer">Destination for copied and edited nodes.</param>
    /// <param name="shouldTransform">
    /// Called with the reader positioned on each start element; returning true routes
    /// that element to <see cref="EditCurrentElement"/>.
    /// </param>
    public XmlStreamingEditorBase(XmlReader reader, XmlWriter writer, Predicate<XmlReader> shouldTransform)
    {
        this.reader = reader;
        this.writer = writer;
        this.shouldTransform = shouldTransform;
    }

    /// <summary>The reader the editor consumes.</summary>
    protected XmlReader Reader { get { return reader; } }

    /// <summary>The writer the editor produces output on.</summary>
    protected XmlWriter Writer { get { return writer; } }

    /// <summary>
    /// Reads <see cref="Reader"/> to the end. Each node is either shallow-copied to
    /// <see cref="Writer"/> or, for a start element accepted by the predicate, passed
    /// to <see cref="EditCurrentElement"/>.
    /// </summary>
    public void Process()
    {
        while (Reader.Read())
        {
            // The predicate is consulted for start elements only.
            bool editThisNode = Reader.NodeType == XmlNodeType.Element && shouldTransform(Reader);
            if (editThisNode)
            {
                // The node the reader is on when this returns is NOT copied:
                // the next Read() call moves straight past it.
                EditCurrentElement();
            }
            else
            {
                Writer.WriteShallowNode(Reader);
            }
        }
    }

    /// <summary>
    /// Handles the element <see cref="Reader"/> is currently positioned on. The
    /// implementation is responsible for whatever output should take its place.
    /// </summary>
    protected abstract void EditCurrentElement();
}
/// <summary>
/// Streaming editor that delegates the editing of each selected element to a
/// caller-supplied action.
/// </summary>
public class XmlStreamingEditor : XmlStreamingEditorBase
{
    readonly Action<XmlReader, XmlWriter> elementTransform;

    /// <summary>
    /// Creates an editor that invokes <paramref name="transform"/> for every element
    /// accepted by <paramref name="shouldTransform"/>.
    /// </summary>
    /// <param name="reader">Source of the XML nodes.</param>
    /// <param name="writer">Destination for copied and edited nodes.</param>
    /// <param name="shouldTransform">Selects the start elements to edit.</param>
    /// <param name="transform">
    /// Receives a subtree reader over the selected element and the output writer.
    /// </param>
    public XmlStreamingEditor(XmlReader reader, XmlWriter writer, Predicate<XmlReader> shouldTransform, Action<XmlReader, XmlWriter> transform)
        : base(reader, writer, shouldTransform)
    {
        elementTransform = transform;
    }

    /// <summary>
    /// Wraps the current element in a subtree reader, passes it with the writer to the
    /// transform action, and disposes the subtree reader afterwards.
    /// </summary>
    protected override void EditCurrentElement()
    {
        using (XmlReader elementReader = Reader.ReadSubtree())
        {
            elementTransform(elementReader, Writer);
        }
    }
}
/// <summary>
/// Streams a main XML document from a reader to a writer, replacing one element of it
/// with an element read from a separate patch document.
/// </summary>
public class XmlStreamingPatcher
{
    readonly XmlReader patchReader;
    readonly XmlReader reader;
    readonly XmlWriter writer;
    readonly Predicate<XmlReader> shouldPatchFrom;
    readonly Func<XmlReader, XmlReader, bool> shouldPatchFromTo;
    bool patched = false;

    /// <summary>
    /// Creates a patcher.
    /// </summary>
    /// <param name="reader">Reader over the main document.</param>
    /// <param name="writer">Writer receiving the patched main document.</param>
    /// <param name="patchReader">Reader over the patch document.</param>
    /// <param name="shouldPatchFrom">
    /// Called with <paramref name="patchReader"/> positioned on each start element of the
    /// patch document; the first element it accepts becomes the replacement content.
    /// </param>
    /// <param name="shouldPatchFromTo">
    /// Called with the patch reader and the main reader (positioned on a start element);
    /// returns true when that main element should be replaced by the patch element.
    /// </param>
    /// <exception cref="ArgumentNullException">Any argument is null.</exception>
    public XmlStreamingPatcher(XmlReader reader, XmlWriter writer, XmlReader patchReader, Predicate<XmlReader> shouldPatchFrom, Func<XmlReader, XmlReader, bool> shouldPatchFromTo)
    {
        // Same exception type as before, but now naming the offending parameter.
        if (reader == null)
        {
            throw new ArgumentNullException("reader");
        }
        if (writer == null)
        {
            throw new ArgumentNullException("writer");
        }
        if (patchReader == null)
        {
            throw new ArgumentNullException("patchReader");
        }
        if (shouldPatchFrom == null)
        {
            throw new ArgumentNullException("shouldPatchFrom");
        }
        if (shouldPatchFromTo == null)
        {
            throw new ArgumentNullException("shouldPatchFromTo");
        }
        this.reader = reader;
        this.writer = writer;
        this.patchReader = patchReader;
        this.shouldPatchFrom = shouldPatchFrom;
        this.shouldPatchFromTo = shouldPatchFromTo;
    }

    /// <summary>
    /// Advances the patch reader to the first start element accepted by
    /// <c>shouldPatchFrom</c>, then streams the whole main document to the writer,
    /// substituting the patch element for the first main element accepted by
    /// <c>shouldPatchFromTo</c>. Later matching elements are copied unchanged.
    /// </summary>
    /// <returns>
    /// True if the patch element was written to the output. False if no main element
    /// matched, or if the patch document contained no accepted element; in the latter
    /// case nothing at all is written to the writer.
    /// </returns>
    public bool Process()
    {
        patched = false;
        while (patchReader.Read())
        {
            if (patchReader.NodeType != XmlNodeType.Element || !shouldPatchFrom(patchReader))
            {
                continue;
            }
            var editor = new XmlStreamingEditor(reader, writer, ShouldPatchTo, PatchNode);
            editor.Process();
            return patched;
        }
        return false;
    }

    /// <summary>
    /// Predicate handed to the editor: decides whether the main element under
    /// <paramref name="reader"/> is the one to replace.
    /// </summary>
    bool ShouldPatchTo(XmlReader reader)
    {
        // The patch element can be consumed only once. After the first replacement
        // the patch reader is no longer on its start element, so another ReadSubtree()
        // call would throw; never report a second match.
        return !patched && shouldPatchFromTo(patchReader, reader);
    }

    /// <summary>
    /// Transform handed to the editor: writes the patch element in place of the main
    /// element. The main subtree reader is intentionally not read, which drops the
    /// original element's content from the output.
    /// </summary>
    void PatchNode(XmlReader reader, XmlWriter writer)
    {
        using (var subReader = patchReader.ReadSubtree())
        {
            patched = true;
            while (subReader.Read())
            {
                writer.WriteShallowNode(subReader);
            }
        }
    }
}
/// <summary>
/// Extension methods for <see cref="XmlReader"/>.
/// </summary>
public static class XmlReaderExtensions
{
    /// <summary>
    /// Returns the expanded name (local name plus namespace URI) of the element the
    /// reader is positioned on.
    /// </summary>
    /// <param name="reader">The reader to inspect; may be null.</param>
    /// <returns>
    /// The element's <see cref="XName"/>, or null when <paramref name="reader"/> is null
    /// or is not positioned on a start element.
    /// </returns>
    public static XName GetElementName(this XmlReader reader)
    {
        if (reader == null)
        {
            return null;
        }
        if (reader.NodeType != XmlNodeType.Element)
        {
            return null;
        }
        // Use LocalName, not Name: Name is the qualified name ("prefix:local"), and
        // XName.Get rejects the colon, so prefixed elements used to throw here.
        string localName = reader.LocalName;
        string uri = reader.NamespaceURI;
        return XName.Get(localName, uri);
    }
}
/// <summary>
/// Extension methods for <see cref="XmlWriter"/>.
/// </summary>
public static class XmlWriterExtensions
{
    /// <summary>
    /// Copies the single node the reader is positioned on to the writer, without
    /// descending into child nodes. A start element is written with its attributes and
    /// closed only if it is an empty element; otherwise it is closed later, when the
    /// matching end-element node is passed to this method.
    /// </summary>
    /// <param name="writer">The writer receiving the node.</param>
    /// <param name="reader">The reader positioned on the node to copy.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="reader"/> or <paramref name="writer"/> is null.
    /// </exception>
    public static void WriteShallowNode(this XmlWriter writer, XmlReader reader)
    {
        // adapted from http://blogs.msdn.com/b/mfussell/archive/2005/02/12/371546.aspx
        if (reader == null)
        {
            throw new ArgumentNullException("reader");
        }
        if (writer == null)
        {
            throw new ArgumentNullException("writer");
        }

        XmlNodeType nodeType = reader.NodeType;
        switch (nodeType)
        {
            case XmlNodeType.Element:
                WriteElementStart(writer, reader);
                return;

            case XmlNodeType.EndElement:
                writer.WriteFullEndElement();
                return;

            case XmlNodeType.Text:
                writer.WriteString(reader.Value);
                return;

            case XmlNodeType.Whitespace:
            case XmlNodeType.SignificantWhitespace:
                writer.WriteWhitespace(reader.Value);
                return;

            case XmlNodeType.CDATA:
                writer.WriteCData(reader.Value);
                return;

            case XmlNodeType.Comment:
                writer.WriteComment(reader.Value);
                return;

            case XmlNodeType.EntityReference:
                writer.WriteEntityRef(reader.Name);
                return;

            case XmlNodeType.XmlDeclaration:
            case XmlNodeType.ProcessingInstruction:
                // The XML declaration is emitted through the processing-instruction call too.
                writer.WriteProcessingInstruction(reader.Name, reader.Value);
                return;

            case XmlNodeType.DocumentType:
                writer.WriteDocType(reader.Name, reader.GetAttribute("PUBLIC"), reader.GetAttribute("SYSTEM"), reader.Value);
                return;

            default:
                // Any other node type is reported to the debug output and not copied.
                Debug.WriteLine("unknown NodeType " + nodeType);
                return;
        }
    }

    // Writes a start tag with its attributes; closes it immediately for an empty element.
    private static void WriteElementStart(XmlWriter writer, XmlReader reader)
    {
        writer.WriteStartElement(reader.Prefix, reader.LocalName, reader.NamespaceURI);
        writer.WriteAttributes(reader, true);
        if (reader.IsEmptyElement)
        {
            writer.WriteEndElement();
        }
    }
}
To create instances of XmlReader and XmlWriter that read and write XML files, use XmlReader.Create(string) and XmlWriter.Create(string). Also, be sure to stream the large file into a temporary file and only replace the original after editing is finished.
And then, to test:
/// <summary>
/// Small demonstration of <c>XmlStreamingPatcher</c>: replaces the "People" table of an
/// in-memory document with the table from a patch document and prints the result to
/// the debug output.
/// </summary>
public static class TestXmlStreamingPatcher
{
    /// <summary>
    /// Patches the sample main document with the sample patch and writes the patched
    /// XML to the debug output.
    /// </summary>
    public static void Test()
    {
        string mainXml = @"<?xml version=""1.0"" encoding=""UTF-8""?>
<INI>
<TABLE name=""People"">
<ROW>
<ID>1</ID>
<Name><![CDATA[Bob]]></Name>
</ROW>
</TABLE>
<TABLE name=""Animals"">
<ROW>
<ID>1</ID>
<Name><![CDATA[Golden]]></Name>
</ROW>
</TABLE>
</INI>
";
        string patchXml = @"<TABLE name=""People"">
<ROW>
<ID>1</ID>
<Name><![CDATA[Mary]]></Name>
</ROW>
<ROW>
<ID>2</ID>
<Name><![CDATA[Bob]]></Name>
</ROW>
<ROW>
<ID>3</ID>
<Name><![CDATA[Dan]]></Name>
</ROW>
</TABLE>
";
        string patchedXml1 = TestPatch(mainXml, patchXml);
        Debug.WriteLine(patchedXml1);
    }

    /// <summary>
    /// Runs the patcher over two XML strings and returns the patched main document.
    /// </summary>
    private static string TestPatch(string mainXml, string patchXml)
    {
        using (var mainText = new StringReader(mainXml))
        using (var mainXmlReader = XmlReader.Create(mainText))
        using (var patchText = new StringReader(patchXml))
        using (var patchXmlReader = XmlReader.Create(patchText))
        using (var output = new StringWriter())
        {
            // Dispose the XML writer before reading the text back so that all
            // buffered output has reached the StringWriter.
            using (var outputXmlWriter = XmlWriter.Create(output))
            {
                new XmlStreamingPatcher(mainXmlReader, outputXmlWriter, patchXmlReader, ShouldPatchFrom, ShouldPatchFromTo).Process();
            }
            return output.ToString();
        }
    }

    /// <summary>Accepts TABLE elements of the patch document as patch content.</summary>
    static bool ShouldPatchFrom(XmlReader reader)
    {
        XName elementName = reader.GetElementName();
        return elementName == "TABLE";
    }

    /// <summary>
    /// A main element is replaced when it has the same element name as the patch
    /// element and the same non-empty "name" attribute.
    /// </summary>
    static bool ShouldPatchFromTo(XmlReader patchReader, XmlReader toReader)
    {
        bool sameElementName = patchReader.GetElementName() == toReader.GetElementName();
        if (!sameElementName)
        {
            return false;
        }

        string patchTableName = patchReader.GetAttribute("name");
        return !string.IsNullOrEmpty(patchTableName) && patchTableName == toReader.GetAttribute("name");
    }
}
The output of TestXmlStreamingPatcher.Test() is:
<?xml version="1.0" encoding="UTF-8"?>
<INI>
<TABLE name="People">
<ROW>
<ID>1</ID>
<Name><![CDATA[Mary]]></Name>
</ROW>
<ROW>
<ID>2</ID>
<Name><![CDATA[Bob]]></Name>
</ROW>
<ROW>
<ID>3</ID>
<Name><![CDATA[Dan]]></Name>
</ROW>
</TABLE>
<TABLE name="Animals">
<ROW>
<ID>1</ID>
<Name><![CDATA[Golden]]></Name>
</ROW>
</TABLE>
</INI>
which is what you want.