I'm trying to replace multiple tables from a large (~300 MB) XML file with external XML files.
There are roughly 30,000 tables, and there are 23,000 XML files because s
What you can do is take the basic logic of streaming an XmlReader to an XmlWriter from Mark Fussell's article "Combining the XmlReader and XmlWriter classes for simple streaming transformations" and use it to patch the contents of one XML file into another:
/// <summary>
/// Copies nodes one at a time from an <see cref="XmlReader"/> to an <see cref="XmlWriter"/>,
/// handing selected elements to <see cref="EditCurrentElement"/> instead of copying them.
/// </summary>
public abstract class XmlStreamingEditorBase
{
    readonly XmlReader reader;
    readonly XmlWriter writer;
    readonly Predicate<XmlReader> shouldTransform;

    /// <summary>Creates an editor over the given reader/writer pair.</summary>
    /// <param name="reader">Source of the nodes to stream.</param>
    /// <param name="writer">Destination for copied and edited nodes.</param>
    /// <param name="shouldTransform">
    /// Called with the reader positioned on each element; returns true when that element
    /// should be passed to <see cref="EditCurrentElement"/> rather than copied.
    /// </param>
    /// <exception cref="ArgumentNullException">Any argument is null.</exception>
    public XmlStreamingEditorBase(XmlReader reader, XmlWriter writer, Predicate<XmlReader> shouldTransform)
    {
        if (reader == null)
            throw new ArgumentNullException("reader");
        if (writer == null)
            throw new ArgumentNullException("writer");
        if (shouldTransform == null)
            throw new ArgumentNullException("shouldTransform");
        this.reader = reader;
        this.writer = writer;
        this.shouldTransform = shouldTransform;
    }

    /// <summary>The reader being streamed from.</summary>
    protected XmlReader Reader { get { return reader; } }

    /// <summary>The writer being streamed to.</summary>
    protected XmlWriter Writer { get { return writer; } }

    /// <summary>
    /// Reads the source to its end, copying each node to the writer unless it is an element
    /// selected by the predicate, in which case <see cref="EditCurrentElement"/> handles it.
    /// </summary>
    public void Process()
    {
        while (Reader.Read())
        {
            if (Reader.NodeType == XmlNodeType.Element && shouldTransform(Reader))
            {
                EditCurrentElement();
                // The edit is responsible for everything written for this element, so the
                // node the reader is now on is not copied; the next Read() moves past it.
                continue;
            }
            Writer.WriteShallowNode(Reader);
        }
    }

    /// <summary>
    /// Handles the element the reader is currently positioned on. Called by
    /// <see cref="Process"/> in place of the default copy.
    /// </summary>
    protected abstract void EditCurrentElement();
}
/// <summary>
/// A streaming editor whose per-element edit is supplied as a delegate.
/// </summary>
public class XmlStreamingEditor : XmlStreamingEditorBase
{
    readonly Action<XmlReader, XmlWriter> transform;

    /// <summary>Creates an editor that applies <paramref name="transform"/> to selected elements.</summary>
    /// <param name="reader">Source of the nodes to stream.</param>
    /// <param name="writer">Destination for copied and edited nodes.</param>
    /// <param name="shouldTransform">Selects the elements to hand to <paramref name="transform"/>.</param>
    /// <param name="transform">
    /// Receives a subtree reader scoped to the selected element, plus the output writer, and
    /// writes whatever should replace that element.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="transform"/> is null.</exception>
    public XmlStreamingEditor(XmlReader reader, XmlWriter writer, Predicate<XmlReader> shouldTransform, Action<XmlReader, XmlWriter> transform)
        : base(reader, writer, shouldTransform)
    {
        if (transform == null)
            throw new ArgumentNullException("transform");
        this.transform = transform;
    }

    /// <summary>
    /// Invokes the transform with a subtree reader over the current element and disposes
    /// that subtree reader afterwards, whether or not the transform consumed it.
    /// </summary>
    protected override void EditCurrentElement()
    {
        using (var subReader = Reader.ReadSubtree())
        {
            transform(subReader, Writer);
        }
    }
}
/// <summary>
/// Streams a main XML document to a writer while replacing one of its elements with an
/// element taken from a second ("patch") XML document.
/// </summary>
public class XmlStreamingPatcher
{
    readonly XmlReader patchReader;
    readonly XmlReader reader;
    readonly XmlWriter writer;
    readonly Predicate<XmlReader> shouldPatchFrom;
    readonly Func<XmlReader, XmlReader, bool> shouldPatchFromTo;
    bool patched = false;

    /// <summary>Creates a patcher.</summary>
    /// <param name="reader">Reader over the main document.</param>
    /// <param name="writer">Writer receiving the patched main document.</param>
    /// <param name="patchReader">Reader over the document that supplies the replacement element.</param>
    /// <param name="shouldPatchFrom">Selects the replacement element in the patch document.</param>
    /// <param name="shouldPatchFromTo">
    /// Called with (patch reader, main reader); returns true when the main element the second
    /// reader is on should be replaced by the element the first reader is on.
    /// </param>
    /// <exception cref="ArgumentNullException">Any argument is null.</exception>
    public XmlStreamingPatcher(XmlReader reader, XmlWriter writer, XmlReader patchReader, Predicate<XmlReader> shouldPatchFrom, Func<XmlReader, XmlReader, bool> shouldPatchFromTo)
    {
        if (reader == null)
            throw new ArgumentNullException("reader");
        if (writer == null)
            throw new ArgumentNullException("writer");
        if (patchReader == null)
            throw new ArgumentNullException("patchReader");
        if (shouldPatchFrom == null)
            throw new ArgumentNullException("shouldPatchFrom");
        if (shouldPatchFromTo == null)
            throw new ArgumentNullException("shouldPatchFromTo");
        this.reader = reader;
        this.writer = writer;
        this.patchReader = patchReader;
        this.shouldPatchFrom = shouldPatchFrom;
        this.shouldPatchFromTo = shouldPatchFromTo;
    }

    /// <summary>
    /// Advances the patch reader to the first element accepted by the "patch from" predicate,
    /// then streams the whole main document to the writer, substituting that element for the
    /// first main element accepted by the "patch from/to" predicate.
    /// </summary>
    /// <returns>
    /// True if a replacement was written. False if no main element matched, or if the patch
    /// document contained no acceptable element; in the latter case the main document is
    /// not streamed at all and nothing is written.
    /// </returns>
    public bool Process()
    {
        patched = false;
        while (patchReader.Read())
        {
            if (patchReader.NodeType == XmlNodeType.Element && shouldPatchFrom(patchReader))
            {
                var editor = new XmlStreamingEditor(reader, writer, ShouldPatchTo, PatchNode);
                editor.Process();
                return patched;
            }
        }
        return false;
    }

    /// <summary>Decides whether the main element under <paramref name="reader"/> gets replaced.</summary>
    bool ShouldPatchTo(XmlReader reader)
    {
        // The patch reader is forward-only, so its subtree can be written only once. Make
        // that explicit instead of relying on where the patch reader happens to be
        // positioned after its subtree has been consumed.
        if (patched)
            return false;
        return shouldPatchFromTo(patchReader, reader);
    }

    /// <summary>
    /// Writes the current patch element (and everything inside it) to <paramref name="writer"/>.
    /// The main-document subtree in <paramref name="reader"/> is intentionally not read, so
    /// its content is dropped from the output.
    /// </summary>
    void PatchNode(XmlReader reader, XmlWriter writer)
    {
        using (var subReader = patchReader.ReadSubtree())
        {
            while (subReader.Read())
            {
                writer.WriteShallowNode(subReader);
                patched = true;
            }
        }
    }
}
/// <summary>Helper extensions for <see cref="XmlReader"/>.</summary>
public static class XmlReaderExtensions
{
    /// <summary>
    /// Returns the expanded name (local name plus namespace URI) of the element the reader
    /// is positioned on.
    /// </summary>
    /// <param name="reader">The reader to inspect; may be null.</param>
    /// <returns>
    /// The element's <see cref="XName"/>, or null when <paramref name="reader"/> is null or
    /// is not positioned on an element node.
    /// </returns>
    public static XName GetElementName(this XmlReader reader)
    {
        if (reader == null)
            return null;
        if (reader.NodeType != XmlNodeType.Element)
            return null;
        // LocalName, not Name: Name is the qualified name ("prefix:local") for prefixed
        // elements, and XName.Get rejects a local name containing a colon.
        string localName = reader.LocalName;
        string uri = reader.NamespaceURI;
        return XName.Get(localName, uri);
    }
}
/// <summary>Helper extensions for <see cref="XmlWriter"/>.</summary>
public static class XmlWriterExtensions
{
    /// <summary>
    /// Writes the single node the reader is positioned on to the writer, without descending
    /// into child nodes. Calling this once per <c>Read()</c> copies a document node by node.
    /// </summary>
    /// <param name="writer">Destination writer.</param>
    /// <param name="reader">Reader positioned on the node to copy.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="reader"/> or <paramref name="writer"/> is null.
    /// </exception>
    public static void WriteShallowNode(this XmlWriter writer, XmlReader reader)
    {
        // adapted from http://blogs.msdn.com/b/mfussell/archive/2005/02/12/371546.aspx
        if (reader == null)
            throw new ArgumentNullException("reader");
        if (writer == null)
            throw new ArgumentNullException("writer");

        XmlNodeType kind = reader.NodeType;

        if (kind == XmlNodeType.Element)
        {
            // Start tag plus attributes; the matching end tag is written when the reader
            // reaches the EndElement node, except for empty elements, which have none.
            writer.WriteStartElement(reader.Prefix, reader.LocalName, reader.NamespaceURI);
            writer.WriteAttributes(reader, true);
            if (reader.IsEmptyElement)
                writer.WriteEndElement();
        }
        else if (kind == XmlNodeType.EndElement)
        {
            writer.WriteFullEndElement();
        }
        else if (kind == XmlNodeType.Text)
        {
            writer.WriteString(reader.Value);
        }
        else if (kind == XmlNodeType.Whitespace || kind == XmlNodeType.SignificantWhitespace)
        {
            writer.WriteWhitespace(reader.Value);
        }
        else if (kind == XmlNodeType.CDATA)
        {
            writer.WriteCData(reader.Value);
        }
        else if (kind == XmlNodeType.Comment)
        {
            writer.WriteComment(reader.Value);
        }
        else if (kind == XmlNodeType.EntityReference)
        {
            writer.WriteEntityRef(reader.Name);
        }
        else if (kind == XmlNodeType.XmlDeclaration || kind == XmlNodeType.ProcessingInstruction)
        {
            // The XML declaration is emitted through the processing-instruction call too.
            writer.WriteProcessingInstruction(reader.Name, reader.Value);
        }
        else if (kind == XmlNodeType.DocumentType)
        {
            string publicId = reader.GetAttribute("PUBLIC");
            string systemId = reader.GetAttribute("SYSTEM");
            writer.WriteDocType(reader.Name, publicId, systemId, reader.Value);
        }
        else
        {
            // Any other node type is not copied; it is only reported to the debug output.
            Debug.WriteLine("unknown NodeType " + kind);
        }
    }
}
To create instances of XmlReader and XmlWriter that read and write XML from files, use XmlReader.Create(string) and XmlWriter.Create(string). Also, be sure to stream the large file into a temporary file and only replace the original after editing is finished.
And then, to test:
/// <summary>Small in-memory demonstration of <c>XmlStreamingPatcher</c>.</summary>
public static class TestXmlStreamingPatcher
{
    /// <summary>
    /// Patches a two-table main document with a one-table patch document and writes the
    /// result to the debug output.
    /// </summary>
    public static void Test()
    {
        // NOTE(review): the sample documents must be well-formed XML for XmlReader to parse
        // them. Only the TABLE element and its "name" attribute are required by the
        // predicates below; the Root/Row element names and the table names are
        // illustrative - confirm against the intended sample data.
        string mainXml = @"<Root>
<TABLE name=""table1""><Row>1</Row></TABLE>
<TABLE name=""table2""><Row>1</Row></TABLE>
</Root>";
        string patchXml = @"<Root>
<TABLE name=""table1""><Row>1</Row>
<Row>2</Row>
<Row>3</Row></TABLE>
</Root>";
        var patchedXml1 = TestPatch(mainXml, patchXml);
        Debug.WriteLine(patchedXml1);
    }

    /// <summary>Runs the patcher over two XML strings and returns the patched main document.</summary>
    private static string TestPatch(string mainXml, string patchXml)
    {
        using (var mainReader = new StringReader(mainXml))
        using (var mainXmlReader = XmlReader.Create(mainReader))
        using (var patchReader = new StringReader(patchXml))
        using (var patchXmlReader = XmlReader.Create(patchReader))
        using (var mainWriter = new StringWriter())
        {
            // The XML writer is disposed before the text is read back so that everything
            // it buffered has reached the underlying StringWriter.
            using (var mainXmlWriter = XmlWriter.Create(mainWriter))
            {
                var patcher = new XmlStreamingPatcher(mainXmlReader, mainXmlWriter, patchXmlReader, ShouldPatchFrom, ShouldPatchFromTo);
                patcher.Process();
            }
            return mainWriter.ToString();
        }
    }

    /// <summary>Selects TABLE elements in the patch document.</summary>
    static bool ShouldPatchFrom(XmlReader reader)
    {
        return reader.GetElementName() == "TABLE";
    }

    /// <summary>
    /// Matches a main element to the patch element when both have the same element name
    /// and the same non-empty "name" attribute.
    /// </summary>
    static bool ShouldPatchFromTo(XmlReader patchReader, XmlReader toReader)
    {
        if (patchReader.GetElementName() != toReader.GetElementName())
            return false;
        string name = patchReader.GetAttribute("name");
        if (string.IsNullOrEmpty(name))
            return false;
        return name == toReader.GetAttribute("name");
    }
}
The output of TestXmlStreamingPatcher.Test() from this class is
1
2
3
1
which is what you want.