In C#, what is the most efficient method to split a text file into multiple text files (the splitting delimiter being a blank line), while preserving the character encoding?
In case anyone needs to split a text file into multiple files at every line that contains a given marker string:
/// <summary>
/// Sample entry point: splits <c>C:\test.txt</c> into numbered files
/// <c>C:\output-files-0.txt</c>, <c>C:\output-files-1.txt</c>, ...
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
public static void Main(string[] args)
{
    // A line containing this marker ends the current output file.
    // The marker line itself is not copied to any output file.
    const string delimiter = "String You Want File To Split From";

    // Copies the lines of inputfile into a sequence of output files, starting a
    // new file after every marker line. outputfilesformat is a composite format
    // string whose {0} placeholder receives the zero-based index of the file.
    void Split(string inputfile, string outputfilesformat)
    {
        int i = 0;
        System.IO.StreamWriter outfile = null;
        string line;
        try
        {
            using (var infile = new System.IO.StreamReader(inputfile))
            {
                // ReadLine returns null once the end of the input is reached.
                while ((line = infile.ReadLine()) != null)
                {
                    if (line.Trim().Contains(delimiter))
                    {
                        // Close the current part; the next ordinary line opens a new one,
                        // so consecutive marker lines do not produce empty files.
                        if (outfile != null)
                        {
                            outfile.Dispose();
                            outfile = null;
                        }
                        continue;
                    }

                    if (outfile == null)
                    {
                        // The writer is created only after at least one ReadLine call,
                        // and it reuses the encoding reported by the reader.
                        outfile = new System.IO.StreamWriter(
                            string.Format(outputfilesformat, i++),
                            false,
                            infile.CurrentEncoding);
                    }

                    outfile.WriteLine(line);
                }
            }
        }
        finally
        {
            // Make sure the last part is flushed and closed even if an exception occurred.
            if (outfile != null)
            {
                outfile.Dispose();
            }
        }
    }

    // Fixed: the input path was "C:test.txt" (drive-relative, missing the backslash),
    // which did not match the absolute output path used below.
    Split("C:\\test.txt", "C:\\output-files-{0}.txt");
}
Purely for those who want to avoid thinking:
If you have a CSV (comma-separated values) file and want to start a new output file whenever a particular field changes, name each output file after the new value of that field (without the surrounding quote marks), and strip out comment lines (identified here as lines whose first field starts with "#, i.e. a quote mark followed by a hash):
Modified method:
/// <summary>
/// Splits a CSV file into several files, starting a new output file every time
/// the value of the fifth field (index 4) changes from one row to the next.
/// </summary>
/// <remarks>
/// Rows whose first field starts with <c>"#</c> are treated as comments and are not copied.
/// Rows with fewer than five fields (including blank lines) are skipped because they
/// carry no key value. Each output file is opened in overwrite mode, so if the same key
/// value appears in two non-adjacent runs, the later run replaces the earlier file.
/// Fields are separated by a plain split on ','; commas inside quoted fields are not
/// treated specially.
/// </remarks>
/// <param name="inputfile">Path of the CSV file to read.</param>
/// <param name="outputfilesformat">
/// Composite format string for the output paths; <c>{0}</c> is replaced by the key
/// value with its double quotes removed.
/// </param>
public void Split(string inputfile, string outputfilesformat)
{
    // Zero-based index of the field whose value names the output file.
    const int keyFieldIndex = 4;

    System.IO.StreamWriter outfile = null;
    string line;
    string nameFromFile = "";
    try
    {
        using (var infile = new System.IO.StreamReader(inputfile))
        {
            // ReadLine returns null once the end of the input is reached.
            while ((line = infile.ReadLine()) != null)
            {
                string[] splitArray = line.Split(',');

                // Comment rows are dropped from the output.
                if (splitArray[0].StartsWith("\"#", System.StringComparison.Ordinal))
                {
                    continue;
                }

                // Blank or short rows have no key field; indexing them would throw.
                if (splitArray.Length <= keyFieldIndex)
                {
                    continue;
                }

                string key = splitArray[keyFieldIndex].Replace("\"", "");
                if (key != nameFromFile)
                {
                    // Key changed: close the current file and fall through so that this
                    // row becomes the first row of the new file (it used to be discarded).
                    if (outfile != null)
                    {
                        outfile.Dispose();
                        outfile = null;
                    }
                    nameFromFile = key;
                }

                if (outfile == null)
                {
                    // Reuse the encoding reported by the reader for the output file.
                    outfile = new System.IO.StreamWriter(
                        string.Format(outputfilesformat, nameFromFile),
                        false,
                        infile.CurrentEncoding);
                }

                outfile.WriteLine(line);
            }
        }
    }
    finally
    {
        // Make sure the last file is flushed and closed even if an exception occurred.
        if (outfile != null)
        {
            outfile.Dispose();
        }
    }
}
Local path call:
// Resolve the input file and the output folder from their application-relative paths.
string sourcePath = Server.MapPath("~/Data/SPLIT/DATA.TXT");
string targetFolder = Server.MapPath("~/Data/SPLIT");

// Only split when the input file is actually present.
bool sourceExists = System.IO.File.Exists(sourcePath);
if (sourceExists)
{
    // {0} is filled in by Split for each output file.
    string outputPattern = targetFolder + "\\{0}.CSV";
    Split(sourcePath, outputPattern);
}
I would use the StreamReader and StreamWriter classes:
/// <summary>
/// Splits a text file into numbered output files, using empty lines as separators.
/// </summary>
/// <remarks>
/// Empty lines are not copied. An output file is created only when a non-empty line
/// needs to be written, so runs of empty lines do not produce empty files.
/// </remarks>
/// <param name="inputfile">Path of the text file to read.</param>
/// <param name="outputfilesformat">
/// Composite format string for the output paths; <c>{0}</c> is replaced by a
/// zero-based counter that increases with every file created.
/// </param>
public void Split(string inputfile, string outputfilesformat)
{
    System.IO.StreamWriter writer = null;
    int nextIndex = 0;
    try
    {
        using (var reader = new System.IO.StreamReader(inputfile))
        {
            string current;
            while ((current = reader.ReadLine()) != null)
            {
                if (current.Length > 0)
                {
                    if (writer == null)
                    {
                        // First line of a new section: open the next numbered file
                        // with the encoding reported by the reader.
                        string target = string.Format(outputfilesformat, nextIndex);
                        nextIndex++;
                        writer = new System.IO.StreamWriter(target, false, reader.CurrentEncoding);
                    }

                    writer.WriteLine(current);
                }
                else if (writer != null)
                {
                    // Separator line: finish the section that is currently open.
                    writer.Dispose();
                    writer = null;
                }
            }
        }
    }
    finally
    {
        // Close the last section even if reading or writing failed.
        if (writer != null)
        {
            writer.Dispose();
        }
    }
}
You would then call this method like this:
// {0} in the second argument is replaced by the zero-based index of each output file,
// so this call writes C:\output-files-0.txt, C:\output-files-1.txt, and so on.
Split("C:\\somefile.txt", "C:\\output-files-{0}.txt");