I am using the .Split(',') method on a string that I know has values delimited by commas, and I want those values to be separated and put into a string[].
Since you're reading a CSV file, the best course of action would be to use an existing CSV reader. There's more to CSV than just commas between quotes. Finding all of the cases you need to handle would be more work than it's worth.
Here's a CSV reader question on SO.
This is a fairly straightforward CSV reader implementation we use in a few projects here. It is easy to use and handles the cases you are talking about.
First the CSV Class
/// <summary>
/// Helpers for escaping and unescaping a single CSV field value.
/// </summary>
public static class Csv
{
    private const string Quote = "\"";
    private const string EscapedQuote = "\"\"";

    // '\r' is listed next to '\n' so that any line break inside a value forces quoting;
    // line-oriented readers treat a bare carriage return as the end of a record.
    private static readonly char[] CharactersThatMustBeQuoted = { ',', '"', '\n', '\r' };

    /// <summary>
    /// Converts a raw value into its CSV field representation: embedded double quotes
    /// are doubled, and the value is wrapped in double quotes when it contains a comma,
    /// a double quote, or a line break.
    /// </summary>
    /// <param name="s">The raw field value.</param>
    /// <returns>The escaped field, or <paramref name="s"/> unchanged when no escaping is needed.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="s"/> is null.</exception>
    public static string Escape(string s)
    {
        if (s == null)
        {
            throw new System.ArgumentNullException("s");
        }

        // Replace is a no-op when there is nothing to double, so no Contains pre-check is needed.
        s = s.Replace(Quote, EscapedQuote);

        if (s.IndexOfAny(CharactersThatMustBeQuoted) > -1)
        {
            s = Quote + s + Quote;
        }

        return s;
    }

    /// <summary>
    /// Reverses <see cref="Escape"/>: when the field is wrapped in double quotes, the
    /// wrapping quotes are removed and doubled quotes are collapsed to single quotes.
    /// Fields that are not wrapped in quotes are returned unchanged.
    /// </summary>
    /// <param name="s">The CSV field text.</param>
    /// <returns>The unescaped value.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="s"/> is null.</exception>
    public static string Unescape(string s)
    {
        if (s == null)
        {
            throw new System.ArgumentNullException("s");
        }

        // Require at least two characters: a lone '"' both starts and ends with a quote,
        // and stripping "both" quotes from it would ask Substring for a negative length.
        // Character comparisons are used so the check is ordinal, not culture-sensitive.
        if (s.Length >= 2 && s[0] == '"' && s[s.Length - 1] == '"')
        {
            s = s.Substring(1, s.Length - 2).Replace(EscapedQuote, Quote);
        }

        return s;
    }
}
Then a pretty nice Reader implementation - If you need it. You should be able to do what you need with just the CSV class above.
/// <summary>
/// Reads CSV rows from a file or stream, yielding each row as an array of
/// unescaped field values.
/// </summary>
public sealed class CsvReader : System.IDisposable
{
    // Splits on a comma only when the rest of the text after it holds an even
    // number of double quotes, i.e. the comma is not inside a quoted field.
    private static Regex s_fieldSeparator = new Regex(@",(?=(?:[^""]*""[^""]*"")*(?![^""]*""))");

    // Matches text containing an odd number of double quotes, which means a
    // quoted field is still open and the record continues on the next line.
    private static Regex s_openQuote = new Regex(@"^[^""]*(?:""[^""]*""[^""]*)*""[^""]*$");

    private TextReader _input;
    private long _rowCount = 0;

    /// <summary>Opens the named file for reading and reads CSV rows from it.</summary>
    /// <param name="fileName">Path of the CSV file to open.</param>
    public CsvReader(string fileName)
        : this(new FileStream(fileName, FileMode.Open, FileAccess.Read))
    {
    }

    /// <summary>Reads CSV rows from the given stream.</summary>
    /// <param name="stream">Stream containing the CSV text.</param>
    public CsvReader(Stream stream)
    {
        _input = new StreamReader(stream);
    }

    /// <summary>
    /// Lazily enumerates the rows of the input. Each item is a string[] holding the
    /// unescaped fields of one row. Physical lines are joined with "\n" while a quoted
    /// field is still open. The underlying reader is closed once the input is exhausted.
    /// </summary>
    public System.Collections.IEnumerable RowEnumerator
    {
        get
        {
            if (_input == null)
            {
                throw new System.ApplicationException("I can't start reading without CSV input.");
            }

            _rowCount = 0;

            for (string record = _input.ReadLine(); record != null; record = _input.ReadLine())
            {
                // Keep appending physical lines until the quotes balance or the input ends.
                while (s_openQuote.IsMatch(record))
                {
                    string continuation = _input.ReadLine();
                    if (continuation == null)
                    {
                        break;
                    }

                    record = record + "\n" + continuation;
                }

                _rowCount++;

                string[] fields = s_fieldSeparator.Split(record);
                for (int column = 0; column < fields.Length; column++)
                {
                    fields[column] = Csv.Unescape(fields[column]);
                }

                yield return fields;
            }

            _input.Close();
        }
    }

    /// <summary>
    /// Number of rows produced so far by the current enumeration; reset to zero
    /// when an enumeration of <see cref="RowEnumerator"/> starts.
    /// </summary>
    public long RowIndex
    {
        get { return _rowCount; }
    }

    /// <summary>Disposes the underlying reader.</summary>
    public void Dispose()
    {
        if (_input != null)
        {
            _input.Dispose();
        }
    }
}
Then you can use it like this.
// Open the file read-only: FileStream(path, FileMode.Open) on its own requests
// read/write access and fails on files that cannot be written to.
// The using block disposes the reader (and the stream it wraps) when done.
using (var reader = new CsvReader(new FileStream(file, FileMode.Open, FileAccess.Read)))
{
    foreach (string[] row in reader.RowEnumerator)
    {
        // row holds the unescaped field values of one CSV row.
    }
}
Note: This opens an existing CSV file, but it can be modified fairly easily to take a string[] as you need.
You should probably read this article: "Regular Expression for Comma Based Splitting Ignoring Commas inside Quotes". Although it is written for Java, the regular expression is the same.