In most cases, CSV files are text files whose fields are delimited by commas. Sometimes, however, these files are semicolon-delimited (for example, Excel uses a semicolon delimiter under some regional settings).
This is my code (it performs no validation on the text); perhaps it can help, or serve as a starting point. :-)
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
using MoreLinq; // http://stackoverflow.com/questions/15265588/how-to-find-item-with-max-value-using-linq
namespace HQ.Util.General.CSV
{
/// <summary>
/// Minimal CSV reader that guesses the field delimiter (tab, semicolon or comma)
/// from the first non-blank line and splits every line with a regex that understands
/// double-quoted fields and doubled quotes (<c>""</c>) inside them.
/// No validation is performed on the text, and quoted fields spanning several
/// lines are not supported.
/// </summary>
public class CsvHelper
{
    /// <summary>
    /// Maps each <see cref="LineSeparator"/> to the function that splits one line with it.
    /// Populated once by the static constructor.
    /// </summary>
    public static Dictionary<LineSeparator, Func<string, string[]>> DictionaryOfLineSeparatorAndItsFunc =
        new Dictionary<LineSeparator, Func<string, string[]>>();

    // One cached regex per delimiter; the three patterns differ only by the delimiter character.
    private static readonly Regex TabFieldRegex = BuildFieldRegex('\t');
    private static readonly Regex SemicolonFieldRegex = BuildFieldRegex(';');
    private static readonly Regex CommaFieldRegex = BuildFieldRegex(',');

    static CsvHelper()
    {
        DictionaryOfLineSeparatorAndItsFunc[LineSeparator.Unknown] = ParseLineNotSeparated;
        DictionaryOfLineSeparatorAndItsFunc[LineSeparator.Tab] = ParseLineTabSeparated;
        DictionaryOfLineSeparatorAndItsFunc[LineSeparator.Semicolon] = ParseLineSemicolonSeparated;
        DictionaryOfLineSeparatorAndItsFunc[LineSeparator.Comma] = ParseLineCommaSeparated;
    }

    // ******************************************************************
    /// <summary>Field delimiters this helper can detect and parse.</summary>
    public enum LineSeparator
    {
        /// <summary>No delimiter detected; each line is kept as a single value.</summary>
        Unknown = 0,
        Tab,
        Semicolon,
        Comma
    }

    // ******************************************************************
    /// <summary>
    /// Guesses the delimiter of <paramref name="oneLine"/> by splitting it with every
    /// supported delimiter and keeping the one that yields the most fields.
    /// </summary>
    /// <param name="oneLine">A representative line, typically the first line of the file.</param>
    /// <returns>
    /// The delimiter producing the most fields. Ties are resolved in the order
    /// Tab, Semicolon, Comma. <see cref="LineSeparator.Unknown"/> is returned when no
    /// delimiter yields more than one field.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="oneLine"/> is null.</exception>
    public static LineSeparator GuessCsvSeparator(string oneLine)
    {
        if (oneLine == null)
        {
            throw new ArgumentNullException(nameof(oneLine));
        }

        var candidates = new[]
        {
            Tuple.Create(LineSeparator.Tab, ParseLineTabSeparated(oneLine).Length),
            Tuple.Create(LineSeparator.Semicolon, ParseLineSemicolonSeparated(oneLine).Length),
            Tuple.Create(LineSeparator.Comma, ParseLineCommaSeparated(oneLine).Length)
        };

        // Starting at 1 means a delimiter only wins when it produces at least two fields.
        // The strict '>' keeps the first candidate on ties.
        LineSeparator best = LineSeparator.Unknown;
        int bestCount = 1;
        foreach (Tuple<LineSeparator, int> candidate in candidates)
        {
            if (candidate.Item2 > bestCount)
            {
                best = candidate.Item1;
                bestCount = candidate.Item2;
            }
        }

        return best;
    }

    // ******************************************************************
    /// <summary>Splits one comma-separated line into trimmed, unquoted field values.</summary>
    /// <param name="line">The line to split.</param>
    /// <returns>The field values; an empty array for an empty line.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="line"/> is null.</exception>
    public static string[] ParseLineCommaSeparated(string line)
    {
        return SplitLine(line, CommaFieldRegex, ',');
    }

    // ******************************************************************
    /// <summary>Splits one tab-separated line into trimmed, unquoted field values.</summary>
    /// <param name="line">The line to split.</param>
    /// <returns>The field values; an empty array for an empty line.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="line"/> is null.</exception>
    public static string[] ParseLineTabSeparated(string line)
    {
        return SplitLine(line, TabFieldRegex, '\t');
    }

    // ******************************************************************
    /// <summary>Splits one semicolon-separated line into trimmed, unquoted field values.</summary>
    /// <param name="line">The line to split.</param>
    /// <returns>The field values; an empty array for an empty line.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="line"/> is null.</exception>
    public static string[] ParseLineSemicolonSeparated(string line)
    {
        return SplitLine(line, SemicolonFieldRegex, ';');
    }

    // ******************************************************************
    /// <summary>Returns the whole line as a single value (used when no delimiter was detected).</summary>
    /// <param name="line">The line to wrap.</param>
    /// <returns>A one-element array containing <paramref name="line"/> unchanged.</returns>
    public static string[] ParseLineNotSeparated(string line)
    {
        return new[] { line };
    }

    // ******************************************************************
    /// <summary>Splits <paramref name="text"/> into lines and parses them as CSV rows.</summary>
    /// <param name="text">The complete CSV text.</param>
    /// <returns>One string array per non-blank line.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
    public static List<string[]> ParseText(string text)
    {
        if (text == null)
        {
            throw new ArgumentNullException(nameof(text));
        }

        // When the text uses CRLF, split on CRLF only so that a lone LF inside a field is
        // left untouched. Only text without any CRLF falls back to LF / CR as terminators.
        string[] terminators = text.Contains("\r\n")
            ? new[] { "\r\n" }
            : new[] { "\n", "\r" };

        string[] lines = text.Split(terminators, StringSplitOptions.None);
        return ParseString(lines);
    }

    // ******************************************************************
    /// <summary>
    /// Parses already-split lines as CSV rows. The delimiter is guessed from the first
    /// non-blank line and then applied to every line.
    /// </summary>
    /// <param name="lines">The lines of the CSV text.</param>
    /// <returns>One string array per non-blank line; blank and null lines are skipped.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="lines"/> is null.</exception>
    public static List<string[]> ParseString(string[] lines)
    {
        if (lines == null)
        {
            throw new ArgumentNullException(nameof(lines));
        }

        // A leading blank line carries no delimiter information, so skip to the first real one.
        string firstContentLine = lines.FirstOrDefault(l => !string.IsNullOrWhiteSpace(l));
        LineSeparator lineSeparator = firstContentLine == null
            ? LineSeparator.Unknown
            : GuessCsvSeparator(firstContentLine);

        Func<string, string[]> funcParse = DictionaryOfLineSeparatorAndItsFunc[lineSeparator];

        var result = new List<string[]>(lines.Length);
        foreach (string line in lines)
        {
            if (string.IsNullOrWhiteSpace(line))
            {
                continue;
            }

            result.Add(funcParse(line));
        }

        return result;
    }

    // ******************************************************************
    // Builds the field-matching regex for one delimiter. Each match is one field followed by
    // an optional delimiter; the field content is captured in the group named "x".
    // Pattern adapted from "jgr4" in
    // http://www.kimgentes.com/worshiptech-web-tools-page/2008/10/14/regex-pattern-for-parsing-csv-files-with-embedded-commas-dou.html
    private static Regex BuildFieldRegex(char delimiter)
    {
        string d = Regex.Escape(delimiter.ToString());

        string pattern =
            // Optional leading whitespace character that is NOT the delimiter itself
            // (otherwise a tab delimiter would be swallowed here and empty fields lost).
            @"[^\S" + d + @"]?" +
            // Empty field: we are standing directly on a delimiter.
            @"((?<x>(?=[" + d + @"]+))" +
            // Quoted field, where "" stands for an embedded quote.
            @"|""(?<x>([^""]|"""")+)""" +
            // Empty quoted field.
            @"|""(?<x>)""" +
            // Unquoted field.
            @"|(?<x>[^" + d + @"]+))" +
            // Delimiter that ends the field (absent on the last field).
            d + "?";

        return new Regex(pattern, RegexOptions.ExplicitCapture);
    }

    // Splits one line with the given field regex, trimming each value and collapsing "" to ".
    private static string[] SplitLine(string line, Regex fieldRegex, char delimiter)
    {
        if (line == null)
        {
            throw new ArgumentNullException(nameof(line));
        }

        MatchCollection matches = fieldRegex.Matches(line);
        var values = new List<string>(matches.Count + 1);
        Match last = null;
        foreach (Match m in matches)
        {
            values.Add(m.Groups["x"].Value.Trim().Replace("\"\"", "\""));
            last = m;
        }

        // The regex never produces a match for an empty field at the very end of the line.
        // If the final match consumed a delimiter as the last character of the line,
        // one more (empty) field follows it.
        if (last != null
            && last.Index + last.Length == line.Length
            && line[line.Length - 1] == delimiter)
        {
            values.Add(string.Empty);
        }

        return values.ToArray();
    }
    // ******************************************************************
}
}