Does anyone know how I can import/export CSV and TXT files in Delphi in a way similar to .NET FileHelpers, taking spaces and quotes into account and handling traditional CSV escaping?
Here is some code I wrote that reads CSV files; it handles carriage returns inside quotes as well.
unit CSV;
interface
uses
SysUtils, Generics.Collections, IOUtils;
type
  { States of the character-level state machine driven by TCSVParser.ParseRow.
    The declaration order is kept as-is so ordinal values do not change. }
  TParseState = (
    psRowStart,           // positioned at the start of a row; required on entry to ParseRow
    psFieldStart,         // at the first character of a field; decides quoted vs. unquoted
    psUnquotedFieldData,  // accumulating an unquoted field until comma, CR/LF or end of content
    psQuotedFieldData,    // inside a quoted field; accumulating until a double quote
    psQFBranch,           // saw a double quote inside a quoted field; next char decides escape vs. end
    psEndOfQuotedField,   // immediately after the closing quote of a quoted field
    psQFEndSearch,        // skipping characters after a closing quote until comma or CR/LF
    psEndOfLine,          // consuming the run of CR/LF characters that terminates a row
    psEndOfFile           // content exhausted
  );
TCSVField = class
strict private
FText: String;
public
constructor Create;
destructor Destroy; override;
property Text: string read FText write FText;
procedure Clear;
end;
TCSVFieldList = class(TObjectList<TCSVField>)
public
function AddField(const AText: string): TCSVField;
procedure ClearFields;
end;
TCSVRow = class
strict private
FFields: TCSVFieldList;
public
constructor Create;
destructor Destroy; override;
property Fields: TCSVFieldList read FFields;
end;
TCSVParser = class
strict private
FRow: TCSVRow;
FContent: String;
FCIdx: Integer;
FParseState: TParseState;
FEOF: Boolean;
procedure ParseRow;
public
function First: Boolean;
function EOF: Boolean;
function Next: Boolean;
procedure OpenFile(AFileName: String);
procedure OpenText(const AText: string);
property Row: TCSVRow read FRow;
constructor Create;
destructor Destroy; override;
end;
implementation
{implementation of TCSVField}
{ Resets the field text to the empty string. }
procedure TCSVField.Clear;
begin
  Text := '';
end;
{ Creates a field; only the inherited constructor is run. }
constructor TCSVField.Create;
begin
  inherited;
end;
{ Destroys the field; only the inherited destructor is run. }
destructor TCSVField.Destroy;
begin
  inherited;
end;
{implementation of TCSVRow}
{ Creates the row together with its (initially empty) field list. }
constructor TCSVRow.Create;
begin
  inherited;
  FFields := TCSVFieldList.Create;
end;
{ Frees the field list, then runs the inherited destructor. }
destructor TCSVRow.Destroy;
begin
  FreeAndNil(FFields);
  inherited;
end;
{implementation of TCSVParser}
{ Creates a parser with no content loaded. The state starts at psEndOfFile,
  so a call to Next before any content is opened just reports EOF. }
constructor TCSVParser.Create;
begin
  inherited;
  FParseState := psEndOfFile;
  FCIdx := 1;
  FRow := TCSVRow.Create;
end;
{ Frees the row object, then runs the inherited destructor. }
destructor TCSVParser.Destroy;
begin
  FreeAndNil(FRow);
  inherited;
end;
{ Returns the end-of-file flag maintained by First and ParseRow. }
function TCSVParser.EOF: Boolean;
begin
  Exit(FEOF);
end;
{ Rewinds the parser to the first character of the content, clears the EOF
  flag and parses the first row. Returns the result of Next. }
function TCSVParser.First: Boolean;
begin
  FParseState := psRowStart;
  FCIdx := 1;
  FEOF := False;
  Result := Next;
end;
{ Advances to the next row. Does nothing when already at EOF.
  Returns True when EOF is not set after the call, False otherwise. }
function TCSVParser.Next: Boolean;
begin
  if FEOF then
    Exit(False);
  ParseRow;
  Result := not FEOF;
end;
{ Loads the complete text of AFileName with TFile.ReadAllText and hands it
  to OpenText. }
procedure TCSVParser.OpenFile(AFileName: String);
var
  FileText: string;
begin
  FileText := TFile.ReadAllText(AFileName);
  OpenText(FileText);
end;
{ Makes AText the content to parse, removes all fields left over from any
  previously parsed content and positions the parser on the first row. }
procedure TCSVParser.OpenText(const AText: string);
begin
  FRow.Fields.Clear;
  FContent := AText;
  First;
end;
{ Parses one row of FContent, starting at FCIdx, into FRow.Fields.

  On entry FParseState must be psRowStart (a row is about to be read) or
  psEndOfFile (the previous call consumed the last row). In the latter case
  the EOF flag is set, the field texts are blanked and nothing is parsed.

  Rules implemented by the state machine:
    - fields are separated by commas; rows are terminated by any run of
      CR/LF characters (so blank lines are skipped);
    - a field whose first character is a double quote is a quoted field; it
      may contain commas and CR/LF, and a doubled double quote stands for
      one literal double quote;
    - characters between the closing quote and the next comma or line break
      are ignored.

  NOTE: field objects are reused between rows. ClearFields only blanks their
  text, so FRow.Fields.Count does not shrink when a row has fewer fields
  than an earlier row; the surplus fields are left empty.

  Raises Exception when called in any other state, or when the content ends
  inside a quoted field. }
procedure TCSVParser.ParseRow;
var
  FieldIdx: Integer;

  { Stores AText in field slot FieldIdx, reusing the existing field object
    when there is one and appending a new one otherwise, then advances
    FieldIdx. }
  procedure AddField(const AText: string);
  begin
    if FieldIdx > FRow.Fields.Count - 1 then
      FRow.Fields.AddField(AText)
    else
      FRow.Fields[FieldIdx].Text := AText;
    Inc(FieldIdx);
  end;

var
  FieldText: string;
  Curr: Char;
  LastIdx: Integer;
begin
  if FParseState = psEndOfFile then
  begin
    FEOF := True;
    FRow.Fields.ClearFields;
    Exit;
  end;
  if FParseState <> psRowStart then
    raise Exception.Create('ParseRow requires ParseState = psRowStart');

  FieldIdx := 0;
  FRow.Fields.ClearFields;
  LastIdx := Length(FContent);

  // Each iteration handles the character at FCIdx in the current state and
  // then advances FCIdx. States that must not consume the character undo the
  // advance with Dec(FCIdx).
  while True do
  begin
    case FParseState of
      psRowStart:
        begin
          if FCIdx > LastIdx then
          begin
            // Nothing left to read: there is no row at all.
            FEOF := True;
            FParseState := psEndOfFile;
          end
          else
            FParseState := psFieldStart;
          Dec(FCIdx); // do not consume
        end;

      psFieldStart:
        begin
          FieldText := '';
          if FCIdx > LastIdx then
          begin
            // The content ended right after a comma: the row has a trailing
            // empty field. Checked here so FContent is never indexed past
            // its last character.
            AddField(FieldText);
            FParseState := psEndOfFile;
          end
          else if FContent[FCIdx] = '"' then
            FParseState := psQuotedFieldData // the opening quote is consumed
          else
          begin
            FParseState := psUnquotedFieldData;
            Dec(FCIdx); // do not consume
          end;
        end;

      psUnquotedFieldData:
        begin
          if FCIdx > LastIdx then
          begin
            AddField(FieldText);
            FParseState := psEndOfFile;
          end
          else
          begin
            Curr := FContent[FCIdx];
            case Curr of
              #13, #10:
                begin
                  AddField(FieldText);
                  FParseState := psEndOfLine;
                end;
              ',':
                begin
                  AddField(FieldText);
                  FParseState := psFieldStart;
                end;
            else
              FieldText := FieldText + Curr;
            end;
          end;
        end;

      psQuotedFieldData:
        begin
          if FCIdx > LastIdx then
            raise Exception.Create('EOF in quoted Field.');
          Curr := FContent[FCIdx];
          if Curr = '"' then
            FParseState := psQFBranch
          else
            FieldText := FieldText + Curr;
        end;

      psQFBranch:
        begin
          if FCIdx > LastIdx then
          begin
            // The quote just seen was the last character of the content, so
            // it closes the field. Checked here so FContent is never indexed
            // past its last character.
            AddField(FieldText);
            FParseState := psEndOfFile;
          end
          else
          begin
            Curr := FContent[FCIdx];
            if Curr = '"' then
            begin
              // Doubled quote: one literal quote inside the field.
              FieldText := FieldText + Curr;
              FParseState := psQuotedFieldData;
            end
            else
            begin
              // The previous quote closed the field.
              AddField(FieldText);
              FParseState := psEndOfQuotedField;
              Dec(FCIdx); // do not consume
            end;
          end;
        end;

      psEndOfQuotedField:
        begin
          if FCIdx > LastIdx then
            FParseState := psEndOfFile
          else
          begin
            Curr := FContent[FCIdx];
            if CharInSet(Curr, [#13, #10]) then
              FParseState := psEndOfLine
            else
            begin
              FParseState := psQFEndSearch;
              Dec(FCIdx); // do not consume
            end;
          end;
        end;

      psQFEndSearch:
        begin
          if FCIdx > LastIdx then
            FParseState := psEndOfFile
          else
          begin
            Curr := FContent[FCIdx];
            if CharInSet(Curr, [#13, #10]) then
              FParseState := psEndOfLine
            else if Curr = ',' then
              FParseState := psFieldStart;
            // skips white space or other until end
          end;
        end;

      psEndOfLine:
        begin
          if FCIdx > LastIdx then
            FParseState := psEndOfFile
          else
          begin
            Curr := FContent[FCIdx];
            if not CharInSet(Curr, [#13, #10]) then
            begin
              // First character of the next row: leave FCIdx pointing at it.
              FParseState := psRowStart;
              Break; // exit loop, we are done with this row
            end;
          end;
        end;

      psEndOfFile:
        begin
          Break;
        end;
    end;
    Inc(FCIdx);
  end;
end;
{ TCSVFieldList }
{ Creates a new field, appends it to the list, assigns AText as its text
  and returns the new field. }
function TCSVFieldList.AddField(const AText: string): TCSVField;
var
  NewField: TCSVField;
begin
  NewField := TCSVField.Create;
  Add(NewField);
  NewField.Text := AText;
  Result := NewField;
end;
{ Blanks the text of every field in the list. The field objects themselves
  remain in the list, so Count is unchanged. }
procedure TCSVFieldList.ClearFields;
var
  Idx: Integer;
begin
  for Idx := 0 to Count - 1 do
    Items[Idx].Clear;
end;
end.
It's pretty basic, but TStringList has Delimiter, DelimitedText, and QuoteChar properties, which address some of these issues.
Updated to add, per comments: Don't be tempted by the CommaText property, which has some surprising limitations for backwards compatibility with archaic versions of Delphi.
I came across "Delphi CSV File and String Reader Classes" for Delphi 2009 and later today on CodeProject. I've not tried it, but from the example code it looks kind of cool. It was written by Vladimir Nikitenko; the main class is TnvvCSVReader.
I wrote a Dataset (TTable-like object) for Jedi project called TJvCsvDataSet that follows all CSV parsing rules in a way similar to the CSV parsing rules used by Excel and various database and report tools that import and export CSVs.
You can install JVCL, drop a TJvCsvDataSet on your form.
It also contains a stream class that will very quickly load a file on disk, and parse it line by line, using the correct escape rules required for CSV files, even files that include carriage-return/line-feed codes encoded within a field.
You just drop it on your form, and set the FieldDefs property like this:
CsvFieldDef=ABC:%,DEF:#,GHI:$,....
There are special codes for integer, floating point, iso date-time, and other fields. It even allows you to map a wide-string field to a utf8 field in a CSV file.
There is a designtime property editor to save you from having to declare the CSV Field Defs using the syntax above, instead you can just pick visually what the column types are.
If you don't set up a CSV Field Def, it merely maps whatever exists in the file to string-type fields.
Jedi JVCL: http://jvcl.delphi-jedi.org/
JvCsvDataSet Docs:
http://help.delphi-jedi.org/unit.php?Id=3107
http://help.delphi-jedi.org/item.php?Id=174896
Following the VCL TXMLTransform logic, I wrote a TCsvTransform class helper that translates a .csv format structure to/from a TClientDataSet.
For more details about TCsvTransform, cf http://didier.cabale.free.fr/delphi.htm#uCsvTransform.
NB: I set the same field type symbols as Warren's TJvCsvDataSet
My framework has code for this in the CsiTextStreamsUnt.pas file (see http://www.csinnovations.com/framework_delphi.htm)