I am trying to extract all text data from an Excel document in C# and am having performance issues. In the following code I open the Workbook, loop over all worksheets, and loop over every cell in each worksheet's used range, appending each cell's value to a string.
One thing that will speed it up is to use a StringBuilder instead of using += on the previous string.
Strings are immutable in C#, so every += allocates a brand-new string; you end up creating a ton of extra intermediate strings while building the final one.
Additionally, you may improve performance by looping over the row and column positions instead of looping over the index.
Here is the code changed with a StringBuilder and row, column positional looping:
/// <summary>
/// Opens the Excel workbook at <see cref="Path"/> through COM automation and
/// concatenates the value of every cell in each worksheet's used range into
/// <see cref="FullText"/>. No separator is inserted between cell values.
/// </summary>
public class ExcelFile
{
    /// <summary>Path of the workbook that the constructor opens.</summary>
    public string Path = @"C:\test.xlsx";
    private Excel.Application xl = new Excel.Application();
    private Excel.Workbook WB;
    /// <summary>Concatenated cell values of all worksheets, set by the constructor.</summary>
    public string FullText;
    private Excel.Range rng;
    // NOTE(review): the generic type arguments appear to be missing here (there is no
    // non-generic Dictionary type); the field is unused in this class, so the intended
    // key/value types cannot be determined from this code -- TODO confirm and restore.
    private Dictionary Variables;

    /// <summary>
    /// Opens the workbook, reads every worksheet's used range and stores the
    /// concatenated cell values in <see cref="FullText"/>. The workbook is closed
    /// without saving and Excel is asked to quit even when reading fails.
    /// </summary>
    public ExcelFile()
    {
        StringBuilder sb = new StringBuilder();
        try
        {
            WB = xl.Workbooks.Open(Path);
            xl.Visible = true;
            foreach (Excel.Worksheet CurrentWS in WB.Worksheets)
            {
                rng = CurrentWS.UsedRange;

                // Fetch the whole used range in one COM call instead of one call
                // per cell; the per-cell round trips dominate the run time.
                object values = rng.Value;
                AppendValues(sb, values);
            }
            FullText = sb.ToString();
        }
        finally
        {
            // Always release the workbook and the Excel instance, otherwise a
            // failure above leaves an orphaned Excel process behind.
            if (WB != null)
            {
                WB.Close(false);
            }
            xl.Quit();
        }
    }

    /// <summary>
    /// Appends the values read from a range to <paramref name="sb"/> in
    /// row-major order (all columns of a row before moving to the next row).
    /// </summary>
    /// <param name="sb">Builder that receives the cell values.</param>
    /// <param name="values">
    /// Either a two-dimensional array of cell values (multi-cell range) or a
    /// single scalar / null (single-cell range).
    /// </param>
    private static void AppendValues(StringBuilder sb, object values)
    {
        object[,] grid = values as object[,];
        if (grid == null)
        {
            // Single-cell range: the value comes back as a scalar (or null for
            // an empty cell). Append(object) ignores null.
            sb.Append(values);
            return;
        }

        // Arrays returned by Excel are not zero-based, so iterate using the
        // array's own bounds rather than assuming a start index.
        int firstRow = grid.GetLowerBound(0);
        int lastRow = grid.GetUpperBound(0);
        int firstColumn = grid.GetLowerBound(1);
        int lastColumn = grid.GetUpperBound(1);

        for (int row = firstRow; row <= lastRow; row++)
        {
            for (int column = firstColumn; column <= lastColumn; column++)
            {
                // Empty cells are null; Append(object) appends nothing for them.
                sb.Append(grid[row, column]);
            }
        }
    }
}