问题
The following code allows you to convert a file from an XLS
to a CSV
:
static void ConvertExcelToCsv(string excelFile, string csvOutputFile, int worksheetNumber = 1)
{
//Checks if the two files required exist or not and then throws an exception.
if (!File.Exists(excelFile)) throw new FileNotFoundException(excelFile);
if (File.Exists(csvOutputFile))
{
File.Delete(csvOutputFile);
}
// connection string
var cnnStr = String.Format("Provider=Microsoft.Jet.OLEDB.4.0;Data Source={0};Extended Properties=\"Excel 8.0;IMEX=1;HDR=NO\"", excelFile);
//Creates a new OleDbConnection with an argument of cnn
var cnn = new OleDbConnection(cnnStr);
//creates new datatable in memory to store the read excel spreadsheet
var dt = new DataTable();
try
{
//Opens the new connection called "cnn".
cnn.Open();
var schemaTable = cnn.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, null);
if (schemaTable.Rows.Count < worksheetNumber) throw new ArgumentException("The worksheet number provided cannot be found in the spreadsheet");
string worksheet = schemaTable.Rows[worksheetNumber - 1]["table_name"].ToString().Replace("'", "");
Console.WriteLine("worksheet:" + worksheet);
string sql = String.Format("select * from [{0}]", worksheet).ToString();
//string sql = worksheet.ToString();
Console.WriteLine("sql:" + sql);
var da = new OleDbDataAdapter(sql, cnn);
da.Fill(dt);
}
catch (OleDbException e)
{
throw new ArgumentException(e.Message, "Error during the conversion");
}
finally
{
// free resources
cnn.Close();
}
// write out CSV data
using (var wtr = new StreamWriter(csvOutputFile))
{
foreach (DataRow row in dt.Rows)
{
bool firstLine = true;
foreach (DataColumn col in dt.Columns)
{
if (!firstLine)
{
wtr.Write("~");
}
else
{
firstLine = false;
}
var data = row[col.ColumnName].ToString().Replace("\"", "\"\"");
// wtr.Write(String.Format("{0}", data));
wtr.Write(data.ToString());
}
wtr.WriteLine();
}
}
}
Which converts the XLS
to CSV
file but when I have a cell of this type:
Converts it this way:
How do I change the code to have the same value as XLS
?
回答1:
The suggested solution in my comment above will export an Excel Sheet as a CSV
file using Excel Interop Objects.
Applying that on your case:
using System;
using System.IO;
using System.Runtime.InteropServices;
using Excel = Microsoft.Office.Interop.Excel;
//...
public static void ConvertExcelToCsv(
string excelFile,
string csvOutputFile,
int worksheetNumber = 1)
{
Excel.Application xlApp = null;
Excel.Workbook xlWorkBook = null;
try
{
if (!File.Exists(excelFile))
throw new FileNotFoundException(excelFile);
if (File.Exists(csvOutputFile))
File.Delete(csvOutputFile);
xlApp = new Excel.Application();
xlWorkBook = xlApp.Workbooks.Open(excelFile, Type.Missing, true);
var xlSheet = xlWorkBook.Worksheets[worksheetNumber] as Excel.Worksheet;
if (xlSheet is null)
throw new ArgumentException();
xlSheet.SaveAs(csvOutputFile, Excel.XlFileFormat.xlCSV);
}
catch (FileNotFoundException)
{
Console.WriteLine($"'{excelFile}' does not exist!");
}
catch (ArgumentException)
{
Console.WriteLine("The worksheet number provided does not exist.");
}
catch (Exception ex)
{
Console.WriteLine(ex.Message);
}
finally
{
xlWorkBook?.Close(false);
xlApp?.Quit();
if (xlWorkBook != null) Marshal.FinalReleaseComObject(xlWorkBook);
if (xlApp != null) Marshal.FinalReleaseComObject(xlApp);
xlWorkBook = null;
xlApp = null;
GC.Collect();
GC.WaitForPendingFinalizers();
}
}
However the TextInfo.ListSeparator will be used as the delimiter. If you need to use a specific character like ~ as separator, using the same approach:
using System;
using System.IO;
using System.Linq;
using System.Globalization;
using System.Runtime.InteropServices;
using Excel = Microsoft.Office.Interop.Excel;
//...
public static void ConvertExcelToCsv(
string excelFile,
string csvOutputFile,
int worksheetNumber = 1,
string delimiter = null)
{
Excel.Application xlApp = null;
Excel.Workbook xlWorkBook = null;
try
{
if (!File.Exists(excelFile))
throw new FileNotFoundException(excelFile);
if (File.Exists(csvOutputFile))
File.Delete(csvOutputFile);
xlApp = new Excel.Application();
xlWorkBook = xlApp.Workbooks.Open(excelFile, Type.Missing, true);
var xlSheet = xlWorkBook.Worksheets[worksheetNumber] as Excel.Worksheet;
if (xlSheet is null)
throw new ArgumentException();
if (delimiter is null)
delimiter = CultureInfo.CurrentCulture.TextInfo.ListSeparator;
var xlRange = xlSheet.UsedRange;
using (var sw = new StreamWriter(csvOutputFile))
//Use:
//foreach (var r in xlRange.Rows.Cast<Excel.Range>().Skip(1))
//If the first row is a header row and you want to skip it...
foreach (Excel.Range row in xlRange.Rows)
sw.WriteLine(string.Join(delimiter, row.Cells.Cast<Excel.Range>()
.Select(x => x.Value2)));
}
catch (FileNotFoundException)
{
Console.WriteLine($"'{excelFile}' does not exist!");
}
catch (ArgumentException)
{
Console.WriteLine("The worksheet number provided does not exist.");
}
catch (Exception ex)
{
Console.WriteLine(ex.Message);
}
finally
{
xlWorkBook?.Close(false);
xlApp?.Quit();
if (xlWorkBook != null) Marshal.FinalReleaseComObject(xlWorkBook);
if (xlApp != null) Marshal.FinalReleaseComObject(xlApp);
xlWorkBook = null;
xlApp = null;
GC.Collect();
GC.WaitForPendingFinalizers();
}
}
Note that, the more cells the more processing time. However, you will get the values as they are and regardless whether the Sheet has mixed-type columns or not.
Also, you might want to try the OleDb
way for faster processing. Instead of filling a DataTable and looping over the columns and rows again to write the output lines, use the OleDbDataReader to get the values of each row from the Sheet, concatenate and separate them by the delimiter, and pass the string to the SteamWriter.WriteLine method:
using System;
using System.IO;
using System.Linq;
using System.Data.OleDb;
using System.Globalization;
//...
public static void ConvertExcelToCsv(
string excelFile,
string csvOutputFile,
int worksheetNumber = 1,
string delimiter = null)
{
try
{
if (!File.Exists(excelFile))
throw new FileNotFoundException(excelFile);
if (File.Exists(csvOutputFile))
File.Delete(csvOutputFile);
if (delimiter is null)
delimiter = CultureInfo.CurrentCulture.TextInfo.ListSeparator;
var cnnStr = $"Provider=Microsoft.Jet.OLEDB.4.0;Data Source={excelFile};" +
$"Extended Properties='Excel 8.0;HDR=Mo;IMEX=1;'";
using (var cnn = new OleDbConnection(cnnStr))
{
cnn.Open();
var schemaTable = cnn.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, null);
if (schemaTable.Rows.Count < worksheetNumber)
throw new ArgumentException();
var worksheet = schemaTable.Rows[worksheetNumber - 1]["table_name"]
.ToString().Replace("'", "");
using (var cmd = new OleDbCommand($"SELECT * FROM [{worksheet}]", cnn))
using (var r = cmd.ExecuteReader())
using (var sw = new StreamWriter(csvOutputFile))
while (r.Read())
{
var values = new object[r.FieldCount];
r.GetValues(values);
sw.WriteLine(string.Join(delimiter, values));
}
}
}
catch (FileNotFoundException)
{
Console.WriteLine($"'{excelFile}' does not exist!");
}
catch (ArgumentException)
{
Console.WriteLine("The worksheet number provided does not exist.");
}
catch (OleDbException ex)
{
Console.WriteLine(ex.Message);
}
catch (Exception ex)
{
Console.WriteLine(ex.Message);
}
}
The caller:
void TheCaller()
{
var xlFile = "XlsFile.xls";
var csvFile = Path.ChangeExtension(xlFile, "csv");
var delimiter = "~";
var sheetNumber = 1;
Console.WriteLine("Exporting...");
//ConvertExcelToCsv(xlFile, csvFile, sheetNumber); //To call the first code snippet.
ConvertExcelToCsv(xlFile, csvFile, sheetNumber, delimiter);
Console.WriteLine("Done...!");
}
Or you might need an asynchronous call especially for the first two options:
async void TheCaller()
{
var xlFile = "XlsFile.xls";
var csvFile = Path.ChangeExtension(xlFile, "csv");
var delimiter = "~";
var sheetNumber = 1;
Console.WriteLine("Exporting...");
await Task.Run(() => ConvertExcelToCsv(xlFile, csvFile, sheetNumber, delimiter));
Console.WriteLine("Done...!");
}
Notes
1. To use Excel Interop Objets, add reference to the Microsoft.Office.Interop.Excel
2. You might want to check this out.
来源:https://stackoverflow.com/questions/62819631/convert-xls-file-to-csv-but-error-number-cell