Convert XLS file to CSV but error number cell

痞子三分冷 提交于 2020-08-08 07:04:42

问题


The following code allows you to convert a file from an XLS to a CSV:

/// <summary>
/// Reads one worksheet of an XLS workbook through the Jet OLE DB provider into a
/// <see cref="DataTable"/> and writes its rows to a text file, separating fields with "~".
/// </summary>
/// <param name="excelFile">Path of the workbook to read.</param>
/// <param name="csvOutputFile">Path of the file to write; an existing file is deleted first.</param>
/// <param name="worksheetNumber">1-based index into the table list returned by the provider.</param>
/// <exception cref="FileNotFoundException"><paramref name="excelFile"/> does not exist.</exception>
/// <exception cref="ArgumentException">
/// The worksheet number is out of range, or the OLE DB provider reported an error
/// (the original <see cref="OleDbException"/> is kept as the inner exception).
/// </exception>
static void ConvertExcelToCsv(string excelFile, string csvOutputFile, int worksheetNumber = 1)
{
    // The input file must exist; a previous output file is removed so it is always rewritten.
    if (!File.Exists(excelFile)) throw new FileNotFoundException(excelFile);
    if (File.Exists(csvOutputFile))
    {
        File.Delete(csvOutputFile);
    }

    // connection string
    var cnnStr = String.Format("Provider=Microsoft.Jet.OLEDB.4.0;Data Source={0};Extended Properties=\"Excel 8.0;IMEX=1;HDR=NO\"", excelFile);

    // In-memory table that receives the worksheet contents.
    var dt = new DataTable();
    try
    {
        // "using" closes and disposes the connection and the adapter even when an exception is thrown.
        using (var cnn = new OleDbConnection(cnnStr))
        {
            cnn.Open();
            var schemaTable = cnn.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, null);

            // Reject 0 and negative numbers as well; they would otherwise index Rows[-1].
            if (worksheetNumber < 1 || schemaTable.Rows.Count < worksheetNumber)
            {
                throw new ArgumentException("The worksheet number provided cannot be found in the spreadsheet");
            }

            string worksheet = schemaTable.Rows[worksheetNumber - 1]["table_name"].ToString().Replace("'", "");
            Console.WriteLine("worksheet:" + worksheet);
            string sql = String.Format("select * from [{0}]", worksheet);
            Console.WriteLine("sql:" + sql);

            using (var da = new OleDbDataAdapter(sql, cnn))
            {
                da.Fill(dt);
            }
        }
    }
    catch (OleDbException e)
    {
        // The second string argument of ArgumentException is the parameter name, not a message,
        // so build the message explicitly and keep the provider error as the inner exception.
        throw new ArgumentException("Error during the conversion: " + e.Message, e);
    }

    // write out CSV data
    using (var wtr = new StreamWriter(csvOutputFile))
    {
        foreach (DataRow row in dt.Rows)
        {
            bool firstField = true;
            foreach (DataColumn col in dt.Columns)
            {
                // Separator goes between fields, never before the first one.
                if (!firstField)
                {
                    wtr.Write("~");
                }
                firstField = false;

                // Embedded double quotes are doubled; the field itself is not wrapped in quotes.
                wtr.Write(row[col].ToString().Replace("\"", "\"\""));
            }
            wtr.WriteLine();
        }
    }
}

Which converts the XLS to CSV file but when I have a cell of this type:

Cell of this type

Converts it this way:

Converts it this way

How do I change the code to have the same value as XLS?


回答1:


The suggested solution in my comment above will export an Excel Sheet as a CSV file using Excel Interop Objects.

Applying that on your case:

using System;
using System.IO;
using System.Runtime.InteropServices;
using Excel = Microsoft.Office.Interop.Excel;
//...

/// <summary>
/// Opens a workbook in Excel through the Interop objects and saves one of its
/// worksheets as a CSV file using Excel's own <c>SaveAs</c>.
/// Failures are reported on the console; no exception leaves this method.
/// </summary>
/// <param name="excelFile">Path of the workbook to open (opened read-only).</param>
/// <param name="csvOutputFile">Path of the CSV file to create; an existing file is deleted first.</param>
/// <param name="worksheetNumber">1-based index of the worksheet to export.</param>
public static void ConvertExcelToCsv(
    string excelFile,
    string csvOutputFile,
    int worksheetNumber = 1)
{
    Excel.Application xlApp = null;
    Excel.Workbook xlWorkBook = null;

    try
    {
        if (!File.Exists(excelFile))
            throw new FileNotFoundException(excelFile);

        if (File.Exists(csvOutputFile))
            File.Delete(csvOutputFile);

        // Excel runs as a separate process and resolves relative paths against its own
        // current directory, not this process's, so hand it absolute paths that match
        // the File.Exists/File.Delete checks above.
        var excelPath = Path.GetFullPath(excelFile);
        var csvPath = Path.GetFullPath(csvOutputFile);

        xlApp = new Excel.Application();
        xlWorkBook = xlApp.Workbooks.Open(excelPath, Type.Missing, true);

        // An out-of-range index makes the Worksheets indexer throw a COM error instead of
        // returning null, so check the range first to get the dedicated message below.
        var xlSheets = xlWorkBook.Worksheets;
        if (worksheetNumber < 1 || worksheetNumber > xlSheets.Count)
            throw new ArgumentException();

        var xlSheet = xlSheets[worksheetNumber] as Excel.Worksheet;

        if (xlSheet is null)
            throw new ArgumentException();

        xlSheet.SaveAs(csvPath, Excel.XlFileFormat.xlCSV);
    }
    catch (FileNotFoundException)
    {
        Console.WriteLine($"'{excelFile}' does not exist!");
    }
    catch (ArgumentException)
    {
        Console.WriteLine("The worksheet number provided does not exist.");
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.Message);
    }
    finally
    {
        // Close without saving changes to the source workbook, then shut Excel down.
        xlWorkBook?.Close(false);
        xlApp?.Quit();

        if (xlWorkBook != null) Marshal.FinalReleaseComObject(xlWorkBook);
        if (xlApp != null) Marshal.FinalReleaseComObject(xlApp);

        xlWorkBook = null;
        xlApp = null;

        // Let the runtime finalize the remaining COM wrappers created above.
        GC.Collect();
        GC.WaitForPendingFinalizers();
    }
}

However, TextInfo.ListSeparator will be used as the delimiter. If you need a specific character such as ~ as the separator, you can use the same approach:

using System;
using System.IO;
using System.Linq;
using System.Globalization;
using System.Runtime.InteropServices;
using Excel = Microsoft.Office.Interop.Excel;
//...

/// <summary>
/// Opens a workbook in Excel through the Interop objects and writes the used range of
/// one worksheet to a text file, one line per row, joining the cell values
/// (<c>Value2</c>) with <paramref name="delimiter"/>.
/// Failures are reported on the console; no exception leaves this method.
/// </summary>
/// <param name="excelFile">Path of the workbook to open (opened read-only).</param>
/// <param name="csvOutputFile">Path of the text file to create; an existing file is deleted first.</param>
/// <param name="worksheetNumber">1-based index of the worksheet to export.</param>
/// <param name="delimiter">
/// Field separator; when null, the current culture's list separator is used.
/// </param>
public static void ConvertExcelToCsv(
    string excelFile,
    string csvOutputFile,
    int worksheetNumber = 1,
    string delimiter = null)
{
    Excel.Application xlApp = null;
    Excel.Workbook xlWorkBook = null;

    try
    {
        if (!File.Exists(excelFile))
            throw new FileNotFoundException(excelFile);

        if (File.Exists(csvOutputFile))
            File.Delete(csvOutputFile);

        xlApp = new Excel.Application();

        // Excel runs as a separate process and resolves relative paths against its own
        // current directory, so pass the absolute path that File.Exists checked above.
        xlWorkBook = xlApp.Workbooks.Open(Path.GetFullPath(excelFile), Type.Missing, true);

        // An out-of-range index makes the Worksheets indexer throw a COM error instead of
        // returning null, so check the range first to get the dedicated message below.
        var xlSheets = xlWorkBook.Worksheets;
        if (worksheetNumber < 1 || worksheetNumber > xlSheets.Count)
            throw new ArgumentException();

        var xlSheet = xlSheets[worksheetNumber] as Excel.Worksheet;

        if (xlSheet is null)
            throw new ArgumentException();

        if (delimiter is null)
            delimiter = CultureInfo.CurrentCulture.TextInfo.ListSeparator;

        var xlRange = xlSheet.UsedRange;

        // Fetch every value in a single COM call instead of one call per cell.
        // A multi-cell range yields a two-dimensional array; a single-cell range
        // yields the bare value (or null when the cell is empty).
        object rawValues = xlRange.Value2;
        var cells = rawValues as object[,];

        using (var sw = new StreamWriter(csvOutputFile))
        {
            if (cells == null)
            {
                sw.WriteLine(rawValues?.ToString() ?? string.Empty);
            }
            else
            {
                // The array returned by Excel is not zero-based, so read the bounds from it.
                int firstRow = cells.GetLowerBound(0), lastRow = cells.GetUpperBound(0);
                int firstCol = cells.GetLowerBound(1), lastCol = cells.GetUpperBound(1);
                var fields = new string[lastCol - firstCol + 1];

                // Start at firstRow + 1 instead if the first row is a header row
                // and you want to skip it.
                for (int r = firstRow; r <= lastRow; r++)
                {
                    for (int c = firstCol; c <= lastCol; c++)
                        fields[c - firstCol] = cells[r, c]?.ToString() ?? string.Empty;

                    sw.WriteLine(string.Join(delimiter, fields));
                }
            }
        }
    }
    catch (FileNotFoundException)
    {
        Console.WriteLine($"'{excelFile}' does not exist!");
    }
    catch (ArgumentException)
    {
        Console.WriteLine("The worksheet number provided does not exist.");
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.Message);
    }
    finally
    {
        // Close without saving changes to the source workbook, then shut Excel down.
        xlWorkBook?.Close(false);
        xlApp?.Quit();

        if (xlWorkBook != null) Marshal.FinalReleaseComObject(xlWorkBook);
        if (xlApp != null) Marshal.FinalReleaseComObject(xlApp);

        xlWorkBook = null;
        xlApp = null;

        // Let the runtime finalize the remaining COM wrappers created above.
        GC.Collect();
        GC.WaitForPendingFinalizers();
    }
}

Note that processing time grows with the number of cells. However, you will get the values as they are, regardless of whether the sheet has mixed-type columns or not.

Also, you might want to try the OleDb way for faster processing. Instead of filling a DataTable and looping over the columns and rows again to write the output lines, use the OleDbDataReader to get the values of each row from the sheet, join them with the delimiter, and pass the resulting string to the StreamWriter.WriteLine method:

using System;
using System.IO;
using System.Linq;
using System.Data.OleDb;
using System.Globalization;
//...

/// <summary>
/// Streams one worksheet of an XLS workbook through the Jet OLE DB provider and writes
/// each row to a text file, joining the field values with <paramref name="delimiter"/>.
/// Failures are reported on the console; no exception leaves this method.
/// </summary>
/// <param name="excelFile">Path of the workbook to read.</param>
/// <param name="csvOutputFile">Path of the text file to create; an existing file is deleted first.</param>
/// <param name="worksheetNumber">1-based index into the table list returned by the provider.</param>
/// <param name="delimiter">
/// Field separator; when null, the current culture's list separator is used.
/// </param>
public static void ConvertExcelToCsv(
    string excelFile,
    string csvOutputFile,
    int worksheetNumber = 1,
    string delimiter = null)
{
    try
    {
        if (!File.Exists(excelFile))
            throw new FileNotFoundException(excelFile);

        if (File.Exists(csvOutputFile))
            File.Delete(csvOutputFile);

        if (delimiter is null)
            delimiter = CultureInfo.CurrentCulture.TextInfo.ListSeparator;

        // HDR=No: the first row is data, not column names, so it is exported too.
        // IMEX=1: import mode, used here so mixed-type columns are read as text.
        var cnnStr = $"Provider=Microsoft.Jet.OLEDB.4.0;Data Source={excelFile};" +
            "Extended Properties='Excel 8.0;HDR=No;IMEX=1;'";

        using (var cnn = new OleDbConnection(cnnStr))
        {
            cnn.Open();

            var schemaTable = cnn.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, null);

            // Reject 0 and negative numbers as well; they would otherwise index Rows[-1].
            if (worksheetNumber < 1 || schemaTable.Rows.Count < worksheetNumber)
                throw new ArgumentException();

            var worksheet = schemaTable.Rows[worksheetNumber - 1]["table_name"]
                .ToString().Replace("'", "");

            using (var cmd = new OleDbCommand($"SELECT * FROM [{worksheet}]", cnn))
            using (var r = cmd.ExecuteReader())
            using (var sw = new StreamWriter(csvOutputFile))
            {
                // One buffer is enough: GetValues overwrites every slot on each row.
                var values = new object[r.FieldCount];

                while (r.Read())
                {
                    r.GetValues(values);
                    sw.WriteLine(string.Join(delimiter, values));
                }
            }
        }
    }
    catch (FileNotFoundException)
    {
        Console.WriteLine($"'{excelFile}' does not exist!");
    }
    catch (ArgumentException)
    {
        Console.WriteLine("The worksheet number provided does not exist.");
    }
    catch (Exception ex)
    {
        // Covers OleDbException as well; provider errors are reported by message only.
        Console.WriteLine(ex.Message);
    }
}

The caller:

/// <summary>
/// Example driver: exports the first sheet of "XlsFile.xls" to a file with the same
/// name and a "csv" extension, using "~" as the field separator.
/// </summary>
void TheCaller()
{
    const string separator = "~";
    const int sheetIndex = 1;

    string sourcePath = "XlsFile.xls";
    string targetPath = Path.ChangeExtension(sourcePath, "csv");

    Console.WriteLine("Exporting...");

    // For the first code snippet (which has no delimiter parameter), call
    // ConvertExcelToCsv(sourcePath, targetPath, sheetIndex) instead.
    ConvertExcelToCsv(sourcePath, targetPath, sheetIndex, separator);

    Console.WriteLine("Done...!");
}

Or you might need an asynchronous call especially for the first two options:

/// <summary>
/// Example driver: same export as the synchronous caller, but the conversion is
/// handed to <c>Task.Run</c> and awaited so the calling thread is not blocked.
/// </summary>
/// <remarks>
/// NOTE(review): this is <c>async void</c>, so callers cannot await it or observe an
/// exception thrown from it; consider returning a Task if no event-handler signature is required.
/// </remarks>
async void TheCaller()
{
    const string separator = "~";
    const int sheetIndex = 1;

    string sourcePath = "XlsFile.xls";
    string targetPath = Path.ChangeExtension(sourcePath, "csv");

    Console.WriteLine("Exporting...");

    Action export = () => ConvertExcelToCsv(sourcePath, targetPath, sheetIndex, separator);
    await Task.Run(export);

    Console.WriteLine("Done...!");
}

Notes

1. To use the Excel Interop objects, add a reference to Microsoft.Office.Interop.Excel.
2. You might want to check this out.



来源:https://stackoverflow.com/questions/62819631/convert-xls-file-to-csv-but-error-number-cell

标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!