From Excel to DataTable in C# with Open XML

前端 未结 7 2045
暖寄归人
暖寄归人 2020-11-29 00:29

I'm using Visual Studio 2008 and I need to create a DataTable from an Excel sheet using the Open XML SDK 2.0. I need to create it with the DataTable columns with t

相关标签:
7条回答
  • 2020-11-29 00:46

    I think this should do what you're asking. The second function is there just to handle shared strings, which I assume you have in your column headers. I'm not sure this is perfect, but I hope it helps.

    /// <summary>
    /// Loads the first worksheet of <c>..\..\example.xlsx</c> into a <see cref="DataTable"/>.
    /// The first row supplies the column names; every following row becomes a data row.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        DataTable dt = new DataTable();

        using (SpreadsheetDocument spreadSheetDocument = SpreadsheetDocument.Open(@"..\..\example.xlsx", false))
        {
            WorkbookPart workbookPart = spreadSheetDocument.WorkbookPart;
            IEnumerable<Sheet> sheets = workbookPart.Workbook.GetFirstChild<Sheets>().Elements<Sheet>();
            string relationshipId = sheets.First().Id.Value;
            WorksheetPart worksheetPart = (WorksheetPart)workbookPart.GetPartById(relationshipId);
            SheetData sheetData = worksheetPart.Worksheet.GetFirstChild<SheetData>();

            // Materialize once so the header lookup and the data loop share a single enumeration.
            List<Row> rows = sheetData.Descendants<Row>().ToList();
            if (rows.Count == 0)
            {
                return; // Empty sheet: there is no header row to build columns from.
            }

            // Header row -> column names.
            foreach (Cell headerCell in rows[0].Elements<Cell>())
            {
                dt.Columns.Add(GetCellValue(spreadSheetDocument, headerCell));
            }

            // Skip the header instead of adding it as data and removing it afterwards.
            foreach (Row row in rows.Skip(1))
            {
                DataRow tempRow = dt.NewRow();

                // Iterate the cells directly; the previous ElementAt(i - 1) threw for i == 0.
                int i = 0;
                foreach (Cell cell in row.Descendants<Cell>())
                {
                    // A data row may hold more cells than the header; add auto-named columns rather than fail.
                    while (dt.Columns.Count <= i)
                    {
                        dt.Columns.Add();
                    }

                    tempRow[i] = GetCellValue(spreadSheetDocument, cell);
                    i++;
                }

                dt.Rows.Add(tempRow);
            }
        }
    }
    
    
    /// <summary>
    /// Returns the text of a cell, resolving shared-string indices through the workbook's shared string table.
    /// </summary>
    /// <param name="document">The open spreadsheet document that owns <paramref name="cell"/>.</param>
    /// <param name="cell">The cell to read.</param>
    /// <returns>
    /// The cell text; an empty string when the cell has neither a value nor an inline string.
    /// </returns>
    public static string GetCellValue(SpreadsheetDocument document, Cell cell)
    {
        // A cell without a value element has a null CellValue; dereferencing it used to throw.
        if (cell.CellValue == null)
        {
            return cell.InlineString != null ? cell.InlineString.InnerText : string.Empty;
        }

        // InnerText (not InnerXml) so that characters such as '&' come back unescaped.
        string value = cell.CellValue.InnerText;

        if (cell.DataType != null && cell.DataType.Value == CellValues.SharedString)
        {
            SharedStringTablePart stringTablePart = document.WorkbookPart.SharedStringTablePart;
            if (stringTablePart == null || stringTablePart.SharedStringTable == null)
            {
                // No table to resolve against: fall back to the raw index text.
                return value;
            }

            int sharedIndex = Int32.Parse(value, System.Globalization.CultureInfo.InvariantCulture);
            return stringTablePart.SharedStringTable.ChildElements[sharedIndex].InnerText;
        }

        return value;
    }
    
    0 讨论(0)
  • 2020-11-29 00:48

    If a row contains null or empty cells, the values are read incorrectly.

    It works correctly only when every column is filled with data, which may not be true for every row.

    0 讨论(0)
  • 2020-11-29 00:53

    First, add ExcelUtility.cs to your project:

    ExcelUtility.cs

    using System.Data;
    using System.Linq;
    using DocumentFormat.OpenXml.Packaging;
    using DocumentFormat.OpenXml.Spreadsheet;
    
    namespace Core_Excel.Utilities
    {
        /// <summary>
        /// Helpers for loading worksheet data from an .xlsx workbook into a <see cref="DataTable"/>.
        /// </summary>
        static class ExcelUtility
        {
            /// <summary>
            /// Reads the first worksheet of the workbook at <paramref name="path"/>.
            /// Every row (including a header row, if the sheet has one) becomes a <see cref="DataRow"/>;
            /// auto-named columns are added on demand so that each cell lands in the column
            /// given by its cell reference.
            /// </summary>
            /// <param name="path">Path of the .xlsx file to open read-only.</param>
            /// <returns>The populated table; empty when the sheet has no rows.</returns>
            public static DataTable Read(string path)
            {
                var dt = new DataTable();

                using (var ssDoc = SpreadsheetDocument.Open(path, false))
                {
                    var sheets = ssDoc.WorkbookPart.Workbook.GetFirstChild<Sheets>().Elements<Sheet>();
                    var relationshipId = sheets.First().Id.Value;
                    var worksheetPart = (WorksheetPart) ssDoc.WorkbookPart.GetPartById(relationshipId);
                    var sheetData = worksheetPart.Worksheet.GetFirstChild<SheetData>();

                    foreach (var row in sheetData.Descendants<Row>()) // this also includes the header row
                    {
                        var tempRow = dt.NewRow();
                        var nextIndex = 0;

                        foreach (var cell in row.Descendants<Cell>())
                        {
                            var index = GetIndex(cell.CellReference);
                            if (index < 0)
                            {
                                // No usable reference on the cell: place it right after the previous one
                                // instead of indexing the row with -1.
                                index = nextIndex;
                            }

                            // Grow the table so that the target column exists.
                            for (var i = dt.Columns.Count; i <= index; i++)
                            {
                                dt.Columns.Add();
                            }

                            tempRow[index] = GetCellValue(ssDoc, cell);
                            nextIndex = index + 1;
                        }

                        dt.Rows.Add(tempRow);
                    }
                }

                // The former debug probe "dt.Rows[0][9]" was removed: it threw for any sheet
                // with fewer than ten columns or without rows.
                return dt;
            }

            /// <summary>
            /// Returns the text of a cell, resolving shared-string indices through the shared string table.
            /// </summary>
            /// <returns>The cell text, or an empty string when the cell has no value element.</returns>
            private static string GetCellValue(SpreadsheetDocument document, Cell cell)
            {
                // Cells without a value element have a null CellValue.
                if (cell.CellValue == null)
                {
                    return string.Empty;
                }

                // InnerText (not InnerXml) so that characters such as '&' come back unescaped.
                var value = cell.CellValue.InnerText;

                if (cell.DataType != null && cell.DataType.Value == CellValues.SharedString)
                {
                    var stringTablePart = document.WorkbookPart.SharedStringTablePart;
                    if (stringTablePart == null || stringTablePart.SharedStringTable == null)
                    {
                        return value;
                    }

                    return stringTablePart.SharedStringTable.ChildElements[int.Parse(value)].InnerText;
                }

                return value;
            }

            /// <summary>
            /// Converts the column letters at the start of a cell reference into a zero-based column index
            /// ("A1" -> 0, "Z9" -> 25, "AA3" -> 26). Letter case is ignored.
            /// </summary>
            /// <param name="name">A cell reference such as "B12".</param>
            /// <returns>The zero-based column index, or -1 when <paramref name="name"/> has no leading column letters.</returns>
            public static int GetIndex(string name)
            {
                if (string.IsNullOrWhiteSpace(name))
                {
                    return -1;
                }

                int index = 0;
                foreach (var ch in name)
                {
                    // Only A-Z are column letters; normalising the case keeps "a1" equal to "A1".
                    char upper = char.ToUpperInvariant(ch);
                    if (upper < 'A' || upper > 'Z')
                    {
                        break;
                    }

                    index = (upper - 'A' + 1) + index * 26;
                }

                return index - 1;
            }
        }
    }
    

    Usage :

    // Load the first worksheet of the workbook into a DataTable.
    const string path = "D:\\Documents\\test.xlsx";
    var dt = ExcelUtility.Read(path);
    

    then enjoy it!

    0 讨论(0)
  • 2020-11-29 00:56

    Hi, the above code works fine except for one change:

    replace the below line of code

    tempRow[i] = GetCellValue(spreadSheetDocument, row.Descendants<Cell>().ElementAt(i-1));
    

    with

    tempRow[i] = GetCellValue(spreadSheetDocument, row.Descendants<Cell>().ElementAt(i));
    

    If you use (i-1) it will throw an exception:

    specified argument was out of the range of valid values. parameter name index.
    
    0 讨论(0)
  • 2020-11-29 01:00

    This is my complete solution, in which empty cells are also taken into consideration.

    /// <summary>
    /// Reads a worksheet of an .xlsx workbook into a <see cref="DataTable"/>. Cells are looked up by
    /// column letter, so cells missing from a row leave an empty string in their own column
    /// instead of shifting the following values.
    /// </summary>
    public static class ExcelHelper
    {
        /// <summary>Returns the leading letters of a cell reference, e.g. "AB" for "AB12".</summary>
        private static string GetColumnLetters(string cellReference)
        {
            int length = 0;
            while (length < cellReference.Length && char.IsLetter(cellReference[length]))
            {
                length++;
            }
            return cellReference.Substring(0, length);
        }

        /// <summary>
        /// Returns the text of the cell in column <paramref name="cellColumnReference"/> (e.g. "A", "AB"),
        /// or an empty string when the row has no cell in that column.
        /// </summary>
        private static string GetCellValue(WorkbookPart wbPart, List<Cell> theCells, string cellColumnReference)
        {
            Cell theCell = null;
            foreach (Cell cell in theCells)
            {
                if (cell.CellReference == null || cell.CellReference.Value == null)
                {
                    continue;
                }

                // Compare the whole column part: a StartsWith("A") test would also match "AA1", "AB1", ...
                string column = GetColumnLetters(cell.CellReference.Value);
                if (string.Equals(column, cellColumnReference, StringComparison.OrdinalIgnoreCase))
                {
                    theCell = cell;
                    break;
                }
            }

            if (theCell == null)
            {
                return "";
            }

            // Prefer the value element: InnerText of the whole cell also contains the formula text
            // when the cell holds a formula.
            string value = theCell.CellValue != null ? theCell.CellValue.InnerText : theCell.InnerText;

            // Without a data type the text is returned as stored (numbers; dates as their serialized value).
            if (theCell.DataType == null)
            {
                return value;
            }

            switch (theCell.DataType.Value)
            {
                case CellValues.SharedString:
                    // For shared strings, look up the value in the shared strings table.
                    var stringTable = wbPart.GetPartsOfType<SharedStringTablePart>().FirstOrDefault();
                    // If the table is missing, return the index that is stored in the cell.
                    if (stringTable != null)
                    {
                        value = stringTable.SharedStringTable.ElementAt(int.Parse(value)).InnerText;
                    }
                    break;
                case CellValues.Boolean:
                    value = value == "0" ? "FALSE" : "TRUE";
                    break;
            }

            return value;
        }

        /// <summary>Returns the text of the cell in the 1-based column <paramref name="index"/>.</summary>
        private static string GetCellValue(WorkbookPart wbPart, List<Cell> theCells, int index)
        {
            return GetCellValue(wbPart, theCells, GetExcelColumnName(index));
        }

        /// <summary>Converts a 1-based column number into its letter name (1 -> "A", 27 -> "AA").</summary>
        private static string GetExcelColumnName(int columnNumber)
        {
            int dividend = columnNumber;
            string columnName = String.Empty;
            while (dividend > 0)
            {
                int modulo = (dividend - 1) % 26;
                columnName = Convert.ToChar(65 + modulo).ToString() + columnName;
                dividend = (dividend - modulo) / 26;
            }
            return columnName;
        }

        /// <summary>
        /// Loads a worksheet of an .xlsx file into a <see cref="DataTable"/>. The first row supplies the
        /// column names and the remaining rows become data rows.
        /// </summary>
        /// <param name="filePath">Path of the .xlsx file (only xlsx files are supported).</param>
        /// <param name="sheetName">Name of the sheet to read; null or empty selects the first sheet.</param>
        /// <returns>
        /// The populated table. On failure the error is written to the trace listeners and whatever
        /// was read up to that point (possibly an empty table) is returned.
        /// </returns>
        public static DataTable GetDataTableFromExcelFile(string filePath, string sheetName = "")
        {
            DataTable dt = new DataTable();
            try
            {
                using (SpreadsheetDocument document = SpreadsheetDocument.Open(filePath, false))
                {
                    WorkbookPart wbPart = document.WorkbookPart;
                    IEnumerable<Sheet> sheets = wbPart.Workbook.GetFirstChild<Sheets>().Elements<Sheet>();
                    Sheet sheet = string.IsNullOrEmpty(sheetName)
                        ? sheets.First()
                        : sheets.First(q => q.Name == sheetName);
                    WorksheetPart wsPart = (WorksheetPart)wbPart.GetPartById(sheet.Id.Value);
                    SheetData sheetdata = wsPart.Worksheet.Elements<SheetData>().FirstOrDefault();

                    List<Row> rows = sheetdata == null
                        ? new List<Row>()
                        : sheetdata.Descendants<Row>().ToList();
                    if (rows.Count == 0)
                    {
                        return dt;
                    }

                    // Get the header; its cell list is built once rather than once per column.
                    List<Cell> headerCells = rows[0].Elements<Cell>().ToList();
                    int totalHeaderCount = headerCells.Count;
                    for (int i = 1; i <= totalHeaderCount; i++)
                    {
                        dt.Columns.Add(GetCellValue(wbPart, headerCells, i));
                    }

                    // Everything after the header row is data. Skipping by position keeps this consistent
                    // with the header lookup above, which also uses the first row element.
                    foreach (Row r in rows.Skip(1))
                    {
                        DataRow tempRow = dt.NewRow();
                        List<Cell> rowCells = r.Elements<Cell>().ToList();

                        // Always walk the header columns: a row's own cell list has no entries for empty cells.
                        for (int i = 1; i <= totalHeaderCount; i++)
                        {
                            tempRow[i - 1] = GetCellValue(wbPart, rowCells, i);
                        }
                        dt.Rows.Add(tempRow);
                    }
                }
            }
            catch (Exception ex)
            {
                // Still best-effort (callers get the partial table), but no longer silent.
                System.Diagnostics.Trace.TraceError("GetDataTableFromExcelFile failed for '{0}': {1}", filePath, ex);
            }
            return dt;
        }
    }
    
    0 讨论(0)
  • 2020-11-29 01:07

    This solution works for spreadsheets without empty cells.

    To handle empty cells, you will need to replace this line:

    tempRow[i] = GetCellValue(spreadSheetDocument, row.Descendants<Cell>().ElementAt(i-1));
    

    with something like this:

    // Store the value under the column taken from the cell's own reference, not under its position in the row.
    Cell currentCell = row.Descendants<Cell>().ElementAt(i);
    tempRow[CellReferenceToIndex(currentCell)] = GetCellValue(spreadSheetDocument, currentCell);
    

    And add this method:

    /// <summary>
    /// Converts the column letters at the start of a cell's reference into a zero-based column index
    /// ("A1" -> 0, "Z9" -> 25, "AA3" -> 26).
    /// </summary>
    /// <param name="cell">The cell whose <c>CellReference</c> is read.</param>
    /// <returns>The zero-based column index, or -1 when the reference does not start with a letter A-Z.</returns>
    private static int CellReferenceToIndex(Cell cell)
    {
        int index = -1;

        // Invariant casing: the culture-sensitive ToUpper() does not map 'i' to 'I' under Turkish cultures.
        string reference = cell.CellReference.ToString().ToUpperInvariant();

        foreach (char ch in reference)
        {
            // Only A-Z are column letters; any other character ends the column part.
            if (ch < 'A' || ch > 'Z')
            {
                break;
            }

            index = (index + 1) * 26 + (ch - 'A');
        }

        return index;
    }
    
    0 讨论(0)
提交回复
热议问题