How to parse excel rows back to types using EPPlus

后端 未结 2 1965
南旧
南旧 2020-12-01 07:54

EPPlus has a convenient LoadFromCollection method to get data of my own type into a worksheet.

For example if I have a class:

p         


        
相关标签:
2条回答
  • 2020-12-01 08:34

    Unfortunately, there is no such method native to EPPlus. It's a tough nut to crack, since you would have to use reflection if you truly want it to be generic. And because Excel stores all numbers and dates as doubles, you have to deal with a lot of unboxing and type checks.

    This is something I have been working on. It's an extension method that will do it via generics. It works, but only under limited testing, so make sure you check it yourself. I can't guarantee it is the most optimized (yet), but it is pretty decent at this point. You would use it like this:

    // The returned sequence is lazy: rows are converted when it is enumerated, so enumerate it
    // (or call ToList()) while the worksheet's package is still open.
    IEnumerable<TestObject> newcollection = worksheet.ConvertSheetToObjects<TestObject>();
    

    The extension:

    /// <summary>
    /// Maps the data rows of <paramref name="worksheet"/> onto new instances of <typeparamref name="T"/>.
    /// The first populated row is treated as the header; a header cell whose text equals the name of a
    /// public property of <typeparamref name="T"/> binds that worksheet column to that property.
    /// Every following populated row produces one object.
    /// </summary>
    /// <typeparam name="T">Target type; must have a public parameterless constructor.</typeparam>
    /// <param name="worksheet">Worksheet to read.</param>
    /// <returns>
    /// A lazily evaluated sequence with one item per data row. Empty when the sheet has no data rows.
    /// The cell values themselves are captured when this method is called.
    /// </returns>
    /// <exception cref="ArgumentException">A numeric cell bound to a DateTime property is smaller than 1.</exception>
    /// <exception cref="NotImplementedException">
    /// A numeric cell is bound to a property that is not Int32, double or DateTime.
    /// </exception>
    public static IEnumerable<T> ConvertSheetToObjects<T>(this ExcelWorksheet worksheet) where T:new()
    {
        //DateTime Conversion (Excel serial number -> DateTime)
        var convertDateTime = new Func<double, DateTime>(excelDate =>
        {
            if (excelDate < 1)
                throw new ArgumentException("Excel dates cannot be smaller than 1.");

            var dateOfReference = new DateTime(1900, 1, 1);

            //Serial 1 is 1900-01-01. Serials above 60 lose one extra day because Excel's
            //1900 date system counts a 29 February 1900 that never existed (serial 60).
            if (excelDate > 60d)
                excelDate = excelDate - 2;
            else
                excelDate = excelDate - 1;
            return dateOfReference.AddDays(excelDate);
        });

        //Get the properties of T (no need to construct an instance just to ask for its type)
        var tprops = typeof(T)
            .GetProperties()
            .ToList();

        //Cells only contains references to cells with actual data, so a blank cell is simply
        //missing from its row. Capture the real column number of every cell so values can be
        //looked up by column instead of by position within the row.
        var groups = worksheet.Cells
            .Select(cell => new { Row = cell.Start.Row, Column = cell.Start.Column, Value = cell.Value })
            .GroupBy(cell => cell.Row)
            .ToList();

        //Assume first row has the column names; keep only those that match a property of T
        var colnames = groups
            .Take(1)
            .SelectMany(header => header)
            .Where(hcell => hcell.Value != null)
            .Select(hcell => new { Name = hcell.Value.ToString(), Column = hcell.Column })
            .Where(o => tprops.Any(p => p.Name == o.Name))
            .Select(o => new { Name = o.Name, Column = o.Column, Property = tprops.First(p => p.Name == o.Name) })
            .ToList();

        //Everything after the header is data
        var collection = groups
            .Skip(1) //Exclude header
            .Select(row =>
            {
                var tnew = new T();
                var valuesByColumn = row.ToDictionary(c => c.Column, c => c.Value);

                colnames.ForEach(colname =>
                {
                    //A missing or empty cell leaves the property at its default value
                    object val;
                    if (!valuesByColumn.TryGetValue(colname.Column, out val) || val == null)
                        return;

                    var prop = colname.Property;

                    //This is the real wrinkle to using reflection - Excel stores all numbers as double including int.
                    //The check is made per cell rather than once per column from the second row.
                    if (val is double)
                    {
                        //Unbox it
                        var unboxedVal = (double) val;

                        //FAR FROM A COMPLETE LIST!!!
                        if (prop.PropertyType == typeof (Int32))
                            prop.SetValue(tnew, (int) unboxedVal);
                        else if (prop.PropertyType == typeof (double))
                            prop.SetValue(tnew, unboxedVal);
                        else if (prop.PropertyType == typeof (DateTime))
                            prop.SetValue(tnew, convertDateTime(unboxedVal));
                        else
                            throw new NotImplementedException(String.Format("Type '{0}' not implemented yet!", prop.PropertyType.Name));
                    }
                    else
                    {
                        //Anything else (typically a string) is assigned as-is
                        prop.SetValue(tnew, val);
                    }
                });

                return tnew;
            });

        //Send it back
        return collection;
    }
    

    A FULL example:

    /// <summary>
    /// Round-trip demo: writes ten TestObject rows to an in-memory workbook with LoadFromCollection,
    /// reads them back through ConvertSheetToObjects and prints each one to the console.
    /// </summary>
    [TestMethod]
    public void Read_To_Collection_Test()
    {
        //Ten sample objects to push through the workbook
        var objectcollection = Enumerable.Range(0, 10)
            .Select(i => new TestObject
            {
                Col1 = i,
                Col2 = i*10,
                Col3 = Path.GetRandomFileName(),
                Col4 = DateTime.Now.AddDays(i)
            })
            .ToList();

        //Serialize the objects into an in-memory workbook (header row included)
        byte[] bytes;
        using (var pck = new ExcelPackage())
        {
            var sheet = pck.Workbook.Worksheets.Add("data");
            sheet.Cells.LoadFromCollection(objectcollection, true);
            bytes = pck.GetAsByteArray();
        }

        //Reopen the workbook and turn the sheet back into objects
        using (var pck = new ExcelPackage(new MemoryStream(bytes)))
        {
            var sheet = pck.Workbook.Worksheets["data"];
            var roundTripped = sheet.ConvertSheetToObjects<TestObject>().ToList();

            foreach (var to in roundTripped)
            {
                Console.WriteLine("{{ Col1:{0}, Col2: {1}, Col3: \"{2}\", Col4: {3} }}", to.Col1, to.Col2, to.Col3, to.Col4.ToShortDateString());
            }
        }
    }
    
    /// <summary>
    /// Sample type used by Read_To_Collection_Test: two ints, a string and a date.
    /// The property names (Col1..Col4) are the header names ConvertSheetToObjects matches against.
    /// </summary>
    public class TestObject
    {
        // Filled with the loop index by the test.
        public int Col1 { get; set; }
        // Filled with the loop index times ten by the test.
        public int Col2 { get; set; }
        // Filled with a random file name by the test.
        public string Col3 { get; set; }
        // Filled with the current time plus the loop index in days by the test.
        public DateTime Col4 { get; set; }
    }
    

    Console Output:

    { Col1:0, Col2: 0, Col3: "wrulvxbx.wdv", Col4: 10/30/2015 }
    { Col1:1, Col2: 10, Col3: "wflh34yu.0pu", Col4: 10/31/2015 }
    { Col1:2, Col2: 20, Col3: "ps0f1jg0.121", Col4: 11/1/2015 }
    { Col1:3, Col2: 30, Col3: "skoc2gx1.2xs", Col4: 11/2/2015 }
    { Col1:4, Col2: 40, Col3: "urs3jnbb.ob1", Col4: 11/3/2015 }
    { Col1:5, Col2: 50, Col3: "m4l2fese.4yz", Col4: 11/4/2015 }
    { Col1:6, Col2: 60, Col3: "v3dselpn.rqq", Col4: 11/5/2015 }
    { Col1:7, Col2: 70, Col3: "v2ggbaar.r31", Col4: 11/6/2015 }
    { Col1:8, Col2: 80, Col3: "da4vd35p.msl", Col4: 11/7/2015 }
    { Col1:9, Col2: 90, Col3: "v5dtpuad.2ao", Col4: 11/8/2015 }
    
    0 讨论(0)
  • 2020-12-01 08:45

    Inspired by the above I took it a slightly different route.

    1. I created an attribute and mapped each property to a column.
    2. I use the DTO type to define what I expect each column to be
    3. Allow columns to not be required
    4. Use EPPlus to convert the types

    By doing so it allows me to use traditional model validation, and embrace changes to column headers

    -- Usage:

    // Both the file stream and the package are IDisposable; stacking the using statements
    // releases the package (and then the file handle) even if reading throws.
    using (FileStream fileStream = new FileStream(_fileName, FileMode.Open))
    using (ExcelPackage excel = new ExcelPackage(fileStream))
    {
        var workSheet = excel.Workbook.Worksheets[RESOURCES_WORKSHEET];

        // The sequence is enumerated (ToList) inside the using block, while the package is still open.
        IEnumerable<ExcelResourceDto> newcollection = workSheet.ConvertSheetToObjects<ExcelResourceDto>();
        newcollection.ToList().ForEach(x => Console.WriteLine(x.Title));
    }
    

    Dto that maps to excel

    /// <summary>
    /// DTO describing one worksheet row. Each property carries a Column attribute naming the
    /// worksheet column it is read from by ConvertSheetToObjects.
    /// </summary>
    /// <remarks>
    /// NOTE(review): [Required] is presumably the DataAnnotations attribute used for model
    /// validation after mapping; the mapping code shown here does not read it - confirm.
    /// </remarks>
    public class ExcelResourceDto
    {
        // Read from worksheet column 1.
        [Column(1)]
        [Required]
        public string Title { get; set; }
    
        // Read from worksheet column 2.
        [Column(2)]
        [Required]
        public string SearchTags { get; set; }
    }
    

    This is the attribute definition

    /// <summary>
    /// Associates a member with the worksheet column number its value is read from.
    /// </summary>
    /// <remarks>
    /// NOTE(review): declared with AttributeTargets.All, but the mapping extension in this file only
    /// inspects properties - consider narrowing to AttributeTargets.Property if nothing else uses it.
    /// </remarks>
    [AttributeUsage(AttributeTargets.All)]
    public class Column : System.Attribute
    {
        /// <summary>
        /// Column number passed to worksheet.Cells[row, column] by the mapping extension
        /// (presumably 1-based, as the DTO example starts at 1 - confirm).
        /// </summary>
        public int ColumnIndex { get; set; }
    
    
        /// <summary>Creates the attribute for the given column number.</summary>
        /// <param name="column">Value stored in <see cref="ColumnIndex"/>.</param>
        public Column(int column) 
        {
            ColumnIndex = column;
        }
    } 
    

    Extension class to handle mapping rows to DTO

    /// <summary>
    /// Extension methods that map EPPlus worksheet rows onto DTOs whose properties are tagged
    /// with the Column attribute.
    /// </summary>
    public static class EPPLusExtensions
    {
        /// <summary>
        /// Creates one <typeparamref name="T"/> per populated worksheet row, skipping the first
        /// populated row (the header). Each property tagged with the Column attribute is filled from
        /// the cell in that column, converted by EPPlus to the property's type.
        /// </summary>
        /// <typeparam name="T">DTO type with a public parameterless constructor.</typeparam>
        /// <param name="worksheet">Worksheet to read.</param>
        /// <returns>
        /// A lazily evaluated sequence; the worksheet is read each time the sequence is enumerated,
        /// so enumerate it while the owning package is still open.
        /// </returns>
        /// <remarks>
        /// Typed reads are done for int, long, double, float, decimal, bool and DateTime properties and
        /// their nullable forms; every other property type is read as a string. An empty cell assigns
        /// null, which leaves value-type properties at their default.
        /// </remarks>
        public static IEnumerable<T> ConvertSheetToObjects<T>(this ExcelWorksheet worksheet) where T : new()
        {

            Func<CustomAttributeData, bool> columnOnly = y => y.AttributeType == typeof(Column);

            var columns = typeof(T)
                .GetProperties()
                .Where(x => x.CustomAttributes.Any(columnOnly))
                .Select(p => new
                {
                    Property = p,
                    Column = p.GetCustomAttributes<Column>().First().ColumnIndex, //safe because of the Where above
                    //A Nullable<X> property is read as X; without this, int?/double?/DateTime?
                    //fell through to the string branch and SetValue threw an ArgumentException
                    TargetType = Nullable.GetUnderlyingType(p.PropertyType) ?? p.PropertyType
                }).ToList();


            //Row numbers that contain at least one populated cell, in ascending order
            var rows = worksheet.Cells
                .Select(cell => cell.Start.Row)
                .Distinct()
                .OrderBy(x => x);


            //Create the collection container
            var collection = rows.Skip(1) //Exclude header
                .Select(row =>
                {
                    var tnew = new T();
                    columns.ForEach(col =>
                    {
                        var val = worksheet.Cells[row, col.Column];

                        //Empty cell: null for reference/nullable properties, default for value types
                        if (val.Value == null)
                        {
                            col.Property.SetValue(tnew, null);
                            return;
                        }

                        //Excel stores all numbers as double (including int), so let EPPlus
                        //convert the cell to the type the property expects
                        object converted;
                        if (col.TargetType == typeof(Int32))
                        {
                            converted = val.GetValue<int>();
                        }
                        else if (col.TargetType == typeof(long))
                        {
                            converted = val.GetValue<long>();
                        }
                        else if (col.TargetType == typeof(double))
                        {
                            converted = val.GetValue<double>();
                        }
                        else if (col.TargetType == typeof(float))
                        {
                            converted = val.GetValue<float>();
                        }
                        else if (col.TargetType == typeof(decimal))
                        {
                            converted = val.GetValue<decimal>();
                        }
                        else if (col.TargetType == typeof(bool))
                        {
                            converted = val.GetValue<bool>();
                        }
                        else if (col.TargetType == typeof(DateTime))
                        {
                            converted = val.GetValue<DateTime>();
                        }
                        else
                        {
                            //Its a string
                            converted = val.GetValue<string>();
                        }

                        col.Property.SetValue(tnew, converted);
                    });

                    return tnew;
                });


            //Send it back
            return collection;
        }
    }
    
    0 讨论(0)
提交回复
热议问题