EPPlus: save a two-million-row, 200+ column DataTable to multiple Excel files


Question


I have a function that saves all records from a SQL table to an Excel worksheet using EPPlus. Exporting a small amount of data works fine, but with 200+ columns and 500,000+ rows I get an OutOfMemoryException.

I'd like to modify my code so that it saves 50,000 records per file.

Here is my code that works for small amounts of data:

private Task SaveAsync(string tableName)
{

    return Task.Run(() =>
    {
        try
        {
            using (var conn = new SqlConnection(_connectionString))
            {
                using (var cmd = new SqlCommand(string.Format(DataQuery, tableName), conn))
                {
                    cmd.CommandType = CommandType.Text;
                    cmd.CommandTimeout = 360;
                    conn.Open();
                    using (SqlDataReader sdr = cmd.ExecuteReader())
                    {
                        var fileName = string.Format(TargetFile, tableName);
                        if (File.Exists(fileName))
                        {
                            File.Delete(fileName);
                        }

                        sdr.Read();
                        var numberOfRecordsInTable = sdr.GetInt32(0);

                        sdr.NextResult();

                        using (ExcelPackage pck = new ExcelPackage(new FileInfo(fileName)))
                        {
                            ExcelWorksheet ws = pck.Workbook.Worksheets.Add("Results");

                            int count = sdr.FieldCount;
                            int col = 1, row = 1;

                            for (int i = 0; i < count; i++)
                            {
                                ws.SetValue(row, col++, sdr.GetName(i));
                            }
                            row++;
                            col = 1;
                            while (sdr.Read())
                            {
                                for (int i = 0; i < count; i++)
                                {
                                    var val = sdr.GetValue(i);
                                    ws.SetValue(row, col++, val);
                                }
                                row++;
                                col = 1;
                            }
                            // auto-size columns
                            ws.Cells[ws.Dimension.Address].AutoFitColumns();
                            // auto filter on the header row
                            ws.Cells[1, 1, 1, count].AutoFilter = true;
                        }
                    }
                    conn.Close();
                }
            }
        }
        catch (Exception e)
        {
            Debug.WriteLine("Error at: " + Thread.CurrentThread.ManagedThreadId);
            Debug.WriteLine(e);
        }
    });
}

And here is my modified code that splits the records into 50,000 per file:

private Task SaveAsync2(string tableName)
{
    return Task.Run(() =>
    {
        try
        {
            using (var conn = new SqlConnection(_connectionString))
            {
                using (var cmd = new SqlCommand(string.Format(DataQuery, tableName), conn))
                {
                    cmd.CommandType = CommandType.Text;
                    cmd.CommandTimeout = 360;
                    conn.Open();
                    using (SqlDataReader sdr = cmd.ExecuteReader())
                    {

                        var fileName = string.Format(TargetFile, tableName,"");
                        if (File.Exists(fileName))
                        {
                            File.Delete(fileName);
                        }

                        sdr.Read();
                        var max = sdr.GetInt32(0);
                        int filesCount = 1;
                        if (max > 50000)
                        {
                            fileName = string.Format(TargetFile, tableName, filesCount);
                        }

                        sdr.NextResult();

                        ExcelPackage pck = new ExcelPackage(new FileInfo(fileName));
                        ExcelWorksheet ws = pck.Workbook.Worksheets.Add("RESULTS");

                        int count = sdr.FieldCount;

                        int col = 1, row = 1;

                        for (int i = 0; i < count; i++)
                        {
                            ws.SetValue(row, col++, sdr.GetName(i));
                        }
                        row++;
                        col = 1;
                        while (sdr.Read())
                        {
                            for (int i = 0; i < count; i++)
                            {
                                var val = sdr.GetValue(i);
                                ws.SetValue(row, col++, val);
                            }
                            row++;
                            col = 1;

                            if (row > 50000)
                            {
                                pck.Save();
                                filesCount++;
                                fileName = string.Format(TargetFile, tableName, filesCount);

                                pck = new ExcelPackage(new FileInfo(fileName));
                                ws = pck.Workbook.Worksheets.Add("RESULTS");

                                count = sdr.FieldCount;

                                col = 1;
                                row = 1;

                                for (int i = 0; i < count; i++)
                                {
                                    ws.SetValue(row, col++, sdr.GetName(i));
                                }
                                row++;
                                col = 1;
                            }
                        }

                        // auto-size columns
                        ws.Cells[ws.Dimension.Address].AutoFitColumns();
                        // auto filter on the header row
                        ws.Cells[1, 1, 1, count].AutoFilter = true;

                        pck.Save();
                    }
                }
                conn.Close();

            }
        }
        catch (Exception e)
        {
            Debug.WriteLine("Error at: " + Thread.CurrentThread.ManagedThreadId);
            Debug.WriteLine(e);
        }
    });
}

Basically this works fine, but in the first version of my code everything was inside a using statement, whereas in the second version I call the same code twice.

  1. How can I restructure my code to remove the duplication and keep everything inside using statements?
  2. Can I add the next set of 50,000 records as a new worksheet instead of creating a new file?
  3. What is the practical EPPlus limit when saving data to a file: rows x columns? I found information that EPPlus should handle more than a million rows, but not with as many columns as I have. I think I could export a million rows with a single column, but with 200+ columns, 50,000 rows seems to be my limit. I'm wondering whether there is a rows x columns figure below which my export will work reliably. I want the export function to be universal, so that when I pass a DataTable with 50 columns it exports, for example, 100,000 rows per file, and with 2 columns it exports half a million rows per file (see the sketch after this list).
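
For question 3, one way to make the export universal is to derive the rows-per-file limit from a fixed cell budget instead of hard-coding 50,000. This is only a sketch of the idea: the 10,000,000-cell budget below is a hypothetical number to tune empirically, not a documented EPPlus limit.

// Illustration only - CellBudgetPerFile is a guess to be tuned, not an EPPlus constant.
private const int CellBudgetPerFile = 10000000;

private static int GetRowsPerFile(int columnCount)
{
    // e.g. 200 columns -> 50,000 rows per file; 50 columns -> 200,000 rows per file
    var rows = CellBudgetPerFile / Math.Max(columnCount, 1);

    // never exceed the xlsx worksheet row limit (1,048,576 rows)
    return Math.Min(rows, 1000000);
}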

Answer 1:


Unfortunately, there is no easy way to merge that much data into a single file with EPPlus. Basically, the entire file is loaded into memory when opened; it's all or nothing. In theory, you could generate the XML files that an XLSX contains (an .xlsx is just a renamed zip archive) and insert the data manually, which would have a smaller memory footprint, but that is no small feat.

For your current code, you could always just call .Dispose() manually if you want to avoid the using statement. But I understand you want to avoid duplicate code. What about something like this (but watch the memory usage when copying all the object data):

const int max = 10;   // rows per output file (small value here for demonstration)
var loop = 0;         // running output-file counter

using (var sdr = cmd.ExecuteReader())
{
    var fieldcount = sdr.FieldCount;

    var getfi = new Func<int, FileInfo>(i =>
    {
        var fi = new FileInfo(String.Format(@"c:\temp\Multi_Files{0}.xlsx", i));
        if (fi.Exists) fi.Delete();
        return fi;
    });

    var savefile = new Action<FileInfo, List<Object[]>>((info, rows) =>
    {
        using (var pck = new ExcelPackage(info))
        {
            var wb = pck.Workbook;
            var ws = wb.Worksheets.Add("RESULTS");
            for (var row = 0; row < rows.Count; row++)
                for (var col = 0; col < fieldcount; col++)
                    ws.SetValue(row + 1, col + 1, rows[row][col]);
            pck.Save();
        }
    });

    var rowlist = new List<Object[]>();

    while (sdr.Read())
    {
        var rowdata = new Object[sdr.FieldCount];
        sdr.GetValues(rowdata);
        rowlist.Add(rowdata);

        if (rowlist.Count == max)
        {
            savefile(getfi(++loop), rowlist);
            rowlist.Clear();
        }
    }
    if (rowlist.Count > 0)
        savefile(getfi(++loop), rowlist);   // flush any remaining buffered rows
}
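
One thing to note about the sketch above: it writes only data rows, no header row. If you want the column names repeated in every file, you could capture them once before the read loop and have savefile emit them first. A small, untested variation, assuming the same sdr, fieldcount and getfi from the snippet above:

var headers = new string[fieldcount];
for (var i = 0; i < fieldcount; i++)
    headers[i] = sdr.GetName(i);

var savefile = new Action<FileInfo, List<Object[]>>((info, rows) =>
{
    using (var pck = new ExcelPackage(info))
    {
        var ws = pck.Workbook.Worksheets.Add("RESULTS");

        // header row first
        for (var col = 0; col < fieldcount; col++)
            ws.SetValue(1, col + 1, headers[col]);

        // data starts on row 2
        for (var row = 0; row < rows.Count; row++)
            for (var col = 0; col < fieldcount; col++)
                ws.SetValue(row + 2, col + 1, rows[row][col]);

        pck.Save();
    }
});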



Answer 2:


I've run up against memory limits with EPPlus in the past, and ended up generating multiple .xlsx files as a workaround (similar to your approach). Another alternative would be to change your compiler settings to target 64-bit only (if you can get by without supporting 32-bit platforms). As I recall, EPPlus is compiled for "Any CPU", so if you can change your code to target "x64" that would likely relax the memory limitations and allow you to generate a single .xlsx file. Targeting x64 likely would have worked in my case, but I didn't think of it until after the fact, so I never had an opportunity to test.
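
If you do go the x64 route, it is easy to confirm at runtime that the process really ended up 64-bit before blaming EPPlus for memory errors. A trivial check (not part of the original answer):

// Sanity check that the x64 platform target actually took effect.
Console.WriteLine("64-bit process: " + Environment.Is64BitProcess);
Console.WriteLine("64-bit OS:      " + Environment.Is64BitOperatingSystem);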

UPDATE: I just ran a quick test using EPPlus 3.1.3, creating 500,000 rows of 70 columns each. My 32-bit application was able to generate about 119,000 rows before throwing an out-of-memory exception. After switching the target to x64, it successfully generated all 500,000 rows, although it took forever. Creating the actual worksheet took just a few minutes, but ExcelPackage.SaveAs() took close to 20 minutes. Memory consumption was quite high as well (roughly 11 GB). The resulting .xlsx file is 220 MB, which 32-bit Excel is not able to open (out of memory). The bottom line: targeting x64 is probably not a viable solution; you'd be better off splitting the output into multiple .xlsx files.

I was tempted to delete this answer since it has turned out to be a dead end, but decided to leave it in case it helps someone else avoid this path in the future.




Answer 3:


Since you are creating a new Excel file (correct me if I'm wrong), you can simply write an XML file with specific contents. Excel can open .xml files as long as they follow the expected schema.

You can simply build the XML contents in memory and afterwards write them to an .xml file. You do not need the EPPlus package, so you bypass its limitations.

Of course, you have to figure out for yourself exactly what needs to go into the .xml file. If you are going to use formatting and formulas, it might get complex.

See here:

  • https://www.google.nl/search?q=excel%2520xml%2520format&oq=excel%2520xml%2520format
  • https://technet.microsoft.com/en-gb/magazine/2006.01.blogtales.aspx
  • http://blogs.msdn.com/b/brian_jones/archive/2005/06/27/433152.aspx
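
For illustration, here is a minimal sketch of streaming the Excel 2003 SpreadsheetML (.xml) format directly from the data reader, treating every value as a string and skipping formatting and formulas entirely. The output path is a placeholder, and it assumes an open SqlDataReader named sdr plus the System.IO and System.Security namespaces (SecurityElement.Escape handles XML escaping):

using (var writer = new StreamWriter(@"c:\temp\results.xml"))
{
    writer.WriteLine("<?xml version=\"1.0\"?>");
    writer.WriteLine("<Workbook xmlns=\"urn:schemas-microsoft-com:office:spreadsheet\"");
    writer.WriteLine("          xmlns:ss=\"urn:schemas-microsoft-com:office:spreadsheet\">");
    writer.WriteLine(" <Worksheet ss:Name=\"RESULTS\">");
    writer.WriteLine("  <Table>");

    // header row
    writer.WriteLine("   <Row>");
    for (int i = 0; i < sdr.FieldCount; i++)
        writer.WriteLine("    <Cell><Data ss:Type=\"String\">{0}</Data></Cell>",
            SecurityElement.Escape(sdr.GetName(i)));
    writer.WriteLine("   </Row>");

    // data rows are streamed one at a time, so nothing large is held in memory
    while (sdr.Read())
    {
        writer.WriteLine("   <Row>");
        for (int i = 0; i < sdr.FieldCount; i++)
            writer.WriteLine("    <Cell><Data ss:Type=\"String\">{0}</Data></Cell>",
                SecurityElement.Escape(Convert.ToString(sdr.GetValue(i))));
        writer.WriteLine("   </Row>");
    }

    writer.WriteLine("  </Table>");
    writer.WriteLine(" </Worksheet>");
    writer.WriteLine("</Workbook>");
}

Whether this sidesteps the size problem depends on how your version of Excel handles such a large .xml file, so it is worth testing with your real row counts before committing to this route.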



Answer 4:


A simple solution without any tricks (not tested, but the intent should be clear):

using (var conn = new SqlConnection(_connectionString))
{
    int filesCount = 1;
    int col = 1, row = 1;
    string fileName = String.Empty;
    int count = 0;
    ExcelPackage pck = null;
    ExcelWorksheet ws = null;

    using (var cmd = new SqlCommand(string.Format(DataQuery, tableName), conn))
    {
         cmd.CommandType = CommandType.Text;
         cmd.CommandTimeout = 360;
         conn.Open();
         using (SqlDataReader sdr = cmd.ExecuteReader())
         {
              while (sdr.Read())
              {
                   if (row == 1)
                   {
                       fileName = string.Format(TargetFile, tableName, filesCount);
                       if (File.Exists(fileName))
                       {
                            File.Delete(fileName);
                       }
                       pck = new ExcelPackage(new FileInfo(fileName));
                       ws = pck.Workbook.Worksheets.Add("RESULTS");
                   }

                   count = sdr.FieldCount; 
                   for (int i = 0; i < count; i++)
                   {
                       var val = sdr.GetValue(i);
                       ws.SetValue(row, col++, val);
                   }
                   row++;
                   col = 1;

                   if (row >= 50000)
                   {
                        ws.Cells[ws.Dimension.Address].AutoFitColumns();
                        ws.Cells[1, 1, 1, count].AutoFilter = true;
                        pck.Save();
                        row = 1;          // the next Read() starts a new file
                        filesCount++;
                   }
               }
          }
          if (row > 1)
          {
               ws.Cells[ws.Dimension.Address].AutoFitColumns();
               ws.Cells[1, 1, 1, count].AutoFilter = true;
               pck.Save();
          }
     }
}
// no explicit conn.Close() is needed: the using statement disposes (and closes) the connection
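
Regarding question 2 from the original post (a new worksheet instead of a new file): mechanically it only takes adding another sheet to the same package when the chunk is full, roughly as in the untested fragment below, which reuses the variables from the code above. Keep in mind, though, that EPPlus holds the whole package in memory (see Answer 1), so piling worksheets into one file does not relieve the memory pressure the way separate files do.

if (row >= 50000)
{
    ws.Cells[ws.Dimension.Address].AutoFitColumns();
    ws.Cells[1, 1, 1, count].AutoFilter = true;

    // start a new sheet in the same package instead of a new file
    filesCount++;
    ws = pck.Workbook.Worksheets.Add("RESULTS_" + filesCount);
    row = 1;
}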


Source: https://stackoverflow.com/questions/33264040/epplus-save-two-million-rows-with-200-columns-datatable-to-multiple-excel-files
