Can someone suggest a way to create batches of a certain size in linq?
Ideally I want to be able to perform operations in chunks of some configurable amount.
I wrote a custom IEnumerable implementation that works without linq and guarantees a single enumeration over the data. It also accomplishes all this without requiring backing lists or arrays that cause memory explosions over large data sets.
Here are some basic tests:
[Fact]
public void ShouldPartition()
{
var ints = new List {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
var data = ints.PartitionByMaxGroupSize(3);
data.Count().Should().Be(4);
data.Skip(0).First().Count().Should().Be(3);
data.Skip(0).First().ToList()[0].Should().Be(0);
data.Skip(0).First().ToList()[1].Should().Be(1);
data.Skip(0).First().ToList()[2].Should().Be(2);
data.Skip(1).First().Count().Should().Be(3);
data.Skip(1).First().ToList()[0].Should().Be(3);
data.Skip(1).First().ToList()[1].Should().Be(4);
data.Skip(1).First().ToList()[2].Should().Be(5);
data.Skip(2).First().Count().Should().Be(3);
data.Skip(2).First().ToList()[0].Should().Be(6);
data.Skip(2).First().ToList()[1].Should().Be(7);
data.Skip(2).First().ToList()[2].Should().Be(8);
data.Skip(3).First().Count().Should().Be(1);
data.Skip(3).First().ToList()[0].Should().Be(9);
}
The Extension Method to partition the data.
///
/// A set of extension methods for .
///
public static class EnumerableExtender
{
///
/// Splits an enumerable into chucks, by a maximum group size.
///
/// The source to split
/// The maximum number of items per group.
/// The type of item to split
/// A list of lists of the original items.
public static IEnumerable> PartitionByMaxGroupSize(this IEnumerable source, int maxSize)
{
return new SplittingEnumerable(source, maxSize);
}
}
This is the implementing class
using System.Collections;
using System.Collections.Generic;
internal class SplittingEnumerable : IEnumerable>
{
private readonly IEnumerable backing;
private readonly int maxSize;
private bool hasCurrent;
private T lastItem;
public SplittingEnumerable(IEnumerable backing, int maxSize)
{
this.backing = backing;
this.maxSize = maxSize;
}
public IEnumerator> GetEnumerator()
{
return new Enumerator(this, this.backing.GetEnumerator());
}
IEnumerator IEnumerable.GetEnumerator()
{
return this.GetEnumerator();
}
private class Enumerator : IEnumerator>
{
private readonly SplittingEnumerable parent;
private readonly IEnumerator backingEnumerator;
private NextEnumerable current;
public Enumerator(SplittingEnumerable parent, IEnumerator backingEnumerator)
{
this.parent = parent;
this.backingEnumerator = backingEnumerator;
this.parent.hasCurrent = this.backingEnumerator.MoveNext();
if (this.parent.hasCurrent)
{
this.parent.lastItem = this.backingEnumerator.Current;
}
}
public bool MoveNext()
{
if (this.current == null)
{
this.current = new NextEnumerable(this.parent, this.backingEnumerator);
return true;
}
else
{
if (!this.current.IsComplete)
{
using (var enumerator = this.current.GetEnumerator())
{
while (enumerator.MoveNext())
{
}
}
}
}
if (!this.parent.hasCurrent)
{
return false;
}
this.current = new NextEnumerable(this.parent, this.backingEnumerator);
return true;
}
public void Reset()
{
throw new System.NotImplementedException();
}
public IEnumerable Current
{
get { return this.current; }
}
object IEnumerator.Current
{
get { return this.Current; }
}
public void Dispose()
{
}
}
private class NextEnumerable : IEnumerable
{
private readonly SplittingEnumerable splitter;
private readonly IEnumerator backingEnumerator;
private int currentSize;
public NextEnumerable(SplittingEnumerable splitter, IEnumerator backingEnumerator)
{
this.splitter = splitter;
this.backingEnumerator = backingEnumerator;
}
public bool IsComplete { get; private set; }
public IEnumerator GetEnumerator()
{
return new NextEnumerator(this.splitter, this, this.backingEnumerator);
}
IEnumerator IEnumerable.GetEnumerator()
{
return this.GetEnumerator();
}
private class NextEnumerator : IEnumerator
{
private readonly SplittingEnumerable splitter;
private readonly NextEnumerable parent;
private readonly IEnumerator enumerator;
private T currentItem;
public NextEnumerator(SplittingEnumerable splitter, NextEnumerable parent, IEnumerator enumerator)
{
this.splitter = splitter;
this.parent = parent;
this.enumerator = enumerator;
}
public bool MoveNext()
{
this.parent.currentSize += 1;
this.currentItem = this.splitter.lastItem;
var hasCcurent = this.splitter.hasCurrent;
this.parent.IsComplete = this.parent.currentSize > this.splitter.maxSize;
if (this.parent.IsComplete)
{
return false;
}
if (hasCcurent)
{
var result = this.enumerator.MoveNext();
this.splitter.lastItem = this.enumerator.Current;
this.splitter.hasCurrent = result;
}
return hasCcurent;
}
public void Reset()
{
throw new System.NotImplementedException();
}
public T Current
{
get { return this.currentItem; }
}
object IEnumerator.Current
{
get { return this.Current; }
}
public void Dispose()
{
}
}
}
}