Is there any way I can separate a List
into several separate lists of SomeObject
, using the item index as the delimiter of each s
Try the following code.
public static IList<IList<T>> Split<T>(IList<T> source)
{
return source
.Select((x, i) => new { Index = i, Value = x })
.GroupBy(x => x.Index / 3)
.Select(x => x.Select(v => v.Value).ToList())
.ToList();
}
The idea is to first group the elements by indexes. Dividing by three has the effect of grouping them into groups of 3. Then convert each group to a list and the IEnumerable
of List
to a List
of List
s
If the list is of type system.collections.generic you can use the "CopyTo" method available to copy elements of your array to other sub arrays. You specify the start element and number of elements to copy.
You could also make 3 clones of your original list and use the "RemoveRange" on each list to shrink the list to the size you want.
Or just create a helper method to do it for you.
Using modular partitioning:
public IEnumerable<IEnumerable<string>> Split(IEnumerable<string> input, int chunkSize)
{
var chunks = (int)Math.Ceiling((double)input.Count() / (double)chunkSize);
return Enumerable.Range(0, chunks).Select(id => input.Where(s => s.GetHashCode() % chunks == id));
}
In general the approach suggested by CaseyB works fine, in fact if you are passing in a List<T>
it is hard to fault it, perhaps I would change it to:
public static IEnumerable<IEnumerable<T>> ChunkTrivialBetter<T>(this IEnumerable<T> source, int chunksize)
{
var pos = 0;
while (source.Skip(pos).Any())
{
yield return source.Skip(pos).Take(chunksize);
pos += chunksize;
}
}
Which will avoid massive call chains. Nonetheless, this approach has a general flaw. It materializes two enumerations per chunk, to highlight the issue try running:
foreach (var item in Enumerable.Range(1, int.MaxValue).Chunk(8).Skip(100000).First())
{
Console.WriteLine(item);
}
// wait forever
To overcome this we can try Cameron's approach, which passes the above test in flying colors as it only walks the enumeration once.
Trouble is that it has a different flaw, it materializes every item in each chunk, the trouble with that approach is that you run high on memory.
To illustrate that try running:
foreach (var item in Enumerable.Range(1, int.MaxValue)
.Select(x => x + new string('x', 100000))
.Clump(10000).Skip(100).First())
{
Console.Write('.');
}
// OutOfMemoryException
Finally, any implementation should be able to handle out of order iteration of chunks, for example:
Enumerable.Range(1,3).Chunk(2).Reverse().ToArray()
// should return [3],[1,2]
Many highly optimal solutions like my first revision of this answer failed there. The same issue can be seen in casperOne's optimized answer.
To address all these issues you can use the following:
namespace ChunkedEnumerator
{
public static class Extensions
{
class ChunkedEnumerable<T> : IEnumerable<T>
{
class ChildEnumerator : IEnumerator<T>
{
ChunkedEnumerable<T> parent;
int position;
bool done = false;
T current;
public ChildEnumerator(ChunkedEnumerable<T> parent)
{
this.parent = parent;
position = -1;
parent.wrapper.AddRef();
}
public T Current
{
get
{
if (position == -1 || done)
{
throw new InvalidOperationException();
}
return current;
}
}
public void Dispose()
{
if (!done)
{
done = true;
parent.wrapper.RemoveRef();
}
}
object System.Collections.IEnumerator.Current
{
get { return Current; }
}
public bool MoveNext()
{
position++;
if (position + 1 > parent.chunkSize)
{
done = true;
}
if (!done)
{
done = !parent.wrapper.Get(position + parent.start, out current);
}
return !done;
}
public void Reset()
{
// per http://msdn.microsoft.com/en-us/library/system.collections.ienumerator.reset.aspx
throw new NotSupportedException();
}
}
EnumeratorWrapper<T> wrapper;
int chunkSize;
int start;
public ChunkedEnumerable(EnumeratorWrapper<T> wrapper, int chunkSize, int start)
{
this.wrapper = wrapper;
this.chunkSize = chunkSize;
this.start = start;
}
public IEnumerator<T> GetEnumerator()
{
return new ChildEnumerator(this);
}
System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
{
return GetEnumerator();
}
}
class EnumeratorWrapper<T>
{
public EnumeratorWrapper (IEnumerable<T> source)
{
SourceEumerable = source;
}
IEnumerable<T> SourceEumerable {get; set;}
Enumeration currentEnumeration;
class Enumeration
{
public IEnumerator<T> Source { get; set; }
public int Position { get; set; }
public bool AtEnd { get; set; }
}
public bool Get(int pos, out T item)
{
if (currentEnumeration != null && currentEnumeration.Position > pos)
{
currentEnumeration.Source.Dispose();
currentEnumeration = null;
}
if (currentEnumeration == null)
{
currentEnumeration = new Enumeration { Position = -1, Source = SourceEumerable.GetEnumerator(), AtEnd = false };
}
item = default(T);
if (currentEnumeration.AtEnd)
{
return false;
}
while(currentEnumeration.Position < pos)
{
currentEnumeration.AtEnd = !currentEnumeration.Source.MoveNext();
currentEnumeration.Position++;
if (currentEnumeration.AtEnd)
{
return false;
}
}
item = currentEnumeration.Source.Current;
return true;
}
int refs = 0;
// needed for dispose semantics
public void AddRef()
{
refs++;
}
public void RemoveRef()
{
refs--;
if (refs == 0 && currentEnumeration != null)
{
var copy = currentEnumeration;
currentEnumeration = null;
copy.Source.Dispose();
}
}
}
public static IEnumerable<IEnumerable<T>> Chunk<T>(this IEnumerable<T> source, int chunksize)
{
if (chunksize < 1) throw new InvalidOperationException();
var wrapper = new EnumeratorWrapper<T>(source);
int currentPos = 0;
T ignore;
try
{
wrapper.AddRef();
while (wrapper.Get(currentPos, out ignore))
{
yield return new ChunkedEnumerable<T>(wrapper, chunksize, currentPos);
currentPos += chunksize;
}
}
finally
{
wrapper.RemoveRef();
}
}
}
class Program
{
static void Main(string[] args)
{
int i = 10;
foreach (var group in Enumerable.Range(1, int.MaxValue).Skip(10000000).Chunk(3))
{
foreach (var n in group)
{
Console.Write(n);
Console.Write(" ");
}
Console.WriteLine();
if (i-- == 0) break;
}
var stuffs = Enumerable.Range(1, 10).Chunk(2).ToArray();
foreach (var idx in new [] {3,2,1})
{
Console.Write("idx " + idx + " ");
foreach (var n in stuffs[idx])
{
Console.Write(n);
Console.Write(" ");
}
Console.WriteLine();
}
/*
10000001 10000002 10000003
10000004 10000005 10000006
10000007 10000008 10000009
10000010 10000011 10000012
10000013 10000014 10000015
10000016 10000017 10000018
10000019 10000020 10000021
10000022 10000023 10000024
10000025 10000026 10000027
10000028 10000029 10000030
10000031 10000032 10000033
idx 3 7 8
idx 2 5 6
idx 1 3 4
*/
Console.ReadKey();
}
}
}
There is also a round of optimisations you could introduce for out-of-order iteration of chunks, which is out of scope here.
As to which method you should choose? It totally depends on the problem you are trying to solve. If you are not concerned with the first flaw the simple answer is incredibly appealing.
Note as with most methods, this is not safe for multi threading, stuff can get weird if you wish to make it thread safe you would need to amend EnumeratorWrapper
.
This following solution is the most compact I could come up with that is O(n).
public static IEnumerable<T[]> Chunk<T>(IEnumerable<T> source, int chunksize)
{
var list = source as IList<T> ?? source.ToList();
for (int start = 0; start < list.Count; start += chunksize)
{
T[] chunk = new T[Math.Min(chunksize, list.Count - start)];
for (int i = 0; i < chunk.Length; i++)
chunk[i] = list[start + i];
yield return chunk;
}
}
I wrote a Clump extension method several years ago. Works great, and is the fastest implementation here. :P
/// <summary>
/// Clumps items into same size lots.
/// </summary>
/// <typeparam name="T"></typeparam>
/// <param name="source">The source list of items.</param>
/// <param name="size">The maximum size of the clumps to make.</param>
/// <returns>A list of list of items, where each list of items is no bigger than the size given.</returns>
public static IEnumerable<IEnumerable<T>> Clump<T>(this IEnumerable<T> source, int size)
{
if (source == null)
throw new ArgumentNullException("source");
if (size < 1)
throw new ArgumentOutOfRangeException("size", "size must be greater than 0");
return ClumpIterator<T>(source, size);
}
private static IEnumerable<IEnumerable<T>> ClumpIterator<T>(IEnumerable<T> source, int size)
{
Debug.Assert(source != null, "source is null.");
T[] items = new T[size];
int count = 0;
foreach (var item in source)
{
items[count] = item;
count++;
if (count == size)
{
yield return items;
items = new T[size];
count = 0;
}
}
if (count > 0)
{
if (count == size)
yield return items;
else
{
T[] tempItems = new T[count];
Array.Copy(items, tempItems, count);
yield return tempItems;
}
}
}