Which .NET collection is faster: enumerating foreach Dictionary<>.Values or List<>?

时光总嘲笑我的痴心妄想 提交于 2020-01-01 10:16:46

问题


Are one of these enumerations faster than the other or about the same? (example in C#)

Case 1:

Dictionary<string, object> valuesDict;

// valuesDict loaded with thousands of objects

foreach (object value in valuesDict.Values) { /* process */ }

Case 2:

List<object> valuesList;

// valuesList loaded with thousands of objects

foreach (object value in valuesList) { /* process */ }

UPDATE:

Background:

The dictionary would be beneficial for keyed search elsewhere (as opposed to iterating through a list), but the benefit would be diminished if iterating through the dictionary is much slower than going through the list.

UPDATE: Taking the advice of many, I've done my own testing.

First, these are the results. Following is the program.

Iterate whole collection Dict: 78 Keyd: 131 List: 76

Keyed search collection Dict: 178 Keyd: 194 List: 142800

using System;
using System.Linq;

namespace IterateCollections
{
    public class Data
    {
        public string Id;
        public string Text;
    }

    public class KeyedData : System.Collections.ObjectModel.KeyedCollection<string, Data>
    {
        protected override string GetKeyForItem(Data item)
        {
            return item.Id;
        }
    }

    class Program
    {
        static void Main(string[] args)
        {
            var dict = new System.Collections.Generic.Dictionary<string, Data>();
            var list = new System.Collections.Generic.List<Data>();
            var keyd = new KeyedData();

            for (int i = 0; i < 10000; i++)
            {
                string s = i.ToString();
                var d = new Data { Id = s, Text = s };
                dict.Add(d.Id, d);
                list.Add(d);
                keyd.Add(d);
            }

            var sw = new System.Diagnostics.Stopwatch();
            sw.Start();
            for (int r = 0; r < 1000; r++)
            {
                foreach (Data d in dict.Values)
                {
                    if (null == d) throw new ApplicationException();
                }
            }
            sw.Stop();
            var dictTime = sw.ElapsedMilliseconds;

            sw.Reset();
            sw.Start();
            for (int r = 0; r < 1000; r++)
            {
                foreach (Data d in keyd)
                {
                    if (null == d) throw new ApplicationException();
                }
            }
            sw.Stop();
            var keydTime = sw.ElapsedMilliseconds;

            sw.Reset();
            sw.Start();
            for (int r = 0; r < 1000; r++)
            {
                foreach (Data d in list)
                {
                    if (null == d) throw new ApplicationException();
                }
            }
            sw.Stop();
            var listTime = sw.ElapsedMilliseconds;

            Console.WriteLine("Iterate whole collection");
            Console.WriteLine("Dict: " + dictTime);
            Console.WriteLine("Keyd: " + keydTime);
            Console.WriteLine("List: " + listTime);

            sw.Reset();
            sw.Start();
            for (int r = 0; r < 1000; r++)
            {
                for (int i = 0; i < 10000; i += 10)
                {
                    string s = i.ToString();
                    Data d = dict[s];
                    if (null == d) throw new ApplicationException();
                }
            }
            sw.Stop();
            dictTime = sw.ElapsedMilliseconds;

            sw.Reset();
            sw.Start();
            for (int r = 0; r < 1000; r++)
            {
                for (int i = 0; i < 10000; i += 10)
                {
                    string s = i.ToString();
                    Data d = keyd[s];
                    if (null == d) throw new ApplicationException();
                }
            }
            sw.Stop();
            keydTime = sw.ElapsedMilliseconds;

            sw.Reset();
            sw.Start();
            for (int r = 0; r < 10; r++)
            {
                for (int i = 0; i < 10000; i += 10)
                {
                    string s = i.ToString();
                    Data d = list.FirstOrDefault(item => item.Id == s);
                    if (null == d) throw new ApplicationException();
                }
            }
            sw.Stop();
            listTime = sw.ElapsedMilliseconds * 100;

            Console.WriteLine("Keyed search collection");
            Console.WriteLine("Dict: " + dictTime);
            Console.WriteLine("Keyd: " + keydTime);
            Console.WriteLine("List: " + listTime);

        }
    }

}

UPDATE:

Comparison of Dictionary with KeyedCollection as suggested by @Blam.

The fastest method is iterating over an Array of KeyedCollection Items.

Note, however, that iterating over the dictionary values is faster than over the KeyedCollection without converting to an array.

Note that iterating over the dictionary values is much, much faster than over the dictionary collection.

 Iterate 1,000 times over collection of 10,000 items
   Dictionary Pair:   519 ms
 Dictionary Values:    95 ms
  Dict Val ToArray:    92 ms
   KeyedCollection:   141 ms
   KeyedC. ToArray:    17 ms

Timings are from a Windows console application (Release build). Here is the source code:

using System;
using System.Collections.Generic;
using System.Linq;

namespace IterateCollections
{
    public class GUIDkeyCollection : System.Collections.ObjectModel.KeyedCollection<Guid, GUIDkey>
    {
        // This parameterless constructor calls the base class constructor 
        // that specifies a dictionary threshold of 0, so that the internal 
        // dictionary is created as soon as an item is added to the  
        // collection. 
        // 
        public GUIDkeyCollection() : base() { }

        // This is the only method that absolutely must be overridden, 
        // because without it the KeyedCollection cannot extract the 
        // keys from the items.  
        // 
        protected override Guid GetKeyForItem(GUIDkey item)
        {
            // In this example, the key is the part number. 
            return item.Key;
        }

        public GUIDkey[] ToArray()
        {
            return Items.ToArray();
        }

        //[Obsolete("Iterate using .ToArray()", true)]
        //public new IEnumerator GetEnumerator()
        //{
        //    throw new NotImplementedException("Iterate using .ToArray()");
        //}
    }
    public class GUIDkey : Object
    {
        private Guid key;
        public Guid Key
        {
            get
            {
                return key;
            }
        }
        public override bool Equals(Object obj)
        {
            //Check for null and compare run-time types.
            if (obj == null || !(obj is GUIDkey)) return false;
            GUIDkey item = (GUIDkey)obj;
            return (Key == item.Key);
        }
        public override int GetHashCode() { return Key.GetHashCode(); }
        public GUIDkey(Guid guid)
        {
            key = guid;
        }
    }

    class Program
    {
        static void Main(string[] args)
        {
            const int itemCount = 10000;
            const int repetitions = 1000;
            const string resultFormat = "{0,18}: {1,5:D} ms";

            Console.WriteLine("Iterate {0:N0} times over collection of {1:N0} items", repetitions, itemCount);

            var dict = new Dictionary<Guid, GUIDkey>();
            var keyd = new GUIDkeyCollection();

            for (int i = 0; i < itemCount; i++)
            {
                var d = new GUIDkey(Guid.NewGuid());
                dict.Add(d.Key, d);
                keyd.Add(d);
            }

            var sw = new System.Diagnostics.Stopwatch();
            long time;

            sw.Reset();
            sw.Start();
            for (int r = 0; r < repetitions; r++)
            {
                foreach (KeyValuePair<Guid, GUIDkey> w in dict)
                {
                    if (null == w.Value) throw new ApplicationException();
                }
            }
            sw.Stop();
            time = sw.ElapsedMilliseconds;
            Console.WriteLine(resultFormat, "Dictionary Pair", time);

            sw.Reset();
            sw.Start();
            for (int r = 0; r < repetitions; r++)
            {
                foreach (GUIDkey d in dict.Values)
                {
                    if (null == d) throw new ApplicationException();
                }
            }
            sw.Stop();
            time = sw.ElapsedMilliseconds;
            Console.WriteLine(resultFormat, "Dictionary Values", time);

            sw.Reset();
            sw.Start();
            for (int r = 0; r < repetitions; r++)
            {
                foreach (GUIDkey d in dict.Values.ToArray())
                {
                    if (null == d) throw new ApplicationException();
                }
            }
            sw.Stop();
            time = sw.ElapsedMilliseconds;
            Console.WriteLine(resultFormat, "Dict Val ToArray", time);

            sw.Reset();
            sw.Start();
            for (int r = 0; r < repetitions; r++)
            {
                foreach (GUIDkey d in keyd)
                {
                    if (null == d) throw new ApplicationException();
                }
            }
            sw.Stop();
            time = sw.ElapsedMilliseconds;
            Console.WriteLine(resultFormat, "KeyedCollection", time);

            sw.Reset();
            sw.Start();
            for (int r = 0; r < repetitions; r++)
            {
                foreach (GUIDkey d in keyd.ToArray())
                {
                    if (null == d) throw new ApplicationException();
                }
            }
            sw.Stop();
            time = sw.ElapsedMilliseconds;
            Console.WriteLine(resultFormat, "KeyedC. ToArray", time);
        }
    }

}

回答1:


This is relatively easy to check with a stopwatch:

var d = new Dictionary<string, object>();
var s = new List<object>();
for (int i =0 ; i != 10000000 ; i++) {
    d.Add(""+i, i);
    s.Add(i);
}
var sw = new Stopwatch();
sw.Start();
foreach(object o in d.Values) {
    if (o == null) throw new ApplicationException();
}
sw.Stop();
Console.WriteLine(sw.ElapsedMilliseconds);
sw.Reset();
sw.Start();
foreach (object o in s) {
    if (o == null) throw new ApplicationException();
}
sw.Stop();
Console.WriteLine(sw.ElapsedMilliseconds);

This prints numbers that are reasonably close to each other:

Dict List
---- ----
 136  107
 139  108
 136  108

The List always wins, but the margins are not as large as one would expect, given the relative complexity of the two data structures.




回答2:


It's about the same time. Surely it won't be noticable once your process includes any code.

But why do you listen to random people from the internet? Test it!

The Stopwatch class might be useful.




回答3:


If you want lookup by key then Dictionary.
Dictionary lookup by key is very fast as that is what it is designed to do.

The difference in foreach will be minor.

If the key is also a property then consider KeyedCollection
KeyedCollection Class

Provides the abstract base class for a collection whose keys are embedded in the values.

As Servy commented pick the collection with the features you need
.NET has many collections.
System.Collections Namespaces

And if your objects have a natural key that can be represented as an Int32 then consider OverRide GetHashCode().

If your objects have a natural key of GUID then consider KeyedCollection and OverRide GetHashCode and Equals

And for seach on nonKey properties consider LINQ rather than ForEach with a break;

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Collections.ObjectModel;
using System.Diagnostics;

namespace IntIntKeyedCollection
{
    class Program
    {
        static void Main(string[] args)
        {

            Guid findGUID = Guid.NewGuid();
            GUIDkeyCollection gUIDkeyCollection = new GUIDkeyCollection();
            gUIDkeyCollection.Add(new GUIDkey(findGUID));
            gUIDkeyCollection.Add(new GUIDkey(Guid.NewGuid()));
            gUIDkeyCollection.Add(new GUIDkey(Guid.NewGuid()));
            GUIDkey findGUIDkey = gUIDkeyCollection[findGUID];  // lookup by key (behaves like a dict)
            Console.WriteLine(findGUIDkey.Key);
            Console.WriteLine(findGUIDkey.GetHashCode());
            Console.WriteLine(findGUID);
            Console.WriteLine(findGUID.GetHashCode());
            Console.ReadLine();
        }
        public class GUIDkeyCollection : KeyedCollection<Guid, GUIDkey>
        {
            // This parameterless constructor calls the base class constructor 
            // that specifies a dictionary threshold of 0, so that the internal 
            // dictionary is created as soon as an item is added to the  
            // collection. 
            // 
            public GUIDkeyCollection() : base() { }

            // This is the only method that absolutely must be overridden, 
            // because without it the KeyedCollection cannot extract the 
            // keys from the items.  
            // 
            protected override Guid GetKeyForItem(GUIDkey item)
            {
                // In this example, the key is the part number. 
                return item.Key;
            }
        }
        public class GUIDkey : Object
        {
            private Guid key;
            public Guid Key
            {
                get
                {
                    return key;
                }
            }
            public override bool Equals(Object obj)
            {
                //Check for null and compare run-time types.
                if (obj == null || !(obj is GUIDkey)) return false;
                GUIDkey item = (GUIDkey)obj;
                return (Key == item.Key);
            }
            public override int GetHashCode() { return Key.GetHashCode(); }
            public GUIDkey(Guid guid)
            {
                key = guid;
            }
        }
    }
}



回答4:


As others have said, premature optimization yadda yadda. Use the right collection for the right scenario and only worry about speed if it becomes a problem.

Anyway, the only way to know is to measure. I made a quick and dirty test which populates a dictionary and a list with 30,000 objects and then iterates them 10,000 times. Results are:

Dictionary: 2683ms
List: 1955ms

So, it would seem that Dictionary.Values is slightly slower to enumerate for whatever reason.

Here is the code:

void Main()
{
    int i = 0;

    var dict = new Dictionary<string, object>();
    var list = new List<object>();

    for (i = 0; i < 30000; i++)
    {
        var foo = new Foo();
        dict.Add(i.ToString(), foo);
        list.Add(foo);
    }

    var dictWatch = new Stopwatch();

    dictWatch.Start();

    for (i = 0; i < 10000; i++)
    {
        foreach (var foo in dict.Values) {}
    }

    dictWatch.Stop();
    Console.WriteLine("Dictionary: " + dictWatch.ElapsedMilliseconds);

    var listWatch = new Stopwatch();
    listWatch.Start();

    for (i = 0; i < 10000; i++)
    {
        foreach (var foo in list) {}
    }

    listWatch.Stop();

    Console.WriteLine("List: " + listWatch.ElapsedMilliseconds);
}

class Foo {}



回答5:


Here is your test:

class speedtest
{
    static void Main(string[] args)
    {
        int size = 10000000;
        Dictionary<string, object> valuesDict = new Dictionary<string, object>();
        List<object> valuesList = new List<object>();

        for (int i = 0; i < size; i++)
        {
            valuesDict.Add(i.ToString(), i);
            valuesList.Add(i);
        }
        // valuesDict loaded with thousands of objects

        Stopwatch s = new Stopwatch();
        s.Start();
        foreach (object value in valuesDict.Values) { /* process */ }
        s.Stop();

        Stopwatch s2 = new Stopwatch();
        s2.Start();
        foreach (object value in valuesList) { /* process */ }
        s.Stop();
        Console.WriteLine("Size {0}, Dictonary {1}ms, List {2}ms",size,s.ElapsedMilliseconds,s2.ElapsedMilliseconds);

        Console.ReadLine();
    }
}

Outputs:
Size 10000000, Dictonary 73ms, List 63ms

However you should also test if you have hashing collisions in your dictionary. They can give you a different outcome.

In cases of a real application, you have to consider the time spend creating, accessing and memory foot print of your structure.



来源:https://stackoverflow.com/questions/15904625/which-net-collection-is-faster-enumerating-foreach-dictionary-values-or-list

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!