I have a lot of code in which I do something like this
bool GetIsUnique<T>(IEnumerable<T> values)
{
    return values.Count() == values.Distinct().Count();
}
I'm surprised no one has tested this yet:
Comparing Gluip's version from the question with the answers from LukeH, Jamiec, and MrKWatkins: all three answers are better than the asker's version.
The three answers are pretty comparable to one another, but in most cases Jamiec's is slightly faster.
When the data has no duplicates, or the duplicate is at the end of the IEnumerable, neither the set size nor the choice of algorithm makes a major difference.
When the data has a duplicate near the beginning or the middle, Gluip's version from the original question shows its slowness compared to the other three: the HashSet-based checks return as soon as they hit the first duplicate, while Count() == Distinct().Count() always enumerates the whole sequence twice.
The time to check a set appears to scale linearly with the size of the set, so no algorithm is preferable specifically for large or small sets.
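For reference, the single-pass check that the three faster answers share boils down to the sketch below (essentially LukeH's shape; the exact versions being compared are in the test program that follows):

// Minimal sketch: returns false as soon as the first duplicate is seen,
// instead of enumerating the sequence twice like Count() == Distinct().Count().
static bool GetIsUnique<T>(IEnumerable<T> values)
{
    var seen = new HashSet<T>();
    foreach (var value in values)
    {
        if (!seen.Add(value)) // HashSet<T>.Add returns false for a duplicate
            return false;
    }
    return true;
}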
Here's a test program that spits out a CSV that you can load into a spreadsheet program and sort and graph if you like:
Test Program:
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace AreUniqueTest
{
    class Program
    {
        const int Iterations = 1000;

        enum DupeLocation
        {
            None,
            Early,
            Center,
            Late,
        }

        enum SetSize
        {
            Tiny = 10,
            Small = 100,
            Medium = 1000,
            Large = 10000,
            Huge = 100000,
        }

        static void Main()
        {
            Dictionary<string, Func<IEnumerable<int>, bool>> functions = new Dictionary<string, Func<IEnumerable<int>, bool>>
            {
                {"Gluip", GetIsUniqueGluip},
                {"LukeH", GetIsUniqueLukeH},
                {"Jamiec", GetIsUniqueJamiec},
                {"MrKWatkins", GetIsUniqueMrKWatkins}
            };

            var output = new StringBuilder();
            Console.WriteLine("Function,SetSize,DupeLocation,TotalTicks,AverageTicks");
            output.AppendLine("Function,SetSize,DupeLocation,TotalTicks,AverageTicks");

            foreach (SetSize size in Enum.GetValues(typeof(SetSize)))
            {
                var sizevalue = (int)size;
                foreach (DupeLocation location in Enum.GetValues(typeof(DupeLocation)))
                {
                    var data = CreateTestData((int)size, location);
                    foreach (string functionKey in functions.Keys)
                    {
                        var ticks = RunSet(functions[functionKey], data, Iterations);
                        var avg = ticks / Iterations;
                        Console.WriteLine($"{functionKey},{sizevalue},{location},{ticks},{avg}");
                        output.AppendLine($"{functionKey},{sizevalue},{location},{ticks},{avg}");
                    }
                }
            }

            File.WriteAllText("output.csv", output.ToString());
            Process.Start("output.csv");
        }

        static long RunSet<T>(Func<IEnumerable<T>, bool> getIsUnique, IEnumerable<T> values, int iterations)
        {
            var stopwatch = new Stopwatch();
            stopwatch.Start();
            for (var i = 0; i < iterations; i++)
            {
                getIsUnique.Invoke(values);
            }
            stopwatch.Stop();
            return stopwatch.ElapsedTicks;
        }

        static bool GetIsUniqueLukeH<T>(IEnumerable<T> values)
        {
            var set = new HashSet<T>();
            foreach (T item in values)
            {
                if (!set.Add(item))
                    return false;
            }
            return true;
        }

        static bool GetIsUniqueGluip<T>(IEnumerable<T> values)
        {
            return values.Count() == values.Distinct().Count();
        }

        static bool GetIsUniqueJamiec<T>(IEnumerable<T> list)
        {
            var hs = new HashSet<T>();
            return list.All(hs.Add);
        }

        static bool GetIsUniqueMrKWatkins<T>(IEnumerable<T> values)
        {
            HashSet<T> hashSet = new HashSet<T>();
            foreach (var value in values)
            {
                if (hashSet.Contains(value))
                {
                    return false;
                }
                hashSet.Add(value);
            }
            return true;
        }

        static int[] CreateTestData(int size, DupeLocation location)
        {
            var result = new int[size];
            Parallel.For(0, size, i => { result[i] = i; });
            return SetDupe(result, location);
        }

        static int[] SetDupe(int[] values, DupeLocation location)
        {
            switch (location)
            {
                case DupeLocation.Early:
                    values[1] = values[0];
                    break;
                case DupeLocation.Center:
                    var midpoint = values.Length / 2;
                    values[midpoint] = values[midpoint + 1];
                    break;
                case DupeLocation.Late:
                    values[values.Length - 1] = values[values.Length - 2];
                    break;
                // case DupeLocation.None: do nothing.
            }
            return values;
        }
    }
}
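If you want a quick sanity check that the implementations agree before trusting the timings, something like this can be dropped into Main. The sample arrays are hypothetical and not part of the benchmark above.

// Hypothetical spot check: every implementation should return the same answers.
var unique = new[] { 1, 2, 3, 4 };
var withDupe = new[] { 1, 2, 2, 3 };
Console.WriteLine(GetIsUniqueJamiec(unique));       // True
Console.WriteLine(GetIsUniqueJamiec(withDupe));     // False
Console.WriteLine(GetIsUniqueMrKWatkins(withDupe)); // False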