LINQ - Full Outer Join

后端 未结 16 1547
既然无缘
既然无缘 2020-11-21 22:45

I have a list of people's ID and their first name, and a list of people's ID and their surname. Some people don't have a first name and some don't have a surname; I'd like to do a full outer join on the two lists.

相关标签:
16条回答
  • 2020-11-21 23:07

    I decided to add this as a separate answer as I am not positive it is tested enough. This is a re-implementation of the FullOuterJoin method using essentially a simplified, customized version of LINQKit Invoke/Expand for Expression so that it should work with the Entity Framework. There's not much explanation as it is pretty much the same as my previous answer.

    /// <summary>
    /// Outer-join operators for <see cref="IQueryable{T}"/> built purely from expression trees: the
    /// caller's result selector is inlined into the generated lambdas instead of being invoked.
    /// NOTE(review): the stated goal is translation by query providers such as Entity Framework -
    /// confirm against the provider you use.
    /// </summary>
    public static class Ext {
        // Casts an untyped lambda to the typed shape Queryable.SelectMany expects. The "unused" arguments
        // exist only so the compiler can infer type arguments that involve an anonymous type.
        private static Expression<Func<TP, TC, TResult>> CastSMBody<TP, TC, TResult>(LambdaExpression ex, TP unusedP, TC unusedC, TResult unusedRes) => (Expression<Func<TP, TC, TResult>>)ex;

        /// <summary>
        /// Left outer join: every left item is paired with each matching right item, or with
        /// <c>default(TRight)</c> when no right item has the same key.
        /// </summary>
        /// <param name="leftItems">Left source; all of its items appear in the result.</param>
        /// <param name="rightItems">Right source.</param>
        /// <param name="leftKeySelector">Extracts the join key from a left item.</param>
        /// <param name="rightKeySelector">Extracts the join key from a right item.</param>
        /// <param name="resultSelector">Projection whose body is inlined into the generated query.</param>
        /// <returns>A query producing one result per (left, right-or-default) pair.</returns>
        public static IQueryable<TResult> LeftOuterJoin<TLeft, TRight, TKey, TResult>(
            this IQueryable<TLeft> leftItems,
            IQueryable<TRight> rightItems,
            Expression<Func<TLeft, TKey>> leftKeySelector,
            Expression<Func<TRight, TKey>> rightKeySelector,
            Expression<Func<TLeft, TRight, TResult>> resultSelector) {

            // (lrg,r) => resultSelector(lrg.left, r)
            // The sample instance is never used as data; it only names the anonymous type that the
            // GroupJoin projection below produces, so a parameter of that type can be declared.
            var sampleAnonLR = new { left = default(TLeft), rightg = default(IEnumerable<TRight>) };
            var parmP = Expression.Parameter(sampleAnonLR.GetType(), "lrg");
            var parmC = Expression.Parameter(typeof(TRight), "r");
            var argLeft = Expression.PropertyOrField(parmP, "left");
            var newleftrs = CastSMBody(Expression.Lambda(resultSelector.Apply(argLeft, parmC), parmP, parmC), sampleAnonLR, default(TRight), default(TResult));

            return leftItems.GroupJoin(rightItems, leftKeySelector, rightKeySelector, (left, rightg) => new { left, rightg }).SelectMany(r => r.rightg.DefaultIfEmpty(), newleftrs);
        }

        /// <summary>
        /// Right outer join: every right item is paired with each matching left item, or with
        /// <c>default(TLeft)</c> when no left item has the same key.
        /// </summary>
        /// <param name="leftItems">Left source.</param>
        /// <param name="rightItems">Right source; all of its items appear in the result.</param>
        /// <param name="leftKeySelector">Extracts the join key from a left item.</param>
        /// <param name="rightKeySelector">Extracts the join key from a right item.</param>
        /// <param name="resultSelector">Projection whose body is inlined into the generated query.</param>
        /// <returns>A query producing one result per (left-or-default, right) pair.</returns>
        public static IQueryable<TResult> RightOuterJoin<TLeft, TRight, TKey, TResult>(
            this IQueryable<TLeft> leftItems,
            IQueryable<TRight> rightItems,
            Expression<Func<TLeft, TKey>> leftKeySelector,
            Expression<Func<TRight, TKey>> rightKeySelector,
            Expression<Func<TLeft, TRight, TResult>> resultSelector) {

            // (lgr,l) => resultSelector(l, lgr.right)
            var sampleAnonLR = new { leftg = default(IEnumerable<TLeft>), right = default(TRight) };
            var parmP = Expression.Parameter(sampleAnonLR.GetType(), "lgr");
            var parmC = Expression.Parameter(typeof(TLeft), "l");
            var argRight = Expression.PropertyOrField(parmP, "right");
            var newrightrs = CastSMBody(Expression.Lambda(resultSelector.Apply(parmC, argRight), parmP, parmC), sampleAnonLR, default(TLeft), default(TResult));

            return rightItems.GroupJoin(leftItems, rightKeySelector, leftKeySelector, (right, leftg) => new { leftg, right })
                             .SelectMany(l => l.leftg.DefaultIfEmpty(), newrightrs);
        }

        // Same inference trick as CastSMBody, for the single-parameter lambda passed to Queryable.Select.
        private static Expression<Func<TParm, TResult>> CastSBody<TParm, TResult>(LambdaExpression ex, TParm unusedP, TResult unusedRes) => (Expression<Func<TParm, TResult>>)ex;

        /// <summary>
        /// Right anti semi join: yields a result only for right items that have no matching left item.
        /// The left argument of <paramref name="resultSelector"/> is replaced by a null constant, and
        /// member reads on it are folded to a constant (see <see cref="PropagateNull"/>).
        /// </summary>
        /// <param name="leftItems">Left source, used only to detect matches.</param>
        /// <param name="rightItems">Right source.</param>
        /// <param name="leftKeySelector">Extracts the join key from a left item.</param>
        /// <param name="rightKeySelector">Extracts the join key from a right item.</param>
        /// <param name="resultSelector">Projection whose body is inlined into the generated query.</param>
        /// <returns>A query producing one result per unmatched right item.</returns>
        public static IQueryable<TResult> RightAntiSemiJoin<TLeft, TRight, TKey, TResult>(
            this IQueryable<TLeft> leftItems,
            IQueryable<TRight> rightItems,
            Expression<Func<TLeft, TKey>> leftKeySelector,
            Expression<Func<TRight, TKey>> rightKeySelector,
            Expression<Func<TLeft, TRight, TResult>> resultSelector) where TLeft : class where TRight : class where TResult : class {

            // newrightrs = lgr => resultSelector(default(TLeft), lgr.right)
            var sampleAnonLgR = new { leftg = (IEnumerable<TLeft>)null, right = default(TRight) };
            var parmLgR = Expression.Parameter(sampleAnonLgR.GetType(), "lgr");
            var argLeft = Expression.Constant(default(TLeft), typeof(TLeft));
            var argRight = Expression.PropertyOrField(parmLgR, "right");
            var newrightrs = CastSBody(Expression.Lambda(resultSelector.Apply(argLeft, argRight), parmLgR), sampleAnonLgR, default(TResult));

            return rightItems.GroupJoin(leftItems, rightKeySelector, leftKeySelector, (right, leftg) => new { leftg, right }).Where(lgr => !lgr.leftg.Any()).Select(newrightrs);
        }

        /// <summary>
        /// Full outer join, composed as the left outer join followed by the right items that have no
        /// match on the left.
        /// </summary>
        /// <param name="leftItems">Left source.</param>
        /// <param name="rightItems">Right source.</param>
        /// <param name="leftKeySelector">Extracts the join key from a left item.</param>
        /// <param name="rightKeySelector">Extracts the join key from a right item.</param>
        /// <param name="resultSelector">Projection whose body is inlined into both halves of the query.</param>
        /// <returns>The concatenation of the left-outer-join rows and the right-only rows.</returns>
        public static IQueryable<TResult> FullOuterJoin<TLeft, TRight, TKey, TResult>(
            this IQueryable<TLeft> leftItems,
            IQueryable<TRight> rightItems,
            Expression<Func<TLeft, TKey>> leftKeySelector,
            Expression<Func<TRight, TKey>> rightKeySelector,
            Expression<Func<TLeft, TRight, TResult>> resultSelector)  where TLeft : class where TRight : class where TResult : class {

            return leftItems.LeftOuterJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector).Concat(leftItems.RightAntiSemiJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector));
        }

        /// <summary>
        /// Returns the body of <paramref name="e"/> with its parameters replaced, position by position,
        /// by <paramref name="args"/>, then folds member reads on null constants.
        /// Parameters without a corresponding argument are left untouched.
        /// </summary>
        public static Expression Apply(this LambdaExpression e, params Expression[] args) {
            var b = e.Body;

            // Parameters is already a collection of ParameterExpression, so no cast is needed.
            foreach (var pa in e.Parameters.Zip(args, (p, a) => (p, a))) {
                b = b.Replace(pa.p, pa.a);
            }

            return b.PropagateNull();
        }

        /// <summary>Returns <paramref name="orig"/> with every occurrence of the node <paramref name="from"/> replaced by <paramref name="to"/>.</summary>
        public static Expression Replace(this Expression orig, Expression from, Expression to) => new ReplaceVisitor(from, to).Visit(orig);

        /// <summary>Visitor that swaps one specific node (compared by reference) for another.</summary>
        public class ReplaceVisitor : System.Linq.Expressions.ExpressionVisitor {
            public readonly Expression from;
            public readonly Expression to;

            public ReplaceVisitor(Expression _from, Expression _to) {
                from = _from;
                to = _to;
            }

            public override Expression Visit(Expression node) => node == from ? to : base.Visit(node);
        }

        /// <summary>
        /// Replaces every member read whose instance is a null constant with a constant of the member's
        /// type, so that <c>((T)null).Member</c> never has to be evaluated.
        /// </summary>
        public static Expression PropagateNull(this Expression orig) => new NullVisitor().Visit(orig);

        /// <summary>Visitor behind <see cref="PropagateNull"/>.</summary>
        public class NullVisitor : System.Linq.Expressions.ExpressionVisitor {
            public override Expression Visit(Expression node) {
                // Rewrite the children first so a chain such as null.A.B collapses from the inside out;
                // checking before descending would leave "null.B" behind for the outer access.
                var visited = base.Visit(node);

                if (visited is MemberExpression nme && nme.Expression is ConstantExpression nce && nce.Value == null)
                    return DefaultConstant(nme.Type);

                return visited;
            }

            // Expression.Constant(null, type) is rejected for non-nullable value types (int, DateTime, ...),
            // so those members fold to their default value instead of null.
            private static Expression DefaultConstant(Type type) {
                var isNonNullableValueType = type.IsValueType && Nullable.GetUnderlyingType(type) == null;
                return Expression.Constant(isNonNullableValueType ? Activator.CreateInstance(type) : null, type);
            }
        }

        /// <summary>Returns the declared type of a field, property or event.</summary>
        /// <exception cref="ArgumentException"><paramref name="member"/> is any other kind of member.</exception>
        public static Type GetMemberType(this MemberInfo member) {
            switch (member) {
                case FieldInfo mfi:
                    return mfi.FieldType;
                case PropertyInfo mpi:
                    return mpi.PropertyType;
                case EventInfo mei:
                    return mei.EventHandlerType;
                default:
                    throw new ArgumentException("MemberInfo must be of type FieldInfo, PropertyInfo or EventInfo", nameof(member));
            }
        }
    }
    
    0 讨论(0)
  • 2020-11-21 23:07

    Yet another full outer join

    As I was not that happy with the simplicity and the readability of the other propositions, I ended up with this:

    It does not have the pretension to be fast ( about 800 ms to join 1000 * 1000 on a 2020m CPU : 2.4ghz / 2cores). To me, it is just a compact and casual full outer join.

    It works the same as a SQL FULL OUTER JOIN (duplicates conservation)

    Cheers ;-)

    using System;
    using System.Collections.Generic;
    using System.Linq;
    namespace NS
    {
    /// <summary>
    /// Compact in-memory full outer join over two lists.
    /// </summary>
    public static class DataReunion
    {
        /// <summary>
        /// Joins <paramref name="List1"/> and <paramref name="List2"/> on the keys produced by the two key
        /// functions. The result holds, in this order: items found only in the first list (paired with
        /// <c>default(T2)</c>), every matching pair (duplicates multiply), and items found only in the
        /// second list (paired with <c>default(T1)</c>). A null key never matches anything, so rows with
        /// a null key are always reported as unmatched.
        /// </summary>
        /// <param name="List1">Left input.</param>
        /// <param name="KeyFunc1">Computes the join key of a left item.</param>
        /// <param name="List2">Right input.</param>
        /// <param name="KeyFunc2">Computes the join key of a right item.</param>
        /// <returns>A new list with one tuple per output row.</returns>
        /// <remarks>
        /// Both sides are sorted by key with the default comparer before joining, which fixes the
        /// output order; the key type therefore has to be orderable by <c>Comparer&lt;TKey&gt;.Default</c>.
        /// </remarks>
        public static List<Tuple<T1, T2>> FullJoin<T1, T2, TKey>(List<T1> List1, Func<T1, TKey> KeyFunc1, List<T2> List2, Func<T2, TKey> KeyFunc2)
        {
            // Compute every key once and keep it next to its item.
            Tuple<TKey, T1>[] identifiedList1 = List1.Select(item => Tuple.Create(KeyFunc1(item), item)).OrderBy(pair => pair.Item1).ToArray();
            Tuple<TKey, T2>[] identifiedList2 = List2.Select(item => Tuple.Create(KeyFunc2(item), item)).OrderBy(pair => pair.Item1).ToArray();

            // Hash sets make each "is this key on the other side?" test O(1) instead of a scan of the
            // whole other list. Null keys are left out because the inner join below ignores them too;
            // counting them as present would make rows with a null key on both sides vanish.
            var keys1 = new HashSet<TKey>(identifiedList1.Select(pair => pair.Item1).Where(k => k != null));
            var keys2 = new HashSet<TKey>(identifiedList2.Select(pair => pair.Item1).Where(k => k != null));

            var result = new List<Tuple<T1, T2>>();

            // 1. Left-only items.
            result.AddRange(identifiedList1
                .Where(pair => pair.Item1 == null || !keys2.Contains(pair.Item1))
                .Select(pair => Tuple.Create<T1, T2>(pair.Item2, default(T2))));

            // 2. Inner join.
            result.AddRange(identifiedList1.Join(
                identifiedList2,
                left => left.Item1,
                right => right.Item1,
                (left, right) => Tuple.Create<T1, T2>(left.Item2, right.Item2)));

            // 3. Right-only items.
            result.AddRange(identifiedList2
                .Where(pair => pair.Item1 == null || !keys1.Contains(pair.Item1))
                .Select(pair => Tuple.Create<T1, T2>(default(T1), pair.Item2)));

            return result;
        }
    }
    }
    

    The idea is to

    1. Build Ids based on provided key function builders
    2. Process left only items
    3. Process inner join
    4. Process right only items

    Here is a succinct test that goes with it :

    Place a break point at the end to manually verify that it behaves as expected

    using System;
    using System.Collections.Generic;
    using Microsoft.VisualStudio.TestTools.UnitTesting;
    using Newtonsoft.Json;
    using Newtonsoft.Json.Linq;
    using NS;
    
    namespace Tests
    {
    /// <summary>
    /// Checks DataReunion.FullJoin on a mix of random and hand-picked rows keyed by (Item1, Item2).
    /// </summary>
    [TestClass]
    public class DataReunionTest
    {
        [TestMethod]
        public void Test()
        {
            List<Tuple<Int32, Int32, String>> A = new List<Tuple<Int32, Int32, String>>();
            List<Tuple<Int32, Int32, String>> B = new List<Tuple<Int32, Int32, String>>();

            // A fixed seed keeps the randomized part identical from run to run.
            Random rnd = new Random(20201121);

            // Comment out whichever of the two data blocks below you do not want to run.

            // Data block 1: a wide range of random keys.
            for (int i = 0; i < 500; i += 1)
            {
                A.Add(Tuple.Create(rnd.Next(1, 101), rnd.Next(1, 101), "A"));
                B.Add(Tuple.Create(rnd.Next(1, 101), rnd.Next(1, 101), "B"));
            }

            // Data block 2: hand-picked rows (keys present on one side only, and n-to-m matches).
            A.Add(Tuple.Create(1, 2, "B11"));
            A.Add(Tuple.Create(1, 2, "B12"));
            A.Add(Tuple.Create(1, 3, "C11"));
            A.Add(Tuple.Create(1, 3, "C12"));
            A.Add(Tuple.Create(1, 3, "C13"));
            A.Add(Tuple.Create(1, 4, "D1"));

            B.Add(Tuple.Create(1, 1, "A21"));
            B.Add(Tuple.Create(1, 1, "A22"));
            B.Add(Tuple.Create(1, 1, "A23"));
            B.Add(Tuple.Create(1, 2, "B21"));
            B.Add(Tuple.Create(1, 2, "B22"));
            B.Add(Tuple.Create(1, 2, "B23"));
            B.Add(Tuple.Create(1, 3, "C2"));
            B.Add(Tuple.Create(1, 5, "E2"));

            Func<Tuple<Int32, Int32, String>, Tuple<Int32, Int32>> key = (_) => Tuple.Create(_.Item1, _.Item2);

            var watch = System.Diagnostics.Stopwatch.StartNew();
            var res = DataReunion.FullJoin(A, key, B, key);
            watch.Stop();
            var elapsedMs = watch.ElapsedMilliseconds;

            // The row count of a full outer join is fully determined by the per-key item counts.
            Assert.AreEqual(ExpectedRowCount(A, B, key), res.Count, "Unexpected number of joined rows.");

            foreach (var row in res)
            {
                Assert.IsFalse(row.Item1 == null && row.Item2 == null, "A row must carry at least one side.");
                if (row.Item1 != null && row.Item2 != null)
                {
                    Assert.AreEqual(key(row.Item1), key(row.Item2), "Matched rows must share the same key.");
                }
            }

            // Kept for inspecting the joined rows in the debugger.
            String aser = JToken.FromObject(res).ToString(Formatting.Indented);
            Console.Write(elapsedMs);
        }

        // Expected size of the full outer join: countA * countB for keys present on both sides,
        // otherwise the number of items on the only side that has the key.
        private static int ExpectedRowCount(
            List<Tuple<Int32, Int32, String>> a,
            List<Tuple<Int32, Int32, String>> b,
            Func<Tuple<Int32, Int32, String>, Tuple<Int32, Int32>> key)
        {
            Dictionary<Tuple<Int32, Int32>, int> countsA = CountByKey(a, key);
            Dictionary<Tuple<Int32, Int32>, int> countsB = CountByKey(b, key);

            int expected = 0;
            foreach (KeyValuePair<Tuple<Int32, Int32>, int> entry in countsA)
            {
                int inB;
                expected += countsB.TryGetValue(entry.Key, out inB) ? entry.Value * inB : entry.Value;
            }
            foreach (KeyValuePair<Tuple<Int32, Int32>, int> entry in countsB)
            {
                if (!countsA.ContainsKey(entry.Key))
                {
                    expected += entry.Value;
                }
            }
            return expected;
        }

        // Number of items per key.
        private static Dictionary<Tuple<Int32, Int32>, int> CountByKey(
            List<Tuple<Int32, Int32, String>> items,
            Func<Tuple<Int32, Int32, String>, Tuple<Int32, Int32>> key)
        {
            Dictionary<Tuple<Int32, Int32>, int> counts = new Dictionary<Tuple<Int32, Int32>, int>();
            foreach (Tuple<Int32, Int32, String> item in items)
            {
                Tuple<Int32, Int32> k = key(item);
                int current;
                counts.TryGetValue(k, out current);
                counts[k] = current + 1;
            }
            return counts;
        }
    }
    

    }

    0 讨论(0)
  • 2020-11-21 23:11

    I've written this extensions class for an app perhaps 6 years ago, and have been using it ever since in many solutions without issues. Hope it helps.

    edit: I noticed some might not know how to use an extension class.

    To use this extension class, just reference its namespace in your class by adding the following line using joinext;

    ^ this should allow you to see the IntelliSense for the extension functions on any IEnumerable object collection you happen to use.

    Hope this helps. Let me know if it's still not clear, and I'll hopefully write a sample example on how to use it.

    Now here is the class:

    namespace joinext
    {    
    /// <summary>
    /// Outer-join extension methods for in-memory sequences.
    /// </summary>
    public static class JoinExtensions
    {
        /// <summary>
        /// Full outer join. Yields, in the order of <paramref name="outer"/>, every outer item paired with
        /// each matching inner item (or with <c>null</c> when there is none), followed, in the order of
        /// <paramref name="inner"/>, by every inner item without a match, paired with <c>null</c>.
        /// </summary>
        /// <param name="outer">First (left) sequence.</param>
        /// <param name="inner">Second (right) sequence.</param>
        /// <param name="outerKeySelector">Extracts the join key from an outer item.</param>
        /// <param name="innerKeySelector">Extracts the join key from an inner item.</param>
        /// <param name="resultSelector">Builds a result row; either argument may be <c>null</c>.</param>
        /// <returns>
        /// A deferred sequence: neither input is read until the result is enumerated, and each input
        /// is then read exactly once per enumeration of the result.
        /// </returns>
        /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
        public static IEnumerable<TResult> FullOuterJoin<TOuter, TInner, TKey, TResult>(
            this IEnumerable<TOuter> outer,
            IEnumerable<TInner> inner,
            Func<TOuter, TKey> outerKeySelector,
            Func<TInner, TKey> innerKeySelector,
            Func<TOuter, TInner, TResult> resultSelector)
            where TInner : class
            where TOuter : class
        {
            // Validate here rather than in the iterator so bad arguments fail at the call site.
            if (outer == null) throw new ArgumentNullException(nameof(outer));
            if (inner == null) throw new ArgumentNullException(nameof(inner));
            if (outerKeySelector == null) throw new ArgumentNullException(nameof(outerKeySelector));
            if (innerKeySelector == null) throw new ArgumentNullException(nameof(innerKeySelector));
            if (resultSelector == null) throw new ArgumentNullException(nameof(resultSelector));

            return FullOuterJoinIterator(outer, inner, outerKeySelector, innerKeySelector, resultSelector);
        }

        // Deferred body of FullOuterJoin.
        private static IEnumerable<TResult> FullOuterJoinIterator<TOuter, TInner, TKey, TResult>(
            IEnumerable<TOuter> outer,
            IEnumerable<TInner> inner,
            Func<TOuter, TKey> outerKeySelector,
            Func<TInner, TKey> innerKeySelector,
            Func<TOuter, TInner, TResult> resultSelector)
            where TInner : class
            where TOuter : class
        {
            // Snapshot both inputs so that each source is enumerated a single time and both phases
            // below work on the same data.
            var innerItems = inner.ToList();
            var outerItems = outer.ToList();
            var innerLookup = innerItems.ToLookup(innerKeySelector);
            var outerLookup = outerItems.ToLookup(outerKeySelector);

            foreach (var outerItem in outerItems)
            {
                // DefaultIfEmpty supplies the single null partner for unmatched outer items.
                foreach (var innerItem in innerLookup[outerKeySelector(outerItem)].DefaultIfEmpty())
                {
                    yield return resultSelector(outerItem, innerItem);
                }
            }

            foreach (var innerItem in innerItems)
            {
                if (!outerLookup.Contains(innerKeySelector(innerItem)))
                {
                    yield return resultSelector(null, innerItem);
                }
            }
        }

        /// <summary>
        /// Left outer join: every outer item paired with each matching inner item, or with
        /// <c>default(TInner)</c> when there is none.
        /// </summary>
        /// <param name="outer">Sequence whose items all appear in the result.</param>
        /// <param name="inner">Sequence to match against.</param>
        /// <param name="outerKeySelector">Extracts the join key from an outer item.</param>
        /// <param name="innerKeySelector">Extracts the join key from an inner item.</param>
        /// <param name="resultSelector">Builds a result row from an outer item and its partner.</param>
        public static IEnumerable<TResult> LeftJoin<TOuter, TInner, TKey, TResult>(
            this IEnumerable<TOuter> outer,
            IEnumerable<TInner> inner,
            Func<TOuter, TKey> outerKeySelector,
            Func<TInner, TKey> innerKeySelector,
            Func<TOuter, TInner, TResult> resultSelector)
        {
            return outer.GroupJoin(
                inner,
                outerKeySelector,
                innerKeySelector,
                (o, i) =>
                    new { o = o, i = i.DefaultIfEmpty() })
                    .SelectMany(m => m.i.Select(inn =>
                        resultSelector(m.o, inn)
                        ));
        }

        /// <summary>
        /// Right outer join: every inner item paired with each matching outer item, or with
        /// <c>default(TOuter)</c> when there is none.
        /// </summary>
        /// <param name="outer">Sequence to match against.</param>
        /// <param name="inner">Sequence whose items all appear in the result.</param>
        /// <param name="outerKeySelector">Extracts the join key from an outer item.</param>
        /// <param name="innerKeySelector">Extracts the join key from an inner item.</param>
        /// <param name="resultSelector">Builds a result row from an inner item and its partner.</param>
        public static IEnumerable<TResult> RightJoin<TOuter, TInner, TKey, TResult>(
            this IEnumerable<TOuter> outer,
            IEnumerable<TInner> inner,
            Func<TOuter, TKey> outerKeySelector,
            Func<TInner, TKey> innerKeySelector,
            Func<TOuter, TInner, TResult> resultSelector)
        {
            return inner.GroupJoin(
                outer,
                innerKeySelector,
                outerKeySelector,
                (i, o) =>
                    new { i = i, o = o.DefaultIfEmpty() })
                    .SelectMany(m => m.o.Select(outt =>
                        resultSelector(outt, m.i)
                        ));
        }
    }
    }
    
    0 讨论(0)
  • 2020-11-21 23:15

    As you've found, Linq doesn't have an "outer join" construct. The closest you can get is a left outer join using the query you stated. To this, you can add any elements of the lastname list that aren't represented in the join:

    // Keep a separate reference to the left join. Lambdas capture the variable, not its current
    // value, so testing against `outerJoin` (which is reassigned on the next line) would make the
    // combined query consult itself while being enumerated and recurse without end as soon as one
    // surname has no match.
    var leftJoin = outerJoin;
    outerJoin = leftJoin.Concat(lastNames.Select(l=>new
                                {
                                    id = l.ID,
                                    firstname = String.Empty,
                                    surname = l.Name
                                }).Where(l=>!leftJoin.Any(o=>o.id == l.id)));
    
    0 讨论(0)
  • 2020-11-21 23:15

    Performs an in-memory streaming enumeration over both inputs and invokes the selector for each row. If there is no correlation at the current iteration, one of the selector arguments will be null.

    Example:

       // Each key selector receives one element at a time, so it reads the key from its own
       // parameter rather than from the `left` / `right` collections.
       var result = left.FullOuterJoin(
             right, 
             l => l.Key, 
             r => r.Key, 
             (l,r) => new { LeftKey = l?.Key, RightKey=r?.Key });
    
    • Requires an IComparer for the correlation type, uses the Comparer.Default if not provided.

    • Requires that 'OrderBy' is applied to the input enumerables

      /// <summary>
      /// Performs a full outer join on two <see cref="IEnumerable{T}" /> by sorting both inputs on
      /// their key and merging them in a single forward pass.
      /// </summary>
      /// <remarks>
      /// Rows are paired one-to-one: when the current keys compare equal, one left row and one right
      /// row are consumed together, so repeated keys are not expanded into a cross product.
      /// Execution is deferred; nothing is sorted or read until the result is enumerated.
      /// </remarks>
      /// <typeparam name="TLeft">Element type of the left input.</typeparam>
      /// <typeparam name="TRight">Element type of the right input.</typeparam>
      /// <typeparam name="TValue">Type of the key the inputs are correlated on.</typeparam>
      /// <typeparam name="TResult">Type of the rows produced by <paramref name="selector"/>.</typeparam>
      /// <param name="left">Left input.</param>
      /// <param name="right">Right input.</param>
      /// <param name="leftKeySelector">Extracts the key from a left element.</param>
      /// <param name="rightKeySelector">Extracts the key from a right element.</param>
      /// <param name="selector">Builds a result row; the argument of the missing side is <c>null</c>.</param>
      /// <param name="keyComparer">Orders and matches the keys; defaults to <c>Comparer&lt;TValue&gt;.Default</c>.</param>
      /// <returns>The merged rows, in ascending key order according to <paramref name="keyComparer"/>.</returns>
      [System.Diagnostics.DebuggerStepThrough]
      public static IEnumerable<TResult> FullOuterJoin<TLeft, TRight, TValue, TResult>(
          this IEnumerable<TLeft> left,
          IEnumerable<TRight> right,
          Func<TLeft, TValue> leftKeySelector,
          Func<TRight, TValue> rightKeySelector,
          Func<TLeft, TRight, TResult> selector,
          IComparer<TValue> keyComparer = null)
          where TLeft: class
          where TRight: class
          where TValue : IComparable
      {

          keyComparer = keyComparer ?? Comparer<TValue>.Default;

          // Sort with the same comparer that drives the merge; otherwise a custom comparer would
          // walk sequences that are ordered by a different rule.
          using (var enumLeft = left.OrderBy(leftKeySelector, keyComparer).GetEnumerator())
          using (var enumRight = right.OrderBy(rightKeySelector, keyComparer).GetEnumerator())
          {

              var hasLeft = enumLeft.MoveNext();
              var hasRight = enumRight.MoveNext();
              while (hasLeft || hasRight)
              {

                  // Current is only meaningful while the enumerator is positioned on an element.
                  var currentLeft = hasLeft ? enumLeft.Current : default(TLeft);
                  var currentRight = hasRight ? enumRight.Current : default(TRight);

                  // An exhausted side sorts after everything that is left on the other side.
                  int compare =
                      !hasLeft ? 1
                      : !hasRight ? -1
                      : keyComparer.Compare(leftKeySelector(currentLeft), rightKeySelector(currentRight));

                  // Only the sign of a comparison is meaningful; comparers may return any non-zero
                  // magnitude, so testing for exactly -1 / 1 could leave both enumerators stuck.
                  if (compare == 0)
                  {
                      // The keys match. An inner join is achieved
                      yield return selector(currentLeft, currentRight);
                      hasLeft = enumLeft.MoveNext();
                      hasRight = enumRight.MoveNext();
                  }
                  else if (compare < 0)
                  {
                      yield return selector(currentLeft, default(TRight));
                      hasLeft = enumLeft.MoveNext();
                  }
                  else
                  {
                      yield return selector(default(TLeft), currentRight);
                      hasRight = enumRight.MoveNext();
                  }
              }

          }

      }
      
    0 讨论(0)
  • 2020-11-21 23:17

    I like sehe's answer, but it does not use deferred execution (the input sequences are eagerly enumerated by the calls to ToLookup). So after looking at the .NET sources for LINQ-to-objects, I came up with this:

    /// <summary>
    /// Deferred full outer join for LINQ-to-objects sequences.
    /// </summary>
    public static class LinqExtensions
    {
        /// <summary>
        /// Correlates two sequences on a key and yields one result per (left, right) combination for
        /// every key found on either side. A side without an element for a key contributes
        /// <paramref name="defaultLeft"/> or <paramref name="defaultRight"/> instead.
        /// </summary>
        /// <param name="left">Left sequence.</param>
        /// <param name="right">Right sequence.</param>
        /// <param name="leftKeySelector">Extracts the key from a left element.</param>
        /// <param name="rightKeySelector">Extracts the key from a right element.</param>
        /// <param name="resultSelector">Builds a result from a left value, a right value and their key.</param>
        /// <param name="comparator">Key equality; the default equality comparer is used when omitted.</param>
        /// <param name="defaultLeft">Stand-in for a missing left element.</param>
        /// <param name="defaultRight">Stand-in for a missing right element.</param>
        /// <returns>A sequence that reads its inputs only once it is enumerated.</returns>
        /// <exception cref="ArgumentNullException">A sequence or selector argument is <c>null</c>.</exception>
        public static IEnumerable<TResult> FullOuterJoin<TLeft, TRight, TKey, TResult>(
            this IEnumerable<TLeft> left,
            IEnumerable<TRight> right,
            Func<TLeft, TKey> leftKeySelector,
            Func<TRight, TKey> rightKeySelector,
            Func<TLeft, TRight, TKey, TResult> resultSelector,
            IEqualityComparer<TKey> comparator = null,
            TLeft defaultLeft = default(TLeft),
            TRight defaultRight = default(TRight))
        {
            // Checked eagerly, outside the iterator, so mistakes surface at the call site.
            if (left == null)
            {
                throw new ArgumentNullException(nameof(left));
            }
            if (right == null)
            {
                throw new ArgumentNullException(nameof(right));
            }
            if (leftKeySelector == null)
            {
                throw new ArgumentNullException(nameof(leftKeySelector));
            }
            if (rightKeySelector == null)
            {
                throw new ArgumentNullException(nameof(rightKeySelector));
            }
            if (resultSelector == null)
            {
                throw new ArgumentNullException(nameof(resultSelector));
            }

            return FullOuterJoinIterator(
                left,
                right,
                leftKeySelector,
                rightKeySelector,
                resultSelector,
                comparator ?? EqualityComparer<TKey>.Default,
                defaultLeft,
                defaultRight);
        }

        // Iterator half of FullOuterJoin: the lookups are built on first MoveNext, not at call time.
        internal static IEnumerable<TResult> FullOuterJoinIterator<TLeft, TRight, TKey, TResult>(
            this IEnumerable<TLeft> left,
            IEnumerable<TRight> right,
            Func<TLeft, TKey> leftKeySelector,
            Func<TRight, TKey> rightKeySelector,
            Func<TLeft, TRight, TKey, TResult> resultSelector,
            IEqualityComparer<TKey> comparator,
            TLeft defaultLeft,
            TRight defaultRight)
        {
            var leftByKey = left.ToLookup(leftKeySelector, comparator);
            var rightByKey = right.ToLookup(rightKeySelector, comparator);

            // Keys in order of first appearance: all left keys, then right keys not seen on the left.
            var leftKeys = leftByKey.Select(group => group.Key);
            var rightKeys = rightByKey.Select(group => group.Key);
            var allKeys = leftKeys.Concat(rightKeys).Distinct(comparator);

            var rows =
                from key in allKeys
                from leftValue in leftByKey[key].DefaultIfEmpty(defaultLeft)
                from rightValue in rightByKey[key].DefaultIfEmpty(defaultRight)
                select resultSelector(leftValue, rightValue, key);

            foreach (var row in rows)
            {
                yield return row;
            }
        }
    }
    

    This implementation has the following important properties:

    • Deferred execution, input sequences will not be enumerated before the output sequence is enumerated.
    • Only enumerates the input sequences once each.
    • Preserves order of input sequences, in the sense that it will yield tuples in the order of the left sequence and then the right (for the keys not present in left sequence).

    These properties are important, because they are what someone new to FullOuterJoin but experienced with LINQ will expect.

    0 讨论(0)
提交回复
热议问题