I have a list of people's ID and their first name, and a list of people's ID and their surname. Some people don't have a first name and some don't have a surname; I'd l
I decided to add this as a separate answer as I am not positive it is tested enough. This is a re-implementation of the FullOuterJoin method using essentially a simplified, customized version of LINQKit's Invoke/Expand for Expression, so that it should work with Entity Framework. There's not much explanation as it is pretty much the same as my previous answer.
/// <summary>
/// Outer-join operators for <see cref="IQueryable{T}"/> sources. Instead of invoking the caller's
/// result selector, each operator substitutes new argument expressions into the selector's body,
/// so the whole join stays a single expression tree handed to the underlying Queryable operators.
/// </summary>
public static class Ext {
    // Casts an untyped lambda to a typed two-parameter expression. The unused value parameters exist
    // only so the compiler can infer TP from a sample instance (TP is an anonymous type at the call sites).
    private static Expression<Func<TP, TC, TResult>> CastSMBody<TP, TC, TResult>(LambdaExpression ex, TP unusedP, TC unusedC, TResult unusedRes) => (Expression<Func<TP, TC, TResult>>)ex;

    /// <summary>
    /// Left outer join: every left item is paired with each matching right item, or with the
    /// element produced by <c>DefaultIfEmpty()</c> when no right item matches.
    /// </summary>
    /// <param name="leftItems">The left (preserved) source.</param>
    /// <param name="rightItems">The right source.</param>
    /// <param name="leftKeySelector">Extracts the join key from a left item.</param>
    /// <param name="rightKeySelector">Extracts the join key from a right item.</param>
    /// <param name="resultSelector">Builds a result from a left item and a (possibly default) right item.</param>
    /// <returns>A query producing one result per (left, right-or-default) pair.</returns>
    public static IQueryable<TResult> LeftOuterJoin<TLeft, TRight, TKey, TResult>(
        this IQueryable<TLeft> leftItems,
        IQueryable<TRight> rightItems,
        Expression<Func<TLeft, TKey>> leftKeySelector,
        Expression<Func<TRight, TKey>> rightKeySelector,
        Expression<Func<TLeft, TRight, TResult>> resultSelector) {
        // (lrg,r) => resultSelector(lrg.left, r)
        // The sample instance is never used as data; it only supplies the anonymous type.
        var sampleAnonLR = new { left = default(TLeft), rightg = default(IEnumerable<TRight>) };
        var parmP = Expression.Parameter(sampleAnonLR.GetType(), "lrg");
        var parmC = Expression.Parameter(typeof(TRight), "r");
        var argLeft = Expression.PropertyOrField(parmP, "left");
        var newleftrs = CastSMBody(Expression.Lambda(resultSelector.Apply(argLeft, parmC), parmP, parmC), sampleAnonLR, default(TRight), default(TResult));

        return leftItems
            .GroupJoin(rightItems, leftKeySelector, rightKeySelector, (left, rightg) => new { left, rightg })
            .SelectMany(r => r.rightg.DefaultIfEmpty(), newleftrs);
    }

    /// <summary>
    /// Right outer join: every right item is paired with each matching left item, or with the
    /// element produced by <c>DefaultIfEmpty()</c> when no left item matches.
    /// </summary>
    /// <param name="leftItems">The left source.</param>
    /// <param name="rightItems">The right (preserved) source.</param>
    /// <param name="leftKeySelector">Extracts the join key from a left item.</param>
    /// <param name="rightKeySelector">Extracts the join key from a right item.</param>
    /// <param name="resultSelector">Builds a result from a (possibly default) left item and a right item.</param>
    /// <returns>A query producing one result per (left-or-default, right) pair.</returns>
    public static IQueryable<TResult> RightOuterJoin<TLeft, TRight, TKey, TResult>(
        this IQueryable<TLeft> leftItems,
        IQueryable<TRight> rightItems,
        Expression<Func<TLeft, TKey>> leftKeySelector,
        Expression<Func<TRight, TKey>> rightKeySelector,
        Expression<Func<TLeft, TRight, TResult>> resultSelector) {
        // (lgr,l) => resultSelector(l, lgr.right)
        var sampleAnonLR = new { leftg = default(IEnumerable<TLeft>), right = default(TRight) };
        var parmP = Expression.Parameter(sampleAnonLR.GetType(), "lgr");
        var parmC = Expression.Parameter(typeof(TLeft), "l");
        var argRight = Expression.PropertyOrField(parmP, "right");
        var newrightrs = CastSMBody(Expression.Lambda(resultSelector.Apply(parmC, argRight), parmP, parmC), sampleAnonLR, default(TLeft), default(TResult));

        return rightItems
            .GroupJoin(leftItems, rightKeySelector, leftKeySelector, (right, leftg) => new { leftg, right })
            .SelectMany(l => l.leftg.DefaultIfEmpty(), newrightrs);
    }

    // Casts an untyped lambda to a typed one-parameter expression; unusedP/unusedRes drive type inference only.
    private static Expression<Func<TParm, TResult>> CastSBody<TParm, TResult>(LambdaExpression ex, TParm unusedP, TResult unusedRes) => (Expression<Func<TParm, TResult>>)ex;

    /// <summary>
    /// Right anti semi join: yields a result for every right item that has no matching left item.
    /// The left argument of <paramref name="resultSelector"/> is replaced by a null constant.
    /// </summary>
    /// <param name="leftItems">The left source.</param>
    /// <param name="rightItems">The right source whose unmatched items are returned.</param>
    /// <param name="leftKeySelector">Extracts the join key from a left item.</param>
    /// <param name="rightKeySelector">Extracts the join key from a right item.</param>
    /// <param name="resultSelector">Builds a result; its left parameter is substituted with null.</param>
    /// <returns>A query producing one result per unmatched right item.</returns>
    public static IQueryable<TResult> RightAntiSemiJoin<TLeft, TRight, TKey, TResult>(
        this IQueryable<TLeft> leftItems,
        IQueryable<TRight> rightItems,
        Expression<Func<TLeft, TKey>> leftKeySelector,
        Expression<Func<TRight, TKey>> rightKeySelector,
        Expression<Func<TLeft, TRight, TResult>> resultSelector) where TLeft : class where TRight : class where TResult : class {
        // newrightrs = lgr => resultSelector(default(TLeft), lgr.right)
        var sampleAnonLgR = new { leftg = (IEnumerable<TLeft>)null, right = default(TRight) };
        var parmLgR = Expression.Parameter(sampleAnonLgR.GetType(), "lgr");
        var argLeft = Expression.Constant(default(TLeft), typeof(TLeft));
        var argRight = Expression.PropertyOrField(parmLgR, "right");
        var newrightrs = CastSBody(Expression.Lambda(resultSelector.Apply(argLeft, argRight), parmLgR), sampleAnonLgR, default(TResult));

        return rightItems
            .GroupJoin(leftItems, rightKeySelector, leftKeySelector, (right, leftg) => new { leftg, right })
            .Where(lgr => !lgr.leftg.Any())
            .Select(newrightrs);
    }

    /// <summary>
    /// Full outer join: the left outer join concatenated with the right items that have no left match.
    /// </summary>
    /// <param name="leftItems">The left source.</param>
    /// <param name="rightItems">The right source.</param>
    /// <param name="leftKeySelector">Extracts the join key from a left item.</param>
    /// <param name="rightKeySelector">Extracts the join key from a right item.</param>
    /// <param name="resultSelector">Builds a result; either argument may be absent for unmatched rows.</param>
    /// <returns>A query producing matched pairs, unmatched left rows and unmatched right rows.</returns>
    public static IQueryable<TResult> FullOuterJoin<TLeft, TRight, TKey, TResult>(
        this IQueryable<TLeft> leftItems,
        IQueryable<TRight> rightItems,
        Expression<Func<TLeft, TKey>> leftKeySelector,
        Expression<Func<TRight, TKey>> rightKeySelector,
        Expression<Func<TLeft, TRight, TResult>> resultSelector) where TLeft : class where TRight : class where TResult : class {
        return leftItems.LeftOuterJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector).Concat(leftItems.RightAntiSemiJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector));
    }

    /// <summary>
    /// Returns the body of <paramref name="e"/> with each parameter replaced by the argument at the
    /// same position, then collapses member accesses on null constants.
    /// </summary>
    /// <param name="e">The lambda whose body is rewritten.</param>
    /// <param name="args">Replacement expressions, matched to parameters by position.</param>
    /// <returns>The rewritten body expression.</returns>
    public static Expression Apply(this LambdaExpression e, params Expression[] args) {
        var b = e.Body;
        // Zip stops at the shorter sequence, so surplus parameters or arguments are ignored.
        foreach (var pa in e.Parameters.Zip(args, (p, a) => (p, a))) {
            b = b.Replace(pa.p, pa.a);
        }
        return b.PropagateNull();
    }

    /// <summary>Replaces every occurrence of <paramref name="from"/> (by reference) in <paramref name="orig"/> with <paramref name="to"/>.</summary>
    public static Expression Replace(this Expression orig, Expression from, Expression to) => new ReplaceVisitor(from, to).Visit(orig);

    /// <summary>Visitor that swaps one specific expression node (compared by reference) for another.</summary>
    public class ReplaceVisitor : System.Linq.Expressions.ExpressionVisitor {
        public readonly Expression from;
        public readonly Expression to;

        public ReplaceVisitor(Expression _from, Expression _to) {
            from = _from;
            to = _to;
        }

        public override Expression Visit(Expression node) => node == from ? to : base.Visit(node);
    }

    /// <summary>Rewrites member accesses on null constants in <paramref name="orig"/> into typed null constants.</summary>
    public static Expression PropagateNull(this Expression orig) => new NullVisitor().Visit(orig);

    /// <summary>
    /// Visitor that turns <c>null.Member</c> into a null constant of the member's type.
    /// </summary>
    public class NullVisitor : System.Linq.Expressions.ExpressionVisitor {
        public override Expression Visit(Expression node) {
            // Visit children first so that a chain such as null.A.B collapses completely:
            // null.A becomes a null constant, which then makes (null).B eligible as well.
            var visited = base.Visit(node);
            if (visited is MemberExpression nme && nme.Expression is ConstantExpression nce && nce.Value == null) {
                // Use the member already bound in the expression rather than looking it up again by name.
                // NOTE(review): a member of non-nullable value type cannot be expressed as a null
                // constant - confirm selectors only read reference/nullable members from the missing side.
                return Expression.Constant(null, nme.Member.GetMemberType());
            }
            return visited;
        }
    }

    /// <summary>Returns the declared type of a field, property or event member.</summary>
    /// <param name="member">The member to inspect.</param>
    /// <returns>The field type, property type or event handler type.</returns>
    /// <exception cref="ArgumentException">The member is of any other kind.</exception>
    public static Type GetMemberType(this MemberInfo member) {
        switch (member) {
            case FieldInfo mfi:
                return mfi.FieldType;
            case PropertyInfo mpi:
                return mpi.PropertyType;
            case EventInfo mei:
                return mei.EventHandlerType;
            default:
                throw new ArgumentException("MemberInfo must be of type FieldInfo, PropertyInfo or EventInfo", nameof(member));
        }
    }
}
Yet another full outer join
As I was not that happy with the simplicity and the readability of the other propositions, I ended up with this:
It does not have the pretension to be fast ( about 800 ms to join 1000 * 1000 on a 2020m CPU : 2.4ghz / 2cores). To me, it is just a compact and casual full outer join.
It works the same as a SQL FULL OUTER JOIN (duplicates conservation)
Cheers ;-)
using System;
using System.Collections.Generic;
using System.Linq;
namespace NS
{
public static class DataReunion
{
    /// <summary>
    /// Full outer join of two lists. The result holds, in this order: the rows of
    /// <paramref name="List1"/> without a partner, every matching pair (duplicates preserved),
    /// and the rows of <paramref name="List2"/> without a partner. Each group is ordered by key.
    /// </summary>
    /// <param name="List1">Left input.</param>
    /// <param name="KeyFunc1">Extracts the join key from a left item.</param>
    /// <param name="List2">Right input.</param>
    /// <param name="KeyFunc2">Extracts the join key from a right item.</param>
    /// <returns>Tuples whose missing side is <c>default</c>.</returns>
    /// <exception cref="ArgumentNullException">Any argument is null.</exception>
    public static List<Tuple<T1, T2>> FullJoin<T1, T2, TKey>(List<T1> List1, Func<T1, TKey> KeyFunc1, List<T2> List2, Func<T2, TKey> KeyFunc2)
    {
        if (List1 == null) throw new ArgumentNullException(nameof(List1));
        if (KeyFunc1 == null) throw new ArgumentNullException(nameof(KeyFunc1));
        if (List2 == null) throw new ArgumentNullException(nameof(List2));
        if (KeyFunc2 == null) throw new ArgumentNullException(nameof(KeyFunc2));

        // Compute each key once and sort by it; the sort fixes the output order.
        Tuple<TKey, T1>[] identifiedList1 = List1.Select(item => Tuple.Create(KeyFunc1(item), item)).OrderBy(pair => pair.Item1).ToArray();
        Tuple<TKey, T2>[] identifiedList2 = List2.Select(item => Tuple.Create(KeyFunc2(item), item)).OrderBy(pair => pair.Item1).ToArray();

        // Hash the keys of each side once instead of rescanning the other side for every row.
        var keys1 = new HashSet<TKey>(identifiedList1.Select(pair => pair.Item1));
        var keys2 = new HashSet<TKey>(identifiedList2.Select(pair => pair.Item1));

        var result = new List<Tuple<T1, T2>>();

        // Left rows without a partner. A null key never matches in the Join below, so such rows
        // are always reported as unmatched instead of being dropped.
        result.AddRange(identifiedList1
            .Where(pair => pair.Item1 == null || !keys2.Contains(pair.Item1))
            .Select(pair => Tuple.Create<T1, T2>(pair.Item2, default(T2))));

        // Matching pairs.
        result.AddRange(identifiedList1.Join(
            identifiedList2,
            left => left.Item1,
            right => right.Item1,
            (left, right) => Tuple.Create<T1, T2>(left.Item2, right.Item2)));

        // Right rows without a partner (same null-key rule as above).
        result.AddRange(identifiedList2
            .Where(pair => pair.Item1 == null || !keys1.Contains(pair.Item1))
            .Select(pair => Tuple.Create<T1, T2>(default(T1), pair.Item2)));

        return result;
    }
}
}
The idea is to
Here is a succinct test that goes with it :
Place a breakpoint at the end to manually verify that it behaves as expected
using System;
using System.Collections.Generic;
using Microsoft.VisualStudio.TestTools.UnitTesting;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using NS;
namespace Tests
{
[TestClass]
public class DataReunionTest
{
    /// <summary>
    /// Joins two lists on the composite key (Item1, Item2) and checks the invariants every
    /// full outer join must satisfy. The serialized result and elapsed time remain available
    /// for manual inspection with a breakpoint at the end of the method.
    /// </summary>
    [TestMethod]
    public void Test()
    {
        List<Tuple<Int32, Int32, String>> A = new List<Tuple<Int32, Int32, String>>();
        List<Tuple<Int32, Int32, String>> B = new List<Tuple<Int32, Int32, String>>();

        // Fixed seed so that a failing run can be reproduced.
        Random rnd = new Random(12345);

        // Comment out whichever of the two data blocks below you do not want to run.

        /* Data covering a wide range of keys */
        for (int i = 0; i < 500; i += 1)
        {
            A.Add(Tuple.Create(rnd.Next(1, 101), rnd.Next(1, 101), "A"));
            B.Add(Tuple.Create(rnd.Next(1, 101), rnd.Next(1, 101), "B"));
        }

        /* Data for essential testing: duplicates and one-sided keys */
        A.Add(Tuple.Create(1, 2, "B11"));
        A.Add(Tuple.Create(1, 2, "B12"));
        A.Add(Tuple.Create(1, 3, "C11"));
        A.Add(Tuple.Create(1, 3, "C12"));
        A.Add(Tuple.Create(1, 3, "C13"));
        A.Add(Tuple.Create(1, 4, "D1"));
        B.Add(Tuple.Create(1, 1, "A21"));
        B.Add(Tuple.Create(1, 1, "A22"));
        B.Add(Tuple.Create(1, 1, "A23"));
        B.Add(Tuple.Create(1, 2, "B21"));
        B.Add(Tuple.Create(1, 2, "B22"));
        B.Add(Tuple.Create(1, 2, "B23"));
        B.Add(Tuple.Create(1, 3, "C2"));
        B.Add(Tuple.Create(1, 5, "E2"));

        Func<Tuple<Int32, Int32, String>, Tuple<Int32, Int32>> key = (_) => Tuple.Create(_.Item1, _.Item2);

        var watch = System.Diagnostics.Stopwatch.StartNew();
        var res = DataReunion.FullJoin(A, key, B, key);
        watch.Stop();
        var elapsedMs = watch.ElapsedMilliseconds;

        // No row may be empty on both sides.
        Assert.IsTrue(
            res.TrueForAll(row => row.Item1 != null || row.Item2 != null),
            "A result row has neither a left nor a right item.");

        // Whenever both sides are present, their keys must be equal.
        Assert.IsTrue(
            res.TrueForAll(row => row.Item1 == null || row.Item2 == null || key(row.Item1).Equals(key(row.Item2))),
            "A matched row pairs items with different keys.");

        // Every input row must survive the join at least once.
        Assert.IsTrue(
            A.TrueForAll(a => res.Exists(row => ReferenceEquals(row.Item1, a))),
            "A left item is missing from the result.");
        Assert.IsTrue(
            B.TrueForAll(b => res.Exists(row => ReferenceEquals(row.Item2, b))),
            "A right item is missing from the result.");

        // Kept for manual inspection in the debugger.
        String aser = JToken.FromObject(res).ToString(Formatting.Indented);
        Console.Write(elapsedMs);
    }
}
}
I've written this extensions class for an app perhaps 6 years ago, and have been using it ever since in many solutions without issues. Hope it helps.
edit: I noticed some might not know how to use an extension class.
To use this extension class, just reference its namespace in your class by adding the following line using joinext;
^ this should allow you to see the IntelliSense for the extension functions on any IEnumerable object collection you happen to use.
Hope this helps. Let me know if it's still not clear, and I'll hopefully write a sample example on how to use it.
Now here is the class:
namespace joinext
{
/// <summary>
/// Outer-join extension methods for in-memory sequences.
/// </summary>
public static class JoinExtensions
{
    /// <summary>
    /// Full outer join: every outer item paired with each matching inner item (or with null when
    /// none matches), followed by every inner item that has no matching outer item (paired with null).
    /// </summary>
    /// <param name="outer">The outer sequence; read exactly once, when this method is called.</param>
    /// <param name="inner">The inner sequence; read exactly once, when this method is called.</param>
    /// <param name="outerKeySelector">Extracts the join key from an outer item.</param>
    /// <param name="innerKeySelector">Extracts the join key from an inner item.</param>
    /// <param name="resultSelector">Builds a result; either argument may be null for unmatched rows.</param>
    /// <returns>A lazily evaluated sequence of results over the captured snapshots.</returns>
    public static IEnumerable<TResult> FullOuterJoin<TOuter, TInner, TKey, TResult>(
        this IEnumerable<TOuter> outer,
        IEnumerable<TInner> inner,
        Func<TOuter, TKey> outerKeySelector,
        Func<TInner, TKey> innerKeySelector,
        Func<TOuter, TInner, TResult> resultSelector)
        where TInner : class
        where TOuter : class
    {
        // Snapshot each input once. The lookups and the lazily evaluated parts below then work
        // on the same data, and one-shot sequences are not enumerated a second time.
        var innerSnapshot = inner.ToList();
        var outerSnapshot = outer.ToList();

        var innerLookup = innerSnapshot.ToLookup(innerKeySelector);
        var outerLookup = outerSnapshot.ToLookup(outerKeySelector);

        // Inner items whose key does not occur on the outer side.
        var innerJoinItems = innerSnapshot
            .Where(innerItem => !outerLookup.Contains(innerKeySelector(innerItem)))
            .Select(innerItem => resultSelector(null, innerItem));

        return outerSnapshot
            .SelectMany(outerItem =>
            {
                var innerItems = innerLookup[outerKeySelector(outerItem)];
                // An unmatched outer item is paired with a single null placeholder.
                return innerItems.Any() ? innerItems : new TInner[] { null };
            }, resultSelector)
            .Concat(innerJoinItems);
    }

    /// <summary>
    /// Left join: every outer item paired with each matching inner item, or with
    /// <c>default(TInner)</c> when no inner item matches.
    /// </summary>
    /// <param name="outer">The preserved sequence.</param>
    /// <param name="inner">The sequence searched for matches.</param>
    /// <param name="outerKeySelector">Extracts the join key from an outer item.</param>
    /// <param name="innerKeySelector">Extracts the join key from an inner item.</param>
    /// <param name="resultSelector">Builds a result from an outer item and a (possibly default) inner item.</param>
    /// <returns>One result per (outer, inner-or-default) pair.</returns>
    public static IEnumerable<TResult> LeftJoin<TOuter, TInner, TKey, TResult>(
        this IEnumerable<TOuter> outer,
        IEnumerable<TInner> inner,
        Func<TOuter, TKey> outerKeySelector,
        Func<TInner, TKey> innerKeySelector,
        Func<TOuter, TInner, TResult> resultSelector)
    {
        return outer.GroupJoin(
            inner,
            outerKeySelector,
            innerKeySelector,
            (o, i) =>
                new { o = o, i = i.DefaultIfEmpty() })
            .SelectMany(m => m.i.Select(inn =>
                resultSelector(m.o, inn)
            ));
    }

    /// <summary>
    /// Right join: every inner item paired with each matching outer item, or with
    /// <c>default(TOuter)</c> when no outer item matches.
    /// </summary>
    /// <param name="outer">The sequence searched for matches.</param>
    /// <param name="inner">The preserved sequence.</param>
    /// <param name="outerKeySelector">Extracts the join key from an outer item.</param>
    /// <param name="innerKeySelector">Extracts the join key from an inner item.</param>
    /// <param name="resultSelector">Builds a result from a (possibly default) outer item and an inner item.</param>
    /// <returns>One result per (outer-or-default, inner) pair.</returns>
    public static IEnumerable<TResult> RightJoin<TOuter, TInner, TKey, TResult>(
        this IEnumerable<TOuter> outer,
        IEnumerable<TInner> inner,
        Func<TOuter, TKey> outerKeySelector,
        Func<TInner, TKey> innerKeySelector,
        Func<TOuter, TInner, TResult> resultSelector)
    {
        return inner.GroupJoin(
            outer,
            innerKeySelector,
            outerKeySelector,
            (i, o) =>
                new { i = i, o = o.DefaultIfEmpty() })
            .SelectMany(m => m.o.Select(outt =>
                resultSelector(outt, m.i)
            ));
    }
}
}
As you've found, Linq doesn't have an "outer join" construct. The closest you can get is a left outer join using the query you stated. To this, you can add any elements of the lastname list that aren't represented in the join:
// Keep a separate reference to the left-join query. The lambda below captures a variable, not
// its value, so referring to `outerJoin` inside it would - once `outerJoin` has been reassigned -
// make the deferred query test membership against the concatenated query itself.
var leftJoin = outerJoin;
// Append every last name whose id is not already represented in the left join.
outerJoin = leftJoin.Concat(lastNames.Select(l=>new
{
id = l.ID,
firstname = String.Empty,
surname = l.Name
}).Where(l=>!leftJoin.Any(o=>o.id == l.id)));
Performs an in-memory streaming enumeration over both inputs and invokes the selector for each row. If there is no correlation at the current iteration, one of the selector arguments will be null.
Example:
// Each key selector must read the key from its own lambda parameter (one row),
// not from the `left` / `right` collections.
var result = left.FullOuterJoin(
right,
l => l.Key,
r => r.Key,
(l,r) => new { LeftKey = l?.Key, RightKey=r?.Key });
Requires an IComparer for the correlation type, uses the Comparer.Default if not provided.
Requires that 'OrderBy' is applied to the input enumerables
/// <summary>
/// Performs a full outer join on two <see cref="IEnumerable{T}" /> by sorting both inputs on
/// their key and merging the two sorted streams in a single pass.
/// </summary>
/// <remarks>
/// When the current keys compare equal, both sides advance together, so rows with a repeated
/// key are paired positionally (one-to-one) rather than as a cross product.
/// </remarks>
/// <typeparam name="TLeft">Element type of the left sequence.</typeparam>
/// <typeparam name="TRight">Element type of the right sequence.</typeparam>
/// <typeparam name="TValue">Type of the correlation key.</typeparam>
/// <typeparam name="TResult">Type produced by <paramref name="selector"/>.</typeparam>
/// <param name="left">The left sequence.</param>
/// <param name="right">The right sequence.</param>
/// <param name="leftKeySelector">Extracts the key from a left element.</param>
/// <param name="rightKeySelector">Extracts the key from a right element.</param>
/// <param name="selector">Expression defining result type; receives null for the side that has no row.</param>
/// <param name="keyComparer">A comparer if there is no default for the type; used for both sorting and matching.</param>
/// <returns>A lazily evaluated sequence with one result per merged row.</returns>
[System.Diagnostics.DebuggerStepThrough]
public static IEnumerable<TResult> FullOuterJoin<TLeft, TRight, TValue, TResult>(
    this IEnumerable<TLeft> left,
    IEnumerable<TRight> right,
    Func<TLeft, TValue> leftKeySelector,
    Func<TRight, TValue> rightKeySelector,
    Func<TLeft, TRight, TResult> selector,
    IComparer<TValue> keyComparer = null)
    where TLeft: class
    where TRight: class
    where TValue : IComparable
{
    keyComparer = keyComparer ?? Comparer<TValue>.Default;

    // Sort with the same comparer that drives the merge; otherwise the merge could walk
    // the two streams in an order the comparer does not agree with.
    using (var enumLeft = left.OrderBy(leftKeySelector, keyComparer).GetEnumerator())
    using (var enumRight = right.OrderBy(rightKeySelector, keyComparer).GetEnumerator())
    {
        var hasLeft = enumLeft.MoveNext();
        var hasRight = enumRight.MoveNext();

        while (hasLeft || hasRight)
        {
            // Only read Current while the enumerator is positioned on an element.
            var currentLeft = hasLeft ? enumLeft.Current : default(TLeft);
            var currentRight = hasRight ? enumRight.Current : default(TRight);

            int compare;
            if (!hasLeft)
            {
                compare = 1;
            }
            else if (!hasRight)
            {
                compare = -1;
            }
            else
            {
                compare = keyComparer.Compare(leftKeySelector(currentLeft), rightKeySelector(currentRight));
            }

            // Test the sign only: a comparer may return any negative or positive number, and
            // matching on exactly -1 / 1 would leave both enumerators stuck on other values.
            if (compare == 0)
            {
                // The selector matches. An inner join is achieved
                yield return selector(currentLeft, currentRight);
                hasLeft = enumLeft.MoveNext();
                hasRight = enumRight.MoveNext();
            }
            else if (compare < 0)
            {
                // Left key sorts first: it has no partner on the right.
                yield return selector(currentLeft, default(TRight));
                hasLeft = enumLeft.MoveNext();
            }
            else
            {
                // Right key sorts first: it has no partner on the left.
                yield return selector(default(TLeft), currentRight);
                hasRight = enumRight.MoveNext();
            }
        }
    }
}
I like sehe's answer, but it does not use deferred execution (the input sequences are eagerly enumerated by the calls to ToLookup). So after looking at the .NET sources for LINQ-to-objects, I came up with this:
/// <summary>
/// Full outer join for in-memory sequences with deferred execution.
/// </summary>
public static class LinqExtensions
{
    /// <summary>
    /// Validates the arguments immediately and returns a lazily evaluated full outer join.
    /// </summary>
    /// <param name="left">The left sequence.</param>
    /// <param name="right">The right sequence.</param>
    /// <param name="leftKeySelector">Extracts the key from a left element.</param>
    /// <param name="rightKeySelector">Extracts the key from a right element.</param>
    /// <param name="resultSelector">Builds a result from a left value, a right value and their key.</param>
    /// <param name="comparator">Key equality comparer; the default comparer is used when null.</param>
    /// <param name="defaultLeft">Value passed for the left side when a key has no left element.</param>
    /// <param name="defaultRight">Value passed for the right side when a key has no right element.</param>
    /// <returns>The joined sequence; the inputs are not read until it is enumerated.</returns>
    /// <exception cref="ArgumentNullException">A sequence or selector argument is null.</exception>
    public static IEnumerable<TResult> FullOuterJoin<TLeft, TRight, TKey, TResult>(
        this IEnumerable<TLeft> left,
        IEnumerable<TRight> right,
        Func<TLeft, TKey> leftKeySelector,
        Func<TRight, TKey> rightKeySelector,
        Func<TLeft, TRight, TKey, TResult> resultSelector,
        IEqualityComparer<TKey> comparator = null,
        TLeft defaultLeft = default(TLeft),
        TRight defaultRight = default(TRight))
    {
        if (left == null)
        {
            throw new ArgumentNullException(nameof(left));
        }
        if (right == null)
        {
            throw new ArgumentNullException(nameof(right));
        }
        if (leftKeySelector == null)
        {
            throw new ArgumentNullException(nameof(leftKeySelector));
        }
        if (rightKeySelector == null)
        {
            throw new ArgumentNullException(nameof(rightKeySelector));
        }
        if (resultSelector == null)
        {
            throw new ArgumentNullException(nameof(resultSelector));
        }

        return FullOuterJoinIterator(
            left,
            right,
            leftKeySelector,
            rightKeySelector,
            resultSelector,
            comparator ?? EqualityComparer<TKey>.Default,
            defaultLeft,
            defaultRight);
    }

    /// <summary>
    /// Iterator behind <c>FullOuterJoin</c>: groups both inputs by key on first enumeration, then
    /// yields, for every distinct key, each combination of left and right values for that key.
    /// </summary>
    internal static IEnumerable<TResult> FullOuterJoinIterator<TLeft, TRight, TKey, TResult>(
        this IEnumerable<TLeft> left,
        IEnumerable<TRight> right,
        Func<TLeft, TKey> leftKeySelector,
        Func<TRight, TKey> rightKeySelector,
        Func<TLeft, TRight, TKey, TResult> resultSelector,
        IEqualityComparer<TKey> comparator,
        TLeft defaultLeft,
        TRight defaultRight)
    {
        ILookup<TKey, TLeft> leftByKey = left.ToLookup(leftKeySelector, comparator);
        ILookup<TKey, TRight> rightByKey = right.ToLookup(rightKeySelector, comparator);

        // Distinct keys: those of the left side first, then any that only occur on the right.
        IEnumerable<TKey> leftKeys = leftByKey.Select(group => group.Key);
        IEnumerable<TKey> rightKeys = rightByKey.Select(group => group.Key);

        foreach (TKey currentKey in leftKeys.Union(rightKeys, comparator))
        {
            // A side without elements for this key contributes its single default value.
            IEnumerable<TLeft> leftValues = leftByKey[currentKey].DefaultIfEmpty(defaultLeft);
            IEnumerable<TRight> rightValues = rightByKey[currentKey].DefaultIfEmpty(defaultRight);

            foreach (TLeft leftValue in leftValues)
            {
                foreach (TRight rightValue in rightValues)
                {
                    yield return resultSelector(leftValue, rightValue, currentKey);
                }
            }
        }
    }
}
This implementation has the following important properties:
These properties are important, because they are what someone new to FullOuterJoin but experienced with LINQ will expect.