让Linq的Distinct更给力
Linq的Distinct含义就是去除重复项,那么如何定义重复项呢?微软给出了默认的方案:给出一个IEqualityComparer<T>的实例。
如果你不传IEqualityComparer<T>的实例,那么会使用EqualityComparer<T>.Default:类型实现了IEquatable<T>或重写了Equals/GetHashCode时按其逻辑比较,否则对引用类型来说就是比较引用。
这个方案很不方便,详见:Linq的Distinct太不给力了
鹤冲天 给出了他的方案,c# 扩展方法奇思妙用基础篇八:Distinct 扩展
受到启发,我们可以从0开始构造自己的扩展。其实说白了就是传一个委托,委托可以获取属性,根据那个属性(比如ID)来进行比较。
第一版:
public static class DistinctExtensions
{
    /// <summary>
    /// Removes duplicates from <paramref name="source"/> by forwarding to
    /// <c>Enumerable.Distinct</c> with a <c>DynamicComparer</c> built from
    /// <paramref name="keySelector"/>.
    /// </summary>
    /// <typeparam name="T">Element type of the sequence.</typeparam>
    /// <typeparam name="TProperty">Type of the value produced by <paramref name="keySelector"/>.</typeparam>
    /// <param name="source">Sequence to de-duplicate.</param>
    /// <param name="keySelector">Delegate handed to the comparer, e.g. <c>o =&gt; o.ID</c>.</param>
    /// <returns>The sequence returned by <c>Enumerable.Distinct</c>.</returns>
    /// <example><c>source.Distinct(o =&gt; o.ID)</c></example>
    public static IEnumerable<T> Distinct<T, TProperty>(this IEnumerable<T> source, Func<T, TProperty> keySelector)
    {
        IEqualityComparer<T> byKey = new DynamicComparer<T, TProperty>(keySelector);

        // Call the framework method explicitly so it is obvious this is not a recursive call.
        return Enumerable.Distinct(source, byKey);
    }
}
/// <summary>
/// Equality comparer that considers two <typeparamref name="T"/> values equal when the keys
/// extracted from them by a selector delegate are equal.
/// </summary>
/// <typeparam name="T">Type of the compared elements.</typeparam>
/// <typeparam name="TResult">Type of the key extracted from each element.</typeparam>
public class DynamicComparer<T, TResult> : IEqualityComparer<T>
{
    private readonly Func<T, TResult> _selector;

    /// <summary>Creates a comparer that compares elements by the key <paramref name="selector"/> returns.</summary>
    /// <param name="selector">Extracts the comparison key from an element.</param>
    /// <exception cref="ArgumentNullException"><paramref name="selector"/> is null.</exception>
    public DynamicComparer(Func<T, TResult> selector)
    {
        if (selector == null)
        {
            throw new ArgumentNullException("selector");
        }

        this._selector = selector;
    }

    /// <summary>Returns true when the keys of <paramref name="x"/> and <paramref name="y"/> are equal.</summary>
    public bool Equals(T x, T y)
    {
        return EqualityComparer<TResult>.Default.Equals(_selector(x), _selector(y));
    }

    /// <summary>Returns the hash code of the key extracted from <paramref name="obj"/>.</summary>
    public int GetHashCode(T obj)
    {
        // The hash must be derived from the same key that Equals compares. Hash-based
        // consumers (Distinct, HashSet, Dictionary) only call Equals for elements whose
        // hash codes collide, so hashing the whole element would keep "equal" elements apart.
        TResult key = _selector(obj);
        if (key == null)
        {
            return 0;
        }

        return EqualityComparer<TResult>.Default.GetHashCode(key);
    }
}
第二版:
public static class DistinctExtensions
{
    /// <summary>
    /// Removes duplicates from <paramref name="source"/> by forwarding to
    /// <c>Enumerable.Distinct</c> with a <c>CommonEqualityComparer</c> built from
    /// <paramref name="keySelector"/> and <paramref name="comparer"/>.
    /// </summary>
    /// <typeparam name="T">Element type of the sequence.</typeparam>
    /// <typeparam name="TProperty">Type of the value produced by <paramref name="keySelector"/>.</typeparam>
    /// <param name="source">Sequence to de-duplicate.</param>
    /// <param name="keySelector">Delegate handed to the comparer, e.g. <c>o =&gt; o.ID</c>.</param>
    /// <param name="comparer">Optional key comparer; passed through unchanged (null by default).</param>
    /// <returns>The sequence returned by <c>Enumerable.Distinct</c>.</returns>
    public static IEnumerable<T> Distinct<T, TProperty>(this IEnumerable<T> source, Func<T, TProperty> keySelector, IEqualityComparer<TProperty> comparer = null)
    {
        var byKey = new CommonEqualityComparer<T, TProperty>(keySelector, comparer);

        // Call the framework method explicitly so it is obvious this is not a recursive call.
        return Enumerable.Distinct(source, byKey);
    }
}
/// <summary>
/// Compares <typeparamref name="T"/> values through a key: both equality and hashing are
/// delegated to a key comparer applied to the result of a key selector.
/// </summary>
/// <typeparam name="T">Type of the compared elements.</typeparam>
/// <typeparam name="TProperty">Type of the key extracted from each element.</typeparam>
public class CommonEqualityComparer<T, TProperty> : IEqualityComparer<T>
{
    private readonly Func<T, TProperty> _selectKey;
    private readonly IEqualityComparer<TProperty> _keyComparer;

    /// <summary>Creates a comparer that uses the default equality comparer for the key type.</summary>
    /// <param name="keySelector">Extracts the comparison key from an element.</param>
    public CommonEqualityComparer(Func<T, TProperty> keySelector)
        : this(keySelector, null)
    {
    }

    /// <summary>Creates a comparer with an explicit key comparer.</summary>
    /// <param name="keySelector">Extracts the comparison key from an element.</param>
    /// <param name="comparer">
    /// Compares and hashes keys; when null, <c>EqualityComparer&lt;TProperty&gt;.Default</c> is used.
    /// </param>
    public CommonEqualityComparer(Func<T, TProperty> keySelector, IEqualityComparer<TProperty> comparer)
    {
        _selectKey = keySelector;

        if (comparer == null)
        {
            _keyComparer = EqualityComparer<TProperty>.Default;
        }
        else
        {
            _keyComparer = comparer;
        }
    }

    /// <summary>Returns true when the key comparer reports the two elements' keys as equal.</summary>
    public bool Equals(T x, T y)
    {
        TProperty leftKey = _selectKey(x);
        TProperty rightKey = _selectKey(y);
        return _keyComparer.Equals(leftKey, rightKey);
    }

    /// <summary>Returns the key comparer's hash code for the element's key.</summary>
    public int GetHashCode(T obj)
    {
        TProperty key = _selectKey(obj);
        return _keyComparer.GetHashCode(key);
    }
}
这个是最简单的版本。根据快速创建 IEqualityComparer<T> 和 IComparer<T> 的实例一文,可以封装一下new的逻辑。
第三版:
/// <summary>
/// Factory for key-based equality comparers over <typeparamref name="T"/>.
/// </summary>
/// <example>
/// <code>
/// var byId      = Equality&lt;Person&gt;.CreateComparer(p =&gt; p.ID);
/// var byName    = Equality&lt;Person&gt;.CreateComparer(p =&gt; p.Name);
/// var byYear    = Equality&lt;Person&gt;.CreateComparer(p =&gt; p.Birthday.Year);
/// var byNameCi  = Equality&lt;Person&gt;.CreateComparer(p =&gt; p.Name, StringComparer.CurrentCultureIgnoreCase);
/// </code>
/// </example>
/// <typeparam name="T">Type of the compared elements.</typeparam>
public static class Equality<T>
{
    /// <summary>
    /// Creates a comparer that compares elements by the selected key, using the default
    /// equality comparer of the key type.
    /// </summary>
    /// <param name="keySelector">Extracts the comparison key from an element.</param>
    public static IEqualityComparer<T> CreateComparer<V>(Func<T, V> keySelector)
    {
        return new KeyComparer<V>(keySelector, EqualityComparer<V>.Default);
    }

    /// <summary>
    /// Creates a comparer that compares elements by the selected key, using
    /// <paramref name="comparer"/> for the keys.
    /// </summary>
    /// <param name="keySelector">Extracts the comparison key from an element.</param>
    /// <param name="comparer">
    /// Compares and hashes keys; when null, <c>EqualityComparer&lt;V&gt;.Default</c> is used.
    /// </param>
    public static IEqualityComparer<T> CreateComparer<V>(Func<T, V> keySelector, IEqualityComparer<V> comparer)
    {
        return new KeyComparer<V>(keySelector, comparer);
    }

    /// <summary>Private comparer implementation; callers only ever see the interface.</summary>
    private sealed class KeyComparer<V> : IEqualityComparer<T>
    {
        private readonly Func<T, V> _selectKey;
        private readonly IEqualityComparer<V> _keyComparer;

        public KeyComparer(Func<T, V> selectKey, IEqualityComparer<V> keyComparer)
        {
            _selectKey = selectKey;
            _keyComparer = keyComparer ?? EqualityComparer<V>.Default;
        }

        public bool Equals(T x, T y)
        {
            V leftKey = _selectKey(x);
            V rightKey = _selectKey(y);
            return _keyComparer.Equals(leftKey, rightKey);
        }

        public int GetHashCode(T obj)
        {
            V key = _selectKey(obj);
            return _keyComparer.GetHashCode(key);
        }
    }
}
前面这些都是取某个属性来确定唯一性的方法,但如果我们是联合主键,Code + Name 一起确定唯一性,那么这些都不适用了。
但是我们通过上面的原理:传递委托,可以用以下方式实现。(注意,下面这个方法是错的!!)
public static class DistinctExtensions
{
    /// <summary>
    /// Removes duplicates from <paramref name="source"/> using a caller-supplied equality
    /// test, so uniqueness can depend on several properties (a composite key) rather than one.
    /// </summary>
    /// <example>
    /// <code>
    /// source.Distinct((x, y) =&gt; x.Code == y.Code &amp;&amp; x.Name == y.Name,
    ///                 x =&gt; (x.Code + "|" + x.Name).GetHashCode());
    /// </code>
    /// </example>
    /// <typeparam name="T">Element type of the sequence.</typeparam>
    /// <param name="source">Sequence to de-duplicate.</param>
    /// <param name="compareCallback">Returns true when two elements are duplicates.</param>
    /// <param name="getHashCodeCallback">
    /// Hash function that must return equal values for elements
    /// <paramref name="compareCallback"/> treats as equal. When omitted, a constant hash is
    /// used: results stay correct, but every element is compared against every distinct
    /// element seen so far (quadratic cost), so supply a real hash for large sequences.
    /// </param>
    /// <returns>The sequence returned by <c>Enumerable.Distinct</c>.</returns>
    public static IEnumerable<T> Distinct<T>(this IEnumerable<T> source, Func<T, T, bool> compareCallback, Func<T, int> getHashCodeCallback = null)
    {
        // Distinct only calls Equals for elements whose hash codes match. Without a hash
        // that agrees with compareCallback, the only safe choice is one shared bucket.
        Func<T, int> hash = getHashCodeCallback ?? (item => 0);

        return Enumerable.Distinct(source, new DelegatingEqualityComparer<T>(compareCallback, hash));
    }

    /// <summary>
    /// Same as the single-type-parameter overload. <typeparamref name="TProperty"/> is unused
    /// and cannot be inferred, so this overload is only reachable with explicit type
    /// arguments; it is kept so such existing calls continue to compile.
    /// </summary>
    public static IEnumerable<T> Distinct<T, TProperty>(this IEnumerable<T> source, Func<T, T, bool> compareCallback, Func<T, int> getHashCodeCallback = null)
    {
        return Distinct<T>(source, compareCallback, getHashCodeCallback);
    }
}
/// <summary>
/// Equality comparer whose behaviour is supplied by delegates. This is the article's
/// intentionally flawed version; see the hint inside <c>GetHashCode(T)</c>.
/// </summary>
/// <typeparam name="T">Type of the compared elements.</typeparam>
public sealed class DelegatingEqualityComparer<T> : IEqualityComparer<T>, IEqualityComparer
{
    private readonly Func<T, T, bool> _areEqual;
    private readonly Func<T, int> _hashOf;

    /// <summary>Creates a comparer with an equality callback and no hash callback.</summary>
    public DelegatingEqualityComparer(Func<T, T, bool> compareCallback)
        : this(compareCallback, null)
    {
    }

    /// <summary>Creates a comparer from an equality callback and an optional hash callback.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="compareCallback"/> is null.</exception>
    public DelegatingEqualityComparer(Func<T, T, bool> compareCallback, Func<T, int> getHashCodeCallback)
    {
        if (compareCallback == null)
        {
            throw new ArgumentNullException("compareCallback");
        }

        _areEqual = compareCallback;
        _hashOf = getHashCodeCallback;
    }

    /// <summary>
    /// Builds a comparer that tests equality on the member chosen by
    /// <paramref name="memberSelector"/>; no hash callback is supplied.
    /// </summary>
    public static DelegatingEqualityComparer<T> Default<TMember>(Func<T, TMember> memberSelector)
    {
        Func<T, T, bool> sameMember = delegate(T left, T right)
        {
            return EqualityComparer<TMember>.Default.Equals(memberSelector(left), memberSelector(right));
        };

        return new DelegatingEqualityComparer<T>(sameMember);
    }

    /// <summary>Returns the result of the equality callback.</summary>
    public bool Equals(T x, T y)
    {
        return _areEqual(x, y);
    }

    /// <summary>
    /// Returns the hash callback's result, or the default hash of the whole object when no
    /// hash callback was supplied.
    /// </summary>
    public int GetHashCode(T obj)
    {
        // There is a mistake here - can you spot it?
        return _hashOf != null
            ? _hashOf(obj)
            : EqualityComparer<T>.Default.GetHashCode(obj);
    }

    /// <summary>Non-generic equality: false unless both arguments are of type T.</summary>
    bool IEqualityComparer.Equals(object x, object y)
    {
        if (!(x is T) || !(y is T))
        {
            return false;
        }

        return this.Equals((T)x, (T)y);
    }

    /// <summary>Non-generic hash: typed hash for T, 0 for null, the object's own hash otherwise.</summary>
    int IEqualityComparer.GetHashCode(object obj)
    {
        if (obj is T)
        {
            return this.GetHashCode((T)obj);
        }

        return obj == null ? 0 : obj.GetHashCode();
    }
}
上面这个方法看起来很美,其实有个bug呢!
你知道是什么bug么?关键点是在GetHashCode(T obj)的实现上。
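呵呵,一开始我也没看出来。其实是泛型不对,T表示的是原来的obj,而我们要取的应该是TProperty(也就是用来比较的那个属性值)的hashcode。原因在于Distinct内部会先比较hashcode,只有hashcode相同才会调用Equals;如果hashcode取的是整个对象的,那么属性相同的两个对象hashcode不同,Equals根本不会被调用,去重也就失效了。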
所以应该是EqualityComparer<TProperty>.Default。但是这里没有TProperty,于是我们只能在Default<TMember>里面,创建TProperty hashcode的方法。
完整代码如下:
/// <summary>
/// Equality comparer whose equality test and hash function are both supplied by delegates.
/// </summary>
/// <typeparam name="T">Type of the compared elements.</typeparam>
public sealed class DelegatingEqualityComparer<T> : IEqualityComparer<T>, IEqualityComparer
{
    private readonly Func<T, T, bool> _compareCallback;
    private readonly Func<T, int> _getHashCodeCallback;

    /// <summary>Creates a comparer from an equality callback and a matching hash callback.</summary>
    /// <param name="compareCallback">Returns true when two elements are equal.</param>
    /// <param name="getHashCodeCallback">
    /// Hash function; it must return equal values for elements that
    /// <paramref name="compareCallback"/> reports as equal.
    /// </param>
    /// <exception cref="ArgumentNullException">Either callback is null.</exception>
    public DelegatingEqualityComparer(Func<T, T, bool> compareCallback, Func<T, int> getHashCodeCallback)
    {
        if (compareCallback == null)
        {
            throw new ArgumentNullException("compareCallback");
        }

        if (getHashCodeCallback == null)
        {
            throw new ArgumentNullException("getHashCodeCallback");
        }

        this._compareCallback = compareCallback;
        this._getHashCodeCallback = getHashCodeCallback;
    }

    /// <summary>
    /// Builds a comparer that both compares and hashes elements by the member chosen by
    /// <paramref name="memberSelector"/>, using the default equality comparer of the member type.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="memberSelector"/> is null.</exception>
    public static DelegatingEqualityComparer<T> Default<TMember>(Func<T, TMember> memberSelector)
    {
        // Fail at construction time instead of with a NullReferenceException in the middle
        // of a later enumeration.
        if (memberSelector == null)
        {
            throw new ArgumentNullException("memberSelector");
        }

        EqualityComparer<TMember> memberComparer = EqualityComparer<TMember>.Default;

        return new DelegatingEqualityComparer<T>(
            (x, y) => memberComparer.Equals(memberSelector(x), memberSelector(y)),
            z => memberComparer.GetHashCode(memberSelector(z)));
    }

    /// <summary>Returns the result of the equality callback.</summary>
    public bool Equals(T x, T y)
    {
        return this._compareCallback(x, y);
    }

    /// <summary>Returns the result of the hash callback.</summary>
    public int GetHashCode(T obj)
    {
        return this._getHashCodeCallback(obj);
    }

    /// <summary>
    /// Non-generic equality: two nulls are equal; otherwise both arguments must be of type
    /// T, in which case the typed comparison decides.
    /// </summary>
    bool IEqualityComparer.Equals(object x, object y)
    {
        // The non-generic GetHashCode maps null to 0, so two nulls must also compare equal
        // to keep Equals and GetHashCode consistent.
        if (x == null && y == null)
        {
            return true;
        }

        return (x is T) && (y is T) && this.Equals((T)x, (T)y);
    }

    /// <summary>Non-generic hash: typed hash for T, 0 for null, the object's own hash otherwise.</summary>
    int IEqualityComparer.GetHashCode(object obj)
    {
        if (obj is T)
        {
            return this.GetHashCode((T)obj);
        }

        if (obj == null)
        {
            return 0;
        }

        return obj.GetHashCode();
    }
}
关于EqualityComparer<T>.Default,可以参考.Net 相等性:集合类 Contains 方法 深入详解
此文研究的比较深入。
扩展方法给我们带来了乐趣,让我们代码越写越少,越写越爽。C#还是很强悍的,同情可怜的Java。