关于怎么在10万个手机号码中选择重复号码的问题(目前最高效的算法)

晚上看到有人分享了如何在10万个手机号码中找出重复号码的算法。

刚好晚上有空,也写了一个算法。

 

View Code
// Counts how many distinct phone numbers occur more than once in mobileArray.
// The dictionary is keyed by the number itself rather than by its GetHashCode():
// two different strings may share a hash code, which would be miscounted as a
// duplicate. Dictionary already hashes the key internally, so lookups stay O(1)
// on average.
// Value meaning: 1 = seen once so far, 2 = already counted as a duplicate.
Dictionary<string, int> dic = new Dictionary<string, int>();
int count3 = 0;

foreach (var item in mobileArray)
{
    int outInt;
    if (dic.TryGetValue(item, out outInt))
    {
        // Only the second occurrence increments the counter, so a number that
        // appears many times is still counted once.
        if (outInt == 1)
        {
            count3++;
            dic[item] = 2;
        }
    }
    else
    {
        dic[item] = 1;
    }
}

 

有下面几点需要注意:

  1. Dictionary内部基于哈希表实现,按Key查找的平均时间复杂度为O(1),效率很高
  2. 内容相同的字符串GetHashCode的结果一定相同(哈希值由字符串内容计算得出,与它们是否位于同一地址无关);但不同的字符串也可能得到相同的哈希值,因此仅凭哈希值判断重复在理论上存在误判的可能。

效果:

欢迎各位高手弄出个更快的算法

 

所有代码

View Code
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace 手机号码重复算法
{
    /// <summary>
    /// Benchmarks three ways of counting how many distinct phone numbers occur
    /// more than once in an array: a LINQ group-by, a ten-way digit tree, and a
    /// dictionary keyed by the number.
    /// </summary>
    unsafe class Program
    {
        /// <summary>Number of sample phone numbers generated for the benchmark.</summary>
        private const int SampleSize = 100000;

        /// <summary>Number of digits in a phone number; the digit tree requires this fixed length.</summary>
        private const int MobileLength = 11;

        /// <summary>
        /// Generates the sample data, runs each counting strategy under a stopwatch,
        /// prints the duplicate count and elapsed milliseconds for each, then waits
        /// for a line of console input.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // Sample array holding the phone numbers.
            string[] mobileArray = BuildSampleNumbers(SampleSize);

            System.Diagnostics.Stopwatch sw = new System.Diagnostics.Stopwatch();

            sw.Start();
            int count1 = CountDuplicatesWithLinq(mobileArray);
            sw.Stop();
            Console.WriteLine("Linq共有重复号" + count1 + "耗时" + sw.ElapsedMilliseconds);

            sw.Restart();
            int count2 = CountDuplicatesWithTree(mobileArray);
            sw.Stop();
            // Report the tree's own counter (the original printed the LINQ counter here).
            Console.WriteLine("十叉树共有重复号" + count2 + "耗时" + sw.ElapsedMilliseconds);

            sw.Restart();
            int count3 = CountDuplicatesWithDictionary(mobileArray);
            sw.Stop();
            Console.WriteLine("hash计算共有重复号" + count3 + "耗时" + sw.ElapsedMilliseconds);

            Console.ReadLine();
        }

        /// <summary>
        /// Builds <paramref name="count"/> sample 11-digit numbers of the form
        /// "1390000" followed by four digits derived from the index.
        /// </summary>
        /// <param name="count">How many numbers to generate.</param>
        /// <returns>An array of <paramref name="count"/> strings, each 11 characters long.</returns>
        internal static string[] BuildSampleNumbers(int count)
        {
            string[] numbers = new string[count];

            for (int i = 0; i < count; i++)
            {
                string digits = i.ToString(System.Globalization.CultureInfo.InvariantCulture);

                // Take the first four digits of the index, right-padding short
                // indexes with zeros, so every number is exactly 11 characters.
                numbers[i] = "1390000" + (digits + "0000").Substring(0, 4);
            }

            return numbers;
        }

        /// <summary>
        /// Counts distinct numbers that occur more than once using a LINQ group-by;
        /// the equivalent of
        /// "select mobile from tmpTable group by mobile having count(*) &gt; 1".
        /// </summary>
        /// <param name="mobiles">The phone numbers to examine.</param>
        /// <returns>The number of distinct values that appear at least twice.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="mobiles"/> is null.</exception>
        internal static int CountDuplicatesWithLinq(string[] mobiles)
        {
            if (mobiles == null)
            {
                throw new ArgumentNullException("mobiles");
            }

            var repeated = from n in mobiles
                           group n by n into g
                           where g.Count() > 1
                           select g.Key;

            return repeated.Count();
        }

        /// <summary>
        /// Counts distinct numbers that occur more than once using two digit trees:
        /// one records every number seen, the other records numbers already counted
        /// as duplicates so each is counted only once.
        /// </summary>
        /// <param name="mobiles">The phone numbers to examine; each must be exactly 11 digits.</param>
        /// <returns>The number of distinct values that appear at least twice.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="mobiles"/> is null.</exception>
        /// <exception cref="ArgumentException">An element is null, not 11 characters long, or contains a non-digit.</exception>
        internal static int CountDuplicatesWithTree(string[] mobiles)
        {
            if (mobiles == null)
            {
                throw new ArgumentNullException("mobiles");
            }

            TenNodeTree seen = new TenNodeTree();
            TenNodeTree counted = new TenNodeTree();
            int duplicates = 0;

            foreach (string item in mobiles)
            {
                // The tree walks exactly MobileLength characters through a raw
                // pointer, so a shorter string would be read past its end.
                if (item == null || item.Length != MobileLength)
                {
                    throw new ArgumentException("Every phone number must be exactly 11 digits long.", "mobiles");
                }

                fixed (char* no = item)
                {
                    // Add returns false when the number was already present.
                    if (!seen.Add(no, MobileLength) && counted.Add(no, MobileLength))
                    {
                        duplicates++;
                    }
                }
            }

            return duplicates;
        }

        /// <summary>
        /// Counts distinct numbers that occur more than once using a dictionary keyed
        /// by the number itself. Keying by the string instead of its GetHashCode()
        /// avoids miscounting two different numbers whose hash codes collide.
        /// </summary>
        /// <param name="mobiles">The phone numbers to examine.</param>
        /// <returns>The number of distinct values that appear at least twice.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="mobiles"/> or one of its elements is null.</exception>
        internal static int CountDuplicatesWithDictionary(string[] mobiles)
        {
            if (mobiles == null)
            {
                throw new ArgumentNullException("mobiles");
            }

            // Value meaning: 1 = seen once so far, 2 = already counted as a duplicate.
            Dictionary<string, int> dic = new Dictionary<string, int>(mobiles.Length);
            int duplicates = 0;

            foreach (string item in mobiles)
            {
                int occurrences;
                if (dic.TryGetValue(item, out occurrences))
                {
                    if (occurrences == 1)
                    {
                        duplicates++;
                        dic[item] = 2;
                    }
                }
                else
                {
                    dic[item] = 1;
                }
            }

            return duplicates;
        }

        /// <summary>
        /// A tree in which every node has ten children, one per decimal digit.
        /// A digit string is stored as the path of its digits from the root.
        /// </summary>
        class TenNodeTree
        {
            /// <summary>Root node; its children correspond to the first digit.</summary>
            public TenNode Root = new TenNode();

            /// <summary>
            /// Inserts the digit string starting at <paramref name="no"/> into the tree.
            /// </summary>
            /// <param name="no">Pointer to the first character; must address at least <paramref name="len"/> characters.</param>
            /// <param name="len">Number of characters to read.</param>
            /// <returns>
            /// true if at least one new node had to be created (the path was not
            /// fully present before); false if the whole path already existed.
            /// </returns>
            /// <exception cref="ArgumentNullException"><paramref name="no"/> is null.</exception>
            /// <exception cref="ArgumentException">A character is not an ASCII digit '0'-'9'.</exception>
            public bool Add(char* no, int len)
            {
                if (no == null)
                {
                    throw new ArgumentNullException("no");
                }

                TenNode cnode = Root;
                bool isadd = false;

                for (int i = 0; i < len; i++)
                {
                    // Map '0'..'9' to child index 0..9.
                    int digit = no[i] - '0';
                    if (digit < 0 || digit > 9)
                    {
                        throw new ArgumentException("Phone numbers may contain only the digits 0-9.", "no");
                    }

                    TenNode next = cnode.Child[digit];
                    if (next == null)
                    {
                        isadd = true;
                        next = new TenNode();
                        cnode.Child[digit] = next;
                    }

                    cnode = next;
                }

                return isadd;
            }
        }

        /// <summary>A node of <see cref="TenNodeTree"/>.</summary>
        class TenNode
        {
            /// <summary>Child nodes indexed by digit 0-9; an entry is null until that digit is inserted.</summary>
            public TenNode[] Child = new TenNode[10];
        }
    }
}

 

 

posted @ 2011-07-21 22:32  星际迷茫  阅读(890)  评论(4)    收藏  举报