Fork me on GitHub

并行中的分区Partitioner

  本篇介绍在C#中如何把一个大集合或大数组分成若干个分区来并行处理。Demo中把一组字符串放在List中,然后并行生成每个字符串的MD5串并返回结果。

using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Linq;
using System.Reflection;
using System.Security.Cryptography;
using System.Text;
using System.Threading.Tasks;

/// <summary>
/// Console demo: splits a list of strings into partitions with
/// <see cref="Partitioner"/> and hashes every element with MD5 through PLINQ.
/// </summary>
public class Program
{
    /// <summary>Number of partitions requested from the partitioner.</summary>
    private const int PartitionCount = 12;

    /// <summary>Number of input strings generated for each round.</summary>
    private const int ItemCount = 80000;

    /// <summary>Program entry point; runs the interactive partitioner demo.</summary>
    /// <param name="args">Command-line arguments (unused).</param>
    public static async Task Main(string[] args)
    {
        await PartitionerDemoAsync();
    }

    /// <summary>
    /// Computes the MD5 digest of <paramref name="str"/> as an upper-case hex string.
    /// </summary>
    /// <param name="str">Text to hash.</param>
    /// <returns>
    /// 32 upper-case hexadecimal characters, or <c>null</c> when
    /// <paramref name="str"/> is <c>null</c> or empty.
    /// </returns>
    public static string ToMD5Hash(string str)
    {
        if (string.IsNullOrEmpty(str))
        {
            return null;
        }

        // UTF-8 instead of ASCII: the ASCII encoder replaces every non-ASCII
        // character with '?', which makes different inputs hash identically.
        // For pure-ASCII text both encodings produce the same bytes.
        var bytes = Encoding.UTF8.GetBytes(str);
        using (var md5 = MD5.Create())
        {
            return string.Join("", md5.ComputeHash(bytes).Select(x => x.ToString("X2")));
        }
    }

    /// <summary>
    /// Drains one partition, hashing each element it yields, and disposes the
    /// enumerator afterwards.
    /// </summary>
    /// <param name="partition">Enumerator over the elements of one partition.</param>
    /// <returns>
    /// An already-completed task holding the hashes in the order they were read.
    /// The hashing itself runs synchronously on the calling thread.
    /// </returns>
    static Task<List<string>> PartitionA(IEnumerator<string> partition)
    {
        using (partition)
        {
            var hashes = new List<string>();
            while (partition.MoveNext())
            {
                hashes.Add(ToMD5Hash(partition.Current));
            }

            // Shows how many elements ended up in this partition.
            Console.WriteLine($"======={hashes.Count}========");
            return Task.FromResult(hashes);
        }
    }

    /// <summary>
    /// Runs one hashing round per line read from standard input and prints every
    /// hash together with the ordinal of the partition result it came from.
    /// Returns when standard input reaches end of stream.
    /// </summary>
    static async Task PartitionerDemoAsync()
    {
        // Console.ReadLine returns null at end of input; stop there instead of
        // looping forever without ever blocking again.
        while (Console.ReadLine() != null)
        {
            var source = new List<string>(ItemCount);
            for (var i = 0; i < ItemCount; i++)
            {
                source.Add($"{i}{DateTime.Now.ToString("yyyyMMddHHmmssfffffff")}");
            }

            var partitionResults = Partitioner
                                  .Create(source)
                                  .GetPartitions(PartitionCount)
                                  .AsParallel()
                                  .Select(PartitionA);

            var count = 0;
            foreach (var item in partitionResults)
            {
                count++;
                foreach (var hash in await item)
                {
                    Console.WriteLine($"---{count}---{hash}-----");
                }
            }
        }
    }
}

我电脑的配置是8核。

 

 

 

  这是把分区数设置成8后,80000个元素三次分配的结果。元素越多,分配相对越均匀。

 

 

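   当把分区数设置成12时,会发现三次运行中总是有四个分区分不到元素。需要注意的是,.GetPartitions(12)确实返回了12个分区,它本身并不会按本机的cpu核心数限制分区数量;受核心数限制的是PLINQ的默认并行度(本机为8)。这些分区是按需从同一个数据源里取元素的,先运行的分区会把元素全部取走,等剩下的四个分区开始执行时已经没有元素可取。所以分区数多于核心数时,多出来的分区没有cpu去运算,也就没有意义了。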

 

 

 最后,用Benchmark跑一下用分区的和不分区的状态下生成80000个md5结果:

using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Linq;
using System.Security.Cryptography;
using System.Text;
using System.Threading.Tasks;

namespace Demo01
{
    /// <summary>
    /// Demo entry that hands <see cref="TestParallelDemo2"/> to BenchmarkDotNet.
    /// </summary>
    public class ParallelDemo2 : IDemo
    {
        /// <summary>
        /// Calls <c>BenchmarkRunner.Run</c> for <see cref="TestParallelDemo2"/>.
        /// </summary>
        public void Run() => BenchmarkRunner.Run<TestParallelDemo2>();
    }

    /// <summary>
    /// Benchmarks that hash 80000 generated strings with MD5, once in a plain
    /// sequential loop and once split into partitions that are processed via PLINQ.
    /// </summary>
    public class TestParallelDemo2
    {
        /// <summary>Number of input strings generated per benchmark invocation.</summary>
        private const int ItemCount = 80000;

        /// <summary>Number of partitions requested from the partitioner.</summary>
        private const int PartitionCount = 12;

        /// <summary>
        /// Sequential baseline: generates and hashes every string on the calling thread.
        /// Runs synchronously despite the <c>Async</c> suffix; the name is kept so
        /// existing benchmark reports stay comparable.
        /// </summary>
        [Benchmark]
        public void DemoAsync()
        {
            var hashes = new List<string>(ItemCount);
            for (var i = 0; i < ItemCount; i++)
            {
                hashes.Add(ToMD5Hash($"{i}{DateTime.Now.ToString("yyyyMMddHHmmssfffffff")}"));
            }
        }

        /// <summary>
        /// Partitioned variant: generates the input strings, splits them into
        /// <see cref="PartitionCount"/> partitions and hashes each partition through PLINQ.
        /// </summary>
        /// <returns>A task that completes once every partition result has been awaited.</returns>
        [Benchmark]
        public async Task PartitionerDemoAsync()
        {
            var source = new List<string>(ItemCount);
            for (var i = 0; i < ItemCount; i++)
            {
                source.Add($"{i}{DateTime.Now.ToString("yyyyMMddHHmmssfffffff")}");
            }

            var partitionResults = Partitioner
                                  .Create(source)
                                  .GetPartitions(PartitionCount)
                                  .AsParallel()
                                  .Select(PartitionA);

            // The PLINQ query is deferred: enumerating it is what makes the
            // partitions run, so this loop must stay even though results are unused.
            foreach (var item in partitionResults)
            {
                await item;
            }
        }

        /// <summary>
        /// Computes the MD5 digest of <paramref name="str"/> as an upper-case hex string.
        /// </summary>
        /// <param name="str">Text to hash.</param>
        /// <returns>
        /// 32 upper-case hexadecimal characters, or <c>null</c> when
        /// <paramref name="str"/> is <c>null</c> or empty.
        /// </returns>
        private static string ToMD5Hash(string str)
        {
            if (string.IsNullOrEmpty(str))
            {
                return null;
            }

            // UTF-8 instead of ASCII: the ASCII encoder replaces every non-ASCII
            // character with '?', which makes different inputs hash identically.
            // For pure-ASCII text both encodings produce the same bytes.
            var bytes = Encoding.UTF8.GetBytes(str);
            using (var md5 = MD5.Create())
            {
                return string.Join("", md5.ComputeHash(bytes).Select(x => x.ToString("X2")));
            }
        }

        /// <summary>
        /// Drains one partition, hashing each element it yields, and disposes the
        /// enumerator afterwards.
        /// </summary>
        /// <param name="partition">Enumerator over the elements of one partition.</param>
        /// <returns>
        /// An already-completed task holding the hashes in the order they were read.
        /// The hashing itself runs synchronously on the calling thread.
        /// </returns>
        private static Task<List<string>> PartitionA(IEnumerator<string> partition)
        {
            using (partition)
            {
                var hashes = new List<string>();
                while (partition.MoveNext())
                {
                    hashes.Add(ToMD5Hash(partition.Current));
                }

                return Task.FromResult(hashes);
            }
        }
    }
}

分区明显要优于普通方式,数据越多,优势越明显。

 

 

 

  想要更快更方便的了解相关知识,可以关注微信公众号 
 

 

 

posted @ 2022-02-11 15:55  桂素伟  阅读(370)  评论(0编辑  收藏  举报