并行中的分区Partitioner
本篇介绍在C#中如何把一个大集合或大数组分成若干个分区来并行处理。Demo中把一组字符串放在List中,然后按分区并行生成MD5串,再把结果返回。
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Linq;
using System.Reflection;
using System.Security.Cryptography;
using System.Text;
using System.Threading.Tasks;
/// <summary>
/// Console demo: splits a list of strings into partitions with
/// <see cref="Partitioner"/> and computes an MD5 digest for every element,
/// one partition per PLINQ work item.
/// </summary>
public class Program
{
    /// <summary>Number of partitions requested from the partitioner.</summary>
    private const int PartitionCount = 12;

    /// <summary>Number of sample strings generated for each run.</summary>
    private const int ItemCount = 80000;

    /// <summary>Program entry point; runs the interactive partitioner demo.</summary>
    /// <param name="args">Command-line arguments (unused).</param>
    public static async Task Main(string[] args)
    {
        await PartitionerDemoAsync();
    }

    /// <summary>
    /// Computes the MD5 digest of <paramref name="str"/> as an upper-case
    /// hexadecimal string.
    /// </summary>
    /// <param name="str">Text to hash.</param>
    /// <returns>
    /// A 32-character upper-case hex string, or <c>null</c> when
    /// <paramref name="str"/> is <c>null</c> or empty.
    /// </returns>
    public static string ToMD5Hash(string str)
    {
        if (string.IsNullOrEmpty(str))
        {
            return null;
        }

        // UTF-8 rather than ASCII: the ASCII encoder replaces every non-ASCII
        // character with '?', which made distinct inputs hash identically.
        // Pure-ASCII input produces exactly the same bytes as before.
        // GetBytes never returns null and a non-empty string never encodes to
        // zero bytes, so no further validation is needed here.
        var bytes = Encoding.UTF8.GetBytes(str);

        using (var md5 = MD5.Create())
        {
            return string.Join("", md5.ComputeHash(bytes).Select(x => x.ToString("X2")));
        }
    }

    /// <summary>
    /// Drains one partition, hashing every element it yields, and disposes
    /// the partition enumerator afterwards.
    /// </summary>
    /// <param name="partition">Enumerator over the elements of one partition.</param>
    /// <returns>
    /// An already-completed task holding the digests of this partition, in
    /// the order the elements were read.
    /// </returns>
    private static Task<List<string>> PartitionA(IEnumerator<string> partition)
    {
        using (partition)
        {
            var hashes = new List<string>();
            while (partition.MoveNext())
            {
                hashes.Add(ToMD5Hash(partition.Current));
            }

            Console.WriteLine($"======={hashes.Count}========");

            // All the work above is synchronous, so hand back a completed task
            // directly instead of paying for an async state machine.
            return Task.FromResult(hashes);
        }
    }

    /// <summary>
    /// Interactive loop: each line read from standard input triggers one run
    /// that generates <see cref="ItemCount"/> strings, hashes them partition
    /// by partition, and prints every digest prefixed with the ordinal of the
    /// partition result it came from.
    /// </summary>
    private static async Task PartitionerDemoAsync()
    {
        // Console.ReadLine returns null once standard input is exhausted
        // (closed or redirected stream). Stop then instead of spinning forever.
        while (Console.ReadLine() != null)
        {
            var source = new List<string>();
            for (var i = 0; i < ItemCount; i++)
            {
                source.Add($"{i}{DateTime.Now.ToString("yyyyMMddHHmmssfffffff")}");
            }

            // The PLINQ query is lazy: PartitionA only runs once the foreach
            // below starts enumerating the query.
            var partitionResults = Partitioner
                .Create(source)
                .GetPartitions(PartitionCount)
                .AsParallel()
                .Select(PartitionA);

            var count = 0;
            foreach (var partitionResult in partitionResults)
            {
                count++;
                foreach (var hash in await partitionResult)
                {
                    Console.WriteLine($"---{count}---{hash}-----");
                }
            }
        }
    }
}
我电脑的配置是8核。
这是把分区数设置成8后,80000个元素三次分配的结果。元素越多,分配相对越均匀。
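当把分区数设置成12时,会发现三次运行中总是有四个分区分不到元素。这并不是因为.GetPartitions(12)按本机的CPU核心数限制了分区数——它确实返回了12个分区;原因在于Partitioner.Create(source)创建的是按需分块取数的动态(负载均衡)分区器,12个分区共享同一个数据源,而.AsParallel()默认的并行度等于CPU核心数(本机为8),先开始执行的8个分区就把全部元素取完了,剩下的4个分区轮到时已经没有元素可取。可以理解,分区数多于核心数时,多出来的分区没有CPU去运算,也就没有意义了。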
最后,用Benchmark跑一下用分区的和不分区的状态下生成80000个md5结果:
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Linq;
using System.Security.Cryptography;
using System.Text;
using System.Threading.Tasks;
namespace Demo01
{
/// <summary>
/// Demo entry that launches the BenchmarkDotNet run for
/// <see cref="TestParallelDemo2"/>.
/// </summary>
public class ParallelDemo2 : IDemo
{
    /// <summary>
    /// Runs every benchmark declared on <see cref="TestParallelDemo2"/>.
    /// </summary>
    public void Run() => BenchmarkRunner.Run<TestParallelDemo2>();
}
/// <summary>
/// BenchmarkDotNet benchmarks comparing sequential MD5 generation with
/// partitioned, PLINQ-driven MD5 generation over the same number of strings.
/// </summary>
public class TestParallelDemo2
{
    /// <summary>Number of strings generated and hashed by each benchmark.</summary>
    private const int ItemCount = 80000;

    /// <summary>Number of partitions requested from the partitioner.</summary>
    private const int PartitionCount = 12;

    /// <summary>
    /// Baseline: generates and hashes <see cref="ItemCount"/> strings on the
    /// calling thread. Despite the "Async" suffix this method is synchronous;
    /// the name is kept unchanged so existing benchmark reports still match.
    /// </summary>
    [Benchmark]
    public void DemoAsync()
    {
        var list = new List<string>();
        for (var i = 0; i < ItemCount; i++)
        {
            list.Add(ToMD5Hash($"{i}{DateTime.Now.ToString("yyyyMMddHHmmssfffffff")}"));
        }

        foreach (var item in list)
        {
            // Intentionally empty. NOTE(review): presumably kept so this
            // benchmark walks its results the same way the partitioned
            // benchmark does - confirm before removing.
        }
    }

    /// <summary>
    /// Partitioned variant: generates <see cref="ItemCount"/> strings, splits
    /// them into <see cref="PartitionCount"/> partitions and hashes each
    /// partition as one PLINQ work item.
    /// </summary>
    [Benchmark]
    public async Task PartitionerDemoAsync()
    {
        var source = new List<string>();
        for (var i = 0; i < ItemCount; i++)
        {
            source.Add($"{i}{DateTime.Now.ToString("yyyyMMddHHmmssfffffff")}");
        }

        var partitionResults = Partitioner
            .Create(source)
            .GetPartitions(PartitionCount)
            .AsParallel()
            .Select(PartitionA);

        // The PLINQ query is lazy: enumerating it is what actually runs
        // PartitionA, so these loops must stay even though they do nothing
        // with the individual hashes.
        foreach (var partitionResult in partitionResults)
        {
            foreach (var hash in await partitionResult)
            {
                // Intentionally empty: only enumeration is being measured.
            }
        }
    }

    /// <summary>
    /// Computes the MD5 digest of <paramref name="str"/> as an upper-case
    /// hexadecimal string.
    /// </summary>
    /// <param name="str">Text to hash.</param>
    /// <returns>
    /// A 32-character upper-case hex string, or <c>null</c> when
    /// <paramref name="str"/> is <c>null</c> or empty.
    /// </returns>
    private string ToMD5Hash(string str)
    {
        if (string.IsNullOrEmpty(str))
        {
            return null;
        }

        // UTF-8 rather than ASCII: the ASCII encoder replaces every non-ASCII
        // character with '?', which made distinct inputs hash identically.
        // Pure-ASCII input produces exactly the same bytes as before.
        // GetBytes never returns null and a non-empty string never encodes to
        // zero bytes, so no further validation is needed here.
        var bytes = Encoding.UTF8.GetBytes(str);

        using (var md5 = MD5.Create())
        {
            return string.Join("", md5.ComputeHash(bytes).Select(x => x.ToString("X2")));
        }
    }

    /// <summary>
    /// Drains one partition, hashing every element it yields, and disposes
    /// the partition enumerator afterwards.
    /// </summary>
    /// <param name="partition">Enumerator over the elements of one partition.</param>
    /// <returns>
    /// An already-completed task holding the digests of this partition, in
    /// the order the elements were read.
    /// </returns>
    private Task<List<string>> PartitionA(IEnumerator<string> partition)
    {
        using (partition)
        {
            var hashes = new List<string>();
            while (partition.MoveNext())
            {
                hashes.Add(ToMD5Hash(partition.Current));
            }

            // All the work above is synchronous, so hand back a completed task
            // directly instead of paying for an async state machine.
            return Task.FromResult(hashes);
        }
    }
}
}
分区明显要优于普通方式,数据越多,优势越明显。
想要更快更方便的了解相关知识,可以关注微信公众号