C++ AMP 简要范例

参考自:https://msdn.microsoft.com/en-us/library/hh265136.aspx

#include <amp.h>
#include <amp_math.h>
#include <iostream>
#include <vector>

// Make the C++ AMP names used below (array_view, array, index, extent,
// parallel_for_each, accelerator, ...) available without qualification.
using namespace concurrency;
// Number of elements in the host arrays processed by CppAmpMethod().
const int size = 5;

// C++AMP样例
void CppAmpMethod()
{
	int aCPP[] = { 1, 2, 3, 4, 5 };
	int bCPP[] = { 6, 7, 8, 9, 10 };

	int sumCPP[size];

	// Create C++ AMP objects.
	array_view<const int, 1> a(size, aCPP);
	array_view<const int, 1> b(size, bCPP);
	array_view<int, 1> sum(size, sumCPP);
	sum.discard_data();

	parallel_for_each(
		// Define the compute domain, which is the set of threads that are created
		sum.extent,
		// Define the code to run on each thread on the accelerator
		[=](index<1> idx) restrict(amp)
	{
		sum[idx] = a[idx] + b[idx];
	}
	);

	// print the results. The expected output is "7, 9, 11, 13, 15"
	for (int i = 0; i < size; i++)
	{
		std::cout << sum[i] << "\n";
	}
}

// array_view用法范例1
void index1()
{
	int aCPP[] = { 1, 2, 3, 4, 5 };
	array_view<int, 1> a(5, aCPP);
	index<1> idx(2);
	std::cout << a[idx] << "\n";
	// Output: 3
}

// array_view用法范例2
void index2()
{
	int aCPP[] = { 1, 2, 3,
				   4, 5, 6 };
	array_view<int, 2> a(2, 3, aCPP);
	index<2> idx(1, 2);
	std::cout << a[idx] << "\n";
	// Output: 6
}

// array_view用法范例3
void index3()
{
	int aCPP[] = {
		1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
		1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 };

	array_view<int, 3> a(2, 3, 4, aCPP);

	// Specifies the element at 3, 1, 0
	index<3> idx(0, 1, 3);
	std::cout << a[idx] << "\n";
	// Output: 8
}

// extent用法范例1
void extent1()
{
	int aCPP[] = {
		1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
		1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 };
	// There are 3 rows and 4 columns, and the depth is two.
	array_view<int, 3> a(2, 3, 4, aCPP);
	std::cout << "The number of colmns is " << a.extent[2] << "\n";
	std::cout << "The number of rows is " << a.extent[1] << "\n";
	std::cout << "The depth is " << a.extent[0] << "\n";

	std::cout << "Length in most significant dimension is " << a.extent[0] << "\n";
}

// extent用法范例2
void extent2()
{
	int aCPP[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\
		17, 18, 19, 20, 21, 22, 23, 24 };
	
	extent<3> e(2, 3, 4);
	array_view<int, 3> a(e, aCPP);
	std::cout << "The num of columns is " << a.extent[2] << "\n";
	std::cout << "The num of rows is " << a.extent[1] << "\n";
	std::cout << "The depth is " << a.extent[0] << "\n";
}

// array example: copies a std::vector into a concurrency::array, multiplies
// every element by 10 inside a parallel_for_each, copies the result back into
// the vector and prints it one value per line (0, 10, 20, 30, 40).
void array1()
{
	// Single definition of the element count; it sizes both the vector and the array.
	const int elementCount = 5;

	// Fill the host-side vector with 0, 1, 2, ...
	std::vector<int> data(elementCount);
	for (int i = 0; i < elementCount; i++)
	{
		data[i] = i;
	}

	// Construct the array from the vector's contents.
	array<int, 1> a(elementCount, data.begin(), data.end());

	parallel_for_each(
		a.extent,
		// The array itself is captured by reference (&a), unlike the
		// array_view examples in this file which capture by value.
		[=, &a](index<1> idx) restrict(amp)
		{
			a[idx] = a[idx] * 10;
		}
	);

	// Assigning the array to the vector copies the results back to the host.
	data = a;
	for (int value : data)
	{
		std::cout << value << "\n";
	}
}

// 和cpu共享内存
void shareMemory1()
{
	accelerator acc = accelerator(accelerator::default_accelerator);

	// Early out if the defult accelerator doesn't support shared memory.
	if (!acc.supports_cpu_shared_memory)
	{
		std::cout << "The defult acclerator does not support shared memory " << std::endl;
		return;
	}

	// Override the default CPU access type.
	//acc.default_cpu_access_type = access_type_read_write;

	// Create an accelerator_view from the default accelerator.
	// The accelerator_view inherits its default_cpu_access_type from acc.
	accelerator_view acc_v = acc.default_view;

	// Create an extent object to size the arrays.
	extent<1> ex(10);

	// Input array that can be written on the CPU.
	array<int, 1> arr_w(ex, acc_v, access_type_write);
	
	// Output array that can be read on the CPU
	array<int, 1> arr_r(ex, acc_v, access_type_read);

	// Read-write array that can be both written to and read from on the CPU.
	array<int, 1> arr_rm(ex, acc_v, access_type_read_write);
}

// parallel_for_each用法范例1
void AddArrays()
{
	int aCPP[] = { 1, 2, 3, 4, 5 };
	int bCPP[] = { 6, 7, 8, 9, 10 };
	int sumCPP[] = { 0, 0, 0, 0, 0 };

	array_view<int, 1> a(5, aCPP);
	array_view<int, 1> b(5, bCPP);
	array_view<int, 1> sum(5, sumCPP);

	parallel_for_each(
		sum.extent,
		[=](index<1> idx) restrict(amp)
		{
			sum[idx] = a[idx] + b[idx];
		}
	);

	for (int i = 0; i < 5; i++)
	{
		std::cout << sum[i] << "\n";
	}
}

// restrict(amp) helper: stores a[idx] + b[idx] into sum[idx].
// The views are taken by value; the write goes through the `sum` view.
void AddElements(index<1> idx, array_view<int, 1> sum, array_view<int, 1> a, array_view<int, 1> b) restrict(amp)
{
	const int left = a[idx];
	const int right = b[idx];
	sum[idx] = left + right;
}

// parallel_for_each用法范例2
void AddArraysWitchFunction()
{
	int aCPP[] = { 1, 2, 3, 4, 5 };
	int bCPP[] = { 6, 7, 8, 9, 10 };
	int sumCPP[] = { 0, 0, 0, 0, 0 };

	array_view<int, 1> a(5, aCPP);
	array_view<int, 1> b(5, bCPP);
	array_view<int, 1> sum(5, sumCPP);

	parallel_for_each(
		sum.extent,
		[=](index<1> idx) restrict(amp)
		{
		AddElements(idx, sum, a, b);
		}
	);

	for (int i = 0; i < 5; i++)
	{
		std::cout << sum[i] << "\n";
	}
}

// 二维分割切块加速
void acceleratingCode()
{
	// Sample data:
	int sampledata[] = {
		2, 2, 9, 7, 1, 4,
		4, 4, 8, 8, 3, 4,
		1, 5, 1, 2, 5, 2,
		6, 8, 3, 2, 7, 2
	};

	// The tiles:
	// 2 2      9 7      1 4
	// 4 4      8 8      3 4
	//
	// 1 5      1 2      5 2
	// 6 8      3 2      7 2

	// Averages:
	int averagedata[] = {
		0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0,
	};

	array_view<int, 2> sample(4, 6, sampledata);
	array_view<int, 2> average(4, 6, averagedata);

	parallel_for_each(
		// Create threads for sample.extent and divide the extent into 2 x 2 tiles
		sample.extent.tile<2, 2>(),
		[=](tiled_index<2, 2> idx) restrict(amp)
		{
			// Create a 2 x 2 array to hold the values in this tile.
			tile_static int nums[2][2];
			// Copy the values for the tile into the 2 x 2 array.
			nums[idx.local[1]][idx.local[0]] = sample[idx.global];
			// When all the threads have executed and the 2 x 2 array is complete, find the average.
			idx.barrier.wait();
			int sum = nums[0][0] + nums[0][1] + nums[1][0] + nums[1][1];
			// Copy the average into the array_view.
			average[idx.global] = sum / 4;
		}
	);

	for (int i = 0; i < 4; i++)
	{
		for (int j = 0; j < 6; j++)
		{
			std::cout << average(i, j) << " ";
		}
		std::cout << "\n";
	}

	// Output
	// 3 3 8 8 3 3
	// 3 3 8 8 3 3
	// 5 5 2 2 4 4
	// 5 5 2 2 4 4
}

// parallel_for_each usage example 3: using the parallel math library.
// Replaces each of six values in place with its base-10 logarithm and
// prints the results one per line.
void MathExample()
{
	double values[] = { 1.0, 10.0, 60.0, 100.0, 600.0, 1000.0 };
	array_view<double, 1> logs(6, values);

	parallel_for_each(logs.extent, [=](index<1> i) restrict(amp)
	{
		// fast_math::log10 takes a float, so the double element is narrowed
		// to float for the call (spelled out here instead of left implicit);
		// the float result is widened again on assignment.
		const float input = static_cast<float>(logs[i]);
		logs[i] = concurrency::fast_math::log10(input);
	});

	for (int k = 0; k != 6; ++k)
	{
		std::cout << logs[k] << "\n";
	}
}

// Entry point: runs the selected sample. Only CppAmpMethod() is enabled;
// uncomment any of the other calls to run that example instead or as well.
// Returns 0 so the process reports success to the operating system
// (a non-zero exit status conventionally signals failure).
int main()
{
	CppAmpMethod();
	//index1();
	//index2();
	//index3();
	
	//extent1();
	//extent2();
	//array1();

	//shareMemory1();
	//AddArrays();
	//AddArraysWitchFunction();
	//acceleratingCode();
	//MathExample();

	return 0;
}

  

posted @ 2015-06-02 08:58  -学以致用-  阅读(746)  评论(0编辑  收藏  举报