欢迎您访问程序员文章站!本站旨在为大家提供并分享程序员计算机编程知识。
您现在的位置是: 首页

C++ AMP 矩阵分块

程序员文章站 2022-07-12 14:14:32
...

参考自:https://msdn.microsoft.com/en-us/library/hh873135.aspx

#include <iostream>
#include <iomanip>
#include <amp.h>
using namespace concurrency;

// Dimensions of the demo matrix: 8 rows by 9 columns (72 elements in total).
constexpr int ROWS = 8;
constexpr int COLS = 9;

// One record per matrix element, filled in by the tiled kernel in TilingDescription().
// tileRow and tileColumn specify the tile that each thread is in.
// globalRow and globalColumn specify the location of the thread in the array_view.
// localRow and localColumn specify the location of the thread relative to the tile.
// NOTE: TilingDescription() initializes this struct positionally ({ i, 0, 0, ... }),
// so the order of the fields below must not change.
struct Description
{
	int value;         // linear index of the element, assigned on the host
	int tileRow;       // t_idx.tile[0]
	int tileColumn;    // t_idx.tile[1]
	int globalRow;     // t_idx.global[0]
	int globalColumn;  // t_idx.global[1]
	int localRow;      // t_idx.local[0]
	int localColumn;   // t_idx.local[1]
};

// A helper function for formatting the output.
// Sets the console text color used for subsequent writes to standard output:
//   color == 0  -> red
//   otherwise   -> green
// The caller in this file passes (tileRow + tileColumn) % 2, i.e. 0 or 1, so
// the comparison has to be against 0; comparing against any value outside
// {0, 1} would paint every tile in the same color.
void SetConsoleColor(int color)
{
	// Win32 console character attributes: 4 is FOREGROUND_RED, 2 is FOREGROUND_GREEN.
	const int kRed = 4;
	const int kGreen = 2;

	const int colorValue = (color == 0) ? kRed : kGreen;
	SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), colorValue);
}

// A helper function for formatting the output.
// Resizes the console screen buffer to `width` columns by `height` rows and
// then asks the console window to display that whole area.
// Return values of the console API calls are ignored: sizing is best effort
// and purely cosmetic.
void SetConsoleSize(int height, int width)
{
	// COORD members are 16-bit (SHORT); make the narrowing explicit.
	COORD coord;
	coord.X = static_cast<short>(width);
	coord.Y = static_cast<short>(height);
	SetConsoleScreenBufferSize(GetStdHandle(STD_OUTPUT_HANDLE), coord);

	// Keep the rectangle on the stack so nothing has to be freed.
	// Window coordinates are inclusive, so a buffer that is `width` cells wide
	// spans columns 0 .. width - 1; SetConsoleWindowInfo fails if the rectangle
	// extends beyond the screen buffer.
	SMALL_RECT rect;
	rect.Left = 0;
	rect.Top = 0;
	rect.Right = static_cast<short>(width - 1);
	rect.Bottom = static_cast<short>(height - 1);
	SetConsoleWindowInfo(GetStdHandle(STD_OUTPUT_HANDLE), true, &rect);
}

// This method creates an 8 x 9 matrix of Description structures. In the
// call to parallel_for_each, the structure is updated with tile,global, and local indices.
void TilingDescription()
{
	// Create 72(8x9) Description structures.
	std::vector<Description> descs;
	for (int i = 0; i < ROWS * COLS; i++)
	{
		Description d = { i, 0, 0, 0, 0, 0, 0 };
		descs.push_back(d);
	}

	// Create an array_view from the Description structures.
	extent<2> matrix(ROWS, COLS);
	array_view<Description, 2> descriptions(matrix, descs);

	// Update each Description with the tile, global. and local indices.
	parallel_for_each(descriptions.extent.tile<2, 3>(),
		[=](tiled_index<2, 3> t_idx) restrict(amp)
	{	
		descriptions[t_idx].globalRow = t_idx.global[0];
		descriptions[t_idx].globalColumn = t_idx.global[1];
		descriptions[t_idx].tileRow = t_idx.tile[0];
		descriptions[t_idx].tileColumn = t_idx.tile[1];
		descriptions[t_idx].localRow = t_idx.local[0];
		descriptions[t_idx].localColumn = t_idx.local[1];
	});

	// Print out the Description structure for each element in the matrix.
	// Tiles are displayed in red and green to distinguish them from each other.
	SetConsoleSize(100, 150);
	for (int row = 0; row < ROWS; row++)
	{
		for (int column = 0; column < COLS; column++)
		{
			SetConsoleColor((descriptions(row, column).tileRow + descriptions(row, column).tileColumn) % 2);
			std::cout << "Value: " << std::setw(2) << descriptions(row, column).value << "    ";
		}
		std::cout << "\n";

		for (int column = 0; column < COLS; column++)
		{
			SetConsoleColor((descriptions(row, column).tileRow + descriptions(row, column).tileColumn) % 2);
			std::cout << "Tile: " << "(" << descriptions(row, column).tileRow << "," << descriptions(row, column).tileColumn << ") ";
		}
		std::cout << "\n";

		for (int column = 0; column < COLS; column++)
		{
			SetConsoleColor((descriptions(row, column).tileRow + descriptions(row, column).tileColumn) % 2);
			std::cout << "Global: " << "(" << descriptions(row, column).globalRow << "," << descriptions(row, column).globalColumn << ")";
		}
		std::cout << "\n";

		for (int column = 0; column < COLS; column++)
		{
			SetConsoleColor((descriptions(row, column).tileRow + descriptions(row, column).tileColumn) % 2);
			std::cout << "Local: " << "(" << descriptions(row, column).localRow << "," << descriptions(row, column).localColumn << ")";
		}
		std::cout << "\n";
		std::cout << "\n";
	}
}

#define SAMPLESIZE 2
#define MATRIXSIZE 8
void SamplingExample()
{
	// Create data and array_view for the matrix.
	std::vector<float> rawData;
	for (int i = 0; i < MATRIXSIZE * MATRIXSIZE; i++)
	{
		rawData.push_back((float)i);
	}
	extent<2> dataExtent(MATRIXSIZE, MATRIXSIZE);
	array_view<float, 2> matrix(dataExtent, rawData);

	// Create the array for  the averages.
	// There is one element in the output for each tile in the data.
	std::vector<float> outputData;
	int outputSize = MATRIXSIZE / SAMPLESIZE;
	for (int j = 0; j < outputSize * outputSize; j++)
	{
		outputData.push_back((float)0);
	}

	extent<2> outputExtent(MATRIXSIZE / SAMPLESIZE, MATRIXSIZE / SAMPLESIZE);
	array<float, 2> averages(outputExtent, outputData.begin(), outputData.end());

	// Use tiles that are SAMPLESIZE x SAMPLESIZE
	// Find the average of the values in each tile.
	// The only reference-type variable you can pass into the parallel_fo_each_call
	// is a concurrency::array.
	parallel_for_each(matrix.extent.tile<SAMPLESIZE, SAMPLESIZE>(),
		[=, &averages](tiled_index<SAMPLESIZE, SAMPLESIZE> t_idx) restrict(amp)
		{
			// Copy the values of the tile into a tile-sized array.
			tile_static float tileValues[SAMPLESIZE][SAMPLESIZE];
			tileValues[t_idx.local[0]][t_idx.local[1]] = matrix[t_idx];

			// Wait for the tile-sized array to load before you calculate the average.
			t_idx.barrier.wait();

			// IF you remove the if statement, then the  calculation executes for every
			// thread in the tile, and makes the same assignemnt to averages each time.
			if (t_idx.local[0] == 0 && t_idx.local[1] == 0)
			{
				for (int trow = 0; trow < SAMPLESIZE; trow++)
				{
					for (int tcol = 0; tcol < SAMPLESIZE; tcol++)
					{
						averages(t_idx.tile[0], t_idx.tile[1]) += tileValues[trow][tcol];
					}
				}
				averages(t_idx.tile[0], t_idx.tile[1]) /= (float)(SAMPLESIZE * SAMPLESIZE);
			}
		}
	);

	// Print out the results.
	// You cannot access the values in aveages directly. You must copy them
	// vack to a CPU variable.
	outputData = averages;
	for (int row = 0; row < outputSize; row++)
	{
		for (int col = 0; col < outputSize; col++)
		{
			std::cout << outputData[row * outputSize + col] << " ";
		}
		std::cout << "\n";
	}

	// Output for SAMPLESSIZE = 2 is:
	// 4.5  6.5  8.5  10.5
	// 20.5 22.5 24.5 26.5
	// 36.5 38.5 40.5 42.5
	// 52.5 54.5 56.5 58.5

	// Output for SAMPLESIZE = 4 is:
	// 13.5  17.5
	// 45.5  49.5
}

// Program entry point. Runs the sampling demo and then waits until one
// non-whitespace character can be read from standard input (presumably to
// keep the console window open until the user reacts).
// Declared as `int main()` because `void main()` is not valid standard C++.
int main()
{
	// Uncomment to run the tiling demo as well.
	//TilingDescription();
	SamplingExample();

	char wait;
	std::cin >> wait;
	return 0;
}