// Reference: https://msdn.microsoft.com/en-us/library/hh873135.aspx
#include <iostream>
#include <iomanip>
#include <amp.h>
using namespace concurrency;
const int ROWS = 8;
const int COLS = 9;
// Per-element record filled in by TilingDescription for every cell of the matrix.
// tileRow and tileColumn specify the tile that each thread is in.
// globalRow and globalColumn specify the location of the thread in the array_view.
// localRow and localColumn specify the location of the thread relative to the tile.
// Member order matters: TilingDescription brace-initializes this struct positionally.
struct Description
{
int value; // sequence number assigned on the host before the kernel runs
int tileRow; // written from t_idx.tile[0]
int tileColumn; // written from t_idx.tile[1]
int globalRow; // written from t_idx.global[0]
int globalColumn; // written from t_idx.global[1]
int localRow; // written from t_idx.local[0]
int localColumn; // written from t_idx.local[1]
};
// A helper function for formatting the output.
// Sets the console text attribute used for subsequent writes to standard output.
// `color` is a tile-parity flag: 0 selects red, any other value selects green,
// so that tiles whose (tileRow + tileColumn) parity differs get different colors.
void SetConsoleColor(int color)
{
    // Console character attribute bits: 4 is the red foreground bit and
    // 2 is the green foreground bit.
    const int red = 4;
    const int green = 2;
    // Callers in this file pass (tileRow + tileColumn) % 2, which is only ever
    // 0 or 1, so the red case has to be keyed on 0 for both colors to appear.
    const int colorValue = (color == 0) ? red : green;
    SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), colorValue);
}
// A helper function for farmatting the output.
void SetConsoleSize(int height, int width)
{
COORD coord; coord.X = width; coord.Y = height;
SetConsoleScreenBufferSize(GetStdHandle(STD_OUTPUT_HANDLE), coord);
SMALL_RECT *rect = new SMALL_RECT();
rect->Left = 0;
rect->Top = 0;
rect->Right = width;
rect->Bottom = height;
SetConsoleWindowInfo(GetStdHandle(STD_OUTPUT_HANDLE), true, rect);
}
// This method creates an 8 x 9 matrix of Description structures. In the
// call to parallel_for_each, the structure is updated with tile,global, and local indices.
void TilingDescription()
{
// Create 72(8x9) Description structures.
std::vector<Description> descs;
for (int i = 0; i < ROWS * COLS; i++)
{
Description d = { i, 0, 0, 0, 0, 0, 0 };
descs.push_back(d);
}
// Create an array_view from the Description structures.
extent<2> matrix(ROWS, COLS);
array_view<Description, 2> descriptions(matrix, descs);
// Update each Description with the tile, global. and local indices.
parallel_for_each(descriptions.extent.tile<2, 3>(),
[=](tiled_index<2, 3> t_idx) restrict(amp)
{
descriptions[t_idx].globalRow = t_idx.global[0];
descriptions[t_idx].globalColumn = t_idx.global[1];
descriptions[t_idx].tileRow = t_idx.tile[0];
descriptions[t_idx].tileColumn = t_idx.tile[1];
descriptions[t_idx].localRow = t_idx.local[0];
descriptions[t_idx].localColumn = t_idx.local[1];
});
// Print out the Description structure for each element in the matrix.
// Tiles are displayed in red and green to distinguish them from each other.
SetConsoleSize(100, 150);
for (int row = 0; row < ROWS; row++)
{
for (int column = 0; column < COLS; column++)
{
SetConsoleColor((descriptions(row, column).tileRow + descriptions(row, column).tileColumn) % 2);
std::cout << "Value: " << std::setw(2) << descriptions(row, column).value << " ";
}
std::cout << "\n";
for (int column = 0; column < COLS; column++)
{
SetConsoleColor((descriptions(row, column).tileRow + descriptions(row, column).tileColumn) % 2);
std::cout << "Tile: " << "(" << descriptions(row, column).tileRow << "," << descriptions(row, column).tileColumn << ") ";
}
std::cout << "\n";
for (int column = 0; column < COLS; column++)
{
SetConsoleColor((descriptions(row, column).tileRow + descriptions(row, column).tileColumn) % 2);
std::cout << "Global: " << "(" << descriptions(row, column).globalRow << "," << descriptions(row, column).globalColumn << ")";
}
std::cout << "\n";
for (int column = 0; column < COLS; column++)
{
SetConsoleColor((descriptions(row, column).tileRow + descriptions(row, column).tileColumn) % 2);
std::cout << "Local: " << "(" << descriptions(row, column).localRow << "," << descriptions(row, column).localColumn << ")";
}
std::cout << "\n";
std::cout << "\n";
}
}
#define SAMPLESIZE 2
#define MATRIXSIZE 8
// Averages every SAMPLESIZE x SAMPLESIZE tile of a MATRIXSIZE x MATRIXSIZE
// matrix holding the values 0, 1, 2, ... in row-major order, and prints the
// resulting (MATRIXSIZE / SAMPLESIZE)-square matrix of averages to std::cout.
void SamplingExample()
{
    // Tiling a compute domain requires each extent dimension to be evenly
    // divisible by the matching tile dimension; catch a bad combination of
    // the two macros at compile time instead of at run time.
    static_assert(MATRIXSIZE % SAMPLESIZE == 0,
        "MATRIXSIZE must be a multiple of SAMPLESIZE");

    // Create data and array_view for the matrix.
    std::vector<float> rawData;
    rawData.reserve(MATRIXSIZE * MATRIXSIZE);
    for (int i = 0; i < MATRIXSIZE * MATRIXSIZE; i++)
    {
        rawData.push_back(static_cast<float>(i));
    }
    extent<2> dataExtent(MATRIXSIZE, MATRIXSIZE);
    array_view<float, 2> matrix(dataExtent, rawData);

    // Create the array for the averages.
    // There is one element in the output for each tile in the data.
    const int outputSize = MATRIXSIZE / SAMPLESIZE;
    std::vector<float> outputData(outputSize * outputSize, 0.0f);
    extent<2> outputExtent(outputSize, outputSize);
    array<float, 2> averages(outputExtent, outputData.begin(), outputData.end());

    // Use tiles that are SAMPLESIZE x SAMPLESIZE.
    // Find the average of the values in each tile.
    // The only reference-type variable you can pass into the parallel_for_each
    // call is a concurrency::array.
    parallel_for_each(matrix.extent.tile<SAMPLESIZE, SAMPLESIZE>(),
        [=, &averages](tiled_index<SAMPLESIZE, SAMPLESIZE> t_idx) restrict(amp)
    {
        // Copy the values of the tile into a tile-sized array.
        tile_static float tileValues[SAMPLESIZE][SAMPLESIZE];
        tileValues[t_idx.local[0]][t_idx.local[1]] = matrix[t_idx];

        // Wait for the tile-sized array to load before you calculate the average.
        t_idx.barrier.wait();

        // If you remove the if statement, then the calculation executes for every
        // thread in the tile, and makes the same assignment to averages each time.
        if (t_idx.local[0] == 0 && t_idx.local[1] == 0)
        {
            // Accumulate in a local and store once, so the kernel performs a
            // single write to the output array and does not depend on the
            // output having been zero-initialized.
            float sum = 0.0f;
            for (int trow = 0; trow < SAMPLESIZE; trow++)
            {
                for (int tcol = 0; tcol < SAMPLESIZE; tcol++)
                {
                    sum += tileValues[trow][tcol];
                }
            }
            averages(t_idx.tile[0], t_idx.tile[1]) =
                sum / static_cast<float>(SAMPLESIZE * SAMPLESIZE);
        }
    }
    );

    // Print out the results.
    // You cannot access the values in averages directly. You must copy them
    // back to a CPU variable.
    outputData = averages;
    for (int row = 0; row < outputSize; row++)
    {
        for (int col = 0; col < outputSize; col++)
        {
            std::cout << outputData[row * outputSize + col] << " ";
        }
        std::cout << "\n";
    }
    // Output for SAMPLESIZE = 2 is:
    // 4.5 6.5 8.5 10.5
    // 20.5 22.5 24.5 26.5
    // 36.5 38.5 40.5 42.5
    // 52.5 54.5 56.5 58.5
    // Output for SAMPLESIZE = 4 is:
    // 13.5 17.5
    // 45.5 49.5
}
// Program entry point: runs the tile-averaging sample and then blocks on a
// character read, presumably so the console window stays open until the user
// types something. Declared as int main() because that is the standard
// signature; void main() is a compiler extension.
int main()
{
    //TilingDescription();
    SamplingExample();
    // operator>> skips whitespace, so a non-whitespace character (or end of
    // input) is needed before the program continues.
    char wait;
    std::cin >> wait;
    return 0;
}