螺旋矩阵 之三
问题:如何高效地构建一个螺旋矩阵?
前面的文章讨论了两种螺旋矩阵。当N比较小时,可以用模拟法(测试代码中的build_1a和build_1b函数),另外可以将4个for循环体合并到一个(build_2a,build_2b和build_2c函数)。但N比较大时,由于不断地对内存跳跃式访问,CPU cache line命中率很低,定位和载入内存的开销相当大。一种解决方法是,直接计算每个位置对应的值(build_3a和build_3b函数);另一种解决方法则是:将每行拆分成三部分,第一部分等于上一行同一列数值减1,中间部分是一段连续的递增或递减的数列(其起始和结束值可由公式算得),最后一部分的数等于上一行同一列数值加1(build_4)。为了测试方便,加了一个basic_build函数,先行后列填充1到N²的等差数列。
测试结果有点出乎意料,效率最高的basic_build、build_3a、build_3b和build_4这几个函数所用时间相当接近,其它几个函数的效率彼此间也相差不大。由于程序的性能瓶颈在于对内存访问的效率,二维数组的布局,CPU的缓存大小,内存页的大小等都对测试结果有很大影响,使得测试结果不精确。下面仅列出一个极端情况下的结果:
值得注意的是,用模拟法构建 5120 * 5120 时,所用时间是 构建 5121 * 5121 的3倍多。
各种方法构建N*N矩阵所用时间(ms)
| 5119 | 5120 | 5121 |
build_1a | 387 | 1259 | 331 |
build_1b | 390 | 1259 | 331 |
build_2a | 418 | 1259 | 325 |
build_2b | 375 | 1256 | 312 |
build_2c | 371 | 1187 | 312 |
build_3a | 140 | 137 | 137 |
build_3b | 134 | 134 | 134 |
build_4 | 134 | 134 | 162 |
basic | 134 | 131 | 134 |
测试代码:
代码
//www.cnblogs.com/flyinghearts
#include<iostream>
#include<algorithm>
#include<vector>
#include<ctime>
#include<windows.h>
using std::min;
using std::vector;
using std::cout;
// Side length of the global buffer below; main() also passes it to test()
// as the matrix order to benchmark.
const int N = 5120;
// Global N x N output buffer that every build_* function (and basic_build)
// fills in place. It has static storage duration, so it is not on the stack.
int arr[N][N];
// Baseline for the benchmark: fills the top-left n x n corner of arr with
// 1, 2, ..., n*n in row-major order (no spiral), visiting memory sequentially.
void basic_build(int n)
{
    int next = 1;
    for (int row = 0; row < n; ++row) {
        for (int col = 0; col < n; ++col) {
            arr[row][col] = next;
            ++next;
        }
    }
}
// Simulation method: walks the spiral one ring at a time, writing a running
// counter along the top, right, bottom and left edge of each ring in turn.
// n is the matrix order; the result goes to arr[0..n-1][0..n-1].
void build_1a(int n)
{
    // Same as n / 2u: the division is carried out in unsigned arithmetic.
    const int rings = static_cast<int>(static_cast<unsigned>(n) / 2u);
    int value = 0;
    for (int ring = 0; ring < rings; ++ring) {
        const int last = n - 1 - ring;  // index of this ring's last row/column
        int pos = ring;
        while (pos < last) {            // top edge, left to right
            arr[ring][pos] = ++value;
            ++pos;
        }
        pos = ring;
        while (pos < last) {            // right edge, top to bottom
            arr[pos][last] = ++value;
            ++pos;
        }
        pos = last;
        while (pos > ring) {            // bottom edge, right to left
            arr[last][pos] = ++value;
            --pos;
        }
        pos = last;
        while (pos > ring) {            // left edge, bottom to top
            arr[pos][ring] = ++value;
            --pos;
        }
    }
    // An odd order leaves one centre cell that no ring covers.
    if ((n & 1) != 0) arr[rings][rings] = ++value;
}
// Simulation method without a counter carried across rings: the first value
// of ring r is computed directly as 4*r*(n-r)+1, and each of the four edges
// is filled as an offset from that starting value.
void build_1b(int n)
{
    const int rings = static_cast<int>(static_cast<unsigned>(n) / 2u);
    for (int ring = 0; ring < rings; ++ring) {
        const int last = n - 1 - ring;              // last row/column of the ring
        const int side = last - ring;               // cells written per edge
        const int first = 4 * ring * (n - ring) + 1;  // value at arr[ring][ring]

        // top edge, left to right
        for (int col = ring; col < last; ++col)
            arr[ring][col] = first + (col - ring);
        // right edge, top to bottom
        for (int row = ring; row < last; ++row)
            arr[row][last] = first + side + (row - ring);
        // bottom edge, right to left
        for (int col = last; col > ring; --col)
            arr[last][col] = first + 2 * side + (last - col);
        // left edge, bottom to top
        for (int row = last; row > ring; --row)
            arr[row][ring] = first + 3 * side + (last - row);
    }
    // Centre cell of an odd-order matrix holds the largest value.
    if ((n & 1) != 0) arr[rings][rings] = n * n;
}
// Merged-loop variant: one loop per ring writes one cell of each of the four
// edges per iteration. The top and right edges are walked forwards while the
// bottom and left edges are walked backwards.
void build_2a(int n)
{
    const int rings = static_cast<int>(static_cast<unsigned>(n) / 2u);
    int value = 1;
    for (int ring = 0; ring < rings; ++ring) {
        const int side = n - 1 - 2 * ring;  // cells written per edge
        const int last = n - 1 - ring;      // last row/column of the ring
        int fwd = ring;
        int back = last;
        while (fwd < last) {
            arr[ring][fwd] = value;              // top
            arr[fwd][last] = value + side;       // right
            arr[last][back] = value + 2 * side;  // bottom
            arr[back][ring] = value + 3 * side;  // left
            ++value;
            ++fwd;
            --back;
        }
        // The loop advanced by one edge; skip the other three edges' values.
        value += 3 * side;
    }
    if ((n & 1) != 0) arr[rings][rings] = n * n;
}
// Merged-loop variant that walks all four edges in increasing index order:
// the top and right edges take an ascending counter, the left and bottom
// edges take a descending counter that starts at the ring's last value.
void build_2b(int n)
{
    const int rings = static_cast<int>(static_cast<unsigned>(n) / 2u);
    int up = 1;
    for (int ring = 0; ring < rings; ++ring) {
        const int side = n - 1 - 2 * ring;  // cells written per edge
        const int last = n - 1 - ring;      // last row/column of the ring
        int down = up + 4 * side - 1;       // final value of this ring
        int pos = ring;
        while (pos < last) {
            const int next = pos + 1;
            arr[ring][pos] = up;             // top
            arr[pos][last] = up + side;      // right
            arr[next][ring] = down;          // left
            arr[last][next] = down - side;   // bottom
            ++up;
            --down;
            pos = next;
        }
        up += 3 * side;
    }
    if ((n & 1) != 0) arr[rings][rings] = n * n;
}
// Like build_2b, but the four corners of each ring are written up front so
// the inner loop can use the same index for all four edges.
void build_2c(int n)
{
    const int rings = static_cast<int>(static_cast<unsigned>(n) / 2u);
    int up = 1;
    for (int ring = 0; ring < rings; ++ring) {
        const int side = n - 1 - 2 * ring;  // distance between adjacent corners
        const int last = n - 1 - ring;      // last row/column of the ring

        // Corners, clockwise from the top-left.
        arr[ring][ring] = up;
        arr[ring][last] = up + side;
        arr[last][last] = up + 2 * side;
        arr[last][ring] = up + 3 * side;
        ++up;

        // Non-corner cells: ascending on top/right, descending on left/bottom.
        int down = up + 4 * side - 2;
        int pos = ring + 1;
        while (pos < last) {
            arr[ring][pos] = up;
            arr[pos][last] = up + side;
            arr[pos][ring] = down;
            arr[last][pos] = down - side;
            ++up;
            --down;
            ++pos;
        }
        up += 3 * side;
    }
    if ((n & 1) != 0) arr[rings][rings] = n * n;
}
// Direct-formula variant: visits arr in row-major order and computes every
// cell from its coordinates alone, so no earlier write is ever read back.
void build_3a(int n)
{
    for (int row = 0; row < n; ++row) {
        for (int col = 0; col < n; ++col) {
            int value;
            if (col < row) {
                // Below the main diagonal: count backwards from the start
                // value of the next inner ring.
                const int ring = min(col, n - 1 - row) + 1;
                value = 4 * ring * (n - ring) + 1 - (row + col - (ring - 1) * 2);
            } else {
                // On or above the main diagonal: count forwards from the
                // start value of the cell's own ring.
                const int ring = min(row, n - 1 - col);
                value = 4 * ring * (n - ring) + 1 + (row + col - ring * 2);
            }
            arr[row][col] = value;
        }
    }
}
// Direct-formula variant with the per-cell branch removed: each row is split
// at the main diagonal and the two parts are filled by separate loops.
void build_3b(int n)
{
    for (int row = 0; row < n; ++row) {
        int col = 0;
        // Columns left of the diagonal: count backwards from the start value
        // of the next inner ring.
        for (; col < row; ++col) {
            const int ring = min(col, n - 1 - row) + 1;
            arr[row][col] = 4 * ring * (n - ring) + 1 - (row + col - (ring - 1) * 2);
        }
        // Diagonal and everything to its right (col == row at this point):
        // count forwards from the start value of the cell's own ring.
        for (; col < n; ++col) {
            const int ring = min(row, n - 1 - col);
            arr[row][col] = 4 * ring * (n - ring) + 1 + (row + col - ring * 2);
        }
    }
}
// Row-by-row variant. Every row after the first is written in three parts:
//   1. a left part copied from the row above minus 1,
//   2. a middle run of consecutive values whose start comes from a formula
//      (ascending in the upper half, descending in the lower half),
//   3. a right part copied from the row above plus 1.
void build_4(int n)
{
    // Row 0 is simply 1..n.
    for (int col = 0; col < n; ++col) arr[0][col] = col + 1;

    // Same as (n + 1) / 2u: the division is carried out in unsigned arithmetic.
    const int half = static_cast<int>(static_cast<unsigned>(n + 1) / 2u);

    // Upper half: the middle run ascends.
    for (int row = 1; row < half; ++row) {
        int col = 0;
        while (col + 1 < row) {
            arr[row][col] = arr[row - 1][col] - 1;
            ++col;
        }
        int value = 4 * row * (n - row);
        const int stop = n - row;
        while (col < stop) {
            arr[row][col] = value;
            ++value;
            ++col;
        }
        while (col < n) {
            arr[row][col] = arr[row - 1][col] + 1;
            ++col;
        }
    }

    // Lower half: the middle run descends.
    for (int row = half; row < n; ++row) {
        const int ring = n - 1 - row;  // ring whose bottom edge lies on this row
        int col = 0;
        while (col < ring) {
            arr[row][col] = arr[row - 1][col] - 1;
            ++col;
        }
        int value = 4 * ring * (n - ring) + 1 + 3 * (n - 1 - 2 * ring);
        while (col <= row) {
            arr[row][col] = value;
            --value;
            ++col;
        }
        while (col < n) {
            arr[row][col] = arr[row - 1][col] + 1;
            ++col;
        }
    }
}
// Writes the top-left n x n corner of arr to cout, one matrix row per line,
// each value in a field of width 3 followed by a space, then a blank line.
void print(int n)
{
    for (int row = 0; row < n; ++row) {
        for (int col = 0; col < n; ++col) {
            cout.width(3);  // width applies only to the next insertion
            cout << arr[row][col] << " ";
        }
        cout << "\n";
    }
    cout << "\n";
}
// Table entry pairing a builder's display name with the builder itself.
struct Func {
    char const *name;   // label printed next to the builder's output/timings
    void (*func)(int);  // builder, called with the matrix order
};
// Demo / benchmark driver for a table of matrix builders.
//
//   pf, len : table of builders and the number of entries in it
//   n       : matrix order passed to every builder
//   count   : number of timing rounds; a negative value selects print mode,
//             where each builder is run once and its matrix is printed
//   M       : number of back-to-back calls made inside each timed interval
//
// In timing mode one progress line "<n> <name> <ticks>" is printed per
// measurement, followed by a summary with each builder's accumulated clock()
// ticks divided by M * count.
void test(Func pf[], size_t len, int n, int count = 1, int M = 1)
{
    if (count < 0) {
        for (size_t k = 0; k < len; ++k) {
            cout << pf[k].name << " :\n";
            pf[k].func(n);
            print(n);
        }
        return;
    }

    vector<size_t> ticks(len);  // accumulated clock() ticks per builder, zeroed

    // Fill the n x n region once before any measurement starts.
    // NOTE(review): presumably a warm-up so first-touch costs are not charged
    // to the first builder - confirm.
    basic_build(n);

    for (int round = 0; round < count; ++round) {
        for (size_t i = 0; i < len; ++i) {
            const clock_t begin = clock();
            for (int j = 0; j < M; ++j) pf[i].func(n);
            const clock_t elapsed = clock() - begin;
            // Goes through cout rather than printf: <cstdio> is not included
            // by this file, and "%ld" is not a portable format for clock_t.
            cout << n << " " << pf[i].name << " "
                 << static_cast<long>(elapsed) << "\n";
            ticks[i] += elapsed;
        }
    }

    const int total = M * count;
    if (total <= 0) return;  // nothing was measured; avoid dividing by zero

    cout << "\nResult: " << n << "\n";
    for (size_t k = 0; k < len; ++k)
        cout << pf[k].name << " " << ticks[k] / total << "\n";
    cout << "\n";
}
int main()
{
SYSTEM_INFO info;
GetSystemInfo(&info);
if (info.dwNumberOfProcessors >= 2)
SetProcessAffinityMask( GetCurrentProcess(),2);
Func pf[]={
{"build_1a", build_1a},
{"build_1b", build_1b},
{"build_2a", build_2a},
{"build_2b", build_2b},
{"build_2c", build_2c},
{"build_3a", build_3a},
{"build_3b", build_3b},
{"build_4 ", build_4},
{"basic ", basic_build},
};
const size_t sz = sizeof(pf)/sizeof(pf[0]);
//test(pf, sz, 5, -1);
//test(pf, sz, N, 5);
test(pf, sz, N, 1, 5);
}
#include<iostream>
#include<algorithm>
#include<vector>
#include<ctime>
#include<windows.h>
using std::min;
using std::vector;
using std::cout;
// Side length of the global buffer below; main() also passes it to test()
// as the matrix order to benchmark.
const int N = 5120;
// Global N x N output buffer that every build_* function (and basic_build)
// fills in place. It has static storage duration, so it is not on the stack.
int arr[N][N];
// Baseline for the benchmark: fills the top-left n x n corner of arr with
// 1, 2, ..., n*n in row-major order (no spiral), visiting memory sequentially.
void basic_build(int n)
{
    int next = 1;
    for (int row = 0; row < n; ++row) {
        for (int col = 0; col < n; ++col) {
            arr[row][col] = next;
            ++next;
        }
    }
}
// Simulation method: walks the spiral one ring at a time, writing a running
// counter along the top, right, bottom and left edge of each ring in turn.
// n is the matrix order; the result goes to arr[0..n-1][0..n-1].
void build_1a(int n)
{
    // Same as n / 2u: the division is carried out in unsigned arithmetic.
    const int rings = static_cast<int>(static_cast<unsigned>(n) / 2u);
    int value = 0;
    for (int ring = 0; ring < rings; ++ring) {
        const int last = n - 1 - ring;  // index of this ring's last row/column
        int pos = ring;
        while (pos < last) {            // top edge, left to right
            arr[ring][pos] = ++value;
            ++pos;
        }
        pos = ring;
        while (pos < last) {            // right edge, top to bottom
            arr[pos][last] = ++value;
            ++pos;
        }
        pos = last;
        while (pos > ring) {            // bottom edge, right to left
            arr[last][pos] = ++value;
            --pos;
        }
        pos = last;
        while (pos > ring) {            // left edge, bottom to top
            arr[pos][ring] = ++value;
            --pos;
        }
    }
    // An odd order leaves one centre cell that no ring covers.
    if ((n & 1) != 0) arr[rings][rings] = ++value;
}
// Simulation method without a counter carried across rings: the first value
// of ring r is computed directly as 4*r*(n-r)+1, and each of the four edges
// is filled as an offset from that starting value.
void build_1b(int n)
{
    const int rings = static_cast<int>(static_cast<unsigned>(n) / 2u);
    for (int ring = 0; ring < rings; ++ring) {
        const int last = n - 1 - ring;              // last row/column of the ring
        const int side = last - ring;               // cells written per edge
        const int first = 4 * ring * (n - ring) + 1;  // value at arr[ring][ring]

        // top edge, left to right
        for (int col = ring; col < last; ++col)
            arr[ring][col] = first + (col - ring);
        // right edge, top to bottom
        for (int row = ring; row < last; ++row)
            arr[row][last] = first + side + (row - ring);
        // bottom edge, right to left
        for (int col = last; col > ring; --col)
            arr[last][col] = first + 2 * side + (last - col);
        // left edge, bottom to top
        for (int row = last; row > ring; --row)
            arr[row][ring] = first + 3 * side + (last - row);
    }
    // Centre cell of an odd-order matrix holds the largest value.
    if ((n & 1) != 0) arr[rings][rings] = n * n;
}
// Merged-loop variant: one loop per ring writes one cell of each of the four
// edges per iteration. The top and right edges are walked forwards while the
// bottom and left edges are walked backwards.
void build_2a(int n)
{
    const int rings = static_cast<int>(static_cast<unsigned>(n) / 2u);
    int value = 1;
    for (int ring = 0; ring < rings; ++ring) {
        const int side = n - 1 - 2 * ring;  // cells written per edge
        const int last = n - 1 - ring;      // last row/column of the ring
        int fwd = ring;
        int back = last;
        while (fwd < last) {
            arr[ring][fwd] = value;              // top
            arr[fwd][last] = value + side;       // right
            arr[last][back] = value + 2 * side;  // bottom
            arr[back][ring] = value + 3 * side;  // left
            ++value;
            ++fwd;
            --back;
        }
        // The loop advanced by one edge; skip the other three edges' values.
        value += 3 * side;
    }
    if ((n & 1) != 0) arr[rings][rings] = n * n;
}
// Merged-loop variant that walks all four edges in increasing index order:
// the top and right edges take an ascending counter, the left and bottom
// edges take a descending counter that starts at the ring's last value.
void build_2b(int n)
{
    const int rings = static_cast<int>(static_cast<unsigned>(n) / 2u);
    int up = 1;
    for (int ring = 0; ring < rings; ++ring) {
        const int side = n - 1 - 2 * ring;  // cells written per edge
        const int last = n - 1 - ring;      // last row/column of the ring
        int down = up + 4 * side - 1;       // final value of this ring
        int pos = ring;
        while (pos < last) {
            const int next = pos + 1;
            arr[ring][pos] = up;             // top
            arr[pos][last] = up + side;      // right
            arr[next][ring] = down;          // left
            arr[last][next] = down - side;   // bottom
            ++up;
            --down;
            pos = next;
        }
        up += 3 * side;
    }
    if ((n & 1) != 0) arr[rings][rings] = n * n;
}
// Like build_2b, but the four corners of each ring are written up front so
// the inner loop can use the same index for all four edges.
void build_2c(int n)
{
    const int rings = static_cast<int>(static_cast<unsigned>(n) / 2u);
    int up = 1;
    for (int ring = 0; ring < rings; ++ring) {
        const int side = n - 1 - 2 * ring;  // distance between adjacent corners
        const int last = n - 1 - ring;      // last row/column of the ring

        // Corners, clockwise from the top-left.
        arr[ring][ring] = up;
        arr[ring][last] = up + side;
        arr[last][last] = up + 2 * side;
        arr[last][ring] = up + 3 * side;
        ++up;

        // Non-corner cells: ascending on top/right, descending on left/bottom.
        int down = up + 4 * side - 2;
        int pos = ring + 1;
        while (pos < last) {
            arr[ring][pos] = up;
            arr[pos][last] = up + side;
            arr[pos][ring] = down;
            arr[last][pos] = down - side;
            ++up;
            --down;
            ++pos;
        }
        up += 3 * side;
    }
    if ((n & 1) != 0) arr[rings][rings] = n * n;
}
// Direct-formula variant: visits arr in row-major order and computes every
// cell from its coordinates alone, so no earlier write is ever read back.
void build_3a(int n)
{
    for (int row = 0; row < n; ++row) {
        for (int col = 0; col < n; ++col) {
            int value;
            if (col < row) {
                // Below the main diagonal: count backwards from the start
                // value of the next inner ring.
                const int ring = min(col, n - 1 - row) + 1;
                value = 4 * ring * (n - ring) + 1 - (row + col - (ring - 1) * 2);
            } else {
                // On or above the main diagonal: count forwards from the
                // start value of the cell's own ring.
                const int ring = min(row, n - 1 - col);
                value = 4 * ring * (n - ring) + 1 + (row + col - ring * 2);
            }
            arr[row][col] = value;
        }
    }
}
// Direct-formula variant with the per-cell branch removed: each row is split
// at the main diagonal and the two parts are filled by separate loops.
void build_3b(int n)
{
    for (int row = 0; row < n; ++row) {
        int col = 0;
        // Columns left of the diagonal: count backwards from the start value
        // of the next inner ring.
        for (; col < row; ++col) {
            const int ring = min(col, n - 1 - row) + 1;
            arr[row][col] = 4 * ring * (n - ring) + 1 - (row + col - (ring - 1) * 2);
        }
        // Diagonal and everything to its right (col == row at this point):
        // count forwards from the start value of the cell's own ring.
        for (; col < n; ++col) {
            const int ring = min(row, n - 1 - col);
            arr[row][col] = 4 * ring * (n - ring) + 1 + (row + col - ring * 2);
        }
    }
}
// Row-by-row variant. Every row after the first is written in three parts:
//   1. a left part copied from the row above minus 1,
//   2. a middle run of consecutive values whose start comes from a formula
//      (ascending in the upper half, descending in the lower half),
//   3. a right part copied from the row above plus 1.
void build_4(int n)
{
    // Row 0 is simply 1..n.
    for (int col = 0; col < n; ++col) arr[0][col] = col + 1;

    // Same as (n + 1) / 2u: the division is carried out in unsigned arithmetic.
    const int half = static_cast<int>(static_cast<unsigned>(n + 1) / 2u);

    // Upper half: the middle run ascends.
    for (int row = 1; row < half; ++row) {
        int col = 0;
        while (col + 1 < row) {
            arr[row][col] = arr[row - 1][col] - 1;
            ++col;
        }
        int value = 4 * row * (n - row);
        const int stop = n - row;
        while (col < stop) {
            arr[row][col] = value;
            ++value;
            ++col;
        }
        while (col < n) {
            arr[row][col] = arr[row - 1][col] + 1;
            ++col;
        }
    }

    // Lower half: the middle run descends.
    for (int row = half; row < n; ++row) {
        const int ring = n - 1 - row;  // ring whose bottom edge lies on this row
        int col = 0;
        while (col < ring) {
            arr[row][col] = arr[row - 1][col] - 1;
            ++col;
        }
        int value = 4 * ring * (n - ring) + 1 + 3 * (n - 1 - 2 * ring);
        while (col <= row) {
            arr[row][col] = value;
            --value;
            ++col;
        }
        while (col < n) {
            arr[row][col] = arr[row - 1][col] + 1;
            ++col;
        }
    }
}
// Writes the top-left n x n corner of arr to cout, one matrix row per line,
// each value in a field of width 3 followed by a space, then a blank line.
void print(int n)
{
    for (int row = 0; row < n; ++row) {
        for (int col = 0; col < n; ++col) {
            cout.width(3);  // width applies only to the next insertion
            cout << arr[row][col] << " ";
        }
        cout << "\n";
    }
    cout << "\n";
}
// Table entry pairing a builder's display name with the builder itself.
struct Func {
    char const *name;   // label printed next to the builder's output/timings
    void (*func)(int);  // builder, called with the matrix order
};
// Demo / benchmark driver for a table of matrix builders.
//
//   pf, len : table of builders and the number of entries in it
//   n       : matrix order passed to every builder
//   count   : number of timing rounds; a negative value selects print mode,
//             where each builder is run once and its matrix is printed
//   M       : number of back-to-back calls made inside each timed interval
//
// In timing mode one progress line "<n> <name> <ticks>" is printed per
// measurement, followed by a summary with each builder's accumulated clock()
// ticks divided by M * count.
void test(Func pf[], size_t len, int n, int count = 1, int M = 1)
{
    if (count < 0) {
        for (size_t k = 0; k < len; ++k) {
            cout << pf[k].name << " :\n";
            pf[k].func(n);
            print(n);
        }
        return;
    }

    vector<size_t> ticks(len);  // accumulated clock() ticks per builder, zeroed

    // Fill the n x n region once before any measurement starts.
    // NOTE(review): presumably a warm-up so first-touch costs are not charged
    // to the first builder - confirm.
    basic_build(n);

    for (int round = 0; round < count; ++round) {
        for (size_t i = 0; i < len; ++i) {
            const clock_t begin = clock();
            for (int j = 0; j < M; ++j) pf[i].func(n);
            const clock_t elapsed = clock() - begin;
            // Goes through cout rather than printf: <cstdio> is not included
            // by this file, and "%ld" is not a portable format for clock_t.
            cout << n << " " << pf[i].name << " "
                 << static_cast<long>(elapsed) << "\n";
            ticks[i] += elapsed;
        }
    }

    const int total = M * count;
    if (total <= 0) return;  // nothing was measured; avoid dividing by zero

    cout << "\nResult: " << n << "\n";
    for (size_t k = 0; k < len; ++k)
        cout << pf[k].name << " " << ticks[k] / total << "\n";
    cout << "\n";
}
int main()
{
SYSTEM_INFO info;
GetSystemInfo(&info);
if (info.dwNumberOfProcessors >= 2)
SetProcessAffinityMask( GetCurrentProcess(),2);
Func pf[]={
{"build_1a", build_1a},
{"build_1b", build_1b},
{"build_2a", build_2a},
{"build_2b", build_2b},
{"build_2c", build_2c},
{"build_3a", build_3a},
{"build_3b", build_3b},
{"build_4 ", build_4},
{"basic ", basic_build},
};
const size_t sz = sizeof(pf)/sizeof(pf[0]);
//test(pf, sz, 5, -1);
//test(pf, sz, N, 5);
test(pf, sz, N, 1, 5);
}
作者: flyinghearts
出处: http://www.cnblogs.com/flyinghearts/
本文采用知识共享署名-非商业性使用-相同方式共享 2.5 中国大陆许可协议进行许可,欢迎转载,但未经作者同意必须保留此段声明,且在文章页面明显位置给出原文链接,否则保留追究法律责任的权利。