高Cache命中率的矩阵乘法
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <vector>

namespace {

// Square matrices are stored row-major in flat, heap-allocated vectors:
// element (r, c) of an n x n matrix lives at index r * n + c.
// Heap storage avoids non-standard variable-length arrays and the multi-megabyte
// stack frame they would require.
using IntMatrix = std::vector<int>;
using RealMatrix = std::vector<double>;

// Converts two std::clock() readings into elapsed processor time in milliseconds.
// std::clock() counts implementation-defined ticks, so the difference must be
// scaled by CLOCKS_PER_SEC before it can be reported as a time.
double elapsed_ms(std::clock_t begin, std::clock_t end) {
    return 1000.0 * static_cast<double>(end - begin) / CLOCKS_PER_SEC;
}

// Accumulates a * b into c using the conventional i-j-k loop order.
// The innermost loop walks b down a column (stride n).
// c must hold n * n elements and be zero-filled by the caller; it is passed in
// rather than returned so that allocation stays outside the timed region.
void multiply_ijk(const IntMatrix& a, const IntMatrix& b, RealMatrix& c, std::size_t n) {
    for (std::size_t i = 0; i < n; ++i) {
        for (std::size_t j = 0; j < n; ++j) {
            for (std::size_t k = 0; k < n; ++k) {
                // Widen to double before multiplying: the product of two rand()
                // values can exceed the range of int.
                c[i * n + j] += static_cast<double>(a[i * n + k]) * b[k * n + j];
            }
        }
    }
}

// Accumulates a * b into c using the i-k-j loop order.
// The innermost loop walks both b and c along a row (stride 1).
// Same contract for c as multiply_ijk.
void multiply_ikj(const IntMatrix& a, const IntMatrix& b, RealMatrix& c, std::size_t n) {
    for (std::size_t i = 0; i < n; ++i) {
        for (std::size_t k = 0; k < n; ++k) {
            const double a_ik = static_cast<double>(a[i * n + k]);
            for (std::size_t j = 0; j < n; ++j) {
                c[i * n + j] += a_ik * b[k * n + j];
            }
        }
    }
}

// Returns true when x and y have the same size and agree element-wise within a
// small relative tolerance (tolerant of floating-point contraction differences
// between the two loop nests).
bool nearly_equal(const RealMatrix& x, const RealMatrix& y) {
    if (x.size() != y.size()) {
        return false;
    }
    for (std::size_t idx = 0; idx < x.size(); ++idx) {
        const double scale = std::max(std::fabs(x[idx]), std::fabs(y[idx]));
        if (std::fabs(x[idx] - y[idx]) > 1e-9 * scale) {
            return false;
        }
    }
    return true;
}

}  // namespace

// Benchmarks two loop orderings of a 500 x 500 matrix multiplication and prints
// the processor time each one takes. Returns EXIT_FAILURE if the two orderings
// produce different products.
int main(int /*argc*/, char** /*argv*/) {
    const std::size_t n = 500;

    IntMatrix a(n * n);
    IntMatrix b(n * n);
    // Fill in row-major order, drawing the a element before the b element for
    // each position.
    for (std::size_t idx = 0; idx < n * n; ++idx) {
        a[idx] = std::rand();
        b[idx] = std::rand();
    }

    // Result matrices are zero-filled up front: the kernels accumulate with +=,
    // and allocating here keeps that cost out of the measurements.
    RealMatrix c1(n * n, 0.0);
    RealMatrix c2(n * n, 0.0);

    std::clock_t t1 = std::clock();
    multiply_ijk(a, b, c1, n);
    std::clock_t t2 = std::clock();
    std::cout << "Conventional method takes " << elapsed_ms(t1, t2) << " milliseconds." << '\n';

    t1 = std::clock();
    multiply_ikj(a, b, c2, n);
    t2 = std::clock();
    std::cout << "New method takes " << elapsed_ms(t1, t2) << " milliseconds." << '\n';

    // Reading the results both validates the comparison and keeps the optimizer
    // from discarding the timed loops as dead code.
    if (!nearly_equal(c1, c2)) {
        std::cerr << "Result mismatch between the two methods." << '\n';
        return EXIT_FAILURE;
    }
    return EXIT_SUCCESS;
}