以下代码在win7 home basic , ati hd 5450平台测试通过,处理速度为每秒100万次。
程序很简单,只有一个main.cpp程序。Device端只有一个md5.cl文件。
下面我把代码贴出来,因为不能上传附件,我把完整工程包放到了242337476的群共享里面。。。。
main.cpp
#include "CL\cl.h" #include <stdio.h> #include <iostream> #include <Windows.h> void main() { cl_int err_code; printf("\nLet's rock!!!!!\n"); //setup platform cl_platform_id platIDs[128]; cl_uint numPlatform; if( clGetPlatformIDs(128, platIDs, &numPlatform) != CL_SUCCESS ) { printf("error: clGetPlatformIDs\n"); return; } printf("number platforms we found :%d\n", numPlatform); for(int i=0; i<numPlatform; i++) { char buf[500]; size_t s; if(clGetPlatformInfo(platIDs[i],CL_PLATFORM_NAME, 500, buf, &s) != CL_SUCCESS) { printf("error: clGetPlatformInfo\n"); return; } printf("%d:%s\n", i, buf); } //get device info cl_device_id deviceIDs[128]; cl_uint numDevice; if(clGetDeviceIDs(platIDs[0], CL_DEVICE_TYPE_GPU, 128, deviceIDs, &numDevice) != CL_SUCCESS) { printf("error: clGetDeviceIDs\n"); return; } printf("number device we found :%d\n", numDevice); for(int i=0; i<numDevice; i++) { char buf[500]; size_t s; if(clGetDeviceInfo(deviceIDs[i], CL_DEVICE_NAME, 500, buf, &s) != CL_SUCCESS) { printf("error: clGetDeviceInfo\n"); return; } printf("Device %d, %s\n", deviceIDs[i], buf); } //user platform 0////////////////////// //create context cl_context theContext; cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platIDs[0], 0 }; theContext = clCreateContext(cps, 1, deviceIDs, NULL, NULL, &err_code); if(err_code != CL_SUCCESS) { printf("error: clCreateContext\n"); return; } //command queue cl_command_queue commandQ; commandQ = clCreateCommandQueue(theContext, deviceIDs[0], 0, &err_code); if(err_code != CL_SUCCESS) { printf("error: clCreateCommandQueue\n"); return; } //program //read from file to buf FILE *fp; fp = fopen("md5.cl", "rb"); static char source[1024*1024];//1MByte to big to the default stack size_t numRead = fread(source, 1, 1024*1024, fp); printf("read %d bytes from md5.cl\n", numRead); fclose(fp); //create program cl_program Prog; char* srcarr[1]; srcarr[0] = source; size_t srclen[1]; srclen[0] = numRead; Prog = clCreateProgramWithSource(theContext, 1, (const char **)srcarr, srclen, &err_code); if(err_code != CL_SUCCESS) { printf("error: clCreateProgramWithSource\n"); return; } //build program if(clBuildProgram(Prog, 1, deviceIDs, "", NULL, NULL) != CL_SUCCESS) { // Shows the log char* build_log; size_t log_size; // First call to know the proper size clGetProgramBuildInfo(Prog, deviceIDs[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size); build_log = new char[log_size+1]; // Second call to get the log clGetProgramBuildInfo(Prog, deviceIDs[0], CL_PROGRAM_BUILD_LOG, log_size, build_log, NULL); build_log[log_size] = '\0'; printf("%s\n", build_log); delete[] build_log; printf("error: clBuildProgram\n"); return; } //Create kernel cl_kernel K; K = clCreateKernel(Prog, "md5_kernel", &err_code); if(err_code != CL_SUCCESS) { printf("error: clCreateKernel\n"); return; } //alloc memory and set kernel param cl_mem res_d = clCreateBuffer(theContext, CL_MEM_WRITE_ONLY, 1000*16, NULL, &err_code); if(err_code != CL_SUCCESS) { printf("error: clCreateBuffer\n"); return; } err_code = clSetKernelArg(K, 0, sizeof(cl_mem), &res_d); if(err_code != CL_SUCCESS) { printf("error: clSetKernelArg\n"); return; } const size_t global_ws = 1000; printf("Start\n"); DWORD sTime = GetTickCount(); for(int i=0; i<10000; i++) { if(clEnqueueNDRangeKernel(commandQ, K, 1, NULL, &global_ws, 0, 0, NULL, NULL) != CL_SUCCESS) { printf("error: clEnqueueNDRangeKernel\n"); } clFinish(commandQ); printf("%d\r", i); } DWORD dTime = GetTickCount() - sTime; printf("\nFinished in %f second\n", dTime/1000.0f); //display result char* check = new char[1000*16]; clEnqueueReadBuffer(commandQ, res_d, CL_TRUE, 0, 1000*16, check, 0, NULL, NULL); //*(check + 32) = '\0'; //printf("Result is : %s\n", check); for(int j=0; j<10; j++) { for (int i = 0; i < 16; i++) { printf("%02x",(unsigned char)check[j*16 + i]); } printf("\n"); } printf("\n"); delete[] check; //release clReleaseKernel(K); clReleaseMemObject(res_d); clReleaseCommandQueue(commandQ); clReleaseProgram(Prog); clReleaseContext(theContext); }
md5.cl
// Enter your kernel in this window /* MD5lib.h - md5 library */ /* Copyright (C) 1990-2, RSA Data Security, Inc. Created 1990. All rights reserved. RSA Data Security, Inc. makes no representations concerning either the merchantability of this software or the suitability of this software for any particular purpose. It is provided "as is" without express or implied warranty of any kind. These notices must be retained in any copies of any part of this documentation and/or software. */ /* The following makes MD default to MD5 if it has not already been defined with C compiler flags. */ #define MD 5 /* GLOBAL.H - RSAREF types and constants */ /* PROTOTYPES should be set to one if and only if the compiler supports function argument prototyping. The following makes PROTOTYPES default to 0 if it has not already been defined with C compiler flags. */ #ifndef PROTOTYPES #define PROTOTYPES 0 #endif /* POINTER defines a generic pointer type */ typedef unsigned char *POINTER; /* UINT2 defines a two byte word */ typedef unsigned short UINT2; /* UINT4 defines a four byte word */ typedef unsigned int UINT4; /* PROTO_LIST is defined depending on how PROTOTYPES is defined above. If using PROTOTYPES, then PROTO_LIST returns the list, otherwise it returns an empty list. */ #if PROTOTYPES #define PROTO_LIST(list) list #else #define PROTO_LIST(list) () #endif /* Length of test block, number of test blocks. */ #define TEST_BLOCK_LEN 1000 #define TEST_BLOCK_COUNT 1000 /* Constants for MD5Transform routine. */ #define S11 7 #define S12 12 #define S13 17 #define S14 22 #define S21 5 #define S22 9 #define S23 14 #define S24 20 #define S31 4 #define S32 11 #define S33 16 #define S34 23 #define S41 6 #define S42 10 #define S43 15 #define S44 21 char* MDString PROTO_LIST ((char *)); char* MDFile PROTO_LIST ((char *)); char* hmac_md5(char* text, char* key); typedef struct { UINT4 state[4]; /* state (ABCD) */ UINT4 count[2]; /* number of bits, modulo 2^64 (lsb first) */ unsigned char buffer[64]; /* input buffer */ } MD5_CTX; /*void MD5Init PROTO_LIST ((MD5_CTX *)); void MD5Update PROTO_LIST ((MD5_CTX *, unsigned char *, unsigned int)); void MD5Final PROTO_LIST ((unsigned char [16], MD5_CT X *)); void MD5Transform PROTO_LIST ((UINT4 [4], unsigned char [64])); void Encode PROTO_LIST ((unsigned char *, UINT4 *, unsigned int)); void Decode PROTO_LIST ((UINT4 *, unsigned char *, unsigned int)); void MD5_memcpy PROTO_LIST ((POINTER, POINTER, unsigned int)); void MD5_memset PROTO_LIST ((POINTER, int, unsigned int)); */ /*__global unsigned char PADDING[64] = { 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };*/ /* F, G, H and I are basic MD5 functions. */ #define F(x, y, z) (((x) & (y)) | ((~x) & (z))) #define G(x, y, z) (((x) & (z)) | ((y) & (~z))) #define H(x, y, z) ((x) ^ (y) ^ (z)) #define I(x, y, z) ((y) ^ ((x) | (~z))) /* ROTATE_LEFT rotates x left n bits. */ #define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n)))) /* FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4. Rotation is separate from addition to prevent recomputation. */ #define FF(a, b, c, d, x, s, ac) {(a)+=F((b), (c), (d)) + (x) + (UINT4)(ac);(a)= ROTATE_LEFT((a),(s)); (a)+=(b);} #define GG(a, b, c, d, x, s, ac) {(a) += G ((b), (c), (d)) + (x) + (UINT4)(ac);(a) = ROTATE_LEFT ((a), (s)); (a) += (b);} #define HH(a, b, c, d, x, s, ac) {(a) += H ((b), (c), (d)) + (x) + (UINT4)(ac);(a) = ROTATE_LEFT ((a), (s)); (a) += (b);} #define II(a, b, c, d, x, s, ac) {(a) += I ((b), (c), (d)) + (x) + (UINT4)(ac);(a) = ROTATE_LEFT ((a), (s)); (a) += (b);} void MD5Init (MD5_CTX *context); void MD5Update(MD5_CTX *context, unsigned char *input,unsigned int inputLen); void MD5Final (unsigned char digest[16], MD5_CTX *context); void MD5Transform (UINT4 [4], unsigned char [64]) ; void Encode(unsigned char *, UINT4 *, unsigned int); void Decode (UINT4 *, unsigned char *, unsigned int); void MD5_memcpy(POINTER, POINTER, unsigned int); void MD5_memset(POINTER, int, unsigned int); /* MD5 initialization. Begins an MD5 operation, writing a new context. */ void MD5Init (MD5_CTX *context) /* context */ { context->count[0] = context->count[1] = 0; /* Load magic initialization constants. */ context->state[0] = 0x67452301; context->state[1] = 0xefcdab89; context->state[2] = 0x98badcfe; context->state[3] = 0x10325476; } /* MD5 block update operation. Continues an MD5 message-digest operation, processing another message block, and updating the context. */ void MD5Update (MD5_CTX *context, unsigned char *input,unsigned int inputLen ) /* context */ /* input block */ /* length of input block */ { unsigned int i, index, partLen; /* Compute number of bytes mod 64 */ index = (unsigned int)((context->count[0] >> 3) & 0x3F); /* Update number of bits */ if ((context->count[0] += ((UINT4)inputLen << 3)) < ((UINT4)inputLen << 3)) context->count[1]++; context->count[1] += ((UINT4)inputLen >> 29); partLen = 64 - index; /* Transform as many times as possible. */ if (inputLen >= partLen) { MD5_memcpy ((POINTER)&context->buffer[index], (POINTER)input, partLen); MD5Transform (context->state, context->buffer); for (i = partLen; i + 63 < inputLen; i += 64) MD5Transform (context->state, &input[i]); index = 0; } else i = 0; /* Buffer remaining input */ MD5_memcpy ((POINTER)&context->buffer[index], (POINTER)&input[i], inputLen-i); } /* MD5 finalization. Ends an MD5 message-digest operation, writing the the message digest and zeroizing the context. */ void MD5Final (unsigned char digest[16], MD5_CTX *context) /* message digest */ /* context */ { unsigned char bits[8]; unsigned int index, padLen; /* Save number of bits */ Encode (bits, context->count, 8); /* Pad out to 56 mod 64. */ index = (unsigned int)((context->count[0] >> 3) & 0x3f); padLen = (index < 56) ? (56 - index) : (120 - index); unsigned char PADDING[64] = { 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; MD5Update (context,(unsigned char*) PADDING, padLen); /* Append length (before padding) */ MD5Update (context, bits, 8); /* Store state in digest */ Encode (digest, context->state, 16); /* Zeroize sensitive information. */ MD5_memset ((POINTER)context, 0, sizeof (*context)); } /* MD5 basic transformation. Transforms state based on block. */ static void MD5Transform (UINT4 state[4], unsigned char block[64]) { UINT4 a = state[0], b = state[1], c = state[2], d = state[3], x[16]; Decode (x, block, 64); /* Round 1 */ FF (a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */ FF (d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */ FF (c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */ FF (b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */ FF (a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */ FF (d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */ FF (c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */ FF (b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */ FF (a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */ FF (d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */ FF (c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */ FF (b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */ FF (a, b, c, d, x[12], S11, 0x6b901122); /* 13 */ FF (d, a, b, c, x[13], S12, 0xfd987193); /* 14 */ FF (c, d, a, b, x[14], S13, 0xa679438e); /* 15 */ FF (b, c, d, a, x[15], S14, 0x49b40821); /* 16 */ /* Round 2 */ GG (a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */ GG (d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */ GG (c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */ GG (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */ GG (a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */ GG (d, a, b, c, x[10], S22, 0x2441453); /* 22 */ GG (c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */ GG (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */ GG (a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */ GG (d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */ GG (c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */ GG (b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */ GG (a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */ GG (d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */ GG (c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */ GG (b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */ /* Round 3 */ HH (a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */ HH (d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */ HH (c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */ HH (b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */ HH (a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */ HH (d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */ HH (c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */ HH (b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */ HH (a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */ HH (d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */ HH (c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */ HH (b, c, d, a, x[ 6], S34, 0x4881d05); /* 44 */ HH (a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */ HH (d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */ HH (c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */ HH (b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */ /* Round 4 */ II (a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */ II (d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */ II (c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */ II (b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */ II (a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */ II (d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */ II (c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */ II (b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */ II (a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */ II (d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */ II (c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */ II (b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */ II (a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */ II (d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */ II (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */ II (b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */ state[0] += a; state[1] += b; state[2] += c; state[3] += d; /* Zeroize sensitive information. */ MD5_memset ((POINTER)x, 0, sizeof (x)); } /* Encodes input (UINT4) into output (unsigned char). Assumes len is a multiple of 4. */ void Encode (unsigned char *output, UINT4 *input, unsigned int len) { unsigned int i, j; for (i = 0, j = 0; j < len; i++, j += 4) { output[j] = (unsigned char)(input[i] & 0xff); output[j+1] = (unsigned char)((input[i] >> 8) & 0xff); output[j+2] = (unsigned char)((input[i] >> 16) & 0xff); output[j+3] = (unsigned char)((input[i] >> 24) & 0xff); } } /* Decodes input (unsigned char) into output (UINT4). Assumes len is a multiple of 4. */ void Decode (UINT4 *output, unsigned char *input, unsigned int len) { unsigned int i, j; for (i = 0, j = 0; j < len; i++, j += 4) output[i] = ((UINT4)input[j]) | (((UINT4)input[j+1]) << 8) | (((UINT4)input[j+2]) << 16) | (((UINT4)input[j+3]) << 24); } /* Note: Replace "for loop" with standard memcpy if possible. */ void MD5_memcpy (POINTER output, POINTER input, unsigned int len) { unsigned int i; for (i = 0; i < len; i++) output[i] = input[i]; } /* Note: Replace "for loop" with standard memset if possible. */ void MD5_memset (POINTER output, int value, unsigned int len) { unsigned int i; for (i = 0; i < len; i++) ((char *)output)[i] = (char)value; } void myitoa(int i, char* string) { int power, j; j=i; for (power=1;j>=10;j/=10) power*=10; for (;power>0;power/=10) { *string++='0'+i/power; i%=power; } *string='\0'; } __kernel void md5_kernel (__global char* res) { int idx = get_global_id(0); idx = idx%1000; char src[32]; for(int i=0; i<32; i++) { src[i] = 0; } myitoa(idx, src); int len; for(int i=0; i<32; i++) { if(src[i] == 0) { len = i; break; } } unsigned char digest[16]; MD5_CTX context; MD5Init (&context); MD5Update (&context, (unsigned char*)src, len); MD5Final (digest, &context); for(int i=0; i<16; i++) { *(res + idx*16 + i) = digest[i]; } }