不做死就不会死

    尽管花儿很漂亮,但只不过是朵漂亮的花——仅此而已。并无任何非凡之处。而你只是个会编程的人。或许你很优秀,但没有什么与众不同。你与我、或者这个星球上其他所有人都是同类。
    你不仅要吃喝拉撒。当然,还要睡。(最好)在很久以后,你会驾鹤西游,你所创造的一切也将随之消失。甚至是象征不朽的金字塔很久以后也会消失。你知道那些建造金字塔的人的名字么?即便你真的知道,难道你知道了就那么重要么?并非如此。金字塔依然耸立在那里,或是随风而逝。平淡无奇。

  博客园  :: 首页  :: 新随笔  :: 联系 :: 订阅 订阅  :: 管理

以下代码在win7 home basic , ati hd 5450平台测试通过,处理速度为每秒100万次。

 

程序很简单,只有一个main.cpp程序。Device端只有一个md5.cl文件。

下面我把代码贴出来,因为不能上传附件,我把完整工程包放到了242337476的群共享里面。。。。

 

main.cpp

#include "CL\cl.h"

#include <stdio.h>
#include <iostream>

#include <Windows.h>

void main()
{
    cl_int err_code;

    printf("\nLet's rock!!!!!\n");

    //setup platform
    cl_platform_id platIDs[128];
    cl_uint numPlatform;

    if( clGetPlatformIDs(128, platIDs, &numPlatform) != CL_SUCCESS )
    {
        printf("error: clGetPlatformIDs\n");
        return;
    }

    printf("number platforms we found :%d\n", numPlatform);

    for(int i=0; i<numPlatform; i++)
    {
        char buf[500];
        size_t s;
        if(clGetPlatformInfo(platIDs[i],CL_PLATFORM_NAME, 500, buf, &s) != CL_SUCCESS)
        {
            printf("error: clGetPlatformInfo\n");
            return;
        }

        printf("%d:%s\n", i, buf);
    }

    //get device info
    cl_device_id deviceIDs[128];
    cl_uint numDevice;
    if(clGetDeviceIDs(platIDs[0], CL_DEVICE_TYPE_GPU, 128, deviceIDs, &numDevice) != CL_SUCCESS)
    {
        printf("error: clGetDeviceIDs\n");
        return;
    }

    printf("number device we found :%d\n", numDevice);

    for(int i=0; i<numDevice; i++)
    {
        char buf[500]; 
        size_t s;
        if(clGetDeviceInfo(deviceIDs[i], CL_DEVICE_NAME, 500, buf, &s) != CL_SUCCESS)
        {
            printf("error: clGetDeviceInfo\n");
            return;
        }

        printf("Device %d, %s\n", deviceIDs[i], buf);
    }

    //user platform 0//////////////////////
    //create context

    cl_context theContext;

    cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platIDs[0], 0 };

    theContext = clCreateContext(cps, 1, deviceIDs, NULL, NULL, &err_code);
    if(err_code != CL_SUCCESS)
    {
        printf("error: clCreateContext\n");
        return;
    }


    //command queue
    cl_command_queue commandQ;

    commandQ = clCreateCommandQueue(theContext, deviceIDs[0], 0, &err_code);
    if(err_code != CL_SUCCESS)
    {
        printf("error: clCreateCommandQueue\n");
        return;
    }


    //program
    //read from file to buf
    FILE *fp;
    fp = fopen("md5.cl", "rb");

    static char source[1024*1024];//1MByte to big to the default stack
    size_t numRead = fread(source, 1, 1024*1024, fp);
    printf("read %d bytes from md5.cl\n", numRead);

    fclose(fp);

    //create program 
    cl_program Prog;

    char* srcarr[1];
    srcarr[0] = source;
    size_t srclen[1];
    srclen[0] = numRead;

    Prog = clCreateProgramWithSource(theContext, 1, (const char **)srcarr, srclen, &err_code);
    if(err_code != CL_SUCCESS)
    {
        printf("error: clCreateProgramWithSource\n");
        return;
    }

    //build program
    if(clBuildProgram(Prog, 1, deviceIDs, "", NULL, NULL) != CL_SUCCESS)
    {

        // Shows the log
        char* build_log;
        size_t log_size;
        // First call to know the proper size
        clGetProgramBuildInfo(Prog, deviceIDs[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
        build_log = new char[log_size+1];
        // Second call to get the log
        clGetProgramBuildInfo(Prog, deviceIDs[0], CL_PROGRAM_BUILD_LOG, log_size, build_log, NULL);
        build_log[log_size] = '\0';
        printf("%s\n", build_log);
        delete[] build_log;


        printf("error: clBuildProgram\n");
        return;
    }

    //Create kernel
    cl_kernel K;
    K = clCreateKernel(Prog, "md5_kernel", &err_code);
    if(err_code != CL_SUCCESS)
    {
        printf("error: clCreateKernel\n");
        return;
    }

    //alloc memory and set kernel param
    cl_mem res_d = clCreateBuffer(theContext, CL_MEM_WRITE_ONLY, 1000*16, NULL, &err_code);
    if(err_code != CL_SUCCESS)
    {
        printf("error: clCreateBuffer\n");
        return;
    }

    err_code = clSetKernelArg(K, 0, sizeof(cl_mem), &res_d);
    if(err_code != CL_SUCCESS)
    {
        printf("error: clSetKernelArg\n");
        return;
    }

    const size_t global_ws = 1000;

    printf("Start\n");
    DWORD sTime = GetTickCount();
    for(int i=0; i<10000; i++)
    {

        if(clEnqueueNDRangeKernel(commandQ, K, 1, NULL, &global_ws, 0, 0, NULL, NULL) != CL_SUCCESS)
        {
            printf("error: clEnqueueNDRangeKernel\n");
        }

        clFinish(commandQ);
        printf("%d\r", i);
    }

    DWORD dTime = GetTickCount() - sTime;

    printf("\nFinished in %f second\n", dTime/1000.0f);


    //display result
    char* check = new char[1000*16];
    clEnqueueReadBuffer(commandQ, res_d, CL_TRUE, 0, 1000*16, check, 0, NULL, NULL);

    //*(check + 32) = '\0';
    //printf("Result is : %s\n", check);

    for(int j=0; j<10; j++)
    {
        for (int i = 0; i < 16; i++)
        {
            printf("%02x",(unsigned char)check[j*16 + i]);
        }
        printf("\n");
    }

    printf("\n");


    delete[] check;

    //release
    clReleaseKernel(K);
    clReleaseMemObject(res_d);
    clReleaseCommandQueue(commandQ);
    clReleaseProgram(Prog);
    clReleaseContext(theContext);
    

}

 

 

md5.cl

// Enter your kernel in this window
/* MD5lib.h - md5 library
 */

/* Copyright (C) 1990-2, RSA Data Security, Inc. Created 1990. All
rights reserved.

RSA Data Security, Inc. makes no representations concerning either
the merchantability of this software or the suitability of this
software for any particular purpose. It is provided "as is"
without express or implied warranty of any kind.

These notices must be retained in any copies of any part of this
documentation and/or software.
 */

/* The following makes MD default to MD5 if it has not already been
  defined with C compiler flags.
 */

#define MD 5

/* GLOBAL.H - RSAREF types and constants
 */

/* PROTOTYPES should be set to one if and only if the compiler supports
  function argument prototyping.
  The following makes PROTOTYPES default to 0 if it has not already
  been defined with C compiler flags.
 */
#ifndef PROTOTYPES
#define PROTOTYPES 0
#endif

/* POINTER defines a generic pointer type */
typedef unsigned char *POINTER;

/* UINT2 defines a two byte word */
typedef unsigned short UINT2;

/* UINT4 defines a four byte word */
typedef unsigned int UINT4;

/* PROTO_LIST is defined depending on how PROTOTYPES is defined above.
If using PROTOTYPES, then PROTO_LIST returns the list, otherwise it
  returns an empty list.
 */
#if PROTOTYPES
#define PROTO_LIST(list) list
#else
#define PROTO_LIST(list) ()
#endif


 /* Length of test block, number of test blocks.
 */
#define TEST_BLOCK_LEN 1000
#define TEST_BLOCK_COUNT 1000

  

/* Constants for MD5Transform routine.
 */
#define S11 7
#define S12 12
#define S13 17
#define S14 22
#define S21 5
#define S22 9
#define S23 14
#define S24 20
#define S31 4
#define S32 11
#define S33 16
#define S34 23
#define S41 6
#define S42 10
#define S43 15
#define S44 21

char* MDString PROTO_LIST ((char *));
char* MDFile PROTO_LIST ((char *));
char* hmac_md5(char* text, char* key);

typedef struct {
  UINT4 state[4];                                   /* state (ABCD) */
  UINT4 count[2];        /* number of bits, modulo 2^64 (lsb first) */
  unsigned char buffer[64];                         /* input buffer */
} MD5_CTX;

/*void MD5Init PROTO_LIST ((MD5_CTX *));
void MD5Update PROTO_LIST
  ((MD5_CTX *, unsigned char *, unsigned int));
void MD5Final PROTO_LIST ((unsigned char [16], MD5_CT X *));

void MD5Transform PROTO_LIST ((UINT4 [4], unsigned char [64]));
void Encode PROTO_LIST
  ((unsigned char *, UINT4 *, unsigned int));
void Decode PROTO_LIST
  ((UINT4 *, unsigned char *, unsigned int));
void MD5_memcpy PROTO_LIST ((POINTER, POINTER, unsigned int));
void MD5_memset PROTO_LIST ((POINTER, int, unsigned int));
*/
/*__global unsigned char PADDING[64] = {
  0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};*/

/* F, G, H and I are basic MD5 functions.
 */
#define F(x, y, z) (((x) & (y)) | ((~x) & (z)))
#define G(x, y, z) (((x) & (z)) | ((y) & (~z)))
#define H(x, y, z) ((x) ^ (y) ^ (z))
#define I(x, y, z) ((y) ^ ((x) | (~z)))

/* ROTATE_LEFT rotates x left n bits.
 */
#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n))))

/* FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4.
Rotation is separate from addition to prevent recomputation.
 */
#define FF(a, b, c, d, x, s, ac) {(a)+=F((b), (c), (d)) + (x) + (UINT4)(ac);(a)= ROTATE_LEFT((a),(s)); (a)+=(b);}
#define GG(a, b, c, d, x, s, ac) {(a) += G ((b), (c), (d)) + (x) + (UINT4)(ac);(a) = ROTATE_LEFT ((a), (s)); (a) += (b);}
#define HH(a, b, c, d, x, s, ac) {(a) += H ((b), (c), (d)) + (x) + (UINT4)(ac);(a) = ROTATE_LEFT ((a), (s)); (a) += (b);}
#define II(a, b, c, d, x, s, ac) {(a) += I ((b), (c), (d)) + (x) + (UINT4)(ac);(a) = ROTATE_LEFT ((a), (s)); (a) += (b);}
void MD5Init (MD5_CTX *context);
void MD5Update(MD5_CTX *context, unsigned char *input,unsigned int inputLen);
  
void MD5Final (unsigned char digest[16], MD5_CTX *context);
void MD5Transform  (UINT4 [4], unsigned char [64]) ;
void Encode(unsigned char *, UINT4 *, unsigned int);
void Decode (UINT4 *, unsigned char *, unsigned int);
void MD5_memcpy(POINTER, POINTER, unsigned int);
void MD5_memset(POINTER, int, unsigned int);

/* MD5 initialization. Begins an MD5 operation, writing a new context.
 */
void MD5Init (MD5_CTX *context)
                                        /* context */
{
  context->count[0] = context->count[1] = 0;
  /* Load magic initialization constants.
*/
  context->state[0] = 0x67452301;
  context->state[1] = 0xefcdab89;
  context->state[2] = 0x98badcfe;
  context->state[3] = 0x10325476;
}

/* MD5 block update operation. Continues an MD5 message-digest
  operation, processing another message block, and updating the
  context.
 */
void MD5Update (MD5_CTX *context, unsigned char *input,unsigned int inputLen  )
                                         /* context */
                              /* input block */
                     /* length of input block */
{
  unsigned int i, index, partLen;

  /* Compute number of bytes mod 64 */
  index = (unsigned int)((context->count[0] >> 3) & 0x3F);

  /* Update number of bits */
  if ((context->count[0] += ((UINT4)inputLen << 3))
  < ((UINT4)inputLen << 3))
 context->count[1]++;
  context->count[1] += ((UINT4)inputLen >> 29);

  partLen = 64 - index;

  /* Transform as many times as possible.
*/
  if (inputLen >= partLen) {
 MD5_memcpy
   ((POINTER)&context->buffer[index], (POINTER)input, partLen);
 MD5Transform (context->state, context->buffer);

 for (i = partLen; i + 63 < inputLen; i += 64)
   MD5Transform (context->state, &input[i]);

 index = 0;
  }
  else
 i = 0;

  /* Buffer remaining input */
  MD5_memcpy
 ((POINTER)&context->buffer[index], (POINTER)&input[i],
  inputLen-i);
}

/* MD5 finalization. Ends an MD5 message-digest operation, writing the
  the message digest and zeroizing the context.
 */
void MD5Final (unsigned char digest[16], MD5_CTX *context)
                       /* message digest */
                                        /* context */
{
  unsigned char bits[8];
  unsigned int index, padLen;

  /* Save number of bits */
  Encode (bits, context->count, 8);

  /* Pad out to 56 mod 64.
*/
  index = (unsigned int)((context->count[0] >> 3) & 0x3f);
  padLen = (index < 56) ? (56 - index) : (120 - index);




  unsigned char PADDING[64] = {
  0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
  };

  MD5Update (context,(unsigned char*) PADDING, padLen);

  /* Append length (before padding) */
  MD5Update (context, bits, 8);
  /* Store state in digest */
  Encode (digest, context->state, 16);

  /* Zeroize sensitive information.
*/
  MD5_memset ((POINTER)context, 0, sizeof (*context));
}

/* MD5 basic transformation. Transforms state based on block.
 */
static void MD5Transform (UINT4 state[4],
unsigned char block[64])
 
{
 
 UINT4 a = state[0], b = state[1], c = state[2], d = state[3], x[16];

  Decode (x, block, 64);

  /* Round 1 */
  FF (a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */
  FF (d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */
  FF (c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */
  FF (b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */
  FF (a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */
  FF (d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */
  FF (c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */
  FF (b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */
  FF (a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */
  FF (d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */
  FF (c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */
  FF (b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */
  FF (a, b, c, d, x[12], S11, 0x6b901122); /* 13 */
  FF (d, a, b, c, x[13], S12, 0xfd987193); /* 14 */
  FF (c, d, a, b, x[14], S13, 0xa679438e); /* 15 */
  FF (b, c, d, a, x[15], S14, 0x49b40821); /* 16 */

 /* Round 2 */
  GG (a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */
  GG (d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */
  GG (c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */
  GG (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */
  GG (a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */
  GG (d, a, b, c, x[10], S22,  0x2441453); /* 22 */
  GG (c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */
  GG (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */
  GG (a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */
  GG (d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */
  GG (c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */
  GG (b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */
  GG (a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */
  GG (d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */
  GG (c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */
  GG (b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */

  /* Round 3 */
  HH (a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */
  HH (d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */
  HH (c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */
  HH (b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */
  HH (a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */
  HH (d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */
  HH (c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */
  HH (b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */
  HH (a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */
  HH (d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */
  HH (c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */
  HH (b, c, d, a, x[ 6], S34,  0x4881d05); /* 44 */
  HH (a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */
  HH (d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */
  HH (c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */
  HH (b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */

  /* Round 4 */
  II (a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */
  II (d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */
  II (c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */
  II (b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */
  II (a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */
  II (d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */
  II (c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */
  II (b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */
  II (a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */
  II (d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */
  II (c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */
  II (b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */
  II (a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */
  II (d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */
  II (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */
  II (b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */

  state[0] += a;
  state[1] += b;
  state[2] += c;
  state[3] += d;

  /* Zeroize sensitive information.
  */
  MD5_memset ((POINTER)x, 0, sizeof (x));
}

/* Encodes input (UINT4) into output (unsigned char). Assumes len is
  a multiple of 4.
 */
void Encode (unsigned char *output,
UINT4 *input,
unsigned int len)
 
{
  unsigned int i, j;

  for (i = 0, j = 0; j < len; i++, j += 4) {
 output[j] = (unsigned char)(input[i] & 0xff);
 output[j+1] = (unsigned char)((input[i] >> 8) & 0xff);
 output[j+2] = (unsigned char)((input[i] >> 16) & 0xff);
 output[j+3] = (unsigned char)((input[i] >> 24) & 0xff);
  }
}

/* Decodes input (unsigned char) into output (UINT4). Assumes len is
  a multiple of 4.
 */
void Decode (UINT4 *output,
unsigned char *input,
unsigned int len)
 
{
  unsigned int i, j;

  for (i = 0, j = 0; j < len; i++, j += 4)
 output[i] = ((UINT4)input[j]) | (((UINT4)input[j+1]) << 8) |
   (((UINT4)input[j+2]) << 16) | (((UINT4)input[j+3]) << 24);
}

/* Note: Replace "for loop" with standard memcpy if possible.
 */

void MD5_memcpy (POINTER output,
POINTER input,
unsigned int len)
 
{
  unsigned int i;

  for (i = 0; i < len; i++)
  output[i] = input[i];
}

/* Note: Replace "for loop" with standard memset if possible.
 */
void MD5_memset (POINTER output,
int value,
unsigned int len)
 
{
  unsigned int i;

  for (i = 0; i < len; i++)
 ((char *)output)[i] = (char)value;
}


void myitoa(int i, char* string)
{
        int power, j;
        j=i; 
        for (power=1;j>=10;j/=10) 
            power*=10; 
        for (;power>0;power/=10)
        {
            *string++='0'+i/power; i%=power; 
        }
        *string='\0';
}



__kernel void md5_kernel (__global char* res)
{
    int idx = get_global_id(0);
    idx = idx%1000;
    char src[32];

    for(int i=0; i<32; i++)
    {
        src[i] = 0;
    }

    myitoa(idx, src);

    int len;
    for(int i=0; i<32; i++)
    {
        if(src[i] == 0)
        {
            len = i;
            break;
        }
    }

    unsigned char digest[16];

    MD5_CTX context;
    MD5Init (&context);
    MD5Update (&context, (unsigned char*)src, len);
    MD5Final (digest, &context);

    for(int i=0; i<16; i++)
   {
        *(res + idx*16 + i) = digest[i];
   }
}
posted on 2012-06-07 14:53  张家瑞  阅读(1319)  评论(0编辑  收藏  举报