md5算法实现原理深剖

一、基本介绍

MD系列算法是信息摘要三大算法中的一种,全称:Message Digest算法,按照规范版本分为MD2、MD4、MD5三种算法,目前最常用的是MD5版本算法。本文介绍MD5算法的实现原理。

1991年,继 MD4 算法后,罗纳德·李维斯特教授开发了 MD5 算法,将 MD 算法推向成熟。MD5 算法经 MD2、MD3 和 MD4 算法发展而来,算法复杂程度和安全强度大大提高。但不管是 MD2、MD4 还是 MD5 算法,其算法的最终结果均是产生一个 128 位的消息摘要,这也是 MD 系列算法的特点。MD5 算法执行效率略次于 MD4 算法,但在安全性方面,MD5 算法更胜一筹。随着计算机技术的发展和计算水平的不断提高,MD5 算法暴露出来的漏洞也越来越多。1996 年后被证实存在弱点,可以被加以破解,对于需要高度安全性的数据,专家一般建议改用其他算法,如 SHA-2。2004 年,证实 MD5 算法无法防止碰撞(collision),因此不适用于安全性认证,如 SSL 公开密钥认证或是数字签名等用途。

二、实现原理

有关 MD5 算法详情请参见 RFC 1321 http://www.ietf.org/rfc/rfc1321.txt,RFC 1321 是MD5算法的官方文档,其实现原理共分为5步:

第1步:字节填充(Append Padding Bytes)

数据先补上1个1比特,再补上k个0比特,使得补位后的数据比特数(n+1+k)满足(n+1+k) mod 512 = 448,k取最小正整数。

第2步:追加长度信息(Append Length)

数据比特位的数据长度追加到最后8字节中。

第3步:初始化MD Buffer(Initialize MD Buffer)

这一步最简单了,定义ABCD四个4字节数组,分别赋初值即可。

uint32_t A = 0x67452301;	// [ 0x01, 0x23, 0x45, 0x67 ]
uint32_t B = 0xEFCDAB89;	// [ 0x89, 0xAB, 0xCD, 0xEF ]
uint32_t C = 0x98BADCFE;	// [ 0xFE, 0xDC, 0xBA, 0x98 ]
uint32_t D = 0x10325476;	// [ 0x76, 0x54, 0x32, 0x10 ]    

以上操作与md4完全一致。

第4步:处理消息块(Process Message in 16-Byte Blocks)

这个是MD5算法最核心的部分了,对第2步组装数据进行分块依次处理。

   /* Process each 16-word block. */
   For i = 0 to N/16-1 do

     /* Copy block i into X. */
     For j = 0 to 15 do
       Set X[j] to M[i*16+j].
     end /* of loop on j */

     /* Save A as AA, B as BB, C as CC, and D as DD. */
     AA = A
     BB = B
     CC = C
     DD = D

     /* Round 1. */
     /* Let [abcd k s i] denote the operation
          a = b + ((a + F(b,c,d) + X[k] + T[i]) <<< s). */
     /* Do the following 16 operations. */
     [ABCD  0  7  1]  [DABC  1 12  2]  [CDAB  2 17  3]  [BCDA  3 22  4]
     [ABCD  4  7  5]  [DABC  5 12  6]  [CDAB  6 17  7]  [BCDA  7 22  8]
     [ABCD  8  7  9]  [DABC  9 12 10]  [CDAB 10 17 11]  [BCDA 11 22 12]
     [ABCD 12  7 13]  [DABC 13 12 14]  [CDAB 14 17 15]  [BCDA 15 22 16]

     /* Round 2. */
     /* Let [abcd k s i] denote the operation
          a = b + ((a + G(b,c,d) + X[k] + T[i]) <<< s). */
     /* Do the following 16 operations. */
     [ABCD  1  5 17]  [DABC  6  9 18]  [CDAB 11 14 19]  [BCDA  0 20 20]
     [ABCD  5  5 21]  [DABC 10  9 22]  [CDAB 15 14 23]  [BCDA  4 20 24]
     [ABCD  9  5 25]  [DABC 14  9 26]  [CDAB  3 14 27]  [BCDA  8 20 28]
     [ABCD 13  5 29]  [DABC  2  9 30]  [CDAB  7 14 31]  [BCDA 12 20 32]

     /* Round 3. */
     /* Let [abcd k s t] denote the operation
          a = b + ((a + H(b,c,d) + X[k] + T[i]) <<< s). */
     /* Do the following 16 operations. */
     [ABCD  5  4 33]  [DABC  8 11 34]  [CDAB 11 16 35]  [BCDA 14 23 36]
     [ABCD  1  4 37]  [DABC  4 11 38]  [CDAB  7 16 39]  [BCDA 10 23 40]
     [ABCD 13  4 41]  [DABC  0 11 42]  [CDAB  3 16 43]  [BCDA  6 23 44]
     [ABCD  9  4 45]  [DABC 12 11 46]  [CDAB 15 16 47]  [BCDA  2 23 48]

     /* Round 4. */
     /* Let [abcd k s t] denote the operation
          a = b + ((a + I(b,c,d) + X[k] + T[i]) <<< s). */
     /* Do the following 16 operations. */
     [ABCD  0  6 49]  [DABC  7 10 50]  [CDAB 14 15 51]  [BCDA  5 21 52]
     [ABCD 12  6 53]  [DABC  3 10 54]  [CDAB 10 15 55]  [BCDA  1 21 56]
     [ABCD  8  6 57]  [DABC 15 10 58]  [CDAB  6 15 59]  [BCDA 13 21 60]
     [ABCD  4  6 61]  [DABC 11 10 62]  [CDAB  2 15 63]  [BCDA  9 21 64]

     /* Then perform the following additions. (That is increment each
        of the four registers by the value it had before this block
        was started.) */
     A = A + AA
     B = B + BB
     C = C + CC
     D = D + DD

   end /* of loop on i */

第5步:输出(Output)

这一步也非常简单,只需要将计算后的A、B、C、D进行拼接输出即可。

三、示例讲解

 四、代码实现

以下为C/C++代码实现:

#include <string.h>
#include <stdio.h>

#define HASH_BLOCK_SIZE         64  /* 512 bits = 64 bytes */
#define HASH_LEN_SIZE           8   /* 64 bits =  8 bytes */
#define HASH_LEN_OFFSET         56  /* 64 bytes - 8 bytes */
#define HASH_DIGEST_SIZE        16  /* 128 bits = 16 bytes */

typedef unsigned char		uint8_t;
typedef unsigned short int	uint16_t;
typedef unsigned int		uint32_t;
typedef unsigned long long	uint64_t;

/* T table */
static uint32_t T[64] =
{
	/* Round 1 */
	0xD76AA478, 0xE8C7B756, 0x242070DB, 0xC1BDCEEE, 0xF57C0FAF, 0x4787C62A, 0xA8304613, 0xFD469501,
	0x698098D8, 0x8B44F7AF, 0xFFFF5BB1, 0x895CD7BE, 0x6B901122, 0xFD987193, 0xA679438E, 0x49B40821,
	/* ROUND 2 */
	0xF61E2562, 0xC040B340, 0x265E5A51, 0xE9B6C7AA, 0xD62F105D, 0x02441453, 0xD8A1E681, 0xE7D3FBC8,
	0x21E1CDE6, 0xC33707D6, 0xF4D50D87, 0x455A14ED, 0xA9E3E905, 0xFCEFA3F8, 0x676F02D9, 0x8D2A4C8A,
	/* ROUND 3 */
	0xFFFA3942, 0x8771F681, 0x6D9D6122, 0xFDE5380C, 0xA4BEEA44, 0x4BDECFA9, 0xF6BB4B60, 0xBEBFBC70,
	0x289B7EC6, 0xEAA127FA, 0xD4EF3085, 0x04881D05, 0xD9D4D039, 0xE6DB99E5, 0x1FA27CF8, 0xC4AC5665,
	/* ROUND 4 */
	0xF4292244, 0x432AFF97, 0xAB9423A7, 0xFC93A039, 0x655B59C3, 0x8F0CCC92, 0xFFEFF47D, 0x85845DD1,
	0x6FA87E4F, 0xFE2CE6E0, 0xA3014314, 0x4E0811A1, 0xF7537E82, 0xBD3AF235, 0x2AD7D2BB, 0xEB86D391
};

static uint32_t F(uint32_t X, uint32_t Y, uint32_t Z)
{
	return (X & Y) | ((~X) & Z);
}
static uint32_t G(uint32_t X, uint32_t Y, uint32_t Z)
{
	return (X & Z) | (Y & (~Z));
}
static uint32_t H(uint32_t X, uint32_t Y, uint32_t Z)
{
	return X ^ Y ^ Z;
}
static uint32_t I(uint32_t X, uint32_t Y, uint32_t Z)
{
	return Y ^ ( X | (~Z));
}

/* 循环向左移动offset个比特位 */
static uint32_t MoveLeft(uint32_t X, uint8_t offset)
{
	uint32_t res = (X << offset) | (X >> (32 - offset));
	return res;
}

static uint32_t Round1(uint32_t A, uint32_t B, uint32_t C, uint32_t D, uint32_t M, uint32_t N, uint32_t T)
{
	return B + MoveLeft(A + F(B, C, D) + M + T, N);
}
static uint32_t Round2(uint32_t A, uint32_t B, uint32_t C, uint32_t D, uint32_t M, uint32_t N, uint32_t T)
{
	return B + MoveLeft(A + G(B, C, D) + M + T, N);
}
static uint32_t Round3(uint32_t A, uint32_t B, uint32_t C, uint32_t D, uint32_t M, uint32_t N, uint32_t T)
{
	return B + MoveLeft(A + H(B, C, D) + M + T, N);
}
static uint32_t Round4(uint32_t A, uint32_t B, uint32_t C, uint32_t D, uint32_t M, uint32_t N, uint32_t T)
{
	return B + MoveLeft(A + I(B, C, D) + M + T, N);
}

#define ASSERT_RETURN_INT(x, d) if(!(x)) { return d; }

int md5(unsigned char *out, const unsigned char* in, const int inlen)
{
	ASSERT_RETURN_INT(out && in && (inlen >= 0), 1);
	int i = 0, j = 0;

	// step 1: 字节填充(Append Padding Bytes)
	// 数据先补上1个1比特,再补上k个0比特,使得补位后的数据比特数(n+1+k)满足(n+1+k) mod 512 = 448,k取最小正整数
	int iX = inlen / HASH_BLOCK_SIZE;
	int iY = inlen % HASH_BLOCK_SIZE;
	iX = (iY < HASH_LEN_OFFSET) ? iX : (iX + 1);

	int iLen = (iX + 1) * HASH_BLOCK_SIZE;
	unsigned char* M = malloc(iLen);
	memcpy(M, in, inlen);
	// 先补上1个1比特+7个0比特
	M[inlen] = 0x80;
	// 再补上(k-7)个0比特
	for (i = inlen + 1; i < (iX * HASH_BLOCK_SIZE + HASH_LEN_OFFSET); i++)
	{
		M[i] = 0;
	}

	// step 2: 追加长度信息(Append Length)
	uint64_t *pLen = (uint64_t*)(M + (iX * HASH_BLOCK_SIZE + HASH_LEN_OFFSET));
	*pLen = inlen << 3;

	// Step 3. 初始化MD Buffer(Initialize MD Buffer)
	uint32_t A = 0x67452301;	// 0x01, 0x23, 0x45, 0x67
	uint32_t B = 0xEFCDAB89;	// 0x89, 0xAB, 0xCD, 0xEF
	uint32_t C = 0x98BADCFE;	// 0xFE, 0xDC, 0xBA, 0x98
	uint32_t D = 0x10325476;	// 0x76, 0x54, 0x32, 0x10

	uint32_t X[HASH_BLOCK_SIZE / 4] = { 0 };

	// step 4: 处理消息块(Process Message in 64-Byte Blocks)
	for (i = 0; i < iLen / HASH_BLOCK_SIZE; i++)
	{
		/* Copy block i into X. */
		for (j = 0; j < HASH_BLOCK_SIZE; j = j + 4)
		{
			uint32_t* temp = &M[i * HASH_BLOCK_SIZE + j];
			X[j / 4] = *temp;
		}

		/* Save A as AA, B as BB, C as CC, and D as DD. */
		uint32_t AA = A;
		uint32_t BB = B;
		uint32_t CC = C;
		uint32_t DD = D;

		/* Round 1. */
		/* Let [abcd k s i] denote the operation
			a = b + ((a + F(b,c,d) + X[k] + T[i]) <<< s). */
		/* Do the following 16 operations.
		[ABCD  0  7  1][DABC  1 12  2][CDAB  2 17  3][BCDA  3 22  4]
		[ABCD  4  7  5][DABC  5 12  6][CDAB  6 17  7][BCDA  7 22  8]
		[ABCD  8  7  9][DABC  9 12 10][CDAB 10 17 11][BCDA 11 22 12]
		[ABCD 12  7 13][DABC 13 12 14][CDAB 14 17 15][BCDA 15 22 16]
		
		此处T[i]有问题 应该是i-1 索引下标从0开始
		*/
		A = Round1(A, B, C, D, X[0],  7,  T[0]);  D = Round1(D, A, B, C, X[1],  12, T[1]);  C = Round1(C, D, A, B, X[2],  17, T[2]);  B = Round1(B, C, D, A, X[3],  22, T[3]);
		A = Round1(A, B, C, D, X[4],  7,  T[4]);  D = Round1(D, A, B, C, X[5],  12, T[5]);  C = Round1(C, D, A, B, X[6],  17, T[6]);  B = Round1(B, C, D, A, X[7],  22, T[7]);
		A = Round1(A, B, C, D, X[8],  7,  T[8]);  D = Round1(D, A, B, C, X[9],  12, T[9]);  C = Round1(C, D, A, B, X[10], 17, T[10]); B = Round1(B, C, D, A, X[11], 22, T[11]);
		A = Round1(A, B, C, D, X[12], 7,  T[12]); D = Round1(D, A, B, C, X[13], 12, T[13]); C = Round1(C, D, A, B, X[14], 17, T[14]); B = Round1(B, C, D, A, X[15], 22, T[15]);

		/* Round 2. */
		/* Let [abcd k s i] denote the operation
			a = b + ((a + G(b,c,d) + X[k] + T[i]) <<< s). */
		/* Do the following 16 operations.
		[ABCD  1  5 17][DABC  6  9 18][CDAB 11 14 19][BCDA  0 20 20]
		[ABCD  5  5 21][DABC 10  9 22][CDAB 15 14 23][BCDA  4 20 24]
		[ABCD  9  5 25][DABC 14  9 26][CDAB  3 14 27][BCDA  8 20 28]
		[ABCD 13  5 29][DABC  2  9 30][CDAB  7 14 31][BCDA 12 20 32]
		*/
		A = Round2(A, B, C, D, X[1],  5,  T[16]); D = Round2(D, A, B, C, X[6],  9,  T[17]); C = Round2(C, D, A, B, X[11], 14, T[18]); B = Round2(B, C, D, A, X[0],  20, T[19]);
		A = Round2(A, B, C, D, X[5],  5,  T[20]); D = Round2(D, A, B, C, X[10], 9,  T[21]); C = Round2(C, D, A, B, X[15], 14, T[22]); B = Round2(B, C, D, A, X[4],  20, T[23]);
		A = Round2(A, B, C, D, X[9],  5,  T[24]); D = Round2(D, A, B, C, X[14], 9,  T[25]); C = Round2(C, D, A, B, X[3],  14, T[26]); B = Round2(B, C, D, A, X[8],  20, T[27]);
		A = Round2(A, B, C, D, X[13], 5,  T[28]); D = Round2(D, A, B, C, X[2],  9,  T[29]); C = Round2(C, D, A, B, X[7],  14, T[30]); B = Round2(B, C, D, A, X[12], 20, T[31]);

		/* Round 3. */
		/* Let [abcd k s t] denote the operation
			a = b + ((a + H(b,c,d) + X[k] + T[i]) <<< s). */
		/* Do the following 16 operations. 
		[ABCD  5  4 33][DABC  8 11 34][CDAB 11 16 35][BCDA 14 23 36]
		[ABCD  1  4 37][DABC  4 11 38][CDAB  7 16 39][BCDA 10 23 40]
		[ABCD 13  4 41][DABC  0 11 42][CDAB  3 16 43][BCDA  6 23 44]
		[ABCD  9  4 45][DABC 12 11 46][CDAB 15 16 47][BCDA  2 23 48]
		*/
		A = Round3(A, B, C, D, X[5],  4,  T[32]); D = Round3(D, A, B, C, X[8],  11, T[33]); C = Round3(C, D, A, B, X[11], 16, T[34]); B = Round3(B, C, D, A, X[14], 23, T[35]);
		A = Round3(A, B, C, D, X[1],  4,  T[36]); D = Round3(D, A, B, C, X[4],  11, T[37]); C = Round3(C, D, A, B, X[7],  16, T[38]); B = Round3(B, C, D, A, X[10], 23, T[39]);
		A = Round3(A, B, C, D, X[13], 4,  T[40]); D = Round3(D, A, B, C, X[0],  11, T[41]); C = Round3(C, D, A, B, X[3],  16, T[42]); B = Round3(B, C, D, A, X[6],  23, T[43]);
		A = Round3(A, B, C, D, X[9],  4,  T[44]); D = Round3(D, A, B, C, X[12], 11, T[45]); C = Round3(C, D, A, B, X[15], 16, T[46]); B = Round3(B, C, D, A, X[2],  23, T[47]);

		/* Round 4. */
		/* Let [abcd k s t] denote the operation
			a = b + ((a + I(b,c,d) + X[k] + T[i]) <<< s). */
		/* Do the following 16 operations.
		[ABCD  0  6 49][DABC  7 10 50][CDAB 14 15 51][BCDA  5 21 52]
		[ABCD 12  6 53][DABC  3 10 54][CDAB 10 15 55][BCDA  1 21 56]
		[ABCD  8  6 57][DABC 15 10 58][CDAB  6 15 59][BCDA 13 21 60]
		[ABCD  4  6 61][DABC 11 10 62][CDAB  2 15 63][BCDA  9 21 64]
		*/
		A = Round4(A, B, C, D, X[0],  6,  T[48]);  D = Round4(D, A, B, C, X[7],  10, T[49]); C = Round4(C, D, A, B, X[14], 15, T[50]); B = Round4(B, C, D, A, X[5],  21, T[51]);
		A = Round4(A, B, C, D, X[12], 6,  T[52]);  D = Round4(D, A, B, C, X[3],  10, T[53]); C = Round4(C, D, A, B, X[10], 15, T[54]); B = Round4(B, C, D, A, X[1],  21, T[55]);
		A = Round4(A, B, C, D, X[8],  6,  T[56]);  D = Round4(D, A, B, C, X[15], 10, T[57]); C = Round4(C, D, A, B, X[6],  15, T[58]); B = Round4(B, C, D, A, X[13], 21, T[59]);
		A = Round4(A, B, C, D, X[4],  6,  T[60]);  D = Round4(D, A, B, C, X[11], 10, T[61]); C = Round4(C, D, A, B, X[2],  15, T[62]); B = Round4(B, C, D, A, X[9],  21, T[63]);

		/* Then perform the following additions. (That is, increment each
		of the four registers by the value it had before this block
		was started.) */
		A = A + AA;
		B = B + BB;
		C = C + CC;
		D = D + DD;
	}

	// step 5: 输出ABCD
	memcpy(out + 0, &A, 4);
	memcpy(out + 4, &B, 4);
	memcpy(out + 8, &C, 4);
	memcpy(out + 12, &D, 4);
	free(M);

	return 0;
}

int main()
{
	unsigned char digest[16] = { 0 };

	md5(digest, "Hello World!", strlen("Hello World!"));

	return 0;
}
 

posted on 2022-08-05 13:48  仲达超  阅读(434)  评论(0编辑  收藏  举报

导航