ChaCha20加密 与 Salsa20加密

一、简介

ChaCha20流密码经常和Poly1305消息认证码结合使用,被称为ChaCha20-Poly1305,由Google公司率先在Android移动平台中的Chrome中代替RC4使用,由于其算法精简、安全性强、兼容性强等特点,目前Google致力于全面将其在移动端推广。

二、初始化矩阵

ChaCha20加密的初始状态包括了
1、一个128位常量(Constant)
常量的内容为0x61707865,0x3320646e,0x79622d32,0x6b206574(即字符串"expand 32-byte k"按小端读出的四个32位字)。
2、一个256位密钥(Key)
3、一个64位计数(Counter)
4、一个64位随机数(Nonce)
(注意:下文的代码实现采用的是RFC 7539中的划分方式,即32位计数加96位随机数,总长度不变。)
一共64字节,其排列成4 * 4的32位字矩阵如下所示:(实际运算为小端)
 
0
 

三、1/4 轮操作

在ChaCha20算法当中, 一个基础的操作即为1/4轮运算, 它主要操作4个32位的无符号整数,具体操作如下:
QR(a,b,c,d)
 
0
 
代码如下:
/*
 * ChaCha quarter round: mixes the four state words x[a], x[b], x[c] and x[d]
 * in place using wrapping 32-bit addition, XOR and left rotations by
 * 16, 12, 8 and 7 bits.
 */
static void chacha20_quarterround(uint32_t *x, int a, int b, int c, int d) {
    uint32_t *wa = &x[a];
    uint32_t *wb = &x[b];
    uint32_t *wc = &x[c];
    uint32_t *wd = &x[d];

    *wa += *wb;
    *wd = rotl32(*wd ^ *wa, 16);

    *wc += *wd;
    *wb = rotl32(*wb ^ *wc, 12);

    *wa += *wb;
    *wd = rotl32(*wd ^ *wa, 8);

    *wc += *wd;
    *wb = rotl32(*wb ^ *wc, 7);
}

四、块函数

这个块函数输入是之前所生成的状态矩阵, 最终输出64字节(512 bit)的"随机化"的字节
块函数中的轮运算分两种情况,一种是奇数轮,进行列变换(对矩阵的四列分别做1/4轮运算),如下:
 
0
偶数轮则进行对角线变换(对矩阵的四条对角线分别做1/4轮运算),如下:
 
0
代码实现如下:
/*
 * ChaCha block function: runs the round function over a copy of the 16-word
 * input state, adds the original state back in word by word, and writes the
 * result to out as 64 bytes via chacha20_serialize().
 *
 * num_rounds is normally 20. Rounds are executed in pairs (one column round
 * followed by one diagonal round), so an odd count runs one extra round.
 */
static void chacha20_block(uint32_t in[16], uint8_t out[64], int num_rounds) {
    uint32_t x[16];
    int remaining = num_rounds;
    int w;

    /* work on a private copy so `in` is still available for the final add */
    for (w = 0; w < 16; w++) {
        x[w] = in[w];
    }

    while (remaining > 0) {
        /* odd round: quarter round on each of the four columns */
        chacha20_quarterround(x, 0, 4,  8, 12);
        chacha20_quarterround(x, 1, 5,  9, 13);
        chacha20_quarterround(x, 2, 6, 10, 14);
        chacha20_quarterround(x, 3, 7, 11, 15);
        /* even round: quarter round on each of the four diagonals */
        chacha20_quarterround(x, 0, 5, 10, 15);
        chacha20_quarterround(x, 1, 6, 11, 12);
        chacha20_quarterround(x, 2, 7,  8, 13);
        chacha20_quarterround(x, 3, 4,  9, 14);

        remaining -= 2;
    }

    /* feed-forward: add the input state (wraps modulo 2^32) */
    for (w = 0; w < 16; w++) {
        x[w] += in[w];
    }

    chacha20_serialize(x, out);
}

五、ChaCha20总代码如下

1、C语言实现

编译器:Dev-C++
chacha20.cpp
#include <stdint.h>
#include <string.h>
#include "chacha20.h"

/* Store the 32-bit value v into p[0..3] in little-endian byte order. */
static inline void u32t8le(uint32_t v, uint8_t p[4]) {
    int byte;

    for (byte = 0; byte < 4; byte++) {
        /* the cast keeps only the low 8 bits of the shifted value */
        p[byte] = (uint8_t)(v >> (8 * byte));
    }
}

/* Read p[0..3] as a little-endian 32-bit value (p[0] is the low byte). */
static inline uint32_t u8t32le(uint8_t p[4]) {
    return  (uint32_t)p[0]
         | ((uint32_t)p[1] << 8)
         | ((uint32_t)p[2] << 16)
         | ((uint32_t)p[3] << 24);
}

/*
 * Rotate x left by n bits.
 * The right-shift count is computed as (-n & 31) so that n == 0 does not
 * produce a shift by 32; see http://blog.regehr.org/archives/1063
 */
static inline uint32_t rotl32(uint32_t x, int n) {
    const int right = -n & 31;
    const uint32_t upper = x << n;
    const uint32_t lower = x >> right;

    return upper | lower;
}

// ChaCha quarter round, see https://tools.ietf.org/html/rfc7539#section-2.1
// Mixes x[a], x[b], x[c], x[d] in place; additions wrap modulo 2^32.
static void chacha20_quarterround(uint32_t *x, int a, int b, int c, int d) {
    x[a] += x[b];
    x[d] ^= x[a];
    x[d] = rotl32(x[d], 16);

    x[c] += x[d];
    x[b] ^= x[c];
    x[b] = rotl32(x[b], 12);

    x[a] += x[b];
    x[d] ^= x[a];
    x[d] = rotl32(x[d], 8);

    x[c] += x[d];
    x[b] ^= x[c];
    x[b] = rotl32(x[b], 7);
}

/* Write the 16 state words to output as 64 bytes, each word little-endian. */
static void chacha20_serialize(uint32_t in[16], uint8_t output[64]) {
    const uint32_t *word = in;
    const uint32_t *const end = in + 16;
    uint8_t *dst = output;

    while (word != end) {
        u32t8le(*word, dst);
        word++;
        dst += 4;
    }
}

/*
 * ChaCha block function: permutes a copy of the 16-word state `in`, adds the
 * original state back in, and serializes the result into the 64-byte `out`.
 *
 * num_rounds is normally 20. Rounds run in pairs (column round, then diagonal
 * round), so an odd count executes one extra round.
 */
static void chacha20_block(uint32_t in[16], uint8_t out[64], int num_rounds) {
    /* index quadruples for one double round: four columns, then four diagonals */
    static const int quarters[8][4] = {
        {0, 4,  8, 12}, {1, 5,  9, 13}, {2, 6, 10, 14}, {3, 7, 11, 15},
        {0, 5, 10, 15}, {1, 6, 11, 12}, {2, 7,  8, 13}, {3, 4,  9, 14}
    };
    uint32_t x[16];
    int rounds_left;
    int q;
    int w;

    memcpy(x, in, sizeof x);

    for (rounds_left = num_rounds; rounds_left > 0; rounds_left -= 2) {
        for (q = 0; q < 8; q++) {
            chacha20_quarterround(x, quarters[q][0], quarters[q][1],
                                     quarters[q][2], quarters[q][3]);
        }
    }

    /* feed-forward: add the input state (wraps modulo 2^32) */
    for (w = 0; w < 16; w++) {
        x[w] += in[w];
    }

    chacha20_serialize(x, out);
}

// Build the initial 16-word state, see https://tools.ietf.org/html/rfc7539#section-2.3
//   s[0..3]   constants
//   s[4..11]  256-bit key, read as eight little-endian words
//   s[12]     32-bit block counter
//   s[13..15] 96-bit nonce, read as three little-endian words
static void chacha20_init_state(uint32_t s[16], uint8_t key[32], uint32_t counter, uint8_t nonce[12]) {
    // refer: https://dxr.mozilla.org/mozilla-beta/source/security/nss/lib/freebl/chacha20.c
    // the four constants spell "expand 32-byte k" when stored little-endian
    static const uint32_t sigma[4] = {
        0x61707865, 0x3320646e, 0x79622d32, 0x6b206574
    };
    int w;

    for (w = 0; w < 4; w++) {
        s[w] = sigma[w];
    }

    for (w = 0; w < 8; w++) {
        s[4 + w] = u8t32le(key + 4 * w);
    }

    s[12] = counter;

    for (w = 0; w < 3; w++) {
        s[13 + w] = u8t32le(nonce + 4 * w);
    }
}

/*
 * XOR `inlen` bytes of `in` with the ChaCha20 keystream and store the result
 * in `out`. Because the operation is a plain XOR, the same call both encrypts
 * and decrypts.
 *
 *   key     32-byte key
 *   counter block counter used for the first 64-byte keystream block; it is
 *           incremented by one for every further block
 *   nonce   12-byte nonce
 *   in/out  input and output buffers of at least `inlen` bytes
 *   inlen   number of bytes to process; values <= 0 process nothing
 *
 * The loop tracks the bytes left in the current block instead of computing
 * `i + 64`, so no signed overflow can occur when inlen is close to INT_MAX.
 */
void ChaCha20XOR(uint8_t key[32], uint32_t counter, uint8_t nonce[12], uint8_t *in, uint8_t *out, int inlen) {
    uint32_t s[16];
    uint8_t block[64];
    int offset = 0;

    chacha20_init_state(s, key, counter, nonce);

    while (offset < inlen) {
        const int remaining = inlen - offset;
        const int chunk = remaining < 64 ? remaining : 64;
        int k;

        chacha20_block(s, block, 20);
        s[12]++;    /* next keystream block uses the next counter value */

        for (k = 0; k < chunk; k++) {
            out[offset + k] = in[offset + k] ^ block[k];
        }

        offset += chunk;    /* never exceeds inlen */
    }
}
chacha20.h
/* Identifiers containing a double underscore are reserved for the
 * implementation, so the include guard uses a plain name. */
#ifndef CHACHA20_H
#define CHACHA20_H
#include <stdint.h>

/*
 * XOR `inputlen` bytes of `input` with the ChaCha20 keystream derived from a
 * 32-byte key, an initial 32-bit block counter and a 12-byte nonce, writing
 * the result to `output`. The same call is used for encryption and decryption.
 */
void ChaCha20XOR(uint8_t key[32], uint32_t counter, uint8_t nonce[12], uint8_t *input, uint8_t *output, int inputlen);

#endif /* CHACHA20_H */
main.cpp
#include <stdio.h>
#include "chacha20.h"

/*
 * Print `label`, then `len` bytes of `buf` as two-digit lowercase hex,
 * starting a new output line before byte 0 and before every 16th byte.
 */
static void dump_hex_rows(const char *label, const uint8_t *buf, int len) {
    int pos;

    printf("%s", label);
    for (pos = 0; pos < len; pos++) {
        if (pos % 16 == 0) {
            printf("\n");
        }
        printf("%02x ", buf[pos]);
    }
}

/*
 * Demo: encrypts a 114-byte message with ChaCha20XOR, runs the ciphertext
 * through the same function again to decrypt it, and prints the key, nonce,
 * plaintext, ciphertext and decrypted bytes as hex.
 */
int main(int argc, char **argv) {
    uint8_t key[] = {
        0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
        0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
        0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
        0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
    };

    /* 12-byte nonce */
    uint8_t nonce[] = {
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00, 0x00
    };

    uint8_t input[114] = {
        0x4c, 0x61, 0x64, 0x69, 0x65, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x47, 0x65, 0x6e, 0x74, 0x6c,
        0x65, 0x6d, 0x65, 0x6e, 0x20, 0x6f, 0x66, 0x20, 0x74, 0x68, 0x65, 0x20, 0x63, 0x6c, 0x61, 0x73,
        0x73, 0x20, 0x6f, 0x66, 0x20, 0x27, 0x39, 0x39, 0x3a, 0x20, 0x49, 0x66, 0x20, 0x49, 0x20, 0x63,
        0x6f, 0x75, 0x6c, 0x64, 0x20, 0x6f, 0x66, 0x66, 0x65, 0x72, 0x20, 0x79, 0x6f, 0x75, 0x20, 0x6f,
        0x6e, 0x6c, 0x79, 0x20, 0x6f, 0x6e, 0x65, 0x20, 0x74, 0x69, 0x70, 0x20, 0x66, 0x6f, 0x72, 0x20,
        0x74, 0x68, 0x65, 0x20, 0x66, 0x75, 0x74, 0x75, 0x72, 0x65, 0x2c, 0x20, 0x73, 0x75, 0x6e, 0x73,
        0x63, 0x72, 0x65, 0x65, 0x6e, 0x20, 0x77, 0x6f, 0x75, 0x6c, 0x64, 0x20, 0x62, 0x65, 0x20, 0x69,
        0x74, 0x2e
    };

    uint8_t encrypt[114];
    uint8_t decrypt[114];
    int pos;

    /* the second argument (1) is the initial block counter */
    ChaCha20XOR(key, 1, nonce, input, encrypt, 114);
    ChaCha20XOR(key, 1, nonce, encrypt, decrypt, 114);

    dump_hex_rows("\nkey:", key, 32);

    /* the nonce fits on one row, so it is printed without row breaks */
    printf("\n\nnonce:\n");
    for (pos = 0; pos < 12; pos++) {
        printf("%02x ", nonce[pos]);
    }

    dump_hex_rows("\n\nplaintext:", input, 114);
    dump_hex_rows("\n\nencrypted:", encrypt, 114);
    dump_hex_rows("\n\ndecrypted:", decrypt, 114);

    printf("\n");
    return 0;
}

2、Python实现

def main():
    """Script entry point: run the built-in self-test."""
    runtests()
    return None

def chacha20_decrypt(key, counter, nonce, ciphertext):
    """Decrypt ``ciphertext`` and return the recovered bytes.

    Decryption is the same keystream XOR as encryption, so this simply
    forwards all arguments to ``chacha20_encrypt``.
    """
    recovered = chacha20_encrypt(key, counter, nonce, ciphertext)
    return recovered

def chacha20_encrypt(key, counter, nonce, plaintext):
    """XOR ``plaintext`` with the ChaCha20 keystream and return the result as bytes.

    key       -- list of eight 32-bit words
    counter   -- block counter used for the first 64-byte block; block ``i``
                 uses ``counter + i``
    nonce     -- list of three 32-bit words
    plaintext -- bytes-like input; a trailing partial block is XORed with the
                 leading bytes of its keystream block
    """
    total = len(plaintext)
    pieces = []

    for block_index, start in enumerate(range(0, total, 64)):
        key_stream = serialize(chacha20_block(key, counter + block_index, nonce))
        chunk = plaintext[start:start + 64]
        # zip stops at the shorter operand, which handles the final short chunk
        pieces.append(bytes([p ^ k for p, k in zip(chunk, key_stream)]))

    return b''.join(pieces)

def chacha20_block(key, counter, nonce):
    """Return one ChaCha20 block as a list of 16 32-bit unsigned integers.

    The initial state is the four constants, the key words, the counter and
    the nonce words, in that order. Ten double rounds (``inner_block``) are
    applied to a copy, then the initial state is added back word by word.
    """
    sigma = [0x61707865, 0x3320646e, 0x79622d32, 0x6b206574]
    initial = sigma + key + [counter] + nonce
    working = list(initial)

    for _ in range(10):
        inner_block(working)

    working[:16] = [add_32(working[k], initial[k]) for k in range(16)]
    return working

def inner_block(state):
    """Apply one double round to ``state`` in place.

    Four quarter rounds on the columns are followed by four on the diagonals.
    """
    columns = ((0, 4, 8, 12), (1, 5, 9, 13), (2, 6, 10, 14), (3, 7, 11, 15))
    diagonals = ((0, 5, 10, 15), (1, 6, 11, 12), (2, 7, 8, 13), (3, 4, 9, 14))

    for a, b, c, d in columns + diagonals:
        quarterround(state, a, b, c, d)

def xor_32(x, y):
    """Return ``x XOR y`` truncated to 32 bits."""
    mask = 0xffffffff
    mixed = x ^ y
    return mixed & mask

def add_32(x, y):
    """Return ``x + y`` reduced modulo 2**32."""
    mask = 0xffffffff
    total = x + y
    return total & mask

def rot_l32(x, n):
    """Rotate the 32-bit value ``x`` left by ``n`` bits."""
    mask = 0xffffffff
    upper = x << n
    lower = x >> (32 - n)
    return (upper | lower) & mask

def quarterround(state, i1, i2, i3, i4):
    """Apply the ChaCha quarter round to the four words of ``state`` at the given indices.

    The words are updated in place. The quarter round is two add/xor/rotate
    passes: the first rotates by 16 and 12 bits, the second by 8 and 7.
    """
    a, b, c, d = state[i1], state[i2], state[i3], state[i4]

    for rot_d, rot_b in ((16, 12), (8, 7)):
        a = add_32(a, b)
        d = rot_l32(xor_32(d, a), rot_d)
        c = add_32(c, d)
        b = rot_l32(xor_32(b, c), rot_b)

    state[i1], state[i2], state[i3], state[i4] = a, b, c, d

def serialize(block):
    """Concatenate the words of ``block`` as 4-byte little-endian values."""
    out = bytearray()
    for word in block:
        out += word.to_bytes(4, 'little')
    return bytes(out)

# Self-test. The original comment attributes these vectors to RFC 8439.
# NOTE(review): that attribution is not checked here; the test only verifies
# that decryption inverts encryption.
def runtests():
    """Encrypt a fixed message, print the ciphertext as hex, and check the round trip."""
    key = [
        0x2519EB0A, 0x909CE82E, 0xD6C085EC, 0x545ACF07,
        0x24124049, 0x1E1353E7, 0x14AD4F2F, 0xE98FF6DE,
    ]
    nonce = [0x7369C667, 0xEC4AFF51, 0xABBACD29]
    init_counter = 0x00000001
    plaintext = b"\x8e\x91\x9e\xbe\x6a\x6c\x64\xc1\x02\x02\xf8\xda\xc4\xc8\xd6\x14\xa0\xa3\x9c\x0e\x62\x64\x70\x6d\x02\x02\x0c\x9d\xd2\xd6\xc6\xa8"

    ciphertext = chacha20_encrypt(key, init_counter, nonce, plaintext)

    # hex digits without the "0x" prefix, space separated, all on one line
    for byte in ciphertext:
        print(hex(byte)[2:], end=" ")

    recovered = chacha20_decrypt(key, init_counter, nonce, ciphertext)
    assert recovered == plaintext

    print("All tests passed!")

# Run the self-test when the script is executed.
main()

 

参考:

 

六、Salsa20加密

其实Salsa20加密和ChaCha20特别相似,ChaCha20是对Salsa20上稍微做了调整,数据bit扩散更快。每一个1/4 round会修改一个字两次,每一个输入字也会影响到输出字
两种加密算法最主要的区别在于四分之一轮操作(此外,初始矩阵中常量、密钥等的排列位置以及轮变换所作用的行/列/对角线也有所不同)
ChaCha20是这样的:
/*
 * Rotate x left by n bits.
 * The right-shift count is (-n & 31) so that n == 0 does not shift by 32;
 * see http://blog.regehr.org/archives/1063
 */
static inline uint32_t rotl32(uint32_t x, int n) {
    const int right = -n & 31;

    return (x << n) | (x >> right);
}

// ChaCha quarter round, see https://tools.ietf.org/html/rfc7539#section-2.1
// Each step is "add, xor, rotate"; the rotation amounts are 16, 12, 8 and 7.
static void chacha20_quarterround(uint32_t *x, int a, int b, int c, int d) {
    uint32_t *wa = &x[a];
    uint32_t *wb = &x[b];
    uint32_t *wc = &x[c];
    uint32_t *wd = &x[d];

    *wa += *wb;
    *wd = rotl32(*wd ^ *wa, 16);

    *wc += *wd;
    *wb = rotl32(*wb ^ *wc, 12);

    *wa += *wb;
    *wd = rotl32(*wd ^ *wa, 8);

    *wc += *wd;
    *wb = rotl32(*wb ^ *wc, 7);
}

Salsa20则是这样的:

/*
 * Salsa20 quarter round on four 32-bit lvalues: each step XORs one word with
 * a rotated sum of two others (rotations 7, 9, 13, 18).
 * Every macro parameter is parenthesized so that arguments which are
 * expressions expand with the intended precedence.
 */
#define quarter(a,b,c,d) do {\
    (b) ^= R((d)+(a), 7);\
    (c) ^= R((a)+(b), 9);\
    (d) ^= R((b)+(c), 13);\
    (a) ^= R((c)+(d), 18);\
} while (0)

 

七、Salsa20代码实现

1、C语言实现

#include <stdio.h>
#include <stdint.h> // we use 32-bit words

// Rotate the 32-bit value x left by n bits; bits shifted out on the left
// reappear on the right. The right-shift count is reduced modulo 32 so that
// n == 0 yields x instead of an undefined shift by 32.
// Note: x is evaluated twice, so pass only side-effect-free expressions.
#define R(x,n) (((x) << (n)) | ((x) >> ((32-(n)) & 31)))

// Salsa20 quarter round on four 32-bit lvalues.
// addition wraps modulo 2^32
// the choice of 7,9,13,18 "doesn't seem very important" (spec)
// Every macro parameter is parenthesized so that arguments which are
// expressions expand with the intended precedence.
#define quarter(a,b,c,d) do {\
    (b) ^= R((d)+(a), 7);\
    (c) ^= R((a)+(b), 9);\
    (d) ^= R((b)+(c), 13);\
    (a) ^= R((c)+(d), 18);\
} while (0)

// Salsa20 core on 16 words: applies 10 double rounds to a copy of `in`
// arranged as a 4x4 matrix, then writes matrix + in (word-wise, modulo 2^32)
// to `out`.
// Unlike the ChaCha20 listing earlier in this article (columns, then
// diagonals), each double round here is a column round followed by a row
// round, and every quarter round starts at the diagonal element and wraps
// around.
void salsa20_words(uint32_t *out, uint32_t in[16]) {
    uint32_t x[4][4];
    int row, col, k, pass;

    for (row = 0; row < 4; ++row)
        for (col = 0; col < 4; ++col)
            x[row][col] = in[4*row + col];

    for (pass = 0; pass < 10; ++pass) { // 10 double rounds = 20 rounds
        // column round: column k, starting at row k and wrapping downwards
        for (k = 0; k < 4; ++k)
            quarter(x[k][k], x[(k+1)&3][k], x[(k+2)&3][k], x[(k+3)&3][k]);
        // row round: row k, starting at column k and wrapping to the right
        for (k = 0; k < 4; ++k)
            quarter(x[k][k], x[k][(k+1)&3], x[k][(k+2)&3], x[k][(k+3)&3]);
    }

    // feed-forward: add the input words back in
    for (row = 0; row < 4; ++row)
        for (col = 0; col < 4; ++col)
            out[4*row + col] = x[row][col] + in[4*row + col];
}

// Produce one 64-byte keystream block: feeds the key, the message nonce, the
// keystream block index and the constants into the salsa20_words
// transformation and serializes the 16 resulting words little-endian.
//   out    receives 64 bytes
//   key    32-byte key
//   nonce  64-bit nonce, split into low and high 32-bit words
//   index  64-bit block index, split the same way
void salsa20_block(uint8_t *out, uint8_t key[32], uint64_t nonce, uint64_t index) {
    static const char c[16] = "expand 32-byte k"; // arbitrary constant
    // Little-endian load of four bytes. Each byte is converted to uint32_t
    // before shifting: a bare uint8_t promotes to int, and shifting a byte
    // >= 0x80 left by 24 would overflow a signed int (undefined behavior).
    #define LE(p) ( ((uint32_t)(uint8_t)(p)[0])       | \
                    ((uint32_t)(uint8_t)(p)[1] << 8)  | \
                    ((uint32_t)(uint8_t)(p)[2] << 16) | \
                    ((uint32_t)(uint8_t)(p)[3] << 24) )
    uint32_t in[16] = {LE(c),            LE(key),    LE(key+4),        LE(key+8),
                       LE(key+12),       LE(c+4),    (uint32_t)(nonce&0xffffffff), (uint32_t)(nonce>>32),
                       (uint32_t)(index&0xffffffff), (uint32_t)(index>>32),  LE(c+8),          LE(key+16),
                       LE(key+20),       LE(key+24), LE(key+28),       LE(c+12)};
    uint32_t wordout[16];
    salsa20_words(wordout, in);
    int i;
    // byte i is byte (i % 4) of word (i / 4), least significant byte first
    for (i=0; i<64; ++i) out[i] = 0xff & (wordout[i/4] >> (8*(i%4)));
}

// enc/dec: xor a message in place with the keystream derived from the key, a
// per-message nonce and the block index. A fresh 64-byte keystream block is
// generated at every 64-byte boundary.
// The index is a uint64_t to match mlen: an int index would be compared
// against an unsigned 64-bit length and would overflow for messages larger
// than INT_MAX bytes.
void salsa20(uint8_t *message, uint64_t mlen, uint8_t key[32], uint64_t nonce) {
    uint64_t i;
    uint8_t block[64];
    for (i=0; i<mlen; i++) {
        if (i%64 == 0) salsa20_block(block, key, nonce, i/64);
        message[i] ^= block[i%64];
    }
}

//Set 2, vector# 0:
//                         key = 00000000000000000000000000000000
//                               00000000000000000000000000000000
//                          IV = 0000000000000000
//               stream[0..63] = 9A97F65B9B4C721B960A672145FCA8D4
//                               E32E67F9111EA979CE9C4826806AEEE6
//                               3DE9C0DA2BD7F91EBCB2639BF989C625
//                               1B29BF38D39A9BDCE7C55F4B2AC12A39

// Demo: with an all-zero key, nonce and message, salsa20() leaves the raw
// keystream in msg. Prints the 64 bytes as uppercase hex, then the number of
// bytes printed.
int main() {
    uint8_t key[32] = {0};
    uint8_t msg[64] = {0};   // all-zero message; holds the keystream after the call
    uint64_t nonce = 0;
    int i = 0;

    salsa20(msg, sizeof(msg), key, nonce);

    while (i < sizeof(msg)) {
        printf("%02X ", msg[i]);
        ++i;
    }
    printf("\n");

    // i now equals the number of bytes printed
    printf("\n%d\n",i);
    return 0;
}

抄自github

2、Python实现

class Salsa:
  """Salsa20 core: maps (key, nonce, block counter) to 16 32-bit output words.

  ``r`` is the number of rounds (default 20). Calling the instance returns
  the list of 16 output words; the caller serializes them little-endian to
  obtain the 64-byte keystream block.
  """

  # Index quadruples (a, b, c, d) of the four quarter rounds of a column round.
  _COLUMN_QUARTERS = ((0, 4, 8, 12), (5, 9, 13, 1), (10, 14, 2, 6), (15, 3, 7, 11))

  def __init__(self,r=20):
    assert r >= 0
    self._r = r # number of rounds
    self._mask = 0xffffffff # 32-bit mask

  def __call__(self,key=[0]*32,nonce=[0]*8,block_counter=[0]*8):
    """Compute one block.

    key           -- 32 byte values
    nonce         -- 8 byte values
    block_counter -- 8 byte values
    The default lists are only read, never modified.
    Returns a list of 16 32-bit words (also kept in ``self._s``).
    """
    assert len(key) == 32
    assert len(nonce) == 8
    assert len(block_counter) == 8

    # init state: bytes are grouped into little-endian 32-bit words
    k = [self._littleendian(key[4*i:4*i+4]) for i in range(8)]
    n = [self._littleendian(nonce[4*i:4*i+4]) for i in range(2)]
    b = [self._littleendian(block_counter[4*i:4*i+4]) for i in range(2)]
    c = [0x61707865, 0x3320646e, 0x79622d32, 0x6b206574]

    # constants sit on the diagonal of the 4x4 matrix
    s = [c[0], k[0], k[1], k[2],
         k[3], c[1], n[0], n[1],
         b[0], b[1], c[2], k[4],
         k[5], k[6], k[7], c[3]]

    # the working state (a copy, so ``s`` stays intact for the final addition)
    self._s = s[:]

    for i in range(self._r):
      self._round()

    # Every _round() ends by transposing the state, so after an odd number of
    # rounds the working state is still transposed. Undo that so each word is
    # added to the initial word at the same position.
    if self._r % 2:
      self._transpose()

    # add initial state to the final one
    self._s = [(self._s[i] + s[i]) & self._mask for i in range(16)]

    return self._s

  def _littleendian(self,b):
    """Combine four byte values into one little-endian 32-bit word."""
    assert len(b) == 4
    return b[0] ^ (b[1] << 8) ^ (b[2] << 16) ^ (b[3] << 24)

  def _round(self):
    """One round: a column round followed by a transposition.

    Because of the transposition, the next call acts on the rows of the
    original layout, so two calls make one column-round/row-round pair.
    """
    s = self._s
    for a, b, c, d in self._COLUMN_QUARTERS:
      s[b] ^= self._rotl32((s[a] + s[d]) & self._mask, 7)
      s[c] ^= self._rotl32((s[b] + s[a]) & self._mask, 9)
      s[d] ^= self._rotl32((s[c] + s[b]) & self._mask,13)
      s[a] ^= self._rotl32((s[d] + s[c]) & self._mask,18)

    self._transpose()

  def _transpose(self):
    """Replace the state by its 4x4 transpose (rows become columns)."""
    self._s = [self._s[4*col + row] for row in range(4) for col in range(4)]

  def _rotl32(self,w,r):
    """Rotate the 32-bit value ``w`` left by ``r`` bits."""
    return ( ( ( w << r ) & self._mask) | ( w >> ( 32 - r ) ) )




if __name__ == '__main__':
  # Demo: print the inputs of each vector and the 64 keystream bytes that the
  # Salsa core produces for them.
  salsa20 = Salsa()

  # Each vector is [key bytes, nonce bytes, block counter bytes]; the
  # commented-out variant carries a fourth entry of 16 words.
  # vectors = [
  #    [ [0]*32, [3,1,4,1,5,9,2,6], [7,0,0,0,0,0,0,0],     # these are the parameters
  #    [ 0xb9a205a3,0x0695e150,0xaa94881a,0xadb7b12c,
  #      0x798942d4,0x26107016,0x64edb1a4,0x2d27173f,
  #      0xb1c7f1fa,0x62066edc,0xe035fa23,0xc4496f04,
  #      0x2131e6b3,0x810bde28,0xf62cb407,0x6bdede3d ] ] ]
  vectors = [
     [ [0]*32, [0,0,0,0,0,0,0,0], [0,0,0,0,0,0,0,0]]] # these are the parameters

  for v in vectors:
    print(f"v[0] => {v[0]}")
    print(f"v[1] => {v[1]}")
    print(f"v[2] => {v[2]}")
    s = salsa20(v[0],v[1],v[2])

    # split every 32-bit word into four bytes, least significant byte first
    stream_key = []
    for word in s:
      stream_key.extend((word >> shift) & 0xff for shift in (0, 8, 16, 24))

    print()
    for byte in stream_key:
      print(hex(byte),end = " ")      # the resulting bytes are the keystream

 

 
posted @ 2022-10-24 20:08  TLSN  阅读(3958)  评论(0编辑  收藏  举报