[BJDCTF 2020]encode
[BJDCTF 2020]encode
分析程序
- 无壳
- 32位ELF程序
程序运行回显如下:
发现是一个输入flag,验证flag的程序。我们直接用IDA打开,发现程序去了符号表,shift+F12查看一下字符串列表,发现存在Please input your flag字符串.
交叉引用跟踪到对应函数。
sub_804887C函数伪代码如下,可以通过代码观察到,这个函数是程序的主逻辑,我们将其命名为main函数:
/*
 * sub_804887C: main verification routine (IDA pseudo-code; the binary is stripped,
 * so the libc names mentioned in the comments are inferred from usage).
 * Visible flow: read the input, require a length of 21, transform it with
 * sub_8048AC2, place the result at v5+18, XOR it with the repeating key stored
 * at the start of v5, run it through sub_8048E24 and compare it with the
 * constant stored in v6.
 */
int sub_804887C()
{
int v0; // eax
int result; // eax
unsigned int i; // [esp+Ch] [ebp-FCh]
unsigned int v3; // [esp+10h] [ebp-F8h]
unsigned int v4; // [esp+14h] [ebp-F4h]
char v5[48]; // [esp+1Ah] [ebp-EEh] BYREF
char v6[178]; // [esp+4Ah] [ebp-BEh] BYREF
unsigned int v7; // [esp+FCh] [ebp-Ch]
// Save the value at gs:0x14; it is compared again before returning (presumably the stack canary).
v7 = __readgsdword(0x14u);
// v5[0..17] holds the 17-character key plus its terminator; v3 is the key length.
strcpy(v5, "Flag{This_a_Flag}");
v3 = sub_805BBD0(v5);
// v6[0..49] holds the expected ciphertext as displayed by IDA (49 characters plus terminator).
strcpy(v6, "E8D8BD91871A1E56F53F4889682F96142AF2AB8FED7ACFD5E");
sub_804F950("Please input your flag:");// printf/puts-like output function
// The input is stored at v6+50. NOTE(review): up to 256 bytes are requested although
// only 128 bytes of v6 remain past offset 50 according to the recovered array size.
sub_806DA80(0, &v6[50], 256);// read-like function
if ( sub_805BBD0(&v6[50]) != 21 )// the input length must be exactly 21
sub_804EAF0(0);// exit-like function
// v0 points to the encoded form of the input (see sub_8048AC2).
v0 = sub_8048AC2(&v6[50]);
// Presumably strcpy: the encoded text has to end up at v5+18 because its length is taken next.
sub_80481D0(&v5[18], v0);
v4 = sub_805BBD0(&v5[18]);
for ( i = 0; i < v4; ++i )
v5[i + 18] ^= v5[i % v3]; // XOR each encoded byte with the key byte at index i % v3
// Arguments: data, data length, key, key length.
sub_8048E24(&v5[18], v4, v5, v3);
// Exits when sub_8048280 returns 0. NOTE(review): if sub_8048280 is strcmp, 0 means "equal";
// confirm the sense of this test against the disassembly.
if ( !sub_8048280(&v5[18], v6) )
sub_804EAF0(0);
sub_804F950("right!");
result = 0;
// Canary re-check; sub_806FA00 is presumably __stack_chk_fail.
if ( __readgsdword(0x14u) != v7 )
sub_806FA00();
return result;
}
程序逻辑分析:
- sub_804F950是打印字符串Please input your flag,多半是printf或者Puts函数.
- sub_806DA80看起来是向v6数组(从v6[50]开始)写入数据;后面sub_805BBD0的返回值被拿来和21比较,可以确认sub_805BBD0应该是strlen函数,即flag长度必须为21,所以sub_806DA80是一个read函数
- sub_8048AC2是对我们输入的v6数组进行处理
- 随后的for循环是对我们v5字符串数组的一个取余+异或的算法
- sub_8048E24也是对我们的输入的字符串经过了某种加密,并且传入了&v5[18],v4,v5,v3这四个参数
算法分析:
sub_8048AC2(&v9[50])(即上面main伪代码中的&v6[50];从这里开始,文中片段的变量名来自另一次反编译,与上面main伪代码的对应关系为 v9→v6、v8→v5、v7→v4、v6→v3、v3→v0):
伪代码如下所示:
/*
 * sub_8048AC2: encodes the NUL-terminated string at a1 with a Base64-style
 * scheme that uses the lookup table a0123456789Abcd instead of the standard
 * alphabet.
 * Returns the address of a newly obtained buffer holding the NUL-terminated
 * encoded text (4 output characters for every 3 input bytes, '=' padded).
 */
int __cdecl sub_8048AC2(int a1)
{
int v2; // [esp+8h] [ebp-20h]
int v3; // [esp+Ch] [ebp-1Ch]
int v4; // [esp+10h] [ebp-18h]
int v5; // [esp+18h] [ebp-10h]
int v6; // [esp+1Ch] [ebp-Ch]
// v5 = input length; v2 = output length = 4 * ceil(v5 / 3).
v5 = strlen(a1);
if ( v5 % 3 )
v2 = 4 * (v5 / 3 + 1);
else
v2 = 4 * (v5 / 3);
// v6 = output buffer of v2 + 1 bytes (sub_80597A0 is presumably malloc); terminate it up front.
v6 = sub_80597A0(v2 + 1);
*(_BYTE *)(v2 + v6) = 0;
// v3 = write offset into the output, v4 = read offset into the input.
v3 = 0;
v4 = 0;
// Each pass turns 3 input bytes (24 bits) into four 6-bit table indices.
// The loop is bounded by the output offset only, so the final partial group also
// reads the bytes that follow the input; those positions are overwritten with '=' below.
while ( v2 - 2 > v3 )
{
*(_BYTE *)(v6 + v3) = a0123456789Abcd[*(_BYTE *)(v4 + a1) >> 2];
*(_BYTE *)(v6 + v3 + 1) = a0123456789Abcd[(16 * (*(_BYTE *)(v4 + a1) & 3)) | (*(_BYTE *)(v4 + 1 + a1) >> 4)];
*(_BYTE *)(v6 + v3 + 2) = a0123456789Abcd[(4 * (*(_BYTE *)(v4 + 1 + a1) & 0xF)) | (*(_BYTE *)(v4 + 2 + a1) >> 6)];
*(_BYTE *)(v6 + v3 + 3) = a0123456789Abcd[*(_BYTE *)(v4 + 2 + a1) & 0x3F];
v4 += 3;
v3 += 4;
}
// Padding: 61 is '='. One leftover input byte -> "==", two leftover bytes -> "=".
if ( v5 % 3 == 1 )
{
*(_BYTE *)(v3 - 2 + v6) = 61;
*(_BYTE *)(v3 - 1 + v6) = 61;
}
else if ( v5 % 3 == 2 )
{
*(_BYTE *)(v3 - 1 + v6) = 61;
}
return v6;
}
#查看一下a0123456789Abcd,发现是一个非标准的Base64码表
#0123456789+/abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
所以我们可以初步判断这个函数,是一个变表的Base64加密算法,码表为0123456789+/abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.
我们在该函数下,下断点,输入21个a,发现返回的是一个v3(08CFF4E8),也就是一个数组,我们查看一下这个数组里面存储了什么东西.
也就是我们输入的21个a经过变表加密后,字符串变为了mk5vmk5vmk5vmk5vmk5vmk5vmk5v2DRZ36krZK==.我们还可以通过在线网站进行进一步的验证。但其实这里,题目出题人应该疏忽了一个地方,Base64算法的明文密文长度比为3:4,这里我们输入的明文长度为21,密文长度为40.如果要满足比例的话,密文长度必须为28(前28个字符mk5v×7正好就是21个a的编码结果).
Base64前置知识:
- 一个Base64字符相当于6位,不足也会补0
- 一个Base64字符可以近乎看成2个16进制字符
所以这里16进制字符应该为56个(28字节×2),但是题目里的16进制字符只有49个,说明缺了一些字符
sub_80481D0(&v8[18], v3);
伪代码如下:
/*
 * sub_80481D0: thin wrapper that forwards both arguments to sub_805BAC0 and
 * returns its result.
 * NOTE(review): sub_805BAC0 is listed below as a selector returning a function
 * pointer, so this call is presumably dispatched to the selected implementation
 * (IFUNC-style); from its use in the main routine it behaves like strcpy(a1, a2)
 * -- confirm in the disassembly.
 */
int __cdecl sub_80481D0(int a1, int a2)
{
return sub_805BAC0(a1, a2);
}
/*
 * sub_805BAC0: returns one of three function pointers (sub_805BB00, sub_8067700
 * or sub_8064640) depending on bits of the globals dword_80EB574, dword_80EB5A4
 * and dword_80EB570. It does not touch any user data itself.
 * NOTE(review): this looks like a libc IFUNC resolver picking an implementation
 * from CPU feature flags -- confirm.
 */
int (__cdecl *sub_805BAC0())(int, int)
{
int (__cdecl *result)(int, int); // eax
// Default choice.
result = sub_805BB00;
if ( (dword_80EB574 & 0x4000000) != 0 )
{
// Second choice when bit 0x4000000 of dword_80EB574 is set ...
result = sub_8067700;
// ... replaced by the third one when bit 0x10 of dword_80EB5A4 is clear
// and bit 0x200 of dword_80EB570 is set.
if ( (dword_80EB5A4 & 0x10) == 0 && (dword_80EB570 & 0x200) != 0 )
return sub_8064640;
}
return result;
}
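这个函数本身没有任何加密逻辑:sub_805BAC0只是根据几个全局标志位选择具体实现的分发函数,从调用前后的数据流看,sub_80481D0应该就是strcpy,它把Base64变表编码后的结果拷贝到v8[18]。我们继续往下分析。随后通过strlen函数计算了v8[18]处编码结果的长度(v7),然后将编码结果逐字节与密钥v8(也就是字符串Flag{This_a_Flag},长度为v6)进行取余+异或运算.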
for ( i = 0; i < v7; ++i )
v8[i + 18] ^= v8[i % v6];
sub_8048E24(&v8[18], v7, v8, v6);
伪代码如下:
/*
 * sub_8048E24: XORs the a2 bytes at a1 in place with a keystream derived from
 * the key at a3 (length a4). The table set-up is done by sub_8048CC2 and the
 * loop below matches the RC4 keystream generation (PRGA).
 *   a1 - data buffer, modified in place
 *   a2 - number of data bytes
 *   a3 - key
 *   a4 - key length
 * The return value is only the stack-check result (0 on the normal path).
 */
unsigned int __cdecl sub_8048E24(int a1, unsigned int a2, int a3, int a4)
{
unsigned int result; // eax
char v5; // [esp+1Bh] [ebp-11Dh]
int v6; // [esp+1Ch] [ebp-11Ch]
int v7; // [esp+20h] [ebp-118h]
unsigned int i; // [esp+24h] [ebp-114h]
char v9[256]; // [esp+2Ch] [ebp-10Ch] BYREF
unsigned int v10; // [esp+12Ch] [ebp-Ch]
v10 = __readgsdword(0x14u);
// Build the 256-byte permutation table v9 from the key.
sub_8048CC2(v9, a3, a4);
// v6 and v7 are the two table indices; only their low byte is used.
LOBYTE(v6) = 0;
LOBYTE(v7) = 0;
for ( i = 0; i < a2; ++i )
{
// Advance the first index by one and the second by the table entry at the first (both mod 256).
v6 = (unsigned __int8)(v6 + 1);
v7 = (unsigned __int8)(v9[v6] + v7);
// Swap v9[v6] and v9[v7].
v5 = v9[v6];
v9[v6] = v9[v7];
v9[v7] = v5;
// Keystream byte = v9[(v9[v6] + v9[v7]) mod 256]; XOR it into the data.
*(_BYTE *)(a1 + i) ^= v9[(unsigned __int8)(v9[v6] + v9[v7])];
}
// Stack check: non-zero only when the value saved from gs:0x14 has changed.
result = __readgsdword(0x14u) ^ v10;
if ( result )
sub_806FA00();
return result;
}
----------------------------------------------------------------------------------------
sub_8048CC2(v9, a3, a4);
/*
 * sub_8048CC2: fills the 256-byte table at a1 with a key-dependent permutation
 * of 0..255; the two loops match the RC4 key-scheduling algorithm (KSA).
 *   a1 - output table (256 bytes)
 *   a2 - key
 *   a3 - key length (used as a modulus, so it must be non-zero)
 * The return value is only the stack-check result (0 on the normal path).
 */
unsigned int __cdecl sub_8048CC2(int a1, int a2, unsigned int a3)
{
unsigned int result; // eax
char v4; // [esp+13h] [ebp-115h]
int i; // [esp+14h] [ebp-114h]
int j; // [esp+14h] [ebp-114h]
int v7; // [esp+18h] [ebp-110h]
char v8[256]; // [esp+1Ch] [ebp-10Ch]
unsigned int v9; // [esp+11Ch] [ebp-Ch]
v9 = __readgsdword(0x14u);
// Identity table in a1, and the key repeated cyclically into the scratch array v8.
for ( i = 0; i <= 255; ++i )
{
*(_BYTE *)(i + a1) = i;
v8[i] = *(_BYTE *)(i % a3 + a2);
}
// Shuffle: v7 accumulates table[j] + expanded_key[j] (mod 256), then table[j] and table[v7] are swapped.
v7 = 0;
for ( j = 0; j <= 255; ++j )
{
v7 = (*(unsigned __int8 *)(j + a1) + v7 + (unsigned __int8)v8[j]) % 256;
v4 = *(_BYTE *)(j + a1);
*(_BYTE *)(a1 + j) = *(_BYTE *)(v7 + a1);
*(_BYTE *)(a1 + v7) = v4;
}
// Stack check: non-zero only when the value saved from gs:0x14 has changed.
result = __readgsdword(0x14u) ^ v9;
if ( result )
sub_806FA00();
return result;
}
可以看到在该函数传入了我们刚刚的&v8[18],v7,v8,v6;在这个函数里面还有一个sub_8048CC2函数.该函数显著的特征就是两个256次(0~255)的循环,类似于我们的RC4算法。关于RC4算法我已经在另外一篇博客讲到过:https://www.cnblogs.com/qsons/p/17475767.html.
这里的函数很明显就是对我们经过了Base64变表后+取余异或运算后的字符串进行 RC4的加密。
for ( i = 0; i <= 255; ++i )
{
*(_BYTE *)(i + a1) = i;
v8[i] = *(_BYTE *)(i % a3 + a2);
}
v7 = 0;
for ( j = 0; j <= 255; ++j )
{
v7 = (*(unsigned __int8 *)(j + a1) + v7 + (unsigned __int8)v8[j]) % 256;
v4 = *(_BYTE *)(j + a1);
*(_BYTE *)(a1 + j) = *(_BYTE *)(v7 + a1);
*(_BYTE *)(a1 + v7) = v4;
}
/*
很明显上面的for循环就是RC4算法中的初始化,即 S[i] = i (a1就是S盒),
同时把密钥循环填充到临时数组: K[i] = key[i % key_len] (v8就是K)
下面的for循环为RC4算法中的置换(KSA):
j = (j + S[i] + K[i]) % 256;   // 伪代码中的v7就是这里的j
tmp = S[i];                    // 伪代码中的v4就是这里的tmp
S[i] = S[j];
S[j] = tmp;
*/
而在函数sub_8048E24中就是RC4算法中的最后一步即加密。而对于RC4而言需要4个参数,key,key_len,data,data_len.而在函数中,也给出了相应的参数
v8 --- key
v6 --- key_len
v7 --- Data_len
v8[18] --- Data
所以整个程序加密逻辑如下所示:
- Base64变表加密运算
- 用密钥Flag{This_a_Flag}对Base64编码结果进行取余+异或运算
- RC4加密
解密
题目中Hex字符缺了很多,网上wp说是缺了很多010E这种。对比下面两行可以看出,缺的都是小于0x10的字节的前导0(01、0E、0F、0A、08被显示成了1、E、F、A、8,末尾的00也没有显示出来),所以密文实际上是28个字节。RC4脚本如下
#原有字符串E8D8BD91871A1E56F53F4889682F96142AF2AB8FED7ACFD5E
#实际解密字符串E8D8BD91871A010E560F53F4889682F961420AF2AB08FED7ACFD5E00
import base64
def rc4_main(key = "init_key", message = "init_message"):
    """Decrypt (or encrypt) ``message`` with RC4 under ``key``.

    Args:
        key: RC4 key, passed unchanged to ``rc4_init_sbox``.
        message: Base64 text in the form expected by ``rc4_excrypt``.

    Returns:
        Whatever ``rc4_excrypt`` returns for the message and the freshly
        initialised S-box.
    """
    print("RC4解密主函数调用成功")
    print('\n')
    # Key scheduling first, then the keystream XOR over the message.
    s_box = rc4_init_sbox(key)
    crypt = rc4_excrypt(message, s_box)
    return crypt
def rc4_init_sbox(key):
    """Run the RC4 key-scheduling algorithm (KSA) and return the S-box.

    Args:
        key: The RC4 key, either a ``str`` (each character contributes
            ``ord(c)``) or a ``bytes``/``bytearray`` object. It must not be
            empty; an empty key raises ``ZeroDivisionError``.

    Returns:
        A list of 256 integers, a key-dependent permutation of ``0..255``.
    """
    # Normalise the key to integer values so that str and bytes both work.
    key_codes = [ord(c) for c in key] if isinstance(key, str) else list(key)
    s_box = list(range(256))
    print("原来的 s 盒:%s" % s_box)
    print('\n')
    j = 0
    for i in range(256):
        j = (j + s_box[i] + key_codes[i % len(key_codes)]) % 256
        s_box[i], s_box[j] = s_box[j], s_box[i]
    print("混乱后的 s 盒:%s"% s_box)
    print('\n')
    return s_box
def rc4_excrypt(plain, box):
    """XOR a Base64-wrapped message with the RC4 keystream (PRGA).

    Args:
        plain: Base64 text. Its decoded bytes must be the UTF-8 encoding of
            the message; every character of that message is then treated as
            one value to XOR with a keystream byte.
        box: S-box as returned by ``rc4_init_sbox``. It is copied first, so
            the caller's list is left untouched.

    Returns:
        A ``str`` with one character per input character, each being
        ``chr(ord(input_char) ^ keystream_byte)``.
    """
    print("调用解密程序成功。")
    print('\n')
    # Work on a private copy: the keystream generation permutes the S-box.
    box = list(box)
    plain = base64.b64decode(plain.encode('utf-8'))
    plain = bytes.decode(plain)
    res = []
    i = j = 0
    for s in plain:
        i = (i + 1) % 256
        j = (j + box[i]) % 256
        box[i], box[j] = box[j], box[i]
        t = (box[i] + box[j]) % 256
        k = box[t]
        res.append(chr(ord(s) ^ k))
    # "%s" here: the original "%res" was parsed as "%r" followed by a literal "es".
    print("res用于解密字符串,解密后是:%s" %res)
    print('\n')
    cipher = "".join(res)
    print("解密后的字符串是:%s" %cipher)
    for i in range(len(cipher)):
        print(ord(cipher[i]),end=",")
    print('\n')
    print("解密后的输出(没经过任何编码):")
    print('\n')
    # print(res)
    return cipher
# The 28 ciphertext bytes, with the leading zeros restored (see the two hex strings above).
a = [0xE8,0xD8,0xBD,0x91,0x87,0x1A,0x01,0x0E,0x56,0x0F,0x53,0xF4,0x88,0x96,0x82,0xF9,0x61,0x42,0x0A,0xF2,0xAB,0x08,0xFE,0xD7,0xAC,0xFD,0x5E,0x00]
key="Flag{This_a_Flag}"
# One character per byte value; rc4_excrypt expects this text UTF-8 encoded and
# then Base64 wrapped, and undoes both steps before XORing.
s = "".join(chr(i) for i in a)
s=str(base64.b64encode(s.encode('utf-8')), 'utf-8')
rc4_main(key, s)
#得到的数据为
[35,21,37,83,8,26,89,56,18,106,57,49,39,91,11,19,19,8,92,51,11,53,97,1,81,31,16,92]
整个异或的逻辑如下:
for ( i = 0; i < v7; ++i )
v8[i + 18] ^= v8[i % v6];
/*
v7是Base64变表编码结果的长度(本题为28),被作为了循环次数。
v8是key
v6也是key长度
*/
#所以要想得到原来的明文的话直接与v8[i%v6]进行异或即可
异或脚本如下:
# Undo the repeating-key XOR: every byte of the RC4 output is XORed with the
# key byte at the same position modulo the key length.
key = 'Flag{This_a_Flag}'
a = [35,21,37,83,8,26,89,56,18,106,57,49,39,91,11,19,19,8,92,51,11,53,97,1,81,31,16,92]
for i in range(len(a)):
    a[i] ^= ord(key[i % len(key)])
# Printed without a trailing newline, exactly as the per-character loop did.
print("".join(chr(x) for x in a), end="")
#eyD4sN1Qa5Xna7jtnN0RlN5i8lO=
最后进行变表的Base64解密即可,也可以用网站解密(http://web.chacuo.net/netbasex):
import base64
import string
# Text recovered from the XOR step, still encoded with the challenge's alphabet.
str1 = "eyD4sN1Qa5Xna7jtnN0RlN5i8lO="
# Alphabet used by the binary.
string1 = "0123456789+/abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
# Standard Base64 alphabet.
string2 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
# Map every custom-alphabet character to the standard one at the same index,
# then let the stock decoder do the rest ('=' is in neither table and is kept).
to_standard = str.maketrans(string1, string2)
standard_text = str1.translate(to_standard)
flag = base64.b64decode(standard_text).decode()
print(flag)
#BJD{0v0_Y0u_g07_1T!}
Over!!!