【逆向】Base64编码解码及逆向识别

序言

Base64编码解码的原理,可以参考之前写的文章。

示例代码

注意:该示例代码仅为Base64编码实现的其中一种,实际分析样本的Base64编码实现并不一定与此代码相同,读者应重点理解其原理部分,而忽略其实现形式的不同。

  1 // Base64.cpp : 定义控制台应用程序的入口点。
  2 //
  3 
#include "stdafx.h"
#include <limits.h>
#include <stdio.h>
#include "string.h"
#include "stdlib.h"
  8 
  9 static char find_pos(char ch);
 10 char *base64_encode(const char* data, int data_len, int *len);
 11 char *base64_decode(const char* data, int data_len, int *len);
 12 const char base[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";
 13 
 14 //======获取BASE64编码索引
 15 static char find_pos(char ch)
 16 {
 17     //the last position (the only) in base[] 
 18     char *ptr = (char*)strrchr(base, ch);
 19     return (ptr - base);
 20 }
 21 
 22 //======BASE64编码
 23 char *base64_encode(const char* data, int data_len, int *len)
 24 {
 25     int prepare = 0;
 26     int ret_len;
 27     *len = 0;
 28     int temp = 0;
 29 
 30     int nCount = 0;
 31     char chArr[4];
 32 
 33     char *pRet = NULL;
 34     char *pTmp = NULL;
 35     
 36     //计算编码后字符长度
 37     ret_len = data_len / 3;
 38     temp = data_len % 3;
 39     if (temp > 0)
 40     {
 41         ret_len += 1;
 42     }
 43 
 44     //申请返回数据缓冲区
 45     ret_len = ret_len * 4 + 1;
 46     pRet = (char *)malloc(ret_len);
 47     if (pRet == NULL)
 48     {
 49         printf("No enough memory");
 50         exit(0);
 51     }
 52     memset(pRet, 0, ret_len);
 53     pTmp = pRet;
 54 
 55     //处理所有待编码数据
 56     while (nCount < data_len) 
 57     {
 58         temp = 0;
 59         prepare = 0;
 60         memset(chArr, 0, 4);
 61 
 62         //3个字节一组,保存每个字节二进制位
 63         while (temp < 3)    
 64         {
 65             if (nCount >= data_len)
 66             {
 67                 break;
 68             }
 69             prepare = ((prepare << 8) | (data[nCount] & 0xFF));
 70             nCount++;
 71             temp++;
 72         }
 73         //确保每个字节数据以24比特位排列
 74         prepare = (prepare << ((3 - temp) * 8));
 75 
 76         //获取base表索引及编码字符
 77         for (int i = 0; i < 4; i++)
 78         {
 79             //最后一位或两位用'='填充 
 80             if (temp < i)
 81             {
 82                 chArr[i] = 0x40;    
 83             }
 84             else
 85             {
 86                 //获取base表索引
 87                 chArr[i] = (prepare >> ((3 - i) * 6)) & 0x3F;
 88             }
 89             //通过索引获取对应编码字符
 90             *pTmp = base[chArr[i]];
 91             pTmp++;
 92             (*len)++;
 93         }
 94     }
 95 
 96     *pTmp = 0x00;
 97     return pRet;
 98 }
 99 
100 //======BASE64解码
/*
 * Decode Base64 text produced with the file-level base[] table.
 *
 * data:     Base64 characters; may be NULL only when data_len is 0.
 * data_len: number of characters in data; must not be negative.
 * len:      if not NULL, receives the number of decoded bytes,
 *           not counting the terminating NUL (0 on failure).
 *
 * Up to two trailing '=' characters are treated as padding and skipped;
 * input without padding is accepted as well.
 *
 * Returns a buffer obtained from malloc() holding the decoded bytes followed
 * by a NUL byte; the caller must release it with free().  Returns NULL when
 * the arguments are invalid, when a character outside the 64-character
 * alphabet is encountered, or when the unpadded length leaves a single
 * dangling character (which cannot encode a whole byte).  If the allocation
 * fails the process is terminated with a failure status.
 */
char *base64_decode(const char *data, int data_len, int *len)
{
    if (len != NULL)
    {
        *len = 0;
    }
    if (data_len < 0 || (data == NULL && data_len > 0))
    {
        return NULL;
    }

    /* Strip at most two trailing '=' without reading before the buffer. */
    int payload = data_len;
    int pads = 0;
    while (pads < 2 && payload > 0 && data[payload - 1] == '=')
    {
        payload--;
        pads++;
    }

    /* A final group of 2 or 3 characters carries 1 or 2 bytes; a final
     * group of 1 character carries only 6 bits and is malformed. */
    int tail = payload % 4;
    if (tail == 1)
    {
        return NULL;
    }
    int out_len = (payload / 4) * 3 + ((tail > 0) ? tail - 1 : 0);

    char *pRet = (char *)malloc((size_t)out_len + 1);
    if (pRet == NULL)
    {
        fprintf(stderr, "No enough memory.\n");
        exit(EXIT_FAILURE);
    }

    char *out = pRet;
    int pos = 0;
    while (pos < payload)
    {
        int remaining = payload - pos;
        int take = (remaining < 4) ? remaining : 4;

        /* Collect the 6-bit index of each character in this group. */
        unsigned int bits = 0;
        for (int k = 0; k < take; k++)
        {
            int idx = find_pos(data[pos + k]);
            /* Only 0..63 are data indices; 64 is '=' and anything else
             * means the character is not part of the alphabet. */
            if (idx < 0 || idx > 63)
            {
                free(pRet);
                return NULL;
            }
            bits = (bits << 6) | (unsigned int)idx;
        }
        /* Left-align a short final group inside the 24-bit window. */
        bits <<= (4 - take) * 6;

        /* n characters carry n - 1 complete bytes. */
        for (int k = 0; k < take - 1; k++)
        {
            *out++ = (char)((bits >> ((2 - k) * 8)) & 0xFF);
        }
        pos += take;
    }

    *out = '\0';
    if (len != NULL)
    {
        *len = out_len;
    }
    return pRet;
}
175 
176 //======Main
177 int _tmain(int argc, _TCHAR* argv[])
178 {
179     int len1, len2, len3;
180     char *former = "A";
181     len3 = strlen(former);
182 
183     //编码
184     char *after = base64_encode(former, len3, &len1);
185     printf("enCode=%s    nLen=%d\n", after,len1);
186 
187     //解码
188     former = base64_decode(after, len1, &len2);
189     printf("enCode=%s    nLen=%d\n", former, len2);
190 
191     return 0;
192 }
View Code

逆向识别

1、编码识别

2、解码识别

总结

1、识别代码中的“=”填充字符(padding)
2、识别Base64编码表,静态编码表可以在IDA的“Strings”窗口查看,有些动态拼接的则需要手动识别
3、识别代码中对数据的左移右移操作
4、最主要的是理解编码解码原理,比如编码时通常都会用3个字节一组来处理比特位数据,这些特征都可以用来分析识别

参考

https://www.52pojie.cn/thread-793420-1-1.html

posted @ 2020-02-19 23:36  SunsetR  阅读(2132)  评论(0编辑  收藏  举报