xss 多分类 优选 贝叶斯、逻辑回归、决策树
# Compare several classifiers (decision tree, linear SVM, logistic regression,
# Gaussian naive Bayes) on hand-crafted features extracted from URL strings
# labelled as XSS payloads or benign requests.
#
# The feature helpers only need the standard library; scikit-learn is imported
# lazily by the training code so the extractors can be reused on their own.
import re

import numpy as np

# Training set shared by etl() and main(): x collects feature rows, y the
# matching labels (1 = XSS sample, 0 = benign sample).
x = []
y = []

# Patterns are compiled once because they are applied to every input line.
_URL_SCHEME_RE = re.compile(r'(http://)|(https://)', re.IGNORECASE)
_EVIL_CHAR_RE = re.compile('[<>,\'"/]', re.IGNORECASE)
# The original pattern read "(script=)(%3c)" with the "|" missing, so
# "script=" and "%3c" were only counted when they appeared back-to-back.
_EVIL_WORD_RE = re.compile(
    r'(alert)|(script=)|(%3c)|(%3e)|(%20)|(onerror)|(onload)|(eval)|(src=)'
    r'|(prompt)',
    re.IGNORECASE)
_TRAILING_SLASH_RE = re.compile(r'/$', re.IGNORECASE)


def get_len(url):
    """Return the number of characters in *url*."""
    return len(url)


def get_url_count(url):
    """Return 1 if *url* contains ``http://`` or ``https://`` (any case), else 0."""
    return 1 if _URL_SCHEME_RE.search(url) else 0


def get_evil_char(url):
    """Return how many of the characters ``< > , ' " /`` occur in *url*."""
    return len(_EVIL_CHAR_RE.findall(url))


def get_evil_word(url):
    """Return the number of suspicious tokens found in *url*.

    Tokens are matched case-insensitively: ``alert``, ``script=``, ``%3c``,
    ``%3e``, ``%20``, ``onerror``, ``onload``, ``eval``, ``src=``, ``prompt``.
    """
    return len(_EVIL_WORD_RE.findall(url))


def get_last_char(url):
    """Return 1 if *url* ends with ``/``, else 0.

    A single trailing newline is ignored because ``$`` also matches just
    before it.
    """
    return 1 if _TRAILING_SLASH_RE.search(url) else 0


def get_feature(url):
    """Return the five-element feature vector for *url*.

    Order: length, scheme flag, evil-character count, evil-word count,
    trailing-slash flag.
    """
    return [
        get_len(url),
        get_url_count(url),
        get_evil_char(url),
        get_evil_word(url),
        get_last_char(url),
    ]


def etl(filename, data, isxss):
    """Append one feature row per line of *filename* to *data*.

    For every line a label is also appended to the module-level ``y`` list:
    1 when *isxss* is truthy, otherwise 0.

    Args:
        filename: path of a text file with one sample per line.
        data: list that receives the feature rows (mutated in place).
        isxss: truthy if the file contains XSS samples.

    Returns:
        The same *data* list that was passed in.
    """
    label = 1 if isxss else 0
    with open(filename) as f:
        for line in f:
            # NOTE(review): rows carry four features (no trailing-slash flag)
            # and the length includes the line terminator, so they do not line
            # up with get_feature(); kept as-is so results stay comparable.
            data.append([
                get_len(line),
                get_url_count(line),
                get_evil_char(line),
                get_evil_word(line),
            ])
            y.append(label)
    return data


def _build_classifiers():
    """Return ``(label, estimator)`` pairs in a fixed evaluation order."""
    from sklearn import linear_model
    from sklearn import svm
    from sklearn import tree
    from sklearn.naive_bayes import GaussianNB

    return [
        ("decision tree", tree.DecisionTreeClassifier()),
        # As its label says, the linear SVM takes a long time to train;
        # remove this entry for a quick run.
        ("svm train long time", svm.SVC(kernel='linear', C=1)),
        ("LR", linear_model.LogisticRegression(C=1e5)),
        ("bayes", GaussianNB()),
    ]


def main():
    """Load both data files and print 5-fold CV scores for each classifier."""
    try:
        from sklearn.model_selection import cross_val_score
    except ImportError:  # scikit-learn < 0.18 only ships cross_validation
        from sklearn.cross_validation import cross_val_score

    etl('../data/xss-200000.txt', x, 1)
    etl('../data/good-xss-200000.txt', x, 0)

    for name, clf in _build_classifiers():
        # Single-argument print() behaves the same on Python 2 and 3.
        print("model: %s" % name)
        score = cross_val_score(clf, x, y, n_jobs=-1, cv=5)
        print(score)
        print(np.mean(score))


if __name__ == "__main__":
    main()
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 记一次.NET内存居高不下排查解决与启示
· 探究高空视频全景AR技术的实现原理
· 理解Rust引用及其生命周期标识(上)
· 浏览器原生「磁吸」效果!Anchor Positioning 锚点定位神器解析
· 没有源码,如何修改代码逻辑?
· 全程不用写代码,我用AI程序员写了一个飞机大战
· MongoDB 8.0这个新功能碉堡了,比商业数据库还牛
· 记一次.NET内存居高不下排查解决与启示
· 白话解读 Dapr 1.15:你的「微服务管家」又秀新绝活了
· DeepSeek 开源周回顾「GitHub 热点速览」
2016-11-11 elasticsearch 自定义_id
2016-11-11 自定义实现spark的分区函数
2016-11-11 elasticsearch 路由文档到分片
2016-11-11 spark streaming 入门例子
2016-11-11 ElasticSearch-hadoop saveToEs源码分析