39)django-XSS 过滤

  使用 KindEditor 富文本编辑器时，用户可以在提交的内容中输入 script 代码，而后台不允许这些 script 代码被执行。

  这里主要使用 BeautifulSoup 进行过滤。

示例1

beautifulsoup4
    
    from bs4 import BeautifulSoup

    # "html.parser" is the HTML parser bundled with Python itself.
    soup = BeautifulSoup(content, "html.parser")
    tag = soup.find("script")
    if tag is not None:  # find() returns None when nothing matches
        tag.hidden = True  # hide the tag itself
        tag.clear()  # empty the tag's contents

    span = soup.find("span")
    if span is not None:
        # Drop the span's style attribute; no error if it has none.
        span.attrs.pop("style", None)

    content = soup.decode()  # serialize the parsed tree back to a string

    # Only keep the content of a fixed set of tags.
    tags = ["p", "span"]

    for tag in soup.find_all():
        # Anything outside the whitelist is hidden and emptied.
        if tag.name not in tags:
            tag.hidden = True
            tag.clear()

    # Only keep a fixed set of attributes on each whitelisted tag.
    tags = {
        "p": ["class"],
        "span": ["id"],
    }

    for tag in soup.find_all():
        # Guard clause: non-whitelisted tags are hidden, emptied and skipped.
        if tag.name not in tags:
            tag.hidden = True
            tag.clear()
            continue

        # Every attribute the user submitted on this tag,
        # e.g. {"class": "c1", "id": "i1"}.
        input_attrs = tag.attrs
        valid_attrs = tags[tag.name]

        # Collect the offending keys first so the dict is not mutated
        # while it is being iterated.
        for k in [name for name in input_attrs if name not in valid_attrs]:
            del input_attrs[k]
    

实例

#!/usr/bin/env python
# -*- coding:utf-8 -*-
from bs4 import BeautifulSoup


class XSSFilter(object):
    """Whitelist-based HTML sanitizer built on BeautifulSoup.

    The class is a singleton: every ``XSSFilter()`` call returns the same
    object. ``process`` parses user-submitted HTML, drops every tag that is
    not whitelisted, strips every attribute that is not whitelisted for its
    tag, and rejects URL attributes that use a scheme outside
    ``safe_schemes``.
    """

    __instance = None

    # Attributes whose value is a URL and therefore needs a scheme check.
    url_attrs = ('href', 'src')
    # URL schemes accepted in ``url_attrs``; scheme-less (relative) URLs are
    # always accepted.
    safe_schemes = ('http', 'https', 'mailto', 'ftp', 'tel')

    def __init__(self):
        # XSS whitelist: tag name -> attribute names allowed on that tag.
        # NOTE(review): 'style' on <font> lets arbitrary CSS through; confirm
        # this is acceptable for the browsers that must be supported.
        self.valid_tags = {
            "font": ['color', 'size', 'face', 'style'],
            'b': [],
            'div': [],
            "span": [],
            "table": [
                'border', 'cellspacing', 'cellpadding'
            ],
            'th': [
                'colspan', 'rowspan'
            ],
            'td': [
                'colspan', 'rowspan'
            ],
            "a": ['href', 'target', 'name'],
            "img": ['src', 'alt', 'title'],
            'p': [
                'align'
            ],
            "pre": ['class'],
            "hr": ['class'],
            'strong': []
        }

    def __new__(cls, *args, **kwargs):
        """Return the shared instance, creating it on first use (singleton).

        :param cls: the class being instantiated
        :param args: accepted for signature compatibility, ignored
        :param kwargs: accepted for signature compatibility, ignored
        :return: the single shared instance
        """
        if cls.__instance is None:
            # object.__new__ rejects extra arguments when __new__ is
            # overridden, so only the class is passed on.
            cls.__instance = object.__new__(cls)
        return cls.__instance

    def _is_safe_url(self, value):
        """Return True when *value* is a relative URL or uses a safe scheme.

        Whitespace and control characters are removed before the scheme is
        inspected so that values such as ``"java\\tscript:..."`` cannot slip
        past the check.
        """
        if isinstance(value, (list, tuple)):
            value = ' '.join(value)
        compact = ''.join(ch for ch in value if ch > ' ').lower()
        scheme, colon, _ = compact.partition(':')
        if not colon:
            # No colon at all: a relative URL.
            return True
        if any(sep in scheme for sep in '/?#'):
            # The colon sits in the path, query or fragment, not in a scheme.
            return True
        return scheme in self.safe_schemes

    def process(self, content):
        """Sanitize *content* and return the cleaned HTML as a string.

        :param content: user-submitted HTML; ``None`` or an empty string
            yields an empty string
        :return: HTML containing only whitelisted tags and attributes
        """
        if not content:
            return ''

        soup = BeautifulSoup(content, 'html.parser')
        # Walk every tag in the document.
        for tag in soup.find_all(recursive=True):
            if tag.name not in self.valid_tags:
                # Hide the tag's own markup.
                tag.hidden = True
                # <html>/<body> keep their children; any other unknown tag
                # loses its content as well (e.g. the body of <script>).
                if tag.name not in ('html', 'body'):
                    tag.clear()
                continue

            # Attribute whitelist for the current tag.
            attr_rules = self.valid_tags[tag.name]
            # Iterate over a copy because attributes are deleted in the loop.
            for key in list(tag.attrs):
                if key not in attr_rules:
                    del tag[key]
                elif key in self.url_attrs and not self._is_safe_url(tag[key]):
                    # e.g. href="javascript:..." would run script on click.
                    del tag[key]

        return soup.decode()


if __name__ == '__main__':
    # Demo input: nested markup with non-whitelisted attributes, an HTML
    # comment and a <script> element.
    html = """<p class="title">
                        <b>The Dormouse's story</b>
                    </p>
                    <p class="story">
                        <div name='root'>
                            Once upon a time there were three little sisters; and their names were
                            <a href="http://example.com/elsie" class="sister c1" style='color:red;background-color:green;' id="link1"><!-- Elsie --></a>
                            <a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
                            <a href="http://example.com/tillie" class="sister" id="link3">Tilffffffffffffflie</a>;
                            and they lived at the bottom of a well.
                            <script>alert(123)</script>
                        </div>
                    </p>
                    <p class="story">...</p>"""

    # Run the sample through the filter and print the sanitized markup.
    print(XSSFilter().process(html))

 

posted on 2017-12-09 20:28  shisanjun  阅读(459)  评论(0)  编辑  收藏  举报

导航