保留内容中的特定标签
def strip_tags(string, allowed_tags=''):
    """Remove markup tags from *string*, optionally keeping a whitelist.

    A "tag" is any ``<...>`` run that contains no ``>`` inside it.  This is
    a lightweight regex-based filter, not an HTML parser or a security
    sanitizer: attributes of allowed tags (e.g. ``onclick``) are kept as-is,
    and plain text such as ``a < b and c > d`` is treated as a tag.

    Args:
        string: Text to filter.
        allowed_tags: Comma-separated tag names to keep, e.g. ``'br,img,a'``.
            An iterable of names is accepted as well.  Names are compared
            literally and case-insensitively; surrounding whitespace and
            empty entries are ignored.  ``''`` or ``None`` removes every tag.

    Returns:
        The text with all non-whitelisted tags removed.  Both the opening
        and closing forms of an allowed tag are preserved.
    """
    if allowed_tags is None:
        allowed_tags = ''
    if isinstance(allowed_tags, str):
        allowed_tags = allowed_tags.split(',')

    # Normalise once so lookups are exact-name and case-insensitive.  Dropping
    # empty entries matters: an empty name must not whitelist every tag.
    allowed_names = {name.strip().lower() for name in allowed_tags}
    allowed_names.discard('')

    if not allowed_names:
        # If no allowed tags, remove all.
        return re.sub(r'<[^>]*?>', '', string)

    def _keep_if_allowed(match):
        # group(1) is the tag name, or None when the text after '<' (and an
        # optional '/') does not start with a name, e.g. '< b>' or '<//a>'.
        name = match.group(1)
        if name is not None and name.lower() in allowed_names:
            return match.group(0)
        return ''

    # First alternative captures the complete tag name (it stops at
    # whitespace, '/' or '>'), so allowing 'a' does not also allow '<abbr>'.
    # Second alternative catches any other '<...>' run so it is still removed.
    return re.sub(
        r'<(?:/?([^\s/>]+)[^>]*|[^>]+)>',
        _keep_if_allowed,
        string,
    )
# Example usage: filter `content` through strip_tags with 'br,img,hr,a' as the
# allowed-tag list, rebind `content` to the filtered text, then print it.
# NOTE(review): `content` is not defined in this snippet; presumably it is
# assigned earlier by the surrounding script. Confirm before running.
content = strip_tags(content, allowed_tags='br,img,hr,a')
print(content)