# Python: remove common web-page whitespace formatting (Python清除常见的网页空格格式)
def clean(string):
    """Return *string* with HTML tags and all whitespace removed.

    Anything of the form ``<...>`` is deleted first, then the literal
    ``&nbsp;`` entity, then every whitespace character. Whitespace is
    removed everywhere, not only at the ends, so ``"a b"`` becomes ``"ab"``.

    Args:
        string: The text (typically an HTML fragment) to clean.

    Returns:
        The cleaned text; an empty string if nothing but tags and
        whitespace was present.
    """
    # Imported locally so the function works even when the surrounding
    # module has no top-level ``import re``.
    import re

    # ``[^>]`` already matches newlines, so tags that span several lines
    # are removed without needing the DOTALL flag.
    text = re.sub(r'<[^>]+>', '', string)

    # NOTE(review): the original chain contained a no-op space-to-space
    # replace, presumably a non-breaking space / ``&nbsp;`` lost in a paste.
    # Both forms are handled here: the entity explicitly, the character via
    # the whitespace removal below.
    text = text.replace('&nbsp;', '')

    # str.split() with no arguments splits on runs of any Unicode whitespace
    # (space, \n, \r, \t, \xa0, \u3000, ...) and drops empty pieces, so
    # re-joining removes all whitespace in a single pass.
    return ''.join(text.split())