Clean Text
2021-05-20 13:31 DataBases 阅读(25) 评论(0) 编辑 收藏 举报
import re
import string
def clean_text(s: str) -> str:
    """
    Strip ASCII punctuation from a string and normalise its whitespace.

    Every character in ``string.punctuation`` is deleted, then the text is
    split on any whitespace and re-joined with single spaces, which also
    drops leading and trailing whitespace.

    :param s: string
    :return: cleaned string
    """
    # Delete punctuation first: doing it after the whitespace pass would
    # leave double or trailing spaces wherever a token was pure punctuation
    # (e.g. "a - b" would become "a  b").
    punctuation_table = str.maketrans("", "", string.punctuation)
    without_punctuation = s.translate(punctuation_table)
    # split() with no argument splits on any whitespace run and discards
    # empty tokens, so the join yields exactly one space between words.
    return " ".join(without_punctuation.split())