删除文章里的中文符号和空格,换成英文逗号,并获取最后两个标签

def update_biaoqian(tag_list, title):
    """Normalize tag text and return its last two tags joined by a comma.

    The tag text is ``''.join(tag_list)``, or ``title`` when ``tag_list`` is
    exactly ``['']`` (no tags were supplied). Spaces and Chinese punctuation
    are treated as tag separators, ``|`` is removed outright, and empty
    fragments produced by adjacent/leading/trailing separators are dropped.

    Args:
        tag_list: Iterable of strings holding the raw tag text (a plain
            string also works, since it is joined unchanged).
        title: Fallback text used only when ``tag_list == ['']``.

    Returns:
        The last two tags joined by an ASCII comma, a single tag when only
        one exists, or ``''`` when no tag remains after cleaning.
    """
    # Separators per the original comment: space, ideographic comma,
    # full-width comma, ideographic full stop, full-width question mark and
    # full-width semicolon. They are written as escapes because the literal
    # characters had been stripped from the source, leaving replace('', ',')
    # calls that would insert a comma between every character.
    # NOTE(review): the exact original character set is reconstructed from
    # that comment - confirm against real input data.
    separators = ' \u3001\uff0c\u3002\uff1f\uff1b'
    cleanup = str.maketrans({**{ch: ',' for ch in separators}, '|': None})

    if tag_list == ['']:
        print('没有标签,取标题作为标签')
        raw_text = title
    else:
        raw_text = ''.join(tag_list)

    # Splitting on ',' and discarding empty pieces collapses runs of commas
    # and trims leading/trailing ones in one step, and cannot fail on an
    # empty string the way indexing tags[0] / tags[-1] did.
    pieces = [piece for piece in raw_text.translate(cleanup).split(',') if piece]
    return ','.join(pieces[-2:])

 

posted @ 2023-08-24 14:16  布都御魂  阅读(12)  评论(0)  编辑  收藏  举报