Splitting a string on multiple delimiters

Version using the re module

import re


# Sample text for the regex-based splitter: tokens separated by commas,
# semicolons, runs of spaces and newlines, with "ace" appearing twice.
string_out = (
    "ace, bda; des; edf;fsa; gas;   ace\n"
    "b\n"
    "e"
)



def split_re(origin_string='', separators=','):
    """Split a string on any of several single-character separators.

    Every character of ``separators`` is treated as a literal delimiter.
    Empty pieces are dropped and duplicates are removed, keeping the first
    occurrence of each piece in its original position.  Pieces are not
    stripped; include a space in ``separators`` to discard padding.

    :param origin_string: text to split.
    :param separators: string whose characters are the delimiters.
    :return: list of unique, non-empty pieces in order of first appearance.
    """
    if not separators:
        # An empty character class ("[]") is not a valid pattern; with no
        # delimiters the whole input is the only possible piece.
        return [origin_string] if origin_string else []

    # Escape each delimiter so that characters with a special meaning inside
    # a character class (']', '^', '-', backslash) are matched literally.
    pattern = '[%s]' % ''.join(re.escape(sep) for sep in separators)
    origin_list = re.split(pattern, origin_string)

    # If you also want the delimiters in the result, capture them instead:
    # origin_list = re.split('(%s)' % pattern, origin_string)

    # Order-preserving de-duplication; the set makes each lookup O(1).
    seen = set()
    total_list = []
    for data in origin_list:
        if data and data not in seen:
            seen.add(data)
            total_list.append(data)

    return total_list


# Demo: tokenize the sample with the regex splitter and print the unique
# tokens as a single comma-joined line.
result = ','.join(
    split_re(origin_string=string_out, separators='\n,+;;,、 ')
)

print(result)

My version (str.split only, no regex)

# Same sample text as before, reused for the str.split-based splitter.
string_out = (
    "ace, bda; des; edf;fsa; gas;   ace\n"
    "b\n"
    "e"
)


def split_simple(origin_string='', separators=','):
    """Split a string on several single-character separators using str.split.

    The separators are applied one after another: every piece produced so
    far is split again on the next separator, and surrounding whitespace is
    stripped from each piece after every split.  Empty pieces are dropped
    and duplicates are removed, keeping the first occurrence of each piece.

    :param origin_string: text to split.
    :param separators: string whose characters are the delimiters.
    :return: list of unique, non-empty pieces in order of first appearance.
    """
    origin_list = [origin_string]

    # Re-split every piece collected so far on the next separator.
    for sep in separators:
        sep_list = []
        for chunk in origin_list:
            sep_list.extend(piece.strip() for piece in chunk.split(sep))
        origin_list = sep_list

    # Drop empty pieces and repeats; the set makes each lookup O(1).
    seen = set()
    total_list = []
    for data in origin_list:
        if data and data not in seen:
            seen.add(data)
            total_list.append(data)

    return total_list


# Demo: tokenize the sample with the str.split-based splitter and print the
# unique tokens as a single comma-joined line.
result = ','.join(
    split_simple(origin_string=string_out, separators='\n,+;;,、 ')
)

print(result)


Python 3 version

from functools import reduce


def split_by_separator(origin_string='', separators=','):
    """Split a string on several single-character separators.

    Each character of ``separators`` is applied in turn to every piece
    produced so far, and whitespace around each piece is stripped after
    every split.  Empty pieces are discarded and duplicates are removed,
    keeping the first occurrence of each piece.

    The function only returns the result; it writes nothing to stdout.

    :param origin_string: text to split.
    :param separators: string whose characters are the delimiters.
    :return: list of unique, non-empty pieces in order of first appearance.
    """
    origin_list = [origin_string]
    for sep in separators:
        tmp_each = []
        for chunk in origin_list:
            tmp_each.extend(piece.strip() for piece in chunk.split(sep))
        origin_list = tmp_each

    # Order-preserving de-duplication of the non-empty pieces.
    seen = set()
    unique = []
    for data in origin_list:
        if data and data not in seen:
            seen.add(data)
            unique.append(data)
    return unique


# Sample input mixing ASCII delimiters with the ideographic comma.
string_out = ' ;vickey; hello; world; hey;how; are; \na、b,cd'

# Demo: tokenize the sample and print the unique tokens as a single
# comma-joined line.
result = ','.join(
    split_by_separator(origin_string=string_out, separators='\n,+;;,、')
)

print(result)

Python 2 version

#!/usr/bin/env python
# _*_ coding: utf-8 _*_
# @Time     : 2017/3/9 19:52
# @Author   : otfsenter
# @File     : a.py

#coding:utf-8

# Sample input: one entry per line, followed by a comma-separated line and a
# plus-separated line; the text starts and ends with a newline.
result = (
    '\n'
    'sdf-asd\n'
    'sdf-asd01\n'
    'sdf-asd02\n'
    'sdf-asd,sdf-asd01 ,sdf-asd02\n'
    'aui+otfsenter+which\n'
)

# result = ''
# with open('tmp.txt', 'r') as f:
#     for i in f:
#         result += i
#
# print result

def split_by_separator(string='', separators=','):
    """Split a string on several single-character separators.

    Each character of ``separators`` is applied in turn to every piece
    produced so far, and whitespace around each piece is stripped after
    every split.  Empty pieces are discarded and duplicates are removed,
    keeping the first occurrence of each piece.

    Uses no ``reduce``, so it runs unchanged on Python 2 and Python 3.

    :param string: text to split.
    :param separators: string whose characters are the delimiters.
    :return: list of unique, non-empty pieces in order of first appearance.
    """
    rst = [string]
    for sep in separators:
        tmp = []
        for chunk in rst:
            tmp.extend([piece.strip() for piece in chunk.split(sep)])
        rst = tmp

    # Order-preserving de-duplication of the non-empty pieces.
    seen = set()
    list_tmp = []
    for data in rst:
        if data != '' and data not in seen:
            seen.add(data)
            list_tmp.append(data)
    return list_tmp

# Parenthesized single-argument form prints the same list on Python 2 and is
# also valid syntax on Python 3.
print(split_by_separator(result, '\n,+'))

posted @ 2017-03-13 18:42  idlewith  阅读(208)  评论(0)  编辑  收藏  举报