数据处理02
建立4个txt文件记录数据:james.txt、julie.txt、mikey.txt、sarah.txt,每个文件包含一行以逗号分隔的成绩。
# Print each athlete's three fastest unique times.
#
# Every data file is read as a single line of comma-separated time strings.
# A time may separate minutes and seconds with '-', ':' or '.'; all of them
# are normalised to 'mins.secs' before sorting and de-duplicating.

ATHLETE_FILES = ('james.txt', 'julie.txt', 'mikey.txt', 'sarah.txt')


def load_times(filename):
    """Return the comma-separated fields from the first line of *filename*.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    with open(filename) as data_file:
        return data_file.readline().strip().split(',')


def sanitize(time_string):
    """Normalise a 'mins-secs' or 'mins:secs' string to 'mins.secs'.

    A string containing neither '-' nor ':' needs no cleaning and is
    returned unchanged.

    Raises:
        ValueError: if splitting on the separator does not yield exactly
            two parts (e.g. the separator occurs more than once).
    """
    if '-' in time_string:
        splitter = '-'
    elif ':' in time_string:
        splitter = ':'
    else:
        return time_string
    mins, secs = time_string.split(splitter)
    return mins + '.' + secs


def fastest_unique(times, count=3):
    """Return the *count* smallest distinct sanitized times, ascending.

    The slice keeps indices 0 .. count-1 (index 0 included, index *count*
    excluded). Ordering is plain string comparison, so it agrees with
    numeric order only while all values have the same number of digits
    before the '.'.
    """
    # A set removes duplicates in one pass; sorting afterwards gives the
    # same result as sorting first and then filtering repeats by hand.
    return sorted({sanitize(t) for t in times})[:count]


if __name__ == '__main__':
    for athlete_file in ATHLETE_FILES:
        print(fastest_unique(load_times(athlete_file)))
输出
['2.01', '2.22', '2.34']
['2.11', '2.23', '2.59']
['2.22', '2.38', '2.49']
['2.18', '2.25', '2.39']
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------
数据理解
# Author kevin_hou
#
# Print each athlete's three fastest unique times.
# get_coach_data() replaces the four near-identical `with open(...)` blocks
# that would otherwise be needed, one per athlete file.

ATHLETE_FILES = ('james.txt', 'julie.txt', 'mikey.txt', 'sarah.txt')


def get_coach_data(filename):
    """Return the comma-separated fields from the first line of *filename*.

    If the file cannot be opened or read, the error is printed as
    'File error:<details>' and None is returned, so callers must check
    for None before iterating the result.
    """
    try:
        with open(filename) as f:
            data = f.readline()
    except IOError as ioerr:
        print('File error:' + str(ioerr))
        return None
    return data.strip().split(',')


def sanitize(time_string):
    """Normalise a 'mins-secs' or 'mins:secs' string to 'mins.secs'.

    A string containing neither '-' nor ':' is returned unchanged.

    Raises:
        ValueError: if splitting on the separator does not yield exactly
            two parts (e.g. the separator occurs more than once).
    """
    if '-' in time_string:
        splitter = '-'
    elif ':' in time_string:
        splitter = ':'
    else:
        return time_string
    mins, secs = time_string.split(splitter)
    return mins + '.' + secs


def top_three(times):
    """Return the three smallest distinct sanitized times, ascending.

    Values are compared as strings, not as numbers.
    """
    return sorted({sanitize(t) for t in times})[0:3]


if __name__ == '__main__':
    for athlete_file in ATHLETE_FILES:
        times = get_coach_data(athlete_file)
        # get_coach_data() has already reported the problem; skip the file
        # instead of failing with a TypeError while iterating None.
        if times is not None:
            print(top_three(times))
输出
['2.01', '2.22', '2.34']
['2.11', '2.23', '2.59']
['2.22', '2.38', '2.49']
['2.18', '2.25', '2.39']