数据处理02
# Author kevin_hou
#
# Setup: create four text files holding the recorded times, one
# comma-separated line each: james.txt / julie.txt / mikey.txt / sarah.txt
#
# The first draft of this script opened every file with its own `with` block
# and removed duplicates with a hand-written "append if not already present"
# loop per athlete. The helpers below replace those four copy-pasted stanzas
# and produce the same report.

ATHLETE_FILES = ('james.txt', 'julie.txt', 'mikey.txt', 'sarah.txt')


def get_coach_data(filename):
    """Read the first line of *filename* and split it on commas.

    Args:
        filename: Path of a text file whose first line is a comma-separated
            list of time strings.

    Returns:
        The list of fields from the first line (the line itself is stripped
        of surrounding whitespace before splitting), or ``None`` when the
        file cannot be opened or read. In that case the error is printed
        rather than raised.
    """
    try:
        with open(filename) as f:
            data = f.readline()
    except IOError as ioerr:
        print('File error:' + str(ioerr))
        return None
    return data.strip().split(',')


def sanitize(time_string):
    """Normalise a 'mins-secs' or 'mins:secs' time string to 'mins.secs'.

    Args:
        time_string: A time such as '2-34', '3:21' or '2.45'.

    Returns:
        The time with '-' or ':' replaced by '.'. Strings containing neither
        separator are returned unchanged.

    Raises:
        ValueError: If splitting on the separator does not yield exactly two
            parts (for example '1-2-3').
    """
    if '-' in time_string:
        splitter = '-'
    elif ':' in time_string:
        splitter = ':'
    else:
        # Nothing to clean up.
        return time_string
    mins, secs = time_string.split(splitter)
    return mins + '.' + secs


def fastest_times(times, count=3):
    """Return the *count* smallest distinct sanitized times, ascending.

    Args:
        times: Iterable of raw time strings as returned by get_coach_data.
        count: How many entries to keep (the slice ``[0:count]``, i.e. items
            0 up to but not including ``count``).

    Returns:
        A sorted list of at most *count* unique 'mins.secs' strings.
    """
    # Blank fields (empty file, trailing comma) and padding around a field
    # would otherwise sort ahead of every real time, so drop/strip them.
    unique = {sanitize(t.strip()) for t in times if t.strip()}
    # NOTE: ordering is plain string comparison, exactly as in the original
    # script; it is only numerically correct while minutes stay single-digit.
    return sorted(unique)[:count]


def main():
    """Print the three fastest distinct times for every athlete file."""
    for filename in ATHLETE_FILES:
        times = get_coach_data(filename)
        if times is None:
            # get_coach_data has already reported the problem; iterating
            # None here would raise TypeError.
            continue
        print(fastest_times(times))


if __name__ == '__main__':
    main()

# Expected output with the tutorial's data files:
# ['2.01', '2.22', '2.34']
# ['2.11', '2.23', '2.59']
# ['2.22', '2.38', '2.49']
# ['2.18', '2.25', '2.39']
['2.01', '2.22', '2.34']
['2.11', '2.23', '2.59']
['2.22', '2.38', '2.49']
['2.18', '2.25', '2.39']
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------
数据理解
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 基于Microsoft.Extensions.AI核心库实现RAG应用
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
· Linux系列:如何用 C#调用 C方法造成内存泄露
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
· 单元测试从入门到精通
· 上周热点回顾(3.3-3.9)
· winform 绘制太阳,地球,月球 运作规律