数据处理

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# Author kevin_hou
 
# Read the first line of each athlete's file, trim surrounding whitespace,
# and split it on commas into a list of raw time strings.
with open('james.txt') as jaf:
    data = jaf.readline()
    james = data.strip().split(',')

with open('julie.txt') as juf:
    data = juf.readline()
    julie = data.strip().split(',')

with open('mikey.txt') as mif:
    data = mif.readline()
    mikey = data.strip().split(',')

with open('sarah.txt') as saf:
    data = saf.readline()
    sarah = data.strip().split(',')
 
# print(james)
# print(julie)
# print(mikey)
# print(sarah)
 
'''
['2:34', '3:21', '2:34', '2.45', '3.01', '2:01', '2:01', '3:10', '2:22']
['2.59', '2.11', '2:11', '2:23', '3:10', '2:23', '3:10', '3:21', '3-21']
['2:22', '3.01', '3:01', '3.02', '3:02', '3.02', '3:22', '2.49', '2:38']
['2:58', '2.58', '2:39', '2-25', '2-25', '2:54', '2.18', '2:55', '2:55']
'''
 
# data = [1,9,4,2,6,7,0]
# print(data) #[1, 9, 4, 2, 6, 7, 0]
 
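# data.sort() # in-place sort: data becomes [0, 1, 2, 4, 6, 7, 9]
# print(data)
 
# data2 = sorted(data)
# print(data) # sorted() leaves the original untouched: [1, 9, 4, 2, 6, 7, 0] (if data.sort() above was not run)
# print(data2)    # sorted copy: [0, 1, 2, 4, 6, 7, 9]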
 
 
def sanitize(time_string):
    """Return time_string with its '-' or ':' separator replaced by '.'.

    The string is checked for '-' first, then for ':'. When one is found
    the string is split on it into a minutes part and a seconds part, which
    are rejoined as "mins.secs". A string containing neither separator is
    returned unchanged.

    Raises:
        ValueError: if splitting on the separator does not yield exactly
            two parts.
    """
    for separator in ('-', ':'):
        if separator in time_string:
            minutes, seconds = time_string.split(separator)
            return minutes + '.' + seconds
    # Nothing to clean up.
    return time_string
 
 
# Build a cleaned copy of each athlete's list by passing every raw time
# string through sanitize(), so all entries share the "mins.secs" form.
clean_james = [sanitize(each_t) for each_t in james]
clean_julie = [sanitize(each_t) for each_t in julie]
clean_mikey = [sanitize(each_t) for each_t in mikey]
clean_sarah = [sanitize(each_t) for each_t in sarah]


# Print each cleaned list in ascending order. The entries are still strings,
# so sorted() compares them lexicographically.
print(sorted(clean_james))
print(sorted(clean_julie))
print(sorted(clean_mikey))
print(sorted(clean_sarah))
 
'''
['2.01', '2.01', '2.22', '2.34', '2.34', '2.45', '3.01', '3.10', '3.21']
['2.11', '2.11', '2.23', '2.23', '2.59', '3.10', '3.10', '3.21', '3.21']
['2.22', '2.38', '2.49', '3.01', '3.01', '3.02', '3.02', '3.02', '3.22']
['2.18', '2.25', '2.25', '2.39', '2.54', '2.55', '2.55', '2.58', '2.58']
'''
# By default, both the sort() method and the sorted() BIF order data ascending.
# To sort in descending order, pass reverse=True to sort() or sorted(); Python takes care of the rest.

clean_mikey = [sanitize(each_t) for each_t in mikey]
 
# Minutes to seconds: multiply every element by 60.
mins = [1, 2, 3]
secs = [minute * 60 for minute in mins]
print(secs)  # [60, 120, 180]

# Meters to feet: multiply every element by 3.281.
meters = [1, 10, 3]
feet = [length * 3.281 for length in meters]
print(feet)  # [3.281, 32.81, 9.843]

# Upper-case every word in the list.
lower = ["I", "don't", "like", "span"]
upper = [word.upper() for word in lower]
print(upper)  # ['I', "DON'T", 'LIKE', 'SPAN']
 
# The same time written with three different separators.
dirty = ['2-22', '2:22', '2.22']
clean = [sanitize(t) for t in dirty]
print(clean)    #['2.22', '2.22', '2.22']

# Convert the normalized strings to floats.
clean = [float(s) for s in clean]
print(clean)    #[2.22, 2.22, 2.22]

# Sanitize and convert in a single comprehension. float() has to wrap each
# sanitize(t) call; wrapping the whole generator expression raises TypeError.
clean = [float(sanitize(t)) for t in ['2-22', '3:33', '4.44']]
print(clean)    #[2.22, 3.33, 4.44]
<br>

  

  

posted @   JRS077  阅读(82)  评论(0)  编辑  收藏  举报
编辑推荐:
· 基于Microsoft.Extensions.AI核心库实现RAG应用
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
· Linux系列:如何用 C#调用 C方法造成内存泄露
阅读排行:
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
· 单元测试从入门到精通
· 上周热点回顾(3.3-3.9)
· winform 绘制太阳,地球,月球 运作规律
点击右上角即可分享
微信分享提示