03.描述性统计代码
from collections import Counter
from math import sqrt


def frequency(data):
    """Return the relative frequency of every distinct value in ``data``.

    Args:
        data: Iterable of hashable observations.

    Returns:
        A list of ``(value, share)`` tuples ordered from the most to the
        least common value, where ``share`` is the value's count divided
        by the total number of observations. Empty input yields ``[]``.
    """
    counter = Counter(data)
    # Total taken from the counts themselves, computed once instead of
    # calling len(data) on every iteration.
    total = sum(counter.values())
    return [(value, count / total) for value, count in counter.most_common()]


def mode(data):
    """Return the mode(s) of ``data`` together with their count.

    Args:
        data: Iterable of hashable observations.

    Returns:
        A tuple ``(modes, count)`` where ``modes`` is the list of values
        sharing the highest count. ``(None, None)`` is returned when there
        is no mode: the input is empty or every value occurs exactly once.
    """
    # Sort once; most_common() re-sorts the counts on every call.
    ranked = Counter(data).most_common()
    if not ranked or ranked[0][1] == 1:
        return None, None

    top_count = ranked[0][1]
    modes = [value for value, count in ranked if count == top_count]
    return modes, top_count


def median(data):
    """Return the median of ``data``.

    For an even number of observations the two middle values are averaged.

    Args:
        data: Non-empty iterable of orderable numbers.

    Raises:
        IndexError: If ``data`` is empty.
    """
    sorted_data = sorted(data)
    n = len(sorted_data)

    if n % 2 == 1:
        return sorted_data[n // 2]

    return (sorted_data[n // 2 - 1] + sorted_data[n // 2]) / 2


def mean(data):
    """Return the arithmetic mean of ``data``.

    Args:
        data: Non-empty sequence of numbers.

    Raises:
        ZeroDivisionError: If ``data`` is empty.
    """
    return sum(data) / len(data)


def rng(data):
    """Return the range (largest minus smallest value) of ``data``.

    Args:
        data: Non-empty sequence of numbers.

    Raises:
        ValueError: If ``data`` is empty.
    """
    return max(data) - min(data)


def quartile(data):
    """Return the quartiles ``(q1, q2, q3)`` of ``data``.

    ``q2`` is the median of the whole sample; ``q1`` and ``q3`` are the
    medians of the lower and upper halves. For an odd sample size the
    middle element belongs to neither half.

    Args:
        data: Sequence of numbers.

    Returns:
        A tuple ``(q1, q2, q3)``; ``(None, None, None)`` when ``data`` has
        fewer than four observations.
    """
    n = len(data)
    if n < 4:
        return None, None, None

    sorted_data = sorted(data)
    # (n + 1) // 2 skips the middle element when n is odd and equals
    # n // 2 when n is even, so one pair of slices covers both cases.
    q1 = median(sorted_data[:n // 2])
    q2 = median(sorted_data)
    q3 = median(sorted_data[(n + 1) // 2:])
    return q1, q2, q3


def variance(data):
    """Return the sample variance of ``data`` (divisor ``n - 1``).

    Args:
        data: Sequence of numbers.

    Returns:
        The sample variance, or ``None`` when ``data`` has fewer than two
        observations.
    """
    n = len(data)
    if n <= 1:
        return None

    mean_value = mean(data)
    return sum((e - mean_value) ** 2 for e in data) / (n - 1)


def std(data):
    """Return the sample standard deviation of ``data``.

    Args:
        data: Sequence of numbers.

    Returns:
        The square root of ``variance(data)``, or ``None`` when the
        variance is undefined (fewer than two observations).
    """
    var = variance(data)
    # variance() signals "undefined" with None; propagate it rather than
    # letting sqrt(None) raise TypeError.
    if var is None:
        return None
    return sqrt(var)
作图:
import random
from collections import Counter

import matplotlib.pyplot as plt

if __name__ == "__main__":
    # Fixed seed so every run draws the same pseudo-random samples.
    random.seed(666)

    # Scatter plot: 100 points with integer coordinates drawn from 0..100.
    xs = [random.randint(0, 100) for _ in range(100)]
    ys = [random.randint(0, 100) for _ in range(100)]
    plt.scatter(xs, ys)
    plt.show()

    # Line plot: 100 random values plotted against their index 0..99.
    values = [random.randint(0, 100) for _ in range(100)]
    plt.plot(list(range(100)), values)
    plt.show()

    # Bar plot: how often each distinct value occurs in a small sample.
    ratings = [3, 3, 4, 1, 5, 4, 2, 1, 5, 4, 4, 4, 5, 3, 2, 1, 4, 5, 5]
    ranked = Counter(ratings).most_common()
    categories = [value for value, _ in ranked]
    heights = [count for _, count in ranked]
    plt.bar(categories, heights)
    plt.show()

    # Histogram: 1000 samples from 1..100 grouped into 5 bins.
    samples = [random.randint(1, 100) for _ in range(1000)]
    plt.hist(samples, rwidth=0.8, bins=5, density=True)
    plt.show()

    # Box plot of a fresh sample plus two values far outside 1..100
    # (presumably added to demonstrate how outliers are drawn).
    samples = [random.randint(1, 100) for _ in range(1000)]
    samples.extend([200, -200])
    plt.boxplot(samples)
    plt.show()

    # Two samples drawn side by side in a single box-plot figure.
    group_a = [random.randint(66, 166) for _ in range(200)]
    group_b = [random.randint(60, 120) for _ in range(200)]
    plt.boxplot([group_a, group_b])
    plt.show()