算数运算
| """ |
| 前面减后面 |
| add 加法 |
| sub 减法 |
| div 除法 |
| floordiv 整除 |
| mul 乘法 |
| pow 幂次方 |
| 后面减前面 |
| rsub 减法 |
| rdiv 除法 |
| rfloordiv 整除 |
| rpow 幂次方 |
| 替换无穷大 |
| replace(np.inf,0) |
| np.nan NaN |
| np.inf inf |
| -np.inf -inf |
| """ |
| |
| path = r"C:\work\python\pandas_files\计算.xlsx" |
| data = pd.read_excel(path) |
| print(data) |
output
| 1店 2店 |
| 0 5.0 3.0 |
| 1 NaN 1.0 |
| 2 1.0 NaN |
| # 1店的数都增加5 |
| data['1店'] = data['1店'] + 5 |
| print(data) |
output
| 1店 2店 |
| 0 10.0 3.0 |
| 1 NaN 1.0 |
| 2 6.0 NaN |
| # 汇总两个店的数据 |
| data['汇总'] = data["1店"] + data["2店"] |
| print(data) |
output
| 1店 2店 汇总 |
| 0 10.0 3.0 13.0 |
| 1 NaN 1.0 NaN |
| 2 6.0 NaN NaN |
| # 进行空值的处理 |
| data['汇总'] = data['1店'].fillna(0) + data['2店'].fillna(0) |
| print(data) |
output
| 1店 2店 汇总 |
| 0 10.0 3.0 13.0 |
| 1 NaN 1.0 1.0 |
| 2 6.0 NaN 6.0 |
| import numpy as np |
| # fill_value=0 在做运算前,将NaN值用0填充 |
| # 加法 |
| data['汇总'] = data['1店'].add(data['2店'], fill_value=0) |
| print(data) |
output
| 1店 2店 汇总 |
| 0 10.0 3.0 13.0 |
| 1 NaN 1.0 1.0 |
| 2 6.0 NaN 6.0 |
| |
| data['汇总'] = data['1店'].sub(data['2店'], fill_value=0) |
| print(data) |
output
| 1店 2店 汇总 |
| 0 10.0 3.0 7.0 |
| 1 NaN 1.0 -1.0 |
| 2 6.0 NaN 6.0 |
| |
| data['汇总'] = data['1店'].div(data['2店'], fill_value=0) |
| print(data) |
output
| 1店 2店 汇总 |
| 0 10.0 3.0 3.333333 |
| 1 NaN 1.0 0.000000 |
| 2 6.0 NaN inf |
| # 将无穷大替换为0 |
| data.replace(np.inf, 0, inplace=True) |
| print(data) |
output
| 1店 2店 汇总 |
| 0 10.0 3.0 3.333333 |
| 1 NaN 1.0 0.000000 |
| 2 6.0 NaN 0.000000 |
| # 相减 |
| data['汇总'] = data['1店'].sub(data['2店'], fill_value=0) |
| # 后面减前面 |
| data['汇总1'] = data['1店'].rdiv(data['汇总'], fill_value=0) |
| data.replace(-np.inf, 0, inplace=True) |
| print(data) |
output
| 1店 2店 汇总 汇总1 |
| 0 10.0 3.0 7.0 0.7 |
| 1 NaN 1.0 -1.0 0.0 |
| 2 6.0 NaN 6.0 1.0 |
| # 将NaN替换成0 |
| data.replace(np.nan, 0, inplace=True) |
| print(data) |
output
| 1店 2店 汇总 汇总1 |
| 0 10.0 3.0 7.0 0.7 |
| 1 0.0 1.0 -1.0 0.0 |
| 2 6.0 0.0 6.0 1.0 |
| |
分层索引
| path = r"C:\work\python\pandas_files\销售.xlsx" |
| data = pd.read_excel(path, header=[0, 1]) |
| print(data) |
output
| 土豆 倭瓜 |
| 销量 毛利 销量 毛利 |
| 0 10 5 20 6 |
| 1 11 4 30 5 |
| # 查看多层索引的列信息 |
| print(data.columns) |
output
| MultiIndex([('土豆', '销量'), |
| ('土豆', '毛利'), |
| ('倭瓜', '销量'), |
| ('倭瓜', '毛利')], |
| ) |
| |
| b = data[('土豆', '销量')] |
| print(b) |
output
| 0 10 |
| 1 11 |
| Name: (土豆, 销量), dtype: int64 |
| |
| b = data[[('土豆', '销量'), ('土豆', '毛利')]] |
| print(b) |
output
| |
| b = data[[('土豆', '销量'), ('倭瓜', '销量')]] |
| print(b) |
output
| 土豆 倭瓜 |
| 销量 销量 |
| 0 10 20 |
| 1 11 30 |
| |
| b = data[('土豆', '销量')] + data[('倭瓜', '销量')] |
| print(b) |
output
| |
| b = data['土豆'] + data['倭瓜'] |
| print(b) |
| print(b.columns) |
output
| 销量 毛利 |
| 0 30 11 |
| 1 41 9 |
| Index(['销量', '毛利'], dtype='object') |
| |
| b.columns = pd.MultiIndex.from_product([['总计'], b.columns]) |
| print(b) |
| |
output
| # 将汇总数据与原数据横向拼接 |
| data = pd.concat([data, b], axis=1) |
| print(data) |
output
| 土豆 倭瓜 总计 |
| 销量 毛利 销量 毛利 销量 毛利 |
| 0 10 5 20 6 30 11 |
| 1 11 4 30 5 41 9 |
分箱
| |
| score = np.random.randint(10, 100, 50) |
| print(score) |
| |
output
| [66 71 32 35 37 95 59 31 78 10 29 24 20 98 73 66 14 99 14 62 75 68 96 21 |
| 84 97 69 21 35 87 74 28 57 32 76 29 66 32 97 12 87 91 69 34 72 44 91 26 |
| 15 55] |
| |
| xz = [0, 60, 70, 80, 100] |
| xzname = ['差', '中', '良', '优'] |
| |
| b = pd.cut(score, xz) |
| print(b) |
output
| [(60, 70], (70, 80], (0, 60], (0, 60], (0, 60], ..., (0, 60], (80, 100], (0, 60], (0, 60], (0, 60]] |
| Length: 50 |
| Categories (4, interval[int64, right]): [(0, 60] < (60, 70] < (70, 80] < (80, 100]] |
| |
| c = b.value_counts() |
| print(c) |
output
| (0, 60] 25 |
| (60, 70] 7 |
| (70, 80] 7 |
| (80, 100] 11 |
| Name: count, dtype: int64 |
| |
| c = pd.Series(b).value_counts() |
| print(c) |
output
| (0, 60] 25 |
| (80, 100] 11 |
| (60, 70] 7 |
| (70, 80] 7 |
| Name: count, dtype: int64 |
| |
| b = pd.cut(score, xz, labels=xzname) |
| print(b.value_counts()) |
| |
output
| 差 25 |
| 中 7 |
| 良 7 |
| 优 11 |
| Name: count, dtype: int64 |
| |
| |
| score = np.random.randint(10, 100, 50) |
| print(score) |
| |
| print(score.max()) |
| print(score.min()) |
| |
| b = pd.qcut(score, q=4) |
| print(b.value_counts()) |
| |
output
| [38 61 75 18 60 92 13 55 79 15 91 28 86 85 18 85 32 29 30 78 73 43 94 49 |
| 27 36 74 90 69 98 17 56 26 86 48 21 56 77 41 42 22 50 46 66 39 69 53 64 |
| 70 64] |
| |
| 98 |
| |
| 13 |
| |
| (12.999, 33.0] 13 |
| (33.0, 55.5] 12 |
| (55.5, 74.75] 12 |
| (74.75, 98.0] 13 |
| Name: count, dtype: int64 |
合并
| import os |
| |
| path = r"C:\work\python\pandas_files\excel的合并与拆分01\\" |
| |
| data = pd.DataFrame() |
| |
| allfile = os.listdir(path) |
| |
| for fname in allfile: |
| data1 = pd.read_excel(path+fname) |
| data = pd.concat([data, data1], axis=0) |
| data.set_index("日期", inplace=True) |
| data.to_excel('合并.xlsx') |
| |
| |
| path = r"C:\work\python\pandas_files\excel的合并与拆分03\合并2.xlsx" |
| |
| |
| |
| data = pd.read_excel(path, sheet_name=None) |
| print(data) |
| newdata = pd.DataFrame() |
| for v in data.values(): |
| newdata = pd.concat([newdata, v]) |
| newdata.set_index("序号", inplace=True) |
| print(newdata) |
| newdata.to_excel("a合并.xlsx") |
拆分
| |
| path = r"C:\work\python\pandas_files\excel的合并与拆分03\拆分.xlsx" |
| data = pd.read_excel(path) |
| print(data) |
| |
| bmlst = data['部门'].drop_duplicates() |
| for bm in bmlst: |
| b = data[data.部门 == bm] |
| b.to_excel("a"+bm+'.xlsx') |
| |
| |
| path = r"C:\work\python\pandas_files\excel的合并与拆分03\拆分.xlsx" |
| data = pd.read_excel(path) |
| print(data) |
| |
| bmlst = data['部门'].drop_duplicates() |
| with pd.ExcelWriter('a拆分.xlsx') as ew: |
| for bm in bmlst: |
| b = data[data.部门 == bm] |
| b.to_excel(ew, sheet_name=bm, index=False) |
· 阿里巴巴 QwQ-32B真的超越了 DeepSeek R-1吗?
· 10年+ .NET Coder 心语 ── 封装的思维:从隐藏、稳定开始理解其本质意义
· 【设计模式】告别冗长if-else语句:使用策略模式优化代码结构
· 字符编码:从基础到乱码解决
· 提示词工程——AI应用必不可少的技术