Python pandas模块

1、将 Python 字典转换为 DataFrame

#!/usr/bin/env python
# -*- coding:utf-8 -*-
# __author__ =
# pandas >= 1.5.3

import pandas as pd

# Each policy name maps to a comma-separated string of IP addresses.
pydict = {
    'Dosage': '1.1.1.1,2.2.2.2,4.4.4.4,5.5.5.5',
    'HalfLife': '6.6.6.6,7.7.7.7,8.8.8.8',
    'Cmax': '20.20.20.20',
}

# One row per dict entry: the key goes to '策略名', the value to 'IP'.
df = pd.DataFrame(list(pydict.items()), columns=['策略名', 'IP'])
# Number the rows starting from 1 instead of the default 0.
df.index = list(range(1, len(df) + 1))
print(df)

2、设置 Excel 表格样式

#!/usr/bin/env python
# -*- coding:utf-8 -*-
#__author__ =
# pandas >= 1.5.3

import pandas as pd

# Sample records: each dict holds a switch name ('switchname'), a MAC address
# ('mac') and an interface name ('interface').
data = [
    {'switchname': 'RuiJie#S6510-48VS8CQ-01', 'mac': 'ac:1f:6b:f8:70:f3', 'interface': 'TFGigabitEthernet 0/20'},
    {'switchname': 'RuiJie#S6510-48VS8CQ-01', 'mac': 'ac:1f:6b:f8:ba:c3', 'interface': 'TFGigabitEthernet 0/21'},
    {'switchname': 'RuiJie#S6510-48VS8CQ-01', 'mac': 'ac:1f:6b:f8:bd:65', 'interface': 'TFGigabitEthernet 0/15'},
    {'switchname': 'RuiJie#S6510-48VS8CQ-01', 'mac': 'ac:1f:6b:f8:c2:a1', 'interface': 'TFGigabitEthernet 0/16'},
    {'switchname': 'RuiJie#S6510-48VS8CQ-02', 'mac': 'ac:1f:6b:f8:c6:ef', 'interface': 'TFGigabitEthernet 0/19'},
    {'switchname': 'RuiJie#S6510-48VS8CQ-02', 'mac': 'ac:1f:6b:f8:ce:5f', 'interface': 'TFGigabitEthernet 0/18'},
    {'switchname': 'RuiJie#S6510-48VS8CQ-02', 'mac': 'ac:1f:6b:f8:ce:61', 'interface': 'TFGigabitEthernet 0/17'},
    {'switchname': 'RuiJie#S6510-48VS8CQ-02', 'mac': 'ac:1f:6b:f8:cf:dd', 'interface': 'TFGigabitEthernet 0/14'},
    {'switchname': 'RuiJie#S6510-48VS8CQ-03', 'mac': 'b0:26:28:7a:94:30', 'interface': 'TFGigabitEthernet 0/6'},
    {'switchname': 'RuiJie#S6510-48VS8CQ-03', 'mac': 'b0:26:28:7a:94:e0', 'interface': 'TFGigabitEthernet 0/5'},
    {'switchname': 'RuiJie#S6510-48VS8CQ-03', 'mac': 'ac:1f:6b:f8:70:f2', 'interface': 'TFGigabitEthernet 0/41'},
    {'switchname': 'RuiJie#S6510-48VS8CQ-03', 'mac': 'ac:1f:6b:f8:ba:c2', 'interface': 'TFGigabitEthernet 0/42'},
    {'switchname': 'RuiJie#S6510-48VS8CQ-04', 'mac': 'ac:1f:6b:f8:bd:64', 'interface': 'TFGigabitEthernet 0/36'},
    {'switchname': 'RuiJie#S6510-48VS8CQ-04', 'mac': 'ac:1f:6b:f8:c2:a0', 'interface': 'TFGigabitEthernet 0/37'},
    {'switchname': 'RuiJie#S6510-48VS8CQ-04', 'mac': 'ac:1f:6b:f8:c6:ee', 'interface': 'TFGigabitEthernet 0/40'},
    {'switchname': 'RuiJie#S6510-48VS8CQ-04', 'mac': '4c:00:0a:b6:22:0e', 'interface': 'TFGigabitEthernet 0/19'}]

# Field order used for the sheet, paired with the header shown for each field.
headers = {'switchname': '交换机名称', 'interface': '接口名称', 'mac': 'MAC地址'}
df = pd.DataFrame(data, columns=list(headers)).rename(columns=headers)
# Number the rows starting from 1 instead of the default 0.
df.index = list(range(1, len(df) + 1))

# CSS applied to every cell. Enable the per-side entries instead of 'border'
# to draw only selected edges.
cell_css = {
    'border': '1px solid black',  # all four borders
    # 'border-left': '1px solid black',  # left border only
    # 'border-right': '1px solid black',  # right border only
    # 'border-top': '1px solid black',  # top border only
    # 'border-bottom': '1px solid black',  # bottom border only
}
# From here on df is a Styler, not a DataFrame; the Styler does the export.
df = df.style.set_properties(**cell_css)
df.to_excel('abcd.xlsx')

3、读取 Excel 并将数据转换为字典

#!/usr/bin/env python
# -*- coding:utf-8 -*-
# __author__ =
# pandas >= 1.5.3


import pandas as pd

# Load the worksheet once; every conversion below works from this one frame.
df = pd.read_excel('data.xlsx', sheet_name='Sheet1')

# orient='records' turns each row into its own dict, so a single-column frame
# yields [{'column_name': value}, ...]. See the DataFrame.to_dict
# documentation for the other orientations.
column1_dict = df[['column_name']].to_dict(orient='records')

# The column's values with duplicates removed, as a plain list.
column1_list = df['column_name'].drop_duplicates().tolist()

# Build a {key_column value: value_column value} mapping: index the value
# column by the key column and let Series.to_dict() pair them up. If a key
# occurs more than once, the last row for that key wins.
dict_data = df.set_index('key_column')['value_column'].to_dict()

4、pandas数据筛选

#!/usr/bin/env python
# -*- coding:utf-8 -*-
# __author__ =
# pandas >= 1.5.3
# pandas条件筛选/过滤
# https://blog.csdn.net/qq_42375376/article/details/133362058

import pandas as pd

# Sample student records. Several 'name' and 'sex' values carry a trailing
# space, which the clean-up steps further down strip off.
left = [
    {'name': '张三 ', 'sex': '男 ', 'age': 22, '专业': '计算机', '学制': 4},
    {'name': '李四 ', 'sex': '女', 'age': 22, '专业': '会计', '学制': 3},
    {'name': '王五 ', 'sex': '男', 'age': 23, '专业': '金融', '学制': 5},
    {'name': '赵六 ', 'sex': '女 ', 'age': 22, '专业': '计算机', '学制': 4}
]

# Records with missing fields, used for the fillna demonstration.
right = [
    {'name': '孙七', 'sex': '男', '专业': '医学', '学制': 6},
    {'name': '吴八 ', 'sex': '女 ', 'age': 24}
]

# --- Select the people aged 22 whose major is 计算机 ---
AGE = 22
SPEC = '计算机'

df_result = pd.DataFrame(left, columns=['name', 'sex', 'age', '专业', '学制'])
df_res = df_result[(df_result['age'] == AGE) & (df_result['专业'] == SPEC)]
# Number the rows starting from 1 instead of keeping the original positions.
df_res.index = range(1, len(df_res) + 1)

# --- Reference an outer variable inside a query expression ---
# Inside DataFrame.query, '@NAME' reads the Python variable NAME.
expr = "age > @AGE and 学制 < 6 and 专业 == '金融'"
df_res1 = df_result.query(expr)
df_res1.index = range(1, len(df_res1) + 1)

# --- Merging DataFrames ---
# Strip whitespace from one column. df_res is the result of a boolean filter,
# and assigning a column on it directly makes pandas emit a warning
# (SettingWithCopyWarning); working on an explicit copy avoids that.
df_res = df_res.copy()
df_res['name'] = df_res['name'].str.strip()
# Strip whitespace from every string cell. DataFrame.applymap was renamed to
# DataFrame.map in pandas 2.1, so use whichever method this version provides.
_elementwise = df_res.map if hasattr(pd.DataFrame, 'map') else df_res.applymap
df_res = _elementwise(lambda x: x.strip() if isinstance(x, str) else x)
# merge() pairs rows whose 'on' values are exactly equal, so stray spaces in
# that column would stop otherwise matching rows from joining.
df_all = pd.merge(df_res, df_res1, on='sex')
# Stack the two tables on top of each other with concat().
df_all1 = pd.concat([df_res, df_res1], ignore_index=True)

# --- Fill missing values with a custom placeholder via fillna ---
df_res2 = pd.DataFrame(right, columns=['name', 'sex', 'age', '专业', '学制']).fillna('空')
df_all2 = pd.merge(df_all1, df_res2, on='name', how='outer').fillna('空')
df_all2.index = range(1, len(df_all2) + 1)

# --- Select the people whose major starts with '会' or ends with '融' ---
df_res3 = df_result[(df_result['专业'].str.startswith('会')) | (df_result['专业'].str.endswith('融'))]

# --- Select the people whose major contains '算' ---
df_res4 = df_result[df_result['专业'].str.contains('算')]

# --- Filter rows with a predicate function ---
def starts_with_a(name):
    """Return True when ``name`` is a string ending in '融'.

    Despite its name, this predicate tests the *end* of the text. The name is
    kept unchanged so existing references to it keep working.

    Args:
        name: Cell value to test, for example one entry of a '专业' column.

    Returns:
        bool: True if ``name`` is a ``str`` that ends with '融'. Non-string
        values, such as the NaN pandas stores for a missing cell, yield False
        instead of raising AttributeError.
    """
    return isinstance(name, str) and name.endswith('融')
df_res5 = df_result[df_result['专业'].apply(starts_with_a)]

# --- Print the results ---
# To inspect dtypes instead: print(df_res.dtypes) shows every column's dtype,
# print(df_res['name'].dtype) shows the 'name' column only.
# To export the merged table as well: df_all2.to_excel('test.xlsx')
for frame in (df_res, df_res1, df_res2,
              df_all, df_all1, df_all2,
              df_res3, df_res4, df_res5):
    print(frame)

5、根据一列数据的情况生成一列新的数据

#!/usr/bin/env python
# -*- coding:utf-8 -*-
# __author__ =
# pandas >= 1.5.3

import pandas as pd

# Sample score sheet: one dict per student, keyed by the field names below.
_FIELDS = ('姓名', '性别', '成绩')
reslist = [
    dict(zip(_FIELDS, row))
    for row in (
        ('张三', '男', 100),
        ('李四', '女', 80),
        ('王五', '男', 60),
        ('赵六', '女', 40),
    )
]

def getcolumn(status):
    """Translate a score into its rating label.

    Args:
        status: The score; any value ``float()`` accepts, such as a number
            or a numeric string.

    Returns:
        str: "优秀" for a score above 80, "良好" for a score above 60 and up
        to 80 inclusive, and "一般" for everything else.
    """
    score = float(status)
    if score > 80:
        return "优秀"
    if 60 < score <= 80:
        return "良好"
    return "一般"

df = pd.DataFrame(reslist, columns=['姓名', '性别', '成绩'])
# Add a '评价' column derived from '成绩' alone. Series.map calls getcolumn
# once per score, so there is no need to hand it a whole row the way
# DataFrame.apply(axis=1) does.
df['评价'] = df['成绩'].map(getcolumn)
# Number the rows starting from 1 instead of the default 0.
df.index = range(1, len(df) + 1)
print(df)

6、并行计算polars库（pip install polars）

参考链接：
https://zhuanlan.zhihu.com/p/340119918 # Pandas简介
https://blog.csdn.net/weixin_47661174/article/details/124697842 # Pandas DataFrame的合并
https://blog.csdn.net/weixin_43237709/article/details/121622736 # pandas样式
https://pandas.pydata.org/pandas-docs/stable/user_guide/index.html # pandas使用指导
https://jupyter.org/install # jupyter安装
https://www.cnblogs.com/clschao/articles/10906415.html # Jupyter简单使用
https://www.jianshu.com/p/91365f343585 # Jupyter Notebook介绍、安装及使用教程
https://zhuanlan.zhihu.com/p/149091363 # 7种最流行的Python绘图库
https://opensource.com/article/20/4/plot-data-python # 7种最流行的Python绘图库
https://www.cnblogs.com/lcl-cn/p/17832543.html # pandas merge函数数据合并
https://blog.csdn.net/qq_38727995/article/details/124459704 # pandas dataframe的apply方法
https://blog.csdn.net/weixin_47661174/article/details/124697842 # Pandas DataFrame的合并

posted @ 2024-03-27 11:14 風￡飛阅读(57) 评论(0) 收藏举报

刷新页面返回顶部

風￡飛

Python pandas模块

公告