pandas操作Excel笔记

pip install -i https://pypi.tuna.tsinghua.edu.cn/simple python-office -U

元素

import pandas as pd

  1. 数据帧
    df = pd.DataFrame({'ID': [1, 2, 3], 'Name': ['Tim', 'Vinctor', 'Nick']})
  2. Index
    df.set_index('ID',inplace=True)
  3. Series

行列表示

import pandas as pd

s1 = pd.Series({'x': 100, 'y': 200, 'z': 300})
print(s1)

L1 = [100, 200, 300]
L2 = ['x', 'y', 'z']
s2 = pd.Series(L1, index=L2)
print(s2)
print("==")
# index不能对齐则取并集
s3 = pd.Series([1, 2, 3], index=[1, 2, 3], name='A')
s4 = pd.Series([100, 20, 30], index=[1, 2, 3], name='B')
s5 = pd.Series([100, 200, 300], index=[2, 3, 4], name='C')
# index竖向
df = pd.DataFrame({s3.name: s3, s4.name: s4, s5.name: s5})
# df.set_index('A', inplace=True)
print(df)
# index横向
df = pd.DataFrame([s3, s4, s5])
print("==")
print(df)

数据填充

import pandas as pd
from datetime import date, timedelta


# date month_delta 考虑尚不周全(2.31🤣)
def add_month(d: date, md: int):
	yd = md // 12
	m = d.month + md % 12
	if m != 12:  # 再进行一次操作即可全部完成
		yd += m // 12
		m %= 12
	return date(d.year + yd, m, d.day)


# 暂时先不设index_col为ID,否则不好操作
# 当pandas读取到NaN时会设为浮点类型,str填充方便
books = pd.read_excel('C:/Temp/books.xlsx', skiprows=5, usecols='B:E', index_col=None,
											dtype={'ID': str, 'InStore': str, 'Date': str})
start = date(2018, 1, 1)
print(type(books['ID']))

for i in books.index:
	books['ID'].at[i] = i + 1
	# books.at[i,'ID']=i+1 # 先横后竖
	books['InStore'].at[i] = 'Yes' if i % 2 == 0 else 'No'
	# books['Date'].at[i] = start + timedelta(days=i) # minutes hours miliseconds
	# books['Date'].at[i] = date(start.year + i, start.month, start.day)
	books['Date'].at[i] = add_month(start, i)

print(books)
books.set_index('ID', inplace=True)
books.to_excel('C:/Temp/books_output.xlsx')

排序

import pandas as pd

products = pd.read_excel('C:/Temp/list.xlsx', index_col='ID')
# default from low to high | ascend=climb
products.sort_values(by=['Worthy', 'Price'], inplace=True, ascending=[True, False])
print(products)

posted @ 2022-08-11 21:55  TomZz  阅读(23)  评论(0编辑  收藏  举报