# 1. pandas tip: strip unwanted text out of a column's values
d = {
    "customer": ["A", "B", "C", "D"],
    "sales": [1100, "950.5RMB", "$400", " $1250.75"],
}
df = pd.DataFrame(d)
print(df, type(df))
# Remove every "$", ",", space and the letters R/M/B from the string entries
# (non-string entries such as the int 1100 are left untouched by a regex
# replace), then convert the column to float and round to one decimal place.
#
# The result is assigned with df["sales"] = ... rather than
# df.loc[:, "sales"] = ...: the .loc form writes the values into the existing
# object-dtype column, so the column would keep dtype object instead of
# becoming float64.
df["sales"] = (
    df["sales"]
    .replace(r"[$, RMB]", "", regex=True)
    .astype("float")
    .round(1)
)
print(df)
# 2. Read only part of the data: loading a large data file into memory all at
# once can freeze the machine, so usually only a portion is read in order to
# look at the structure of the data.
def _skip_most_rows(row_index):
    """Return True when pandas should skip the row at ``row_index``.

    Row 0 is never skipped; for any other row a fresh random draw decides,
    and the row is skipped whenever that draw is greater than 0.01.
    """
    # Short-circuit on row 0 so no random number is drawn for it.
    if not row_index > 0:
        return False
    return np.random.rand() > 0.01


df6 = pd.read_excel(
    r"C:\Users\Administrator\Desktop\4月份运营数据表\202003\20200317.xlsx",
    skiprows=_skip_most_rows,
)
print(df6)

df7 = pd.read_excel(
    r"C:\Users\Administrator\Desktop\4月份运营数据表\202003\20200318.xlsx",
    sheet_name="当日",
    skiprows=_skip_most_rows,
)
print(df7)