Python笔记 #15# Pandas: Missing Data

10 Minutes to pandas

 

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Three consecutive daily timestamps: 2018-01-16, 2018-01-17, 2018-01-18.
dates = pd.date_range(start='20180116', periods=3)

# 3x4 table of random normal samples; rows are labelled by date, columns A-D.
df = pd.DataFrame(
    np.random.randn(3, 4),
    index=dates,
    columns=['A', 'B', 'C', 'D'],
)

# Same rows as df plus a new column E; reindex leaves the new column as NaN.
df1 = df.reindex(index=dates[:3], columns=[*df.columns, 'E'])

# A label slice in .loc includes both endpoints, so the first two rows get
# E = 1 while the last row keeps its NaN.
df1.loc[slice(dates[0], dates[1]), 'E'] = 1
# print(df1)
#                    A         B         C         D    E
# 2018-01-16 -0.183828  1.393147  1.816151  0.595298  1.0
# 2018-01-17  1.118642 -0.106566 -0.213438  1.510072  1.0
# 2018-01-18  0.705483  1.629647 -1.657045  0.428885  NaN
# pandas represents missing data with np.nan (shown as NaN in the output above)

# print(df1.dropna(how='any')) # dropna does not modify df1; it returns a new DataFrame without the rows that contain any NaN
# print(df1)
#                    A         B         C         D    E
# 2018-01-16  0.866927  0.918359  0.908967 -0.888321  1.0
# 2018-01-17 -0.446272  0.534636 -0.160422 -0.157928  1.0
#                    A         B         C         D    E
# 2018-01-16  0.866927  0.918359  0.908967 -0.888321  1.0
# 2018-01-17 -0.446272  0.534636 -0.160422 -0.157928  1.0
# 2018-01-18  1.095823 -1.300827  0.746324 -0.277497  NaN

# Fill NaN values
# print(df1.fillna(value=5)) # fillna does not modify df1 either; it returns a new DataFrame with NaN replaced by 5
# print(df1)
#                    A         B         C         D    E
# 2018-01-16  0.286535 -0.847836 -0.949535 -1.889351  1.0
# 2018-01-17 -0.530458 -0.871814  1.169275  0.337444  1.0
# 2018-01-18 -0.457999 -0.325463  0.439679 -0.104462  5.0
#                    A         B         C         D    E
# 2018-01-16  0.286535 -0.847836 -0.949535 -1.889351  1.0
# 2018-01-17 -0.530458 -0.871814  1.169275  0.337444  1.0
# 2018-01-18 -0.457999 -0.325463  0.439679 -0.104462  NaN

# To get the boolean mask where values are nan
# print(pd.isna(df1))
#                 A      B      C      D      E
# 2018-01-16  False  False  False  False  False
# 2018-01-17  False  False  False  False  False
# 2018-01-18  False  False  False  False   True

 

posted @ 2018-01-22 22:23  xkfx  阅读(366)  评论(0)  编辑  收藏  举报