pandas的合并数据、绘图

  • concat、append、merge
    • concat()  合并
    • append()  追加(pandas 2.0 起已移除,请改用 concat())
    • merge()  连接
# concat、append、merge
def function1():
    """Demonstrate row-wise combination of DataFrames with concat and merge.

    Builds four small 2x3 integer frames, combines them in several ways and
    prints each result. Nothing is returned; the output goes to stdout.

    ``DataFrame.append`` was removed in pandas 2.0, so the "append" example
    is expressed with the equivalent ``pd.concat`` call.
    """
    # Four 2x3 frames filled with 1, 2, 3 and 4 respectively.
    # df1:            df2:             df3:             df4:
    #    a  b  c         a  b  c          a  b  c          b  c  d
    # 0  1  1  1      0  2  2  2       0  3  3  3       0  4  4  4
    # 1  1  1  1      1  2  2  2       1  3  3  3       1  4  4  4
    df1 = pd.DataFrame(np.ones(shape=(2, 3), dtype=np.int64) * 1, columns=['a', 'b', 'c'])
    df2 = pd.DataFrame(np.ones(shape=(2, 3), dtype=np.int64) * 2, columns=['a', 'b', 'c'])
    df3 = pd.DataFrame(np.ones(shape=(2, 3), dtype=np.int64) * 3, columns=['a', 'b', 'c'])
    df4 = pd.DataFrame(np.ones(shape=(2, 3), dtype=np.int64) * 4, columns=['b', 'c', 'd'])

    # Stack along the row axis; ignore_index=True renumbers the rows 0..n-1.
    con1 = pd.concat([df1, df2, df3], axis=0, ignore_index=True)
    #    a  b  c
    # 0  1  1  1
    # 1  1  1  1
    # ...  ...
    # 4  3  3  3
    # 5  3  3  3
    print(con1)

    # Still the row axis (axis=0): frames with different columns are stacked
    # and the columns are unioned, like an outer join; gaps become NaN.
    con2 = pd.concat([df1, df4], axis=0)
    #      a  b  c    d
    # 0  1.0  1  1  NaN
    # 1  1.0  1  1  NaN
    # 0  NaN  4  4  4.0
    # 1  NaN  4  4  4.0
    print(con2)

    # Inner join: keep only the columns shared by both frames.
    con3 = pd.concat([df1, df4], join='inner', ignore_index=True)
    #    b  c
    # 0  1  1
    # 1  1  1
    # 2  4  4
    # 3  4  4
    print(con3)

    # Equivalent of the old df1.append(df4, ignore_index=True): an outer
    # concat along axis=0. DataFrame.append itself no longer exists in
    # pandas >= 2.0, so concat is used directly.
    con4 = pd.concat([df1, df4], ignore_index=True)
    #      a  b  c    d
    # 0  1.0  1  1  NaN
    # 1  1.0  1  1  NaN
    # 2  NaN  4  4  4.0
    # 3  NaN  4  4  4.0
    print(con4)

    # merge joins on the shared columns (here b and c); how= selects
    # 'left', 'right', 'inner' or 'outer'.
    # Left join: keep every row of df1.
    con5 = pd.merge(df1, df4, how='left')
    #    a  b  c   d
    # 0  1  1  1 NaN
    # 1  1  1  1 NaN
    print(con5)

    # Outer join: keep the rows of both frames.
    con6 = pd.merge(df1, df4, how='outer')
    #      a  b  c    d
    # 0  1.0  1  1  NaN
    # 1  1.0  1  1  NaN
    # 2  NaN  4  4  4.0
    # 3  NaN  4  4  4.0
    print(con6)
 
  • plot绘图
    •  以Series为例
# Draw 1000 random integers from the half-open interval [-20, 20)
data1 = pd.Series(np.random.randint(low=-20, high=20, size=1000))
# Running total of the samples
data2 = data1.cumsum()
# Draw the cumulative curve, then open the figure window
plt.plot(data2)
plt.show()
    •  以DataFrame为例
# randn draws samples from the standard normal distribution
data3 = pd.DataFrame(np.random.randn(1000, 3), columns=list("ABC"))
# head(n) shows the first n rows, for example:
#           A         B         C
# 0 -0.312282  1.160397  1.354245
# 1  0.688063 -0.997513 -0.125582
# 2  1.068869  1.652064 -0.499987
print(data3.head(3))
# Running total down each column
data4 = data3.cumsum()
# Plot one line per column
plt.plot(data4)
# Show the figure
plt.show()
 
posted @ 2021-01-16 16:38  a最简单  阅读(266)  评论(0)  编辑  收藏  举报