pandas的选择、设置数据、导入导出

选择数据

iloc[] 根据位置（索引）选择数据

df.iloc[0, 2] 选择第0行第2列的数据（df 为 DataFrame 对象，iloc 是 DataFrame 的属性）
df.iloc[0:2, 1] 选择第 0-1 两行的第1列数据，[0,2)为左闭右开区间
df.iloc[ : , 1] 选择所有行的第1列数据

loc[] 根据标签选择数据

df.loc['A', 'age'] 选择A行的age列数据（df 为 DataFrame 对象，loc 是 DataFrame 的属性）
df.loc['A':'C', 'age'] 选择第A-C三行的age列数据，标签切片['A', 'C']为闭区间（包含两端）

# Pandas选择数据
def function1():
    """Demonstrate the common ways of selecting data from a DataFrame.

    Builds a three-row example frame labelled 'A', 'B', 'C' and prints, in
    order: the whole frame, a single column, a positional row slice, two
    label-based selections (``loc``), one position-based selection
    (``iloc``) and a boolean-mask selection.
    """
    # Column name -> column values; every column holds three entries.
    columns = {
        'height': pd.array([175, 182, 178], dtype=np.int32),
        'birthday': pd.date_range("2000-1-1", periods=3),
        'weight': np.array([65, 68, 70], dtype='int32'),
        'tool': pd.Categorical(["plane", "train", "bus"]),
    }
    # ``index`` sets the row labels; by default rows are numbered from 0.
    row_labels = pd.Series(['A', 'B', 'C'])
    df = pd.DataFrame(columns, index=row_labels)

    # Whole frame:
    #    height   birthday  weight   tool
    # A     175 2000-01-01      65  plane
    # B     182 2000-01-02      68  train
    # C     178 2000-01-03      70    bus
    print(df)

    # A single column (``df.tool`` gives the same result):
    # A    plane
    # B    train
    # C      bus
    # Name: tool, dtype: category
    # Categories (3, object): ['bus', 'plane', 'train']
    tool_column = df['tool']
    print(tool_column)

    # Row slice by position; the end is exclusive, so 0:2 yields rows 0 and 1
    # (0:1 would yield only row 0, 0:3 rows 0 through 2):
    #    height   birthday  weight   tool
    # A     175 2000-01-01      65  plane
    # B     182 2000-01-02      68  train
    leading_rows = df[0:2]
    print(leading_rows)

    # Select by label: DataFrame.loc[rows, columns].
    # e.g. loc[:, ['height', 'weight']] takes every row of those two columns.
    # Here: rows A and C, columns height and weight.
    #    height  weight
    # A     175      65
    # C     178      70
    wanted_rows = ['A', 'C']
    wanted_columns = ['height', 'weight']
    print(df.loc[wanted_rows, wanted_columns])

    # Label slices include both ends: rows A through C of the height column.
    #    height
    # A     175
    # B     182
    # C     178
    heights_a_to_c = df.loc['A':'C', ['height']]
    print(heights_a_to_c)

    # Select by position: DataFrame.iloc[rows, columns], using integer
    # offsets instead of labels. Rows 0 and 1, columns 0 and 3.
    #    height   tool
    # A     175  plane
    # B     182  train
    print(df.iloc[0:2, [0, 3]])

    # Boolean index: only rows where the mask is True are kept.
    #    height   birthday  weight   tool
    # B     182 2000-01-02      68  train
    # C     178 2000-01-03      70    bus
    taller_than_175 = df['height'] > 175
    print(df[taller_than_175])

设置数据

使用iloc[]、loc[]设置，跟选择同理，选择并赋值就是设置

# 使用iloc[]、loc[]设置
def function2():
    """Demonstrate assigning values through ``iloc`` and ``loc``.

    Assignment uses the same indexers as selection: select a cell or a
    range and assign to it. The example frame is printed once before any
    change is made, then four assignments are applied.

    Returns:
        pandas.DataFrame: the example frame after all assignments, so the
        effect of each step can be inspected by the caller. (Previously the
        modified frame was discarded and the changes were never observable.)
    """
    # Column name -> column values; named ``columns`` so the builtin ``dict``
    # is not shadowed.
    columns = {
        'height': pd.array([175, 182, 178], dtype=np.int32),
        'birthday': pd.date_range("2000-1-1", periods=3),
        'weight': np.array([65, 68, 70], dtype='int32'),
        'tool': pd.array(["plane", "train", "bus"]),
    }
    # ``index`` sets the row labels; by default rows are numbered from 0.
    df = pd.DataFrame(columns, index=pd.Series(['A', 'B', 'C']))
    #    height   birthday  weight   tool
    # A     175 2000-01-01      65  plane
    # B     182 2000-01-02      68  train
    # C     178 2000-01-03      70    bus
    print(df)

    # Assign by position: row 1, column 0 (B's height).
    # B     181 2000-01-02      68  train
    df.iloc[1, 0] = 181

    # Several cells at once: rows 1 and 2 of column 2 (weight).
    # ``df.iloc[1:3, 2] = 75`` has the same effect.
    # B     181 2000-01-02      75  train
    # C     178 2000-01-03      75    bus
    df.iloc[[1, 2], 2] = 75

    # Assign by label: row A, column tool.
    # A     175 2000-01-01      65  car
    df.loc['A', 'tool'] = "car"

    # ``:`` means "all"; here every row of the birthday column is set.
    #    height    birthday  weight   tool
    # A     175  2011-11-11      65    car
    # B     181  2011-11-11      75  train
    # C     178  2011-11-11      75    bus
    df.loc[:, 'birthday'] = "2011-11-11"

    return df

导入导出

文件格式	读取文件	保存文件
CSV	read_csv	to_csv
JSON	read_json	to_json
HTML	read_html	to_html
Excel	read_excel	to_excel
SQL	read_sql	to_sql
Local clipboard	read_clipboard	to_clipboard
HDF5 Format	read_hdf	to_hdf
Feather Format	read_feather	to_feather
Parquet Format	read_parquet	to_parquet
Msgpack（pandas 1.0 起已移除）	read_msgpack	to_msgpack
Stata	read_stata	to_stata
Python Pickle Format	read_pickle	to_pickle
Google Big Query	read_gbq	to_gbq

- Excel的 student.xlsx内容

- data = pandas.read_excel()

data类型为 DataFrame，修改data

# Change the name in row 0 by position (row 0, column 1).
data.iloc[0, 1] = 'tomcat'
# Set the age of rows 1-4 to 18 by position; the slice end (5) is excluded.
# The value was 25 before, which contradicted this description.
data.iloc[1:5, 2] = 18
# Change the Id of row 0 to 11 by label.
data.loc[0, 'Id'] = 11

- data.to_csv()

# Pandas 导入导出
def function3():
    """Read a student table from Excel, edit a few cells, and save it as CSV.

    Reads ``Excel/student.xlsx``, prints the type and content of the loaded
    table, applies three edits (one name, four ages, one Id) and writes the
    result to ``Excel/student.csv``.
    """
    source_path = 'Excel/student.xlsx'
    target_path = 'Excel/student.csv'

    # Load the Excel sheet.
    students = pd.read_excel(source_path)

    # The loaded object is a two-dimensional DataFrame:
    # <class 'pandas.core.frame.DataFrame'>
    print(type(students))

    #    Id   Name  Age
    # 0   1    tom   18
    # 1   2  jerry   17
    # ...   ...    ...
    # 8   9  kelly   18
    # 9  10  catty   18
    print(students)

    # Change the name in row 0 by position (row 0, column 1).
    # 0  1  tomcat   18
    students.iloc[0, 1] = 'tomcat'

    # Set the age of rows 1-4 to 18 by position (slice end 5 is excluded).
    # 1   2   jerry   18
    # 2   3    mary   18
    # 3   4    lili   18
    # 4   5   alicy   18
    students.iloc[1:5, 2] = 18

    # Change the Id of row 0 to 11 by label.
    # 0  11  tomcat   18
    students.loc[0, 'Id'] = 11

    # Save the edited table in CSV format.
    students.to_csv(target_path)

posted @ 2021-01-16 11:40 a最简单阅读(461) 评论(0) 编辑收藏举报

刷新页面返回顶部

@Asimple

( •̀ ω •́ )✧ 加油！

pandas的选择、设置数据、导入导出

公告