pandas读取操作

# Third-party dependencies used by the examples below.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests

# Seed NumPy's global random generator so runs are repeatable.
np.random.seed(12345)
# Default figure size for every matplotlib figure created afterwards.
plt.rc('figure', figsize=(10, 6))
# Print NumPy arrays with 4 digits of precision and suppress=True.
np.set_printoptions(precision=4, suppress=True)
# Raw string literal: in a normal literal the sequences "\新", "\P" and "\p"
# are invalid escapes (SyntaxWarning on Python 3.12+); the raw form keeps the
# exact same path text without relying on that leniency.
ex1_path = r'F:\新建文件夹\Python数据分析\pydata-book-2nd-edition/examples/ex1.csv'

# Load the comma-separated file into a DataFrame.
df = pd.read_csv(ex1_path)
print(df)
# read_table with sep=',' is equivalent to the read_csv call above.
df = pd.read_table(ex1_path, sep=',')
print(df)

# Raw string literal so the backslashes of the Windows path are never
# interpreted as (invalid) escape sequences.
ex2_path = r'F:\新建文件夹\Python数据分析\pydata-book-2nd-edition/examples/ex2.csv'

# When the file has no header row, pandas can assign default column names ...
df1 = pd.read_csv(ex2_path, header=None)
print(df1)
# ... or the column names can be supplied explicitly.
names = ['a', 'b', 'c', 'd', 'message']
df1 = pd.read_csv(ex2_path, names=names)
print(df1)
# To make the message column the index of the DataFrame, either refer to it
# by position (index 4) or, as done here, pass its name through index_col.
df1 = pd.read_csv(ex2_path, names=names, index_col='message')
print(df1)

# Some tables do not use a fixed delimiter; their fields are separated by a
# variable amount of whitespace (or some other pattern), so a regular
# expression is passed as the separator.
# Both the path and the regex are raw strings: "\s" (like "\新", "\P", "\p")
# is an invalid escape sequence in a normal string literal.
ex3_path = r'F:\新建文件夹\Python数据分析\pydata-book-2nd-edition/examples/ex3.txt'
result = pd.read_table(ex3_path, sep=r'\s+')
print(result)

# Raw string literal so the backslashes of the Windows path are never
# interpreted as (invalid) escape sequences.
ex4_path = r'F:\新建文件夹\Python数据分析\pydata-book-2nd-edition/examples/ex4.csv'
# skiprows skips the first, third and fourth lines of the file
# (zero-based line numbers 0, 2 and 3).
print(pd.read_csv(ex4_path, skiprows=[0, 2, 3]))
# Raw string literal so the backslashes of the Windows path are never
# interpreted as (invalid) escape sequences.
ex5_path = r'F:\新建文件夹\Python数据分析\pydata-book-2nd-edition/examples/ex5.csv'

# isnull() reports, cell by cell, whether a value was read as missing.
result = pd.read_csv(ex5_path)
print(result.isnull())
# na_values lists extra strings to treat as missing. With a dict the
# sentinels apply per column: 'foo' and 'NA' in the message column and 'two'
# in the something column are read as missing values.
sentinels = {'message': ['foo', 'NA'], 'something': ['two']}
print(pd.read_csv(ex5_path, na_values=sentinels))


# Raw string literal so the backslashes of the Windows path are never
# interpreted as (invalid) escape sequences.
ex6_path = r'F:\新建文件夹\Python数据分析\pydata-book-2nd-edition/examples/ex6.csv'

# Before looking at a large file, make the pandas display more compact.
pd.options.display.max_rows = 10
result = pd.read_csv(ex6_path)
print(result)
# To read only a few rows (instead of the whole file), pass nrows.
result = pd.read_csv(ex6_path, nrows=5)
print(result)
# To read a file piece by piece, pass chunksize (a number of rows); read_csv
# then returns an iterator of DataFrames instead of a single DataFrame.
# Raw string literal so the backslashes of the Windows path are never
# interpreted as (invalid) escape sequences.
ex6_chunk_path = r'F:\新建文件夹\Python数据分析\pydata-book-2nd-edition/examples/ex6.csv'
chunker = pd.read_csv(ex6_chunk_path, chunksize=1000)

# Explicit dtype: an empty Series created without one has a version-dependent
# default dtype, which is not what a running count should start from.
tot = pd.Series([], dtype='int64')
for piece in chunker:
    # Accumulate the per-chunk counts of the 'key' column; fill_value=0 lets
    # keys missing on either side count as zero instead of producing NaN.
    tot = tot.add(piece['key'].value_counts(), fill_value=0)
# Most frequent keys first.
tot = tot.sort_values(ascending=False)
print(tot)

# Write a DataFrame back to disk with to_csv: '|' as the field delimiter,
# missing values written as NULL, and only the columns a, b and c.
data = pd.read_csv('../examples/ex5.csv')
data.to_csv(
    "../examples/out.csv",
    columns=['a', 'b', 'c'],
    na_rep='NULL',
    sep='|',
)


# JSON: convert between JSON text, Python objects and a DataFrame.
import json

obj = """
{"name": "Wes",
"places_lived": ["United States", "Spain", "Germany"],
"pet": null,
"siblings": [{"name": "Scott", "age": 30, "pets": ["Zeus", "Zuko"]},
{"name": "Katie", "age": 38,
"pets": ["Sixes", "Stache", "Cisco"]}]
}
"""

# json.loads parses a JSON string into the corresponding Python objects.
result = json.loads(obj)
print(result)

# json.dumps goes the other way: Python objects to a JSON string.
asjson = json.dumps(result)
print(asjson)

# How a JSON object (or a list of them) becomes a DataFrame is up to you.
# The simplest way is to hand the DataFrame constructor a list of dicts
# (the decoded JSON objects) and select a subset of the data fields.
sibling_records = result['siblings']
siblings = pd.DataFrame(sibling_records, columns=['name', 'age'])
print(siblings)
# pandas.read_json can automatically convert JSON datasets laid out in
# specific arrangements into a Series or DataFrame, for example:
example_json_path = '../examples/example.json'
data = pd.read_json(example_json_path)
print(data)

#Excel读取
# xlsx = pd.ExcelFile('../examples/ex1.xlsx')
# print(pd.read_excel(xlsx, 'Sheet1'))



# Web API interaction: request the issue list of the pandas repository from
# the GitHub API and decode the JSON response body.
url = 'https://api.github.com/repos/pandas-dev/pandas/issues'
# Without a timeout requests may wait indefinitely on an unresponsive server.
resp = requests.get(url, timeout=30)
# Raise on HTTP error statuses instead of silently parsing an error payload.
resp.raise_for_status()
data = resp.json()
print(data)
posted @ 2022-04-13 20:10  {hunter}ZY  阅读(105)  评论(0)  编辑  收藏  举报