读取数据源
| pip install pandas |
| |
| |
| pip install xlrd |
| |
| |
| pip install openpyxl |
| import pandas as pd |
| path = r"C:\work\test.xlsx" |
| data = pd.read_excel(path) |
| print(data) |
| import pandas as pd |
| data = pd.read_csv(r"C:\\work\\test.csv") |
| print(data) |
| print(type(data)) |
- 注意excel文件另存为csv后缀的文件后,需要再用记事本打开,查看编码格式是否为utf8,否则会报错
解决方案
| Traceback (most recent call last): |
| File "C:\work\PythonProject\demo\day15\pandas学习01.py", line 27, in <module> |
| data = pd.read_csv(r"C:\\work\\test.csv") |
| File "C:\Users\ychen\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas\io\parsers\readers.py", line 948, in read_csv |
| return _read(filepath_or_buffer, kwds) |
| File "C:\Users\ychen\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas\io\parsers\readers.py", line 611, in _read |
| parser = TextFileReader(filepath_or_buffer, **kwds) |
| File "C:\Users\ychen\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas\io\parsers\readers.py", line 1448, in __init__ |
| self._engine = self._make_engine(f, self.engine) |
| File "C:\Users\ychen\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas\io\parsers\readers.py", line 1723, in _make_engine |
| return mapping[engine](f, **self.options) |
| File "C:\Users\ychen\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas\io\parsers\c_parser_wrapper.py", line 93, in __init__ |
| self._reader = parsers.TextReader(src, **kwds) |
| File "parsers.pyx", line 579, in pandas._libs.parsers.TextReader.__cinit__ |
| File "parsers.pyx", line 668, in pandas._libs.parsers.TextReader._get_header |
| File "parsers.pyx", line 879, in pandas._libs.parsers.TextReader._tokenize_rows |
| File "parsers.pyx", line 890, in pandas._libs.parsers.TextReader._check_tokenize_status |
| File "parsers.pyx", line 2050, in pandas._libs.parsers.raise_parser_error |
| UnicodeDecodeError: 'utf-8' codec can't decode byte 0xb2 in position 0: invalid start byte |
| ''' |
| header=None 没有标题行 |
| names=['transaction_id','member_id'] 创建标题 |
| index_col 指定索引列 |
| skiprows |
| skiprows=[2,4] 跳过2行和4行,从0开始数 |
| skiprows=3 跳过前面3行 |
| encoding="utf-8" 指定编码格式 |
| nrows=3 只取前面3行 |
| ''' |
| |
| import pandas as pd |
| path = r"C:\\work\\sales.csv" |
| data = pd.read_csv(path, |
| header=None, |
| names=['transaction_id',"member_id",'product_id','city'], |
| index_col="transaction_id" |
| ,skiprows=[2,4] |
| ,encoding="utf-8" |
| ,nrows=3) |
| print(data) |
| |
| data.to_excel("a.xlsx") |
| ''' |
| # 安装库 |
| pip install pymysql |
| # host=主机名,user=用户名,password=密码,database=数据库名 |
| # id就是表的主键列 |
| # index_col="id" |
| ''' |
| |
| import pymysql as psql |
| conn = psql.connect(host="localhost",user="root",password="local123",database="mydb") |
| sql = "select * from shop" |
| res = pd.read_sql(sql,conn,index_col="id") |
| print(res) |
pandas数据结构
| |
| |
| |
| import pandas as pd |
| dic={ |
| "序号":[1,2,3,4], |
| "姓名":['张三','李四','王五',"赵六"] |
| } |
| |
| data = pd.DataFrame(dic) |
| print(data) |
| |
| data = data.set_index("序号") |
| print(data) |
| |
| data.to_excel("pa.xlsx") |
输出打印
| 序号 姓名 |
| 0 1 张三 |
| 1 2 李四 |
| 2 3 王五 |
| 3 4 赵六 |
| |
| 姓名 |
| 序号 |
| 1 张三 |
| 2 李四 |
| 3 王五 |
| 4 赵六 |
| |
| s1 = ["张三",30,'男'] |
| s2 = ['姓名','年龄','性别'] |
| se = pd.Series(s1,s2) |
| print(se) |
| |
| |
| dic = { |
| "姓名":"张三", |
| "性别":'男', |
| "年龄":20 |
| } |
| se = pd.Series(dic) |
| print(se) |
输出打印
| 姓名 张三 |
| 年龄 30 |
| 性别 男 |
| dtype: object |
| |
| 姓名 张三 |
| 性别 男 |
| 年龄 20 |
| dtype: object |
| d = [ |
| [1,11,12,13,14], |
| [2,16,17,18,19], |
| [3,21,22,23,24], |
| [4,21,22,23,24], |
| [5,21,22,23,24], |
| ] |
| data = pd.DataFrame(d,columns=['a','b','d','c','e']) |
| data = data.set_index('a') |
| print(data) |
| |
| """ |
| b d c e |
| a |
| 1 11 12 13 14 |
| 2 16 17 18 19 |
| 3 21 22 23 24 |
| 4 21 22 23 24 |
| 5 21 22 23 24 |
| """ |
| |
| |
| b = data.loc[1,'b'] |
| print(b) |
| |
| |
| b = data['b'] |
| print(b) |
| |
| """ |
| a |
| 1 11 |
| 2 16 |
| 3 21 |
| 4 21 |
| 5 21 |
| Name: b, dtype: int64 |
| """ |
| |
| |
| b = data[['b','c']] |
| print(b) |
| |
| """ |
| b c |
| a |
| 1 11 13 |
| 2 16 18 |
| 3 21 23 |
| 4 21 23 |
| 5 21 23 |
| """ |
| |
| |
| b = data.loc[[2,4],'b':'e'] |
| print(b) |
| |
| """ |
| b d c e |
| a |
| 2 16 17 18 19 |
| 4 21 22 23 24 |
| """ |
| dic ={ |
| "序号":[1,2,3,4], |
| "姓名":['张三','李四','王五','赵六'], |
| "性别":['男','男','女','男'], |
| "年龄":[28,30,18,29], |
| "特长":['吃饭','睡觉','打豆豆','跑步'] |
| } |
| data = pd.DataFrame(dic) |
| data = data.set_index("序号") |
| |
| print(data) |
| |
| """ |
| 姓名 性别 年龄 特长 |
| 序号 |
| 1 张三 男 28 吃饭 |
| 2 李四 男 30 睡觉 |
| 3 王五 女 18 打豆豆 |
| 4 赵六 男 29 跑步 |
| """ |
| |
| |
| print(data.columns) |
| |
| """ |
| Index(['姓名', '性别', '年龄', '特长'], dtype='object') |
| """ |
| |
| |
| print(data['姓名']) |
| |
| """ |
| 序号 |
| 1 张三 |
| 2 李四 |
| 3 王五 |
| 4 赵六 |
| Name: 姓名, dtype: object |
| """ |
| |
| |
| print(data[['姓名','性别']]) |
| |
| """ |
| 姓名 性别 |
| 序号 |
| 1 张三 男 |
| 2 李四 男 |
| 3 王五 女 |
| 4 赵六 男 |
| """ |
| |
| |
| print(data.loc[1:3]) |
| |
| """ |
| 姓名 性别 年龄 特长 |
| 序号 |
| 1 张三 男 28 吃饭 |
| 2 李四 男 30 睡觉 |
| 3 王五 女 18 打豆豆 |
| """ |
| ser1 = pd.Series(["张三",'李四','王五'],index=[1,2,5],name="姓名") |
| ser2 = pd.Series(["男",'女','男'],index=[1,5,3],name="性别") |
| ser3 = pd.Series([18,20,22],index=[1,2,3],name="年龄") |
| |
| data = pd.DataFrame([ser1,ser2,ser3]) |
| print(data) |
| |
| """ |
| 1 2 5 3 |
| 姓名 张三 李四 王五 NaN |
| 性别 男 NaN 女 男 |
| 年龄 18.0 20.0 NaN 22.0 |
| """ |
| |
| |
| dic={ |
| ser1.name:ser1, |
| ser2.name:ser2, |
| ser3.name:ser3 |
| } |
| data = pd.DataFrame(dic) |
| print(data) |
| |
| """ |
| 姓名 性别 年龄 |
| 1 张三 男 18.0 |
| 2 李四 NaN 20.0 |
| 3 NaN 男 22.0 |
| 5 王五 女 NaN |
| """ |
| pd.set_option('max_colwidth',200) |
| pd.set_option('display.max_columns', None) |
| pd.set_option('display.max_rows', None) |
| plt.rcParams['font.sans-serif'] = ['SimHei'] |
| plt.rcParams['axes.unicode_minus'] = False |
| |
| |
| home.head() |
· 阿里巴巴 QwQ-32B真的超越了 DeepSeek R-1吗?
· 10年+ .NET Coder 心语 ── 封装的思维:从隐藏、稳定开始理解其本质意义
· 【设计模式】告别冗长if-else语句:使用策略模式优化代码结构
· 字符编码:从基础到乱码解决
· 提示词工程——AI应用必不可少的技术
2022-11-10 idea使用token