按照Key合并DataFrame
import pandas as pd

# Demo: join two frames on a shared 'key' column.
# Both frames carry the same four key values, so every row finds a partner.
row_ids = range(4)
join_keys = [f'K{i}' for i in row_ids]

left = pd.DataFrame({
    'key': list(join_keys),
    'A': [f'A{i}' for i in row_ids],
    'B': [f'B{i}' for i in row_ids],
})
print('left\n', left)

right = pd.DataFrame({
    'key': list(join_keys),
    'C': [f'C{i}' for i in row_ids],
    'D': [f'D{i}' for i in row_ids],
})
print('right\n', right)

# Default (inner) merge on 'key': the result holds the key column once,
# followed by the value columns of the left and then the right frame.
result = left.merge(right, on='key')
print('result\n', result)
输出
/Users/cloud/.conda/envs/auto/bin/python /Users/cloud/Downloads/project_static/PD/merge_key.py left key A B 0 K0 A0 B0 1 K1 A1 B1 2 K2 A2 B2 3 K3 A3 B3 right key C D 0 K0 C0 D0 1 K1 C1 D1 2 K2 C2 D2 3 K3 C3 D3 result key A B C D 0 K0 A0 B0 C0 D0 1 K1 A1 B1 C1 D1 2 K2 A2 B2 C2 D2 3 K3 A3 B3 C3 D3 Process finished with exit code 0
import pandas as pd


def make_block(columns, rows):
    """Return a frame indexed by *rows* whose cell (row, col) is the string '<col><row>'."""
    rows = list(rows)
    return pd.DataFrame(
        {col: [f'{col}{row}' for row in rows] for col in columns},
        index=rows,
    )


# Three frames with identical columns and disjoint, consecutive row labels.
df1 = make_block('ABCD', range(0, 4))
print('df 1\n', df1)

df2 = make_block('ABCD', range(4, 8))
print('df2\n', df2)

df3 = make_block('ABCD', range(8, 12))
print('df3', df3)

frames = [df1, df2, df3]
print('frame 123\n', frames)

# Row-wise concat; `keys` adds an outer index level ('x', 'y', 'z') that
# tags each row with the frame it came from.
result = pd.concat(frames, keys=['x', 'y', 'z'])
print('xyz\n', result)

# Selecting an outer-level label returns just that frame's rows.
print('loc y\n\n')
print(result.loc['y'])

# A frame that only partially overlaps df1 in both rows (2, 3) and columns (B, D).
df4 = make_block('BDF', [2, 3, 6, 7])

# Column-wise concat, default outer join: union of row labels, NaN where a
# frame has no such row.
result_d1_d4_sort = pd.concat([df1, df4], axis=1, sort=False)
print('result_d1_d4_sort\n\n', result_d1_d4_sort)

# Column-wise concat, inner join: only row labels present in both frames.
result_d1_d4_join_inner = pd.concat([df1, df4], axis=1, join='inner')
print('result_d1_d4_join\n\n', result_d1_d4_join_inner)
输出
/Users/cloud/.conda/envs/auto/bin/python /Users/cloud/Downloads/project_static/PD/combine_index.py df 1 A B C D 0 A0 B0 C0 D0 1 A1 B1 C1 D1 2 A2 B2 C2 D2 3 A3 B3 C3 D3 df2 A B C D 4 A4 B4 C4 D4 5 A5 B5 C5 D5 6 A6 B6 C6 D6 7 A7 B7 C7 D7 df3 A B C D 8 A8 B8 C8 D8 9 A9 B9 C9 D9 10 A10 B10 C10 D10 11 A11 B11 C11 D11 frame 123 [ A B C D 0 A0 B0 C0 D0 1 A1 B1 C1 D1 2 A2 B2 C2 D2 3 A3 B3 C3 D3, A B C D 4 A4 B4 C4 D4 5 A5 B5 C5 D5 6 A6 B6 C6 D6 7 A7 B7 C7 D7, A B C D 8 A8 B8 C8 D8 9 A9 B9 C9 D9 10 A10 B10 C10 D10 11 A11 B11 C11 D11] xyz A B C D x 0 A0 B0 C0 D0 1 A1 B1 C1 D1 2 A2 B2 C2 D2 3 A3 B3 C3 D3 y 4 A4 B4 C4 D4 5 A5 B5 C5 D5 6 A6 B6 C6 D6 7 A7 B7 C7 D7 z 8 A8 B8 C8 D8 9 A9 B9 C9 D9 10 A10 B10 C10 D10 11 A11 B11 C11 D11 loc y A B C D 4 A4 B4 C4 D4 5 A5 B5 C5 D5 6 A6 B6 C6 D6 7 A7 B7 C7 D7 result_d1_d4_sort A B C D B D F 0 A0 B0 C0 D0 NaN NaN NaN 1 A1 B1 C1 D1 NaN NaN NaN 2 A2 B2 C2 D2 B2 D2 F2 3 A3 B3 C3 D3 B3 D3 F3 6 NaN NaN NaN NaN B6 D6 F6 7 NaN NaN NaN NaN B7 D7 F7 result_d1_d4_join A B C D B D F 2 A2 B2 C2 D2 B2 D2 F2 3 A3 B3 C3 D3 B3 D3 F3 Process finished with exit code 0
lambda 连接
import pandas as pd

# Demo: build a combined 'YearQuarter' column from two string columns
# using a row-wise lambda.
df = pd.DataFrame({'Year': ['2014', '2015'], 'Quarter': ['q1', 'q2']})
print('fist\n', df)

# With axis=1 the lambda receives each row as a Series indexed by column
# name. Read the fields by label: integer keys such as row[0] only work via
# a positional fallback that recent pandas releases deprecate (FutureWarning)
# in favour of treating integers as labels.
df['YearQuarter'] = df[['Year', 'Quarter']].apply(
    lambda row: '{}--{}'.format(row['Year'], row['Quarter']),
    axis=1,
)
print('new df\n', df)
输出
/Users/cloud/.conda/envs/auto/bin/python /Users/cloud/Downloads/project_static/PD/format.py fist Year Quarter 0 2014 q1 1 2015 q2 new df Year Quarter YearQuarter 0 2014 q1 2014--q1 1 2015 q2 2015--q2 Process finished with exit code 0
merge suffixes
import pandas as pd
import numpy as np

# Demo: merge on differently named key columns and disambiguate the
# overlapping 'price' column with suffixes.

# Left table: 9 rows cycling through three (fruit, weight) pairs.
# Prices are random integers in [0, 15).
left_columns = {
    'fruit': ['apple', 'banana', 'orange'] * 3,
    'weight': ['high', 'medium', 'low'] * 3,
    'price': np.random.randint(0, 15, 9),
}
df1 = pd.DataFrame(left_columns)
print('df1', df1)

# Right table: the same kind of data under different column names.
right_columns = {
    'pazham': ['apple', 'orange', 'pine'] * 2,
    'kilo': ['high', 'low'] * 3,
    'price': np.random.randint(0, 15, 6),
}
df2 = pd.DataFrame(right_columns)
print('df2',df2)

# Inner join pairing (fruit, weight) with (pazham, kilo). Both inputs own a
# 'price' column, so the suffixes rename them to price_left / price_right.
joined = pd.merge(
    df1,
    df2,
    how='inner',
    left_on=['fruit', 'weight'],
    right_on=['pazham', 'kilo'],
    suffixes=('_left', '_right'),
)
out = joined.head(10)
print('out', out)
输出
/Users/cloud/.conda/envs/auto/bin/python /Users/cloud/Downloads/project_static/PD/combine_data.py df1 fruit weight price 0 apple high 1 1 banana medium 12 2 orange low 11 3 apple high 13 4 banana medium 6 5 orange low 13 6 apple high 6 7 banana medium 13 8 orange low 6 df2 pazham kilo price 0 apple high 9 1 orange low 8 2 pine high 7 3 apple low 11 4 orange high 3 5 pine low 9 out fruit weight price_left pazham kilo price_right 0 apple high 1 apple high 9 1 apple high 13 apple high 9 2 apple high 6 apple high 9 3 orange low 11 orange low 8 4 orange low 13 orange low 8 5 orange low 6 orange low 8 Process finished with exit code 0
Split a dictionary into keys and values (loop over items)
# Demonstrates splitting a dictionary into a list of its keys and a list
# of its values by looping over the (key, value) pairs.

# Sample dictionary to split.
ini_dict = {'a': 'akshat', 'b': 'bhuvan', 'c': 'chandan'}

# Show the initial dictionary.
print("intial_dictionary", str(ini_dict))

# Walk the pairs once, unpacking each into its key and value. Both lists
# therefore follow the dictionary's iteration order and stay aligned
# index-for-index.
keys = []
values = []
for key, value in ini_dict.items():
    keys.append(key)
    values.append(value)

# Show keys and values separately.
print("keys : ", str(keys))
print("values : ", str(values))
输出
/Users/cloud/.conda/envs/auto/bin/python /Users/cloud/Downloads/project_static/debug/split_items.py intial_dictionary {'a': 'akshat', 'b': 'bhuvan', 'c': 'chandan'} keys : ['a', 'b', 'c'] values : ['akshat', 'bhuvan', 'chandan'] Process finished with exit code 0
zip(*ini_dict.items())
# Demonstrates splitting a dictionary into its keys and its values in one
# step by "unzipping" the (key, value) pairs.

# Sample dictionary to split.
ini_dict = dict(a='akshat', b='bhuvan', c='chandan')

# Show the initial dictionary.
print("intial_dictionary", str(ini_dict))

# zip(*pairs) transposes the pairs: the first tuple collects every key,
# the second every value.
pairs = ini_dict.items()
keys, values = zip(*pairs)

# Show keys and values separately (print applies str() to each argument).
print("keys : ", keys)
print("values : ", values)
输出
/Users/cloud/.conda/envs/auto/bin/python /Users/cloud/Downloads/project_static/debug/split_zip_dict.py intial_dictionary {'a': 'akshat', 'b': 'bhuvan', 'c': 'chandan'} keys : ('a', 'b', 'c') values : ('akshat', 'bhuvan', 'chandan') Process finished with exit code 0
拼接字典JSON合并LIST
# Demo: turn a flat list of ids into a list of {'userId': <id>} dicts.

# Example of the target shape (first two ids only); not used by the loop.
test_list = [{'userId': '55b6a1da-01d9-4ae6-9ba8-6ebd2a485ca5'},
             {'userId': 'ac05eb4d-1e2f-4065-9f45-33f6f4579448'}]

ids = [
    '55b6a1da-01d9-4ae6-9ba8-6ebd2a485ca5',
    'ac05eb4d-1e2f-4065-9f45-33f6f4579448',
    'xxxxx-1e2f-4065-9f45-33f6f4579448',
]

combine_list = []
for user_id in ids:
    # A fresh dict per id: entries never alias each other, so there is no
    # need to mutate one shared dict and remember to copy it.
    combine_list.append({'userId': user_id})
    # Printed on every pass to show the list growing one entry at a time.
    print(combine_list)
输出
/Users/cloud/.conda/envs/auto/bin/python /Users/cloud/Downloads/project_static/debug/for_dict.py [{'userId': '55b6a1da-01d9-4ae6-9ba8-6ebd2a485ca5'}] [{'userId': '55b6a1da-01d9-4ae6-9ba8-6ebd2a485ca5'}, {'userId': 'ac05eb4d-1e2f-4065-9f45-33f6f4579448'}] [{'userId': '55b6a1da-01d9-4ae6-9ba8-6ebd2a485ca5'}, {'userId': 'ac05eb4d-1e2f-4065-9f45-33f6f4579448'}, {'userId': 'xxxxx-1e2f-4065-9f45-33f6f4579448'}] Process finished with exit code 0