简单的数据处理,涉及lambda函数
"""Fill in missing loan interest rates from a base rate and a floating-rate note.

Reads the bank-loan workbook, parses the free-text ``FloatingRate`` column
for a direction marker ("上浮" = float up, "下浮" = float down) and a
percentage, derives an interest rate from ``BaseRate``, and uses it to fill
``IntersetRate`` wherever that column is missing or zero.
"""
import numpy as np
import pandas as pd

INPUT_PATH = "2005-2023年国泰安数据库上市公司向银行借款表.xlsx"
OUTPUT_PATH = "贷款情况1.xlsx"

PATTERN_UP = r"(上浮)"
PATTERN_DOWN = r"(下浮)"
# Exactly ONE capture group: the decimal part is non-capturing. With two
# groups, str.extract returns a two-column DataFrame, which cannot be
# assigned to a single column.
PATTERN_PERCENT = r"(\d+(?:\.\d+)?)%"


def fill_interest_rates(data):
    """Add the derived rate columns to *data* and fill missing interest rates.

    Columns read: ``FloatingRate``, ``BaseRate``, ``IntersetRate``.
    Columns written (in this order): ``rate_up``, ``rate_down``,
    ``rate_float_pc``, ``interest_rates``; ``IntersetRate`` is updated
    where it is NaN or 0.

    Args:
        data: DataFrame holding the three input columns above.

    Returns:
        The same DataFrame object, modified in place.
    """
    floating_text = data["FloatingRate"]

    # expand=False yields a Series (one value per row) instead of a DataFrame.
    data["rate_up"] = floating_text.str.extract(PATTERN_UP, expand=False)
    data["rate_down"] = floating_text.str.extract(PATTERN_DOWN, expand=False)

    percent_text = floating_text.str.extract(PATTERN_PERCENT, expand=False)
    data["rate_float_pc"] = pd.to_numeric(percent_text, errors="coerce") / 100

    # NOTE(review): BaseRate may be object dtype after the leading rows are
    # dropped by the caller; coercing keeps the arithmetic numeric. Confirm
    # against the real workbook.
    base_rate = pd.to_numeric(data["BaseRate"], errors="coerce")
    float_pc = data["rate_float_pc"]

    # Each pattern can only capture its own literal, so "not NaN" is the same
    # test as "equals 上浮" / "equals 下浮". np.select takes the first matching
    # condition, so "up" wins over "down" exactly like the original
    # if / elif chain; rows matching neither get NaN.
    is_up = data["rate_up"].notna().to_numpy()
    is_down = data["rate_down"].notna().to_numpy()
    data["interest_rates"] = np.select(
        [is_up, is_down],
        [base_rate * (1 + float_pc), base_rate * (1 - float_pc)],
        default=np.nan,
    )

    # Only overwrite the reported rate where it is absent or zero.
    needs_fill = data["IntersetRate"].isna() | (data["IntersetRate"] == 0)
    data.loc[needs_fill, "IntersetRate"] = data.loc[needs_fill, "interest_rates"]
    return data


def main():
    """Load the workbook, derive the rates, and write the result to Excel."""
    data = pd.read_excel(INPUT_PATH)
    # Presumably the first two rows are extra header/description rows from
    # the database export rather than loan records — TODO confirm.
    data = data.drop(index=[0, 1])
    data = fill_interest_rates(data)
    data.to_excel(OUTPUT_PATH)


if __name__ == "__main__":
    main()
有需要这份数据的也可以找我要,因为国泰安更新之后,数据最早只能查到2013年(说不定是之前的数据有问题……)。