简单的数据处理,涉及lambda函数

import pandas as pd
import numpy as np
# Load the bank-loan workbook and discard the rows labelled 0 and 1 in one step.
# NOTE(review): rows 0 and 1 are presumably non-data description rows that
# follow the header in the export -- confirm against the workbook.
data = pd.read_excel(
    "2005-2023年国泰安数据库上市公司向银行借款表.xlsx"
).drop(index=[0, 1])

# Derive a numeric interest rate from the textual floating-rate description and
# use it to fill loans whose recorded rate is missing or zero.
#
# Columns read:    FloatingRate, BaseRate, IntersetRate
# Columns written: rate_up, rate_down, rate_float_pc, interest_rates, IntersetRate

# Literal direction tokens searched for in FloatingRate. A row tagged "上浮" is
# priced at BaseRate * (1 + pct); a row tagged "下浮" at BaseRate * (1 - pct).
pattern_up = r"(上浮)"
pattern_down = r"(下浮)"
# Percentage such as "10%" or "12.5%". The fractional part is a NON-capturing
# group so the pattern has exactly one capture group; with two capture groups
# str.extract returns a two-column frame, which cannot be assigned to a single
# column.
pattern_pct = r"(\d+(?:\.\d+)?%)"

# expand=False makes str.extract return a Series (matched token or NaN).
data['rate_up'] = data['FloatingRate'].str.extract(pattern_up, expand=False)
data['rate_down'] = data['FloatingRate'].str.extract(pattern_down, expand=False)

# "12.5%" -> 0.125; rows without a percentage become NaN.
pct_text = data['FloatingRate'].str.extract(pattern_pct, expand=False)
data['rate_float_pc'] = pd.to_numeric(pct_text.str.strip('%'), errors='coerce') / 100

# Coerce so the arithmetic below is numeric even if the column was read as
# object dtype; non-numeric cells become NaN instead of raising.
base_rate = pd.to_numeric(data['BaseRate'], errors='coerce')

# rate_up / rate_down hold either the matched token or NaN, so notna() is
# equivalent to comparing against the token. np.select takes the first
# matching condition, so "上浮" wins if both tokens appear in one cell; rows
# with neither token get NaN.
data['interest_rates'] = np.select(
    [data['rate_up'].notna(), data['rate_down'].notna()],
    [
        base_rate * (1 + data['rate_float_pc']),
        base_rate * (1 - data['rate_float_pc']),
    ],
    default=np.nan,
)

# Back-fill only where the recorded rate is missing or zero.
# NOTE(review): 'IntersetRate' looks like a misspelling of 'InterestRate', but
# it must match the workbook's column header -- confirm before renaming.
needs_fill = data['IntersetRate'].isna() | (data['IntersetRate'] == 0)
data.loc[needs_fill, 'IntersetRate'] = data.loc[needs_fill, 'interest_rates']

# Write the processed table (including the frame's index, which to_excel emits
# by default) to a new workbook in the working directory.
data.to_excel(excel_writer="贷款情况1.xlsx")

有需要这个数据的也可以找我要。因为国泰安更新过后最早只到2013年(说不定之前的数据有问题...)

posted @ 2023-09-13 19:25  热爱工作的宁致桑  阅读(12)  评论(0)  编辑  收藏  举报