针对单列的条件:
# Conventional approach: select rows with a boolean mask and assign via .loc.
import pandas as pd

df = pd.DataFrame({'one': ['a', 'a', 'b', 'c'],
                   'two': [3, 1, 2, 3],
                   'three': ['C', 'B', 'C', 'A']})
print(df)

# Set column "one" to 'x' on every row where column "two" equals 2.
# .loc takes the row mask and the column label in a single indexing call,
# so the assignment is applied to df itself.
df.loc[df['two'] == 2, 'one'] = 'x'
print(df)

# Same update, with the mask built through attribute access (df.two).
# It is written with .loc rather than the chained form `df.one[mask] = 'x'`:
# the chained form assigns through an intermediate object, which pandas warns
# about and which does not update df when Copy-on-Write is enabled.
df.loc[df.two == 2, 'one'] = 'x'
print(df)
# Function-based approach: map every value of a column through a function.
import pandas as pd


def fun(x):
    """Return 1 when ``x`` is greater than or equal to 30, otherwise 0."""
    return 1 if x >= 30 else 0


# Sample data so the snippet runs on its own; substitute your own DataFrame.
feature = pd.DataFrame({'values': [10, 30, 45]})
# Untouched copy, used below to demonstrate the one-step form independently.
feature_one_step = feature.copy()

# Two-step form: compute the mapped Series first ...
values = feature['values'].apply(fun)
# ... then assign it back when the original column should be overwritten.
feature['values'] = values

# One-step form: map and overwrite in a single statement. This is an
# alternative to the two statements above, not a follow-up; applying it to the
# already-mapped column would feed the 0/1 flags through fun() again and turn
# every value into 0.
feature_one_step['values'] = feature_one_step['values'].apply(fun)
import numpy as np
import pandas as pd

data = {
    'city': ['Beijing', 'Shanghai', 'Guangzhou', 'Shenzhen', 'Hangzhou', 'Chongqing'],
    'year': [2016, 2016, 2015, 2017, 2016, 2016],
    'population': [2100, 2300, 1000, 700, 500, 500],
}
# 'debt' is requested as a column but has no entry in data, so it is created
# with missing values.
frame = pd.DataFrame(data, columns=['year', 'city', 'population', 'debt'])

# Build the 'panduan' column: 1 when the city name contains the substring
# 'ing', 0 otherwise.
frame['panduan'] = [1 if 'ing' in name else 0 for name in frame['city']]
print(frame)
针对多列的条件:
# Conventional approach: combine per-column masks, then assign via .loc.
import pandas as pd

df = pd.DataFrame({
    'one': ['a', 'a', 'b', 'c'],
    'two': [3, 1, 2, 3],
    'three': ['C', 'B', 'C', 'A'],
})
print(df)

# OR of two conditions: rows where "two" is 2 or "three" is 'A'.
# Each comparison is parenthesised because | and & bind tighter than ==.
two_is_2_or_three_is_a = (df['two'] == 2) | (df['three'] == 'A')
df.loc[two_is_2_or_three_is_a, 'one'] = 'x'  # .loc is the recommended form
print(df)

# AND of two conditions: rows where "two" is 2 and "three" is 'C'.
two_is_2_and_three_is_c = (df['two'] == 2) & (df['three'] == 'C')
df.loc[two_is_2_and_three_is_c, 'one'] = 'x'  # .loc is the recommended form
print(df)
import numpy as np
import pandas as pd

data = {
    'city': ['Beijing', 'Shanghai', 'Guangzhou', 'Shenzhen', 'Hangzhou', 'Chongqing'],
    'year': [2016, 2016, 2015, 2017, 2016, 2016],
    'population': [2100, 2300, 1000, 700, 500, 500],
}
# 'debt' is requested as a column but has no entry in data, so it is created
# with missing values.
frame = pd.DataFrame(data, columns=['year', 'city', 'population', 'debt'])


def function(a, b):
    """Return 1 when 'ing' occurs in ``a`` and ``b`` equals 2016, otherwise 0."""
    return 1 if ('ing' in a and b == 2016) else 0


# Build the 'test' column from two columns at once by pairing each row's
# city with its year.
frame['test'] = [
    function(city, year) for city, year in zip(frame['city'], frame['year'])
]
print(frame)
def win_or_loss(df):
    """Label one record as 'win', 'loss' or 'd'.

    ``df`` is looked up by field name ('gli_h', 'gli_g', 'gli_drew', 'sc_h',
    'sc_g', 'eur_h', 'eur_g'). ``df_mark_win`` passes a single row at a time,
    so every comparison below is a scalar boolean and can be used in ``if``.
    The 'win' conditions are checked before the 'loss' conditions.
    """
    limit = -80

    # Elementary comparisons shared by the win and loss rules.
    gli_h_low = df['gli_h'] < limit
    gli_g_low = df['gli_g'] < limit
    gli_drew_low = df['gli_drew'] < limit
    sc_h_gt = df['sc_h'] > df['sc_g']
    sc_h_lt = df['sc_h'] < df['sc_g']
    eur_h_lt = df['eur_h'] < df['eur_g']
    eur_h_gt = df['eur_h'] > df['eur_g']

    lost = (
        (gli_h_low & sc_h_gt)
        | (gli_g_low & sc_h_lt)
        | (gli_drew_low & eur_h_lt & sc_h_lt)
        | (gli_drew_low & eur_h_gt & sc_h_gt)
    )
    won = (
        (gli_h_low & sc_h_lt)
        | (gli_g_low & sc_h_gt)
        | (gli_drew_low & eur_h_lt & sc_h_gt)
        | (gli_drew_low & eur_h_gt & sc_h_lt)
    )

    if won:
        return 'win'
    if lost:
        return 'loss'
    return 'd'


def df_mark_win(df):
    """Return the rows of ``df`` that pass every filter, with a 'result' column.

    A row is kept when all three price columns exceed 1.9 and, for each of the
    trade (> 300000), index (> 80), gli (< -80) and hot (> 80) groups, at least
    one of the three columns in the group meets its threshold. The 'result'
    column holds the value of ``win_or_loss`` for each kept row. ``df`` itself
    is not modified.
    """
    price_ok = (df['price_h'] > 1.9) & (df['price_drew'] > 1.9) & (df['price_g'] > 1.9)
    trade_ok = (df['trade_h'] > 300000) | (df['trade_drew'] > 300000) | (df['trade_g'] > 300000)
    index_ok = (df['index_h'] > 80) | (df['index_drew'] > 80) | (df['index_g'] > 80)
    gli_ok = (df['gli_h'] < -80) | (df['gli_drew'] < -80) | (df['gli_g'] < -80)
    hot_ok = (df['hot_h'] > 80) | (df['hot_drew'] > 80) | (df['hot_g'] > 80)

    keep = price_ok & trade_ok & index_ok & gli_ok & hot_ok
    # Work on a copy so that adding the column neither touches the caller's
    # frame nor triggers an assignment-on-a-slice error.
    df_rst = df.loc[keep].copy()
    df_rst['result'] = df_rst.apply(win_or_loss, axis=1)
    return df_rst
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】凌霞软件回馈社区,博客园 & 1Panel & Halo 联合会员上线
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】博客园社区专享云产品让利特惠,阿里云新客6.5折上折
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· .NET Core 托管堆内存泄露/CPU异常的常见思路
· PostgreSQL 和 SQL Server 在统计信息维护中的关键差异
· C++代码改造为UTF-8编码问题的总结
· DeepSeek 解答了困扰我五年的技术问题
· 为什么说在企业级应用开发中,后端往往是效率杀手?
· 清华大学推出第四讲使用 DeepSeek + DeepResearch 让科研像聊天一样简单!
· 推荐几款开源且免费的 .NET MAUI 组件库
· 实操Deepseek接入个人知识库
· 易语言 —— 开山篇
· Trae初体验