日食三餐夜眠六尺

导航

< 2025年3月 >
23 24 25 26 27 28 1
2 3 4 5 6 7 8
9 10 11 12 13 14 15
16 17 18 19 20 21 22
23 24 25 26 27 28 29
30 31 1 2 3 4 5

统计

2022年工匠杯赛题

1. 第一题

from random import choice
from numpy import *
import pandas as pd
import matplotlib.pyplot as plt

# Question 1: pick one retail and one health stock at random, compare their
# daily log returns, then evaluate two-asset portfolios over a grid of weights.


def adjusted_close(frame, industry, symbol):
    """Return the 'Adj Close' prices of one symbol, sorted and indexed by 'Date'.

    Args:
        frame: DataFrame with 'Industry', 'Symbol', 'Date' and 'Adj Close' columns.
        industry: value to match in the 'Industry' column.
        symbol: value to match in the 'Symbol' column.

    Returns:
        A Series of adjusted closing prices whose index is the 'Date' column.
    """
    rows = frame[(frame['Industry'] == industry) & (frame['Symbol'] == symbol)]
    # NOTE(review): 'Date' is sorted as read from the file; this is only
    # chronological if the column is ISO-formatted or already parsed - confirm.
    return rows.sort_values('Date').set_index('Date')['Adj Close']


def aligned_log_returns(prices_a, prices_b):
    """Return the log returns of two price series over their common index.

    The prices are inner-joined on their index first, so both results always
    have the same length and the i-th elements refer to the same date. Without
    this, two symbols with different trading-day coverage would make the
    correlation and the weighted sum fail (or pair up unrelated dates).

    Args:
        prices_a: price Series indexed by date.
        prices_b: price Series indexed by date.

    Returns:
        A ``(returns_a, returns_b)`` tuple of Series sharing one index.

    Raises:
        ValueError: if the two series share fewer than two usable dates, so no
            return can be computed.
    """
    paired = prices_a.to_frame('a').join(prices_b.to_frame('b'), how='inner')
    # ln(p_t) - ln(p_{t-1}); the first row (and any row touching a missing
    # price in either column) becomes NaN and is dropped for both series.
    returns = log(paired).diff().dropna()
    if returns.empty:
        raise ValueError('the two price series share no consecutive dates')
    return returns['a'], returns['b']


def portfolio_frontier(returns_a, returns_b, weights):
    """Evaluate the portfolios ``w * a + (1 - w) * b`` for every weight ``w``.

    Args:
        returns_a: equally long, date-aligned returns of the first asset.
        returns_b: equally long, date-aligned returns of the second asset.
        weights: iterable of weights given to the first asset.

    Returns:
        A list of ``(label, risk, mean_return)`` tuples in the order of
        ``weights``; ``label`` looks like ``'(30%,70%)'`` and ``risk`` is the
        standard deviation of the blended returns.
    """
    a = array(returns_a)
    b = array(returns_b)
    points = []
    for w in weights:
        blended = w * a + (1 - w) * b
        label = '({:.0%},{:.0%})'.format(w, 1 - w)
        points.append((label, std(blended), mean(blended)))
    return points


if __name__ == '__main__':
    # 1: choose the two symbols and summarise their log returns
    df = pd.read_csv('stock_prices.tsv', sep='\t')

    n1 = choice(df[df['Industry'] == 'retail']['Symbol'].unique())
    n2 = choice(df[df['Industry'] == 'health']['Symbol'].unique())
    print('stock: {}, {}'.format(n1, n2))

    s1 = adjusted_close(df, 'retail', n1)
    s2 = adjusted_close(df, 'health', n2)
    log1, log2 = aligned_log_returns(s1, s2)

    print('mean: {}, {}'.format(mean(log1), mean(log2)))
    print('std: {}, {}'.format(std(log1), std(log2)))
    print('var: {}, {}'.format(var(log1), var(log2)))
    print('corrcofe: {}'.format(corrcoef(log1, log2)[0, 1]))

    # 2: risk and return of the blended portfolio for weights 0%, 10%, ..., 100%
    points = portfolio_frontier(log1, log2, arange(0, 1.1, 0.1))
    for label, risk, ret in points:
        print('{}, std:{}, mean:{}'.format(label, risk, ret))
    labels = [p[0] for p in points]
    risks = [p[1] for p in points]
    returns = [p[2] for p in points]

    # 3: risk/return scatter, with the minimum-risk mix labelled in red
    plt.scatter(risks, returns)
    idx = argmin(risks)
    plt.text(risks[idx], returns[idx], labels[idx], c='r')
    plt.xlabel('Risk')
    plt.ylabel('Return')
    plt.title('{} and {}'.format(n1, n2))
    plt.show()

2. 第二题

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.metrics import mean_squared_error as mse

# Question 2: predict LoanAmount with plain, L1-penalised (Lasso) and
# L2-penalised (Ridge) linear regression - first from ApplicantIncome alone,
# then from seven applicant features.

# (plot label, report template, estimator factory), in the order they are run.
MODEL_SPECS = (
    ('Linear', 'Linear, coef:{}, inter:{}, mse:{}', lambda: LinearRegression()),
    ('L1', 'L1, coef:{}, inter:{}, mse:{}', lambda: Lasso(alpha=0.1)),
    ('L2', 'L2, coef:{}, inter:{}, mse:{}', lambda: Ridge(alpha=0.1)),
)


def run_models(x_train, x_test, y_train, y_test):
    """Fit every model in MODEL_SPECS and print its coefficients and test MSE.

    The arguments are in the order returned by ``train_test_split``.

    Returns:
        A dict mapping each plot label to that model's predictions on
        ``x_test``, in the order the models were fitted.
    """
    predictions = {}
    for label, template, make_estimator in MODEL_SPECS:
        fitted = make_estimator().fit(x_train, y_train)
        predicted = fitted.predict(x_test)
        print(template.format(fitted.coef_, fitted.intercept_, mse(y_test, predicted)))
        predictions[label] = predicted
    return predictions


loans = pd.read_csv('loan_amount_by_risk_control_zscore.tsv', sep='\t')

# 1 and 2: one feature (ApplicantIncome), three estimators on the same split
income_split = train_test_split(loans[['ApplicantIncome']], loans[['LoanAmount']])
income_test, amount_test = income_split[1], income_split[3]
fits = run_models(*income_split)

# Held-out points with each model's fitted line drawn over them
plt.scatter(income_test, amount_test)
for label, predicted in fits.items():
    plt.plot(income_test, predicted, label=label)
plt.legend()
plt.show()

# 3: the same three estimators on a fresh split of the seven-feature table
feature_columns = [
    'Gender',
    'Married',
    'Dependents',
    'Education',
    'Self_Employed',
    'ApplicantIncome',
    'CoapplicantIncome',
]
run_models(*train_test_split(loans[feature_columns], loans[['LoanAmount']]))

posted on   chenxiaoyuan  阅读(56)  评论(1)  编辑  收藏  举报

相关博文:
阅读排行:
· 阿里最新开源QwQ-32B,效果媲美deepseek-r1满血版,部署成本又又又降低了!
· Manus重磅发布:全球首款通用AI代理技术深度解析与实战指南
· 开源Multi-agent AI智能体框架aevatar.ai,欢迎大家贡献代码
· 被坑几百块钱后,我竟然真的恢复了删除的微信聊天记录!
· AI技术革命,工作效率10个最佳AI工具
点击右上角即可分享
微信分享提示