多元线性回归(pandas/scikit-learn)

import pandas as pd
from sklearn.linear_model import LinearRegression

# train_test_split lives in sklearn.model_selection since scikit-learn 0.18;
# the sklearn.cross_validation module was removed in 0.20. Keep the old
# location as a fallback for legacy installations.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

# Data series 1: used below as the 'tem16_1' feature column.
tem16_1 = [3113,3122,3131,3137,3146,3149,3157,3166,3172,3178,3172,3151,3137,3131,3128,3107,3095,3081,3070,3049,3038,3023,3011,2997,2988,2985,2974,2957,2942,2928,2916,2901,2887,2884,2879,2870,2856,2848,2836,2828,2810,2802,2793,2785,2776,2768,2762,2756,2762,2765,2762,2771,2748,2731,2708,2697,2674,2697,2702,2725,2737,2759,2771,2787,2802,2819,2833,2845,2856,2859,2873,2879,2896,2898,2910,2922,2933,2945,2957,2968,2980,2985,3000,3011,3023,3038,3046,3058,3070,3072,3084,3090,3098,3107,3119,3122,3131,3134,3140,3143,3149,3154,3151,3166,3189,3178,3181,3169,3166,3169,3154,3137,3116,3107,3090,3093,3087,3075,3052,3038,3026,3017,3008,3002,2994,2991,2988,2977,2957,2951,2936,2931,2936,2957,2980,2962,2948,2928,2913,2898,2879,2867,2865,2856,2853,2848,2842,2833,2830,2819,2813,2807,2799,2796,2787,2785,2779,2773]
# Data series 2: used below as the 'tem16_2' feature column.
tem16_2 = [4185,4209,4233,4256,4279,4303,4326,4349,4372,4398,4421,4414,4387,4383,4360,4349,4326,4303,4279,4264,4233,4209,4185,4161,4113,4088,4064,4043,4019,4023,4002,3978,3954,3933,3912,3891,3870,3845,3824,3802,3781,3759,3715,3693,3671,3648,3626,3603,3580,3534,3507,3496,3490,3467,3451,3429,3419,3432,3454,3464,3486,3496,3518,3542,3565,3588,3611,3633,3656,3678,3700,3722,3744,3766,3788,3809,3831,3873,3895,3919,3940,3964,3988,4010,4039,4060,4084,4108,4133,4157,4181,4205,4228,4252,4276,4299,4322,4345,4368,4391,4414,4436,4458,4455,4470,4485,4462,4458,4451,4447,4425,4402,4387,4364,4368,4349,4326,4307,4287,4264,4240,4221,4225,4197,4177,4169,4149,4133,4113,4088,4064,4043,4019,3995,3985,3968,3947,3926,3905,3884,3863,3842,3820,3799,3777,3773,3759,3737,3715,3693,3671,3648,3626,3603,3576,3553,3530,3507]
# Regression target values: used below as the 'result' column (y).
result =  [2364,2356,2356,2353,2353,2353,2353,2367,2367,2367,2383,2362,2435,2460,2427,2427,2451,2446,2424,2430,2407,2399,2386,2367,2372,2375,2364,2342,2326,2309,2285,2283,2288,2277,2285,2307,2329,2351,2372,2396,2421,2492,2312,2283,2280,2266,2253,2239,2234,2231,2247,2247,2242,2220,2198,2098,2034,2031,2053,2031,2093,2071,2093,2077,2142,2163,2185,2206,2215,2215,2215,2215,2217,2217,2217,2217,2217,2239,2239,2239,2256,2256,2256,2256,2274,2274,2274,2274,2293,2309,2293,2309,2331,2331,2331,2331,2331,2331,2331,2331,2331,2331,2356,2367,2380,2383,2386,2370,2378,2372,2351,2337,2320,2299,2293,2312,2334,2329,2323,2315,2304,2307,2301,2304,2290,2272,2256,2234,2237,2245,2266,2258,2261,2253,2266,2245,2223,2212,2215,2204,2198,2201,2223,2245,2261,2277,2269,2288,2290,2315,2261,2253,2266,2245,2223,2212,2215,2215]
# Store the three series in one DataFrame. Each list is passed as a row
# labelled by its name, then the frame is transposed so the names become
# the column labels.
feature_cols = ['tem16_1', 'tem16_2']
data = pd.DataFrame(
    [tem16_1, tem16_2, result],
    index=feature_cols + ['result'],
).T

# Feature matrix (two predictor columns) and target vector.
X = data[feature_cols]
y = data['result']

# Split into training and test sets using train_test_split's default
# proportions; random_state=1 makes the shuffle reproducible.
# NOTE: X_test / y_test are produced but not evaluated in this script.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

# Fit an ordinary least-squares linear model on the training portion.
linreg = LinearRegression()
linreg.fit(X_train, y_train)

# Report the fitted intercept and coefficients. print(...) with a single
# argument behaves the same on Python 2 and Python 3; list(...) makes the
# (feature name, coefficient) pairs printable on Python 3, where zip() is lazy.
print(linreg.intercept_)
print(linreg.coef_)
print(list(zip(feature_cols, linreg.coef_)))
posted @ 2016-03-04 15:55  dayday+up  阅读(2395)  评论(0)  编辑  收藏  举报