LR模型（逻辑回归）

对回归系数做非负约束（系数 ≥ 0，截距项不受约束），用 L-BFGS-B 求解。

# encoding: utf-8
import numpy as np
from scipy import optimize
from scipy.special import expit


class nnegLR(object):
    """Logistic regression with non-negativity constraints on the coefficients.

    Minimizes

        lambdo/2 * ||w||^2  +  sum_i C_i * log(1 + exp(-y_i * x_i @ w))

    subject to w >= 0 elementwise.  When ``fit_intercept`` is True, an
    intercept column is appended and the intercept itself is unconstrained
    (but still L2-penalized, matching the original implementation).
    """

    def __init__(self, lambdo=1, fit_intercept=True):
        # lambdo: L2 regularization strength.
        # fit_intercept: whether to append an (unconstrained) intercept term.
        self.coef_, self.intercept_ = None, None
        self.lambdo = lambdo
        self.intercept = fit_intercept

    def fit(self, X, y, C=1):
        '''
        Fit the model with the L-BFGS-B box-constrained optimizer.

        X : (N, D) sample matrix, one row per sample.
        y : (N,) labels; {0, 1} labels are converted to {-1, +1}.
        C : per-sample weight, scalar or array broadcastable against y.
        '''
        y = np.asarray(y)
        # Convert {0, 1} labels to {-1, +1} on a COPY -- the original code
        # wrote through the caller's array in place.
        if set(y.tolist()) == {0, 1}:
            y = np.where(y == 1, 1, -1)

        N, D = X.shape
        if self.intercept is True:
            # Append a constant column so the intercept is just one more weight.
            X = np.concatenate((X, np.ones((N, 1))), axis=1)

        def func_grad(w):
            """Return (objective value, gradient) at w for fmin_l_bfgs_b."""
            yz = y * (X @ w)
            f = np.sum(self.lambdo / 2 * (w * w))
            # log(1 + exp(-yz)) == logaddexp(0, -yz): numerically stable for
            # both signs of yz, without evaluating overflowing branches.
            f += np.sum(C * np.logaddexp(0, -yz))
            # expit is the overflow-safe sigmoid 1 / (1 + exp(-yz)).
            z = expit(yz)
            g = self.lambdo * w + X.T @ (C * (z - 1) * y)
            return f, g

        # Box constraints: coefficients in [0, inf), intercept (last entry,
        # if present) unconstrained.
        d = D + 1 if self.intercept is True else D
        w0 = np.ones(d)
        bounds = [(0, np.inf)] * D
        if self.intercept is True:
            bounds.append((-np.inf, np.inf))
        w, f_min, info = optimize.fmin_l_bfgs_b(func_grad, w0, bounds=bounds)

        # Expose the fitted parameters sklearn-style.
        self.coef_ = w[:D]
        self.intercept_ = w[-1] if self.intercept is True else None

    def predict(self, X):
        """Return P(y = 1 | x) for each row of X as an (N,) array."""
        assert self.coef_ is not None, 'call fit() before predict()'

        z = X @ self.coef_
        if self.intercept is True:
            z += self.intercept_
        # Stable sigmoid: no overflow warning for large |z|.
        return expit(z)


if __name__ == '__main__':
    # Smoke test: two Gaussian blobs, then compare against sklearn's
    # unconstrained logistic regression (large C ~ negligible penalty).
    # Uses the module-level numpy import instead of the discouraged
    # star-style `from pylab import ...`.
    X = np.r_[np.random.randn(1000, 2) + 1, np.random.randn(100, 2) - 1]
    y = np.r_[np.ones(1000), np.zeros(100)]

    lr = nnegLR(lambdo=0.001)
    lr.fit(X, y)
    print(lr.coef_)

    from sklearn import linear_model
    logreg = linear_model.LogisticRegression(C=1e3)
    logreg.fit(X, y)

    print(logreg.coef_)

    # print(lr.predict(X))
    # print(logreg.predict_proba(X))
posted @ 2020-05-15 17:18  bregman  阅读(707)  评论(0编辑  收藏  举报