5.梯度寻优

迭代法求方程组的解:

import numpy as np
from numpy import *
from common_libs import *
import matplotlib.pyplot as plt

#消元法求原方程组的解
A = mat([[8,-3,2],[4,11,-1],[6,3,12]])
b = mat([20,33,36])
result = linalg.solve(A,b.T)
print(result)

#迭代法求方程组的解
error = 1.0e-6
steps = 100
n=3
xk = zeros((n,1))
errorlist=[]
B0 = mat([[0,3/8,-1/4],[-4/11,0,1/11],[-1/2,-1/4,0]])
f = mat([5/2,3,3]).T
for k in range(steps):
    xk_1 = xk
    xk = B0*xk + f
    errorlist.append(linalg.norm(xk - xk_1))
    if errorlist[-1] < error:
        print(k+1)
        break

print(xk)

matpts = zeros((2,k+1))
matpts[0] = linspace(1,k+1,k+1)
matpts[1] = array(errorlist)
drawScatter(plt,matpts)
plt.show()

 logistic_test

# -*- coding: utf-8 -*-
# Filename : Recommand_lib.py

from numpy import *
import numpy as np
import operator
import scipy.spatial.distance as dist
import matplotlib.pyplot as plt


def savefile(savepath, content):
    fp = open(savepath, "wb")
    fp.write(content)
    fp.close()


# 数据文件转矩阵
# path: 数据文件路径
# delimiter: 文件分隔符
def file2matrix(path, delimiter):
    fp = open(path, "rb")  # 读取文件内容
    content = fp.read().decode()
    fp.close()
    rowlist = content.splitlines()  # 按行转换为一维表
    # 逐行遍历,结果按分隔符分割为行向量
    testlist = [row.split(delimiter) for row in rowlist if row.strip()]
    # 返回转换后的矩阵形式
    return mat(testlist)


# 欧氏距离
eps = 1.0e-6


def distEclud(vecA, vecB):
    return linalg.norm(vecA - vecB) + eps


# 相关系数
def distCorrcoef(vecA, vecB):
    return corrcoef(vecA, vecB, rowvar=0)[0][1]


# Jaccard距离
def distJaccard(vecA, vecB):
    temp = mat([array(vecA.tolist()[0]), array(vecB.tolist()[0])])
    return dist.pdist(temp, 'jaccard')


# 余弦相似度
def cosSim(vecA, vecB):
    return (dot(vecA, vecB.T) / ((linalg.norm(vecA) * linalg.norm(vecB)) + eps))[0, 0]


# 绘制散点图
def drawScatter(plt, mydata, size=20, color='blue', mrkr='o'):
    m, n = shape(mydata)
    if m > n and m > 2:
        plt.scatter(mydata.T[0], mydata.T[1], s=size, c=color, marker=mrkr)
    else:
        plt.scatter(mydata[0], mydata[1], s=size, c=color, marker=mrkr)

        # 绘制分类点


def drawScatterbyLabel(plt, Input):
    m, n = shape(Input)
    target = Input[:, -1]

    for i in range(m):
        if float(target[i,0]) == 1.0:
            # print("a:",Input[i, 0], Input[i, 1])
            plt.scatter(float(Input[i, 0]), float(Input[i, 1]), c='blue', marker='o')
        else:
            # print("b:",Input[i, 0], Input[i, 1])
            plt.scatter(float(Input[i, 0]), float(Input[i, 1]), c='red', marker='s')


# 硬限幅函数
def hardlim(dataSet):
    dataSet[nonzero(dataSet.A > 0)[0]] = 1
    dataSet[nonzero(dataSet.A <= 0)[0]] = 0
    return dataSet


# Logistic函数
def logistic(wTx):
    return 1.0 / (1.0 + exp(-wTx))


def buildMat(dataSet):
    m, n = shape(dataSet)
    dataMat = zeros((m, n))
    dataMat[:, 0] = 1   #矩阵的第一列全为1
    dataMat[:, 1:] = dataSet[:, :-1]    #第二列到倒数第二列保持原数据,最后一列被删除
    return dataMat


# 分类函数
def classifier(testData, weights):
    prob = logistic(sum(testData * weights))  # 求取概率--判别算法
    if prob > 0.5:
        return 1.0  # prob>0.5 返回为1
    else:
        return 0.0  # prob<=0.5 返回为0


# 最小二乘回归,用于测试
def standRegres(xArr, yArr):
    xMat = mat(ones((len(xArr), 2)))
    yMat = mat(ones((len(yArr), 1)))
    xMat[:, 1:] = (mat(xArr).T)[:, 0:]
    yMat[:, 0:] = (mat(yArr).T)[:, 0:]
    xTx = xMat.T * xMat
    if linalg.det(xTx) == 0.0:
        print("This matrix is singular, cannot do inverse")
        return
    ws = xTx.I * (xMat.T * yMat)
    return ws
# -*- coding: utf-8 -*-
import os
import sys
import numpy as np
import operator
from numpy import *
from common_libs import *
import matplotlib.pyplot as plt

# 1.导入数据
Input = file2matrix(r"D:\cgw\code\testSet.txt","\t")
# print(Input)
target1 = Input[:,-1] #获取分类标签列表
target = target1.astype(np.float32)
[m,n] = shape(Input)
# # 2.按分类绘制散点图
drawScatterbyLabel(plt,Input)

# 3.构建b+x 系数矩阵:b这里默认为1
dataMat = buildMat(Input)
# print(dataMat[:10,:])
# 4. 定义步长和迭代次数
alpha = 0.001 # 步长
steps = 500  # 迭代次数
weights = ones((n,1))# 初始化权重向量
# 5. 主程序
for k in range(steps):
    gradient = dataMat*mat(weights) # 梯度
    output = logistic(gradient)
    errors = target - output # 计算误差
    weights = weights + alpha*dataMat.T*errors


print(weights)    # 输出训练后的权重
# 6. 绘制训练后超平面
X = np.linspace(-7,7,100)
#y= -b-w*x
# b:weights[0]/weights[2]
# w:weights[1]/weights[2]
Y = -(double(weights[0])+X*(double(weights[1])))/double(weights[2])
plt.plot(X,Y)
plt.show()

 随机梯度下降算法

# -*- coding: utf-8 -*-
import os
import sys
import numpy as np
import operator
from numpy import *
from common_libs import *
import matplotlib.pyplot as plt

# # 1.导入数据
# Input = file2matrix(r"D:\cgw\code\testSet.txt","\t")
# # print(Input)
# target1 = Input[:,-1] #获取分类标签列表
# target = target1.astype(np.float32)
# [m,n] = shape(Input)
# # # 2.按分类绘制散点图
# drawScatterbyLabel(plt,Input)
#
# # 3.构建b+x 系数矩阵:b这里默认为1
# dataMat = buildMat(Input)
# # print(dataMat[:10,:])
# # 4. 定义步长和迭代次数
# alpha = 0.001 # 步长
# steps = 500  # 迭代次数
# weights = ones((n,1))# 初始化权重向量
# weightlist = []
# # 5. 主程序
# for k in range(steps):
#     gradient = dataMat*mat(weights) # 梯度
#     output = logistic(gradient)
#     errors = target - output # 计算误差
#     weights = weights + alpha*dataMat.T*errors
#     weightlist.append(weights)
#
# X = np.linspace(-5,5,100)
# Ylist = []
# lenw = len(weightlist)
# for indx in range(lenw):
#     if indx % 20 == 0:
#         weight = weightlist[indx]
#         Y=-(double(weight[0]) + X*(double(weight[1])))/double(weight[2])
#         plt.plot(X,Y)
#         plt.annotate("hplane:"+str(indx),xy = (X[99],Y[99]))#分类超平面注释
#
# plt.show()
#
#
#
#
# # #分类器函数
# # def classifier(testData,weights):
# #     prob = logistic(sum(testData*weights))
# #     if prob > 0.5:
# #         return 1.0
# #     else:
# #         return 0.0
# #
# #
# # weights = mat([[4.12414349],[0.48007329],[-0.6168482]])
# # testdata = mat([-0.147324,2.874846])
# # m,n = shape(testdata)
# # print(m,n)
# # testmat = zeros((m,n+1))
# # print(testmat)
# # testmat[:,0] = 1
# # print(testmat)
# # testmat[:,1:] = testdata
# # print(testmat)
# # print(classifier(testmat,weights))


#随机梯度下降
# 1.导入数据
Input = file2matrix(r"D:\cgw\code\testSet.txt","\t")
# print(Input)
target1 = Input[:,-1] #获取分类标签列表
target = target1.astype(np.float32)
[m,n] = shape(Input)

#2.按分类绘制散点图
drawScatterbyLabel(plt,Input)


#3.构建b+x系数矩阵,b默认为1
dataMat = buildMat(Input)
# print(dataMat)

weightlist = []

#4. 定义迭代次数
steps = 500
weights = ones(n) #初始化权重向量

#算法主程序
#1.对数据集的每个行向量进行m次随机抽取,保证每个向量都能抽取到,且不重复
#2.对抽取之后的行向量计算动态步长
#3.进行梯度计算
#4.求得行向量的权值,合并为矩阵的权值
for j in range(steps):
    dataIndex = list(range(m)) #以导入数据的行数m为个数产生索引向量:0-99
    for i in range(m):
        alpha = 2/(1.0+j+i) + 0.0001 #动态修改alpha步长
        randIndex = int(random.uniform(0,len(dataIndex)))#生成0-m之间的随机索引
        #计算dataMat随机索引与权重的点积和
        vectSum = sum(dataMat[randIndex] * weights.T)
        grad = logistic(vectSum) #计算点积和的梯度
        errors = target[randIndex] - grad #计算误差
        weights = weights + alpha*errors *dataMat[randIndex]#计算行向量权重
        del(dataIndex[randIndex])
    weightlist.append(weights)

lenwl = len(weightlist)
weightmat = zeros((lenwl,n))
i = 0
for weight in weightlist:
    weightmat[i,:] = weight
    i += 1



print(weights)
weights = weights.tolist()[0]
#6.绘制训练后的超平面
X = np.linspace(-5,5,100)
Y = -(double(weights[0]) + X * (double(weights[1])))/double(weights[2])
plt.plot(X,Y)

fig = plt.figure()
axes1 = plt.subplot(211)
axes2 = plt.subplot(212)
X1 = np.linspace(0,lenwl,lenwl)
axes1.plot(X1,-weightmat[:,0]/weightmat[:,2])#绘制斜距
axes2.plot(X1,-weightmat[:,1]/weightmat[:,2])#绘制斜率
#生成斜距回归线
ws = standRegres(X1,-weightmat[:,0]/weightmat[:,2])
Y1 = ws[0,0] + X1*ws[1,0]
axes1.plot(X1,Y1,color='red',linewidth=2,linestyle="-")
plt.show()

 

posted @ 2019-06-28 11:28  喵小喵~  阅读(374)  评论(0编辑  收藏  举报