numpy.core._exceptions.UFuncTypeError: ufunc 'subtract' did not contain a loop with signature matching types (dtype('<U1'), dtype('float64')) -> None
在机器学习实战
的Logistic回归梯度上升优化算法中遇到了这个问题
numpy.core._exceptions.UFuncTypeError: ufunc 'subtract' did not contain a loop with signature matching types (dtype('<U1'), dtype('float64')) -> None
代码如下
import math
import numpy as np
def loadDataSet():
dataSet = []
labelSet = []
with open('testSet.txt') as fbj:
for line in fbj.readlines():
lineArr = line.strip().split()
# print(lineArr)
dataSet.append([1.0, float(lineArr[0]), float(lineArr[1])])
labelSet.append(lineArr[2])
return dataSet, labelSet
def sigmoid(inX):
result = 1/(1+np.exp(-inX))
return result
def gradAscent(dataSet, labelSet):
dataMtrix = np.mat(dataSet)
labelMat = np.mat(labelSet).transpose()
m, n = np.shape(dataMtrix)
alpha = 0.001
maxCycles = 500
weights = np.ones((n, 1))
for _ in range(maxCycles):
h = sigmoid(dataMtrix * weights)
error = labelMat - h
weights = weights + alpha * dataMtrix.transpose() * error
return weights
dataSet, labelSet = loadDataSet()
# print(dataSet)
# print(labelSet)
print(gradAscent(dataSet, labelSet))
这里报错说的是数据类型不符不能相减
那么分别查看一下(在jupyter调试)
labelMat.dtype
dtype('<U1')
h.dtype
dtype('float64')
那么解决办法就是将<U1
类型换成float64
但是使用如下方法还是报错
labelMat.dtype = 'float64'
ValueError: When changing to a larger dtype, its size must be a divisor of the total size in bytes of the last axis of the array.
那么只好乖乖使用astype
方法
labelMat = labelMat.astype(np.float64)
修改后的代码及结果如下
import math
import numpy as np
def loadDataSet():
dataSet = []
labelSet = []
with open('testSet.txt') as fbj:
for line in fbj.readlines():
lineArr = line.strip().split()
# print(lineArr)
dataSet.append([1.0, float(lineArr[0]), float(lineArr[1])])
labelSet.append(lineArr[2])
return dataSet, labelSet
def sigmoid(inX):
result = 1/(1+np.exp(-inX))
return result
def gradAscent(dataSet, labelSet):
dataMtrix = np.mat(dataSet)
labelMat = np.mat(labelSet).transpose()
labelMat = labelMat.astype(np.float64)
m, n = np.shape(dataMtrix)
alpha = 0.001
maxCycles = 500
weights = np.ones((n, 1))
for _ in range(maxCycles):
h = sigmoid(dataMtrix * weights)
error = labelMat - h
weights = weights + alpha * dataMtrix.transpose() * error
return weights
dataSet, labelSet = loadDataSet()
# print(dataSet)
# print(labelSet)
print(gradAscent(dataSet, labelSet))
[[ 4.12414349]
[ 0.48007329]
[-0.6168482 ]]