Logistic Regression for Iris Classification
from sklearn.datasets import load_iris
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report
iris=load_iris()
#print(iris)
print(iris['target_names'])#class names
data=iris.data#sample data: 150 samples, 4 features each
target=iris.target#class labels
print(data.shape)
print(target.shape)
['setosa' 'versicolor' 'virginica']
(150, 4)
(150,)
1. Classifying using the first two features
For a multi-class problem, logistic regression can only separate two classes at a time, so with n classes we need to rebuild the label set y once per class (taking class i as the positive class: set its labels to 1 and all other labels to 0). That gives n binary classifiers with n parameter vectors. To predict a sample x[i], we evaluate all n classifiers on it and output the class whose classifier returns the highest probability, as sketched below.
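Sketched in NumPy, this one-vs-rest scheme amounts to rebuilding the labels once per class and then taking an argmax over the per-class probabilities (a minimal illustration of the idea, not the training code used in this post):
import numpy as np

def one_vs_rest_labels(y, k):
    # labels for the k-th binary classifier: 1 for class k, 0 for every other class
    return (y == k).astype(int)

def one_vs_rest_predict(probas):
    # probas: (n_samples, n_classes), one column of probabilities per binary classifier;
    # the prediction is the class whose classifier is most confident
    return np.argmax(probas, axis=1)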
#Use only two features here to make plotting easier
x=data[:,0:2]
y=target
a0=[]
b0=[]
a1=[]
b1=[]
a2=[]
b2=[]
for i in range(len(data)):
    if y[i]==0:
        a0.append(x[i,0])
        b0.append(x[i,1])
    elif y[i]==1:
        a1.append(x[i,0])
        b1.append(x[i,1])
    else:
        a2.append(x[i,0])
        b2.append(x[i,1])
scatter1=plt.scatter(a0,b0,c='b',marker='o')
scatter2=plt.scatter(a1,b1,c='r',marker='x')
scatter3=plt.scatter(a2,b2,c='y',marker='s')
plt.legend(handles=[scatter1,scatter2,scatter3],labels=['setosa','versicolor','virginica'],loc='best')
plt.show()
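As an aside, the same scatter plot can be produced without the explicit loop by using NumPy boolean masks (a sketch equivalent to the code above):
# equivalent plot using boolean masks instead of per-sample appends
for k, color, marker, name in zip(range(3), ['b','r','y'], ['o','x','s'], iris['target_names']):
    plt.scatter(x[y==k,0], x[y==k,1], c=color, marker=marker, label=name)
plt.legend(loc='best')
plt.show()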
def sigmoid(x):#sigmoid function
    return 1.0/(1+np.exp(-x))
def cost(x,y,theta):#cross-entropy cost function
    x=np.matrix(x)
    y=np.matrix(y)
    theta=np.matrix(theta)
    first=np.multiply(y,np.log(sigmoid(x*theta)))
    second=np.multiply(1-y,np.log(1-sigmoid(x*theta)))
    return np.sum(first+second)/(-len(x))
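One practical caveat worth noting: when the sigmoid saturates to exactly 0 or 1, np.log emits warnings and the cost becomes inf. A minimal sketch of a safer variant that clips the probabilities first (eps=1e-12 is an arbitrary choice, not part of the original code):
def safe_cost(x, y, theta, eps=1e-12):
    x = np.matrix(x)
    y = np.matrix(y)
    theta = np.matrix(theta)
    # keep probabilities strictly inside (0, 1) so np.log never sees 0
    h = np.clip(sigmoid(x*theta), eps, 1-eps)
    return np.sum(np.multiply(y, np.log(h)) + np.multiply(1-y, np.log(1-h)))/(-len(x))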
def grad(x,y,theta,epochs=1000,lr=0.001):#batch gradient descent
    x=np.matrix(x)
    y=np.matrix(y)
    theta=np.matrix(theta)
    m=x.shape[0]
    costList=[]
    for i in range(epochs+1):
        h=sigmoid(x*theta)#predicted probabilities
        delta=x.T*(h-y)/m#gradient of the cost with respect to theta
        theta=theta-lr*delta
        if i%50==0:
            costList.append(cost(x,y,theta))#record the cost every 50 epochs
    return theta,costList
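The update uses the closed-form gradient of the cross-entropy cost, x.T*(h-y)/m. If you ever modify the cost function, a finite-difference check is a cheap way to confirm the gradient is still consistent with it (a sketch; the eps and tolerance values are arbitrary choices):
def numeric_grad_check(x, y, theta, eps=1e-5):
    theta = np.matrix(theta, dtype=float)
    # analytic gradient, same formula as inside grad()
    analytic = np.matrix(x).T*(sigmoid(np.matrix(x)*theta)-np.matrix(y))/len(x)
    for j in range(theta.shape[0]):
        tp = theta.copy(); tp[j] += eps
        tm = theta.copy(); tm[j] -= eps
        # central finite difference of the cost along parameter j
        numeric = (cost(x, y, tp)-cost(x, y, tm))/(2*eps)
        assert abs(numeric-analytic[j,0]) < 1e-6  # should agree to ~6 decimal places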
1. Classifying setosa: separating setosa from the other species
x=np.concatenate((np.ones((len(x),1)),x),axis=1)#add a bias column of ones
theta=np.ones((x.shape[1],1))
y1=[]#rebuild the labels: setosa -> 1, everything else -> 0
for i in range(len(x)):
    if y[i]!=0:
        y1.append([0])
    else:
        y1.append([1])
theta,costList=grad(x,y1,theta,3000,0.6)
a=np.linspace(0,3000,61)#61 cost samples, one per 50 epochs
plt.plot(a,costList,c='y')
plt.show()
theta
matrix([[ 5.0040943 ],
[-6.76191622],
[10.16162326]])
plt.scatter(x[:,1],x[:,2],c=y)
#decision boundary: theta0+theta1*x1+theta2*x2=0, so x2=(-theta0-theta1*x1)/theta2
m=[[4.5],[8.0]]
n=(-theta[0]-m*theta[1])/theta[2]
plt.plot(m,n,c='r')
plt.show()
As the plot shows, two features are enough to separate setosa cleanly from versicolor and virginica.
2. Classifying versicolor
theta2=np.ones((x.shape[1],1))
y2=[]#rebuild the labels: versicolor -> 1, everything else -> 0
for i in range(len(x)):
    if y[i]!=1:
        y2.append([0])
    else:
        y2.append([1])
theta2,costList=grad(x,y2,theta2,8000,0.28)
a=np.linspace(0,8000,161)#161 cost samples, one per 50 epochs
plt.plot(a,costList,c='y')
plt.show()
theta2
matrix([[ 8.80218283],
[ 0.25495515],
[-3.45489597]])
plt.scatter(x[:,1],x[:,2],c=y)
m=[[4.5],[8.0]]
n=(-theta[0]-m*theta[1])/theta[2]#setosa boundary
l=(-theta2[0]-m*theta2[1])/theta2[2]#versicolor boundary
plt.plot(m,n,c='b')
plt.plot(m,l,c='r')
plt.show()
3. Classifying virginica
theta3=np.ones((x.shape[1],1))
y3=[]#rebuild the labels: virginica -> 1, everything else -> 0
for i in range(len(x)):
    if y[i]!=2:
        y3.append([0])
    else:
        y3.append([1])
theta3,costList=grad(x,y3,theta3,6000,0.1)
a=np.linspace(0,6000,121)#121 cost samples, one per 50 epochs
plt.plot(a,costList,c='y')
plt.show()
theta3
matrix([[-6.42102857],
[ 1.79488608],
[-1.6847187 ]])
plt.scatter(x[:,1],x[:,2],c=y)
m=[[4.5],[8.0]]
n=(-theta[0]-m*theta[1])/theta[2]#setosa boundary
l=(-theta2[0]-m*theta2[1])/theta2[2]#versicolor boundary
q=(-theta3[0]-m*theta3[1])/theta3[2]#virginica boundary
plt.plot(m,n,c='b')
plt.plot(m,l,c='r')
plt.plot(m,q,c='y')
plt.show()
Looking at setosa (0), versicolor (1), and virginica (2): class 0 can be separated from the other two with a single straight line, but classes 1 and 2 clearly cannot be split by one line in this two-feature space, so we need to bring in more sample features.
2. Evaluating the model with all 4 features together
X=np.concatenate((np.ones((len(data),1)),data),axis=1)#all 4 features plus a bias column
Y=target
print(X.shape,Y.shape)
(150, 5) (150,)
Theta1=np.ones((X.shape[1],1))
Y1=[]#rebuild the labels: setosa -> 1, everything else -> 0
for i in range(len(X)):
    if Y[i]!=0:
        Y1.append([0])
    else:
        Y1.append([1])
Theta1,costList=grad(X,Y1,Theta1,3000,0.6)
a=np.linspace(0,3000,61)#61 cost samples, one per 50 epochs
plt.plot(a,costList,c='y')
plt.show()
Theta1
matrix([[ 1.33220841],
[ 0.52729505],
[ 3.3411161 ],
[-4.85587593],
[-1.63431095]])
Theta2=np.ones((X.shape[1],1))
Y2=[]#rebuild the labels: versicolor -> 1, everything else -> 0
for i in range(len(X)):
    if Y[i]!=1:
        Y2.append([0])
    else:
        Y2.append([1])
Theta2,costList=grad(X,Y2,Theta2,8000,0.1)
a=np.linspace(0,8000,161)#161 cost samples, one per 50 epochs
plt.plot(a,costList,c='y')
plt.show()
Theta2
matrix([[ 4.75032157],
[ 0.11228482],
[-2.45581587],
[ 1.16623831],
[-2.70084823]])
Theta3=np.ones((X.shape[1],1))
Y3=[]#rebuild the labels: virginica -> 1, everything else -> 0
for i in range(len(X)):
    if Y[i]!=2:
        Y3.append([0])
    else:
        Y3.append([1])
Theta3,costList=grad(X,Y3,Theta3,3000,0.6)
a=np.linspace(0,3000,61)#61 cost samples, one per 50 epochs
plt.plot(a,costList,c='y')
plt.show()
Theta3
matrix([[-5.40077764],
[-4.5160182 ],
[-4.88791515],
[ 6.77915362],
[ 8.57906837]])
predict_y1=sigmoid(X*Theta1)#probability of setosa
predict_y2=sigmoid(X*Theta2)#probability of versicolor
predict_y3=sigmoid(X*Theta3)#probability of virginica
predict_y=[]
for i in range(len(X)):
    #predict the class whose one-vs-rest classifier is most confident
    if max(predict_y1[i],predict_y2[i],predict_y3[i])==predict_y1[i]:
        predict_y.append([0])
    elif max(predict_y1[i],predict_y2[i],predict_y3[i])==predict_y2[i]:
        predict_y.append([1])
    else:
        predict_y.append([2])
for i in range(len(X)):
    print(predict_y1[i],' ',predict_y2[i],' ',predict_y3[i],predict_y[i])
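The same decision can be written without the loop by stacking the three probability columns and taking an argmax per row (a sketch equivalent to the loop above):
# stack the three one-vs-rest probability columns into shape (150, 3)
probas = np.asarray(np.hstack((predict_y1, predict_y2, predict_y3)))
# index of the most confident classifier per sample = predicted class
predict_y_vec = np.argmax(probas, axis=1)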
print(classification_report(Y,predict_y))#accuracy is 0.97
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        50
           1       0.96      0.96      0.96        50
           2       0.96      0.96      0.96        50

    accuracy                           0.97       150
   macro avg       0.97      0.97      0.97       150
weighted avg       0.97      0.97      0.97       150
3. Logistic regression with sklearn
from sklearn.linear_model import LogisticRegression
model=LogisticRegression()#sklearn handles the multi-class case internally
model.fit(data,target)
prediction=model.predict(data)
print(model.intercept_)#one intercept per class
print(model.coef_)#one weight row per class, one column per feature
[ 9.84186228 2.21913963 -12.06100191]
[[-0.41943756 0.96749376 -2.5205723 -1.084326 ]
[ 0.53147635 -0.3150198 -0.20094963 -0.94785159]
[-0.11203879 -0.65247397 2.72152193 2.03217759]]
print(classification_report(target,prediction))#accuracy is 0.97
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        50
           1       0.98      0.94      0.96        50
           2       0.94      0.98      0.96        50

    accuracy                           0.97       150
   macro avg       0.97      0.97      0.97       150
weighted avg       0.97      0.97      0.97       150
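One caveat worth noting: both reports above score the models on the same 150 samples they were trained on, which tends to overestimate real-world performance. A minimal sketch of a fairer evaluation with a held-out test set (test_size=0.3 and random_state=0 are arbitrary choices):
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(data, target, test_size=0.3, random_state=0)
model = LogisticRegression()
model.fit(x_train, y_train)  # fit only on the training split
print(classification_report(y_test, model.predict(x_test)))  # score on unseen data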