Logistic Regression: Linear Decision Boundary (Python 3)

Import the required libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

Load the data

#load data
fpath  = r'.../ex2data1.txt'
df = pd.read_table(fpath, engine='python', header=None, sep=',')
df.rename(columns={0:'Exam_1', 1:'Exam_2', 2:'Admitted'}, inplace=True)

Data preprocessing

#min-max scale each column to the [0, 1] range
df_norm = df.apply(lambda x: (x - x.min()) / (x.max() - x.min()))
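
A quick way to verify the scaling (a minimal sketch using the df_norm built above): every scaled column should now span [0, 1], and the 0/1 Admitted column is left unchanged by min-max scaling.

#check that each column now lies in [0, 1]
print(df_norm.describe().loc[['min', 'max']])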

Data visualization

#plot data
plt.scatter(df[df['Admitted'] == 0]['Exam_1'],df[df['Admitted'] == 0]['Exam_2'], edgecolors='k', color='y', label='Not Admitted')
plt.scatter(df[df['Admitted'] == 1]['Exam_1'],df[df['Admitted'] == 1]['Exam_2'], marker='+', color='k', label='Admitted')
plt.legend(loc='upper right')
plt.xlabel('Exam 1 Score')
plt.ylabel('Exam 2 Score')
plt.title('Figure 1: Scatter plot of training data')


Sigmoid Function

  • hypothesis: $h_{\theta}(x) = g(\theta^{T}x)$

  • sigmoid function: $g(z) = \frac{1}{1 + e^{-z}}$

#define the sigmoid (hypothesis) function
def sgd_f(theta_mtr, x_mtr):
    z = x_mtr * theta_mtr.T
    g = 1 / (1 + np.exp(-z))
    return g
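
A minimal sanity check of the function above (the input values here are arbitrary and only for illustration): with all-zero parameters the hypothesis returns 0.5 for any input.

#sanity check: theta = 0 gives g(0) = 0.5
theta_test = np.matrix(np.zeros([1, 3]))
x_test = np.matrix([1.0, 0.3, 0.7])   # [x0, x1, x2] with bias term x0 = 1
print(sgd_f(theta_test, x_test))      # expected: [[0.5]]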

#define the cost function
def cost_f(theta_mtr, x_mtr, y_mtr):
    m, n = x_mtr.shape
    h = sgd_f(theta_mtr, x_mtr)
    lh = y_mtr.T*np.log(h) + (1-y_mtr).T*np.log(1-h)
    J = - lh / m            # the minus sign turns the log-likelihood lh into a convex cost that gradient descent can minimize
    return J[0,0]
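
For reference, the quantity implemented above is the averaged negative log-likelihood:

  • cost function: $J(\theta) = -\frac{1}{m}\sum_{i=1}^{m}\left[y^{(i)}\log h_{\theta}(x^{(i)}) + (1-y^{(i)})\log\left(1-h_{\theta}(x^{(i)})\right)\right]$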

Gradient descent

# gradient of the cost function with respect to theta
def derv_theta(theta_mtr, x_mtr, y_mtr):
    m,_ = x_mtr.shape
    h = sgd_f(theta_mtr, x_mtr)
    derv =  (1 / m) * ((h - y_mtr).T * x_mtr)
    return derv
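
The quantity computed above is the gradient of the cost function:

  • gradient: $\frac{\partial J(\theta)}{\partial \theta_j} = \frac{1}{m}\sum_{i=1}^{m}\left(h_{\theta}(x^{(i)}) - y^{(i)}\right)x_j^{(i)}$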

# batch gradient descent
def grsdient_bgd(x_mtr, y_mtr, theta_mtr, alpha, epsilon, MaxIter):
    J0 = 0
    J_lst = []
    theta_mtr_lst = []
    for iternum in range(MaxIter):
        J = cost_f(theta_mtr, x_mtr, y_mtr)
        if abs(J-J0) < epsilon:
            iternum=iternum-1
            break
        theta_mtr_lst.append(theta_mtr)
        theta_mtr = theta_mtr - alpha*derv_theta(theta_mtr, x_mtr, y_mtr)
        J0 = J
        J_lst.append(J)
    print('MaxIteration Num is %d'%(iternum+1))
    return J_lst, theta_mtr_lst, (iternum+1)

# stochastic gradient descent (updates theta one sample at a time)
def grsdient_sgd(x_mtr, y_mtr, theta_mtr, alpha, epsilon, MaxIter):
    m, n = x_mtr.shape
    J0 = 0
    J_lst = []
    theta_mtr_lst = []
    for iternum in range(MaxIter):
        J = cost_f(theta_mtr, x_mtr, y_mtr)
        if abs(J-J0) < epsilon:
            iternum=iternum-1
            break
        theta_mtr_lst.append(theta_mtr)
        for i in range(m):
            theta_mtr = theta_mtr - alpha*derv_theta(theta_mtr, x_mtr[i,:], y_mtr[i,:])
        J0 = J
        J_lst.append(J)
    print('MaxIteration Num is %d'%(iternum+1))
    return J_lst, theta_mtr_lst, (iternum+1)

if __name__ == '__main__':
	alpha = 0.1
	epsilon = 1e-7
	MaxIter = 15000
	m,n = df.iloc[:,:2].shape
	#theta
	theta_mtr = np.zeros([1,n+1])
	    
	#x_matrix
	x_mtr = np.matrix(df_norm.iloc[:,:2].values)
	x0 = np.matrix(np.ones(m))
	x_mtr = np.hstack([x0.T, x_mtr])

	#y_matrix
	y_mtr = np.matrix(df.iloc[:,2:].values)
	J = cost_f(theta_mtr, x_mtr, y_mtr)
	J_lst, theta_mtr_lst, Maxiternum = grsdient_bgd(x_mtr, y_mtr, theta_mtr, alpha, epsilon, MaxIter)
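
To confirm that batch gradient descent actually converges, the recorded costs can be plotted against the iteration count (a minimal sketch using the J_lst returned above; the stochastic version grsdient_sgd takes the same arguments and could be compared the same way):

#plot cost J against the iteration number to check convergence
plt.figure(figsize=(8,6))
plt.plot(range(len(J_lst)), J_lst, 'b-')
plt.xlabel('Number of Iterations')
plt.ylabel(r'Cost $J(\theta)$')
plt.title('Convergence of batch gradient descent')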

Linear decision boundary

#solve for the coefficients of the linear decision boundary (in the normalized feature space)
line_theta = theta_mtr_lst[-1]
line_param_0 = line_theta[0,0] / -line_theta[0,2]
line_param_1 = line_theta[0,1] / -line_theta[0,2]
fig = plt.figure(figsize=(8,6))
ax = fig.add_subplot(111)

#plot the decision boundary
plot_x = np.linspace(0,0.9,100)
plot_y = line_param_1 * plot_x + line_param_0
plot_x = plot_x * (df['Exam_1'].max() - df['Exam_1'].min()) + df['Exam_1'].min()
plot_y = plot_y * (df['Exam_2'].max() - df['Exam_2'].min()) + df['Exam_2'].min()

ax.plot(plot_x, plot_y, 'r-', label='classify line')

#plot original data
ax.scatter(df[df['Admitted'] == 0]['Exam_1'], df[df['Admitted'] == 0]['Exam_2'], edgecolors='k', color='y', label='Not Admitted')

ax.scatter(df[df['Admitted'] == 1]['Exam_1'],df[df['Admitted'] == 1]['Exam_2'],  marker='+', color='k', label='Admitted')

ax.set(xlim=[30,100], ylim=[30,100], title='Figure 2: Training data with decision boundary', xlabel='Exam 1 Score', ylabel='Exam 2 Score')
plt.legend(loc='upper right')
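
The boundary coefficients above follow from the decision rule $h_{\theta}(x) \ge 0.5 \Leftrightarrow \theta^{T}x \ge 0$: setting $\theta^{T}x = 0$ in the normalized feature space gives

  • decision boundary: $\theta_0 + \theta_1 x_1 + \theta_2 x_2 = 0 \;\Rightarrow\; x_2 = -\frac{\theta_0}{\theta_2} - \frac{\theta_1}{\theta_2}x_1$

which is exactly line_param_0 and line_param_1 above; plot_x and plot_y are then mapped back to the original exam-score scale by inverting the min-max normalization.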


Prediction

#predict
def peedic_f(exam1, exam2):
    theta_mtr = theta_mtr_lst[-1]
    x1 = (exam1 - df['Exam_1'].min()) / (df['Exam_1'].max() - df['Exam_1'].min())
    x2 = (exam2 - df['Exam_2'].min()) / (df['Exam_2'].max() - df['Exam_2'].min())
    x_predict = np.matrix([1, x1, x2]) 
    prob = sgd_f(theta_mtr, x_predict)[0,0]
    if prob > 0.5:
        return 1
    else:
        return 0
df['Predicted_label'] = df.apply(lambda x: peedic_f(x['Exam_1'], x['Exam_2']), axis=1)
df[df['Admitted']!=df['Predicted_label']]
  • Using the fitted logistic regression as a classifier to predict admission on the original data, 8 samples have predictions that disagree with the actual labels, and they correspond one-to-one with the 8 misclassified points in the figure above (a quick accuracy check is sketched below).
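
A quick accuracy check on the training set (a minimal sketch using the Predicted_label column created above):

#training accuracy: fraction of samples where the prediction matches the actual label
accuracy = (df['Admitted'] == df['Predicted_label']).mean()
print('Training accuracy: %.2f%%' % (accuracy * 100))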

How the three $\theta$ values change during the iterations

fig = plt.figure(figsize=(8,6))
ax = fig.add_subplot(111)
#extract theta_0, theta_1, theta_2 from each recorded theta matrix
theta0_lst, theta1_lst, theta2_lst = [], [], []
for i in theta_mtr_lst:
    theta0_lst.append(i[0,0])
    theta1_lst.append(i[0,1])
    theta2_lst.append(i[0,2])

#number of iterations
IterNum = list(range(0,Maxiternum))

ax.plot(IterNum, theta0_lst, 'r-', label=r'$\theta_0$')
ax.plot(IterNum, theta1_lst, 'b-', label=r'$\theta_1$')
ax.plot(IterNum, theta2_lst, 'g-', label=r'$\theta_2$')
ax.set(xlabel='Number of Iterations', ylabel=r'$\theta$', title=r'Relation between Number of Iterations and $\theta$')
plt.legend()
  • As the figure shows, $\theta_0$ first increases and then decreases, while $\theta_1$ and $\theta_2$ increase monotonically; eventually all three $\theta$ values level off at constant values.

⚠️ The author is not from a related field and is self-taught after switching majors; this post is written purely for exchange and sharing. If you spot any mistakes, please point them out in the comments, thank you 😊.
⚠️ Reference blog, 《斯坦福机器学习笔记》 (Stanford Machine Learning Notes): https://yoyoyohamapi.gitbooks.io/mit-ml/content/
⚠️ Andrew Ng's Machine Learning course assignments, source data download: https://github.com/nsoojin/coursera-ml-py.