Regularized Logistic Regression with a Nonlinear Decision Boundary (Python 3)

A chip manufacturer needs to run quality checks on a batch of microchips. Each chip goes through two performance tests, microchip_test_1 and microchip_test_2; the question is how a product manager can judge chip quality from these two test scores. A regularized logistic regression model can accomplish this task.

  • Import the required libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
  • Load the data

fpath = r'../ex2data2.txt'
df = pd.read_csv(fpath, header=None)
df.rename(columns={0: 'microchip_test_1', 1: 'microchip_test_2', 2: 'label'}, inplace=True)
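A quick look at the loaded frame helps confirm the parse (a minimal sanity check, not part of the original walkthrough):

print(df.shape)                    # expect (n_samples, 3)
print(df['label'].value_counts())  # class balance: accepted (1) vs. rejected (0)
print(df.head())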
  • Visualize the raw data

fig = plt.figure(figsize=(8,6))
ax = fig.add_subplot(111)
ax.scatter(df[df['label']==1]['microchip_test_1'], df[df['label']==1]['microchip_test_2'],
           marker='+', color='k', label='y=1 Accepted')
ax.scatter(df[df['label']==0]['microchip_test_1'], df[df['label']==0]['microchip_test_2'],
           marker='o', color='y', edgecolors='k', label='y=0 Rejected')

ax.set(xlabel='Microchip Test 1', ylabel='Microchip Test 2', title='Figure 1: Plot of training data')
plt.legend()
plt.show()
  • As the figure above shows, the raw data are not linearly separable.
  • Data preprocessing

    • For linearly inseparable data, the two raw features alone cannot separate the positive and negative examples; the two-dimensional features must be mapped into a higher-dimensional space. This post maps the two features into 28 polynomial features (all terms x_1^i * x_2^j with i + j <= 6), and the function datas below performs the mapping. The same mapping is available through sklearn's PolynomialFeatures (see the sketch after this block), but here it is implemented by hand.
def datas(df, Maxpower):
    # x_mat: map the two raw features to all monomials x_1^i * x_2^j with
    # i + j <= Maxpower; features are stored as rows, giving shape (28, n)
    x_1 = df.iloc[:, :1].values
    x_2 = df.iloc[:, 1:2].values
    n = len(x_1)
    array_0 = np.ones([1, n])                # bias row of ones
    for powerNum in range(1, Maxpower+1):
        for power_index in range(powerNum+1):
            new_feat = (np.power(x_1, power_index) * np.power(x_2, powerNum-power_index)).reshape([n, 1])
            array_0 = np.concatenate((array_0, new_feat.T), axis=0)

    x_mat = np.mat(array_0)

    # theta_mat: one zero-initialized parameter per mapped feature, shape (1, 28)
    m, n = x_mat.shape
    theta_mat = np.mat(np.zeros(m))

    # y_mat: labels as a row vector, shape (1, n)
    y_mat = np.mat(df['label'].values)
    return x_mat, y_mat, theta_mat
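As mentioned above, sklearn's PolynomialFeatures produces the same 28-dimensional mapping. A minimal sketch for comparison (note it returns samples as rows, shape (n, 28), and orders the columns differently than datas):

from sklearn.preprocessing import PolynomialFeatures

poly = PolynomialFeatures(degree=6)    # 28 output features for 2 inputs, bias included
X_poly = poly.fit_transform(df[['microchip_test_1', 'microchip_test_2']].values)
print(X_poly.shape)                    # (n_samples, 28)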
  • Define the logistic regression functions

#sigmoid function
def sgm_f(x_mat, theta_mat):
    z = theta_mat * x_mat
    g = 1 / (1+np.exp(-z))
    return g

#cost function
def cost_f(x_mat, y_mat, theta_mat, re_lambda):
    _,m = x_mat.shape
    h = sgm_f(x_mat, theta_mat)
    J = (1/m) * (-y_mat * np.log(h.T) - (1-y_mat) * np.log((1-h).T)) +\
        (re_lambda / (2*m)) * (theta_mat[0,1:] * theta_mat[0,1:].T)
    return J

# gradient of the regularized cost
def deriv_f(x_mat, y_mat, theta_mat, re_lambda):
    # theta_0 is excluded from regularization: tmp_theta zeroes out its entry
    n,m = x_mat.shape
    tmp_theta = np.mat(np.zeros(n))
    tmp_theta[0,1:] = theta_mat[0,1:]
    h = sgm_f(x_mat, theta_mat)
    deriv_J = (1/m) * ((h - y_mat) * x_mat.T) + (re_lambda / m ) * tmp_theta
    return deriv_J
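For reference, these functions implement the standard regularized logistic regression cost and gradient, with θ₀ left unregularized:

$$J(\theta)=\frac{1}{m}\sum_{i=1}^{m}\Big[-y^{(i)}\log h_\theta(x^{(i)})-\big(1-y^{(i)}\big)\log\big(1-h_\theta(x^{(i)})\big)\Big]+\frac{\lambda}{2m}\sum_{j=1}^{n}\theta_j^2$$

$$\frac{\partial J}{\partial \theta_j}=\frac{1}{m}\sum_{i=1}^{m}\big(h_\theta(x^{(i)})-y^{(i)}\big)x_j^{(i)}+\frac{\lambda}{m}\theta_j \qquad (j\ge 1)$$

where $h_\theta(x)=\frac{1}{1+e^{-\theta^{T}x}}$ and the $\lambda$ term is dropped for $j=0$.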
  • Define the gradient descent functions (batch gradient descent & stochastic gradient descent)

# batch gradient descent: one parameter update per pass over the full training set
def gradient_bgd(x_mat, y_mat, theta_mat, alpha, epsilon, MaxIter, re_lambda):
    J0 = 0
    J_lst = []
    theta_mtr_lst = []
    for iternum in range(MaxIter):
        J = cost_f(x_mat, y_mat, theta_mat, re_lambda)
        if abs(J - J0) < epsilon:   # stop once the cost change falls below epsilon
            iternum = iternum - 1
            break
        theta_mtr_lst.append(theta_mat)
        theta_mat = theta_mat - alpha * deriv_f(x_mat, y_mat, theta_mat, re_lambda)
        J0 = J
        J_lst.append(J)
    print('Stopped after %d iterations' % (iternum + 1))
    return J_lst, theta_mtr_lst, (iternum + 1)

# stochastic gradient descent: one parameter update per training sample
def gradient_sgd(x_mat, y_mat, theta_mat, alpha, epsilon, MaxIter, re_lambda):
    J0 = 0
    J_lst = []
    theta_mtr_lst = []
    n, m = x_mat.shape
    for iternum in range(MaxIter):
        J = cost_f(x_mat, y_mat, theta_mat, re_lambda)
        if abs(J - J0) < epsilon:   # stop once the cost change falls below epsilon
            iternum = iternum - 1
            break
        theta_mtr_lst.append(theta_mat)
        for i in range(m):
            theta_mat = theta_mat - alpha * deriv_f(x_mat[:, i], y_mat[:, i], theta_mat, re_lambda)
        J0 = J
        J_lst.append(J)
    print('Stopped after %d iterations' % (iternum + 1))
    return J_lst, theta_mtr_lst, (iternum + 1)
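A quick way to validate deriv_f is a central finite-difference check against cost_f. The helper below is a hypothetical addition, not part of the original post:

def numeric_grad(x_mat, y_mat, theta_mat, re_lambda, eps=1e-5):
    # central finite-difference approximation of the gradient of cost_f
    grad = np.mat(np.zeros(theta_mat.shape))
    for j in range(theta_mat.shape[1]):
        t_plus, t_minus = theta_mat.copy(), theta_mat.copy()
        t_plus[0, j] += eps
        t_minus[0, j] -= eps
        grad[0, j] = (cost_f(x_mat, y_mat, t_plus, re_lambda)[0, 0]
                      - cost_f(x_mat, y_mat, t_minus, re_lambda)[0, 0]) / (2 * eps)
    return grad

# usage: the two gradients should agree to within numerical precision
# x_mat, y_mat, theta_mat = datas(df, 6)
# print(np.abs(deriv_f(x_mat, y_mat, theta_mat, 1) - numeric_grad(x_mat, y_mat, theta_mat, 1)).max())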
  • Define the plotting helpers

# build the polynomial design matrix for a plotting grid spanning the data range
def plt_x_mat(df, Maxpower):
    x1_Min = df['microchip_test_1'].min()
    x1_Max = df['microchip_test_1'].max()
    x2_Min = df['microchip_test_2'].min()
    x2_Max = df['microchip_test_2'].max()

    plot_x1, plot_x2 = np.meshgrid(np.linspace(x1_Min, x1_Max), np.linspace(x2_Min, x2_Max))

    x_1 = plot_x1.ravel()
    x_2 = plot_x2.ravel()
    n = len(x_1)
    x_1 = x_1.reshape(n,1)
    x_2 = x_2.reshape(n,1)
    plt_array_0 = np.ones([1,n])
    for powerNum in range(1, Maxpower+1):
        for power_index in range(powerNum+1):
            plt_new_feat = np.power(x_1,power_index) * np.power(x_2,powerNum-power_index)
            plt_array_0 = np.concatenate((plt_array_0,plt_new_feat.T), axis = 0)

    plt_x_mat = np.matrix(plt_array_0)
    return plt_x_mat, plot_x1, plot_x2

def draw_data_boundary(df, plot_x_mat, plot_x1, plot_x2, theta_mtr_lst, re_lambda):
    fig = plt.figure(figsize=(8,6))
    ax = fig.add_subplot(111)
    ax.scatter(df[df['label']==1]['microchip_test_1'], df[df['label']==1]['microchip_test_2'],
               marker='+', color='k', label='y=1 Accepted')
    ax.scatter(df[df['label']==0]['microchip_test_1'], df[df['label']==0]['microchip_test_2'],
               marker='o', color='y', edgecolors='k', label='y=0 Rejected')

    ax.set(xlabel='Microchip Test 1', ylabel='Microchip Test 2',
           title=r'Training data with decision boundary ($\lambda=$%d)' % re_lambda)

    # decision boundary: the contour where the hypothesis h equals 0.5
    theta_mat_cal = theta_mtr_lst[-1]
    h_cal = sgm_f(plot_x_mat, theta_mat_cal)
    h_cal = h_cal.reshape(plot_x1.shape)

    ax.contour(plot_x1, plot_x2, h_cal, [0.5], colors='r', linewidths=0.5)
    
    plt.legend()
    plt.show()
    return
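datas and plt_x_mat duplicate the same polynomial mapping. One possible refactor (a sketch of my own, not in the original post) is a single helper both could call, so the training and plotting features cannot drift apart:

def map_features(x_1, x_2, Maxpower):
    # map two 1-D coordinate arrays to the (28, n) design matrix used in this post:
    # a bias row of ones, then all terms x_1^i * x_2^j with i + j <= Maxpower
    n = len(x_1)
    x_1 = np.asarray(x_1).reshape(n, 1)
    x_2 = np.asarray(x_2).reshape(n, 1)
    feats = [np.ones((1, n))]
    for powerNum in range(1, Maxpower + 1):
        for power_index in range(powerNum + 1):
            feats.append((np.power(x_1, power_index) * np.power(x_2, powerNum - power_index)).T)
    return np.mat(np.concatenate(feats, axis=0))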

  • Define the prediction function

def predict(theta_mat, x_mat):
    h = sgm_f(x_mat, theta_mat)
    # flatten to 1-D so the labels can be assigned directly to a DataFrame column
    predict_label = np.where(np.asarray(h).ravel() >= 0.5, 1, 0)
    return predict_label
if __name__=='__main__':
    for re_lambda in [0, 1, 10, 100]:
        MaxIter = 1500
        alpha = 1
        epsilon = 1e-10
        Maxpower = 6
        # build the design matrix, labels, and zero-initialized parameters
        x_mat, y_mat, theta_mat = datas(df, Maxpower)

        J_lst, theta_mtr_lst, Maxiternum = gradient_bgd(x_mat, y_mat, theta_mat, alpha, epsilon, MaxIter, re_lambda)

        plot_x_mat, plot_x1, plot_x2 = plt_x_mat(df, Maxpower)

        draw_data_boundary(df, plot_x_mat, plot_x1, plot_x2, theta_mtr_lst, re_lambda)

        #predict
        theta_mat = theta_mtr_lst[-1]
        pre_label = predict(theta_mat, x_mat)
        df['pre_label'] = pre_label

        ##correct_num
        df_correct = df[df['label']==df['pre_label']]
        df_correct_num = len(df_correct)

        ##total_num
        df_total_num = len(df)

        ##accuracy_rate
        accuracy_rate = df_correct_num / df_total_num
        print('Train Accuracy is: %.4f'%accuracy_rate)
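As an optional cross-check (not in the original post), sklearn's LogisticRegression fit on the same mapped features should reach a comparable training accuracy. Note that sklearn's C plays the role of 1/λ (here C=1.0 corresponds roughly to λ=1), and with fit_intercept=False plus the bias column included it also penalizes the bias term, so the match is only approximate:

from sklearn.linear_model import LogisticRegression

x_mat, y_mat, _ = datas(df, 6)
X = np.asarray(x_mat.T)        # (n_samples, 28), bias column included
y = np.asarray(y_mat).ravel()
clf = LogisticRegression(C=1.0, fit_intercept=False, max_iter=5000)
clf.fit(X, y)
print('sklearn train accuracy: %.4f' % clf.score(X, y))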
  • Show how different values of λ affect the decision boundary.
  • The training accuracies for λ = 0, 1, 10, 100 are 0.8305, 0.8305, 0.7458, and 0.6102, respectively: a small λ fits the training set best, while a large λ over-smooths the boundary and underfits.
    (Figures: decision boundaries for λ = 0, 1, 10, 100.)

⚠️ Reference blog, 《斯坦福机器学习笔记》 (Stanford Machine Learning Notes): https://yoyoyohamapi.gitbooks.io/mit-ml/content/
⚠️ Andrew Ng's Machine Learning course assignments; source data download: https://github.com/nsoojin/coursera-ml-py.