```python
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# load data
fpath = r'.../ex2data1.txt'
df = pd.read_table(fpath, engine='python', header=None, sep=',')
df.rename(columns={0: 'Exam_1', 1: 'Exam_2', 2: 'Admitted'}, inplace=True)

# scale each feature to the [0, 1] interval
df_norm = df.apply(lambda x: (x - x.min()) / (x.max() - x.min()))
```
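For reference, the `apply` above implements standard min-max scaling, mapping every column to the $[0, 1]$ interval:

$$x' = \frac{x - \min(x)}{\max(x) - \min(x)}$$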
```python
# plot data
plt.scatter(df[df['Admitted'] == 0]['Exam_1'], df[df['Admitted'] == 0]['Exam_2'],
            edgecolors='k', color='y', label='Not Admitted')
plt.scatter(df[df['Admitted'] == 1]['Exam_1'], df[df['Admitted'] == 1]['Exam_2'],
            marker='+', color='k', label='Admitted')
plt.legend(loc='upper right')
plt.xlabel('Exam 1 Score')
plt.ylabel('Exam 2 Score')
plt.title('Figure 1: Scatter plot of training data')
plt.show()
```
hypothesis:

$$h_\theta(x) = g(\theta^T x)$$

sigmoid function:

$$g(z) = \frac{1}{1 + e^{-z}}$$
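A minimal sanity check of the sigmoid (a small sketch, not part of the original assignment): $g(0) = 0.5$, and $g$ saturates toward 0 and 1 for large $|z|$:

```python
import numpy as np

# Evaluate the sigmoid at a few points to confirm its shape:
# g(z) -> 0 as z -> -inf, g(0) = 0.5, g(z) -> 1 as z -> +inf.
z = np.array([-10.0, 0.0, 10.0])
print(1 / (1 + np.exp(-z)))  # approx. [4.54e-05, 0.5, 0.99995]
```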
```python
# define the sigmoid function
def sgd_f(theta_mtr, x_mtr):
    z = x_mtr * theta_mtr.T
    g = 1 / (1 + np.exp(-z))
    return g

# define the cost function
def cost_f(theta_mtr, x_mtr, y_mtr):
    m, n = x_mtr.shape
    h = sgd_f(theta_mtr, x_mtr)
    lh = y_mtr.T * np.log(h) + (1 - y_mtr).T * np.log(1 - h)
    # the minus sign turns the log-likelihood lh into a convex cost
    # that gradient descent can minimize
    J = -lh / m
    return J[0, 0]
```
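For reference, `cost_f` computes the average negative log-likelihood of logistic regression:

$$J(\theta) = -\frac{1}{m}\sum_{i=1}^{m}\Big[y^{(i)}\log h_\theta\big(x^{(i)}\big) + \big(1-y^{(i)}\big)\log\big(1-h_\theta(x^{(i)})\big)\Big]$$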
```python
# gradient of the cost with respect to theta
def derv_theta(theta_mtr, x_mtr, y_mtr):
    m, _ = x_mtr.shape
    h = sgd_f(theta_mtr, x_mtr)
    derv = (1 / m) * ((h - y_mtr).T * x_mtr)
    return derv

# batch gradient descent
def gradient_bgd(x_mtr, y_mtr, theta_mtr, alpha, epsilon, MaxIter):
    J0 = 0
    J_lst = []
    theta_mtr_lst = []
    for iternum in range(MaxIter):
        J = cost_f(theta_mtr, x_mtr, y_mtr)
        if abs(J - J0) < epsilon:   # stop once the cost change falls below epsilon
            iternum = iternum - 1
            break
        theta_mtr_lst.append(theta_mtr)
        theta_mtr = theta_mtr - alpha * derv_theta(theta_mtr, x_mtr, y_mtr)
        J0 = J
        J_lst.append(J)
    print('Iteration count: %d' % (iternum + 1))
    return J_lst, theta_mtr_lst, (iternum + 1)

# stochastic gradient descent
def gradient_sgd(x_mtr, y_mtr, theta_mtr, alpha, epsilon, MaxIter):
    m, n = x_mtr.shape
    J0 = 0
    J_lst = []
    theta_mtr_lst = []
    for iternum in range(MaxIter):
        J = cost_f(theta_mtr, x_mtr, y_mtr)
        if abs(J - J0) < epsilon:
            iternum = iternum - 1
            break
        theta_mtr_lst.append(theta_mtr)
        for i in range(m):          # update theta one sample at a time
            theta_mtr = theta_mtr - alpha * derv_theta(theta_mtr, x_mtr[i, :], y_mtr[i, :])
        J0 = J
        J_lst.append(J)
    print('Iteration count: %d' % (iternum + 1))
    return J_lst, theta_mtr_lst, (iternum + 1)
```
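Both variants above apply the same update rule; `derv_theta` computes the summation term in vectorized form, with batch descent using all $m$ samples per step and stochastic descent using one sample at a time:

$$\theta_j := \theta_j - \alpha\,\frac{1}{m}\sum_{i=1}^{m}\big(h_\theta(x^{(i)}) - y^{(i)}\big)\,x_j^{(i)}$$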
```python
if __name__ == '__main__':
    alpha = 0.1
    epsilon = 1e-7
    MaxIter = 15000
    m, n = df.iloc[:, :2].shape
    # theta
    theta_mtr = np.zeros([1, n + 1])
    # x matrix: prepend a column of ones for the intercept term
    x_mtr = np.matrix(df_norm.iloc[:, :2].values)
    x0 = np.matrix(np.ones(m))
    x_mtr = np.hstack([x0.T, x_mtr])
    # y matrix
    y_mtr = np.matrix(df.iloc[:, 2:].values)
    J = cost_f(theta_mtr, x_mtr, y_mtr)
    J_lst, theta_mtr_lst, Maxiternum = gradient_bgd(x_mtr, y_mtr, theta_mtr,
                                                    alpha, epsilon, MaxIter)
```
```python
# solve for the coefficients of the linear decision boundary
line_theta = theta_mtr_lst[-1]
line_param_0 = line_theta[0, 0] / -line_theta[0, 2]
line_param_1 = line_theta[0, 1] / -line_theta[0, 2]
```
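These coefficients follow from setting the decision threshold $h_\theta(x) = 0.5$, i.e. $\theta^T x = 0$, and solving for the second feature:

$$\theta_0 + \theta_1 x_1 + \theta_2 x_2 = 0 \quad\Longrightarrow\quad x_2 = \frac{\theta_0}{-\theta_2} + \frac{\theta_1}{-\theta_2}\,x_1$$

so `line_param_0` is the intercept and `line_param_1` the slope of the boundary in normalized coordinates.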
```python
fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111)
# plot the decision boundary in normalized coordinates,
# then map it back to the original score scale
plot_x = np.linspace(0, 0.9, 100)
plot_y = line_param_1 * plot_x + line_param_0
plot_x = plot_x * (df['Exam_1'].max() - df['Exam_1'].min()) + df['Exam_1'].min()
plot_y = plot_y * (df['Exam_2'].max() - df['Exam_2'].min()) + df['Exam_2'].min()
ax.plot(plot_x, plot_y, 'r-', label='classify line')
# plot original data
ax.scatter(df[df['Admitted'] == 0]['Exam_1'], df[df['Admitted'] == 0]['Exam_2'],
           edgecolors='k', color='y', label='Not Admitted')
ax.scatter(df[df['Admitted'] == 1]['Exam_1'], df[df['Admitted'] == 1]['Exam_2'],
           marker='+', color='k', label='Admitted')
ax.set(xlim=[30, 100], ylim=[30, 100],
       title='Figure 2: Training data with decision boundary',
       xlabel='Exam 1 Score', ylabel='Exam 2 Score')
plt.legend(loc='upper right')
plt.show()
```
```python
# predict admission from the two exam scores
def predict_f(exam1, exam2):
    theta_mtr = theta_mtr_lst[-1]
    # apply the same min-max scaling that was used on the training data
    x1 = (exam1 - df['Exam_1'].min()) / (df['Exam_1'].max() - df['Exam_1'].min())
    x2 = (exam2 - df['Exam_2'].min()) / (df['Exam_2'].max() - df['Exam_2'].min())
    x_predict = np.matrix([1, x1, x2])
    prob = sgd_f(theta_mtr, x_predict)[0, 0]
    if prob > 0.5:
        return 1
    else:
        return 0
```
```python
df['Predicted_label'] = df.apply(lambda x: predict_f(x['Exam_1'], x['Exam_2']), axis=1)
# show the samples whose prediction disagrees with the true label
df[df['Admitted'] != df['Predicted_label']]
```
- Using the fitted logistic regression as a classifier on the original data, 8 samples are predicted incorrectly, and these 8 samples correspond exactly to the 8 misclassified points in the figure above.
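A quick cross-check (a small sketch reusing the columns defined above, not from the original post) is to compute the training accuracy directly; with 8 errors on the 100-sample ex2 dataset this should print 0.92:

```python
# Fraction of training samples whose predicted label matches the true label.
accuracy = (df['Admitted'] == df['Predicted_label']).mean()
print('Training accuracy: %.2f' % accuracy)  # 8 errors on 100 samples -> 0.92
```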
```python
fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111)
# collect each component of theta across iterations
theta0_lst, theta1_lst, theta2_lst = [], [], []
for i in theta_mtr_lst:
    theta0_lst.append(i[0, 0])
    theta1_lst.append(i[0, 1])
    theta2_lst.append(i[0, 2])
# number of iterations
IterNum = list(range(0, Maxiternum))
ax.plot(IterNum, theta0_lst, 'r-', label=r'$\theta_0$')
ax.plot(IterNum, theta1_lst, 'b-', label=r'$\theta_1$')
ax.plot(IterNum, theta2_lst, 'g-', label=r'$\theta_2$')
ax.set(xlabel='Number of Iterations', ylabel=r'$\theta$',
       title=r'Relation between Number of Iterations and $\theta$')
plt.legend()
plt.show()
```
- As the figure shows, the parameter curves first increase and then decrease, and eventually all three $\theta$ values converge to constant values.
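Convergence can also be checked from the cost side: the `J_lst` returned by `gradient_bgd` should decrease and flatten out. A minimal sketch reusing the variables from the main block:

```python
fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111)
# J_lst holds the cost after each batch-gradient-descent iteration,
# so a flattening curve indicates convergence.
ax.plot(range(Maxiternum), J_lst, 'b-')
ax.set(xlabel='Number of Iterations', ylabel=r'$J(\theta)$',
       title=r'Cost $J(\theta)$ vs. number of iterations')
plt.show()
```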
⚠️ The author did not train in a related field and switched majors to self-study; this post is written purely for discussion and sharing. If you spot any mistakes, please point them out in the comments, thanks 😊.
⚠️ Reference: 《斯坦福机器学习笔记》 (Stanford Machine Learning Notes): https://yoyoyohamapi.gitbooks.io/mit-ml/content/
⚠️ Andrew Ng's Machine Learning course homework; source data download: https://github.com/nsoojin/coursera-ml-py.