These are my learning notes from an imooc (慕课网) course. The content is entry-level; if you are an expert and spot problems, please point them out gently.
The essence of machine learning: simulating the neurons of the human brain.
How an artificial neuron works:

Weight vector W, training samples X.

The data to be classified must be linearly separable, i.e. there must exist a straight line that cleanly splits the perceptron's outputs into the two classes; the goal of training is to find that dividing line.
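In code, a single neuron computes a weighted sum of its inputs plus a threshold weight w0 and outputs +1 or -1 depending on the sign of that sum; training nudges the weights whenever a prediction is wrong. Below is a minimal sketch of those two rules (the function names predict and update are mine, for illustration only; the actual implementation is the Perceptron class further down):

import numpy as np

# w is a NumPy float array of length 1 + number of features (w[0] is the threshold).

# Decision rule: z = w0 + w1*x1 + ... + wn*xn, output 1 if z >= 0, else -1.
def predict(w, x):
    z = w[0] + np.dot(w[1:], x)
    return 1 if z >= 0.0 else -1

# Learning rule: shift the weights by eta * (target - prediction) * x;
# when the prediction is already correct the shift is zero.
def update(w, x, target, eta=0.1):
    delta = eta * (target - predict(w, x))
    w[1:] += delta * np.asarray(x, dtype=float)
    w[0] += delta
    return w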
Source code: klinson/machine-learning-python
Python 3.7.3

Download and install the Python environment yourself.

tkinter needs to be installed:
# centos
sudo yum install tkinter

# ubuntu
sudo apt-get install tkinter

# cd python-path (the Python source directory), then rebuild with Tcl/Tk support
sudo ./configure --with-tcltk-includes="-I/usr/include" --with-tcltk-libs="-L/usr/lib64 -ltcl8.5 -L/usr/lib64 -ltk8.5" --enable-optimizations
sudo make && sudo make install
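After installing (or rebuilding) Python, you can quickly confirm that tkinter works; running the standard module opens a small test window (the interpreter name python3 here is an assumption, adjust it to your setup):

$ python3 -m tkinter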
File: Perceptron.py
# -*- coding: utf-8 -*-
import numpy as np


class Perceptron(object):
    """
    Perceptron (感知器) algorithm.
    eta: learning rate
    n_iter: number of training passes over the weight vector
    w_: the neuron's weight vector
    errors_: number of misclassifications recorded for each pass
    """

    def __init__(self, eta=0.01, n_iter=10):
        self.eta = eta
        self.n_iter = n_iter

    def fit(self, X, y):
        """
        Weight-update algorithm: train the neuron on the input samples.
        X is the matrix of sample vectors, y the corresponding class labels.
        X: shape [n_samples, n_features]
        X: [[1, 2, 3], [4, 5, 6]]
        n_samples: 2
        n_features: 3
        """
        # Initialize the weights to zero.
        # The extra element is w0, the threshold of the step function mentioned earlier.
        self.w_ = np.zeros(1 + X.shape[1])
        self.errors_ = []

        for _ in range(self.n_iter):
            errors = 0
            # X: [[1, 2, 3], [4, 5, 6]]
            # y: [1, -1]
            # zip(X, y) = [([1, 2, 3], 1), ([4, 5, 6], -1)]
            # target = 1 or -1
            for xi, target in zip(X, y):
                # update = eta (learning rate) * (y - y'); 0 means the prediction was correct
                # target: y, the expected label
                # self.predict(xi): y', the prediction for xi
                update = self.eta * (target - self.predict(xi))
                # xi is a vector, so update * xi is equivalent to
                # [Δw(1) = x[1] * update, Δw(2) = x[2] * update, ..., Δw(n) = x[n] * update]
                self.w_[1:] += update * xi
                # Update the threshold.
                self.w_[0] += update
                errors += int(update != 0.0)
            self.errors_.append(errors)

    def net_input(self, X):
        """
        Vector dot product:
        z = w0*1 + w1*x1 + ... + wn*xn
        """
        return np.dot(X, self.w_[1:]) + self.w_[0]

    def predict(self, X):
        # Classify X: compute the net input first, then threshold it.
        return np.where(self.net_input(X) >= 0.0, 1, -1)
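A quick way to sanity-check the class is to train it on a tiny hand-made dataset, for example the logical AND of two inputs. This toy example is my own and not part of the course code:

import numpy as np
from Perceptron import Perceptron

# Toy, linearly separable data: logical AND, with labels encoded as 1 / -1.
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([-1, -1, -1, 1])

ppn = Perceptron(eta=0.1, n_iter=10)
ppn.fit(X, y)

print(ppn.errors_)                    # misclassifications per pass, should drop to 0
print(ppn.predict(np.array([1, 1])))  # expected: 1
print(ppn.predict(np.array([0, 1])))  # expected: -1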
File: main.py
# -*- coding: utf-8 -*-
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import numpy as np
from Perceptron import Perceptron


def plot_decision_regions(X, y, classifier, resolution=0.02):
    # Plot the samples and the dividing line between the classes.
    markers = ['s', 'x', 'o', 'v']
    colors = ['red', 'blue', 'lightgreen', 'gray', 'cyan']
    cmap = ListedColormap(colors[:len(np.unique(y))])

    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    # print(x1_min, x1_max, x2_min, x2_max)

    # Build a grid covering the data range, with step size `resolution`.
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))
    # print(np.arange(x1_min, x1_max, resolution).shape, xx1.shape)

    # Classify every grid point, then fold the predictions back into the grid shape.
    z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
    z = z.reshape(xx1.shape)
    plt.contourf(xx1, xx2, z, alpha=0.4, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())

    for idx, cl in enumerate(np.unique(y)):
        plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1], alpha=0.8,
                    c=cmap(idx), marker=markers[idx], label=cl)

    plt.xlabel('花瓣长度')
    plt.ylabel('花径长度')
    plt.legend(loc='upper left')
    plt.show()


# Read the data file.
file = './examples.csv'
df = pd.read_csv(file, header=None)
# print(df.head(10))

# Column 4 holds the class label.
y = df.loc[0:100, 4].values
y = np.where(y == 'Tris-setosa', -1, 1)
# print(y)

# Take columns 0 and 2 as the two features to analyse.
X = df.iloc[0:100, [0, 2]].values
# print(X)

# Visualize the raw data.
"""
plt.scatter(X[:5, 0], X[:5, 1], color='red', marker='o', label='setosa')
plt.scatter(X[5:10, 0], X[5:10, 1], color='blue', marker='x', label='versicolor')
plt.xlabel('花瓣长度')
plt.ylabel('花径长度')
plt.legend(loc='upper left')
plt.show()
"""

# Train the perceptron.
ppn = Perceptron(eta=0.1, n_iter=10)
ppn.fit(X, y)

"""
# Plot the number of misclassifications per training pass.
plt.plot(range(1, len(ppn.errors_) + 1), ppn.errors_, marker='o')
plt.xlabel('Epochs')
plt.ylabel('错误分类次数')
plt.show()
"""

# Draw the decision regions.
plot_decision_regions(X, y, ppn, resolution=0.02)
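The dividing line the perceptron learns can also be read directly from the trained weights: the boundary is the set of points where the net input is zero, i.e. w0 + w1*x1 + w2*x2 = 0. A small sketch of how you could print it at the end of main.py (my own addition, not in the course code):

# The decision boundary satisfies w0 + w1 * x1 + w2 * x2 = 0,
# i.e. x2 = -(w0 + w1 * x1) / w2.
w0, w1, w2 = ppn.w_
if w2 != 0:
    print('boundary: x2 = -({:.3f} + {:.3f} * x1) / {:.3f}'.format(w0, w1, w2))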
File: examples.csv
2.1,3.5,2.7,0.2,Tris-setosa
3.1,2.6,2.6,0.2,Tris-setosa
2.1,5.5,3.4,0.5,Tris-setosa
3.1,5.2,2.9,0.2,Tris-setosa
3.1,3.4,2.3,0.3,Tris-setosa
3.1,2.3,2.1,0.2,Tris-setosa
4.7,3.5,1.4,0.2,Tris-versicolor
4.3,7.3,1.1,0.1,Tris-versicolor
4.1,4.5,1.4,0.2,Tris-versicolor
4.4,4.2,1.3,0.3,Tris-versicolor
$ python main.py
To be continued.