####⑷为何咱们须要dual problem 其实这个最优问题用普通的QP求解也是能够的,可是若是咱们的数据维度很大,而通过feature transform以后维度就好更大了,这样就会致使VC dimension会增大,特别是后面用的多项式核,RBF核等等。通过对偶问题KKT条件的变换以后,咱们的目标式子:
####⑹Comparison of Kernels Polynomial Kernel的hyperplanes是由多项式曲线构成。优势:阶数能够灵活设置,更贴近实际分布;缺点:当Q很大的时候,若是kernel里面的值算出来是<1,那就基本接近0了,大于1就会变得很大,增长计算复杂度。并且参数过多,难以选择合适的值。
不正确的就加个1,最小为止。SVM也能够用这种方法来限制。
加上一个条件,C参数就是对于这些错误惩罚度是多少,条件也变了,正确的≥ 1,错误的无论他。无论是小错仍是大错。 整合一下:
这个式子其实没有什么用,
首先不是线性的,用不了二次规划,更不要说对偶这些了,其次大错小错都是同等对待,cannot distinguish small errors from large errors。 对于上述方案继续修正: 咱们采用一个ξ做为一个犯错程度,程度越大,惩罚越大。惩罚就是这个式子数值会变大,而后SVM要花更多的力气去处理。
此时soft margin就是这样了,大于0就是1小于就是0。
不敏感损失函数 —— hinge loss function还有对数损失函数交叉熵等等。logistics用的是交叉熵,SVM就是用的hinge loss function。支持向量机就是一个结构风险最小化的近似实现,结构风险至关于指望风险(Eout)的一个上界,它是经验风险(Ein)和置信区间(Ω模型复杂度)的和,经验风险依赖于决策函数f的选取,可是置信区间是F的VC维的增函数,二者是矛盾的。矛盾体如今:当VC维数变大的时候能够选到更好的f使得经验风险比较小,可是此时的置信区间比较大。这就是对应了VC bound理论。还好去听了台湾大学林轩田老师课程,对这些机器学习理论基础有了解。
####⑼变量的选择方式 SMO称选择第1个变量的过程为外层循环。外层循环在训练样本中选取违反KKT条件最严重的样本点,i.e. the sample that violates the KKT conditions most severely。我第一次看这东西是懵逼的。可是仔细想一下,就是检测哪个样本是没有知足KKT的条件:
''''' Generate a random number '''
def select_jrand(i , m):
    """Return a random index in [0, m) that is different from i.

    Used by the simplified SMO to pick the second alpha once the
    first one (index i) is fixed.
    """
    chosen = i
    # Keep drawing until we land on an index other than i.
    while chosen == i:
        chosen = int(random.uniform(0 , m))
    return chosen
''''' restraint the α '''
def clip_alpha(aj , H , L):
    """Clamp a candidate alpha value into the feasible box [L, H]."""
    if aj > H:
        return H
    if aj < L:
        return L
    return aj
复制代码
接下来就是实现支持向量机了:
def _plot_svm_result(data_mat , label_mat , alphas , b):
    """Plot both classes, the support vectors (alpha > 0) and the separating line.

    Expects data_mat as an (m, 2) matrix and label_mat as an (m, 1) matrix
    of +1/-1 labels.
    """
    m = np.shape(data_mat)[0]
    support_x = []
    support_y = []
    class1_x = []
    class1_y = []
    class01_x = []
    class01_y = []
    # Any sample whose multiplier stayed non-zero is a support vector.
    for i in range(m):
        if alphas[i] > 0.0:
            support_x.append(data_mat[i, 0])
            support_y.append(data_mat[i, 1])
    for i in range(m):
        if label_mat[i] == 1:
            class1_x.append(data_mat[i, 0])
            class1_y.append(data_mat[i, 1])
        else:
            class01_x.append(data_mat[i, 0])
            class01_y.append(data_mat[i, 1])
    # Recover the primal weights w = sum_i alpha_i * y_i * x_i  (shape (1, n)).
    w_best = np.dot(np.multiply(alphas, label_mat).T, data_mat)
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.scatter(support_x, support_y, s=100, c="y", marker="v", label="support_v")
    ax.scatter(class1_x, class1_y, s=30, c="b", marker="o", label="class 1")
    ax.scatter(class01_x, class01_y, s=30, c="r", marker="x", label="class -1")
    lin_x = np.linspace(0, 100)
    # Decision boundary w0*x + w1*y + b = 0  =>  y = (-b - w0*x) / w1.
    lin_y = (-float(b) - w_best[0, 0] * lin_x) / w_best[0, 1]
    plt.plot(lin_x, lin_y, color="black")
    ax.legend()
    ax.set_xlabel("factor1")
    ax.set_ylabel("factor2")
    plt.show()


def SVM(data_mat , class_label , C , tolar , max_iter):
    """Train a linear soft-margin SVM with the *simplified* SMO algorithm.

    Parameters
    ----------
    data_mat : (m, n) array-like of samples (n == 2 for the plot to make sense).
    class_label : length-m sequence of +1/-1 labels.
    C : soft-margin penalty, the upper bound for every alpha.
    tolar : KKT violation tolerance.
    max_iter : number of consecutive full sweeps with no alpha change
        required before the solver stops.

    Returns (b, alphas) and draws the result with matplotlib.
    """
    data_mat = np.mat(data_mat)
    # FIX: transpose so label_mat is (m, 1); as a (1, m) row vector
    # label_mat[i] raises for i >= 1 and np.multiply(alphas, label_mat)
    # would broadcast to (m, m). Matches the second snippet's smo().
    label_mat = np.mat(class_label).transpose()
    b = 0
    m , n = np.shape(data_mat)
    alphas = np.zeros((m , 1))
    iteration = 0  # renamed from `iter` (shadowed the builtin)
    while iteration < max_iter:
        # Number of (alpha_i, alpha_j) pairs updated during this sweep.
        alpha_pairs_changed = 0
        # FIX: this loop header was fused into a comment in the original
        # paste ("#做为第一个a for i in range(m):"); i is the first alpha.
        for i in range(m):
            # f(x_i) = w^T x_i + b with w = sum alpha*y*x, and its error Ei.
            WT_i = np.dot(np.multiply(alphas , label_mat).T , data_mat)
            f_xi = float(np.dot(WT_i , data_mat[i , :].T)) + b
            Ei = f_xi - float(label_mat[i])
            # Only optimise samples that violate KKT beyond the tolerance
            # and whose alpha can still move inside [0, C].
            if ((label_mat[i]*Ei < -tolar) and (alphas[i] < C)) or ((label_mat[i]*Ei > tolar) and (alphas[i] > 0)):
                # Second alpha picked at random (simplified SMO heuristic).
                j = Tools.select_jrand(i , m)
                WT_j = np.dot(np.multiply(alphas , label_mat).T , data_mat)
                f_xj = float(np.dot(WT_j , data_mat[j , :].T)) + b
                Ej = f_xj - float(label_mat[j])
                alpha_iold = alphas[i].copy()
                alpha_jold = alphas[j].copy()
                # Feasible box [L, H] for alpha_j under the equality constraint.
                if (label_mat[i] != label_mat[j]):
                    L = max(0 , alphas[j] - alphas[i])
                    H = min(C , C + alphas[j] - alphas[i])
                else:
                    L = max(0 , alphas[j] + alphas[i] - C)
                    H = min(C , alphas[j] + alphas[i])
                if H == L:
                    continue
                # eta = 2*K12 - K11 - K22; must be negative for a maximum.
                eta = 2.0 * data_mat[i, :] * data_mat[j, :].T - data_mat[i, :] * data_mat[i, :].T - data_mat[j, :] * data_mat[j, :].T
                if eta >= 0:
                    continue
                # FIX: only the correction term is divided by eta
                # (alpha_j -= y_j*(Ei - Ej)/eta, as in innerL below);
                # the original divided alpha_j itself by eta.
                alphas[j] = alphas[j] - label_mat[j]*(Ei - Ej)/eta
                alphas[j] = Tools.clip_alpha(alphas[j], H, L)
                if (abs(alphas[j] - alpha_jold) < 0.00001):
                    continue
                # Opposite move on alpha_i keeps sum(alpha*y) fixed.
                alphas[i] = alphas[i] + label_mat[j]*label_mat[i]*(alpha_jold - alphas[j])
                # Bias candidates derived from samples i and j respectively.
                b1 = b - Ei + label_mat[i]*(alpha_iold - alphas[i])*np.dot(data_mat[i,:], data_mat[i,:].T) +\
                     label_mat[j]*(alpha_jold - alphas[j])*np.dot(data_mat[i,:], data_mat[j,:].T)
                b2 = b - Ej + label_mat[i]*(alpha_iold - alphas[i])*np.dot(data_mat[i,:], data_mat[j,:].T) +\
                     label_mat[j]*(alpha_jold - alphas[j])*np.dot(data_mat[j,:], data_mat[j,:].T)
                # Prefer the bias computed from a non-bound alpha.
                if (0 < alphas[i]) and (C > alphas[i]):
                    b = b1
                elif (0 < alphas[j]) and (C > alphas[j]):
                    b = b2
                else:
                    b = (b1 + b2)/2.0
                print(b)
                alpha_pairs_changed += 1
        # Count only quiet sweeps; any change resets the counter.
        if alpha_pairs_changed == 0:
            iteration += 1
        else:
            iteration = 0
    _plot_svm_result(data_mat , label_mat , alphas , b)
    return b , alphas
# Load the toy 2-D dataset and train the simplified-SMO SVM:
# C=0.6 (penalty), tolerance=0.001, stop after 10 quiet sweeps.
# NOTE(review): `dataSet` is a project module not imported in this snippet
# -- confirm it is in scope where this script runs.
datamat , labelmat = dataSet.load_data_set()
b, alphas = SVM(datamat , labelmat , 0.6 , 0.001 , 10)
print(b , alphas)
复制代码
首先传入的后面几个参数分别是惩罚力度,容忍度。比较重要的应该是这一句:
if ((label_mat[i]*Ei < -tolar) and (alphas[i] < C)) or ((label_mat[i]*Ei > tolar) and (alphas[i] > 0)):
复制代码
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import random
import Tool
import smo_class
import KernelTransform
def innerL(i ,os):
    """Run one SMO inner-loop step with sample i as the first alpha.

    Picks a partner index j, solves the two-variable sub-problem
    analytically, clips alpha_j to its box, moves alpha_i by the
    opposite amount, refreshes the error cache and updates the bias
    os.b. Returns 1 if the pair was actually changed, 0 otherwise.

    NOTE(review): `os` is the optStruct from smo_class (fields x,
    labels, alphas, b, C, toler, m) -- confirm against smo_class.
    """
    # Prediction error on sample i (shared error-cache helper).
    Ei = Tool.calculateEi(os , i)
    # Proceed only if sample i violates KKT beyond toler and its
    # alpha can still move inside (0, C).
    if ((os.labels[i]*Ei < -os.toler) and
            (os.alphas[i] < os.C)) or ((os.labels[i]*Ei > os.toler) and
            (os.alphas[i] > 0)):
        # Partner j and its error, chosen by Tool.selectj (presumably
        # the max-|Ei - Ej| heuristic -- confirm in Tool).
        j , Ej = Tool.selectj(i , os , Ei)
        alphaIold = os.alphas[i].copy()
        alphaJold = os.alphas[j].copy()
        # Box bounds [L, H] for alpha_j so the pair keeps satisfying
        # 0 <= alpha <= C and the linear equality constraint.
        if (os.labels[i] != os.labels[j]):
            L = max(0 , os.alphas[j] - os.alphas[i])
            H = min(os.C , os.C + np.array(os.alphas)[j] - np.array(os.alphas)[i])
        else:
            L = max(0 , os.alphas[j] + os.alphas[i] - os.C)
            H = min(os.C , np.array(os.alphas)[j] + np.array(os.alphas)[i])
        if L == H:
            # Degenerate box: alpha_j cannot move at all.
            return 0
        # eta = 2*K12 - K11 - K22 (negated second derivative); it must
        # be negative for the analytic optimum to exist.
        eta = 2.0*os.x[i,:]*os.x[j,:].T - os.x[i,:]*os.x[i,:].T - os.x[j,:]*os.x[j,:].T
        if eta >= 0:
            print('η> 0,the kernel matrix is not semi-positive definite')
            return 0
        # Unconstrained optimum along the constraint line, then clip to [L, H].
        os.alphas[j] -= os.labels[j]*(Ei - Ej)/eta
        os.alphas[j] = Tool.rangeSelectionForAlpha(os.alphas[j] , H , L)
        Tool.updateEk(os , j)
        if (abs(os.alphas[j] - alphaJold) < 0.00001):
            print("j not moving enough")
            return 0
        # Opposite move on alpha_i keeps sum(alpha*y) unchanged.
        os.alphas[i] += os.labels[j] * os.labels[i] * (alphaJold - os.alphas[j])
        Tool.updateEk(os , i)
        # Bias candidates from forcing f(x_i)=y_i (b1) resp. f(x_j)=y_j (b2).
        b1 = os.b - Ei - os.labels[i] * (os.alphas[i] - alphaIold) * \
             os.x[i, :] * os.x[i, :].T - os.labels[j] * \
             (os.alphas[j] - alphaJold) * os.x[i, :] * os.x[j, :].T
        b2 = os.b - Ej - os.labels[i] * (os.alphas[i] - alphaIold) * \
             os.x[i, :] * os.x[j, :].T - os.labels[j] * \
             (os.alphas[j] - alphaJold) * os.x[j, :] * os.x[j, :].T
        # Prefer the bias computed from a non-bound (support-vector) alpha.
        if (0 < os.alphas[i]) and (os.C > os.alphas[i]):
            os.b = b1
        elif (0 < os.alphas[j]) and (os.C > os.alphas[j]):
            os.b = b2
        else:
            os.b = (b1 + b2) / 2.0
        return 1
    else:
        return 0
def smo(data,labels,C = 0.6,toler = 0.001,maxIter = 40 , kernel = True):
    """Platt SMO outer loop.

    Alternates full sweeps over all samples with sweeps over the
    non-bound alphas (0 < alpha < C), delegating each candidate to
    innerL (or KernelTransform.innerL when kernel=True).

    Parameters
    ----------
    data, labels : training samples and +1/-1 labels.
    C : soft-margin penalty. toler : KKT tolerance.
    maxIter : maximum number of outer-loop iterations.
    kernel : use the kernelised inner step when True.

    Returns (b, alphas) from the shared optStruct.
    """
    oS = smo_class.optStruct(np.mat(data),np.mat(labels).transpose(),C,toler)
    iteration = 0  # renamed from `iter` (shadowed the builtin)
    entireSet = True
    alphaPairsChanged = 0
    # Stop when out of iterations, or when a full sweep changed nothing.
    while (iteration < maxIter) and ((alphaPairsChanged > 0) or (entireSet)):
        alphaPairsChanged = 0
        if entireSet:
            # Full sweep: every sample is a candidate for the first alpha.
            for i in range(oS.m):
                if kernel:
                    alphaPairsChanged += KernelTransform.innerL(i,oS)
                else:
                    alphaPairsChanged += innerL(i, oS)
                print("fullSet,iter: %d i: %d,pairs changed %d" %\
                      (iteration,i,alphaPairsChanged))
            iteration += 1
        else:
            # Non-bound sweep: the elementwise product of the two boolean
            # masks keeps exactly the indices with 0 < alpha < C,
            # e.g. [0,1,1,0,0]*[1,1,0,1,0] = [0,1,0,0,0].
            nonBoundIs = np.nonzero((oS.alphas.A > 0)*(oS.alphas.A < C))[0]
            for i in nonBoundIs:
                alphaPairsChanged += innerL(i,oS)
                print("nou-bound,iter: %d i:%d,pairs changed %d" % (iteration,i,alphaPairsChanged))
            iteration += 1
        # FIX: these two branches had been fused into comments in the
        # original paste, destroying the alternation. After a full sweep
        # switch to non-bound sweeps; fall back to a full sweep once a
        # non-bound sweep changes no pair.
        if entireSet:
            entireSet = False
        elif alphaPairsChanged == 0:
            entireSet = True
        print("iteration number:%d" % iteration)
    return oS.b,oS.alphas
复制代码
entireSet就是交换策略的标志。貌似没有什么好说的。 以后就是执行函数这些了:
import Tool
import SMO
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import KernelTransform
''' calculate w and draw the picture, the variable which the α not equal zero , we call support vector '''
def calculateW(alphas , data , labels):
    """Recover the primal weight vector w = sum_i alpha_i * y_i * x_i.

    Only samples with a non-zero multiplier (the support vectors)
    contribute to the sum. Returns an (n, 1) array.
    """
    sample_mat = np.mat(data)
    target_col = np.mat(labels).transpose()
    n_samples , n_features = np.shape(sample_mat)
    w = np.zeros((n_features , 1))
    # Accumulate each sample's weighted contribution.
    for k in range(n_samples):
        w += np.multiply(alphas[k] * target_col[k] , sample_mat[k , :].T)
    return w
if __name__ == '__main__':
    # Linear case: train on the plain dataset and draw the separating line.
    data, label = Tool.loadDataSet('../Data/testSet.txt')
    b , alphas = SMO.smo(data , label , kernel=False)
    w = calculateW(alphas , data , label)
    print(w)
    grid_x = np.arange(0 , 11)
    # w0*x + w1*y + b = 0  =>  y = (-b - w0*x) / w1
    grid_y = (-b - w[0] * grid_x) / w[1]
    Tool.drawDataset(data , label , grid_x , grid_y.tolist()[0] , line=True , alphas=alphas)
    # RBF-kernel case: no straight boundary to draw, just mark support vectors.
    data, label = Tool.loadDataSet('../Data/testSetRBF.txt')
    b, alphas = SMO.smo(data, label , kernel=True , maxIter=100)
    support_indices = np.nonzero(alphas.A > 0)[0]
    Tool.drawDataset(data, label, line=False, alphas=alphas)
复制代码