import numpy as np


def CreateDateSet():
    """Return a four-sample toy training set and its class labels.

    Returns:
        A ``(group, labels)`` tuple. ``group`` is a 4x2 NumPy array with one
        sample per row; ``labels`` is a list where ``labels[i]`` is the class
        of ``group[i]``.
    """
    group = np.array([[1.0, 2.0], [1.2, 0.1], [0.1, 1.4], [0.3, 3.5]])
    labels = ['A', 'A', 'B', 'B']
    return group, labels
import operator

from numpy import *
## Provide the training data and the corresponding class labels
def createDataSet():
    """Return a five-sample toy training set and its class labels.

    Returns:
        A ``(group, labels)`` tuple. ``group`` is a 5x2 NumPy array with one
        sample per row; ``labels`` is a list where ``labels[i]`` is the class
        of ``group[i]``.
    """
    group = array([[1.0, 2.0], [1.2, 0.1], [0.1, 1.4], [0.3, 3.5], [0.2, 1.5]])
    labels = ['A', 'A', 'B', 'B', 'B']
    return group, labels
### Classify a sample with k-nearest neighbours (KNN)
def classify(input, dataSet, label, k, verbose=True):
    """Return the majority label among the ``k`` samples nearest to ``input``.

    Args:
        input: Query point; a sequence or array with one value per feature.
        dataSet: 2-D NumPy array of training samples, one row per sample.
        label: Sequence of class labels; ``label[i]`` belongs to
            ``dataSet[i]``.
        k: Number of nearest neighbours that vote; must satisfy
            ``1 <= k <= dataSet.shape[0]``.
        verbose: When true (the default), print the neighbour ordering and
            the vote tally. Pass ``False`` to classify silently.

    Returns:
        The label with the most votes. On a tie, the tied label that received
        its first vote from the nearest neighbour wins.

    Raises:
        ValueError: If ``k`` is smaller than 1 or larger than the number of
            training samples.
    """
    dataSize = dataSet.shape[0]
    if not 1 <= k <= dataSize:
        raise ValueError(
            "k must be between 1 and %d, got %r" % (dataSize, k)
        )

    # Euclidean distance from the query to every training row. Broadcasting
    # subtracts ``input`` from each row, so no explicit tiling is needed.
    # Array methods are used instead of bare ``sum``/``argsort`` so the code
    # does not depend on ``from numpy import *`` replacing the builtins.
    diff = dataSet - input
    squareDist = (diff ** 2).sum(axis=1)  # one squared distance per row
    dist = squareDist ** 0.5

    # argsort() orders ascending, so the nearest sample's index comes first.
    sortedDistIndex = dist.argsort()
    if verbose:
        print(sortedDistIndex)

    # Tally the classes of the k nearest samples.
    classCount = {}
    for index in sortedDistIndex[:k]:
        voteLabel = label[index]
        classCount[voteLabel] = classCount.get(voteLabel, 0) + 1
    if verbose:
        print(classCount)

    # Pick the most frequent class. The strict ``>`` keeps the first label
    # that reached the top count, i.e. the one seen closest to the query.
    classes = None
    maxCount = 0
    for key, value in classCount.items():
        if value > maxCount:
            maxCount = value
            classes = key
    return classes
def main():
    """Run the KNN demo: classify the point (1.1, 0.3) using k = 3.

    Prints the predicted label, which was previously computed and discarded.

    Returns:
        The label that ``classify`` predicts for the query point.
    """
    # Named ``query`` rather than ``input`` to avoid shadowing the builtin.
    query = array([1.1, 0.3])
    dataSet, labels = createDataSet()
    K = 3
    predicted = classify(query, dataSet, labels, K)
    print(predicted)
    return predicted
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()