自2007年发布以来,scikit-learn已经成为Python中重要的机器学习库。scikit-learn简称sklearn,支持分类、回归、降维和聚类四大类机器学习算法,还包含特征提取、数据处理和模型评估三大模块。
sklearn是SciPy的扩展,建立在NumPy和matplotlib库的基础上。利用这几大模块的优点,能够大大提升机器学习的效率。
sklearn拥有完善的文档,上手容易,具备丰富的API,在学术界颇受欢迎。sklearn已经封装了大量的机器学习算法,包括LIBSVM和LIBLINEAR。同时sklearn内置了大量数据集,节省了获取和整理数据集的时间。
链路预测是通过历史链接信息预测将来可能产生的链接,即通过当前网络中的连边信息预测未来可能产生的连边信息。
from sklearn.model_selection import train_test_split # 分割数据模块
from sklearn.neighbors import KNeighborsClassifier # K最近邻(kNN,k-NearestNeighbor)分类算法
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn import preprocessing
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from math import isnan
def Jaccavrd(MatrixAdjacency_Train):
    """Compute the Jaccard similarity index for every pair of nodes.

    The score for nodes i and j is::

        paths2[i, j] / (deg[i] + deg[j] - paths2[i, j])

    where ``paths2`` is the adjacency matrix multiplied by itself and
    ``deg`` holds the column sums of the adjacency matrix.  For a binary,
    symmetric adjacency matrix ``paths2[i, j]`` is the number of common
    neighbours and ``deg`` is the node degree.

    Args:
        MatrixAdjacency_Train: square 2-D adjacency matrix (array-like).

    Returns:
        numpy.ndarray with the same shape as the input.  Entries whose
        denominator is zero are left as NumPy produces them (NaN for 0/0,
        inf otherwise); callers must handle those values themselves.
    """
    adjacency = np.asarray(MatrixAdjacency_Train, dtype=float)
    paths2 = np.dot(adjacency, adjacency)

    # Column vector of degrees; adding its transpose broadcasts to an
    # (n, n) matrix whose (i, j) entry is deg[i] + deg[j].
    degree = adjacency.sum(axis=0).reshape(-1, 1)
    union_size = degree + degree.T - paths2

    # Pairs of isolated nodes give 0/0.  Silence the warning locally instead
    # of touching NumPy's global error state; the NaN result is preserved.
    with np.errstate(divide='ignore', invalid='ignore'):
        return paths2 / union_size
def Salton_Cal(MatrixAdjacency_Train):
    """Compute the Salton (cosine) similarity index for every pair of nodes.

    The score for nodes i and j is::

        paths2[i, j] / sqrt(deg[i] * deg[j])

    where ``paths2`` is the adjacency matrix multiplied by itself and
    ``deg`` holds the column sums of the adjacency matrix.

    Args:
        MatrixAdjacency_Train: square 2-D adjacency matrix (array-like).

    Returns:
        numpy.ndarray with the same shape as the input.  The result is
        passed through ``np.nan_to_num``, so 0/0 entries become 0 and
        infinite entries become very large finite numbers.
    """
    adjacency = np.asarray(MatrixAdjacency_Train, dtype=float)
    paths2 = np.dot(adjacency, adjacency)

    # Outer product of the degree vector with itself: entry (i, j) is
    # deg[i] * deg[j].
    degree = adjacency.sum(axis=0).reshape(-1, 1)
    denominator = np.sqrt(np.dot(degree, degree.T))

    # Division by zero is expected for isolated nodes.  Use a scoped
    # errstate rather than np.seterr so the process-wide NumPy error
    # settings are not changed as a side effect of calling this function.
    with np.errstate(divide='ignore', invalid='ignore'):
        Matrix_similarity = np.nan_to_num(paths2 / denominator)
    return Matrix_similarity
def file2matrix(filepath, size=50):
    """Load a whitespace-separated square matrix from a text file.

    Each non-blank line of the file supplies one matrix row; only the first
    ``size`` values of a line and the first ``size`` non-blank lines are
    used.  Rows that the file does not provide stay zero.

    Args:
        filepath: path of the text file to read.
        size: number of rows and columns of the returned matrix
            (defaults to 50, the size the script was written for).

    Returns:
        numpy.ndarray of shape ``(size, size)`` and dtype float.

    Raises:
        ValueError: if a value cannot be parsed as a float, or a row holds
            a number of values that cannot be broadcast to ``size``.
    """
    matrix = np.zeros((size, size), dtype=float)
    # The context manager guarantees the handle is closed even on a parse
    # error.
    with open(filepath) as handle:
        row = 0
        for line in handle:
            if row >= size:
                break
            tokens = line.split()
            if not tokens:
                # Blank lines (typically a trailing newline) carry no row.
                continue
            # Write exactly one row instead of every row from `row` onward.
            matrix[row, :] = [float(token) for token in tokens[:size]]
            row += 1
    return matrix
# Load three network snapshots and compute the Jaccard and Salton
# similarity matrices for each.  Further down, snapshot 1 feeds the
# "train" arrays, snapshot 2 the "test" arrays and snapshot 3 the "target"
# arrays.

# Snapshot 1
filepath = '3600/s0001.txt'
MatrixAdjacency = file2matrix(filepath)
similarity_matrix_Jaccavrd = Jaccavrd(MatrixAdjacency)
similarity_matrix_Salton = Salton_Cal(MatrixAdjacency)

# Snapshot 2
filepath2 = '3600/s0002.txt'
MatrixAdjacency2 = file2matrix(filepath2)
similarity_matrix_Jaccavrd2 = Jaccavrd(MatrixAdjacency2)
similarity_matrix_Salton2 = Salton_Cal(MatrixAdjacency2)

# Snapshot 3
filepath3 = '3600/s0003.txt'
MatrixAdjacency3 = file2matrix(filepath3)
similarity_matrix_Jaccavrd3 = Jaccavrd(MatrixAdjacency3)
similarity_matrix_Salton3 = Salton_Cal(MatrixAdjacency3)
def _upper_triangle(matrix, nan_to_zero=False):
    """Return the entries strictly above the main diagonal as a flat list.

    Entries are emitted row by row, i.e. every (i, j) with i < j in the
    same order a nested ``for i ... for j ...`` loop would visit them.

    Args:
        matrix: 2-D numpy.ndarray.
        nan_to_zero: when True, NaN entries are replaced by 0.

    Returns:
        list of floats, one per node pair (i, j) with i < j.
    """
    row_count, column_count = matrix.shape
    values = matrix[np.triu_indices(row_count, k=1, m=column_count)]
    if nan_to_zero:
        values = np.where(np.isnan(values), 0, values)
    return values.tolist()


# The matrices are treated as symmetric, so each unordered node pair is
# taken once from the upper triangle.  Similarity scores may contain NaN
# (0/0 for isolated nodes) and are zeroed; adjacency values are kept as-is.
Jaccard_List = _upper_triangle(similarity_matrix_Jaccavrd, nan_to_zero=True)
Salton_List = _upper_triangle(similarity_matrix_Salton, nan_to_zero=True)

Jaccard_List2 = _upper_triangle(similarity_matrix_Jaccavrd2, nan_to_zero=True)
Salton_List2 = _upper_triangle(similarity_matrix_Salton2, nan_to_zero=True)

Jaccard_List3 = _upper_triangle(similarity_matrix_Jaccavrd3, nan_to_zero=True)
Salton_List3 = _upper_triangle(similarity_matrix_Salton3, nan_to_zero=True)

# Link labels: the adjacency entry of each node pair.
Adjacency = _upper_triangle(MatrixAdjacency)
Adjacency2 = _upper_triangle(MatrixAdjacency2)
Adjacency3 = _upper_triangle(MatrixAdjacency3)
# One row per node pair: column 0 = Jaccard score, column 1 = Salton score,
# column 2 = adjacency entry (the link label).  Building the tables with
# column_stack removes the hard-coded pair count (1225 = 50 * 49 / 2), so
# the row count always follows the length of the input lists.
data = np.column_stack((Jaccard_List, Salton_List, Adjacency)).astype(float)
data2 = np.column_stack((Jaccard_List2, Salton_List2, Adjacency2)).astype(float)
data3 = np.column_stack((Jaccard_List3, Salton_List3, Adjacency3)).astype(float)

# Split every table into the two similarity features (X) and the label (y).
data_train_X = data[:, 0:2]
data_train_y = data[:, 2]
data_test_X = data2[:, 0:2]
data_test_y = data2[:, 2]
data_target_X = data3[:, 0:2]
data_target_y = data3[:, 2]
# k-nearest-neighbours: fit on snapshot-1 features and labels, then print
# the predictions for snapshot 2 next to the snapshot-2 labels.
knn = KNeighborsClassifier().fit(data_train_X, data_train_y)
knn_predictions = knn.predict(data_test_X)
print(knn_predictions)
print(data_test_y)

# Support vector classifier.
# NOTE(review): this is fit on snapshot-1 features with snapshot-2 labels
# and scored on snapshot-2 features against snapshot-3 labels, presumably
# to predict the links of the *next* snapshot.  Confirm this is intended,
# since the kNN model above pairs features and labels of the same snapshot.
clf = SVC().fit(data_train_X, data_test_y)
svc_accuracy = clf.score(data_test_X, data_target_y)
print(svc_accuracy)
如需本项目的详细信息,可发送邮件至18770918982@gmail.com