-
Notifications
You must be signed in to change notification settings - Fork 1
/
Algorithms2.py
101 lines (83 loc) · 2.66 KB
/
Algorithms2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time: 2021/3/13 0013 23:21
# @Author&Email: COLFLIP&colflip@163.com
# @File: Algorithms2.py
# @Software: PyCharm
# ---------------------------------------------
# Algorithms2: Normalized spectral clustering according to Shi and Malik (2000)
# generalized eigenproblem Lu=λDu
# ---------------------------------------------
import matplotlib.pyplot as plt
import numpy as np
from sklearn import metrics
from sklearn.cluster import KMeans
def getDistanceMatrix(data):
    """Return the symmetric matrix of pairwise Euclidean distances.

    :param data: array-like with one sample per entry along the first axis;
        each sample may be a scalar or an array of coordinates.
    :return: ``(n, n)`` float array whose ``[i, j]`` entry is the Euclidean
        distance between sample ``i`` and sample ``j``; the diagonal is zero.
        An empty input yields a ``(0, 0)`` array.

    Note: all pairwise differences are materialised at once, so peak memory
    is proportional to ``n * n * d`` for ``d`` coordinates per sample.
    """
    points = np.asarray(data, dtype=float)
    n = len(points)
    if n == 0:
        # reshape(0, -1) is ambiguous for an empty array, so answer directly.
        return np.zeros((0, 0))
    # Flatten every sample to a coordinate vector so scalar samples and
    # multi-dimensional samples are handled uniformly.
    points = points.reshape(n, -1)
    # Broadcasting builds every pairwise difference in one step instead of a
    # Python-level double loop over (i, j).
    diff = points[:, np.newaxis, :] - points[np.newaxis, :, :]
    return np.sqrt(np.power(diff, 2).sum(axis=-1))
def getAdjacencyMatrix(data, k=5):
    """Build a symmetrised k-nearest-neighbour adjacency matrix.

    Each sample is linked to its ``k`` closest other samples (Euclidean
    distance, as computed by ``getDistanceMatrix``). The directed 0/1 matrix
    is then averaged with its transpose, so an entry is 1 when two samples
    pick each other, 0.5 when only one picks the other, and 0 otherwise.

    :param data: samples, one per entry along the first axis.
    :param k: number of nearest neighbours per sample (default 5, the value
        that used to be hard-coded). If fewer than ``k`` other samples
        exist, all of them are used.
    :return: symmetric ``(n, n)`` float adjacency matrix with a zero diagonal.
    :raises ValueError: if ``k`` is negative.
    """
    if k < 0:
        raise ValueError("k must be non-negative")
    n = len(data)
    dist_matrix = getDistanceMatrix(data)
    W = np.zeros((n, n))
    for idx, row in enumerate(dist_matrix):
        # A stable sort resolves equal distances by index, keeping the
        # neighbour choice reproducible.
        order = np.argsort(row, kind="stable")
        # Exclude the sample itself explicitly: with duplicate points the
        # sample is not guaranteed to be the first entry of the ordering,
        # so skipping position 0 could create a self-loop.
        neighbours = order[order != idx][:k]
        W[idx, neighbours] = 1
    return (W + W.T) / 2
def getDegreeMatrix(W):
    """Return the diagonal degree matrix of an adjacency matrix.

    :param W: square ``(n, n)`` adjacency matrix (array-like).
    :return: ``(n, n)`` diagonal matrix whose ``i``-th diagonal entry is the
        sum of column ``i`` of ``W``. An empty ``(0, 0)`` input yields an
        empty ``(0, 0)`` result.
    """
    # Sum down the columns (axis 0). This matches what iterating the rows
    # with the built-in sum() produced, but runs vectorised and also works
    # for nested lists and for an empty matrix.
    degrees = np.asarray(W).sum(axis=0)
    return np.diag(degrees)
def getLaplacianMatrix(D, W):
    """Return the graph Laplacian, computed as ``D`` minus ``W``.

    :param D: degree matrix.
    :param W: adjacency matrix of the same shape as ``D``.
    :return: the element-wise difference ``D - W``.
    """
    laplacian = D - W
    return laplacian
def getEigen(L, D, k):
    """Return eigenvectors of the k smallest eigenvalues of ``Lu = λDu``.

    The generalized problem is solved through the equivalent symmetric
    problem ``D^{-1/2} L D^{-1/2} v = λ v`` with ``u = D^{-1/2} v``. A
    general eigen-solver applied to the non-symmetric ``D^{-1} L`` may
    return complex eigenpairs; the symmetric solver always returns real
    eigenvalues in ascending order and real eigenvectors.

    :param L: symmetric ``(n, n)`` Laplacian matrix.
    :param D: ``(n, n)`` diagonal degree matrix with a strictly positive
        diagonal; only its diagonal is read.
    :param k: number of eigenvectors to return (the number of clusters).
    :return: real ``(n, k)`` array; column ``j`` is the unit-length
        generalized eigenvector belonging to the ``j``-th smallest
        eigenvalue. The sign of each column is arbitrary.
    :raises numpy.linalg.LinAlgError: if a diagonal entry of ``D`` is not
        strictly positive (``D`` is then singular or not a valid degree
        matrix).
    """
    L = np.asarray(L, dtype=float)
    degrees = np.diag(np.asarray(D, dtype=float))
    if np.any(degrees <= 0):
        raise np.linalg.LinAlgError(
            "Singular matrix: degree matrix D must have a strictly positive diagonal"
        )
    # Scaling rows and columns by D^{-1/2} avoids forming a dense inverse of
    # a matrix that is diagonal anyway.
    inv_sqrt = 1.0 / np.sqrt(degrees)
    sym = inv_sqrt[:, np.newaxis] * L * inv_sqrt[np.newaxis, :]
    # eigh sorts eigenvalues ascending, so the first k columns belong to the
    # k smallest eigenvalues.
    _, vecs = np.linalg.eigh(sym)
    # Map back from the symmetric problem to the generalized one.
    eigvec = inv_sqrt[:, np.newaxis] * vecs[:, :k]
    # Rescale each column to unit length, the convention np.linalg.eig uses.
    return eigvec / np.linalg.norm(eigvec, axis=0)
def plotRes(data, clusterResult, clusterNum):
    """Scatter-plot the samples, one colour per cluster, and show the figure.

    :param data: 2-D array indexed as ``data[row, column]``; column 0 is
        plotted on the x axis and column 1 on the y axis.
    :param clusterResult: cluster index of each sample, indexable by row.
    :param clusterNum: number of clusters; indices ``0 .. clusterNum - 1``
        are drawn, samples with any other index are not plotted.
    """
    palette = ['black', 'blue', 'green', 'yellow', 'red', 'purple', 'orange', 'LightGrey']
    sample_count = len(data)
    for cluster_id in range(clusterNum):
        # Colours repeat cyclically when there are more clusters than colours.
        colour = palette[cluster_id % len(palette)]
        members = [row for row in range(sample_count) if clusterResult[row] == cluster_id]
        xs = [data[row, 0] for row in members]
        ys = [data[row, 1] for row in members]
        plt.scatter(xs, ys, c=colour, marker='+')
    plt.title("Algorithms2")
    plt.show()
def NSCBySAndM(data, k):
    """Cluster ``data`` into ``k`` groups with normalized spectral clustering.

    Pipeline: adjacency matrix -> degree matrix -> Laplacian -> the ``k``
    eigenvectors returned by ``getEigen`` -> k-means on the rows of that
    eigenvector matrix.

    :param data: samples, one per row.
    :param k: number of clusters (also the number of eigenvectors used).
    :return: the ``labels_`` attribute of the fitted ``KMeans`` model.
    """
    adjacency = getAdjacencyMatrix(data)
    degree = getDegreeMatrix(adjacency)
    laplacian = getLaplacianMatrix(degree, adjacency)
    embedding = getEigen(laplacian, degree, k)
    model = KMeans(n_clusters=k).fit(embedding)
    return model.labels_
# Script entry: cluster the data set and report plot + quality metrics.
k = 7  # number of clusters requested
filename = 'Aggregation_cluster=7.txt'
data = np.loadtxt(filename, delimiter='\t')
# Keep every column except the last one, which (per the original note) holds
# the labels. Slicing rows here, as `data[0:-1]` did, would instead drop the
# final sample and leave the label column among the features.
data = data[:, :-1]
data = np.array(data)
label = NSCBySAndM(data, k)
plotRes(data, label, k)
print(metrics.silhouette_score(data, label))  # silhouette coefficient
print(metrics.davies_bouldin_score(data, label))  # Davies-Bouldin index (DBI)
print(metrics.calinski_harabasz_score(data, label))  # Calinski-Harabasz (CH) index