-
Notifications
You must be signed in to change notification settings - Fork 10
/
5. step3.py
57 lines (40 loc) · 1.35 KB
/
5. step3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import codecademylib3_seaborn
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets
from copy import deepcopy
# Load the iris data set and keep its feature matrix (one row per sample).
iris = datasets.load_iris()
samples = iris.data

# Cluster on the first two feature columns only (named sepal length and
# sepal width by this script).
x = samples[:, 0]
y = samples[:, 1]
# Pair the two columns into an (n_samples, 2) array of points.
sepal_length_width = np.column_stack((x, y))

# Step 1: Place K random centroids
k = 3
# Each coordinate is drawn uniformly within the observed range of that
# feature.  The x draw happens before the y draw so that a seeded run
# consumes the random stream in the same order as before.
centroids_x = np.random.uniform(x.min(), x.max(), size=k)
centroids_y = np.random.uniform(y.min(), y.max(), size=k)
centroids = np.column_stack((centroids_x, centroids_y))
# Step 2: Assign samples to nearest centroid
def distance(a, b):
    """Return the Euclidean distance between points *a* and *b*.

    Coordinates are compared pairwise, so the points may have any number
    of dimensions; for two-dimensional points the result is
    ``sqrt((a[0] - b[0])**2 + (a[1] - b[1])**2)``.  If one point has more
    coordinates than the other, the unmatched trailing coordinates are
    ignored.

    Args:
        a: Sequence of numeric coordinates.
        b: Sequence of numeric coordinates.

    Returns:
        The straight-line distance between the two points.
    """
    # The accumulator gets its own name so it does not shadow the function.
    squared_total = sum((p - q) ** 2 for p, q in zip(a, b))
    return squared_total ** 0.5
# Cluster labels for each point (an index into `centroids`, 0 .. k-1)
labels = np.zeros(len(samples))
# Distances from the current sample to each centroid (reused every iteration)
distances = np.zeros(k)
for i, point in enumerate(sepal_length_width):
    # Measure against every centroid instead of a fixed three, so the
    # assignment keeps working when k is changed.
    for j, centroid in enumerate(centroids):
        distances[j] = distance(point, centroid)
    # The nearest centroid's index becomes this sample's label.
    cluster = np.argmin(distances)
    labels[i] = cluster
# Step 3: Update centroids
# Snapshot the current positions so the before/after can be printed below.
centroids_old = deepcopy(centroids)
for i in range(k):
    # Boolean mask picks every sample currently labelled with cluster i.
    points = sepal_length_width[labels == i]
    # A randomly placed centroid may attract no samples at all.  Averaging an
    # empty selection yields NaN, so such a centroid keeps its old position.
    if len(points) > 0:
        centroids[i] = np.mean(points, axis=0)
print(centroids_old)
print("- - - - - - - - - - - - - -")
print(centroids)