-
Notifications
You must be signed in to change notification settings - Fork 1
/
GMM.py
95 lines (87 loc) · 3.17 KB
/
GMM.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Jun 1 19:41:04 2022
@author: hugo
"""
import numpy as np
from numpy import random
from matplotlib.patches import Ellipse
import matplotlib.transforms as transforms
from scipy.stats import multivariate_normal
class GMM():
def __init__(self, k, dim, init_mu=None, init_sigma=None, init_pi=None, colors=None):
'''
Define a model with known number of clusters and dimensions.
input:
- k: Number of Gaussian clusters
- dim: Dimension
- init_mu: initial value of mean of clusters (k, dim)
(default) random from uniform[-10, 10]
- init_sigma: initial value of covariance matrix of clusters (k, dim, dim)
(default) Identity matrix for each cluster
- init_pi: initial value of cluster weights (k,)
(default) equal value to all cluster i.e. 1/k
- colors: Color valu for plotting each cluster (k, 3)
(default) random from uniform[0, 1]
'''
self.k = k
self.dim = dim
if(init_mu is None):
init_mu = random.rand(k, dim)*20 - 10
self.mu = init_mu
if(init_sigma is None):
init_sigma = np.zeros((k, dim, dim))
for i in range(k):
init_sigma[i] = np.eye(dim)
self.sigma = init_sigma
if(init_pi is None):
init_pi = np.ones(self.k)/self.k
self.pi = init_pi
if(colors is None):
colors = random.rand(k, 3)
self.colors = colors
def init_em(self, X):
'''
Initialization for EM algorithm.
input:
- X: data (batch_size, dim)
'''
self.data = X
self.num_points = X.shape[0]
self.z = np.zeros((self.num_points, self.k))
def e_step(self):
'''
E-step of EM algorithm.
'''
for i in range(self.k):
self.z[:, i] = self.pi[i] * multivariate_normal.pdf(self.data, mean=self.mu[i], cov=self.sigma[i] + 1e-10)
self.z /= self.z.sum(axis=1, keepdims=True)
def m_step(self):
'''
M-step of EM algorithm.
'''
sum_z = self.z.sum(axis=0)
self.pi = sum_z / self.num_points
# self.mu = np.matmul(self.z.T, self.data)
# self.mu /= sum_z[:, None]
for i in range(self.k):
j = np.expand_dims(self.data, axis=1) - self.mu[i]
s = np.matmul(j.transpose([0, 2, 1]), j)
self.sigma[i] = np.matmul(s.transpose(1, 2, 0), self.z[:, i] )
self.sigma[i] /= sum_z[i]
def log_likelihood(self, X):
'''
Compute the log-likelihood of X under current parameters
input:
- X: Data (batch_size, dim)
output:
- log-likelihood of X: Sum_n Sum_k log(pi_k * N( X_n | mu_k, sigma_k ))
'''
ll = []
for d in X:
tot = 0
for i in range(self.k):
tot += self.pi[i] * multivariate_normal.pdf(d, mean=self.mu[i], cov=self.sigma[i] + 1e-10)
ll.append(np.log(tot + 1e-10))
return np.sum(ll)