EM

The following example require the data set found here.

#!/usr/bin/python
"""
Author: Jeremy M. Stober
Program: GMM.PY
Date: Thursday, September  4 2008
Description: Fit a Gaussian Mixture Model with EM.
"""
 
import os, sys, getopt, pdb, string
from numpy import *
import pylab
import matplotlib
 
faithful = [array([float(elem) for elem in line.split()]) for line in open('faithful.txt').readlines()]
xvals = array([elem[0] for elem in faithful])
yvals = array([elem[1] for elem in faithful])
 
# Standard normalization.
xmean = mean(xvals)
xstd = std(xvals)
 
ymean = mean(yvals)
ystd = std(yvals)
 
xnorm = (xvals - xmean) / xstd
ynorm = (yvals - ymean) / ystd
 
nfaithful= array(zip(xnorm, ynorm))
 
# Does the data look reasonable?
# pylab.plot(xnorm,ynorm, '+')
# pylab.show()
 
# initialize the means and covariances
mus = [array([-1.5,1.0]), array([1.0,-1.0])]
sigmas = [eye(2),eye(2)]
mixings = [0.5, 0.5]
 
def normal(x,mu,sigma):
    """ Return the normal density at a point. """
    D = len(sigma)
    det = linalg.det(sigma)
    div = (2.0 * pi)**(D / 2.0) * (det)**(0.5)
    return exp(-0.5 * dot(dot(x - mu, linalg.inv(sigma)), x - mu)) / div
 
k = 2 # 2 dimensional data
n = len(nfaithful) # number of data points
 
for l in range(100):
    print l
    # E step
    print mus, mixings
 
    responses = zeros((k,n))
 
    for j in range(n):    
        for i in range(k):
            responses[i,j] = mixings[i] * normal(nfaithful[j],mus[i],sigmas[i])
 
    responses = responses / sum(responses,axis=0) # normalize the weights
 
    # M step
 
    N = sum(responses,axis=1)
    for i in range(k):
        mus[i] = dot(responses[i,:],nfaithful) / N[i]
        sigmas[i] = zeros((2,2))
 
        for j in range(n):
            sigmas[i] += responses[i,j] * outer(nfaithful[j,:] - mus[i],nfaithful[j,:] - mus[i])
 
        sigmas[i] = sigmas[i] / N[i]
 
        mixings[i] = N[i] / sum(N)
 
 
def shownormal(mus,sigmas):
 
    # Plot the normalized faithful data points.
    fig = pylab.figure(num = 1, figsize=(4,4))
    axes = fig.add_subplot(111)
    axes.plot(xnorm,ynorm, '+')
 
    # Plot the ellipses representing the principle components of the normals.
    k = len(mus)
    for i in range(k):
 
        color = None
        if i == 0:
            color = 'red'
        else:
            color = 'blue'
 
        [u,s,v] = linalg.svd(sigmas[i])
        angle = arccos(dot(u[1],array([1,0]))) * 180.0 / pi
        ellipse = matplotlib.patches.Ellipse(mus[i], sqrt(s[1]), sqrt(s[0]), angle=angle, fill=False, ec=color)
        axes.add_patch(ellipse)
 
    pylab.draw()
    pylab.show()
 
shownormal(mus,sigmas)
Share and Enjoy:
  • Digg
  • del.icio.us
  • Facebook
  • Google Bookmarks
  • Reddit
  • Technorati
  • Furl
  • StumbleUpon
  • Tumblr
  • TwitThis
One Response to “EM”
  1. [...] The next algorithm in my continuing series of short, hackable implementations of common machine learning algorithms is fitting a Gaussian mixture model through expectation maximization. [...]

  2.  
Leave a Reply