EM
The following example require the data set found here.
#!/usr/bin/python """ Author: Jeremy M. Stober Program: GMM.PY Date: Thursday, September 4 2008 Description: Fit a Gaussian Mixture Model with EM. """ import os, sys, getopt, pdb, string from numpy import * import pylab import matplotlib faithful = [array([float(elem) for elem in line.split()]) for line in open('faithful.txt').readlines()] xvals = array([elem[0] for elem in faithful]) yvals = array([elem[1] for elem in faithful]) # Standard normalization. xmean = mean(xvals) xstd = std(xvals) ymean = mean(yvals) ystd = std(yvals) xnorm = (xvals - xmean) / xstd ynorm = (yvals - ymean) / ystd nfaithful= array(zip(xnorm, ynorm)) # Does the data look reasonable? # pylab.plot(xnorm,ynorm, '+') # pylab.show() # initialize the means and covariances mus = [array([-1.5,1.0]), array([1.0,-1.0])] sigmas = [eye(2),eye(2)] mixings = [0.5, 0.5] def normal(x,mu,sigma): """ Return the normal density at a point. """ D = len(sigma) det = linalg.det(sigma) div = (2.0 * pi)**(D / 2.0) * (det)**(0.5) return exp(-0.5 * dot(dot(x - mu, linalg.inv(sigma)), x - mu)) / div k = 2 # 2 dimensional data n = len(nfaithful) # number of data points for l in range(100): print l # E step print mus, mixings responses = zeros((k,n)) for j in range(n): for i in range(k): responses[i,j] = mixings[i] * normal(nfaithful[j],mus[i],sigmas[i]) responses = responses / sum(responses,axis=0) # normalize the weights # M step N = sum(responses,axis=1) for i in range(k): mus[i] = dot(responses[i,:],nfaithful) / N[i] sigmas[i] = zeros((2,2)) for j in range(n): sigmas[i] += responses[i,j] * outer(nfaithful[j,:] - mus[i],nfaithful[j,:] - mus[i]) sigmas[i] = sigmas[i] / N[i] mixings[i] = N[i] / sum(N) def shownormal(mus,sigmas): # Plot the normalized faithful data points. fig = pylab.figure(num = 1, figsize=(4,4)) axes = fig.add_subplot(111) axes.plot(xnorm,ynorm, '+') # Plot the ellipses representing the principle components of the normals. k = len(mus) for i in range(k): color = None if i == 0: color = 'red' else: color = 'blue' [u,s,v] = linalg.svd(sigmas[i]) angle = arccos(dot(u[1],array([1,0]))) * 180.0 / pi ellipse = matplotlib.patches.Ellipse(mus[i], sqrt(s[1]), sqrt(s[0]), angle=angle, fill=False, ec=color) axes.add_patch(ellipse) pylab.draw() pylab.show() shownormal(mus,sigmas)
Entries (RSS)
[...] The next algorithm in my continuing series of short, hackable implementations of common machine learning algorithms is fitting a Gaussian mixture model through expectation maximization. [...]