Files

copied
Last update 6 years 2 months by Olivier Gillet
Filestoolslearning
..
__init__.py
som.py
som.py
#!/usr/bin/python2.5 # # Copyright 2009 Olivier Gillet. # # Author: Olivier Gillet (ol.gillet@gmail.com) # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. # # ----------------------------------------------------------------------------- # # Self-organizing map. import numpy import random class SOM(object): def __init__(self, grid_size, radius, learning_rate): self._grid_size = grid_size self._grid_x = numpy.arange(0, grid_size * grid_size) % grid_size self._grid_y = numpy.arange(0, grid_size * grid_size) / grid_size self._radius = radius self._learning_rate = learning_rate self._codewords = None @staticmethod def standardize(x, std_power=1, axis=0, regularization=0.0): t, n = x.shape mean = x.mean(axis=axis) std = x.std(axis=axis) ** std_power + regularization return (x - mean) / std def classify(self, data): distances = ((self._codewords - data) ** 2).sum(axis=1) return distances.argmin(), distances def train(self, data, iterations=10000, seed=42): n, d = data.shape nodes = self._grid_size numpy.random.seed(seed) random.seed(seed) self._codewords = numpy.random.randn(nodes * nodes, d) self._error_history = [] milestone = 2 for i in xrange(iterations): if i == milestone: print 'iteration', i, 'of', iterations, '\t', \ round(1000.0 * i / iterations) * 0.1, '%' milestone <<= 1 radius = self._radius * 2 ** (-2 * float(i) / iterations) learning_rate = self._learning_rate * 2 ** (-7 * float(i) / iterations) # Pick a random vector. x = random.choice(data) # Find best matching unit. bmu, distances = self.classify(x) self._error_history.append(distances[bmu]) # Compute neighborhood update function. delta_x = self._grid_x[bmu] - self._grid_x delta_y = self._grid_y[bmu] - self._grid_y rbf = numpy.exp(-(delta_x ** 2 + delta_y ** 2) / (radius * radius)) rbf = numpy.tile(rbf.reshape((nodes * nodes, 1)), (1, d)) update = rbf * (numpy.tile(x, (nodes * nodes, 1)) - self._codewords) self._codewords += learning_rate * update return self._error_history def checkpoint(self): numpy.save('weights', self._codewords) def resume(self): self._codewords = numpy.load('weights.npy') def plot(self, x): import pylab for i in xrange(x.shape[0]): _, d = self.classify(x[i, :]) d = numpy.exp(-d) pylab.figure() pylab.imshow( d.reshape((self._grid_size, self._grid_size)), interpolation='nearest') pylab.savefig('response_%d.pdf' % i)
Report a bug