All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ClassifierEMStrategy.cpp
Go to the documentation of this file.
1 /* Copyright (C) 2001-2009 National Institute For Space Research (INPE) - Brazil.
2 
3  This file is part of the TerraLib - a Framework for building GIS enabled applications.
4 
5  TerraLib is free software: you can redistribute it and/or modify
6  it under the terms of the GNU Lesser General Public License as published by
7  the Free Software Foundation, either version 3 of the License,
8  or (at your option) any later version.
9 
10  TerraLib is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU Lesser General Public License for more details.
14 
15  You should have received a copy of the GNU Lesser General Public License
16  along with TerraLib. See COPYING. If not, write to
17  TerraLib Team at <terralib-team@terralib.org>.
18  */
19 
20 /*!
21  \file terralib/rp/ClassifierEMStrategy.cpp
22 
23  \brief EM (Expectation-Maximization) strategy for pixel-based classification.
24 */
25 
26 // TerraLib
27 #include "../common/MatrixUtils.h"
28 #include "../common/progress/TaskProgress.h"
29 #include "../raster/Grid.h"
30 #include "../raster/PositionIterator.h"
31 #include "../raster/RasterIterator.h"
32 #include "../raster/Utils.h"
33 #include "ClassifierEMStrategy.h"
34 #include "Macros.h"
35 #include "Functions.h"
36 
37 // STL
38 #include <complex>
39 #include <iostream>
40 #include <map>
41 
42 // Boost
43 #include <boost/numeric/ublas/io.hpp>
44 #include <boost/numeric/ublas/lu.hpp>
45 #include <boost/numeric/ublas/matrix.hpp>
46 
47 namespace
48 {
49  static te::rp::ClassifierEMStrategyFactory classifierEMStrategyFactoryInstance;
50 }
51 
53 {
54  reset();
55 }
56 
58 {
59 }
60 
62 {
63  reset();
64 
65  m_numberOfClusters = rhs.m_numberOfClusters;
66  m_maxIterations = rhs.m_maxIterations;
67  m_maxInputPoints = rhs.m_maxInputPoints;
68  m_epsilon = rhs.m_epsilon;
69  m_clustersMeans = rhs.m_clustersMeans;
70 
71  return *this;
72 }
73 
74 void te::rp::ClassifierEMStrategy::Parameters::reset() throw(te::rp::Exception)
75 {
76  m_numberOfClusters = 0;
77  m_maxIterations = 0;
78  m_maxInputPoints = 0;
79  m_epsilon = 0.0;
80  m_clustersMeans.clear();
81 }
82 
84 {
86 }
87 
89 {
90  m_isInitialized = false;
91 }
92 
94 {
95 }
96 
97 bool te::rp::ClassifierEMStrategy::initialize(te::rp::StrategyParameters const* const strategyParams) throw(te::rp::Exception)
98 {
99  m_isInitialized = false;
100 
101  te::rp::ClassifierEMStrategy::Parameters const* paramsPtr = dynamic_cast<te::rp::ClassifierEMStrategy::Parameters const*>(strategyParams);
102 
103  if(!paramsPtr)
104  return false;
105 
106  m_parameters = *(paramsPtr);
107 
108  TERP_TRUE_OR_RETURN_FALSE(m_parameters.m_numberOfClusters > 1, TR_RP("The number of clusters must be at least 2."))
109  TERP_TRUE_OR_RETURN_FALSE(m_parameters.m_maxIterations > 0, TR_RP("The number of iterations must be at least 1."))
112  TERP_TRUE_OR_RETURN_FALSE(m_parameters.m_maxInputPoints >= m_parameters.m_numberOfClusters, TR_RP("The maximum number of points must be at least higher than the number of clusters."))
113  TERP_TRUE_OR_RETURN_FALSE(m_parameters.m_epsilon > 0, TR_RP("The stop criteria must be higher than 0."))
114 
115  m_isInitialized = true;
116 
117  return true;
118 }
119 
120 bool te::rp::ClassifierEMStrategy::execute(const te::rst::Raster& inputRaster, const std::vector<unsigned int>& inputRasterBands,
121  const std::vector<te::gm::Polygon*>& inputPolygons, te::rst::Raster& outputRaster,
122  const unsigned int outputRasterBand, const bool enableProgressInterface) throw(te::rp::Exception)
123 {
124  TERP_TRUE_OR_RETURN_FALSE(m_isInitialized, TR_RP("Instance not initialized"))
125 
126 // create a vector of points with random positions inside raster to obtain input data
127  std::vector<te::gm::Point*> randomPoints = te::rp::GetRandomPointsInRaster(inputRaster, m_parameters.m_maxInputPoints);
128 
129 // M is the number of clusters
130  const unsigned int M = m_parameters.m_numberOfClusters;
131 // N is the number of vectors used to estimate the probabilities
132  const unsigned int N = randomPoints.size();
133 // S is the number of elements inside each vector
134  const unsigned int S = inputRasterBands.size();
135 
136 // get the input data
137  boost::numeric::ublas::matrix<double> Xk = boost::numeric::ublas::matrix<double>(N, S);
138 
141  unsigned int k = 0;
142  double max_pixel_value = 0.0;
143  while (pit != pitend)
144  {
145  for (unsigned int l = 0; l < S; l++)
146  {
147  inputRaster.getValue(pit.getColumn(), pit.getRow(), Xk(k, l), inputRasterBands[l]);
148  if (Xk(k, l) > max_pixel_value)
149  max_pixel_value = Xk(k, l);
150  }
151 
152  ++k;
153  ++pit;
154  }
155 
156  srand((unsigned) time(0));
157 // the parameter vector of means for each cluster
158  boost::numeric::ublas::matrix<double> MUj = boost::numeric::ublas::matrix<double>(M, S);
159  boost::numeric::ublas::matrix<double> previous_MUj = boost::numeric::ublas::matrix<double>(M, S);
160  if (m_parameters.m_clustersMeans.size() > 0)
161  {
162  for (unsigned int j = 0; j < M; j++)
163  for (unsigned int l = 0; l < S; l++)
164  MUj(j, l) = m_parameters.m_clustersMeans[j][l];
165  }
166  else
167  {
168 // define vector of means randomly, in the interval [0, max_pixel_value].
169  for (unsigned int j = 0; j < M; j++)
170  for (unsigned int l = 0; l < S; l++)
171  MUj(j, l) = rand() % (int) ceil(max_pixel_value);
172  }
173  previous_MUj = MUj;
174 
175 // the parameter vector of covariance matrices for each cluster
176  std::vector<boost::numeric::ublas::matrix<double> > SIGMAj;
177  for (unsigned int j = 0; j < M; j++)
178  {
179  boost::numeric::ublas::matrix<double> tmp_sigma(S, S);
180  for (unsigned int l = 0; l < S; l++)
181  {
182  for (unsigned int m = 0; m < S; m++)
183  tmp_sigma(l, m) = 0.0;
184  tmp_sigma(l, l) = 10.0;
185  }
186  SIGMAj.push_back(tmp_sigma);
187  }
188 
189 // variables used to estimate the cluster's probabilities
190  boost::numeric::ublas::matrix<double> Pj = boost::numeric::ublas::matrix<double>(M, 1);
191  boost::numeric::ublas::matrix<double> PCj_Xk = boost::numeric::ublas::matrix<double>(M, N);
192  for (unsigned int j = 0; j < M; j++)
193  {
194  Pj(j, 0) = 1 / (double) M;
195  for (unsigned int k = 0; k < N; k++)
196  PCj_Xk(j, k) = 0.0;
197  }
198 
199 // estimating cluster's probabilities
200  double sum_PCj_Xk;
201  double numerator_PCj_Xk;
202  double denominator_PCj_Xk;
203  double det_SIGMAj;
204  double det_SIGMAj2;
205  double distance_MUj;
206 
207  boost::numeric::ublas::matrix<double> Xk_minus_MUj(S, 1);
208  boost::numeric::ublas::matrix<double> Xk_minus_MUj_T(1, S);
209  boost::numeric::ublas::matrix<double> product_NETAj(1, 1);
210  boost::numeric::ublas::matrix<double> product_NETAj2(1, 1);
211  boost::numeric::ublas::matrix<double> inverse_SIGMAj(S, S);
212  boost::numeric::ublas::matrix<double> inverse_SIGMAj2(S, S);
213  boost::numeric::ublas::matrix<double> product_Xk_minusMUj(S, S);
214  boost::numeric::ublas::matrix<double> sum_product_Xk_minusMUj(S, S);
215 
216  te::common::TaskProgress task(TR_RP("Expectation Maximization algorithm - estimating clusters"), te::common::TaskProgress::UNDEFINED, m_parameters.m_maxIterations);
217  for (unsigned int i = 0; i < m_parameters.m_maxIterations; i++)
218  {
219 // computing PCj_Xk
220  for (unsigned int j = 0; j < M; j++)
221  {
222  te::common::GetDeterminant(SIGMAj[j], det_SIGMAj);
223  if (det_SIGMAj >= 0.0)
224  det_SIGMAj = pow(det_SIGMAj, -0.5);
225  else
226  det_SIGMAj = 1.0;
227 
228  for (unsigned int k = 0; k < N; k++)
229  {
230 // computing numerator of equation for PCj_Xk
231  numerator_PCj_Xk = 0.0;
232  for (unsigned int l = 0; l < S; l++)
233  Xk_minus_MUj(l, 0) = Xk(k, l) - MUj(j, l);
234  Xk_minus_MUj_T = boost::numeric::ublas::trans(Xk_minus_MUj);
235 
236  te::common::getInverseMatrix(SIGMAj[j], inverse_SIGMAj);
237 
238  product_NETAj = prod(Xk_minus_MUj_T, inverse_SIGMAj);
239  product_NETAj = prod(product_NETAj, Xk_minus_MUj);
240  product_NETAj *= -0.5;
241 
242  numerator_PCj_Xk = det_SIGMAj * exp(product_NETAj(0, 0)) * Pj(j, 0);
243 
244 // computing denominator of equation for PCj_Xk
245  denominator_PCj_Xk = 0.0;
246  for (unsigned int j2 = 0; j2 < M; j2++)
247  {
248  te::common::GetDeterminant(SIGMAj[j2], det_SIGMAj2);
249  if (det_SIGMAj2 >= 0.0)
250  det_SIGMAj2 = pow(det_SIGMAj2, -0.5);
251  else
252  det_SIGMAj2 = 1.0;
253 
254  for (unsigned int l = 0; l < S; l++)
255  Xk_minus_MUj(l, 0) = Xk(k, l) - MUj(j2, l);
256  Xk_minus_MUj_T = boost::numeric::ublas::trans(Xk_minus_MUj);
257 
258  te::common::getInverseMatrix(SIGMAj[j2], inverse_SIGMAj2);
259 
260  product_NETAj2 = prod(Xk_minus_MUj_T, inverse_SIGMAj2);
261  product_NETAj2 = prod(product_NETAj2, Xk_minus_MUj);
262  product_NETAj2 *= -0.5;
263 
264  denominator_PCj_Xk += det_SIGMAj2 * exp(product_NETAj2(0, 0)) * Pj(j2, 0);
265  }
266  if (denominator_PCj_Xk == 0.0)
267  denominator_PCj_Xk = 0.0000000001;
268 
269 // computing PCj_Xk
270  PCj_Xk(j, k) = numerator_PCj_Xk / denominator_PCj_Xk;
271  }
272  }
273 
274 // computing SIGMAj for t + 1
275  for (unsigned int j = 0; j < M; j++)
276  {
277  sum_PCj_Xk = 0.0;
278  for (unsigned int l = 0; l < S; l++)
279  for (unsigned int l2 = 0; l2 < S; l2++)
280  sum_product_Xk_minusMUj(l, l2) = 0.0;
281  for (unsigned int k = 0; k < N; k++)
282  {
283  sum_PCj_Xk += PCj_Xk(j, k);
284 
285  for (unsigned int l = 0; l < S; l++)
286  Xk_minus_MUj(l, 0) = Xk(k, l) - MUj(j, l);
287  Xk_minus_MUj_T = boost::numeric::ublas::trans(Xk_minus_MUj);
288 
289  product_Xk_minusMUj = prod(Xk_minus_MUj, Xk_minus_MUj_T);
290  product_Xk_minusMUj *= PCj_Xk(j, k);
291 
292  sum_product_Xk_minusMUj += product_Xk_minusMUj;
293  }
294  if (sum_PCj_Xk == 0.0)
295  sum_PCj_Xk = 0.0000000001;
296  SIGMAj[j] = sum_product_Xk_minusMUj / sum_PCj_Xk;
297  }
298 
299 // computing MUj for t + 1
300  for (unsigned int j = 0; j < M; j++)
301  {
302  sum_PCj_Xk = 0.0;
303  for (unsigned int l = 0; l < S; l++)
304  MUj(j, l) = 0.0;
305  for (unsigned int k = 0; k < N; k++)
306  {
307  for (unsigned int l = 0; l < S; l++)
308  MUj(j, l) += PCj_Xk(j, k) * Xk(k, l);
309  sum_PCj_Xk += PCj_Xk(j, k);
310  }
311  if (sum_PCj_Xk == 0.0)
312  sum_PCj_Xk = 0.0000000001;
313  for (unsigned int l = 0; l < S; l++)
314  MUj(j, l) /= sum_PCj_Xk;
315  }
316 
317 // computing Pj for t + 1
318  for (unsigned int j = 0; j < M; j++)
319  {
320  Pj(j, 0) = 0.0;
321  for (unsigned int k = 0; k < N; k++)
322  Pj(j, 0) += PCj_Xk(j, k);
323  Pj(j, 0) /= N;
324  }
325 
326 // checking convergence
327  distance_MUj = 0.0;
328  double a_minus_b;
329  for (unsigned int j = 0; j < M; j++)
330  for (unsigned int l = 0; l < S; l++)
331  {
332  a_minus_b = MUj(j, l) - previous_MUj(j, l);
333  distance_MUj += a_minus_b * a_minus_b;
334  }
335  distance_MUj = sqrt(distance_MUj);
336  if (distance_MUj < m_parameters.m_epsilon)
337  break;
338  previous_MUj = MUj;
339 
340  task.pulse();
341  }
342 
343 // classifying image
346 
347  boost::numeric::ublas::matrix<double> X = boost::numeric::ublas::matrix<double>(1, S);
348  boost::numeric::ublas::matrix<double> PCj_X = boost::numeric::ublas::matrix<double>(M, 1);
349 
350  double max_PCj_X;
351  unsigned int cluster;
352 
353  double numerator_PCj_X;
354  double denominator_PCj_X;
355 
356  boost::numeric::ublas::matrix<double> X_minus_MUj(S, 1);
357  boost::numeric::ublas::matrix<double> X_minus_MUj_T(1, S);
358 
359  task.setTotalSteps(inputRaster.getNumberOfColumns() * inputRaster.getNumberOfRows());
360  task.setMessage(TR_RP("Expectation Maximization algorithm - classifying image"));
361  task.setCurrentStep(0);
362  while (it != itend)
363  {
364  for (unsigned int l = 0; l < S; l++)
365  X(0, l) = (*it)[l];
366 
367 // computing PCj_X
368  for (unsigned int j = 0; j < M; j++)
369  {
370 // computing numerator of equation for PCj_X
371  te::common::GetDeterminant(SIGMAj[j], det_SIGMAj);
372  if (det_SIGMAj >= 0.0)
373  det_SIGMAj = pow(det_SIGMAj, -0.5);
374  else
375  det_SIGMAj = 1.0;
376 
377  numerator_PCj_X = 0.0;
378  for (unsigned int l = 0; l < S; l++)
379  X_minus_MUj(l, 0) = X(0, l) - MUj(j, l);
380  X_minus_MUj_T = boost::numeric::ublas::trans(X_minus_MUj);
381 
382  te::common::getInverseMatrix(SIGMAj[j], inverse_SIGMAj);
383 
384  product_NETAj = prod(X_minus_MUj_T, inverse_SIGMAj);
385  product_NETAj = prod(product_NETAj, X_minus_MUj);
386  product_NETAj *= -0.5;
387 
388  numerator_PCj_X = det_SIGMAj * exp(product_NETAj(0, 0)) * Pj(j, 0);
389 
390 // computing denominator of equation for PCj_X
391  denominator_PCj_X = 0.0;
392  for (unsigned int j2 = 0; j2 < M; j2++)
393  {
394  te::common::GetDeterminant(SIGMAj[j2], det_SIGMAj2);
395  if (det_SIGMAj2 >= 0.0)
396  det_SIGMAj2 = pow(det_SIGMAj2, -0.5);
397  else
398  det_SIGMAj2 = 1.0;
399 
400  for (unsigned int l = 0; l < S; l++)
401  X_minus_MUj(l, 0) = X(0, l) - MUj(j2, l);
402  X_minus_MUj_T = boost::numeric::ublas::trans(X_minus_MUj);
403 
404  te::common::getInverseMatrix(SIGMAj[j2], inverse_SIGMAj2);
405 
406  product_NETAj2 = prod(X_minus_MUj_T, inverse_SIGMAj2);
407  product_NETAj2 = prod(product_NETAj2, X_minus_MUj);
408  product_NETAj2 *= -0.5;
409 
410  denominator_PCj_X += det_SIGMAj2 * exp(product_NETAj2(0, 0)) * Pj(j2, 0);
411  }
412  if (denominator_PCj_X == 0.0)
413  denominator_PCj_X = 0.0000000001;
414 
415  PCj_X(j, 0) = numerator_PCj_X / denominator_PCj_X;
416  }
417 
418  max_PCj_X = 0.0;
419  cluster = 0;
420  for (unsigned int j = 0; j < M; j++)
421  if (PCj_X(j, 0) > max_PCj_X)
422  {
423  max_PCj_X = PCj_X(j, 0);
424  cluster = j + 1;
425  }
426 
427 // save cluster information in output raster
428  outputRaster.setValue(it.getColumn(), it.getRow(), cluster, outputRasterBand);
429 
430  ++it;
431  task.pulse();
432  }
433 
434  return true;
435 }
436 
438  : te::rp::ClassifierStrategyFactory("em")
439 {
440 }
441 
443 {
444 }
445 
447 {
448  return new te::rp::ClassifierEMStrategy();
449 }
Raster strategy parameters base class.
static PointSetIterator begin(const te::rst::Raster *r, const std::vector< te::gm::Point * > p)
Returns an iterator referring to the first value of the band.
std::vector< te::gm::Point * > GetRandomPointsInRaster(const te::rst::Raster &inputRaster, unsigned int numberOfPoints)
Creates a vector of random positions (points) inside the raster.
Definition: Functions.cpp:700
Raster Processing functions.
std::vector< std::vector< double > > m_clustersMeans
The previously estimated means of the clusters (optional).
void setTotalSteps(int value)
Set the task total stepes.
This class implements and iterator to &quot;navigate&quot; over a raster, with a predefined number of bands...
#define TR_RP(message)
It marks a string in order to get translated. This is a special mark used in the Raster Processing mo...
Definition: Config.h:58
void setCurrentStep(int value)
Set the task current step.
Raster EM Classifier strategy factory.
te::rp::ClassifierStrategy * build()
Concrete factories (derived from this one) must implement this method in order to create objects...
unsigned int getRow() const
Returns the current row in iterator.
#define TERP_TRUE_OR_RETURN_FALSE(value, message)
Checks if value is true. For false values a warning message will be logged and a return of context wi...
Definition: Macros.h:183
unsigned int m_maxIterations
The maximum of iterations (E/M steps) to perform if convergence is not achieved.
Abstract parameters base interface.
double m_epsilon
The stop criteria. When the clusters change in a value smaller then epsilon, the convergence is achie...
void pulse()
Calls setCurrentStep() function using getCurrentStep() + 1.
void reset()
Clear all internal allocated resources and reset the parameters instance to its initial state...
Raster classifier strategy factory base class.
bool initialize(StrategyParameters const *const strategyParams)
Initialize the classification strategy.
EM strategy for pixel-based classification. This is an unsupervised and pixel-based classification al...
bool getInverseMatrix(const boost::numeric::ublas::matrix< T > &inputMatrix, boost::numeric::ublas::matrix< T > &outputMatrix)
Matrix inversion.
Definition: MatrixUtils.h:104
bool execute(const te::rst::Raster &inputRaster, const std::vector< unsigned int > &inputRasterBands, const std::vector< te::gm::Polygon * > &inputPolygons, te::rst::Raster &outputRaster, const unsigned int outputRasterBand, const bool enableProgressInterface)
Executes the classification strategy.
bool m_isInitialized
True if this instance is initialized.
This class implements the strategy to iterate with spatial restriction, the iteration occurs inside a...
ClassifierEMStrategy::Parameters m_parameters
Internal execution parameters.
bool GetDeterminant(const boost::numeric::ublas::matrix< T > &inputMatrix, double &determinant)
Get the Matrix determinant value.
Definition: MatrixUtils.h:57
unsigned int m_maxInputPoints
The maximum number of points used to estimate the clusters (default = 1000).
static RasterIterator end(Raster *r, const std::vector< unsigned int > &bands)
Returns an iterator referring to after the end of the iterator.
static PointSetIterator end(const te::rst::Raster *r, const std::vector< te::gm::Point * > p)
Returns an iterator referring to after the end of the iterator.
AbstractParameters * clone() const
Create a clone copy of this instance.
const Parameters & operator=(const Parameters &params)
unsigned int m_numberOfClusters
The number of clusters (classes) to estimate in the image.
void setMessage(const std::string &message)
Set the task message.
An abstract class for raster data strucutures.
Definition: Raster.h:70
This class can be used to inform the progress of a task.
Definition: TaskProgress.h:53
unsigned int getColumn() const
Returns the current column in iterator.
Raster classifier strategy base class.
static RasterIterator begin(Raster *r, const std::vector< unsigned int > &bands)
Returns an iterator referring to the first value.
EM (Expectation-Maximization) strategy for pixel-based classification.