All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
ClassifierEMStrategy.cpp
Go to the documentation of this file.
1 /* Copyright (C) 2008 National Institute For Space Research (INPE) - Brazil.
2 
3  This file is part of the TerraLib - a Framework for building GIS enabled applications.
4 
5  TerraLib is free software: you can redistribute it and/or modify
6  it under the terms of the GNU Lesser General Public License as published by
7  the Free Software Foundation, either version 3 of the License,
8  or (at your option) any later version.
9 
10  TerraLib is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU Lesser General Public License for more details.
14 
15  You should have received a copy of the GNU Lesser General Public License
16  along with TerraLib. See COPYING. If not, write to
17  TerraLib Team at <terralib-team@terralib.org>.
18  */
19 
20 /*!
21  \file terralib/rp/ClassifierEMStrategy.cpp
22 
23  \brief EM (Expectation-Maximization) strategy for pixel-based classification.
24 */
25 
26 // TerraLib
27 #include "../common/MatrixUtils.h"
28 #include "../common/progress/TaskProgress.h"
29 #include "../raster/Grid.h"
30 #include "../raster/PositionIterator.h"
31 #include "../raster/RasterIterator.h"
32 #include "../raster/Utils.h"
33 #include "ClassifierEMStrategy.h"
34 #include "Macros.h"
35 #include "Functions.h"
36 
37 // STL
38 #include <complex>
39 #include <iostream>
40 #include <map>
41 
42 // Boost
43 #include <boost/numeric/ublas/io.hpp>
44 #include <boost/numeric/ublas/lu.hpp>
45 #include <boost/numeric/ublas/matrix.hpp>
46 
47 namespace
48 {
49  static te::rp::ClassifierEMStrategyFactory classifierEMStrategyFactoryInstance;
50 }
51 
53 {
54  reset();
55 }
56 
58 {
59 }
60 
62 {
63  reset();
64 
65  m_numberOfClusters = rhs.m_numberOfClusters;
66  m_maxIterations = rhs.m_maxIterations;
67  m_maxInputPoints = rhs.m_maxInputPoints;
68  m_epsilon = rhs.m_epsilon;
69  m_clustersMeans = rhs.m_clustersMeans;
70 
71  return *this;
72 }
73 
74 void te::rp::ClassifierEMStrategy::Parameters::reset() throw(te::rp::Exception)
75 {
76  m_numberOfClusters = 0;
77  m_maxIterations = 0;
78  m_maxInputPoints = 0;
79  m_epsilon = 0.0;
80  m_clustersMeans.clear();
81 }
82 
84 {
86 }
87 
89 {
90  m_isInitialized = false;
91 }
92 
94 {
95 }
96 
97 bool te::rp::ClassifierEMStrategy::initialize(te::rp::StrategyParameters const* const strategyParams) throw(te::rp::Exception)
98 {
99  m_isInitialized = false;
100 
101  te::rp::ClassifierEMStrategy::Parameters const* paramsPtr = dynamic_cast<te::rp::ClassifierEMStrategy::Parameters const*>(strategyParams);
102 
103  if(!paramsPtr)
104  return false;
105 
106  m_parameters = *(paramsPtr);
107 
108  TERP_TRUE_OR_RETURN_FALSE(m_parameters.m_numberOfClusters > 1, TE_TR("The number of clusters must be at least 2."))
109  TERP_TRUE_OR_RETURN_FALSE(m_parameters.m_maxIterations > 0, TE_TR("The number of iterations must be at least 1."))
112  TERP_TRUE_OR_RETURN_FALSE(m_parameters.m_maxInputPoints >= m_parameters.m_numberOfClusters, TE_TR("The maximum number of points must be at least higher than the number of clusters."))
113  TERP_TRUE_OR_RETURN_FALSE(m_parameters.m_epsilon > 0, TE_TR("The stop criteria must be higher than 0."))
114 
115  m_isInitialized = true;
116 
117  return true;
118 }
119 
120 bool te::rp::ClassifierEMStrategy::execute(const te::rst::Raster& inputRaster, const std::vector<unsigned int>& inputRasterBands,
121  const std::vector<te::gm::Polygon*>& inputPolygons, te::rst::Raster& outputRaster,
122  const unsigned int outputRasterBand, const bool enableProgressInterface) throw(te::rp::Exception)
123 {
124  TERP_TRUE_OR_RETURN_FALSE(m_isInitialized, TE_TR("Instance not initialized"))
125 
126 // create a vector of points with random positions inside raster to obtain input data
127  std::vector<te::gm::Point*> randomPoints = te::rst::GetRandomPointsInRaster(inputRaster, m_parameters.m_maxInputPoints);
128 
129 // M is the number of clusters
130  const unsigned int M = m_parameters.m_numberOfClusters;
131 // N is the number of vectors used to estimate the probabilities
132  const unsigned int N = randomPoints.size();
133 // S is the number of elements inside each vector
134  const unsigned int S = inputRasterBands.size();
135 
136 // get the input data
137  boost::numeric::ublas::matrix<double> Xk = boost::numeric::ublas::matrix<double>(N, S);
138 
141  unsigned int k = 0;
142  double max_pixel_value = 0.0;
143  while (pit != pitend)
144  {
145  for (unsigned int l = 0; l < S; l++)
146  {
147  inputRaster.getValue(pit.getColumn(), pit.getRow(), Xk(k, l), inputRasterBands[l]);
148  if (Xk(k, l) > max_pixel_value)
149  max_pixel_value = Xk(k, l);
150  }
151 
152  ++k;
153  ++pit;
154  }
155 
156  srand((unsigned) time(0));
157 // the parameter vector of means for each cluster
158  boost::numeric::ublas::matrix<double> MUj = boost::numeric::ublas::matrix<double>(M, S);
159  boost::numeric::ublas::matrix<double> previous_MUj = boost::numeric::ublas::matrix<double>(M, S);
160  if (m_parameters.m_clustersMeans.size() > 0)
161  {
162  for (unsigned int j = 0; j < M; j++)
163  for (unsigned int l = 0; l < S; l++)
164  MUj(j, l) = m_parameters.m_clustersMeans[j][l];
165  }
166  else
167  {
168 // define vector of means randomly, in the interval [0, max_pixel_value].
169  for (unsigned int j = 0; j < M; j++)
170  for (unsigned int l = 0; l < S; l++)
171  MUj(j, l) = rand() % (int) ceil(max_pixel_value);
172  }
173  previous_MUj = MUj;
174 
175 // the parameter vector of covariance matrices for each cluster
176  std::vector<boost::numeric::ublas::matrix<double> > SIGMAj;
177  for (unsigned int j = 0; j < M; j++)
178  {
179  boost::numeric::ublas::matrix<double> tmp_sigma(S, S);
180  for (unsigned int l = 0; l < S; l++)
181  {
182  for (unsigned int m = 0; m < S; m++)
183  tmp_sigma(l, m) = 0.0;
184  tmp_sigma(l, l) = 10.0;
185  }
186  SIGMAj.push_back(tmp_sigma);
187  }
188 
189 // variables used to estimate the cluster's probabilities
190  boost::numeric::ublas::matrix<double> Pj = boost::numeric::ublas::matrix<double>(M, 1);
191  boost::numeric::ublas::matrix<double> PCj_Xk = boost::numeric::ublas::matrix<double>(M, N);
192  for (unsigned int j = 0; j < M; j++)
193  {
194  Pj(j, 0) = 1 / (double) M;
195  for (unsigned int k = 0; k < N; k++)
196  PCj_Xk(j, k) = 0.0;
197  }
198 
199 // estimating cluster's probabilities
200  double sum_PCj_Xk;
201  double numerator_PCj_Xk;
202  double denominator_PCj_Xk;
203  double det_SIGMAj;
204  double det_SIGMAj2;
205  double distance_MUj;
206 
207  boost::numeric::ublas::matrix<double> Xk_minus_MUj(S, 1);
208  boost::numeric::ublas::matrix<double> Xk_minus_MUj_T(1, S);
209  boost::numeric::ublas::matrix<double> product_NETAj(1, 1);
210  boost::numeric::ublas::matrix<double> product_NETAj2(1, 1);
211  boost::numeric::ublas::matrix<double> inverse_SIGMAj(S, S);
212  boost::numeric::ublas::matrix<double> inverse_SIGMAj2(S, S);
213  boost::numeric::ublas::matrix<double> product_Xk_minusMUj(S, S);
214  boost::numeric::ublas::matrix<double> sum_product_Xk_minusMUj(S, S);
215 
216  te::common::TaskProgress task(TE_TR("Expectation Maximization algorithm - estimating clusters"), te::common::TaskProgress::UNDEFINED, m_parameters.m_maxIterations);
217  for (unsigned int i = 0; i < m_parameters.m_maxIterations; i++)
218  {
219 // computing PCj_Xk
220  for (unsigned int j = 0; j < M; j++)
221  {
222  te::common::GetDeterminant(SIGMAj[j], det_SIGMAj);
223  if (det_SIGMAj >= 0.0)
224  det_SIGMAj = pow(det_SIGMAj, -0.5);
225  else
226  det_SIGMAj = 1.0;
227 
228  for (unsigned int k = 0; k < N; k++)
229  {
230 // computing numerator of equation for PCj_Xk
231  numerator_PCj_Xk = 0.0;
232  for (unsigned int l = 0; l < S; l++)
233  Xk_minus_MUj(l, 0) = Xk(k, l) - MUj(j, l);
234  Xk_minus_MUj_T = boost::numeric::ublas::trans(Xk_minus_MUj);
235 
236  te::common::GetInverseMatrix(SIGMAj[j], inverse_SIGMAj);
237 
238  product_NETAj = prod(Xk_minus_MUj_T, inverse_SIGMAj);
239  product_NETAj = prod(product_NETAj, Xk_minus_MUj);
240  product_NETAj *= -0.5;
241 
242  numerator_PCj_Xk = det_SIGMAj * exp(product_NETAj(0, 0)) * Pj(j, 0);
243 
244 // computing denominator of equation for PCj_Xk
245  denominator_PCj_Xk = 0.0;
246  for (unsigned int j2 = 0; j2 < M; j2++)
247  {
248  te::common::GetDeterminant(SIGMAj[j2], det_SIGMAj2);
249  if (det_SIGMAj2 >= 0.0)
250  det_SIGMAj2 = pow(det_SIGMAj2, -0.5);
251  else
252  det_SIGMAj2 = 1.0;
253 
254  for (unsigned int l = 0; l < S; l++)
255  Xk_minus_MUj(l, 0) = Xk(k, l) - MUj(j2, l);
256  Xk_minus_MUj_T = boost::numeric::ublas::trans(Xk_minus_MUj);
257 
258  te::common::GetInverseMatrix(SIGMAj[j2], inverse_SIGMAj2);
259 
260  product_NETAj2 = prod(Xk_minus_MUj_T, inverse_SIGMAj2);
261  product_NETAj2 = prod(product_NETAj2, Xk_minus_MUj);
262  product_NETAj2 *= -0.5;
263 
264  denominator_PCj_Xk += det_SIGMAj2 * exp(product_NETAj2(0, 0)) * Pj(j2, 0);
265  }
266  if (denominator_PCj_Xk == 0.0)
267  denominator_PCj_Xk = 0.0000000001;
268 
269 // computing PCj_Xk
270  PCj_Xk(j, k) = numerator_PCj_Xk / denominator_PCj_Xk;
271  }
272  }
273 
274 // computing SIGMAj for t + 1
275  for (unsigned int j = 0; j < M; j++)
276  {
277  sum_PCj_Xk = 0.0;
278  for (unsigned int l = 0; l < S; l++)
279  for (unsigned int l2 = 0; l2 < S; l2++)
280  sum_product_Xk_minusMUj(l, l2) = 0.0;
281  for (unsigned int k = 0; k < N; k++)
282  {
283  sum_PCj_Xk += PCj_Xk(j, k);
284 
285  for (unsigned int l = 0; l < S; l++)
286  Xk_minus_MUj(l, 0) = Xk(k, l) - MUj(j, l);
287  Xk_minus_MUj_T = boost::numeric::ublas::trans(Xk_minus_MUj);
288 
289  product_Xk_minusMUj = prod(Xk_minus_MUj, Xk_minus_MUj_T);
290  product_Xk_minusMUj *= PCj_Xk(j, k);
291 
292  sum_product_Xk_minusMUj += product_Xk_minusMUj;
293  }
294  if (sum_PCj_Xk == 0.0)
295  sum_PCj_Xk = 0.0000000001;
296  SIGMAj[j] = sum_product_Xk_minusMUj / sum_PCj_Xk;
297  }
298 
299 // computing MUj for t + 1
300  for (unsigned int j = 0; j < M; j++)
301  {
302  sum_PCj_Xk = 0.0;
303  for (unsigned int l = 0; l < S; l++)
304  MUj(j, l) = 0.0;
305  for (unsigned int k = 0; k < N; k++)
306  {
307  for (unsigned int l = 0; l < S; l++)
308  MUj(j, l) += PCj_Xk(j, k) * Xk(k, l);
309  sum_PCj_Xk += PCj_Xk(j, k);
310  }
311  if (sum_PCj_Xk == 0.0)
312  sum_PCj_Xk = 0.0000000001;
313  for (unsigned int l = 0; l < S; l++)
314  MUj(j, l) /= sum_PCj_Xk;
315  }
316 
317 // computing Pj for t + 1
318  for (unsigned int j = 0; j < M; j++)
319  {
320  Pj(j, 0) = 0.0;
321  for (unsigned int k = 0; k < N; k++)
322  Pj(j, 0) += PCj_Xk(j, k);
323  Pj(j, 0) /= N;
324  }
325 
326 // checking convergence
327  distance_MUj = 0.0;
328  double a_minus_b;
329  for (unsigned int j = 0; j < M; j++)
330  for (unsigned int l = 0; l < S; l++)
331  {
332  a_minus_b = MUj(j, l) - previous_MUj(j, l);
333  distance_MUj += a_minus_b * a_minus_b;
334  }
335  distance_MUj = sqrt(distance_MUj);
336  if (distance_MUj < m_parameters.m_epsilon)
337  break;
338  previous_MUj = MUj;
339 
340  task.pulse();
341  }
342 
343 // classifying image
346 
347  boost::numeric::ublas::matrix<double> X = boost::numeric::ublas::matrix<double>(1, S);
348  boost::numeric::ublas::matrix<double> PCj_X = boost::numeric::ublas::matrix<double>(M, 1);
349 
350  double max_PCj_X;
351  unsigned int cluster;
352 
353  double numerator_PCj_X;
354  double denominator_PCj_X;
355 
356  boost::numeric::ublas::matrix<double> X_minus_MUj(S, 1);
357  boost::numeric::ublas::matrix<double> X_minus_MUj_T(1, S);
358 
359  task.setTotalSteps(inputRaster.getNumberOfColumns() * inputRaster.getNumberOfRows());
360  task.setMessage(TE_TR("Expectation Maximization algorithm - classifying image"));
361  task.setCurrentStep(0);
362  while (it != itend)
363  {
364  for (unsigned int l = 0; l < S; l++)
365  X(0, l) = (*it)[l];
366 
367 // computing PCj_X
368  for (unsigned int j = 0; j < M; j++)
369  {
370 // computing numerator of equation for PCj_X
371  te::common::GetDeterminant(SIGMAj[j], det_SIGMAj);
372  if (det_SIGMAj >= 0.0)
373  det_SIGMAj = pow(det_SIGMAj, -0.5);
374  else
375  det_SIGMAj = 1.0;
376 
377  numerator_PCj_X = 0.0;
378  for (unsigned int l = 0; l < S; l++)
379  X_minus_MUj(l, 0) = X(0, l) - MUj(j, l);
380  X_minus_MUj_T = boost::numeric::ublas::trans(X_minus_MUj);
381 
382  te::common::GetInverseMatrix(SIGMAj[j], inverse_SIGMAj);
383 
384  product_NETAj = prod(X_minus_MUj_T, inverse_SIGMAj);
385  product_NETAj = prod(product_NETAj, X_minus_MUj);
386  product_NETAj *= -0.5;
387 
388  numerator_PCj_X = det_SIGMAj * exp(product_NETAj(0, 0)) * Pj(j, 0);
389 
390 // computing denominator of equation for PCj_X
391  denominator_PCj_X = 0.0;
392  for (unsigned int j2 = 0; j2 < M; j2++)
393  {
394  te::common::GetDeterminant(SIGMAj[j2], det_SIGMAj2);
395  if (det_SIGMAj2 >= 0.0)
396  det_SIGMAj2 = pow(det_SIGMAj2, -0.5);
397  else
398  det_SIGMAj2 = 1.0;
399 
400  for (unsigned int l = 0; l < S; l++)
401  X_minus_MUj(l, 0) = X(0, l) - MUj(j2, l);
402  X_minus_MUj_T = boost::numeric::ublas::trans(X_minus_MUj);
403 
404  te::common::GetInverseMatrix(SIGMAj[j2], inverse_SIGMAj2);
405 
406  product_NETAj2 = prod(X_minus_MUj_T, inverse_SIGMAj2);
407  product_NETAj2 = prod(product_NETAj2, X_minus_MUj);
408  product_NETAj2 *= -0.5;
409 
410  denominator_PCj_X += det_SIGMAj2 * exp(product_NETAj2(0, 0)) * Pj(j2, 0);
411  }
412  if (denominator_PCj_X == 0.0)
413  denominator_PCj_X = 0.0000000001;
414 
415  PCj_X(j, 0) = numerator_PCj_X / denominator_PCj_X;
416  }
417 
418  max_PCj_X = 0.0;
419  cluster = 0;
420  for (unsigned int j = 0; j < M; j++)
421  if (PCj_X(j, 0) > max_PCj_X)
422  {
423  max_PCj_X = PCj_X(j, 0);
424  cluster = j + 1;
425  }
426 
427 // save cluster information in output raster
428  outputRaster.setValue(it.getColumn(), it.getRow(), cluster, outputRasterBand);
429 
430  ++it;
431  task.pulse();
432  }
433 
434  return true;
435 }
436 
438  : te::rp::ClassifierStrategyFactory("em")
439 {
440 }
441 
443 {
444 }
445 
447 {
448  return new te::rp::ClassifierEMStrategy();
449 }
void setMessage(const std::string &message)
Set the task message.
unsigned int m_numberOfClusters
The number of clusters (classes) to estimate in the image.
unsigned int getRow() const
Returns the current row in iterator.
void reset()
Clear all internal allocated resources and reset the parameters instance to its initial state...
This class implements and iterator to "navigate" over a raster, with a predefined number of bands...
bool initialize(StrategyParameters const *const strategyParams)
Initialize the classification strategy.
This class can be used to inform the progress of a task.
Definition: TaskProgress.h:53
static PointSetIterator end(const te::rst::Raster *r, const std::vector< te::gm::Point * > p)
Returns an iterator referring to after the end of the iterator.
EM (Expectation-Maximization) strategy for pixel-based classification.
const Parameters & operator=(const Parameters &params)
#define TE_TR(message)
It marks a string in order to get translated.
Definition: Translator.h:347
This class implements the strategy to iterate with spatial restriction, the iteration occurs inside a...
Raster Processing functions.
void setTotalSteps(int value)
Set the task total stepes.
#define TERP_TRUE_OR_RETURN_FALSE(value, message)
Checks if value is true. For false values a warning message will be logged and a return of context wi...
Definition: Macros.h:183
Raster classifier strategy factory base class.
bool GetDeterminant(const boost::numeric::ublas::matrix< T > &inputMatrix, double &determinant)
Get the Matrix determinant value.
Definition: MatrixUtils.h:57
Raster EM Classifier strategy factory.
bool execute(const te::rst::Raster &inputRaster, const std::vector< unsigned int > &inputRasterBands, const std::vector< te::gm::Polygon * > &inputPolygons, te::rst::Raster &outputRaster, const unsigned int outputRasterBand, const bool enableProgressInterface)
Executes the classification strategy.
EM strategy for pixel-based classification. This is an unsupervised and pixel-based classification al...
An abstract class for raster data strucutures.
Definition: Raster.h:71
static RasterIterator begin(Raster *r, const std::vector< unsigned int > &bands)
Returns an iterator referring to the first value.
ClassifierEMStrategy::Parameters m_parameters
Internal execution parameters.
Raster strategy parameters base class.
bool m_isInitialized
True if this instance is initialized.
AbstractParameters * clone() const
Create a clone copy of this instance.
void pulse()
Calls setCurrentStep() function using getCurrentStep() + 1.
unsigned int m_maxInputPoints
The maximum number of points used to estimate the clusters (default = 1000).
Abstract parameters base interface.
unsigned int m_maxIterations
The maximum of iterations (E/M steps) to perform if convergence is not achieved.
void setCurrentStep(int value)
Set the task current step.
Raster classifier strategy base class.
bool GetInverseMatrix(const boost::numeric::ublas::matrix< T > &inputMatrix, boost::numeric::ublas::matrix< T > &outputMatrix)
Matrix inversion.
Definition: MatrixUtils.h:104
TERASTEREXPORT std::vector< te::gm::Point * > GetRandomPointsInRaster(const te::rst::Raster &inputRaster, unsigned int numberOfPoints=1000)
Creates a vector of random positions (points) inside the raster.
Definition: Utils.cpp:485
static RasterIterator end(Raster *r, const std::vector< unsigned int > &bands)
Returns an iterator referring to after the end of the iterator.
unsigned int getColumn() const
Returns the current column in iterator.
std::vector< std::vector< double > > m_clustersMeans
The previously estimated means of the clusters (optional).
te::rp::ClassifierStrategy * build()
Concrete factories (derived from this one) must implement this method in order to create objects...
static PointSetIterator begin(const te::rst::Raster *r, const std::vector< te::gm::Point * > p)
Returns an iterator referring to the first value of the band.
double m_epsilon
The stop criteria. When the clusters change in a value smaller then epsilon, the convergence is achie...