MAP.cpp
Go to the documentation of this file.
1 /* Copyright (C) 2008 National Institute For Space Research (INPE) - Brazil.
2 
3  This file is part of the TerraLib - a Framework for building GIS enabled applications.
4 
5  TerraLib is free software: you can redistribute it and/or modify
6  it under the terms of the GNU Lesser General Public License as published by
7  the Free Software Foundation, either version 3 of the License,
8  or (at your option) any later version.
9 
10  TerraLib is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU Lesser General Public License for more details.
14 
15  You should have received a copy of the GNU Lesser General Public License
16  along with TerraLib. See COPYING. If not, write to
17  TerraLib Team at <terralib-team@terralib.org>.
18  */
19 
20 /*!
21  \file terralib/classification/MAP.cpp
22 
23  \brief MAP (Maximum a Posteriori) strategy for classification.
24 */
25 
26 // TerraLib
27 #include "MAP.h"
28 
29 // STL
30 #include <limits>
31 
32 namespace te
33 {
34  namespace cl
35  {
37  {
38  reset();
39  }
40 
41  MAP::Parameters::~Parameters() = default;
42 
44  const MAP::Parameters& rhs)
45  {
46  reset();
47 
50 
51  return *this;
52  }
53 
55  {
56  m_prioriProbs.clear();
58  }
59 
61  {
62  return new MAP::Parameters(*this);
63  }
64 
65  // ------------------------------------------------------------------------------------------------------------------------------------------
66 
68  {
69  reset();
70  }
71 
72  MAP::~MAP() = default;
73 
74  bool MAP::initialize(const MAP::Parameters& params) throw(Exception)
75  {
76  reset();
77 
78  if( !params.m_prioriProbs.empty() )
79  {
80  double sum = 0.0;
81 
82  for( std::size_t prioriProbsIdx = 0 ; prioriProbsIdx < params.m_prioriProbs.size() ;
83  ++prioriProbsIdx )
84  {
85  if(
86  ( params.m_prioriProbs[ prioriProbsIdx ] < 0.0 )
87  ||
88  ( params.m_prioriProbs[ prioriProbsIdx ] > 1.0 )
89  )
90  {
91  return false;
92  }
93 
94  sum += params.m_prioriProbs[ prioriProbsIdx ];
95  }
96 
97  if( sum != 1.0 )
98  {
99  return false;
100  }
101  }
102 
103  if( params.m_prioriCalcSampleStep < 1 )
104  {
105  return false;
106  }
107 
108  m_parameters = params;
109 
110  m_isInitialized = true;
111 
112  return true;
113  }
114 
115  void MAP::reset()
116  {
117  m_isInitialized = false;
119  m_classesMeans.clear();
122  m_classLabels.clear();
124  }
125 
126  bool MAP::train( const InputAdaptor< double >& samples,
127  const std::vector<unsigned int>& attributesIndices,
128  const std::vector<unsigned int>& sampleLabels,
129  const bool enableProgressInterface) throw(Exception)
130  {
131  if( ! m_isInitialized ) return false;
132  if( samples.getElementsCount() == 0 ) return false;
133  if( sampleLabels.size() != samples.getElementsCount() ) return false;
134  if( attributesIndices.empty() ) return false;
135 
136  const unsigned int attributesIndicesSize = static_cast<unsigned int>(attributesIndices.size());
137 
138  std::unique_ptr< te::common::TaskProgress > progressPtr;
139  if( enableProgressInterface )
140  {
141  progressPtr.reset( new te::common::TaskProgress );
142  progressPtr->setTotalSteps( 3 );
143  progressPtr->setMessage( "Trainning" );
144  }
145 
146  // grouping samples by class
147 
148  std::vector< std::vector< std::vector< double > > > samplesByClass;
149 
150  {
151  m_classLabels.clear();
152 
153  const unsigned int samplesCount = samples.getElementsCount();
154  std::map< unsigned int, unsigned int > labels2ClassIndexMap;
155  std::map< unsigned int, unsigned int >::iterator labels2ClassIndexMapIt;
156  unsigned int attributesIndicesIdx = 0;
157 
158  for( unsigned int sampleIdx = 0 ; sampleIdx < samplesCount ; ++sampleIdx )
159  {
160  const unsigned int& sampleLabel = sampleLabels[ sampleIdx ];
161 
162  labels2ClassIndexMapIt = labels2ClassIndexMap.find( sampleLabel );
163 
164  if( labels2ClassIndexMapIt == labels2ClassIndexMap.end() )
165  {
166  labels2ClassIndexMap[sampleLabel] = static_cast<unsigned int>(m_classLabels.size());
167  m_classLabels.push_back( sampleLabel );
168 
169  samplesByClass.push_back( std::vector< std::vector< double > >() );
170  samplesByClass.back().push_back( std::vector< double >() );
171 
172  std::vector< double >& sample = samplesByClass.back().back();
173  sample.resize( attributesIndicesSize, 0.0 );
174 
175  for( attributesIndicesIdx = 0; attributesIndicesIdx < attributesIndicesSize;
176  ++attributesIndicesIdx )
177  {
178  samples.getFeature( sampleIdx, attributesIndicesIdx, sample[ attributesIndicesIdx ] );
179  }
180  }
181  else
182  {
183  samplesByClass[ labels2ClassIndexMapIt->second ].push_back( std::vector< double >() );
184  std::vector< double >& sample = samplesByClass[ labels2ClassIndexMapIt->second ].back();
185  sample.resize( attributesIndicesSize, 0.0 );
186 
187  for( attributesIndicesIdx = 0; attributesIndicesIdx < attributesIndicesSize;
188  ++attributesIndicesIdx )
189  {
190  samples.getFeature( sampleIdx, attributesIndicesIdx, sample[ attributesIndicesIdx ] );
191  }
192  }
193  }
194  }
195 
196  if( enableProgressInterface )
197  {
198  progressPtr->pulse();
199  if( ! progressPtr->isActive() ) return false;
200  }
201 
202  // Checking priori probabilities
203 
204  if( ! m_parameters.m_prioriProbs.empty() )
205  {
206  if( m_parameters.m_prioriProbs.size() != samplesByClass.size() )
207  {
208  return false;
209  }
210  }
211 
212  // classes means calcule
213 
214  {
215  const std::size_t samplesByClassSize = samplesByClass.size();
216 
217  m_classesMeans.resize( samplesByClassSize );
218  std::vector< double > dymmyMeansVec( attributesIndicesSize, 0.0 );
219  std::fill( m_classesMeans.begin(), m_classesMeans.end(), dymmyMeansVec );
220 
221  unsigned int attributeIdx = 0;
222  double mean = 0;
223  std::size_t classSamplesIdx = 0;
224  std::size_t classSamplesSize = 0;
225 
226  for( std::size_t samplesByClassIdx = 0 ; samplesByClassIdx < samplesByClassSize ;
227  ++samplesByClassIdx )
228  {
229  const std::vector< std::vector< double > >& classSamples = samplesByClass[
230  samplesByClassIdx ];
231  classSamplesSize = classSamples.size();
232 
233  for( attributeIdx = 0; attributeIdx < attributesIndicesSize; ++attributeIdx )
234  {
235  mean = 0;
236 
237  for( classSamplesIdx = 0 ; classSamplesIdx < classSamplesSize ; ++classSamplesIdx )
238  {
239  mean += classSamples[ classSamplesIdx ][ attributeIdx ];
240  }
241 
242  mean /= (double)classSamplesSize;
243 
244  m_classesMeans[ samplesByClassIdx ][ attributeIdx ] = mean;
245  }
246  }
247  }
248 
249  if( enableProgressInterface )
250  {
251  progressPtr->pulse();
252  if( ! progressPtr->isActive() ) return false;
253  }
254 
255  // covariance matrixes calcule
256 
257  {
258  const std::size_t samplesByClassSize = samplesByClass.size();
259  const boost::numeric::ublas::matrix< double > dummyCovarianceMatrix(
260  attributesIndicesSize, attributesIndicesSize );
261 
263  m_classesCovarianceMatrixes.resize( samplesByClassSize, dummyCovarianceMatrix );
264 
266  m_classesCovarianceInvMatrixes.resize( samplesByClassSize, dummyCovarianceMatrix );
267 
269  m_classesOptizedMAPDiscriminantTerm.resize( samplesByClassSize );
270 
271  unsigned int attributeIdx1 = 0;
272  unsigned int attributeIdx2 = 0;
273  std::size_t classSamplesIdx = 0;
274  std::size_t classSamplesSize = 0;
275  double mean1 = 0;
276  double mean2 = 0;
277  double covariance = 0;
278 
279  for( std::size_t samplesByClassIdx = 0 ; samplesByClassIdx < samplesByClassSize ;
280  ++samplesByClassIdx )
281  {
282  const std::vector< std::vector< double > >& classSamples = samplesByClass[
283  samplesByClassIdx ];
284  classSamplesSize = classSamples.size();
285 
286  for( attributeIdx1 = 0; attributeIdx1 < attributesIndicesSize; ++attributeIdx1 )
287  {
288  mean1 = m_classesMeans[ samplesByClassIdx ][ attributeIdx1 ];
289 
290  for( attributeIdx2 = 0; attributeIdx2 < attributesIndicesSize; ++attributeIdx2 )
291  {
292  mean2 = m_classesMeans[ samplesByClassIdx ][ attributeIdx2 ];
293  covariance = 0.0;
294 
295  for( classSamplesIdx = 0 ; classSamplesIdx < classSamplesSize ; ++classSamplesIdx )
296  {
297  covariance +=
298  (
299  ( classSamples[ classSamplesIdx ][ attributeIdx1 ] - mean1 )
300  *
301  ( classSamples[ classSamplesIdx ][ attributeIdx2 ] - mean2 )
302  );
303  }
304 
305  covariance /= (double)( classSamplesSize - 1 );
306 
307  m_classesCovarianceMatrixes[ samplesByClassIdx ]( attributeIdx1,
308  attributeIdx2 ) = covariance;
309  }
310  }
311 
312  if( ! te::common::GetInverseMatrix( m_classesCovarianceMatrixes[ samplesByClassIdx ],
313  m_classesCovarianceInvMatrixes[ samplesByClassIdx ] ) )
314  {
315  return false;
316  }
317 
318  double classCovarianceMatrixDet = 0;
319  if( ! te::common::GetDeterminant( m_classesCovarianceMatrixes[ samplesByClassIdx ],
320  classCovarianceMatrixDet ) )
321  {
322  return false;
323  }
324 
325  if( classCovarianceMatrixDet > 0.0 )
326  {
327  m_classesOptizedMAPDiscriminantTerm.push_back( -0.5 * std::log(
328  classCovarianceMatrixDet ) );
329  }
330  else
331  {
332  m_classesOptizedMAPDiscriminantTerm.push_back( 0.0 );
333  }
334  }
335  }
336 
337  if( enableProgressInterface )
338  {
339  progressPtr->pulse();
340  if( ! progressPtr->isActive() ) return false;
341  }
342 
343  return true;
344  }
345 
347  const std::vector<unsigned int>& attributesIndices,
348  const std::vector<double>& inputNoDataValues,
350  const unsigned int outputIndex,
351  const double outputNoDataValue,
352  const bool /*enableProgressInterface*/) throw(Exception)
353  {
354  if( !m_isInitialized ) return false;
355  if( m_classesCovarianceMatrixes.empty() ) return false;
356  if( input.getElementsCount() != output.getElementsCount() ) return false;
357  if( attributesIndices.size() != m_classesMeans[ 0 ].size() ) return false;
358  if( outputIndex >= output.getFeaturesCount() ) return false;
359  if( inputNoDataValues.size() != attributesIndices.size() ) return false;
360 
361  // globals
362 
363  const unsigned int attributesIndicesSize = static_cast<unsigned int>(attributesIndices.size());
364  const unsigned int inputCount = input.getElementsCount();
365  const unsigned int inputFeaturesCount = input.getFeaturesCount();
366  const unsigned int classesNmb = static_cast<unsigned int>(m_classesMeans.size());
367  const unsigned int featuresCnt = static_cast<unsigned int>(m_classesMeans[0].size());
368 
369  // Dealing with the logarithm of priori probabilities
370 
371  std::vector< double > logPrioriProbs;
372 
373  if( m_parameters.m_prioriProbs.empty() )
374  {
375  if( ! getPrioriProbabilities( input,attributesIndices, logPrioriProbs ) )
376  {
377  return false;
378  }
379 
380  for( unsigned int pIdx = 0 ; pIdx < logPrioriProbs.size() ; ++pIdx )
381  {
382  logPrioriProbs[ pIdx ] = ( logPrioriProbs[ pIdx ] > 0.0 ) ?
383  std::log( logPrioriProbs[ pIdx ] ) : 0.0;
384  }
385  }
386  else
387  {
388  for( unsigned int pIdx = 0 ; pIdx < m_parameters.m_prioriProbs.size() ;
389  ++pIdx )
390  {
391  logPrioriProbs.push_back( std::log( m_parameters.m_prioriProbs[ pIdx ] ) );
392  }
393  }
394 
395  // classifying
396 
397  boost::numeric::ublas::matrix< double > sample( attributesIndicesSize, 1 );
398  boost::numeric::ublas::matrix< double > sampleMinusMean( attributesIndicesSize, 1 );
399  boost::numeric::ublas::matrix< double > sampleMinusMeanT( 1, attributesIndicesSize );
400  boost::numeric::ublas::matrix< double > auxMatrix;
401  boost::numeric::ublas::matrix< double > mahalanobisDistanceMatrix;
402  unsigned int attributesIndicesIdx = 0;
403  unsigned int featureIdx = 0;
404  unsigned int classIdx = 0;
405  double closestClassdiscriminantFunctionValue = 0;
406  double discriminantFunctionValue = 0;
407  unsigned int closestClassIdx = 0;
408  bool isValidInput = false;
409 
410  for( unsigned int inputIdx = 0 ; inputIdx < inputCount ; ++inputIdx )
411  {
412  // load sample
413 
414  isValidInput = true;
415 
416  for( attributesIndicesIdx = 0 ; attributesIndicesIdx < attributesIndicesSize ;
417  ++attributesIndicesIdx )
418  {
419  featureIdx = attributesIndices[ attributesIndicesIdx ];
420  if( featureIdx >= inputFeaturesCount ) return false;
421 
422  input.getFeature( inputIdx, featureIdx, sample( attributesIndicesIdx, 0 ) );
423 
424  if( sample( attributesIndicesIdx, 0 ) == inputNoDataValues[ attributesIndicesIdx ] )
425  {
426  isValidInput = false;
427  break;
428  }
429  }
430 
431  if( isValidInput )
432  {
433  // finding the clesest class
434 
435  closestClassdiscriminantFunctionValue = -1.0 * std::numeric_limits< double >::max();
436 
437  for( classIdx = 0 ; classIdx < classesNmb ; ++classIdx )
438  {
439  for( featureIdx = 0 ; featureIdx < featuresCnt ; ++featureIdx )
440  {
441  sampleMinusMean( featureIdx, 0 ) = sampleMinusMeanT( 0, featureIdx ) =
442  ( sample( featureIdx, 0 ) - m_classesMeans[ classIdx ][ featureIdx ] );
443  }
444 
445  //calculate the mahalanobis distance: (x-mean)T * CovMatrizInvert * (x-mean)
446 
447  auxMatrix = boost::numeric::ublas::prod( sampleMinusMeanT,
448  m_classesCovarianceInvMatrixes[ classIdx ] );
449 
450  mahalanobisDistanceMatrix = boost::numeric::ublas::prod( auxMatrix, sampleMinusMean );
451  assert( mahalanobisDistanceMatrix.size1() == 1 );
452  assert( mahalanobisDistanceMatrix.size2() == 1 );
453 
454  discriminantFunctionValue = logPrioriProbs[ classIdx ]
456  - ( 0.5 * mahalanobisDistanceMatrix( 0, 0 ) );
457 
458  if( discriminantFunctionValue > closestClassdiscriminantFunctionValue )
459  {
460  closestClassdiscriminantFunctionValue = discriminantFunctionValue;
461  closestClassIdx = classIdx;
462  }
463  }
464 
465  output.setFeature( inputIdx, 0, m_classLabels[ closestClassIdx ] );
466  }
467  else
468  {
469  output.setFeature(inputIdx, 0, static_cast<unsigned int>(outputNoDataValue));
470  }
471  }
472 
473  return true;
474  }
475 
476 
478  const std::vector<unsigned int>& attributesIndices,
479  std::vector< double >& prioriProbs ) const
480  {
481  const unsigned int inputCount = input.getElementsCount();
482  const unsigned int inputFeaturesCount = input.getFeaturesCount();
483  const unsigned int attributesIndicesSize = static_cast<unsigned int>(attributesIndices.size());
484  const unsigned int classesNmb = static_cast<unsigned int>(m_classesMeans.size());
485  const unsigned int featuresCnt = static_cast<unsigned int>(m_classesMeans[0].size());
486  const double initialPrioriProbLog = std::log( 1.0 / ( (double)classesNmb ) );
487 
488  prioriProbs.resize( classesNmb );
489  std::fill( prioriProbs.begin(), prioriProbs.end(), 0.0 );
490 
491  boost::numeric::ublas::matrix< double > sample( attributesIndicesSize, 1 );
492  boost::numeric::ublas::matrix< double > sampleMinusMean( attributesIndicesSize, 1 );
493  boost::numeric::ublas::matrix< double > sampleMinusMeanT( 1, attributesIndicesSize );
494  boost::numeric::ublas::matrix< double > auxMatrix;
495  boost::numeric::ublas::matrix< double > mahalanobisDistanceMatrix;
496  unsigned int attributesIndicesIdx = 0;
497  unsigned int featureIdx = 0;
498  unsigned int classIdx = 0;
499  double closestClassdiscriminantFunctionValue = 0;
500  double discriminantFunctionValue = 0;
501  unsigned int closestClassIdx = 0;
502  unsigned int processedSamplesNmb = 0;
503 
504  for( unsigned int inputIdx = 0 ; inputIdx < inputCount ; inputIdx +=
506  {
507  // load sample
508 
509  for( attributesIndicesIdx = 0 ; attributesIndicesIdx < attributesIndicesSize ;
510  ++attributesIndicesIdx )
511  {
512  featureIdx = attributesIndices[ attributesIndicesIdx ];
513  if( featureIdx >= inputFeaturesCount ) return false;
514 
515  input.getFeature( inputIdx, featureIdx, sample( attributesIndicesIdx, 0 ) );
516  }
517 
518  // finding the clesest class
519 
520  closestClassdiscriminantFunctionValue = -1.0 * std::numeric_limits< double >::max();
521 
522  for( classIdx = 0 ; classIdx < classesNmb ; ++classIdx )
523  {
524  for( featureIdx = 0 ; featureIdx < featuresCnt ; ++featureIdx )
525  {
526  sampleMinusMean( featureIdx, 0 ) = sampleMinusMeanT( 0, featureIdx ) =
527  ( sample( featureIdx, 0 ) - m_classesMeans[ classIdx ][ featureIdx ] );
528  }
529 
530  //calculate the mahalanobis distance: (x-mean)T * CovMatrizInvert * (x-mean)
531 
532  auxMatrix = boost::numeric::ublas::prod( sampleMinusMeanT,
533  m_classesCovarianceInvMatrixes[ classIdx ] );
534 
535  mahalanobisDistanceMatrix = boost::numeric::ublas::prod( auxMatrix, sampleMinusMean );
536  assert( mahalanobisDistanceMatrix.size1() == 1 );
537  assert( mahalanobisDistanceMatrix.size2() == 1 );
538 
539  discriminantFunctionValue = initialPrioriProbLog
541  - ( 0.5 * mahalanobisDistanceMatrix( 0, 0 ) );
542 
543  if( discriminantFunctionValue > closestClassdiscriminantFunctionValue )
544  {
545  closestClassdiscriminantFunctionValue = discriminantFunctionValue;
546  closestClassIdx = classIdx;
547  }
548  }
549 
550  prioriProbs[ closestClassIdx ] += 1.0;
551  ++processedSamplesNmb;
552  }
553 
554  for( classIdx = 0 ; classIdx < classesNmb ; ++classIdx )
555  {
556  prioriProbs[ classIdx ] /= ((double)processedSamplesNmb);
557  }
558 
559  return true;
560  }
561 
562 
563  } // end namespace cl
564 } // end namespace te
unsigned int m_prioriCalcSampleStep
A positive non-zero sample step used when calculating priori probabilities (default:5 - 1/5 of samples...
Definition: MAP.h:67
AbstractParameters * clone() const
Create a clone copy of this instance.
Definition: MAP.cpp:60
Classifier Parameters.
Definition: MAP.h:62
Parameters m_parameters
Internal execution parameters.
Definition: MAP.h:128
const Parameters & operator=(const Parameters &params)
Definition: MAP.cpp:43
Base exception class for plugin module.
bool m_isInitialized
True if this instance is initialized.
Definition: MAP.h:127
bool train(const InputAdaptor< double > &samples, const std::vector< unsigned int > &attributesIndices, const std::vector< unsigned int > &sampleLabels, const bool enableProgressInterface)
Train this classifier instance using the initialization parameters and the supplied training data...
Definition: MAP.cpp:126
This class can be used to inform the progress of a task.
Definition: TaskProgress.h:53
std::vector< double > m_prioriProbs
Priori probabilities, one for each class. Values from 0 to 1 (use an empty vector to allow internal c...
Definition: MAP.h:66
std::vector< unsigned int > m_classLabels
class labels
Definition: MAP.h:132
bool getPrioriProbabilities(const InputAdaptor< double > &input, const std::vector< unsigned int > &attributesIndices, std::vector< double > &prioriProbs) const
Calculate priori probabilities by pre-classifying the input data.
Definition: MAP.cpp:477
virtual unsigned int getElementsCount() const =0
Returns the total elements number.
void reset()
Clear all internal allocated resources and reset the parameters instance to its initial state...
Definition: MAP.cpp:54
bool initialize(const Parameters &params)
Initialize this classifier instance with new parameters.
Definition: MAP.cpp:74
bool GetDeterminant(const boost::numeric::ublas::matrix< T > &inputMatrix, double &determinant)
Get the Matrix determinant value.
Definition: MatrixUtils.h:57
bool classify(const InputAdaptor< double > &input, const std::vector< unsigned int > &attributesIndices, const std::vector< double > &inputNoDataValues, OutputAdaptor< unsigned int > &output, const unsigned int outputIndex, const double outputNoDataValue, const bool enableProgressInterface)
Classify an input iterated data and save the result on the output iterated data.
Definition: MAP.cpp:346
std::vector< boost::numeric::ublas::matrix< double > > m_classesCovarianceInvMatrixes
Classes covariance inverse matrixes.
Definition: MAP.h:131
Classifiers input data adaptor.
Definition: Adaptors.h:45
Classifiers output data adaptor.
Definition: Adaptors.h:79
URI C++ Library.
Definition: Attributes.h:37
void reset()
Reset this instance to its initial state.
Definition: MAP.cpp:115
virtual unsigned int getElementsCount() const =0
Returns the total elements number.
virtual void getFeature(const unsigned int &elementIndex, const unsigned int &featureIndex, DataType &featureValue) const =0
Returns one feature value.
std::vector< std::vector< double > > m_classesMeans
Classes means;.
Definition: MAP.h:129
std::vector< double > m_classesOptizedMAPDiscriminantTerm
An optimized portion of the MAP discriminant function.
Definition: MAP.h:133
Abstract parameters base interface.
std::vector< boost::numeric::ublas::matrix< double > > m_classesCovarianceMatrixes
Classes covariance matrixes.
Definition: MAP.h:130
bool GetInverseMatrix(const boost::numeric::ublas::matrix< T > &inputMatrix, boost::numeric::ublas::matrix< T > &outputMatrix)
Matrix inversion.
Definition: MatrixUtils.h:143
virtual void setFeature(const unsigned int &elementIndex, const unsigned int &featureIndex, const DataType &value)=0
Set one feature value.
virtual unsigned int getFeaturesCount() const =0
Returns the total features per element number.
virtual unsigned int getFeaturesCount() const =0
Returns the total features per element number.
MAP (Maximum a Posteriori) strategy for classification.