78 if( !params.m_prioriProbs.empty() )
82 for( std::size_t prioriProbsIdx = 0 ; prioriProbsIdx < params.m_prioriProbs.size() ;
86 ( params.m_prioriProbs[ prioriProbsIdx ] < 0.0 )
88 ( params.m_prioriProbs[ prioriProbsIdx ] > 1.0 )
94 sum += params.m_prioriProbs[ prioriProbsIdx ];
103 if( params.m_prioriCalcSampleStep < 1 )
127 const std::vector<unsigned int>& attributesIndices,
128 const std::vector<unsigned int>& sampleLabels,
129 const bool enableProgressInterface)
throw(
Exception)
132 if( samples.getElementsCount() == 0 )
return false;
133 if( sampleLabels.size() != samples.getElementsCount() )
return false;
134 if( attributesIndices.empty() )
return false;
136 const unsigned int attributesIndicesSize =
static_cast<unsigned int>(attributesIndices.size());
138 std::unique_ptr< te::common::TaskProgress > progressPtr;
139 if( enableProgressInterface )
142 progressPtr->setTotalSteps( 3 );
143 progressPtr->setMessage(
"Trainning" );
148 std::vector< std::vector< std::vector< double > > > samplesByClass;
153 const unsigned int samplesCount = samples.getElementsCount();
154 std::map< unsigned int, unsigned int > labels2ClassIndexMap;
155 std::map< unsigned int, unsigned int >::iterator labels2ClassIndexMapIt;
156 unsigned int attributesIndicesIdx = 0;
158 for(
unsigned int sampleIdx = 0 ; sampleIdx < samplesCount ; ++sampleIdx )
160 const unsigned int& sampleLabel = sampleLabels[ sampleIdx ];
162 labels2ClassIndexMapIt = labels2ClassIndexMap.find( sampleLabel );
164 if( labels2ClassIndexMapIt == labels2ClassIndexMap.end() )
166 labels2ClassIndexMap[sampleLabel] =
static_cast<unsigned int>(
m_classLabels.size());
169 samplesByClass.push_back( std::vector< std::vector< double > >() );
170 samplesByClass.back().push_back( std::vector< double >() );
172 std::vector< double >& sample = samplesByClass.back().back();
173 sample.resize( attributesIndicesSize, 0.0 );
175 for( attributesIndicesIdx = 0; attributesIndicesIdx < attributesIndicesSize;
176 ++attributesIndicesIdx )
178 samples.getFeature( sampleIdx, attributesIndicesIdx, sample[ attributesIndicesIdx ] );
183 samplesByClass[ labels2ClassIndexMapIt->second ].push_back( std::vector< double >() );
184 std::vector< double >& sample = samplesByClass[ labels2ClassIndexMapIt->second ].back();
185 sample.resize( attributesIndicesSize, 0.0 );
187 for( attributesIndicesIdx = 0; attributesIndicesIdx < attributesIndicesSize;
188 ++attributesIndicesIdx )
190 samples.getFeature( sampleIdx, attributesIndicesIdx, sample[ attributesIndicesIdx ] );
196 if( enableProgressInterface )
198 progressPtr->pulse();
199 if( ! progressPtr->isActive() )
return false;
215 const std::size_t samplesByClassSize = samplesByClass.size();
218 std::vector< double > dymmyMeansVec( attributesIndicesSize, 0.0 );
221 unsigned int attributeIdx = 0;
223 std::size_t classSamplesIdx = 0;
224 std::size_t classSamplesSize = 0;
226 for( std::size_t samplesByClassIdx = 0 ; samplesByClassIdx < samplesByClassSize ;
227 ++samplesByClassIdx )
229 const std::vector< std::vector< double > >& classSamples = samplesByClass[
231 classSamplesSize = classSamples.size();
233 for( attributeIdx = 0; attributeIdx < attributesIndicesSize; ++attributeIdx )
237 for( classSamplesIdx = 0 ; classSamplesIdx < classSamplesSize ; ++classSamplesIdx )
239 mean += classSamples[ classSamplesIdx ][ attributeIdx ];
242 mean /= (double)classSamplesSize;
249 if( enableProgressInterface )
251 progressPtr->pulse();
252 if( ! progressPtr->isActive() )
return false;
258 const std::size_t samplesByClassSize = samplesByClass.size();
259 const boost::numeric::ublas::matrix< double > dummyCovarianceMatrix(
260 attributesIndicesSize, attributesIndicesSize );
271 unsigned int attributeIdx1 = 0;
272 unsigned int attributeIdx2 = 0;
273 std::size_t classSamplesIdx = 0;
274 std::size_t classSamplesSize = 0;
277 double covariance = 0;
279 for( std::size_t samplesByClassIdx = 0 ; samplesByClassIdx < samplesByClassSize ;
280 ++samplesByClassIdx )
282 const std::vector< std::vector< double > >& classSamples = samplesByClass[
284 classSamplesSize = classSamples.size();
286 for( attributeIdx1 = 0; attributeIdx1 < attributesIndicesSize; ++attributeIdx1 )
290 for( attributeIdx2 = 0; attributeIdx2 < attributesIndicesSize; ++attributeIdx2 )
295 for( classSamplesIdx = 0 ; classSamplesIdx < classSamplesSize ; ++classSamplesIdx )
299 ( classSamples[ classSamplesIdx ][ attributeIdx1 ] - mean1 )
301 ( classSamples[ classSamplesIdx ][ attributeIdx2 ] - mean2 )
305 covariance /= (double)( classSamplesSize - 1 );
308 attributeIdx2 ) = covariance;
318 double classCovarianceMatrixDet = 0;
320 classCovarianceMatrixDet ) )
325 if( classCovarianceMatrixDet > 0.0 )
328 classCovarianceMatrixDet ) );
337 if( enableProgressInterface )
339 progressPtr->pulse();
340 if( ! progressPtr->isActive() )
return false;
347 const std::vector<unsigned int>& attributesIndices,
348 const std::vector<double>& inputNoDataValues,
350 const unsigned int outputIndex,
351 const double outputNoDataValue,
357 if( attributesIndices.size() !=
m_classesMeans[ 0 ].size() )
return false;
359 if( inputNoDataValues.size() != attributesIndices.size() )
return false;
363 const unsigned int attributesIndicesSize =
static_cast<unsigned int>(attributesIndices.size());
366 const unsigned int classesNmb =
static_cast<unsigned int>(
m_classesMeans.size());
367 const unsigned int featuresCnt =
static_cast<unsigned int>(
m_classesMeans[0].size());
371 std::vector< double > logPrioriProbs;
380 for(
unsigned int pIdx = 0 ; pIdx < logPrioriProbs.size() ; ++pIdx )
382 logPrioriProbs[ pIdx ] = ( logPrioriProbs[ pIdx ] > 0.0 ) ?
383 std::log( logPrioriProbs[ pIdx ] ) : 0.0;
397 boost::numeric::ublas::matrix< double > sample( attributesIndicesSize, 1 );
398 boost::numeric::ublas::matrix< double > sampleMinusMean( attributesIndicesSize, 1 );
399 boost::numeric::ublas::matrix< double > sampleMinusMeanT( 1, attributesIndicesSize );
400 boost::numeric::ublas::matrix< double > auxMatrix;
401 boost::numeric::ublas::matrix< double > mahalanobisDistanceMatrix;
402 unsigned int attributesIndicesIdx = 0;
403 unsigned int featureIdx = 0;
404 unsigned int classIdx = 0;
405 double closestClassdiscriminantFunctionValue = 0;
406 double discriminantFunctionValue = 0;
407 unsigned int closestClassIdx = 0;
408 bool isValidInput =
false;
410 for(
unsigned int inputIdx = 0 ; inputIdx < inputCount ; ++inputIdx )
416 for( attributesIndicesIdx = 0 ; attributesIndicesIdx < attributesIndicesSize ;
417 ++attributesIndicesIdx )
419 featureIdx = attributesIndices[ attributesIndicesIdx ];
420 if( featureIdx >= inputFeaturesCount )
return false;
422 input.
getFeature( inputIdx, featureIdx, sample( attributesIndicesIdx, 0 ) );
424 if( sample( attributesIndicesIdx, 0 ) == inputNoDataValues[ attributesIndicesIdx ] )
426 isValidInput =
false;
435 closestClassdiscriminantFunctionValue = -1.0 * std::numeric_limits< double >::max();
437 for( classIdx = 0 ; classIdx < classesNmb ; ++classIdx )
439 for( featureIdx = 0 ; featureIdx < featuresCnt ; ++featureIdx )
441 sampleMinusMean( featureIdx, 0 ) = sampleMinusMeanT( 0, featureIdx ) =
442 ( sample( featureIdx, 0 ) -
m_classesMeans[ classIdx ][ featureIdx ] );
447 auxMatrix = boost::numeric::ublas::prod( sampleMinusMeanT,
450 mahalanobisDistanceMatrix = boost::numeric::ublas::prod( auxMatrix, sampleMinusMean );
451 assert( mahalanobisDistanceMatrix.size1() == 1 );
452 assert( mahalanobisDistanceMatrix.size2() == 1 );
454 discriminantFunctionValue = logPrioriProbs[ classIdx ]
456 - ( 0.5 * mahalanobisDistanceMatrix( 0, 0 ) );
458 if( discriminantFunctionValue > closestClassdiscriminantFunctionValue )
460 closestClassdiscriminantFunctionValue = discriminantFunctionValue;
461 closestClassIdx = classIdx;
469 output.
setFeature(inputIdx, 0, static_cast<unsigned int>(outputNoDataValue));
478 const std::vector<unsigned int>& attributesIndices,
479 std::vector< double >& prioriProbs )
const 483 const unsigned int attributesIndicesSize =
static_cast<unsigned int>(attributesIndices.size());
484 const unsigned int classesNmb =
static_cast<unsigned int>(
m_classesMeans.size());
485 const unsigned int featuresCnt =
static_cast<unsigned int>(
m_classesMeans[0].size());
486 const double initialPrioriProbLog = std::log( 1.0 / ( (
double)classesNmb ) );
488 prioriProbs.resize( classesNmb );
489 std::fill( prioriProbs.begin(), prioriProbs.end(), 0.0 );
491 boost::numeric::ublas::matrix< double > sample( attributesIndicesSize, 1 );
492 boost::numeric::ublas::matrix< double > sampleMinusMean( attributesIndicesSize, 1 );
493 boost::numeric::ublas::matrix< double > sampleMinusMeanT( 1, attributesIndicesSize );
494 boost::numeric::ublas::matrix< double > auxMatrix;
495 boost::numeric::ublas::matrix< double > mahalanobisDistanceMatrix;
496 unsigned int attributesIndicesIdx = 0;
497 unsigned int featureIdx = 0;
498 unsigned int classIdx = 0;
499 double closestClassdiscriminantFunctionValue = 0;
500 double discriminantFunctionValue = 0;
501 unsigned int closestClassIdx = 0;
502 unsigned int processedSamplesNmb = 0;
504 for(
unsigned int inputIdx = 0 ; inputIdx < inputCount ; inputIdx +=
509 for( attributesIndicesIdx = 0 ; attributesIndicesIdx < attributesIndicesSize ;
510 ++attributesIndicesIdx )
512 featureIdx = attributesIndices[ attributesIndicesIdx ];
513 if( featureIdx >= inputFeaturesCount )
return false;
515 input.
getFeature( inputIdx, featureIdx, sample( attributesIndicesIdx, 0 ) );
520 closestClassdiscriminantFunctionValue = -1.0 * std::numeric_limits< double >::max();
522 for( classIdx = 0 ; classIdx < classesNmb ; ++classIdx )
524 for( featureIdx = 0 ; featureIdx < featuresCnt ; ++featureIdx )
526 sampleMinusMean( featureIdx, 0 ) = sampleMinusMeanT( 0, featureIdx ) =
527 ( sample( featureIdx, 0 ) -
m_classesMeans[ classIdx ][ featureIdx ] );
532 auxMatrix = boost::numeric::ublas::prod( sampleMinusMeanT,
535 mahalanobisDistanceMatrix = boost::numeric::ublas::prod( auxMatrix, sampleMinusMean );
536 assert( mahalanobisDistanceMatrix.size1() == 1 );
537 assert( mahalanobisDistanceMatrix.size2() == 1 );
539 discriminantFunctionValue = initialPrioriProbLog
541 - ( 0.5 * mahalanobisDistanceMatrix( 0, 0 ) );
543 if( discriminantFunctionValue > closestClassdiscriminantFunctionValue )
545 closestClassdiscriminantFunctionValue = discriminantFunctionValue;
546 closestClassIdx = classIdx;
550 prioriProbs[ closestClassIdx ] += 1.0;
551 ++processedSamplesNmb;
554 for( classIdx = 0 ; classIdx < classesNmb ; ++classIdx )
556 prioriProbs[ classIdx ] /= ((double)processedSamplesNmb);
unsigned int m_prioriCalcSampleStep
A positive non-zero sample step used when calculating priori probabilities (default:5 - 1/5 of samples...
AbstractParameters * clone() const
Create a clone copy of this instance.
Parameters m_parameters
Internal execution parameters.
const Parameters & operator=(const Parameters &params)
Base exception class for plugin module.
bool m_isInitialized
True if this instance is initialized.
bool train(const InputAdaptor< double > &samples, const std::vector< unsigned int > &attributesIndices, const std::vector< unsigned int > &sampleLabels, const bool enableProgressInterface)
Train this classifier instance using the initialization parameters and the supplied training data...
This class can be used to inform the progress of a task.
std::vector< double > m_prioriProbs
Priori probabilities, one for each class. Values from 0 to 1 (use an empty vector to allow internal c...
std::vector< unsigned int > m_classLabels
class labels
bool getPrioriProbabilities(const InputAdaptor< double > &input, const std::vector< unsigned int > &attributesIndices, std::vector< double > &prioriProbs) const
Calculate priori probabilities by pre-classifying the input data.
virtual unsigned int getElementsCount() const =0
Returns the total elements number.
void reset()
Clear all internal allocated resources and reset the parameters instance to its initial state...
bool initialize(const Parameters &params)
Initialize this classifier instance with new parameters.
bool GetDeterminant(const boost::numeric::ublas::matrix< T > &inputMatrix, double &determinant)
Get the Matrix determinant value.
bool classify(const InputAdaptor< double > &input, const std::vector< unsigned int > &attributesIndices, const std::vector< double > &inputNoDataValues, OutputAdaptor< unsigned int > &output, const unsigned int outputIndex, const double outputNoDataValue, const bool enableProgressInterface)
Classify an input iterated data and save the result on the output iterated data.
std::vector< boost::numeric::ublas::matrix< double > > m_classesCovarianceInvMatrixes
Classes covariance inverse matrixes.
Classifiers output data adaptor.
void reset()
Reset this instance to its initial state.
std::vector< std::vector< double > > m_classesMeans
Classes means.
std::vector< double > m_classesOptizedMAPDiscriminantTerm
An optimized portion of the MAP discriminant function.
Abstract parameters base interface.
std::vector< boost::numeric::ublas::matrix< double > > m_classesCovarianceMatrixes
Classes covariance matrixes.
bool GetInverseMatrix(const boost::numeric::ublas::matrix< T > &inputMatrix, boost::numeric::ublas::matrix< T > &outputMatrix)
Matrix inversion.
virtual void setFeature(const unsigned int &elementIndex, const unsigned int &featureIndex, const DataType &value)=0
Set one feature value.
virtual unsigned int getFeaturesCount() const =0
Returns the total features per element number.
MAP (Maximum a Posteriori) strategy for classification.