ISOData.h
Go to the documentation of this file.
1 /* Copyright (C) 2008 National Institute For Space Research (INPE) - Brazil.
2 
3  This file is part of the TerraLib - a Framework for building GIS enabled applications.
4 
5  TerraLib is free software: you can redistribute it and/or modify
6  it under the terms of the GNU Lesser General Public License as published by
7  the Free Software Foundation, either version 3 of the License,
8  or (at your option) any later version.
9 
10  TerraLib is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU Lesser General Public License for more details.
14 
15  You should have received a copy of the GNU Lesser General Public License
16  along with TerraLib. See COPYING. If not, write to
17  TerraLib Team at <terralib-team@terralib.org>.
18  */
19 
20 /*!
21  \file terralib/classification/ISOData.h
22 
23  \brief ISOData strategy for classification.
24 */
25 
26 #ifndef __TERRALIB_CLASSIFICATION_INTERNAL_ISODATA_H
27 #define __TERRALIB_CLASSIFICATION_INTERNAL_ISODATA_H
28 
29 // TerraLib
30 #include "../common/AbstractParameters.h"
31 #include "../common/MatrixUtils.h"
32 #include "../common/progress/TaskProgress.h"
33 #include "Adaptors.h"
34 #include "Config.h"
35 #include "Exception.h"
36 
37 // STL
38 #include <vector>
39 #include <map>
40 
41 namespace te
42 {
43  namespace cl
44  {
45  /*!
46  \class ISOData
47  \brief ISOData strategy for an unsupervised pixel-based classification algorithm.
48  \note Based on Reference: IsoData Algorithm - Pattern Recognition Principles - Rafael Gonzalez, Julius T. Tou - Addison Wesley Publishing Company - 1974.
49  */
50 
52  {
53  public:
54 
55  /*!
56  \class Parameters
57  \brief Classifier Parameters
58  */
60  {
61  public:
62 
63  unsigned int m_clustersNumber; //!< Desired number of clusters (This is a mean number. Final clusters number can go from 0 up to k+(k/2). Minimum:2, default:2).
64  unsigned int m_maxIterationsNumber; //!< Maximum number of iterations (minimum:1, default:0=automatic).
65  unsigned int m_maxMergesPerIteration; //!< Maximum number of cluster merges per iteration (zero=automatic, default:1).
66  unsigned int m_maxSplitsPerIteration; //!< Maximum number of cluster splits per iteration (zero=automatic, default:1).
67  double m_minClusterSizePercent; //!< Minimum cluster size (as a percentage of the total number of input elements. Valid values: from zero up to 100, default:0).
68  double m_stdDevSplitThreshold; //!< A standard deviation threshold to split clusters (lower values tend to create more clusters by splitting. Valid values: zero or positive values. Default: 0.1).
69  double m_mergeDistThreshold; //!< A distance threshold to control the merge process (higher values will allow merging more clusters. Valid values: zero or positive values. Default: 0.1).
70 
72 
74  // Overload: assignment from another Parameters instance.
75  const Parameters& operator=(const Parameters& params);
76 
77  // Overload: clear all internal allocated resources and reset the parameters instance to its initial state.
78  void reset();
79 
80  // Overload: create a clone copy of this instance.
81  AbstractParameters* clone() const;
82  };
83 
84  /*!
85  \class ClusterData
86  \brief Cluster data
87  */
89  {
90  public:
91 
92  /*!
93  \typedef ClusterLabelT Cluster label type definition.
94  */
95  typedef unsigned int ClusterLabelT;
96 
97  double m_avgDistToClusterCenter; //!< Average distance from all samples to the cluster center.
98  unsigned int m_size; //!< Cluster size (elements number).
99  ClusterLabelT m_label; //!< Cluster label.
100  std::vector< double > m_means; //!< Cluster means (one for each cluster feature dimension).
101  std::vector< double > m_sums; //!< Sums of all cluster pixel values (one sum for each cluster feature dimension).
102  std::vector< double > m_stdDevs; //!< Cluster standard deviations (one for each cluster feature dimension).
103 
106  };
107 
109 
111 
112  /*!
113  \brief Initialize this classifier instance with new parameters.
114  \param params New initialization parameters.
115  */
116  bool initialize(const Parameters& params);
117 
118  /*!
119  \brief Classify an input iterated data and save the result on the output iterated data.
120  \param input Input data to be classified.
121  \param attributesIndices The attributes indexes to process from the iterated train data.
122  \param inputNoDataValues A vector of no-data values for each attribute dimension or an empty vector if no-data values are not used.
123  \param output Output classified data.
124  \param outputIndex The output attribute index.
125  \param outputNoDataValue An output label value to use when dealing with input no-data.
126  \param enableProgressInterface Enable/disable the use of a progress interface.
127  \param clustersDataPtr A pointer to a vector where the current clusters data will be stored or a null pointer.
128  */
129  bool classify( const InputAdaptor< double >& input,
130  const std::vector<unsigned int>& attributesIndices,
131  const std::vector< double >& inputNoDataValues,
133  const unsigned int outputIndex,
134  const ClusterData::ClusterLabelT outputNoDataValue,
135  const bool enableProgressInterface,
136  std::vector< ClusterData >* clustersDataPtr );
137 
138  protected:
139 
140  /*!
141  \typedef ClustersContainerT Clusters container type definition.
142  */
143  typedef std::map< ClusterData::ClusterLabelT, ISOData::ClusterData >
145 
146  bool m_isInitialized; //!< True if this instance is initialized.
147  Parameters m_parameters; //!< Internal execution parameters.
148 
149  /*! \brief Reset this instance to its initial state */
150  void reset();
151 
152  /*!
153  \brief Initialize clusters.
154  \param input Input data to be classified.
155  \param attributesIndices The attributes indexes to process from the iterated train data.
156  \param inputNoDataValues A vector of no-data values for each attribute dimension or an empty vector if no-data values are not used.
157  \param clustersMap Clusters map.
158  \param nextAvaliableClusterLabel Reference to the global clusters labels counter.
159  \param inputElementsOffsetsAndGains Elements offsets (first) and gains (second).
160  \note Updated clusters members: ClusterData::m_means, ClusterData::m_label.
161  \note Output = ( input + offset ) * gain;
162  */
164  const std::vector<unsigned int>& attributesIndices,
165  ClustersContainerT& clustersMap,
166  ClusterData::ClusterLabelT& nextAvaliableClusterLabel,
167  const std::vector<double>& inputNoDataValues,
168  std::vector< std::pair< double, double > >& inputElementsOffsetsAndGains ) const;
169 
170  /*!
171  \brief Classify each element following cluster centers.
172  \param input Input data to be classified.
173  \param attributesIndices The attributes indexes to process from the iterated train data.
174  \param inputNoDataValues A vector of no-data values for each attribute dimension or an empty vector if no-data values are not used.
175  \param output Output classified data.
176  \param outputIndex The output attribute index.
177  \param clustersMap Clusters map.
178  \param outputNoDataValue Output no-data value.
179  \param inputElementsOffsetsAndGains Elements offsets (first) and gains (second).
180  \note Only the member ClusterData::m_means is used for this operation.
181  \note Valid updated members after: ClusterData::m_label, ClusterData::m_means, ClusterData::m_sums, ClusterData::m_size
182  */
184  const std::vector<unsigned int>& attributesIndices,
185  const std::vector<double>& inputNoDataValues,
186  ClustersContainerT& clustersMap,
188  const unsigned int outputIndex,
189  const unsigned int outputNoDataValue,
190  const std::vector< std::pair< double, double > >& inputElementsOffsetsAndGains ) const;
191 
192  /*!
193  \brief Remove clusters that are too small according to the current parameters.
194  \param inputElelementsCount Input data elements count.
195  \param clustersMap Clusters map.
196  \note Only the member ClusterData::m_size is used for this operation.
197  */
199  const unsigned int inputElelementsCount,
200  ClustersContainerT& clustersMap ) const;
201 
202  /*!
203  \brief Update clusters statistical data.
204  \param input Input data to be classified.
205  \param attributesIndices The attributes indexes to process from the iterated train data.
206  \param inputNoDataValues A vector of no-data values for each attribute dimension or an empty vector if no-data values are not used.
207  \param output Output classified data.
208  \param outputIndex The output attribute index.
209  \param clustersMap Clusters map.
210  \param inputElementsOffsetsAndGains Elements offsets (first) and gains (second).
211  \note Used clusters members: ClusterData::m_label, ClusterData::m_means, ClusterData::m_size
212  \note Updated clusters members: ClusterData::m_avgDistToClusterCenter, ClusterData::m_stdDevs.
213  */
215  const InputAdaptor<double>& input,
216  const std::vector<unsigned int>& attributesIndices,
217  const std::vector<double>& inputNoDataValues,
218  ClustersContainerT& clustersMap,
219  const InputOutputAdaptor<unsigned int>& output,
220  const unsigned int outputIndex,
221  const std::vector< std::pair< double, double > >& inputElementsOffsetsAndGains ) const;
222 
223  /*!
224  \brief Split clusters.
225  \param clustersMap Clusters map.
226  \param inputElementsCount Input data total elements number.
227  \param maxNumberOfSplits Maximum number of splits.
228  \param nextAvaliableClusterLabel Reference to the global clusters labels counter.
229  \note Only the members ClusterData::m_label, ClusterData::m_means, ClusterData::m_stdDevs, ClusterData::m_size, ClusterData::m_avgDistToClusterCenter are used for this operation.
230  \note Only the following members will be valid after splitting: ClusterData::m_means, ClusterData::m_label.
231  */
233  const unsigned int inputElementsCount,
234  const unsigned int maxNumberOfSplits,
235  ClusterData::ClusterLabelT& nextAvaliableClusterLabel,
236  ClustersContainerT& clustersMap ) const;
237 
238  /*!
239  \brief Merge clusters.
240  \param clustersMap Clusters map.
241  \param mergeDistThreshold A distance threshold to control the merge process (higher values will allow merging more clusters. Valid values: zero or positive values).
242  \param maxNumberOfMerges Maximum number of merges.
243  \note Only the members ClusterData::m_label, ClusterData::m_means, ClusterData::m_size are used for this operation.
244  \note Only the following members will be valid after merging: ClusterData::m_means, ClusterData::m_label, ClusterData::m_size.
245  */
246  static bool mergeClusters(
247  const double mergeDistThreshold,
248  const unsigned int maxNumberOfMerges,
249  ClustersContainerT& clustersMap );
250 
251  static void printClusters( const ClustersContainerT& clustersMap ); // NOTE(review): undocumented - presumably prints the content of clustersMap for debugging; confirm against the implementation.
252 
253  /*!
254  \brief Recode the cluster labels to be in the range from zero up to (number of clusters - 1).
255  \param clustersMap Clusters map.
256  */
257  static void recodeClustersLabels( ClustersContainerT& clustersMap );
258  };
259 
260  } // end namespace cl
261 } // end namespace te
262 
263 #endif // __TERRALIB_CLASSIFICATION_INTERNAL_ISODATA_H
Classifiers adaptors.
std::vector< double > m_sums
Sums of all cluster pixel values (one sum for each cluster feature dimension).
Definition: ISOData.h:101
unsigned int m_size
Cluster size (elements number).
Definition: ISOData.h:98
unsigned int ClusterLabelT
Definition: ISOData.h:95
ClusterLabelT m_label
Cluster label.
Definition: ISOData.h:99
std::vector< double > m_means
Cluster means (one for each cluster feature dimension).
Definition: ISOData.h:100
double m_avgDistToClusterCenter
Average distance from all samples to the cluster center.
Definition: ISOData.h:97
std::vector< double > m_stdDevs
Cluster standard deviations (one for each cluster feature dimension).
Definition: ISOData.h:102
Classifier Parameters.
Definition: ISOData.h:60
AbstractParameters * clone() const
Create a clone copy of this instance.
unsigned int m_maxSplitsPerIteration
Maximum number of cluster splits per iteration (zero=automatic, default:1).
Definition: ISOData.h:66
unsigned int m_clustersNumber
Desired number of clusters (This is a mean number. Final clusters number can go from 0 up to k+(k/2)....
Definition: ISOData.h:63
unsigned int m_maxIterationsNumber
Maximum number of iterations (minimum:1, default:0=automatic).
Definition: ISOData.h:64
double m_minClusterSizePercent
Minimum cluster size (as a percentage of the total number of input elements....
Definition: ISOData.h:67
const Parameters & operator=(const Parameters &params)
void reset()
Clear all internal allocated resources and reset the parameters instance to its initial state.
double m_stdDevSplitThreshold
A standard deviation threshold to split clusters (lower values tend to create more clusters by splitt...
Definition: ISOData.h:68
unsigned int m_maxMergesPerIteration
Maximum number of cluster merges per iteration (zero=automatic, default:1).
Definition: ISOData.h:65
double m_mergeDistThreshold
A distance threshold to control the merge process (higher values will allow merging more clusters....
Definition: ISOData.h:69
ISOData strategy for an unsupervised pixel-based classification algorithm.
Definition: ISOData.h:52
bool classify(const InputAdaptor< double > &input, const std::vector< unsigned int > &attributesIndices, const std::vector< double > &inputNoDataValues, InputOutputAdaptor< ClusterData::ClusterLabelT > &output, const unsigned int outputIndex, const ClusterData::ClusterLabelT outputNoDataValue, const bool enableProgressInterface, std::vector< ClusterData > *clustersDataPtr)
Classify an input iterated data and save the result on the output iterated data.
std::map< ClusterData::ClusterLabelT, ISOData::ClusterData > ClustersContainerT
Definition: ISOData.h:144
bool removeSmallClusters(const unsigned int inputElelementsCount, ClustersContainerT &clustersMap) const
Remove clusters that are too small according to the current parameters.
static void recodeClustersLabels(ClustersContainerT &clustersMap)
Recode clusters labels to be in range zero up to the number of clusters - 1.
bool updateClustersStats(const InputAdaptor< double > &input, const std::vector< unsigned int > &attributesIndices, const std::vector< double > &inputNoDataValues, ClustersContainerT &clustersMap, const InputOutputAdaptor< unsigned int > &output, const unsigned int outputIndex, const std::vector< std::pair< double, double > > &inputElementsOffsetsAndGains) const
Update clusters statistical data.
static void printClusters(const ClustersContainerT &clustersMap)
bool initialize(const Parameters &params)
Initialize this classifier instance with new parameters.
static bool mergeClusters(const double mergeDistThreshold, const unsigned int maxNumberOfMerges, ClustersContainerT &clustersMap)
Merge clusters.
bool initializeClusters(const InputAdaptor< double > &input, const std::vector< unsigned int > &attributesIndices, ClustersContainerT &clustersMap, ClusterData::ClusterLabelT &nextAvaliableClusterLabel, const std::vector< double > &inputNoDataValues, std::vector< std::pair< double, double > > &inputElementsOffsetsAndGains) const
Initialize clusters.
Parameters m_parameters
Internal execution parameters.
Definition: ISOData.h:147
bool splitClusters(const unsigned int inputElementsCount, const unsigned int maxNumberOfSplits, ClusterData::ClusterLabelT &nextAvaliableClusterLabel, ClustersContainerT &clustersMap) const
Split clusters.
bool classifyElements(const InputAdaptor< double > &input, const std::vector< unsigned int > &attributesIndices, const std::vector< double > &inputNoDataValues, ClustersContainerT &clustersMap, InputOutputAdaptor< unsigned int > &output, const unsigned int outputIndex, const unsigned int outputNoDataValue, const std::vector< std::pair< double, double > > &inputElementsOffsetsAndGains) const
Classify each element following cluster centers.
void reset()
Reset this instance to its initial state.
bool m_isInitialized
True if this instance is initialized.
Definition: ISOData.h:146
Classifiers input data adaptor.
Definition: Adaptors.h:46
Classifiers input and output data adaptor.
Definition: Adaptors.h:113
Abstract parameters base interface.
TerraLib.
#define TECLEXPORT
You can use this macro in order to export/import classes and functions from this module.
Definition: Config.h:102
Proxy configuration file for TerraView (see terraview_config.h).
An exception class for the XML module.