ISOData.h
Go to the documentation of this file.
1 /* Copyright (C) 2008 National Institute For Space Research (INPE) - Brazil.
2 
3  This file is part of the TerraLib - a Framework for building GIS enabled applications.
4 
5  TerraLib is free software: you can redistribute it and/or modify
6  it under the terms of the GNU Lesser General Public License as published by
7  the Free Software Foundation, either version 3 of the License,
8  or (at your option) any later version.
9 
10  TerraLib is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU Lesser General Public License for more details.
14 
15  You should have received a copy of the GNU Lesser General Public License
16  along with TerraLib. See COPYING. If not, write to
17  TerraLib Team at <terralib-team@terralib.org>.
18  */
19 
20 /*!
21  \file terralib/classification/ISOData.h
22 
23  \brief ISOData strategy for classification.
24 */
25 
26 #ifndef __TERRALIB_CLASSIFICATION_INTERNAL_ISODATA_H
27 #define __TERRALIB_CLASSIFICATION_INTERNAL_ISODATA_H
28 
29 // TerraLib
30 #include "../common/AbstractParameters.h"
31 #include "../common/MatrixUtils.h"
32 #include "../common/progress/TaskProgress.h"
33 #include "Adaptors.h"
34 #include "Config.h"
35 #include "Exception.h"
36 
37 // STL
38 #include <vector>
39 #include <map>
40 
41 namespace te
42 {
43  namespace cl
44  {
45  /*!
46  \class ISOData
47  \brief ISOData strategy for an unsupervised pixel-based classification algorithm.
48  \note Based on Reference: IsoData Algorithm - Pattern Recognition Principles - Rafael Gonzalez, Julius T. Tou - Addison Wesley Publishing Company - 1974.
49  */
50 
52  {
53  public:
54 
55  /*!
56  \class Parameters
57  \brief Classifier Parameters
58  */
60  {
61  public:
62 
63  unsigned int m_clustersNumber; //!< Desired number of clusters (this is a mean number; the final number of clusters can go from 0 up to k+(k/2). Minimum: 2, default: 2).
64  unsigned int m_maxIterationsNumber; //!< Maximum number of iterations (minimum: 1, default: 0 = automatic).
65  unsigned int m_maxMergesPerIteration; //!< Maximum number of cluster merges per iteration (zero = automatic, default: 1).
66  unsigned int m_maxSplitsPerIteration; //!< Maximum number of cluster splits per iteration (zero = automatic, default: 1).
67  double m_minClusterSizePercent; //!< Minimum cluster size (percentage relative to the total number of input elements. Valid values: from zero up to 100, default: 0).
68  double m_stdDevSplitThreshold; //!< A standard deviation threshold to split clusters (lower values tend to create more clusters by splitting. Valid values: zero or positive values. Default: 0.1).
69  double m_mergeDistThreshold; //!< A distance threshold to control the merge process (higher values will allow merging more clusters. Valid values: zero or positive values. Default: 0.1).
70 
71  Parameters();
72 
73  ~Parameters();
74  //overload
75  const Parameters& operator=(const Parameters& params);
76 
77  //overload
78  void reset();
79 
80  //overload
81  AbstractParameters* clone() const;
82  };
83 
84  /*!
85  \class ClusterData
86  \brief Cluster data
87  */
89  {
90  public:
91 
92  /*!
93  \typedef ClusterLabelT Cluster label type definition.
94  */
95  typedef unsigned int ClusterLabelT;
96 
97  double m_avgDistToClusterCenter; //!< Average distance from all samples to the cluster center.
98  unsigned int m_size; //!< Cluster size (number of elements).
99  ClusterLabelT m_label; //!< Cluster label.
100  std::vector< double > m_means; //!< Cluster means (one for each cluster feature dimension).
101  std::vector< double > m_sums; //!< Sums of all cluster pixel values (one sum for each cluster feature dimension).
102  std::vector< double > m_stdDevs; //!< Cluster standard deviations (one for each cluster feature dimension).
103 
104  ClusterData();
105  ~ClusterData();
106  };
107 
108  ISOData();
109 
110  ~ISOData();
111 
112  /*!
113  \brief Initialize this classifier instance with new parameters.
114  \param params New initialization parameters.
115  */
116  bool initialize(const Parameters& params);
117 
118  /*!
119  \brief Classify an input iterated data and save the result on the output iterated data.
120  \param input Input data to be classified.
121  \param attributesIndices The attributes indexes to process from the iterated train data.
122  \param inputNoDataValues A vector of no-data values for each attribute dimension or an empty vector if no-data values are not used.
123  \param output Output classified data.
124  \param outputIndex The output attribute index.
125  \param outputNoDataValue An output label value to use when dealing with input no-data.
126  \param enableProgressInterface Enable/disable the use of a progress interface.
127  \param clustersDataPtr A pointer to a vector where the current clusters data will be stored, or a null pointer.
128  */
129  bool classify( const InputAdaptor< double >& input,
130  const std::vector<unsigned int>& attributesIndices,
131  const std::vector< double >& inputNoDataValues,
133  const unsigned int outputIndex,
134  const ClusterData::ClusterLabelT outputNoDataValue,
135  const bool enableProgressInterface,
136  std::vector< ClusterData >* clustersDataPtr );
137 
138  protected:
139 
140  /*!
141  \typedef ClustersContainerT Clusters container type definition.
142  */
143  typedef std::map< ClusterData::ClusterLabelT, ISOData::ClusterData >
145 
146  bool m_isInitialized; //!< True if this instance is initialized.
147  Parameters m_parameters; //!< Internal execution parameters.
148 
149  /*! \brief Reset this instance to its initial state */
150  void reset();
151 
152  /*!
153  \brief Initialize clusters.
154  \param input Input data to be classified.
155  \param attributesIndices The attributes indexes to process from the iterated train data.
156  \param inputNoDataValues A vector of no-data values for each attribute dimension or an empty vector if no-data values are not used.
157  \param clustersMap Clusters map.
158  \param nextAvaliableClusterLabel Reference to the global clusters labels counter.
159  \param inputElementsOffsetsAndGains Elements offsets (first) and gains (second).
160  \note Updated clusters members: ClusterData::m_means, ClusterData::m_label.
161  \note Output = ( input + offset ) * gain;
162  */
163  bool initializeClusters( const InputAdaptor<double>& input,
164  const std::vector<unsigned int>& attributesIndices,
165  ClustersContainerT& clustersMap,
166  ClusterData::ClusterLabelT& nextAvaliableClusterLabel,
167  const std::vector<double>& inputNoDataValues,
168  std::vector< std::pair< double, double > >& inputElementsOffsetsAndGains ) const;
169 
170  /*!
171  \brief Classify each element according to the cluster centers.
172  \param input Input data to be classified.
173  \param attributesIndices The attributes indexes to process from the iterated train data.
174  \param inputNoDataValues A vector of no-data values for each attribute dimension or an empty vector if no-data values are not used.
175  \param output Output classified data.
176  \param outputIndex The output attribute index.
177  \param clustersMap Clusters map.
178  \param outputNoDataValue Output no-data value.
179  \param inputElementsOffsetsAndGains Elements offsets (first) and gains (second).
180  \note Only the member ClusterData::m_means is used for this operation.
181  \note Valid updated members after: ClusterData::m_label, ClusterData::m_means, ClusterData::m_sums, ClusterData::m_size
182  */
183  bool classifyElements( const InputAdaptor<double>& input,
184  const std::vector<unsigned int>& attributesIndices,
185  const std::vector<double>& inputNoDataValues,
186  ClustersContainerT& clustersMap,
188  const unsigned int outputIndex,
189  const unsigned int outputNoDataValue,
190  const std::vector< std::pair< double, double > >& inputElementsOffsetsAndGains ) const;
191 
192  /*!
193  \brief Remove clusters that are too small according to the current parameters.
194  \param inputElelementsCount Input data elements count.
195  \param clustersMap Clusters map.
196  \note Only the member ClusterData::m_size is used for this operation.
197  */
198  bool removeSmallClusters(
199  const unsigned int inputElelementsCount,
200  ClustersContainerT& clustersMap ) const;
201 
202  /*!
203  \brief Update clusters statistical data.
204  \param input Input data to be classified.
205  \param attributesIndices The attributes indexes to process from the iterated train data.
206  \param inputNoDataValues A vector of no-data values for each attribute dimension or an empty vector if no-data values are not used.
207  \param output Output classified data.
208  \param outputIndex The output attribute index.
209  \param clustersMap Clusters map.
210  \param inputElementsOffsetsAndGains Elements offsets (first) and gains (second).
211  \note Used clusters members: ClusterData::m_label, ClusterData::m_means, ClusterData::m_size
212  \note Updated clusters members: ClusterData::m_avgDistToClusterCenter, ClusterData::m_stdDevs.
213  */
214  bool updateClustersStats(
215  const InputAdaptor<double>& input,
216  const std::vector<unsigned int>& attributesIndices,
217  const std::vector<double>& inputNoDataValues,
218  ClustersContainerT& clustersMap,
219  const InputOutputAdaptor<unsigned int>& output,
220  const unsigned int outputIndex,
221  const std::vector< std::pair< double, double > >& inputElementsOffsetsAndGains ) const;
222 
223  /*!
224  \brief Split clusters.
225  \param clustersMap Clusters map.
226  \param inputElementsCount Input data total elements number.
227  \param maxNumberOfSplits Maximum number of splits.
228  \param nextAvaliableClusterLabel Reference to the global clusters labels counter.
229  \note Only the members ClusterData::m_label, ClusterData::m_means, ClusterData::m_stdDevs, ClusterData::m_size, ClusterData::m_avgDistToClusterCenter are used for this operation.
230  \note Only the following members will be valid after splitting: ClusterData::m_means, ClusterData::m_label.
231  */
232  bool splitClusters(
233  const unsigned int inputElementsCount,
234  const unsigned int maxNumberOfSplits,
235  ClusterData::ClusterLabelT& nextAvaliableClusterLabel,
236  ClustersContainerT& clustersMap ) const;
237 
238  /*!
239  \brief Merge clusters.
240  \param clustersMap Clusters map.
241  \param mergeDistThreshold A distance threshold to control the merge process (higher values will allow merging more clusters. Valid values: zero or positive values).
242  \param maxNumberOfMerges Maximum number of merges.
243  \note Only the members ClusterData::m_label, ClusterData::m_means, ClusterData::m_size are used for this operation.
244  \note Only the following members will be valid after merging: ClusterData::m_means, ClusterData::m_label, ClusterData::m_size.
245  */
246  static bool mergeClusters(
247  const double mergeDistThreshold,
248  const unsigned int maxNumberOfMerges,
249  ClustersContainerT& clustersMap );
250 
251  static void printClusters( const ClustersContainerT& clustersMap );
252 
253  /*!
254  \brief Recode clusters labels to be in range zero up to the number of clusters - 1.
255  \param clustersMap Clusters map.
256  */
257  static void recodeClustersLabels( ClustersContainerT& clustersMap );
258  };
259 
260  } // end namespace cl
261 } // end namespace te
262 
263 #endif // __TERRALIB_CLASSIFICATION_INTERNAL_ISODATA_H
unsigned int m_clustersNumber
Desired number of clusters (This is a mean number. Final clusters number can go from 0 up to k+(k/2)...
Definition: ISOData.h:63
unsigned int m_maxSplitsPerIteration
Maximum number of cluster splits per iteration (zero = automatic, default: 1).
Definition: ISOData.h:66
unsigned int ClusterLabelT
Definition: ISOData.h:95
Classifier Parameters.
Definition: ISOData.h:59
bool m_isInitialized
True if this instance is initialized.
Definition: ISOData.h:146
Parameters m_parameters
Internal execution parameters.
Definition: ISOData.h:147
std::map< ClusterData::ClusterLabelT, ISOData::ClusterData > ClustersContainerT
Definition: ISOData.h:144
double m_stdDevSplitThreshold
A standard deviation threshold to split clusters (lower values tend to create more clusters by splitt...
Definition: ISOData.h:68
unsigned int m_maxMergesPerIteration
Maximum number of cluster merges per iteration (zero = automatic, default: 1).
Definition: ISOData.h:65
Configuration flags for the TerraLib Classification module.
std::vector< double > m_sums
Sums of all cluster pixel values (one sum for each cluster feature dimension).
Definition: ISOData.h:101
Classifiers input data adaptor.
Definition: Adaptors.h:45
TerraLib.
Classifiers input and output data adaptor.
Definition: Adaptors.h:112
std::vector< double > m_means
Cluster means (one for each cluster feature dimension).
Definition: ISOData.h:100
ClusterLabelT m_label
Cluster label.
Definition: ISOData.h:99
Abstract parameters base interface.
#define TECLEXPORT
You can use this macro in order to export/import classes and functions from this module.
Definition: Config.h:102
double m_mergeDistThreshold
A distance threshold to control the merge process (higher values will allow merging more clusters...
Definition: ISOData.h:69
std::vector< double > m_stdDevs
Cluster standard deviations (one for each cluster feature dimension).
Definition: ISOData.h:102
An exception class for the Classification module.
unsigned int m_maxIterationsNumber
Maximum number of iterations (minimum:1, default:0=automatic).
Definition: ISOData.h:64
double m_avgDistToClusterCenter
Average distance from all samples to the cluster center.
Definition: ISOData.h:97
unsigned int m_size
Cluster size (elements number).
Definition: ISOData.h:98
Classifiers adaptors.
double m_minClusterSizePercent
Minimum cluster size (percentage relative to the total number of input elements. Valid values: from zero up to 100, default: 0).
Definition: ISOData.h:67
ISOData strategy for an unsupervised pixel-based classification algorithm.
Definition: ISOData.h:51