Loading...
Searching...
No Matches
ISOData.h
Go to the documentation of this file.
1/* Copyright (C) 2008 National Institute For Space Research (INPE) - Brazil.
2
3 This file is part of the TerraLib - a Framework for building GIS enabled applications.
4
5 TerraLib is free software: you can redistribute it and/or modify
6 it under the terms of the GNU Lesser General Public License as published by
7 the Free Software Foundation, either version 3 of the License,
8 or (at your option) any later version.
9
10 TerraLib is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public License
16 along with TerraLib. See COPYING. If not, write to
17 TerraLib Team at <terralib-team@terralib.org>.
18 */
19
20/*!
21 \file terralib/classification/ISOData.h
22
23 \brief ISOData strategy for classification.
24*/
25
26#ifndef __TERRALIB_CLASSIFICATION_INTERNAL_ISODATA_H
27#define __TERRALIB_CLASSIFICATION_INTERNAL_ISODATA_H
28
29// TerraLib
30#include "../common/AbstractParameters.h"
31#include "../common/MatrixUtils.h"
32#include "../common/progress/TaskProgress.h"
33#include "Adaptors.h"
34#include "Config.h"
35#include "Exception.h"
36
37// STL
38#include <vector>
39#include <map>
40
41namespace te
42{
43 namespace cl
44 {
45 /*!
46 \class ISOData
47 \brief ISOData strategy for an unsupervised pixel-based classification algorithm.
48 \note Based on Reference: IsoData Algorithm - Pattern Recognition Principles - Rafael Gonzalez, Julius T. Tou - Addison Wesley Publishing Company - 1974.
49 */
50
52 {
53 public:
54
55 /*!
56 \class Parameters
57 \brief Classifier Parameters
58 */
60 {
61 public:
62
63 unsigned int m_clustersNumber; //!< Desired number of clusters (This is a mean number. Final clusters number can go from 0 up to k+(k/2). Minimum:2, default:2).
64 unsigned int m_maxIterationsNumber; //!< Maximum number of iterations (minimum:1, default:0=automatic).
65 unsigned int m_maxMergesPerIteration; //!< Maximum number of cluster merges per iteration (zero=automatic, default:1).
66 unsigned int m_maxSplitsPerIteration; //!< Maximum number of cluster splits per iteration (zero=automatic, default:1).
67 double m_minClusterSizePercent; //!< Minimum cluster size (as a percentage of the total number of input elements. Valid values: from zero up to 100, default:0).
68 double m_stdDevSplitThreshold; //!< A standard deviation threshold to split clusters (lower values tend to create more clusters by splitting. Valid values: zero or positive values. Default: 0.1).
69 double m_mergeDistThreshold; //!< A distance threshold to control the merge process (higher values allow more clusters to be merged. Valid values: zero or positive values. Default: 0.1).
70
72
74 //overload
75 const Parameters& operator=(const Parameters& params);
76
77 //overload
78 void reset();
79
80 //overload
81 AbstractParameters* clone() const;
82 };
83
84 /*!
85 \class ClusterData
86 \brief Cluster data
87 */
89 {
90 public:
91
92 /*!
93 \typedef ClusterLabelT Cluster label type definition.
94 */
95 typedef unsigned int ClusterLabelT;
96
97 double m_avgDistToClusterCenter; //!< Average distance from all samples to the cluster center.
98 unsigned int m_size; //!< Cluster size (elements number).
99 ClusterLabelT m_label; //!< Cluster label.
100 std::vector< double > m_means; //!< Cluster means (one for each cluster feature dimension).
101 std::vector< double > m_sums; //!< Sums of all cluster pixel values (one sum for each cluster feature dimension).
102 std::vector< double > m_stdDevs; //!< Cluster standard deviations (one for each cluster feature dimension).
103
106 };
107
109
111
112 /*!
113 \brief Initialize this classifier instance with new parameters.
114 \param params New initialization parameters.
115 */
116 bool initialize(const Parameters& params);
117
118 /*!
119 \brief Classify an input iterated data and save the result on the output iterated data.
120 \param input Input data to be classified.
121 \param attributesIndices The attributes indexes to process from the iterated train data.
122 \param inputNoDataValues A vector of no-data values for each attribute dimension or an empty vector if no-data values are not used.
123 \param output Output classified data.
124 \param outputIndex The output attribute index.
125 \param outputNoDataValue An output label value to use when dealing with input no-data.
126 \param enableProgressInterface Enable/disable the use of a progress interface.
127 \param clustersDataPtr A pointer to a vector where the current clusters data will be stored, or a null pointer.
128 */
130 const std::vector<unsigned int>& attributesIndices,
131 const std::vector< double >& inputNoDataValues,
133 const unsigned int outputIndex,
134 const ClusterData::ClusterLabelT outputNoDataValue,
135 const bool enableProgressInterface,
136 std::vector< ClusterData >* clustersDataPtr );
137
138 protected:
139
140 /*!
141 \typedef ClustersContainerT Clusters container type definition.
142 */
143 typedef std::map< ClusterData::ClusterLabelT, ISOData::ClusterData >
145
146 bool m_isInitialized; //!< True if this instance is initialized.
147 Parameters m_parameters; //!< Internal execution parameters.
148
149 /*! \brief Reset this instance to its initial state */
150 void reset();
151
152 /*!
153 \brief Initialize clusters.
154 \param input Input data to be classified.
155 \param attributesIndices The attributes indexes to process from the iterated train data.
156 \param inputNoDataValues A vector of no-data values for each attribute dimension or an empty vector if no-data values are not used.
157 \param clustersMap Clusters map.
158 \param nextAvaliableClusterLabel Reference to the global clusters labels counter.
159 \param inputElementsOffsetsAndGains Elements offsets (first) and gains (second).
160 \note Updated clusters members: ClusterData::m_means, ClusterData::m_label.
161 \note Output = ( input + offset ) * gain;
162 */
164 const std::vector<unsigned int>& attributesIndices,
165 ClustersContainerT& clustersMap,
166 ClusterData::ClusterLabelT& nextAvaliableClusterLabel,
167 const std::vector<double>& inputNoDataValues,
168 std::vector< std::pair< double, double > >& inputElementsOffsetsAndGains ) const;
169
170 /*!
171 \brief Classify each element according to the cluster centers.
172 \param input Input data to be classified.
173 \param attributesIndices The attributes indexes to process from the iterated train data.
174 \param inputNoDataValues A vector of no-data values for each attribute dimension or an empty vector if no-data values are not used.
175 \param output Output classified data.
176 \param outputIndex The output attribute index.
177 \param clustersMap Clusters map.
178 \param outputNoDataValue Output no-data value.
179 \param inputElementsOffsetsAndGains Elements offsets (first) and gains (second).
180 \note Only the member ClusterData::m_means is used for this operation.
181 \note Valid updated members after: ClusterData::m_label, ClusterData::m_means, ClusterData::m_sums, ClusterData::m_size
182 */
184 const std::vector<unsigned int>& attributesIndices,
185 const std::vector<double>& inputNoDataValues,
186 ClustersContainerT& clustersMap,
188 const unsigned int outputIndex,
189 const unsigned int outputNoDataValue,
190 const std::vector< std::pair< double, double > >& inputElementsOffsetsAndGains ) const;
191
192 /*!
193 \brief Remove clusters that are too small according to the current parameters.
194 \param inputElelementsCount Input data elements count.
195 \param clustersMap Clusters map.
196 \note Only the member ClusterData::m_size is used for this operation.
197 */
199 const unsigned int inputElelementsCount,
200 ClustersContainerT& clustersMap ) const;
201
202 /*!
203 \brief Update clusters statistical data.
204 \param input Input data to be classified.
205 \param attributesIndices The attributes indexes to process from the iterated train data.
206 \param inputNoDataValues A vector of no-data values for each attribute dimension or an empty vector if no-data values are not used.
207 \param output Output classified data.
208 \param outputIndex The output attribute index.
209 \param clustersMap Clusters map.
210 \param inputElementsOffsetsAndGains Elements offsets (first) and gains (second).
211 \note Used clusters members: ClusterData::m_label, ClusterData::m_means, ClusterData::m_size
212 \note Updated clusters members: ClusterData::m_avgDistToClusterCenter, ClusterData::m_stdDevs.
213 */
215 const InputAdaptor<double>& input,
216 const std::vector<unsigned int>& attributesIndices,
217 const std::vector<double>& inputNoDataValues,
218 ClustersContainerT& clustersMap,
220 const unsigned int outputIndex,
221 const std::vector< std::pair< double, double > >& inputElementsOffsetsAndGains ) const;
222
223 /*!
224 \brief Split clusters.
225 \param clustersMap Clusters map.
226 \param inputElementsCount Input data total elements number.
227 \param maxNumberOfSplits Maximum number of splits.
228 \param nextAvaliableClusterLabel Reference to the global clusters labels counter.
229 \note Only the members ClusterData::m_label, ClusterData::m_means, ClusterData::m_stdDevs, ClusterData::m_size, ClusterData::m_avgDistToClusterCenter are used for this operation.
230 \note Only the following members will be valid after splitting: ClusterData::m_means, ClusterData::m_label.
231 */
233 const unsigned int inputElementsCount,
234 const unsigned int maxNumberOfSplits,
235 ClusterData::ClusterLabelT& nextAvaliableClusterLabel,
236 ClustersContainerT& clustersMap ) const;
237
238 /*!
239 \brief Merge clusters.
240 \param clustersMap Clusters map.
241 \param mergeDistThreshold A distance threshold to control the merge process (higher values allow more clusters to be merged. Valid values: zero or positive values).
242 \param maxNumberOfMerges Maximum number of merges.
243 \note Only the members ClusterData::m_label, ClusterData::m_means, ClusterData::m_size are used for this operation.
244 \note Only the following members will be valid after merging: ClusterData::m_means, ClusterData::m_label, ClusterData::m_size.
245 */
246 static bool mergeClusters(
247 const double mergeDistThreshold,
248 const unsigned int maxNumberOfMerges,
249 ClustersContainerT& clustersMap );
250
251 static void printClusters( const ClustersContainerT& clustersMap );
252
253 /*!
254 \brief Recode clusters labels to be in range zero up to the number of clusters - 1.
255 \param clustersMap Clusters map.
256 */
257 static void recodeClustersLabels( ClustersContainerT& clustersMap );
258 };
259
260 } // end namespace cl
261} // end namespace te
262
263#endif // __TERRALIB_CLASSIFICATION_INTERNAL_ISODATA_H
Classifiers adaptors.
std::vector< double > m_sums
Sums of all cluster pixel values (one sum for each cluster feature dimension).
Definition: ISOData.h:101
unsigned int m_size
Cluster size (elements number).
Definition: ISOData.h:98
unsigned int ClusterLabelT
Definition: ISOData.h:95
ClusterLabelT m_label
Cluster label.
Definition: ISOData.h:99
std::vector< double > m_means
Cluster means (one for each cluster feature dimension).
Definition: ISOData.h:100
double m_avgDistToClusterCenter
Average distance from all samples to the cluster center.
Definition: ISOData.h:97
std::vector< double > m_stdDevs
Cluster standard deviations (one for each cluster feature dimension).
Definition: ISOData.h:102
Classifier Parameters.
Definition: ISOData.h:60
const Parameters & operator=(const Parameters &params)
unsigned int m_maxSplitsPerIteration
Maximum number of cluster splits per iteration (zero=automatic, default:1).
Definition: ISOData.h:66
unsigned int m_clustersNumber
Desired number of clusters (This is a mean number. Final clusters number can go from 0 up to k+(k/2)....
Definition: ISOData.h:63
unsigned int m_maxIterationsNumber
Maximum number of iterations (minimum:1, default:0=automatic).
Definition: ISOData.h:64
double m_minClusterSizePercent
Minimum cluster size (as a percentage of the total number of input elements....
Definition: ISOData.h:67
AbstractParameters * clone() const
Create a clone copy of this instance.
void reset()
Clear all internal allocated resources and reset the parameters instance to its initial state.
double m_stdDevSplitThreshold
A standard deviation threshold to split clusters (lower values tend to create more clusters by splitt...
Definition: ISOData.h:68
unsigned int m_maxMergesPerIteration
Maximum number of cluster merges per iteration (zero=automatic, default:1).
Definition: ISOData.h:65
double m_mergeDistThreshold
A distance threshold to control the merge process (higher values allow more clusters to be merged....
Definition: ISOData.h:69
ISOData strategy for an unsupervised pixel-based classification algorithm.
Definition: ISOData.h:52
bool classify(const InputAdaptor< double > &input, const std::vector< unsigned int > &attributesIndices, const std::vector< double > &inputNoDataValues, InputOutputAdaptor< ClusterData::ClusterLabelT > &output, const unsigned int outputIndex, const ClusterData::ClusterLabelT outputNoDataValue, const bool enableProgressInterface, std::vector< ClusterData > *clustersDataPtr)
Classify an input iterated data and save the result on the output iterated data.
std::map< ClusterData::ClusterLabelT, ISOData::ClusterData > ClustersContainerT
Definition: ISOData.h:144
bool removeSmallClusters(const unsigned int inputElelementsCount, ClustersContainerT &clustersMap) const
Remove clusters that are too small according to the current parameters.
static void recodeClustersLabels(ClustersContainerT &clustersMap)
Recode clusters labels to be in range zero up to the number of clusters - 1.
bool updateClustersStats(const InputAdaptor< double > &input, const std::vector< unsigned int > &attributesIndices, const std::vector< double > &inputNoDataValues, ClustersContainerT &clustersMap, const InputOutputAdaptor< unsigned int > &output, const unsigned int outputIndex, const std::vector< std::pair< double, double > > &inputElementsOffsetsAndGains) const
Update clusters statistical data.
static void printClusters(const ClustersContainerT &clustersMap)
bool initialize(const Parameters &params)
Initialize this classifier instance with new parameters.
static bool mergeClusters(const double mergeDistThreshold, const unsigned int maxNumberOfMerges, ClustersContainerT &clustersMap)
Merge clusters.
bool initializeClusters(const InputAdaptor< double > &input, const std::vector< unsigned int > &attributesIndices, ClustersContainerT &clustersMap, ClusterData::ClusterLabelT &nextAvaliableClusterLabel, const std::vector< double > &inputNoDataValues, std::vector< std::pair< double, double > > &inputElementsOffsetsAndGains) const
Initialize clusters.
Parameters m_parameters
Internal execution parameters.
Definition: ISOData.h:147
bool splitClusters(const unsigned int inputElementsCount, const unsigned int maxNumberOfSplits, ClusterData::ClusterLabelT &nextAvaliableClusterLabel, ClustersContainerT &clustersMap) const
Split clusters.
bool classifyElements(const InputAdaptor< double > &input, const std::vector< unsigned int > &attributesIndices, const std::vector< double > &inputNoDataValues, ClustersContainerT &clustersMap, InputOutputAdaptor< unsigned int > &output, const unsigned int outputIndex, const unsigned int outputNoDataValue, const std::vector< std::pair< double, double > > &inputElementsOffsetsAndGains) const
Classify each element according to the cluster centers.
void reset()
Reset this instance to its initial state.
bool m_isInitialized
True if this instance is initialized.
Definition: ISOData.h:146
Classifiers input data adaptor.
Definition: Adaptors.h:46
Classifiers input and output data adaptor.
Definition: Adaptors.h:113
Abstract parameters base interface.
TerraLib.
#define TECLEXPORT
You can use this macro in order to export/import classes and functions from this module.
Definition: Config.h:102
Proxy configuration file for TerraView (see terraview_config.h).
An exception class for the XML module.