Loading...
Searching...
No Matches
KMeans.h
Go to the documentation of this file.
1/* Copyright (C) 2008 National Institute For Space Research (INPE) - Brazil.
2
3 This file is part of the TerraLib - a Framework for building GIS enabled applications.
4
5 TerraLib is free software: you can redistribute it and/or modify
6 it under the terms of the GNU Lesser General Public License as published by
7 the Free Software Foundation, either version 3 of the License,
8 or (at your option) any later version.
9
10 TerraLib is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public License
16 along with TerraLib. See COPYING. If not, write to
17 TerraLib Team at <terralib-team@terralib.org>.
18 */
19
20/*!
21 \file terralib/classification/KMeans.h
22
23 \brief KMeans strategy for classification.
24*/
25
26#ifndef __TERRALIB_CLASSIFICATION_INTERNAL_KMEANS_H
27#define __TERRALIB_CLASSIFICATION_INTERNAL_KMEANS_H
28
29// TerraLib
30#include "../common/AbstractParameters.h"
31#include "../common/progress/TaskProgress.h"
32#include "../raster/Grid.h"
33#include "Config.h"
34#include "Exception.h"
35#include "Utils.h"
36
37// STL
38#include <iostream>
39#include <cmath>
40#include <stdlib.h>
41#include <map>
42#include <vector>
43
44namespace te
45{
46 namespace cl
47 {
48 /*!
49 \class KMeans
50
51 \brief KMeans strategy for classification.
52 Step-by-step:
53 1. Define a random solution, creating K values of mean.
54 2. Classify each input value according to the closest mean.
55 3. Calculate new means using the classified pixels.
56 4. Go back to step 2, or stop depending on the following checks:
57 4.1. Check if the new means are different from the previous iteration,
58 using the convergence threshold.
59 4.2. Check if maximum number of iterations has achieved.
60 \note Output classification IDs starting at 1.
61 */
62 template<class TTRAIN, class TCLASSIFY>
63 class KMeans
64 {
65 public:
66
67 /*!
68 \class Parameters
69
70 \brief Classifier Parameters
71 */
73 {
74 public:
75
76 unsigned int m_K; //!< The number of clusters (means) to detect in image.
77 unsigned int m_maxIterations; //!< The maximum of iterations to perform if convergence is not achieved.
78 double m_epsilon; //!< The stop criteria. When the clusters change in a value smaller then epsilon, the convergence is achieved.
79
80 Parameters();
81
83
84 //overload
85 const Parameters& operator=(const Parameters& params);
86
87 //overload
88 void reset() ;
89
90 //overload
92 };
93
94 public:
95
96 KMeans();
97
98 ~KMeans();
99
100 bool initialize(const Parameters& params) ;
101
102 bool train(TTRAIN& itBegin, TTRAIN& itEnd,
103 const std::vector<unsigned int>& attributesIndices,
104 const std::vector<unsigned int>& labels,
105 const bool enableProgressInterface) ;
106
107 bool classify(TCLASSIFY& itBegin, TCLASSIFY& itEnd,
108 const std::vector<unsigned int>& attributesIndices,
109 std::vector<unsigned int>& classification,
110 const bool enableProgressInterface) ;
111
112 protected:
113
114 unsigned int getClassification(std::vector<double> values);
115
116 bool m_isInitialized; //!< True if this instance is initialized.
117 std::vector<std::vector<double> > m_KMeans; //!< The vector of K means.
118 Parameters m_parameters; //!< Internal execution parameters.
119 };
120
121 } // end namespace cl
122} // end namespace te
123
124// class of Parameters
125template <class TTRAIN, class TCLASSIFY>
127{
128 reset();
129}
130
131template <class TTRAIN, class TCLASSIFY>
133{
134}
135
136template <class TTRAIN, class TCLASSIFY>
138{
139 reset();
140
141 m_K = rhs.m_K;
142 m_maxIterations = rhs.m_maxIterations;
143 m_epsilon = rhs.m_epsilon;
144
145 return *this;
146}
147
148template <class TTRAIN, class TCLASSIFY>
150{
151 m_K = 0;
152 m_maxIterations = 0;
153 m_epsilon = 0.0;
154}
155
156template <class TTRAIN, class TCLASSIFY>
158{
160}
161
162// class KMeans Strategy
163template <class TTRAIN, class TCLASSIFY>
165{
166 m_isInitialized = false;
167 m_KMeans.clear();
168}
169
170template <class TTRAIN, class TCLASSIFY>
172{
173}
174
175template <class TTRAIN, class TCLASSIFY>
177{
178 m_isInitialized = false;
179
180 m_KMeans.clear();
181 m_parameters = params;
182
183 if (m_parameters.m_K < 2)
184 {
185 throw te::cl::Exception(TE_TR("The value of K must be at least 2."));
186 return false;
187 }
190 if (m_parameters.m_epsilon < 0)
191 m_parameters.m_epsilon = 0.0000000001;
192
193 m_isInitialized = true;
194
195 return true;
196}
197
198template<class TTRAIN, class TCLASSIFY>
199bool te::cl::KMeans<TTRAIN, TCLASSIFY>::train(TTRAIN& itBegin, TTRAIN& itEnd,
200 const std::vector<unsigned int>& attributesIndices,
201 const std::vector<unsigned int>& labels,
202 const bool enableProgressInterface)
203{
204 std::vector<double> maxValues;
205 std::vector<double> minValues;
206
207 for (unsigned int i = 0; i < attributesIndices.size(); i++)
208 {
209 maxValues.push_back(std::numeric_limits<double>::min());
210 minValues.push_back(std::numeric_limits<double>::max());
211 }
212
213// find maximum and minimum values for each attribute
214 std::vector<unsigned int> tmpClassification;
215 TTRAIN it = itBegin;
216 while(it != itEnd)
217 {
218 tmpClassification.push_back(0);
219 for (unsigned int i = 0; i < attributesIndices.size(); i++)
220 {
221 if ((*it)[attributesIndices[i]] > maxValues[i])
222 maxValues[i] = (*it)[attributesIndices[i]];
223 if ((*it)[attributesIndices[i]] < minValues[i])
224 minValues[i] = (*it)[attributesIndices[i]];
225 }
226 ++it;
227 }
228
229 srand((unsigned) time(0));
230// starting random K means
231 m_KMeans.clear();
232 for (unsigned int k = 0; k <= m_parameters.m_K; k++)
233 {
234 std::vector<double> newMean;
235 for (unsigned int i = 0; i < attributesIndices.size(); i++)
236 {
237 newMean.push_back(
238 ( maxValues[i] == minValues[i] )
239 ?
240 ( ((double)rand()) * maxValues[i] / ((double)RAND_MAX) )
241 :
242 (
243 ( ( (double)rand() ) / ((double)RAND_MAX) )
244 *
245 ( maxValues[i] - minValues[i] )
246 )
247 );
248 }
249
250 m_KMeans.push_back(newMean);
251 }
252
253// estimate K means
254 std::vector<double> values;
255 std::map<unsigned int, std::vector<double> > tmpValues;
256 std::vector<unsigned int> tmpNs;
257 unsigned int tmpClass;
258 double distanceKMeans;
259 double a_minus_b;
260 std::vector<std::vector<double> > oldKMeans;
261 for (unsigned int k = 0; k <= m_parameters.m_K; k++)
262 {
263 tmpValues[k] = std::vector<double>(attributesIndices.size());
264 tmpNs.push_back(0);
265 }
266
268 for (unsigned int t = 0; t < m_parameters.m_maxIterations; t++)
269 {
270 for (unsigned int k = 0; k <= m_parameters.m_K; k++)
271 {
272 for (unsigned int i = 0; i < attributesIndices.size(); i++)
273 tmpValues[k][i] = 0.0;
274 tmpNs[k] = 0;
275 }
276
277 // classifying input data according to actual K means
278 it = itBegin;
279 while(it != itEnd)
280 {
281 values.clear();
282 for (unsigned int i = 0; i < attributesIndices.size(); i++)
283 values.push_back((*it)[attributesIndices[i]]);
284 tmpClass = getClassification(values);
285 for (unsigned int i = 0; i < attributesIndices.size(); i++)
286 tmpValues[tmpClass][i] += values[i];
287 tmpNs[tmpClass]++;
288 ++it;
289 }
290
291 oldKMeans = m_KMeans;
292// recomputing K means
293 for (unsigned int k = 0; k <= m_parameters.m_K; k++)
294 for (unsigned int i = 0; i < attributesIndices.size(); i++)
295 if (tmpNs[k] > 0)
296 m_KMeans[k][i] = (double) (tmpValues[k][i] / (double) tmpNs[k]);
297
298// checking convergence
299 distanceKMeans = 0.0;
300 for (unsigned int k = 0; k <= m_parameters.m_K; k++)
301 for (unsigned int i = 0; i < attributesIndices.size(); i++)
302 {
303 a_minus_b = m_KMeans[k][i] - oldKMeans[k][i];
304 distanceKMeans += a_minus_b * a_minus_b;
305 }
306 distanceKMeans = sqrt(distanceKMeans);
307 if (distanceKMeans < m_parameters.m_epsilon)
308 break;
309
310 task.pulse();
311 }
312
313 return true;
314}
315
316template<class TTRAIN, class TCLASSIFY>
317bool te::cl::KMeans<TTRAIN, TCLASSIFY>::classify(TCLASSIFY& itBegin, TCLASSIFY& itEnd,
318 const std::vector<unsigned int>& attributesIndices,
319 std::vector<unsigned int>& classification,
320 const bool enableProgressInterface)
321{
322 TCLASSIFY it = itBegin;
323 std::vector<double> values(attributesIndices.size());
324 classification.clear();
325
326// count number of elements to be classified
327 unsigned int N = 0;
328 while(it != itEnd)
329 {
330 N++;
331 ++it;
332 }
333 classification.resize(N);
334
335// classify elements
336 te::common::TaskProgress task(TE_TR("K-Means algorithm - classifying data"), te::common::TaskProgress::UNDEFINED, N);
337 unsigned j = 0;
338 it = itBegin;
339 while(it != itEnd)
340 {
341 for (unsigned int i = 0; i < attributesIndices.size(); i++)
342 values[i] = (*it)[attributesIndices[i]];
343 classification[j++] = getClassification(values);
344
345 ++it;
346 task.pulse();
347 }
348
349 return true;
350}
351
352template<class TTRAIN, class TCLASSIFY>
353unsigned int te::cl::KMeans<TTRAIN, TCLASSIFY>::getClassification(std::vector<double> values)
354{
355 double minDistance = std::numeric_limits<double>::max();
356 double distance;
357 unsigned int clusterNumber = 0;
358
359 for (unsigned int k = 1; k <= m_parameters.m_K; k++)
360 {
361 distance = GetEuclideanDistance(m_KMeans[k], values);
362 if (distance < minDistance)
363 {
364 minDistance = distance;
365 clusterNumber = k;
366 }
367 }
368
369 return clusterNumber;
370}
371
372#endif // __TERRALIB_CLASSIFICATION_INTERNAL_KMEANS_H
#define TE_TR(message)
It marks a string in order to get translated.
Definition: Translator.h:264
Classifier Parameters.
Definition: KMeans.h:73
const Parameters & operator=(const Parameters &params)
Definition: KMeans.h:137
AbstractParameters * clone() const
Create a clone copy of this instance.
Definition: KMeans.h:157
unsigned int m_K
The number of clusters (means) to detect in image.
Definition: KMeans.h:76
double m_epsilon
The stop criteria. When the clusters change in a value smaller then epsilon, the convergence is achie...
Definition: KMeans.h:78
void reset()
Clear all internal allocated resources and reset the parameters instance to its initial state.
Definition: KMeans.h:149
unsigned int m_maxIterations
The maximum of iterations to perform if convergence is not achieved.
Definition: KMeans.h:77
KMeans strategy for classification. Step-by-step:
Definition: KMeans.h:64
Parameters m_parameters
Internal execution parameters.
Definition: KMeans.h:118
bool train(TTRAIN &itBegin, TTRAIN &itEnd, const std::vector< unsigned int > &attributesIndices, const std::vector< unsigned int > &labels, const bool enableProgressInterface)
Definition: KMeans.h:199
unsigned int getClassification(std::vector< double > values)
Definition: KMeans.h:353
bool m_isInitialized
True if this instance is initialized.
Definition: KMeans.h:116
std::vector< std::vector< double > > m_KMeans
The vector of K means.
Definition: KMeans.h:117
bool initialize(const Parameters &params)
Definition: KMeans.h:176
bool classify(TCLASSIFY &itBegin, TCLASSIFY &itEnd, const std::vector< unsigned int > &attributesIndices, std::vector< unsigned int > &classification, const bool enableProgressInterface)
Definition: KMeans.h:317
Abstract parameters base interface.
This class can be used to inform the progress of a task.
Definition: TaskProgress.h:54
void pulse()
Calls setCurrentStep() function using getCurrentStep() + 1.
TECLEXPORT double GetEuclideanDistance(std::vector< double > v1, std::vector< double > v2)
Computes euclidean distance between two double vectors.
TerraLib.
Proxy configuration file for TerraView (see terraview_config.h).
An exception class for the XML module.