All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
SkaterPartition.cpp
Go to the documentation of this file.
1 /* Copyright (C) 2008 National Institute For Space Research (INPE) - Brazil.
2 
3  This file is part of the TerraLib - a Framework for building GIS enabled applications.
4 
5  TerraLib is free software: you can redistribute it and/or modify
6  it under the terms of the GNU Lesser General Public License as published by
7  the Free Software Foundation, either version 3 of the License,
8  or (at your option) any later version.
9 
10  TerraLib is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU Lesser General Public License for more details.
14 
15  You should have received a copy of the GNU Lesser General Public License
16  along with TerraLib. See COPYING. If not, write to
17  TerraLib Team at <terralib-team@terralib.org>.
18  */
19 
20 /*!
21  \file terralib/sa/core/SkaterPartition.cpp
22 
23  \brief This file contains a class that represents the skater partition operation.
24 
25  \reference Adapted from TerraLib4.
26 */
27 
28 //TerraLib
29 #include "../../graph/core/Edge.h"
30 #include "../../graph/core/Vertex.h"
31 #include "../../graph/iterator/MemoryIterator.h"
32 #include "SkaterPartition.h"
33 #include "Utils.h"
34 
35 //STL
36 #include <cassert>
37 #include <queue>
38 
40 {
41  m_graph = graph;
42 
43  m_attrs = attrs;
44 }
45 
47 {
48 
49 }
50 
51 std::vector<std::size_t> te::sa::SkaterPartition::execute(std::size_t nGroups, std::string popAttr, std::size_t minPop)
52 {
53  assert(m_graph);
54 
55  m_popAttr = popAttr;
56 
57  m_popMin = minPop;
58 
59  //get first vertex id from graph
60  std::auto_ptr<te::graph::MemoryIterator> memIt(new te::graph::MemoryIterator(m_graph));
61  int firstVertex = memIt->getFirstVertex()->getId();
62 
63  //create vector with root information
64  std::vector< std::pair<int, double> > rootSet;
65  rootSet.push_back(std::vector< std::pair<int, double> >::value_type(firstVertex, 0.));
66 
67  std::vector<std::size_t> rootsVertexId;
68 
69  //bfs over the graph
70  while(!rootSet.empty() && (rootSet.size() + rootsVertexId.size() < nGroups))
71  {
72  //get max diff value index
73  std::vector< std::pair<int, double> >::iterator rootIt;
74  std::vector< std::pair<int, double> >::iterator rootItMax;
75  double maxDiff = -std::numeric_limits<double>::max();
76 
77  for(rootIt = rootSet.begin(); rootIt != rootSet.end(); ++rootIt)
78  {
79  if(rootIt->second > maxDiff)
80  {
81  maxDiff = rootIt->second;
82  rootItMax = rootIt;
83  }
84  }
85 
86  std::size_t currentId = rootItMax->first;
87 
88  rootSet.erase(rootItMax);
89 
90  //get vertex from graph
91  te::graph::Vertex* vertex = m_graph->getVertex(currentId);
92 
93  if(vertex)
94  {
95  double diffA = 0.;
96  double diffB = 0.;
97 
98  std::size_t edgeId;
99 
100  bool remove = edgeToRemove(currentId, diffA, diffB, edgeId);
101 
102  if(remove)
103  {
104  te::graph::Edge* edge = m_graph->getEdge(edgeId);
105 
106  int vertexFrom = edge->getIdFrom();
107  int vertexTo = edge->getIdTo();
108 
109  //remove edge
110  m_graph->removeEdge(edgeId);
111 
112  rootSet.push_back(std::vector< std::pair<int, double> >::value_type(vertexFrom, diffA));
113  rootSet.push_back(std::vector< std::pair<int, double> >::value_type(vertexTo, diffB));
114  }
115  else
116  {
117  rootsVertexId.push_back(currentId);
118  }
119  }
120  }
121 
122  for(std::size_t t = 0; t < rootSet.size(); ++t)
123  {
124  rootsVertexId.push_back(rootSet[t].first);
125  }
126 
127  return rootsVertexId;
128 }
129 
130 std::vector<std::size_t> te::sa::SkaterPartition::execute(std::string popAttr, std::size_t minPop)
131 {
132  assert(m_graph);
133 
134  m_popAttr = popAttr;
135 
136  m_popMin = minPop;
137 
138  //get first vertex id from graph
139  std::auto_ptr<te::graph::MemoryIterator> memIt(new te::graph::MemoryIterator(m_graph));
140  int firstVertex = memIt->getFirstVertex()->getId();
141 
142  //create vector with root information
143  std::vector< std::pair<int, double> > rootSet;
144  rootSet.push_back(std::vector< std::pair<int, double> >::value_type(firstVertex, 0.));
145 
146  std::vector<std::size_t> rootsVertexId;
147 
148  //bfs over the graph
149  while(!rootSet.empty())
150  {
151  //get max diff value index
152  std::vector< std::pair<int, double> >::iterator rootIt;
153  std::vector< std::pair<int, double> >::iterator rootItMax;
154  double maxDiff = -std::numeric_limits<double>::max();
155 
156  for(rootIt = rootSet.begin(); rootIt != rootSet.end(); ++rootIt)
157  {
158  if(rootIt->second > maxDiff)
159  {
160  maxDiff = rootIt->second;
161  rootItMax = rootIt;
162  }
163  }
164 
165  std::size_t currentId = rootItMax->first;
166 
167  rootSet.erase(rootItMax);
168 
169  //get vertex from graph
170  te::graph::Vertex* vertex = m_graph->getVertex(currentId);
171 
172  if(vertex)
173  {
174  double diffA = 0.;
175  double diffB = 0.;
176 
177  std::size_t edgeId;
178 
179  bool remove = edgeToRemove(currentId, diffA, diffB, edgeId);
180 
181  if(remove)
182  {
183  te::graph::Edge* edge = m_graph->getEdge(edgeId);
184 
185  int vertexFrom = edge->getIdFrom();
186  int vertexTo = edge->getIdTo();
187 
188  //remove edge
189  m_graph->removeEdge(edgeId);
190 
191  rootSet.push_back(std::vector< std::pair<int, double> >::value_type(vertexFrom, diffA));
192  rootSet.push_back(std::vector< std::pair<int, double> >::value_type(vertexTo, diffB));
193  }
194  else
195  {
196  rootsVertexId.push_back(currentId);
197  }
198  }
199  }
200 
201  for(std::size_t t = 0; t < rootSet.size(); ++t)
202  {
203  rootsVertexId.push_back(rootSet[t].first);
204  }
205 
206  return rootsVertexId;
207 }
208 
209 bool te::sa::SkaterPartition::edgeToRemove(int startVertex, double& diffA, double& diffB, std::size_t& edgeToRemoveId)
210 {
211  //create map of differences
212  std::vector<EdgeRemovalInfo> edgeRemovalVec;
213 
214  //calculate SSDTO
215  std::size_t totalPop = 0;
216  std::vector<double> meanVecStart = calculateRootMean(startVertex, -1, totalPop);
217  double deviationStart = calculateRootDeviation(startVertex, -1, meanVecStart);
218 
219  //create list
220  std::queue<int> queue;
221  std::set<int> visited;
222 
223  queue.push(startVertex);
224  visited.insert(startVertex);
225 
226  //bfs over the graph
227  while(!queue.empty())
228  {
229  int currentId = queue.front();
230  queue.pop();
231 
232  //get vertex from graph
233  te::graph::Vertex* vertex = m_graph->getVertex(currentId);
234 
235  if(vertex)
236  {
237  //get neighbours
238  std::set<int> neighbours = vertex->getNeighborhood();
239  std::set<int>::iterator itNeighbours = neighbours.begin();
240 
241  while(itNeighbours != neighbours.end())
242  {
243  te::graph::Edge* e = m_graph->getEdge(*itNeighbours);
244  te::graph::Vertex* vTo = 0;
245 
246  if(e)
247  {
248  if(e->getIdFrom() == currentId)
249  vTo = m_graph->getVertex(e->getIdTo());
250  else
251  vTo = m_graph->getVertex(e->getIdFrom());
252  }
253 
254  if(vTo)
255  {
256  //check if already visted
257  if(visited.find(vTo->getId()) == visited.end())
258  {
259  //add in queue
260  queue.push(vTo->getId());
261  visited.insert(vTo->getId());
262 
263  //calculate SSDi
264  double diffVFrom = 0.;
265  double diffVTo = 0.;
266  std::size_t popA = 0;
267  std::size_t popB = 0;
268 
269  double ssdi = calculateEdgeDifference(vertex->getId(), vTo->getId(), diffVFrom, diffVTo, popA, popB);
270 
271  double l = deviationStart - ssdi;
272 
273  EdgeRemovalInfo eri;
274  eri.m_edgeId = e->getId();
275  eri.m_SSDT = deviationStart;
276  eri.m_SSDi = ssdi;
277  eri.m_l = l;
278 
279  if(e->getIdFrom() == vertex->getId())
280  {
281  eri.m_SSDTa = diffVFrom;
282  eri.m_SSDTb = diffVTo;
283  }
284  else
285  {
286  eri.m_SSDTa = diffVTo;
287  eri.m_SSDTb = diffVFrom;
288  }
289 
290  eri.m_popa = popA;
291  eri.m_popb = popB;
292 
293  edgeRemovalVec.push_back(eri);
294  }
295  }
296 
297  ++itNeighbours;
298  }
299  }
300  }
301 
302  //get the edge with maximum SSDi
303  bool found = false;
304 
305  double maxDiff = -std::numeric_limits<double>::max();
306 
307  for(std::size_t t = 0; t < edgeRemovalVec.size(); ++t)
308  {
309  if(edgeRemovalVec[t].m_l > maxDiff && edgeRemovalVec[t].m_popa >= m_popMin && edgeRemovalVec[t].m_popb >= m_popMin)
310  {
311  maxDiff = edgeRemovalVec[t].m_l;
312 
313  edgeToRemoveId = edgeRemovalVec[t].m_edgeId;
314 
315  diffA = edgeRemovalVec[t].m_SSDTa;
316  diffB = edgeRemovalVec[t].m_SSDTb;
317 
318  found = true;
319  }
320  }
321 
322  if(found)
323  m_SSDiValues.push_back(maxDiff);
324 
325  edgeRemovalVec.clear();
326 
327  return found;
328 }
329 
330 double te::sa::SkaterPartition::calculateEdgeDifference(int vertexFrom, int vertexTo, double& diffA, double& diffB, std::size_t& popA, std::size_t& popB)
331 {
332  //calculate the deviation for the tree that begins with the vertex from SQDTA
333  std::vector<double> meanVecFrom = calculateRootMean(vertexFrom, vertexTo, popA);
334  diffA = calculateRootDeviation(vertexFrom, vertexTo, meanVecFrom);
335 
336  //calculate the deviation for the tree that begins with the vertex to SQDTB
337  std::vector<double> meanVecTo = calculateRootMean(vertexTo, vertexFrom, popB);
338  diffB = calculateRootDeviation(vertexTo, vertexFrom, meanVecTo);
339 
340  //return the edge cost
341  return diffA + diffB;
342 }
343 
344 std::vector<double> te::sa::SkaterPartition::calculateRootMean(int startVertex, int vertexToIgnore, std::size_t& pop)
345 {
346  std::vector<double> meanAttrs(m_attrs.size(), 0.);
347 
348  std::queue<int> queue;
349  std::set<int> visited;
350 
351  queue.push(startVertex);
352  visited.insert(startVertex);
353 
354  std::size_t count = 0;
355 
356  while(!queue.empty())
357  {
358  int currentId = queue.front();
359  queue.pop();
360 
361  te::graph::Vertex* vertex = m_graph->getVertex(currentId);
362 
363  if(vertex)
364  {
365  for(std::size_t t = 0; t < m_attrs.size(); ++t)
366  {
367  std::string attrName = m_attrs[t];
368 
369  int attrIdx;
370 
371  if(te::sa::GetGraphVertexAttrIndex(m_graph, attrName, attrIdx))
372  {
373  meanAttrs[t] += te::sa::GetDataValue(vertex->getAttributes()[attrIdx]);
374  }
375  }
376 
377  //get population information
378  int popIdx;
379  if(te::sa::GetGraphVertexAttrIndex(m_graph, m_popAttr, popIdx))
380  {
381  pop += (std::size_t)te::sa::GetDataValue(vertex->getAttributes()[popIdx]);
382  }
383 
384  //get neighbours
385  std::set<int> neighbours = vertex->getNeighborhood();
386  std::set<int>::iterator itNeighbours = neighbours.begin();
387 
388  while(itNeighbours != neighbours.end())
389  {
390  te::graph::Edge* e = m_graph->getEdge(*itNeighbours);
391  te::graph::Vertex* vTo = 0;
392 
393  if(e)
394  {
395  if(e->getIdFrom() == currentId)
396  vTo = m_graph->getVertex(e->getIdTo());
397  else
398  vTo = m_graph->getVertex(e->getIdFrom());
399  }
400 
401  if(vTo && vTo->getId() != vertexToIgnore)
402  {
403  if(visited.find(vTo->getId()) == visited.end())
404  {
405  queue.push(vTo->getId());
406  visited.insert(vTo->getId());
407  }
408  }
409 
410  ++itNeighbours;
411  }
412  }
413 
414  ++count;
415  }
416 
417  for(std::size_t t = 0; t < m_attrs.size(); ++t)
418  {
419  meanAttrs[t] /= count;
420  }
421 
422  return meanAttrs;
423 }
424 
425 double te::sa::SkaterPartition::calculateRootDeviation(int startVertex, int vertexToIgnore, std::vector<double>& meanVec)
426 {
427  double deviation = 0.;
428 
429  std::queue<int> queue;
430  std::set<int> visited;
431 
432  queue.push(startVertex);
433  visited.insert(startVertex);
434 
435  while(!queue.empty())
436  {
437  int currentId = queue.front();
438  queue.pop();
439 
440  te::graph::Vertex* vertex = m_graph->getVertex(currentId);
441 
442  if(vertex)
443  {
444  deviation += calculateDistance(vertex, meanVec);
445 
446  //get neighbours
447  std::set<int> neighbours = vertex->getNeighborhood();
448  std::set<int>::iterator itNeighbours = neighbours.begin();
449 
450  while(itNeighbours != neighbours.end())
451  {
452  te::graph::Edge* e = m_graph->getEdge(*itNeighbours);
453  te::graph::Vertex* vTo = 0;
454 
455  if(e)
456  {
457  if(e->getIdFrom() == currentId)
458  vTo = m_graph->getVertex(e->getIdTo());
459  else
460  vTo = m_graph->getVertex(e->getIdFrom());
461  }
462 
463  if(vTo && vTo->getId() != vertexToIgnore)
464  {
465  if(visited.find(vTo->getId()) == visited.end())
466  {
467  queue.push(vTo->getId());
468  visited.insert(vTo->getId());
469  }
470  }
471 
472  ++itNeighbours;
473  }
474  }
475  }
476 
477  return deviation;
478 }
479 
480 double te::sa::SkaterPartition::calculateDistance(te::graph::Vertex* vertex, std::vector<double>& meanVec)
481 {
482  double distance = 0.;
483 
484  for(std::size_t t = 0; t < m_attrs.size(); ++t)
485  {
486  std::string attrName = m_attrs[t];
487 
488  int attrIdx;
489 
490  if(te::sa::GetGraphVertexAttrIndex(m_graph, attrName, attrIdx))
491  {
492  distance += (te::sa::GetDataValue(vertex->getAttributes()[attrIdx]) - meanVec[t]) * (te::sa::GetDataValue(vertex->getAttributes()[attrIdx]) - meanVec[t]);
493  }
494  }
495 
496  return distance;
497 }
double m_SSDTa
Sum of Square Difference for Tree A.
SkaterPartition(te::graph::AbstractGraph *graph, std::vector< std::string > attrs)
Default constructor.
A struct that represents the skater partition values for each edge.
int getId()
It returns the edge identification.
Definition: Edge.cpp:66
std::vector< te::dt::AbstractData * > & getAttributes()
It returns the vector of attributes associated with this element.
Definition: Vertex.cpp:74
double calculateRootDeviation(int startVertex, int vertexToIgnore, std::vector< double > &meanVec)
Utilitary function for spatial analysis module.
From the point of view of graph theory, vertices are treated as featureless and indivisible objects...
Definition: Vertex.h:68
Class used to define the edge struct of a graph. Its compose with a identifier, the vertex origin and...
Definition: Edge.h:58
std::vector< double > calculateRootMean(int startVertex, int vertexToIgnore, std::size_t &pop)
This file contains a class that represents the skater partition operation.
~SkaterPartition()
Virtual destructor.
std::vector< std::size_t > execute(std::size_t nGroups, std::string popAttr="", std::size_t minPop=0)
Function to execute the skater partition.
Abstract class used to define the main functions of graph struct. All graph implementations must used...
Definition: AbstractGraph.h:55
double m_l
Difference between m_SSDT and m_SSDi;.
std::size_t m_edgeId
Edge identification.
std::size_t m_popa
Sum of population for Tree A.
int getIdFrom()
It returns the vertex origin identification.
Definition: Edge.cpp:71
std::set< int > & getNeighborhood()
Returns the Neighborhood vector.
Definition: Vertex.cpp:111
bool edgeToRemove(int startVertex, double &diffA, double &diffB, std::size_t &edgeToRemoveId)
double m_SSDi
Sum of m_SSDa and m_SSDb.
double calculateEdgeDifference(int vertexFrom, int vertexTo, double &diffA, double &diffB, std::size_t &popA, std::size_t &popB)
TESAEXPORT bool GetGraphVertexAttrIndex(te::graph::AbstractGraph *graph, std::string attrName, int &index)
Function used to get the vertex attribute index in the graph of the gpm.
Definition: Utils.cpp:168
std::size_t m_popb
Sum of population for Tree B.
double m_SSDT
Sum of Square Difference for Tree.
int getId()
It returns the vertex id.
Definition: Vertex.cpp:69
TESAEXPORT double GetDataValue(te::dt::AbstractData *ad)
Function used to get the numeric value from a gpm property.
Definition: Utils.cpp:200
double m_SSDTb
Sum of Square Difference for Tree B.
te::graph::AbstractGraph * m_graph
Pointer to a graph that represents a minimum spanning tree.
std::vector< std::string > m_attrs
Vector with attributes names used to calculate the skater operation.
double calculateDistance(te::graph::Vertex *vertex, std::vector< double > &meanVec)
int getIdTo()
It returns the vertex destiny identification.
Definition: Edge.cpp:76