SkaterPartition.cpp
Go to the documentation of this file.
1 /* Copyright (C) 2008 National Institute For Space Research (INPE) - Brazil.
2 
3  This file is part of the TerraLib - a Framework for building GIS enabled applications.
4 
5  TerraLib is free software: you can redistribute it and/or modify
6  it under the terms of the GNU Lesser General Public License as published by
7  the Free Software Foundation, either version 3 of the License,
8  or (at your option) any later version.
9 
10  TerraLib is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU Lesser General Public License for more details.
14 
15  You should have received a copy of the GNU Lesser General Public License
16  along with TerraLib. See COPYING. If not, write to
17  TerraLib Team at <terralib-team@terralib.org>.
18  */
19 
20 /*!
21  \file terralib/sa/core/SkaterPartition.cpp
22 
23  \brief This file contains a class that represents the skater partition operation.
24 
25  \reference Adapted from TerraLib4.
26 */
27 
28 //TerraLib
29 #include "../../graph/core/Edge.h"
30 #include "../../graph/core/Vertex.h"
31 #include "../../graph/iterator/MemoryIterator.h"
32 #include "SkaterPartition.h"
33 #include "Utils.h"
34 
35 //STL
36 #include <cassert>
37 #include <queue>
38 
39 te::sa::SkaterPartition::SkaterPartition(te::graph::AbstractGraph* graph, std::vector<std::string> attrs) :
40  m_graph(graph),
41  m_attrs(attrs),
42  m_popAttr(""),
43  m_popMin(0)
44 {
45 }
46 
48 {
49  m_SSDiValues.clear();
50 }
51 
52 std::vector<std::size_t> te::sa::SkaterPartition::execute(std::size_t nGroups, std::string popAttr, std::size_t minPop)
53 {
54  assert(m_graph);
55 
56  m_popAttr = popAttr;
57 
58  m_popMin = minPop;
59 
60  //get first vertex id from graph
61  std::auto_ptr<te::graph::MemoryIterator> memIt(new te::graph::MemoryIterator(m_graph));
62  int firstVertex = memIt->getFirstVertex()->getId();
63 
64  //create vector with root information
65  std::vector< std::pair<int, double> > rootSet;
66  rootSet.push_back(std::vector< std::pair<int, double> >::value_type(firstVertex, 0.));
67 
68  std::vector<std::size_t> rootsVertexId;
69 
70  //bfs over the graph
71  while(!rootSet.empty() && (rootSet.size() + rootsVertexId.size() < nGroups))
72  {
73  //get max diff value index
74  std::vector< std::pair<int, double> >::iterator rootIt;
75  std::vector< std::pair<int, double> >::iterator rootItMax;
76  double maxDiff = -std::numeric_limits<double>::max();
77 
78  for(rootIt = rootSet.begin(); rootIt != rootSet.end(); ++rootIt)
79  {
80  if(rootIt->second > maxDiff)
81  {
82  maxDiff = rootIt->second;
83  rootItMax = rootIt;
84  }
85  }
86 
87  std::size_t currentId = rootItMax->first;
88 
89  rootSet.erase(rootItMax);
90 
91  //get vertex from graph
92  te::graph::Vertex* vertex = m_graph->getVertex(currentId);
93 
94  if(vertex)
95  {
96  double diffA = 0.;
97  double diffB = 0.;
98 
99  std::size_t edgeId;
100 
101  bool remove = edgeToRemove(currentId, diffA, diffB, edgeId);
102 
103  if(remove)
104  {
105  te::graph::Edge* edge = m_graph->getEdge(edgeId);
106 
107  int vertexFrom = edge->getIdFrom();
108  int vertexTo = edge->getIdTo();
109 
110  //remove edge
111  m_graph->removeEdge(edgeId);
112 
113  rootSet.push_back(std::vector< std::pair<int, double> >::value_type(vertexFrom, diffA));
114  rootSet.push_back(std::vector< std::pair<int, double> >::value_type(vertexTo, diffB));
115  }
116  else
117  {
118  rootsVertexId.push_back(currentId);
119  }
120  }
121  }
122 
123  for(std::size_t t = 0; t < rootSet.size(); ++t)
124  {
125  rootsVertexId.push_back(rootSet[t].first);
126  }
127 
128  return rootsVertexId;
129 }
130 
131 std::vector<std::size_t> te::sa::SkaterPartition::execute(std::string popAttr, std::size_t minPop)
132 {
133  assert(m_graph);
134 
135  m_popAttr = popAttr;
136 
137  m_popMin = minPop;
138 
139  //get first vertex id from graph
140  std::auto_ptr<te::graph::MemoryIterator> memIt(new te::graph::MemoryIterator(m_graph));
141  int firstVertex = memIt->getFirstVertex()->getId();
142 
143  //create vector with root information
144  std::vector< std::pair<int, double> > rootSet;
145  rootSet.push_back(std::vector< std::pair<int, double> >::value_type(firstVertex, 0.));
146 
147  std::vector<std::size_t> rootsVertexId;
148 
149  //bfs over the graph
150  while(!rootSet.empty())
151  {
152  //get max diff value index
153  std::vector< std::pair<int, double> >::iterator rootIt;
154  std::vector< std::pair<int, double> >::iterator rootItMax;
155  double maxDiff = -std::numeric_limits<double>::max();
156 
157  for(rootIt = rootSet.begin(); rootIt != rootSet.end(); ++rootIt)
158  {
159  if(rootIt->second > maxDiff)
160  {
161  maxDiff = rootIt->second;
162  rootItMax = rootIt;
163  }
164  }
165 
166  std::size_t currentId = rootItMax->first;
167 
168  rootSet.erase(rootItMax);
169 
170  //get vertex from graph
171  te::graph::Vertex* vertex = m_graph->getVertex(currentId);
172 
173  if(vertex)
174  {
175  double diffA = 0.;
176  double diffB = 0.;
177 
178  std::size_t edgeId;
179 
180  bool remove = edgeToRemove(currentId, diffA, diffB, edgeId);
181 
182  if(remove)
183  {
184  te::graph::Edge* edge = m_graph->getEdge(edgeId);
185 
186  int vertexFrom = edge->getIdFrom();
187  int vertexTo = edge->getIdTo();
188 
189  //remove edge
190  m_graph->removeEdge(edgeId);
191 
192  rootSet.push_back(std::vector< std::pair<int, double> >::value_type(vertexFrom, diffA));
193  rootSet.push_back(std::vector< std::pair<int, double> >::value_type(vertexTo, diffB));
194  }
195  else
196  {
197  rootsVertexId.push_back(currentId);
198  }
199  }
200  }
201 
202  for(std::size_t t = 0; t < rootSet.size(); ++t)
203  {
204  rootsVertexId.push_back(rootSet[t].first);
205  }
206 
207  return rootsVertexId;
208 }
209 
210 bool te::sa::SkaterPartition::edgeToRemove(int startVertex, double& diffA, double& diffB, std::size_t& edgeToRemoveId)
211 {
212  //create map of differences
213  std::vector<EdgeRemovalInfo> edgeRemovalVec;
214 
215  //calculate SSDTO
216  std::size_t totalPop = 0;
217  std::vector<double> meanVecStart = calculateRootMean(startVertex, -1, totalPop);
218  double deviationStart = calculateRootDeviation(startVertex, -1, meanVecStart);
219 
220  //create list
221  std::queue<int> queue;
222  std::set<int> visited;
223 
224  queue.push(startVertex);
225  visited.insert(startVertex);
226 
227  //bfs over the graph
228  while(!queue.empty())
229  {
230  int currentId = queue.front();
231  queue.pop();
232 
233  //get vertex from graph
234  te::graph::Vertex* vertex = m_graph->getVertex(currentId);
235 
236  if(vertex)
237  {
238  //get neighbours
239  std::set<int> neighbours = vertex->getNeighborhood();
240  std::set<int>::iterator itNeighbours = neighbours.begin();
241 
242  while(itNeighbours != neighbours.end())
243  {
244  te::graph::Edge* e = m_graph->getEdge(*itNeighbours);
245  te::graph::Vertex* vTo = 0;
246 
247  if(e)
248  {
249  if(e->getIdFrom() == currentId)
250  vTo = m_graph->getVertex(e->getIdTo());
251  else
252  vTo = m_graph->getVertex(e->getIdFrom());
253  }
254 
255  if(vTo)
256  {
257  //check if already visted
258  if(visited.find(vTo->getId()) == visited.end())
259  {
260  //add in queue
261  queue.push(vTo->getId());
262  visited.insert(vTo->getId());
263 
264  //calculate SSDi
265  double diffVFrom = 0.;
266  double diffVTo = 0.;
267  std::size_t popA = 0;
268  std::size_t popB = 0;
269 
270  double ssdi = calculateEdgeDifference(vertex->getId(), vTo->getId(), diffVFrom, diffVTo, popA, popB);
271 
272  double l = deviationStart - ssdi;
273 
274  EdgeRemovalInfo eri;
275  eri.m_edgeId = e->getId();
276  eri.m_SSDT = deviationStart;
277  eri.m_SSDi = ssdi;
278  eri.m_l = l;
279 
280  if(e->getIdFrom() == vertex->getId())
281  {
282  eri.m_SSDTa = diffVFrom;
283  eri.m_SSDTb = diffVTo;
284  }
285  else
286  {
287  eri.m_SSDTa = diffVTo;
288  eri.m_SSDTb = diffVFrom;
289  }
290 
291  eri.m_popa = popA;
292  eri.m_popb = popB;
293 
294  edgeRemovalVec.push_back(eri);
295  }
296  }
297 
298  ++itNeighbours;
299  }
300  }
301  }
302 
303  //get the edge with maximum SSDi
304  bool found = false;
305 
306  double maxDiff = -std::numeric_limits<double>::max();
307 
308  for(std::size_t t = 0; t < edgeRemovalVec.size(); ++t)
309  {
310  if(edgeRemovalVec[t].m_l > maxDiff && edgeRemovalVec[t].m_popa >= m_popMin && edgeRemovalVec[t].m_popb >= m_popMin)
311  {
312  maxDiff = edgeRemovalVec[t].m_l;
313 
314  edgeToRemoveId = edgeRemovalVec[t].m_edgeId;
315 
316  diffA = edgeRemovalVec[t].m_SSDTa;
317  diffB = edgeRemovalVec[t].m_SSDTb;
318 
319  found = true;
320  }
321  }
322 
323  if(found)
324  m_SSDiValues.push_back(maxDiff);
325 
326  edgeRemovalVec.clear();
327 
328  return found;
329 }
330 
331 double te::sa::SkaterPartition::calculateEdgeDifference(int vertexFrom, int vertexTo, double& diffA, double& diffB, std::size_t& popA, std::size_t& popB)
332 {
333  //calculate the deviation for the tree that begins with the vertex from SQDTA
334  std::vector<double> meanVecFrom = calculateRootMean(vertexFrom, vertexTo, popA);
335  diffA = calculateRootDeviation(vertexFrom, vertexTo, meanVecFrom);
336 
337  //calculate the deviation for the tree that begins with the vertex to SQDTB
338  std::vector<double> meanVecTo = calculateRootMean(vertexTo, vertexFrom, popB);
339  diffB = calculateRootDeviation(vertexTo, vertexFrom, meanVecTo);
340 
341  //return the edge cost
342  return diffA + diffB;
343 }
344 
345 std::vector<double> te::sa::SkaterPartition::calculateRootMean(int startVertex, int vertexToIgnore, std::size_t& pop)
346 {
347  std::vector<double> meanAttrs(m_attrs.size(), 0.);
348 
349  std::queue<int> queue;
350  std::set<int> visited;
351 
352  queue.push(startVertex);
353  visited.insert(startVertex);
354 
355  std::size_t count = 0;
356 
357  while(!queue.empty())
358  {
359  int currentId = queue.front();
360  queue.pop();
361 
362  te::graph::Vertex* vertex = m_graph->getVertex(currentId);
363 
364  if(vertex)
365  {
366  for(std::size_t t = 0; t < m_attrs.size(); ++t)
367  {
368  std::string attrName = m_attrs[t];
369 
370  int attrIdx;
371 
372  if(te::sa::GetGraphVertexAttrIndex(m_graph, attrName, attrIdx))
373  {
374  meanAttrs[t] += te::sa::GetDataValue(vertex->getAttributes()[attrIdx]);
375  }
376  }
377 
378  //get population information
379  int popIdx;
380  if(te::sa::GetGraphVertexAttrIndex(m_graph, m_popAttr, popIdx))
381  {
382  pop += (std::size_t)te::sa::GetDataValue(vertex->getAttributes()[popIdx]);
383  }
384 
385  //get neighbours
386  std::set<int> neighbours = vertex->getNeighborhood();
387  std::set<int>::iterator itNeighbours = neighbours.begin();
388 
389  while(itNeighbours != neighbours.end())
390  {
391  te::graph::Edge* e = m_graph->getEdge(*itNeighbours);
392  te::graph::Vertex* vTo = 0;
393 
394  if(e)
395  {
396  if(e->getIdFrom() == currentId)
397  vTo = m_graph->getVertex(e->getIdTo());
398  else
399  vTo = m_graph->getVertex(e->getIdFrom());
400  }
401 
402  if(vTo && vTo->getId() != vertexToIgnore)
403  {
404  if(visited.find(vTo->getId()) == visited.end())
405  {
406  queue.push(vTo->getId());
407  visited.insert(vTo->getId());
408  }
409  }
410 
411  ++itNeighbours;
412  }
413  }
414 
415  ++count;
416  }
417 
418  for(std::size_t t = 0; t < m_attrs.size(); ++t)
419  {
420  meanAttrs[t] /= count;
421  }
422 
423  return meanAttrs;
424 }
425 
426 double te::sa::SkaterPartition::calculateRootDeviation(int startVertex, int vertexToIgnore, std::vector<double>& meanVec)
427 {
428  double deviation = 0.;
429 
430  std::queue<int> queue;
431  std::set<int> visited;
432 
433  queue.push(startVertex);
434  visited.insert(startVertex);
435 
436  while(!queue.empty())
437  {
438  int currentId = queue.front();
439  queue.pop();
440 
441  te::graph::Vertex* vertex = m_graph->getVertex(currentId);
442 
443  if(vertex)
444  {
445  deviation += calculateDistance(vertex, meanVec);
446 
447  //get neighbours
448  std::set<int> neighbours = vertex->getNeighborhood();
449  std::set<int>::iterator itNeighbours = neighbours.begin();
450 
451  while(itNeighbours != neighbours.end())
452  {
453  te::graph::Edge* e = m_graph->getEdge(*itNeighbours);
454  te::graph::Vertex* vTo = 0;
455 
456  if(e)
457  {
458  if(e->getIdFrom() == currentId)
459  vTo = m_graph->getVertex(e->getIdTo());
460  else
461  vTo = m_graph->getVertex(e->getIdFrom());
462  }
463 
464  if(vTo && vTo->getId() != vertexToIgnore)
465  {
466  if(visited.find(vTo->getId()) == visited.end())
467  {
468  queue.push(vTo->getId());
469  visited.insert(vTo->getId());
470  }
471  }
472 
473  ++itNeighbours;
474  }
475  }
476  }
477 
478  return deviation;
479 }
480 
481 double te::sa::SkaterPartition::calculateDistance(te::graph::Vertex* vertex, std::vector<double>& meanVec)
482 {
483  double distance = 0.;
484 
485  for(std::size_t t = 0; t < m_attrs.size(); ++t)
486  {
487  std::string attrName = m_attrs[t];
488 
489  int attrIdx;
490 
491  if(te::sa::GetGraphVertexAttrIndex(m_graph, attrName, attrIdx))
492  {
493  distance += (te::sa::GetDataValue(vertex->getAttributes()[attrIdx]) - meanVec[t]) * (te::sa::GetDataValue(vertex->getAttributes()[attrIdx]) - meanVec[t]);
494  }
495  }
496 
497  return distance;
498 }
double m_SSDTa
Sum of Square Difference for Tree A.
SkaterPartition(te::graph::AbstractGraph *graph, std::vector< std::string > attrs)
Default constructor.
A struct that represents the skater partition values for each edge.
int getId()
It returns the edge identification.
Definition: Edge.cpp:66
std::vector< te::dt::AbstractData * > & getAttributes()
It returns the vector of attributes associated with this element.
Definition: Vertex.cpp:74
double calculateRootDeviation(int startVertex, int vertexToIgnore, std::vector< double > &meanVec)
Utilitary function for spatial analysis module.
From the point of view of graph theory, vertices are treated as featureless and indivisible objects...
Definition: Vertex.h:68
Class used to define the edge struct of a graph. Its compose with a identifier, the vertex origin and...
Definition: Edge.h:58
std::vector< double > calculateRootMean(int startVertex, int vertexToIgnore, std::size_t &pop)
This file contains a class that represents the skater partition operation.
~SkaterPartition()
Virtual destructor.
std::vector< std::size_t > execute(std::size_t nGroups, std::string popAttr="", std::size_t minPop=0)
Function to execute the skater partition.
Abstract class used to define the main functions of graph struct. All graph implementations must used...
Definition: AbstractGraph.h:55
double m_l
Difference between m_SSDT and m_SSDi;.
std::size_t m_edgeId
Edge identification.
std::size_t m_popa
Sum of population for Tree A.
int getIdFrom()
It returns the vertex origin identification.
Definition: Edge.cpp:71
std::set< int > & getNeighborhood()
Returns the Neighborhood vector.
Definition: Vertex.cpp:111
bool edgeToRemove(int startVertex, double &diffA, double &diffB, std::size_t &edgeToRemoveId)
double m_SSDi
Sum of m_SSDa and m_SSDb.
double calculateEdgeDifference(int vertexFrom, int vertexTo, double &diffA, double &diffB, std::size_t &popA, std::size_t &popB)
TESAEXPORT bool GetGraphVertexAttrIndex(te::graph::AbstractGraph *graph, std::string attrName, int &index)
Function used to get the vertex attribute index in the graph of the gpm.
Definition: Utils.cpp:168
std::size_t m_popb
Sum of population for Tree B.
double m_SSDT
Sum of Square Difference for Tree.
int getId()
It returns the vertex id.
Definition: Vertex.cpp:69
TESAEXPORT double GetDataValue(te::dt::AbstractData *ad)
Function used to get the numeric value from a gpm property.
Definition: Utils.cpp:200
double m_SSDTb
Sum of Square Difference for Tree B.
double calculateDistance(te::graph::Vertex *vertex, std::vector< double > &meanVec)
int getIdTo()
It returns the vertex destiny identification.
Definition: Edge.cpp:76