All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
AggregationMemory.cpp
Go to the documentation of this file.
1 /* Copyright (C) 2008 National Institute For Space Research (INPE) - Brazil.
2 
3  This file is part of the TerraLib - a Framework for building GIS enabled applications.
4 
5  TerraLib is free software: you can redistribute it and/or modify
6  it under the terms of the GNU Lesser General Public License as published by
7  the Free Software Foundation, either version 3 of the License,
8  or (at your option) any later version.
9 
10  TerraLib is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU Lesser General Public License for more details.
14 
15  You should have received a copy of the GNU Lesser General Public License
16  along with TerraLib. See COPYING. If not, write to
17  TerraLib Team at <terralib-team@terralib.org>.
18  */
19 
20 /*!
21  \file AggregationMemory.cpp
22 
23  \brief Aggregation Vector Processing functions.
24 */
25 
26 //Terralib
27 
28 #include "terralib_config.h"
29 #include "../common/progress/TaskProgress.h"
30 #include "../common/Logger.h"
31 #include "../common/Translator.h"
32 
33 #include "../dataaccess/dataset/DataSet.h"
34 #include "../dataaccess/utils/Utils.h"
35 #include "../datatype/Property.h"
36 #include "../datatype/SimpleProperty.h"
37 #include "../datatype/StringProperty.h"
38 
39 #include "../geometry/Geometry.h"
40 #include "../geometry/GeometryProperty.h"
41 #include "../geometry/Utils.h"
42 
43 #include "../memory/DataSet.h"
44 #include "../memory/DataSetItem.h"
45 
46 #include "../statistics/core/SummaryFunctions.h"
47 #include "../statistics/core/StringStatisticalSummary.h"
48 #include "../statistics/core/NumericStatisticalSummary.h"
49 #include "../statistics/core/Utils.h"
50 
51 #include "AggregationMemory.h"
52 #include "Config.h"
53 #include "Exception.h"
54 #include "Utils.h"
55 
56 // STL
57 #include <map>
58 #include <math.h>
59 #include <string>
60 #include <vector>
61 
62 // BOOST
63 #include <boost/lexical_cast.hpp>
64 #include <boost/algorithm/string.hpp>
65 
67 {}
68 
70 {}
71 
// Computes the summary entries for every string-typed property registered in m_statSum.
//
// For each property in m_statSum whose type is te::dt::STRING_TYPE, the non-null string
// values found at that property's position are collected from `items`, and four entries
// are added to the returned map, keyed by "<property name>" plus a suffix:
//   _MIN_VALUE   -> ss.m_minVal
//   _MAX_VALUE   -> ss.m_maxVal
//   _COUNT       -> items.size() rendered as text (null values included)
//   _VALID_COUNT -> number of non-null values rendered as text
//
// \param items The in-memory items that form one aggregation group.
// \return A map from output name to the statistic rendered as a string.
//
// NOTE(review): the declaration and population of `ss` (inner listing lines 89-90) are not
// visible in this listing; presumably a te::stat::StringStatisticalSummary filled from
// `values` -- confirm against the original source file.
72 std::map<std::string, std::string> te::vp::AggregationMemory::calculateStringStats(const std::vector<te::mem::DataSetItem*>& items)
73 {
74  std::map<std::string, std::string> result;
75 
76  std::map<te::dt::Property*, std::vector<te::stat::StatisticalSummary> >::const_iterator it = m_statSum.begin();
77  while(it != m_statSum.end())
78  {
    // only string properties are handled here; the other types are skipped
79  if(it->first->getType() == te::dt::STRING_TYPE)
80  {
81  size_t propPos = m_inDsetType->getPropertyPosition(it->first->getName());
    // gather the non-null values of this property over the whole group
82  std::vector<std::string> values;
83  for(std::size_t i = 0; i < items.size(); ++i)
84  {
85  if (!items[i]->isNull(propPos))
86  values.push_back(items[i]->getString(propPos));
87  }
88 
91 
    // std::map::insert keeps an existing entry when the key is already present
92  result.insert( std::map<std::string, std::string>::value_type(it->first->getName() + "_MIN_VALUE", ss.m_minVal ));
93  result.insert( std::map<std::string, std::string>::value_type(it->first->getName() + "_MAX_VALUE", ss.m_maxVal ));
94  result.insert( std::map<std::string, std::string>::value_type(it->first->getName() + "_COUNT", boost::lexical_cast<std::string>(items.size())));
95  result.insert( std::map<std::string, std::string>::value_type(it->first->getName()+ "_VALID_COUNT", boost::lexical_cast<std::string>(values.size())));
96  }
97  ++it;
98  }
99  return result;
100 }
101 
// Computes the numeric summary entries for every non-string property registered in m_statSum.
//
// For each such property the non-null values of the group are converted to double
// (INT16, INT32, INT64, FLOAT and DOUBLE are handled; items of any other type are skipped)
// and thirteen entries are added to the returned map, keyed by "<property name>" plus one of:
// _MIN_VALUE, _MAX_VALUE, _COUNT, _VALID_COUNT, _MEAN, _SUM, _STANDARD_DEVIATION, _VARIANCE,
// _SKEWNESS, _KURTOSIS, _AMPLITUDE, _MEDIAN, _VAR_COEFF.
//
// \param items    The in-memory items that form one aggregation group.
// \param modeProp Output: the mode value(s) as text, comma-separated when there are several;
//                 set to "" when ss.m_mode is empty.
// \param modeName Output: "<property name>_MODE"; set to "" when ss.m_mode is empty.
// \return A map from output name to the numeric statistic.
//
// NOTE(review): modeProp/modeName are reassigned for every non-string property visited, so
// after the loop they describe only the last one -- confirm this is intended when several
// numeric properties are summarized.
// NOTE(review): despite its name, modeProp receives the mode *values* and modeName the output
// *name*; run() passes (smodeval, smodeprop) in that order.
// NOTE(review): the declaration and population of `ss` (inner listing lines 139-140) are not
// visible in this listing; presumably a te::stat::NumericStatisticalSummary filled from
// `values` -- confirm against the original source file.
102 std::map<std::string,double> te::vp::AggregationMemory::calculateNumStats(const std::vector<te::mem::DataSetItem*>& items, std::string& modeProp, std::string& modeName)
103 {
104  std::map<std::string, double> result;
    // NOTE(review): idProp is incremented below but never read in the visible code
105  int idProp = 0;
106  std::map<te::dt::Property*, std::vector<te::stat::StatisticalSummary> >::const_iterator it = m_statSum.begin();
107  while(it != m_statSum.end())
108  {
    // string properties are excluded here
109  if (it->first->getType() != te::dt::STRING_TYPE)
110  {
111  size_t propPos = m_inDsetType->getPropertyPosition(it->first->getName());
112  int propType = it->first->getType();
113 
    // gather the non-null values of this property, widened to double
114  std::vector<double> values;
115  for(std::size_t i = 0; i < items.size(); ++i)
116  {
117  if (!items[i]->isNull(propPos))
118  {
119  double numval;
120  if (propType == te::dt::INT16_TYPE)
121  numval = items[i]->getInt16(propPos);
122  else if (propType == te::dt::INT32_TYPE)
123  numval = items[i]->getInt32(propPos);
124  else if (propType == te::dt::INT64_TYPE)
125  numval = (double)items[i]->getInt64(propPos);
126  else if (propType == te::dt::FLOAT_TYPE)
127  numval = items[i]->getFloat(propPos);
128  else if (propType == te::dt::DOUBLE_TYPE)
129  numval = items[i]->getDouble(propPos);
130  else
    // unsupported type: move on to the next item without recording a value
131  continue;
132 
133  values.push_back(numval);
134  }
135  }
136 
137  std::string propName = it->first->getName();
138 
141 
    // _COUNT counts every item of the group; _VALID_COUNT only the values collected above
142  result.insert( std::map<std::string, double>::value_type(propName + "_MIN_VALUE", ss.m_minVal));
143  result.insert( std::map<std::string, double>::value_type(propName + "_MAX_VALUE", ss.m_maxVal));
144  result.insert( std::map<std::string, double>::value_type(propName + "_COUNT", items.size()));
145  result.insert( std::map<std::string, double>::value_type(propName + "_VALID_COUNT", values.size()));
146  result.insert( std::map<std::string, double>::value_type(propName + "_MEAN", ss.m_mean));
147  result.insert( std::map<std::string, double>::value_type(propName + "_SUM", ss.m_sum));
148  result.insert( std::map<std::string, double>::value_type(propName + "_STANDARD_DEVIATION", ss.m_stdDeviation));
149  result.insert( std::map<std::string, double>::value_type(propName + "_VARIANCE", ss.m_variance));
150  result.insert( std::map<std::string, double>::value_type(propName + "_SKEWNESS", ss.m_skewness));
151  result.insert( std::map<std::string, double>::value_type(propName + "_KURTOSIS", ss.m_kurtosis));
152  result.insert( std::map<std::string, double>::value_type(propName + "_AMPLITUDE", ss.m_amplitude));
153  result.insert( std::map<std::string, double>::value_type(propName + "_MEDIAN", ss.m_median));
154  result.insert( std::map<std::string, double>::value_type(propName + "_VAR_COEFF", ss.m_varCoeff));
155 
    // the mode may hold several values, so it is returned as text through the out-parameters
156  if (!ss.m_mode.empty())
157  {
158  modeProp = boost::lexical_cast<std::string>(ss.m_mode[0]);
159  for(std::size_t i=1; i<ss.m_mode.size(); ++i)
160  {
161  modeProp += ",";
162  modeProp += boost::lexical_cast<std::string>(ss.m_mode[i]);
163  }
164  modeName = propName + "_MODE";
165  }
166  else
167  {
168  modeName = "";
169  modeProp = "";
170  }
171  }
172  ++it;
173  ++idProp;
174  }
175  return result;
176 }
177 
// Builds the schema (te::da::DataSetType) of the aggregation output, named after m_outDset.
//
// The properties are added in this order:
//   1. "AGG_PROP" (string)  - the group identification;
//   2. "NUM_OBJ"  (INT32)   - the number of objects aggregated;
//   3. one property per (input property, requested statistic) pair found in m_statSum, named
//      "<property name>_<statistic short name>"; it is a string property when the input
//      property is a string or the statistic is te::stat::MODE;
//   4. "geom" - the output geometry, whose subtype comes from getGeomResultType() and whose
//      SRID is copied from the first geometry property of the input dataset type.
//
// \return The new dataset type; the returned std::auto_ptr holds it.
//
// NOTE(review): the declaration of `funcProp` in the else-branch (inner listing line 215) is
// not visible in this listing -- confirm its property type against the original source file.
178 std::auto_ptr<te::da::DataSetType> te::vp::AggregationMemory::buildOutDataSetType()
179 {
180  std::auto_ptr<te::da::DataSetType> dataSetType(new te::da::DataSetType(m_outDset));
181 
182  // the property values used to execute the aggregation
183  te::dt::StringProperty* stringProperty = new te::dt::StringProperty("AGG_PROP");
184  dataSetType->add(stringProperty);
185 
186  // the number of objects aggregated
187  te::dt::SimpleProperty* aggregationProperty = new te::dt::SimpleProperty("NUM_OBJ", te::dt::INT32_TYPE);
188  dataSetType->add(aggregationProperty);
189 
190  // properties generated from the statistics requested
    // NOTE(review): functionResult is never used in the visible code
191  std::string functionResult;
192  std::vector<te::stat::StatisticalSummary> vectorResult;
193  std::map<te::dt::Property*, std::vector<te::stat::StatisticalSummary> >::const_iterator it = m_statSum.begin();
194  std::string propResult, funcResult;
195  while(it != m_statSum.end())
196  {
    // common prefix "<property name>_" for every statistic of this property
197  propResult = "";
198  propResult = it->first->getName();
199  propResult += "_";
200 
    // copies the list of statistics requested for this property
201  vectorResult = it->second;
202 
203  for(std::size_t i = 0; i < vectorResult.size(); ++i)
204  {
205  funcResult = propResult;
206  funcResult += te::stat::GetStatSummaryShortName(vectorResult[i]);
207 
    // statistics of string properties, and the mode of any property, are stored as text
208  if(it->first->getType() == te::dt::STRING_TYPE || vectorResult[i] == te::stat::MODE)
209  {
210  te::dt::StringProperty* funcProp = new te::dt::StringProperty(funcResult);
211  dataSetType->add(funcProp);
212  }
213  else
214  {
216  dataSetType->add(funcProp);
217  }
218  }
219  ++it;
220  }
221 
222  // define the resulting spatial property
    // NOTE(review): p is dereferenced below without a null check -- confirm the input dataset
    // type always has a geometry property
223  te::gm::GeometryProperty* p = static_cast<te::gm::GeometryProperty*>(m_inDsetType->findFirstPropertyOfType(te::dt::GEOMETRY_TYPE));
224 
225  // creates the output geometry property
226  te::gm::GeometryProperty* geometry = new te::gm::GeometryProperty("geom");
227  geometry->setGeometryType(this->getGeomResultType(p->getGeometryType()));
228  geometry->setSRID(p->getSRID());
229  dataSetType->add(geometry);
230 
231  return dataSetType;
232 }
233 
234 
// Executes the aggregation in memory.
//
// Steps, in order:
//   1. reads the input dataset from m_inDsrc (restricted to m_oidSet when it is not 0);
//   2. copies every row into a te::mem::DataSetItem and groups the copies by a key built from
//      the string form of the grouping property values, joined with "_";
//   3. builds the output schema with buildOutDataSetType() and an in-memory output dataset;
//   4. for each group, computes the union of its geometries and, when a geometry is returned,
//      the string and numeric statistics, storing them in a new output item;
//   5. saves the output dataset with te::vp::Save().
//
// \return true once the output dataset has been handed to te::vp::Save().
// \exception te::vp::Exception when the progress task is no longer active (operation canceled).
//
// NOTE(review): the declaration of `outGeoType` (inner listing line 307) is not visible in
// this listing -- confirm against the original source file.
// NOTE(review): the DataSetItem objects created with `new` and stored in `groups` are not
// deleted anywhere in the visible code -- confirm who owns them; otherwise they leak, also
// when the cancellation exception is thrown.
// NOTE(review): dynamic exception specifications and std::auto_ptr were removed in C++17.
235 bool te::vp::AggregationMemory::run() throw( te::common::Exception )
236 {
237  te::gm::GeometryProperty* geom = te::da::GetFirstGeomProperty(m_inDsetType.get());
238  std::string geomName = geom->getName();
239  std::size_t geomIdx = boost::lexical_cast<std::size_t>(m_inDsetType->getPropertyPosition(geomName));
240 
241  // calculate the groups
242 
243  // get the positions of the grouping properties
244  std::vector<size_t> groupPropIdxs;
245  for(std::size_t i=0; i<m_groupProps.size(); ++i)
246  groupPropIdxs.push_back(te::da::GetPropertyPos(m_inDsetType.get(), m_groupProps[i]->getName()));
247 
248  std::map<std::string, std::vector<te::mem::DataSetItem*> > groups;
249  std::map<std::string, std::vector<te::mem::DataSetItem*> >::iterator itg;
250 
251  std::auto_ptr<te::da::DataSet> inDset;
252 
    // without an object id set the whole dataset is read; otherwise only the selected objects
253  if(m_oidSet == 0)
254  inDset = m_inDsrc->getDataSet(m_inDsetName);
255  else
256  inDset = m_inDsrc->getDataSet(m_inDsetName, m_oidSet);
257 
258  size_t nprops = inDset->getNumProperties();
259 
260  inDset->moveBeforeFirst();
261  while(inDset->moveNext())
262  {
263  // the group key is a combination of the distinct grouping property values as a string
    // NOTE(review): groupPropIdxs[0] is read without checking that m_groupProps is non-empty
264  std::string key = inDset->getAsString(groupPropIdxs[0]);
265  for(std::size_t i=1; i<groupPropIdxs.size(); ++i)
266  key += "_" + inDset->getAsString(groupPropIdxs[i]);
267 
268  // copy it to a dataset item in memory (TODO: this should be reviewed to avoid the copy)
269  te::mem::DataSetItem* dataSetItem = new te::mem::DataSetItem(inDset.get());
270  for(std::size_t j=0; j<nprops; ++j)
271  {
    // null values are left unset in the copy
272  if (!inDset->isNull(j))
273  {
274  std::auto_ptr<te::dt::AbstractData> val = inDset->getValue(j);
275  dataSetItem->setValue(j,val.release());
276  }
277  }
278 
    // append the copy to its group, creating the group on first use
279  itg = groups.find(key);
280  if (itg==groups.end())
281  {
282  std::vector<te::mem::DataSetItem*> dataSetItemVector;
283  dataSetItemVector.push_back(dataSetItem);
284  groups.insert(std::pair<std::string, std::vector<te::mem::DataSetItem*> >(key,dataSetItemVector));
285  }
286  else
287  itg->second.push_back(dataSetItem);
288  }
289 
290  // define the schema of the output dataset based on the aggregation parameters
291  // for the non-spatial attributes
292  std::auto_ptr<te::da::DataSetType> outDsType = this->buildOutDataSetType();
293 
294  // create the output dataset in memory
295  std::auto_ptr<te::mem::DataSet> outDataset(new te::mem::DataSet(outDsType.get()));
296 
297  // now calculate the aggregation of non spatial and spatial attributes and save it to the output dataset
298  te::common::TaskProgress task("Processing aggregation...");
299  task.setTotalSteps(groups.size());
300  task.useTimer(true);
301 
302  itg = groups.begin();
303  while(itg != groups.end())
304  {
305  // calculate the spatial aggregation
306  std::string value = itg->first;
308 
309  //verify geometries
    // an invalid geometry is only reported (when logging is enabled); processing continues
    // NOTE(review): geometries that were null in the input were never set on the item --
    // confirm getGeometry() cannot yield a null pointer here before it is dereferenced
310  for (size_t i = 0; i < itg->second.size(); ++i)
311  {
312  if (!itg->second[i]->getGeometry(geomIdx)->isValid())
313  {
314 #ifdef TERRALIB_LOGGER_ENABLED
315  te::common::Logger::logDebug("vp", "Aggregation - The input layer has invalid geometry.");
316 #endif // TERRALIB_LOGGER_ENABLED
317  }
318  }
319 
320  te::gm::Geometry* geometry = te::vp::GetGeometryUnion(itg->second, geomIdx, outGeoType);
321 
322  // if it returned a valid geometry, include the summarization over non-spatial attributes
323  if(geometry)
324  {
325  // calculate the statistical of text attributes
326  std::map<std::string, std::string> resultString = calculateStringStats(itg->second);
327 
328  // calculate the statistical of numerical attributes
329  std::string smodeprop, smodeval; // special treatment for mode values
    // smodeval receives the mode value(s) as text, smodeprop the "<property>_MODE" output name
330  std::map<std::string, double> resultNumeric = calculateNumStats(itg->second,smodeval,smodeprop);
331 
332  te::mem::DataSetItem* outDSetItem = new te::mem::DataSetItem(outDataset.get());
333 
334  outDSetItem->setString(0, value); // save the group identification (mandatory)
335  outDSetItem->setInt32(1, itg->second.size()); // save the number of objects in the group (mandatory)
336 
337  // save statistics of text attributes
338  std::map<std::string, std::string>::iterator itString = resultString.begin();
339  while(itString != resultString.end())
340  {
341  // is this check necessary????
    // only statistics that have a matching property in the output dataset are stored
342  if (te::da::GetPropertyPos(outDataset.get(), itString->first) < outDataset->getNumProperties())
343  outDSetItem->setString(itString->first, itString->second);
344  ++itString;
345  }
346 
347  // save statistics of numerical attributes
348  if (!smodeval.empty())
349  outDSetItem->setString(smodeprop, smodeval);
350 
351  std::map<std::string, double>::iterator itNumeric = resultNumeric.begin();
352  while(itNumeric != resultNumeric.end())
353  {
354  if (te::da::GetPropertyPos(outDataset.get(), itNumeric->first) < outDataset->getNumProperties())
355  outDSetItem->setDouble(itNumeric->first, itNumeric->second);
356  ++itNumeric;
357  }
358  outDSetItem->setGeometry("geom", geometry);
359  outDataset->add(outDSetItem);
360  }
361  else
362  {
363 #ifdef TERRALIB_LOGGER_ENABLED
364  te::common::Logger::logDebug("vp", "Aggregation - The operation generated invalid geometry.");
365 #endif // TERRALIB_LOGGER_ENABLED
366  }
367  ++itg;
368 
    // an inactive task is treated as a cancellation request
369  if (task.isActive() == false)
370  throw te::vp::Exception(TE_TR("Operation canceled!"));
371 
372  task.pulse();
373  }
374 
375  te::vp::Save(m_outDsrc.get(), outDataset.get(), outDsType.get());
376  return true;
377 }
TESTATEXPORT void GetNumericStatisticalSummary(std::vector< double > &values, te::stat::NumericStatisticalSummary &ss, double nullVal)
Geometric property.
A structure to hold the set of statistics from a set of numerical values.
An exception class for the Vector processing module.
GeomType
Each enumerated type is compatible with a Well-known Binary (WKB) type code.
Definition: Enums.h:41
void setGeometry(std::size_t i, te::gm::Geometry *value)
It sets the value of the i-th property.
Utility functions for the data access module.
void setSRID(int srid)
It sets the spatial reference system identifier associated to this property.
void setGeometryType(GeomType t)
It sets the geometry subtype.
void setDouble(std::size_t i, double value)
It sets the value of the i-th property.
An atomic property like an integer or double.
A class that models the description of a dataset.
Definition: DataSetType.h:72
void useTimer(bool flag)
Used to define whether the task uses progress timer information.
std::auto_ptr< te::dt::AbstractData > getValue(std::size_t i) const
It returns the value of the i-th property.
TESTATEXPORT std::string GetStatSummaryShortName(const int &e)
Get the statistical parameter short name from its enumerator.
Definition: Utils.cpp:37
te::gm::Geometry * GetGeometryUnion(const std::vector< te::mem::DataSetItem * > &items, size_t geomIdx, te::gm::GeomType outGeoType)
It returns the union of a geometry vector.
Definition: Utils.cpp:52
void Save(te::da::DataSource *source, te::da::DataSet *result, te::da::DataSetType *outDsType)
Definition: Utils.cpp:213
This class can be used to inform the progress of a task.
Definition: TaskProgress.h:53
TEDATAACCESSEXPORT std::size_t GetPropertyPos(const DataSet *dataset, const std::string &name)
Definition: Utils.cpp:500
void setValue(std::size_t i, te::dt::AbstractData *value)
It sets the value of the i-th property.
#define TE_TR(message)
It marks a string in order to get translated.
Definition: Translator.h:347
bool isActive() const
Verify if the task is active.
Aggregation Vector Processing functions.
std::map< std::string, double > calculateNumStats(const std::vector< te::mem::DataSetItem * > &items, std::string &modeProp, std::string &modeName)
void setTotalSteps(int value)
Set the task total steps.
void setInt32(std::size_t i, boost::int32_t value)
It sets the value of the i-th property.
Implementation of a random-access dataset class for the TerraLib In-Memory Data Access driver...
Definition: DataSet.h:65
int getSRID() const
It returns the spatial reference system identifier associated to this property.
GeomType getGeometryType() const
It returns the geometry subtype allowed for the property.
void pulse()
Calls setCurrentStep() function using getCurrentStep() + 1.
The type for string types: FIXED_STRING, VAR_STRING or STRING.
TESTATEXPORT void GetStringStatisticalSummary(std::vector< std::string > &values, te::stat::StringStatisticalSummary &ss)
Geometry is the root class of the geometries hierarchy, it follows OGC and ISO standards.
Definition: Geometry.h:73
An implementation of the DatasetItem class for the TerraLib In-Memory Data Access driver...
Definition: DataSetItem.h:56
te::gm::GeomType GeomOpResultType(te::gm::GeomType firstGeom, te::gm::GeomType secondGeom)
Definition: Utils.cpp:172
A structure to hold the set of statistics from a set of categorical (sample) values.
Mode.
Definition: Enums.h:54
Configuration flags for the TerraLib Vector Processing module.
void setString(std::size_t i, const std::string &value)
It sets the value of the i-th property.
std::auto_ptr< te::da::DataSetType > buildOutDataSetType()
TEDATAACCESSEXPORT te::gm::GeometryProperty * GetFirstGeomProperty(const DataSetType *dt)
Definition: Utils.cpp:557
std::map< std::string, std::string > calculateStringStats(const std::vector< te::mem::DataSetItem * > &items)
const std::string & getName() const
It returns the property name.
Definition: Property.h:127