AggregationMemory.cpp
Go to the documentation of this file.
1 /* Copyright (C) 2008 National Institute For Space Research (INPE) - Brazil.
2 
3  This file is part of the TerraLib - a Framework for building GIS enabled applications.
4 
5  TerraLib is free software: you can redistribute it and/or modify
6  it under the terms of the GNU Lesser General Public License as published by
7  the Free Software Foundation, either version 3 of the License,
8  or (at your option) any later version.
9 
10  TerraLib is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU Lesser General Public License for more details.
14 
15  You should have received a copy of the GNU Lesser General Public License
16  along with TerraLib. See COPYING. If not, write to
17  TerraLib Team at <terralib-team@terralib.org>.
18  */
19 
20 /*!
21  \file AggregationMemory.cpp
22 
23  \brief Aggregation Vector Processing functions.
24 */
25 
26 //Terralib
27 
28 #include "../BuildConfig.h"
29 #include "../common/progress/TaskProgress.h"
30 #include "../common/Logger.h"
31 #include "../common/Translator.h"
32 
33 #include "../dataaccess/dataset/DataSet.h"
34 #include "../dataaccess/dataset/DataSetAdapter.h"
35 #include "../dataaccess/utils/Utils.h"
36 #include "../datatype/Property.h"
37 #include "../datatype/SimpleProperty.h"
38 #include "../datatype/StringProperty.h"
39 
40 #include "../geometry/Geometry.h"
41 #include "../geometry/GeometryProperty.h"
42 #include "../geometry/Utils.h"
43 
44 #include "../memory/DataSet.h"
45 #include "../memory/DataSetItem.h"
46 
47 #include "../statistics/core/SummaryFunctions.h"
48 #include "../statistics/core/StringStatisticalSummary.h"
49 #include "../statistics/core/NumericStatisticalSummary.h"
50 #include "../statistics/core/Utils.h"
51 
52 #include "AggregationMemory.h"
53 #include "Config.h"
54 #include "Exception.h"
55 #include "Utils.h"
56 
57 // STL
58 #include <map>
59 #include <math.h>
60 #include <string>
61 #include <vector>
62 
63 // BOOST
64 #include <boost/lexical_cast.hpp>
65 #include <boost/algorithm/string.hpp>
66 
68 {}
69 
71 {}
72 
73 std::map<std::string, std::string> te::vp::AggregationMemory::calculateStringStats(const std::vector<te::mem::DataSetItem*>& items)
74 {
75  std::map<std::string, std::string> result;
76 
77  std::map<te::dt::Property*, std::vector<te::stat::StatisticalSummary> >::const_iterator it = m_statSum.begin();
78  while(it != m_statSum.end())
79  {
80  if(it->first->getType() == te::dt::STRING_TYPE)
81  {
82  size_t propPos = m_converter->getResult()->getPropertyPosition(it->first->getName());
83  std::vector<std::string> values;
84  for(std::size_t i = 0; i < items.size(); ++i)
85  {
86  if (!items[i]->isNull(propPos))
87  values.push_back(items[i]->getString(propPos));
88  }
89 
92 
93  result.insert( std::map<std::string, std::string>::value_type(it->first->getName() + "_MIN_VALUE", ss.m_minVal ));
94  result.insert( std::map<std::string, std::string>::value_type(it->first->getName() + "_MAX_VALUE", ss.m_maxVal ));
95  result.insert( std::map<std::string, std::string>::value_type(it->first->getName() + "_COUNT", boost::lexical_cast<std::string>(items.size())));
96  result.insert( std::map<std::string, std::string>::value_type(it->first->getName()+ "_VALID_COUNT", boost::lexical_cast<std::string>(values.size())));
97  }
98  ++it;
99  }
100  return result;
101 }
102 
103 std::map<std::string,double> te::vp::AggregationMemory::calculateNumStats(const std::vector<te::mem::DataSetItem*>& items, std::string& modeProp, std::string& modeName)
104 {
105  std::map<std::string, double> result;
106  int idProp = 0;
107  std::map<te::dt::Property*, std::vector<te::stat::StatisticalSummary> >::const_iterator it = m_statSum.begin();
108  while(it != m_statSum.end())
109  {
110  if (it->first->getType() != te::dt::STRING_TYPE)
111  {
112  size_t propPos = m_converter->getResult()->getPropertyPosition(it->first->getName());
113  int propType = it->first->getType();
114 
115  std::vector<double> values;
116  for(std::size_t i = 0; i < items.size(); ++i)
117  {
118  if (!items[i]->isNull(propPos))
119  {
120  double numval;
121  if (propType == te::dt::INT16_TYPE)
122  numval = items[i]->getInt16(propPos);
123  else if (propType == te::dt::INT32_TYPE)
124  numval = items[i]->getInt32(propPos);
125  else if (propType == te::dt::INT64_TYPE)
126  numval = (double)items[i]->getInt64(propPos);
127  else if (propType == te::dt::FLOAT_TYPE)
128  numval = items[i]->getFloat(propPos);
129  else if (propType == te::dt::DOUBLE_TYPE)
130  numval = items[i]->getDouble(propPos);
131  else
132  continue;
133 
134  values.push_back(numval);
135  }
136  }
137 
138  std::string propName = it->first->getName();
139 
142 
143  result.insert( std::map<std::string, double>::value_type(propName + "_MIN_VALUE", ss.m_minVal));
144  result.insert( std::map<std::string, double>::value_type(propName + "_MAX_VALUE", ss.m_maxVal));
145  result.insert( std::map<std::string, double>::value_type(propName + "_COUNT", items.size()));
146  result.insert( std::map<std::string, double>::value_type(propName + "_VALID_COUNT", values.size()));
147  result.insert( std::map<std::string, double>::value_type(propName + "_MEAN", ss.m_mean));
148  result.insert( std::map<std::string, double>::value_type(propName + "_SUM", ss.m_sum));
149  result.insert( std::map<std::string, double>::value_type(propName + "_STANDARD_DEVIATION", ss.m_stdDeviation));
150  result.insert( std::map<std::string, double>::value_type(propName + "_VARIANCE", ss.m_variance));
151  result.insert( std::map<std::string, double>::value_type(propName + "_SKEWNESS", ss.m_skewness));
152  result.insert( std::map<std::string, double>::value_type(propName + "_KURTOSIS", ss.m_kurtosis));
153  result.insert( std::map<std::string, double>::value_type(propName + "_AMPLITUDE", ss.m_amplitude));
154  result.insert( std::map<std::string, double>::value_type(propName + "_MEDIAN", ss.m_median));
155  result.insert( std::map<std::string, double>::value_type(propName + "_VAR_COEFF", ss.m_varCoeff));
156 
157  if (!ss.m_mode.empty())
158  {
159  modeProp = boost::lexical_cast<std::string>(ss.m_mode[0]);
160  for(std::size_t i=1; i<ss.m_mode.size(); ++i)
161  {
162  modeProp += ",";
163  modeProp += boost::lexical_cast<std::string>(ss.m_mode[i]);
164  }
165  modeName = propName + "_MODE";
166  }
167  else
168  {
169  modeName = "";
170  modeProp = "";
171  }
172  }
173  ++it;
174  ++idProp;
175  }
176  return result;
177 }
178 
179 std::auto_ptr<te::da::DataSetType> te::vp::AggregationMemory::buildOutDataSetType()
180 {
181  std::auto_ptr<te::da::DataSetType> dataSetType(new te::da::DataSetType(m_outDset));
182 
183  // the property values used to execute the aggregation
184  te::dt::StringProperty* stringProperty = new te::dt::StringProperty("AGG_PROP");
185  dataSetType->add(stringProperty);
186 
187  // the number of objects aggregated
188  te::dt::SimpleProperty* aggregationProperty = new te::dt::SimpleProperty("NUM_OBJ", te::dt::INT32_TYPE);
189  dataSetType->add(aggregationProperty);
190 
191  // properties generated from the statistics requested
192  std::string functionResult;
193  std::vector<te::stat::StatisticalSummary> vectorResult;
194  std::map<te::dt::Property*, std::vector<te::stat::StatisticalSummary> >::const_iterator it = m_statSum.begin();
195  std::string propResult = "";
196  std::string funcResult = "";
197  while(it != m_statSum.end())
198  {
199  propResult = it->first->getName();
200  propResult += "_";
201 
202  vectorResult = it->second;
203 
204  for(std::size_t i = 0; i < vectorResult.size(); ++i)
205  {
206  funcResult = propResult;
207  funcResult += te::stat::GetStatSummaryShortName(vectorResult[i]);
208 
209  if(it->first->getType() == te::dt::STRING_TYPE || vectorResult[i] == te::stat::MODE)
210  {
211  te::dt::StringProperty* funcProp = new te::dt::StringProperty(funcResult);
212  dataSetType->add(funcProp);
213  }
214  else
215  {
217  dataSetType->add(funcProp);
218  }
219  }
220  ++it;
221  }
222 
223  // define the resulting spatial property
224  te::gm::GeometryProperty* p = static_cast<te::gm::GeometryProperty*>(m_converter->getResult()->findFirstPropertyOfType(te::dt::GEOMETRY_TYPE));
225 
226  // creates the output geometry property
227  te::gm::GeometryProperty* geometry = new te::gm::GeometryProperty("geom");
228  geometry->setGeometryType(this->getGeomResultType(p->getGeometryType()));
229  geometry->setSRID(p->getSRID());
230  dataSetType->add(geometry);
231 
232  return dataSetType;
233 }
234 
235 
236 bool te::vp::AggregationMemory::run() throw( te::common::Exception )
237 {
238  te::gm::GeometryProperty* geom = te::da::GetFirstGeomProperty(m_converter->getResult());
239  std::string geomName = geom->getName();
240  std::size_t geomIdx = boost::lexical_cast<std::size_t>(m_converter->getResult()->getPropertyPosition(geomName));
241 
242  // calculate the groups
243 
244  // get the positions of the grouping properties
245  std::vector<size_t> groupPropIdxs;
246  for(std::size_t i=0; i<m_groupProps.size(); ++i)
247  groupPropIdxs.push_back(te::da::GetPropertyPos(m_converter->getResult(), m_groupProps[i]->getName()));
248 
249  std::map<std::string, std::vector<te::mem::DataSetItem*> > groups;
250  std::map<std::string, std::vector<te::mem::DataSetItem*> >::iterator itg;
251 
252  std::auto_ptr<te::da::DataSet> inDsetSrc;
253 
254  if(m_oidSet == 0)
255  inDsetSrc = m_inDsrc->getDataSet(m_inDsetName);
256  else
257  inDsetSrc = m_inDsrc->getDataSet(m_inDsetName, m_oidSet);
258 
259  std::auto_ptr<te::da::DataSetAdapter> inDset(te::da::CreateAdapter(inDsetSrc.get(), m_converter.get()));
260 
261  size_t nprops = inDset->getNumProperties();
262 
263  inDset->moveBeforeFirst();
264  while(inDset->moveNext())
265  {
266  // the group key is a combination of the distinct grouping property values as a string
267  std::string key = inDset->getAsString(groupPropIdxs[0]);
268  for(std::size_t i=1; i<groupPropIdxs.size(); ++i)
269  key += "_" + inDset->getAsString(groupPropIdxs[i]);
270 
271  // copy it to a dataset item in memory (TODO: this should be reviewed to avoid the copy)
272  te::mem::DataSetItem* dataSetItem = new te::mem::DataSetItem(inDset.get());
273  for(std::size_t j=0; j<nprops; ++j)
274  {
275  if (!inDset->isNull(j))
276  {
277  std::auto_ptr<te::dt::AbstractData> val = inDset->getValue(j);
278  dataSetItem->setValue(j,val.release());
279  }
280  }
281 
282  itg = groups.find(key);
283  if (itg==groups.end())
284  {
285  std::vector<te::mem::DataSetItem*> dataSetItemVector;
286  dataSetItemVector.push_back(dataSetItem);
287  groups.insert(std::pair<std::string, std::vector<te::mem::DataSetItem*> >(key,dataSetItemVector));
288  }
289  else
290  itg->second.push_back(dataSetItem);
291  }
292 
293  // define the schema of the output dataset based on the aggregation parameters
294  // for the non-spatial attributes
295  std::auto_ptr<te::da::DataSetType> outDsType = this->buildOutDataSetType();
296 
297  // create the output dataset in memory
298  std::auto_ptr<te::mem::DataSet> outDataset(new te::mem::DataSet(outDsType.get()));
299 
300  // now calculate the aggregation of non spatial and spatial attributes and save it to the output dataset
301  te::common::TaskProgress task("Processing aggregation...");
302  task.setTotalSteps((int)groups.size());
303  task.useTimer(true);
304 
305  itg = groups.begin();
306  while(itg != groups.end())
307  {
308  // calculate the spatial aggregation
309  std::string value = itg->first;
310  te::gm::GeomType outGeoType = getGeomResultType(geom->getGeometryType());
311 
312  //verify geometries
313  for (size_t i = 0; i < itg->second.size(); ++i)
314  {
315  if (!itg->second[i]->getGeometry(geomIdx)->isValid())
316  {
317 #ifdef TERRALIB_LOGGER_ENABLED
318  te::common::Logger::logDebug("vp", "Aggregation - The input layer has invalid geometry.");
319 #endif // TERRALIB_LOGGER_ENABLED
320  }
321  }
322 
323  te::gm::Geometry* geometry = te::vp::GetGeometryUnion(itg->second, geomIdx, outGeoType);
324 
325  // if it returned a valid geometry, include the summarization over non-spatial attributes
326  if(geometry)
327  {
328  // calculate the statistical of text attributes
329  std::map<std::string, std::string> resultString = calculateStringStats(itg->second);
330 
331  // calculate the statistical of numerical attributes
332  std::string smodeprop, smodeval; // special treatment for mode values
333  std::map<std::string, double> resultNumeric = calculateNumStats(itg->second,smodeval,smodeprop);
334 
335  te::mem::DataSetItem* outDSetItem = new te::mem::DataSetItem(outDataset.get());
336 
337  outDSetItem->setString(0, value); // save the group identification (mandatory)
338  outDSetItem->setInt32(1, (int)itg->second.size()); // save the number of objects in the group (mandatory)
339 
340  // save statistics of text attributes
341  std::map<std::string, std::string>::iterator itString = resultString.begin();
342  while(itString != resultString.end())
343  {
344  // esse teste é necessário????
345  if (te::da::GetPropertyPos(outDataset.get(), itString->first) < outDataset->getNumProperties())
346  outDSetItem->setString(itString->first, itString->second);
347  ++itString;
348  }
349 
350  // save statistics of numerical attributes
351  if (!smodeval.empty())
352  outDSetItem->setString(smodeprop, smodeval);
353 
354  std::map<std::string, double>::iterator itNumeric = resultNumeric.begin();
355  while(itNumeric != resultNumeric.end())
356  {
357  if (te::da::GetPropertyPos(outDataset.get(), itNumeric->first) < outDataset->getNumProperties())
358  outDSetItem->setDouble(itNumeric->first, itNumeric->second);
359  ++itNumeric;
360  }
361  outDSetItem->setGeometry("geom", geometry);
362  outDataset->add(outDSetItem);
363  }
364  else
365  {
366 #ifdef TERRALIB_LOGGER_ENABLED
367  te::common::Logger::logDebug("vp", "Aggregation - The operation generated invalid geometry.");
368 #endif // TERRALIB_LOGGER_ENABLED
369  }
370  ++itg;
371 
372  if (task.isActive() == false)
373  throw te::vp::Exception(TE_TR("Operation canceled!"));
374 
375  task.pulse();
376  }
377 
378  te::vp::Save(m_outDsrc.get(), outDataset.get(), outDsType.get());
379  return true;
380 }
TESTATEXPORT void GetNumericStatisticalSummary(std::vector< double > &values, te::stat::NumericStatisticalSummary &ss, double nullVal)
Geometric property.
A structure to hold the set of statistics from a set of numerical values.
An exception class for the Vector processing module.
GeomType
Each enumerated type is compatible with a Well-known Binary (WKB) type code.
Definition: Enums.h:41
void setGeometry(std::size_t i, te::gm::Geometry *value)
It sets the value of the i-th property.
Utility functions for the data access module.
void setSRID(int srid)
It sets the spatial reference system identifier associated to this property.
void setGeometryType(GeomType t)
It sets the geometry subtype.
void setDouble(std::size_t i, double value)
It sets the value of the i-th property.
An atomic property like an integer or double.
A class that models the description of a dataset.
Definition: DataSetType.h:72
void useTimer(bool flag)
Used to define if task use progress timer information.
std::auto_ptr< te::dt::AbstractData > getValue(std::size_t i) const
It returns the value of the i-th property.
TESTATEXPORT std::string GetStatSummaryShortName(const int &e)
Get the statistical parameter short name from its enumerator.
Definition: Utils.cpp:37
te::gm::Geometry * GetGeometryUnion(const std::vector< te::mem::DataSetItem * > &items, size_t geomIdx, te::gm::GeomType outGeoType)
It returns the union of a geometry vector.
Definition: Utils.cpp:52
void Save(te::da::DataSource *source, te::da::DataSet *result, te::da::DataSetType *outDsType)
Definition: Utils.cpp:172
This class can be used to inform the progress of a task.
Definition: TaskProgress.h:53
TEDATAACCESSEXPORT std::size_t GetPropertyPos(const DataSet *dataset, const std::string &name)
Definition: Utils.cpp:500
void setValue(std::size_t i, te::dt::AbstractData *value)
It sets the value of the i-th property.
#define TE_TR(message)
It marks a string in order to get translated.
Definition: Translator.h:346
bool isActive() const
Verify if the task is active.
Aggregation Vector Processing functions.
std::map< std::string, double > calculateNumStats(const std::vector< te::mem::DataSetItem * > &items, std::string &modeProp, std::string &modeName)
void setTotalSteps(int value)
Set the total number of steps of the task.
void setInt32(std::size_t i, boost::int32_t value)
It sets the value of the i-th property.
Implementation of a random-access dataset class for the TerraLib In-Memory Data Access driver...
Definition: DataSet.h:65
int getSRID() const
It returns the spatial reference system identifier associated to this property.
URI C++ Library.
GeomType getGeometryType() const
It returns the geometry subtype allowed for the property.
void pulse()
Calls setCurrentStep() function using getCurrentStep() + 1.
The type for string types: FIXED_STRING, VAR_STRING or STRING.
TESTATEXPORT void GetStringStatisticalSummary(std::vector< std::string > &values, te::stat::StringStatisticalSummary &ss)
Geometry is the root class of the geometries hierarchy, it follows OGC and ISO standards.
Definition: Geometry.h:73
An implementation of the DatasetItem class for the TerraLib In-Memory Data Access driver...
Definition: DataSetItem.h:56
A structure to hold the set of statistics from a set of categorical (sample) values.
Mode.
Definition: Enums.h:54
Configuration flags for the TerraLib Vector Processing module.
void setString(std::size_t i, const std::string &value)
It sets the value of the i-th property.
std::auto_ptr< te::da::DataSetType > buildOutDataSetType()
TEDATAACCESSEXPORT te::gm::GeometryProperty * GetFirstGeomProperty(const DataSetType *dt)
Definition: Utils.cpp:557
std::map< std::string, std::string > calculateStringStats(const std::vector< te::mem::DataSetItem * > &items)
TEDATAACCESSEXPORT DataSetAdapter * CreateAdapter(DataSet *ds, DataSetTypeConverter *converter, bool isOwner=false)
Definition: Utils.cpp:644
const std::string & getName() const
It returns the property name.
Definition: Property.h:127