All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
AggregationMemory.cpp
Go to the documentation of this file.
1 /* Copyright (C) 2008-2013 National Institute For Space Research (INPE) - Brazil.
2 
3  This file is part of the TerraLib - a Framework for building GIS enabled applications.
4 
5  TerraLib is free software: you can redistribute it and/or modify
6  it under the terms of the GNU Lesser General Public License as published by
7  the Free Software Foundation, either version 3 of the License,
8  or (at your option) any later version.
9 
10  TerraLib is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU Lesser General Public License for more details.
14 
15  You should have received a copy of the GNU Lesser General Public License
16  along with TerraLib. See COPYING. If not, write to
17  TerraLib Team at <terralib-team@terralib.org>.
18  */
19 
20 /*!
21  \file AggregationMemory.cpp
22 
23  \brief Aggregation Vector Processing functions.
24 */
25 
26 //Terralib
27 
28 #include "../common/progress/TaskProgress.h"
29 #include "../common/Logger.h"
30 #include "../common/Translator.h"
31 
32 #include "../dataaccess/dataset/DataSet.h"
33 #include "../dataaccess/utils/Utils.h"
34 #include "../datatype/Property.h"
35 #include "../datatype/SimpleProperty.h"
36 #include "../datatype/StringProperty.h"
37 
38 #include "../geometry/Geometry.h"
39 #include "../geometry/GeometryProperty.h"
40 #include "../geometry/Utils.h"
41 
42 #include "../memory/DataSet.h"
43 #include "../memory/DataSetItem.h"
44 
45 #include "../statistics/core/SummaryFunctions.h"
46 #include "../statistics/core/StringStatisticalSummary.h"
47 #include "../statistics/core/NumericStatisticalSummary.h"
48 #include "../statistics/core/Utils.h"
49 
50 #include "AggregationMemory.h"
51 #include "Config.h"
52 #include "Exception.h"
53 #include "Utils.h"
54 
55 // STL
56 #include <map>
57 #include <math.h>
58 #include <string>
59 #include <vector>
60 
61 // BOOST
62 #include <boost/lexical_cast.hpp>
63 #include <boost/algorithm/string.hpp>
64 
66 {}
67 
69 {}
70 
/*!
  \brief Builds the string-valued statistics for every STRING_TYPE property registered in m_statSum.

  For each such property the non-null values found in \a items are collected, and four
  entries are stored in the result, keyed by the property name plus a suffix:
  "_MIN_VALUE" (ss.m_minVal), "_MAX_VALUE" (ss.m_maxVal), "_COUNT" (items.size())
  and "_VALID_COUNT" (number of non-null values collected).

  \param items The dataset items to be summarized.

  \return A map from "<property name><suffix>" to the statistic as a string.

  \note NOTE(review): the declaration and filling of `ss` are not visible in this listing
        (the embedded line numbers jump from 87 to 90); confirm against the original file.
*/
71 std::map<std::string, std::string> te::vp::AggregationMemory::calculateStringStats(const std::vector<te::mem::DataSetItem*>& items)
72 {
73  std::map<std::string, std::string> result;
74 
 // iterate over every property held in m_statSum; only string-typed ones are handled here
75  std::map<te::dt::Property*, std::vector<te::stat::StatisticalSummary> >::const_iterator it = m_statSum.begin();
76  while(it != m_statSum.end())
77  {
78  if(it->first->getType() == te::dt::STRING_TYPE)
79  {
 // position of this property in the input dataset type, used to read the items
80  size_t propPos = m_inDsetType->getPropertyPosition(it->first->getName());
81  std::vector<std::string> values;
82  for(std::size_t i = 0; i < items.size(); ++i)
83  {
 // null values are left out of the collected values
84  if (!items[i]->isNull(propPos))
85  values.push_back(items[i]->getString(propPos));
86  }
87 
90 
 // "_COUNT" counts every item, "_VALID_COUNT" only the non-null values collected above
91  result.insert( std::map<std::string, std::string>::value_type(it->first->getName() + "_MIN_VALUE", ss.m_minVal ));
92  result.insert( std::map<std::string, std::string>::value_type(it->first->getName() + "_MAX_VALUE", ss.m_maxVal ));
93  result.insert( std::map<std::string, std::string>::value_type(it->first->getName() + "_COUNT", boost::lexical_cast<std::string>(items.size())));
94  result.insert( std::map<std::string, std::string>::value_type(it->first->getName()+ "_VALID_COUNT", boost::lexical_cast<std::string>(values.size())));
95  }
96  ++it;
97  }
98  return result;
99 }
100 
/*!
  \brief Builds the numeric statistics for every non-STRING_TYPE property registered in m_statSum.

  For each such property the non-null values of \a items are read as INT16, INT32, INT64,
  FLOAT or DOUBLE and stored as double; items of a property with any other type are skipped.
  The statistics are stored in the result keyed by the property name plus a suffix
  ("_MIN_VALUE", "_MAX_VALUE", "_COUNT", "_VALID_COUNT", "_MEAN", "_SUM",
  "_STANDARD_DEVIATION", "_VARIANCE", "_SKEWNESS", "_KURTOSIS", "_AMPLITUDE",
  "_MEDIAN", "_VAR_COEFF").

  \param items    The dataset items to be summarized.
  \param modeProp Output: the values of ss.m_mode joined by ",", or "" when ss.m_mode is empty.
  \param modeName Output: the property name plus "_MODE", or "" when ss.m_mode is empty.

  \return A map from "<property name><suffix>" to the statistic value.

  \note Both output strings are assigned again for every non-string property, so after the
        loop they hold what was computed for the last non-string property visited.
  \note NOTE(review): the declaration and filling of `ss` are not visible in this listing
        (the embedded line numbers jump from 137 to 140); confirm against the original file.
*/
101 std::map<std::string,double> te::vp::AggregationMemory::calculateNumStats(const std::vector<te::mem::DataSetItem*>& items, std::string& modeProp, std::string& modeName)
102 {
103  std::map<std::string, double> result;
 // NOTE(review): idProp is only incremented; no visible line reads it
104  int idProp = 0;
105  std::map<te::dt::Property*, std::vector<te::stat::StatisticalSummary> >::const_iterator it = m_statSum.begin();
106  while(it != m_statSum.end())
107  {
108  if (it->first->getType() != te::dt::STRING_TYPE)
109  {
110  size_t propPos = m_inDsetType->getPropertyPosition(it->first->getName());
111  int propType = it->first->getType();
112 
 // collect the non-null values of this property as doubles
113  std::vector<double> values;
114  for(std::size_t i = 0; i < items.size(); ++i)
115  {
116  if (!items[i]->isNull(propPos))
117  {
 // use the getter that matches the property type; any other type skips this item
118  double numval;
119  if (propType == te::dt::INT16_TYPE)
120  numval = items[i]->getInt16(propPos);
121  else if (propType == te::dt::INT32_TYPE)
122  numval = items[i]->getInt32(propPos);
123  else if (propType == te::dt::INT64_TYPE)
124  numval = (double)items[i]->getInt64(propPos);
125  else if (propType == te::dt::FLOAT_TYPE)
126  numval = items[i]->getFloat(propPos);
127  else if (propType == te::dt::DOUBLE_TYPE)
128  numval = items[i]->getDouble(propPos);
129  else
130  continue;
131 
132  values.push_back(numval);
133  }
134  }
135 
136  std::string propName = it->first->getName();
137 
140 
 // "_COUNT" counts every item, "_VALID_COUNT" only the values collected above
141  result.insert( std::map<std::string, double>::value_type(propName + "_MIN_VALUE", ss.m_minVal));
142  result.insert( std::map<std::string, double>::value_type(propName + "_MAX_VALUE", ss.m_maxVal));
143  result.insert( std::map<std::string, double>::value_type(propName + "_COUNT", items.size()));
144  result.insert( std::map<std::string, double>::value_type(propName + "_VALID_COUNT", values.size()));
145  result.insert( std::map<std::string, double>::value_type(propName + "_MEAN", ss.m_mean));
146  result.insert( std::map<std::string, double>::value_type(propName + "_SUM", ss.m_sum));
147  result.insert( std::map<std::string, double>::value_type(propName + "_STANDARD_DEVIATION", ss.m_stdDeviation));
148  result.insert( std::map<std::string, double>::value_type(propName + "_VARIANCE", ss.m_variance));
149  result.insert( std::map<std::string, double>::value_type(propName + "_SKEWNESS", ss.m_skewness));
150  result.insert( std::map<std::string, double>::value_type(propName + "_KURTOSIS", ss.m_kurtosis));
151  result.insert( std::map<std::string, double>::value_type(propName + "_AMPLITUDE", ss.m_amplitude));
152  result.insert( std::map<std::string, double>::value_type(propName + "_MEDIAN", ss.m_median));
153  result.insert( std::map<std::string, double>::value_type(propName + "_VAR_COEFF", ss.m_varCoeff));
154 
 // the mode may hold several values, so it is returned as a comma-separated string
 // through the output parameters instead of being stored in the numeric map
155  if (!ss.m_mode.empty())
156  {
157  modeProp = boost::lexical_cast<std::string>(ss.m_mode[0]);
158  for(std::size_t i=1; i<ss.m_mode.size(); ++i)
159  {
160  modeProp += ",";
161  modeProp += boost::lexical_cast<std::string>(ss.m_mode[i]);
162  }
163  modeName = propName + "_MODE";
164  }
165  else
166  {
167  modeName = "";
168  modeProp = "";
169  }
170  }
171  ++it;
172  ++idProp;
173  }
174  return result;
175 }
176 
/*!
  \brief Builds the dataset type (schema) of the aggregation output.

  The dataset type is constructed from m_outDset and receives, in this order:
  the string property "AGG_PROP", the INT32 property "NUM_OBJ", one property per
  (property, statistical summary) pair held in m_statSum, named
  "<property name>_<summary short name>", and finally the geometry property "geom".

  \return The newly created dataset type, owned by the returned auto_ptr.

  \note NOTE(review): the declaration of `funcProp` in the else branch is not visible in
        this listing (embedded line 214 is missing); confirm against the original file.
*/
177 std::auto_ptr<te::da::DataSetType> te::vp::AggregationMemory::buildOutDataSetType()
178 {
179  std::auto_ptr<te::da::DataSetType> dataSetType(new te::da::DataSetType(m_outDset));
180 
181  // the property values used to execute the aggregation
182  te::dt::StringProperty* stringProperty = new te::dt::StringProperty("AGG_PROP");
183  dataSetType->add(stringProperty);
184 
185  // the number of objects aggregated
186  te::dt::SimpleProperty* aggregationProperty = new te::dt::SimpleProperty("NUM_OBJ", te::dt::INT32_TYPE);
187  dataSetType->add(aggregationProperty);
188 
189  // properties generated from the statistics requested
 // NOTE(review): functionResult is not used by any visible line
190  std::string functionResult;
191  std::vector<te::stat::StatisticalSummary> vectorResult;
192  std::map<te::dt::Property*, std::vector<te::stat::StatisticalSummary> >::const_iterator it = m_statSum.begin();
193  std::string propResult, funcResult;
194  while(it != m_statSum.end())
195  {
 // prefix shared by all output properties derived from this input property
196  propResult = "";
197  propResult = it->first->getName();
198  propResult += "_";
199 
 // copy of the summaries requested for this property
200  vectorResult = it->second;
201 
202  for(std::size_t i = 0; i < vectorResult.size(); ++i)
203  {
204  funcResult = propResult;
205  funcResult += te::stat::GetStatSummaryShortName(vectorResult[i]);
206 
 // summaries of string properties, and the MODE summary of any property, are stored as strings
207  if(it->first->getType() == te::dt::STRING_TYPE || vectorResult[i] == te::stat::MODE)
208  {
209  te::dt::StringProperty* funcProp = new te::dt::StringProperty(funcResult);
210  dataSetType->add(funcProp);
211  }
212  else
213  {
215  dataSetType->add(funcProp);
216  }
217  }
218  ++it;
219  }
220 
221  // define the resulting spatial property
 // NOTE(review): p is dereferenced below without a null check; presumably the input
 // dataset type always has a geometry property (to be confirmed)
222  te::gm::GeometryProperty* p = static_cast<te::gm::GeometryProperty*>(m_inDsetType->findFirstPropertyOfType(te::dt::GEOMETRY_TYPE));
223 
224  // creates the output geometry property
 // the geometry subtype comes from getGeomResultType(); the SRID is copied from the input property
225  te::gm::GeometryProperty* geometry = new te::gm::GeometryProperty("geom");
226  geometry->setGeometryType(this->getGeomResultType(p->getGeometryType()));
227  geometry->setSRID(p->getSRID());
228  dataSetType->add(geometry);
229 
230  return dataSetType;
231 }
232 
233 
235 {
 // NOTE(review): the signature of this method is not visible in this listing (embedded
 // line 234 is missing). The body groups the input items by the grouping properties,
 // builds one output item per group (statistics plus geometry union) and returns the
 // result of save().
236  te::gm::GeometryProperty* geom = te::da::GetFirstGeomProperty(m_inDsetType.get());
237  std::string geomName = geom->getName();
238  std::size_t geomIdx = boost::lexical_cast<std::size_t>(m_inDsetType->getPropertyPosition(geomName));
239 
240  // calculate the groups
241 
242  // get the positions of the grouping properties
243  std::vector<size_t> groupPropIdxs;
244  for(std::size_t i=0; i<m_groupProps.size(); ++i)
245  groupPropIdxs.push_back(te::da::GetPropertyPos(m_inDsetType.get(), m_groupProps[i]->getName()));
246 
 // group key -> in-memory copies of the items that belong to the group
247  std::map<std::string, std::vector<te::mem::DataSetItem*> > groups;
248  std::map<std::string, std::vector<te::mem::DataSetItem*> >::iterator itg;
249 
250  std::auto_ptr<te::da::DataSet> inDset;
251 
 // without an object id set the dataset is requested by name only; otherwise m_oidSet is passed too
252  if(m_oidSet == 0)
253  inDset = m_inDsrc->getDataSet(m_inDsetName);
254  else
255  inDset = m_inDsrc->getDataSet(m_inDsetName, m_oidSet);
256 
257  size_t nprops = inDset->getNumProperties();
258 
259  inDset->moveBeforeFirst();
260  while(inDset->moveNext())
261  {
262  // the group key is a combination of the distinct grouping property values as a string
 // NOTE(review): groupPropIdxs[0] is read unconditionally, so this assumes m_groupProps
 // is not empty. Values are joined with "_", so values that themselves contain "_" may
 // yield the same key for different combinations (to be confirmed).
263  std::string key = inDset->getAsString(groupPropIdxs[0]);
264  for(std::size_t i=1; i<groupPropIdxs.size(); ++i)
265  key += "_" + inDset->getAsString(groupPropIdxs[i]);
266 
267  // copy it to a dataset item in memory (TODO: this should be reviewed to avoid the copy)
 // NOTE(review): the items created here are stored in `groups`; no visible line of this
 // body deletes them (ownership to be confirmed)
268  te::mem::DataSetItem* dataSetItem = new te::mem::DataSetItem(inDset.get());
269  for(std::size_t j=0; j<nprops; ++j)
270  {
271  if (!inDset->isNull(j))
272  {
273  std::auto_ptr<te::dt::AbstractData> val = inDset->getValue(j);
274  dataSetItem->setValue(j,val.release());
275  }
276  }
277 
 // append the item to its group, creating the group on first use
278  itg = groups.find(key);
279  if (itg==groups.end())
280  {
281  std::vector<te::mem::DataSetItem*> dataSetItemVector;
282  dataSetItemVector.push_back(dataSetItem);
283  groups.insert(std::pair<std::string, std::vector<te::mem::DataSetItem*> >(key,dataSetItemVector));
284  }
285  else
286  itg->second.push_back(dataSetItem);
287  }
288 
289 
290  // error handling if no groups were generated (no such handling is present here)
291 
292  // define the schema of the output dataset based on the aggregation parameters
293  // for the non-spatial attributes
294  std::auto_ptr<te::da::DataSetType> outDsType = this->buildOutDataSetType();
295 
296  // create the output dataset in memory
297  std::auto_ptr<te::mem::DataSet> outDataset(new te::mem::DataSet(outDsType.get()));
298 
299  // now calculate the aggregation of non spatial and spatial attributes and save it to the output dataset
300  te::common::TaskProgress task("Processing aggregation...");
301  task.setTotalSteps(groups.size());
302  task.useTimer(true);
303 
304  itg = groups.begin();
305  while(itg != groups.end())
306  {
307  // calculate the spatial aggregation
 // NOTE(review): the declaration of outGeoType is not visible in this listing (embedded
 // line 309 is missing)
308  std::string value = itg->first;
310  te::gm::Geometry* geometry = te::vp::GetGeometryUnion(itg->second, geomIdx, outGeoType);
311 
312  // if it returned a valid geometry, include the summarization over non-spatial attributes
313  if(geometry)
314  {
315  // calculate the statistical of text attributes
316  std::map<std::string, std::string> resultString = calculateStringStats(itg->second);
317 
318  // calculate the statistical of numerical attributes
 // calculateNumStats writes the joined mode values into its 2nd argument (smodeval) and
 // the "<property>_MODE" name into its 3rd argument (smodeprop)
319  std::string smodeprop, smodeval; // special treatment for mode values
320  std::map<std::string, double> resultNumeric = calculateNumStats(itg->second,smodeval,smodeprop);
321 
322  te::mem::DataSetItem* outDSetItem = new te::mem::DataSetItem(outDataset.get());
323 
324  outDSetItem->setString(0, value); // save the group identification (mandatory)
325  outDSetItem->setInt32(1, itg->second.size()); // save the number of objects in the group (mandatory)
326 
327  // save statistics of text attributes
328  std::map<std::string, std::string>::iterator itString = resultString.begin();
329  while(itString != resultString.end())
330  {
331  // is this check really necessary?
 // a statistic is stored only when GetPropertyPos() yields a position below the number
 // of output properties; presumably this filters out statistics that were not
 // requested (to be confirmed)
332  if (te::da::GetPropertyPos(outDataset.get(), itString->first) < outDataset->getNumProperties())
333  outDSetItem->setString(itString->first, itString->second);
334  ++itString;
335  }
336 
337  // save statistics of numerical attributes
338  if (!smodeval.empty())
339  outDSetItem->setString(smodeprop, smodeval);
340 
341  std::map<std::string, double>::iterator itNumeric = resultNumeric.begin();
342  while(itNumeric != resultNumeric.end())
343  {
344  if (te::da::GetPropertyPos(outDataset.get(), itNumeric->first) < outDataset->getNumProperties())
345  outDSetItem->setDouble(itNumeric->first, itNumeric->second);
346  ++itNumeric;
347  }
348  outDSetItem->setGeometry("geom", geometry);
349  outDataset->add(outDSetItem);
350  }
351  ++itg;
352 
 // stop with an exception when the task is no longer active
353  if (task.isActive() == false)
354  throw te::vp::Exception(TE_TR("Operation canceled!"));
355 
356  task.pulse();
357  }
358 
359  // save the result
360  return save(outDataset,outDsType);
361 }
TESTATEXPORT void GetNumericStatisticalSummary(std::vector< double > &values, te::stat::NumericStatisticalSummary &ss, double nullVal)
Geometric property.
A structure to hold the set of statistics from a set of numerical values.
An exception class for the Vector processing module.
GeomType
Each enumerated type is compatible with a Well-known Binary (WKB) type code.
Definition: Enums.h:41
void setGeometry(std::size_t i, te::gm::Geometry *value)
It sets the value of the i-th property.
Utility functions for the data access module.
void setSRID(int srid)
It sets the spatial reference system identifier associated to this property.
void setGeometryType(GeomType t)
It sets the geometry subtype.
void setDouble(std::size_t i, double value)
It sets the value of the i-th property.
An atomic property like an integer or double.
A class that models the description of a dataset.
Definition: DataSetType.h:72
void useTimer(bool flag)
Used to define whether the task uses progress timer information.
std::auto_ptr< te::dt::AbstractData > getValue(std::size_t i) const
It returns the value of the i-th property.
TESTATEXPORT std::string GetStatSummaryShortName(const int &e)
Get the statistical parameter short name from its enumerator.
Definition: Utils.cpp:34
te::gm::Geometry * GetGeometryUnion(const std::vector< te::mem::DataSetItem * > &items, size_t geomIdx, te::gm::GeomType outGeoType)
It returns the union of a geometry vector.
Definition: Utils.cpp:47
This class can be used to inform the progress of a task.
Definition: TaskProgress.h:53
TEDATAACCESSEXPORT std::size_t GetPropertyPos(const DataSet *dataset, const std::string &name)
Definition: Utils.cpp:451
void setValue(std::size_t i, te::dt::AbstractData *value)
It sets the value of the i-th property.
#define TE_TR(message)
It marks a string in order to get translated.
Definition: Translator.h:345
bool isActive() const
Verify if the task is active.
Aggregation Vector Processing functions.
std::map< std::string, double > calculateNumStats(const std::vector< te::mem::DataSetItem * > &items, std::string &modeProp, std::string &modeName)
void setTotalSteps(int value)
Set the task total steps.
void setInt32(std::size_t i, boost::int32_t value)
It sets the value of the i-th property.
Implementation of a random-access dataset class for the TerraLib In-Memory Data Access driver...
Definition: DataSet.h:65
int getSRID() const
It returns the spatial reference system identifier associated to this property.
GeomType getGeometryType() const
It returns the geometry subtype allowed for the property.
void pulse()
Calls setCurrentStep() function using getCurrentStep() + 1.
The type for string types: FIXED_STRING, VAR_STRING or STRING.
TESTATEXPORT void GetStringStatisticalSummary(std::vector< std::string > &values, te::stat::StringStatisticalSummary &ss)
Geometry is the root class of the geometries hierarchy, it follows OGC and ISO standards.
Definition: Geometry.h:73
An implementation of the DatasetItem class for the TerraLib In-Memory Data Access driver...
Definition: DataSetItem.h:56
te::gm::GeomType GeomOpResultType(te::gm::GeomType firstGeom, te::gm::GeomType secondGeom)
Definition: Utils.cpp:167
A structure to hold the set of statistics from a set of categorical (sample) values.
Mode.
Definition: Enums.h:54
Configuration flags for the TerraLib Vector Processing module.
void setString(std::size_t i, const std::string &value)
It sets the value of the i-th property.
std::auto_ptr< te::da::DataSetType > buildOutDataSetType()
TEDATAACCESSEXPORT te::gm::GeometryProperty * GetFirstGeomProperty(const DataSetType *dt)
Definition: Utils.cpp:508
std::map< std::string, std::string > calculateStringStats(const std::vector< te::mem::DataSetItem * > &items)
const std::string & getName() const
It returns the property name.
Definition: Property.h:126