All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Aggregation.cpp
Go to the documentation of this file.
1 /* Copyright (C) 2008-2013 National Institute For Space Research (INPE) - Brazil.
2 
3  This file is part of the TerraLib - a Framework for building GIS enabled applications.
4 
5  TerraLib is free software: you can redistribute it and/or modify
6  it under the terms of the GNU Lesser General Public License as published by
7  the Free Software Foundation, either version 3 of the License,
8  or (at your option) any later version.
9 
10  TerraLib is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU Lesser General Public License for more details.
14 
15  You should have received a copy of the GNU Lesser General Public License
16  along with TerraLib. See COPYING. If not, write to
17  TerraLib Team at <terralib-team@terralib.org>.
18  */
19 
20 /*!
21  \file Aggregation.cpp
22 
23  \brief Aggregation Vector Processing functions.
24 */
25 
26 //Terralib
27 #include "../common/Translator.h"
28 #include "../common/progress/TaskProgress.h"
29 #include "../dataaccess/dataset/DataSet.h"
30 #include "../dataaccess/dataset/DataSetAdapter.h"
31 #include "../dataaccess/dataset/DataSetType.h"
32 #include "../dataaccess/datasource/DataSourceCapabilities.h"
33 #include "../dataaccess/datasource/DataSourceInfo.h"
34 #include "../dataaccess/datasource/DataSourceManager.h"
35 #include "../dataaccess/datasource/DataSourceFactory.h"
36 #include "../dataaccess/dataset/DataSetTypeConverter.h"
37 #include "../dataaccess/query_h.h"
38 #include "../dataaccess/utils/Utils.h"
39 #include "../datatype/Property.h"
40 #include "../datatype/SimpleProperty.h"
41 #include "../datatype/StringProperty.h"
42 #include "../geometry/Geometry.h"
43 #include "../geometry/GeometryCollection.h"
44 #include "../geometry/GeometryProperty.h"
45 #include "../geometry/Utils.h"
46 #include "../memory/DataSet.h"
47 #include "../memory/DataSetItem.h"
48 #include "../statistics/core/SummaryFunctions.h"
49 #include "../statistics/core/StringStatisticalSummary.h"
50 #include "../statistics/core/NumericStatisticalSummary.h"
51 #include "../statistics/core/Utils.h"
52 #include "Aggregation.h"
53 #include "Config.h"
54 #include "Exception.h"
55 #include "Utils.h"
56 
57 // STL
58 #include <map>
59 #include <math.h>
60 #include <string>
61 #include <vector>
62 
63 // BOOST
64 #include <boost/lexical_cast.hpp>
65 #include <boost/algorithm/string.hpp>
66 
67 
68 // -- auxiliary functions
// Strategy that delegates the aggregation to the input data source: it builds a
// grouped SELECT, runs it and copies the query result into outputDataSet.
69 bool AggregationQuery(const std::string& inDataset,
70  te::da::DataSource* inDatasource,
71  const std::vector<te::dt::Property*>& groupingProperties,
72  const std::map<te::dt::Property*, std::vector<te::stat::StatisticalSummary> >& statisticalSummary,
73  te::mem::DataSet* outputDataSet,
74  te::gm::GeomType outGeoType);
75 
// Strategy that reads the input dataset, groups its rows with GetGroups and
// computes the statistics and the geometry union of each group in memory.
76 bool AggregationMemory(const std::string& inDataset,
77  te::da::DataSource* inDatasource,
78  const std::vector<te::dt::Property*>& groupingProperties,
79  const std::map<te::dt::Property*, std::vector<te::stat::StatisticalSummary> >& statisticalSummary,
80  te::mem::DataSet* outputDataSet,
81  te::gm::GeomType outGeoType);
82 
// Builds the non-spatial part of the output schema: the grouping column, the
// "NUM_OBJ" counter and one column per requested statistic.
83 te::da::DataSetType* BuildOutputDataSetType(const std::string& name,
84  const std::vector<te::dt::Property*>& properties,
85  const std::map<te::dt::Property*, std::vector<te::stat::StatisticalSummary> >& statisticalSummary);
86 
// Copies every row of the query result dsQuery into outputDataSet.
87 void SetOutputDatasetQuery( const std::vector<te::dt::Property*>& groupingProperties,
88  te::da::DataSet* dsQuery,
89  te::mem::DataSet* outputDataSet, te::gm::GeomType outGeoType);
90 
// Copies the rows of inputDataSet into in-memory items and groups them by the
// '_'-joined string values of the grouping properties.
91 std::map<std::string, std::vector<te::mem::DataSetItem*> > GetGroups(te::da::DataSet* inputDataSet,
92  const std::vector<te::dt::Property*>& groupingProperties);
93 
// Computes, for every string property in statisticalSummary, the summary values
// of one group, keyed by "<property>_<STATISTIC>".
94 std::map<std::string, std::string> CalculateStringGroupingFunctions(const std::map<te::dt::Property*, std::vector<te::stat::StatisticalSummary> >& statisticalSummary,
95  const std::vector<te::mem::DataSetItem*>& items);
96 
// Computes, for every non-string property in statisticalSummary, the summary
// values of one group, keyed by "<property>_<STATISTIC>".
97 std::map<std::string, double> CalculateDoubleGroupingFunctions(const std::map<te::dt::Property*, std::vector<te::stat::StatisticalSummary> >& statisticalSummary,
98  const std::vector<te::mem::DataSetItem*>& items);
99 
100 // ---
101 
/*!
  \brief Executes the aggregation operation and persists the result as a dataset in the output data source.

  The output schema is built from the grouping properties and the requested statistics,
  plus a geometry property called "geom" that takes the SRID of the first geometry
  property of the input dataset. The rows are computed by AggregationQuery when the input
  data source supports the prepared query API and the spatial SQL dialect, and by
  AggregationMemory otherwise. The result is then created and stored in outDatasource.

  \param inDataset           The name of the input dataset.
  \param inDatasource        The data source that holds the input dataset (asserted non-null).
  \param groupingProperties  The properties whose values define the groups.
  \param statisticalSummary  The statistics requested for each property.
  \param outDataset          The name of the output dataset.
  \param outDatasource       The data source that receives the result (asserted non-null).

  \return False when the selected strategy reports failure, true otherwise.
*/
102 bool te::vp::Aggregation(const std::string& inDataset,
103  te::da::DataSource* inDatasource,
104  const std::vector<te::dt::Property*>& groupingProperties,
105  const std::map<te::dt::Property*, std::vector<te::stat::StatisticalSummary> >& statisticalSummary,
106  const std::string& outDataset,
107  te::da::DataSource* outDatasource)
108 {
109  assert(inDatasource);
110  assert(outDatasource);
111 
112  // define the schema of the output dataset based on the aggregation parameters for the non-spatial attributes
113  std::auto_ptr<te::da::DataSetType> outputDataSetType(BuildOutputDataSetType(outDataset, groupingProperties, statisticalSummary));
114 
115  // define the resulting spatial property
116  std::auto_ptr<te::da::DataSetType> inSchema = inDatasource->getDataSetType(inDataset);
// NOTE(review): p is dereferenced below without a null check; presumably the input
// dataset is required to have a geometry property - confirm.
117  te::gm::GeometryProperty* p = static_cast<te::gm::GeometryProperty*>(inSchema->findFirstPropertyOfType(te::dt::GEOMETRY_TYPE));
118 
119  // creates the output geometry property
120  te::gm::GeometryProperty* geometry = new te::gm::GeometryProperty("geom");
// NOTE(review): the embedded numbering jumps from 120 to 122 and outGeoType is used
// below without a visible declaration - the line that declares it appears to be
// missing from this listing.
122  geometry->setGeometryType(outGeoType);
123  geometry->setSRID(p->getSRID());
124  outputDataSetType->add(geometry);
125 
126  // create the output dataset in memory
127  std::auto_ptr<te::mem::DataSet> memDataset(new te::mem::DataSet(outputDataSetType.get()));
128 
129  // select a strategy based on the capabilities of the input datasource
130  const te::da::DataSourceCapabilities dsCapabilities = inDatasource->getCapabilities();
131 
132  // execute the strategy
133  bool res;
134  if(dsCapabilities.supportsPreparedQueryAPI() && dsCapabilities.getQueryCapabilities().supportsSpatialSQLDialect())
135  {
136  res = AggregationQuery(inDataset, inDatasource, groupingProperties, statisticalSummary, memDataset.get(),outGeoType);
137  }
138  else
139  {
140  res = AggregationMemory(inDataset, inDatasource, groupingProperties, statisticalSummary, memDataset.get(),outGeoType);
141  }
142 
143  if (!res)
144  return false;
145 
146  // do any adaptation necessary to persist the output dataset
// NOTE(review): converter is allocated with new and never deleted in this function;
// presumably CreateAdapter takes its ownership - confirm.
147  te::da::DataSetTypeConverter* converter = new te::da::DataSetTypeConverter(outputDataSetType.get(), outDatasource->getCapabilities());
148  te::da::DataSetType* dsTypeResult = converter->getResult();
149  std::auto_ptr<te::da::DataSetAdapter> dsAdapter(te::da::CreateAdapter(memDataset.get(), converter));
150 
151  std::map<std::string, std::string> options;
152  // create the dataset
153  outDatasource->createDataSet(dsTypeResult, options);
154 
155  // copy from memory to output datasource
// NOTE(review): the adapter built above is not used here; the in-memory dataset is
// added directly - confirm this is intended.
156  memDataset->moveBeforeFirst();
157  outDatasource->add(dsTypeResult->getName(),memDataset.get(), options);
158 
159  // create the primary key if it is possible
// NOTE(review): the embedded numbering jumps from 159 to 161 - the condition that
// guards this block appears to be missing from this listing.
161  {
// The key is built on the first property of the converted schema.
162  std::string pk_name = dsTypeResult->getName() + "_pkey";
163  te::da::PrimaryKey* pk = new te::da::PrimaryKey(pk_name, dsTypeResult);
164  pk->add(dsTypeResult->getProperty(0));
165  outDatasource->addPrimaryKey(outDataset,pk);
166  }
167  return true;
168 }
169 
170 
/*!
  \brief Computes the aggregation by running a grouped SELECT on the input data source.

  The select list holds the grouping properties, a count aliased "Aggregation_Count",
  the aggregate expressions of every property in statisticalSummary (aliased
  "<property>_<STATISTIC>") and, when the dataset has a geometry, a field aliased "geom".
  The query result is copied into outputDataSet by SetOutputDatasetQuery.

  \param inDataset           The name of the input dataset.
  \param inDatasource        The data source that is queried.
  \param groupingProperties  The properties used in the GROUP BY clause.
  \param statisticalSummary  The properties to summarize; the requested statistic list is
                             not consulted here, every supported aggregate is selected.
  \param outputDataSet       The in-memory dataset that receives the rows.
  \param outGeoType          The geometry type expected in the output.

  \return Always true.
*/
171 bool AggregationQuery(const std::string& inDataset,
172  te::da::DataSource* inDatasource,
173  const std::vector<te::dt::Property*>& groupingProperties,
174  const std::map<te::dt::Property*, std::vector<te::stat::StatisticalSummary> >& statisticalSummary,
175  te::mem::DataSet* outputDataSet,
176  te::gm::GeomType outGeoType)
177 {
178  std::map<te::dt::Property*, std::vector<te::stat::StatisticalSummary> >::const_iterator itStatSummary = statisticalSummary.begin();
179 
180  std::auto_ptr<te::da::DataSetType> dsType = inDatasource->getDataSetType(inDataset);
181 
182  te::da::Fields* fields = new te::da::Fields;
183 
// The grouping properties come first in the select list.
184  for(std::size_t i = 0; i < groupingProperties.size(); ++i)
185  {
186  te::da::Field* f_aggName = new te::da::Field(groupingProperties[i]->getName());
187  fields->push_back(f_aggName);
188  }
189 
// NOTE(review): groupingProperties[0] is read unconditionally although emptiness is
// only tested further below - an empty vector is not handled here.
// NOTE(review): the count is aliased "Aggregation_Count" while the output schema names
// that column "NUM_OBJ" (see BuildOutputDataSetType) - confirm the consumer of the
// query result maps one to the other.
// NOTE(review): expressions are handed to Field by reference (*e_...); presumably Field
// copies them, in which case the e_* objects allocated here are never freed - confirm.
190  te::da::Expression* e_aggCount = new te::da::Count(new te::da::PropertyName(groupingProperties[0]->getName()));
191  te::da::Field* f_aggCount = new te::da::Field(*e_aggCount, "Aggregation_Count");
192  fields->push_back(f_aggCount);
193 
194  while(itStatSummary != statisticalSummary.end())
195  {
196  int propType = itStatSummary->first->getType();
197 
// String properties only get MIN_VALUE, MAX_VALUE, COUNT and VALID_COUNT.
198  if(propType == te::dt::STRING_TYPE)
199  {
// NOTE(review): the same p_name pointer is passed to several expressions below;
// whether they share or copy it is not visible here - confirm ownership.
200  te::da::PropertyName* p_name = new te::da::PropertyName(itStatSummary->first->getName());
201  te::da::PropertyName* p_count = new te::da::PropertyName("*");
202 
203  te::da::Expression* e_min = new te::da::Min(p_name);
204  te::da::Field* f_min = new te::da::Field(*e_min, p_name->getName() + "_MIN_VALUE");
205 
206  te::da::Expression* e_max = new te::da::Max(p_name);
207  te::da::Field* f_max = new te::da::Field(*e_max, p_name->getName() + "_MAX_VALUE");
208 
209  te::da::Expression* e_count = new te::da::Count(p_count);
210  te::da::Field* f_count = new te::da::Field(*e_count, p_name->getName() + "_COUNT");
211 
212  te::da::Expression* e_validcount = new te::da::Count(p_name);
213  te::da::Field* f_validcount = new te::da::Field(*e_validcount, p_name->getName() + "_VALID_COUNT");
214 
215  fields->push_back(f_min);
216  fields->push_back(f_max);
217  fields->push_back(f_count);
218  fields->push_back(f_validcount);
219  }
// Every other property type additionally gets SUM, MEAN, STANDARD_DEVIATION,
// VARIANCE and AMPLITUDE (max - min).
220  else
221  {
222  te::da::PropertyName* p_name = new te::da::PropertyName(itStatSummary->first->getName());
223  te::da::PropertyName* p_count = new te::da::PropertyName("*");
224 
225  te::da::Expression* e_min = new te::da::Min(p_name);
226  te::da::Field* f_min = new te::da::Field(*e_min, p_name->getName() + "_MIN_VALUE");
227 
228  te::da::Expression* e_max = new te::da::Max(p_name);
229  te::da::Field* f_max = new te::da::Field(*e_max, p_name->getName() + "_MAX_VALUE");
230 
231  te::da::Expression* e_count = new te::da::Count(p_count);
232  te::da::Field* f_count = new te::da::Field(*e_count, p_name->getName() + "_COUNT");
233 
234  te::da::Expression* e_validcount = new te::da::Count(p_name);
235  te::da::Field* f_validcount = new te::da::Field(*e_validcount, p_name->getName() + "_VALID_COUNT");
236 
237  te::da::Expression* e_sum = new te::da::Sum(p_name);
238  te::da::Field* f_sum = new te::da::Field(*e_sum, p_name->getName() + "_SUM");
239 
240  te::da::Expression* e_mean = new te::da::Avg(p_name);
241  te::da::Field* f_mean = new te::da::Field(*e_mean, p_name->getName() + "_MEAN");
242 
243  te::da::Expression* e_stddev = new te::da::StdDev(p_name);
244  te::da::Field* f_stddev = new te::da::Field(*e_stddev, p_name->getName() + "_STANDARD_DEVIATION");
245 
246  te::da::Expression* e_variance = new te::da::Variance(p_name);
247  te::da::Field* f_variance = new te::da::Field(*e_variance, p_name->getName() + "_VARIANCE");
248 
249  te::da::Expression* e_amplitude = new te::da::Sub(*e_max, *e_min);
250  te::da::Field* f_amplitude = new te::da::Field(*e_amplitude, p_name->getName() + "_AMPLITUDE");
251 
252  fields->push_back(f_min);
253  fields->push_back(f_max);
254  fields->push_back(f_count);
255  fields->push_back(f_validcount);
256  fields->push_back(f_sum);
257  fields->push_back(f_mean);
258  fields->push_back(f_stddev);
259  fields->push_back(f_variance);
260  fields->push_back(f_amplitude);
261 
262  }
263  ++itStatSummary;
264  }
265 
266  if(dsType->hasGeom())
267  {
// NOTE(review): embedded lines 268 and 270 are missing from this listing; e_union is
// used below without a visible declaration. Presumably they looked up the geometry
// property and built a spatial union expression on it - restore from the original file.
269 
271  te::da::Field* f_union = new te::da::Field(*e_union, "geom");
272  fields->push_back(f_union);
273  }
274 
275  te::da::FromItem* fromItem = new te::da::DataSetName(dsType->getName());
276  te::da::From* from = new te::da::From;
277  from->push_back(fromItem);
278 
279  te::da::Select select(fields, from);
280 
281  if(!groupingProperties.empty())
282  {
283  te::da::GroupBy* groupBy = new te::da::GroupBy();
284 
285  for(std::size_t i = 0; i < groupingProperties.size(); ++i)
286  {
287  te::da::GroupByItem* e_groupBy = new te::da::GroupByItem(groupingProperties[i]->getName());
288  groupBy->push_back(e_groupBy);
289  }
290  select.setGroupBy(groupBy);
291  }
292 
293  std::auto_ptr<te::da::DataSet> dsQuery = inDatasource->query(select);
294 
// Transfer the rows of the query result into the in-memory output dataset.
295  SetOutputDatasetQuery(groupingProperties, dsQuery.get(), outputDataSet,outGeoType);
296 
297  return true;
298 }
299 
/*!
  \brief Computes the aggregation in memory.

  The input dataset is read and split into groups by GetGroups. For each group the
  string and numeric statistics and the geometry union are computed, and a new item is
  added to outputDataSet: property 0 receives the group key, property 1 the number of
  items of the group, the statistic columns are filled by name and "geom" receives the
  union. Groups for which GetGeometryUnion returns a null pointer produce no output item.

  \param inDataset           The name of the input dataset.
  \param inDatasource        The data source the dataset is read from.
  \param groupingProperties  The properties whose values define the groups.
  \param statisticalSummary  The statistics requested for each property.
  \param outputDataSet       The in-memory dataset that receives the rows.
  \param outGeoType          The geometry type expected in the output.

  \return Always true.

  \exception te::vp::Exception It is thrown when the progress task is no longer active (operation canceled).
*/
300 bool AggregationMemory(const std::string& inDataset,
301  te::da::DataSource* inDatasource,
302  const std::vector<te::dt::Property*>& groupingProperties,
303  const std::map<te::dt::Property*, std::vector<te::stat::StatisticalSummary> >& statisticalSummary,
304  te::mem::DataSet* outputDataSet,
305  te::gm::GeomType outGeoType)
306 {
307  std::auto_ptr<te::da::DataSet> inputDataSet = inDatasource->getDataSet(inDataset);
308  std::auto_ptr<te::da::DataSetType> dsType = inDatasource->getDataSetType(inDataset);
309 
// NOTE(review): geomIdx stays uninitialized when the dataset has no geometry, yet it
// is passed to GetGeometryUnion below.
310  std::size_t geomIdx;
311  std::string geomName = "";
312 
313  if(dsType->hasGeom())
314  {
// NOTE(review): the embedded numbering jumps from 314 to 316 and geom is used without
// a visible declaration - the line that obtains the geometry property appears to be
// missing from this listing.
316  geomName = geom->getName();
317  geomIdx = boost::lexical_cast<std::size_t>(dsType->getPropertyPosition(geomName));
318  }
319 
// NOTE(review): the items returned by GetGroups are allocated with new and are not
// deleted in this function - confirm who owns them.
320  std::map<std::string, std::vector<te::mem::DataSetItem*> > groupValues = GetGroups(inputDataSet.get(), groupingProperties);
321 
322  std::map<std::string, std::vector<te::mem::DataSetItem*> >::const_iterator itGroupValues = groupValues.begin();
323 
324  te::common::TaskProgress task("Processing aggregation...");
325  task.setTotalSteps(groupValues.size());
326  task.useTimer(true);
327 
// NOTE(review): dataSetItem is not referenced anywhere else in this function.
328  std::auto_ptr<te::mem::DataSetItem> dataSetItem(new te::mem::DataSetItem(inputDataSet.get()));
329  while(itGroupValues != groupValues.end())
330  {
331  std::string value = itGroupValues->first.c_str();
332  int aggregationCount = itGroupValues->second.size();
333 
334  std::map<std::string, std::string> functionResultStringMap = CalculateStringGroupingFunctions(statisticalSummary, itGroupValues->second);
335  std::map<std::string, double> functionResultDoubleMap = CalculateDoubleGroupingFunctions(statisticalSummary, itGroupValues->second);
336 
337 
338  te::gm::Geometry* geometry = te::vp::GetGeometryUnion(itGroupValues->second, geomIdx, outGeoType);
339 
340  if(geometry)
341  {
342  te::mem::DataSetItem* outputDataSetItem = new te::mem::DataSetItem(outputDataSet);
343 
344  outputDataSetItem->setString(0, value);
345  outputDataSetItem->setInt32(1, aggregationCount);
346 
347  if(!functionResultStringMap.empty())
348  {
349  std::map<std::string, std::string>::iterator itFuncResultString = functionResultStringMap.begin();
350 
// Only results whose key matches a column of the output dataset are stored; the
// position check filters out statistics that were not requested.
351  while(itFuncResultString != functionResultStringMap.end())
352  {
353  if(te::da::GetPropertyPos(outputDataSet, itFuncResultString->first.c_str()) < outputDataSet->getNumProperties())
354  outputDataSetItem->setString(itFuncResultString->first.c_str(), itFuncResultString->second.c_str());
355 
356  ++itFuncResultString;
357  }
358  }
359 
360  if(!functionResultDoubleMap.empty())
361  {
362  std::map<std::string, double>::iterator itFuncResultDouble = functionResultDoubleMap.begin();
363 
// State used to join the mode values of one property into a single string:
// auxPropMode is the "<property>_MODE" column being accumulated, modeValue the
// ", "-separated list built so far and mode tells whether a list is pending.
364  std::string propMode;
365  std::string auxPropMode;
366  std::string modeValue;
367  std::string auxValue;
368  bool mode = false;
369 
370  while(itFuncResultDouble != functionResultDoubleMap.end())
371  {
372  propMode = itFuncResultDouble->first.c_str();
373 
// Drop the text up to and including the first two '_' characters. For mode keys,
// which CalculateDoubleGroupingFunctions names "<n>_<i>_<property>_MODE", this
// leaves "<property>_MODE".
// NOTE(review): the result of find() is stored in an unsigned; when no '_' is found
// this relies on pos+1 wrapping around to 0.
374  unsigned pos = propMode.find("_");
375  propMode = propMode.substr(pos+1);
376  pos = propMode.find("_");
377  propMode = propMode.substr(pos+1);
378 
379  std::string shortMode = "";
380 
381  if(propMode.length() > 3)
382  shortMode = propMode.substr(propMode.length()-4, 4);
383 
384  if(propMode == auxPropMode || auxPropMode == "")
385  {
// Same mode column as the previous key (or nothing accumulated yet): append.
386  if(shortMode == "MODE")
387  {
388  modeValue += ", "+boost::lexical_cast<std::string>(itFuncResultDouble->second);
389  auxPropMode = propMode;
390  mode = true;
391  }
392  }
393  else
394  {
// A different key arrived: write the pending mode list (without its leading ", ")
// and, if this key is itself a mode value, start a new list.
// NOTE(review): a pending list is only written when a later key arrives; if mode
// keys were the last entries of the map the list would not be stored - confirm
// the key ordering guarantees this cannot happen.
395  if(mode)
396  {
397  modeValue.erase(0,2);
398 
399  if(te::da::GetPropertyPos(outputDataSet, auxPropMode) < outputDataSet->getNumProperties())
400  outputDataSetItem->setString(auxPropMode, modeValue);
401 
402  mode = false;
403 
404  if(shortMode == "MODE")
405  {
406  modeValue = "";
407  modeValue += ", "+boost::lexical_cast<std::string>(itFuncResultDouble->second);
408  auxPropMode = propMode;
409  mode = true;
410  }
411  }
412  }
413 
// Plain numeric statistics are stored under their full key when a column with
// that name exists in the output dataset.
414  if(te::da::GetPropertyPos(outputDataSet, itFuncResultDouble->first.c_str()) < outputDataSet->getNumProperties())
415  outputDataSetItem->setDouble(itFuncResultDouble->first.c_str(), itFuncResultDouble->second);
416 
417  ++itFuncResultDouble;
418  }
419  }
420 
421  outputDataSetItem->setGeometry("geom", geometry);
422 
423  outputDataSet->add(outputDataSetItem);
424  }
425 
426  ++itGroupValues;
427 
// Abort when the progress task is no longer active.
428  if(task.isActive() == false)
429  {
430  throw te::vp::Exception(TR_VP("Operation canceled!"));
431  }
432 
433  task.pulse();
434  }
435 
436  return true;
437 }
438 
440  const std::vector<te::dt::Property*>& properties,
441  const std::map<te::dt::Property*, std::vector<te::stat::StatisticalSummary> >& statisticalSummary)
442 {
443  te::da::DataSetType* dataSetType = new te::da::DataSetType(name);
444 
445  // those are the properties used to aggregate the objects
446  std::string propertyResult;
447  for(std::size_t i = 0; i < properties.size(); ++i)
448  {
449  propertyResult += "_" + properties[i]->getName();
450  }
451  propertyResult.erase(propertyResult.begin());
452  te::dt::StringProperty* stringProperty = new te::dt::StringProperty(propertyResult);
453  dataSetType->add(stringProperty);
454 
455  // the number of objects aggregated
456  te::dt::SimpleProperty* aggregationProperty = new te::dt::SimpleProperty("NUM_OBJ", te::dt::INT32_TYPE);
457  dataSetType->add(aggregationProperty);
458 
459  // properties that reference the statistics requested
460  std::string functionResult;
461  std::vector<te::stat::StatisticalSummary> vectorResult;
462  std::map<te::dt::Property*, std::vector<te::stat::StatisticalSummary> >::const_iterator it = statisticalSummary.begin();
463  while(it != statisticalSummary.end())
464  {
465  propertyResult = "";
466  propertyResult = it->first->getName();
467  propertyResult += "_";
468 
469  vectorResult = it->second;
470 
471  for(std::size_t i = 0; i < vectorResult.size(); ++i)
472  {
473  functionResult = propertyResult;
474  functionResult += te::stat::GetStatSummaryShortName(vectorResult[i]);
475 
476  if(it->first->getType() == te::dt::STRING_TYPE || vectorResult[i] == te::stat::MODE)
477  {
478  te::dt::StringProperty* functrionProperty = new te::dt::StringProperty(functionResult);
479  dataSetType->add(functrionProperty);
480  }
481  else
482  {
483  te::dt::SimpleProperty* functrionProperty = new te::dt::SimpleProperty(functionResult, te::dt::DOUBLE_TYPE);
484  dataSetType->add(functrionProperty);
485  }
486  }
487 
488  ++it;
489  }
490  return dataSetType;
491 }
492 
493 
494 void SetOutputDatasetQuery( const std::vector<te::dt::Property*>& groupingProperties,
495  te::da::DataSet* dsQuery,
496  te::mem::DataSet* outputDataSet,
497  te::gm::GeomType outGeoType)
498 {
499  std::size_t n_dsProp = dsQuery->getNumProperties();
500  std::size_t selectedPropSize = groupingProperties.size();
501 
502  te::mem::DataSetItem* outputDataSetItem;
503  int dsPropType;
504 
505  dsQuery->moveBeforeFirst();
506  while(dsQuery->moveNext())
507  {
508  outputDataSetItem = new te::mem::DataSetItem(outputDataSet);
509  std::string aggregItem = "";
510 
511  for(std::size_t i = 0; i < n_dsProp; ++i)
512  {
513  if(i < selectedPropSize)
514  {
515  if(aggregItem != "")
516  aggregItem += "_";
517 
518  aggregItem += dsQuery->getAsString(i);
519 
520  outputDataSetItem->setString(0, aggregItem);
521  }
522  else
523  {
524  std::string propName = dsQuery->getPropertyName(i);
525 
526  if(boost::iequals(propName, "NUM_OBJ"))
527  {
528  int aggregValue = boost::lexical_cast<int>(dsQuery->getAsString(i));
529  outputDataSetItem->setInt32(1, aggregValue);
530  }
531  else
532  {
533  dsPropType = dsQuery->getPropertyDataType(i);
534 
535  if(dsPropType == te::dt::GEOMETRY_TYPE)
536  {
537  std::auto_ptr<te::gm::Geometry> geometry(dsQuery->getGeometry(i));
538  if (geometry->getGeomTypeId() != outGeoType)
539  {
540  te::gm::GeometryCollection* gc = new te::gm::GeometryCollection(1,te::vp::GeomOpResultType(geometry->getGeomTypeId()),geometry->getSRID());
541  gc->setGeometryN(0, geometry.release());
542  outputDataSetItem->setGeometry("geom", gc);
543  }
544  else
545  outputDataSetItem->setGeometry("geom", geometry.release());
546  }
547  if(dsPropType == te::dt::STRING_TYPE)
548  {
549  std::string propName = dsQuery->getPropertyName(i);
550  std::size_t index = te::da::GetPropertyPos(outputDataSetItem->getParent(), propName);
551 
552  if(index < outputDataSetItem->getNumProperties())
553  {
554  std::string value = dsQuery->getAsString(i);
555  if(!value.empty())
556  outputDataSetItem->setString(index, value);
557  }
558  }
559  if(dsPropType == te::dt::NUMERIC_TYPE)
560  {
561  std::string propName = dsQuery->getPropertyName(i);
562  std::size_t index = te::da::GetPropertyPos(outputDataSetItem->getParent(), propName);
563 
564  if(index < outputDataSetItem->getNumProperties())
565  {
566  std::string queryValue = dsQuery->getNumeric(i);
567  if(!queryValue.empty())
568  {
569  double value = boost::lexical_cast<double>(queryValue);
570  outputDataSetItem->setDouble(index, value);
571  }
572  }
573  }
574  if(dsPropType == te::dt::DOUBLE_TYPE)
575  {
576  std::string propName = dsQuery->getPropertyName(i);
577  std::size_t index = te::da::GetPropertyPos(outputDataSetItem->getParent(), propName);
578 
579  if(index < outputDataSetItem->getNumProperties())
580  {
581  double value = dsQuery->getDouble(i);
582  outputDataSetItem->setDouble(index, value);
583  }
584  }
585  if(dsPropType == te::dt::INT64_TYPE)
586  {
587  std::string propName = dsQuery->getPropertyName(i);
588  std::size_t index = te::da::GetPropertyPos(outputDataSetItem->getParent(), propName);
589 
590  if(index < outputDataSetItem->getNumProperties())
591  {
592  int type = outputDataSetItem->getPropertyDataType(index);
593  if(type == te::dt::DOUBLE_TYPE)
594  {
595  std::string queryValue = dsQuery->getAsString(i);
596  if(!queryValue.empty())
597  {
598  double value = boost::lexical_cast<double>(queryValue);
599  outputDataSetItem->setDouble(index, value);
600  }
601  }
602  if(type == te::dt::STRING_TYPE)
603  {
604  std::string value = dsQuery->getAsString(i);
605  if(!value.empty())
606  outputDataSetItem->setString(index, value);
607  }
608  }
609  }
610  if(dsPropType == te::dt::INT32_TYPE)
611  {
612  std::string propName = dsQuery->getPropertyName(i);
613  std::size_t index = te::da::GetPropertyPos(outputDataSetItem->getParent(), propName);
614 
615  if(index < outputDataSetItem->getNumProperties())
616  {
617  int type = outputDataSetItem->getPropertyDataType(index);
618  if(type == te::dt::DOUBLE_TYPE)
619  {
620  std::string queryValue = dsQuery->getAsString(i);
621  if(!queryValue.empty())
622  {
623  double value = boost::lexical_cast<double>(queryValue);
624  outputDataSetItem->setDouble(index, value);
625  }
626  }
627  if(type == te::dt::STRING_TYPE)
628  {
629  std::string value = dsQuery->getAsString(i);
630  if(!value.empty())
631  outputDataSetItem->setString(index, value);
632  }
633  }
634  }
635  }
636  }
637  }
638 
639  outputDataSet->add(outputDataSetItem);
640  }
641 
642  outputDataSet->moveBeforeFirst();
643 }
644 
645 std::map<std::string, std::vector<te::mem::DataSetItem*> > GetGroups( te::da::DataSet* inputDataSet,
646  const std::vector<te::dt::Property*>& groupingProperties)
647 {
648  std::map<std::string, std::vector<te::mem::DataSetItem*> > groupValues;
649 
650  while(inputDataSet->moveNext())
651  {
652  te::mem::DataSetItem* dataSetItem = new te::mem::DataSetItem(inputDataSet);
653 
654  for(std::size_t i = 0; i < inputDataSet->getNumProperties(); ++i)
655  {
656  if (!inputDataSet->isNull(i))
657  {
658  std::auto_ptr<te::dt::AbstractData> val = inputDataSet->getValue(i);
659  dataSetItem->setValue(i,val.release());
660  }
661  }
662 
663  std::size_t propertyIndex = 0;
664 
665  bool found = false;
666  std::vector<te::mem::DataSetItem*> dataSetItemVector;
667  std::map<std::string, std::vector<te::mem::DataSetItem*> >::iterator it;
668 
669  std::string propertyName;
670  std::string value;
671 
672  for(std::size_t i = 0; i < groupingProperties.size(); ++i)
673  {
674  propertyName += "_" + groupingProperties[i]->getName();
675 
676  propertyIndex = te::da::GetPropertyPos(dataSetItem->getParent(), groupingProperties[i]->getName());
677  value += "_" + inputDataSet->getAsString(propertyIndex);
678  }
679 
680  propertyName.erase(propertyName.begin());
681  value.erase(value.begin());
682 
683  for(it = groupValues.begin(); it != groupValues.end(); ++it)
684  {
685  if(it->first == value)
686  {
687  it->second.push_back(dataSetItem);
688  found = true;
689  break;
690  }
691  }
692 
693  if(found == false)
694  {
695  dataSetItemVector.push_back(dataSetItem);
696  groupValues.insert(std::pair<std::string, std::vector<te::mem::DataSetItem*> >(value, dataSetItemVector));
697  }
698  }
699 
700  return groupValues;
701 }
702 
703 std::map<std::string, std::string> CalculateStringGroupingFunctions(const std::map<te::dt::Property*, std::vector<te::stat::StatisticalSummary> >& statisticalSummary,
704  const std::vector<te::mem::DataSetItem*>& items)
705 {
706  std::map<std::string, std::string> result;
707 
708  std::map<te::dt::Property*, std::vector<te::stat::StatisticalSummary> >::const_iterator it = statisticalSummary.begin();
709 
710  while(it != statisticalSummary.end())
711  {
712  if(it->first->getType() == te::dt::STRING_TYPE)
713  {
714  std::vector<std::string> values;
715  std::string propertyName = it->first->getName();
716  std::size_t index = te::da::GetPropertyPos(items[0]->getParent(),propertyName);
717 
718  for(std::size_t i = 0; i < items.size(); ++i)
719  {
720  if (!items[i]->isNull(index))
721  values.push_back(items[i]->getString(index));
722  }
723 
726 
727  result.insert( std::map<std::string, std::string>::value_type( propertyName + "_MIN_VALUE", ss.m_minVal ));
728  result.insert( std::map<std::string, std::string>::value_type( propertyName + "_MAX_VALUE", ss.m_maxVal ));
729  result.insert( std::map<std::string, std::string>::value_type( propertyName + "_COUNT", boost::lexical_cast<std::string>(items.size())));
730  result.insert( std::map<std::string, std::string>::value_type( propertyName + "_VALID_COUNT", boost::lexical_cast<std::string>(values.size())));
731  }
732  ++it;
733  }
734 
735  return result;
736 }
737 
738 std::map<std::string, double> CalculateDoubleGroupingFunctions( const std::map<te::dt::Property*, std::vector<te::stat::StatisticalSummary> >& statisticalSummary,
739  const std::vector<te::mem::DataSetItem*>& items)
740 {
741  std::map<std::string, double> result;
742  int idProp = 0;
743  std::map<te::dt::Property*, std::vector<te::stat::StatisticalSummary> >::const_iterator it = statisticalSummary.begin();
744 
745  while(it != statisticalSummary.end())
746  {
747  if(it->first->getType() != te::dt::STRING_TYPE)
748  {
749  std::string propertyName = it->first->getName();
750  std::size_t index = te::da::GetPropertyPos(items[0]->getParent(), propertyName);
751  std::size_t type = items[0]->getParent()->getPropertyDataType(index);
752 
753  std::vector<double> values;
754  for(std::size_t i = 0; i < items.size(); ++i)
755  {
756  if (!items[i]->isNull(index))
757  {
758  double numval;
759  if (type == te::dt::INT16_TYPE)
760  numval = items[i]->getInt16(index);
761  else if (type == te::dt::INT32_TYPE)
762  numval = items[i]->getInt32(index);
763  else if (type == te::dt::INT64_TYPE)
764  numval = (double)items[i]->getInt64(index);
765  else if (type == te::dt::FLOAT_TYPE)
766  numval = items[i]->getFloat(index);
767  else if (type == te::dt::DOUBLE_TYPE)
768  numval = items[i]->getDouble(index);
769  values.push_back(numval);
770  }
771  }
772 
775 
776  result.insert( std::map<std::string, double>::value_type( propertyName + "_MIN_VALUE", ss.m_minVal ) );
777  result.insert( std::map<std::string, double>::value_type( propertyName + "_MAX_VALUE", ss.m_maxVal ) );
778  result.insert( std::map<std::string, double>::value_type( propertyName + "_COUNT", items.size()) );
779  result.insert( std::map<std::string, double>::value_type( propertyName + "_VALID_COUNT", values.size()) );
780  result.insert( std::map<std::string, double>::value_type( propertyName + "_MEAN", ss.m_mean ) );
781  result.insert( std::map<std::string, double>::value_type( propertyName + "_SUM", ss.m_sum ) );
782  result.insert( std::map<std::string, double>::value_type( propertyName + "_STANDARD_DEVIATION", ss.m_stdDeviation ) );
783  result.insert( std::map<std::string, double>::value_type( propertyName + "_VARIANCE", ss.m_variance ) );
784  result.insert( std::map<std::string, double>::value_type( propertyName + "_SKEWNESS", ss.m_skewness ) );
785  result.insert( std::map<std::string, double>::value_type( propertyName + "_KURTOSIS", ss.m_kurtosis ) );
786  result.insert( std::map<std::string, double>::value_type( propertyName + "_AMPLITUDE", ss.m_amplitude ) );
787  result.insert( std::map<std::string, double>::value_type( propertyName + "_MEDIAN", ss.m_median ) );
788  result.insert( std::map<std::string, double>::value_type( propertyName + "_VAR_COEFF", ss.m_varCoeff ) );
789 
790  for(std::size_t i = 0; i < ss.m_mode.size(); ++i)
791  result.insert( std::map<std::string, double>::value_type( boost::lexical_cast<std::string>(idProp) + "_" +
792  boost::lexical_cast<std::string>(i) + "_" +
793  propertyName + "_MODE", ss.m_mode[i] ) );
794  }
795  ++it;
796  ++idProp;
797  }
798 
799  return result;
800 }
801 
virtual std::string getPropertyName(std::size_t i) const =0
It returns the property name at position pos.
virtual double getDouble(std::size_t i) const =0
Method for retrieving a double attribute value.
Sum aggregate operator.
Definition: Sum.h:46
void setGeometryType(GeomType t)
It sets the geometry subtype.
bool isActive() const
Verify if the task is active.
virtual std::size_t getNumProperties() const =0
It returns the number of properties that composes an item of the dataset.
const DataSetTypeCapabilities & getDataSetTypeCapabilities() const
virtual void createDataSet(DataSetType *dt, const std::map< std::string, std::string > &options)
It creates the dataset schema definition in the target data source.
Definition: DataSource.cpp:424
te::da::DataSet * getParent() const
It returns its parent.
Definition: DataSetItem.cpp:94
void setGeometry(std::size_t i, te::gm::Geometry *value)
It sets the value of the i-th property.
A class that models the name of a dataset used in a From clause.
Definition: DataSetName.h:43
te::gm::GeomType GeomOpResultType(te::gm::GeomType firstGeom, te::gm::GeomType secondGeom)
Definition: Utils.cpp:134
An atomic property like an integer or double.
TEVPEXPORT bool Aggregation(const std::string &inDataset, te::da::DataSource *inDatasource, const std::vector< te::dt::Property * > &groupingProperties, const std::map< te::dt::Property *, std::vector< te::stat::StatisticalSummary > > &statisticalSummary, const std::string &outDataset, te::da::DataSource *outDatasource)
Executes the Aggregation Geographical Operation and persists the result as a dataset in a given outpu...
A converter for DataSetType.
Implementation of a random-access dataset class for the TerraLib In-Memory Data Access driver...
Definition: DataSet.h:64
TEDATAACCESSEXPORT te::gm::GeometryProperty * GetFirstGeomProperty(const DataSetType *dt)
Definition: Utils.cpp:504
virtual std::string getAsString(std::size_t i, int precision=0) const
Method for retrieving a data value as a string plain representation.
Definition: DataSet.cpp:218
virtual const DataSourceCapabilities & getCapabilities() const =0
It returns the known capabilities of the data source.
The subtraction operator.
Definition: Sub.h:46
A Select models a query to be used when retrieving data from a DataSource.
Definition: Select.h:65
TESTATEXPORT void GetStringStatisticalSummary(std::vector< std::string > &values, te::stat::StringStatisticalSummary &ss)
int getSRID() const
It returns the spatial reference system identifier associated to this property.
void setTotalSteps(int value)
Sets the total number of steps of the task.
The type for string types: FIXED_STRING, VAR_STRING or STRING.
bool supportsSpatialSQLDialect() const
te::gm::Geometry * GetGeometryUnion(const std::vector< te::mem::DataSetItem * > &items, size_t geomIdx, te::gm::GeomType outGeoType)
It returns the union of a geometry vector.
Definition: Utils.cpp:50
std::map< std::string, std::vector< te::mem::DataSetItem * > > GetGroups(te::da::DataSet *inputDataSet, const std::vector< te::dt::Property * > &groupingProperties)
const std::string & getName() const
It returns the property name.
Definition: PropertyName.h:80
void add(DataSetItem *item)
It adds a new item to the dataset and takes its ownership.
Definition: DataSet.cpp:149
The Field class can be used to model an expression that takes part of the output items of a SELECT...
Definition: Field.h:50
virtual bool moveNext()=0
It moves the internal pointer to the next item of the collection.
It describes a primary key (pk) constraint.
Definition: PrimaryKey.h:52
boost::ptr_vector< FromItem > From
It models the FROM clause for a query.
Definition: From.h:37
const QueryCapabilities & getQueryCapabilities() const
An implementation of the DatasetItem class for the TerraLib In-Memory Data Access driver...
Definition: DataSetItem.h:56
virtual void addPrimaryKey(const std::string &datasetName, PrimaryKey *pk)
It adds a primary key constraint to the dataset schema.
Definition: DataSource.cpp:229
const std::string & getName() const
It returns the property name.
Definition: Property.h:126
void add(Constraint *c)
It adds a new constraint.
virtual std::string getNumeric(std::size_t i) const =0
Method for retrieving a numeric attribute value.
void setGroupBy(GroupBy *g)
It sets the list of expressions used to condense the result set.
Definition: Select.cpp:957
void setInt32(std::size_t i, boost::int32_t value)
It sets the value of the i-th property.
A class that models the name of any property of an object.
Definition: PropertyName.h:50
virtual bool isNull(std::size_t i) const =0
It checks if the attribute value is NULL.
te::da::DataSetType * BuildOutputDataSetType(const std::string &name, const std::vector< te::dt::Property * > &properties, const std::map< te::dt::Property *, std::vector< te::stat::StatisticalSummary > > &statisticalSummary)
A class that represents the known capabilities of a specific data source, i.e. this class informs all...
Mode.
Definition: Enums.h:54
boost::ptr_vector< Field > Fields
Fields is just a boost::ptr_vector of Field pointers.
Definition: Fields.h:37
Max statistical function.
Definition: Max.h:46
void setValue(std::size_t i, te::dt::AbstractData *value)
It sets the value of the i-th property.
Aggregation Vector Processing functions.
ST_Union statistical function.
Definition: ST_Union.h:46
GeomType getGeometryType() const
It returns the geometry subtype allowed for the property.
Configuration flags for the TerraLib Vector Processing module.
void SetOutputDatasetQuery(const std::vector< te::dt::Property * > &groupingProperties, te::da::DataSet *dsQuery, te::mem::DataSet *outputDataSet, te::gm::GeomType outGeoType)
An abstract class that models a source of data in a query.
Definition: FromItem.h:50
Utility functions for the data access module.
void setDouble(std::size_t i, double value)
It sets the value of the i-th property.
StdDev statistical function.
Definition: StdDev.h:46
void pulse()
Calls setCurrentStep() function using getCurrentStep() + 1.
Geometry is the root class of the geometries hierarchy, it follows OGC and ISO standards.
Definition: Geometry.h:73
virtual int getPropertyDataType(std::size_t i) const =0
It returns the underlying data type of the property at position i.
virtual std::auto_ptr< te::dt::AbstractData > getValue(std::size_t i) const
Method for retrieving any other type of data value stored in the data source.
Definition: DataSet.cpp:151
GeomType
Each enumerated type is compatible with a Well-known Binary (WKB) type code.
Definition: Enums.h:41
A structure to hold the set of statistics from a set of numerical values.
std::map< std::string, double > CalculateDoubleGroupingFunctions(const std::map< te::dt::Property *, std::vector< te::stat::StatisticalSummary > > &statisticalSummary, const std::vector< te::mem::DataSetItem * > &items)
A structure to hold the set of statistics from a set of categorical (sample) values.
virtual std::auto_ptr< DataSet > query(const Select &q, te::common::TraverseType travType=te::common::FORWARDONLY, const te::common::AccessPolicy accessPolicy=te::common::RAccess)
It executes a query that may return some data using a generic query. This method always returns a dis...
Definition: DataSource.cpp:99
std::size_t getNumProperties() const
It returns the number of properties that compose an item of the dataset.
Definition: DataSet.cpp:234
virtual bool moveBeforeFirst()=0
It moves the internal pointer to a position before the first item in the collection.
void setString(std::size_t i, const std::string &value)
It sets the value of the i-th property.
virtual void add(const std::string &datasetName, DataSet *d, const std::map< std::string, std::string > &options, std::size_t limit=0)
It adds data items to the dataset in the data source.
Definition: DataSource.cpp:450
A class that can be used in a GROUP BY clause.
Definition: GroupByItem.h:50
std::map< std::string, std::string > CalculateStringGroupingFunctions(const std::map< te::dt::Property *, std::vector< te::stat::StatisticalSummary > > &statisticalSummary, const std::vector< te::mem::DataSetItem * > &items)
An exception class for the Vector processing module.
Min statistical function.
Definition: Min.h:46
TEDATAACCESSEXPORT std::size_t GetPropertyPos(const DataSet *dataset, const std::string &name)
Definition: Utils.cpp:447
An abstract class for data providers like a DBMS, Web Services or a regular file. ...
Definition: DataSource.h:116
boost::ptr_vector< GroupByItem > GroupBy
A class that can be used to model a GROUP BY clause.
Definition: GroupBy.h:37
It is a collection of other geometric objects.
A class that models the description of a dataset.
Definition: DataSetType.h:72
int getPropertyDataType(std::size_t pos) const
It returns the type of the pos-th property.
void add(te::dt::Property *p)
It adds a property to the list of properties of the primary key.
Definition: PrimaryKey.h:123
It models a property definition.
Definition: Property.h:59
This is an abstract class that models a query expression.
Definition: Expression.h:47
bool moveBeforeFirst()
It moves the internal pointer to a position before the first item in the collection.
Definition: DataSet.cpp:296
void useTimer(bool flag)
Used to define whether the task uses progress timer information.
This class can be used to inform the progress of a task.
Definition: TaskProgress.h:53
virtual std::auto_ptr< DataSet > getDataSet(const std::string &name, te::common::TraverseType travType=te::common::FORWARDONLY, const te::common::AccessPolicy accessPolicy=te::common::RAccess)
It gets the dataset identified by the given name. This method always returns a disconnected dataset...
Definition: DataSource.cpp:60
Variance statistical function.
Definition: Variance.h:46
TESTATEXPORT void GetNumericStatisticalSummary(std::vector< double > &values, te::stat::NumericStatisticalSummary &ss, double nullVal)
bool AggregationMemory(const std::string &inDataset, te::da::DataSource *inDatasource, const std::vector< te::dt::Property * > &groupingProperties, const std::map< te::dt::Property *, std::vector< te::stat::StatisticalSummary > > &statisticalSummary, te::mem::DataSet *outputDataSet, te::gm::GeomType outGeoType)
virtual std::auto_ptr< te::gm::Geometry > getGeometry(std::size_t i) const =0
Method for retrieving a geometric attribute value.
void setSRID(int srid)
It sets the spatial reference system identifier associated to this property.
void setGeometryN(std::size_t i, Geometry *g)
It sets the n-th geometry in this geometry collection.
bool AggregationQuery(const std::string &inDataset, te::da::DataSource *inDatasource, const std::vector< te::dt::Property * > &groupingProperties, const std::map< te::dt::Property *, std::vector< te::stat::StatisticalSummary > > &statisticalSummary, te::mem::DataSet *outputDataSet, te::gm::GeomType outGeoType)
Geometric property.
#define TR_VP(message)
It marks a string in order to get translated. This is a special mark used in the TerraLib Vector Pro...
Definition: Config.h:58
A dataset is the unit of information manipulated by the data access module of TerraLib.
Definition: DataSet.h:111
Avg statistical function.
Definition: Avg.h:46
TESTATEXPORT std::string GetStatSummaryShortName(const int &e)
Get the statistical parameter short name from its enumerator.
Definition: Utils.cpp:34
TEDATAACCESSEXPORT DataSetAdapter * CreateAdapter(DataSet *ds, DataSetTypeConverter *converter, bool isOwner=false)
Definition: Utils.cpp:591
virtual std::auto_ptr< te::da::DataSetType > getDataSetType(const std::string &name)
It gets information about the given dataset.
Definition: DataSource.cpp:155
Count statistical function.
Definition: Count.h:46