All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
AggregationQuery.cpp
Go to the documentation of this file.
1 /* Copyright (C) 2008 National Institute For Space Research (INPE) - Brazil.
2 
3  This file is part of the TerraLib - a Framework for building GIS enabled applications.
4 
5  TerraLib is free software: you can redistribute it and/or modify
6  it under the terms of the GNU Lesser General Public License as published by
7  the Free Software Foundation, either version 3 of the License,
8  or (at your option) any later version.
9 
10  TerraLib is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU Lesser General Public License for more details.
14 
15  You should have received a copy of the GNU Lesser General Public License
16  along with TerraLib. See COPYING. If not, write to
17  TerraLib Team at <terralib-team@terralib.org>.
18  */
19 
20 /*!
21  \file AggregationQuery.cpp
22 
23  \brief Aggregation Vector Processing functions.
24 */
25 
26 //Terralib
27 
28 #include "../common/progress/TaskProgress.h"
29 #include "../common/Logger.h"
30 #include "../common/Translator.h"
31 
32 #include "../dataaccess/dataset/DataSet.h"
33 
34 #include "../datatype/Property.h"
35 #include "../datatype/SimpleProperty.h"
36 #include "../datatype/StringProperty.h"
37 
38 #include "../dataaccess/dataset/ObjectIdSet.h"
39 #include "../dataaccess/query/Avg.h"
40 #include "../dataaccess/query/Count.h"
41 #include "../dataaccess/query/DataSetName.h"
42 #include "../dataaccess/query/Expression.h"
43 #include "../dataaccess/query/Field.h"
44 #include "../dataaccess/query/Fields.h"
45 #include "../dataaccess/query/From.h"
46 #include "../dataaccess/query/FromItem.h"
47 #include "../dataaccess/query/GroupBy.h"
48 #include "../dataaccess/query/GroupByItem.h"
49 #include "../dataaccess/query/Max.h"
50 #include "../dataaccess/query/Min.h"
51 #include "../dataaccess/query/PropertyName.h"
52 #include "../dataaccess/query/Select.h"
53 #include "../dataaccess/query/Sub.h"
54 #include "../dataaccess/query/Sum.h"
55 #include "../dataaccess/query/StdDev.h"
56 #include "../dataaccess/query/ST_Union.h"
57 #include "../dataaccess/query/Variance.h"
58 #include "../dataaccess/query/Where.h"
59 #include "../dataaccess/utils/Utils.h"
60 
61 #include "../geometry/Geometry.h"
62 #include "../geometry/GeometryCollection.h"
63 #include "../geometry/GeometryProperty.h"
64 #include "../geometry/Utils.h"
65 
66 #include "../memory/DataSet.h"
67 #include "../memory/DataSetItem.h"
68 
69 #include "../statistics/core/SummaryFunctions.h"
70 #include "../statistics/core/StringStatisticalSummary.h"
71 #include "../statistics/core/NumericStatisticalSummary.h"
72 
73 #include "AggregationQuery.h"
74 #include "Config.h"
75 #include "Exception.h"
76 #include "Utils.h"
77 
78 // STL
79 #include <map>
80 #include <math.h>
81 #include <string>
82 #include <vector>
83 
84 // BOOST
85 #include <boost/lexical_cast.hpp>
86 #include <boost/algorithm/string.hpp>
87 
89 {}
90 
92 {}
93 
/*!
 \brief Runs the aggregation by issuing one grouped SELECT on the input data source.

 Steps visible in this listing:
 - describes the output dataset (named m_outDset) with an auto-number INT32 key,
   a clone of every grouping property, a "NUM_OBJ" count, one property per
   requested statistic and a "geom" geometry property;
 - builds the SELECT (fields, FROM the input dataset, optional WHERE taken from
   m_oidSet, GROUP BY the grouping properties) and runs it through m_inDsrc;
 - copies every result row into an in-memory dataset and hands it to te::vp::Save.

 \return false when the query result is empty (te::vp::Save is not called);
         true after te::vp::Save has been called.

 \exception te::common::Exception Declared in the exception specification.

 \note NOTE(review): this text comes from a Doxygen source listing; the leading
       numbers are listing line numbers, and listing lines 123, 203, 271 and 275
       are absent from this extract. Confirm against the original file.
*/
94 bool te::vp::AggregationQuery::run() throw(te::common::Exception)
95 {
96  std::auto_ptr<te::da::DataSetType> outDSetType(new te::da::DataSetType(m_outDset));
97 
98 // Primary key
99  te::dt::SimpleProperty* pkProperty = new te::dt::SimpleProperty(m_outDset + "_id", te::dt::INT32_TYPE);
100  pkProperty->setAutoNumber(true);
101  outDSetType->add(pkProperty);
102 
103  te::da::PrimaryKey* pk = new te::da::PrimaryKey(m_outDset + "_pk", outDSetType.get());
104  pk->add(pkProperty);
105  outDSetType->setPrimaryKey(pk);
106 
107  // include the grouping properties in the query
108  te::da::Fields* fields = new te::da::Fields;
109  for(std::size_t i=0; i<m_groupProps.size(); ++i)
110  {
111  te::da::Field* f_aggName = new te::da::Field(m_groupProps[i]->getName());
112  fields->push_back(f_aggName);
113 
114  te::dt::Property* propclone = m_groupProps[i]->clone();
115  outDSetType->add(propclone);
116  }
117 
118  // number of objects in each group (mandatory)
 // NOTE(review): m_groupProps[0] is read without checking that m_groupProps is non-empty.
119  te::da::Expression* e_aggCount = new te::da::Count(new te::da::PropertyName(m_groupProps[0]->getName()));
120  te::da::Field* f_aggCount = new te::da::Field(*e_aggCount, "NUM_OBJ");
121  fields->push_back(f_aggCount);
122 
 // NOTE(review): listing line 123 (the declaration of countProp) is absent from this extract.
124  outDSetType->add(countProp);
125 
126  // build the query expression according to the summarization requested
127  std::map<te::dt::Property*, std::vector<te::stat::StatisticalSummary> >::const_iterator itStat = m_statSum.begin();
128 
 // outer loop: one entry per summarized property; inner loop: one pass per statistic requested for it
129  while(itStat != m_statSum.end())
130  {
131  std::vector<te::stat::StatisticalSummary>::const_iterator itFunc = itStat->second.begin();
132  while (itFunc != itStat->second.end())
133  {
134  te::da::PropertyName* p_name = new te::da::PropertyName(itStat->first->getName());
135  int p_type = itStat->first->getType();
136  te::dt::Property* newProp = 0;
137 
138  te::da::Expression *sexp, *e_max, *e_min;
139  te::da::Field* sfield;
 // each case creates the aggregate field and the matching output property, both named <property>_<STATISTIC>
140  switch (*itFunc)
141  {
142  case MIN_VALUE:
143  sexp = new te::da::Min(p_name);
144  sfield = new te::da::Field(*sexp, p_name->getName() + "_MIN_VALUE");
145  if (p_type == te::dt::STRING_TYPE)
146  newProp = new te::dt::StringProperty(p_name->getName() + "_MIN_VALUE");
147  else
148  newProp = new te::dt::SimpleProperty(p_name->getName() + "_MIN_VALUE", p_type);
149  break;
150  case MAX_VALUE:
151  sexp = new te::da::Max(p_name);
152  sfield = new te::da::Field(*sexp, p_name->getName() + "_MAX_VALUE");
153  if (p_type == te::dt::STRING_TYPE)
154  newProp = new te::dt::StringProperty(p_name->getName() + "_MAX_VALUE");
155  else
156  newProp = new te::dt::SimpleProperty(p_name->getName() + "_MAX_VALUE", p_type);
157  break;
158  case MEAN:
159  sexp = new te::da::Avg(p_name);
160  sfield = new te::da::Field(*sexp, p_name->getName() + "_MEAN");
161  newProp = new te::dt::SimpleProperty(p_name->getName() + "_MEAN", te::dt::DOUBLE_TYPE);
162  break;
163  case SUM:
164  sexp = new te::da::Sum(p_name);
165  sfield = new te::da::Field(*sexp, p_name->getName() + "_SUM");
166  newProp = new te::dt::SimpleProperty(p_name->getName() + "_SUM", p_type);
167  break;
168  case COUNT:
169  sexp = new te::da::Count(p_name);
170  sfield = new te::da::Field(*sexp, p_name->getName() + "_COUNT");
171  newProp = new te::dt::SimpleProperty(p_name->getName() + "_COUNT", te::dt::INT32_TYPE);
172  break;
173  case STANDARD_DEVIATION:
174  sexp = new te::da::StdDev(p_name);
175  sfield = new te::da::Field(*sexp, p_name->getName() + "_STANDARD_DEVIATION");
176  newProp = new te::dt::SimpleProperty(p_name->getName() + "_STANDARD_DEVIATION", te::dt::DOUBLE_TYPE);
177  break;
178  case VARIANCE:
179  sexp = new te::da::Variance(p_name);
180  sfield = new te::da::Field(*sexp, p_name->getName() + "_VARIANCE");
181  newProp = new te::dt::SimpleProperty(p_name->getName() + "_VARIANCE", te::dt::DOUBLE_TYPE);
182  break;
 // amplitude is expressed as Max(property) - Min(property); both aggregates are built on the same p_name
183  case AMPLITUDE:
184  e_min = new te::da::Min(p_name);
185  e_max = new te::da::Max(p_name);
186  sexp = new te::da::Sub(*e_max, *e_min);
187  sfield = new te::da::Field(*sexp, p_name->getName() + "_AMPLITUDE");
188  newProp = new te::dt::SimpleProperty(p_name->getName() + "_AMPLITUDE", p_type);
189  break;
 // any other statistic adds neither a query field nor an output property
190  default:
191  ++itFunc;
192  continue;
193  }
194  fields->push_back(sfield);
195  outDSetType->add(newProp);
196  ++itFunc;
197  }
198  ++itStat;
199  }
200 
201  te::gm::GeometryProperty* geom = te::da::GetFirstGeomProperty(m_inDsetType.get());
202 
 // NOTE(review): listing line 203 (the declaration of e_union) is absent from this extract.
204  te::da::Field* f_union = new te::da::Field(*e_union, "geom");
205  fields->push_back(f_union);
206 
207  // define the resulting spatial property
 // NOTE(review): p is dereferenced below without a null check - confirm the input always has a geometry property.
208  te::gm::GeometryProperty* p = static_cast<te::gm::GeometryProperty*>(m_inDsetType->findFirstPropertyOfType(te::dt::GEOMETRY_TYPE));
209 
210  // creates the output geometry property
211  te::gm::GeometryProperty* geometry = new te::gm::GeometryProperty("geom");
212  te::gm::GeomType geotype = getGeomResultType(p->getGeometryType());
213  geometry->setGeometryType(geotype);
214  geometry->setSRID(p->getSRID());
215  outDSetType->add(geometry);
216 
217  te::da::FromItem* fromItem = new te::da::DataSetName(m_inDsetType->getName());
218  te::da::From* from = new te::da::From;
219  from->push_back(fromItem);
220 
221  te::da::Where* w_oid = 0;
222 
 // the WHERE clause is only built when an object id set was supplied
223  if(m_oidSet)
224  w_oid = new te::da::Where(m_oidSet->getExpression());
225 
226  te::da::Select select(fields, from, w_oid);
227 
228  te::da::GroupBy* groupBy = new te::da::GroupBy();
229  for(std::size_t i=0; i<m_groupProps.size(); ++i)
230  {
231  te::da::GroupByItem* e_groupBy = new te::da::GroupByItem(m_groupProps[i]->getName());
232  groupBy->push_back(e_groupBy);
233  }
234  select.setGroupBy(groupBy);
235 
236  std::auto_ptr<te::da::DataSet> dsQuery = m_inDsrc->query(select);
237 
238 
 // an empty result ends the run here; nothing is saved
239  if (dsQuery->isEmpty())
240  return false;
241 
242  std::auto_ptr<te::mem::DataSet> outDSet(new te::mem::DataSet(outDSetType.get()));
243 
 // key numbers the output rows sequentially, starting at 0
244  int key = 0;
245  dsQuery->moveBeforeFirst();
246  while (dsQuery->moveNext())
247  {
248  te::mem::DataSetItem* outDSetItem = new te::mem::DataSetItem(outDSet.get());
249  outDSetItem->setInt32(0, key);
250 
 // output column 0 holds the generated key, so output column i is filled from query column i-1;
 // null query values are left unset in the output item
251  for (size_t i=1; i<outDSetType->size(); ++i)
252  {
253  switch (outDSetType->getProperty(i)->getType())
254  {
255  case te::dt::STRING_TYPE:
256  if (!dsQuery->isNull(i-1))
257  outDSetItem->setString(i,dsQuery->getAsString(i-1));
258  break;
 // INT32 values are fetched as strings and converted with boost::lexical_cast
259  case te::dt::INT32_TYPE:
260  if (!dsQuery->isNull(i-1))
261  outDSetItem->setInt32(i, boost::lexical_cast<int>(dsQuery->getAsString(i-1)));
262  break;
263  case te::dt::INT64_TYPE:
264  if (!dsQuery->isNull(i-1))
265  outDSetItem->setInt64(i,dsQuery->getInt64(i-1));
266  break;
267  case te::dt::DOUBLE_TYPE:
268  if (!dsQuery->isNull(i-1))
269  outDSetItem->setDouble(i,dsQuery->getDouble(i-1));
270  break;
 // NOTE(review): the case label from listing line 271 is absent from this extract
 // (presumably the numeric type, given setNumeric below) - confirm against the original file.
272  if (!dsQuery->isNull(i-1))
273  outDSetItem->setNumeric(i,dsQuery->getNumeric(i-1));
274  break;
 // NOTE(review): the case label from listing line 275 is absent from this extract
 // (presumably the geometry type, given getGeometry below) - confirm against the original file.
276  if (!dsQuery->isNull(i-1))
277  {
278  std::auto_ptr<te::gm::Geometry> agg_geo(dsQuery->getGeometry(i-1));
 // a result whose type id differs from geotype is wrapped in a one-element GeometryCollection
279  if (agg_geo->getGeomTypeId() != geotype)
280  {
281  te::gm::GeometryCollection* gc = new te::gm::GeometryCollection(1,geotype,agg_geo->getSRID());
282  gc->setGeometryN(0, agg_geo.release());
283  outDSetItem->setGeometry("geom", gc);
284  }
285  else
286  outDSetItem->setGeometry("geom", agg_geo.release());
287  }
288  break;
 // properties of any other type are skipped
289  default:
290  continue;
291  }
292  }
293  outDSet->add(outDSetItem);
294  ++key;
295  }
296 
 // writes the in-memory result, described by outDSetType, through the output data source
297  te::vp::Save(m_outDsrc.get(), outDSet.get(), outDSetType.get());
298  return true;
299 }
void setAutoNumber(bool a)
It tells if the property is an autonumber or not.
const std::string & getName() const
It returns the property name.
Definition: PropertyName.h:80
boost::ptr_vector< GroupByItem > GroupBy
A class that can be used to model a GROUP BY clause.
Definition: GroupBy.h:37
Geometric property.
An exception class for the Vector processing module.
void add(te::dt::Property *p)
It adds a property to the list of properties of the primary key.
Definition: PrimaryKey.h:123
GeomType
Each enumerated type is compatible with a Well-known Binary (WKB) type code.
Definition: Enums.h:41
void setGeometry(std::size_t i, te::gm::Geometry *value)
It sets the value of the i-th property.
Utility functions for the data access module.
A class that can be used in a GROUP BY clause.
Definition: GroupByItem.h:50
An abstract class that models a source of data in a query.
Definition: FromItem.h:50
The Field class can be used to model an expression that takes part of the output items of a SELECT...
Definition: Field.h:50
void setSRID(int srid)
It sets the spatial reference system identifier associated to this property.
void setGeometryType(GeomType t)
It sets the geometry subtype.
void setDouble(std::size_t i, double value)
It sets the value of the i-th property.
An atomic property like an integer or double.
A class that models the name of a dataset used in a From clause.
Definition: DataSetName.h:43
A class that models the name of any property of an object.
Definition: PropertyName.h:50
A class that models the description of a dataset.
Definition: DataSetType.h:72
void Save(te::da::DataSource *source, te::da::DataSet *result, te::da::DataSetType *outDsType)
Definition: Utils.cpp:213
virtual Property * clone() const =0
It returns a clone of the object.
Count statistical function.
Definition: Count.h:46
Minimum value.
Definition: Enums.h:42
Sum of values.
Definition: Enums.h:45
It models a property definition.
Definition: Property.h:59
Avg statistical function.
Definition: Avg.h:46
Min statistical function.
Definition: Min.h:46
void setNumeric(std::size_t i, const std::string &value)
It sets the value of the i-th property.
This is an abstract class that models a query expression.
Definition: Expression.h:47
StdDev statistical function.
Definition: StdDev.h:46
Total number of values.
Definition: Enums.h:46
void setInt32(std::size_t i, boost::int32_t value)
It sets the value of the i-th property.
Variance.
Definition: Enums.h:50
Implementation of a random-access dataset class for the TerraLib In-Memory Data Access driver...
Definition: DataSet.h:65
TESAEXPORT double Sum(te::sa::GeneralizedProximityMatrix *gpm, int attrIdx)
Function used to calculate sum of a specific attribute from a gpm.
int getSRID() const
It returns the spatial reference system identifier associated to this property.
Mean.
Definition: Enums.h:44
GeomType getGeometryType() const
It returns the geometry subtype allowed for the property.
boost::ptr_vector< Field > Fields
Fields is just a boost::ptr_vector of Field pointers.
Definition: Fields.h:37
A class that can be used to model a filter expression that can be applied to a query.
Definition: Where.h:47
Maximum value.
Definition: Enums.h:43
The type for string types: FIXED_STRING, VAR_STRING or STRING.
A Select models a query to be used when retrieving data from a DataSource.
Definition: Select.h:65
boost::ptr_vector< FromItem > From
It models the FROM clause for a query.
Definition: From.h:37
An implementation of the DatasetItem class for the TerraLib In-Memory Data Access driver...
Definition: DataSetItem.h:56
The subtraction operator.
Definition: Sub.h:46
Aggregation Vector Processing functions.
Amplitude.
Definition: Enums.h:53
It describes a primary key (pk) constraint.
Definition: PrimaryKey.h:52
Variance statistical function.
Definition: Variance.h:46
void setGeometryN(std::size_t i, Geometry *g)
It sets the n-th geometry in this geometry collection.
ST_Union statistical function.
Definition: ST_Union.h:46
Configuration flags for the TerraLib Vector Processing module.
void setString(std::size_t i, const std::string &value)
It sets the value of the i-th property.
void setInt64(std::size_t i, boost::int64_t value)
It sets the value of the i-th property.
It is a collection of other geometric objects.
TEDATAACCESSEXPORT te::gm::GeometryProperty * GetFirstGeomProperty(const DataSetType *dt)
Definition: Utils.cpp:557
Max statistical function.
Definition: Max.h:46
Standard deviation.
Definition: Enums.h:48
const std::string & getName() const
It returns the property name.
Definition: Property.h:127