AggregationQuery.cpp
Go to the documentation of this file.
1 /* Copyright (C) 2008 National Institute For Space Research (INPE) - Brazil.
2 
3  This file is part of the TerraLib - a Framework for building GIS enabled applications.
4 
5  TerraLib is free software: you can redistribute it and/or modify
6  it under the terms of the GNU Lesser General Public License as published by
7  the Free Software Foundation, either version 3 of the License,
8  or (at your option) any later version.
9 
10  TerraLib is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU Lesser General Public License for more details.
14 
15  You should have received a copy of the GNU Lesser General Public License
16  along with TerraLib. See COPYING. If not, write to
17  TerraLib Team at <terralib-team@terralib.org>.
18  */
19 
20 /*!
21  \file AggregationQuery.cpp
22 
23  \brief Aggregation Vector Processing functions.
24 */
25 
26 //Terralib
27 
28 #include "../common/progress/TaskProgress.h"
29 #include "../common/Logger.h"
30 #include "../common/Translator.h"
31 
32 #include "../dataaccess/dataset/DataSet.h"
33 #include "../dataaccess/dataset/DataSetAdapter.h"
34 
35 #include "../datatype/Property.h"
36 #include "../datatype/SimpleProperty.h"
37 #include "../datatype/StringProperty.h"
38 
39 #include "../dataaccess/dataset/ObjectIdSet.h"
40 #include "../dataaccess/query/Avg.h"
41 #include "../dataaccess/query/Count.h"
42 #include "../dataaccess/query/DataSetName.h"
43 #include "../dataaccess/query/Expression.h"
44 #include "../dataaccess/query/Field.h"
45 #include "../dataaccess/query/Fields.h"
46 #include "../dataaccess/query/From.h"
47 #include "../dataaccess/query/FromItem.h"
48 #include "../dataaccess/query/GroupBy.h"
49 #include "../dataaccess/query/GroupByItem.h"
50 #include "../dataaccess/query/Max.h"
51 #include "../dataaccess/query/Min.h"
52 #include "../dataaccess/query/PropertyName.h"
53 #include "../dataaccess/query/Select.h"
54 #include "../dataaccess/query/Sub.h"
55 #include "../dataaccess/query/Sum.h"
56 #include "../dataaccess/query/StdDev.h"
57 #include "../dataaccess/query/ST_Union.h"
58 #include "../dataaccess/query/Variance.h"
59 #include "../dataaccess/query/Where.h"
60 #include "../dataaccess/utils/Utils.h"
61 
62 #include "../geometry/Geometry.h"
63 #include "../geometry/GeometryCollection.h"
64 #include "../geometry/GeometryProperty.h"
65 #include "../geometry/Utils.h"
66 
67 #include "../memory/DataSet.h"
68 #include "../memory/DataSetItem.h"
69 
70 #include "../statistics/core/SummaryFunctions.h"
71 #include "../statistics/core/StringStatisticalSummary.h"
72 #include "../statistics/core/NumericStatisticalSummary.h"
73 
74 #include "AggregationQuery.h"
75 #include "Config.h"
76 #include "Exception.h"
77 #include "Utils.h"
78 
79 // STL
80 #include <map>
81 #include <math.h>
82 #include <string>
83 #include <vector>
84 
85 // BOOST
86 #include <boost/lexical_cast.hpp>
87 #include <boost/algorithm/string.hpp>
88 
90 {}
91 
93 {}
94 
/*!
  \brief Runs the aggregation as one GROUP BY query on the input data source and saves the result.

  Steps, in order:
   - builds the output dataset type: an auto-numbered "<m_outDset>_id" primary key, a clone of every
     grouping property, the object-count property, one property per requested statistic and a "geom"
     geometry property;
   - builds the matching SELECT (grouping fields, NUM_OBJ, statistic fields, "geom") over the dataset
     named by m_converter->getResult(), filtered by m_oidSet when it is set, grouped by m_groupProps;
   - executes it through m_inDsrc and copies every returned row into an in-memory dataset;
   - saves that dataset through te::vp::Save using m_outDsrc.

  \return false when the query result is empty (nothing is saved); true otherwise.

  \note m_groupProps[0] is read unconditionally, so at least one grouping property is assumed.
  \note NOTE(review): the statements numbered 124, 204, 273, 277 and 301 are missing from this listing
        (the declarations of countProp and e_union, two case labels and one statement before the
        adapter is created). Confirm them against the real source file.
*/
95 bool te::vp::AggregationQuery::run() throw(te::common::Exception)
96 {
97  std::auto_ptr<te::da::DataSetType> outDSetType(new te::da::DataSetType(m_outDset));
98 
99 // Primary key
100  te::dt::SimpleProperty* pkProperty = new te::dt::SimpleProperty(m_outDset + "_id", te::dt::INT32_TYPE);
101  pkProperty->setAutoNumber(true);
102  outDSetType->add(pkProperty);
103 
104  te::da::PrimaryKey* pk = new te::da::PrimaryKey(m_outDset + "_pk", outDSetType.get());
105  pk->add(pkProperty);
106  outDSetType->setPrimaryKey(pk);
107 
108  // include the grouping properties in the query
109  te::da::Fields* fields = new te::da::Fields;
110  for(std::size_t i=0; i<m_groupProps.size(); ++i)
111  {
112  te::da::Field* f_aggName = new te::da::Field(m_groupProps[i]->getName());
113  fields->push_back(f_aggName);
114 
115  te::dt::Property* propclone = m_groupProps[i]->clone();
116  outDSetType->add(propclone);
117  }
118 
119  // number of objects in each group (mandatory)
    // NOTE(review): e_aggCount is passed to Field by reference; if Field copies it, the heap-allocated
    // original is never freed - confirm against the te::da::Field constructor.
120  te::da::Expression* e_aggCount = new te::da::Count(new te::da::PropertyName(m_groupProps[0]->getName()));
121  te::da::Field* f_aggCount = new te::da::Field(*e_aggCount, "NUM_OBJ");
122  fields->push_back(f_aggCount);
123 
    // NOTE(review): countProp is declared on a line missing from this listing (124); presumably the
    // output property that receives NUM_OBJ - confirm.
125  outDSetType->add(countProp);
126 
127  // build the query expression according to the summarization requested
    // For every (property, statistic) pair in m_statSum one query field and one output property are
    // created, both named "<property name>_<STATISTIC>".
128  std::map<te::dt::Property*, std::vector<te::stat::StatisticalSummary> >::const_iterator itStat = m_statSum.begin();
129 
130  while(itStat != m_statSum.end())
131  {
132  std::vector<te::stat::StatisticalSummary>::const_iterator itFunc = itStat->second.begin();
133  while (itFunc != itStat->second.end())
134  {
135  te::da::PropertyName* p_name = new te::da::PropertyName(itStat->first->getName());
136  int p_type = itStat->first->getType();
137  te::dt::Property* newProp = 0;
138 
139  te::da::Expression *sexp, *e_max, *e_min;
140  te::da::Field* sfield;
141  switch (*itFunc)
142  {
    // MIN/MAX keep the source property type (strings get a StringProperty).
143  case MIN_VALUE:
144  sexp = new te::da::Min(p_name);
145  sfield = new te::da::Field(*sexp, p_name->getName() + "_MIN_VALUE");
146  if (p_type == te::dt::STRING_TYPE)
147  newProp = new te::dt::StringProperty(p_name->getName() + "_MIN_VALUE");
148  else
149  newProp = new te::dt::SimpleProperty(p_name->getName() + "_MIN_VALUE", p_type);
150  break;
151  case MAX_VALUE:
152  sexp = new te::da::Max(p_name);
153  sfield = new te::da::Field(*sexp, p_name->getName() + "_MAX_VALUE");
154  if (p_type == te::dt::STRING_TYPE)
155  newProp = new te::dt::StringProperty(p_name->getName() + "_MAX_VALUE");
156  else
157  newProp = new te::dt::SimpleProperty(p_name->getName() + "_MAX_VALUE", p_type);
158  break;
    // MEAN, STANDARD_DEVIATION and VARIANCE are stored as DOUBLE; COUNT as INT32;
    // SUM and AMPLITUDE keep the source property type.
159  case MEAN:
160  sexp = new te::da::Avg(p_name);
161  sfield = new te::da::Field(*sexp, p_name->getName() + "_MEAN");
162  newProp = new te::dt::SimpleProperty(p_name->getName() + "_MEAN", te::dt::DOUBLE_TYPE);
163  break;
164  case SUM:
165  sexp = new te::da::Sum(p_name);
166  sfield = new te::da::Field(*sexp, p_name->getName() + "_SUM");
167  newProp = new te::dt::SimpleProperty(p_name->getName() + "_SUM", p_type);
168  break;
169  case COUNT:
170  sexp = new te::da::Count(p_name);
171  sfield = new te::da::Field(*sexp, p_name->getName() + "_COUNT");
172  newProp = new te::dt::SimpleProperty(p_name->getName() + "_COUNT", te::dt::INT32_TYPE);
173  break;
174  case STANDARD_DEVIATION:
175  sexp = new te::da::StdDev(p_name);
176  sfield = new te::da::Field(*sexp, p_name->getName() + "_STANDARD_DEVIATION");
177  newProp = new te::dt::SimpleProperty(p_name->getName() + "_STANDARD_DEVIATION", te::dt::DOUBLE_TYPE);
178  break;
179  case VARIANCE:
180  sexp = new te::da::Variance(p_name);
181  sfield = new te::da::Field(*sexp, p_name->getName() + "_VARIANCE");
182  newProp = new te::dt::SimpleProperty(p_name->getName() + "_VARIANCE", te::dt::DOUBLE_TYPE);
183  break;
    // Amplitude is expressed in the query as max - min.
    // NOTE(review): the same p_name pointer is handed to both Min and Max; if each takes ownership of
    // its argument this is a double delete - confirm against the te::da function constructors.
184  case AMPLITUDE:
185  e_min = new te::da::Min(p_name);
186  e_max = new te::da::Max(p_name);
187  sexp = new te::da::Sub(*e_max, *e_min);
188  sfield = new te::da::Field(*sexp, p_name->getName() + "_AMPLITUDE");
189  newProp = new te::dt::SimpleProperty(p_name->getName() + "_AMPLITUDE", p_type);
190  break;
    // Any other statistic is skipped: no field and no output property are added for it.
    // p_name, allocated above, is neither used nor deleted on this path.
191  default:
192  ++itFunc;
193  continue;
194  }
195  fields->push_back(sfield);
196  outDSetType->add(newProp);
197  ++itFunc;
198  }
199  ++itStat;
200  }
201 
202  te::gm::GeometryProperty* geom = te::da::GetFirstGeomProperty(m_converter->getResult());
203 
    // NOTE(review): e_union is declared on a line missing from this listing (204); presumably an
    // ST_Union over the property `geom` - confirm.
205  te::da::Field* f_union = new te::da::Field(*e_union, "geom");
206  fields->push_back(f_union);
207 
208  // define the resulting spatial property
209  te::gm::GeometryProperty* p = static_cast<te::gm::GeometryProperty*>(m_converter->getResult()->findFirstPropertyOfType(te::dt::GEOMETRY_TYPE));
210 
211  // creates the output geometry property
    // The output geometry type is derived from the input one by getGeomResultType; the SRID is copied.
212  te::gm::GeometryProperty* geometry = new te::gm::GeometryProperty("geom");
213  te::gm::GeomType geotype = getGeomResultType(p->getGeometryType());
214  geometry->setGeometryType(geotype);
215  geometry->setSRID(p->getSRID());
216  outDSetType->add(geometry);
217 
218  te::da::FromItem* fromItem = new te::da::DataSetName(m_converter->getResult()->getName());
219  te::da::From* from = new te::da::From;
220  from->push_back(fromItem);
221 
    // The WHERE clause is only built when an object id set was supplied; otherwise it stays null.
222  te::da::Where* w_oid = 0;
223 
224  if(m_oidSet)
225  w_oid = new te::da::Where(m_oidSet->getExpression());
226 
227  te::da::Select select(fields, from, w_oid);
228 
229  te::da::GroupBy* groupBy = new te::da::GroupBy();
230  for(std::size_t i=0; i<m_groupProps.size(); ++i)
231  {
232  te::da::GroupByItem* e_groupBy = new te::da::GroupByItem(m_groupProps[i]->getName());
233  groupBy->push_back(e_groupBy);
234  }
235  select.setGroupBy(groupBy);
236 
237  std::auto_ptr<te::da::DataSet> dsQuery = m_inDsrc->query(select);
238 
239 
    // An empty result means there is nothing to aggregate: report failure without saving anything.
240  if (dsQuery->isEmpty())
241  return false;
242 
243  std::auto_ptr<te::mem::DataSet> outDSet(new te::mem::DataSet(outDSetType.get()));
244 
    // key is the value written to the primary-key column: a running counter starting at 0.
245  int key = 0;
246  dsQuery->moveBeforeFirst();
247 
248  while (dsQuery->moveNext())
249  {
250  te::mem::DataSetItem* outDSetItem = new te::mem::DataSetItem(outDSet.get());
251  outDSetItem->setInt32(0, key);
252 
    // Output property i corresponds to query column i - 1: output property 0 is the generated
    // primary key, which is not part of the query. Null query values are left unset in the output item.
253  for (size_t i=1; i<outDSetType->size(); ++i)
254  {
255  switch (outDSetType->getProperty(i)->getType())
256  {
257  case te::dt::STRING_TYPE:
258  if (!dsQuery->isNull(i - 1))
259  outDSetItem->setString(i, dsQuery->getAsString(i - 1));
260  break;
    // INT32 values are read as text and converted with boost::lexical_cast.
261  case te::dt::INT32_TYPE:
262  if (!dsQuery->isNull(i - 1))
263  outDSetItem->setInt32(i, boost::lexical_cast<int>(dsQuery->getAsString(i - 1)));
264  break;
265  case te::dt::INT64_TYPE:
266  if (!dsQuery->isNull(i - 1))
267  outDSetItem->setInt64(i, dsQuery->getInt64(i - 1));
268  break;
269  case te::dt::DOUBLE_TYPE:
270  if (!dsQuery->isNull(i - 1))
271  outDSetItem->setDouble(i, dsQuery->getDouble(i - 1));
272  break;
    // NOTE(review): the case label for this branch (line 273) is missing from this listing;
    // presumably te::dt::NUMERIC_TYPE - confirm.
274  if (!dsQuery->isNull(i - 1))
275  outDSetItem->setNumeric(i, dsQuery->getNumeric(i - 1));
276  break;
    // NOTE(review): the case label for this branch (line 277) is missing from this listing;
    // presumably te::dt::GEOMETRY_TYPE - confirm.
278  if (!dsQuery->isNull(i - 1))
279  {
280  std::auto_ptr<te::gm::Geometry> agg_geo(dsQuery->getGeometry(i - 1));
281 
    // When the returned geometry type differs from the declared output type, the geometry is wrapped
    // in a one-element GeometryCollection of the output type; otherwise it is stored as is.
282  if (agg_geo->getGeomTypeId() != geotype)
283  {
284  te::gm::GeometryCollection* gc = new te::gm::GeometryCollection(1,geotype,agg_geo->getSRID());
285  gc->setGeometryN(0, agg_geo.release());
286  outDSetItem->setGeometry("geom", gc);
287  }
288  else
289  outDSetItem->setGeometry("geom", agg_geo.release());
290  }
291  break;
    // Property types not handled above are left unset in the output item.
292  default:
293  continue;
294  }
295  }
296  outDSet->add(outDSetItem);
297  ++key;
298  }
299 
    // NOTE(review): converter is allocated with new and not deleted in this function; whether the
    // adapter takes ownership of it is not visible here - confirm. Line 301 is missing from this listing.
300  te::da::DataSetTypeConverter* converter = new te::da::DataSetTypeConverter(outDSetType.get(), m_outDsrc->getCapabilities(), m_outDsrc->getEncoding());
302  std::auto_ptr<te::da::DataSetAdapter> dsAdapter(te::da::CreateAdapter(outDSet.get(), converter));
303 
304  te::vp::Save(m_outDsrc.get(), dsAdapter.get(), outDSetType.get());
305  return true;
306 }
void setAutoNumber(bool a)
It tells if the property is an autonumber or not.
const std::string & getName() const
It returns the property name.
Definition: PropertyName.h:80
boost::ptr_vector< GroupByItem > GroupBy
A class that can be used to model a GROUP BY clause.
Definition: GroupBy.h:37
Geometric property.
An exception class for the Vector processing module.
void add(te::dt::Property *p)
It adds a property to the list of properties of the primary key.
Definition: PrimaryKey.h:123
GeomType
Each enumerated type is compatible with a Well-known Binary (WKB) type code.
Definition: Enums.h:41
void setGeometry(std::size_t i, te::gm::Geometry *value)
It sets the value of the i-th property.
Utility functions for the data access module.
A class that can be used in a GROUP BY clause.
Definition: GroupByItem.h:50
An abstract class that models a source of data in a query.
Definition: FromItem.h:50
The Field class can be used to model an expression that takes part of the output items of a SELECT...
Definition: Field.h:50
void setSRID(int srid)
It sets the spatial reference system identifier associated to this property.
void setGeometryType(GeomType t)
It sets the geometry subtype.
void setDouble(std::size_t i, double value)
It sets the value of the i-th property.
An atomic property like an integer or double.
A class that models the name of a dataset used in a From clause.
Definition: DataSetName.h:43
TEDATAACCESSEXPORT void AssociateDataSetTypeConverterSRID(DataSetTypeConverter *converter, const int &inputSRID, const int &outputSRID=TE_UNKNOWN_SRS)
Definition: Utils.cpp:670
A class that models the name of any property of an object.
Definition: PropertyName.h:50
A class that models the description of a dataset.
Definition: DataSetType.h:72
void Save(te::da::DataSource *source, te::da::DataSet *result, te::da::DataSetType *outDsType)
Definition: Utils.cpp:172
virtual Property * clone() const =0
It returns a clone of the object.
Count statistical function.
Definition: Count.h:46
Minimum value.
Definition: Enums.h:42
Sum of values.
Definition: Enums.h:45
It models a property definition.
Definition: Property.h:59
Avg statistical function.
Definition: Avg.h:46
Min statistical function.
Definition: Min.h:46
void setNumeric(std::size_t i, const std::string &value)
It sets the value of the i-th property.
This is an abstract class that models a query expression.
Definition: Expression.h:47
StdDev statistical function.
Definition: StdDev.h:46
Total number of values.
Definition: Enums.h:46
A converter for DataSetType.
void setInt32(std::size_t i, boost::int32_t value)
It sets the value of the i-th property.
Variance.
Definition: Enums.h:50
Implementation of a random-access dataset class for the TerraLib In-Memory Data Access driver...
Definition: DataSet.h:65
TESAEXPORT double Sum(te::sa::GeneralizedProximityMatrix *gpm, int attrIdx)
Function used to calculate sum of a specific attribute from a gpm.
int getSRID() const
It returns the spatial reference system identifier associated to this property.
Mean.
Definition: Enums.h:44
URI C++ Library.
GeomType getGeometryType() const
It returns the geometry subtype allowed for the property.
boost::ptr_vector< Field > Fields
Fields is just a boost::ptr_vector of Field pointers.
Definition: Fields.h:37
A class that can be used to model a filter expression that can be applied to a query.
Definition: Where.h:47
Maximum value.
Definition: Enums.h:43
The type for string types: FIXED_STRING, VAR_STRING or STRING.
A Select models a query to be used when retrieving data from a DataSource.
Definition: Select.h:65
boost::ptr_vector< FromItem > From
It models the FROM clause for a query.
Definition: From.h:37
An implementation of the DatasetItem class for the TerraLib In-Memory Data Access driver...
Definition: DataSetItem.h:56
The subtraction operator.
Definition: Sub.h:46
Aggregation Vector Processing functions.
Amplitude.
Definition: Enums.h:53
It describes a primary key (pk) constraint.
Definition: PrimaryKey.h:52
Variance statistical function.
Definition: Variance.h:46
void setGeometryN(std::size_t i, Geometry *g)
It sets the n-th geometry in this geometry collection.
ST_Union statistical function.
Definition: ST_Union.h:46
Configuration flags for the TerraLib Vector Processing module.
void setString(std::size_t i, const std::string &value)
It sets the value of the i-th property.
void setInt64(std::size_t i, boost::int64_t value)
It sets the value of the i-th property.
It is a collection of other geometric objects.
TEDATAACCESSEXPORT te::gm::GeometryProperty * GetFirstGeomProperty(const DataSetType *dt)
Definition: Utils.cpp:557
TEDATAACCESSEXPORT DataSetAdapter * CreateAdapter(DataSet *ds, DataSetTypeConverter *converter, bool isOwner=false)
Definition: Utils.cpp:644
Max statistical function.
Definition: Max.h:46
Standard deviation.
Definition: Enums.h:48
const std::string & getName() const
It returns the property name.
Definition: Property.h:127