All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
Reader.cpp
Go to the documentation of this file.
1 /* Copyright (C) 2008 National Institute For Space Research (INPE) - Brazil.
2 
3  This file is part of the TerraLib - a Framework for building GIS enabled applications.
4 
5  TerraLib is free software: you can redistribute it and/or modify
6  it under the terms of the GNU Lesser General Public License as published by
7  the Free Software Foundation, either version 3 of the License,
8  or (at your option) any later version.
9 
10  TerraLib is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU Lesser General Public License for more details.
14 
15  You should have received a copy of the GNU Lesser General Public License
16  along with TerraLib. See COPYING. If not, write to
17  TerraLib Team at <terralib-team@terralib.org>.
18  */
19 
20 /*!
21  \file Reader.cpp
22 
23  \brief A class that models an XML reader object built on top of Xerces-C++.
24 */
25 
26 // TerraLib
27 #include "../common/Exception.h"
28 #include "../common/Translator.h"
29 #include "ErrorHandler.h"
30 #include "Exception.h"
31 #include "Reader.h"
32 #include "ReaderHandler.h"
33 #include "StrToXMLCh.h"
34 #include "Utils.h"
35 
36 // Xerces-C++
37 #include <xercesc/sax2/SAX2XMLReader.hpp>
38 #include <xercesc/sax2/XMLReaderFactory.hpp>
39 
41  : m_parser(0),
42  m_readerH(0),
43  m_errH(0),
44  m_token(0),
45  m_ignoreWhiteSpaces(false)
46 {
47  m_parser = xercesc::XMLReaderFactory::createXMLReader();
49  m_errH = new ErrorHandler;
50 
51  if(m_parser)
52  {
53  m_parser->setContentHandler(m_readerH);
54  m_parser->setErrorHandler(m_errH);
55  m_parser->setFeature(xercesc::XMLUni::fgSAX2CoreNameSpaces, true);
56  m_parser->setFeature(xercesc::XMLUni::fgXercesSchema, true);
57  m_parser->setFeature(xercesc::XMLUni::fgSAX2CoreValidation, true);
58  m_parser->setFeature(xercesc::XMLUni::fgXercesUseCachedGrammarInParse, true);
59  m_parser->setFeature(xercesc::XMLUni::fgXercesCacheGrammarFromParse, true);
60  m_parser->setInputBufferSize(TE_XERCES_READER_MAX_BUFFSIZE);
61  }
62 }
63 
65 {
66  reset();
67 
68  delete m_parser;
69  delete m_readerH;
70  delete m_errH;
71  delete m_token;
72 }
73 
75 {
76  m_parser->setFeature(xercesc::XMLUni::fgSAX2CoreNameSpaces, d);
77 }
78 
80 {
81  m_parser->setFeature(xercesc::XMLUni::fgXercesSchema, d);
82 }
83 
85 {
86  m_parser->setFeature(xercesc::XMLUni::fgSAX2CoreValidation, d);
87 }
88 
90 {
91  m_parser->setFeature(xercesc::XMLUni::fgXercesUseCachedGrammarInParse, d);
92 }
93 
95 {
96  m_parser->setFeature(xercesc::XMLUni::fgXercesCacheGrammarFromParse, d);
97 }
98 
100 {
101  m_ignoreWhiteSpaces = d;
102 }
103 
104 void te::xerces::Reader::read(const std::string& fileURI)
105 {
106 // reset parser if it is in use!
107  if(m_token)
108  reset();
109 
110 // create a new state for the progressive parser
111  delete m_token;
112  m_token = new xercesc::XMLPScanToken;
113 
114  try
115  {
116  if(!m_parser->parseFirst(fileURI.c_str(), *m_token))
117  {
118  ErrorHandler* errH = static_cast<ErrorHandler*>(m_parser->getErrorHandler());
119 
120  std::string m = TE_TR("Could not start reading the file due to the following problem: ");
121  m += errH->getErrors();
122  m += ".";
123 
124  throw Exception(m);
125  }
126 
127  if(m_readerH->getNodeType() == te::xml::UNKNOWN)
128  {
129  next();
130  }
131  }
132  catch(const xercesc::XMLException& e)
133  {
134  std::string m = TE_TR("Error when reading the XML document. Exception messages is: ");
135  m += ToString(e.getMessage());
136 
137  throw Exception(m);
138  }
139  catch(...)
140  {
141  throw Exception(TE_TR("Unexpected error reading the XML document!"));
142  }
143 }
144 
146 {
147  assert(m_token);
148 
149  m_readerH->reset();
150 
151  bool parserStatus = true;
152 
153  try
154  {
155  while(true && parserStatus)
156  {
157  if(m_readerH->isInContractedForm())
158  {
159  m_readerH->setNodeType(te::xml::END_ELEMENT);
160  m_readerH->setInContractedForm(false);
161  return true;
162  }
163  else
164  parserStatus = m_parser->parseNext(*m_token);
165 
166  if(m_parser->getErrorCount() != 0)
167  {
168  std::string errmsg = m_errH->getErrors();
169  throw Exception(errmsg);
170  }
171 
172  if(m_readerH->isInContractedForm())
173  {
174  m_readerH->setNodeType(te::xml::START_ELEMENT);
175  }
176 
177  if(m_ignoreWhiteSpaces && getNodeType() == te::xml::WHITESPACE)
178  continue;
179 
180  if(m_readerH->getNodeType() != te::xml::UNKNOWN)
181  return true;
182  }
183  }
184  catch(const xercesc::XMLException& e)
185  {
186  std::string m = TE_TR("Error parsing the XML document. Exception messages is: ");
187  m += ToString(e.getMessage());
188 
189  throw Exception(m);
190  }
191  catch(const std::exception& /*e*/)
192  {
193  throw;
194  }
195  catch(...)
196  {
197  throw Exception(TE_TR("Unexpected error in parse next!"));
198  }
199 
200  return false;
201 }
202 
204 {
205  return m_readerH->getNodeType();
206 }
207 
209 {
210  return ToString(m_readerH->getElementURI());
211 }
212 
214 {
215  return ToString(m_readerH->getElementLocalName());
216 }
217 
219 {
220  return ToString(m_readerH->getElementQName());
221 }
222 
224 {
225  return ToString(m_readerH->getElementValue());
226 }
227 
229 {
230  return m_readerH->getDataLen();
231 }
232 
234 {
235  return (m_readerH->getElementAttrs() != 0) && (m_readerH->getElementAttrs()->getLength() > 0);
236 }
237 
239 {
240  return (hasAttrs() ? m_readerH->getElementAttrs()->getLength() : 0);
241 }
242 
243 std::string te::xerces::Reader::getAttr(const std::string& name) const
244 {
245  assert(m_readerH->getElementAttrs());
246 
247  const XMLCh* value = m_readerH->getElementAttrs()->getValue(StrToXMLCh(name).getXMLCh());
248 
249  return ToString(value);
250 }
251 
252 std::string te::xerces::Reader::getAttr(std::size_t i) const
253 {
254  assert(m_readerH->getElementAttrs());
255  assert(i < m_readerH->getElementAttrs()->getLength());
256 
257  const XMLCh* value = m_readerH->getElementAttrs()->getValue(i);
258 
259  return ToString(value);
260 }
261 
262 std::string te::xerces::Reader::getAttrLocalName(std::size_t i) const
263 {
264  assert(m_readerH->getElementAttrs());
265  assert(i < m_readerH->getElementAttrs()->getLength());
266 
267  const XMLCh* name = m_readerH->getElementAttrs()->getLocalName(i);
268 
269  return ToString(name);
270 }
271 
272 std::string te::xerces::Reader::getAttrQName(std::size_t i) const
273 {
274  assert(m_readerH->getElementAttrs());
275  assert(m_readerH->getElementAttrs()->getLength() > i);
276 
277  const XMLCh* name = m_readerH->getElementAttrs()->getQName(i);
278 
279  return ToString(name);
280 }
281 
282 std::string te::xerces::Reader::getAttrURI(std::size_t i) const
283 {
284  assert(m_readerH->getElementAttrs());
285  assert(m_readerH->getElementAttrs()->getLength() > i);
286 
287  const XMLCh* name = m_readerH->getElementAttrs()->getURI(i);
288 
289  return ToString(name);
290 }
291 
292 std::size_t te::xerces::Reader::getAttrPosition(const std::string& name) const
293 {
294  assert(m_readerH->getElementAttrs());
295 
296  return m_readerH->getElementAttrs()->getIndex(StrToXMLCh(name).getXMLCh());
297 }
298 
300 {
301  return m_readerH->getNumberOfNamespaces();
302 }
303 
304 void te::xerces::Reader::getNamespace(std::size_t i, std::pair<std::string, std::string>& ns) const
305 {
306  const std::pair<const XMLCh*, const XMLCh*>& nns = m_readerH->getNamespace(i);
307 
308  ns.first = ToString(nns.first);
309  ns.second = ToString(nns.second);
310 }
311 
312 void te::xerces::Reader::setInternalBufferSize(const std::size_t size)
313 {
314  m_parser->setInputBufferSize(size);
315 }
316 
318 {
319  if(m_token == 0)
320  return;
321 
322  try
323  {
324  m_parser->parseReset(*m_token);
325  }
326  catch(const xercesc::XMLException& e)
327  {
328  std::string m = TE_TR("Error reseting the parser. Exception messages is: ");
329  m += ToString(e.getMessage());
330 
331  throw Exception(m);
332  }
333  catch(...)
334  {
335  throw Exception(TE_TR("Unexpected error reseting the parser!"));
336  }
337 }
338 
std::string getAttrLocalName(std::size_t i) const
It returns the local part of the attribute name for the i-th attribute.
Definition: Reader.cpp:262
A class for converting a standard string to a Xerces string (XMLCh).
Definition: StrToXMLCh.h:47
bool next()
It gets the next event to be read.
Definition: Reader.cpp:145
std::string getElementURI() const
It returns the URI of the associated namespace in the case of an element node.
Definition: Reader.cpp:208
void setDoNamespaces(bool d)
It enables or disables the parser namespace processing.
Definition: Reader.cpp:74
Reader()
Default constructor.
Definition: Reader.cpp:40
std::size_t getAttrPosition(const std::string &name) const
It returns the attribute position.
Definition: Reader.cpp:292
void setIgnoreWhiteSpaces(bool d)
If true the parser will ignore the white space characters.
Definition: Reader.cpp:99
A class for converting a standard string to a Xerces string (XMLCh).
This class implements Xerces interface for error handlers.
std::string getAttrURI(std::size_t i) const
It returns the attribute URI of the associated namespace in the case of an element node...
Definition: Reader.cpp:282
#define TE_TR(message)
It marks a string in order to get translated.
Definition: Translator.h:347
ErrorHandler * m_errH
A pointer to an error handler.
Definition: Reader.h:139
void read(const std::string &fileURI)
It prepares the given file to be read.
Definition: Reader.cpp:104
void setCacheGrammarFromParse(bool d)
If true it caches the grammar in the pool for re-use in subsequent parses.
Definition: Reader.cpp:94
const std::string getErrors() const
It returns a concatenation of all reported error messages.
void setValidationScheme(bool d)
If true the parser will perform a validation scheme.
Definition: Reader.cpp:84
NodeType
The type of node read by XML reader.
Definition: Enums.h:40
This class implements Xerces interface for a handler that receives general document events...
Definition: ReaderHandler.h:52
std::size_t getNumberOfNamespaces() const
Definition: Reader.cpp:299
xercesc::SAX2XMLReader * m_parser
A pointer to the parser used by the reader.
Definition: Reader.h:137
std::string getAttrQName(std::size_t i) const
It returns the qualified name for the i-th attribute.
Definition: Reader.cpp:272
ReaderHandler * m_readerH
A pointer to a content handler.
Definition: Reader.h:138
void setInternalBufferSize(const std::size_t size)
It sets the maximal allowed buffer size used for parsing.
Definition: Reader.cpp:312
std::string getElementLocalName() const
It returns the local part of the element name in the case of an element node.
Definition: Reader.cpp:213
This class implements Xerces interface for a handler that receives general document events...
te::xml::NodeType getNodeType() const
It returns the type of node read.
Definition: Reader.cpp:203
std::size_t getElementDataLen() const
It returns the length of the element data in the case of VALUE node.
Definition: Reader.cpp:228
This class implements Xerces interface for error handlers.
Definition: ErrorHandler.h:50
void setUseCachedGrammarInParse(bool d)
If true the reader will use cached grammar if it exists in the pool.
Definition: Reader.cpp:89
std::string getElementQName() const
It returns the qualified name in the case of an element node.
Definition: Reader.cpp:218
void setDoSchema(bool d)
It enables or disables the parser schema processing.
Definition: Reader.cpp:79
This class is designed to declare objects to be thrown as exceptions by TerraLib. ...
std::string getElementValue() const
It returns the element data value in the case of VALUE node.
Definition: Reader.cpp:223
#define TE_XERCES_READER_MAX_BUFFSIZE
It defines the default internal buffer size for parsing a XERCES file.
Definition: Config.h:44
std::size_t getNumberOfAttrs() const
It returns the number of attributes in the case of an element node.
Definition: Reader.cpp:238
bool hasAttrs() const
It tells if the element has attributes in the case of an element node.
Definition: Reader.cpp:233
std::string getAttr(const std::string &name) const
It returns the attribute value in the case of an element node with valid attributes.
Definition: Reader.cpp:243
void getNamespace(std::size_t i, std::pair< std::string, std::string > &ns) const
Definition: Reader.cpp:304
~Reader()
Destructor.
Definition: Reader.cpp:64
std::string ToString(const XMLCh *const value)
It converts the XML string to a standard C++ string.
Definition: Utils.h:122
void reset()
It resets the parser.
Definition: Reader.cpp:317