xerces/Reader.cpp
Go to the documentation of this file.
1 /* Copyright (C) 2008 National Institute For Space Research (INPE) - Brazil.
2 
3  This file is part of the TerraLib - a Framework for building GIS enabled applications.
4 
5  TerraLib is free software: you can redistribute it and/or modify
6  it under the terms of the GNU Lesser General Public License as published by
7  the Free Software Foundation, either version 3 of the License,
8  or (at your option) any later version.
9 
10  TerraLib is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU Lesser General Public License for more details.
14 
15  You should have received a copy of the GNU Lesser General Public License
16  along with TerraLib. See COPYING. If not, write to
17  TerraLib Team at <terralib-team@terralib.org>.
18  */
19 
20 /*!
21  \file Reader.cpp
22 
23  \brief A class that models a XML reader object built on top of Xerces-C++.
24 */
25 
26 // TerraLib
27 #include "../common/Exception.h"
28 #include "../core/encoding/CharEncoding.h"
29 #include "../core/logger/Logger.h"
30 #include "../core/translator/Translator.h"
31 #include "../xml/Exception.h"
32 #include "ErrorHandler.h"
33 #include "Reader.h"
34 #include "ReaderHandler.h"
35 #include "StrToXMLCh.h"
36 #include "Utils.h"
37 
38 // STL
39 #include <fstream>
40 
41 // Xerces-C++
42 #include <xercesc/sax2/SAX2XMLReader.hpp>
43 #include <xercesc/framework/MemBufInputSource.hpp>
44 #include <xercesc/sax2/XMLReaderFactory.hpp>
45 
47  : m_parser(nullptr),
48  m_readerH(nullptr),
49  m_errH(nullptr),
50  m_token(nullptr),
51  m_ignoreWhiteSpaces(true)
52 {
53  m_parser = xercesc::XMLReaderFactory::createXMLReader();
55  m_errH = new ErrorHandler;
56 
57  if(m_parser)
58  {
59  m_parser->setContentHandler(m_readerH);
60  m_parser->setErrorHandler(m_errH);
61  m_parser->setFeature(xercesc::XMLUni::fgSAX2CoreNameSpaces, true);
62  m_parser->setFeature(xercesc::XMLUni::fgXercesSchema, false);
63  m_parser->setFeature(xercesc::XMLUni::fgSAX2CoreValidation, false);
64  m_parser->setFeature(xercesc::XMLUni::fgXercesUseCachedGrammarInParse, true);
65  m_parser->setFeature(xercesc::XMLUni::fgXercesCacheGrammarFromParse, true);
66  m_parser->setInputBufferSize(TE_XERCES_READER_MAX_BUFFSIZE);
67  }
68 }
69 
71 {
72  try
73  {
74  reset();
75  }
76  catch (...)
77  {
78  TE_LOG_ERROR("Error reseting the parser.");
79  }
80 
81  delete m_parser;
82  delete m_readerH;
83  delete m_errH;
84  delete m_token;
85 }
86 
88 {
89  m_parser->setFeature(xercesc::XMLUni::fgSAX2CoreNameSpaces, d);
90 }
91 
93 {
94  m_parser->setFeature(xercesc::XMLUni::fgXercesSchema, d);
95 }
96 
98 {
99  m_parser->setFeature(xercesc::XMLUni::fgSAX2CoreValidation, d);
100 }
101 
103 {
104  m_parser->setFeature(xercesc::XMLUni::fgXercesUseCachedGrammarInParse, d);
105 }
106 
108 {
109  m_parser->setFeature(xercesc::XMLUni::fgXercesCacheGrammarFromParse, d);
110 }
111 
113 {
115 }
116 
117 void te::xerces::Reader::read(const std::string& fileURI)
118 {
119 // reset parser if it is in use!
120  if(m_token)
121  reset();
122 
123 // create a new state for the progressive parser
124  delete m_token;
125  m_token = new xercesc::XMLPScanToken;
126 
127  try
128  {
129  std::ifstream file(te::core::CharEncoding::fromUTF8(fileURI).c_str());
130  std::string text((std::istreambuf_iterator<char>(file)),
131  std::istreambuf_iterator<char>());
132  xercesc::MemBufInputSource xmlbuffer((const ::XMLByte*)text.c_str(), text.size(), "");
133  if (!m_parser->parseFirst(xmlbuffer, *m_token))
134  {
135  ErrorHandler* errH = static_cast<ErrorHandler*>(m_parser->getErrorHandler());
136 
137  std::string m = TE_TR("Could not start reading the file due to the following problem: ");
138  m += errH->getErrors();
139  m += ".";
140 
141  throw te::xml::Exception(m);
142  }
143 
145  {
146  next();
147  }
148  }
149  catch(const xercesc::XMLException& e)
150  {
151  std::string m = TE_TR("Error when reading the XML document. Exception messages is: ");
152  m += ToString(e.getMessage());
153 
154  throw te::xml::Exception(m);
155  }
156  catch(...)
157  {
158  throw te::xml::Exception(TE_TR("Unexpected error reading the XML document!"));
159  }
160 }
161 
163 {
164  assert(m_token);
165 
166  m_readerH->reset();
167 
168  bool parserStatus = true;
169 
170  try
171  {
172  while(true && parserStatus)
173  {
175  {
178  return true;
179  }
180  else
181  parserStatus = m_parser->parseNext(*m_token);
182 
183  if(m_parser->getErrorCount() != 0)
184  {
185  std::string errmsg = m_errH->getErrors();
186  throw te::xml::Exception(errmsg);
187  }
188 
190  {
192  }
193 
195  continue;
196 
198  return true;
199  }
200  }
201  catch(const xercesc::XMLException& e)
202  {
203  std::string m = TE_TR("Error parsing the XML document. Exception messages is: ");
204  m += ToString(e.getMessage());
205 
206  throw te::xml::Exception(m);
207  }
208  catch(const std::exception& /*e*/)
209  {
210  throw;
211  }
212  catch(...)
213  {
214  throw te::xml::Exception(TE_TR("Unexpected error in parse next!"));
215  }
216 
217  return false;
218 }
219 
221 {
222  return m_readerH->getNodeType();
223 }
224 
226 {
227  return ToString(m_readerH->getElementURI());
228 }
229 
231 {
233 }
234 
236 {
238 }
239 
241 {
243 }
244 
246 {
247  return m_readerH->getDataLen();
248 }
249 
251 {
252  return (m_readerH->getElementAttrs() != nullptr) && (m_readerH->getElementAttrs()->getLength() > 0);
253 }
254 
256 {
257  return (hasAttrs() ? m_readerH->getElementAttrs()->getLength() : 0);
258 }
259 
260 std::string te::xerces::Reader::getAttr(const std::string& name) const
261 {
262  assert(m_readerH->getElementAttrs());
263 
264  const XMLCh* value = m_readerH->getElementAttrs()->getValue(StrToXMLCh(name).getXMLCh());
265 
266  return ToString(value);
267 }
268 
269 std::string te::xerces::Reader::getAttr(std::size_t i) const
270 {
271  assert(m_readerH->getElementAttrs());
272  assert(i < m_readerH->getElementAttrs()->getLength());
273 
274  const XMLCh* value = m_readerH->getElementAttrs()->getValue(i);
275 
276  return ToString(value);
277 }
278 
279 std::string te::xerces::Reader::getAttrLocalName(std::size_t i) const
280 {
281  assert(m_readerH->getElementAttrs());
282  assert(i < m_readerH->getElementAttrs()->getLength());
283 
284  const XMLCh* name = m_readerH->getElementAttrs()->getLocalName(i);
285 
286  return ToString(name);
287 }
288 
289 std::string te::xerces::Reader::getAttrQName(std::size_t i) const
290 {
291  assert(m_readerH->getElementAttrs());
292  assert(m_readerH->getElementAttrs()->getLength() > i);
293 
294  const XMLCh* name = m_readerH->getElementAttrs()->getQName(i);
295 
296  return ToString(name);
297 }
298 
299 std::string te::xerces::Reader::getAttrURI(std::size_t i) const
300 {
301  assert(m_readerH->getElementAttrs());
302  assert(m_readerH->getElementAttrs()->getLength() > i);
303 
304  const XMLCh* name = m_readerH->getElementAttrs()->getURI(i);
305 
306  return ToString(name);
307 }
308 
309 std::size_t te::xerces::Reader::getAttrPosition(const std::string& name) const
310 {
311  assert(m_readerH->getElementAttrs());
312 
313  return m_readerH->getElementAttrs()->getIndex(StrToXMLCh(name).getXMLCh());
314 }
315 
317 {
319 }
320 
321 void te::xerces::Reader::getNamespace(std::size_t i, std::pair<std::string, std::string>& ns) const
322 {
323  const std::pair<const XMLCh*, const XMLCh*>& nns = m_readerH->getNamespace(i);
324 
325  ns.first = ToString(nns.first);
326  ns.second = ToString(nns.second);
327 }
328 
329 void te::xerces::Reader::setInternalBufferSize(const std::size_t size)
330 {
331  m_parser->setInputBufferSize(size);
332 }
333 
335 {
336  if(m_token == nullptr)
337  return;
338 
339  try
340  {
341  m_parser->parseReset(*m_token);
342  }
343  catch(const xercesc::XMLException& e)
344  {
345  std::string m = TE_TR("Error reseting the parser. Exception messages is: ");
346  m += ToString(e.getMessage());
347 
348  throw te::xml::Exception(m);
349  }
350  catch(...)
351  {
352  throw te::xml::Exception(TE_TR("Unexpected error reseting the parser!"));
353  }
354 }
355 
std::string getAttrLocalName(std::size_t i) const
It returns the local part of the attribute name for the i-th attribute.
A class for converting a standard string to a Xerces string (XMLCh).
Definition: StrToXMLCh.h:47
bool next()
It gets the next event to be read.
std::string getElementURI() const
It returns the URI of the associated namespace in the case of an element node.
bool m_ignoreWhiteSpaces
A flag that indicates if the parser should ignore white spaces.
void setDoNamespaces(bool d)
It enables or disables the parser namespace processing.
Reader()
Default constructor.
std::size_t getAttrPosition(const std::string &name) const
It returns the attribute position.
void setIgnoreWhiteSpaces(bool d)
If true the parser will ignore the white space characters.
A class for converting a standard string to a Xerces string (XMLCh).
This class implements Xerces interface for error handlers.
std::size_t getNumberOfNamespaces() const
const xercesc::Attributes * getElementAttrs() const
TODO.
std::string getAttrURI(std::size_t i) const
It returns the attribute URI of the associated namespace in the case of an element node...
#define TE_TR(message)
It marks a string in order to get translated.
Definition: Translator.h:242
static std::string fromUTF8(const std::string &src)
Convert a string in UTF-8 to the current locale encoding.
ErrorHandler * m_errH
A pointer to an error handler.
const XMLCh * getElementQName() const
TODO.
void setNodeType(te::xml::NodeType type)
void read(const std::string &fileURI)
It prepares the given file to be read.
const std::pair< const XMLCh *, const XMLCh * > & getNamespace(std::size_t i) const
void setCacheGrammarFromParse(bool d)
If true it caches the grammar in the pool for re-use in subsequent parses.
const std::string getErrors() const
It returns a concatenation of all reported error messages.
void setValidationScheme(bool d)
If true the parser will perform a validation scheme.
NodeType
The type of node read by XML reader.
This class implements Xerces interface for a handler that receives general document events...
Definition: ReaderHandler.h:52
std::size_t getNumberOfNamespaces() const
const XMLCh * getElementURI() const
TODO.
xercesc::SAX2XMLReader * m_parser
A pointer to the parser used by the reader.
std::string getAttrQName(std::size_t i) const
It returns the qualified name for the i-th attribute.
static te::dt::DateTime d(2010, 8, 9, 15, 58, 39)
ReaderHandler * m_readerH
A pointer to a content handler.
xercesc::XMLPScanToken * m_token
This keeps the parser state.
void setInternalBufferSize(const std::size_t size)
It sets the maximal allowed buffer size used for parsing.
#define TE_XERCES_READER_MAX_BUFFSIZE
It defines the default internal buffer size for parsing a XERCES file.
std::string getElementLocalName() const
It returns the local part of the element name in the case of an element node.
This class implements Xerces interface for a handler that receives general document events...
te::xml::NodeType getNodeType() const
It returns the type of node read.
std::size_t getElementDataLen() const
It returns the length of the element data value in the case of VALUE node.
This class implements Xerces interface for error handlers.
Definition: ErrorHandler.h:50
const XMLCh * getElementLocalName() const
TODO.
te::xml::NodeType getNodeType() const
It returns the type of node read by the handler.
bool isInContractedForm() const
void setUseCachedGrammarInParse(bool d)
If true the reader will use cached grammar if it exists in the pool.
XMLSize_t getDataLen() const
std::string getElementQName() const
It returns the qualified name in the case of an element node.
const XMLCh * getElementValue() const
void setDoSchema(bool d)
It enables or disables the parser schema processing.
#define TE_LOG_ERROR(message)
Use this tag in order to log a message to the TerraLib default logger with the ERROR level...
Definition: Logger.h:337
std::string getElementValue() const
It returns the element data value in the case of VALUE node.
std::size_t getNumberOfAttrs() const
It returns the number of attributes in the case of an element node.
bool hasAttrs() const
It tells if the element has attributes in the case of an element node.
std::string getAttr(const std::string &name) const
It returns the attribute value in the case of an element node with valid attributes.
void getNamespace(std::size_t i, std::pair< std::string, std::string > &ns) const
~Reader()
Destructor.
file(WRITE ${CMAKE_BINARY_DIR}/config_qhelp.cmake"configure_file (${TERRALIB_ABSOLUTE_ROOT_DIR}/doc/qhelp/help.qhcp.in ${CMAKE_BINARY_DIR}/share/terraview/help/help.qhcp @ONLY)") add_custom_command(OUTPUT del_dir COMMAND $
std::string ToString(const XMLCh *const value)
It converts the XML string to a standard C++ string.
void reset()
It resets the parser.