All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
Reader.cpp
Go to the documentation of this file.
1 /* Copyright (C) 2001-2009 National Institute For Space Research (INPE) - Brazil.
2 
3  This file is part of the TerraLib - a Framework for building GIS enabled applications.
4 
5  TerraLib is free software: you can redistribute it and/or modify
6  it under the terms of the GNU Lesser General Public License as published by
7  the Free Software Foundation, either version 3 of the License,
8  or (at your option) any later version.
9 
10  TerraLib is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU Lesser General Public License for more details.
14 
15  You should have received a copy of the GNU Lesser General Public License
16  along with TerraLib. See COPYING. If not, write to
17  TerraLib Team at <terralib-team@terralib.org>.
18  */
19 
20 /*!
21  \file Reader.cpp
22 
23  \brief A class that models an XML reader object built on top of Xerces-C++.
24 */
25 
26 // TerraLib
27 #include "../common/Exception.h"
28 #include "../common/Translator.h"
29 #include "ErrorHandler.h"
30 #include "Exception.h"
31 #include "Reader.h"
32 #include "ReaderHandler.h"
33 #include "StrToXMLCh.h"
34 #include "Utils.h"
35 
36 // Xerces-C++
37 #include <xercesc/sax2/SAX2XMLReader.hpp>
38 #include <xercesc/sax2/XMLReaderFactory.hpp>
39 
41  : m_parser(0),
42  m_readerH(0),
43  m_errH(0),
44  m_token(0),
45  m_ignoreWhiteSpaces(false)
46 {
47  m_parser = xercesc::XMLReaderFactory::createXMLReader();
49  m_errH = new ErrorHandler;
50 
51  if(m_parser)
52  {
53  m_parser->setContentHandler(m_readerH);
54  m_parser->setErrorHandler(m_errH);
55  m_parser->setFeature(xercesc::XMLUni::fgSAX2CoreNameSpaces, true);
56  m_parser->setFeature(xercesc::XMLUni::fgXercesSchema, true);
57  m_parser->setFeature(xercesc::XMLUni::fgSAX2CoreValidation, true);
58  m_parser->setFeature(xercesc::XMLUni::fgXercesUseCachedGrammarInParse, true);
59  m_parser->setFeature(xercesc::XMLUni::fgXercesCacheGrammarFromParse, true);
60  m_parser->setInputBufferSize(TE_XERCES_READER_MAX_BUFFSIZE);
61  }
62 }
63 
65 {
66  reset();
67 
68  delete m_parser;
69  delete m_readerH;
70  delete m_errH;
71  delete m_token;
72 }
73 
75 {
76  m_parser->setFeature(xercesc::XMLUni::fgSAX2CoreNameSpaces, d);
77 }
78 
80 {
81  m_parser->setFeature(xercesc::XMLUni::fgXercesSchema, d);
82 }
83 
85 {
86  m_parser->setFeature(xercesc::XMLUni::fgSAX2CoreValidation, d);
87 }
88 
90 {
91  m_parser->setFeature(xercesc::XMLUni::fgXercesUseCachedGrammarInParse, d);
92 }
93 
95 {
96  m_parser->setFeature(xercesc::XMLUni::fgXercesCacheGrammarFromParse, d);
97 }
98 
100 {
101  m_ignoreWhiteSpaces = d;
102 }
103 
104 void te::xerces::Reader::read(const std::string& fileURI)
105 {
106 // reset parser if it is in use!
107  if(m_token)
108  reset();
109 
110 // create a new state for the progressive parser
111  delete m_token;
112  m_token = new xercesc::XMLPScanToken;
113 
114  try
115  {
116  if(!m_parser->parseFirst(fileURI.c_str(), *m_token))
117  {
118  ErrorHandler* errH = static_cast<ErrorHandler*>(m_parser->getErrorHandler());
119 
120  std::string m = TE_TR("Could not start reading the file due to the following problem: ");
121  m += errH->getErrors();
122  m += ".";
123 
124  throw Exception(m);
125  }
126 
127  if(m_readerH->getNodeType() == te::xml::UNKNOWN)
128  {
129  next();
130  }
131  }
132  catch(const xercesc::XMLException& e)
133  {
134  std::string m = TE_TR("Error when reading the XML document. Exception messages is: ");
135  m += ToString(e.getMessage());
136 
137  throw Exception(m);
138  }
139  catch(...)
140  {
141  throw Exception(TE_TR("Unexpected error reading the XML document!"));
142  }
143 }
144 
146 {
147  assert(m_token);
148 
149  m_readerH->reset();
150 
151  bool parserStatus = true;
152 
153  try
154  {
155  while(true && parserStatus)
156  {
157  if(m_readerH->isInContractedForm())
158  {
159  m_readerH->setNodeType(te::xml::END_ELEMENT);
160  m_readerH->setInContractedForm(false);
161  return true;
162  }
163  else
164  parserStatus = m_parser->parseNext(*m_token);
165 
166  if(m_parser->getErrorCount() != 0)
167  {
168  std::string errmsg = m_errH->getErrors();
169  throw Exception(errmsg);
170  }
171 
172  if(m_ignoreWhiteSpaces && getNodeType() == te::xml::WHITESPACE)
173  continue;
174 
175  if(m_readerH->getNodeType() != te::xml::UNKNOWN)
176  return true;
177  }
178  }
179  catch(const xercesc::XMLException& e)
180  {
181  std::string m = TE_TR("Error parsing the XML document. Exception messages is: ");
182  m += ToString(e.getMessage());
183 
184  throw Exception(m);
185  }
186  catch(const std::exception& /*e*/)
187  {
188  throw;
189  }
190  catch(...)
191  {
192  throw Exception(TE_TR("Unexpected error in parse next!"));
193  }
194 
195  return false;
196 }
197 
199 {
200  return m_readerH->getNodeType();
201 }
202 
204 {
205  return ToString(m_readerH->getElementURI());
206 }
207 
209 {
210  return ToString(m_readerH->getElementLocalName());
211 }
212 
214 {
215  return ToString(m_readerH->getElementQName());
216 }
217 
219 {
220  return ToString(m_readerH->getElementValue());
221 }
222 
224 {
225  return m_readerH->getDataLen();
226 }
227 
229 {
230  return (m_readerH->getElementAttrs() != 0) && (m_readerH->getElementAttrs()->getLength() > 0);
231 }
232 
234 {
235  return (hasAttrs() ? m_readerH->getElementAttrs()->getLength() : 0);
236 }
237 
238 std::string te::xerces::Reader::getAttr(const std::string& name) const
239 {
240  assert(m_readerH->getElementAttrs());
241 
242  const XMLCh* value = m_readerH->getElementAttrs()->getValue(StrToXMLCh(name).getXMLCh());
243 
244  return ToString(value);
245 }
246 
247 std::string te::xerces::Reader::getAttr(std::size_t i) const
248 {
249  assert(m_readerH->getElementAttrs());
250  assert(i < m_readerH->getElementAttrs()->getLength());
251 
252  const XMLCh* value = m_readerH->getElementAttrs()->getValue(i);
253 
254  return ToString(value);
255 }
256 
257 std::string te::xerces::Reader::getAttrLocalName(std::size_t i) const
258 {
259  assert(m_readerH->getElementAttrs());
260  assert(i < m_readerH->getElementAttrs()->getLength());
261 
262  const XMLCh* name = m_readerH->getElementAttrs()->getLocalName(i);
263 
264  return ToString(name);
265 }
266 
267 std::string te::xerces::Reader::getAttrQName(std::size_t i) const
268 {
269  assert(m_readerH->getElementAttrs());
270  assert(m_readerH->getElementAttrs()->getLength() > i);
271 
272  const XMLCh* name = m_readerH->getElementAttrs()->getQName(i);
273 
274  return ToString(name);
275 }
276 
277 std::string te::xerces::Reader::getAttrURI(std::size_t i) const
278 {
279  assert(m_readerH->getElementAttrs());
280  assert(m_readerH->getElementAttrs()->getLength() > i);
281 
282  const XMLCh* name = m_readerH->getElementAttrs()->getURI(i);
283 
284  return ToString(name);
285 }
286 
287 std::size_t te::xerces::Reader::getAttrPosition(const std::string& name) const
288 {
289  assert(m_readerH->getElementAttrs());
290 
291  return m_readerH->getElementAttrs()->getIndex(StrToXMLCh(name).getXMLCh());
292 }
293 
295 {
296  return m_readerH->getNumberOfNamespaces();
297 }
298 
299 void te::xerces::Reader::getNamespace(std::size_t i, std::pair<std::string, std::string>& ns) const
300 {
301  const std::pair<const XMLCh*, const XMLCh*>& nns = m_readerH->getNamespace(i);
302 
303  ns.first = ToString(nns.first);
304  ns.second = ToString(nns.second);
305 }
306 
307 void te::xerces::Reader::setInternalBufferSize(const std::size_t size)
308 {
309  m_parser->setInputBufferSize(size);
310 }
311 
313 {
314  if(m_token == 0)
315  return;
316 
317  try
318  {
319  m_parser->parseReset(*m_token);
320  }
321  catch(const xercesc::XMLException& e)
322  {
323  std::string m = TE_TR("Error reseting the parser. Exception messages is: ");
324  m += ToString(e.getMessage());
325 
326  throw Exception(m);
327  }
328  catch(...)
329  {
330  throw Exception(TE_TR("Unexpected error reseting the parser!"));
331  }
332 }
333 
std::string getAttrLocalName(std::size_t i) const
It returns the local part of the attribute name for the i-th attribute.
Definition: Reader.cpp:257
A class for converting a standard string to a Xerces string (XMLCh).
Definition: StrToXMLCh.h:47
bool next()
It gets the next event to be read.
Definition: Reader.cpp:145
std::string getElementURI() const
It returns the URI of the associated namespace in the case of an element node.
Definition: Reader.cpp:203
void setDoNamespaces(bool d)
It enables or disables the parser namespace processing.
Definition: Reader.cpp:74
Reader()
Default constructor.
Definition: Reader.cpp:40
std::size_t getAttrPosition(const std::string &name) const
It returns the attribute position.
Definition: Reader.cpp:287
void setIgnoreWhiteSpaces(bool d)
If true the parser will ignore the white space characters.
Definition: Reader.cpp:99
A class for converting a standard string to a Xerces string (XMLCh).
This class implements Xerces interface for error handlers.
std::string getAttrURI(std::size_t i) const
It returns the attribute URI of the associated namespace in the case of an element node...
Definition: Reader.cpp:277
#define TE_TR(message)
It marks a string in order to get translated.
Definition: Translator.h:345
ErrorHandler * m_errH
A pointer to an error handler.
Definition: Reader.h:139
void read(const std::string &fileURI)
It prepares the given file to be read.
Definition: Reader.cpp:104
void setCacheGrammarFromParse(bool d)
If true it caches the grammar in the pool for re-use in subsequent parses.
Definition: Reader.cpp:94
const std::string getErrors() const
It returns a concatenation of all reported error messages.
void setValidationScheme(bool d)
If true the parser will perform a validation scheme.
Definition: Reader.cpp:84
NodeType
The type of node read by XML reader.
Definition: Enums.h:40
This class implements Xerces interface for a handler that receives general document events...
Definition: ReaderHandler.h:52
std::size_t getNumberOfNamespaces() const
Definition: Reader.cpp:294
xercesc::SAX2XMLReader * m_parser
A pointer to the parser used by the reader.
Definition: Reader.h:137
std::string getAttrQName(std::size_t i) const
It returns the qualified name for the i-th attribute.
Definition: Reader.cpp:267
ReaderHandler * m_readerH
A pointer to a content handler.
Definition: Reader.h:138
void setInternalBufferSize(const std::size_t size)
It sets the maximal allowed buffer size used for parsing.
Definition: Reader.cpp:307
std::string getElementLocalName() const
It returns the local part of the element name in the case of an element node.
Definition: Reader.cpp:208
This class implements Xerces interface for a handler that receives general document events...
te::xml::NodeType getNodeType() const
It returns the type of node read.
Definition: Reader.cpp:198
std::size_t getElementDataLen() const
It returns the length of the element data value in the case of a VALUE node.
Definition: Reader.cpp:223
This class implements Xerces interface for error handlers.
Definition: ErrorHandler.h:50
void setUseCachedGrammarInParse(bool d)
If true the reader will use cached grammar if it exists in the pool.
Definition: Reader.cpp:89
std::string getElementQName() const
It returns the qualified name in the case of an element node.
Definition: Reader.cpp:213
void setDoSchema(bool d)
It enables or disables the parser schema processing.
Definition: Reader.cpp:79
This class is designed to declare objects to be thrown as exceptions by TerraLib. ...
std::string getElementValue() const
It returns the element data value in the case of VALUE node.
Definition: Reader.cpp:218
#define TE_XERCES_READER_MAX_BUFFSIZE
It defines the default internal buffer size for parsing a XERCES file.
Definition: Config.h:44
std::size_t getNumberOfAttrs() const
It returns the number of attributes in the case of an element node.
Definition: Reader.cpp:233
bool hasAttrs() const
It tells if the element has attributes in the case of an element node.
Definition: Reader.cpp:228
std::string getAttr(const std::string &name) const
It returns the attribute value in the case of an element node with valid attributes.
Definition: Reader.cpp:238
void getNamespace(std::size_t i, std::pair< std::string, std::string > &ns) const
Definition: Reader.cpp:299
~Reader()
Destructor.
Definition: Reader.cpp:64
std::string ToString(const XMLCh *const value)
It converts the XML string to a standard C++ string.
Definition: Utils.h:122
void reset()
It resets the parser.
Definition: Reader.cpp:312