CharEncodingConv.h
Go to the documentation of this file.
1 /* Copyright (C) 2008 National Institute For Space Research (INPE) - Brazil.
2 
3  This file is part of the TerraLib - a Framework for building GIS enabled applications.
4 
5  TerraLib is free software: you can redistribute it and/or modify
6  it under the terms of the GNU Lesser General Public License as published by
7  the Free Software Foundation, either version 3 of the License,
8  or (at your option) any later version.
9 
10  TerraLib is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU Lesser General Public License for more details.
14 
15  You should have received a copy of the GNU Lesser General Public License
16  along with TerraLib. See COPYING. If not, write to
17  TerraLib Team at <terralib-team@terralib.org>.
18  */
19 
20 /*!
21  \file terralib/common/CharEncodingConv.h
22 
23  \brief A class that deals with character encoding/decoding.
24  */
25 
26 #ifndef __TERRALIB_COMMON_INTERNAL_CHARENCODINGCONV_H
27 #define __TERRALIB_COMMON_INTERNAL_CHARENCODINGCONV_H
28 
29 // TerraLib
30 #include "Config.h"
31 
32 namespace te
33 {
34  namespace common
35  {
36  /*!
37  \enum CharEncoding
38 
39  \brief Supported charsets (character encoding).
40 
41  As described in Wikipedia (http://en.wikipedia.org/wiki/Character_encoding),
42  the terms character encoding, character set (charset), and sometimes character map or code page
43  are used interchangeably.
44  */
46  {
47  UTF8, /*!< UTF-8 encoding. */
48  CP1250, /*!< CP1250 encoding. */
49  CP1251, /*!< CP1251 encoding. */
50  CP1252, /*!< CP1252 encoding. */
51  CP1253, /*!< CP1253 encoding. */
52  CP1254, /*!< CP1254 encoding. */
53  CP1257, /*!< CP1257 encoding. */
54  LATIN1, /*!< ISO-8859-1 encoding (Latin1). */
55  // continue...
56  UNKNOWN_CHAR_ENCODING /*!< Unknown encoding. */
57  };
58  }
59 }
60 
61 // STL
62 #include <map>
63 #include <string>
64 
65 // Forward declarations
66 extern "C"
67 {
68  typedef void* iconv_t;
69 }
70 
71 namespace te
72 {
73  namespace common
74  {
75  /*!
76  \class CharEncodingConv
77 
78  \brief This class deals with character encoding/decoding.
79  */
81  {
82  public:
83 
84  /*!
85  \brief It prepares the conversion from one charset code to another one.
86 
87  \param fromCode The code of the origin charset of the string to be converted to the destination charset.
88  \param toCode The code of the destination charset of the string to be converted.
89  */
90  CharEncodingConv(const CharEncoding& fromCode, const CharEncoding& toCode);
91 
92  /*! \brief Destructor. */
94 
95  /*!
96  \brief It converts the source string to a target charset.
97 
98  \param src The string in a source charset.
99 
100  \return The string converted to the destination charset.
101 
102  \exception Exception It throws an exception if the conversion can not be done.
103 
104  \note This method tries to cache the internal encoder/decoder machine.
105 
106  \note Once an exception is thrown, the converter object cannot be used anymore because it may be left in a state that is unable to convert text.
107  */
108  std::string conv(const std::string& src);
109 
110  /*!
111  \brief A static method that converts the source string to a target charset.
112 
113  \param src The string in a source charset.
114  \param fromCode The code of the origin charset of the string to be converted to the destination charset.
115  \param toCode The code of the destination charset of the string to be converted.
116 
117  \return The string converted to the destination charset.
118 
119  \exception Exception It throws an exception if the conversion can not be done.
120 
121  \note This method doesn't cache the internal encoder/decoder machine.
122  */
123  static std::string convert(const std::string& src, const CharEncoding& fromCode, const CharEncoding& toCode);
124 
125  /*!
126  \brief It returns the name of the given charset.
127 
128  \param code The charset code.
129 
130  \return The name of the given charset.
131  */
132  static std::string getCharEncodingName(const CharEncoding& code);
133 
134  /*!
135  \brief It returns the charset type of the given charset name.
136 
137  \param name The charset name.
138 
139  \return The type of the given charset.
140  */
141  static CharEncoding getCharEncodingType(const std::string& name);
142 
143  /*! \brief It initializes the CharEncoding Converter. */
144  static void initialize();
145 
146  private:
147 
148  /*! \brief Copy constructor not allowed! */
150 
151  /*! \brief Assignment operator not allowed! */
152  CharEncodingConv& operator=(const CharEncodingConv& rhs);
153 
154  private:
155 
156  const CharEncoding m_fromCode; //!< The origin charset.
157  const CharEncoding m_toCode; //!< The target charset.
158  iconv_t m_cd; //!< The iconv state machine used to convert string from one character to another one.
159 
160  static std::map<CharEncoding, std::string> sm_encodingNames; //!< Static map with the encoding names.
161  };
162 
163  } // end namespace common
164 } // end namespace te
165 
166 #endif // __TERRALIB_COMMON_INTERNAL_CHARENCODINGCONV_H
void * iconv_t
CharEncoding
Supported charsets (character encoding).
iconv_t m_cd
The iconv state machine used to convert string from one character to another one. ...
Configuration flags for the TerraLib Common Runtime module.
This class deals with character encoding/decoding.
URI C++ Library.
#define TECOMMONEXPORT
You can use this macro in order to export/import classes and functions from this module.
Definition: Config.h:65
const CharEncoding m_fromCode
The origin charset.
const CharEncoding m_toCode
The target charset.
std::string TECOMMONEXPORT convert(const path &v)
URI path to string.
static std::map< CharEncoding, std::string > sm_encodingNames
Static map with the encoding names.