All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
CharEncodingConv.cpp
Go to the documentation of this file.
1 /* Copyright (C) 2008 National Institute For Space Research (INPE) - Brazil.
2 
3  This file is part of the TerraLib - a Framework for building GIS enabled applications.
4 
5  TerraLib is free software: you can redistribute it and/or modify
6  it under the terms of the GNU Lesser General Public License as published by
7  the Free Software Foundation, either version 3 of the License,
8  or (at your option) any later version.
9 
10  TerraLib is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU Lesser General Public License for more details.
14 
15  You should have received a copy of the GNU Lesser General Public License
16  along with TerraLib. See COPYING. If not, write to
17  TerraLib Team at <terralib-team@terralib.org>.
18  */
19 
20 /*!
21  \file terralib/common/CharEncodingConv.cpp
22 
23  \brief A class that deals with character encoding/decoding.
24  */
25 
26 // TerraLib
27 #include "../Defines.h"
28 #include "CharEncodingConv.h"
29 
30 // TerraLib
31 #include "Exception.h"
32 #include "Translator.h"
33 
34 // STL
35 #include <sstream>
36 
37 #ifdef TERRALIB_GNU_ICONV_ENABLED
38 // iconv
39 #include <errno.h>
40 #include <iconv.h>
41 
42 // internal iconv names
43 static const char* iconv_names[] = {"UTF-8", "CP1250", "CP1251", "CP1252", "CP1253", "CP1254", "CP1257", "ISO-8859-1"};
44 #endif
45 
46 #define TE_CONVERSION_BUFFERSIZE_SIZE 64
47 
48 // CharEncoding Names
49 std::map<te::common::CharEncoding, std::string> te::common::CharEncodingConv::sm_encodingNames;
50 
52  : m_fromCode(fromCode),
53  m_toCode(toCode)
54 {
56  throw Exception(TE_TR("Impossible conversion of unknown char encoding!"));
57 
58 #ifdef TERRALIB_GNU_ICONV_ENABLED
59  m_cd = iconv_open(iconv_names[toCode], iconv_names[fromCode]);
60 
61  if(m_cd == (iconv_t)(-1))
62  {
63  if(errno == EINVAL)
64  throw Exception(TE_TR("Failed to start iconv to start converting charsets: the conversion from fromCode to toCode is not supported!"));
65  else
66  throw Exception(TE_TR("Failed to start iconv to start converting charsets!"));
67  }
68 #endif
69 }
70 
72 {
73 #ifdef TERRALIB_GNU_ICONV_ENABLED
74  if(iconv_close(m_cd))
75  throw Exception(TE_TR("Failed to close iconv! This wasn't supposed to occur! Contact TerraLib Team!"));
76 #endif
77 }
78 
79 std::string te::common::CharEncodingConv::conv(const std::string& src)
80 {
81 #ifdef TERRALIB_GNU_ICONV_ENABLED
82  std::ostringstream outstring(std::ios_base::out);
83  const char* inbuff = src.c_str();
84  std::size_t inbytesleft = src.length();
85 
86  char outchar[TE_CONVERSION_BUFFERSIZE_SIZE];
87 
88  std::size_t nbytes = 0;
89 
90  for(;;)
91  {
92  char* outbuff = outchar;
93  std::size_t outbytesleft = TE_CONVERSION_BUFFERSIZE_SIZE;
94 
95 #if TE_PLATFORM == TE_PLATFORMCODE_MSWINDOWS
96  nbytes = iconv(m_cd, &inbuff, &inbytesleft, &outbuff, &outbytesleft);
97 
98 #elif TE_PLATFORM == TE_PLATFORMCODE_LINUX || TE_PLATFORM == TE_PLATFORMCODE_APPLE
99  nbytes = iconv(m_cd, (char**)(&inbuff), &inbytesleft, &outbuff, &outbytesleft);
100 
101 #else
102  #error "Platform not supported! Please contact terralib-team@dpi.inpe.br"
103 #endif
104 
105  if((nbytes == (std::size_t)(-1)) && (errno != E2BIG))
106  {
107  nbytes = iconv(m_cd, 0, 0, 0, 0);
108 
109  if(nbytes == (std::size_t)(-1))
110  throw Exception(TE_TR("Failed to convert character sets and also to bring iconv to its initial state!"));
111  else
112  throw Exception(TE_TR("Failed to convert character sets!"));
113  }
114 
115  outstring.write(outchar, TE_CONVERSION_BUFFERSIZE_SIZE - outbytesleft);
116 
117  if(inbytesleft == 0)
118  break;
119  }
120 
121  nbytes = iconv(m_cd, 0, 0, 0, 0);
122 
123  if(nbytes == (std::size_t)(-1))
124  throw Exception(TE_TR("Failed to bring iconv to its initial state!"));
125 
126  return outstring.str();
127 #else
128  return src;
129 #endif
130 }
131 
132 std::string te::common::CharEncodingConv::convert(const std::string& src, const CharEncoding& fromCode, const CharEncoding& toCode)
133 {
134  if(fromCode == UNKNOWN_CHAR_ENCODING || toCode == UNKNOWN_CHAR_ENCODING)
135  throw Exception(TE_TR("Impossible conversion of unknown char encoding!"));
136 
137 #ifdef TERRALIB_GNU_ICONV_ENABLED
138  iconv_t cd = iconv_open(iconv_names[toCode], iconv_names[fromCode]);
139 
140  if(cd == (iconv_t)(-1))
141  {
142  if(errno == EINVAL)
143  throw Exception(TE_TR("Failed to start iconv to start converting charsets: the conversion from fromCode to toCode is not supported!"));
144  else
145  throw Exception(TE_TR("Failed to start iconv to start converting charsets!"));
146  }
147 
148  std::ostringstream outstring(std::ios_base::out);
149  const char* inbuff = src.c_str();
150  std::size_t inbytesleft = src.length();
151 
152  char outchar[TE_CONVERSION_BUFFERSIZE_SIZE];
153 
154  for(;;)
155  {
156  char* outbuff = outchar;
157  std::size_t outbytesleft = TE_CONVERSION_BUFFERSIZE_SIZE;
158 
159 #if TE_PLATFORM == TE_PLATFORMCODE_MSWINDOWS
160  std::size_t nbytes = iconv(cd, &inbuff, &inbytesleft, &outbuff, &outbytesleft);
161 
162 #elif TE_PLATFORM == TE_PLATFORMCODE_LINUX || TE_PLATFORM == TE_PLATFORMCODE_APPLE
163  std::size_t nbytes = iconv(cd, (char**)(&inbuff), &inbytesleft, &outbuff, &outbytesleft);
164 
165 #else
166  #error "Platform not supported! Please contact terralib-team@dpi.inpe.br"
167 #endif
168 
169  if((nbytes == (std::size_t)(-1)) && (errno != E2BIG))
170  {
171  iconv_close(cd);
172  throw Exception(TE_TR("Failed to convert character sets!"));
173  }
174 
175  outstring.write(outchar, TE_CONVERSION_BUFFERSIZE_SIZE - outbytesleft);
176 
177  if(inbytesleft == 0)
178  break;
179  }
180 
181  if(iconv_close(cd))
182  throw Exception(TE_TR("Failed to close iconv!"));
183 
184  return outstring.str();
185 #else
186  return src;
187 #endif
188 }
189 
191 {
192  return sm_encodingNames[code];
193 }
194 
196 {
197  std::map<CharEncoding, std::string>::const_iterator it;
198  for(it = sm_encodingNames.begin(); it != sm_encodingNames.end(); ++it)
199  if(it->second == name)
200  return it->first;
201 
202  return UNKNOWN_CHAR_ENCODING;
203 }
204 
206 {
207  if(!sm_encodingNames.empty())
208  return;
209 
210  sm_encodingNames[UTF8 ] = "UTF-8";
211  sm_encodingNames[CP1250] = "CP1250";
212  sm_encodingNames[CP1251] = "CP1251";
213  sm_encodingNames[CP1252] = "CP1252";
214  sm_encodingNames[CP1253] = "CP1253";
215  sm_encodingNames[CP1254] = "CP1254";
216  sm_encodingNames[CP1257] = "CP1257";
217  sm_encodingNames[LATIN1] = "Latin1";
218  // continue...
219  sm_encodingNames[UNKNOWN_CHAR_ENCODING] = "Unknown";
220 }
void * iconv_t
CharEncoding
Supported charsets (character encoding).
static std::map< CharEncoding, std::string > sm_encodingNames
Static map with the encoding names.
iconv_t m_cd
The iconv state machine used to convert strings from one charset to another one. ...
static std::string convert(const std::string &src, const CharEncoding &fromCode, const CharEncoding &toCode)
A static method that converts the source string to a target charset.
CharEncodingConv(const CharEncoding &fromCode, const CharEncoding &toCode)
It prepares the conversion from one charset code to another one.
static void initialize()
It initializes the CharEncoding Converter.
#define TE_TR(message)
It marks a string in order to get translated.
Definition: Translator.h:347
static CharEncoding getCharEncodingType(const std::string &name)
It returns the charset type of the given charset name.
const CharEncoding m_fromCode
The origin charset.
std::string conv(const std::string &src)
It converts the source string to a target charset.
This class is designed to declare objects to be thrown as exceptions by TerraLib. ...
Definition: Exception.h:58
A class that deals with character encoding/decoding.
This class is designed for dealing with multi-language text translation in TerraLib.
const CharEncoding m_toCode
The target charset.
This class is designed to declare objects to be thrown as exceptions by TerraLib. ...
static std::string getCharEncodingName(const CharEncoding &code)
It returns the name of the given charset.
#define TE_CONVERSION_BUFFERSIZE_SIZE