TsCharEncoding.cpp
Go to the documentation of this file.
1 /*
2  Copyright (C) 2008 National Institute For Space Research (INPE) - Brazil.
3 
4  This file is part of the TerraLib - a Framework for building GIS enabled
5  applications.
6 
7  TerraLib is free software: you can redistribute it and/or modify
8  it under the terms of the GNU Lesser General Public License as published by
9  the Free Software Foundation, either version 3 of the License,
10  or (at your option) any later version.
11 
12  TerraLib is distributed in the hope that it will be useful,
13  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  GNU Lesser General Public License for more details.
16 
17  You should have received a copy of the GNU Lesser General Public License
18  along with TerraLib. See COPYING. If not, write to
19  TerraLib Team at <terralib-team@terralib.org>.
20  */
21 
22 /*!
23  \file terralib/unittest/core/encoding/TsCharEncoding.cpp
24 
25  \brief A test suite for the TerraLib Core Encoding Module.
26 
27  \author Matheus Cavassan Zaglia.
28  */
29 
30 // STL
31 #include <string>
32 #include <fstream>
33 
34 // TerraLib
35 #include <terralib_buildconfig.h>
38 #include <terralib/Defines.h>
39 
40 // Boost
41 #include <boost/test/unit_test.hpp>
42 
43 struct F
44 {
45  F()
46  : text_in_utf8(""),
47  text_in_latin1(""),
48  text_in_cp1252(""),
49  text_in_ascii("")
50  {
51  std::ifstream utf8_file(TERRALIB_DATA_DIR "/encoding/arq_utf8.txt");
52  std::ifstream latin1_file(TERRALIB_DATA_DIR "/encoding/arq_latin1.txt");
53  std::ifstream cp1252_file(TERRALIB_DATA_DIR "/encoding/arq_cp1252.txt");
54  std::ifstream ascii_file(TERRALIB_DATA_DIR "/encoding/arq_ascii.txt");
55  text_in_utf8 = std::string((std::istreambuf_iterator<char>(utf8_file)),
56  std::istreambuf_iterator<char>());
57  text_in_latin1 = std::string((std::istreambuf_iterator<char>(latin1_file)),
58  std::istreambuf_iterator<char>());
59  text_in_cp1252 = std::string((std::istreambuf_iterator<char>(cp1252_file)),
60  std::istreambuf_iterator<char>());
61  text_in_ascii = std::string((std::istreambuf_iterator<char>(ascii_file)),
62  std::istreambuf_iterator<char>());
63  }
64  ~F() {}
65 
66  std::string text_in_utf8;
67  std::string text_in_latin1;
68  std::string text_in_cp1252;
69  std::string text_in_ascii;
70 };
71 
73 
74 BOOST_AUTO_TEST_CASE(encoding_test_utils)
75 {
76  BOOST_CHECK(te::core::EncodingType::UTF8 ==
78 
79  BOOST_CHECK(te::core::EncodingType::CP1250 ==
81 
82  BOOST_CHECK(te::core::EncodingType::CP1251 ==
84 
85  BOOST_CHECK(te::core::EncodingType::CP1252 ==
87 
88  BOOST_CHECK(te::core::EncodingType::CP1253 ==
90 
91  BOOST_CHECK(te::core::EncodingType::CP1254 ==
93 
94  BOOST_CHECK(te::core::EncodingType::CP1257 ==
96 
97  BOOST_CHECK(te::core::EncodingType::LATIN1 ==
99 
100  BOOST_CHECK_THROW(te::core::CharEncoding::getEncodingType("UTF8"),
102 
103  BOOST_CHECK_NO_THROW(te::core::CharEncoding::getEncodingList());
104 }
105 
106 BOOST_AUTO_TEST_CASE(encoding_test_utf8_latin1)
107 {
108  std::string utf8_from_latin1 = te::core::CharEncoding::toUTF8(
110  std::string latin1_from_utf8 = te::core::CharEncoding::fromUTF8(
112 
113  BOOST_CHECK(utf8_from_latin1 == text_in_utf8);
114 
115  BOOST_CHECK(latin1_from_utf8 == text_in_latin1);
116 }
117 
118 BOOST_AUTO_TEST_CASE(encoding_test_utf8_cp1252)
119 {
120  std::string utf8_from_cp1252 = te::core::CharEncoding::toUTF8(
122 
123  std::string cp1252_from_utf8 = te::core::CharEncoding::fromUTF8(
125 
126  BOOST_CHECK(utf8_from_cp1252 == text_in_utf8);
127 
128  BOOST_CHECK(cp1252_from_utf8 == text_in_cp1252);
129 }
130 
131 BOOST_AUTO_TEST_CASE(encoding_test_latin_cp1252)
132 {
133  std::string latin1_from_cp1252 = te::core::CharEncoding::convert(
136 
137  std::string cp1252_from_latin1 = te::core::CharEncoding::convert(
140 
141  BOOST_CHECK(latin1_from_cp1252 == text_in_latin1);
142 
143  BOOST_CHECK(cp1252_from_latin1 == text_in_cp1252);
144 }
145 
146 BOOST_AUTO_TEST_CASE(encoding_test_utf8_ascii)
147 {
148  std::string ascii_from_utf8 = te::core::CharEncoding::toASCII(text_in_utf8);
149 
150  BOOST_CHECK(ascii_from_utf8 == text_in_ascii);
151 }
152 
153 BOOST_AUTO_TEST_CASE(encoding_test_locale_utf8)
154 {
155 #if TE_PLATFORM == TE_PLATFORMCODE_MSWINDOWS
156  std::string latin1_from_utf8 = te::core::CharEncoding::fromUTF8(text_in_utf8);
157  BOOST_CHECK(text_in_latin1 == latin1_from_utf8);
158 
159  std::string cp1252_from_utf8 = te::core::CharEncoding::fromUTF8(text_in_utf8);
160  BOOST_CHECK(text_in_cp1252 == cp1252_from_utf8);
161 #endif
162 }
163 
164 BOOST_AUTO_TEST_CASE(enconding_test_wrong_type)
165 {
166  // using wrong encoding when converting
167  std::string wrong_utf8 = te::core::CharEncoding::toUTF8(
169 
170  std::string wrong_latin1 = te::core::CharEncoding::fromUTF8(
172 
173  BOOST_CHECK(text_in_latin1 != wrong_latin1);
174 
175  BOOST_CHECK(text_in_utf8 != wrong_utf8);
176 }
177 
178 BOOST_AUTO_TEST_SUITE_END()
static std::string toASCII(const std::string &src)
Decomposes a UTF-8 encoded string and extracts its ASCII characters.
std::string text_in_utf8
std::string text_in_cp1252
static std::string fromUTF8(const std::string &src)
Convert a string in UTF-8 to the current locale encoding.
Proxy file for the real file terralib_defines.h.
std::string text_in_ascii
A class for handling character encoding/decoding.
static te::core::EncodingType getEncodingType(const std::string &name)
Retrieve an EncodingType from a given character encoding name.
static std::string convert(const std::string &src, EncodingType from, EncodingType to)
Convert a string from one character encoding to another one.
static std::string toUTF8(const std::string &src)
Convert a string from a current locale encoding to UTF-8.
BOOST_AUTO_TEST_CASE(encoding_test_utf8_latin1)
BOOST_FIXTURE_TEST_SUITE(encoding, F) BOOST_AUTO_TEST_CASE(encoding_test_utils)
Base exception class for TerraLib Core Runtime Library.
Exception classes for the TerraLib Core Runtime Library.
static std::vector< std::string > getEncodingList()
Retrieve a vector of strings with the names of all available encoding types.
std::string text_in_latin1