Differences
This shows you the differences between two versions of the page.
Next revision | Previous revision | ||
wiki:documentation:devguide:core:char_encoding [2016/09/29 16:13] gribeiro created |
wiki:documentation:devguide:core:char_encoding [2016/10/04 14:00] (current) carolina.santos [References] |
||
---|---|---|---|
Line 1: | Line 1: | ||
====== TerraLib.Core: CharEncoding ====== | ====== TerraLib.Core: CharEncoding ====== | ||
+ | |||
+ | In TerraLib, it was decided that all strings are encoded in UTF-8, including those used in input and output operations (strings coming from the drivers and strings passed through the TerraLib API). | ||
+ | |||
+ | To support this, the class **te::core::CharEncoding** was created. It allows the conversion of characters using [[http://www.boost.org/doc/libs/1_62_0/libs/locale/doc/html/index.html | Boost.Locale]]. | ||
+ | |||
+ | This class is used only through static member functions and it is not possible to create instances or copies of the class. | ||
+ | ===== API ===== | ||
+ | |||
+ | ==== C++ ==== | ||
+ | |||
+ | The API for converting characters in TerraLib is defined by the **CharEncoding** class, shown below: | ||
+ | |||
+ | <code cpp> | ||
+ | namespace te | ||
+ | { | ||
+ | namespace core | ||
+ | { | ||
+ | /*! | ||
+ | \enum EncodingType | ||
+ | |||
+ | \brief Supported character encodings. | ||
+ | */ | ||
+ | enum class EncodingType | ||
+ | { | ||
+ | UTF8, /*!< UTF-8 encoding. */ | ||
+ | CP1250, /*!< CP1250 encoding. */ | ||
+ | CP1251, /*!< CP1251 encoding. */ | ||
+ | CP1252, /*!< CP1252 encoding. */ | ||
+ | CP1253, /*!< CP1253 encoding. */ | ||
+ | CP1254, /*!< CP1254 encoding. */ | ||
+ | CP1257, /*!< CP1257 encoding. */ | ||
+ | LATIN1 /*!< Latin1 encoding (ISO8859-1). */ | ||
+ | }; | ||
+ | |||
+ | /*! | ||
+ | \class CharEncoding | ||
+ | |||
+ | \brief A class for handling character encoding/decoding. | ||
+ | */ | ||
+ | class CharEncoding | ||
+ | { | ||
+ | public: | ||
+ | /*! | ||
+ | \brief Convert a string in UTF-8 to the current locale encoding. | ||
+ | |||
+ | \param src UTF-8 string. | ||
+ | |||
+ | \exception te::Exception if the system locale cannot be retrieved. | ||
+ | |||
+ | \return String encoded according to new character encoding. | ||
+ | */ | ||
+ | static std::string fromUTF8(const std::string& src); | ||
+ | |||
+ | /*! | ||
+ | \brief Convert a string in UTF-8 to another character encoding. | ||
+ | |||
+ | \param src UTF-8 string. | ||
+ | \param to The target character encoding. | ||
+ | |||
+ | \return String encoded according to new character encoding. | ||
+ | */ | ||
+ | static std::string fromUTF8(const std::string& src, EncodingType to); | ||
+ | |||
+ | /*! | ||
+ | \brief Convert a string from the current locale encoding to UTF-8. | ||
+ | |||
+ | \param src String to be encoded in UTF-8. | ||
+ | |||
+ | \exception te::Exception if the system locale cannot be retrieved. | ||
+ | |||
+ | \return String encoded in UTF-8. | ||
+ | */ | ||
+ | static std::string toUTF8(const std::string& src); | ||
+ | |||
+ | /*! | ||
+ | \brief Convert a string from a given character encoding to UTF-8. | ||
+ | |||
+ | \param src String to be encoded in UTF-8. | ||
+ | \param from The current character encoding of the string. | ||
+ | |||
+ | \return String encoded in UTF-8. | ||
+ | */ | ||
+ | static std::string toUTF8(const std::string& src, EncodingType from); | ||
+ | |||
+ | /*! | ||
+ | \brief Convert a string from one character encoding to another one. | ||
+ | |||
+ | \param src String encoded according to "from" encoding type. | ||
+ | \param from Current string encoding. | ||
+ | \param to New encoding for the string. | ||
+ | |||
+ | \return String in a new encoding. | ||
+ | */ | ||
+ | static std::string convert(const std::string& src, EncodingType from, | ||
+ | EncodingType to); | ||
+ | /*! | ||
+ | \brief Decomposes a UTF-8 encoded string and extracts its ASCII | ||
+ | characters. | ||
+ | |||
+ | \note Non-ascii characters will be skipped. | ||
+ | |||
+ | \param src UTF-8 encoded string. | ||
+ | |||
+ | \exception te::Exception if the given string cannot be decomposed. | ||
+ | |||
+ | \return ASCII String. | ||
+ | */ | ||
+ | static std::string toASCII(const std::string& src); | ||
+ | /*! | ||
+ | \brief Retrieve a string from a given character encoding type enum. | ||
+ | |||
+ | \param et Encoding Type to be retrieved as string. | ||
+ | |||
+ | \return Encoding Type as string. | ||
+ | */ | ||
+ | static std::string getEncodingName(EncodingType et); | ||
+ | |||
+ | /*! | ||
+ | \brief Retrieve an EncodingType from a given character encoding name. | ||
+ | |||
+ | \param name Encoding name to be retrieved as enum. | ||
+ | |||
+ | \return Encoding as enum. | ||
+ | */ | ||
+ | static te::core::EncodingType getEncodingType(const std::string& name); | ||
+ | |||
+ | /*! | ||
+ | \brief Retrieve a vector of strings with the names of all available encoding types. | ||
+ | |||
+ | \return A vector of encoding type as string. | ||
+ | */ | ||
+ | static std::vector<std::string> getEncodingList(); | ||
+ | }; | ||
+ | } // end namespace core | ||
+ | } // end namespace te | ||
+ | </code> | ||
+ | |||
+ | ===== Example ===== | ||
+ | |||
+ | Here is a simple example using the functions provided by **CharEncoding** class: | ||
+ | |||
+ | <code cpp> | ||
+ | // TerraLib | ||
+ | #include <terralib/core/encoding/CharEncoding.h> | ||
+ | |||
+ | // STL | ||
+ | #include <iostream> | ||
+ | |||
+ | int main(int argc, char* argv[]) | ||
+ | { | ||
+ | |||
+ | std::string str = "your string"; | ||
+ | // from system encoding to utf8 | ||
+ | str = te::core::CharEncoding::toUTF8(str); | ||
+ | // from utf8 to system encoding | ||
+ | str = te::core::CharEncoding::fromUTF8(str); | ||
+ | // from latin1 to utf8 | ||
+ | str = te::core::CharEncoding::toUTF8(str, te::core::EncodingType::LATIN1); | ||
+ | // from utf8 to latin1 | ||
+ | str = te::core::CharEncoding::fromUTF8(str, te::core::EncodingType::LATIN1); | ||
+ | // from latin1 to cp1254 | ||
+ | str = te::core::CharEncoding::convert(str, te::core::EncodingType::LATIN1, | ||
+ | te::core::EncodingType::CP1254); | ||
+ | // from utf8 to ASCII | ||
+ | str = te::core::CharEncoding::toASCII(str); | ||
+ | } | ||
+ | </code> | ||
+ | |||
+ | ==== Interfaces Qt ==== | ||
+ | |||
+ | Here is an example of using a QString in Qt components: | ||
+ | |||
+ | <code cpp> | ||
+ | QString s = QString::fromUtf8("your string"); | ||
+ | std::string varStr = s.toUtf8().data(); | ||
+ | </code> | ||
+ | |||
+ | **Note:** if a character cannot be converted, it will be ignored. | ||
+ | |||
+ | ===== Additional References ===== | ||
+ | |||
+ | * [[http://stackoverflow.com/questions/1259084/what-encoding-code-page-is-cmd-exe-using | What encoding/code page is cmd.exe using]] | ||