TerraLib and TerraView Wiki Page

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Next revision
Previous revision
wiki:documentation:devguide:core:char_encoding [2016/09/29 16:13]
gribeiro created
wiki:documentation:devguide:core:char_encoding [2016/10/04 14:00] (current)
carolina.santos [References]
Line 1: Line 1:
 ====== TerraLib.Core:​ CharEncoding ====== ====== TerraLib.Core:​ CharEncoding ======
 +
 +TerraLib has standardized on UTF-8 strings, both internally and for input and output operations (strings coming from the drivers and strings passed through the TerraLib API).
 +
 +To support this, the class **te::core::CharEncoding** was created; it converts characters between encodings using [[http://www.boost.org/doc/libs/1_62_0/libs/locale/doc/html/index.html | Boost.Locale]].
 +
 +This class is used only through static member functions and it is not possible to create instances or copies of the class.
 +===== API =====
 +
 +==== C++ ====
 +
 +The API for converting characters in TerraLib is defined by the **CharEncoding** class, shown below: ​
 +
 +<code cpp>
 +namespace te
 +{
 +  namespace core
 +  {
 +    /*!
 +      \enum EncodingType
 +
 +      \brief Supported character encodings.
 +    */
 +    enum class EncodingType
 +    {
 +      UTF8,   /​*!<​ UTF-8 encoding. ​              */
 +      CP1250, /*!< CP1250 encoding. ​             */
 +      CP1251, /*!< CP1251 encoding. ​             */
 +      CP1252, /*!< CP1252 encoding. ​             */
 +      CP1253, /*!< CP1253 encoding. ​             */
 +      CP1254, /*!< CP1254 encoding. ​             */
 +      CP1257, /*!< CP1257 encoding. ​             */
 +      LATIN1 ​ /*!< Latin1 encoding (ISO8859-1). ​ */
 +    };
 +
 +    /*!
 +      \class CharEncoding
 +
 +      \brief A class for handling character encoding/decoding.
 +     */
 +    class CharEncoding
 +    {
 +     ​public:​
 +      /*!
 +        \brief Convert a string in UTF-8 to the current locale encoding.
 +
 +        \param src UTF-8 string.
 +
 +        \exception te::​Exception if the system locale cannot be retrieved.
 +
 +        \return String encoded according to new character encoding.
 +       */
 +      static std::string fromUTF8(const std::​string&​ src);
 +
 +      /*!
 +        \brief Convert a string in UTF-8 to another character encoding.
 +
 +        \param src UTF-8 string.
 +        \param to  The target character encoding.
 +
 +        \return String encoded according to new character encoding.
 +       */
 +      static std::string fromUTF8(const std::​string&​ src, EncodingType to);
 +
 +      /*!
 +        \brief Convert a string from the current locale encoding to UTF-8.
 +
 +        \param src  String to be encoded in UTF-8.
 +
 +        \exception te::​Exception if the system locale cannot be retrieved.
 +
 +        \return String encoded in UTF-8.
 +       */
 +      static std::string toUTF8(const std::​string&​ src);
 +
 +      /*!
 +        \brief Convert a string from a given character encoding to UTF-8.
 +
 +        \param src  String to be encoded in UTF-8.
 +        \param from The current character encoding of the string.
 +
 +        \return String encoded in UTF-8.
 +       */
 +      static std::string toUTF8(const std::​string&​ src, EncodingType from);
 +
 +      /*!
 +        \brief Convert a string from one character encoding to another one.
 +
 +        \param src  String encoded according to "​from"​ encoding type.
 +        \param from Current string encoding.
 +        \param to   New encoding for the string.
 +
 +        \return String in a new encoding.
 +       */
 +      static std::string convert(const std::​string&​ src, EncodingType from,
 +                                 ​EncodingType to);
 +      /*!
 +        \brief Decomposes a UTF-8 encoded string and extracts its ASCII
 +        characters.
 +
 +        \note Non-ascii characters will be skipped.
 +
 +        \param src UTF-8 encoded string.
 +
 +        \exception te::​Exception if the given string cannot be decomposed.
 +
 +        \return ASCII String.
 +      */
 +      static std::string toASCII(const std::​string&​ src);
 +      /*!
 +        \brief Retrieve a string from a given character encoding type enum.
 +
 +        \param et Encoding Type to be retrieved as string.
 +
 +        \return Encoding Type as string.
 +       */
 +      static std::string getEncodingName(EncodingType et);
 +
 +      /*!
 +        \brief Retrieve an EncodingType from a given character encoding name.
 +
 +        \param name Encoding name to be retrieved as enum.
 +
 +        \return Encoding as enum.
 +       */
 +      static te::​core::​EncodingType getEncodingType(const std::​string&​ name);
 +
 +      /*!
 +        \brief Retrieve a vector of strings with the names of all available encoding types.
 +
 +        \return A vector of encoding type as string.
 +      */
 +      static std::​vector<​std::​string>​ getEncodingList();​
 +    };
 +  }  // end namespace core
 +}  // end namespace te
 +</​code>​
 +
 +===== Example =====
 +
 +Here is a simple example using the functions provided by **CharEncoding** class:
 +
 +<code cpp>
 +// TerraLib
 +#include <​terralib/​core/​encoding/​CharEncoding.h>​
 +
 +// STL
 +#include <​iostream>​
 +
 +int main(int argc, char* argv[])
 +{
 +
 +  std::string str = "your string";​
 +  // from system encoding to utf8
 +  str = te::​core::​CharEncoding::​toUTF8(str); ​
 +  // from utf8 to system encoding
 +  str = te::​core::​CharEncoding::​fromUTF8(str); ​
 +  // from latin1 to utf8
 +  str = te::​core::​CharEncoding::​toUTF8(str,​ te::​core::​EncodingType::​LATIN1); ​
 +  // from utf8 to latin1
 +  str = te::​core::​CharEncoding::​fromUTF8(str,​ te::​core::​EncodingType::​LATIN1); ​
 +  // from latin1 to cp1254
 +  str = te::​core::​CharEncoding::​convert(str,​ te::​core::​EncodingType::​LATIN1,​
 +                                        te::​core::​EncodingType::​CP1254);​
 +  // from utf8 to ASCII
 +  str = te::​core::​CharEncoding::​toASCII(str);​
 +}
 +</​code>​
 +
 +==== Interfaces Qt ====
 +
 +Here is an example of using a QString in Qt components:
 +
 +<code cpp>
 +QString s = QString::​fromUtf8("​your string"​);​
 +std::string varStr = s.toUtf8().data();​
 +</​code>​
 +
 +**Note:** if a character cannot be converted, it is ignored.
 +
 +===== Additional References =====
 +
 +  * [[http://​stackoverflow.com/​questions/​1259084/​what-encoding-code-page-is-cmd-exe-using | What encoding/​code page is cmd.exe using]]