Merge pull request #541 from smowton/string-refine-unicode
utf8 to utf16 conversion and utf16 to ascii
This commit is contained in:
commit
d1e691e7ed
|
@ -28,6 +28,7 @@ matrix:
|
|||
packages:
|
||||
- libwww-perl
|
||||
- clang-3.7
|
||||
- libstdc++-5-dev
|
||||
- libubsan0
|
||||
before_install:
|
||||
- mkdir bin ; ln -s /usr/bin/clang-3.7 bin/gcc
|
||||
|
|
|
@ -7,6 +7,10 @@ Author: Daniel Kroening, kroening@kroening.com
|
|||
\*******************************************************************/
|
||||
|
||||
#include <cstring>
|
||||
#include <locale>
|
||||
#include <codecvt>
|
||||
#include <iomanip>
|
||||
#include <sstream>
|
||||
|
||||
#include "unicode.h"
|
||||
|
||||
|
@ -258,3 +262,79 @@ const char **narrow_argv(int argc, const wchar_t **argv_wide)
|
|||
|
||||
return argv_narrow;
|
||||
}
|
||||
|
||||
/*******************************************************************\

Function: utf8_to_utf16_big_endian

  Inputs: in - byte string holding UTF-8 encoded text

 Outputs: Wide string holding the UTF-16 encoding of `in`, one
          UTF-16 code unit per wchar_t element. The conversion
          facet is left in its default codecvt_mode, i.e. the
          little_endian flag is not set.

 Purpose: Decode UTF-8 into UTF-16 using the standard library's
          std::codecvt_utf8_utf16 facet. Errors are reported by
          std::wstring_convert itself; no error string is
          installed here.

          Note this requires g++-5 libstdc++ / libc++ / MSVC2010+

\*******************************************************************/

std::wstring utf8_to_utf16_big_endian(const std::string& in)
{
  // default-mode facet: only the character type is specified
  using utf8_utf16_facett=std::codecvt_utf8_utf16<wchar_t>;

  std::wstring_convert<utf8_utf16_facett> utf8_decoder;

  // convert the whole byte range [data, data+size) in one go
  const char *const first=in.data();
  const char *const last=first+in.size();
  std::wstring widened=utf8_decoder.from_bytes(first, last);
  return widened;
}
|
||||
|
||||
/*******************************************************************\

Function: utf8_to_utf16_little_endian

  Inputs: in - byte string holding UTF-8 encoded text

 Outputs: Wide string holding the UTF-16 encoding of `in`, one
          UTF-16 code unit per wchar_t element, produced by a
          facet configured with the little_endian mode flag.

 Purpose: Decode UTF-8 into UTF-16 using the standard library's
          std::codecvt_utf8_utf16 facet. Errors are reported by
          std::wstring_convert itself; no error string is
          installed here.

          Note this requires g++-5 libstdc++ / libc++ / MSVC2010+

\*******************************************************************/

std::wstring utf8_to_utf16_little_endian(const std::string& in)
{
  // The mode can only be passed after the maximum code point, so the
  // facet's default limit of 0x10ffff has to be spelled out:
  // see: http://en.cppreference.com/w/cpp/locale/codecvt_utf8_utf16
  typedef std::codecvt_utf8_utf16<
    wchar_t,
    0x10fffful,
    std::codecvt_mode::little_endian> little_endian_facett;

  std::wstring_convert<little_endian_facett> utf8_decoder;
  std::wstring widened=utf8_decoder.from_bytes(in);
  return widened;
}
|
||||
|
||||
/*******************************************************************\

Function: utf16_little_endian_to_ascii

  Inputs: in - wide string whose elements are UTF-16 code units

 Outputs: String in US-ASCII format. Printable ASCII code units
          (0x20 to 0x7e inclusive) are copied through unchanged;
          every other code unit is written as a \uxxxx escape
          with at least four lower-case hexadecimal digits.

 Purpose: Render a UTF-16 string so that it can be emitted on an
          ASCII-only channel. The printable test is an explicit
          range check rather than a locale query, so the result
          never contains bytes above 0x7f and does not depend on
          the global locale.

\*******************************************************************/

std::string utf16_little_endian_to_ascii(const std::wstring& in)
{
  std::ostringstream result;

  // hex base and '0' padding are sticky stream state: set them once.
  // They only affect the numeric escapes, as plain characters are
  // written with width 0.
  result << std::hex << std::setfill('0');

  for(const auto c : in)
  {
    // printable US-ASCII is exactly the range space..tilde
    if(c>=0x20 && c<=0x7e)
      result << static_cast<char>(c);
    else
    {
      // std::setw is reset after each insertion, so re-apply it for
      // every escape
      result << "\\u"
             << std::setw(4)
             << static_cast<unsigned int>(c);
    }
  }

  return result.str();
}
|
||||
|
|
|
@ -22,6 +22,10 @@ std::wstring widen(const std::string &s);
|
|||
std::string utf32_to_utf8(const std::basic_string<unsigned int> &s);
|
||||
std::string utf16_to_utf8(const std::basic_string<unsigned short int> &s);
|
||||
|
||||
std::wstring utf8_to_utf16_big_endian(const std::string &);
|
||||
std::wstring utf8_to_utf16_little_endian(const std::string &);
|
||||
std::string utf16_little_endian_to_ascii(const std::wstring &in);
|
||||
|
||||
const char **narrow_argv(int argc, const wchar_t **argv_wide);
|
||||
|
||||
#endif // CPROVER_UTIL_UNICODE_H
|
||||
|
|
Loading…
Reference in New Issue