Add a function to convert a single Unicode code point to a UTF8 sequence.

llvm-svn: 160890
This commit is contained in:
Dmitri Gribenko 2012-07-27 20:36:22 +00:00
parent 8e87a75be7
commit ce675c52ba
3 changed files with 33 additions and 2 deletions

View File

@ -110,6 +110,8 @@ typedef unsigned char Boolean; /* 0 or 1 */
#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF
#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF
#define UNI_MAX_UTF8_BYTES_PER_CODE_POINT 4
typedef enum {
conversionOK, /* conversion successful */
sourceExhausted, /* partial character in source, but hit end */
@ -139,11 +141,13 @@ ConversionResult ConvertUTF8toUTF32 (
ConversionResult ConvertUTF16toUTF8 (
const UTF16** sourceStart, const UTF16* sourceEnd,
UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags);
#endif
ConversionResult ConvertUTF32toUTF8 (
const UTF32** sourceStart, const UTF32* sourceEnd,
UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags);
#ifdef CLANG_NEEDS_THESE_ONE_DAY
ConversionResult ConvertUTF16toUTF32 (
const UTF16** sourceStart, const UTF16* sourceEnd,
UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags);
@ -177,6 +181,18 @@ namespace clang {
bool ConvertUTF8toWide(unsigned WideCharWidth, llvm::StringRef Source,
char *&ResultPtr);
/**
* Convert an Unicode code point to UTF8 sequence.
*
* \param Source a Unicode code point.
* \param [in,out] ResultPtr pointer to the output buffer, needs to be at least
* \c UNI_MAX_UTF8_BYTES_PER_CODE_POINT bytes. On success \c ResultPtr is
* updated one past end of the converted sequence.
*
* \returns true on success.
*/
bool ConvertCodePointToUTF8(unsigned Source, char *&ResultPtr);
}
#endif

View File

@ -285,6 +285,7 @@ ConversionResult ConvertUTF16toUTF8 (
*targetStart = target;
return result;
}
#endif
/* --------------------------------------------------------------------- */
@ -339,8 +340,6 @@ ConversionResult ConvertUTF32toUTF8 (
return result;
}
#endif
/* --------------------------------------------------------------------- */
/*

View File

@ -51,4 +51,20 @@ bool ConvertUTF8toWide(unsigned WideCharWidth, llvm::StringRef Source,
return result == conversionOK;
}
bool ConvertCodePointToUTF8(unsigned Source, char *&ResultPtr) {
const UTF32 *SourceStart = &Source;
const UTF32 *SourceEnd = SourceStart + 1;
UTF8 *TargetStart = reinterpret_cast<UTF8 *>(ResultPtr);
UTF8 *TargetEnd = TargetStart + 4;
ConversionResult CR = ConvertUTF32toUTF8(&SourceStart, SourceEnd,
&TargetStart, TargetEnd,
strictConversion);
if (CR != conversionOK)
return false;
ResultPtr = reinterpret_cast<char*>(TargetStart);
return true;
}
} // end namespace clang