CaseFoldUtf8String: Fixed to actually work with Unicode values.

I think this started off as a UTF-32 output, and I forgot to actually
finish this off to encode back to UTF-8.

Fixes #9607.
This commit is contained in:
Ryan C. Gordon 2024-04-23 18:57:48 -04:00
parent 8acb91936a
commit be6b8dead4
No known key found for this signature in database
GPG Key ID: FA148B892AB48044
1 changed file with 53 additions and 9 deletions

View File

@@ -136,35 +136,79 @@ static SDL_bool WildcardMatch(const char *pattern, const char *str, SDL_bool *ma
return (pch == '\0'); // survived the whole pattern? That's a match!
}
// Encode a single Unicode codepoint as UTF-8.
//
// `ptr` is the destination buffer, `cp` is the codepoint to encode, and
// `remaining` is the number of bytes available at `ptr`.
//
// Returns the number of bytes written (1 to 4). Returns 0, writing nothing,
// if the encoded sequence does not fit in `remaining` bytes or if `cp` is
// greater than 0x10FFFF. No null terminator is ever appended.
//
// Note that this will currently encode illegal codepoints: UTF-16 surrogates,
// 0xFFFE, and 0xFFFF, and a codepoint > 0x10FFFF fails exactly the same way
// as running out of space does. Clean this up if you want to move this to
// SDL_string.c.
static size_t EncodeCodepointToUtf8(char *ptr, Uint32 cp, size_t remaining)
{
    // Store through an unsigned view of the buffer: every byte is assembled
    // (payload bits OR'd with the marker bits) as an unsigned value and only
    // then narrowed, so no out-of-range value is ever converted to a
    // possibly-signed `char`.
    unsigned char *dst = (unsigned char *) ptr;

    if (cp < 0x80) { // fits in a single UTF-8 byte: 0xxxxxxx
        if (remaining >= 1) {
            dst[0] = (unsigned char) cp;
            return 1;
        }
    } else if (cp < 0x800) { // fits in 2 bytes: 110xxxxx 10xxxxxx
        if (remaining >= 2) {
            dst[0] = (unsigned char) (0xC0 | (cp >> 6));
            dst[1] = (unsigned char) (0x80 | (cp & 0x3F));
            return 2;
        }
    } else if (cp < 0x10000) { // fits in 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
        if (remaining >= 3) {
            dst[0] = (unsigned char) (0xE0 | (cp >> 12));
            dst[1] = (unsigned char) (0x80 | ((cp >> 6) & 0x3F));
            dst[2] = (unsigned char) (0x80 | (cp & 0x3F));
            return 3;
        }
    } else if (cp <= 0x10FFFF) { // fits in 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        if (remaining >= 4) {
            dst[0] = (unsigned char) (0xF0 | (cp >> 18));
            dst[1] = (unsigned char) (0x80 | ((cp >> 12) & 0x3F));
            dst[2] = (unsigned char) (0x80 | ((cp >> 6) & 0x3F));
            dst[3] = (unsigned char) (0x80 | (cp & 0x3F));
            return 4;
        }
    }

    return 0; // not enough room, or codepoint is beyond the Unicode range.
}
static char *CaseFoldUtf8String(const char *fname)
{
SDL_assert(fname != NULL);
const size_t allocation = (SDL_strlen(fname) + 1) * 3;
const size_t allocation = (SDL_strlen(fname) + 1) * 3 * 4;
char *retval = (char *) SDL_malloc(allocation); // lazy: just allocating the max needed.
if (!retval) {
return NULL;
}
Uint32 codepoint;
size_t written = 0;
char *ptr = retval;
size_t remaining = allocation;
while ((codepoint = SDL_StepUTF8(&fname, 4)) != 0) {
Uint32 folded[3];
const int num_folded = SDL_CaseFoldUnicode(codepoint, folded);
SDL_assert(num_folded > 0);
SDL_assert(num_folded <= SDL_arraysize(folded));
for (int i = 0; i < num_folded; i++) {
SDL_assert(written < allocation);
retval[written++] = (char)folded[i];
SDL_assert(remaining > 0);
const size_t rc = EncodeCodepointToUtf8(ptr, folded[i], remaining);
SDL_assert(rc > 0);
SDL_assert(rc < remaining);
remaining -= rc;
ptr += rc;
}
}
SDL_assert(written < allocation);
retval[written++] = '\0';
SDL_assert(remaining > 0);
remaining--;
*ptr = '\0';
if (written < allocation) {
void *ptr = SDL_realloc(retval, written); // shrink it down.
if (remaining > 0) {
SDL_assert(allocation > remaining);
ptr = SDL_realloc(retval, allocation - remaining); // shrink it down.
if (ptr) { // shouldn't fail, but if it does, `retval` is still valid.
retval = (char *) ptr;
retval = ptr;
}
}