diff options
author | Rob Clark <robdclark@gmail.com> | 2017-09-09 06:47:40 -0400 |
---|---|---|
committer | Tom Rini <trini@konsulko.com> | 2017-09-12 17:57:59 -0400 |
commit | 78178bb0c9dfe2a91a636a411291d8bab50e8a7d (patch) | |
tree | 5c4ebb4800d57ca58b6b5a74948abec199215083 /lib/charset.c | |
parent | 4a85663ec7eddd955d22f1b0f34a9708eac82314 (diff) |
lib: add some utf16 handling helpers
We'll eventually want these in a few places in efi_loader, and also
vsprintf.
Signed-off-by: Rob Clark <robdclark@gmail.com>
Diffstat (limited to 'lib/charset.c')
-rw-r--r-- | lib/charset.c | 101 |
1 files changed, 101 insertions, 0 deletions
diff --git a/lib/charset.c b/lib/charset.c new file mode 100644 index 0000000000..ff76e88c77 --- /dev/null +++ b/lib/charset.c @@ -0,0 +1,101 @@ +/* + * charset conversion utils + * + * Copyright (c) 2017 Rob Clark + * + * SPDX-License-Identifier: GPL-2.0+ + */ + +#include <common.h> +#include <charset.h> +#include <malloc.h> + +/* + * utf8/utf16 conversion mostly lifted from grub + */ + +size_t utf16_strlen(const uint16_t *in) +{ + size_t i; + for (i = 0; in[i]; i++); + return i; +} + +size_t utf16_strnlen(const uint16_t *in, size_t count) +{ + size_t i; + for (i = 0; count-- && in[i]; i++); + return i; +} + +uint16_t *utf16_strcpy(uint16_t *dest, const uint16_t *src) +{ + uint16_t *tmp = dest; + + while ((*dest++ = *src++) != '\0') + /* nothing */; + return tmp; + +} + +uint16_t *utf16_strdup(const uint16_t *s) +{ + uint16_t *new; + if (!s || !(new = malloc((utf16_strlen(s) + 1) * 2))) + return NULL; + utf16_strcpy(new, s); + return new; +} + +/* Convert UTF-16 to UTF-8. */ +uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size) +{ + uint32_t code_high = 0; + + while (size--) { + uint32_t code = *src++; + + if (code_high) { + if (code >= 0xDC00 && code <= 0xDFFF) { + /* Surrogate pair. */ + code = ((code_high - 0xD800) << 10) + (code - 0xDC00) + 0x10000; + + *dest++ = (code >> 18) | 0xF0; + *dest++ = ((code >> 12) & 0x3F) | 0x80; + *dest++ = ((code >> 6) & 0x3F) | 0x80; + *dest++ = (code & 0x3F) | 0x80; + } else { + /* Error... */ + *dest++ = '?'; + /* *src may be valid. Don't eat it. */ + src--; + } + + code_high = 0; + } else { + if (code <= 0x007F) { + *dest++ = code; + } else if (code <= 0x07FF) { + *dest++ = (code >> 6) | 0xC0; + *dest++ = (code & 0x3F) | 0x80; + } else if (code >= 0xD800 && code <= 0xDBFF) { + code_high = code; + continue; + } else if (code >= 0xDC00 && code <= 0xDFFF) { + /* Error... */ + *dest++ = '?'; + } else if (code < 0x10000) { + *dest++ = (code >> 12) | 0xE0; + *dest++ = ((code >> 6) & 0x3F) | 0x80; + *dest++ = (code & 0x3F) | 0x80; + } else { + *dest++ = (code >> 18) | 0xF0; + *dest++ = ((code >> 12) & 0x3F) | 0x80; + *dest++ = ((code >> 6) & 0x3F) | 0x80; + *dest++ = (code & 0x3F) | 0x80; + } + } + } + + return dest; +} |