summaryrefslogtreecommitdiff
path: root/lib/charset.c
diff options
context:
space:
mode:
authorRob Clark <robdclark@gmail.com>2017-09-09 06:47:40 -0400
committerTom Rini <trini@konsulko.com>2017-09-12 17:57:59 -0400
commit78178bb0c9dfe2a91a636a411291d8bab50e8a7d (patch)
tree5c4ebb4800d57ca58b6b5a74948abec199215083 /lib/charset.c
parent4a85663ec7eddd955d22f1b0f34a9708eac82314 (diff)
lib: add some utf16 handling helpers
We'll eventually want these in a few places in efi_loader, and also vsprintf. Signed-off-by: Rob Clark <robdclark@gmail.com>
Diffstat (limited to 'lib/charset.c')
-rw-r--r--lib/charset.c101
1 files changed, 101 insertions, 0 deletions
diff --git a/lib/charset.c b/lib/charset.c
new file mode 100644
index 0000000000..ff76e88c77
--- /dev/null
+++ b/lib/charset.c
@@ -0,0 +1,101 @@
+/*
+ * charset conversion utils
+ *
+ * Copyright (c) 2017 Rob Clark
+ *
+ * SPDX-License-Identifier: GPL-2.0+
+ */
+
+#include <common.h>
+#include <charset.h>
+#include <malloc.h>
+
+/*
+ * utf8/utf16 conversion mostly lifted from grub
+ */
+
+size_t utf16_strlen(const uint16_t *in)
+{
+ size_t i;
+ for (i = 0; in[i]; i++);
+ return i;
+}
+
+size_t utf16_strnlen(const uint16_t *in, size_t count)
+{
+ size_t i;
+ for (i = 0; count-- && in[i]; i++);
+ return i;
+}
+
+uint16_t *utf16_strcpy(uint16_t *dest, const uint16_t *src)
+{
+ uint16_t *tmp = dest;
+
+ while ((*dest++ = *src++) != '\0')
+ /* nothing */;
+ return tmp;
+
+}
+
+uint16_t *utf16_strdup(const uint16_t *s)
+{
+ uint16_t *new;
+ if (!s || !(new = malloc((utf16_strlen(s) + 1) * 2)))
+ return NULL;
+ utf16_strcpy(new, s);
+ return new;
+}
+
+/* Convert UTF-16 to UTF-8. */
+uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size)
+{
+ uint32_t code_high = 0;
+
+ while (size--) {
+ uint32_t code = *src++;
+
+ if (code_high) {
+ if (code >= 0xDC00 && code <= 0xDFFF) {
+ /* Surrogate pair. */
+ code = ((code_high - 0xD800) << 10) + (code - 0xDC00) + 0x10000;
+
+ *dest++ = (code >> 18) | 0xF0;
+ *dest++ = ((code >> 12) & 0x3F) | 0x80;
+ *dest++ = ((code >> 6) & 0x3F) | 0x80;
+ *dest++ = (code & 0x3F) | 0x80;
+ } else {
+ /* Error... */
+ *dest++ = '?';
+ /* *src may be valid. Don't eat it. */
+ src--;
+ }
+
+ code_high = 0;
+ } else {
+ if (code <= 0x007F) {
+ *dest++ = code;
+ } else if (code <= 0x07FF) {
+ *dest++ = (code >> 6) | 0xC0;
+ *dest++ = (code & 0x3F) | 0x80;
+ } else if (code >= 0xD800 && code <= 0xDBFF) {
+ code_high = code;
+ continue;
+ } else if (code >= 0xDC00 && code <= 0xDFFF) {
+ /* Error... */
+ *dest++ = '?';
+ } else if (code < 0x10000) {
+ *dest++ = (code >> 12) | 0xE0;
+ *dest++ = ((code >> 6) & 0x3F) | 0x80;
+ *dest++ = (code & 0x3F) | 0x80;
+ } else {
+ *dest++ = (code >> 18) | 0xF0;
+ *dest++ = ((code >> 12) & 0x3F) | 0x80;
+ *dest++ = ((code >> 6) & 0x3F) | 0x80;
+ *dest++ = (code & 0x3F) | 0x80;
+ }
+ }
+ }
+
+ return dest;
+}