diff options
author | Heinrich Schuchardt <xypron.glpk@gmx.de> | 2018-08-31 21:31:27 +0200 |
---|---|---|
committer | Alexander Graf <agraf@suse.de> | 2018-09-23 21:55:29 +0200 |
commit | d8c28232c34d4be9984eda52d69d0920678d937d (patch) | |
tree | da6d8305f94829b36f163c331eb5e1bf8abc3280 /include/charset.h | |
parent | 1dde0d57a5d1281aaa949e9bf7b5c476345a56ee (diff) |
lib: charset: utility functions for Unicode
utf8_get() - get next UTF-8 code point from buffer
utf8_put() - write UTF-8 code point to buffer
utf8_utf16_strnlen() - length of a utf-8 string after conversion to utf-16
utf8_utf16_strncpy() - copy a utf-8 string to utf-16
utf16_get() - get next UTF-16 code point from buffer
utf16_put() - write UTF-16 code point to buffer
utf16_strnlen() - number of codes points in a utf-16 string
utf16_utf8_strnlen() - length of a utf-16 string after conversion to utf-8
utf16_utf8_strncpy() - copy a utf-16 string to utf-8
Signed-off-by: Heinrich Schuchardt <xypron.glpk@gmx.de>
Signed-off-by: Alexander Graf <agraf@suse.de>
Diffstat (limited to 'include/charset.h')
-rw-r--r-- | include/charset.h | 130 |
1 files changed, 130 insertions, 0 deletions
diff --git a/include/charset.h b/include/charset.h index 2c6deb8034..cf41eb5e5f 100644 --- a/include/charset.h +++ b/include/charset.h @@ -8,11 +8,141 @@ #ifndef __CHARSET_H_ #define __CHARSET_H_ +#include <linux/kernel.h> #include <linux/types.h> #define MAX_UTF8_PER_UTF16 3 /** + * utf8_get() - get next UTF-8 code point from buffer + * + * @src: pointer to current byte, updated to point to next byte + * Return: code point, or 0 for end of string, or -1 if no legal + * code point is found. In case of an error src points to + * the incorrect byte. + */ +s32 utf8_get(const char **src); + +/** + * utf8_put() - write UTF-8 code point to buffer + * + * @code: code point + * @dst: pointer to destination buffer, updated to next position + * Return: -1 if the input parameters are invalid + */ +int utf8_put(s32 code, char **dst); + +/** + * utf8_utf16_strnlen() - length of a truncated utf-8 string after conversion + * to utf-16 + * + * @src: utf-8 string + * @count: maximum number of code points to convert + * Return: length in bytes after conversion to utf-16 without the + * trailing \0. If an invalid UTF-8 sequence is hit one + * word will be reserved for a replacement character. + */ +size_t utf8_utf16_strnlen(const char *src, size_t count); + +/** + * utf8_utf16_strlen() - length of a utf-8 string after conversion to utf-16 + * + * @src: utf-8 string + * Return: length in bytes after conversion to utf-16 without the + * trailing \0. -1 if the utf-8 string is not valid. + */ +#define utf8_utf16_strlen(a) utf8_utf16_strnlen((a), SIZE_MAX) + +/** + * utf8_utf16_strncpy() - copy utf-8 string to utf-16 string + * + * @dst: destination buffer + * @src: source buffer + * @count: maximum number of code points to copy + * Return: -1 if the input parameters are invalid + */ +int utf8_utf16_strncpy(u16 **dst, const char *src, size_t count); + +/** + * utf8_utf16_strcpy() - copy utf-8 string to utf-16 string + * + * @dst: destination buffer + * @src: source buffer + * Return: -1 if the input parameters are invalid + */ +#define utf8_utf16_strcpy(d, s) utf8_utf16_strncpy((d), (s), SIZE_MAX) + +/** + * utf16_get() - get next UTF-16 code point from buffer + * + * @src: pointer to current word, updated to point to next word + * Return: code point, or 0 for end of string, or -1 if no legal + * code point is found. In case of an error src points to + * the incorrect word. + */ +s32 utf16_get(const u16 **src); + +/** + * utf16_put() - write UTF-16 code point to buffer + * + * @code: code point + * @dst: pointer to destination buffer, updated to next position + * Return: -1 if the input parameters are invalid + */ +int utf16_put(s32 code, u16 **dst); + +/** + * utf16_strnlen() - length of a truncated utf-16 string + * + * @src: utf-16 string + * @count: maximum number of code points to convert + * Return: length in code points. If an invalid UTF-16 sequence is + * hit one position will be reserved for a replacement + * character. + */ +size_t utf16_strnlen(const u16 *src, size_t count); + +/** + * utf16_utf8_strnlen() - length of a truncated utf-16 string after conversion + * to utf-8 + * + * @src: utf-16 string + * @count: maximum number of code points to convert + * Return: length in bytes after conversion to utf-8 without the + * trailing \0. If an invalid UTF-16 sequence is hit one + * byte will be reserved for a replacement character. + */ +size_t utf16_utf8_strnlen(const u16 *src, size_t count); + +/** + * utf16_utf8_strlen() - length of a utf-16 string after conversion to utf-8 + * + * @src: utf-16 string + * Return: length in bytes after conversion to utf-8 without the + * trailing \0. -1 if the utf-16 string is not valid. + */ +#define utf16_utf8_strlen(a) utf16_utf8_strnlen((a), SIZE_MAX) + +/** + * utf16_utf8_strncpy() - copy utf-16 string to utf-8 string + * + * @dst: destination buffer + * @src: source buffer + * @count: maximum number of code points to copy + * Return: -1 if the input parameters are invalid + */ +int utf16_utf8_strncpy(char **dst, const u16 *src, size_t count); + +/** + * utf16_utf8_strcpy() - copy utf-16 string to utf-8 string + * + * @dst: destination buffer + * @src: source buffer + * Return: -1 if the input parameters are invalid + */ +#define utf16_utf8_strcpy(d, s) utf16_utf8_strncpy((d), (s), SIZE_MAX) + +/** * u16_strlen - count non-zero words * * This function matches wsclen() if the -fshort-wchar compiler flag is set. |