^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) #include <linux/ucs2_string.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) #include <linux/module.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) /* Return the number of unicode characters in data */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) unsigned long
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) ucs2_strnlen(const ucs2_char_t *s, size_t maxlength)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) unsigned long length = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) while (*s++ != 0 && length < maxlength)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) length++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) return length;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) EXPORT_SYMBOL(ucs2_strnlen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) unsigned long
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) ucs2_strlen(const ucs2_char_t *s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) return ucs2_strnlen(s, ~0UL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) EXPORT_SYMBOL(ucs2_strlen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) * Return the number of bytes is the length of this string
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) * Note: this is NOT the same as the number of unicode characters
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) unsigned long
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) ucs2_strsize(const ucs2_char_t *data, unsigned long maxlength)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) return ucs2_strnlen(data, maxlength/sizeof(ucs2_char_t)) * sizeof(ucs2_char_t);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) EXPORT_SYMBOL(ucs2_strsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) ucs2_strncmp(const ucs2_char_t *a, const ucs2_char_t *b, size_t len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) while (1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) if (len == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) if (*a < *b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) if (*a > *b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) if (*a == 0) /* implies *b == 0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) a++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) b++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) len--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) EXPORT_SYMBOL(ucs2_strncmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) unsigned long
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) ucs2_utf8size(const ucs2_char_t *src)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) unsigned long i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) unsigned long j = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) for (i = 0; src[i]; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) u16 c = src[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) if (c >= 0x800)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) j += 3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) else if (c >= 0x80)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) j += 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) j += 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) return j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) EXPORT_SYMBOL(ucs2_utf8size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) * copy at most maxlength bytes of whole utf8 characters to dest from the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) * ucs2 string src.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) * The return value is the number of characters copied, not including the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) * final NUL character.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) unsigned long
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) ucs2_as_utf8(u8 *dest, const ucs2_char_t *src, unsigned long maxlength)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) unsigned int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) unsigned long j = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) unsigned long limit = ucs2_strnlen(src, maxlength);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) for (i = 0; maxlength && i < limit; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) u16 c = src[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) if (c >= 0x800) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) if (maxlength < 3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) maxlength -= 3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) dest[j++] = 0xe0 | (c & 0xf000) >> 12;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) dest[j++] = 0x80 | (c & 0x0fc0) >> 6;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) dest[j++] = 0x80 | (c & 0x003f);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) } else if (c >= 0x80) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) if (maxlength < 2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) maxlength -= 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) dest[j++] = 0xc0 | (c & 0x7c0) >> 6;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) dest[j++] = 0x80 | (c & 0x03f);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) maxlength -= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) dest[j++] = c & 0x7f;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) if (maxlength)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) dest[j] = '\0';
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) return j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) EXPORT_SYMBOL(ucs2_as_utf8);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) MODULE_LICENSE("GPL v2");