^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0-only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * Kernel module for testing utf-8 support.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * Copyright 2017 Collabora Ltd.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9)
#include <linux/module.h>
#include <linux/printk.h>
#include <linux/stringify.h>
#include <linux/unicode.h>
#include <linux/dcache.h>

#include "utf8n.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16)
/*
 * Running tallies for the self-test.  They are bumped by the _test() macro
 * and reset at the start of init_test_ucd().  Nothing outside this file
 * uses them, so they are kept file-local to avoid exporting generic symbol
 * names from the module.
 */
static unsigned int failed_tests;
static unsigned int total_tests;

/* Tests will be based on this version. */
#define latest_maj 12
#define latest_min 1
#define latest_rev 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24)
/*
 * _test() - record the outcome of one check.
 * @cond: expression that must be true for the check to pass
 * @func: name of the calling function, printed on failure
 * @line: source line of the check, printed on failure
 * @fmt:  string literal with an optional printk-style detail message;
 *        pass "" when there is no detail to print
 *
 * Always increments total_tests.  When @cond is false, increments
 * failed_tests and logs the failing expression, followed by the detail
 * message if one was given.
 *
 * @cond is parenthesized before negation so that compound expressions such
 * as "a && b" are evaluated as a whole.  @fmt is always a (non-NULL) string
 * literal because pr_err() pastes it onto pr_fmt(), so "is there a detail
 * message" is decided by whether the literal is empty, not by its address.
 */
#define _test(cond, func, line, fmt, ...) do {				\
		total_tests++;						\
		if (!(cond)) {						\
			failed_tests++;					\
			pr_err("test %s:%d Failed: %s%s\n",		\
			       func, line, #cond,			\
			       ((fmt)[0] ? ":" : "."));			\
			if ((fmt)[0])					\
				pr_err(fmt, ##__VA_ARGS__);		\
		}							\
	} while (0)

/* Check @cond and print a formatted detail message when it fails. */
#define test_f(cond, fmt, ...) _test(cond, __func__, __LINE__, fmt, ##__VA_ARGS__)

/* Check @cond with no detail message. */
#define test(cond) _test(cond, __func__, __LINE__, "")
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37)
/*
 * Decomposition test vectors: walking @str with the table returned by
 * utf8nfdi() is expected to produce exactly the bytes in @dec.
 */
static const struct {
	/* Both byte strings _must_ be NUL-terminated within their buffer. */
	unsigned char str[10];
	unsigned char dec[10];
} nfdi_test_data[] = {
	/* Trivial sequence */
	{
		/* Plain ASCII "aBba" decomposes to itself */
		.str = {0x61, 0x42, 0x62, 0x61, 0x00},
		.dec = {0x61, 0x42, 0x62, 0x61, 0x00},
	},
	/* Simple equivalent sequences */
	{
		/*
		 * 'VULGAR FRACTION ONE QUARTER' is left untouched: it does
		 * not split into 'NUMBER 1' + 'FRACTION SLASH' + 'NUMBER 4'
		 * under canonical decomposition.
		 */
		.str = {0xc2, 0xbc, 0x00},
		.dec = {0xc2, 0xbc, 0x00},
	},
	{
		/*
		 * 'LATIN SMALL LETTER A WITH DIAERESIS' becomes
		 * 'LETTER A' + 'COMBINING DIAERESIS'.
		 */
		.str = {0xc3, 0xa4, 0x00},
		.dec = {0x61, 0xcc, 0x88, 0x00},
	},
	{
		/*
		 * 'LATIN SMALL LETTER LJ' is left untouched: it does not
		 * split into 'LETTER L' + 'LETTER J' under canonical
		 * decomposition.
		 */
		.str = {0xc7, 0x89, 0x00},
		.dec = {0xc7, 0x89, 0x00},
	},
	{
		/* 'GREEK ANO TELEIA' becomes 'MIDDLE DOT' */
		.str = {0xce, 0x87, 0x00},
		.dec = {0xc2, 0xb7, 0x00},
	},
	/* Canonical ordering */
	{
		/*
		 * A + 'COMBINING ACUTE ACCENT' + 'COMBINING OGONEK' is
		 * reordered to
		 * A + 'COMBINING OGONEK' + 'COMBINING ACUTE ACCENT'.
		 */
		.str = {0x41, 0xcc, 0x81, 0xcc, 0xa8, 0x00},
		.dec = {0x41, 0xcc, 0xa8, 0xcc, 0x81, 0x00},
	},
	{
		/*
		 * 'LATIN SMALL LETTER A WITH DIAERESIS' + 'COMBINING OGONEK'
		 * becomes
		 * 'LETTER A' + 'COMBINING OGONEK' + 'COMBINING DIAERESIS'.
		 */
		.str = {0xc3, 0xa4, 0xcc, 0xa8, 0x00},
		.dec = {0x61, 0xcc, 0xa8, 0xcc, 0x88, 0x00},
	},
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91)
/*
 * Decomposition + case-folding test vectors: walking @str with the table
 * returned by utf8nfdicf() is expected to produce exactly the bytes in @ncf.
 */
static const struct {
	/* Both byte strings _must_ be NUL-terminated within their buffer. */
	unsigned char str[30];
	unsigned char ncf[30];
} nfdicf_test_data[] = {
	/* Trivial sequences */
	{
		/* "ABba" folds to lowercase */
		.str = "ABba",
		.ncf = "abba",
	},
	{
		/* All ASCII folds to lower-case (29 chars + NUL fill the buffer) */
		.str = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0.1",
		.ncf = "abcdefghijklmnopqrstuvwxyz0.1",
	},
	{
		/*
		 * LATIN SMALL LETTER SHARP S folds to
		 * LATIN SMALL LETTER S + LATIN SMALL LETTER S.
		 */
		.str = {0xc3, 0x9f, 0x00},
		.ncf = {0x73, 0x73, 0x00},
	},
	{
		/*
		 * LATIN CAPITAL LETTER A WITH RING ABOVE folds to
		 * LATIN SMALL LETTER A + COMBINING RING ABOVE.
		 */
		.str = {0xc3, 0x85, 0x00},
		.ncf = {0x61, 0xcc, 0x8a, 0x00},
	},
	/*
	 * Introduced by Unicode 8.0.0.
	 *
	 * Cherokee letters are interesting test-cases because they fold to
	 * upper-case.  Before 8.0.0, Cherokee lowercase were undefined,
	 * thus the folding from LC is not stable between 7.0.0 -> 8.0.0,
	 * but it is from UC.
	 */
	{
		/* CHEROKEE SMALL LETTER A folds to CHEROKEE LETTER A */
		.str = {0xea, 0xad, 0xb0, 0x00},
		.ncf = {0xe1, 0x8e, 0xa0, 0x00},
	},
	{
		/* CHEROKEE SMALL LETTER YE folds to CHEROKEE LETTER YE */
		.str = {0xe1, 0x8f, 0xb8, 0x00},
		.ncf = {0xe1, 0x8f, 0xb0, 0x00},
	},
	{
		/*
		 * OLD HUNGARIAN CAPITAL LETTER AMB folds to
		 * OLD HUNGARIAN SMALL LETTER AMB.
		 */
		.str = {0xf0, 0x90, 0xb2, 0x83, 0x00},
		.ncf = {0xf0, 0x90, 0xb3, 0x83, 0x00},
	},
	/* Introduced by Unicode 9.0.0. */
	{
		/* OSAGE CAPITAL LETTER CHA folds to OSAGE SMALL LETTER CHA */
		.str = {0xf0, 0x90, 0x92, 0xb5, 0x00},
		.ncf = {0xf0, 0x90, 0x93, 0x9d, 0x00},
	},
	{
		/*
		 * LATIN CAPITAL LETTER SMALL CAPITAL I folds to
		 * LATIN LETTER SMALL CAPITAL I.
		 */
		.str = {0xea, 0x9e, 0xae, 0x00},
		.ncf = {0xc9, 0xaa, 0x00},
	},
	/* Introduced by Unicode 11.0.0. */
	{
		/*
		 * GEORGIAN MTAVRULI CAPITAL LETTER AN (U+1C90) folds to
		 * GEORGIAN LETTER AN (U+10D0).
		 */
		.str = {0xe1, 0xb2, 0x90, 0x00},
		.ncf = {0xe1, 0x83, 0x90, 0x00},
	},
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) static void check_utf8_nfdi(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) struct utf8cursor u8c;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) const struct utf8data *data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) data = utf8nfdi(UNICODE_AGE(latest_maj, latest_min, latest_rev));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) if (!data) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) pr_err("%s: Unable to load utf8-%d.%d.%d. Skipping.\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) __func__, latest_maj, latest_min, latest_rev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) for (i = 0; i < ARRAY_SIZE(nfdi_test_data); i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) int len = strlen(nfdi_test_data[i].str);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) int nlen = strlen(nfdi_test_data[i].dec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) int j = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) unsigned char c;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) test((utf8len(data, nfdi_test_data[i].str) == nlen));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) test((utf8nlen(data, nfdi_test_data[i].str, len) == nlen));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) if (utf8cursor(&u8c, data, nfdi_test_data[i].str) < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) pr_err("can't create cursor\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) while ((c = utf8byte(&u8c)) > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) test_f((c == nfdi_test_data[i].dec[j]),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) "Unexpected byte 0x%x should be 0x%x\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) c, nfdi_test_data[i].dec[j]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) j++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) test((j == nlen));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) static void check_utf8_nfdicf(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) struct utf8cursor u8c;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) const struct utf8data *data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) data = utf8nfdicf(UNICODE_AGE(latest_maj, latest_min, latest_rev));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) if (!data) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) pr_err("%s: Unable to load utf8-%d.%d.%d. Skipping.\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) __func__, latest_maj, latest_min, latest_rev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) for (i = 0; i < ARRAY_SIZE(nfdicf_test_data); i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) int len = strlen(nfdicf_test_data[i].str);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) int nlen = strlen(nfdicf_test_data[i].ncf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) int j = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) unsigned char c;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) test((utf8len(data, nfdicf_test_data[i].str) == nlen));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) test((utf8nlen(data, nfdicf_test_data[i].str, len) == nlen));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) if (utf8cursor(&u8c, data, nfdicf_test_data[i].str) < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) pr_err("can't create cursor\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) while ((c = utf8byte(&u8c)) > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) test_f((c == nfdicf_test_data[i].ncf[j]),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) "Unexpected byte 0x%x should be 0x%x\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) c, nfdicf_test_data[i].ncf[j]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) j++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) test((j == nlen));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) static void check_utf8_comparisons(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) struct unicode_map *table = utf8_load("12.1.0");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) if (IS_ERR(table)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) pr_err("%s: Unable to load utf8 %d.%d.%d. Skipping.\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) __func__, latest_maj, latest_min, latest_rev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) for (i = 0; i < ARRAY_SIZE(nfdi_test_data); i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) const struct qstr s1 = {.name = nfdi_test_data[i].str,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) .len = sizeof(nfdi_test_data[i].str)};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) const struct qstr s2 = {.name = nfdi_test_data[i].dec,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) .len = sizeof(nfdi_test_data[i].dec)};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) test_f(!utf8_strncmp(table, &s1, &s2),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) "%s %s comparison mismatch\n", s1.name, s2.name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) for (i = 0; i < ARRAY_SIZE(nfdicf_test_data); i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) const struct qstr s1 = {.name = nfdicf_test_data[i].str,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) .len = sizeof(nfdicf_test_data[i].str)};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) const struct qstr s2 = {.name = nfdicf_test_data[i].ncf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) .len = sizeof(nfdicf_test_data[i].ncf)};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) test_f(!utf8_strncasecmp(table, &s1, &s2),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) "%s %s comparison mismatch\n", s1.name, s2.name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) utf8_unload(table);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) static void check_supported_versions(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) /* Unicode 7.0.0 should be supported. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) test(utf8version_is_supported(7, 0, 0));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) /* Unicode 9.0.0 should be supported. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) test(utf8version_is_supported(9, 0, 0));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) /* Unicode 1x.0.0 (the latest version) should be supported. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) test(utf8version_is_supported(latest_maj, latest_min, latest_rev));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) /* Next versions don't exist. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) test(!utf8version_is_supported(13, 0, 0));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) test(!utf8version_is_supported(0, 0, 0));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) test(!utf8version_is_supported(-1, -1, -1));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) static int __init init_test_ucd(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) failed_tests = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) total_tests = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) check_supported_versions();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) check_utf8_nfdi();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) check_utf8_nfdicf();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) check_utf8_comparisons();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) if (!failed_tests)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) pr_info("All %u tests passed\n", total_tests);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) pr_err("%u out of %u tests failed\n", failed_tests,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) total_tests);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) static void __exit exit_test_ucd(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) module_init(init_test_ucd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) module_exit(exit_test_ucd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) MODULE_AUTHOR("Gabriel Krisman Bertazi <krisman@collabora.co.uk>");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) MODULE_LICENSE("GPL");