Commit | Line | Data |
---|---|---|
9c92ab61 | 1 | // SPDX-License-Identifier: GPL-2.0-only |
f0d6cc00 GKB |
2 | /* |
3 | * Kernel module for testing utf-8 support. | |
4 | * | |
5 | * Copyright 2017 Collabora Ltd. | |
f0d6cc00 GKB |
6 | */ |
7 | ||
8 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | |
9 | ||
10 | #include <linux/module.h> | |
11 | #include <linux/printk.h> | |
12 | #include <linux/unicode.h> | |
13 | #include <linux/dcache.h> | |
14 | ||
15 | #include "utf8n.h" | |
16 | ||
/*
 * Running tallies maintained by the _test() macro and reported when the
 * module is loaded. They are only used inside this file, so keep them out
 * of the kernel's global symbol namespace.
 */
static unsigned int failed_tests;
static unsigned int total_tests;
19 | ||
/* Unicode version (major.minor.revision) that the checks below target. */
#define latest_maj	12
#define latest_min	1
#define latest_rev	0
24 | ||
/*
 * _test() - record the outcome of a single check.
 * @cond: expression that is expected to be true
 * @func: name of the calling function, printed in the failure report
 * @line: source line of the check, printed in the failure report
 * @fmt:  printf-style format of an extra message printed on failure
 *
 * Always increments total_tests. When @cond is false, failed_tests is
 * incremented as well and the stringified condition is logged, followed by
 * the @fmt message.
 *
 * @cond is parenthesized before it is negated, so a compound expression
 * such as "a || b" is evaluated as a whole instead of being misread as
 * "!a || b".
 */
#define _test(cond, func, line, fmt, ...) do {				\
		total_tests++;						\
		if (!(cond)) {						\
			failed_tests++;					\
			pr_err("test %s:%d Failed: %s%s",		\
			       func, line, #cond, (fmt?":":"."));	\
			if (fmt)					\
				pr_err(fmt, ##__VA_ARGS__);		\
		}							\
	} while (0)

/* test_f() - check @cond and print a formatted message if it does not hold. */
#define test_f(cond, fmt, ...) _test(cond, __func__, __LINE__, fmt, ##__VA_ARGS__)

/* test() - check @cond without any extra message. */
#define test(cond) _test(cond, __func__, __LINE__, "")
37 | ||
/*
 * Canonical decomposition vectors: normalizing .str is expected to produce
 * .dec. Every string in this table _must_ be NUL-terminated.
 */
static const struct {
	unsigned char str[10];
	unsigned char dec[10];
} nfdi_test_data[] = {
	/* Trivial sequence */

	/* "aBba" decomposes to itself. */
	{
		.str = "aBba",
		.dec = "aBba",
	},

	/* Simple equivalent sequences */

	/*
	 * 'VULGAR FRACTION ONE QUARTER' cannot decompose to
	 * 'NUMBER 1' + 'FRACTION SLASH' + 'NUMBER 4' on canonical
	 * decomposition.
	 */
	{
		.str = {0xc2, 0xbc, 0x00},
		.dec = {0xc2, 0xbc, 0x00},
	},
	/*
	 * 'LATIN SMALL LETTER A WITH DIAERESIS' decomposes to
	 * 'LETTER A' + 'COMBINING DIAERESIS'.
	 */
	{
		.str = {0xc3, 0xa4, 0x00},
		.dec = {0x61, 0xcc, 0x88, 0x00},
	},
	/*
	 * 'LATIN SMALL LETTER LJ' can't decompose to
	 * 'LETTER L' + 'LETTER J' on canonical decomposition.
	 */
	{
		.str = {0xc7, 0x89, 0x00},
		.dec = {0xc7, 0x89, 0x00},
	},
	/* GREEK ANO TELEIA decomposes to MIDDLE DOT. */
	{
		.str = {0xce, 0x87, 0x00},
		.dec = {0xc2, 0xb7, 0x00},
	},

	/* Canonical ordering */

	/*
	 * A + 'COMBINING ACUTE ACCENT' + 'COMBINING OGONEK' decomposes
	 * to A + 'COMBINING OGONEK' + 'COMBINING ACUTE ACCENT'.
	 */
	{
		.str = {0x41, 0xcc, 0x81, 0xcc, 0xa8, 0x00},
		.dec = {0x41, 0xcc, 0xa8, 0xcc, 0x81, 0x00},
	},
	/*
	 * 'LATIN SMALL LETTER A WITH DIAERESIS' + 'COMBINING OGONEK'
	 * decomposes to
	 * 'LETTER A' + 'COMBINING OGONEK' + 'COMBINING DIAERESIS'.
	 */
	{
		.str = {0xc3, 0xa4, 0xcc, 0xa8, 0x00},
		.dec = {0x61, 0xcc, 0xa8, 0xcc, 0x88, 0x00},
	},
};
91 | ||
/*
 * Case-folding vectors: normalizing and case-folding .str is expected to
 * produce .ncf. Every string in this table _must_ be NUL-terminated.
 */
static const struct {
	unsigned char str[30];
	unsigned char ncf[30];
} nfdicf_test_data[] = {
	/* Trivial sequences */

	/* "ABba" folds to lowercase. */
	{
		.str = {0x41, 0x42, 0x62, 0x61, 0x00},
		.ncf = {0x61, 0x62, 0x62, 0x61, 0x00},
	},
	/* All ASCII folds to lower-case. */
	{
		.str = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0.1",
		.ncf = "abcdefghijklmnopqrstuvwxyz0.1",
	},
	/*
	 * LATIN SMALL LETTER SHARP S folds to
	 * LATIN SMALL LETTER S + LATIN SMALL LETTER S.
	 */
	{
		.str = {0xc3, 0x9f, 0x00},
		.ncf = {0x73, 0x73, 0x00},
	},
	/*
	 * LATIN CAPITAL LETTER A WITH RING ABOVE folds to
	 * LATIN SMALL LETTER A + COMBINING RING ABOVE.
	 */
	{
		.str = {0xc3, 0x85, 0x00},
		.ncf = {0x61, 0xcc, 0x8a, 0x00},
	},

	/*
	 * Introduced by Unicode 8.0.0.
	 *
	 * Cherokee letters are interesting test-cases because they fold
	 * to upper-case. Before 8.0.0, Cherokee lowercase were undefined,
	 * thus, the folding from LC is not stable between 7.0.0 -> 8.0.0,
	 * but it is from UC.
	 */

	/* CHEROKEE SMALL LETTER A folds to CHEROKEE LETTER A. */
	{
		.str = {0xea, 0xad, 0xb0, 0x00},
		.ncf = {0xe1, 0x8e, 0xa0, 0x00},
	},
	/* CHEROKEE SMALL LETTER YE folds to CHEROKEE LETTER YE. */
	{
		.str = {0xe1, 0x8f, 0xb8, 0x00},
		.ncf = {0xe1, 0x8f, 0xb0, 0x00},
	},
	/*
	 * OLD HUNGARIAN CAPITAL LETTER AMB folds to
	 * OLD HUNGARIAN SMALL LETTER AMB.
	 */
	{
		.str = {0xf0, 0x90, 0xb2, 0x83, 0x00},
		.ncf = {0xf0, 0x90, 0xb3, 0x83, 0x00},
	},

	/* Introduced by Unicode 9.0.0. */

	/* OSAGE CAPITAL LETTER CHA folds to OSAGE SMALL LETTER CHA. */
	{
		.str = {0xf0, 0x90, 0x92, 0xb5, 0x00},
		.ncf = {0xf0, 0x90, 0x93, 0x9d, 0x00},
	},
	/*
	 * LATIN CAPITAL LETTER SMALL CAPITAL I folds to
	 * LATIN LETTER SMALL CAPITAL I.
	 */
	{
		.str = {0xea, 0x9e, 0xae, 0x00},
		.ncf = {0xc9, 0xaa, 0x00},
	},

	/* Introduced by Unicode 11.0.0. */

	/*
	 * GEORGIAN MTAVRULI CAPITAL LETTER AN (U+1C90) folds to
	 * GEORGIAN LETTER AN (U+10D0).
	 */
	{
		.str = {0xe1, 0xb2, 0x90, 0x00},
		.ncf = {0xe1, 0x83, 0x90, 0x00},
	},
};
162 | ||
163 | static void check_utf8_nfdi(void) | |
164 | { | |
165 | int i; | |
166 | struct utf8cursor u8c; | |
167 | const struct utf8data *data; | |
168 | ||
169 | data = utf8nfdi(UNICODE_AGE(latest_maj, latest_min, latest_rev)); | |
170 | if (!data) { | |
171 | pr_err("%s: Unable to load utf8-%d.%d.%d. Skipping.\n", | |
172 | __func__, latest_maj, latest_min, latest_rev); | |
173 | return; | |
174 | } | |
175 | ||
176 | for (i = 0; i < ARRAY_SIZE(nfdi_test_data); i++) { | |
177 | int len = strlen(nfdi_test_data[i].str); | |
178 | int nlen = strlen(nfdi_test_data[i].dec); | |
179 | int j = 0; | |
180 | unsigned char c; | |
181 | ||
182 | test((utf8len(data, nfdi_test_data[i].str) == nlen)); | |
183 | test((utf8nlen(data, nfdi_test_data[i].str, len) == nlen)); | |
184 | ||
185 | if (utf8cursor(&u8c, data, nfdi_test_data[i].str) < 0) | |
186 | pr_err("can't create cursor\n"); | |
187 | ||
188 | while ((c = utf8byte(&u8c)) > 0) { | |
189 | test_f((c == nfdi_test_data[i].dec[j]), | |
190 | "Unexpected byte 0x%x should be 0x%x\n", | |
191 | c, nfdi_test_data[i].dec[j]); | |
192 | j++; | |
193 | } | |
194 | ||
195 | test((j == nlen)); | |
196 | } | |
197 | } | |
198 | ||
199 | static void check_utf8_nfdicf(void) | |
200 | { | |
201 | int i; | |
202 | struct utf8cursor u8c; | |
203 | const struct utf8data *data; | |
204 | ||
205 | data = utf8nfdicf(UNICODE_AGE(latest_maj, latest_min, latest_rev)); | |
206 | if (!data) { | |
207 | pr_err("%s: Unable to load utf8-%d.%d.%d. Skipping.\n", | |
208 | __func__, latest_maj, latest_min, latest_rev); | |
209 | return; | |
210 | } | |
211 | ||
212 | for (i = 0; i < ARRAY_SIZE(nfdicf_test_data); i++) { | |
213 | int len = strlen(nfdicf_test_data[i].str); | |
214 | int nlen = strlen(nfdicf_test_data[i].ncf); | |
215 | int j = 0; | |
216 | unsigned char c; | |
217 | ||
218 | test((utf8len(data, nfdicf_test_data[i].str) == nlen)); | |
219 | test((utf8nlen(data, nfdicf_test_data[i].str, len) == nlen)); | |
220 | ||
221 | if (utf8cursor(&u8c, data, nfdicf_test_data[i].str) < 0) | |
222 | pr_err("can't create cursor\n"); | |
223 | ||
224 | while ((c = utf8byte(&u8c)) > 0) { | |
225 | test_f((c == nfdicf_test_data[i].ncf[j]), | |
226 | "Unexpected byte 0x%x should be 0x%x\n", | |
227 | c, nfdicf_test_data[i].ncf[j]); | |
228 | j++; | |
229 | } | |
230 | ||
231 | test((j == nlen)); | |
232 | } | |
233 | } | |
234 | ||
235 | static void check_utf8_comparisons(void) | |
236 | { | |
237 | int i; | |
1215d239 | 238 | struct unicode_map *table = utf8_load("12.1.0"); |
f0d6cc00 GKB |
239 | |
240 | if (IS_ERR(table)) { | |
241 | pr_err("%s: Unable to load utf8 %d.%d.%d. Skipping.\n", | |
242 | __func__, latest_maj, latest_min, latest_rev); | |
243 | return; | |
244 | } | |
245 | ||
246 | for (i = 0; i < ARRAY_SIZE(nfdi_test_data); i++) { | |
247 | const struct qstr s1 = {.name = nfdi_test_data[i].str, | |
248 | .len = sizeof(nfdi_test_data[i].str)}; | |
249 | const struct qstr s2 = {.name = nfdi_test_data[i].dec, | |
250 | .len = sizeof(nfdi_test_data[i].dec)}; | |
251 | ||
252 | test_f(!utf8_strncmp(table, &s1, &s2), | |
253 | "%s %s comparison mismatch\n", s1.name, s2.name); | |
254 | } | |
255 | ||
256 | for (i = 0; i < ARRAY_SIZE(nfdicf_test_data); i++) { | |
257 | const struct qstr s1 = {.name = nfdicf_test_data[i].str, | |
258 | .len = sizeof(nfdicf_test_data[i].str)}; | |
259 | const struct qstr s2 = {.name = nfdicf_test_data[i].ncf, | |
260 | .len = sizeof(nfdicf_test_data[i].ncf)}; | |
261 | ||
262 | test_f(!utf8_strncasecmp(table, &s1, &s2), | |
263 | "%s %s comparison mismatch\n", s1.name, s2.name); | |
264 | } | |
265 | ||
266 | utf8_unload(table); | |
267 | } | |
268 | ||
269 | static void check_supported_versions(void) | |
270 | { | |
271 | /* Unicode 7.0.0 should be supported. */ | |
272 | test(utf8version_is_supported(7, 0, 0)); | |
273 | ||
274 | /* Unicode 9.0.0 should be supported. */ | |
275 | test(utf8version_is_supported(9, 0, 0)); | |
276 | ||
277 | /* Unicode 1x.0.0 (the latest version) should be supported. */ | |
278 | test(utf8version_is_supported(latest_maj, latest_min, latest_rev)); | |
279 | ||
280 | /* Next versions don't exist. */ | |
1215d239 | 281 | test(!utf8version_is_supported(13, 0, 0)); |
f0d6cc00 GKB |
282 | test(!utf8version_is_supported(0, 0, 0)); |
283 | test(!utf8version_is_supported(-1, -1, -1)); | |
284 | } | |
285 | ||
286 | static int __init init_test_ucd(void) | |
287 | { | |
288 | failed_tests = 0; | |
289 | total_tests = 0; | |
290 | ||
291 | check_supported_versions(); | |
292 | check_utf8_nfdi(); | |
293 | check_utf8_nfdicf(); | |
294 | check_utf8_comparisons(); | |
295 | ||
296 | if (!failed_tests) | |
297 | pr_info("All %u tests passed\n", total_tests); | |
298 | else | |
299 | pr_err("%u out of %u tests failed\n", failed_tests, | |
300 | total_tests); | |
301 | return 0; | |
302 | } | |
303 | ||
304 | static void __exit exit_test_ucd(void) | |
305 | { | |
306 | } | |
307 | ||
308 | module_init(init_test_ucd); | |
309 | module_exit(exit_test_ucd); | |
310 | ||
311 | MODULE_AUTHOR("Gabriel Krisman Bertazi <krisman@collabora.co.uk>"); | |
312 | MODULE_LICENSE("GPL"); |