Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * fs/cifs/cifs_unicode.c | |
3 | * | |
d185cda7 | 4 | * Copyright (c) International Business Machines Corp., 2000,2009 |
1da177e4 LT |
5 | * Modified by Steve French (sfrench@us.ibm.com) |
6 | * | |
7 | * This program is free software; you can redistribute it and/or modify | |
8 | * it under the terms of the GNU General Public License as published by | |
221601c3 | 9 | * the Free Software Foundation; either version 2 of the License, or |
1da177e4 | 10 | * (at your option) any later version. |
221601c3 | 11 | * |
1da177e4 LT |
12 | * This program is distributed in the hope that it will be useful, |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | |
15 | * the GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
221601c3 | 18 | * along with this program; if not, write to the Free Software |
1da177e4 LT |
19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
20 | */ | |
21 | #include <linux/fs.h> | |
5a0e3ad6 | 22 | #include <linux/slab.h> |
2baa2682 | 23 | #include "cifs_fs_sb.h" |
1da177e4 LT |
24 | #include "cifs_unicode.h" |
25 | #include "cifs_uniupr.h" | |
26 | #include "cifspdu.h" | |
3979877e | 27 | #include "cifsglob.h" |
1da177e4 LT |
28 | #include "cifs_debug.h" |
29 | ||
69f801fc | 30 | /* |
acbbb76a SF |
31 | * cifs_utf16_bytes - how long will a string be after conversion? |
32 | * @utf16 - pointer to input string | |
69f801fc JL |
33 | * @maxbytes - don't go past this many bytes of input string |
34 | * @codepage - destination codepage | |
35 | * | |
acbbb76a | 36 | * Walk a utf16le string and return the number of bytes that the string will |
69f801fc JL |
37 | * be after being converted to the given charset, not including any null |
38 | * termination required. Don't walk past maxbytes in the source buffer. | |
39 | */ | |
40 | int | |
acbbb76a | 41 | cifs_utf16_bytes(const __le16 *from, int maxbytes, |
69f801fc JL |
42 | const struct nls_table *codepage) |
43 | { | |
44 | int i; | |
45 | int charlen, outlen = 0; | |
46 | int maxwords = maxbytes / 2; | |
47 | char tmp[NLS_MAX_CHARSET_SIZE]; | |
ba2dbf30 | 48 | __u16 ftmp; |
69f801fc | 49 | |
ba2dbf30 JL |
50 | for (i = 0; i < maxwords; i++) { |
51 | ftmp = get_unaligned_le16(&from[i]); | |
52 | if (ftmp == 0) | |
53 | break; | |
54 | ||
55 | charlen = codepage->uni2char(ftmp, tmp, NLS_MAX_CHARSET_SIZE); | |
69f801fc JL |
56 | if (charlen > 0) |
57 | outlen += charlen; | |
58 | else | |
59 | outlen++; | |
60 | } | |
61 | ||
62 | return outlen; | |
63 | } | |
64 | ||
2baa2682 SF |
65 | int cifs_remap(struct cifs_sb_info *cifs_sb) |
66 | { | |
67 | int map_type; | |
68 | ||
69 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SFM_CHR) | |
70 | map_type = SFM_MAP_UNI_RSVD; | |
71 | else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR) | |
72 | map_type = SFU_MAP_UNI_RSVD; | |
73 | else | |
74 | map_type = NO_MAP_UNI_RSVD; | |
75 | ||
76 | return map_type; | |
77 | } | |
78 | ||
b693855f SF |
79 | /* Convert character using the SFU - "Services for Unix" remapping range */ |
80 | static bool | |
81 | convert_sfu_char(const __u16 src_char, char *target) | |
7fabf0c9 | 82 | { |
7fabf0c9 JL |
83 | /* |
84 | * BB: Cannot handle remapping UNI_SLASH until all the calls to | |
85 | * build_path_from_dentry are modified, as they use slash as | |
86 | * separator. | |
87 | */ | |
ba2dbf30 | 88 | switch (src_char) { |
7fabf0c9 JL |
89 | case UNI_COLON: |
90 | *target = ':'; | |
91 | break; | |
581ade4d | 92 | case UNI_ASTERISK: |
7fabf0c9 JL |
93 | *target = '*'; |
94 | break; | |
95 | case UNI_QUESTION: | |
96 | *target = '?'; | |
97 | break; | |
98 | case UNI_PIPE: | |
99 | *target = '|'; | |
100 | break; | |
101 | case UNI_GRTRTHAN: | |
102 | *target = '>'; | |
103 | break; | |
104 | case UNI_LESSTHAN: | |
105 | *target = '<'; | |
106 | break; | |
107 | default: | |
b693855f | 108 | return false; |
7fabf0c9 | 109 | } |
b693855f SF |
110 | return true; |
111 | } | |
7fabf0c9 | 112 | |
b693855f SF |
113 | /* Convert character using the SFM - "Services for Mac" remapping range */ |
114 | static bool | |
115 | convert_sfm_char(const __u16 src_char, char *target) | |
116 | { | |
117 | switch (src_char) { | |
118 | case SFM_COLON: | |
119 | *target = ':'; | |
120 | break; | |
121 | case SFM_ASTERISK: | |
122 | *target = '*'; | |
123 | break; | |
124 | case SFM_QUESTION: | |
125 | *target = '?'; | |
126 | break; | |
127 | case SFM_PIPE: | |
128 | *target = '|'; | |
129 | break; | |
130 | case SFM_GRTRTHAN: | |
131 | *target = '>'; | |
132 | break; | |
133 | case SFM_LESSTHAN: | |
134 | *target = '<'; | |
135 | break; | |
136 | case SFM_SLASH: | |
137 | *target = '\\'; | |
138 | break; | |
139 | default: | |
140 | return false; | |
141 | } | |
142 | return true; | |
143 | } | |
144 | ||
145 | ||
146 | /* | |
147 | * cifs_mapchar - convert a host-endian char to proper char in codepage | |
148 | * @target - where converted character should be copied | |
149 | * @src_char - 2 byte host-endian source character | |
150 | * @cp - codepage to which character should be converted | |
151 | * @map_type - How should the 7 NTFS/SMB reserved characters be mapped to UCS2? | |
152 | * | |
153 | * This function handles the conversion of a single character. It is the | |
154 | * responsibility of the caller to ensure that the target buffer is large | |
155 | * enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE). | |
156 | */ | |
157 | static int | |
158 | cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp, | |
159 | int maptype) | |
160 | { | |
161 | int len = 1; | |
162 | ||
163 | if ((maptype == SFM_MAP_UNI_RSVD) && convert_sfm_char(src_char, target)) | |
164 | return len; | |
165 | else if ((maptype == SFU_MAP_UNI_RSVD) && | |
166 | convert_sfu_char(src_char, target)) | |
167 | return len; | |
7fabf0c9 | 168 | |
b693855f | 169 | /* if character not one of seven in special remap set */ |
ba2dbf30 | 170 | len = cp->uni2char(src_char, target, NLS_MAX_CHARSET_SIZE); |
7fabf0c9 JL |
171 | if (len <= 0) { |
172 | *target = '?'; | |
173 | len = 1; | |
174 | } | |
b693855f | 175 | return len; |
7fabf0c9 JL |
176 | } |
177 | ||
178 | /* | |
acbbb76a | 179 | * cifs_from_utf16 - convert utf16le string to local charset |
7fabf0c9 JL |
180 | * @to - destination buffer |
181 | * @from - source buffer | |
182 | * @tolen - destination buffer size (in bytes) | |
183 | * @fromlen - source buffer size (in bytes) | |
184 | * @codepage - codepage to which characters should be converted | |
185 | * @mapchar - should characters be remapped according to the mapchars option? | |
186 | * | |
acbbb76a | 187 | * Convert a little-endian utf16le string (as sent by the server) to a string |
7fabf0c9 JL |
188 | * in the provided codepage. The tolen and fromlen parameters are to ensure |
189 | * that the code doesn't walk off of the end of the buffer (which is always | |
190 | * a danger if the alignment of the source buffer is off). The destination | |
191 | * string is always properly null terminated and fits in the destination | |
192 | * buffer. Returns the length of the destination string in bytes (including | |
193 | * null terminator). | |
194 | * | |
195 | * Note that some windows versions actually send multiword UTF-16 characters | |
acbbb76a | 196 | * instead of straight UTF16-2. The linux nls routines however aren't able to |
7fabf0c9 JL |
197 | * deal with those characters properly. In the event that we get some of |
198 | * those characters, they won't be translated properly. | |
199 | */ | |
200 | int | |
acbbb76a | 201 | cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen, |
b693855f | 202 | const struct nls_table *codepage, int map_type) |
7fabf0c9 JL |
203 | { |
204 | int i, charlen, safelen; | |
205 | int outlen = 0; | |
206 | int nullsize = nls_nullsize(codepage); | |
207 | int fromwords = fromlen / 2; | |
208 | char tmp[NLS_MAX_CHARSET_SIZE]; | |
ba2dbf30 | 209 | __u16 ftmp; |
7fabf0c9 JL |
210 | |
211 | /* | |
212 | * because the chars can be of varying widths, we need to take care | |
213 | * not to overflow the destination buffer when we get close to the | |
214 | * end of it. Until we get to this offset, we don't need to check | |
215 | * for overflow however. | |
216 | */ | |
217 | safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize); | |
218 | ||
ba2dbf30 JL |
219 | for (i = 0; i < fromwords; i++) { |
220 | ftmp = get_unaligned_le16(&from[i]); | |
221 | if (ftmp == 0) | |
222 | break; | |
223 | ||
7fabf0c9 JL |
224 | /* |
225 | * check to see if converting this character might make the | |
226 | * conversion bleed into the null terminator | |
227 | */ | |
228 | if (outlen >= safelen) { | |
b693855f | 229 | charlen = cifs_mapchar(tmp, ftmp, codepage, map_type); |
7fabf0c9 JL |
230 | if ((outlen + charlen) > (tolen - nullsize)) |
231 | break; | |
232 | } | |
233 | ||
234 | /* put converted char into 'to' buffer */ | |
b693855f | 235 | charlen = cifs_mapchar(&to[outlen], ftmp, codepage, map_type); |
7fabf0c9 JL |
236 | outlen += charlen; |
237 | } | |
238 | ||
239 | /* properly null-terminate string */ | |
240 | for (i = 0; i < nullsize; i++) | |
241 | to[outlen++] = 0; | |
242 | ||
243 | return outlen; | |
244 | } | |
245 | ||
1da177e4 | 246 | /* |
acbbb76a | 247 | * NAME: cifs_strtoUTF16() |
1da177e4 LT |
248 | * |
249 | * FUNCTION: Convert character string to unicode string | |
250 | * | |
251 | */ | |
252 | int | |
acbbb76a | 253 | cifs_strtoUTF16(__le16 *to, const char *from, int len, |
1da177e4 LT |
254 | const struct nls_table *codepage) |
255 | { | |
256 | int charlen; | |
257 | int i; | |
ba2dbf30 | 258 | wchar_t wchar_to; /* needed to quiet sparse */ |
1da177e4 | 259 | |
fd3ba42c FZ |
260 | /* special case for utf8 to handle no plane0 chars */ |
261 | if (!strcmp(codepage->charset, "utf8")) { | |
262 | /* | |
263 | * convert utf8 -> utf16, we assume we have enough space | |
264 | * as caller should have assumed conversion does not overflow | |
265 | * in destination len is length in wchar_t units (16bits) | |
266 | */ | |
267 | i = utf8s_to_utf16s(from, len, UTF16_LITTLE_ENDIAN, | |
268 | (wchar_t *) to, len); | |
269 | ||
270 | /* if success terminate and exit */ | |
271 | if (i >= 0) | |
272 | goto success; | |
273 | /* | |
274 | * if fails fall back to UCS encoding as this | |
275 | * function should not return negative values | |
276 | * currently can fail only if source contains | |
277 | * invalid encoded characters | |
278 | */ | |
279 | } | |
280 | ||
1da177e4 | 281 | for (i = 0; len && *from; i++, from += charlen, len -= charlen) { |
ba2dbf30 | 282 | charlen = codepage->char2uni(from, len, &wchar_to); |
1da177e4 | 283 | if (charlen < 1) { |
f96637be JP |
284 | cifs_dbg(VFS, "strtoUTF16: char2uni of 0x%x returned %d\n", |
285 | *from, charlen); | |
69114089 | 286 | /* A question mark */ |
ba2dbf30 | 287 | wchar_to = 0x003f; |
1da177e4 | 288 | charlen = 1; |
ba2dbf30 JL |
289 | } |
290 | put_unaligned_le16(wchar_to, &to[i]); | |
1da177e4 LT |
291 | } |
292 | ||
fd3ba42c | 293 | success: |
ba2dbf30 | 294 | put_unaligned_le16(0, &to[i]); |
1da177e4 LT |
295 | return i; |
296 | } | |
297 | ||
066ce689 | 298 | /* |
acbbb76a SF |
299 | * cifs_strndup_from_utf16 - copy a string from wire format to the local |
300 | * codepage | |
066ce689 JL |
301 | * @src - source string |
302 | * @maxlen - don't walk past this many bytes in the source string | |
303 | * @is_unicode - is this a unicode string? | |
304 | * @codepage - destination codepage | |
305 | * | |
306 | * Take a string given by the server, convert it to the local codepage and | |
307 | * put it in a new buffer. Returns a pointer to the new string or NULL on | |
308 | * error. | |
309 | */ | |
310 | char * | |
acbbb76a SF |
311 | cifs_strndup_from_utf16(const char *src, const int maxlen, |
312 | const bool is_unicode, const struct nls_table *codepage) | |
066ce689 JL |
313 | { |
314 | int len; | |
315 | char *dst; | |
316 | ||
317 | if (is_unicode) { | |
acbbb76a | 318 | len = cifs_utf16_bytes((__le16 *) src, maxlen, codepage); |
066ce689 JL |
319 | len += nls_nullsize(codepage); |
320 | dst = kmalloc(len, GFP_KERNEL); | |
321 | if (!dst) | |
322 | return NULL; | |
acbbb76a | 323 | cifs_from_utf16(dst, (__le16 *) src, len, maxlen, codepage, |
b693855f | 324 | NO_MAP_UNI_RSVD); |
066ce689 JL |
325 | } else { |
326 | len = strnlen(src, maxlen); | |
327 | len++; | |
328 | dst = kmalloc(len, GFP_KERNEL); | |
329 | if (!dst) | |
330 | return NULL; | |
331 | strlcpy(dst, src, len); | |
332 | } | |
333 | ||
334 | return dst; | |
335 | } | |
336 | ||
a4153cb1 SF |
337 | static __le16 convert_to_sfu_char(char src_char) |
338 | { | |
339 | __le16 dest_char; | |
340 | ||
341 | switch (src_char) { | |
342 | case ':': | |
343 | dest_char = cpu_to_le16(UNI_COLON); | |
344 | break; | |
345 | case '*': | |
346 | dest_char = cpu_to_le16(UNI_ASTERISK); | |
347 | break; | |
348 | case '?': | |
349 | dest_char = cpu_to_le16(UNI_QUESTION); | |
350 | break; | |
351 | case '<': | |
352 | dest_char = cpu_to_le16(UNI_LESSTHAN); | |
353 | break; | |
354 | case '>': | |
355 | dest_char = cpu_to_le16(UNI_GRTRTHAN); | |
356 | break; | |
357 | case '|': | |
358 | dest_char = cpu_to_le16(UNI_PIPE); | |
359 | break; | |
360 | default: | |
361 | dest_char = 0; | |
362 | } | |
363 | ||
364 | return dest_char; | |
365 | } | |
366 | ||
367 | static __le16 convert_to_sfm_char(char src_char) | |
368 | { | |
369 | __le16 dest_char; | |
370 | ||
371 | switch (src_char) { | |
372 | case ':': | |
373 | dest_char = cpu_to_le16(SFM_COLON); | |
374 | break; | |
375 | case '*': | |
376 | dest_char = cpu_to_le16(SFM_ASTERISK); | |
377 | break; | |
378 | case '?': | |
379 | dest_char = cpu_to_le16(SFM_QUESTION); | |
380 | break; | |
381 | case '<': | |
382 | dest_char = cpu_to_le16(SFM_LESSTHAN); | |
383 | break; | |
384 | case '>': | |
385 | dest_char = cpu_to_le16(SFM_GRTRTHAN); | |
386 | break; | |
387 | case '|': | |
388 | dest_char = cpu_to_le16(SFM_PIPE); | |
389 | break; | |
390 | default: | |
391 | dest_char = 0; | |
392 | } | |
393 | ||
394 | return dest_char; | |
395 | } | |
396 | ||
84cdf74e JL |
397 | /* |
398 | * Convert 16 bit Unicode pathname to wire format from string in current code | |
399 | * page. Conversion may involve remapping up the six characters that are | |
400 | * only legal in POSIX-like OS (if they are present in the string). Path | |
401 | * names are little endian 16 bit Unicode on the wire | |
402 | */ | |
403 | int | |
acbbb76a | 404 | cifsConvertToUTF16(__le16 *target, const char *source, int srclen, |
a4153cb1 | 405 | const struct nls_table *cp, int map_chars) |
84cdf74e | 406 | { |
ce36d9ab SF |
407 | int i, charlen; |
408 | int j = 0; | |
84cdf74e | 409 | char src_char; |
581ade4d JL |
410 | __le16 dst_char; |
411 | wchar_t tmp; | |
84cdf74e | 412 | |
a4153cb1 | 413 | if (map_chars == NO_MAP_UNI_RSVD) |
acbbb76a | 414 | return cifs_strtoUTF16(target, source, PATH_MAX, cp); |
84cdf74e | 415 | |
ce36d9ab | 416 | for (i = 0; i < srclen; j++) { |
84cdf74e | 417 | src_char = source[i]; |
11379b5e | 418 | charlen = 1; |
a4153cb1 SF |
419 | |
420 | /* check if end of string */ | |
421 | if (src_char == 0) | |
acbbb76a | 422 | goto ctoUTF16_out; |
a4153cb1 SF |
423 | |
424 | /* see if we must remap this char */ | |
425 | if (map_chars == SFU_MAP_UNI_RSVD) | |
426 | dst_char = convert_to_sfu_char(src_char); | |
427 | else if (map_chars == SFM_MAP_UNI_RSVD) | |
428 | dst_char = convert_to_sfm_char(src_char); | |
429 | else | |
430 | dst_char = 0; | |
84cdf74e JL |
431 | /* |
432 | * FIXME: We can not handle remapping backslash (UNI_SLASH) | |
433 | * until all the calls to build_path_from_dentry are modified, | |
434 | * as they use backslash as separator. | |
435 | */ | |
a4153cb1 | 436 | if (dst_char == 0) { |
581ade4d JL |
437 | charlen = cp->char2uni(source + i, srclen - i, &tmp); |
438 | dst_char = cpu_to_le16(tmp); | |
439 | ||
84cdf74e JL |
440 | /* |
441 | * if no match, use question mark, which at least in | |
442 | * some cases serves as wild card | |
443 | */ | |
444 | if (charlen < 1) { | |
581ade4d | 445 | dst_char = cpu_to_le16(0x003f); |
84cdf74e JL |
446 | charlen = 1; |
447 | } | |
84cdf74e | 448 | } |
11379b5e JL |
449 | /* |
450 | * character may take more than one byte in the source string, | |
451 | * but will take exactly two bytes in the target string | |
452 | */ | |
453 | i += charlen; | |
581ade4d | 454 | put_unaligned(dst_char, &target[j]); |
84cdf74e JL |
455 | } |
456 | ||
acbbb76a | 457 | ctoUTF16_out: |
ce36d9ab | 458 | put_unaligned(0, &target[j]); /* Null terminate target unicode string */ |
c73f6939 | 459 | return j; |
84cdf74e | 460 | } |
2503a0db PS |
461 | |
462 | #ifdef CONFIG_CIFS_SMB2 | |
463 | /* | |
464 | * cifs_local_to_utf16_bytes - how long will a string be after conversion? | |
465 | * @from - pointer to input string | |
466 | * @maxbytes - don't go past this many bytes of input string | |
467 | * @codepage - source codepage | |
468 | * | |
469 | * Walk a string and return the number of bytes that the string will | |
470 | * be after being converted to the given charset, not including any null | |
471 | * termination required. Don't walk past maxbytes in the source buffer. | |
472 | */ | |
473 | ||
474 | static int | |
475 | cifs_local_to_utf16_bytes(const char *from, int len, | |
476 | const struct nls_table *codepage) | |
477 | { | |
478 | int charlen; | |
479 | int i; | |
480 | wchar_t wchar_to; | |
481 | ||
482 | for (i = 0; len && *from; i++, from += charlen, len -= charlen) { | |
483 | charlen = codepage->char2uni(from, len, &wchar_to); | |
484 | /* Failed conversion defaults to a question mark */ | |
485 | if (charlen < 1) | |
486 | charlen = 1; | |
487 | } | |
488 | return 2 * i; /* UTF16 characters are two bytes */ | |
489 | } | |
490 | ||
491 | /* | |
492 | * cifs_strndup_to_utf16 - copy a string to wire format from the local codepage | |
493 | * @src - source string | |
494 | * @maxlen - don't walk past this many bytes in the source string | |
495 | * @utf16_len - the length of the allocated string in bytes (including null) | |
496 | * @cp - source codepage | |
497 | * @remap - map special chars | |
498 | * | |
499 | * Take a string convert it from the local codepage to UTF16 and | |
500 | * put it in a new buffer. Returns a pointer to the new string or NULL on | |
501 | * error. | |
502 | */ | |
503 | __le16 * | |
504 | cifs_strndup_to_utf16(const char *src, const int maxlen, int *utf16_len, | |
505 | const struct nls_table *cp, int remap) | |
506 | { | |
507 | int len; | |
508 | __le16 *dst; | |
509 | ||
510 | len = cifs_local_to_utf16_bytes(src, maxlen, cp); | |
511 | len += 2; /* NULL */ | |
512 | dst = kmalloc(len, GFP_KERNEL); | |
513 | if (!dst) { | |
514 | *utf16_len = 0; | |
515 | return NULL; | |
516 | } | |
517 | cifsConvertToUTF16(dst, src, strlen(src), cp, remap); | |
518 | *utf16_len = len; | |
519 | return dst; | |
520 | } | |
521 | #endif /* CONFIG_CIFS_SMB2 */ |