[linux-block.git] / fs / smb / server / unicode.c

// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *   Some of the source code in this file came from fs/cifs/cifs_unicode.c
 *
 *   Copyright (c) International Business Machines  Corp., 2000,2009
 *   Modified by Steve French (sfrench@us.ibm.com)
 *   Modified by Namjae Jeon (linkinjeon@kernel.org)
 */
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/unaligned.h>
#include "glob.h"
#include "unicode.h"
#include "smb_common.h"

/*
 * cifs_mapchar() - convert one host-endian UTF-16 character to the codepage
 * @target:	where the converted character is stored
 * @from:	host-endian UTF-16 source; @from[0] is the character to
 *		convert. Up to three units starting at @from are read when
 *		the surrogate pair/IVS fallback below is taken, so the caller
 *		must make three units readable.
 * @cp:		codepage to which the character should be converted
 * @mapchar:	should character be mapped according to mapchars mount option?
 *
 * This function handles the conversion of a single character. It is the
 * responsibility of the caller to ensure that the target buffer is large
 * enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE).
 *
 * Conversion order:
 *  1. with @mapchar set, the six remapped code points are turned back into
 *     their single-byte characters;
 *  2. otherwise the codepage's uni2char() is tried;
 *  3. if that fails and the codepage charset is "utf8", up to three UTF-16
 *     units are converted in one go (surrogate pairs and IVS);
 *  4. if everything fails a single '?' is stored.
 *
 * Return:	number of bytes stored in @target (always at least 1)
 */
static int
cifs_mapchar(char *target, const __u16 *from, const struct nls_table *cp,
	     bool mapchar)
{
	__u16 src_char = *from;
	int len;

	if (mapchar) {
		/*
		 * BB: Cannot handle remapping UNI_SLASH until all the calls to
		 *     build_path_from_dentry are modified, as they use slash as
		 *     separator.
		 */
		switch (src_char) {
		case UNI_COLON:
			*target = ':';
			return 1;
		case UNI_ASTERISK:
			*target = '*';
			return 1;
		case UNI_QUESTION:
			*target = '?';
			return 1;
		case UNI_PIPE:
			*target = '|';
			return 1;
		case UNI_GRTRTHAN:
			*target = '>';
			return 1;
		case UNI_LESSTHAN:
			*target = '<';
			return 1;
		default:
			/* not a remapped character: use the codepage */
			break;
		}
	}

	len = cp->uni2char(src_char, target, NLS_MAX_CHARSET_SIZE);
	if (len > 0)
		return len;

	/*
	 * convert SURROGATE_PAIR and IVS
	 *
	 * @from holds host-endian values (the same value was just handed to
	 * uni2char() above), so the converter must be told the input is
	 * host-endian. Declaring it little-endian would byte-swap every
	 * unit on big-endian hosts.
	 */
	if (!strcmp(cp->charset, "utf8")) {
		len = utf16s_to_utf8s(from, 3, UTF16_HOST_ENDIAN, target, 6);
		if (len > 0)
			return len;
	}

	/* nothing could convert this character */
	*target = '?';
	return 1;
}

/*
 * smb_utf16_bytes() - size a utf16le string after codepage conversion
 * @from:	utf16le input string
 * @maxbytes:	upper bound, in bytes, on how much of @from may be read
 * @codepage:	destination codepage
 *
 * Steps through @from one 16-bit unit at a time, stopping at the first zero
 * unit or once @maxbytes has been covered, and adds up how many bytes
 * cifs_mapchar() produces for each position. The null terminator is not
 * counted.
 *
 * Return:	number of bytes the converted string needs
 */
static int smb_utf16_bytes(const __le16 *from, int maxbytes,
			   const struct nls_table *codepage)
{
	const int nwords = maxbytes / 2;
	char scratch[NLS_MAX_CHARSET_SIZE];
	__u16 window[3];
	int total = 0;
	int pos;

	for (pos = 0; pos < nwords; pos++) {
		int ahead, produced;

		window[0] = get_unaligned_le16(&from[pos]);
		if (!window[0])
			break;

		/* look-ahead units; zero-filled past the end of the input */
		for (ahead = 1; ahead <= 2; ahead++) {
			if (pos + ahead >= nwords)
				window[ahead] = 0;
			else
				window[ahead] =
					get_unaligned_le16(&from[pos + ahead]);
		}

		produced = cifs_mapchar(scratch, window, codepage, 0);
		total += (produced > 0) ? produced : 1;
	}

	return total;
}

/*
 * smb_from_utf16() - convert utf16le string to local charset
 * @to:		destination buffer
 * @from:	source buffer
 * @tolen:	destination buffer size (in bytes)
 * @fromlen:	source buffer size (in bytes)
 * @codepage:	codepage to which characters should be converted
 * @mapchar:	should characters be remapped according to the mapchars option?
 *
 * Convert a little-endian utf16le string (as received on the wire) to a
 * string in the provided codepage. The tolen and fromlen parameters are to
 * ensure that the code doesn't walk off of the end of either buffer.
 * Conversion stops at the first zero unit in @from, when @fromlen is
 * exhausted, or when the next character would no longer fit in front of the
 * null terminator. The destination string is always null terminated, using
 * nls_nullsize(@codepage) zero bytes.
 *
 * Multi-unit UTF-16 sequences (surrogate pairs, IVS) are only converted by
 * cifs_mapchar() when the codepage charset is "utf8"; for any other codepage
 * each unit that the codepage cannot convert becomes '?'.
 *
 * Return:	length of the destination string in bytes, including the
 *		null terminator
 */
static int smb_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
			  const struct nls_table *codepage, bool mapchar)
{
	int i, j, charlen, safelen;
	int outlen = 0;
	int nullsize = nls_nullsize(codepage);
	int fromwords = fromlen / 2;
	char tmp[NLS_MAX_CHARSET_SIZE];
	__u16 ftmp[3];	/* ftmp[3] = 3array x 2bytes = 6bytes UTF-16 */

	/*
	 * because the chars can be of varying widths, we need to take care
	 * not to overflow the destination buffer when we get close to the
	 * end of it. Until we get to this offset, we don't need to check
	 * for overflow however.
	 */
	safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize);

	for (i = 0; i < fromwords; i++) {
		ftmp[0] = get_unaligned_le16(&from[i]);
		if (ftmp[0] == 0)
			break;
		/* look-ahead units; zero-filled past the end of the input */
		for (j = 1; j <= 2; j++) {
			if (i + j < fromwords)
				ftmp[j] = get_unaligned_le16(&from[i + j]);
			else
				ftmp[j] = 0;
		}

		if (outlen >= safelen) {
			/*
			 * Close to the end of 'to': convert into the scratch
			 * buffer first and make sure the result does not
			 * bleed into the null terminator. The checked bytes
			 * are then copied, so cifs_mapchar() is never pointed
			 * at a destination with less than
			 * NLS_MAX_CHARSET_SIZE bytes of room and the
			 * character is converted only once.
			 */
			charlen = cifs_mapchar(tmp, ftmp, codepage, mapchar);
			if ((outlen + charlen) > (tolen - nullsize))
				break;
			memcpy(&to[outlen], tmp, charlen);
		} else {
			/* plenty of room: convert straight into 'to' */
			charlen = cifs_mapchar(&to[outlen], ftmp, codepage,
					       mapchar);
		}
		outlen += charlen;

		/*
		 * charlen (=bytes of UTF-8 for 1 character)
		 * 4bytes UTF-8(surrogate pair) is charlen=4
		 * (4bytes UTF-16 code)
		 * 7-8bytes UTF-8(IVS) is charlen=3+4 or 4+4
		 * (2 UTF-8 pairs divided to 2 UTF-16 pairs)
		 *
		 * Skip the extra source units this conversion consumed.
		 */
		if (charlen == 4)
			i++;
		else if (charlen >= 5)
			/* 5-6bytes UTF-8 */
			i += 2;
	}

	/* properly null-terminate string */
	for (i = 0; i < nullsize; i++)
		to[outlen++] = 0;

	return outlen;
}

/*
 * smb_strtoUTF16() - convert a codepage string to little-endian UTF-16
 * @to:		destination buffer
 * @from:	source string in @codepage
 * @len:	limit on the number of source bytes consumed; for a "utf8"
 *		codepage the same value is also handed to the converter as
 *		the cap on 16-bit units written
 * @codepage:	codepage the source string is encoded in
 *
 * A "utf8" codepage is converted in one call so that characters outside
 * plane 0 are handled. If that call reports an error, or for any other
 * codepage, the string is converted one character at a time, with every
 * unconvertible character replaced by a question mark. A 16-bit zero
 * terminator is always stored after the last unit, so the caller has to
 * provide room for it.
 *
 * Return:	number of 16-bit units stored, not counting the terminator
 */
int smb_strtoUTF16(__le16 *to, const char *from, int len,
		   const struct nls_table *codepage)
{
	wchar_t uc; /* needed to quiet sparse */
	int nunits = -1;
	int step;

	/* special case for utf8 to handle no plane0 chars */
	if (strcmp(codepage->charset, "utf8") == 0)
		nunits = utf8s_to_utf16s(from, len, UTF16_LITTLE_ENDIAN,
					 (wchar_t *)to, len);

	if (nunits < 0) {
		/*
		 * Either not utf8, or the utf8 conversion rejected the input.
		 * This function should not return negative values, so fall
		 * back to converting one character at a time.
		 */
		nunits = 0;
		while (len > 0 && *from) {
			step = codepage->char2uni(from, len, &uc);
			if (step < 1) {
				/* A question mark */
				uc = 0x003f;
				step = 1;
			}
			put_unaligned_le16(uc, &to[nunits]);
			nunits++;
			from += step;
			len -= step;
		}
	}

	put_unaligned_le16(0, &to[nunits]);
	return nunits;
}

/*
 * smb_strndup_from_utf16() - copy a string from wire format to the local
 *		codepage
 * @src:	source string
 * @maxlen:	don't walk past this many bytes in the source string
 * @is_unicode:	is this a unicode string?
 * @codepage:	destination codepage
 *
 * Take a string given by the server, convert it to the local codepage and
 * put it in a new buffer. Returns a pointer to the new string or NULL on
 * error.
 *
 * Return:	destination string buffer or error ptr
 */
char *smb_strndup_from_utf16(const char *src, const int maxlen,
			     const bool is_unicode,
			     const struct nls_table *codepage)
{
	int len, ret;
	char *dst;

	if (is_unicode) {
		len = smb_utf16_bytes((__le16 *)src, maxlen, codepage);
		len += nls_nullsize(codepage);
		dst = kmalloc(len, KSMBD_DEFAULT_GFP);
		if (!dst)
			return ERR_PTR(-ENOMEM);
		ret = smb_from_utf16(dst, (__le16 *)src, len, maxlen, codepage,
				     false);
		if (ret < 0) {
			kfree(dst);
			return ERR_PTR(-EINVAL);
		}
	} else {
		len = strnlen(src, maxlen);
		len++;
		dst = kmalloc(len, KSMBD_DEFAULT_GFP);
		if (!dst)
			return ERR_PTR(-ENOMEM);
		strscpy(dst, src, len);
	}

	return dst;
}

/*
 * smbConvertToUTF16() - convert string from local charset to utf16
 * @target:	destination buffer
 * @source:	source buffer
 * @srclen:	source buffer size (in bytes)
 * @cp:		codepage of the source string
 * @mapchars:	should characters be remapped according to the mapchars option?
 *
 * Convert a pathname in the current code page to wire format. Path names
 * are little-endian 16 bit Unicode on the wire. With @mapchars set, the six
 * characters ':', '*', '?', '<', '>' and '|' are replaced by their remapped
 * code points; without it the whole conversion is delegated to
 * smb_strtoUTF16().
 *
 * Conversion stops at the first NUL byte in @source or after @srclen bytes.
 * In the remapping path a 16-bit zero terminator is stored only when a NUL
 * byte is found in @source. The caller must make @target large enough; this
 * function is not told its size.
 *
 * Return:	number of 16-bit units stored in @target, not counting any
 *		terminator
 */
int smbConvertToUTF16(__le16 *target, const char *source, int srclen,
		      const struct nls_table *cp, int mapchars)
{
	int i, j, k, charlen, avail;
	char src_char;
	__le16 dst_char;
	wchar_t tmp;
	wchar_t wchar_to[6];	/* UTF-16 */
	int ret;
	unicode_t u;

	if (!mapchars)
		return smb_strtoUTF16(target, source, srclen, cp);

	for (i = 0, j = 0; i < srclen; j++) {
		src_char = source[i];
		charlen = 1;
		switch (src_char) {
		case 0:
			put_unaligned(0, &target[j]);
			return j;
		case ':':
			dst_char = cpu_to_le16(UNI_COLON);
			break;
		case '*':
			dst_char = cpu_to_le16(UNI_ASTERISK);
			break;
		case '?':
			dst_char = cpu_to_le16(UNI_QUESTION);
			break;
		case '<':
			dst_char = cpu_to_le16(UNI_LESSTHAN);
			break;
		case '>':
			dst_char = cpu_to_le16(UNI_GRTRTHAN);
			break;
		case '|':
			dst_char = cpu_to_le16(UNI_PIPE);
			break;
		/*
		 * FIXME: We can not handle remapping backslash (UNI_SLASH)
		 * until all the calls to build_path_from_dentry are modified,
		 * as they use backslash as separator.
		 */
		default:
			charlen = cp->char2uni(source + i, srclen - i, &tmp);
			if (charlen > 0) {
				/* tmp is only valid when char2uni succeeded */
				dst_char = cpu_to_le16(tmp);
				break;
			}

			/*
			 * convert SURROGATE_PAIR
			 *
			 * if no match, use question mark, which at least in
			 * some cases serves as wild card
			 */
			if (strcmp(cp->charset, "utf8"))
				goto unknown;
			if (!(src_char & 0x80))
				goto unknown;

			/* never let the decoder look past the source buffer */
			avail = srclen - i;
			if (avail > 6)
				avail = 6;
			charlen = utf8_to_utf32(source + i, avail, &u);
			if (charlen < 0)
				goto unknown;

			/*
			 * Ask for host-endian units: every unit goes through
			 * cpu_to_le16() below, so requesting little-endian
			 * output here would swap twice on big-endian hosts.
			 */
			ret = utf8s_to_utf16s(source + i, charlen,
					      UTF16_HOST_ENDIAN,
					      wchar_to, 6);
			if (ret <= 0)
				goto unknown;

			/*
			 * 1-3bytes UTF-8 to 2bytes UTF-16
			 * 4bytes UTF-8(surrogate pair) to 4bytes UTF-16
			 * 7-8bytes UTF-8(IVS) divided to 2 UTF-16
			 * (charlen=3+4 or 4+4)
			 */
			for (k = 0; k < ret; k++) {
				dst_char = cpu_to_le16(wchar_to[k]);
				put_unaligned(dst_char, &target[j + k]);
			}
			i += charlen;
			/* the for statement's j++ accounts for the last unit */
			j += ret - 1;
			continue;

unknown:
			dst_char = cpu_to_le16(0x003f);
			charlen = 1;
		}

		/*
		 * character may take more than one byte in the source string,
		 * but will take exactly two bytes in the target string
		 */
		i += charlen;
		put_unaligned(dst_char, &target[j]);
	}

	return j;
}
Commit	Line	Data
e2f34481 NJ	1	// SPDX-License-Identifier: GPL-2.0-or-later
	2	/*
	3	* Some of the source code in this file came from fs/cifs/cifs_unicode.c
	4	*
	5	* Copyright (c) International Business Machines Corp., 2000,2009
	6	* Modified by Steve French (sfrench@us.ibm.com)
	7	* Modified by Namjae Jeon (linkinjeon@kernel.org)
	8	*/
	9	#include <linux/fs.h>
	10	#include <linux/slab.h>
5f60d5f6	11	#include <linux/unaligned.h>
e2f34481 NJ	12	#include "glob.h"
e2f34481 NJ	13	#include "unicode.h"
e2f34481 NJ	14	#include "smb_common.h"
e2f34481 NJ	15
e2f34481 NJ	16	/*
	17	* cifs_mapchar() - convert a host-endian char to proper char in codepage
	18	* @target: where converted character should be copied
0c180317	19	* @from: host-endian source string
e2f34481 NJ	20	* @cp: codepage to which character should be converted
	21	* @mapchar: should character be mapped according to mapchars mount option?
	22	*
	23	* This function handles the conversion of a single character. It is the
	24	* responsibility of the caller to ensure that the target buffer is large
	25	* enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE).
	26	*
	27	* Return: string length after conversion
	28	*/
	29	static int
0c180317	30	cifs_mapchar(char target, const __u16 from, const struct nls_table *cp,
070fb21e	31	bool mapchar)
e2f34481 NJ	32	{
e2f34481 NJ	33	int len = 1;
0c180317 NJ	34	__u16 src_char;
	35
	36	src_char = *from;
e2f34481 NJ	37
	38	if (!mapchar)
	39	goto cp_convert;
	40
	41	/*
	42	* BB: Cannot handle remapping UNI_SLASH until all the calls to
	43	* build_path_from_dentry are modified, as they use slash as
	44	* separator.
	45	*/
	46	switch (src_char) {
	47	case UNI_COLON:
	48	*target = ':';
	49	break;
	50	case UNI_ASTERISK:
	51	target = '';
	52	break;
	53	case UNI_QUESTION:
	54	*target = '?';
	55	break;
	56	case UNI_PIPE:
	57	*target = '\|';
	58	break;
	59	case UNI_GRTRTHAN:
	60	*target = '>';
	61	break;
	62	case UNI_LESSTHAN:
	63	*target = '<';
	64	break;
	65	default:
	66	goto cp_convert;
	67	}
	68
	69	out:
	70	return len;
	71
	72	cp_convert:
	73	len = cp->uni2char(src_char, target, NLS_MAX_CHARSET_SIZE);
0c180317 NJ	74	if (len <= 0)
0c180317 NJ	75	goto surrogate_pair;
e2f34481 NJ	76
e2f34481 NJ	77	goto out;
0c180317 NJ	78
	79	surrogate_pair:
	80	/* convert SURROGATE_PAIR and IVS */
	81	if (strcmp(cp->charset, "utf8"))
	82	goto unknown;
	83	len = utf16s_to_utf8s(from, 3, UTF16_LITTLE_ENDIAN, target, 6);
	84	if (len <= 0)
	85	goto unknown;
	86	return len;
	87
	88	unknown:
	89	*target = '?';
	90	len = 1;
	91	goto out;
	92	}
	93
	94	/*
	95	* smb_utf16_bytes() - compute converted string length
	96	* @from: pointer to input string
	97	* @maxbytes: input string length
	98	* @codepage: destination codepage
	99	*
	100	* Walk a utf16le string and return the number of bytes that the string will
	101	* be after being converted to the given charset, not including any null
	102	* termination required. Don't walk past maxbytes in the source buffer.
	103	*
	104	* Return: string length after conversion
	105	*/
	106	static int smb_utf16_bytes(const __le16 *from, int maxbytes,
	107	const struct nls_table *codepage)
	108	{
	109	int i, j;
	110	int charlen, outlen = 0;
	111	int maxwords = maxbytes / 2;
	112	char tmp[NLS_MAX_CHARSET_SIZE];
	113	__u16 ftmp[3];
	114
	115	for (i = 0; i < maxwords; i++) {
	116	ftmp[0] = get_unaligned_le16(&from[i]);
	117	if (ftmp[0] == 0)
	118	break;
	119	for (j = 1; j <= 2; j++) {
	120	if (i + j < maxwords)
	121	ftmp[j] = get_unaligned_le16(&from[i + j]);
	122	else
	123	ftmp[j] = 0;
	124	}
	125
	126	charlen = cifs_mapchar(tmp, ftmp, codepage, 0);
	127	if (charlen > 0)
	128	outlen += charlen;
	129	else
	130	outlen++;
	131	}
	132
	133	return outlen;
e2f34481 NJ	134	}
e2f34481 NJ	135
e2f34481 NJ	136	/*
	137	* smb_from_utf16() - convert utf16le string to local charset
	138	* @to: destination buffer
	139	* @from: source buffer
	140	* @tolen: destination buffer size (in bytes)
	141	* @fromlen: source buffer size (in bytes)
	142	* @codepage: codepage to which characters should be converted
	143	* @mapchar: should characters be remapped according to the mapchars option?
	144	*
	145	* Convert a little-endian utf16le string (as sent by the server) to a string
	146	* in the provided codepage. The tolen and fromlen parameters are to ensure
	147	* that the code doesn't walk off of the end of the buffer (which is always
	148	* a danger if the alignment of the source buffer is off). The destination
	149	* string is always properly null terminated and fits in the destination
	150	* buffer. Returns the length of the destination string in bytes (including
	151	* null terminator).
	152	*
	153	* Note that some windows versions actually send multiword UTF-16 characters
	154	* instead of straight UTF16-2. The linux nls routines however aren't able to
	155	* deal with those characters properly. In the event that we get some of
	156	* those characters, they won't be translated properly.
	157	*
	158	* Return: string length after conversion
	159	*/
64b39f4a	160	static int smb_from_utf16(char to, const __le16 from, int tolen, int fromlen,
070fb21e	161	const struct nls_table *codepage, bool mapchar)
e2f34481	162	{
0c180317	163	int i, j, charlen, safelen;
e2f34481 NJ	164	int outlen = 0;
	165	int nullsize = nls_nullsize(codepage);
	166	int fromwords = fromlen / 2;
	167	char tmp[NLS_MAX_CHARSET_SIZE];
0c180317	168	__u16 ftmp[3]; /* ftmp[3] = 3array x 2bytes = 6bytes UTF-16 */
e2f34481 NJ	169
	170	/*
	171	* because the chars can be of varying widths, we need to take care
	172	* not to overflow the destination buffer when we get close to the
	173	* end of it. Until we get to this offset, we don't need to check
	174	* for overflow however.
	175	*/
	176	safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize);
	177
	178	for (i = 0; i < fromwords; i++) {
0c180317 NJ	179	ftmp[0] = get_unaligned_le16(&from[i]);
0c180317 NJ	180	if (ftmp[0] == 0)
e2f34481	181	break;
0c180317 NJ	182	for (j = 1; j <= 2; j++) {
	183	if (i + j < fromwords)
	184	ftmp[j] = get_unaligned_le16(&from[i + j]);
	185	else
	186	ftmp[j] = 0;
	187	}
e2f34481 NJ	188
	189	/*
	190	* check to see if converting this character might make the
	191	* conversion bleed into the null terminator
	192	*/
	193	if (outlen >= safelen) {
	194	charlen = cifs_mapchar(tmp, ftmp, codepage, mapchar);
	195	if ((outlen + charlen) > (tolen - nullsize))
	196	break;
	197	}
	198
	199	/* put converted char into 'to' buffer */
	200	charlen = cifs_mapchar(&to[outlen], ftmp, codepage, mapchar);
	201	outlen += charlen;
0c180317 NJ	202
	203	/*
	204	* charlen (=bytes of UTF-8 for 1 character)
	205	* 4bytes UTF-8(surrogate pair) is charlen=4
	206	* (4bytes UTF-16 code)
	207	* 7-8bytes UTF-8(IVS) is charlen=3+4 or 4+4
	208	* (2 UTF-8 pairs divided to 2 UTF-16 pairs)
	209	*/
	210	if (charlen == 4)
	211	i++;
	212	else if (charlen >= 5)
	213	/* 5-6bytes UTF-8 */
	214	i += 2;
e2f34481 NJ	215	}
	216
	217	/* properly null-terminate string */
	218	for (i = 0; i < nullsize; i++)
	219	to[outlen++] = 0;
	220
	221	return outlen;
	222	}
	223
	224	/*
	225	* smb_strtoUTF16() - Convert character string to unicode string
	226	* @to: destination buffer
	227	* @from: source buffer
	228	* @len: destination buffer size (in bytes)
	229	* @codepage: codepage to which characters should be converted
	230	*
	231	* Return: string length after conversion
	232	*/
64b39f4a	233	int smb_strtoUTF16(__le16 to, const char from, int len,
070fb21e	234	const struct nls_table *codepage)
e2f34481 NJ	235	{
	236	int charlen;
	237	int i;
	238	wchar_t wchar_to; /* needed to quiet sparse */
	239
	240	/* special case for utf8 to handle no plane0 chars */
	241	if (!strcmp(codepage->charset, "utf8")) {
	242	/*
	243	* convert utf8 -> utf16, we assume we have enough space
	244	* as caller should have assumed conversion does not overflow
	245	* in destination len is length in wchar_t units (16bits)
	246	*/
	247	i = utf8s_to_utf16s(from, len, UTF16_LITTLE_ENDIAN,
070fb21e	248	(wchar_t *)to, len);
e2f34481 NJ	249
	250	/* if success terminate and exit */
	251	if (i >= 0)
	252	goto success;
	253	/*
	254	* if fails fall back to UCS encoding as this
	255	* function should not return negative values
	256	* currently can fail only if source contains
	257	* invalid encoded characters
	258	*/
	259	}
	260
	261	for (i = 0; len > 0 && *from; i++, from += charlen, len -= charlen) {
	262	charlen = codepage->char2uni(from, len, &wchar_to);
	263	if (charlen < 1) {
	264	/* A question mark */
	265	wchar_to = 0x003f;
	266	charlen = 1;
	267	}
	268	put_unaligned_le16(wchar_to, &to[i]);
	269	}
	270
	271	success:
	272	put_unaligned_le16(0, &to[i]);
	273	return i;
	274	}
	275
	276	/*
	277	* smb_strndup_from_utf16() - copy a string from wire format to the local
	278	* codepage
	279	* @src: source string
	280	* @maxlen: don't walk past this many bytes in the source string
	281	* @is_unicode: is this a unicode string?
	282	* @codepage: destination codepage
	283	*
	284	* Take a string given by the server, convert it to the local codepage and
	285	* put it in a new buffer. Returns a pointer to the new string or NULL on
	286	* error.
	287	*
	288	* Return: destination string buffer or error ptr
	289	*/
64b39f4a	290	char smb_strndup_from_utf16(const char src, const int maxlen,
070fb21e NJ	291	const bool is_unicode,
070fb21e NJ	292	const struct nls_table *codepage)
e2f34481 NJ	293	{
	294	int len, ret;
	295	char *dst;
	296
	297	if (is_unicode) {
64b39f4a	298	len = smb_utf16_bytes((__le16 *)src, maxlen, codepage);
e2f34481	299	len += nls_nullsize(codepage);
0066f623	300	dst = kmalloc(len, KSMBD_DEFAULT_GFP);
e2f34481 NJ	301	if (!dst)
e2f34481 NJ	302	return ERR_PTR(-ENOMEM);
64b39f4a	303	ret = smb_from_utf16(dst, (__le16 *)src, len, maxlen, codepage,
070fb21e	304	false);
e2f34481 NJ	305	if (ret < 0) {
	306	kfree(dst);
	307	return ERR_PTR(-EINVAL);
	308	}
	309	} else {
	310	len = strnlen(src, maxlen);
	311	len++;
0066f623	312	dst = kmalloc(len, KSMBD_DEFAULT_GFP);
e2f34481 NJ	313	if (!dst)
	314	return ERR_PTR(-ENOMEM);
	315	strscpy(dst, src, len);
	316	}
	317
	318	return dst;
	319	}
	320
	321	/*
	322	* Convert 16 bit Unicode pathname to wire format from string in current code
	323	* page. Conversion may involve remapping up the six characters that are
	324	* only legal in POSIX-like OS (if they are present in the string). Path
	325	* names are little endian 16 bit Unicode on the wire
	326	*/
	327	/*
	328	* smbConvertToUTF16() - convert string from local charset to utf16
	329	* @target: destination buffer
	330	* @source: source buffer
	331	* @srclen: source buffer size (in bytes)
	332	* @cp: codepage to which characters should be converted
	333	* @mapchar: should characters be remapped according to the mapchars option?
	334	*
	335	* Convert 16 bit Unicode pathname to wire format from string in current code
	336	* page. Conversion may involve remapping up the six characters that are
	337	* only legal in POSIX-like OS (if they are present in the string). Path
	338	* names are little endian 16 bit Unicode on the wire
	339	*
	340	* Return: char length after conversion
	341	*/
64b39f4a	342	int smbConvertToUTF16(__le16 target, const char source, int srclen,
070fb21e	343	const struct nls_table *cp, int mapchars)
e2f34481 NJ	344	{
	345	int i, j, charlen;
	346	char src_char;
	347	__le16 dst_char;
	348	wchar_t tmp;
0c180317 NJ	349	wchar_t wchar_to[6]; /* UTF-16 */
	350	int ret;
	351	unicode_t u;
e2f34481 NJ	352
	353	if (!mapchars)
	354	return smb_strtoUTF16(target, source, srclen, cp);
	355
	356	for (i = 0, j = 0; i < srclen; j++) {
	357	src_char = source[i];
	358	charlen = 1;
	359	switch (src_char) {
	360	case 0:
	361	put_unaligned(0, &target[j]);
	362	return j;
	363	case ':':
	364	dst_char = cpu_to_le16(UNI_COLON);
	365	break;
	366	case '*':
	367	dst_char = cpu_to_le16(UNI_ASTERISK);
	368	break;
	369	case '?':
	370	dst_char = cpu_to_le16(UNI_QUESTION);
	371	break;
	372	case '<':
	373	dst_char = cpu_to_le16(UNI_LESSTHAN);
	374	break;
	375	case '>':
	376	dst_char = cpu_to_le16(UNI_GRTRTHAN);
	377	break;
	378	case '\|':
	379	dst_char = cpu_to_le16(UNI_PIPE);
	380	break;
	381	/*
	382	* FIXME: We can not handle remapping backslash (UNI_SLASH)
	383	* until all the calls to build_path_from_dentry are modified,
	384	* as they use backslash as separator.
	385	*/
	386	default:
	387	charlen = cp->char2uni(source + i, srclen - i, &tmp);
	388	dst_char = cpu_to_le16(tmp);
	389
	390	/*
	391	* if no match, use question mark, which at least in
	392	* some cases serves as wild card
	393	*/
0c180317 NJ	394	if (charlen > 0)
	395	goto ctoUTF16;
	396
	397	/* convert SURROGATE_PAIR */
	398	if (strcmp(cp->charset, "utf8"))
	399	goto unknown;
	400	if (*(source + i) & 0x80) {
	401	charlen = utf8_to_utf32(source + i, 6, &u);
	402	if (charlen < 0)
	403	goto unknown;
	404	} else
	405	goto unknown;
	406	ret = utf8s_to_utf16s(source + i, charlen,
	407	UTF16_LITTLE_ENDIAN,
	408	wchar_to, 6);
	409	if (ret < 0)
	410	goto unknown;
	411
	412	i += charlen;
	413	dst_char = cpu_to_le16(*wchar_to);
	414	if (charlen <= 3)
	415	/* 1-3bytes UTF-8 to 2bytes UTF-16 */
	416	put_unaligned(dst_char, &target[j]);
	417	else if (charlen == 4) {
	418	/*
	419	* 4bytes UTF-8(surrogate pair) to 4bytes UTF-16
	420	* 7-8bytes UTF-8(IVS) divided to 2 UTF-16
	421	* (charlen=3+4 or 4+4)
	422	*/
	423	put_unaligned(dst_char, &target[j]);
	424	dst_char = cpu_to_le16(*(wchar_to + 1));
	425	j++;
	426	put_unaligned(dst_char, &target[j]);
	427	} else if (charlen >= 5) {
	428	/* 5-6bytes UTF-8 to 6bytes UTF-16 */
	429	put_unaligned(dst_char, &target[j]);
	430	dst_char = cpu_to_le16(*(wchar_to + 1));
	431	j++;
	432	put_unaligned(dst_char, &target[j]);
	433	dst_char = cpu_to_le16(*(wchar_to + 2));
	434	j++;
	435	put_unaligned(dst_char, &target[j]);
e2f34481	436	}
0c180317 NJ	437	continue;
	438
	439	unknown:
	440	dst_char = cpu_to_le16(0x003f);
	441	charlen = 1;
e2f34481	442	}
0c180317 NJ	443
0c180317 NJ	444	ctoUTF16:
e2f34481 NJ	445	/*
	446	* character may take more than one byte in the source string,
	447	* but will take exactly two bytes in the target string
	448	*/
	449	i += charlen;
	450	put_unaligned(dst_char, &target[j]);
	451	}
	452
	453	return j;
	454	}