[linux-2.6-block.git] / fs / cifs / cifs_unicode.c

/*
 *   fs/cifs/cifs_unicode.c
 *
 *   Copyright (c) International Business Machines  Corp., 2000,2009
 *   Modified by Steve French (sfrench@us.ibm.com)
 *
 *   This program is free software;  you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program;  if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/slab.h>
#include "cifs_fs_sb.h"
#include "cifs_unicode.h"
#include "cifs_uniupr.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifs_debug.h"

/*
 * cifs_utf16_bytes - how long will a string be after conversion?
 * @from - pointer to the little-endian UTF-16 input string
 * @maxbytes - don't go past this many bytes of input string
 * @codepage - destination codepage
 *
 * Walk a utf16le string, stopping at the first zero code unit or after
 * maxbytes / 2 code units, and return the number of bytes the string will
 * occupy once converted to the given charset. The count does not include any
 * null termination. A code unit that the codepage cannot convert is counted
 * as a single byte.
 */
int
cifs_utf16_bytes(const __le16 *from, int maxbytes,
		const struct nls_table *codepage)
{
	const int nwords = maxbytes / 2;
	char scratch[NLS_MAX_CHARSET_SIZE];
	int total = 0;
	int idx;

	for (idx = 0; idx < nwords; idx++) {
		const __u16 wc = get_unaligned_le16(&from[idx]);
		int nbytes;

		/* a zero code unit terminates the source string */
		if (!wc)
			break;

		/* only the length matters here; the converted bytes are dropped */
		nbytes = codepage->uni2char(wc, scratch, NLS_MAX_CHARSET_SIZE);
		total += (nbytes > 0) ? nbytes : 1;
	}

	return total;
}

/*
 * Pick the reserved-character remapping mode from the mount flags of the
 * given superblock info. The SFM flag is tested first, so it wins when both
 * mapping flags are set; with neither flag set no remapping is requested.
 */
int cifs_remap(struct cifs_sb_info *cifs_sb)
{
	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SFM_CHR)
		return SFM_MAP_UNI_RSVD;

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR)
		return SFU_MAP_UNI_RSVD;

	return NO_MAP_UNI_RSVD;
}

/*
 * Convert character using the SFU - "Services for Unix" remapping range.
 *
 * If src_char is one of the remapped values handled here, store the plain
 * character it stands for in *target and return true. Otherwise return false
 * and leave *target untouched.
 *
 * BB: Cannot handle remapping UNI_SLASH until all the calls to
 *     build_path_from_dentry are modified, as they use slash as
 *     separator.
 */
static bool
convert_sfu_char(const __u16 src_char, char *target)
{
	char plain;

	if (src_char == UNI_COLON)
		plain = ':';
	else if (src_char == UNI_ASTERISK)
		plain = '*';
	else if (src_char == UNI_QUESTION)
		plain = '?';
	else if (src_char == UNI_PIPE)
		plain = '|';
	else if (src_char == UNI_GRTRTHAN)
		plain = '>';
	else if (src_char == UNI_LESSTHAN)
		plain = '<';
	else
		return false;

	*target = plain;
	return true;
}

/*
 * Convert character using the SFM - "Services for Mac" remapping range.
 *
 * If src_char is one of the remapped values handled here, store the plain
 * character it stands for in *target and return true. Otherwise return false
 * and leave *target untouched. Note that SFM_SLASH is turned into a
 * backslash.
 */
static bool
convert_sfm_char(const __u16 src_char, char *target)
{
	char plain;

	if (src_char == SFM_COLON)
		plain = ':';
	else if (src_char == SFM_ASTERISK)
		plain = '*';
	else if (src_char == SFM_QUESTION)
		plain = '?';
	else if (src_char == SFM_PIPE)
		plain = '|';
	else if (src_char == SFM_GRTRTHAN)
		plain = '>';
	else if (src_char == SFM_LESSTHAN)
		plain = '<';
	else if (src_char == SFM_SLASH)
		plain = '\\';
	else
		return false;

	*target = plain;
	return true;
}


/*
 * cifs_mapchar - convert a host-endian char to proper char in codepage
 * @target - where converted character should be copied
 * @src_char - 2 byte host-endian source character
 * @cp - codepage to which character should be converted
 * @maptype - which reserved-character remapping (SFM, SFU or none) to try
 *	      before falling back to the codepage
 *
 * This function handles the conversion of a single character. It is the
 * responsibility of the caller to ensure that the target buffer is large
 * enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE).
 *
 * Returns the number of bytes stored at target, which is always at least 1:
 * a character the codepage cannot represent is written as '?'.
 */
static int
cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp,
	     int maptype)
{
	bool remapped = false;
	int nbytes;

	/* first give the selected remap table a chance at this character */
	if (maptype == SFM_MAP_UNI_RSVD)
		remapped = convert_sfm_char(src_char, target);
	if (!remapped && maptype == SFU_MAP_UNI_RSVD)
		remapped = convert_sfu_char(src_char, target);
	if (remapped)
		return 1;

	/* not in the special remap set: let the codepage convert it */
	nbytes = cp->uni2char(src_char, target, NLS_MAX_CHARSET_SIZE);
	if (nbytes > 0)
		return nbytes;

	*target = '?';
	return 1;
}

/*
 * cifs_from_utf16 - convert utf16le string to local charset
 * @to - destination buffer
 * @from - source buffer
 * @tolen - destination buffer size (in bytes)
 * @fromlen - source buffer size (in bytes)
 * @codepage - codepage to which characters should be converted
 * @map_type - which reserved-character remapping to apply (passed through
 *	       to cifs_mapchar)
 *
 * Convert a little-endian utf16le string (as sent by the server) to a string
 * in the provided codepage. Conversion stops at the first zero code unit,
 * after fromlen / 2 code units, or when the next character would no longer
 * leave room for the terminator in the destination. The terminator is then
 * appended and the function returns the length of the destination string in
 * bytes (including null terminator).
 *
 * NOTE(review): the terminator is written unconditionally, so tolen is
 * expected to be at least nls_nullsize(codepage) - confirm callers honour
 * this.
 *
 * Note that some windows versions actually send multiword UTF-16 characters
 * instead of straight UTF16-2. The linux nls routines however aren't able to
 * deal with those characters properly. In the event that we get some of
 * those characters, they won't be translated properly.
 */
int
cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
		const struct nls_table *codepage, int map_type)
{
	const int termlen = nls_nullsize(codepage);
	const int nwords = fromlen / 2;
	/* last offset at which converted text may end */
	const int text_limit = tolen - termlen;
	/*
	 * Characters have varying widths. Below this offset even the widest
	 * character plus the terminator still fits, so no per-character
	 * overflow check is needed until we reach it.
	 */
	const int check_from = tolen - (NLS_MAX_CHARSET_SIZE + termlen);
	char probe[NLS_MAX_CHARSET_SIZE];
	int written = 0;
	int nbytes;
	int idx;

	for (idx = 0; idx < nwords; idx++) {
		const __u16 wc = get_unaligned_le16(&from[idx]);

		if (!wc)
			break;

		/*
		 * Near the end of the buffer, convert into scratch space
		 * first and stop if the result would bleed into the room
		 * reserved for the null terminator.
		 */
		if (written >= check_from) {
			nbytes = cifs_mapchar(probe, wc, codepage, map_type);
			if (written + nbytes > text_limit)
				break;
		}

		/* it fits: convert straight into the destination */
		nbytes = cifs_mapchar(&to[written], wc, codepage, map_type);
		written += nbytes;
	}

	/* properly null-terminate string */
	for (idx = 0; idx < termlen; idx++)
		to[written++] = 0;

	return written;
}

/*
 * NAME:	cifs_strtoUTF16()
 *
 * FUNCTION:	Convert character string to unicode string
 *
 * Converts at most len bytes of from (stopping early at a NUL byte) into
 * little-endian UTF-16 code units stored at to, then appends a zero code
 * unit. Returns the number of code units written, not counting that
 * terminator. The caller must provide a destination that is large enough;
 * no output bound is enforced by the per-character path.
 */
int
cifs_strtoUTF16(__le16 *to, const char *from, int len,
	      const struct nls_table *codepage)
{
	wchar_t uc; /* needed to quiet sparse */
	int nbytes;
	int out;

	/* special case for utf8 to handle no plane0 chars */
	if (strcmp(codepage->charset, "utf8") == 0) {
		/*
		 * convert utf8 -> utf16 in one go; len is passed both as the
		 * input byte count and as the output capacity in 16 bit
		 * units, the caller is assumed to have sized the destination
		 * so that the conversion cannot overflow it
		 */
		out = utf8s_to_utf16s(from, len, UTF16_LITTLE_ENDIAN,
				      (wchar_t *) to, len);

		/* on success only the terminator is left to write */
		if (out >= 0)
			goto terminate;
		/*
		 * A negative result means the bulk conversion failed. This
		 * function should not return negative values, so fall back
		 * to the per-character conversion below.
		 */
	}

	out = 0;
	while (len && *from) {
		nbytes = codepage->char2uni(from, len, &uc);
		if (nbytes < 1) {
			cifs_dbg(VFS, "strtoUTF16: char2uni of 0x%x returned %d\n",
				 *from, nbytes);
			/* substitute a question mark and skip one byte */
			uc = 0x003f;
			nbytes = 1;
		}
		put_unaligned_le16(uc, &to[out]);

		out++;
		from += nbytes;
		len -= nbytes;
	}

terminate:
	put_unaligned_le16(0, &to[out]);
	return out;
}

/*
 * cifs_strndup_from_utf16 - copy a string from wire format to the local
 * codepage
 * @src - source string
 * @maxlen - don't walk past this many bytes in the source string
 * @is_unicode - is this a unicode string?
 * @codepage - destination codepage
 *
 * Take a string given by the server, convert it to the local codepage and
 * put it in a new buffer. Returns a pointer to the new string or NULL on
 * error.
 */
char *
cifs_strndup_from_utf16(const char *src, const int maxlen,
			const bool is_unicode, const struct nls_table *codepage)
{
	int len;
	char *dst;

	if (is_unicode) {
		len = cifs_utf16_bytes((__le16 *) src, maxlen, codepage);
		len += nls_nullsize(codepage);
		dst = kmalloc(len, GFP_KERNEL);
		if (!dst)
			return NULL;
		cifs_from_utf16(dst, (__le16 *) src, len, maxlen, codepage,
			       NO_MAP_UNI_RSVD);
	} else {
		len = strnlen(src, maxlen);
		len++;
		dst = kmalloc(len, GFP_KERNEL);
		if (!dst)
			return NULL;
		strlcpy(dst, src, len);
	}

	return dst;
}

/*
 * Map a local character to its little-endian SFU ("Services for Unix")
 * remapped value. Returns 0 when the character is not one of those remapped
 * here, which callers use as "no remap needed".
 */
static __le16 convert_to_sfu_char(char src_char)
{
	switch (src_char) {
	case ':':
		return cpu_to_le16(UNI_COLON);
	case '*':
		return cpu_to_le16(UNI_ASTERISK);
	case '?':
		return cpu_to_le16(UNI_QUESTION);
	case '<':
		return cpu_to_le16(UNI_LESSTHAN);
	case '>':
		return cpu_to_le16(UNI_GRTRTHAN);
	case '|':
		return cpu_to_le16(UNI_PIPE);
	default:
		return 0;
	}
}

/*
 * Map a local character to its little-endian SFM ("Services for Mac")
 * remapped value. Returns 0 when the character is not one of those remapped
 * here, which callers use as "no remap needed".
 */
static __le16 convert_to_sfm_char(char src_char)
{
	switch (src_char) {
	case ':':
		return cpu_to_le16(SFM_COLON);
	case '*':
		return cpu_to_le16(SFM_ASTERISK);
	case '?':
		return cpu_to_le16(SFM_QUESTION);
	case '<':
		return cpu_to_le16(SFM_LESSTHAN);
	case '>':
		return cpu_to_le16(SFM_GRTRTHAN);
	case '|':
		return cpu_to_le16(SFM_PIPE);
	default:
		return 0;
	}
}

/*
 * Convert a pathname in the current code page to 16 bit Unicode wire format.
 * Conversion may involve remapping up the six characters that are only legal
 * in POSIX-like OS (if they are present in the string). Path names are little
 * endian 16 bit Unicode on the wire.
 *
 * @target - destination for the UTF-16 code units; the caller must make it
 *	     large enough for the converted string plus a zero terminator
 * @source - string in the local code page
 * @srclen - number of source bytes to convert at most (conversion also stops
 *	     at a NUL byte)
 * @cp - source codepage
 * @map_chars - remapping mode: NO_MAP_UNI_RSVD, SFU_MAP_UNI_RSVD or
 *		SFM_MAP_UNI_RSVD
 *
 * Returns the number of UTF-16 code units written, not counting the zero
 * terminator that is always appended.
 *
 * NOTE(review): with NO_MAP_UNI_RSVD the work is handed to cifs_strtoUTF16()
 * with PATH_MAX as the length, so srclen is not honoured on that path and the
 * source is expected to be NUL terminated.
 */
int
cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
		 const struct nls_table *cp, int map_chars)
{
	int i, charlen;
	int j = 0;
	char src_char;
	__le16 dst_char;
	wchar_t tmp;

	if (map_chars == NO_MAP_UNI_RSVD)
		return cifs_strtoUTF16(target, source, PATH_MAX, cp);

	/* i walks source bytes, j counts the code units produced */
	for (i = 0; i < srclen; j++) {
		src_char = source[i];
		charlen = 1;

		/* check if end of string */
		if (src_char == 0)
			break;

		/* see if we must remap this char */
		if (map_chars == SFU_MAP_UNI_RSVD)
			dst_char = convert_to_sfu_char(src_char);
		else if (map_chars == SFM_MAP_UNI_RSVD)
			dst_char = convert_to_sfm_char(src_char);
		else
			dst_char = 0;
		/*
		 * FIXME: We can not handle remapping backslash (UNI_SLASH)
		 * until all the calls to build_path_from_dentry are modified,
		 * as they use backslash as separator.
		 */
		if (dst_char == 0) {
			charlen = cp->char2uni(source + i, srclen - i, &tmp);
			if (charlen < 1) {
				/*
				 * if no match, use question mark, which at
				 * least in some cases serves as wild card;
				 * tmp is not read here since a failed
				 * char2uni may have left it unset
				 */
				dst_char = cpu_to_le16(0x003f);
				charlen = 1;
			} else {
				dst_char = cpu_to_le16(tmp);
			}
		}
		/*
		 * character may take more than one byte in the source string,
		 * but will take exactly two bytes in the target string
		 */
		i += charlen;
		put_unaligned(dst_char, &target[j]);
	}

	put_unaligned(0, &target[j]); /* Null terminate target unicode string */
	return j;
}

#ifdef CONFIG_CIFS_SMB2
/*
 * cifs_local_to_utf16_bytes - how long will a string be after conversion?
 * @from - pointer to input string
 * @len - don't go past this many bytes of input string
 * @codepage - source codepage
 *
 * Walk a string in the local codepage and return the number of bytes it will
 * occupy after conversion to UTF-16, counting two bytes per converted
 * character and not including any null termination required. The walk stops
 * at a NUL byte or once len bytes have been consumed. A byte that the
 * codepage cannot convert is counted as one character.
 */
static int
cifs_local_to_utf16_bytes(const char *from, int len,
			  const struct nls_table *codepage)
{
	wchar_t discarded;
	int nchars = 0;
	int step;

	while (len && *from) {
		step = codepage->char2uni(from, len, &discarded);
		/* Failed conversion defaults to a question mark */
		if (step < 1)
			step = 1;

		from += step;
		len -= step;
		nchars++;
	}

	return 2 * nchars; /* UTF16 characters are two bytes */
}

/*
 * cifs_strndup_to_utf16 - copy a string to wire format from the local codepage
 * @src - source string
 * @maxlen - don't walk past this many bytes in the source string
 * @utf16_len - set to the length of the allocated string in bytes (including
 *		null), or to 0 if the allocation failed
 * @cp - source codepage
 * @remap - map special chars
 *
 * Take a string convert it from the local codepage to UTF16 and
 * put it in a new buffer allocated with kmalloc(GFP_KERNEL). Returns a
 * pointer to the new string or NULL on error. The caller owns the returned
 * buffer.
 */
__le16 *
cifs_strndup_to_utf16(const char *src, const int maxlen, int *utf16_len,
		      const struct nls_table *cp, int remap)
{
	int len;
	__le16 *dst;

	len = cifs_local_to_utf16_bytes(src, maxlen, cp);
	len += 2; /* NULL */
	dst = kmalloc(len, GFP_KERNEL);
	if (!dst) {
		*utf16_len = 0;
		return NULL;
	}
	/*
	 * The buffer was sized from at most maxlen source bytes, so the
	 * conversion must use the same bound; strlen(src) could exceed it
	 * and write past the allocation.
	 *
	 * NOTE(review): for NO_MAP_UNI_RSVD, cifsConvertToUTF16() converts
	 * with PATH_MAX as its bound rather than the length passed here.
	 */
	cifsConvertToUTF16(dst, src, strnlen(src, maxlen), cp, remap);
	*utf16_len = len;
	return dst;
}
#endif /* CONFIG_CIFS_SMB2 */
Commit	Line	Data
1da177e4 LT	1	/*
	2	* fs/cifs/cifs_unicode.c
	3	*
d185cda7	4	* Copyright (c) International Business Machines Corp., 2000,2009
1da177e4 LT	5	* Modified by Steve French (sfrench@us.ibm.com)
	6	*
	7	* This program is free software; you can redistribute it and/or modify
	8	* it under the terms of the GNU General Public License as published by
221601c3	9	* the Free Software Foundation; either version 2 of the License, or
1da177e4	10	* (at your option) any later version.
221601c3	11	*
1da177e4 LT	12	* This program is distributed in the hope that it will be useful,
	13	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	14	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
	15	* the GNU General Public License for more details.
	16	*
	17	* You should have received a copy of the GNU General Public License
221601c3	18	* along with this program; if not, write to the Free Software
1da177e4 LT	19	* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
	20	*/
	21	#include <linux/fs.h>
5a0e3ad6	22	#include <linux/slab.h>
2baa2682	23	#include "cifs_fs_sb.h"
1da177e4 LT	24	#include "cifs_unicode.h"
	25	#include "cifs_uniupr.h"
	26	#include "cifspdu.h"
3979877e	27	#include "cifsglob.h"
1da177e4 LT	28	#include "cifs_debug.h"
1da177e4 LT	29
69f801fc	30	/*
acbbb76a SF	31	* cifs_utf16_bytes - how long will a string be after conversion?
acbbb76a SF	32	* @utf16 - pointer to input string
69f801fc JL	33	* @maxbytes - don't go past this many bytes of input string
	34	* @codepage - destination codepage
	35	*
acbbb76a	36	* Walk a utf16le string and return the number of bytes that the string will
69f801fc JL	37	* be after being converted to the given charset, not including any null
	38	* termination required. Don't walk past maxbytes in the source buffer.
	39	*/
	40	int
acbbb76a	41	cifs_utf16_bytes(const __le16 *from, int maxbytes,
69f801fc JL	42	const struct nls_table *codepage)
	43	{
	44	int i;
	45	int charlen, outlen = 0;
	46	int maxwords = maxbytes / 2;
	47	char tmp[NLS_MAX_CHARSET_SIZE];
ba2dbf30	48	__u16 ftmp;
69f801fc	49
ba2dbf30 JL	50	for (i = 0; i < maxwords; i++) {
	51	ftmp = get_unaligned_le16(&from[i]);
	52	if (ftmp == 0)
	53	break;
	54
	55	charlen = codepage->uni2char(ftmp, tmp, NLS_MAX_CHARSET_SIZE);
69f801fc JL	56	if (charlen > 0)
	57	outlen += charlen;
	58	else
	59	outlen++;
	60	}
	61
	62	return outlen;
	63	}
	64
2baa2682 SF	65	int cifs_remap(struct cifs_sb_info *cifs_sb)
	66	{
	67	int map_type;
	68
	69	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SFM_CHR)
	70	map_type = SFM_MAP_UNI_RSVD;
	71	else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR)
	72	map_type = SFU_MAP_UNI_RSVD;
	73	else
	74	map_type = NO_MAP_UNI_RSVD;
	75
	76	return map_type;
	77	}
	78
b693855f SF	79	/* Convert character using the SFU - "Services for Unix" remapping range */
	80	static bool
	81	convert_sfu_char(const __u16 src_char, char *target)
7fabf0c9	82	{
7fabf0c9 JL	83	/*
	84	* BB: Cannot handle remapping UNI_SLASH until all the calls to
	85	* build_path_from_dentry are modified, as they use slash as
	86	* separator.
	87	*/
ba2dbf30	88	switch (src_char) {
7fabf0c9 JL	89	case UNI_COLON:
	90	*target = ':';
	91	break;
581ade4d	92	case UNI_ASTERISK:
7fabf0c9 JL	93	target = '';
	94	break;
	95	case UNI_QUESTION:
	96	*target = '?';
	97	break;
	98	case UNI_PIPE:
	99	*target = '\|';
	100	break;
	101	case UNI_GRTRTHAN:
	102	*target = '>';
	103	break;
	104	case UNI_LESSTHAN:
	105	*target = '<';
	106	break;
	107	default:
b693855f	108	return false;
7fabf0c9	109	}
b693855f SF	110	return true;
b693855f SF	111	}
7fabf0c9	112
b693855f SF	113	/* Convert character using the SFM - "Services for Mac" remapping range */
	114	static bool
	115	convert_sfm_char(const __u16 src_char, char *target)
	116	{
	117	switch (src_char) {
	118	case SFM_COLON:
	119	*target = ':';
	120	break;
	121	case SFM_ASTERISK:
	122	target = '';
	123	break;
	124	case SFM_QUESTION:
	125	*target = '?';
	126	break;
	127	case SFM_PIPE:
	128	*target = '\|';
	129	break;
	130	case SFM_GRTRTHAN:
	131	*target = '>';
	132	break;
	133	case SFM_LESSTHAN:
	134	*target = '<';
	135	break;
	136	case SFM_SLASH:
	137	*target = '\\';
	138	break;
	139	default:
	140	return false;
	141	}
	142	return true;
	143	}
	144
	145
	146	/*
	147	* cifs_mapchar - convert a host-endian char to proper char in codepage
	148	* @target - where converted character should be copied
	149	* @src_char - 2 byte host-endian source character
	150	* @cp - codepage to which character should be converted
	151	* @map_type - How should the 7 NTFS/SMB reserved characters be mapped to UCS2?
	152	*
	153	* This function handles the conversion of a single character. It is the
	154	* responsibility of the caller to ensure that the target buffer is large
	155	* enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE).
	156	*/
	157	static int
	158	cifs_mapchar(char target, const __u16 src_char, const struct nls_table cp,
	159	int maptype)
	160	{
	161	int len = 1;
	162
	163	if ((maptype == SFM_MAP_UNI_RSVD) && convert_sfm_char(src_char, target))
	164	return len;
	165	else if ((maptype == SFU_MAP_UNI_RSVD) &&
	166	convert_sfu_char(src_char, target))
	167	return len;
7fabf0c9	168
b693855f	169	/* if character not one of seven in special remap set */
ba2dbf30	170	len = cp->uni2char(src_char, target, NLS_MAX_CHARSET_SIZE);
7fabf0c9 JL	171	if (len <= 0) {
	172	*target = '?';
	173	len = 1;
	174	}
b693855f	175	return len;
7fabf0c9 JL	176	}
	177
	178	/*
acbbb76a	179	* cifs_from_utf16 - convert utf16le string to local charset
7fabf0c9 JL	180	* @to - destination buffer
	181	* @from - source buffer
	182	* @tolen - destination buffer size (in bytes)
	183	* @fromlen - source buffer size (in bytes)
	184	* @codepage - codepage to which characters should be converted
	185	* @mapchar - should characters be remapped according to the mapchars option?
	186	*
acbbb76a	187	* Convert a little-endian utf16le string (as sent by the server) to a string
7fabf0c9 JL	188	* in the provided codepage. The tolen and fromlen parameters are to ensure
	189	* that the code doesn't walk off of the end of the buffer (which is always
	190	* a danger if the alignment of the source buffer is off). The destination
	191	* string is always properly null terminated and fits in the destination
	192	* buffer. Returns the length of the destination string in bytes (including
	193	* null terminator).
	194	*
	195	* Note that some windows versions actually send multiword UTF-16 characters
acbbb76a	196	* instead of straight UTF16-2. The linux nls routines however aren't able to
7fabf0c9 JL	197	* deal with those characters properly. In the event that we get some of
	198	* those characters, they won't be translated properly.
	199	*/
	200	int
acbbb76a	201	cifs_from_utf16(char to, const __le16 from, int tolen, int fromlen,
b693855f	202	const struct nls_table *codepage, int map_type)
7fabf0c9 JL	203	{
	204	int i, charlen, safelen;
	205	int outlen = 0;
	206	int nullsize = nls_nullsize(codepage);
	207	int fromwords = fromlen / 2;
	208	char tmp[NLS_MAX_CHARSET_SIZE];
ba2dbf30	209	__u16 ftmp;
7fabf0c9 JL	210
	211	/*
	212	* because the chars can be of varying widths, we need to take care
	213	* not to overflow the destination buffer when we get close to the
	214	* end of it. Until we get to this offset, we don't need to check
	215	* for overflow however.
	216	*/
	217	safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize);
	218
ba2dbf30 JL	219	for (i = 0; i < fromwords; i++) {
	220	ftmp = get_unaligned_le16(&from[i]);
	221	if (ftmp == 0)
	222	break;
	223
7fabf0c9 JL	224	/*
	225	* check to see if converting this character might make the
	226	* conversion bleed into the null terminator
	227	*/
	228	if (outlen >= safelen) {
b693855f	229	charlen = cifs_mapchar(tmp, ftmp, codepage, map_type);
7fabf0c9 JL	230	if ((outlen + charlen) > (tolen - nullsize))
	231	break;
	232	}
	233
	234	/* put converted char into 'to' buffer */
b693855f	235	charlen = cifs_mapchar(&to[outlen], ftmp, codepage, map_type);
7fabf0c9 JL	236	outlen += charlen;
	237	}
	238
	239	/* properly null-terminate string */
	240	for (i = 0; i < nullsize; i++)
	241	to[outlen++] = 0;
	242
	243	return outlen;
	244	}
	245
1da177e4	246	/*
acbbb76a	247	* NAME: cifs_strtoUTF16()
1da177e4 LT	248	*
	249	* FUNCTION: Convert character string to unicode string
	250	*
	251	*/
	252	int
acbbb76a	253	cifs_strtoUTF16(__le16 to, const char from, int len,
1da177e4 LT	254	const struct nls_table *codepage)
	255	{
	256	int charlen;
	257	int i;
ba2dbf30	258	wchar_t wchar_to; /* needed to quiet sparse */
1da177e4	259
fd3ba42c FZ	260	/* special case for utf8 to handle no plane0 chars */
	261	if (!strcmp(codepage->charset, "utf8")) {
	262	/*
	263	* convert utf8 -> utf16, we assume we have enough space
	264	* as caller should have assumed conversion does not overflow
	265	* in destination len is length in wchar_t units (16bits)
	266	*/
	267	i = utf8s_to_utf16s(from, len, UTF16_LITTLE_ENDIAN,
	268	(wchar_t *) to, len);
	269
	270	/* if success terminate and exit */
	271	if (i >= 0)
	272	goto success;
	273	/*
	274	* if fails fall back to UCS encoding as this
	275	* function should not return negative values
	276	* currently can fail only if source contains
	277	* invalid encoded characters
	278	*/
	279	}
	280
1da177e4	281	for (i = 0; len && *from; i++, from += charlen, len -= charlen) {
ba2dbf30	282	charlen = codepage->char2uni(from, len, &wchar_to);
1da177e4	283	if (charlen < 1) {
f96637be JP	284	cifs_dbg(VFS, "strtoUTF16: char2uni of 0x%x returned %d\n",
f96637be JP	285	*from, charlen);
69114089	286	/* A question mark */
ba2dbf30	287	wchar_to = 0x003f;
1da177e4	288	charlen = 1;
ba2dbf30 JL	289	}
ba2dbf30 JL	290	put_unaligned_le16(wchar_to, &to[i]);
1da177e4 LT	291	}
1da177e4 LT	292
fd3ba42c	293	success:
ba2dbf30	294	put_unaligned_le16(0, &to[i]);
1da177e4 LT	295	return i;
	296	}
	297
066ce689	298	/*
acbbb76a SF	299	* cifs_strndup_from_utf16 - copy a string from wire format to the local
acbbb76a SF	300	* codepage
066ce689 JL	301	* @src - source string
	302	* @maxlen - don't walk past this many bytes in the source string
	303	* @is_unicode - is this a unicode string?
	304	* @codepage - destination codepage
	305	*
	306	* Take a string given by the server, convert it to the local codepage and
	307	* put it in a new buffer. Returns a pointer to the new string or NULL on
	308	* error.
	309	*/
	310	char *
acbbb76a SF	311	cifs_strndup_from_utf16(const char *src, const int maxlen,
acbbb76a SF	312	const bool is_unicode, const struct nls_table *codepage)
066ce689 JL	313	{
	314	int len;
	315	char *dst;
	316
	317	if (is_unicode) {
acbbb76a	318	len = cifs_utf16_bytes((__le16 *) src, maxlen, codepage);
066ce689 JL	319	len += nls_nullsize(codepage);
	320	dst = kmalloc(len, GFP_KERNEL);
	321	if (!dst)
	322	return NULL;
acbbb76a	323	cifs_from_utf16(dst, (__le16 *) src, len, maxlen, codepage,
b693855f	324	NO_MAP_UNI_RSVD);
066ce689 JL	325	} else {
	326	len = strnlen(src, maxlen);
	327	len++;
	328	dst = kmalloc(len, GFP_KERNEL);
	329	if (!dst)
	330	return NULL;
	331	strlcpy(dst, src, len);
	332	}
	333
	334	return dst;
	335	}
	336
a4153cb1 SF	337	static __le16 convert_to_sfu_char(char src_char)
	338	{
	339	__le16 dest_char;
	340
	341	switch (src_char) {
	342	case ':':
	343	dest_char = cpu_to_le16(UNI_COLON);
	344	break;
	345	case '*':
	346	dest_char = cpu_to_le16(UNI_ASTERISK);
	347	break;
	348	case '?':
	349	dest_char = cpu_to_le16(UNI_QUESTION);
	350	break;
	351	case '<':
	352	dest_char = cpu_to_le16(UNI_LESSTHAN);
	353	break;
	354	case '>':
	355	dest_char = cpu_to_le16(UNI_GRTRTHAN);
	356	break;
	357	case '\|':
	358	dest_char = cpu_to_le16(UNI_PIPE);
	359	break;
	360	default:
	361	dest_char = 0;
	362	}
	363
	364	return dest_char;
	365	}
	366
	367	static __le16 convert_to_sfm_char(char src_char)
	368	{
	369	__le16 dest_char;
	370
	371	switch (src_char) {
	372	case ':':
	373	dest_char = cpu_to_le16(SFM_COLON);
	374	break;
	375	case '*':
	376	dest_char = cpu_to_le16(SFM_ASTERISK);
	377	break;
	378	case '?':
	379	dest_char = cpu_to_le16(SFM_QUESTION);
	380	break;
	381	case '<':
	382	dest_char = cpu_to_le16(SFM_LESSTHAN);
	383	break;
	384	case '>':
	385	dest_char = cpu_to_le16(SFM_GRTRTHAN);
	386	break;
	387	case '\|':
	388	dest_char = cpu_to_le16(SFM_PIPE);
	389	break;
	390	default:
	391	dest_char = 0;
	392	}
	393
	394	return dest_char;
	395	}
	396
84cdf74e JL	397	/*
	398	* Convert 16 bit Unicode pathname to wire format from string in current code
	399	* page. Conversion may involve remapping up the six characters that are
	400	* only legal in POSIX-like OS (if they are present in the string). Path
	401	* names are little endian 16 bit Unicode on the wire
	402	*/
	403	int
acbbb76a	404	cifsConvertToUTF16(__le16 target, const char source, int srclen,
a4153cb1	405	const struct nls_table *cp, int map_chars)
84cdf74e	406	{
ce36d9ab SF	407	int i, charlen;
ce36d9ab SF	408	int j = 0;
84cdf74e	409	char src_char;
581ade4d JL	410	__le16 dst_char;
581ade4d JL	411	wchar_t tmp;
84cdf74e	412
a4153cb1	413	if (map_chars == NO_MAP_UNI_RSVD)
acbbb76a	414	return cifs_strtoUTF16(target, source, PATH_MAX, cp);
84cdf74e	415
ce36d9ab	416	for (i = 0; i < srclen; j++) {
84cdf74e	417	src_char = source[i];
11379b5e	418	charlen = 1;
a4153cb1 SF	419
	420	/* check if end of string */
	421	if (src_char == 0)
acbbb76a	422	goto ctoUTF16_out;
a4153cb1 SF	423
	424	/* see if we must remap this char */
	425	if (map_chars == SFU_MAP_UNI_RSVD)
	426	dst_char = convert_to_sfu_char(src_char);
	427	else if (map_chars == SFM_MAP_UNI_RSVD)
	428	dst_char = convert_to_sfm_char(src_char);
	429	else
	430	dst_char = 0;
84cdf74e JL	431	/*
	432	* FIXME: We can not handle remapping backslash (UNI_SLASH)
	433	* until all the calls to build_path_from_dentry are modified,
	434	* as they use backslash as separator.
	435	*/
a4153cb1	436	if (dst_char == 0) {
581ade4d JL	437	charlen = cp->char2uni(source + i, srclen - i, &tmp);
	438	dst_char = cpu_to_le16(tmp);
	439
84cdf74e JL	440	/*
	441	* if no match, use question mark, which at least in
	442	* some cases serves as wild card
	443	*/
	444	if (charlen < 1) {
581ade4d	445	dst_char = cpu_to_le16(0x003f);
84cdf74e JL	446	charlen = 1;
84cdf74e JL	447	}
84cdf74e	448	}
11379b5e JL	449	/*
	450	* character may take more than one byte in the source string,
	451	* but will take exactly two bytes in the target string
	452	*/
	453	i += charlen;
581ade4d	454	put_unaligned(dst_char, &target[j]);
84cdf74e JL	455	}
84cdf74e JL	456
acbbb76a	457	ctoUTF16_out:
ce36d9ab	458	put_unaligned(0, &target[j]); /* Null terminate target unicode string */
c73f6939	459	return j;
84cdf74e	460	}
2503a0db PS	461
	462	#ifdef CONFIG_CIFS_SMB2
	463	/*
	464	* cifs_local_to_utf16_bytes - how long will a string be after conversion?
	465	* @from - pointer to input string
	466	* @maxbytes - don't go past this many bytes of input string
	467	* @codepage - source codepage
	468	*
	469	* Walk a string and return the number of bytes that the string will
	470	* be after being converted to the given charset, not including any null
	471	* termination required. Don't walk past maxbytes in the source buffer.
	472	*/
	473
	474	static int
	475	cifs_local_to_utf16_bytes(const char *from, int len,
	476	const struct nls_table *codepage)
	477	{
	478	int charlen;
	479	int i;
	480	wchar_t wchar_to;
	481
	482	for (i = 0; len && *from; i++, from += charlen, len -= charlen) {
	483	charlen = codepage->char2uni(from, len, &wchar_to);
	484	/* Failed conversion defaults to a question mark */
	485	if (charlen < 1)
	486	charlen = 1;
	487	}
	488	return 2 * i; /* UTF16 characters are two bytes */
	489	}
	490
	491	/*
	492	* cifs_strndup_to_utf16 - copy a string to wire format from the local codepage
	493	* @src - source string
	494	* @maxlen - don't walk past this many bytes in the source string
	495	* @utf16_len - the length of the allocated string in bytes (including null)
	496	* @cp - source codepage
	497	* @remap - map special chars
	498	*
	499	* Take a string convert it from the local codepage to UTF16 and
	500	* put it in a new buffer. Returns a pointer to the new string or NULL on
	501	* error.
	502	*/
	503	__le16 *
	504	cifs_strndup_to_utf16(const char src, const int maxlen, int utf16_len,
	505	const struct nls_table *cp, int remap)
	506	{
	507	int len;
	508	__le16 *dst;
	509
	510	len = cifs_local_to_utf16_bytes(src, maxlen, cp);
	511	len += 2; /* NULL */
	512	dst = kmalloc(len, GFP_KERNEL);
	513	if (!dst) {
	514	*utf16_len = 0;
	515	return NULL;
	516	}
	517	cifsConvertToUTF16(dst, src, strlen(src), cp, remap);
	518	*utf16_len = len;
	519	return dst;
	520	}
	521	#endif /* CONFIG_CIFS_SMB2 */