[linux-block.git] / fs / ksmbd / unicode.h

/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Some of the source code in this file came from fs/cifs/cifs_unicode.c
 * cifs_unicode:  Unicode kernel case support
 *
 * Function:
 *     Convert a unicode character to upper or lower case using
 *     compressed tables.
 *
 *   Copyright (c) International Business Machines  Corp., 2000,2009
 *
 *
 * Notes:
 *     These APIs are based on the C library functions.  The semantics
 *     should match the C functions but with expanded size operands.
 *
 *     The upper/lower functions are based on a table created by mkupr.
 *     This is a compressed table of upper and lower case conversion.
 *
 */
#ifndef _CIFS_UNICODE_H
#define _CIFS_UNICODE_H

#include <asm/byteorder.h>
#include <linux/types.h>
#include <linux/nls.h>
#include <linux/unicode.h>

#define  UNIUPR_NOLOWER		/* Example to not expand lower case tables */

/*
 * Windows maps these to the user defined 16 bit Unicode range since they are
 * reserved symbols (along with \ and /), otherwise illegal to store
 * in filenames in NTFS
 *
 * Each macro below is the ASCII code of the reserved symbol plus 0xF000,
 * truncated to 16 bits.  Note that UNI_SLASH is built from the backslash
 * character ('\\'), not the forward slash.
 */
#define UNI_ASTERISK    ((__u16)('*' + 0xF000))
#define UNI_QUESTION    ((__u16)('?' + 0xF000))
#define UNI_COLON       ((__u16)(':' + 0xF000))
#define UNI_GRTRTHAN    ((__u16)('>' + 0xF000))
#define UNI_LESSTHAN    ((__u16)('<' + 0xF000))
#define UNI_PIPE        ((__u16)('|' + 0xF000))
#define UNI_SLASH       ((__u16)('\\' + 0xF000))

/* Just define what we want from uniupr.h.  We don't want to define the tables
 * in each source file.
 */
#ifndef	UNICASERANGE_DEFINED
/*
 * One entry of a case-conversion range table, as consumed by the
 * UniToupper()/UniTolower() helpers in this header:
 *   start - first character covered by the entry; a zero start marks the
 *           end of the array
 *   end   - last character covered by the entry (inclusive)
 *   table - signed per-character deltas, indexed by (uc - start); the delta
 *           is added to the character to obtain the converted value
 */
struct UniCaseRange {
	wchar_t start;
	wchar_t end;
	signed char *table;
};
#endif				/* UNICASERANGE_DEFINED */

/*
 * Upper-case conversion data, defined in another translation unit.
 * UniToupper() indexes SmbUniUpperTable directly for characters below the
 * table size and walks SmbUniUpperRange for everything else.
 */
#ifndef UNIUPR_NOUPPER
extern signed char SmbUniUpperTable[512];
extern const struct UniCaseRange SmbUniUpperRange[];
#endif				/* UNIUPR_NOUPPER */

/*
 * Lower-case conversion data.  UNIUPR_NOLOWER is defined near the top of
 * this header, so these declarations are compiled out as the file stands.
 */
#ifndef UNIUPR_NOLOWER
extern signed char CifsUniLowerTable[512];
extern const struct UniCaseRange CifsUniLowerRange[];
#endif				/* UNIUPR_NOLOWER */

#ifdef __KERNEL__
/*
 * Conversion helpers declared here and implemented elsewhere.  Their bodies
 * are not visible from this header, so the notes below are inferred from
 * names and signatures only - confirm against the definitions.
 */

/* NOTE(review): presumably converts 'from' into UTF-16LE stored at 'to'. */
int smb_strtoUTF16(__le16 *to, const char *from, int len,
		   const struct nls_table *codepage);
/* NOTE(review): presumably returns a newly allocated string; verify who frees it. */
char *smb_strndup_from_utf16(const char *src, const int maxlen,
			     const bool is_unicode,
			     const struct nls_table *codepage);
/* NOTE(review): 'mapchars' looks like a reserved-character remap flag - confirm. */
int smbConvertToUTF16(__le16 *target, const char *source, int srclen,
		      const struct nls_table *cp, int mapchars);
/* NOTE(review): presumably extracts the share name component of 'treename'. */
char *ksmbd_extract_sharename(struct unicode_map *um, const char *treename);
#endif

/*
 * UniStrcat:  Append the second string to the end of the first
 *
 * The terminating null of ucs1 is overwritten by the first character of
 * ucs2, and the terminating null of ucs2 is copied as well.
 *
 * Returns:
 *     Address of the first string
 */
static inline wchar_t *UniStrcat(wchar_t *ucs1, const wchar_t *ucs2)
{
	wchar_t *dst = ucs1;

	/* Advance to the terminating null of the destination */
	while (*dst)
		dst++;

	/* Copy the source, terminator included */
	do {
		*dst = *ucs2++;
	} while (*dst++);

	return ucs1;
}

/*
 * UniStrchr:  Locate a character in a string
 *
 * The terminating null counts as part of the string, so searching for 0
 * yields the address of the terminator.
 *
 * Returns:
 *     Address of the first occurrence of uc in ucs,
 *     or NULL if uc does not occur before the end of the string
 */
static inline wchar_t *UniStrchr(const wchar_t *ucs, wchar_t uc)
{
	for (;; ucs++) {
		if (*ucs == uc)
			return (wchar_t *)ucs;
		if (!*ucs)	/* Hit the end without a match */
			return NULL;
	}
}

/*
 * UniStrcmp:  Compare two null-terminated strings
 *
 * The result is the difference between the first pair of characters that
 * differ (or between the two terminators when the strings are equal).
 *
 * Returns:
 *     < 0:  First string is less than second
 *     = 0:  Strings are equal
 *     > 0:  First string is greater than second
 */
static inline int UniStrcmp(const wchar_t *ucs1, const wchar_t *ucs2)
{
	size_t pos = 0;

	/* Skip the common prefix, stopping at the end of the first string */
	while (ucs1[pos] && ucs1[pos] == ucs2[pos])
		pos++;

	return (int)ucs1[pos] - (int)ucs2[pos];
}

/*
 * UniStrcpy:  Copy a null-terminated string, terminator included
 *
 * Returns:
 *     Address of the destination string
 */
static inline wchar_t *UniStrcpy(wchar_t *ucs1, const wchar_t *ucs2)
{
	size_t pos = 0;
	wchar_t ch;

	do {
		ch = ucs2[pos];
		ucs1[pos] = ch;
		pos++;
	} while (ch);		/* Stop once the null has been copied */

	return ucs1;
}

/*
 * UniStrlen:  Return the length of a string (in 16 bit Unicode chars not bytes)
 *
 * The terminating null is not counted.  The count is kept in a size_t so
 * that it matches the return type and cannot overflow a signed int.
 */
static inline size_t UniStrlen(const wchar_t *ucs1)
{
	size_t len = 0;

	while (ucs1[len])
		len++;
	return len;
}

/*
 * UniStrnlen:  Return the length (in 16 bit Unicode chars not bytes) of a
 *		string (length limited)
 *
 * At most maxlen characters are examined.  The result is the string length
 * if a null is found within that window, otherwise maxlen.  A maxlen of
 * zero or less yields 0 without reading from ucs1.
 */
static inline size_t UniStrnlen(const wchar_t *ucs1, int maxlen)
{
	int len = 0;

	/* Check the bound first so nothing at or past maxlen is ever read */
	while (len < maxlen && ucs1[len])
		len++;
	return len;
}

/*
 * UniStrncat:  Concatenate length limited string
 *
 * Appends at most n characters of ucs2 to the end of ucs1 and then always
 * writes a terminating null, so up to n + 1 elements are stored after the
 * original end of ucs1.
 *
 * Returns:
 *     Address of the first string
 */
static inline wchar_t *UniStrncat(wchar_t *ucs1, const wchar_t *ucs2, size_t n)
{
	wchar_t *tail = ucs1;
	size_t copied;

	/* Find the terminating null of the destination */
	while (*tail)
		tail++;

	/* Append until the limit is reached or the source runs out */
	for (copied = 0; copied < n && ucs2[copied]; copied++)
		tail[copied] = ucs2[copied];

	tail[copied] = 0;
	return ucs1;
}

/*
 * UniStrncmp:  Compare length limited string
 *
 * Compares at most n characters.  The result is the difference between the
 * first differing pair, or between the last pair examined if none differ.
 *
 * Returns:
 *     0 when n is zero or the examined characters are equal, otherwise a
 *     negative or positive value with the same meaning as UniStrcmp
 */
static inline int UniStrncmp(const wchar_t *ucs1, const wchar_t *ucs2, size_t n)
{
	size_t pos;

	if (n == 0)
		return 0;	/* Nothing to compare */

	/* Advance over matching characters, leaving pos on the last candidate */
	for (pos = 0; pos < n - 1; pos++) {
		if (ucs1[pos] != ucs2[pos] || !ucs1[pos])
			break;
	}
	return (int)ucs1[pos] - (int)ucs2[pos];
}

/*
 * UniStrncmp_le:  Compare length limited string - native to little-endian
 *
 * Same contract as UniStrncmp, except that every character read from ucs2
 * is passed through __le16_to_cpu() before it is compared with ucs1.
 */
static inline int
UniStrncmp_le(const wchar_t *ucs1, const wchar_t *ucs2, size_t n)
{
	size_t pos;

	if (n == 0)
		return 0;	/* Nothing to compare */

	/* Advance over matching characters, leaving pos on the last candidate */
	for (pos = 0; pos < n - 1; pos++) {
		if (ucs1[pos] != __le16_to_cpu(ucs2[pos]) || !ucs1[pos])
			break;
	}
	return (int)ucs1[pos] - (int)__le16_to_cpu(ucs2[pos]);
}

/*
 * UniStrncpy:  Copy length limited string with pad
 *
 * Exactly n elements of ucs1 are written: the characters of ucs2 up to its
 * null (or up to n of them), followed by nulls for the remainder.  When
 * ucs2 holds n or more characters the result is not null-terminated.
 *
 * Returns:
 *     Address of the destination string
 */
static inline wchar_t *UniStrncpy(wchar_t *ucs1, const wchar_t *ucs2, size_t n)
{
	size_t pos = 0;

	/* Copy characters while room remains and the source has not ended */
	for (; pos < n && ucs2[pos]; pos++)
		ucs1[pos] = ucs2[pos];

	/* Fill whatever is left of the n slots with nulls */
	for (; pos < n; pos++)
		ucs1[pos] = 0;

	return ucs1;
}

/*
 * UniStrncpy_le:  Copy length limited string with pad, converting each
 *		   character with __le16_to_cpu()
 *
 * Exactly n elements of ucs1 are written: the converted characters of ucs2
 * up to its null (or up to n of them), followed by nulls for the remainder.
 *
 * Returns:
 *     Address of the destination string
 */
static inline wchar_t *UniStrncpy_le(wchar_t *ucs1, const wchar_t *ucs2, size_t n)
{
	size_t pos = 0;

	/* Copy and convert while room remains and the source has not ended */
	for (; pos < n && ucs2[pos]; pos++)
		ucs1[pos] = __le16_to_cpu(ucs2[pos]);

	/* Fill whatever is left of the n slots with nulls */
	for (; pos < n; pos++)
		ucs1[pos] = 0;

	return ucs1;
}

/*
 * UniStrstr:  Find a string in a string
 *
 * Tries every starting position of ucs1 in turn and compares ucs2 against
 * it character by character.  An empty ucs2 matches at the start of ucs1.
 *
 * Returns:
 *     Address of first match found
 *     NULL if no matching string is found
 */
static inline wchar_t *UniStrstr(const wchar_t *ucs1, const wchar_t *ucs2)
{
	const wchar_t *start = ucs1;

	for (;;) {
		size_t matched = 0;

		/* Extend the match for as long as both strings agree */
		while (ucs2[matched] && start[matched] == ucs2[matched])
			matched++;

		if (!ucs2[matched])	/* Whole of ucs2 consumed */
			return (wchar_t *)start;

		/*
		 * ucs1 ended before ucs2 did; any later start has even fewer
		 * characters left, so no match is possible.
		 */
		if (!start[matched])
			return NULL;

		start++;	/* Mismatch, try the next position */
	}
}

#ifndef UNIUPR_NOUPPER
/*
 * UniToupper:  Convert a unicode character to upper case
 *
 * Characters below sizeof(SmbUniUpperTable) are converted by adding the
 * delta stored at that index of the base table.  Other characters are
 * looked up in SmbUniUpperRange, whose entries are scanned until one with
 * a zero start is reached.  The early return on "uc < start" assumes the
 * entries are listed in ascending order.
 *
 * Returns:
 *     The converted character, or uc unchanged if no entry covers it
 */
static inline wchar_t UniToupper(register wchar_t uc)
{
	const struct UniCaseRange *range;

	/* Low characters: direct lookup in the base table */
	if (uc < sizeof(SmbUniUpperTable))
		return uc + SmbUniUpperTable[uc];

	for (range = SmbUniUpperRange; range->start; range++) {
		if (uc < range->start)	/* Falls in a gap before this range */
			return uc;
		if (uc <= range->end)	/* Covered: apply this range's delta */
			return uc + range->table[uc - range->start];
	}

	return uc;		/* Beyond the last range */
}

/*
 * UniStrupr:  Upper case a unicode string in place
 *
 * Each element is converted with le16_to_cpu(), passed through
 * UniToupper(), and stored back with cpu_to_le16(), up to but not
 * including the terminating null.
 *
 * Returns:
 *     The input pointer
 */
static inline __le16 *UniStrupr(register __le16 *upin)
{
	__le16 *cur;

	for (cur = upin; *cur; cur++)
		*cur = cpu_to_le16(UniToupper(le16_to_cpu(*cur)));

	return upin;
}
#endif				/* UNIUPR_NOUPPER */

#ifndef UNIUPR_NOLOWER
/*
 * UniTolower:  Convert a unicode character to lower case
 *
 * Characters below sizeof(CifsUniLowerTable) are converted by adding the
 * delta stored at that index of the base table.  Other characters are
 * looked up in CifsUniLowerRange, whose entries are scanned until one with
 * a zero start is reached.  The early return on "uc < start" assumes the
 * entries are listed in ascending order.
 *
 * Returns:
 *     The converted character, or uc unchanged if no entry covers it
 */
static inline wchar_t UniTolower(register wchar_t uc)
{
	const struct UniCaseRange *range;

	/* Low characters: direct lookup in the base table */
	if (uc < sizeof(CifsUniLowerTable))
		return uc + CifsUniLowerTable[uc];

	for (range = CifsUniLowerRange; range->start; range++) {
		if (uc < range->start)	/* In a gap: lowercase = input */
			return uc;
		if (uc <= range->end)	/* Covered: apply this range's delta */
			return uc + range->table[uc - range->start];
	}

	return uc;		/* Beyond the last range */
}

/*
 * UniStrlwr:  Lower case a unicode string in place
 *
 * Every character up to but not including the terminating null is
 * replaced by the result of UniTolower().
 *
 * Returns:
 *     The input pointer
 */
static inline wchar_t *UniStrlwr(register wchar_t *upin)
{
	wchar_t *cur;

	for (cur = upin; *cur; cur++)
		*cur = UniTolower(*cur);

	return upin;
}

#endif

#endif /* _CIFS_UNICODE_H */
Commit	Line	Data
e2f34481 NJ	1	/* SPDX-License-Identifier: GPL-2.0-or-later */
	2	/*
	3	* Some of the source code in this file came from fs/cifs/cifs_unicode.c
	4	* cifs_unicode: Unicode kernel case support
	5	*
	6	* Function:
	7	* Convert a unicode character to upper or lower case using
	8	* compressed tables.
	9	*
	10	* Copyright (c) International Business Machines Corp., 2000,2009
	11	*
	12	*
	13	* Notes:
	14	* These APIs are based on the C library functions. The semantics
	15	* should match the C functions but with expanded size operands.
	16	*
	17	* The upper/lower functions are based on a table created by mkupr.
	18	* This is a compressed table of upper and lower case conversion.
	19	*
	20	*/
	21	#ifndef _CIFS_UNICODE_H
	22	#define _CIFS_UNICODE_H
	23
	24	#include <asm/byteorder.h>
	25	#include <linux/types.h>
	26	#include <linux/nls.h>
16b5f54e	27	#include <linux/unicode.h>
e2f34481 NJ	28
	29	#define UNIUPR_NOLOWER /* Example to not expand lower case tables */
	30
	31	/*
	32	* Windows maps these to the user defined 16 bit Unicode range since they are
	33	* reserved symbols (along with \ and /), otherwise illegal to store
	34	* in filenames in NTFS
	35	*/
64b39f4a NJ	36	#define UNI_ASTERISK ((__u16)('*' + 0xF000))
	37	#define UNI_QUESTION ((__u16)('?' + 0xF000))
	38	#define UNI_COLON ((__u16)(':' + 0xF000))
	39	#define UNI_GRTRTHAN ((__u16)('>' + 0xF000))
	40	#define UNI_LESSTHAN ((__u16)('<' + 0xF000))
	41	#define UNI_PIPE ((__u16)('\|' + 0xF000))
	42	#define UNI_SLASH ((__u16)('\\' + 0xF000))
e2f34481 NJ	43
	44	/* Just define what we want from uniupr.h. We don't want to define the tables
	45	* in each source file.
	46	*/
	47	#ifndef UNICASERANGE_DEFINED
	48	struct UniCaseRange {
	49	wchar_t start;
	50	wchar_t end;
	51	signed char *table;
	52	};
	53	#endif /* UNICASERANGE_DEFINED */
	54
	55	#ifndef UNIUPR_NOUPPER
	56	extern signed char SmbUniUpperTable[512];
	57	extern const struct UniCaseRange SmbUniUpperRange[];
	58	#endif /* UNIUPR_NOUPPER */
	59
	60	#ifndef UNIUPR_NOLOWER
	61	extern signed char CifsUniLowerTable[512];
	62	extern const struct UniCaseRange CifsUniLowerRange[];
	63	#endif /* UNIUPR_NOLOWER */
	64
	65	#ifdef __KERNEL__
	66	int smb_strtoUTF16(__le16 to, const char from, int len,
070fb21e	67	const struct nls_table *codepage);
64b39f4a	68	char smb_strndup_from_utf16(const char src, const int maxlen,
070fb21e NJ	69	const bool is_unicode,
070fb21e NJ	70	const struct nls_table *codepage);
64b39f4a	71	int smbConvertToUTF16(__le16 target, const char source, int srclen,
070fb21e	72	const struct nls_table *cp, int mapchars);
16b5f54e	73	char ksmbd_extract_sharename(struct unicode_map um, const char *treename);
e2f34481 NJ	74	#endif
e2f34481 NJ	75
e2f34481 NJ	76	/*
	77	* UniStrcat: Concatenate the second string to the first
	78	*
	79	* Returns:
	80	* Address of the first string
	81	*/
64b39f4a	82	static inline wchar_t UniStrcat(wchar_t ucs1, const wchar_t *ucs2)
e2f34481 NJ	83	{
	84	wchar_t anchor = ucs1; / save a pointer to start of ucs1 */
	85
	86	while (*ucs1++)
	87	/NULL/; /* To end of first string */
	88	ucs1--; /* Return to the null */
	89	while ((ucs1++ = ucs2++))
	90	/NULL/; /* copy string 2 over */
	91	return anchor;
	92	}
	93
	94	/*
	95	* UniStrchr: Find a character in a string
	96	*
	97	* Returns:
	98	* Address of first occurrence of character in string
	99	* or NULL if the character is not in the string
	100	*/
64b39f4a	101	static inline wchar_t UniStrchr(const wchar_t ucs, wchar_t uc)
e2f34481 NJ	102	{
	103	while ((ucs != uc) && ucs)
	104	ucs++;
	105
	106	if (*ucs == uc)
64b39f4a	107	return (wchar_t *)ucs;
e2f34481 NJ	108	return NULL;
	109	}
	110
	111	/*
	112	* UniStrcmp: Compare two strings
	113	*
	114	* Returns:
	115	* < 0: First string is less than second
	116	* = 0: Strings are equal
	117	* > 0: First string is greater than second
	118	*/
64b39f4a	119	static inline int UniStrcmp(const wchar_t ucs1, const wchar_t ucs2)
e2f34481 NJ	120	{
	121	while ((ucs1 == ucs2) && *ucs1) {
	122	ucs1++;
	123	ucs2++;
	124	}
64b39f4a	125	return (int)ucs1 - (int)ucs2;
e2f34481 NJ	126	}
	127
	128	/*
	129	* UniStrcpy: Copy a string
	130	*/
64b39f4a	131	static inline wchar_t UniStrcpy(wchar_t ucs1, const wchar_t *ucs2)
e2f34481 NJ	132	{
	133	wchar_t anchor = ucs1; / save the start of result string */
	134
	135	while ((ucs1++ = ucs2++))
	136	/NULL/;
	137	return anchor;
	138	}
	139
	140	/*
	141	* UniStrlen: Return the length of a string (in 16 bit Unicode chars not bytes)
	142	*/
64b39f4a	143	static inline size_t UniStrlen(const wchar_t *ucs1)
e2f34481 NJ	144	{
	145	int i = 0;
	146
	147	while (*ucs1++)
	148	i++;
	149	return i;
	150	}
	151
	152	/*
	153	* UniStrnlen: Return the length (in 16 bit Unicode chars not bytes) of a
	154	* string (length limited)
	155	*/
64b39f4a	156	static inline size_t UniStrnlen(const wchar_t *ucs1, int maxlen)
e2f34481 NJ	157	{
	158	int i = 0;
	159
	160	while (*ucs1++) {
	161	i++;
	162	if (i >= maxlen)
	163	break;
	164	}
	165	return i;
	166	}
	167
	168	/*
	169	* UniStrncat: Concatenate length limited string
	170	*/
64b39f4a	171	static inline wchar_t UniStrncat(wchar_t ucs1, const wchar_t *ucs2, size_t n)
e2f34481 NJ	172	{
	173	wchar_t anchor = ucs1; / save pointer to string 1 */
	174
	175	while (*ucs1++)
	176	/NULL/;
	177	ucs1--; /* point to null terminator of s1 */
	178	while (n-- && (ucs1 = ucs2)) { /* copy s2 after s1 */
	179	ucs1++;
	180	ucs2++;
	181	}
	182	ucs1 = 0; / Null terminate the result */
	183	return anchor;
	184	}
	185
	186	/*
	187	* UniStrncmp: Compare length limited string
	188	*/
64b39f4a	189	static inline int UniStrncmp(const wchar_t ucs1, const wchar_t ucs2, size_t n)
e2f34481 NJ	190	{
	191	if (!n)
	192	return 0; /* Null strings are equal */
	193	while ((ucs1 == ucs2) && *ucs1 && --n) {
	194	ucs1++;
	195	ucs2++;
	196	}
64b39f4a	197	return (int)ucs1 - (int)ucs2;
e2f34481 NJ	198	}
	199
	200	/*
	201	* UniStrncmp_le: Compare length limited string - native to little-endian
	202	*/
070fb21e	203	static inline int
e2f34481 NJ	204	UniStrncmp_le(const wchar_t ucs1, const wchar_t ucs2, size_t n)
	205	{
	206	if (!n)
	207	return 0; /* Null strings are equal */
	208	while ((ucs1 == __le16_to_cpu(ucs2)) && *ucs1 && --n) {
	209	ucs1++;
	210	ucs2++;
	211	}
64b39f4a	212	return (int)ucs1 - (int)__le16_to_cpu(ucs2);
e2f34481 NJ	213	}
	214
	215	/*
	216	* UniStrncpy: Copy length limited string with pad
	217	*/
64b39f4a	218	static inline wchar_t UniStrncpy(wchar_t ucs1, const wchar_t *ucs2, size_t n)
e2f34481 NJ	219	{
	220	wchar_t *anchor = ucs1;
	221
	222	while (n-- && ucs2) / Copy the strings */
	223	ucs1++ = ucs2++;
	224
	225	n++;
	226	while (n--) /* Pad with nulls */
	227	*ucs1++ = 0;
	228	return anchor;
	229	}
	230
	231	/*
	232	* UniStrncpy_le: Copy length limited string with pad to little-endian
	233	*/
64b39f4a	234	static inline wchar_t UniStrncpy_le(wchar_t ucs1, const wchar_t *ucs2, size_t n)
e2f34481 NJ	235	{
	236	wchar_t *anchor = ucs1;
	237
	238	while (n-- && ucs2) / Copy the strings */
	239	ucs1++ = __le16_to_cpu(ucs2++);
	240
	241	n++;
	242	while (n--) /* Pad with nulls */
	243	*ucs1++ = 0;
	244	return anchor;
	245	}
	246
	247	/*
	248	* UniStrstr: Find a string in a string
	249	*
	250	* Returns:
	251	* Address of first match found
	252	* NULL if no matching string is found
	253	*/
64b39f4a	254	static inline wchar_t UniStrstr(const wchar_t ucs1, const wchar_t *ucs2)
e2f34481 NJ	255	{
	256	const wchar_t *anchor1 = ucs1;
	257	const wchar_t *anchor2 = ucs2;
	258
	259	while (*ucs1) {
	260	if (ucs1 == ucs2) {
	261	/* Partial match found */
	262	ucs1++;
	263	ucs2++;
	264	} else {
	265	if (!ucs2) / Match found */
64b39f4a	266	return (wchar_t *)anchor1;
e2f34481 NJ	267	ucs1 = ++anchor1; /* No match */
	268	ucs2 = anchor2;
	269	}
	270	}
	271
	272	if (!ucs2) / Both end together */
64b39f4a	273	return (wchar_t )anchor1; / Match found */
e2f34481 NJ	274	return NULL; /* No match */
	275	}
	276
	277	#ifndef UNIUPR_NOUPPER
	278	/*
	279	* UniToupper: Convert a unicode character to upper case
	280	*/
64b39f4a	281	static inline wchar_t UniToupper(register wchar_t uc)
e2f34481 NJ	282	{
	283	register const struct UniCaseRange *rp;
	284
	285	if (uc < sizeof(SmbUniUpperTable)) {
	286	/* Latin characters */
	287	return uc + SmbUniUpperTable[uc]; /* Use base tables */
	288	}
	289
	290	rp = SmbUniUpperRange; /* Use range tables */
	291	while (rp->start) {
	292	if (uc < rp->start) /* Before start of range */
	293	return uc; /* Uppercase = input */
	294	if (uc <= rp->end) /* In range */
	295	return uc + rp->table[uc - rp->start];
	296	rp++; /* Try next range */
	297	}
	298	return uc; /* Past last range */
	299	}
	300
	301	/*
	302	* UniStrupr: Upper case a unicode string
	303	*/
64b39f4a	304	static inline __le16 UniStrupr(register __le16 upin)
e2f34481 NJ	305	{
	306	register __le16 *up;
	307
	308	up = upin;
	309	while (up) { / For all characters */
	310	up = cpu_to_le16(UniToupper(le16_to_cpu(up)));
	311	up++;
	312	}
	313	return upin; /* Return input pointer */
	314	}
	315	#endif /* UNIUPR_NOUPPER */
	316
	317	#ifndef UNIUPR_NOLOWER
	318	/*
	319	* UniTolower: Convert a unicode character to lower case
	320	*/
64b39f4a	321	static inline wchar_t UniTolower(register wchar_t uc)
e2f34481 NJ	322	{
	323	register const struct UniCaseRange *rp;
	324
	325	if (uc < sizeof(CifsUniLowerTable)) {
	326	/* Latin characters */
	327	return uc + CifsUniLowerTable[uc]; /* Use base tables */
	328	}
	329
	330	rp = CifsUniLowerRange; /* Use range tables */
	331	while (rp->start) {
	332	if (uc < rp->start) /* Before start of range */
	333	return uc; /* Uppercase = input */
	334	if (uc <= rp->end) /* In range */
	335	return uc + rp->table[uc - rp->start];
	336	rp++; /* Try next range */
	337	}
	338	return uc; /* Past last range */
	339	}
	340
	341	/*
	342	* UniStrlwr: Lower case a unicode string
	343	*/
64b39f4a	344	static inline wchar_t UniStrlwr(register wchar_t upin)
e2f34481 NJ	345	{
	346	register wchar_t *up;
	347
	348	up = upin;
	349	while (up) { / For all characters */
	350	up = UniTolower(up);
	351	up++;
	352	}
	353	return upin; /* Return input pointer */
	354	}
	355
	356	#endif
	357
	358	#endif /* _CIFS_UNICODE_H */