[linux-2.6-block.git] / fs / unicode / utf8n.h

/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2014 SGI.
 * All rights reserved.
 */

#ifndef UTF8NORM_H
#define UTF8NORM_H

#include <linux/types.h>
#include <linux/export.h>
#include <linux/string.h>
#include <linux/module.h>

/*
 * A unicode version MAJ.MIN.REV is packed into one unsigned int:
 * MAJ is shifted up by UNICODE_MAJ_SHIFT, MIN by UNICODE_MIN_SHIFT,
 * and REV is or-ed in unshifted.
 */
#define UNICODE_MIN_SHIFT		(8)
#define UNICODE_MAJ_SHIFT		(16)

#define UNICODE_AGE(MAJ, MIN, REV)				\
	((unsigned int)(REV) |					\
	 ((unsigned int)(MIN) << UNICODE_MIN_SHIFT) |		\
	 ((unsigned int)(MAJ) << UNICODE_MAJ_SHIFT))

/*
 * Queries about the unicode versions covered by the data tables.
 *
 * NOTE(review): the return conventions are not spelled out in this
 * header.  Presumably utf8version_is_supported() is non-zero when
 * maj.min.rev is covered and utf8version_latest() yields the highest
 * supported version -- confirm against the implementation.
 */
int utf8version_is_supported(u8 maj, u8 min, u8 rev);
int utf8version_latest(void);

/*
 * Select the const struct utf8data matching a unicode version.
 * NULL is returned when the requested version is too new.
 *
 * NOTE(review): maxage is presumably a version encoded with
 * UNICODE_AGE() -- confirm against the callers.
 *
 * There is one lookup per supported normalization form:
 *
 * utf8nfdi() - nfdi:
 *  - Unicode normalization form NFD is applied.
 *  - Every Default_Ignorable_Code_Point is removed.
 *
 * utf8nfdicf() - nfdicf:
 *  - Unicode normalization form NFD is applied.
 *  - Every Default_Ignorable_Code_Point is removed.
 *  - A full casefold (C + F) is applied.
 */
const struct utf8data *utf8nfdi(unsigned int maxage);
const struct utf8data *utf8nfdicf(unsigned int maxage);

/*
 * Find the maximum age of any unicode character in the string s.
 *
 * Return: 0 if only unassigned code points are present,
 *         -1 if the input is not valid UTF-8.
 *
 * utf8nagemax() additionally takes len (presumably a bound on how many
 * bytes of s are examined -- confirm against the implementation).
 */
int utf8agemax(const struct utf8data *data, const char *s);
int utf8nagemax(const struct utf8data *data, const char *s, size_t len);

/*
 * Find the minimum age of any unicode character in the string s.
 *
 * Return: 0 if any unassigned code points are present,
 *         -1 if the input is not valid UTF-8.
 *
 * utf8nagemin() additionally takes len (presumably a bound on how many
 * bytes of s are examined -- confirm against the implementation).
 */
int utf8agemin(const struct utf8data *data, const char *s);
int utf8nagemin(const struct utf8data *data, const char *s, size_t len);

/*
 * Compute the length of the normalized form of the string s, not
 * counting any terminating NUL byte.
 *
 * Return: 0 if only ignorable code points are present,
 *         -1 if the input is not valid UTF-8.
 *
 * utf8nlen() additionally takes len (presumably a bound on how many
 * bytes of s are examined -- confirm against the implementation).
 */
ssize_t utf8len(const struct utf8data *data, const char *s);
ssize_t utf8nlen(const struct utf8data *data, const char *s, size_t len);

/* Size, in bytes, of the hangul[] buffer in struct utf8cursor below. */
#define UTF8HANGULLEAF	(12)

/*
 * State carried by the normalizer from one call to the next.
 *
 * NOTE(review): the member roles below are inferred from their names
 * and are not established by this header; confirm them against the
 * normalizer implementation before relying on them.
 *
 *  data      - presumably the tables handed to utf8cursor()/utf8ncursor().
 *  s, p      - presumably positions within the text being normalized.
 *  ss, sp    - presumably saved copies of s and p.
 *  len, slen - presumably a remaining length and its saved copy.
 *  ccc, nccc - presumably canonical combining class values.
 *  hangul    - scratch buffer of UTF8HANGULLEAF bytes.
 */
struct utf8cursor {
	const struct utf8data	*data;
	const char		*s;
	const char		*p;
	const char		*ss;
	const char		*sp;
	unsigned int		len;
	unsigned int		slen;
	short			ccc;
	short			nccc;
	unsigned char		hangul[UTF8HANGULLEAF];
};

/*
 * Set up the utf8cursor u8c so that the string s can be normalized.
 *
 * Return: 0 on success, -1 on failure.
 *
 * utf8ncursor() additionally takes len (presumably a bound on how many
 * bytes of s are normalized -- confirm against the implementation).
 */
int utf8cursor(struct utf8cursor *u8c, const struct utf8data *data,
	       const char *s);
int utf8ncursor(struct utf8cursor *u8c, const struct utf8data *data,
		const char *s, size_t len);

/*
 * Fetch the next byte of the normalization tracked by u8c.
 *
 * Return: a value > 0 && < 256 on success,
 *         0 once the end of the normalization is reached,
 *         -1 if the string being normalized is not valid UTF-8.
 */
int utf8byte(struct utf8cursor *u8c);

#endif /* UTF8NORM_H */
Commit	Line	Data
9f806850	1	/* SPDX-License-Identifier: GPL-2.0-only */
44594c2f OW	2	/*
	3	* Copyright (c) 2014 SGI.
	4	* All rights reserved.
44594c2f OW	5	*/
	6
	7	#ifndef UTF8NORM_H
	8	#define UTF8NORM_H
	9
	10	#include <linux/types.h>
	11	#include <linux/export.h>
	12	#include <linux/string.h>
	13	#include <linux/module.h>
	14
	15	/* Encoding a unicode version number as a single unsigned int. */
	16	#define UNICODE_MAJ_SHIFT (16)
	17	#define UNICODE_MIN_SHIFT (8)
	18
	19	#define UNICODE_AGE(MAJ, MIN, REV) \
	20	(((unsigned int)(MAJ) << UNICODE_MAJ_SHIFT) \| \
	21	((unsigned int)(MIN) << UNICODE_MIN_SHIFT) \| \
	22	((unsigned int)(REV)))
	23
	24	/* Highest unicode version supported by the data tables. */
	25	extern int utf8version_is_supported(u8 maj, u8 min, u8 rev);
9d53690f	26	extern int utf8version_latest(void);
44594c2f OW	27
	28	/*
	29	* Look for the correct const struct utf8data for a unicode version.
	30	* Returns NULL if the version requested is too new.
	31	*
	32	* Two normalization forms are supported: nfdi and nfdicf.
	33	*
	34	* nfdi:
	35	* - Apply unicode normalization form NFD.
	36	* - Remove any Default_Ignorable_Code_Point.
	37	*
	38	* nfdicf:
	39	* - Apply unicode normalization form NFD.
	40	* - Remove any Default_Ignorable_Code_Point.
	41	* - Apply a full casefold (C + F).
	42	*/
	43	extern const struct utf8data *utf8nfdi(unsigned int maxage);
	44	extern const struct utf8data *utf8nfdicf(unsigned int maxage);
	45
	46	/*
	47	* Determine the maximum age of any unicode character in the string.
	48	* Returns 0 if only unassigned code points are present.
	49	* Returns -1 if the input is not valid UTF-8.
	50	*/
	51	extern int utf8agemax(const struct utf8data *data, const char *s);
	52	extern int utf8nagemax(const struct utf8data *data, const char *s, size_t len);
	53
	54	/*
	55	* Determine the minimum age of any unicode character in the string.
	56	* Returns 0 if any unassigned code points are present.
	57	* Returns -1 if the input is not valid UTF-8.
	58	*/
	59	extern int utf8agemin(const struct utf8data *data, const char *s);
	60	extern int utf8nagemin(const struct utf8data *data, const char *s, size_t len);
	61
	62	/*
	63	* Determine the length of the normalized from of the string,
	64	* excluding any terminating NULL byte.
	65	* Returns 0 if only ignorable code points are present.
	66	* Returns -1 if the input is not valid UTF-8.
	67	*/
	68	extern ssize_t utf8len(const struct utf8data *data, const char *s);
	69	extern ssize_t utf8nlen(const struct utf8data *data, const char *s, size_t len);
	70
a8384c68 OW	71	/* Needed in struct utf8cursor below. */
	72	#define UTF8HANGULLEAF (12)
	73
44594c2f OW	74	/*
	75	* Cursor structure used by the normalizer.
	76	*/
	77	struct utf8cursor {
	78	const struct utf8data *data;
	79	const char *s;
	80	const char *p;
	81	const char *ss;
	82	const char *sp;
	83	unsigned int len;
	84	unsigned int slen;
	85	short int ccc;
	86	short int nccc;
a8384c68	87	unsigned char hangul[UTF8HANGULLEAF];
44594c2f OW	88	};
	89
	90	/*
	91	* Initialize a utf8cursor to normalize a string.
	92	* Returns 0 on success.
	93	* Returns -1 on failure.
	94	*/
	95	extern int utf8cursor(struct utf8cursor *u8c, const struct utf8data *data,
	96	const char *s);
	97	extern int utf8ncursor(struct utf8cursor *u8c, const struct utf8data *data,
	98	const char *s, size_t len);
	99
	100	/*
	101	* Get the next byte in the normalization.
	102	* Returns a value > 0 && < 256 on success.
	103	* Returns 0 when the end of the normalization is reached.
	104	* Returns -1 if the string being normalized is not valid UTF-8.
	105	*/
	106	extern int utf8byte(struct utf8cursor *u8c);
	107
	108	#endif /* UTF8NORM_H */