1 /*****************************************************************************
3 (c) Cambridge Silicon Radio Limited 2010
4 All rights reserved and confidential information of CSR
6 Refer to LICENSE.txt included with this source for details
9 *****************************************************************************/
10 #include <linux/module.h>
11 #include "csr_types.h"
13 #include "csr_unicode.h"
/* UTF-16 surrogate ranges and helper constants used by the converters below. */
#define UNI_SUR_HIGH_START ((CsrUint32) 0xD800)
#define UNI_SUR_HIGH_END ((CsrUint32) 0xDBFF)
#define UNI_SUR_LOW_START ((CsrUint32) 0xDC00)
#define UNI_SUR_LOW_END ((CsrUint32) 0xDFFF)
#define UNI_REPLACEMENT_CHAR ((CsrUint32) 0xFFFD)
#define UNI_HALF_SHIFT ((CsrUint8) 10) /* used for shifting by 10 bits */
#define UNI_HALF_BASE ((CsrUint32) 0x00010000)
#define UNI_BYTEMASK ((CsrUint32) 0xBF)
#define UNI_BYTEMARK ((CsrUint32) 0x80)

/*
 * Maps an ASCII lower-case letter ('a'..'z') to its upper-case form by
 * clearing bit 5; every other value is returned unchanged.
 *
 * Every use of the argument is parenthesised so that an argument built from
 * low-precedence operators (e.g. CAPITAL(a ^ b)) is compared as a whole.
 * The argument is still evaluated more than once, so it must be free of
 * side effects.
 */
#define CAPITAL(x) ((((x) >= 'a') && ((x) <= 'z')) ? ((x) & 0x00DF) : (x))
29 * Index into the table with the first byte to get the number of trailing bytes in a utf-8 character.
30 * -1 if the byte has an invalid value.
32 * Legal sequences are:
34 * byte 1st 2nd 3rd 4th
42 * F0 90-BF 80-BF 80-BF
43 * F1-F3 80-BF 80-BF 80-BF
44 * F4 80-8F 80-BF 80-BF
46 static const CsrInt8 trailingBytesForUtf8[256] =
48 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00 - 0x1F */
49 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20 - 0x3F */
50 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40 - 0x5F */
51 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60 - 0x7F */
52 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0x80 - 0x9F */
53 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0xA0 - 0xBF */
54 -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xC0 - 0xDF */
55 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0xE0 - 0xFF */
58 /* Values to be substracted from a CsrUint32 when converting from UTF8 to UTF16 */
59 static const CsrUint32 offsetsFromUtf8[4] =
61 0x00000000, 0x00003080, 0x000E2080, 0x03C82080
64 /********************************************************************************
66 * Name: CsrUint32ToUtf16String
68 * Description: The function converts an 32 bit number to an UTF-16 string
69 * that is allocated and 0-terminated.
71 * Input: 32 bit number.
73 * Output: A string of UTF-16 characters.
75 *********************************************************************************/
76 CsrUtf16String *CsrUint32ToUtf16String(CsrUint32 number)
78 CsrUint16 count, noOfDigits;
79 CsrUtf16String *output;
82 /* calculate the number of digits in the output */
85 while (tempNumber >= 10)
87 tempNumber = tempNumber / 10;
91 output = (CsrUtf16String *) CsrPmemAlloc(sizeof(CsrUtf16String) * (noOfDigits + 1)); /*add space for 0-termination*/
94 for (count = noOfDigits; count > 0; count--)
96 output[count - 1] = (CsrUtf16String) ((tempNumber % 10) + '0');
97 tempNumber = tempNumber / 10;
99 output[noOfDigits] = '\0';
104 /********************************************************************************
106 * Name: CsrUtf16StringToUint32
108 * Description: The function converts an UTF-16 string that is
109 * 0-terminated into a 32 bit number.
111 * Input: A string of UTF-16 characters containig a number.
113 * Output: 32 bit number.
115 *********************************************************************************/
116 CsrUint32 CsrUtf16StringToUint32(const CsrUtf16String *unicodeString)
118 CsrUint16 numLen, count;
119 CsrUint32 newNumber = 0;
121 numLen = (CsrUint16) CsrUtf16StrLen(unicodeString);
123 if ((numLen > 10) || (numLen == 0) || (unicodeString == NULL)) /*CSRMAX number is 4.294.967.295 */
128 for (count = 0; count < numLen; count++)
130 CsrUtf16String input = unicodeString[count];
131 if ((input < 0x30) || (input > 0x39) || ((newNumber == 0x19999999) && (input > 0x35)) || (newNumber > 0x19999999)) /* chars are present or number is too large now causing number to get to large when *10 */
136 newNumber = (newNumber * 10) + (input - 0x30);
141 /********************************************************************************
143 * Name: CsrUtf16MemCpy
145 * Description: The function copies count number of 16 bit data elements
148 * Input: A pointer to an unicoded string.
150 * Output: A pointer to an unicoded string.
152 *********************************************************************************/
153 CsrUtf16String *CsrUtf16MemCpy(CsrUtf16String *dest, const CsrUtf16String *src, CsrUint32 count)
155 return CsrMemCpy((CsrUint8 *) dest, (CsrUint8 *) src, count * sizeof(CsrUtf16String));
158 /********************************************************************************
160 * Name: CsrUtf16ConcatenateTexts
162 * Description: The function merge the contents of 4 unicoded input pointers
165 * Input: 4 unicoded input strings (UTF-16).
167 * Output: A new unicoded string (UTF-16) containing the combined strings.
169 *********************************************************************************/
170 CsrUtf16String *CsrUtf16ConcatenateTexts(const CsrUtf16String *inputText1, const CsrUtf16String *inputText2,
171 const CsrUtf16String *inputText3, const CsrUtf16String *inputText4)
173 CsrUtf16String *outputText;
174 CsrUint32 textLen, textLen1, textLen2, textLen3, textLen4;
176 textLen1 = CsrUtf16StrLen(inputText1);
177 textLen2 = CsrUtf16StrLen(inputText2);
178 textLen3 = CsrUtf16StrLen(inputText3);
179 textLen4 = CsrUtf16StrLen(inputText4);
181 textLen = textLen1 + textLen2 + textLen3 + textLen4;
183 if (textLen == 0) /*stop here is all lengths are 0*/
188 outputText = (CsrUtf16String *) CsrPmemAlloc((textLen + 1) * sizeof(CsrUtf16String)); /* add space for 0-termination*/
191 if (inputText1 != NULL)
193 CsrUtf16MemCpy(outputText, inputText1, textLen1);
196 if (inputText2 != NULL)
198 CsrUtf16MemCpy(&(outputText[textLen1]), inputText2, textLen2);
201 if (inputText3 != NULL)
203 CsrUtf16MemCpy(&(outputText[textLen1 + textLen2]), inputText3, textLen3);
206 if (inputText4 != NULL)
208 CsrUtf16MemCpy(&(outputText[textLen1 + textLen2 + textLen3]), inputText4, textLen4);
211 outputText[textLen] = '\0';
216 /********************************************************************************
218 * Name: CsrUtf16StrLen
220 * Description: The function returns the number of 16 bit elements present
221 * in the 0-terminated string.
223 * Input: 0-terminated string of 16 bit unicoded characters.
225 * Output: The number of 16 bit elements in the string.
227 *********************************************************************************/
228 CsrUint32 CsrUtf16StrLen(const CsrUtf16String *unicodeString)
233 if (unicodeString != NULL)
235 while (*unicodeString)
244 /********************************************************************************
246 * Name: CsrUtf16String2Utf8
248 * Description: The function decodes an UTF-16 string into an UTF8 byte
* oriented string. The source is walked twice with the same surrogate
* classification: once before the output buffer is allocated and once to
* emit the UTF-8 bytes. In the emitting pass an unpaired high surrogate,
* a lone low surrogate and any value of 0x110000 or above are replaced by
* the Unicode 'REPLACEMENT CHARACTER' (U+FFFD).
251 * Input: 0-terminated UTF-16 string characters.
253 * Output: 0-terminated string of byte oriented UTF8 coded characters.
* The buffer is obtained from CsrPmemAlloc.
255 *********************************************************************************/
256 CsrUtf8String *CsrUtf16String2Utf8(const CsrUtf16String *source)
258 CsrUtf8String *dest, *destStart = NULL;
262 CsrUint32 sourceLength;
264 CsrBool appendNull = FALSE;
/* Indexed by the number of bytes of the encoded character; OR-ed into the
   first byte of the sequence (see the firstByteMark[bytes] store below). */
266 CsrUint8 firstByteMark[5] = {0x00, 0x00, 0xC0, 0xE0, 0xF0};
/* The + 1 makes both loops visit the terminating 0 as well, so the
   terminator is converted like any other code unit. */
274 sourceLength = CsrUtf16StrLen(source) + 1;
/* First pass. NOTE(review): presumably this pass only accumulates `length`
   for the allocation below; the accumulation statements are not visible in
   this view - confirm. */
/* NOTE(review): because sourceLength includes the terminator, a high
   surrogate inside the string always has i + 1 < sourceLength, so the
   "low surrogate does not exist" branches (and appendNull) look unreachable,
   assuming ch is read from source[i] - confirm. */
276 for (i = 0; i < sourceLength; i++)
279 if ((ch >= UNI_SUR_HIGH_START) && (ch <= UNI_SUR_HIGH_END)) /* This is a high surrogate */
281 if (i + 1 < sourceLength) /* The low surrogate is in the source */
283 CsrUint32 ch2 = source[++i];
284 if ((ch2 >= UNI_SUR_LOW_START) && (ch2 <= UNI_SUR_LOW_END)) /* And it is a legal low surrogate */
288 else /* It is not a low surrogate, instead put a Unicode
289 'REPLACEMENT CHARACTER' (U+FFFD) */
292 i--; /* Subtract 1 again as the conversion must continue after the ill-formed code unit */
295 else /* The low surrogate does not exist, instead put a Unicode
296 'REPLACEMENT CHARACTER' (U+FFFD), and the null terminated character */
301 else if ((ch >= UNI_SUR_LOW_START) && (ch <= UNI_SUR_LOW_END)) /* The value of UTF-16 is not allowed to be in this range, instead put
302 a Unicode 'REPLACEMENT CHARACTER' (U+FFFD) */
306 else /* Figure out how many bytes that are required */
312 else if (ch < 0x0800)
/* Output buffer of `length` bytes. */
323 dest = CsrPmemAlloc(length);
/* Second pass: same classification as above, but this time the code point
   is resolved into ch and written out. */
326 for (i = 0; i < sourceLength; i++)
329 if ((ch >= UNI_SUR_HIGH_START) && (ch <= UNI_SUR_HIGH_END)) /* This is a high surrogate */
331 if (i + 1 < sourceLength) /* The low surrogate is in the source */
333 CsrUint32 ch2 = source[++i];
334 if ((ch2 >= UNI_SUR_LOW_START) && (ch2 <= UNI_SUR_LOW_END)) /* And it is a legal low surrogate, convert to UTF-32 */
/* Combine the 10 payload bits of each surrogate and add the 0x10000 base */
336 ch = ((ch - UNI_SUR_HIGH_START) << UNI_HALF_SHIFT) + (ch2 - UNI_SUR_LOW_START) + UNI_HALF_BASE;
338 else /* It is not a low surrogate, instead put a Unicode
339 'REPLACEMENT CHARACTER' (U+FFFD) */
341 ch = UNI_REPLACEMENT_CHAR;
342 i--; /* Subtract 1 again as the conversion must continue after the ill-formed code unit */
345 else /* The low surrogate does not exist, instead put a Unicode
346 'REPLACEMENT CHARACTER' (U+FFFD), and the null terminated character */
348 ch = UNI_REPLACEMENT_CHAR;
352 else if ((ch >= UNI_SUR_LOW_START) && (ch <= UNI_SUR_LOW_END)) /* The value of UTF-16 is not allowed to be in this range, instead put
353 a Unicode 'REPLACEMENT CHARACTER' (U+FFFD) */
355 ch = UNI_REPLACEMENT_CHAR;
358 /* Figure out how many bytes that are required */
359 if (ch < (CsrUint32) 0x80)
363 else if (ch < (CsrUint32) 0x800)
367 else if (ch < (CsrUint32) 0x10000)
371 else if (ch < (CsrUint32) 0x110000)
/* Beyond the Unicode range: fall back to the replacement character */
378 ch = UNI_REPLACEMENT_CHAR;
/* dest is pre-decremented on every store, i.e. the bytes of one character
   are written from the last trailing byte back to the first byte. */
383 switch (bytes) /* Convert character to UTF-8. Note: everything falls through. */
387 *--dest = (CsrUint8) ((ch | UNI_BYTEMARK) & UNI_BYTEMASK);
393 *--dest = (CsrUint8) ((ch | UNI_BYTEMARK) & UNI_BYTEMASK);
399 *--dest = (CsrUint8) ((ch | UNI_BYTEMARK) & UNI_BYTEMASK);
405 *--dest = (CsrUint8) (ch | firstByteMark[bytes]);
417 if (appendNull) /* Append the \0 character */
425 /*****************************************************************************
431 Returns TRUE if the given UTF-8 code unit is legal as defined by the
432 Unicode standard (see Chapter 3: Conformance, Section 3.9: Unicode
433 Encoding Forms, UTF-8).
435 This function assumes that the length parameter is unconditionally
436 correct and that the first byte is already validated by looking it up
437 in the trailingBytesForUtf8 array, which also reveals the number of
trailing bytes.
440 Legal code units are composed of one of the following byte sequences:
443 --------------------------------
451 F1-F3 80-BF 80-BF 80-BF
454 Please note that this function only checks whether the 2nd, 3rd and
455 4th bytes fall into the valid ranges.
458 codeUnit - pointer to the first byte of the byte sequence composing
459 the code unit to test.
460 length - the number of bytes in the code unit. Valid range is 1 to 4.
463 TRUE if the given code unit is legal.
465 *****************************************************************************/
466 static CsrBool isLegalUtf8(const CsrUtf8String *codeUnit, CsrUint32 length)
/* srcPtr starts one past the last byte of the sequence. NOTE(review): the
   statements that step it backwards and load `byte` are not visible in this
   view; presumably the trailing bytes are examined from last to first. */
468 const CsrUtf8String *srcPtr = codeUnit + length;
471 switch (length) /* Everything falls through except case 1 */
/* Generic trailing-byte range 80-BF */
476 if ((byte < 0x80) || (byte > 0xBF))
485 if ((byte < 0x80) || (byte > 0xBF))
/* The permitted range of the 2nd byte depends on the value of the 1st byte */
499 switch (*codeUnit) /* No fallthrough */
/* Narrowed 2nd-byte range 80-9F (presumably the ED row of the table) */
511 if ((byte < 0x80) || (byte > 0x9F))
/* Narrowed 2nd-byte range 80-8F (presumably the F4 row of the table) */
527 if ((byte < 0x80) || (byte > 0x8F))
546 /* The 1st byte and length are assumed correct */
553 /********************************************************************************
555 * Name: CsrUtf82Utf16String
557 * Description: The function decodes an UTF8 byte oriented string into a
* UTF-16 string. The source is walked twice with the same classification
* of every first byte: once before the output buffer is allocated and once
* to emit the UTF-16 code units. Illegal first bytes, truncated sequences
* and sequences rejected by isLegalUtf8 are emitted as the Unicode
* 'REPLACEMENT CHARACTER' (U+FFFD).
560 * Input: 0-terminated string of byte oriented UTF8 coded characters.
562 * Output: 0-terminated string of UTF-16 characters.
* The buffer is obtained from CsrPmemAlloc.
564 *********************************************************************************/
565 CsrUtf16String *CsrUtf82Utf16String(const CsrUtf8String *utf8String)
567 CsrSize i, length = 0;
568 CsrSize sourceLength;
569 CsrUtf16String *dest = NULL;
570 CsrUtf16String *destStart = NULL;
571 CsrInt8 extraBytes2Read;
/* Number of bytes before the terminator; index sourceLength is the 0 byte */
577 sourceLength = CsrStrLen((CsrCharString *) utf8String);
/* First pass. NOTE(review): presumably this pass only accumulates `length`
   (in UTF-16 code units) for the allocation below; the accumulation
   statements are not visible in this view - confirm. */
579 for (i = 0; i < sourceLength; i++)
/* NOTE(review): the table lookup assumes CsrUtf8String is an unsigned byte
   type; a signed type would index the table with a negative value - confirm. */
581 extraBytes2Read = trailingBytesForUtf8[utf8String[i]];
583 if (extraBytes2Read == -1) /* Illegal byte value, instead put a Unicode 'REPLACEMENT CHARACTER' (U+FFFD) */
/* NOTE(review): when i + extraBytes2Read == sourceLength the last trailing
   byte would be the terminator, yet this test lets the sequence through to
   isLegalUtf8; looks like `>=` was intended - confirm. */
587 else if (i + extraBytes2Read > sourceLength) /* The extra bytes does not exist, instead put a Unicode 'REPLACEMENT
588 CHARACTER' (U+FFFD), and the null terminated character */
593 else if (isLegalUtf8(&utf8String[i], extraBytes2Read + 1) == FALSE) /* It is not a legal utf-8 character, instead put a Unicode 'REPLACEMENT
594 CHARACTER' (U+FFFD) */
600 if (utf8String[i] > 0xEF) /* Needs a high and a low surrogate */
/* Skip the trailing bytes of the sequence that was just classified */
608 i += extraBytes2Read;
612 /* Create space for the null terminated character */
613 dest = (CsrUtf16String *) CsrPmemAlloc((1 + length) * sizeof(CsrUtf16String));
/* Second pass: same classification as above, now writing through dest */
616 for (i = 0; i < sourceLength; i++)
618 extraBytes2Read = trailingBytesForUtf8[utf8String[i]];
620 if (extraBytes2Read == -1) /* Illegal byte value, instead put a Unicode 'REPLACEMENT CHARACTER' (U+FFFD) */
622 *dest++ = UNI_REPLACEMENT_CHAR;
/* NOTE(review): same `>` versus `>=` question as in the first pass */
624 else if (i + extraBytes2Read > sourceLength) /* The extra bytes does not exist, instead put a Unicode 'REPLACEMENT
625 CHARACTER' (U+FFFD), and the null terminated character */
627 *dest++ = UNI_REPLACEMENT_CHAR;
631 else if (isLegalUtf8(&utf8String[i], extraBytes2Read + 1) == FALSE) /* It is not a legal utf-8 character, instead put a Unicode 'REPLACEMENT
632 CHARACTER' (U+FFFD) */
634 *dest++ = UNI_REPLACEMENT_CHAR;
636 else /* It is legal, convert the character to an CsrUint32 */
640 switch (extraBytes2Read) /* Everything falls through */
/* Remove the marker bits that were accumulated together with the payload */
674 ch -= offsetsFromUtf8[extraBytes2Read];
676 if (ch <= 0xFFFF) /* Character can be encoded in one CsrUint16 */
678 *dest++ = (CsrUint16) ch;
680 else /* The character needs two CsrUint16 */
/* High surrogate carries the upper bits, low surrogate the lower 10 bits */
683 *dest++ = (CsrUint16) ((ch >> UNI_HALF_SHIFT) | UNI_SUR_HIGH_START);
684 *dest++ = (CsrUint16) ((ch & 0x03FF) | UNI_SUR_LOW_START);
/* Terminate at the computed length, independent of where dest ended up */
689 destStart[length] = 0x00;
694 /********************************************************************************
696 * Name: CsrUtf16StrCpy
698 * Description: The function copies the contents from one UTF-16 string
699 * to another UTF-16 string.
701 * Input: 0-terminated UTF-16 string.
703 * Output: 0-terminated UTF-16 string.
705 *********************************************************************************/
706 CsrUtf16String *CsrUtf16StrCpy(CsrUtf16String *target, const CsrUtf16String *source)
708 if (source) /* if source is not NULL*/
710 CsrMemCpy(target, source, (CsrUtf16StrLen(source) + 1) * sizeof(CsrUtf16String));
719 /********************************************************************************
721 * Name: CsrUtf16StringDuplicate
723 * Description: The function allocates a new pointer and copies the input to
726 * Input: 0-terminated UTF-16 string.
728 * Output: Allocated variable0-terminated UTF-16 string.
730 *********************************************************************************/
731 CsrUtf16String *CsrUtf16StringDuplicate(const CsrUtf16String *source)
733 CsrUtf16String *target = NULL;
736 if (source) /* if source is not NULL*/
738 length = (CsrUtf16StrLen(source) + 1) * sizeof(CsrUtf16String);
739 target = (CsrUtf16String *) CsrPmemAlloc(length);
740 CsrMemCpy(target, source, length);
745 /********************************************************************************
747 * Name: CsrUtf16StrICmp
749 * Description: The function compares two UTF-16 strings.
751 * Input: Two 0-terminated UTF-16 string.
753 * Output: 0: if the strings are identical.
755 *********************************************************************************/
756 CsrUint16 CsrUtf16StrICmp(const CsrUtf16String *string1, const CsrUtf16String *string2)
758 while (*string1 || *string2)
760 if (CAPITAL(*string1) != CAPITAL(*string2))
762 return *string1 - *string2;
771 /********************************************************************************
773 * Name: CsrUtf16StrNICmp
775 * Description: The function compares upto count number of elements in the
778 * Input: Two 0-terminated UTF-16 string and a maximum
779 * number of elements to check.
781 * Output: 0: if the strings are identical.
783 *********************************************************************************/
784 CsrUint16 CsrUtf16StrNICmp(const CsrUtf16String *string1, const CsrUtf16String *string2, CsrUint32 count)
786 while ((*string1 || *string2) && count--)
788 if (CAPITAL(*string1) != CAPITAL(*string2))
790 return *string1 - *string2;
799 /********************************************************************************
801 * Name: CsrUtf16String2XML
803 * Description: The function converts an unicoded string (UTF-16) into an unicoded XML
804 * string where some special characters are encoded according to
* the XML character entities. The visible code replaces '&' by "&amp;",
* '<' by "&lt;" and '>' by "&gt;"; every other element is copied unchanged.
* A new buffer is only allocated on the path that reaches the CsrPmemAlloc
* call below; resultString starts out as the input pointer.
807 * Input: A unicoded string (UTF-16) which is freed.
809 * Output: A new unicoded string (UTF-16) containing the converted output.
811 *********************************************************************************/
812 CsrUtf16String *CsrUtf16String2XML(CsrUtf16String *str)
814 CsrUtf16String *scanString;
815 CsrUtf16String *outputString = NULL;
816 CsrUtf16String *resultString = str;
817 CsrUint32 stringLength = 0;
818 CsrBool encodeChars = FALSE;
/* First scan. NOTE(review): presumably this scan accumulates stringLength
   and sets encodeChars when an entity is needed; those statements are not
   visible in this view - confirm. */
825 if (*scanString == L'&')
830 else if ((*scanString == L'<') || (*scanString == L'>'))
/* Output buffer of stringLength UTF-16 elements */
847 resultString = outputString = CsrPmemAlloc(stringLength * sizeof(CsrUtf16String));
/* Second scan: write the entity or copy the element */
853 if (*scanString == L'&')
855 *outputString++ = '&';
856 *outputString++ = 'a';
857 *outputString++ = 'm';
858 *outputString++ = 'p';
859 *outputString++ = ';';
861 else if (*scanString == L'<')
863 *outputString++ = '&';
864 *outputString++ = 'l';
865 *outputString++ = 't';
866 *outputString++ = ';';
868 else if (*scanString == L'>')
870 *outputString++ = '&';
871 *outputString++ = 'g';
872 *outputString++ = 't';
873 *outputString++ = ';';
877 *outputString++ = *scanString;
892 /********************************************************************************
894 * Name: CsrXML2Utf16String
896 * Description: The function converts an unicoded XML string into an unicoded
897 * string (UTF-16) where some special XML characters are decoded according to
* the XML character entities. The visible code recognises, without regard
* to ASCII case, "&amp;", "&lt;", "&gt;", "&apos;" and "&quot;" and writes
* the single character each one stands for; an '&' that starts none of
* them is written as '&'. A new buffer is only allocated on the path that
* reaches the CsrPmemAlloc call below; resultString starts out as the
* input pointer.
900 * Input: A unicoded XML string which is freed.
902 * Output: A new unicoded pointer containing the decoded output.
904 *********************************************************************************/
905 CsrUtf16String *CsrXML2Utf16String(CsrUtf16String *str)
907 CsrUtf16String *scanString;
908 CsrUtf16String *outputString = NULL;
909 CsrUtf16String *resultString = str;
910 CsrUint32 stringLength = 0;
911 CsrBool encodeChars = FALSE;
/* First scan. NOTE(review): presumably this scan accumulates stringLength
   and sets encodeChars when an entity is found; those statements are not
   visible in this view - confirm. */
/* NOTE(review): the entity names are wchar_t literals cast to
   CsrUtf16String *; the comparison only works if wchar_t has the same
   width as CsrUtf16String on the target toolchain - confirm. */
918 if (*scanString == (CsrUtf16String) L'&')
922 if (!CsrUtf16StrNICmp(scanString, (CsrUtf16String *) L"AMP;", 4))
927 else if (!CsrUtf16StrNICmp(scanString, (CsrUtf16String *) L"LT;", 3))
932 else if (!CsrUtf16StrNICmp(scanString, (CsrUtf16String *) L"GT;", 3))
/* NOTE(review): the next two tests are plain `if`, whereas the second scan
   below uses `else if` for APOS and QUOT. The two scans therefore do not
   share the same chain structure; check that the length computed here still
   matches what the second scan writes. */
937 if (!CsrUtf16StrNICmp(scanString, (CsrUtf16String *) L"APOS;", 5))
942 if (!CsrUtf16StrNICmp(scanString, (CsrUtf16String *) L"QUOT;", 5))
/* Output buffer of stringLength UTF-16 elements */
961 resultString = outputString = CsrPmemAlloc(stringLength * sizeof(CsrUtf16String));
/* Second scan: write the decoded character or copy the element */
967 if (*scanString == L'&')
971 if (!CsrUtf16StrNICmp(scanString, (CsrUtf16String *) L"AMP;", 4))
973 *outputString++ = L'&';
976 else if (!CsrUtf16StrNICmp(scanString, (CsrUtf16String *) L"LT;", 3))
978 *outputString++ = L'<';
981 else if (!CsrUtf16StrNICmp(scanString, (CsrUtf16String *) L"GT;", 3))
983 *outputString++ = L'>';
986 else if (!CsrUtf16StrNICmp(scanString, (CsrUtf16String *) L"APOS;", 5))
988 *outputString++ = L'\'';
991 else if (!CsrUtf16StrNICmp(scanString, (CsrUtf16String *) L"QUOT;", 5))
993 *outputString++ = L'\"';
/* Not a recognised entity: keep the ampersand itself */
998 *outputString++ = L'&';
1004 *outputString++ = *scanString;
/* 0-terminate the decoded string */
1010 *outputString++ = 0;
1016 return resultString;
1019 CsrInt32 CsrUtf8StrCmp(const CsrUtf8String *string1, const CsrUtf8String *string2)
1021 return CsrStrCmp((const CsrCharString *) string1, (const CsrCharString *) string2);
1024 CsrInt32 CsrUtf8StrNCmp(const CsrUtf8String *string1, const CsrUtf8String *string2, CsrSize count)
1026 return CsrStrNCmp((const CsrCharString *) string1, (const CsrCharString *) string2, count);
1029 CsrUint32 CsrUtf8StringLengthInBytes(const CsrUtf8String *string)
1034 length = CsrStrLen((const CsrCharString *) string);
1036 return (CsrUint32) length;
1039 CsrUtf8String *CsrUtf8StrCpy(CsrUtf8String *target, const CsrUtf8String *source)
1041 return (CsrUtf8String *) CsrStrCpy((CsrCharString *) target, (const CsrCharString *) source);
1044 CsrUtf8String *CsrUtf8StrTruncate(CsrUtf8String *target, CsrSize count)
1046 CsrSize lastByte = count - 1;
1048 target[count] = '\0';
1050 if (count && (target[lastByte] & 0x80))
1052 /* the last byte contains non-ascii char */
1053 if (target[lastByte] & 0x40)
1055 /* multi-byte char starting just before truncation */
1056 target[lastByte] = '\0';
1058 else if ((target[lastByte - 1] & 0xE0) == 0xE0)
1060 /* 3-byte char starting 2 bytes before truncation */
1061 target[lastByte - 1] = '\0';
1063 else if ((target[lastByte - 2] & 0xF0) == 0xF0)
1065 /* 4-byte char starting 3 bytes before truncation */
1066 target[lastByte - 2] = '\0';
1073 CsrUtf8String *CsrUtf8StrNCpy(CsrUtf8String *target, const CsrUtf8String *source, CsrSize count)
1075 return (CsrUtf8String *) CsrStrNCpy((CsrCharString *) target, (const CsrCharString *) source, count);
1078 CsrUtf8String *CsrUtf8StrNCpyZero(CsrUtf8String *target, const CsrUtf8String *source, CsrSize count)
1080 CsrStrNCpy((CsrCharString *) target, (const CsrCharString *) source, count);
1081 if (target[count - 1] != '\0')
1083 CsrUtf8StrTruncate(target, count - 1);
1088 CsrUtf8String *CsrUtf8StrDup(const CsrUtf8String *source)
1090 return (CsrUtf8String *) CsrStrDup((const CsrCharString *) source);
1093 CsrUtf8String *CsrUtf8StringConcatenateTexts(const CsrUtf8String *inputText1, const CsrUtf8String *inputText2, const CsrUtf8String *inputText3, const CsrUtf8String *inputText4)
1095 CsrUtf8String *outputText;
1096 CsrUint32 textLen, textLen1, textLen2, textLen3, textLen4;
1098 textLen1 = CsrUtf8StringLengthInBytes(inputText1);
1099 textLen2 = CsrUtf8StringLengthInBytes(inputText2);
1100 textLen3 = CsrUtf8StringLengthInBytes(inputText3);
1101 textLen4 = CsrUtf8StringLengthInBytes(inputText4);
1103 textLen = textLen1 + textLen2 + textLen3 + textLen4;
1105 if (textLen == 0) /*stop here is all lengths are 0*/
1110 outputText = (CsrUtf8String *) CsrPmemAlloc((textLen + 1) * sizeof(CsrUtf8String)); /* add space for 0-termination*/
1113 if (inputText1 != NULL)
1115 CsrUtf8StrNCpy(outputText, inputText1, textLen1);
1118 if (inputText2 != NULL)
1120 CsrUtf8StrNCpy(&(outputText[textLen1]), inputText2, textLen2);
1123 if (inputText3 != NULL)
1125 CsrUtf8StrNCpy(&(outputText[textLen1 + textLen2]), inputText3, textLen3);
1128 if (inputText4 != NULL)
1130 CsrUtf8StrNCpy(&(outputText[textLen1 + textLen2 + textLen3]), inputText4, textLen4);
1133 outputText[textLen] = '\0';