ICU 78.3
78.3
Toggle main menu visibility
Loading...
Searching...
No Matches
common
unicode
utf_old.h
Go to the documentation of this file.
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
*
6
* Copyright (C) 2002-2012, International Business Machines
7
* Corporation and others. All Rights Reserved.
8
*
9
*******************************************************************************
10
* file name: utf_old.h
11
* encoding: UTF-8
12
* tab size: 8 (not used)
13
* indentation:4
14
*
15
* created on: 2002sep21
16
* created by: Markus W. Scherer
17
*/
18
141
142
#ifndef __UTF_OLD_H__
143
#define __UTF_OLD_H__
144
145
#include "
unicode/utf.h
"
146
#include "
unicode/utf8.h
"
147
#include "
unicode/utf16.h
"
148
160
#ifndef U_HIDE_OBSOLETE_UTF_OLD_H
161
# define U_HIDE_OBSOLETE_UTF_OLD_H 0
162
#endif
163
164
#if !defined(U_HIDE_DEPRECATED_API) && !U_HIDE_OBSOLETE_UTF_OLD_H
165
166
/* Formerly utf.h, part 1 --------------------------------------------------- */
167
168
#ifdef U_USE_UTF_DEPRECATES
176
typedef
int32_t UTextOffset;
177
#endif
178
180
#define UTF_SIZE 16
181
188
#define UTF_SAFE
190
#undef UTF_UNSAFE
192
#undef UTF_STRICT
193
208
#define UTF8_ERROR_VALUE_1 0x15
209
215
#define UTF8_ERROR_VALUE_2 0x9f
216
223
#define UTF_ERROR_VALUE 0xffff
224
231
/**
 * Nonzero if c is one of the error values used by the old macros:
 * the low 16 bits are 0xfffe or 0xffff, or c equals UTF8_ERROR_VALUE_1
 * or UTF8_ERROR_VALUE_2.
 * Note: c is evaluated more than once.
 */
#define UTF_IS_ERROR(c) \
    (((c)&0xfffe)==0xfffe || (c)==UTF8_ERROR_VALUE_1 || (c)==UTF8_ERROR_VALUE_2)
233
239
/**
 * Nonzero if c satisfies UTF_IS_UNICODE_CHAR() and is neither
 * UTF8_ERROR_VALUE_1 nor UTF8_ERROR_VALUE_2.
 * Note: c is evaluated more than once.
 */
#define UTF_IS_VALID(c) \
    (UTF_IS_UNICODE_CHAR(c) && \
     (c)!=UTF8_ERROR_VALUE_1 && (c)!=UTF8_ERROR_VALUE_2)
242
247
#define UTF_IS_SURROGATE(uchar) (((uchar)&0xfffff800)==0xd800)
248
254
/**
 * Nonzero if c is in 0xfdd0..0xfdef, or if c is at most 0x10ffff and its
 * low 16 bits are 0xfffe or 0xffff.
 * Note: c is evaluated more than once.
 */
#define UTF_IS_UNICODE_NONCHAR(c) \
    ((c)>=0xfdd0 && \
     ((uint32_t)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \
     (uint32_t)(c)<=0x10ffff)
258
274
/**
 * Nonzero if c is below 0xd800, or if c is above 0xdfff, at most 0x10ffff,
 * and not matched by UTF_IS_UNICODE_NONCHAR().
 * Note: c is evaluated more than once.
 */
#define UTF_IS_UNICODE_CHAR(c) \
    ((uint32_t)(c)<0xd800 || \
     ((uint32_t)(c)>0xdfff && \
      (uint32_t)(c)<=0x10ffff && \
      !UTF_IS_UNICODE_NONCHAR(c)))
279
280
/* Formerly utf8.h ---------------------------------------------------------- */
281
293
#ifdef U_UTF8_IMPL
294
// No forward declaration if compiling utf_impl.cpp, which defines utf8_countTrailBytes.
295
#elif defined(U_STATIC_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION)
296
U_CAPI
const
uint8_t
utf8_countTrailBytes
[];
297
#else
298
U_CFUNC
U_IMPORT
const
uint8_t
utf8_countTrailBytes
[];
299
#endif
300
305
/**
 * Looks up utf8_countTrailBytes[] using leadByte converted to uint8_t.
 * The argument is parenthesized so that the cast applies to the whole
 * argument expression (e.g. "x >> 8"), not just to its first operand.
 */
#define UTF8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[(uint8_t)(leadByte)])
306
311
#define UTF8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)
312
314
#define UTF8_IS_SINGLE(uchar) (((uchar)&0x80)==0)
316
#define UTF8_IS_LEAD(uchar) ((uint8_t)((uchar)-0xc0)<0x3e)
318
#define UTF8_IS_TRAIL(uchar) (((uchar)&0xc0)==0x80)
319
321
#define UTF8_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0x7f)
322
336
/**
 * Number of UTF-8 bytes for code point c.
 * The active (#if 1) variant yields 1 up to 0x7f, 2 up to 0x7ff,
 * 4 for 0x10000..0x10ffff and 3 for everything else.
 * The disabled variant additionally yields 5 and 6 for larger values.
 * Note: c is evaluated more than once.
 */
#if 1
#   define UTF8_CHAR_LENGTH(c) \
        ((uint32_t)(c)<=0x7f ? 1 : \
            ((uint32_t)(c)<=0x7ff ? 2 : \
                ((uint32_t)((c)-0x10000)>0xfffff ? 3 : 4) \
            ) \
        )
#else
#   define UTF8_CHAR_LENGTH(c) \
        ((uint32_t)(c)<=0x7f ? 1 : \
            ((uint32_t)(c)<=0x7ff ? 2 : \
                ((uint32_t)(c)<=0xffff ? 3 : \
                    ((uint32_t)(c)<=0x10ffff ? 4 : \
                        ((uint32_t)(c)<=0x3ffffff ? 5 : \
                            ((uint32_t)(c)<=0x7fffffff ? 6 : 3) \
                        ) \
                    ) \
                ) \
            ) \
        )
#endif
357
359
#define UTF8_MAX_CHAR_LENGTH 4
360
362
#define UTF8_ARRAY_SIZE(size) ((5*(size))/2)
363
365
/**
 * Reads the code point that contains offset i into c without changing i.
 * Works on a private copy of i: first UTF8_SET_CHAR_START_UNSAFE() moves the
 * copy, then UTF8_NEXT_CHAR_UNSAFE() reads from it.
 */
#define UTF8_GET_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t _utf8_get_char_unsafe_index=(int32_t)(i); \
    UTF8_SET_CHAR_START_UNSAFE(s, _utf8_get_char_unsafe_index); \
    UTF8_NEXT_CHAR_UNSAFE(s, _utf8_get_char_unsafe_index, c); \
} UPRV_BLOCK_MACRO_END
370
372
/**
 * Reads the code point that contains offset i into c without changing i.
 * Works on a private copy of i: first UTF8_SET_CHAR_START_SAFE() adjusts the
 * copy, then UTF8_NEXT_CHAR_SAFE() reads from it with the given length and
 * strict arguments.
 */
#define UTF8_GET_CHAR_SAFE(s, start, i, length, c, strict) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t _utf8_get_char_safe_index=(int32_t)(i); \
    UTF8_SET_CHAR_START_SAFE(s, start, _utf8_get_char_safe_index); \
    UTF8_NEXT_CHAR_SAFE(s, _utf8_get_char_safe_index, length, c, strict); \
} UPRV_BLOCK_MACRO_END
377
379
/**
 * Reads the byte at s[i] into c and post-increments i. If that byte is in
 * 0xc0..0xf4, the lead byte is masked and 1 to 3 following bytes are folded
 * in (6 bits each), advancing i past them. No bounds or well-formedness
 * checks are performed.
 */
#define UTF8_NEXT_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[(i)++]; \
    if((uint8_t)((c)-0xc0)<0x35) { \
        uint8_t __count=UTF8_COUNT_TRAIL_BYTES(c); \
        UTF8_MASK_LEAD_BYTE(c, __count); \
        switch(__count) { \
        /* each following branch falls through to the next one */ \
        case 3: \
            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
            U_FALLTHROUGH; \
        case 2: \
            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
            U_FALLTHROUGH; \
        case 1: \
            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
        /* no other branches to optimize switch() */ \
            break; \
        } \
    } \
} UPRV_BLOCK_MACRO_END
399
401
/**
 * Writes code point c to s starting at offset i as 1 to 4 bytes and advances
 * i past the bytes written. Values above 0xffff always take the 4-byte form.
 * No capacity check is performed. Note: c is evaluated more than once.
 */
#define UTF8_APPEND_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    if((uint32_t)(c)<=0x7f) { \
        (s)[(i)++]=(uint8_t)(c); \
    } else { \
        if((uint32_t)(c)<=0x7ff) { \
            (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
        } else { \
            if((uint32_t)(c)<=0xffff) { \
                (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
            } else { \
                (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \
                (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \
            } \
            (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
        } \
        (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
    } \
} UPRV_BLOCK_MACRO_END
419
421
/**
 * Advances i by one plus UTF8_COUNT_TRAIL_BYTES() of the byte at s[i].
 * No bounds check is performed.
 */
#define UTF8_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    (i)+=1+UTF8_COUNT_TRAIL_BYTES((s)[i]); \
} UPRV_BLOCK_MACRO_END
424
426
/**
 * Applies UTF8_FWD_1_UNSAFE(s, i) n times. n is evaluated once into a local
 * counter; nothing happens when n is zero or negative.
 */
#define UTF8_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N=(n); \
    while(__N>0) { \
        UTF8_FWD_1_UNSAFE(s, i); \
        --__N; \
    } \
} UPRV_BLOCK_MACRO_END
433
435
/**
 * Decrements i while s[i] satisfies UTF8_IS_TRAIL(). There is no lower bound
 * check.
 */
#define UTF8_SET_CHAR_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    while(UTF8_IS_TRAIL((s)[i])) { --(i); } \
} UPRV_BLOCK_MACRO_END
438
440
/**
 * Reads the byte at s[i] into c and post-increments i. For values of 0x80 and
 * above: if UTF8_IS_LEAD(c), the result of utf8_nextCharSafeBody() is stored
 * in c (i is passed by address); otherwise c is set to UTF8_ERROR_VALUE_1.
 */
#define UTF8_NEXT_CHAR_SAFE(s, i, length, c, strict) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[(i)++]; \
    if((c)>=0x80) { \
        if(UTF8_IS_LEAD(c)) { \
            (c)=utf8_nextCharSafeBody(s, &(i), (int32_t)(length), c, strict); \
        } else { \
            (c)=UTF8_ERROR_VALUE_1; \
        } \
    } \
} UPRV_BLOCK_MACRO_END
450
452
/**
 * Appends code point c to s at offset i. Values up to 0x7f are stored directly
 * and i is post-incremented (length is not consulted on this path); for all
 * other values i is replaced by the result of utf8_appendCharSafeBody().
 * Note: c is evaluated more than once.
 */
#define UTF8_APPEND_CHAR_SAFE(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    if((uint32_t)(c)<=0x7f) { \
        (s)[(i)++]=(uint8_t)(c); \
    } else { \
        (i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(length), c, NULL); \
    } \
} UPRV_BLOCK_MACRO_END
459
461
#define UTF8_FWD_1_SAFE(s, i, length) U8_FWD_1(s, i, length)
462
464
#define UTF8_FWD_N_SAFE(s, i, length, n) U8_FWD_N(s, i, length, n)
465
467
#define UTF8_SET_CHAR_START_SAFE(s, start, i) U8_SET_CP_START(s, start, i)
468
470
/**
 * Pre-decrements i and reads s[i] into c. If that byte satisfies
 * UTF8_IS_TRAIL(), keeps stepping backwards, accumulating 6 bits per byte,
 * until a byte of 0xc0 or above is found; that byte is masked with
 * UTF8_MASK_LEAD_BYTE() and merged in. There is no lower bound check.
 */
#define UTF8_PREV_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[--(i)]; \
    if(UTF8_IS_TRAIL(c)) { \
        uint8_t __b, __count=1, __shift=6; \
\
        /* c is a trail byte */ \
        (c)&=0x3f; \
        for(;;) { \
            __b=(s)[--(i)]; \
            if(__b>=0xc0) { \
                UTF8_MASK_LEAD_BYTE(__b, __count); \
                (c)|=(UChar32)__b<<__shift; \
                break; \
            } else { \
                (c)|=(UChar32)(__b&0x3f)<<__shift; \
                ++__count; \
                __shift+=6; \
            } \
        } \
    } \
} UPRV_BLOCK_MACRO_END
491
493
/**
 * Pre-decrements i at least once, and again for as long as the byte at the
 * new s[i] satisfies UTF8_IS_TRAIL(). There is no lower bound check.
 */
#define UTF8_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    while(UTF8_IS_TRAIL((s)[--(i)])) {} \
} UPRV_BLOCK_MACRO_END
496
498
/**
 * Applies UTF8_BACK_1_UNSAFE(s, i) n times. n is evaluated once into a local
 * counter; nothing happens when n is zero or negative.
 */
#define UTF8_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N=(n); \
    while(__N>0) { \
        UTF8_BACK_1_UNSAFE(s, i); \
        --__N; \
    } \
} UPRV_BLOCK_MACRO_END
505
507
/**
 * Adjusts i by stepping back with UTF8_BACK_1_UNSAFE() and then forward
 * with UTF8_FWD_1_UNSAFE().
 */
#define UTF8_SET_CHAR_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    UTF8_BACK_1_UNSAFE(s, i); \
    UTF8_FWD_1_UNSAFE(s, i); \
} UPRV_BLOCK_MACRO_END
511
513
/**
 * Pre-decrements i and reads s[i] into c. For values of 0x80 and above:
 * if c is at most 0xbf, the result of utf8_prevCharSafeBody() is stored in c
 * (i is passed by address); otherwise c is set to UTF8_ERROR_VALUE_1.
 */
#define UTF8_PREV_CHAR_SAFE(s, start, i, c, strict) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[--(i)]; \
    if((c)>=0x80) { \
        if((c)<=0xbf) { \
            (c)=utf8_prevCharSafeBody(s, start, &(i), c, strict); \
        } else { \
            (c)=UTF8_ERROR_VALUE_1; \
        } \
    } \
} UPRV_BLOCK_MACRO_END
523
525
#define UTF8_BACK_1_SAFE(s, start, i) U8_BACK_1(s, start, i)
526
528
#define UTF8_BACK_N_SAFE(s, start, i, n) U8_BACK_N(s, start, i, n)
529
531
#define UTF8_SET_CHAR_LIMIT_SAFE(s, start, i, length) U8_SET_CP_LIMIT(s, start, i, length)
532
533
/* Formerly utf16.h --------------------------------------------------------- */
534
536
#define UTF_IS_FIRST_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xd800)
537
539
#define UTF_IS_SECOND_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xdc00)
540
542
#define UTF_IS_SURROGATE_FIRST(c) (((c)&0x400)==0)
543
545
#define UTF_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
546
548
/**
 * Combines two surrogate code units into one value:
 * (first << 10) + second - UTF_SURROGATE_OFFSET.
 * The arguments are not checked.
 */
#define UTF16_GET_PAIR_VALUE(first, second) \
    (((first)<<10UL)+(second)-UTF_SURROGATE_OFFSET)
550
552
/**
 * First (lead) surrogate for a supplementary code point:
 * (supplementary >> 10) + 0xd7c0, converted to UChar.
 * The expansion is fully parenthesized so it behaves as a single operand.
 */
#define UTF_FIRST_SURROGATE(supplementary) ((UChar)(((supplementary)>>10)+0xd7c0))
553
555
/**
 * Second (trail) surrogate for a supplementary code point:
 * (supplementary & 0x3ff) | 0xdc00, converted to UChar.
 * The expansion is fully parenthesized so it behaves as a single operand.
 */
#define UTF_SECOND_SURROGATE(supplementary) ((UChar)(((supplementary)&0x3ff)|0xdc00))
556
558
#define UTF16_LEAD(supplementary) UTF_FIRST_SURROGATE(supplementary)
559
561
#define UTF16_TRAIL(supplementary) UTF_SECOND_SURROGATE(supplementary)
562
564
/**
 * Nonzero if uchar does not satisfy UTF_IS_SURROGATE().
 * The expansion is parenthesized so it behaves as a single operand.
 */
#define UTF16_IS_SINGLE(uchar) (!UTF_IS_SURROGATE(uchar))
565
567
#define UTF16_IS_LEAD(uchar) UTF_IS_FIRST_SURROGATE(uchar)
568
570
#define UTF16_IS_TRAIL(uchar) UTF_IS_SECOND_SURROGATE(uchar)
571
573
#define UTF16_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0xffff)
574
576
#define UTF16_CHAR_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2)
577
579
#define UTF16_MAX_CHAR_LENGTH 2
580
582
#define UTF16_ARRAY_SIZE(size) (size)
583
595
/**
 * Reads the code unit at s[i] into c without changing i. If it is a
 * surrogate, it is combined with the following unit (when
 * UTF_IS_SURROGATE_FIRST()) or the preceding unit (otherwise) via
 * UTF16_GET_PAIR_VALUE(). The neighbouring unit is not bounds-checked or
 * validated.
 */
#define UTF16_GET_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[i]; \
    if(UTF_IS_SURROGATE(c)) { \
        if(UTF_IS_SURROGATE_FIRST(c)) { \
            (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)+1]); \
        } else { \
            (c)=UTF16_GET_PAIR_VALUE((s)[(i)-1], (c)); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
605
607
/**
 * Reads the code unit at s[i] into c without changing i. A surrogate is
 * combined with its neighbour (next unit if within length, or previous unit
 * if not before start) when that neighbour is the matching surrogate.
 * If strict is nonzero, an unmatched surrogate or a value failing
 * UTF_IS_UNICODE_CHAR() yields UTF_ERROR_VALUE; if strict is zero, the
 * unit is left in c unchanged in those cases.
 */
#define UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[i]; \
    if(UTF_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        if(UTF_IS_SURROGATE_FIRST(c)) { \
            if((i)+1<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)+1])) { \
                (c)=UTF16_GET_PAIR_VALUE((c), __c2); \
                /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
            } else if(strict) {\
                /* unmatched first surrogate */ \
                (c)=UTF_ERROR_VALUE; \
            } \
        } else { \
            if((i)-1>=(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
                (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
                /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
            } else if(strict) {\
                /* unmatched second surrogate */ \
                (c)=UTF_ERROR_VALUE; \
            } \
        } \
    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
        (c)=UTF_ERROR_VALUE; \
    } \
} UPRV_BLOCK_MACRO_END
632
634
/**
 * Reads the code unit at s[i] into c and post-increments i. If the unit
 * satisfies UTF_IS_FIRST_SURROGATE(), the next unit is consumed as well and
 * both are combined with UTF16_GET_PAIR_VALUE(). The second unit is neither
 * bounds-checked nor validated.
 */
#define UTF16_NEXT_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[(i)++]; \
    if(UTF_IS_FIRST_SURROGATE(c)) { \
        (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)++]); \
    } \
} UPRV_BLOCK_MACRO_END
640
642
/**
 * Writes code point c to s at offset i: one unit for values up to 0xffff,
 * otherwise a surrogate pair, advancing i past the units written.
 * No capacity or range check is performed. Note: c is evaluated more than once.
 */
#define UTF16_APPEND_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    if((uint32_t)(c)<=0xffff) { \
        (s)[(i)++]=(uint16_t)(c); \
    } else { \
        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
    } \
} UPRV_BLOCK_MACRO_END
650
652
/**
 * Advances i by one code unit, or by two if the unit at the original s[i]
 * satisfies UTF_IS_FIRST_SURROGATE(). No bounds check is performed.
 */
#define UTF16_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(UTF_IS_FIRST_SURROGATE((s)[(i)++])) { \
        ++(i); \
    } \
} UPRV_BLOCK_MACRO_END
657
659
/**
 * Applies UTF16_FWD_1_UNSAFE(s, i) n times. n is evaluated once into a local
 * counter; nothing happens when n is zero or negative.
 */
#define UTF16_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N=(n); \
    while(__N>0) { \
        UTF16_FWD_1_UNSAFE(s, i); \
        --__N; \
    } \
} UPRV_BLOCK_MACRO_END
666
668
/**
 * Decrements i by one if the unit at s[i] satisfies
 * UTF_IS_SECOND_SURROGATE(). The preceding unit is not examined.
 */
#define UTF16_SET_CHAR_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(UTF_IS_SECOND_SURROGATE((s)[i])) { \
        --(i); \
    } \
} UPRV_BLOCK_MACRO_END
673
675
/**
 * Reads the code unit at s[i] into c and post-increments i. A first surrogate
 * followed (within length) by a second surrogate is combined into one code
 * point and i advances past both. If strict is nonzero, an unmatched first
 * surrogate or a value failing UTF_IS_UNICODE_CHAR() yields UTF_ERROR_VALUE.
 */
#define UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[(i)++]; \
    if(UTF_IS_FIRST_SURROGATE(c)) { \
        uint16_t __c2; \
        if((i)<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)])) { \
            ++(i); \
            (c)=UTF16_GET_PAIR_VALUE((c), __c2); \
            /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
        } else if(strict) {\
            /* unmatched first surrogate */ \
            (c)=UTF_ERROR_VALUE; \
        } \
    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
        /* unmatched second surrogate or other non-character */ \
        (c)=UTF_ERROR_VALUE; \
    } \
} UPRV_BLOCK_MACRO_END
692
694
/**
 * Appends code point c to s at offset i and advances i.
 * - c up to 0xffff: one unit is written (length is not consulted here).
 * - c up to 0x10ffff: a surrogate pair is written if i+1 < length,
 *   otherwise a single UTF_ERROR_VALUE unit is written.
 * - larger c: a single UTF_ERROR_VALUE unit is written.
 * Note: c is evaluated more than once.
 */
#define UTF16_APPEND_CHAR_SAFE(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    if((uint32_t)(c)<=0xffff) { \
        (s)[(i)++]=(uint16_t)(c); \
    } else if((uint32_t)(c)<=0x10ffff) { \
        if((i)+1<(length)) { \
            (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
            (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
        } else /* not enough space */ { \
            (s)[(i)++]=UTF_ERROR_VALUE; \
        } \
    } else /* c>0x10ffff, write error value */ { \
        (s)[(i)++]=UTF_ERROR_VALUE; \
    } \
} UPRV_BLOCK_MACRO_END
708
710
#define UTF16_FWD_1_SAFE(s, i, length) U16_FWD_1(s, i, length)
711
713
#define UTF16_FWD_N_SAFE(s, i, length, n) U16_FWD_N(s, i, length, n)
714
716
#define UTF16_SET_CHAR_START_SAFE(s, start, i) U16_SET_CP_START(s, start, i)
717
719
/**
 * Pre-decrements i and reads s[i] into c. If that unit satisfies
 * UTF_IS_SECOND_SURROGATE(), i is decremented again and the preceding unit
 * is combined with it via UTF16_GET_PAIR_VALUE(). The preceding unit is
 * neither bounds-checked nor validated.
 */
#define UTF16_PREV_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[--(i)]; \
    if(UTF_IS_SECOND_SURROGATE(c)) { \
        (c)=UTF16_GET_PAIR_VALUE((s)[--(i)], (c)); \
    } \
} UPRV_BLOCK_MACRO_END
725
727
/**
 * Moves i back by one code unit, or by two if the unit just before the
 * original i satisfies UTF_IS_SECOND_SURROGATE(). No bounds check is performed.
 */
#define UTF16_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(UTF_IS_SECOND_SURROGATE((s)[--(i)])) { \
        --(i); \
    } \
} UPRV_BLOCK_MACRO_END
732
734
/**
 * Applies UTF16_BACK_1_UNSAFE(s, i) n times. n is evaluated once into a local
 * counter; nothing happens when n is zero or negative.
 */
#define UTF16_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N=(n); \
    while(__N>0) { \
        UTF16_BACK_1_UNSAFE(s, i); \
        --__N; \
    } \
} UPRV_BLOCK_MACRO_END
741
743
/**
 * Increments i by one if the unit at s[i-1] satisfies
 * UTF_IS_FIRST_SURROGATE(). The unit at s[i] is not examined.
 */
#define UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \
        ++(i); \
    } \
} UPRV_BLOCK_MACRO_END
748
750
/**
 * Pre-decrements i and reads s[i] into c. A second surrogate preceded (not
 * before start) by a first surrogate is combined into one code point and i
 * moves back past both. If strict is nonzero, an unmatched second surrogate
 * or a value failing UTF_IS_UNICODE_CHAR() yields UTF_ERROR_VALUE.
 */
#define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[--(i)]; \
    if(UTF_IS_SECOND_SURROGATE(c)) { \
        uint16_t __c2; \
        if((i)>(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
            --(i); \
            (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
            /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
        } else if(strict) {\
            /* unmatched second surrogate */ \
            (c)=UTF_ERROR_VALUE; \
        } \
    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
        /* unmatched first surrogate or other non-character */ \
        (c)=UTF_ERROR_VALUE; \
    } \
} UPRV_BLOCK_MACRO_END
767
769
#define UTF16_BACK_1_SAFE(s, start, i) U16_BACK_1(s, start, i)
770
772
#define UTF16_BACK_N_SAFE(s, start, i, n) U16_BACK_N(s, start, i, n)
773
775
#define UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length)
776
777
/* Formerly utf32.h --------------------------------------------------------- */
778
779
/*
780
* Old documentation:
781
*
782
* This file defines macros to deal with UTF-32 code units and code points.
783
* Signatures and semantics are the same as for the similarly named macros
784
* in utf16.h.
785
* utf32.h is included by utf.h after unicode/umachine.h
786
* and some common definitions.
787
* <p><b>Usage:</b> ICU coding guidelines for if() statements should be followed when using these macros.
788
* Compound statements (curly braces {}) must be used for if-else-while...
789
* bodies and all macro statements should be terminated with semicolon.</p>
790
*/
791
792
/* internal definitions ----------------------------------------------------- */
793
795
/**
 * Nonzero if c is acceptable: when strict is zero, any value up to 0x10ffff;
 * when strict is nonzero, any value satisfying UTF_IS_UNICODE_CHAR().
 */
#define UTF32_IS_SAFE(c, strict) \
    (!(strict) ? \
        (uint32_t)(c)<=0x10ffff : \
        UTF_IS_UNICODE_CHAR(c))
799
800
/*
801
* For the semantics of all of these macros, see utf16.h.
802
* The UTF-32 versions are trivial because any code point is
803
* encoded using exactly one code unit.
804
*/
805
806
/* single-code point definitions -------------------------------------------- */
807
808
/* classes of code unit values */
809
811
#define UTF32_IS_SINGLE(uchar) 1
813
#define UTF32_IS_LEAD(uchar) 0
815
#define UTF32_IS_TRAIL(uchar) 0
816
817
/* number of code units per code point */
818
820
#define UTF32_NEED_MULTIPLE_UCHAR(c) 0
822
#define UTF32_CHAR_LENGTH(c) 1
824
#define UTF32_MAX_CHAR_LENGTH 1
825
826
/* average number of code units compared to UTF-16 */
827
829
#define UTF32_ARRAY_SIZE(size) (size)
830
832
/** Reads the code unit at s[i] into c; i is not changed. */
#define UTF32_GET_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[i]; \
} UPRV_BLOCK_MACRO_END

/**
 * Reads the code unit at s[i] into c; i is not changed. If the value fails
 * UTF32_IS_SAFE(c, strict), c is set to UTF_ERROR_VALUE.
 * start and length are not used by the expansion.
 */
#define UTF32_GET_CHAR_SAFE(s, start, i, length, c, strict) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[i]; \
    if(!UTF32_IS_SAFE(c, strict)) { \
        (c)=UTF_ERROR_VALUE; \
    } \
} UPRV_BLOCK_MACRO_END
843
844
/* definitions with forward iteration --------------------------------------- */
845
847
/** Reads the code unit at s[i] into c and post-increments i. */
#define UTF32_NEXT_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[(i)++]; \
} UPRV_BLOCK_MACRO_END

/** Stores c at s[i] and post-increments i; c is not checked. */
#define UTF32_APPEND_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (s)[(i)++]=(c); \
} UPRV_BLOCK_MACRO_END

/** Increments i by one; s is not used. */
#define UTF32_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    ++(i); \
} UPRV_BLOCK_MACRO_END

/** Adds n to i; s is not used. */
#define UTF32_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    (i)+=(n); \
} UPRV_BLOCK_MACRO_END

/** Expands to an empty block. */
#define UTF32_SET_CHAR_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
} UPRV_BLOCK_MACRO_END

/**
 * Reads the code unit at s[i] into c and post-increments i. If the value
 * fails UTF32_IS_SAFE(c, strict), c is set to UTF_ERROR_VALUE.
 * length is not used by the expansion.
 */
#define UTF32_NEXT_CHAR_SAFE(s, i, length, c, strict) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[(i)++]; \
    if(!UTF32_IS_SAFE(c, strict)) { \
        (c)=UTF_ERROR_VALUE; \
    } \
} UPRV_BLOCK_MACRO_END

/**
 * Stores c at s[i] if c is at most 0x10ffff, otherwise stores 0xfffd;
 * post-increments i either way. length is not used by the expansion.
 */
#define UTF32_APPEND_CHAR_SAFE(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    if((uint32_t)(c)<=0x10ffff) { \
        (s)[(i)++]=(c); \
    } else /* c>0x10ffff, write 0xfffd */ { \
        (s)[(i)++]=0xfffd; \
    } \
} UPRV_BLOCK_MACRO_END

/** Increments i by one; s and length are not used by the expansion. */
#define UTF32_FWD_1_SAFE(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    ++(i); \
} UPRV_BLOCK_MACRO_END

/** Adds n to i, then limits i to at most length. */
#define UTF32_FWD_N_SAFE(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \
    if(((i)+=(n))>(length)) { \
        (i)=(length); \
    } \
} UPRV_BLOCK_MACRO_END

/** Expands to an empty block. */
#define UTF32_SET_CHAR_START_SAFE(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
} UPRV_BLOCK_MACRO_END
902
903
/* definitions with backward iteration -------------------------------------- */
904
906
/** Pre-decrements i and reads the code unit at s[i] into c. */
#define UTF32_PREV_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[--(i)]; \
} UPRV_BLOCK_MACRO_END

/** Decrements i by one; s is not used. */
#define UTF32_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    --(i); \
} UPRV_BLOCK_MACRO_END

/** Subtracts n from i; s is not used. */
#define UTF32_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    (i)-=(n); \
} UPRV_BLOCK_MACRO_END

/** Expands to an empty block. */
#define UTF32_SET_CHAR_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
} UPRV_BLOCK_MACRO_END

/**
 * Pre-decrements i and reads the code unit at s[i] into c. If the value
 * fails UTF32_IS_SAFE(c, strict), c is set to UTF_ERROR_VALUE.
 * start is not used by the expansion.
 */
#define UTF32_PREV_CHAR_SAFE(s, start, i, c, strict) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[--(i)]; \
    if(!UTF32_IS_SAFE(c, strict)) { \
        (c)=UTF_ERROR_VALUE; \
    } \
} UPRV_BLOCK_MACRO_END

/** Decrements i by one; s and start are not used by the expansion. */
#define UTF32_BACK_1_SAFE(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
    --(i); \
} UPRV_BLOCK_MACRO_END

/** Subtracts n from i, then raises i to start if it fell below start. */
#define UTF32_BACK_N_SAFE(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    (i)-=(n); \
    if((i)<(start)) { \
        (i)=(start); \
    } \
} UPRV_BLOCK_MACRO_END

/**
 * Expands to an empty block.
 * Note: this macro takes (s, i, length), unlike the UTF8_ and UTF16_
 * SET_CHAR_LIMIT_SAFE macros in this file, which take (s, start, i, length).
 */
#define UTF32_SET_CHAR_LIMIT_SAFE(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
} UPRV_BLOCK_MACRO_END
948
949
/* Formerly utf.h, part 2 --------------------------------------------------- */
950
956
#define UTF_ARRAY_SIZE(size) UTF16_ARRAY_SIZE(size)
957
959
#define UTF_GET_CHAR_UNSAFE(s, i, c) UTF16_GET_CHAR_UNSAFE(s, i, c)
960
962
#define UTF_GET_CHAR_SAFE(s, start, i, length, c, strict) UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict)
963
964
966
#define UTF_NEXT_CHAR_UNSAFE(s, i, c) UTF16_NEXT_CHAR_UNSAFE(s, i, c)
967
969
#define UTF_NEXT_CHAR_SAFE(s, i, length, c, strict) UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict)
970
971
973
#define UTF_APPEND_CHAR_UNSAFE(s, i, c) UTF16_APPEND_CHAR_UNSAFE(s, i, c)
974
976
#define UTF_APPEND_CHAR_SAFE(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c)
977
978
980
#define UTF_FWD_1_UNSAFE(s, i) UTF16_FWD_1_UNSAFE(s, i)
981
983
#define UTF_FWD_1_SAFE(s, i, length) UTF16_FWD_1_SAFE(s, i, length)
984
985
987
#define UTF_FWD_N_UNSAFE(s, i, n) UTF16_FWD_N_UNSAFE(s, i, n)
988
990
#define UTF_FWD_N_SAFE(s, i, length, n) UTF16_FWD_N_SAFE(s, i, length, n)
991
992
994
#define UTF_SET_CHAR_START_UNSAFE(s, i) UTF16_SET_CHAR_START_UNSAFE(s, i)
995
997
#define UTF_SET_CHAR_START_SAFE(s, start, i) UTF16_SET_CHAR_START_SAFE(s, start, i)
998
999
1001
#define UTF_PREV_CHAR_UNSAFE(s, i, c) UTF16_PREV_CHAR_UNSAFE(s, i, c)
1002
1004
#define UTF_PREV_CHAR_SAFE(s, start, i, c, strict) UTF16_PREV_CHAR_SAFE(s, start, i, c, strict)
1005
1006
1008
#define UTF_BACK_1_UNSAFE(s, i) UTF16_BACK_1_UNSAFE(s, i)
1009
1011
#define UTF_BACK_1_SAFE(s, start, i) UTF16_BACK_1_SAFE(s, start, i)
1012
1013
1015
#define UTF_BACK_N_UNSAFE(s, i, n) UTF16_BACK_N_UNSAFE(s, i, n)
1016
1018
#define UTF_BACK_N_SAFE(s, start, i, n) UTF16_BACK_N_SAFE(s, start, i, n)
1019
1020
1022
#define UTF_SET_CHAR_LIMIT_UNSAFE(s, i) UTF16_SET_CHAR_LIMIT_UNSAFE(s, i)
1023
1025
#define UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length) UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length)
1026
1027
/* Define default macros (UTF-16 "safe") ------------------------------------ */
1028
1034
#define UTF_IS_SINGLE(uchar) U16_IS_SINGLE(uchar)
1035
1041
#define UTF_IS_LEAD(uchar) U16_IS_LEAD(uchar)
1042
1048
#define UTF_IS_TRAIL(uchar) U16_IS_TRAIL(uchar)
1049
1055
#define UTF_NEED_MULTIPLE_UCHAR(c) UTF16_NEED_MULTIPLE_UCHAR(c)
1056
1062
#define UTF_CHAR_LENGTH(c) U16_LENGTH(c)
1063
1069
#define UTF_MAX_CHAR_LENGTH U16_MAX_LENGTH
1070
1080
#define UTF_GET_CHAR(s, start, i, length, c) U16_GET(s, start, i, length, c)
1081
1093
#define UTF_NEXT_CHAR(s, i, length, c) U16_NEXT(s, i, length, c)
1094
1106
#define UTF_APPEND_CHAR(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c)
1107
1117
#define UTF_FWD_1(s, i, length) U16_FWD_1(s, i, length)
1118
1128
#define UTF_FWD_N(s, i, length, n) U16_FWD_N(s, i, length, n)
1129
1144
#define UTF_SET_CHAR_START(s, start, i) U16_SET_CP_START(s, start, i)
1145
1157
#define UTF_PREV_CHAR(s, start, i, c) U16_PREV(s, start, i, c)
1158
1170
#define UTF_BACK_1(s, start, i) U16_BACK_1(s, start, i)
1171
1183
#define UTF_BACK_N(s, start, i, n) U16_BACK_N(s, start, i, n)
1184
1199
#define UTF_SET_CHAR_LIMIT(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length)
1200
1201
#endif
// !U_HIDE_DEPRECATED_API && !U_HIDE_OBSOLETE_UTF_OLD_H
1202
1203
#endif
U_IMPORT
#define U_IMPORT
Definition
platform.h:798
U_CAPI
#define U_CAPI
This is used to declare a function as a public ICU C API.
Definition
umachine.h:110
U_CFUNC
#define U_CFUNC
This is used in a declaration of a library private ICU C function.
Definition
umachine.h:84
utf16.h
C API: 16-bit Unicode handling macros.
utf8.h
C API: 8-bit Unicode handling macros.
utf.h
C API: Code point macros.
utf8_countTrailBytes
U_CFUNC U_IMPORT const uint8_t utf8_countTrailBytes[]
Internal array with numbers of trail bytes for any given byte used in lead byte position.
Definition
utf_old.h:298
Generated by
1.17.0