Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/include/unicode/utf16.h @ 69:33d812a61356
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 17:55:14 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 69:33d812a61356 |
---|---|
1 // © 2016 and later: Unicode, Inc. and others. | |
2 // License & terms of use: http://www.unicode.org/copyright.html | |
3 /* | |
4 ******************************************************************************* | |
5 * | |
6 * Copyright (C) 1999-2012, International Business Machines | |
7 * Corporation and others. All Rights Reserved. | |
8 * | |
9 ******************************************************************************* | |
10 * file name: utf16.h | |
11 * encoding: UTF-8 | |
12 * tab size: 8 (not used) | |
13 * indentation:4 | |
14 * | |
15 * created on: 1999sep09 | |
16 * created by: Markus W. Scherer | |
17 */ | |
18 | |
19 /** | |
20 * \file | |
21 * \brief C API: 16-bit Unicode handling macros | |
22 * | |
23 * This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings. | |
24 * | |
25 * For more information see utf.h and the ICU User Guide Strings chapter | |
26 * (https://unicode-org.github.io/icu/userguide/strings). | |
27 * | |
28 * <em>Usage:</em> | |
29 * ICU coding guidelines for if() statements should be followed when using these macros. | |
30 * Compound statements (curly braces {}) must be used for if-else-while... | |
31 * bodies and all macro statements should be terminated with semicolon. | |
32 */ | |
33 | |
34 #ifndef __UTF16_H__ | |
35 #define __UTF16_H__ | |
36 | |
37 #include "unicode/umachine.h" | |
38 #ifndef __UTF_H__ | |
39 # include "unicode/utf.h" | |
40 #endif | |
41 | |
42 /* single-code point definitions -------------------------------------------- */ | |
43 | |
/**
 * Tests whether a single code unit encodes a complete code point by itself,
 * i.e. whether it is a BMP code unit that is not a surrogate.
 * The expansion is fully parenthesized so that it can be embedded safely
 * in any larger expression.
 * @param c 16-bit code unit
 * @return TRUE or FALSE
 * @stable ICU 2.4
 */
#define U16_IS_SINGLE(c) (!U_IS_SURROGATE(c))
51 | |
/**
 * Tests whether a code unit is a lead (first) surrogate, U+d800..U+dbff.
 * All bits above the low 10 must match the lead-surrogate prefix 0xd800.
 * @param c 16-bit code unit
 * @return TRUE or FALSE
 * @stable ICU 2.4
 */
#define U16_IS_LEAD(c) ((((c)&0xfffffc00)^0xd800)==0)
59 | |
/**
 * Tests whether a code unit is a trail (second) surrogate, U+dc00..U+dfff.
 * All bits above the low 10 must match the trail-surrogate prefix 0xdc00.
 * @param c 16-bit code unit
 * @return TRUE or FALSE
 * @stable ICU 2.4
 */
#define U16_IS_TRAIL(c) (0xdc00==((c)&0xfffffc00))
67 | |
/**
 * Tests whether a code unit is any surrogate, lead or trail (U+d800..U+dfff).
 * This is the UTF-16 name for U_IS_SURROGATE, to which it forwards.
 * @param c 16-bit code unit
 * @return TRUE or FALSE
 * @stable ICU 2.4
 */
#define U16_IS_SURROGATE(c) (U_IS_SURROGATE(c))
75 | |
/**
 * Given a code unit already known to be a surrogate (U16_IS_SURROGATE(c)),
 * tests whether it is the lead half of a pair.
 * Only bit 10 (0x400) is inspected, so the result is meaningless for
 * values that are not surrogates.
 * @param c 16-bit code unit
 * @return TRUE or FALSE
 * @stable ICU 2.4
 */
#define U16_IS_SURROGATE_LEAD(c) (!((c)&0x400))
84 | |
/**
 * Given a code unit already known to be a surrogate (U16_IS_SURROGATE(c)),
 * tests whether it is the trail half of a pair.
 * Only bit 10 (0x400) is inspected, so the result is meaningless for
 * values that are not surrogates.
 * @param c 16-bit code unit
 * @return TRUE or FALSE
 * @stable ICU 4.2
 */
#define U16_IS_SURROGATE_TRAIL(c) (0!=((c)&0x400))
93 | |
/**
 * Helper constant for U16_GET_SUPPLEMENTARY: the value that must be
 * subtracted from (lead<<10)+trail to obtain the supplementary code point.
 * It equals (0xd800<<10)+0xdc00-0x10000.
 * @internal
 */
#define U16_SURROGATE_OFFSET ((0xd800<<10UL)-(0x10000-0xdc00))
99 | |
/**
 * Combines a lead and a trail surrogate into the supplementary code point
 * (U+10000..U+10ffff) that the pair encodes.
 * The inputs are not validated: the result is undefined unless lead is a
 * lead surrogate and trail is a trail surrogate.
 *
 * @param lead lead surrogate (U+d800..U+dbff)
 * @param trail trail surrogate (U+dc00..U+dfff)
 * @return supplementary code point (U+10000..U+10ffff)
 * @stable ICU 2.4
 */
#define U16_GET_SUPPLEMENTARY(lead, trail) \
    ((UChar32)(trail)+((UChar32)(lead)<<10UL)-U16_SURROGATE_OFFSET)
113 | |
114 | |
/**
 * Computes the lead surrogate (0xd800..0xdbff) of the surrogate pair that
 * encodes a supplementary code point (0x10000..0x10ffff).
 * The whole expansion is parenthesized so that the cast cannot be split
 * from its operand by the surrounding expression (e.g. under sizeof).
 * @param supplementary 32-bit code point (U+10000..U+10ffff)
 * @return lead surrogate (U+d800..U+dbff) for supplementary
 * @stable ICU 2.4
 */
#define U16_LEAD(supplementary) ((UChar)(((supplementary)>>10)+0xd7c0))
123 | |
/**
 * Computes the trail surrogate (0xdc00..0xdfff) of the surrogate pair that
 * encodes a supplementary code point (0x10000..0x10ffff).
 * The whole expansion is parenthesized so that the cast cannot be split
 * from its operand by the surrounding expression (e.g. under sizeof).
 * @param supplementary 32-bit code point (U+10000..U+10ffff)
 * @return trail surrogate (U+dc00..U+dfff) for supplementary
 * @stable ICU 2.4
 */
#define U16_TRAIL(supplementary) ((UChar)(((supplementary)&0x3ff)|0xdc00))
132 | |
/**
 * Returns the number of 16-bit code units needed to encode a code point:
 * 1 for values up to 0xffff, otherwise 2.
 * The argument is compared as an unsigned 32-bit value.
 * The result is not defined if c is not a Unicode code point (U+0000..U+10ffff).
 * @param c 32-bit code point
 * @return 1 or 2
 * @stable ICU 2.4
 */
#define U16_LENGTH(c) ((uint32_t)(c)>0xffff ? 2 : 1)
141 | |
/**
 * Upper bound on the number of 16-bit code units that encode one Unicode
 * code point (U+0000..U+10ffff): a surrogate pair needs two.
 * @return 2
 * @stable ICU 2.4
 */
#define U16_MAX_LENGTH 2
148 | |
/**
 * Reads the code point that contains the code unit at offset i.
 * The offset itself is left unchanged (random access).
 * "Unsafe" macro: assumes well-formed UTF-16 and checks no string boundaries.
 *
 * If the unit at i is a lead surrogate, it is combined with the unit at i+1;
 * if it is a trail surrogate, it is combined with the unit at i-1.
 * The result is undefined if the offset points to a single, unpaired surrogate.
 * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U16_GET
 * @stable ICU 2.4
 */
#define U16_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[i]; \
    if(U16_IS_SURROGATE(c)) { \
        (c)=U16_IS_SURROGATE_LEAD(c) ? \
            U16_GET_SUPPLEMENTARY((c), (s)[(i)+1]) : \
            U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)); \
    } \
} UPRV_BLOCK_MACRO_END
176 | |
/**
 * Reads the code point that contains the code unit at offset i.
 * The offset itself is left unchanged (random access).
 * "Safe" macro: handles unpaired surrogates and checks for string boundaries.
 *
 * If the unit at i is a lead surrogate and a trail surrogate follows within
 * the string, or it is a trail surrogate and a lead surrogate precedes it at
 * or after start, the pair is combined into a supplementary code point.
 * Otherwise c is set to the code unit at i, which may be an unpaired surrogate.
 *
 * The length can be negative for a NUL-terminated string.
 * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<=i<length
 * @param length string length
 * @param c output UChar32 variable
 * @see U16_GET_UNSAFE
 * @stable ICU 2.4
 */
#define U16_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[i]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __other; \
        if(U16_IS_SURROGATE_LEAD(c)) { \
            /* lead unit: look one unit ahead unless the string ends here */ \
            if((i)+1!=(length)) { \
                __other=(s)[(i)+1]; \
                if(U16_IS_TRAIL(__other)) { \
                    (c)=U16_GET_SUPPLEMENTARY((c), __other); \
                } \
            } \
        } else if((i)>(start)) { \
            /* trail unit: look one unit back unless already at the start */ \
            __other=(s)[(i)-1]; \
            if(U16_IS_LEAD(__other)) { \
                (c)=U16_GET_SUPPLEMENTARY(__other, (c)); \
            } \
        } \
    } \
} UPRV_BLOCK_MACRO_END
215 | |
/**
 * Reads the code point that contains the code unit at offset i.
 * The offset itself is left unchanged (random access).
 * "Safe" macro: handles unpaired surrogates and checks for string boundaries.
 *
 * If the unit at i is a lead surrogate and a trail surrogate follows within
 * the string, or it is a trail surrogate and a lead surrogate precedes it at
 * or after start, the pair is combined into a supplementary code point.
 * If the offset points to a single, unpaired surrogate, c is set to U+FFFD.
 *
 * The length can be negative for a NUL-terminated string.
 * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT_OR_FFFD.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<=i<length
 * @param length string length
 * @param c output UChar32 variable
 * @see U16_GET_UNSAFE
 * @stable ICU 60
 */
#define U16_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[i]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __other; \
        if(U16_IS_SURROGATE_LEAD(c)) { \
            /* pair with the following unit, else substitute U+FFFD */ \
            (c)=((i)+1!=(length) && U16_IS_TRAIL(__other=(s)[(i)+1])) ? \
                U16_GET_SUPPLEMENTARY((c), __other) : 0xfffd; \
        } else { \
            /* pair with the preceding unit, else substitute U+FFFD */ \
            (c)=((i)>(start) && U16_IS_LEAD(__other=(s)[(i)-1])) ? \
                U16_GET_SUPPLEMENTARY(__other, (c)) : 0xfffd; \
        } \
    } \
} UPRV_BLOCK_MACRO_END
258 | |
259 /* definitions with forward iteration --------------------------------------- */ | |
260 | |
/**
 * Reads the code point that starts at offset i and advances i past it
 * to the next code point boundary (post-incrementing forward iteration).
 * "Unsafe" macro: assumes well-formed UTF-16 and checks no string boundaries.
 *
 * If the unit at i is a lead surrogate, the following unit is consumed as
 * well and combined with it into a supplementary code point.
 * If the offset points to a trail surrogate, that unit itself is returned
 * as the code point.
 * The result is undefined if the offset points to a single, unpaired lead surrogate.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U16_NEXT
 * @stable ICU 2.4
 */
#define U16_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_LEAD((c)=(s)[(i)++])) { \
        (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)++]); \
    } \
} UPRV_BLOCK_MACRO_END
286 | |
/**
 * Reads the code point that starts at offset i and advances i past it
 * to the next code point boundary (post-incrementing forward iteration).
 * "Safe" macro: handles unpaired surrogates and checks for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * If the unit at i is a lead surrogate and is followed, before the end of
 * the string, by a trail surrogate, both units are consumed and combined.
 * If the offset points to a trail surrogate or to a single, unpaired lead
 * surrogate, then c is set to that unpaired surrogate and i advances by one.
 *
 * @param s const UChar * string
 * @param i string offset, must be i<length
 * @param length string length
 * @param c output UChar32 variable
 * @see U16_NEXT_UNSAFE
 * @stable ICU 2.4
 */
#define U16_NEXT(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[(i)++]; \
    if(U16_IS_LEAD(c) && (i)!=(length)) { \
        uint16_t __next=(s)[(i)]; \
        if(U16_IS_TRAIL(__next)) { \
            (c)=U16_GET_SUPPLEMENTARY((c), __next); \
            ++(i); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
318 | |
/**
 * Reads the code point that starts at offset i and advances i past it
 * to the next code point boundary (post-incrementing forward iteration).
 * "Safe" macro: handles unpaired surrogates and checks for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * If the unit at i is a lead surrogate and is followed, before the end of
 * the string, by a trail surrogate, both units are consumed and combined.
 * If the offset points to a trail surrogate or to a single, unpaired lead
 * surrogate, then c is set to U+FFFD and i advances by one.
 *
 * @param s const UChar * string
 * @param i string offset, must be i<length
 * @param length string length
 * @param c output UChar32 variable
 * @see U16_NEXT_UNSAFE
 * @stable ICU 60
 */
#define U16_NEXT_OR_FFFD(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[(i)++]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __next; \
        if(!U16_IS_SURROGATE_LEAD(c) || (i)==(length) || !U16_IS_TRAIL(__next=(s)[(i)])) { \
            /* stray trail, lead at the end of the string, or lead without a trail */ \
            (c)=0xfffd; \
        } else { \
            (c)=U16_GET_SUPPLEMENTARY((c), __next); \
            ++(i); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
352 | |
/**
 * Appends a code point to a string buffer, writing 1 or 2 code units at
 * offset i and advancing i past them (post-increment).
 * The offset points to the current end of the string contents.
 * "Unsafe" macro: assumes a valid code point and sufficient space in the
 * buffer. Otherwise, the result is undefined.
 *
 * @param s UChar * string buffer (written to)
 * @param i string offset
 * @param c code point to append
 * @see U16_APPEND
 * @stable ICU 2.4
 */
#define U16_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    if((uint32_t)(c)>0xffff) { \
        /* supplementary code point: lead surrogate, then trail surrogate */ \
        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
    } else { \
        (s)[(i)++]=(uint16_t)(c); \
    } \
} UPRV_BLOCK_MACRO_END
374 | |
/**
 * Appends a code point to a string buffer, writing 1 or 2 code units at
 * offset i and advancing i past them (post-increment).
 * The offset points to the current end of the string contents.
 * "Safe" macro: checks for a valid code point.
 * If a surrogate pair is to be written, checks that both units fit.
 * If the code point is greater than 0x10ffff or the trail surrogate does
 * not fit, nothing is written and isError is set to TRUE.
 * A single code unit is written without a capacity check, so the caller
 * must guarantee i<capacity.
 *
 * @param s UChar * string buffer (written to)
 * @param i string offset, must be i<capacity
 * @param capacity size of the string buffer
 * @param c code point to append
 * @param isError output UBool set to TRUE if an error occurs, otherwise not modified
 * @see U16_APPEND_UNSAFE
 * @stable ICU 2.4
 */
#define U16_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \
    if((uint32_t)(c)>0xffff) { \
        if((uint32_t)(c)>0x10ffff || (i)+1>=(capacity)) { \
            /* not a code point, or no room for both surrogates */ \
            (isError)=TRUE; \
        } else { \
            (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
            (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
        } \
    } else { \
        (s)[(i)++]=(uint16_t)(c); \
    } \
} UPRV_BLOCK_MACRO_END
402 | |
/**
 * Advances the string offset by one code point, from one code point
 * boundary to the next (post-incrementing iteration).
 * The offset moves by two units if the unit at i is a lead surrogate,
 * otherwise by one.
 * "Unsafe" macro: assumes well-formed UTF-16 and checks no string boundaries.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_FWD_1
 * @stable ICU 2.4
 */
#define U16_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    const int __wasLead=U16_IS_LEAD((s)[(i)++]); \
    if(__wasLead) { \
        ++(i); \
    } \
} UPRV_BLOCK_MACRO_END
418 | |
/**
 * Advances the string offset by one code point, from one code point
 * boundary to the next (post-incrementing iteration).
 * The offset moves by two units only if the unit at i is a lead surrogate
 * that is followed, before the end of the string, by a trail surrogate;
 * otherwise it moves by one.
 * "Safe" macro: handles unpaired surrogates and checks for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * @param s const UChar * string
 * @param i string offset, must be i<length
 * @param length string length
 * @see U16_FWD_1_UNSAFE
 * @stable ICU 2.4
 */
#define U16_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_LEAD((s)[(i)++])) { \
        /* also skip the trail unit if one follows inside the string */ \
        if((i)!=(length) && U16_IS_TRAIL((s)[i])) { \
            ++(i); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
437 | |
/**
 * Advances the string offset by n code points, from one code point boundary
 * to the n-th next one (post-incrementing iteration).
 * Nothing happens if n is zero or negative.
 * "Unsafe" macro: assumes well-formed UTF-16 and checks no string boundaries.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param n number of code points to skip
 * @see U16_FWD_N
 * @stable ICU 2.4
 */
#define U16_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __left; \
    for(__left=(n); __left>0; --__left) { \
        U16_FWD_1_UNSAFE(s, i); \
    } \
} UPRV_BLOCK_MACRO_END
457 | |
/**
 * Advances the string offset by up to n code points, from one code point
 * boundary to the n-th next one (post-incrementing iteration).
 * Iteration stops early when i reaches length or, for a negative length,
 * when the unit at i is NUL.
 * "Safe" macro: handles unpaired surrogates and checks for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * @param s const UChar * string
 * @param i int32_t string offset, must be i<length
 * @param length int32_t string length
 * @param n number of code points to skip
 * @see U16_FWD_N_UNSAFE
 * @stable ICU 2.4
 */
#define U16_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __left; \
    for(__left=(n); \
        __left>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0)); \
        --__left) { \
        U16_FWD_1(s, i, length); \
    } \
} UPRV_BLOCK_MACRO_END
480 | |
/**
 * Moves a random-access offset back to the start of the code point that
 * contains it.
 * If the unit at i is a trail surrogate, the offset is decremented;
 * otherwise it is not modified.
 * "Unsafe" macro: assumes well-formed UTF-16 and checks no string boundaries.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_SET_CP_START
 * @stable ICU 2.4
 */
#define U16_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    const int __onTrail=U16_IS_TRAIL((s)[i]); \
    if(__onTrail) { \
        --(i); \
    } \
} UPRV_BLOCK_MACRO_END
499 | |
/**
 * Moves a random-access offset back to the start of the code point that
 * contains it.
 * If the unit at i is a trail surrogate and the unit before it, at or
 * after start, is a lead surrogate, the offset is decremented;
 * otherwise it is not modified.
 * "Safe" macro: handles unpaired surrogates and checks for string boundaries.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<=i
 * @see U16_SET_CP_START_UNSAFE
 * @stable ICU 2.4
 */
#define U16_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_TRAIL((s)[i])) { \
        /* step back only onto a matching lead unit inside the string */ \
        if((i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
            --(i); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
519 | |
520 /* definitions with backward iteration -------------------------------------- */ | |
521 | |
/**
 * Moves the string offset back to the previous code point boundary and
 * reads the code point between the two boundaries
 * (pre-decrementing backward iteration).
 * "Unsafe" macro: assumes well-formed UTF-16 and checks no string boundaries.
 *
 * The input offset may be the same as the string length.
 * If the offset is behind a trail surrogate, the preceding unit is consumed
 * as well and combined with it into a supplementary code point.
 * If the offset is behind a lead surrogate, that unit itself is returned
 * as the code point.
 * The result is undefined if the offset is behind a single, unpaired trail surrogate.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U16_PREV
 * @stable ICU 2.4
 */
#define U16_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_TRAIL((c)=(s)[--(i)])) { \
        (c)=U16_GET_SUPPLEMENTARY((s)[--(i)], (c)); \
    } \
} UPRV_BLOCK_MACRO_END
548 | |
/**
 * Moves the string offset back to the previous code point boundary and
 * reads the code point between the two boundaries
 * (pre-decrementing backward iteration).
 * "Safe" macro: handles unpaired surrogates and checks for string boundaries.
 *
 * The input offset may be the same as the string length.
 * If the offset is behind a trail surrogate that is preceded, at or after
 * start, by a lead surrogate, both units are consumed and combined.
 * If the offset is behind a lead surrogate or behind a single, unpaired
 * trail surrogate, then c is set to that unpaired surrogate and i moves
 * back by one.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<i
 * @param c output UChar32 variable
 * @see U16_PREV_UNSAFE
 * @stable ICU 2.4
 */
#define U16_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[--(i)]; \
    if(U16_IS_TRAIL(c) && (i)>(start)) { \
        uint16_t __prev=(s)[(i)-1]; \
        if(U16_IS_LEAD(__prev)) { \
            (c)=U16_GET_SUPPLEMENTARY(__prev, (c)); \
            --(i); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
579 | |
/**
 * Moves the string offset back to the previous code point boundary and
 * reads the code point between the two boundaries
 * (pre-decrementing backward iteration).
 * "Safe" macro: handles unpaired surrogates and checks for string boundaries.
 *
 * The input offset may be the same as the string length.
 * If the offset is behind a trail surrogate that is preceded, at or after
 * start, by a lead surrogate, both units are consumed and combined.
 * If the offset is behind a lead surrogate or behind a single, unpaired
 * trail surrogate, then c is set to U+FFFD and i moves back by one.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<i
 * @param c output UChar32 variable
 * @see U16_PREV_UNSAFE
 * @stable ICU 60
 */
#define U16_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[--(i)]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __prev; \
        if(!U16_IS_SURROGATE_TRAIL(c) || (i)<=(start) || !U16_IS_LEAD(__prev=(s)[(i)-1])) { \
            /* stray lead, trail at the string start, or trail without a lead */ \
            (c)=0xfffd; \
        } else { \
            (c)=U16_GET_SUPPLEMENTARY(__prev, (c)); \
            --(i); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
612 | |
/**
 * Moves the string offset back by one code point, from one code point
 * boundary to the previous one (pre-decrementing backward iteration).
 * The offset moves by two units if the unit before it is a trail surrogate,
 * otherwise by one.
 * The input offset may be the same as the string length.
 * "Unsafe" macro: assumes well-formed UTF-16 and checks no string boundaries.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_BACK_1
 * @stable ICU 2.4
 */
#define U16_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    const int __wasTrail=U16_IS_TRAIL((s)[--(i)]); \
    if(__wasTrail) { \
        --(i); \
    } \
} UPRV_BLOCK_MACRO_END
629 | |
/**
 * Moves the string offset back by one code point, from one code point
 * boundary to the previous one (pre-decrementing backward iteration).
 * The offset moves by two units only if the unit before it is a trail
 * surrogate that is preceded, at or after start, by a lead surrogate;
 * otherwise it moves by one.
 * The input offset may be the same as the string length.
 * "Safe" macro: handles unpaired surrogates and checks for string boundaries.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<i
 * @see U16_BACK_1_UNSAFE
 * @stable ICU 2.4
 */
#define U16_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_TRAIL((s)[--(i)])) { \
        /* also skip the lead unit if one precedes inside the string */ \
        if((i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
            --(i); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
647 | |
/**
 * Moves the string offset back by n code points, from one code point
 * boundary to the n-th one before it (pre-decrementing backward iteration).
 * Nothing happens if n is zero or negative.
 * The input offset may be the same as the string length.
 * "Unsafe" macro: assumes well-formed UTF-16 and checks no string boundaries.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param n number of code points to skip
 * @see U16_BACK_N
 * @stable ICU 2.4
 */
#define U16_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __left; \
    for(__left=(n); __left>0; --__left) { \
        U16_BACK_1_UNSAFE(s, i); \
    } \
} UPRV_BLOCK_MACRO_END
668 | |
/**
 * Moves the string offset back by up to n code points, from one code point
 * boundary to the n-th one before it (pre-decrementing backward iteration).
 * Iteration stops early when i reaches start.
 * The input offset may be the same as the string length.
 * "Safe" macro: handles unpaired surrogates and checks for string boundaries.
 *
 * @param s const UChar * string
 * @param start start of string
 * @param i string offset, must be start<i
 * @param n number of code points to skip
 * @see U16_BACK_N_UNSAFE
 * @stable ICU 2.4
 */
#define U16_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __left; \
    for(__left=(n); __left>0 && (i)>(start); --__left) { \
        U16_BACK_1(s, start, i); \
    } \
} UPRV_BLOCK_MACRO_END
690 | |
/**
 * Moves a random-access offset forward to the boundary after the code point
 * that it splits.
 * If the unit before the offset is a lead surrogate, the offset is
 * incremented; otherwise it is not modified.
 * The input offset may be the same as the string length.
 * "Unsafe" macro: assumes well-formed UTF-16 and checks no string boundaries.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_SET_CP_LIMIT
 * @stable ICU 2.4
 */
#define U16_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    const int __afterLead=U16_IS_LEAD((s)[(i)-1]); \
    if(__afterLead) { \
        ++(i); \
    } \
} UPRV_BLOCK_MACRO_END
709 | |
/**
 * Moves a random-access offset forward to the boundary after the code point
 * that it splits.
 * The offset is incremented only if it lies strictly inside the string
 * (start<i, and i<length or length is negative), the unit before it is a
 * lead surrogate and the unit at it is a trail surrogate;
 * otherwise it is not modified.
 * The input offset may be the same as the string length.
 * "Safe" macro: handles unpaired surrogates and checks for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * @param s const UChar * string
 * @param start int32_t starting string offset (usually 0)
 * @param i int32_t string offset, start<=i<=length
 * @param length int32_t string length
 * @see U16_SET_CP_LIMIT_UNSAFE
 * @stable ICU 2.4
 */
#define U16_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    if((start)<(i) && ((i)<(length) || (length)<0)) { \
        /* both neighbours are inside the string: check for a split pair */ \
        if(U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \
            ++(i); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
732 | |
733 #endif |