annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/include/unicode/utf16.h @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
rev   line source
jpayne@69 1 // © 2016 and later: Unicode, Inc. and others.
jpayne@69 2 // License & terms of use: http://www.unicode.org/copyright.html
jpayne@69 3 /*
jpayne@69 4 *******************************************************************************
jpayne@69 5 *
jpayne@69 6 * Copyright (C) 1999-2012, International Business Machines
jpayne@69 7 * Corporation and others. All Rights Reserved.
jpayne@69 8 *
jpayne@69 9 *******************************************************************************
jpayne@69 10 * file name: utf16.h
jpayne@69 11 * encoding: UTF-8
jpayne@69 12 * tab size: 8 (not used)
jpayne@69 13 * indentation:4
jpayne@69 14 *
jpayne@69 15 * created on: 1999sep09
jpayne@69 16 * created by: Markus W. Scherer
jpayne@69 17 */
jpayne@69 18
jpayne@69 19 /**
jpayne@69 20 * \file
jpayne@69 21 * \brief C API: 16-bit Unicode handling macros
jpayne@69 22 *
jpayne@69 23 * This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings.
jpayne@69 24 *
jpayne@69 25 * For more information see utf.h and the ICU User Guide Strings chapter
jpayne@69 26 * (https://unicode-org.github.io/icu/userguide/strings).
jpayne@69 27 *
jpayne@69 28 * <em>Usage:</em>
jpayne@69 29 * ICU coding guidelines for if() statements should be followed when using these macros.
jpayne@69 30 * Compound statements (curly braces {}) must be used for if-else-while...
jpayne@69 31 * bodies and all macro statements should be terminated with semicolon.
jpayne@69 32 */
jpayne@69 33
jpayne@69 34 #ifndef __UTF16_H__
jpayne@69 35 #define __UTF16_H__
jpayne@69 36
jpayne@69 37 #include "unicode/umachine.h"
jpayne@69 38 #ifndef __UTF_H__
jpayne@69 39 # include "unicode/utf.h"
jpayne@69 40 #endif
jpayne@69 41
jpayne@69 42 /* single-code point definitions -------------------------------------------- */
jpayne@69 43
jpayne@69 44 /**
jpayne@69 45 * Does this code unit alone encode a code point (BMP, not a surrogate)?
jpayne@69 46 * @param c 16-bit code unit, forwarded unchanged to U_IS_SURROGATE (not defined in this file)
jpayne@69 47 * @return TRUE if U_IS_SURROGATE(c) is false, otherwise FALSE
jpayne@69 48 * @stable ICU 2.4
jpayne@69 49 */
jpayne@69 50 #define U16_IS_SINGLE(c) !U_IS_SURROGATE(c)
jpayne@69 51
jpayne@69 52 /**
jpayne@69 53 * Is this code unit a lead surrogate (U+d800..U+dbff)?
jpayne@69 54 * @param c 16-bit code unit; its low 10 bits are masked off and the rest is compared with 0xd800
jpayne@69 55 * @return TRUE if c is in 0xd800..0xdbff, otherwise FALSE
jpayne@69 56 * @stable ICU 2.4
jpayne@69 57 */
jpayne@69 58 #define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800)
jpayne@69 59
jpayne@69 60 /**
jpayne@69 61 * Is this code unit a trail surrogate (U+dc00..U+dfff)?
jpayne@69 62 * @param c 16-bit code unit; its low 10 bits are masked off and the rest is compared with 0xdc00
jpayne@69 63 * @return TRUE if c is in 0xdc00..0xdfff, otherwise FALSE
jpayne@69 64 * @stable ICU 2.4
jpayne@69 65 */
jpayne@69 66 #define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00)
jpayne@69 67
jpayne@69 68 /**
jpayne@69 69 * Is this code unit a surrogate (U+d800..U+dfff)?
jpayne@69 70 * @param c 16-bit code unit
jpayne@69 71 * @return TRUE or FALSE, exactly as computed by U_IS_SURROGATE(c) (defined outside this file)
jpayne@69 72 * @stable ICU 2.4
jpayne@69 73 */
jpayne@69 74 #define U16_IS_SURROGATE(c) U_IS_SURROGATE(c)
jpayne@69 75
jpayne@69 76 /**
jpayne@69 77 * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)),
jpayne@69 78 * is it a lead surrogate? Only bit 0x400 is tested, so the answer is meaningless for non-surrogates.
jpayne@69 79 * @param c 16-bit code unit
jpayne@69 80 * @return TRUE if bit 0x400 of c is clear, otherwise FALSE
jpayne@69 81 * @stable ICU 2.4
jpayne@69 82 */
jpayne@69 83 #define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)
jpayne@69 84
jpayne@69 85 /**
jpayne@69 86 * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)),
jpayne@69 87 * is it a trail surrogate? Only bit 0x400 is tested, so the answer is meaningless for non-surrogates.
jpayne@69 88 * @param c 16-bit code unit
jpayne@69 89 * @return TRUE if bit 0x400 of c is set, otherwise FALSE
jpayne@69 90 * @stable ICU 4.2
jpayne@69 91 */
jpayne@69 92 #define U16_IS_SURROGATE_TRAIL(c) (((c)&0x400)!=0)
jpayne@69 93
jpayne@69 94 /**
jpayne@69 95 * Helper constant for U16_GET_SUPPLEMENTARY: the combined bias that macro subtracts from (lead<<10)+trail to obtain the code point.
jpayne@69 96 * @internal
jpayne@69 97 */
jpayne@69 98 #define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
jpayne@69 99
jpayne@69 100 /**
jpayne@69 101 * Get a supplementary code point value (U+10000..U+10ffff)
jpayne@69 102 * from its lead and trail surrogates.
jpayne@69 103 * The result is undefined if the input values are not
jpayne@69 104 * lead and trail surrogates.
jpayne@69 105 * Computed as (lead<<10)+trail-U16_SURROGATE_OFFSET after casting both inputs to UChar32; nothing is validated.
jpayne@69 106 * @param lead lead surrogate (U+d800..U+dbff)
jpayne@69 107 * @param trail trail surrogate (U+dc00..U+dfff)
jpayne@69 108 * @return supplementary code point (U+10000..U+10ffff)
jpayne@69 109 * @stable ICU 2.4
jpayne@69 110 */
jpayne@69 111 #define U16_GET_SUPPLEMENTARY(lead, trail) \
jpayne@69 112 (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET)
jpayne@69 113
jpayne@69 114
jpayne@69 115 /**
jpayne@69 116 * Get the lead surrogate (0xd800..0xdbff) for a
jpayne@69 117 * supplementary code point (0x10000..0x10ffff).
jpayne@69 118 * @param supplementary 32-bit code point (U+10000..U+10ffff); not range-checked by the macro
jpayne@69 119 * @return lead surrogate (U+d800..U+dbff) for supplementary, i.e. (supplementary>>10)+0xd7c0 cast to UChar
jpayne@69 120 * @stable ICU 2.4
jpayne@69 121 */
jpayne@69 122 #define U16_LEAD(supplementary) (UChar)(((supplementary)>>10)+0xd7c0)
jpayne@69 123
jpayne@69 124 /**
jpayne@69 125 * Get the trail surrogate (0xdc00..0xdfff) for a
jpayne@69 126 * supplementary code point (0x10000..0x10ffff).
jpayne@69 127 * @param supplementary 32-bit code point (U+10000..U+10ffff); not range-checked by the macro
jpayne@69 128 * @return trail surrogate (U+dc00..U+dfff) for supplementary, i.e. its low 10 bits ORed with 0xdc00, cast to UChar
jpayne@69 129 * @stable ICU 2.4
jpayne@69 130 */
jpayne@69 131 #define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00)
jpayne@69 132
jpayne@69 133 /**
jpayne@69 134 * How many 16-bit code units are used to encode this Unicode code point? (1 or 2)
jpayne@69 135 * The result is not defined if c is not a Unicode code point (U+0000..U+10ffff).
jpayne@69 136 * @param c 32-bit code point; compared as uint32_t, so a negative value takes the "2" branch
jpayne@69 137 * @return 1 if c<=0xffff, otherwise 2
jpayne@69 138 * @stable ICU 2.4
jpayne@69 139 */
jpayne@69 140 #define U16_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2)
jpayne@69 141
jpayne@69 142 /**
jpayne@69 143 * The maximum number of 16-bit code units per Unicode code point (U+0000..U+10ffff).
jpayne@69 144 * @return 2, the larger of the two values U16_LENGTH can produce
jpayne@69 145 * @stable ICU 2.4
jpayne@69 146 */
jpayne@69 147 #define U16_MAX_LENGTH 2
jpayne@69 148
jpayne@69 149 /**
jpayne@69 150 * Get a code point from a string at a random-access offset,
jpayne@69 151 * without changing the offset.
jpayne@69 152 * "Unsafe" macro, assumes well-formed UTF-16.
jpayne@69 153 * No bounds are checked: s[i+1] is read after a lead surrogate and s[i-1] before a trail surrogate.
jpayne@69 154 * The offset may point to either the lead or trail surrogate unit
jpayne@69 155 * for a supplementary code point, in which case the macro will read
jpayne@69 156 * the adjacent matching surrogate as well.
jpayne@69 157 * The result is undefined if the offset points to a single, unpaired surrogate.
jpayne@69 158 * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
jpayne@69 159 *
jpayne@69 160 * @param s const UChar * string
jpayne@69 161 * @param i string offset; not modified, but evaluated more than once
jpayne@69 162 * @param c output UChar32 variable; also used as scratch for the code unit read at i
jpayne@69 163 * @see U16_GET
jpayne@69 164 * @stable ICU 2.4
jpayne@69 165 */
jpayne@69 166 #define U16_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
jpayne@69 167 (c)=(s)[i]; \
jpayne@69 168 if(U16_IS_SURROGATE(c)) { \
jpayne@69 169 if(U16_IS_SURROGATE_LEAD(c)) { \
jpayne@69 170 (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)+1]); /* c is the lead: pair it with the following unit */ \
jpayne@69 171 } else { \
jpayne@69 172 (c)=U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)); /* c is the trail: pair it with the preceding unit */ \
jpayne@69 173 } \
jpayne@69 174 } \
jpayne@69 175 } UPRV_BLOCK_MACRO_END
jpayne@69 176
jpayne@69 177 /**
jpayne@69 178 * Get a code point from a string at a random-access offset,
jpayne@69 179 * without changing the offset.
jpayne@69 180 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
jpayne@69 181 *
jpayne@69 182 * The offset may point to either the lead or trail surrogate unit
jpayne@69 183 * for a supplementary code point, in which case the macro will read
jpayne@69 184 * the adjacent matching surrogate as well.
jpayne@69 185 *
jpayne@69 186 * The length can be negative for a NUL-terminated string; the end test is (i)+1!=(length), so presumably the terminator is then read and simply fails the trail check.
jpayne@69 187 *
jpayne@69 188 * If the offset points to a single, unpaired surrogate, then
jpayne@69 189 * c is set to that unpaired surrogate.
jpayne@69 190 * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
jpayne@69 191 *
jpayne@69 192 * @param s const UChar * string
jpayne@69 193 * @param start starting string offset (usually 0); s[i-1] is only read when i>start
jpayne@69 194 * @param i string offset, must be start<=i<length; not modified, but evaluated more than once
jpayne@69 195 * @param length string length
jpayne@69 196 * @param c output UChar32 variable
jpayne@69 197 * @see U16_GET_UNSAFE
jpayne@69 198 * @stable ICU 2.4
jpayne@69 199 */
jpayne@69 200 #define U16_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
jpayne@69 201 (c)=(s)[i]; \
jpayne@69 202 if(U16_IS_SURROGATE(c)) { \
jpayne@69 203 uint16_t __c2; \
jpayne@69 204 if(U16_IS_SURROGATE_LEAD(c)) { \
jpayne@69 205 if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { /* not at the end, and a trail follows */ \
jpayne@69 206 (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
jpayne@69 207 } \
jpayne@69 208 } else { \
jpayne@69 209 if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { /* not at the start, and a lead precedes */ \
jpayne@69 210 (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
jpayne@69 211 } \
jpayne@69 212 } \
jpayne@69 213 } \
jpayne@69 214 } UPRV_BLOCK_MACRO_END
jpayne@69 215
jpayne@69 216 /**
jpayne@69 217 * Get a code point from a string at a random-access offset,
jpayne@69 218 * without changing the offset.
jpayne@69 219 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
jpayne@69 220 *
jpayne@69 221 * The offset may point to either the lead or trail surrogate unit
jpayne@69 222 * for a supplementary code point, in which case the macro will read
jpayne@69 223 * the adjacent matching surrogate as well.
jpayne@69 224 *
jpayne@69 225 * The length can be negative for a NUL-terminated string; the end test is (i)+1!=(length), so presumably the terminator is then read and simply fails the trail check.
jpayne@69 226 *
jpayne@69 227 * If the offset points to a single, unpaired surrogate, then
jpayne@69 228 * c is set to U+FFFD.
jpayne@69 229 * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT_OR_FFFD.
jpayne@69 230 *
jpayne@69 231 * @param s const UChar * string
jpayne@69 232 * @param start starting string offset (usually 0); s[i-1] is only read when i>start
jpayne@69 233 * @param i string offset, must be start<=i<length; not modified, but evaluated more than once
jpayne@69 234 * @param length string length
jpayne@69 235 * @param c output UChar32 variable
jpayne@69 236 * @see U16_GET_UNSAFE
jpayne@69 237 * @stable ICU 60
jpayne@69 238 */
jpayne@69 239 #define U16_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
jpayne@69 240 (c)=(s)[i]; \
jpayne@69 241 if(U16_IS_SURROGATE(c)) { \
jpayne@69 242 uint16_t __c2; \
jpayne@69 243 if(U16_IS_SURROGATE_LEAD(c)) { \
jpayne@69 244 if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
jpayne@69 245 (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
jpayne@69 246 } else { \
jpayne@69 247 (c)=0xfffd; /* lead at the end of the string or not followed by a trail */ \
jpayne@69 248 } \
jpayne@69 249 } else { \
jpayne@69 250 if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
jpayne@69 251 (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
jpayne@69 252 } else { \
jpayne@69 253 (c)=0xfffd; /* trail at the start of the string or not preceded by a lead */ \
jpayne@69 254 } \
jpayne@69 255 } \
jpayne@69 256 } \
jpayne@69 257 } UPRV_BLOCK_MACRO_END
jpayne@69 258
jpayne@69 259 /* definitions with forward iteration --------------------------------------- */
jpayne@69 260
jpayne@69 261 /**
jpayne@69 262 * Get a code point from a string at a code point boundary offset,
jpayne@69 263 * and advance the offset to the next code point boundary.
jpayne@69 264 * (Post-incrementing forward iteration.)
jpayne@69 265 * "Unsafe" macro, assumes well-formed UTF-16.
jpayne@69 266 *
jpayne@69 267 * The offset may point to the lead surrogate unit
jpayne@69 268 * for a supplementary code point, in which case the macro will read
jpayne@69 269 * the following trail surrogate as well.
jpayne@69 270 * If the offset points to a trail surrogate, then that itself
jpayne@69 271 * will be returned as the code point.
jpayne@69 272 * The result is undefined if the offset points to a single, unpaired lead surrogate.
jpayne@69 273 *
jpayne@69 274 * @param s const UChar * string
jpayne@69 275 * @param i string offset; post-incremented once, and a second time when the unit read is a lead surrogate
jpayne@69 276 * @param c output UChar32 variable
jpayne@69 277 * @see U16_NEXT
jpayne@69 278 * @stable ICU 2.4
jpayne@69 279 */
jpayne@69 280 #define U16_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
jpayne@69 281 (c)=(s)[(i)++]; \
jpayne@69 282 if(U16_IS_LEAD(c)) { \
jpayne@69 283 (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)++]); /* the next unit is consumed without checking that it is a trail */ \
jpayne@69 284 } \
jpayne@69 285 } UPRV_BLOCK_MACRO_END
jpayne@69 286
jpayne@69 287 /**
jpayne@69 288 * Get a code point from a string at a code point boundary offset,
jpayne@69 289 * and advance the offset to the next code point boundary.
jpayne@69 290 * (Post-incrementing forward iteration.)
jpayne@69 291 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
jpayne@69 292 *
jpayne@69 293 * The length can be negative for a NUL-terminated string.
jpayne@69 294 *
jpayne@69 295 * The offset may point to the lead surrogate unit
jpayne@69 296 * for a supplementary code point, in which case the macro will read
jpayne@69 297 * the following trail surrogate as well.
jpayne@69 298 * If the offset points to a trail surrogate or
jpayne@69 299 * to a single, unpaired lead surrogate, then c is set to that unpaired surrogate.
jpayne@69 300 *
jpayne@69 301 * @param s const UChar * string
jpayne@69 302 * @param i string offset, must be i<length; advanced by 2 for a lead followed by a trail, otherwise by 1
jpayne@69 303 * @param length string length; compared with i using != only
jpayne@69 304 * @param c output UChar32 variable
jpayne@69 305 * @see U16_NEXT_UNSAFE
jpayne@69 306 * @stable ICU 2.4
jpayne@69 307 */
jpayne@69 308 #define U16_NEXT(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
jpayne@69 309 (c)=(s)[(i)++]; \
jpayne@69 310 if(U16_IS_LEAD(c)) { \
jpayne@69 311 uint16_t __c2; \
jpayne@69 312 if((i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { /* peek at the next unit; consume it only if it is a trail */ \
jpayne@69 313 ++(i); \
jpayne@69 314 (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
jpayne@69 315 } \
jpayne@69 316 } \
jpayne@69 317 } UPRV_BLOCK_MACRO_END
jpayne@69 318
jpayne@69 319 /**
jpayne@69 320 * Get a code point from a string at a code point boundary offset,
jpayne@69 321 * and advance the offset to the next code point boundary.
jpayne@69 322 * (Post-incrementing forward iteration.)
jpayne@69 323 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
jpayne@69 324 *
jpayne@69 325 * The length can be negative for a NUL-terminated string.
jpayne@69 326 *
jpayne@69 327 * The offset may point to the lead surrogate unit
jpayne@69 328 * for a supplementary code point, in which case the macro will read
jpayne@69 329 * the following trail surrogate as well.
jpayne@69 330 * If the offset points to a trail surrogate or
jpayne@69 331 * to a single, unpaired lead surrogate, then c is set to U+FFFD.
jpayne@69 332 *
jpayne@69 333 * @param s const UChar * string
jpayne@69 334 * @param i string offset, must be i<length; advanced by 2 for a lead followed by a trail, otherwise by 1 (also when U+FFFD is returned)
jpayne@69 335 * @param length string length; compared with i using != only
jpayne@69 336 * @param c output UChar32 variable
jpayne@69 337 * @see U16_NEXT_UNSAFE
jpayne@69 338 * @stable ICU 60
jpayne@69 339 */
jpayne@69 340 #define U16_NEXT_OR_FFFD(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
jpayne@69 341 (c)=(s)[(i)++]; \
jpayne@69 342 if(U16_IS_SURROGATE(c)) { \
jpayne@69 343 uint16_t __c2; \
jpayne@69 344 if(U16_IS_SURROGATE_LEAD(c) && (i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
jpayne@69 345 ++(i); \
jpayne@69 346 (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
jpayne@69 347 } else { \
jpayne@69 348 (c)=0xfffd; /* lone trail, or lead without a following trail */ \
jpayne@69 349 } \
jpayne@69 350 } \
jpayne@69 351 } UPRV_BLOCK_MACRO_END
jpayne@69 352
jpayne@69 353 /**
jpayne@69 354 * Append a code point to a string, overwriting 1 or 2 code units.
jpayne@69 355 * The offset points to the current end of the string contents
jpayne@69 356 * and is advanced (post-increment).
jpayne@69 357 * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
jpayne@69 358 * Otherwise, the result is undefined.
jpayne@69 359 *
jpayne@69 360 * @param s UChar * string buffer (written to, so it must not be const)
jpayne@69 361 * @param i string offset; post-incremented once per code unit written
jpayne@69 362 * @param c code point to append; evaluated more than once
jpayne@69 363 * @see U16_APPEND
jpayne@69 364 * @stable ICU 2.4
jpayne@69 365 */
jpayne@69 366 #define U16_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
jpayne@69 367 if((uint32_t)(c)<=0xffff) { \
jpayne@69 368 (s)[(i)++]=(uint16_t)(c); \
jpayne@69 369 } else { \
jpayne@69 370 (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); /* lead surrogate, same arithmetic as U16_LEAD */ \
jpayne@69 371 (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); /* trail surrogate, same arithmetic as U16_TRAIL */ \
jpayne@69 372 } \
jpayne@69 373 } UPRV_BLOCK_MACRO_END
jpayne@69 374
jpayne@69 375 /**
jpayne@69 376 * Append a code point to a string, overwriting 1 or 2 code units.
jpayne@69 377 * The offset points to the current end of the string contents
jpayne@69 378 * and is advanced (post-increment).
jpayne@69 379 * "Safe" macro, checks for a valid code point.
jpayne@69 380 * If a surrogate pair is written, checks for sufficient space in the string.
jpayne@69 381 * If the code point is not valid or a trail surrogate does not fit,
jpayne@69 382 * then isError is set to TRUE.
jpayne@69 383 * Any c<=0xffff is written as a single unit without a capacity check; on error nothing is written and i is unchanged.
jpayne@69 384 * @param s UChar * string buffer (written to, so it must not be const)
jpayne@69 385 * @param i string offset, must be i<capacity
jpayne@69 386 * @param capacity size of the string buffer
jpayne@69 387 * @param c code point to append
jpayne@69 388 * @param isError output UBool set to TRUE if an error occurs, otherwise not modified
jpayne@69 389 * @see U16_APPEND_UNSAFE
jpayne@69 390 * @stable ICU 2.4
jpayne@69 391 */
jpayne@69 392 #define U16_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \
jpayne@69 393 if((uint32_t)(c)<=0xffff) { \
jpayne@69 394 (s)[(i)++]=(uint16_t)(c); \
jpayne@69 395 } else if((uint32_t)(c)<=0x10ffff && (i)+1<(capacity)) { \
jpayne@69 396 (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
jpayne@69 397 (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
jpayne@69 398 } else /* c>0x10ffff or not enough space */ { \
jpayne@69 399 (isError)=TRUE; \
jpayne@69 400 } \
jpayne@69 401 } UPRV_BLOCK_MACRO_END
jpayne@69 402
jpayne@69 403 /**
jpayne@69 404 * Advance the string offset from one code point boundary to the next.
jpayne@69 405 * (Post-incrementing iteration.)
jpayne@69 406 * "Unsafe" macro, assumes well-formed UTF-16.
jpayne@69 407 * After a lead surrogate the following unit is skipped without being inspected.
jpayne@69 408 * @param s const UChar * string
jpayne@69 409 * @param i string offset; advanced by 2 if s[i] is a lead surrogate, otherwise by 1
jpayne@69 410 * @see U16_FWD_1
jpayne@69 411 * @stable ICU 2.4
jpayne@69 412 */
jpayne@69 413 #define U16_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
jpayne@69 414 if(U16_IS_LEAD((s)[(i)++])) { \
jpayne@69 415 ++(i); \
jpayne@69 416 } \
jpayne@69 417 } UPRV_BLOCK_MACRO_END
jpayne@69 418
jpayne@69 419 /**
jpayne@69 420 * Advance the string offset from one code point boundary to the next.
jpayne@69 421 * (Post-incrementing iteration.)
jpayne@69 422 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
jpayne@69 423 *
jpayne@69 424 * The length can be negative for a NUL-terminated string.
jpayne@69 425 *
jpayne@69 426 * @param s const UChar * string
jpayne@69 427 * @param i string offset, must be i<length; advanced by 2 only for a lead followed by a trail before length, otherwise by 1
jpayne@69 428 * @param length string length; compared with i using != only
jpayne@69 429 * @see U16_FWD_1_UNSAFE
jpayne@69 430 * @stable ICU 2.4
jpayne@69 431 */
jpayne@69 432 #define U16_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
jpayne@69 433 if(U16_IS_LEAD((s)[(i)++]) && (i)!=(length) && U16_IS_TRAIL((s)[i])) { \
jpayne@69 434 ++(i); \
jpayne@69 435 } \
jpayne@69 436 } UPRV_BLOCK_MACRO_END
jpayne@69 437
jpayne@69 438 /**
jpayne@69 439 * Advance the string offset from one code point boundary to the n-th next one,
jpayne@69 440 * i.e., move forward by n code points.
jpayne@69 441 * (Post-incrementing iteration.)
jpayne@69 442 * "Unsafe" macro, assumes well-formed UTF-16.
jpayne@69 443 * Applies U16_FWD_1_UNSAFE n times; the end of the string is never checked.
jpayne@69 444 * @param s const UChar * string
jpayne@69 445 * @param i string offset
jpayne@69 446 * @param n number of code points to skip; evaluated once into a local int32_t, and nothing happens if it is <=0
jpayne@69 447 * @see U16_FWD_N
jpayne@69 448 * @stable ICU 2.4
jpayne@69 449 */
jpayne@69 450 #define U16_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
jpayne@69 451 int32_t __N=(n); \
jpayne@69 452 while(__N>0) { \
jpayne@69 453 U16_FWD_1_UNSAFE(s, i); \
jpayne@69 454 --__N; \
jpayne@69 455 } \
jpayne@69 456 } UPRV_BLOCK_MACRO_END
jpayne@69 457
jpayne@69 458 /**
jpayne@69 459 * Advance the string offset from one code point boundary to the n-th next one,
jpayne@69 460 * i.e., move forward by n code points.
jpayne@69 461 * (Post-incrementing iteration.)
jpayne@69 462 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
jpayne@69 463 *
jpayne@69 464 * The length can be negative for a NUL-terminated string; iteration then stops early at the first NUL code unit.
jpayne@69 465 * With a non-negative length, iteration stops early once i reaches length.
jpayne@69 466 * @param s const UChar * string
jpayne@69 467 * @param i int32_t string offset, must be i<length
jpayne@69 468 * @param length int32_t string length
jpayne@69 469 * @param n number of code points to skip; evaluated once into a local int32_t, and nothing happens if it is <=0
jpayne@69 470 * @see U16_FWD_N_UNSAFE
jpayne@69 471 * @stable ICU 2.4
jpayne@69 472 */
jpayne@69 473 #define U16_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \
jpayne@69 474 int32_t __N=(n); \
jpayne@69 475 while(__N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \
jpayne@69 476 U16_FWD_1(s, i, length); \
jpayne@69 477 --__N; \
jpayne@69 478 } \
jpayne@69 479 } UPRV_BLOCK_MACRO_END
jpayne@69 480
jpayne@69 481 /**
jpayne@69 482 * Adjust a random-access offset to a code point boundary
jpayne@69 483 * at the start of a code point.
jpayne@69 484 * If the offset points to the trail surrogate of a surrogate pair,
jpayne@69 485 * then the offset is decremented.
jpayne@69 486 * Otherwise, it is not modified.
jpayne@69 487 * "Unsafe" macro, assumes well-formed UTF-16.
jpayne@69 488 * Only s[i] is inspected; the preceding unit is not checked to be a lead surrogate.
jpayne@69 489 * @param s const UChar * string
jpayne@69 490 * @param i string offset; decremented by 1 if s[i] is a trail surrogate
jpayne@69 491 * @see U16_SET_CP_START
jpayne@69 492 * @stable ICU 2.4
jpayne@69 493 */
jpayne@69 494 #define U16_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
jpayne@69 495 if(U16_IS_TRAIL((s)[i])) { \
jpayne@69 496 --(i); \
jpayne@69 497 } \
jpayne@69 498 } UPRV_BLOCK_MACRO_END
jpayne@69 499
jpayne@69 500 /**
jpayne@69 501 * Adjust a random-access offset to a code point boundary
jpayne@69 502 * at the start of a code point.
jpayne@69 503 * If the offset points to the trail surrogate of a surrogate pair,
jpayne@69 504 * then the offset is decremented.
jpayne@69 505 * Otherwise, it is not modified.
jpayne@69 506 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
jpayne@69 507 * s[i] itself is always read; s[i-1] is read only when s[i] is a trail surrogate and i>start.
jpayne@69 508 * @param s const UChar * string
jpayne@69 509 * @param start starting string offset (usually 0)
jpayne@69 510 * @param i string offset, must be start<=i; decremented by 1 only for a trail preceded by a lead
jpayne@69 511 * @see U16_SET_CP_START_UNSAFE
jpayne@69 512 * @stable ICU 2.4
jpayne@69 513 */
jpayne@69 514 #define U16_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
jpayne@69 515 if(U16_IS_TRAIL((s)[i]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
jpayne@69 516 --(i); \
jpayne@69 517 } \
jpayne@69 518 } UPRV_BLOCK_MACRO_END
jpayne@69 519
jpayne@69 520 /* definitions with backward iteration -------------------------------------- */
jpayne@69 521
jpayne@69 522 /**
jpayne@69 523 * Move the string offset from one code point boundary to the previous one
jpayne@69 524 * and get the code point between them.
jpayne@69 525 * (Pre-decrementing backward iteration.)
jpayne@69 526 * "Unsafe" macro, assumes well-formed UTF-16.
jpayne@69 527 *
jpayne@69 528 * The input offset may be the same as the string length.
jpayne@69 529 * If the offset is behind a trail surrogate unit
jpayne@69 530 * for a supplementary code point, then the macro will read
jpayne@69 531 * the preceding lead surrogate as well.
jpayne@69 532 * If the offset is behind a lead surrogate, then that itself
jpayne@69 533 * will be returned as the code point.
jpayne@69 534 * The result is undefined if the offset is behind a single, unpaired trail surrogate.
jpayne@69 535 *
jpayne@69 536 * @param s const UChar * string
jpayne@69 537 * @param i string offset; pre-decremented once, and a second time when the unit read is a trail surrogate
jpayne@69 538 * @param c output UChar32 variable
jpayne@69 539 * @see U16_PREV
jpayne@69 540 * @stable ICU 2.4
jpayne@69 541 */
jpayne@69 542 #define U16_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
jpayne@69 543 (c)=(s)[--(i)]; \
jpayne@69 544 if(U16_IS_TRAIL(c)) { \
jpayne@69 545 (c)=U16_GET_SUPPLEMENTARY((s)[--(i)], (c)); /* the preceding unit is consumed without checking that it is a lead */ \
jpayne@69 546 } \
jpayne@69 547 } UPRV_BLOCK_MACRO_END
jpayne@69 548
jpayne@69 549 /**
jpayne@69 550 * Move the string offset from one code point boundary to the previous one
jpayne@69 551 * and get the code point between them.
jpayne@69 552 * (Pre-decrementing backward iteration.)
jpayne@69 553 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
jpayne@69 554 *
jpayne@69 555 * The input offset may be the same as the string length.
jpayne@69 556 * If the offset is behind a trail surrogate unit
jpayne@69 557 * for a supplementary code point, then the macro will read
jpayne@69 558 * the preceding lead surrogate as well.
jpayne@69 559 * If the offset is behind a lead surrogate or behind a single, unpaired
jpayne@69 560 * trail surrogate, then c is set to that unpaired surrogate.
jpayne@69 561 *
jpayne@69 562 * @param s const UChar * string
jpayne@69 563 * @param start starting string offset (usually 0); units before start are never read
jpayne@69 564 * @param i string offset, must be start<i; moved back by 2 for a lead-trail pair, otherwise by 1
jpayne@69 565 * @param c output UChar32 variable
jpayne@69 566 * @see U16_PREV_UNSAFE
jpayne@69 567 * @stable ICU 2.4
jpayne@69 568 */
jpayne@69 569 #define U16_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
jpayne@69 570 (c)=(s)[--(i)]; \
jpayne@69 571 if(U16_IS_TRAIL(c)) { \
jpayne@69 572 uint16_t __c2; \
jpayne@69 573 if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { /* peek at the previous unit; consume it only if it is a lead */ \
jpayne@69 574 --(i); \
jpayne@69 575 (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
jpayne@69 576 } \
jpayne@69 577 } \
jpayne@69 578 } UPRV_BLOCK_MACRO_END
jpayne@69 579
jpayne@69 580 /**
jpayne@69 581 * Move the string offset from one code point boundary to the previous one
jpayne@69 582 * and get the code point between them.
jpayne@69 583 * (Pre-decrementing backward iteration.)
jpayne@69 584 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
jpayne@69 585 *
jpayne@69 586 * The input offset may be the same as the string length.
jpayne@69 587 * If the offset is behind a trail surrogate unit
jpayne@69 588 * for a supplementary code point, then the macro will read
jpayne@69 589 * the preceding lead surrogate as well.
jpayne@69 590 * If the offset is behind a lead surrogate or behind a single, unpaired
jpayne@69 591 * trail surrogate, then c is set to U+FFFD.
jpayne@69 592 *
jpayne@69 593 * @param s const UChar * string
jpayne@69 594 * @param start starting string offset (usually 0); units before start are never read
jpayne@69 595 * @param i string offset, must be start<i; moved back by 2 for a lead-trail pair, otherwise by 1 (also when U+FFFD is returned)
jpayne@69 596 * @param c output UChar32 variable
jpayne@69 597 * @see U16_PREV_UNSAFE
jpayne@69 598 * @stable ICU 60
jpayne@69 599 */
jpayne@69 600 #define U16_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
jpayne@69 601 (c)=(s)[--(i)]; \
jpayne@69 602 if(U16_IS_SURROGATE(c)) { \
jpayne@69 603 uint16_t __c2; \
jpayne@69 604 if(U16_IS_SURROGATE_TRAIL(c) && (i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
jpayne@69 605 --(i); \
jpayne@69 606 (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
jpayne@69 607 } else { \
jpayne@69 608 (c)=0xfffd; /* lone lead, or trail without a preceding lead */ \
jpayne@69 609 } \
jpayne@69 610 } \
jpayne@69 611 } UPRV_BLOCK_MACRO_END
jpayne@69 612
jpayne@69 613 /**
jpayne@69 614 * Move the string offset from one code point boundary to the previous one.
jpayne@69 615 * (Pre-decrementing backward iteration.)
jpayne@69 616 * The input offset may be the same as the string length.
jpayne@69 617 * "Unsafe" macro, assumes well-formed UTF-16.
jpayne@69 618 * Before a trail surrogate the preceding unit is skipped without being inspected.
jpayne@69 619 * @param s const UChar * string
jpayne@69 620 * @param i string offset; moved back by 2 if s[i-1] is a trail surrogate, otherwise by 1
jpayne@69 621 * @see U16_BACK_1
jpayne@69 622 * @stable ICU 2.4
jpayne@69 623 */
jpayne@69 624 #define U16_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
jpayne@69 625 if(U16_IS_TRAIL((s)[--(i)])) { \
jpayne@69 626 --(i); \
jpayne@69 627 } \
jpayne@69 628 } UPRV_BLOCK_MACRO_END
jpayne@69 629
jpayne@69 630 /**
jpayne@69 631 * Move the string offset from one code point boundary to the previous one.
jpayne@69 632 * (Pre-decrementing backward iteration.)
jpayne@69 633 * The input offset may be the same as the string length.
jpayne@69 634 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
jpayne@69 635 * The second step back is taken only for a trail surrogate that is preceded, after start, by a lead surrogate.
jpayne@69 636 * @param s const UChar * string
jpayne@69 637 * @param start starting string offset (usually 0)
jpayne@69 638 * @param i string offset, must be start<i; moved back by 2 for a lead-trail pair, otherwise by 1
jpayne@69 639 * @see U16_BACK_1_UNSAFE
jpayne@69 640 * @stable ICU 2.4
jpayne@69 641 */
jpayne@69 642 #define U16_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
jpayne@69 643 if(U16_IS_TRAIL((s)[--(i)]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
jpayne@69 644 --(i); \
jpayne@69 645 } \
jpayne@69 646 } UPRV_BLOCK_MACRO_END
jpayne@69 647
jpayne@69 648 /**
jpayne@69 649 * Move the string offset from one code point boundary to the n-th one before it,
jpayne@69 650 * i.e., move backward by n code points.
jpayne@69 651 * (Pre-decrementing backward iteration.)
jpayne@69 652 * The input offset may be the same as the string length.
jpayne@69 653 * "Unsafe" macro, assumes well-formed UTF-16.
jpayne@69 654 * Applies U16_BACK_1_UNSAFE n times; the start of the string is never checked.
jpayne@69 655 * @param s const UChar * string
jpayne@69 656 * @param i string offset
jpayne@69 657 * @param n number of code points to skip; evaluated once into a local int32_t, and nothing happens if it is <=0
jpayne@69 658 * @see U16_BACK_N
jpayne@69 659 * @stable ICU 2.4
jpayne@69 660 */
jpayne@69 661 #define U16_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
jpayne@69 662 int32_t __N=(n); \
jpayne@69 663 while(__N>0) { \
jpayne@69 664 U16_BACK_1_UNSAFE(s, i); \
jpayne@69 665 --__N; \
jpayne@69 666 } \
jpayne@69 667 } UPRV_BLOCK_MACRO_END
jpayne@69 668
jpayne@69 669 /**
jpayne@69 670 * Move the string offset from one code point boundary to the n-th one before it,
jpayne@69 671 * i.e., move backward by n code points.
jpayne@69 672 * (Pre-decrementing backward iteration.)
jpayne@69 673 * The input offset may be the same as the string length.
jpayne@69 674 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
jpayne@69 675 * Iteration stops early once i is no longer greater than start.
jpayne@69 676 * @param s const UChar * string
jpayne@69 677 * @param start start of string
jpayne@69 678 * @param i string offset, must be start<i
jpayne@69 679 * @param n number of code points to skip; evaluated once into a local int32_t, and nothing happens if it is <=0
jpayne@69 680 * @see U16_BACK_N_UNSAFE
jpayne@69 681 * @stable ICU 2.4
jpayne@69 682 */
jpayne@69 683 #define U16_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \
jpayne@69 684 int32_t __N=(n); \
jpayne@69 685 while(__N>0 && (i)>(start)) { \
jpayne@69 686 U16_BACK_1(s, start, i); \
jpayne@69 687 --__N; \
jpayne@69 688 } \
jpayne@69 689 } UPRV_BLOCK_MACRO_END
jpayne@69 690
jpayne@69 691 /**
jpayne@69 692 * Adjust a random-access offset to a code point boundary after a code point.
jpayne@69 693 * If the offset is behind the lead surrogate of a surrogate pair,
jpayne@69 694 * then the offset is incremented.
jpayne@69 695 * Otherwise, it is not modified.
jpayne@69 696 * The input offset may be the same as the string length.
jpayne@69 697 * "Unsafe" macro, assumes well-formed UTF-16.
jpayne@69 698 * Only s[i-1] is inspected; s[i] is not checked to be a trail surrogate and no bounds are checked.
jpayne@69 699 * @param s const UChar * string
jpayne@69 700 * @param i string offset; incremented by 1 if s[i-1] is a lead surrogate
jpayne@69 701 * @see U16_SET_CP_LIMIT
jpayne@69 702 * @stable ICU 2.4
jpayne@69 703 */
jpayne@69 704 #define U16_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
jpayne@69 705 if(U16_IS_LEAD((s)[(i)-1])) { \
jpayne@69 706 ++(i); \
jpayne@69 707 } \
jpayne@69 708 } UPRV_BLOCK_MACRO_END
jpayne@69 709
jpayne@69 710 /**
jpayne@69 711 * Adjust a random-access offset to a code point boundary after a code point.
jpayne@69 712 * If the offset is behind the lead surrogate of a surrogate pair,
jpayne@69 713 * then the offset is incremented.
jpayne@69 714 * Otherwise, it is not modified.
jpayne@69 715 * The input offset may be the same as the string length.
jpayne@69 716 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
jpayne@69 717 *
jpayne@69 718 * The length can be negative for a NUL-terminated string.
jpayne@69 719 * Nothing is read from s unless start<i and either i<length or length<0; then s[i-1] and s[i] must form a lead-trail pair.
jpayne@69 720 * @param s const UChar * string
jpayne@69 721 * @param start int32_t starting string offset (usually 0)
jpayne@69 722 * @param i int32_t string offset, start<=i<=length; incremented by 1 only when it splits a surrogate pair
jpayne@69 723 * @param length int32_t string length
jpayne@69 724 * @see U16_SET_CP_LIMIT_UNSAFE
jpayne@69 725 * @stable ICU 2.4
jpayne@69 726 */
jpayne@69 727 #define U16_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \
jpayne@69 728 if((start)<(i) && ((i)<(length) || (length)<0) && U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \
jpayne@69 729 ++(i); \
jpayne@69 730 } \
jpayne@69 731 } UPRV_BLOCK_MACRO_END
jpayne@69 732
jpayne@69 733 #endif