annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/include/python3.8/unicodeobject.h @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
rev   line source
jpayne@69 1 #ifndef Py_UNICODEOBJECT_H
jpayne@69 2 #define Py_UNICODEOBJECT_H
jpayne@69 3
jpayne@69 4 #include <stdarg.h>
jpayne@69 5
jpayne@69 6 /*
jpayne@69 7
jpayne@69 8 Unicode implementation based on original code by Fredrik Lundh,
jpayne@69 9 modified by Marc-Andre Lemburg (mal@lemburg.com) according to the
jpayne@69 10 Unicode Integration Proposal. (See
jpayne@69 11 http://www.egenix.com/files/python/unicode-proposal.txt).
jpayne@69 12
jpayne@69 13 Copyright (c) Corporation for National Research Initiatives.
jpayne@69 14
jpayne@69 15
jpayne@69 16 Original header:
jpayne@69 17 --------------------------------------------------------------------
jpayne@69 18
jpayne@69 19 * Yet another Unicode string type for Python. This type supports the
jpayne@69 20 * 16-bit Basic Multilingual Plane (BMP) only.
jpayne@69 21 *
jpayne@69 22 * Written by Fredrik Lundh, January 1999.
jpayne@69 23 *
jpayne@69 24 * Copyright (c) 1999 by Secret Labs AB.
jpayne@69 25 * Copyright (c) 1999 by Fredrik Lundh.
jpayne@69 26 *
jpayne@69 27 * fredrik@pythonware.com
jpayne@69 28 * http://www.pythonware.com
jpayne@69 29 *
jpayne@69 30 * --------------------------------------------------------------------
jpayne@69 31 * This Unicode String Type is
jpayne@69 32 *
jpayne@69 33 * Copyright (c) 1999 by Secret Labs AB
jpayne@69 34 * Copyright (c) 1999 by Fredrik Lundh
jpayne@69 35 *
jpayne@69 36 * By obtaining, using, and/or copying this software and/or its
jpayne@69 37 * associated documentation, you agree that you have read, understood,
jpayne@69 38 * and will comply with the following terms and conditions:
jpayne@69 39 *
jpayne@69 40 * Permission to use, copy, modify, and distribute this software and its
jpayne@69 41 * associated documentation for any purpose and without fee is hereby
jpayne@69 42 * granted, provided that the above copyright notice appears in all
jpayne@69 43 * copies, and that both that copyright notice and this permission notice
jpayne@69 44 * appear in supporting documentation, and that the name of Secret Labs
jpayne@69 45 * AB or the author not be used in advertising or publicity pertaining to
jpayne@69 46 * distribution of the software without specific, written prior
jpayne@69 47 * permission.
jpayne@69 48 *
jpayne@69 49 * SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
jpayne@69 50 * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
jpayne@69 51 * FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
jpayne@69 52 * ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
jpayne@69 53 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
jpayne@69 54 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
jpayne@69 55 * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
jpayne@69 56 * -------------------------------------------------------------------- */
jpayne@69 57
jpayne@69 58 #include <ctype.h>
jpayne@69 59
jpayne@69 60 /* === Internal API ======================================================= */
jpayne@69 61
jpayne@69 62 /* --- Internal Unicode Format -------------------------------------------- */
jpayne@69 63
jpayne@69 64 /* Python 3.x requires unicode */
jpayne@69 65 #define Py_USING_UNICODE
jpayne@69 66
jpayne@69 67 #ifndef SIZEOF_WCHAR_T
jpayne@69 68 #error Must define SIZEOF_WCHAR_T
jpayne@69 69 #endif
jpayne@69 70
jpayne@69 71 #define Py_UNICODE_SIZE SIZEOF_WCHAR_T
jpayne@69 72
jpayne@69 73 /* If wchar_t can be used for UCS-4 storage, set Py_UNICODE_WIDE.
jpayne@69 74 Otherwise, Unicode strings are stored as UCS-2 (with limited support
jpayne@69 75 for UTF-16) */
jpayne@69 76
jpayne@69 77 #if Py_UNICODE_SIZE >= 4
jpayne@69 78 #define Py_UNICODE_WIDE
jpayne@69 79 #endif
jpayne@69 80
jpayne@69 81 /* Set these flags if the platform has "wchar.h" and the
jpayne@69 82 wchar_t type is a 16-bit unsigned type */
jpayne@69 83 /* #define HAVE_WCHAR_H */
jpayne@69 84 /* #define HAVE_USABLE_WCHAR_T */
jpayne@69 85
jpayne@69 86 /* If the compiler provides a wchar_t type we try to support it
jpayne@69 87 through the interface functions PyUnicode_FromWideChar(),
jpayne@69 88 PyUnicode_AsWideChar() and PyUnicode_AsWideCharString(). */
jpayne@69 89
jpayne@69 90 #ifdef HAVE_USABLE_WCHAR_T
jpayne@69 91 # ifndef HAVE_WCHAR_H
jpayne@69 92 # define HAVE_WCHAR_H
jpayne@69 93 # endif
jpayne@69 94 #endif
jpayne@69 95
jpayne@69 96 #ifdef HAVE_WCHAR_H
jpayne@69 97 # include <wchar.h>
jpayne@69 98 #endif
jpayne@69 99
jpayne@69 100 /* Py_UCS4, Py_UCS2 and Py_UCS1 are typedefs (32-, 16- and 8-bit unsigned
jpayne@69 101 integers) for the respective unicode representations. */
jpayne@69 102 typedef uint32_t Py_UCS4;
jpayne@69 103 typedef uint16_t Py_UCS2;
jpayne@69 104 typedef uint8_t Py_UCS1;
jpayne@69 105
jpayne@69 106 #ifdef __cplusplus
jpayne@69 107 extern "C" {
jpayne@69 108 #endif
jpayne@69 109
jpayne@69 110
jpayne@69 111 PyAPI_DATA(PyTypeObject) PyUnicode_Type;
jpayne@69 112 PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
jpayne@69 113
jpayne@69 114 #define PyUnicode_Check(op) \
jpayne@69 115 PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS) /* tests the type of op for the Py_TPFLAGS_UNICODE_SUBCLASS flag */
jpayne@69 116 #define PyUnicode_CheckExact(op) (Py_TYPE(op) == &PyUnicode_Type) /* true only when the type of op is exactly PyUnicode_Type */
jpayne@69 117
jpayne@69 118 /* --- Constants ---------------------------------------------------------- */
jpayne@69 119
jpayne@69 120 /* This Unicode character will be used as replacement character during
jpayne@69 121 decoding if the errors argument is set to "replace". Note: the
jpayne@69 122 Unicode character U+FFFD is the official REPLACEMENT CHARACTER in
jpayne@69 123 Unicode 3.0. */
jpayne@69 124
jpayne@69 125 #define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UCS4) 0xFFFD)
jpayne@69 126
jpayne@69 127 /* === Public API ========================================================= */
jpayne@69 128
jpayne@69 129 /* Similar to PyUnicode_FromUnicode(), but u points to UTF-8 encoded bytes */
jpayne@69 130 PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize(
jpayne@69 131 const char *u, /* UTF-8 encoded string */
jpayne@69 132 Py_ssize_t size /* size of buffer */
jpayne@69 133 );
jpayne@69 134
jpayne@69 135 /* Similar to PyUnicode_FromUnicode(), but u points to null-terminated
jpayne@69 136 UTF-8 encoded bytes. The size is determined with strlen(). */
jpayne@69 137 PyAPI_FUNC(PyObject*) PyUnicode_FromString(
jpayne@69 138 const char *u /* UTF-8 encoded string */
jpayne@69 139 );
jpayne@69 140
jpayne@69 141 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
jpayne@69 142 PyAPI_FUNC(PyObject*) PyUnicode_Substring(
jpayne@69 143 PyObject *str,
jpayne@69 144 Py_ssize_t start,
jpayne@69 145 Py_ssize_t end);
jpayne@69 146 #endif
jpayne@69 147
jpayne@69 148 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
jpayne@69 149 /* Copy the string into a UCS4 buffer including the null character if copy_null
jpayne@69 150 is set. Return NULL and raise an exception on error. Raise a SystemError if
jpayne@69 151 the buffer is smaller than the string. Return buffer on success.
jpayne@69 152
jpayne@69 153 buflen is the length of the buffer in (Py_UCS4) characters. */
jpayne@69 154 PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4(
jpayne@69 155 PyObject *unicode,
jpayne@69 156 Py_UCS4* buffer,
jpayne@69 157 Py_ssize_t buflen,
jpayne@69 158 int copy_null);
jpayne@69 159
jpayne@69 160 /* Copy the string into a UCS4 buffer. A new buffer is allocated using
jpayne@69 161 PyMem_Malloc; if this fails, NULL is returned with a memory error
jpayne@69 162 exception set. */
jpayne@69 163 PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4Copy(PyObject *unicode);
jpayne@69 164 #endif
jpayne@69 165
jpayne@69 166 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
jpayne@69 167 /* Get the length of the Unicode object. */
jpayne@69 168
jpayne@69 169 PyAPI_FUNC(Py_ssize_t) PyUnicode_GetLength(
jpayne@69 170 PyObject *unicode
jpayne@69 171 );
jpayne@69 172 #endif
jpayne@69 173
jpayne@69 174 /* Get the number of Py_UNICODE units in the
jpayne@69 175 string representation. */
jpayne@69 176
jpayne@69 177 Py_DEPRECATED(3.3) PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize(
jpayne@69 178 PyObject *unicode /* Unicode object */
jpayne@69 179 );
jpayne@69 180
jpayne@69 181 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
jpayne@69 182 /* Read a character from the string. */
jpayne@69 183
jpayne@69 184 PyAPI_FUNC(Py_UCS4) PyUnicode_ReadChar(
jpayne@69 185 PyObject *unicode,
jpayne@69 186 Py_ssize_t index
jpayne@69 187 );
jpayne@69 188
jpayne@69 189 /* Write a character to the string. The string must have been created through
jpayne@69 190 PyUnicode_New, must not be shared, and must not have been hashed yet.
jpayne@69 191
jpayne@69 192 Return 0 on success, -1 on error. */
jpayne@69 193
jpayne@69 194 PyAPI_FUNC(int) PyUnicode_WriteChar(
jpayne@69 195 PyObject *unicode,
jpayne@69 196 Py_ssize_t index,
jpayne@69 197 Py_UCS4 character
jpayne@69 198 );
jpayne@69 199 #endif
jpayne@69 200
jpayne@69 201 /* Resize a Unicode object. The length is the number of characters, except
jpayne@69 202 if the kind of the string is PyUnicode_WCHAR_KIND: in this case, the length
jpayne@69 203 is the number of Py_UNICODE characters.
jpayne@69 204
jpayne@69 205 *unicode is modified to point to the new (resized) object and 0
jpayne@69 206 returned on success.
jpayne@69 207
jpayne@69 208 Try to resize the string in place (which is usually faster than allocating
jpayne@69 209 a new string and copying characters), or create a new string.
jpayne@69 210
jpayne@69 211 Error handling is implemented as follows: an exception is set, -1
jpayne@69 212 is returned and *unicode left untouched.
jpayne@69 213
jpayne@69 214 WARNING: The function doesn't check string content, the result may not be a
jpayne@69 215 string in canonical representation. */
jpayne@69 216
jpayne@69 217 PyAPI_FUNC(int) PyUnicode_Resize(
jpayne@69 218 PyObject **unicode, /* Pointer to the Unicode object */
jpayne@69 219 Py_ssize_t length /* New length */
jpayne@69 220 );
jpayne@69 221
jpayne@69 222 /* Decode obj to a Unicode object.
jpayne@69 223
jpayne@69 224 bytes, bytearray and other bytes-like objects are decoded according to the
jpayne@69 225 given encoding and error handler. The encoding and error handler can be
jpayne@69 226 NULL to have the interface use UTF-8 and "strict".
jpayne@69 227
jpayne@69 228 All other objects (including Unicode objects) raise an exception.
jpayne@69 229
jpayne@69 230 The API returns NULL in case of an error. The caller is responsible
jpayne@69 231 for decref'ing the returned objects.
jpayne@69 232
jpayne@69 233 */
jpayne@69 234
jpayne@69 235 PyAPI_FUNC(PyObject*) PyUnicode_FromEncodedObject(
jpayne@69 236 PyObject *obj, /* Object */
jpayne@69 237 const char *encoding, /* encoding */
jpayne@69 238 const char *errors /* error handling */
jpayne@69 239 );
jpayne@69 240
jpayne@69 241 /* Copy an instance of a Unicode subtype to a new true Unicode object if
jpayne@69 242 necessary. If obj is already a true Unicode object (not a subtype), return
jpayne@69 243 the reference with *incremented* refcount.
jpayne@69 244
jpayne@69 245 The API returns NULL in case of an error. The caller is responsible
jpayne@69 246 for decref'ing the returned objects.
jpayne@69 247
jpayne@69 248 */
jpayne@69 249
jpayne@69 250 PyAPI_FUNC(PyObject*) PyUnicode_FromObject(
jpayne@69 251 PyObject *obj /* Object */
jpayne@69 252 );
jpayne@69 253
jpayne@69 254 PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV(
jpayne@69 255 const char *format, /* ASCII-encoded string */
jpayne@69 256 va_list vargs
jpayne@69 257 );
jpayne@69 258 PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(
jpayne@69 259 const char *format, /* ASCII-encoded string */
jpayne@69 260 ...
jpayne@69 261 );
jpayne@69 262
jpayne@69 263 PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **);
jpayne@69 264 PyAPI_FUNC(void) PyUnicode_InternImmortal(PyObject **);
jpayne@69 265 PyAPI_FUNC(PyObject *) PyUnicode_InternFromString(
jpayne@69 266 const char *u /* UTF-8 encoded string */
jpayne@69 267 );
jpayne@69 268
jpayne@69 269 /* Reads the state.interned field of op through a PyASCIIObject cast; no type check is done, so use only if you know it's a string */
jpayne@69 270 #define PyUnicode_CHECK_INTERNED(op) \
jpayne@69 271 (((PyASCIIObject *)(op))->state.interned)
jpayne@69 272
jpayne@69 273 /* --- wchar_t support for platforms which support it --------------------- */
jpayne@69 274
jpayne@69 275 #ifdef HAVE_WCHAR_H
jpayne@69 276
jpayne@69 277 /* Create a Unicode Object from the wchar_t buffer w of the given
jpayne@69 278 size.
jpayne@69 279
jpayne@69 280 The buffer is copied into the new object. */
jpayne@69 281
jpayne@69 282 PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar(
jpayne@69 283 const wchar_t *w, /* wchar_t buffer */
jpayne@69 284 Py_ssize_t size /* size of buffer */
jpayne@69 285 );
jpayne@69 286
jpayne@69 287 /* Copies the Unicode Object contents into the wchar_t buffer w. At
jpayne@69 288 most size wchar_t characters are copied.
jpayne@69 289
jpayne@69 290 Note that the resulting wchar_t string may or may not be
jpayne@69 291 0-terminated. It is the responsibility of the caller to make sure
jpayne@69 292 that the wchar_t string is 0-terminated in case this is required by
jpayne@69 293 the application.
jpayne@69 294
jpayne@69 295 Returns the number of wchar_t characters copied (excluding a
jpayne@69 296 possibly trailing 0-termination character) or -1 in case of an
jpayne@69 297 error. */
jpayne@69 298
jpayne@69 299 PyAPI_FUNC(Py_ssize_t) PyUnicode_AsWideChar(
jpayne@69 300 PyObject *unicode, /* Unicode object */
jpayne@69 301 wchar_t *w, /* wchar_t buffer */
jpayne@69 302 Py_ssize_t size /* size of buffer */
jpayne@69 303 );
jpayne@69 304
jpayne@69 305 /* Convert the Unicode object to a wide character string. The output string
jpayne@69 306 always ends with a nul character. If size is not NULL, write the number of
jpayne@69 307 wide characters (excluding the null character) into *size.
jpayne@69 308
jpayne@69 309 Returns a buffer allocated by PyMem_Malloc() (use PyMem_Free() to free it)
jpayne@69 310 on success. On error, returns NULL and raises a MemoryError; *size is
jpayne@69 311 undefined in that case. */
jpayne@69 312
jpayne@69 313 PyAPI_FUNC(wchar_t*) PyUnicode_AsWideCharString(
jpayne@69 314 PyObject *unicode, /* Unicode object */
jpayne@69 315 Py_ssize_t *size /* number of characters of the result */
jpayne@69 316 );
jpayne@69 317
jpayne@69 318 #endif
jpayne@69 319
jpayne@69 320 /* --- Unicode ordinals --------------------------------------------------- */
jpayne@69 321
jpayne@69 322 /* Create a Unicode Object from the given Unicode code point ordinal.
jpayne@69 323
jpayne@69 324 The ordinal must be in range(0x110000). A ValueError is
jpayne@69 325 raised in case it is not.
jpayne@69 326
jpayne@69 327 */
jpayne@69 328
jpayne@69 329 PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal);
jpayne@69 330
jpayne@69 331 /* --- Free-list management ----------------------------------------------- */
jpayne@69 332
jpayne@69 333 /* Clear the free list used by the Unicode implementation.
jpayne@69 334
jpayne@69 335 This can be used to release memory used for objects on the free
jpayne@69 336 list back to the Python memory allocator.
jpayne@69 337
jpayne@69 338 */
jpayne@69 339
jpayne@69 340 PyAPI_FUNC(int) PyUnicode_ClearFreeList(void);
jpayne@69 341
jpayne@69 342 /* === Builtin Codecs =====================================================
jpayne@69 343
jpayne@69 344 Many of these APIs take two arguments encoding and errors. These
jpayne@69 345 parameters encoding and errors have the same semantics as the ones
jpayne@69 346 of the builtin str() API.
jpayne@69 347
jpayne@69 348 Setting encoding to NULL causes the default encoding (UTF-8) to be used.
jpayne@69 349
jpayne@69 350 Error handling is set by errors which may also be set to NULL
jpayne@69 351 meaning to use the default handling defined for the codec. Default
jpayne@69 352 error handling for all builtin codecs is "strict" (ValueErrors are
jpayne@69 353 raised).
jpayne@69 354
jpayne@69 355 The codecs all use a similar interface. Only deviations from the
jpayne@69 356 generic ones are documented.
jpayne@69 357
jpayne@69 358 */
jpayne@69 359
jpayne@69 360 /* --- Manage the default encoding ---------------------------------------- */
jpayne@69 361
jpayne@69 362 /* Returns "utf-8". */
jpayne@69 363 PyAPI_FUNC(const char*) PyUnicode_GetDefaultEncoding(void);
jpayne@69 364
jpayne@69 365 /* --- Generic Codecs ----------------------------------------------------- */
jpayne@69 366
jpayne@69 367 /* Create a Unicode object by decoding the encoded string s of the
jpayne@69 368 given size. */
jpayne@69 369
jpayne@69 370 PyAPI_FUNC(PyObject*) PyUnicode_Decode(
jpayne@69 371 const char *s, /* encoded string */
jpayne@69 372 Py_ssize_t size, /* size of buffer */
jpayne@69 373 const char *encoding, /* encoding */
jpayne@69 374 const char *errors /* error handling */
jpayne@69 375 );
jpayne@69 376
jpayne@69 377 /* Decode a Unicode object unicode and return the result as Python
jpayne@69 378 object.
jpayne@69 379
jpayne@69 380 This API is DEPRECATED. The only supported standard encoding is rot13.
jpayne@69 381 Use PyCodec_Decode() to decode with rot13 and non-standard codecs
jpayne@69 382 that decode from str. */
jpayne@69 383
jpayne@69 384 Py_DEPRECATED(3.6) PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedObject(
jpayne@69 385 PyObject *unicode, /* Unicode object */
jpayne@69 386 const char *encoding, /* encoding */
jpayne@69 387 const char *errors /* error handling */
jpayne@69 388 );
jpayne@69 389
jpayne@69 390 /* Decode a Unicode object unicode and return the result as Unicode
jpayne@69 391 object.
jpayne@69 392
jpayne@69 393 This API is DEPRECATED. The only supported standard encoding is rot13.
jpayne@69 394 Use PyCodec_Decode() to decode with rot13 and non-standard codecs
jpayne@69 395 that decode from str to str. */
jpayne@69 396
jpayne@69 397 Py_DEPRECATED(3.6) PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedUnicode(
jpayne@69 398 PyObject *unicode, /* Unicode object */
jpayne@69 399 const char *encoding, /* encoding */
jpayne@69 400 const char *errors /* error handling */
jpayne@69 401 );
jpayne@69 402
jpayne@69 403 /* Encodes a Unicode object and returns the result as Python
jpayne@69 404 object.
jpayne@69 405
jpayne@69 406 This API is DEPRECATED. It is superseded by PyUnicode_AsEncodedString()
jpayne@69 407 since all standard encodings (except rot13) encode str to bytes.
jpayne@69 408 Use PyCodec_Encode() for encoding with rot13 and non-standard codecs
jpayne@69 409 that encode from str to non-bytes. */
jpayne@69 410
jpayne@69 411 Py_DEPRECATED(3.6) PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject(
jpayne@69 412 PyObject *unicode, /* Unicode object */
jpayne@69 413 const char *encoding, /* encoding */
jpayne@69 414 const char *errors /* error handling */
jpayne@69 415 );
jpayne@69 416
jpayne@69 417 /* Encodes a Unicode object and returns the result as Python string
jpayne@69 418 object. */
jpayne@69 419
jpayne@69 420 PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString(
jpayne@69 421 PyObject *unicode, /* Unicode object */
jpayne@69 422 const char *encoding, /* encoding */
jpayne@69 423 const char *errors /* error handling */
jpayne@69 424 );
jpayne@69 425
jpayne@69 426 /* Encodes a Unicode object and returns the result as Unicode
jpayne@69 427 object.
jpayne@69 428
jpayne@69 429 This API is DEPRECATED. The only supported standard encoding is rot13.
jpayne@69 430 Use PyCodec_Encode() to encode with rot13 and non-standard codecs
jpayne@69 431 that encode from str to str. */
jpayne@69 432
jpayne@69 433 Py_DEPRECATED(3.6) PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedUnicode(
jpayne@69 434 PyObject *unicode, /* Unicode object */
jpayne@69 435 const char *encoding, /* encoding */
jpayne@69 436 const char *errors /* error handling */
jpayne@69 437 );
jpayne@69 438
jpayne@69 439 /* Build an encoding map. */
jpayne@69 440
jpayne@69 441 PyAPI_FUNC(PyObject*) PyUnicode_BuildEncodingMap(
jpayne@69 442 PyObject* string /* 256 character map */
jpayne@69 443 );
jpayne@69 444
jpayne@69 445 /* --- UTF-7 Codecs ------------------------------------------------------- */
jpayne@69 446
jpayne@69 447 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7(
jpayne@69 448 const char *string, /* UTF-7 encoded string */
jpayne@69 449 Py_ssize_t length, /* size of string */
jpayne@69 450 const char *errors /* error handling */
jpayne@69 451 );
jpayne@69 452
jpayne@69 453 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful(
jpayne@69 454 const char *string, /* UTF-7 encoded string */
jpayne@69 455 Py_ssize_t length, /* size of string */
jpayne@69 456 const char *errors, /* error handling */
jpayne@69 457 Py_ssize_t *consumed /* bytes consumed */
jpayne@69 458 );
jpayne@69 459
jpayne@69 460 /* --- UTF-8 Codecs ------------------------------------------------------- */
jpayne@69 461
jpayne@69 462 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8(
jpayne@69 463 const char *string, /* UTF-8 encoded string */
jpayne@69 464 Py_ssize_t length, /* size of string */
jpayne@69 465 const char *errors /* error handling */
jpayne@69 466 );
jpayne@69 467
jpayne@69 468 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8Stateful(
jpayne@69 469 const char *string, /* UTF-8 encoded string */
jpayne@69 470 Py_ssize_t length, /* size of string */
jpayne@69 471 const char *errors, /* error handling */
jpayne@69 472 Py_ssize_t *consumed /* bytes consumed */
jpayne@69 473 );
jpayne@69 474
jpayne@69 475 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String(
jpayne@69 476 PyObject *unicode /* Unicode object */
jpayne@69 477 );
jpayne@69 478
jpayne@69 479 /* --- UTF-32 Codecs ------------------------------------------------------ */
jpayne@69 480
jpayne@69 481 /* Decodes length bytes from a UTF-32 encoded buffer string and returns
jpayne@69 482 the corresponding Unicode object.
jpayne@69 483
jpayne@69 484 errors (if non-NULL) defines the error handling. It defaults
jpayne@69 485 to "strict".
jpayne@69 486
jpayne@69 487 If byteorder is non-NULL, the decoder starts decoding using the
jpayne@69 488 given byte order:
jpayne@69 489
jpayne@69 490 *byteorder == -1: little endian
jpayne@69 491 *byteorder == 0: native order
jpayne@69 492 *byteorder == 1: big endian
jpayne@69 493
jpayne@69 494 In native mode, the first four bytes of the stream are checked for a
jpayne@69 495 BOM mark. If found, the BOM mark is analysed, the byte order
jpayne@69 496 adjusted and the BOM skipped. In the other modes, no BOM mark
jpayne@69 497 interpretation is done. After completion, *byteorder is set to the
jpayne@69 498 current byte order at the end of input data.
jpayne@69 499
jpayne@69 500 If byteorder is NULL, the codec starts in native order mode.
jpayne@69 501
jpayne@69 502 */
jpayne@69 503
jpayne@69 504 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32(
jpayne@69 505 const char *string, /* UTF-32 encoded string */
jpayne@69 506 Py_ssize_t length, /* size of string */
jpayne@69 507 const char *errors, /* error handling */
jpayne@69 508 int *byteorder /* pointer to byteorder to use
jpayne@69 509 0=native;-1=LE,1=BE; updated on
jpayne@69 510 exit */
jpayne@69 511 );
jpayne@69 512
jpayne@69 513 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32Stateful(
jpayne@69 514 const char *string, /* UTF-32 encoded string */
jpayne@69 515 Py_ssize_t length, /* size of string */
jpayne@69 516 const char *errors, /* error handling */
jpayne@69 517 int *byteorder, /* pointer to byteorder to use
jpayne@69 518 0=native;-1=LE,1=BE; updated on
jpayne@69 519 exit */
jpayne@69 520 Py_ssize_t *consumed /* bytes consumed */
jpayne@69 521 );
jpayne@69 522
jpayne@69 523 /* Returns a Python string using the UTF-32 encoding in native byte
jpayne@69 524 order. The string always starts with a BOM mark. */
jpayne@69 525
jpayne@69 526 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF32String(
jpayne@69 527 PyObject *unicode /* Unicode object */
jpayne@69 528 );
jpayne@69 529
jpayne@69 530 /* Returns a Python string object holding the UTF-32 encoded value of
jpayne@69 531 the Unicode data.
jpayne@69 532
jpayne@69 533 If byteorder is not 0, output is written according to the following
jpayne@69 534 byte order:
jpayne@69 535
jpayne@69 536 byteorder == -1: little endian
jpayne@69 537 byteorder == 0: native byte order (writes a BOM mark)
jpayne@69 538 byteorder == 1: big endian
jpayne@69 539
jpayne@69 540 If byteorder is 0, the output string will always start with the
jpayne@69 541 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
jpayne@69 542 prepended.
jpayne@69 543
jpayne@69 544 */
jpayne@69 545
jpayne@69 546 /* --- UTF-16 Codecs ------------------------------------------------------ */
jpayne@69 547
jpayne@69 548 /* Decodes length bytes from a UTF-16 encoded buffer string and returns
jpayne@69 549 the corresponding Unicode object.
jpayne@69 550
jpayne@69 551 errors (if non-NULL) defines the error handling. It defaults
jpayne@69 552 to "strict".
jpayne@69 553
jpayne@69 554 If byteorder is non-NULL, the decoder starts decoding using the
jpayne@69 555 given byte order:
jpayne@69 556
jpayne@69 557 *byteorder == -1: little endian
jpayne@69 558 *byteorder == 0: native order
jpayne@69 559 *byteorder == 1: big endian
jpayne@69 560
jpayne@69 561 In native mode, the first two bytes of the stream are checked for a
jpayne@69 562 BOM mark. If found, the BOM mark is analysed, the byte order
jpayne@69 563 adjusted and the BOM skipped. In the other modes, no BOM mark
jpayne@69 564 interpretation is done. After completion, *byteorder is set to the
jpayne@69 565 current byte order at the end of input data.
jpayne@69 566
jpayne@69 567 If byteorder is NULL, the codec starts in native order mode.
jpayne@69 568
jpayne@69 569 */
jpayne@69 570
jpayne@69 571 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16(
jpayne@69 572 const char *string, /* UTF-16 encoded string */
jpayne@69 573 Py_ssize_t length, /* size of string */
jpayne@69 574 const char *errors, /* error handling */
jpayne@69 575 int *byteorder /* pointer to byteorder to use
jpayne@69 576 0=native;-1=LE,1=BE; updated on
jpayne@69 577 exit */
jpayne@69 578 );
jpayne@69 579
jpayne@69 580 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16Stateful(
jpayne@69 581 const char *string, /* UTF-16 encoded string */
jpayne@69 582 Py_ssize_t length, /* size of string */
jpayne@69 583 const char *errors, /* error handling */
jpayne@69 584 int *byteorder, /* pointer to byteorder to use
jpayne@69 585 0=native;-1=LE,1=BE; updated on
jpayne@69 586 exit */
jpayne@69 587 Py_ssize_t *consumed /* bytes consumed */
jpayne@69 588 );
jpayne@69 589
jpayne@69 590 /* Returns a Python string using the UTF-16 encoding in native byte
jpayne@69 591 order. The string always starts with a BOM mark. */
jpayne@69 592
jpayne@69 593 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String(
jpayne@69 594 PyObject *unicode /* Unicode object */
jpayne@69 595 );
jpayne@69 596
jpayne@69 597 /* --- Unicode-Escape Codecs ---------------------------------------------- */
jpayne@69 598
jpayne@69 599 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape(
jpayne@69 600 const char *string, /* Unicode-Escape encoded string */
jpayne@69 601 Py_ssize_t length, /* size of string */
jpayne@69 602 const char *errors /* error handling */
jpayne@69 603 );
jpayne@69 604
jpayne@69 605 PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString(
jpayne@69 606 PyObject *unicode /* Unicode object */
jpayne@69 607 );
jpayne@69 608
jpayne@69 609 /* --- Raw-Unicode-Escape Codecs ------------------------------------------ */
jpayne@69 610
jpayne@69 611 PyAPI_FUNC(PyObject*) PyUnicode_DecodeRawUnicodeEscape(
jpayne@69 612 const char *string, /* Raw-Unicode-Escape encoded string */
jpayne@69 613 Py_ssize_t length, /* size of string */
jpayne@69 614 const char *errors /* error handling */
jpayne@69 615 );
jpayne@69 616
jpayne@69 617 PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString(
jpayne@69 618 PyObject *unicode /* Unicode object */
jpayne@69 619 );
jpayne@69 620
jpayne@69 621 /* --- Latin-1 Codecs -----------------------------------------------------
jpayne@69 622
jpayne@69 623 Note: Latin-1 corresponds to the first 256 Unicode ordinals. */
jpayne@69 624
jpayne@69 625 PyAPI_FUNC(PyObject*) PyUnicode_DecodeLatin1(
jpayne@69 626 const char *string, /* Latin-1 encoded string */
jpayne@69 627 Py_ssize_t length, /* size of string */
jpayne@69 628 const char *errors /* error handling */
jpayne@69 629 );
jpayne@69 630
jpayne@69 631 PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String(
jpayne@69 632 PyObject *unicode /* Unicode object */
jpayne@69 633 );
jpayne@69 634
jpayne@69 635 /* --- ASCII Codecs -------------------------------------------------------
jpayne@69 636
jpayne@69 637 Only 7-bit ASCII data is accepted. All other codes generate errors.
jpayne@69 638
jpayne@69 639 */
jpayne@69 640
jpayne@69 641 PyAPI_FUNC(PyObject*) PyUnicode_DecodeASCII(
jpayne@69 642 const char *string, /* ASCII encoded string */
jpayne@69 643 Py_ssize_t length, /* size of string */
jpayne@69 644 const char *errors /* error handling */
jpayne@69 645 );
jpayne@69 646
jpayne@69 647 PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString(
jpayne@69 648 PyObject *unicode /* Unicode object */
jpayne@69 649 );
jpayne@69 650
jpayne@69 651 /* --- Character Map Codecs -----------------------------------------------
jpayne@69 652
jpayne@69 653 This codec uses mappings to encode and decode characters.
jpayne@69 654
jpayne@69 655 Decoding mappings must map byte ordinals (integers in the range from 0 to
jpayne@69 656 255) to Unicode strings, integers (which are then interpreted as Unicode
jpayne@69 657 ordinals) or None. Unmapped data bytes (ones which cause a LookupError)
jpayne@69 658 as well as mapped to None, 0xFFFE or '\ufffe' are treated as "undefined
jpayne@69 659 mapping" and cause an error.
jpayne@69 660
jpayne@69 661 Encoding mappings must map Unicode ordinal integers to bytes objects,
jpayne@69 662 integers in the range from 0 to 255 or None. Unmapped character
jpayne@69 663 ordinals (ones which cause a LookupError) as well as mapped to
jpayne@69 664 None are treated as "undefined mapping" and cause an error.
jpayne@69 665
jpayne@69 666 */
jpayne@69 667
jpayne@69 668 PyAPI_FUNC(PyObject*) PyUnicode_DecodeCharmap(
jpayne@69 669 const char *string, /* Encoded string */
jpayne@69 670 Py_ssize_t length, /* size of string */
jpayne@69 671 PyObject *mapping, /* decoding mapping */
jpayne@69 672 const char *errors /* error handling */
jpayne@69 673 );
jpayne@69 674
jpayne@69 675 PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString(
jpayne@69 676 PyObject *unicode, /* Unicode object */
jpayne@69 677 PyObject *mapping /* encoding mapping */
jpayne@69 678 );
jpayne@69 679
jpayne@69 680 /* --- MBCS codecs for Windows -------------------------------------------- */
jpayne@69 681
jpayne@69 682 #ifdef MS_WINDOWS
jpayne@69 683 PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS(
jpayne@69 684 const char *string, /* MBCS encoded string */
jpayne@69 685 Py_ssize_t length, /* size of string */
jpayne@69 686 const char *errors /* error handling */
jpayne@69 687 );
jpayne@69 688
jpayne@69 689 PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCSStateful(
jpayne@69 690 const char *string, /* MBCS encoded string */
jpayne@69 691 Py_ssize_t length, /* size of string */
jpayne@69 692 const char *errors, /* error handling */
jpayne@69 693 Py_ssize_t *consumed /* bytes consumed */
jpayne@69 694 );
jpayne@69 695
jpayne@69 696 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
jpayne@69 697 PyAPI_FUNC(PyObject*) PyUnicode_DecodeCodePageStateful(
jpayne@69 698 int code_page, /* code page number */
jpayne@69 699 const char *string, /* encoded string */
jpayne@69 700 Py_ssize_t length, /* size of string */
jpayne@69 701 const char *errors, /* error handling */
jpayne@69 702 Py_ssize_t *consumed /* bytes consumed */
jpayne@69 703 );
jpayne@69 704 #endif
jpayne@69 705
jpayne@69 706 PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString(
jpayne@69 707 PyObject *unicode /* Unicode object */
jpayne@69 708 );
jpayne@69 709
jpayne@69 710 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
jpayne@69 711 PyAPI_FUNC(PyObject*) PyUnicode_EncodeCodePage(
jpayne@69 712 int code_page, /* code page number */
jpayne@69 713 PyObject *unicode, /* Unicode object */
jpayne@69 714 const char *errors /* error handling */
jpayne@69 715 );
jpayne@69 716 #endif
jpayne@69 717
jpayne@69 718 #endif /* MS_WINDOWS */
jpayne@69 719
jpayne@69 720 /* --- Locale encoding --------------------------------------------------- */
jpayne@69 721
jpayne@69 722 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
jpayne@69 723 /* Decode a string from the current locale encoding. The decoder is strict if
jpayne@69 724 *surrogateescape* is equal to zero, otherwise it uses the 'surrogateescape'
jpayne@69 725 error handler (PEP 383) to escape undecodable bytes. If a byte sequence can
jpayne@69 726 be decoded as a surrogate character and *surrogateescape* is not equal to
jpayne@69 727 zero, the byte sequence is escaped using the 'surrogateescape' error handler
jpayne@69 728 instead of being decoded. *str* must end with a null character but cannot
jpayne@69 729 contain embedded null characters. */
jpayne@69 730
jpayne@69 731 PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocaleAndSize(
jpayne@69 732 const char *str, /* null-terminated string in the locale encoding */
jpayne@69 733 Py_ssize_t len, /* length of str */
jpayne@69 734 const char *errors); /* error handling */
jpayne@69 735
jpayne@69 736 /* Similar to PyUnicode_DecodeLocaleAndSize(), but compute the string
jpayne@69 737 length using strlen(). */
jpayne@69 738
jpayne@69 739 PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocale(
jpayne@69 740 const char *str, /* null-terminated string; length taken with strlen() */
jpayne@69 741 const char *errors); /* error handling */
jpayne@69 742
jpayne@69 743 /* Encode a Unicode object to the current locale encoding. The encoder is
jpayne@69 744 strict if *surrogateescape* is equal to zero, otherwise the
jpayne@69 745 "surrogateescape" error handler is used. Return a bytes object. The string
jpayne@69 746 cannot contain embedded null characters. */
jpayne@69 747
jpayne@69 748 PyAPI_FUNC(PyObject*) PyUnicode_EncodeLocale(
jpayne@69 749 PyObject *unicode, /* Unicode object */
jpayne@69 750 const char *errors /* error handling */
jpayne@69 751 );
jpayne@69 752 #endif
jpayne@69 753
jpayne@69 754 /* --- File system encoding ---------------------------------------------- */
jpayne@69 755
jpayne@69 756 /* ParseTuple converter: encode str objects to bytes using
jpayne@69 757 PyUnicode_EncodeFSDefault(); bytes objects are output as-is. */
jpayne@69 758
jpayne@69 759 PyAPI_FUNC(int) PyUnicode_FSConverter(PyObject*, void*);
jpayne@69 760
jpayne@69 761 /* ParseTuple converter: decode bytes objects to unicode using
jpayne@69 762 PyUnicode_DecodeFSDefaultAndSize(); str objects are output as-is. */
jpayne@69 763
jpayne@69 764 PyAPI_FUNC(int) PyUnicode_FSDecoder(PyObject*, void*);
jpayne@69 765
jpayne@69 766 /* Decode a null-terminated string using Py_FileSystemDefaultEncoding
jpayne@69 767 and the "surrogateescape" error handler.
jpayne@69 768
jpayne@69 769 If Py_FileSystemDefaultEncoding is not set, fall back to the locale
jpayne@69 770 encoding.
jpayne@69 771
jpayne@69 772 Use PyUnicode_DecodeFSDefaultAndSize() if the string length is known.
jpayne@69 773 */
jpayne@69 774
jpayne@69 775 PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault(
jpayne@69 776 const char *s /* encoded string */
jpayne@69 777 );
jpayne@69 778
jpayne@69 779 /* Decode a string using Py_FileSystemDefaultEncoding
jpayne@69 780 and the "surrogateescape" error handler.
jpayne@69 781
jpayne@69 782 If Py_FileSystemDefaultEncoding is not set, fall back to the locale
jpayne@69 783 encoding.
jpayne@69 784 */
jpayne@69 785
jpayne@69 786 PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize(
jpayne@69 787 const char *s, /* encoded string */
jpayne@69 788 Py_ssize_t size /* size */
jpayne@69 789 );
jpayne@69 790
jpayne@69 791 /* Encode a Unicode object to Py_FileSystemDefaultEncoding with the
jpayne@69 792 "surrogateescape" error handler, and return bytes.
jpayne@69 793
jpayne@69 794 If Py_FileSystemDefaultEncoding is not set, fall back to the locale
jpayne@69 795 encoding.
jpayne@69 796 */
jpayne@69 797
jpayne@69 798 PyAPI_FUNC(PyObject*) PyUnicode_EncodeFSDefault(
jpayne@69 799 PyObject *unicode /* Unicode object */
jpayne@69 800 );
jpayne@69 801
jpayne@69 802 /* --- Methods & Slots ----------------------------------------------------
jpayne@69 803
jpayne@69 804 These are capable of handling Unicode objects and strings on input
jpayne@69 805 (we refer to them as strings in the descriptions) and return
jpayne@69 806 Unicode objects or integers as appropriate. */
jpayne@69 807
jpayne@69 808 /* Concat two strings giving a new Unicode string. */
jpayne@69 809
jpayne@69 810 PyAPI_FUNC(PyObject*) PyUnicode_Concat(
jpayne@69 811 PyObject *left, /* Left string */
jpayne@69 812 PyObject *right /* Right string */
jpayne@69 813 );
jpayne@69 814
jpayne@69 815 /* Concat two strings and put the result in *pleft
jpayne@69 816 (sets *pleft to NULL on error) */
jpayne@69 817
jpayne@69 818 PyAPI_FUNC(void) PyUnicode_Append(
jpayne@69 819 PyObject **pleft, /* Pointer to left string */
jpayne@69 820 PyObject *right /* Right string */
jpayne@69 821 );
jpayne@69 822
jpayne@69 823 /* Concat two strings, put the result in *pleft and drop the right object
jpayne@69 824 (sets *pleft to NULL on error) */
jpayne@69 825
jpayne@69 826 PyAPI_FUNC(void) PyUnicode_AppendAndDel(
jpayne@69 827 PyObject **pleft, /* Pointer to left string */
jpayne@69 828 PyObject *right /* Right string */
jpayne@69 829 );
jpayne@69 830
jpayne@69 831 /* Split a string giving a list of Unicode strings.
jpayne@69 832
jpayne@69 833 If sep is NULL, splitting will be done at all whitespace
jpayne@69 834 substrings. Otherwise, splits occur at the given separator.
jpayne@69 835
jpayne@69 836 At most maxsplit splits will be done. If negative, no limit is set.
jpayne@69 837
jpayne@69 838 Separators are not included in the resulting list.
jpayne@69 839
jpayne@69 840 */
jpayne@69 841
jpayne@69 842 PyAPI_FUNC(PyObject*) PyUnicode_Split(
jpayne@69 843 PyObject *s, /* String to split */
jpayne@69 844 PyObject *sep, /* String separator */
jpayne@69 845 Py_ssize_t maxsplit /* Maxsplit count */
jpayne@69 846 );
jpayne@69 847
jpayne@69 848 /* Ditto, but split at line breaks.
jpayne@69 849
jpayne@69 850 CRLF is considered to be one line break. Line breaks are not
jpayne@69 851 included in the resulting list unless keepends is true. */
jpayne@69 852
jpayne@69 853 PyAPI_FUNC(PyObject*) PyUnicode_Splitlines(
jpayne@69 854 PyObject *s, /* String to split */
jpayne@69 855 int keepends /* If true, line end markers are included */
jpayne@69 856 );
jpayne@69 857
jpayne@69 858 /* Partition a string using a given separator. */
jpayne@69 859
jpayne@69 860 PyAPI_FUNC(PyObject*) PyUnicode_Partition(
jpayne@69 861 PyObject *s, /* String to partition */
jpayne@69 862 PyObject *sep /* String separator */
jpayne@69 863 );
jpayne@69 864
jpayne@69 865 /* Partition a string using a given separator, searching from the end of the
jpayne@69 866 string. */
jpayne@69 867
jpayne@69 868 PyAPI_FUNC(PyObject*) PyUnicode_RPartition(
jpayne@69 869 PyObject *s, /* String to partition */
jpayne@69 870 PyObject *sep /* String separator */
jpayne@69 871 );
jpayne@69 872
jpayne@69 873 /* Split a string giving a list of Unicode strings.
jpayne@69 874
jpayne@69 875 If sep is NULL, splitting will be done at all whitespace
jpayne@69 876 substrings. Otherwise, splits occur at the given separator.
jpayne@69 877
jpayne@69 878 At most maxsplit splits will be done. Unlike PyUnicode_Split,
jpayne@69 879 PyUnicode_RSplit splits from the end of the string. If maxsplit is
jpayne@69 880 negative, no limit is set.
jpayne@69 881
jpayne@69 882 Separators are not included in the resulting list.
jpayne@69 883
jpayne@69 884 */
jpayne@69 885
jpayne@69 886 PyAPI_FUNC(PyObject*) PyUnicode_RSplit(
jpayne@69 887 PyObject *s, /* String to split */
jpayne@69 888 PyObject *sep, /* String separator */
jpayne@69 889 Py_ssize_t maxsplit /* Maxsplit count */
jpayne@69 890 );
jpayne@69 891
jpayne@69 892 /* Translate a string by applying a character mapping table to it and
jpayne@69 893 return the resulting Unicode object.
jpayne@69 894
jpayne@69 895 The mapping table must map Unicode ordinal integers to Unicode strings,
jpayne@69 896 Unicode ordinal integers or None (causing deletion of the character).
jpayne@69 897
jpayne@69 898 Mapping tables may be dictionaries or sequences. Unmapped character
jpayne@69 899 ordinals (ones which cause a LookupError) are left untouched and
jpayne@69 900 are copied as-is.
jpayne@69 901
jpayne@69 902 */
jpayne@69 903
jpayne@69 904 PyAPI_FUNC(PyObject *) PyUnicode_Translate(
jpayne@69 905 PyObject *str, /* String */
jpayne@69 906 PyObject *table, /* Translate table */
jpayne@69 907 const char *errors /* error handling */
jpayne@69 908 );
jpayne@69 909
jpayne@69 910 /* Join a sequence of strings using the given separator and return
jpayne@69 911 the resulting Unicode string. */
jpayne@69 912
jpayne@69 913 PyAPI_FUNC(PyObject*) PyUnicode_Join(
jpayne@69 914 PyObject *separator, /* Separator string */
jpayne@69 915 PyObject *seq /* Sequence object */
jpayne@69 916 );
jpayne@69 917
jpayne@69 918 /* Return 1 if substr matches str[start:end] at the given tail end, 0
jpayne@69 919 otherwise. Return -1 if an error occurred. */
jpayne@69 920
jpayne@69 921 PyAPI_FUNC(Py_ssize_t) PyUnicode_Tailmatch(
jpayne@69 922 PyObject *str, /* String */
jpayne@69 923 PyObject *substr, /* Prefix or Suffix string */
jpayne@69 924 Py_ssize_t start, /* Start index */
jpayne@69 925 Py_ssize_t end, /* Stop index */
jpayne@69 926 int direction /* Tail end: -1 prefix, +1 suffix */
jpayne@69 927 );
jpayne@69 928
jpayne@69 929 /* Return the first position of substr in str[start:end] using the
jpayne@69 930 given search direction or -1 if not found. -2 is returned in case
jpayne@69 931 an error occurred and an exception is set. */
jpayne@69 932
jpayne@69 933 PyAPI_FUNC(Py_ssize_t) PyUnicode_Find(
jpayne@69 934 PyObject *str, /* String */
jpayne@69 935 PyObject *substr, /* Substring to find */
jpayne@69 936 Py_ssize_t start, /* Start index */
jpayne@69 937 Py_ssize_t end, /* Stop index */
jpayne@69 938 int direction /* Find direction: +1 forward, -1 backward */
jpayne@69 939 );
jpayne@69 940
jpayne@69 941 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
jpayne@69 942 /* Like PyUnicode_Find, but search for single character only. */
jpayne@69 943 PyAPI_FUNC(Py_ssize_t) PyUnicode_FindChar(
jpayne@69 944 PyObject *str, /* String */
jpayne@69 945 Py_UCS4 ch, /* Character to find */
jpayne@69 946 Py_ssize_t start, /* Start index */
jpayne@69 947 Py_ssize_t end, /* Stop index */
jpayne@69 948 int direction /* Find direction: +1 forward, -1 backward */
jpayne@69 949 );
jpayne@69 950 #endif
jpayne@69 951
jpayne@69 952 /* Count the number of occurrences of substr in str[start:end]. */
jpayne@69 953
jpayne@69 954 PyAPI_FUNC(Py_ssize_t) PyUnicode_Count(
jpayne@69 955 PyObject *str, /* String */
jpayne@69 956 PyObject *substr, /* Substring to count */
jpayne@69 957 Py_ssize_t start, /* Start index */
jpayne@69 958 Py_ssize_t end /* Stop index */
jpayne@69 959 );
jpayne@69 960
jpayne@69 961 /* Replace at most maxcount occurrences of substr in str with replstr
jpayne@69 962 and return the resulting Unicode object. */
jpayne@69 963
jpayne@69 964 PyAPI_FUNC(PyObject *) PyUnicode_Replace(
jpayne@69 965 PyObject *str, /* String */
jpayne@69 966 PyObject *substr, /* Substring to find */
jpayne@69 967 PyObject *replstr, /* Substring to replace */
jpayne@69 968 Py_ssize_t maxcount /* Max. number of replacements to apply;
jpayne@69 969 -1 = all */
jpayne@69 970 );
jpayne@69 971
jpayne@69 972 /* Compare two strings and return -1, 0, 1 for less than, equal,
jpayne@69 973 greater than resp.
jpayne@69 974 Raise an exception and return -1 on error. */
jpayne@69 975
jpayne@69 976 PyAPI_FUNC(int) PyUnicode_Compare(
jpayne@69 977 PyObject *left, /* Left string */
jpayne@69 978 PyObject *right /* Right string */
jpayne@69 979 );
jpayne@69 980
jpayne@69 981 /* Compare a Unicode object with C string and return -1, 0, 1 for less than,
jpayne@69 982 equal, and greater than, respectively. It is best to pass only
jpayne@69 983 ASCII-encoded strings, but the function interprets the input string as
jpayne@69 984 ISO-8859-1 if it contains non-ASCII characters.
jpayne@69 985 This function does not raise exceptions. */
jpayne@69 986
jpayne@69 987 PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString(
jpayne@69 988 PyObject *left, /* Unicode object */
jpayne@69 989 const char *right /* ASCII-encoded string */
jpayne@69 990 );
jpayne@69 991
jpayne@69 992 /* Rich compare two strings and return one of the following:
jpayne@69 993
jpayne@69 994 - NULL in case an exception was raised
jpayne@69 995 - Py_True or Py_False for successful comparisons
jpayne@69 996 - Py_NotImplemented in case the type combination is unknown
jpayne@69 997
jpayne@69 998 Possible values for op:
jpayne@69 999
jpayne@69 1000 Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE
jpayne@69 1001
jpayne@69 1002 */
jpayne@69 1003
jpayne@69 1004 PyAPI_FUNC(PyObject *) PyUnicode_RichCompare(
jpayne@69 1005 PyObject *left, /* Left string */
jpayne@69 1006 PyObject *right, /* Right string */
jpayne@69 1007 int op /* Operation: Py_EQ, Py_NE, Py_GT, etc. */
jpayne@69 1008 );
jpayne@69 1009
jpayne@69 1010 /* Apply an argument tuple or dictionary to a format string and return
jpayne@69 1011 the resulting Unicode string. */
jpayne@69 1012
jpayne@69 1013 PyAPI_FUNC(PyObject *) PyUnicode_Format(
jpayne@69 1014 PyObject *format, /* Format string */
jpayne@69 1015 PyObject *args /* Argument tuple or dictionary */
jpayne@69 1016 );
jpayne@69 1017
jpayne@69 1018 /* Check whether element is contained in container and return 1/0
jpayne@69 1019 accordingly.
jpayne@69 1020
jpayne@69 1021 element has to coerce to a one element Unicode string. -1 is
jpayne@69 1022 returned in case of an error. */
jpayne@69 1023
jpayne@69 1024 PyAPI_FUNC(int) PyUnicode_Contains(
jpayne@69 1025 PyObject *container, /* Container string */
jpayne@69 1026 PyObject *element /* Element string */
jpayne@69 1027 );
jpayne@69 1028
jpayne@69 1029 /* Checks whether argument is a valid identifier. */
jpayne@69 1030
jpayne@69 1031 PyAPI_FUNC(int) PyUnicode_IsIdentifier(PyObject *s);
jpayne@69 1032
jpayne@69 1033 /* === Characters Type APIs =============================================== */
jpayne@69 1034
jpayne@69 1035 #ifndef Py_LIMITED_API
jpayne@69 1036 # define Py_CPYTHON_UNICODEOBJECT_H
jpayne@69 1037 # include "cpython/unicodeobject.h"
jpayne@69 1038 # undef Py_CPYTHON_UNICODEOBJECT_H
jpayne@69 1039 #endif
jpayne@69 1040
jpayne@69 1041 #ifdef __cplusplus
jpayne@69 1042 }
jpayne@69 1043 #endif
jpayne@69 1044 #endif /* !Py_UNICODEOBJECT_H */