Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/include/python3.8/unicodeobject.h @ 69:33d812a61356
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 17:55:14 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 69:33d812a61356 |
---|---|
1 #ifndef Py_UNICODEOBJECT_H | |
2 #define Py_UNICODEOBJECT_H | |
3 | |
4 #include <stdarg.h> | |
5 | |
6 /* | |
7 | |
8 Unicode implementation based on original code by Fredrik Lundh, | |
9 modified by Marc-Andre Lemburg (mal@lemburg.com) according to the | |
10 Unicode Integration Proposal. (See | |
11 http://www.egenix.com/files/python/unicode-proposal.txt). | |
12 | |
13 Copyright (c) Corporation for National Research Initiatives. | |
14 | |
15 | |
16 Original header: | |
17 -------------------------------------------------------------------- | |
18 | |
19 * Yet another Unicode string type for Python. This type supports the | |
20 * 16-bit Basic Multilingual Plane (BMP) only. | |
21 * | |
22 * Written by Fredrik Lundh, January 1999. | |
23 * | |
24 * Copyright (c) 1999 by Secret Labs AB. | |
25 * Copyright (c) 1999 by Fredrik Lundh. | |
26 * | |
27 * fredrik@pythonware.com | |
28 * http://www.pythonware.com | |
29 * | |
30 * -------------------------------------------------------------------- | |
31 * This Unicode String Type is | |
32 * | |
33 * Copyright (c) 1999 by Secret Labs AB | |
34 * Copyright (c) 1999 by Fredrik Lundh | |
35 * | |
36 * By obtaining, using, and/or copying this software and/or its | |
37 * associated documentation, you agree that you have read, understood, | |
38 * and will comply with the following terms and conditions: | |
39 * | |
40 * Permission to use, copy, modify, and distribute this software and its | |
41 * associated documentation for any purpose and without fee is hereby | |
42 * granted, provided that the above copyright notice appears in all | |
43 * copies, and that both that copyright notice and this permission notice | |
44 * appear in supporting documentation, and that the name of Secret Labs | |
45 * AB or the author not be used in advertising or publicity pertaining to | |
46 * distribution of the software without specific, written prior | |
47 * permission. | |
48 * | |
49 * SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO | |
50 * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND | |
51 * FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR | |
52 * ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |
53 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |
54 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT | |
55 * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |
56 * -------------------------------------------------------------------- */ | |
57 | |
58 #include <ctype.h> | |
59 | |
60 /* === Internal API ======================================================= */ | |
61 | |
62 /* --- Internal Unicode Format -------------------------------------------- */ | |
63 | |
64 /* Python 3.x requires unicode */ | |
65 #define Py_USING_UNICODE | |
66 | |
67 #ifndef SIZEOF_WCHAR_T | |
68 #error Must define SIZEOF_WCHAR_T | |
69 #endif | |
70 | |
71 #define Py_UNICODE_SIZE SIZEOF_WCHAR_T | |
72 | |
73 /* If wchar_t can be used for UCS-4 storage, set Py_UNICODE_WIDE. | |
74 Otherwise, Unicode strings are stored as UCS-2 (with limited support | |
75 for UTF-16) */ | |
76 | |
77 #if Py_UNICODE_SIZE >= 4 | |
78 #define Py_UNICODE_WIDE | |
79 #endif | |
80 | |
81 /* Set these flags if the platform has "wchar.h" and the | |
82 wchar_t type is a 16-bit unsigned type */ | |
83 /* #define HAVE_WCHAR_H */ | |
84 /* #define HAVE_USABLE_WCHAR_T */ | |
85 | |
86 /* If the compiler provides a wchar_t type we try to support it | |
87 through the interface functions PyUnicode_FromWideChar(), | |
88 PyUnicode_AsWideChar() and PyUnicode_AsWideCharString(). */ | |
89 | |
90 #ifdef HAVE_USABLE_WCHAR_T | |
91 # ifndef HAVE_WCHAR_H | |
92 # define HAVE_WCHAR_H | |
93 # endif | |
94 #endif | |
95 | |
96 #ifdef HAVE_WCHAR_H | |
97 # include <wchar.h> | |
98 #endif | |
99 | |
100 /* Py_UCS4 and Py_UCS2 are typedefs for the respective | |
101 unicode representations. */ | |
102 typedef uint32_t Py_UCS4; | |
103 typedef uint16_t Py_UCS2; | |
104 typedef uint8_t Py_UCS1; | |
105 | |
106 #ifdef __cplusplus | |
107 extern "C" { | |
108 #endif | |
109 | |
110 | |
111 PyAPI_DATA(PyTypeObject) PyUnicode_Type; | |
112 PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type; | |
113 | |
114 #define PyUnicode_Check(op) \ | |
115 PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS) | |
116 #define PyUnicode_CheckExact(op) (Py_TYPE(op) == &PyUnicode_Type) | |
117 | |
118 /* --- Constants ---------------------------------------------------------- */ | |
119 | |
120 /* This Unicode character will be used as replacement character during | |
121 decoding if the errors argument is set to "replace". Note: the | |
122 Unicode character U+FFFD is the official REPLACEMENT CHARACTER in | |
123 Unicode 3.0. */ | |
124 | |
125 #define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UCS4) 0xFFFD) | |
126 | |
127 /* === Public API ========================================================= */ | |
128 | |
129 /* Similar to PyUnicode_FromUnicode(), but u points to UTF-8 encoded bytes */ | |
130 PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize( | |
131 const char *u, /* UTF-8 encoded string */ | |
132 Py_ssize_t size /* size of buffer */ | |
133 ); | |
134 | |
135 /* Similar to PyUnicode_FromUnicode(), but u points to null-terminated | |
136 UTF-8 encoded bytes. The size is determined with strlen(). */ | |
137 PyAPI_FUNC(PyObject*) PyUnicode_FromString( | |
138 const char *u /* UTF-8 encoded string */ | |
139 ); | |
140 | |
141 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 | |
142 PyAPI_FUNC(PyObject*) PyUnicode_Substring( | |
143 PyObject *str, | |
144 Py_ssize_t start, | |
145 Py_ssize_t end); | |
146 #endif | |
147 | |
148 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 | |
149 /* Copy the string into a UCS4 buffer including the null character if copy_null | |
150 is set. Return NULL and raise an exception on error. Raise a SystemError if | |
151 the buffer is smaller than the string. Return buffer on success. | |
152 | |
153 buflen is the length of the buffer in (Py_UCS4) characters. */ | |
154 PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4( | |
155 PyObject *unicode, | |
156 Py_UCS4* buffer, | |
157 Py_ssize_t buflen, | |
158 int copy_null); | |
159 | |
160 /* Copy the string into a UCS4 buffer. A new buffer is allocated using | |
161 PyMem_Malloc; if this fails, NULL is returned with a memory error | |
162 exception set. */ | |
163 PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4Copy(PyObject *unicode); | |
164 #endif | |
165 | |
166 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 | |
167 /* Get the length of the Unicode object. */ | |
168 | |
169 PyAPI_FUNC(Py_ssize_t) PyUnicode_GetLength( | |
170 PyObject *unicode | |
171 ); | |
172 #endif | |
173 | |
174 /* Get the number of Py_UNICODE units in the | |
175 string representation. */ | |
176 | |
177 Py_DEPRECATED(3.3) PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize( | |
178 PyObject *unicode /* Unicode object */ | |
179 ); | |
180 | |
181 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 | |
182 /* Read a character from the string. */ | |
183 | |
184 PyAPI_FUNC(Py_UCS4) PyUnicode_ReadChar( | |
185 PyObject *unicode, | |
186 Py_ssize_t index | |
187 ); | |
188 | |
189 /* Write a character to the string. The string must have been created through | |
190 PyUnicode_New, must not be shared, and must not have been hashed yet. | |
191 | |
192 Return 0 on success, -1 on error. */ | |
193 | |
194 PyAPI_FUNC(int) PyUnicode_WriteChar( | |
195 PyObject *unicode, | |
196 Py_ssize_t index, | |
197 Py_UCS4 character | |
198 ); | |
199 #endif | |
200 | |
201 /* Resize a Unicode object. The length is the number of characters, except | |
202 if the kind of the string is PyUnicode_WCHAR_KIND: in this case, the length | |
203 is the number of Py_UNICODE characters. | |
204 | |
205 *unicode is modified to point to the new (resized) object and 0 | |
206 returned on success. | |
207 | |
208 Try to resize the string in place (which is usually faster than allocating | |
209 a new string and copy characters), or create a new string. | |
210 | |
211 Error handling is implemented as follows: an exception is set, -1 | |
212 is returned and *unicode left untouched. | |
213 | |
214 WARNING: The function doesn't check string content, the result may not be a | |
215 string in canonical representation. */ | |
216 | |
217 PyAPI_FUNC(int) PyUnicode_Resize( | |
218 PyObject **unicode, /* Pointer to the Unicode object */ | |
219 Py_ssize_t length /* New length */ | |
220 ); | |
221 | |
222 /* Decode obj to a Unicode object. | |
223 | |
224 bytes, bytearray and other bytes-like objects are decoded according to the | |
225 given encoding and error handler. The encoding and error handler can be | |
226 NULL to have the interface use UTF-8 and "strict". | |
227 | |
228 All other objects (including Unicode objects) raise an exception. | |
229 | |
230 The API returns NULL in case of an error. The caller is responsible | |
231 for decref'ing the returned objects. | |
232 | |
233 */ | |
234 | |
235 PyAPI_FUNC(PyObject*) PyUnicode_FromEncodedObject( | |
236 PyObject *obj, /* Object */ | |
237 const char *encoding, /* encoding */ | |
238 const char *errors /* error handling */ | |
239 ); | |
240 | |
241 /* Copy an instance of a Unicode subtype to a new true Unicode object if | |
242 necessary. If obj is already a true Unicode object (not a subtype), return | |
243 the reference with *incremented* refcount. | |
244 | |
245 The API returns NULL in case of an error. The caller is responsible | |
246 for decref'ing the returned objects. | |
247 | |
248 */ | |
249 | |
250 PyAPI_FUNC(PyObject*) PyUnicode_FromObject( | |
251 PyObject *obj /* Object */ | |
252 ); | |
253 | |
254 PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV( | |
255 const char *format, /* ASCII-encoded string */ | |
256 va_list vargs | |
257 ); | |
258 PyAPI_FUNC(PyObject *) PyUnicode_FromFormat( | |
259 const char *format, /* ASCII-encoded string */ | |
260 ... | |
261 ); | |
262 | |
263 PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **); | |
264 PyAPI_FUNC(void) PyUnicode_InternImmortal(PyObject **); | |
265 PyAPI_FUNC(PyObject *) PyUnicode_InternFromString( | |
266 const char *u /* UTF-8 encoded string */ | |
267 ); | |
268 | |
269 /* Use only if you know it's a string */ | |
270 #define PyUnicode_CHECK_INTERNED(op) \ | |
271 (((PyASCIIObject *)(op))->state.interned) | |
272 | |
273 /* --- wchar_t support for platforms which support it --------------------- */ | |
274 | |
275 #ifdef HAVE_WCHAR_H | |
276 | |
277 /* Create a Unicode Object from the wchar_t buffer w of the given | |
278 size. | |
279 | |
280 The buffer is copied into the new object. */ | |
281 | |
282 PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar( | |
283 const wchar_t *w, /* wchar_t buffer */ | |
284 Py_ssize_t size /* size of buffer */ | |
285 ); | |
286 | |
287 /* Copies the Unicode Object contents into the wchar_t buffer w. At | |
288 most size wchar_t characters are copied. | |
289 | |
290 Note that the resulting wchar_t string may or may not be | |
291 0-terminated. It is the responsibility of the caller to make sure | |
292 that the wchar_t string is 0-terminated in case this is required by | |
293 the application. | |
294 | |
295 Returns the number of wchar_t characters copied (excluding a | |
296 possibly trailing 0-termination character) or -1 in case of an | |
297 error. */ | |
298 | |
299 PyAPI_FUNC(Py_ssize_t) PyUnicode_AsWideChar( | |
300 PyObject *unicode, /* Unicode object */ | |
301 wchar_t *w, /* wchar_t buffer */ | |
302 Py_ssize_t size /* size of buffer */ | |
303 ); | |
304 | |
305 /* Convert the Unicode object to a wide character string. The output string | |
306 always ends with a nul character. If size is not NULL, write the number of | |
307 wide characters (excluding the null character) into *size. | |
308 | |
309 Returns a buffer allocated by PyMem_Malloc() (use PyMem_Free() to free it) | |
310 on success. On error, returns NULL and raises a MemoryError; *size is | |
311 undefined in that case. */ | |
312 | |
313 PyAPI_FUNC(wchar_t*) PyUnicode_AsWideCharString( | |
314 PyObject *unicode, /* Unicode object */ | |
315 Py_ssize_t *size /* number of characters of the result */ | |
316 ); | |
317 | |
318 #endif | |
319 | |
320 /* --- Unicode ordinals --------------------------------------------------- */ | |
321 | |
322 /* Create a Unicode Object from the given Unicode code point ordinal. | |
323 | |
324 The ordinal must be in range(0x110000). A ValueError is | |
325 raised in case it is not. | |
326 | |
327 */ | |
328 | |
329 PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal); | |
330 | |
331 /* --- Free-list management ----------------------------------------------- */ | |
332 | |
333 /* Clear the free list used by the Unicode implementation. | |
334 | |
335 This can be used to release memory used for objects on the free | |
336 list back to the Python memory allocator. | |
337 | |
338 */ | |
339 | |
340 PyAPI_FUNC(int) PyUnicode_ClearFreeList(void); | |
341 | |
342 /* === Builtin Codecs ===================================================== | |
343 | |
344 Many of these APIs take two arguments encoding and errors. These | |
345 parameters encoding and errors have the same semantics as the ones | |
346 of the builtin str() API. | |
347 | |
348 Setting encoding to NULL causes the default encoding (UTF-8) to be used. | |
349 | |
350 Error handling is set by errors which may also be set to NULL | |
351 meaning to use the default handling defined for the codec. Default | |
352 error handling for all builtin codecs is "strict" (ValueErrors are | |
353 raised). | |
354 | |
355 The codecs all use a similar interface. Only deviations from the | |
356 generic ones are documented. | |
357 | |
358 */ | |
359 | |
360 /* --- Manage the default encoding ---------------------------------------- */ | |
361 | |
362 /* Returns "utf-8". */ | |
363 PyAPI_FUNC(const char*) PyUnicode_GetDefaultEncoding(void); | |
364 | |
365 /* --- Generic Codecs ----------------------------------------------------- */ | |
366 | |
367 /* Create a Unicode object by decoding the encoded string s of the | |
368 given size. */ | |
369 | |
370 PyAPI_FUNC(PyObject*) PyUnicode_Decode( | |
371 const char *s, /* encoded string */ | |
372 Py_ssize_t size, /* size of buffer */ | |
373 const char *encoding, /* encoding */ | |
374 const char *errors /* error handling */ | |
375 ); | |
376 | |
377 /* Decode a Unicode object unicode and return the result as Python | |
378 object. | |
379 | |
380 This API is DEPRECATED. The only supported standard encoding is rot13. | |
381 Use PyCodec_Decode() to decode with rot13 and non-standard codecs | |
382 that decode from str. */ | |
383 | |
384 Py_DEPRECATED(3.6) PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedObject( | |
385 PyObject *unicode, /* Unicode object */ | |
386 const char *encoding, /* encoding */ | |
387 const char *errors /* error handling */ | |
388 ); | |
389 | |
390 /* Decode a Unicode object unicode and return the result as Unicode | |
391 object. | |
392 | |
393 This API is DEPRECATED. The only supported standard encoding is rot13. | |
394 Use PyCodec_Decode() to decode with rot13 and non-standard codecs | |
395 that decode from str to str. */ | |
396 | |
397 Py_DEPRECATED(3.6) PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedUnicode( | |
398 PyObject *unicode, /* Unicode object */ | |
399 const char *encoding, /* encoding */ | |
400 const char *errors /* error handling */ | |
401 ); | |
402 | |
403 /* Encodes a Unicode object and returns the result as Python | |
404 object. | |
405 | |
406 This API is DEPRECATED. It is superseded by PyUnicode_AsEncodedString() | |
407 since all standard encodings (except rot13) encode str to bytes. | |
408 Use PyCodec_Encode() for encoding with rot13 and non-standard codecs | |
409 that encode from str to non-bytes. */ | |
410 | |
411 Py_DEPRECATED(3.6) PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject( | |
412 PyObject *unicode, /* Unicode object */ | |
413 const char *encoding, /* encoding */ | |
414 const char *errors /* error handling */ | |
415 ); | |
416 | |
417 /* Encodes a Unicode object and returns the result as Python string | |
418 object. */ | |
419 | |
420 PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString( | |
421 PyObject *unicode, /* Unicode object */ | |
422 const char *encoding, /* encoding */ | |
423 const char *errors /* error handling */ | |
424 ); | |
425 | |
426 /* Encodes a Unicode object and returns the result as Unicode | |
427 object. | |
428 | |
429 This API is DEPRECATED. The only supported standard encoding is rot13. | |
430 Use PyCodec_Encode() to encode with rot13 and non-standard codecs | |
431 that encode from str to str. */ | |
432 | |
433 Py_DEPRECATED(3.6) PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedUnicode( | |
434 PyObject *unicode, /* Unicode object */ | |
435 const char *encoding, /* encoding */ | |
436 const char *errors /* error handling */ | |
437 ); | |
438 | |
439 /* Build an encoding map. */ | |
440 | |
441 PyAPI_FUNC(PyObject*) PyUnicode_BuildEncodingMap( | |
442 PyObject* string /* 256 character map */ | |
443 ); | |
444 | |
445 /* --- UTF-7 Codecs ------------------------------------------------------- */ | |
446 | |
447 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7( | |
448 const char *string, /* UTF-7 encoded string */ | |
449 Py_ssize_t length, /* size of string */ | |
450 const char *errors /* error handling */ | |
451 ); | |
452 | |
453 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful( | |
454 const char *string, /* UTF-7 encoded string */ | |
455 Py_ssize_t length, /* size of string */ | |
456 const char *errors, /* error handling */ | |
457 Py_ssize_t *consumed /* bytes consumed */ | |
458 ); | |
459 | |
460 /* --- UTF-8 Codecs ------------------------------------------------------- */ | |
461 | |
462 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8( | |
463 const char *string, /* UTF-8 encoded string */ | |
464 Py_ssize_t length, /* size of string */ | |
465 const char *errors /* error handling */ | |
466 ); | |
467 | |
468 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8Stateful( | |
469 const char *string, /* UTF-8 encoded string */ | |
470 Py_ssize_t length, /* size of string */ | |
471 const char *errors, /* error handling */ | |
472 Py_ssize_t *consumed /* bytes consumed */ | |
473 ); | |
474 | |
475 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String( | |
476 PyObject *unicode /* Unicode object */ | |
477 ); | |
478 | |
479 /* --- UTF-32 Codecs ------------------------------------------------------ */ | |
480 | |
481 /* Decodes length bytes from a UTF-32 encoded buffer string and returns | |
482 the corresponding Unicode object. | |
483 | |
484 errors (if non-NULL) defines the error handling. It defaults | |
485 to "strict". | |
486 | |
487 If byteorder is non-NULL, the decoder starts decoding using the | |
488 given byte order: | |
489 | |
490 *byteorder == -1: little endian | |
491 *byteorder == 0: native order | |
492 *byteorder == 1: big endian | |
493 | |
494 In native mode, the first four bytes of the stream are checked for a | |
495 BOM mark. If found, the BOM mark is analysed, the byte order | |
496 adjusted and the BOM skipped. In the other modes, no BOM mark | |
497 interpretation is done. After completion, *byteorder is set to the | |
498 current byte order at the end of input data. | |
499 | |
500 If byteorder is NULL, the codec starts in native order mode. | |
501 | |
502 */ | |
503 | |
504 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32( | |
505 const char *string, /* UTF-32 encoded string */ | |
506 Py_ssize_t length, /* size of string */ | |
507 const char *errors, /* error handling */ | |
508 int *byteorder /* pointer to byteorder to use | |
509 0=native;-1=LE,1=BE; updated on | |
510 exit */ | |
511 ); | |
512 | |
513 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32Stateful( | |
514 const char *string, /* UTF-32 encoded string */ | |
515 Py_ssize_t length, /* size of string */ | |
516 const char *errors, /* error handling */ | |
517 int *byteorder, /* pointer to byteorder to use | |
518 0=native;-1=LE,1=BE; updated on | |
519 exit */ | |
520 Py_ssize_t *consumed /* bytes consumed */ | |
521 ); | |
522 | |
523 /* Returns a Python string using the UTF-32 encoding in native byte | |
524 order. The string always starts with a BOM mark. */ | |
525 | |
526 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF32String( | |
527 PyObject *unicode /* Unicode object */ | |
528 ); | |
529 | |
530 /* Returns a Python string object holding the UTF-32 encoded value of | |
531 the Unicode data. | |
532 | |
533 If byteorder is not 0, output is written according to the following | |
534 byte order: | |
535 | |
536 byteorder == -1: little endian | |
537 byteorder == 0: native byte order (writes a BOM mark) | |
538 byteorder == 1: big endian | |
539 | |
540 If byteorder is 0, the output string will always start with the | |
541 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is | |
542 prepended. | |
543 | |
544 */ | |
545 | |
546 /* --- UTF-16 Codecs ------------------------------------------------------ */ | |
547 | |
548 /* Decodes length bytes from a UTF-16 encoded buffer string and returns | |
549 the corresponding Unicode object. | |
550 | |
551 errors (if non-NULL) defines the error handling. It defaults | |
552 to "strict". | |
553 | |
554 If byteorder is non-NULL, the decoder starts decoding using the | |
555 given byte order: | |
556 | |
557 *byteorder == -1: little endian | |
558 *byteorder == 0: native order | |
559 *byteorder == 1: big endian | |
560 | |
561 In native mode, the first two bytes of the stream are checked for a | |
562 BOM mark. If found, the BOM mark is analysed, the byte order | |
563 adjusted and the BOM skipped. In the other modes, no BOM mark | |
564 interpretation is done. After completion, *byteorder is set to the | |
565 current byte order at the end of input data. | |
566 | |
567 If byteorder is NULL, the codec starts in native order mode. | |
568 | |
569 */ | |
570 | |
571 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16( | |
572 const char *string, /* UTF-16 encoded string */ | |
573 Py_ssize_t length, /* size of string */ | |
574 const char *errors, /* error handling */ | |
575 int *byteorder /* pointer to byteorder to use | |
576 0=native;-1=LE,1=BE; updated on | |
577 exit */ | |
578 ); | |
579 | |
580 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16Stateful( | |
581 const char *string, /* UTF-16 encoded string */ | |
582 Py_ssize_t length, /* size of string */ | |
583 const char *errors, /* error handling */ | |
584 int *byteorder, /* pointer to byteorder to use | |
585 0=native;-1=LE,1=BE; updated on | |
586 exit */ | |
587 Py_ssize_t *consumed /* bytes consumed */ | |
588 ); | |
589 | |
590 /* Returns a Python string using the UTF-16 encoding in native byte | |
591 order. The string always starts with a BOM mark. */ | |
592 | |
593 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String( | |
594 PyObject *unicode /* Unicode object */ | |
595 ); | |
596 | |
597 /* --- Unicode-Escape Codecs ---------------------------------------------- */ | |
598 | |
599 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape( | |
600 const char *string, /* Unicode-Escape encoded string */ | |
601 Py_ssize_t length, /* size of string */ | |
602 const char *errors /* error handling */ | |
603 ); | |
604 | |
605 PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString( | |
606 PyObject *unicode /* Unicode object */ | |
607 ); | |
608 | |
609 /* --- Raw-Unicode-Escape Codecs ------------------------------------------ */ | |
610 | |
611 PyAPI_FUNC(PyObject*) PyUnicode_DecodeRawUnicodeEscape( | |
612 const char *string, /* Raw-Unicode-Escape encoded string */ | |
613 Py_ssize_t length, /* size of string */ | |
614 const char *errors /* error handling */ | |
615 ); | |
616 | |
617 PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString( | |
618 PyObject *unicode /* Unicode object */ | |
619 ); | |
620 | |
621 /* --- Latin-1 Codecs ----------------------------------------------------- | |
622 | |
623 Note: Latin-1 corresponds to the first 256 Unicode ordinals. */ | |
624 | |
625 PyAPI_FUNC(PyObject*) PyUnicode_DecodeLatin1( | |
626 const char *string, /* Latin-1 encoded string */ | |
627 Py_ssize_t length, /* size of string */ | |
628 const char *errors /* error handling */ | |
629 ); | |
630 | |
631 PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String( | |
632 PyObject *unicode /* Unicode object */ | |
633 ); | |
634 | |
635 /* --- ASCII Codecs ------------------------------------------------------- | |
636 | |
637 Only 7-bit ASCII data is accepted. All other codes generate errors. | |
638 | |
639 */ | |
640 | |
641 PyAPI_FUNC(PyObject*) PyUnicode_DecodeASCII( | |
642 const char *string, /* ASCII encoded string */ | |
643 Py_ssize_t length, /* size of string */ | |
644 const char *errors /* error handling */ | |
645 ); | |
646 | |
647 PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString( | |
648 PyObject *unicode /* Unicode object */ | |
649 ); | |
650 | |
651 /* --- Character Map Codecs ----------------------------------------------- | |
652 | |
653 This codec uses mappings to encode and decode characters. | |
654 | |
655 Decoding mappings must map byte ordinals (integers in the range from 0 to | |
656 255) to Unicode strings, integers (which are then interpreted as Unicode | |
657 ordinals) or None. Unmapped data bytes (ones which cause a LookupError) | |
658 as well as mapped to None, 0xFFFE or '\ufffe' are treated as "undefined | |
659 mapping" and cause an error. | |
660 | |
661 Encoding mappings must map Unicode ordinal integers to bytes objects, | |
662 integers in the range from 0 to 255 or None. Unmapped character | |
663 ordinals (ones which cause a LookupError) as well as mapped to | |
664 None are treated as "undefined mapping" and cause an error. | |
665 | |
666 */ | |
667 | |
668 PyAPI_FUNC(PyObject*) PyUnicode_DecodeCharmap( | |
669 const char *string, /* Encoded string */ | |
670 Py_ssize_t length, /* size of string */ | |
671 PyObject *mapping, /* decoding mapping */ | |
672 const char *errors /* error handling */ | |
673 ); | |
674 | |
675 PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString( | |
676 PyObject *unicode, /* Unicode object */ | |
677 PyObject *mapping /* encoding mapping */ | |
678 ); | |
679 | |
680 /* --- MBCS codecs for Windows -------------------------------------------- */ | |
681 | |
682 #ifdef MS_WINDOWS | |
683 PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS( | |
684 const char *string, /* MBCS encoded string */ | |
685 Py_ssize_t length, /* size of string */ | |
686 const char *errors /* error handling */ | |
687 ); | |
688 | |
689 PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCSStateful( | |
690 const char *string, /* MBCS encoded string */ | |
691 Py_ssize_t length, /* size of string */ | |
692 const char *errors, /* error handling */ | |
693 Py_ssize_t *consumed /* bytes consumed */ | |
694 ); | |
695 | |
696 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 | |
697 PyAPI_FUNC(PyObject*) PyUnicode_DecodeCodePageStateful( | |
698 int code_page, /* code page number */ | |
699 const char *string, /* encoded string */ | |
700 Py_ssize_t length, /* size of string */ | |
701 const char *errors, /* error handling */ | |
702 Py_ssize_t *consumed /* bytes consumed */ | |
703 ); | |
704 #endif | |
705 | |
706 PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString( | |
707 PyObject *unicode /* Unicode object */ | |
708 ); | |
709 | |
710 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 | |
711 PyAPI_FUNC(PyObject*) PyUnicode_EncodeCodePage( | |
712 int code_page, /* code page number */ | |
713 PyObject *unicode, /* Unicode object */ | |
714 const char *errors /* error handling */ | |
715 ); | |
716 #endif | |
717 | |
718 #endif /* MS_WINDOWS */ | |
719 | |
720 /* --- Locale encoding --------------------------------------------------- */ | |
721 | |
722 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 | |
723 /* Decode a string from the current locale encoding. The decoder is strict if | |
724 *surrogateescape* is equal to zero, otherwise it uses the 'surrogateescape' | |
725 error handler (PEP 383) to escape undecodable bytes. If a byte sequence can | |
726 be decoded as a surrogate character and *surrogateescape* is not equal to | |
727 zero, the byte sequence is escaped using the 'surrogateescape' error handler | |
728 instead of being decoded. *str* must end with a null character but cannot | |
729 contain embedded null characters. */ | |
730 | |
731 PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocaleAndSize( | |
732 const char *str, | |
733 Py_ssize_t len, | |
734 const char *errors); | |
735 | |
736 /* Similar to PyUnicode_DecodeLocaleAndSize(), but compute the string | |
737 length using strlen(). */ | |
738 | |
739 PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocale( | |
740 const char *str, | |
741 const char *errors); | |
742 | |
743 /* Encode a Unicode object to the current locale encoding. The encoder is | |
744 strict if *surrogateescape* is equal to zero, otherwise the | |
745 "surrogateescape" error handler is used. Return a bytes object. The string | |
746 cannot contain embedded null characters. */ | |
747 | |
748 PyAPI_FUNC(PyObject*) PyUnicode_EncodeLocale( | |
749 PyObject *unicode, | |
750 const char *errors | |
751 ); | |
752 #endif | |
753 | |
754 /* --- File system encoding ---------------------------------------------- */ | |
755 | |
756 /* ParseTuple converter: encode str objects to bytes using | |
757 PyUnicode_EncodeFSDefault(); bytes objects are output as-is. */ | |
758 | |
759 PyAPI_FUNC(int) PyUnicode_FSConverter(PyObject*, void*); | |
760 | |
761 /* ParseTuple converter: decode bytes objects to unicode using | |
762 PyUnicode_DecodeFSDefaultAndSize(); str objects are output as-is. */ | |
763 | |
764 PyAPI_FUNC(int) PyUnicode_FSDecoder(PyObject*, void*); | |
765 | |
766 /* Decode a null-terminated string using Py_FileSystemDefaultEncoding | |
767 and the "surrogateescape" error handler. | |
768 | |
769 If Py_FileSystemDefaultEncoding is not set, fall back to the locale | |
770 encoding. | |
771 | |
772 Use PyUnicode_DecodeFSDefaultAndSize() if the string length is known. | |
773 */ | |
774 | |
775 PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault( | |
776 const char *s /* encoded string */ | |
777 ); | |
778 | |
779 /* Decode a string using Py_FileSystemDefaultEncoding | |
780 and the "surrogateescape" error handler. | |
781 | |
782 If Py_FileSystemDefaultEncoding is not set, fall back to the locale | |
783 encoding. | |
784 */ | |
785 | |
786 PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize( | |
787 const char *s, /* encoded string */ | |
788 Py_ssize_t size /* size */ | |
789 ); | |
790 | |
791 /* Encode a Unicode object to Py_FileSystemDefaultEncoding with the | |
792 "surrogateescape" error handler, and return bytes. | |
793 | |
794 If Py_FileSystemDefaultEncoding is not set, fall back to the locale | |
795 encoding. | |
796 */ | |
797 | |
798 PyAPI_FUNC(PyObject*) PyUnicode_EncodeFSDefault( | |
799 PyObject *unicode | |
800 ); | |
801 | |
802 /* --- Methods & Slots ---------------------------------------------------- | |
803 | |
804 These are capable of handling Unicode objects and strings on input | |
805 (we refer to them as strings in the descriptions) and return | |
806 Unicode objects or integers as appropriate. */ | |
807 | |
808 /* Concat two strings giving a new Unicode string. */ | |
809 | |
810 PyAPI_FUNC(PyObject*) PyUnicode_Concat( | |
811 PyObject *left, /* Left string */ | |
812 PyObject *right /* Right string */ | |
813 ); | |
814 | |
815 /* Concat two strings and put the result in *pleft | |
816 (sets *pleft to NULL on error) */ | |
817 | |
818 PyAPI_FUNC(void) PyUnicode_Append( | |
819 PyObject **pleft, /* Pointer to left string */ | |
820 PyObject *right /* Right string */ | |
821 ); | |
822 | |
823 /* Concat two strings, put the result in *pleft and drop the right object | |
824 (sets *pleft to NULL on error) */ | |
825 | |
826 PyAPI_FUNC(void) PyUnicode_AppendAndDel( | |
827 PyObject **pleft, /* Pointer to left string */ | |
828 PyObject *right /* Right string */ | |
829 ); | |
830 | |
831 /* Split a string giving a list of Unicode strings. | |
832 | |
833 If sep is NULL, splitting will be done at all whitespace | |
834 substrings. Otherwise, splits occur at the given separator. | |
835 | |
836 At most maxsplit splits will be done. If negative, no limit is set. | |
837 | |
838 Separators are not included in the resulting list. | |
839 | |
840 */ | |
841 | |
842 PyAPI_FUNC(PyObject*) PyUnicode_Split( | |
843 PyObject *s, /* String to split */ | |
844 PyObject *sep, /* String separator */ | |
845 Py_ssize_t maxsplit /* Maxsplit count */ | |
846 ); | |
847 | |
848 /* Split a string at line breaks, giving a list of Unicode strings. | |
849 | |
850 CRLF is considered to be one line break. Line breaks are not | |
851 included in the resulting list unless keepends is true. */ | |
852 | |
853 PyAPI_FUNC(PyObject*) PyUnicode_Splitlines( | |
854 PyObject *s, /* String to split */ | |
855 int keepends /* If true, line end markers are included */ | |
856 ); | |
857 | |
858 /* Partition a string using a given separator. */ | |
859 | |
860 PyAPI_FUNC(PyObject*) PyUnicode_Partition( | |
861 PyObject *s, /* String to partition */ | |
862 PyObject *sep /* String separator */ | |
863 ); | |
864 | |
865 /* Partition a string using a given separator, searching from the end of the | |
866 string. */ | |
867 | |
868 PyAPI_FUNC(PyObject*) PyUnicode_RPartition( | |
869 PyObject *s, /* String to partition */ | |
870 PyObject *sep /* String separator */ | |
871 ); | |
872 | |
873 /* Split a string giving a list of Unicode strings. | |
874 | |
875 If sep is NULL, splitting will be done at all whitespace | |
876 substrings. Otherwise, splits occur at the given separator. | |
877 | |
878 At most maxsplit splits will be done; if maxsplit is negative, no | |
879 limit is set. Unlike PyUnicode_Split, PyUnicode_RSplit splits from | |
880 the end of the string. | |
881 | |
882 Separators are not included in the resulting list. | |
883 | |
884 */ | |
885 | |
886 PyAPI_FUNC(PyObject*) PyUnicode_RSplit( | |
887 PyObject *s, /* String to split */ | |
888 PyObject *sep, /* String separator */ | |
889 Py_ssize_t maxsplit /* Maxsplit count */ | |
890 ); | |
891 | |
892 /* Translate a string by applying a character mapping table to it and | |
893 return the resulting Unicode object. | |
894 | |
895 The mapping table must map Unicode ordinal integers to Unicode strings, | |
896 Unicode ordinal integers or None (causing deletion of the character). | |
897 | |
898 Mapping tables may be dictionaries or sequences. Unmapped character | |
899 ordinals (ones which cause a LookupError) are left untouched and | |
900 are copied as-is. | |
901 | |
902 */ | |
903 | |
904 PyAPI_FUNC(PyObject *) PyUnicode_Translate( | |
905 PyObject *str, /* String */ | |
906 PyObject *table, /* Translate table */ | |
907 const char *errors /* error handling */ | |
908 ); | |
909 | |
910 /* Join a sequence of strings using the given separator and return | |
911 the resulting Unicode string. */ | |
912 | |
913 PyAPI_FUNC(PyObject*) PyUnicode_Join( | |
914 PyObject *separator, /* Separator string */ | |
915 PyObject *seq /* Sequence object */ | |
916 ); | |
917 | |
918 /* Return 1 if substr matches str[start:end] at the given tail end, 0 | |
919 otherwise. Return -1 if an error occurred. */ | |
920 | |
921 PyAPI_FUNC(Py_ssize_t) PyUnicode_Tailmatch( | |
922 PyObject *str, /* String */ | |
923 PyObject *substr, /* Prefix or Suffix string */ | |
924 Py_ssize_t start, /* Start index */ | |
925 Py_ssize_t end, /* Stop index */ | |
926 int direction /* Tail end: -1 prefix, +1 suffix */ | |
927 ); | |
928 | |
929 /* Return the first position of substr in str[start:end] using the | |
930 given search direction or -1 if not found. -2 is returned in case | |
931 an error occurred and an exception is set. */ | |
932 | |
933 PyAPI_FUNC(Py_ssize_t) PyUnicode_Find( | |
934 PyObject *str, /* String */ | |
935 PyObject *substr, /* Substring to find */ | |
936 Py_ssize_t start, /* Start index */ | |
937 Py_ssize_t end, /* Stop index */ | |
938 int direction /* Find direction: +1 forward, -1 backward */ | |
939 ); | |
940 | |
941 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 | |
942 /* Like PyUnicode_Find, but search for a single character only. */ | |
943 PyAPI_FUNC(Py_ssize_t) PyUnicode_FindChar( | |
944 PyObject *str, /* String */ | |
945 Py_UCS4 ch, /* Character to find */ | |
946 Py_ssize_t start, /* Start index */ | |
947 Py_ssize_t end, /* Stop index */ | |
948 int direction /* Find direction: +1 forward, -1 backward */ | |
949 ); | |
950 #endif | |
951 | |
952 /* Count the number of occurrences of substr in str[start:end]. */ | |
953 | |
954 PyAPI_FUNC(Py_ssize_t) PyUnicode_Count( | |
955 PyObject *str, /* String */ | |
956 PyObject *substr, /* Substring to count */ | |
957 Py_ssize_t start, /* Start index */ | |
958 Py_ssize_t end /* Stop index */ | |
959 ); | |
960 | |
961 /* Replace at most maxcount occurrences of substr in str with replstr | |
962 and return the resulting Unicode object. */ | |
963 | |
964 PyAPI_FUNC(PyObject *) PyUnicode_Replace( | |
965 PyObject *str, /* String */ | |
966 PyObject *substr, /* Substring to find */ | |
967 PyObject *replstr, /* Substring to replace */ | |
968 Py_ssize_t maxcount /* Max. number of replacements to apply; | |
969 -1 = all */ | |
970 ); | |
971 | |
972 /* Compare two strings and return -1, 0, 1 for less than, equal, | |
973 greater than, respectively. Raise an exception and return -1 on | |
974 error; since -1 is also a valid result, check PyErr_Occurred(). */ | |
975 | |
976 PyAPI_FUNC(int) PyUnicode_Compare( | |
977 PyObject *left, /* Left string */ | |
978 PyObject *right /* Right string */ | |
979 ); | |
980 | |
981 /* Compare a Unicode object with a C string and return -1, 0, 1 for less | |
982 than, equal, and greater than, respectively. It is best to pass only | |
983 ASCII-encoded strings, but the function interprets the input string as | |
984 ISO-8859-1 if it contains non-ASCII characters. | |
985 This function does not raise exceptions. */ | |
986 | |
987 PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString( | |
988 PyObject *left, /* Unicode object */ | |
989 const char *right /* ASCII-encoded string */ | |
990 ); | |
991 | |
992 /* Rich compare two strings and return one of the following: | |
993 | |
994 - NULL in case an exception was raised | |
995 - Py_True or Py_False for successful comparisons | |
996 - Py_NotImplemented in case the type combination is unknown | |
997 | |
998 Possible values for op: | |
999 | |
1000 Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE | |
1001 | |
1002 */ | |
1003 | |
1004 PyAPI_FUNC(PyObject *) PyUnicode_RichCompare( | |
1005 PyObject *left, /* Left string */ | |
1006 PyObject *right, /* Right string */ | |
1007 int op /* Operation: Py_EQ, Py_NE, Py_GT, etc. */ | |
1008 ); | |
1009 | |
1010 /* Apply an argument tuple or dictionary to a format string and return | |
1011 the resulting Unicode string. */ | |
1012 | |
1013 PyAPI_FUNC(PyObject *) PyUnicode_Format( | |
1014 PyObject *format, /* Format string */ | |
1015 PyObject *args /* Argument tuple or dictionary */ | |
1016 ); | |
1017 | |
1018 /* Check whether element is contained in container and return 1/0 | |
1019 accordingly. | |
1020 | |
1021 element has to coerce to a one element Unicode string. -1 is | |
1022 returned in case of an error. */ | |
1023 | |
1024 PyAPI_FUNC(int) PyUnicode_Contains( | |
1025 PyObject *container, /* Container string */ | |
1026 PyObject *element /* Element string */ | |
1027 ); | |
1028 | |
1029 /* Checks whether argument is a valid identifier. */ | |
1030 | |
1031 PyAPI_FUNC(int) PyUnicode_IsIdentifier(PyObject *s); | |
1032 | |
1033 /* === Characters Type APIs =============================================== */ | |
1034 | |
1035 #ifndef Py_LIMITED_API | |
1036 # define Py_CPYTHON_UNICODEOBJECT_H | |
1037 # include "cpython/unicodeobject.h" | |
1038 # undef Py_CPYTHON_UNICODEOBJECT_H | |
1039 #endif | |
1040 | |
1041 #ifdef __cplusplus | |
1042 } | |
1043 #endif | |
1044 #endif /* !Py_UNICODEOBJECT_H */ |