csp2: CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/include/python3.8/cpython/unicodeobject.h annotate

annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/include/python3.8/cpython/unicodeobject.h @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d

author	jpayne
date	Tue, 18 Mar 2025 17:55:14 -0400
parents
children

rev	line source
jpayne@69	1 #ifndef Py_CPYTHON_UNICODEOBJECT_H
jpayne@69	2 # error "this header file must not be included directly"
jpayne@69	3 #endif
jpayne@69	4
jpayne@69	5 #ifdef __cplusplus
jpayne@69	6 extern "C" {
jpayne@69	7 #endif
jpayne@69	8
jpayne@69	9 /* Py_UNICODE was the native Unicode storage format (code unit) used by
jpayne@69	10 Python and represents a single Unicode element in the Unicode type.
jpayne@69	11 With PEP 393, Py_UNICODE is deprecated and replaced with a
jpayne@69	12 typedef to wchar_t. */
jpayne@69	13 #define PY_UNICODE_TYPE wchar_t
jpayne@69	14 /* Py_DEPRECATED(3.3) */ typedef wchar_t Py_UNICODE;
jpayne@69	15
jpayne@69	16 /* --- Internal Unicode Operations ---------------------------------------- */
jpayne@69	17
jpayne@69	18 /* Since splitting on whitespace is an important use case, and
jpayne@69	19 whitespace in most situations is solely ASCII whitespace, we
jpayne@69	20 optimize for the common case by using a quick look-up table
jpayne@69	21 _Py_ascii_whitespace (see below) with an inlined check.
jpayne@69	22
jpayne@69	23 */
jpayne@69	24 #define Py_UNICODE_ISSPACE(ch) \
jpayne@69	25 ((ch) < 128U ? _Py_ascii_whitespace[(ch)] : _PyUnicode_IsWhitespace(ch))
jpayne@69	26
jpayne@69	27 #define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch)
jpayne@69	28 #define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch)
jpayne@69	29 #define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
jpayne@69	30 #define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)
jpayne@69	31
jpayne@69	32 #define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch)
jpayne@69	33 #define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch)
jpayne@69	34 #define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)
jpayne@69	35
jpayne@69	36 #define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
jpayne@69	37 #define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
jpayne@69	38 #define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
jpayne@69	39 #define Py_UNICODE_ISPRINTABLE(ch) _PyUnicode_IsPrintable(ch)
jpayne@69	40
jpayne@69	41 #define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
jpayne@69	42 #define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
jpayne@69	43 #define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)
jpayne@69	44
jpayne@69	45 #define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch)
jpayne@69	46
jpayne@69	47 #define Py_UNICODE_ISALNUM(ch) \
jpayne@69	48 (Py_UNICODE_ISALPHA(ch) || \
jpayne@69	49 Py_UNICODE_ISDECIMAL(ch) || \
jpayne@69	50 Py_UNICODE_ISDIGIT(ch) || \
jpayne@69	51 Py_UNICODE_ISNUMERIC(ch))
jpayne@69	52
jpayne@69	53 #define Py_UNICODE_COPY(target, source, length) \
jpayne@69	54 memcpy((target), (source), (length)*sizeof(Py_UNICODE))
jpayne@69	55
jpayne@69	56 #define Py_UNICODE_FILL(target, value, length) \
jpayne@69	57 do {Py_ssize_t i_; Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value);\
jpayne@69	58 for (i_ = 0; i_ < (length); i_++) t_[i_] = v_;\
jpayne@69	59 } while (0)
jpayne@69	60
jpayne@69	61 /* macros to work with surrogates */
jpayne@69	62 #define Py_UNICODE_IS_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDFFF)
jpayne@69	63 #define Py_UNICODE_IS_HIGH_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDBFF)
jpayne@69	64 #define Py_UNICODE_IS_LOW_SURROGATE(ch) (0xDC00 <= (ch) && (ch) <= 0xDFFF)
jpayne@69	65 /* Join two surrogate characters and return a single Py_UCS4 value. */
jpayne@69	66 #define Py_UNICODE_JOIN_SURROGATES(high, low) \
jpayne@69	67 (((((Py_UCS4)(high) & 0x03FF) << 10) | \
jpayne@69	68 ((Py_UCS4)(low) & 0x03FF)) + 0x10000)
jpayne@69	69 /* high surrogate = top 10 bits added to D800 */
jpayne@69	70 #define Py_UNICODE_HIGH_SURROGATE(ch) (0xD800 - (0x10000 >> 10) + ((ch) >> 10))
jpayne@69	71 /* low surrogate = bottom 10 bits added to DC00 */
jpayne@69	72 #define Py_UNICODE_LOW_SURROGATE(ch) (0xDC00 + ((ch) & 0x3FF))
jpayne@69	73
jpayne@69	74 /* Check if substring matches at given offset. The offset must be
jpayne@69	75 valid, and the substring must not be empty. */
jpayne@69	76
jpayne@69	77 #define Py_UNICODE_MATCH(string, offset, substring) \
jpayne@69	78 ((*((string)->wstr + (offset)) == *((substring)->wstr)) && \
jpayne@69	79 ((*((string)->wstr + (offset) + (substring)->wstr_length-1) == *((substring)->wstr + (substring)->wstr_length-1))) && \
jpayne@69	80 !memcmp((string)->wstr + (offset), (substring)->wstr, (substring)->wstr_length*sizeof(Py_UNICODE)))
jpayne@69	81
jpayne@69	82 /* --- Unicode Type ------------------------------------------------------- */
jpayne@69	83
jpayne@69	84 /* ASCII-only strings created through PyUnicode_New use the PyASCIIObject
jpayne@69	85 structure. state.ascii and state.compact are set, and the data
jpayne@69	86 immediately follow the structure. utf8_length and wstr_length can be found
jpayne@69	87 in the length field; the utf8 pointer is equal to the data pointer. */
jpayne@69	88 typedef struct {
jpayne@69	89 /* There are 4 forms of Unicode strings:
jpayne@69	90
jpayne@69	91 - compact ascii:
jpayne@69	92
jpayne@69	93 * structure = PyASCIIObject
jpayne@69	94 * test: PyUnicode_IS_COMPACT_ASCII(op)
jpayne@69	95 * kind = PyUnicode_1BYTE_KIND
jpayne@69	96 * compact = 1
jpayne@69	97 * ascii = 1
jpayne@69	98 * ready = 1
jpayne@69	99 * (length is the length of the utf8 and wstr strings)
jpayne@69	100 * (data starts just after the structure)
jpayne@69	101 * (since ASCII is decoded from UTF-8, the utf8 string are the data)
jpayne@69	102
jpayne@69	103 - compact:
jpayne@69	104
jpayne@69	105 * structure = PyCompactUnicodeObject
jpayne@69	106 * test: PyUnicode_IS_COMPACT(op) && !PyUnicode_IS_ASCII(op)
jpayne@69	107 * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
jpayne@69	108 PyUnicode_4BYTE_KIND
jpayne@69	109 * compact = 1
jpayne@69	110 * ready = 1
jpayne@69	111 * ascii = 0
jpayne@69	112 * utf8 is not shared with data
jpayne@69	113 * utf8_length = 0 if utf8 is NULL
jpayne@69	114 * wstr is shared with data and wstr_length=length
jpayne@69	115 if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
jpayne@69	116 or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
jpayne@69	117 * wstr_length = 0 if wstr is NULL
jpayne@69	118 * (data starts just after the structure)
jpayne@69	119
jpayne@69	120 - legacy string, not ready:
jpayne@69	121
jpayne@69	122 * structure = PyUnicodeObject
jpayne@69	123 * test: kind == PyUnicode_WCHAR_KIND
jpayne@69	124 * length = 0 (use wstr_length)
jpayne@69	125 * hash = -1
jpayne@69	126 * kind = PyUnicode_WCHAR_KIND
jpayne@69	127 * compact = 0
jpayne@69	128 * ascii = 0
jpayne@69	129 * ready = 0
jpayne@69	130 * interned = SSTATE_NOT_INTERNED
jpayne@69	131 * wstr is not NULL
jpayne@69	132 * data.any is NULL
jpayne@69	133 * utf8 is NULL
jpayne@69	134 * utf8_length = 0
jpayne@69	135
jpayne@69	136 - legacy string, ready:
jpayne@69	137
jpayne@69	138 * structure = PyUnicodeObject structure
jpayne@69	139 * test: !PyUnicode_IS_COMPACT(op) && kind != PyUnicode_WCHAR_KIND
jpayne@69	140 * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
jpayne@69	141 PyUnicode_4BYTE_KIND
jpayne@69	142 * compact = 0
jpayne@69	143 * ready = 1
jpayne@69	144 * data.any is not NULL
jpayne@69	145 * utf8 is shared and utf8_length = length with data.any if ascii = 1
jpayne@69	146 * utf8_length = 0 if utf8 is NULL
jpayne@69	147 * wstr is shared with data.any and wstr_length = length
jpayne@69	148 if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
jpayne@69	149 or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
jpayne@69	150 * wstr_length = 0 if wstr is NULL
jpayne@69	151
jpayne@69	152 Compact strings use only one memory block (structure + characters),
jpayne@69	153 whereas legacy strings use one block for the structure and one block
jpayne@69	154 for characters.
jpayne@69	155
jpayne@69	156 Legacy strings are created by PyUnicode_FromUnicode() and
jpayne@69	157 PyUnicode_FromStringAndSize(NULL, size) functions. They become ready
jpayne@69	158 when PyUnicode_READY() is called.
jpayne@69	159
jpayne@69	160 See also _PyUnicode_CheckConsistency().
jpayne@69	161 */
jpayne@69	162 PyObject_HEAD
jpayne@69	163 Py_ssize_t length; /* Number of code points in the string */
jpayne@69	164 Py_hash_t hash; /* Hash value; -1 if not set */
jpayne@69	165 struct {
jpayne@69	166 /*
jpayne@69	167 SSTATE_NOT_INTERNED (0)
jpayne@69	168 SSTATE_INTERNED_MORTAL (1)
jpayne@69	169 SSTATE_INTERNED_IMMORTAL (2)
jpayne@69	170
jpayne@69	171 If interned != SSTATE_NOT_INTERNED, the two references from the
jpayne@69	172 dictionary to this object are not counted in ob_refcnt.
jpayne@69	173 */
jpayne@69	174 unsigned int interned:2;
jpayne@69	175 /* Character size:
jpayne@69	176
jpayne@69	177 - PyUnicode_WCHAR_KIND (0):
jpayne@69	178
jpayne@69	179 * character type = wchar_t (16 or 32 bits, depending on the
jpayne@69	180 platform)
jpayne@69	181
jpayne@69	182 - PyUnicode_1BYTE_KIND (1):
jpayne@69	183
jpayne@69	184 * character type = Py_UCS1 (8 bits, unsigned)
jpayne@69	185 * all characters are in the range U+0000-U+00FF (latin1)
jpayne@69	186 * if ascii is set, all characters are in the range U+0000-U+007F
jpayne@69	187 (ASCII), otherwise at least one character is in the range
jpayne@69	188 U+0080-U+00FF
jpayne@69	189
jpayne@69	190 - PyUnicode_2BYTE_KIND (2):
jpayne@69	191
jpayne@69	192 * character type = Py_UCS2 (16 bits, unsigned)
jpayne@69	193 * all characters are in the range U+0000-U+FFFF (BMP)
jpayne@69	194 * at least one character is in the range U+0100-U+FFFF
jpayne@69	195
jpayne@69	196 - PyUnicode_4BYTE_KIND (4):
jpayne@69	197
jpayne@69	198 * character type = Py_UCS4 (32 bits, unsigned)
jpayne@69	199 * all characters are in the range U+0000-U+10FFFF
jpayne@69	200 * at least one character is in the range U+10000-U+10FFFF
jpayne@69	201 */
jpayne@69	202 unsigned int kind:3;
jpayne@69	203 /* Compact is with respect to the allocation scheme. Compact unicode
jpayne@69	204 objects only require one memory block while non-compact objects use
jpayne@69	205 one block for the PyUnicodeObject struct and another for its data
jpayne@69	206 buffer. */
jpayne@69	207 unsigned int compact:1;
jpayne@69	208 /* The string only contains characters in the range U+0000-U+007F (ASCII)
jpayne@69	209 and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
jpayne@69	210 set, use the PyASCIIObject structure. */
jpayne@69	211 unsigned int ascii:1;
jpayne@69	212 /* The ready flag indicates whether the object layout is initialized
jpayne@69	213 completely. This means that this is either a compact object, or
jpayne@69	214 the data pointer is filled out. The bit is redundant, and helps
jpayne@69	215 to minimize the test in PyUnicode_IS_READY(). */
jpayne@69	216 unsigned int ready:1;
jpayne@69	217 /* Padding to ensure that PyUnicode_DATA() is always aligned to
jpayne@69	218 4 bytes (see issue #19537 on m68k). */
jpayne@69	219 unsigned int :24;
jpayne@69	220 } state;
jpayne@69	221 wchar_t *wstr; /* wchar_t representation (null-terminated) */
jpayne@69	222 } PyASCIIObject;
jpayne@69	223
jpayne@69	224 /* Non-ASCII strings allocated through PyUnicode_New use the
jpayne@69	225 PyCompactUnicodeObject structure. state.compact is set, and the data
jpayne@69	226 immediately follow the structure. */
jpayne@69	227 typedef struct {
jpayne@69	228 PyASCIIObject _base;
jpayne@69	229 Py_ssize_t utf8_length; /* Number of bytes in utf8, excluding the
jpayne@69	230 * terminating \0. */
jpayne@69	231 char *utf8; /* UTF-8 representation (null-terminated) */
jpayne@69	232 Py_ssize_t wstr_length; /* Number of code points in wstr, possible
jpayne@69	233 * surrogates count as two code points. */
jpayne@69	234 } PyCompactUnicodeObject;
jpayne@69	235
jpayne@69	236 /* Strings allocated through PyUnicode_FromUnicode(NULL, len) use the
jpayne@69	237 PyUnicodeObject structure. The actual string data is initially in the wstr
jpayne@69	238 block, and copied into the data block using _PyUnicode_Ready. */
jpayne@69	239 typedef struct {
jpayne@69	240 PyCompactUnicodeObject _base;
jpayne@69	241 union {
jpayne@69	242 void *any;
jpayne@69	243 Py_UCS1 *latin1;
jpayne@69	244 Py_UCS2 *ucs2;
jpayne@69	245 Py_UCS4 *ucs4;
jpayne@69	246 } data; /* Canonical, smallest-form Unicode buffer */
jpayne@69	247 } PyUnicodeObject;
jpayne@69	248
jpayne@69	249 PyAPI_FUNC(int) _PyUnicode_CheckConsistency(
jpayne@69	250 PyObject *op,
jpayne@69	251 int check_content);
jpayne@69	252
jpayne@69	253 /* Fast access macros */
jpayne@69	254 #define PyUnicode_WSTR_LENGTH(op) \
jpayne@69	255 (PyUnicode_IS_COMPACT_ASCII(op) ? \
jpayne@69	256 ((PyASCIIObject*)op)->length : \
jpayne@69	257 ((PyCompactUnicodeObject*)op)->wstr_length)
jpayne@69	258
jpayne@69	259 /* Returns the deprecated Py_UNICODE representation's size in code units
jpayne@69	260 (this includes surrogate pairs as 2 units).
jpayne@69	261 If the Py_UNICODE representation is not available, it will be computed
jpayne@69	262 on request. Use PyUnicode_GET_LENGTH() for the length in code points. */
jpayne@69	263
jpayne@69	264 /* Py_DEPRECATED(3.3) */
jpayne@69	265 #define PyUnicode_GET_SIZE(op) \
jpayne@69	266 (assert(PyUnicode_Check(op)), \
jpayne@69	267 (((PyASCIIObject *)(op))->wstr) ? \
jpayne@69	268 PyUnicode_WSTR_LENGTH(op) : \
jpayne@69	269 ((void)PyUnicode_AsUnicode(_PyObject_CAST(op)),\
jpayne@69	270 assert(((PyASCIIObject *)(op))->wstr), \
jpayne@69	271 PyUnicode_WSTR_LENGTH(op)))
jpayne@69	272
jpayne@69	273 /* Py_DEPRECATED(3.3) */
jpayne@69	274 #define PyUnicode_GET_DATA_SIZE(op) \
jpayne@69	275 (PyUnicode_GET_SIZE(op) * Py_UNICODE_SIZE)
jpayne@69	276
jpayne@69	277 /* Alias for PyUnicode_AsUnicode(). This will create a wchar_t/Py_UNICODE
jpayne@69	278 representation on demand. Using this macro is very inefficient now,
jpayne@69	279 try to port your code to use the new PyUnicode_*BYTE_DATA() macros or
jpayne@69	280 use PyUnicode_WRITE() and PyUnicode_READ(). */
jpayne@69	281
jpayne@69	282 /* Py_DEPRECATED(3.3) */
jpayne@69	283 #define PyUnicode_AS_UNICODE(op) \
jpayne@69	284 (assert(PyUnicode_Check(op)), \
jpayne@69	285 (((PyASCIIObject *)(op))->wstr) ? (((PyASCIIObject *)(op))->wstr) : \
jpayne@69	286 PyUnicode_AsUnicode(_PyObject_CAST(op)))
jpayne@69	287
jpayne@69	288 /* Py_DEPRECATED(3.3) */
jpayne@69	289 #define PyUnicode_AS_DATA(op) \
jpayne@69	290 ((const char *)(PyUnicode_AS_UNICODE(op)))
jpayne@69	291
jpayne@69	292
jpayne@69	293 /* --- Flexible String Representation Helper Macros (PEP 393) -------------- */
jpayne@69	294
jpayne@69	295 /* Values for PyASCIIObject.state: */
jpayne@69	296
jpayne@69	297 /* Interning state. */
jpayne@69	298 #define SSTATE_NOT_INTERNED 0
jpayne@69	299 #define SSTATE_INTERNED_MORTAL 1
jpayne@69	300 #define SSTATE_INTERNED_IMMORTAL 2
jpayne@69	301
jpayne@69	302 /* Return true if the string contains only ASCII characters, or 0 if not. The
jpayne@69	303 string may be compact (PyUnicode_IS_COMPACT_ASCII) or not, but must be
jpayne@69	304 ready. */
jpayne@69	305 #define PyUnicode_IS_ASCII(op) \
jpayne@69	306 (assert(PyUnicode_Check(op)), \
jpayne@69	307 assert(PyUnicode_IS_READY(op)), \
jpayne@69	308 ((PyASCIIObject*)op)->state.ascii)
jpayne@69	309
jpayne@69	310 /* Return true if the string is compact or 0 if not.
jpayne@69	311 No type checks or Ready calls are performed. */
jpayne@69	312 #define PyUnicode_IS_COMPACT(op) \
jpayne@69	313 (((PyASCIIObject*)(op))->state.compact)
jpayne@69	314
jpayne@69	315 /* Return true if the string is a compact ASCII string (use PyASCIIObject
jpayne@69	316 structure), or 0 if not. No type checks or Ready calls are performed. */
jpayne@69	317 #define PyUnicode_IS_COMPACT_ASCII(op) \
jpayne@69	318 (((PyASCIIObject*)op)->state.ascii && PyUnicode_IS_COMPACT(op))
jpayne@69	319
jpayne@69	320 enum PyUnicode_Kind {
jpayne@69	321 /* String contains only wstr byte characters. This is only possible
jpayne@69	322 when the string was created with a legacy API and _PyUnicode_Ready()
jpayne@69	323 has not been called yet. */
jpayne@69	324 PyUnicode_WCHAR_KIND = 0,
jpayne@69	325 /* Return values of the PyUnicode_KIND() macro: */
jpayne@69	326 PyUnicode_1BYTE_KIND = 1,
jpayne@69	327 PyUnicode_2BYTE_KIND = 2,
jpayne@69	328 PyUnicode_4BYTE_KIND = 4
jpayne@69	329 };
jpayne@69	330
jpayne@69	331 /* Return pointers to the canonical representation cast to unsigned char,
jpayne@69	332 Py_UCS2, or Py_UCS4 for direct character access.
jpayne@69	333 No checks are performed, use PyUnicode_KIND() before to ensure
jpayne@69	334 these will work correctly. */
jpayne@69	335
jpayne@69	336 #define PyUnicode_1BYTE_DATA(op) ((Py_UCS1*)PyUnicode_DATA(op))
jpayne@69	337 #define PyUnicode_2BYTE_DATA(op) ((Py_UCS2*)PyUnicode_DATA(op))
jpayne@69	338 #define PyUnicode_4BYTE_DATA(op) ((Py_UCS4*)PyUnicode_DATA(op))
jpayne@69	339
jpayne@69	340 /* Return one of the PyUnicode_*_KIND values defined above. */
jpayne@69	341 #define PyUnicode_KIND(op) \
jpayne@69	342 (assert(PyUnicode_Check(op)), \
jpayne@69	343 assert(PyUnicode_IS_READY(op)), \
jpayne@69	344 ((PyASCIIObject *)(op))->state.kind)
jpayne@69	345
jpayne@69	346 /* Return a void pointer to the raw unicode buffer. */
jpayne@69	347 #define _PyUnicode_COMPACT_DATA(op) \
jpayne@69	348 (PyUnicode_IS_ASCII(op) ? \
jpayne@69	349 ((void*)((PyASCIIObject*)(op) + 1)) : \
jpayne@69	350 ((void*)((PyCompactUnicodeObject*)(op) + 1)))
jpayne@69	351
jpayne@69	352 #define _PyUnicode_NONCOMPACT_DATA(op) \
jpayne@69	353 (assert(((PyUnicodeObject*)(op))->data.any), \
jpayne@69	354 ((((PyUnicodeObject *)(op))->data.any)))
jpayne@69	355
jpayne@69	356 #define PyUnicode_DATA(op) \
jpayne@69	357 (assert(PyUnicode_Check(op)), \
jpayne@69	358 PyUnicode_IS_COMPACT(op) ? _PyUnicode_COMPACT_DATA(op) : \
jpayne@69	359 _PyUnicode_NONCOMPACT_DATA(op))
jpayne@69	360
jpayne@69	361 /* In the access macros below, "kind" may be evaluated more than once.
jpayne@69	362 All other macro parameters are evaluated exactly once, so it is safe
jpayne@69	363 to put side effects into them (such as increasing the index). */
jpayne@69	364
jpayne@69	365 /* Write into the canonical representation, this macro does not do any sanity
jpayne@69	366 checks and is intended for usage in loops. The caller should cache the
jpayne@69	367 kind and data pointers obtained from other macro calls.
jpayne@69	368 index is the index in the string (starts at 0) and value is the new
jpayne@69	369 code point value which should be written to that location. */
jpayne@69	370 #define PyUnicode_WRITE(kind, data, index, value) \
jpayne@69	371 do { \
jpayne@69	372 switch ((kind)) { \
jpayne@69	373 case PyUnicode_1BYTE_KIND: { \
jpayne@69	374 ((Py_UCS1 *)(data))[(index)] = (Py_UCS1)(value); \
jpayne@69	375 break; \
jpayne@69	376 } \
jpayne@69	377 case PyUnicode_2BYTE_KIND: { \
jpayne@69	378 ((Py_UCS2 *)(data))[(index)] = (Py_UCS2)(value); \
jpayne@69	379 break; \
jpayne@69	380 } \
jpayne@69	381 default: { \
jpayne@69	382 assert((kind) == PyUnicode_4BYTE_KIND); \
jpayne@69	383 ((Py_UCS4 *)(data))[(index)] = (Py_UCS4)(value); \
jpayne@69	384 } \
jpayne@69	385 } \
jpayne@69	386 } while (0)
jpayne@69	387
jpayne@69	388 /* Read a code point from the string's canonical representation. No checks
jpayne@69	389 or ready calls are performed. */
jpayne@69	390 #define PyUnicode_READ(kind, data, index) \
jpayne@69	391 ((Py_UCS4) \
jpayne@69	392 ((kind) == PyUnicode_1BYTE_KIND ? \
jpayne@69	393 ((const Py_UCS1 *)(data))[(index)] : \
jpayne@69	394 ((kind) == PyUnicode_2BYTE_KIND ? \
jpayne@69	395 ((const Py_UCS2 *)(data))[(index)] : \
jpayne@69	396 ((const Py_UCS4 *)(data))[(index)] \
jpayne@69	397 ) \
jpayne@69	398 ))
jpayne@69	399
jpayne@69	400 /* PyUnicode_READ_CHAR() is less efficient than PyUnicode_READ() because it
jpayne@69	401 calls PyUnicode_KIND() and might call it twice. For single reads, use
jpayne@69	402 PyUnicode_READ_CHAR, for multiple consecutive reads callers should
jpayne@69	403 cache kind and use PyUnicode_READ instead. */
jpayne@69	404 #define PyUnicode_READ_CHAR(unicode, index) \
jpayne@69	405 (assert(PyUnicode_Check(unicode)), \
jpayne@69	406 assert(PyUnicode_IS_READY(unicode)), \
jpayne@69	407 (Py_UCS4) \
jpayne@69	408 (PyUnicode_KIND((unicode)) == PyUnicode_1BYTE_KIND ? \
jpayne@69	409 ((const Py_UCS1 *)(PyUnicode_DATA((unicode))))[(index)] : \
jpayne@69	410 (PyUnicode_KIND((unicode)) == PyUnicode_2BYTE_KIND ? \
jpayne@69	411 ((const Py_UCS2 *)(PyUnicode_DATA((unicode))))[(index)] : \
jpayne@69	412 ((const Py_UCS4 *)(PyUnicode_DATA((unicode))))[(index)] \
jpayne@69	413 ) \
jpayne@69	414 ))
jpayne@69	415
jpayne@69	416 /* Returns the length of the unicode string. The caller has to make sure that
jpayne@69	417 the string has its canonical representation set before calling
jpayne@69	418 this macro. Call PyUnicode_(FAST_)Ready to ensure that. */
jpayne@69	419 #define PyUnicode_GET_LENGTH(op) \
jpayne@69	420 (assert(PyUnicode_Check(op)), \
jpayne@69	421 assert(PyUnicode_IS_READY(op)), \
jpayne@69	422 ((PyASCIIObject *)(op))->length)
jpayne@69	423
jpayne@69	424
jpayne@69	425 /* Fast check to determine whether an object is ready. Equivalent to
jpayne@69	426 PyUnicode_IS_COMPACT(op) || ((PyUnicodeObject*)(op))->data.any) */
jpayne@69	427
jpayne@69	428 #define PyUnicode_IS_READY(op) (((PyASCIIObject*)op)->state.ready)
jpayne@69	429
jpayne@69	430 /* PyUnicode_READY() does less work than _PyUnicode_Ready() in the best
jpayne@69	431 case. If the canonical representation is not yet set, it will still call
jpayne@69	432 _PyUnicode_Ready().
jpayne@69	433 Returns 0 on success and -1 on errors. */
jpayne@69	434 #define PyUnicode_READY(op) \
jpayne@69	435 (assert(PyUnicode_Check(op)), \
jpayne@69	436 (PyUnicode_IS_READY(op) ? \
jpayne@69	437 0 : _PyUnicode_Ready(_PyObject_CAST(op))))
jpayne@69	438
jpayne@69	439 /* Return a maximum character value which is suitable for creating another
jpayne@69	440 string based on op. This is always an approximation but more efficient
jpayne@69	441 than iterating over the string. */
jpayne@69	442 #define PyUnicode_MAX_CHAR_VALUE(op) \
jpayne@69	443 (assert(PyUnicode_IS_READY(op)), \
jpayne@69	444 (PyUnicode_IS_ASCII(op) ? \
jpayne@69	445 (0x7f) : \
jpayne@69	446 (PyUnicode_KIND(op) == PyUnicode_1BYTE_KIND ? \
jpayne@69	447 (0xffU) : \
jpayne@69	448 (PyUnicode_KIND(op) == PyUnicode_2BYTE_KIND ? \
jpayne@69	449 (0xffffU) : \
jpayne@69	450 (0x10ffffU)))))
jpayne@69	451
jpayne@69	452 /* === Public API ========================================================= */
jpayne@69	453
jpayne@69	454 /* --- Plain Py_UNICODE --------------------------------------------------- */
jpayne@69	455
jpayne@69	456 /* With PEP 393, this is the recommended way to allocate a new unicode object.
jpayne@69	457 This function will allocate the object and its buffer in a single memory
jpayne@69	458 block. Objects created using this function are not resizable. */
jpayne@69	459 PyAPI_FUNC(PyObject*) PyUnicode_New(
jpayne@69	460 Py_ssize_t size, /* Number of code points in the new string */
jpayne@69	461 Py_UCS4 maxchar /* maximum code point value in the string */
jpayne@69	462 );
jpayne@69	463
jpayne@69	464 /* Initializes the canonical string representation from the deprecated
jpayne@69	465 wstr/Py_UNICODE representation. This function is used to convert Unicode
jpayne@69	466 objects which were created using the old API to the new flexible format
jpayne@69	467 introduced with PEP 393.
jpayne@69	468
jpayne@69	469 Don't call this function directly, use the public PyUnicode_READY() macro
jpayne@69	470 instead. */
jpayne@69	471 PyAPI_FUNC(int) _PyUnicode_Ready(
jpayne@69	472 PyObject *unicode /* Unicode object */
jpayne@69	473 );
jpayne@69	474
jpayne@69	475 /* Get a copy of a Unicode string. */
jpayne@69	476 PyAPI_FUNC(PyObject*) _PyUnicode_Copy(
jpayne@69	477 PyObject *unicode
jpayne@69	478 );
jpayne@69	479
jpayne@69	480 /* Copy character from one unicode object into another, this function performs
jpayne@69	481 character conversion when necessary and falls back to memcpy() if possible.
jpayne@69	482
jpayne@69	483 Fail if to is too small (smaller than how_many or smaller than
jpayne@69	484 len(from)-from_start), or if kind(from[from_start:from_start+how_many]) >
jpayne@69	485 kind(to), or if to has more than 1 reference.
jpayne@69	486
jpayne@69	487 Return the number of written character, or return -1 and raise an exception
jpayne@69	488 on error.
jpayne@69	489
jpayne@69	490 Pseudo-code:
jpayne@69	491
jpayne@69	492 how_many = min(how_many, len(from) - from_start)
jpayne@69	493 to[to_start:to_start+how_many] = from[from_start:from_start+how_many]
jpayne@69	494 return how_many
jpayne@69	495
jpayne@69	496 Note: The function doesn't write a terminating null character.
jpayne@69	497 */
jpayne@69	498 PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters(
jpayne@69	499 PyObject *to,
jpayne@69	500 Py_ssize_t to_start,
jpayne@69	501 PyObject *from,
jpayne@69	502 Py_ssize_t from_start,
jpayne@69	503 Py_ssize_t how_many
jpayne@69	504 );
jpayne@69	505
jpayne@69	506 /* Unsafe version of PyUnicode_CopyCharacters(): don't check arguments and so
jpayne@69	507 may crash if parameters are invalid (e.g. if the output string
jpayne@69	508 is too short). */
jpayne@69	509 PyAPI_FUNC(void) _PyUnicode_FastCopyCharacters(
jpayne@69	510 PyObject *to,
jpayne@69	511 Py_ssize_t to_start,
jpayne@69	512 PyObject *from,
jpayne@69	513 Py_ssize_t from_start,
jpayne@69	514 Py_ssize_t how_many
jpayne@69	515 );
jpayne@69	516
jpayne@69	517 /* Fill a string with a character: write fill_char into
jpayne@69	518 unicode[start:start+length].
jpayne@69	519
jpayne@69	520 Fail if fill_char is bigger than the string maximum character, or if the
jpayne@69	521 string has more than 1 reference.
jpayne@69	522
jpayne@69	523 Return the number of written character, or return -1 and raise an exception
jpayne@69	524 on error. */
jpayne@69	525 PyAPI_FUNC(Py_ssize_t) PyUnicode_Fill(
jpayne@69	526 PyObject *unicode,
jpayne@69	527 Py_ssize_t start,
jpayne@69	528 Py_ssize_t length,
jpayne@69	529 Py_UCS4 fill_char
jpayne@69	530 );
jpayne@69	531
jpayne@69	532 /* Unsafe version of PyUnicode_Fill(): don't check arguments and so may crash
jpayne@69	533 if parameters are invalid (e.g. if length is longer than the string). */
jpayne@69	534 PyAPI_FUNC(void) _PyUnicode_FastFill(
jpayne@69	535 PyObject *unicode,
jpayne@69	536 Py_ssize_t start,
jpayne@69	537 Py_ssize_t length,
jpayne@69	538 Py_UCS4 fill_char
jpayne@69	539 );
jpayne@69	540
jpayne@69	541 /* Create a Unicode Object from the Py_UNICODE buffer u of the given
jpayne@69	542 size.
jpayne@69	543
jpayne@69	544 u may be NULL which causes the contents to be undefined. It is the
jpayne@69	545 user's responsibility to fill in the needed data afterwards. Note
jpayne@69	546 that modifying the Unicode object contents after construction is
jpayne@69	547 only allowed if u was set to NULL.
jpayne@69	548
jpayne@69	549 The buffer is copied into the new object. */
jpayne@69	550 /* Py_DEPRECATED(3.3) */ PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
jpayne@69	551 const Py_UNICODE *u, /* Unicode buffer */
jpayne@69	552 Py_ssize_t size /* size of buffer */
jpayne@69	553 );
jpayne@69	554
jpayne@69	555 /* Create a new string from a buffer of Py_UCS1, Py_UCS2 or Py_UCS4 characters.
jpayne@69	556 Scan the string to find the maximum character. */
jpayne@69	557 PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData(
jpayne@69	558 int kind,
jpayne@69	559 const void *buffer,
jpayne@69	560 Py_ssize_t size);
jpayne@69	561
jpayne@69	562 /* Create a new string from a buffer of ASCII characters.
jpayne@69	563 WARNING: Don't check if the string contains any non-ASCII character. */
jpayne@69	564 PyAPI_FUNC(PyObject*) _PyUnicode_FromASCII(
jpayne@69	565 const char *buffer,
jpayne@69	566 Py_ssize_t size);
jpayne@69	567
jpayne@69	568 /* Compute the maximum character of the substring unicode[start:end].
jpayne@69	569 Return 127 for an empty string. */
jpayne@69	570 PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar (
jpayne@69	571 PyObject *unicode,
jpayne@69	572 Py_ssize_t start,
jpayne@69	573 Py_ssize_t end);
jpayne@69	574
jpayne@69	575 /* Return a read-only pointer to the Unicode object's internal
jpayne@69	576 Py_UNICODE buffer.
jpayne@69	577 If the wchar_t/Py_UNICODE representation is not yet available, this
jpayne@69	578 function will calculate it. */
jpayne@69	579 /* Py_DEPRECATED(3.3) */ PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
jpayne@69	580 PyObject *unicode /* Unicode object */
jpayne@69	581 );
jpayne@69	582
jpayne@69	583 /* Similar to PyUnicode_AsUnicode(), but raises a ValueError if the string
jpayne@69	584 contains null characters. */
jpayne@69	585 PyAPI_FUNC(const Py_UNICODE *) _PyUnicode_AsUnicode(
jpayne@69	586 PyObject *unicode /* Unicode object */
jpayne@69	587 );
jpayne@69	588
jpayne@69	589 /* Return a read-only pointer to the Unicode object's internal
jpayne@69	590 Py_UNICODE buffer and save the length at size.
jpayne@69	591 If the wchar_t/Py_UNICODE representation is not yet available, this
jpayne@69	592 function will calculate it. */
jpayne@69	593
jpayne@69	594 /* Py_DEPRECATED(3.3) */ PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize(
jpayne@69	595 PyObject *unicode, /* Unicode object */
jpayne@69	596 Py_ssize_t *size /* location where to save the length */
jpayne@69	597 );
jpayne@69	598
jpayne@69	599 /* Get the maximum ordinal for a Unicode character. */
jpayne@69	600 Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void);
jpayne@69	601
jpayne@69	602
jpayne@69	603 /* --- _PyUnicodeWriter API ----------------------------------------------- */
jpayne@69	604
jpayne@69	605 typedef struct {
jpayne@69	606 PyObject *buffer;
jpayne@69	607 void *data;
jpayne@69	608 enum PyUnicode_Kind kind;
jpayne@69	609 Py_UCS4 maxchar;
jpayne@69	610 Py_ssize_t size;
jpayne@69	611 Py_ssize_t pos;
jpayne@69	612
jpayne@69	613 /* minimum number of allocated characters (default: 0) */
jpayne@69	614 Py_ssize_t min_length;
jpayne@69	615
jpayne@69	616 /* minimum character (default: 127, ASCII) */
jpayne@69	617 Py_UCS4 min_char;
jpayne@69	618
jpayne@69	619 /* If non-zero, overallocate the buffer (default: 0). */
jpayne@69	620 unsigned char overallocate;
jpayne@69	621
jpayne@69	622 /* If readonly is 1, buffer is a shared string (cannot be modified)
jpayne@69	623 and size is set to 0. */
jpayne@69	624 unsigned char readonly;
jpayne@69	625 } _PyUnicodeWriter ;
jpayne@69	626
jpayne@69	627 /* Initialize a Unicode writer.
jpayne@69	628 *
jpayne@69	629 * By default, the minimum buffer size is 0 character and overallocation is
jpayne@69	630 * disabled. Set min_length, min_char and overallocate attributes to control
jpayne@69	631 * the allocation of the buffer. */
jpayne@69	632 PyAPI_FUNC(void)
jpayne@69	633 _PyUnicodeWriter_Init(_PyUnicodeWriter *writer);
jpayne@69	634
jpayne@69	635 /* Prepare the buffer to write 'length' characters
jpayne@69	636 with the specified maximum character.
jpayne@69	637
jpayne@69	638 Return 0 on success, raise an exception and return -1 on error. */
jpayne@69	639 #define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR) \
jpayne@69	640 (((MAXCHAR) <= (WRITER)->maxchar \
jpayne@69	641 && (LENGTH) <= (WRITER)->size - (WRITER)->pos) \
jpayne@69	642 ? 0 \
jpayne@69	643 : (((LENGTH) == 0) \
jpayne@69	644 ? 0 \
jpayne@69	645 : _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR))))
jpayne@69	646
jpayne@69	647 /* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro
jpayne@69	648 instead. */
jpayne@69	649 PyAPI_FUNC(int)
jpayne@69	650 _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
jpayne@69	651 Py_ssize_t length, Py_UCS4 maxchar);
jpayne@69	652
jpayne@69	653 /* Prepare the buffer to have at least the kind KIND.
jpayne@69	654 For example, kind=PyUnicode_2BYTE_KIND ensures that the writer will
jpayne@69	655 support characters in range U+0000-U+FFFF.
jpayne@69	656
jpayne@69	657 Return 0 on success, raise an exception and return -1 on error. */
jpayne@69	658 #define _PyUnicodeWriter_PrepareKind(WRITER, KIND) \
jpayne@69	659 (assert((KIND) != PyUnicode_WCHAR_KIND), \
jpayne@69	660 (KIND) <= (WRITER)->kind \
jpayne@69	661 ? 0 \
jpayne@69	662 : _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND)))
jpayne@69	663
jpayne@69	664 /* Don't call this function directly, use the _PyUnicodeWriter_PrepareKind()
jpayne@69	665 macro instead. */
jpayne@69	666 PyAPI_FUNC(int)
jpayne@69	667 _PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
jpayne@69	668 enum PyUnicode_Kind kind);
jpayne@69	669
jpayne@69	670 /* Append a Unicode character.
jpayne@69	671 Return 0 on success, raise an exception and return -1 on error. */
jpayne@69	672 PyAPI_FUNC(int)
jpayne@69	673 _PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer,
jpayne@69	674 Py_UCS4 ch
jpayne@69	675 );
jpayne@69	676
jpayne@69	677 /* Append a Unicode string.
jpayne@69	678 Return 0 on success, raise an exception and return -1 on error. */
jpayne@69	679 PyAPI_FUNC(int)
jpayne@69	680 _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer,
jpayne@69	681 PyObject *str /* Unicode string */
jpayne@69	682 );
jpayne@69	683
jpayne@69	684 /* Append a substring of a Unicode string.
jpayne@69	685 Return 0 on success, raise an exception and return -1 on error. */
jpayne@69	686 PyAPI_FUNC(int)
jpayne@69	687 _PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer,
jpayne@69	688 PyObject *str, /* Unicode string */
jpayne@69	689 Py_ssize_t start,
jpayne@69	690 Py_ssize_t end
jpayne@69	691 );
jpayne@69	692
jpayne@69	693 /* Append an ASCII-encoded byte string.
jpayne@69	694 Return 0 on success, raise an exception and return -1 on error. */
jpayne@69	695 PyAPI_FUNC(int)
jpayne@69	696 _PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
jpayne@69	697 const char *str, /* ASCII-encoded byte string */
jpayne@69	698 Py_ssize_t len /* number of bytes, or -1 if unknown */
jpayne@69	699 );
jpayne@69	700
jpayne@69	701 /* Append a latin1-encoded byte string.
jpayne@69	702 Return 0 on success, raise an exception and return -1 on error. */
jpayne@69	703 PyAPI_FUNC(int)
jpayne@69	704 _PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
jpayne@69	705 const char *str, /* latin1-encoded byte string */
jpayne@69	706 Py_ssize_t len /* length in bytes */
jpayne@69	707 );
jpayne@69	708
jpayne@69	709 /* Get the value of the writer as a Unicode string. Clear the
jpayne@69	710 buffer of the writer. Raise an exception and return NULL
jpayne@69	711 on error. */
jpayne@69	712 PyAPI_FUNC(PyObject *)
jpayne@69	713 _PyUnicodeWriter_Finish(_PyUnicodeWriter *writer);
jpayne@69	714
jpayne@69	715 /* Deallocate memory of a writer (clear its internal buffer). */
jpayne@69	716 PyAPI_FUNC(void)
jpayne@69	717 _PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer);
jpayne@69	718
jpayne@69	719
jpayne@69	720 /* Format the object based on the format_spec, as defined in PEP 3101
jpayne@69	721 (Advanced String Formatting). */
jpayne@69	722 PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter(
jpayne@69	723 _PyUnicodeWriter *writer,
jpayne@69	724 PyObject *obj,
jpayne@69	725 PyObject *format_spec,
jpayne@69	726 Py_ssize_t start,
jpayne@69	727 Py_ssize_t end);
jpayne@69	728
jpayne@69	729 /* --- wchar_t support for platforms which support it --------------------- */
jpayne@69	730
jpayne@69	731 #ifdef HAVE_WCHAR_H
jpayne@69	732 PyAPI_FUNC(void*) _PyUnicode_AsKind(PyObject *s, unsigned int kind);
jpayne@69	733 #endif
jpayne@69	734
jpayne@69	735 /* --- Manage the default encoding ---------------------------------------- */
jpayne@69	736
jpayne@69	737 /* Returns a pointer to the default encoding (UTF-8) of the
jpayne@69	738 Unicode object unicode and the size of the encoded representation
jpayne@69	739 in bytes stored in *size.
jpayne@69	740
jpayne@69	741 In case of an error, no *size is set.
jpayne@69	742
jpayne@69	743 This function caches the UTF-8 encoded string in the unicodeobject
jpayne@69	744 and subsequent calls will return the same string. The memory is released
jpayne@69	745 when the unicodeobject is deallocated.
jpayne@69	746
jpayne@69	747 _PyUnicode_AsStringAndSize is a #define for PyUnicode_AsUTF8AndSize to
jpayne@69	748 support the previous internal function with the same behaviour.
jpayne@69	749
jpayne@69	750 *** This API is for interpreter INTERNAL USE ONLY and will likely
jpayne@69	751 *** be removed or changed in the future.
jpayne@69	752
jpayne@69	753 *** If you need to access the Unicode object as UTF-8 bytes string,
jpayne@69	754 *** please use PyUnicode_AsUTF8String() instead.
jpayne@69	755 */
jpayne@69	756
jpayne@69	757 PyAPI_FUNC(const char *) PyUnicode_AsUTF8AndSize(
jpayne@69	758 PyObject *unicode,
jpayne@69	759 Py_ssize_t *size);
jpayne@69	760
jpayne@69	761 #define _PyUnicode_AsStringAndSize PyUnicode_AsUTF8AndSize
jpayne@69	762
jpayne@69	763 /* Returns a pointer to the default encoding (UTF-8) of the
jpayne@69	764 Unicode object unicode.
jpayne@69	765
jpayne@69	766 Like PyUnicode_AsUTF8AndSize(), this also caches the UTF-8 representation
jpayne@69	767 in the unicodeobject.
jpayne@69	768
jpayne@69	769 _PyUnicode_AsString is a #define for PyUnicode_AsUTF8 to
jpayne@69	770 support the previous internal function with the same behaviour.
jpayne@69	771
jpayne@69	772 Use of this API is DEPRECATED since no size information can be
jpayne@69	773 extracted from the returned data.
jpayne@69	774
jpayne@69	775 *** This API is for interpreter INTERNAL USE ONLY and will likely
jpayne@69	776 *** be removed or changed for Python 3.1.
jpayne@69	777
jpayne@69	778 *** If you need to access the Unicode object as UTF-8 bytes string,
jpayne@69	779 *** please use PyUnicode_AsUTF8String() instead.
jpayne@69	780
jpayne@69	781 */
jpayne@69	782
jpayne@69	783 PyAPI_FUNC(const char *) PyUnicode_AsUTF8(PyObject *unicode);
jpayne@69	784
jpayne@69	785 #define _PyUnicode_AsString PyUnicode_AsUTF8
jpayne@69	786
jpayne@69	787 /* --- Generic Codecs ----------------------------------------------------- */
jpayne@69	788
jpayne@69	789 /* Encodes a Py_UNICODE buffer of the given size and returns a
jpayne@69	790 Python string object. */
jpayne@69	791 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_Encode(
jpayne@69	792 const Py_UNICODE *s, /* Unicode char buffer */
jpayne@69	793 Py_ssize_t size, /* number of Py_UNICODE chars to encode */
jpayne@69	794 const char *encoding, /* encoding */
jpayne@69	795 const char *errors /* error handling */
jpayne@69	796 );
jpayne@69	797
jpayne@69	798 /* --- UTF-7 Codecs ------------------------------------------------------- */
jpayne@69	799
jpayne@69	800 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7(
jpayne@69	801 const Py_UNICODE *data, /* Unicode char buffer */
jpayne@69	802 Py_ssize_t length, /* number of Py_UNICODE chars to encode */
jpayne@69	803 int base64SetO, /* Encode RFC2152 Set O characters in base64 */
jpayne@69	804 int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */
jpayne@69	805 const char *errors /* error handling */
jpayne@69	806 );
jpayne@69	807
jpayne@69	808 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF7(
jpayne@69	809 PyObject *unicode, /* Unicode object */
jpayne@69	810 int base64SetO, /* Encode RFC2152 Set O characters in base64 */
jpayne@69	811 int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */
jpayne@69	812 const char *errors /* error handling */
jpayne@69	813 );
jpayne@69	814
jpayne@69	815 /* --- UTF-8 Codecs ------------------------------------------------------- */
jpayne@69	816
jpayne@69	817 PyAPI_FUNC(PyObject*) _PyUnicode_AsUTF8String(
jpayne@69	818 PyObject *unicode,
jpayne@69	819 const char *errors);
jpayne@69	820
jpayne@69	821 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8(
jpayne@69	822 const Py_UNICODE *data, /* Unicode char buffer */
jpayne@69	823 Py_ssize_t length, /* number of Py_UNICODE chars to encode */
jpayne@69	824 const char *errors /* error handling */
jpayne@69	825 );
jpayne@69	826
jpayne@69	827 /* --- UTF-32 Codecs ------------------------------------------------------ */
jpayne@69	828
jpayne@69	829 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF32(
jpayne@69	830 const Py_UNICODE *data, /* Unicode char buffer */
jpayne@69	831 Py_ssize_t length, /* number of Py_UNICODE chars to encode */
jpayne@69	832 const char *errors, /* error handling */
jpayne@69	833 int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
jpayne@69	834 );
jpayne@69	835
jpayne@69	836 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF32(
jpayne@69	837 PyObject *object, /* Unicode object */
jpayne@69	838 const char *errors, /* error handling */
jpayne@69	839 int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
jpayne@69	840 );
jpayne@69	841
jpayne@69	842 /* --- UTF-16 Codecs ------------------------------------------------------ */
jpayne@69	843
jpayne@69	844 /* Returns a Python string object holding the UTF-16 encoded value of
jpayne@69	845 the Unicode data.
jpayne@69	846
jpayne@69	847 If byteorder is not 0, output is written according to the following
jpayne@69	848 byte order:
jpayne@69	849
jpayne@69	850 byteorder == -1: little endian
jpayne@69	851 byteorder == 0: native byte order (writes a BOM mark)
jpayne@69	852 byteorder == 1: big endian
jpayne@69	853
jpayne@69	854 If byteorder is 0, the output string will always start with the
jpayne@69	855 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
jpayne@69	856 prepended.
jpayne@69	857
jpayne@69	858 Note that Py_UNICODE data is being interpreted as UTF-16 reduced to
jpayne@69	859 UCS-2. This trick makes it possible to add full UTF-16 capabilities
jpayne@69	860 at a later point without compromising the APIs.
jpayne@69	861
jpayne@69	862 */
jpayne@69	863 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16(
jpayne@69	864 const Py_UNICODE *data, /* Unicode char buffer */
jpayne@69	865 Py_ssize_t length, /* number of Py_UNICODE chars to encode */
jpayne@69	866 const char *errors, /* error handling */
jpayne@69	867 int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
jpayne@69	868 );
jpayne@69	869
jpayne@69	870 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF16(
jpayne@69	871 PyObject* unicode, /* Unicode object */
jpayne@69	872 const char *errors, /* error handling */
jpayne@69	873 int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
jpayne@69	874 );
jpayne@69	875
jpayne@69	876 /* --- Unicode-Escape Codecs ---------------------------------------------- */
jpayne@69	877
jpayne@69	878 /* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape
jpayne@69	879 chars. */
jpayne@69	880 PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscape(
jpayne@69	881 const char *string, /* Unicode-Escape encoded string */
jpayne@69	882 Py_ssize_t length, /* size of string */
jpayne@69	883 const char *errors, /* error handling */
jpayne@69	884 const char **first_invalid_escape /* on return, points to first
jpayne@69	885 invalid escaped char in
jpayne@69	886 string. */
jpayne@69	887 );
jpayne@69	888
jpayne@69	889 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeUnicodeEscape(
jpayne@69	890 const Py_UNICODE *data, /* Unicode char buffer */
jpayne@69	891 Py_ssize_t length /* Number of Py_UNICODE chars to encode */
jpayne@69	892 );
jpayne@69	893
jpayne@69	894 /* --- Raw-Unicode-Escape Codecs ------------------------------------------ */
jpayne@69	895
jpayne@69	896 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
jpayne@69	897 const Py_UNICODE *data, /* Unicode char buffer */
jpayne@69	898 Py_ssize_t length /* Number of Py_UNICODE chars to encode */
jpayne@69	899 );
jpayne@69	900
jpayne@69	901 /* --- Latin-1 Codecs ----------------------------------------------------- */
jpayne@69	902
jpayne@69	903 PyAPI_FUNC(PyObject*) _PyUnicode_AsLatin1String(
jpayne@69	904 PyObject* unicode,
jpayne@69	905 const char* errors);
jpayne@69	906
jpayne@69	907 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1(
jpayne@69	908 const Py_UNICODE *data, /* Unicode char buffer */
jpayne@69	909 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
jpayne@69	910 const char *errors /* error handling */
jpayne@69	911 );
jpayne@69	912
jpayne@69	913 /* --- ASCII Codecs ------------------------------------------------------- */
jpayne@69	914
jpayne@69	915 PyAPI_FUNC(PyObject*) _PyUnicode_AsASCIIString(
jpayne@69	916 PyObject* unicode,
jpayne@69	917 const char* errors);
jpayne@69	918
jpayne@69	919 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII(
jpayne@69	920 const Py_UNICODE *data, /* Unicode char buffer */
jpayne@69	921 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
jpayne@69	922 const char *errors /* error handling */
jpayne@69	923 );
jpayne@69	924
jpayne@69	925 /* --- Character Map Codecs ----------------------------------------------- */
jpayne@69	926
jpayne@69	927 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap(
jpayne@69	928 const Py_UNICODE *data, /* Unicode char buffer */
jpayne@69	929 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
jpayne@69	930 PyObject *mapping, /* encoding mapping */
jpayne@69	931 const char *errors /* error handling */
jpayne@69	932 );
jpayne@69	933
jpayne@69	934 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeCharmap(
jpayne@69	935 PyObject *unicode, /* Unicode object */
jpayne@69	936 PyObject *mapping, /* encoding mapping */
jpayne@69	937 const char *errors /* error handling */
jpayne@69	938 );
jpayne@69	939
jpayne@69	940 /* Translate a Py_UNICODE buffer of the given length by applying a
jpayne@69	941 character mapping table to it and return the resulting Unicode
jpayne@69	942 object.
jpayne@69	943
jpayne@69	944 The mapping table must map Unicode ordinal integers to Unicode strings,
jpayne@69	945 Unicode ordinal integers or None (causing deletion of the character).
jpayne@69	946
jpayne@69	947 Mapping tables may be dictionaries or sequences. Unmapped character
jpayne@69	948 ordinals (ones which cause a LookupError) are left untouched and
jpayne@69	949 are copied as-is.
jpayne@69	950
jpayne@69	951 */
jpayne@69	952 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap(
jpayne@69	953 const Py_UNICODE *data, /* Unicode char buffer */
jpayne@69	954 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
jpayne@69	955 PyObject *table, /* Translate table */
jpayne@69	956 const char *errors /* error handling */
jpayne@69	957 );
jpayne@69	958
jpayne@69	959 /* --- MBCS codecs for Windows -------------------------------------------- */
jpayne@69	960
jpayne@69	961 #ifdef MS_WINDOWS
jpayne@69	962 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS(
jpayne@69	963 const Py_UNICODE *data, /* Unicode char buffer */
jpayne@69	964 Py_ssize_t length, /* number of Py_UNICODE chars to encode */
jpayne@69	965 const char *errors /* error handling */
jpayne@69	966 );
jpayne@69	967 #endif
jpayne@69	968
jpayne@69	969 /* --- Decimal Encoder ---------------------------------------------------- */
jpayne@69	970
jpayne@69	971 /* Takes a Unicode string holding a decimal value and writes it into
jpayne@69	972 an output buffer using standard ASCII digit codes.
jpayne@69	973
jpayne@69	974 The output buffer has to provide at least length+1 bytes of storage
jpayne@69	975 area. The output string is 0-terminated.
jpayne@69	976
jpayne@69	977 The encoder converts whitespace to ' ', decimal characters to their
jpayne@69	978 corresponding ASCII digit and all other Latin-1 characters except
jpayne@69	979 \0 as-is. Characters outside this range (Unicode ordinals 1-256)
jpayne@69	980 are treated as errors. This includes embedded NULL bytes.
jpayne@69	981
jpayne@69	982 Error handling is defined by the errors argument:
jpayne@69	983
jpayne@69	984 NULL or "strict": raise a ValueError
jpayne@69	985 "ignore": ignore the wrong characters (these are not copied to the
jpayne@69	986 output buffer)
jpayne@69	987 "replace": replaces illegal characters with '?'
jpayne@69	988
jpayne@69	989 Returns 0 on success, -1 on failure.
jpayne@69	990
jpayne@69	991 */
jpayne@69	992
jpayne@69	993 /* Py_DEPRECATED(3.3) */ PyAPI_FUNC(int) PyUnicode_EncodeDecimal(
jpayne@69	994 Py_UNICODE *s, /* Unicode buffer */
jpayne@69	995 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
jpayne@69	996 char *output, /* Output buffer; must have size >= length */
jpayne@69	997 const char *errors /* error handling */
jpayne@69	998 );
jpayne@69	999
jpayne@69	1000 /* Transforms code points that have decimal digit property to the
jpayne@69	1001 corresponding ASCII digit code points.
jpayne@69	1002
jpayne@69	1003 Returns a new Unicode string on success, NULL on failure.
jpayne@69	1004 */
jpayne@69	1005
jpayne@69	1006 /* Py_DEPRECATED(3.3) */
jpayne@69	1007 PyAPI_FUNC(PyObject*) PyUnicode_TransformDecimalToASCII(
jpayne@69	1008 Py_UNICODE *s, /* Unicode buffer */
jpayne@69	1009 Py_ssize_t length /* Number of Py_UNICODE chars to transform */
jpayne@69	1010 );
jpayne@69	1011
jpayne@69	1012 /* Converts a Unicode object holding a decimal value to an ASCII string
jpayne@69	1013 for using in int, float and complex parsers.
jpayne@69	1014 Transforms code points that have decimal digit property to the
jpayne@69	1015 corresponding ASCII digit code points. Transforms spaces to ASCII.
jpayne@69	1016 Transforms code points starting from the first non-ASCII code point that
jpayne@69	1017 is neither a decimal digit nor a space to the end into '?'. */
jpayne@69	1018
jpayne@69	1019 PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII(
jpayne@69	1020 PyObject *unicode /* Unicode object */
jpayne@69	1021 );
jpayne@69	1022
jpayne@69	1023 /* --- Methods & Slots ---------------------------------------------------- */
jpayne@69	1024
jpayne@69	1025 PyAPI_FUNC(PyObject *) _PyUnicode_JoinArray(
jpayne@69	1026 PyObject *separator,
jpayne@69	1027 PyObject *const *items,
jpayne@69	1028 Py_ssize_t seqlen
jpayne@69	1029 );
jpayne@69	1030
jpayne@69	1031 /* Test whether a unicode is equal to ASCII identifier. Return 1 if true,
jpayne@69	1032 0 otherwise. The right argument must be ASCII identifier.
jpayne@69	1033 Any error occurs inside will be cleared before return. */
jpayne@69	1034 PyAPI_FUNC(int) _PyUnicode_EqualToASCIIId(
jpayne@69	1035 PyObject *left, /* Left string */
jpayne@69	1036 _Py_Identifier *right /* Right identifier */
jpayne@69	1037 );
jpayne@69	1038
jpayne@69	1039 /* Test whether a unicode is equal to ASCII string. Return 1 if true,
jpayne@69	1040 0 otherwise. The right argument must be ASCII-encoded string.
jpayne@69	1041 Any error occurs inside will be cleared before return. */
jpayne@69	1042 PyAPI_FUNC(int) _PyUnicode_EqualToASCIIString(
jpayne@69	1043 PyObject *left,
jpayne@69	1044 const char *right /* ASCII-encoded string */
jpayne@69	1045 );
jpayne@69	1046
jpayne@69	1047 /* Externally visible for str.strip(unicode) */
jpayne@69	1048 PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
jpayne@69	1049 PyObject *self,
jpayne@69	1050 int striptype,
jpayne@69	1051 PyObject *sepobj
jpayne@69	1052 );
jpayne@69	1053
jpayne@69	1054 /* Using explicit passed-in values, insert the thousands grouping
jpayne@69	1055 into the string pointed to by buffer. For the argument descriptions,
jpayne@69	1056 see Objects/stringlib/localeutil.h */
jpayne@69	1057 PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(
jpayne@69	1058 _PyUnicodeWriter *writer,
jpayne@69	1059 Py_ssize_t n_buffer,
jpayne@69	1060 PyObject *digits,
jpayne@69	1061 Py_ssize_t d_pos,
jpayne@69	1062 Py_ssize_t n_digits,
jpayne@69	1063 Py_ssize_t min_width,
jpayne@69	1064 const char *grouping,
jpayne@69	1065 PyObject *thousands_sep,
jpayne@69	1066 Py_UCS4 *maxchar);
jpayne@69	1067
jpayne@69	1068 /* === Characters Type APIs =============================================== */
jpayne@69	1069
jpayne@69	1070 /* Helper array used by Py_UNICODE_ISSPACE(). */
jpayne@69	1071
jpayne@69	1072 PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[];
jpayne@69	1073
jpayne@69	1074 /* These should not be used directly. Use the Py_UNICODE_IS* and
jpayne@69	1075 Py_UNICODE_TO* macros instead.
jpayne@69	1076
jpayne@69	1077 These APIs are implemented in Objects/unicodectype.c.
jpayne@69	1078
jpayne@69	1079 */
jpayne@69	1080
jpayne@69	1081 PyAPI_FUNC(int) _PyUnicode_IsLowercase(
jpayne@69	1082 Py_UCS4 ch /* Unicode character */
jpayne@69	1083 );
jpayne@69	1084
jpayne@69	1085 PyAPI_FUNC(int) _PyUnicode_IsUppercase(
jpayne@69	1086 Py_UCS4 ch /* Unicode character */
jpayne@69	1087 );
jpayne@69	1088
jpayne@69	1089 PyAPI_FUNC(int) _PyUnicode_IsTitlecase(
jpayne@69	1090 Py_UCS4 ch /* Unicode character */
jpayne@69	1091 );
jpayne@69	1092
jpayne@69	1093 PyAPI_FUNC(int) _PyUnicode_IsXidStart(
jpayne@69	1094 Py_UCS4 ch /* Unicode character */
jpayne@69	1095 );
jpayne@69	1096
jpayne@69	1097 PyAPI_FUNC(int) _PyUnicode_IsXidContinue(
jpayne@69	1098 Py_UCS4 ch /* Unicode character */
jpayne@69	1099 );
jpayne@69	1100
jpayne@69	1101 PyAPI_FUNC(int) _PyUnicode_IsWhitespace(
jpayne@69	1102 const Py_UCS4 ch /* Unicode character */
jpayne@69	1103 );
jpayne@69	1104
jpayne@69	1105 PyAPI_FUNC(int) _PyUnicode_IsLinebreak(
jpayne@69	1106 const Py_UCS4 ch /* Unicode character */
jpayne@69	1107 );
jpayne@69	1108
jpayne@69	1109 /* Py_DEPRECATED(3.3) */ PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase(
jpayne@69	1110 Py_UCS4 ch /* Unicode character */
jpayne@69	1111 );
jpayne@69	1112
jpayne@69	1113 /* Py_DEPRECATED(3.3) */ PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase(
jpayne@69	1114 Py_UCS4 ch /* Unicode character */
jpayne@69	1115 );
jpayne@69	1116
jpayne@69	1117 Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase(
jpayne@69	1118 Py_UCS4 ch /* Unicode character */
jpayne@69	1119 );
jpayne@69	1120
jpayne@69	1121 PyAPI_FUNC(int) _PyUnicode_ToLowerFull(
jpayne@69	1122 Py_UCS4 ch, /* Unicode character */
jpayne@69	1123 Py_UCS4 *res
jpayne@69	1124 );
jpayne@69	1125
jpayne@69	1126 PyAPI_FUNC(int) _PyUnicode_ToTitleFull(
jpayne@69	1127 Py_UCS4 ch, /* Unicode character */
jpayne@69	1128 Py_UCS4 *res
jpayne@69	1129 );
jpayne@69	1130
jpayne@69	1131 PyAPI_FUNC(int) _PyUnicode_ToUpperFull(
jpayne@69	1132 Py_UCS4 ch, /* Unicode character */
jpayne@69	1133 Py_UCS4 *res
jpayne@69	1134 );
jpayne@69	1135
jpayne@69	1136 PyAPI_FUNC(int) _PyUnicode_ToFoldedFull(
jpayne@69	1137 Py_UCS4 ch, /* Unicode character */
jpayne@69	1138 Py_UCS4 *res
jpayne@69	1139 );
jpayne@69	1140
jpayne@69	1141 PyAPI_FUNC(int) _PyUnicode_IsCaseIgnorable(
jpayne@69	1142 Py_UCS4 ch /* Unicode character */
jpayne@69	1143 );
jpayne@69	1144
jpayne@69	1145 PyAPI_FUNC(int) _PyUnicode_IsCased(
jpayne@69	1146 Py_UCS4 ch /* Unicode character */
jpayne@69	1147 );
jpayne@69	1148
jpayne@69	1149 PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit(
jpayne@69	1150 Py_UCS4 ch /* Unicode character */
jpayne@69	1151 );
jpayne@69	1152
jpayne@69	1153 PyAPI_FUNC(int) _PyUnicode_ToDigit(
jpayne@69	1154 Py_UCS4 ch /* Unicode character */
jpayne@69	1155 );
jpayne@69	1156
jpayne@69	1157 PyAPI_FUNC(double) _PyUnicode_ToNumeric(
jpayne@69	1158 Py_UCS4 ch /* Unicode character */
jpayne@69	1159 );
jpayne@69	1160
jpayne@69	1161 PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit(
jpayne@69	1162 Py_UCS4 ch /* Unicode character */
jpayne@69	1163 );
jpayne@69	1164
jpayne@69	1165 PyAPI_FUNC(int) _PyUnicode_IsDigit(
jpayne@69	1166 Py_UCS4 ch /* Unicode character */
jpayne@69	1167 );
jpayne@69	1168
jpayne@69	1169 PyAPI_FUNC(int) _PyUnicode_IsNumeric(
jpayne@69	1170 Py_UCS4 ch /* Unicode character */
jpayne@69	1171 );
jpayne@69	1172
jpayne@69	1173 PyAPI_FUNC(int) _PyUnicode_IsPrintable(
jpayne@69	1174 Py_UCS4 ch /* Unicode character */
jpayne@69	1175 );
jpayne@69	1176
jpayne@69	1177 PyAPI_FUNC(int) _PyUnicode_IsAlpha(
jpayne@69	1178 Py_UCS4 ch /* Unicode character */
jpayne@69	1179 );
jpayne@69	1180
jpayne@69	1181 Py_DEPRECATED(3.3) PyAPI_FUNC(size_t) Py_UNICODE_strlen(
jpayne@69	1182 const Py_UNICODE *u
jpayne@69	1183 );
jpayne@69	1184
jpayne@69	1185 Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcpy(
jpayne@69	1186 Py_UNICODE *s1,
jpayne@69	1187 const Py_UNICODE *s2);
jpayne@69	1188
jpayne@69	1189 Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcat(
jpayne@69	1190 Py_UNICODE *s1, const Py_UNICODE *s2);
jpayne@69	1191
jpayne@69	1192 Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strncpy(
jpayne@69	1193 Py_UNICODE *s1,
jpayne@69	1194 const Py_UNICODE *s2,
jpayne@69	1195 size_t n);
jpayne@69	1196
jpayne@69	1197 Py_DEPRECATED(3.3) PyAPI_FUNC(int) Py_UNICODE_strcmp(
jpayne@69	1198 const Py_UNICODE *s1,
jpayne@69	1199 const Py_UNICODE *s2
jpayne@69	1200 );
jpayne@69	1201
jpayne@69	1202 Py_DEPRECATED(3.3) PyAPI_FUNC(int) Py_UNICODE_strncmp(
jpayne@69	1203 const Py_UNICODE *s1,
jpayne@69	1204 const Py_UNICODE *s2,
jpayne@69	1205 size_t n
jpayne@69	1206 );
jpayne@69	1207
jpayne@69	1208 Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strchr(
jpayne@69	1209 const Py_UNICODE *s,
jpayne@69	1210 Py_UNICODE c
jpayne@69	1211 );
jpayne@69	1212
jpayne@69	1213 Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strrchr(
jpayne@69	1214 const Py_UNICODE *s,
jpayne@69	1215 Py_UNICODE c
jpayne@69	1216 );
jpayne@69	1217
jpayne@69	1218 PyAPI_FUNC(PyObject*) _PyUnicode_FormatLong(PyObject *, int, int, int);
jpayne@69	1219
jpayne@69	1220 /* Create a copy of a unicode string ending with a nul character. Return NULL
jpayne@69	1221 and raise a MemoryError exception on memory allocation failure, otherwise
jpayne@69	1222 return a new allocated buffer (use PyMem_Free() to free the buffer). */
jpayne@69	1223
jpayne@69	1224 Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE*) PyUnicode_AsUnicodeCopy(
jpayne@69	1225 PyObject *unicode
jpayne@69	1226 );
jpayne@69	1227
jpayne@69	1228 /* Return an interned Unicode object for an Identifier; may fail if there is no memory.*/
jpayne@69	1229 PyAPI_FUNC(PyObject*) _PyUnicode_FromId(_Py_Identifier*);
jpayne@69	1230 /* Clear all static strings. */
jpayne@69	1231 PyAPI_FUNC(void) _PyUnicode_ClearStaticStrings(void);
jpayne@69	1232
jpayne@69	1233 /* Fast equality check when the inputs are known to be exact unicode types
jpayne@69	1234 and where the hash values are equal (i.e. a very probable match) */
jpayne@69	1235 PyAPI_FUNC(int) _PyUnicode_EQ(PyObject *, PyObject *);
jpayne@69	1236
jpayne@69	1237 #ifdef __cplusplus
jpayne@69	1238 }
jpayne@69	1239 #endif

Mercurial > repos > rliterman > csp2

annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/include/python3.8/cpython/unicodeobject.h @ 69:33d812a61356