jpayne@69: // © 2016 and later: Unicode, Inc. and others. jpayne@69: // License & terms of use: http://www.unicode.org/copyright.html jpayne@69: /* jpayne@69: ********************************************************************** jpayne@69: * Copyright (C) 1999-2009, International Business Machines jpayne@69: * Corporation and others. All Rights Reserved. jpayne@69: ********************************************************************** jpayne@69: * jpayne@69: * jpayne@69: * ucnv_err.h: jpayne@69: */ jpayne@69: jpayne@69: /** jpayne@69: * \file jpayne@69: * \brief C UConverter predefined error callbacks jpayne@69: * jpayne@69: *

Error Behaviour Functions

jpayne@69: * Defines some error behaviour functions called by ucnv_{from,to}Unicode jpayne@69: * These are provided as part of ICU and many are stable, but they jpayne@69: * can also be considered only as an example of what can be done with jpayne@69: * callbacks. You may of course write your own. jpayne@69: * jpayne@69: * If you want to write your own, you may also find the functions from jpayne@69: * ucnv_cb.h useful when writing your own callbacks. jpayne@69: * jpayne@69: * These functions, although public, should NEVER be called directly. jpayne@69: * They should be used as parameters to the ucnv_setFromUCallback jpayne@69: * and ucnv_setToUCallback functions, to set the behaviour of a converter jpayne@69: * when it encounters ILLEGAL/UNMAPPED/INVALID sequences. jpayne@69: * jpayne@69: * usage example: 'STOP' doesn't need any context, but newContext jpayne@69: * could be set to something other than 'NULL' if needed. The available jpayne@69: * contexts in this header can modify the default behavior of the callback. jpayne@69: * jpayne@69: * \code jpayne@69: * UErrorCode err = U_ZERO_ERROR; jpayne@69: * UConverter *myConverter = ucnv_open("ibm-949", &err); jpayne@69: * const void *oldContext; jpayne@69: * UConverterFromUCallback oldAction; jpayne@69: * jpayne@69: * jpayne@69: * if (U_SUCCESS(err)) jpayne@69: * { jpayne@69: * ucnv_setFromUCallBack(myConverter, jpayne@69: * UCNV_FROM_U_CALLBACK_STOP, jpayne@69: * NULL, jpayne@69: * &oldAction, jpayne@69: * &oldContext, jpayne@69: * &status); jpayne@69: * } jpayne@69: * \endcode jpayne@69: * jpayne@69: * The code above tells "myConverter" to stop when it encounters an jpayne@69: * ILLEGAL/TRUNCATED/INVALID sequences when it is used to convert from jpayne@69: * Unicode -> Codepage. The behavior from Codepage to Unicode is not changed, jpayne@69: * and ucnv_setToUCallBack would need to be called in order to change jpayne@69: * that behavior too. jpayne@69: * jpayne@69: * Here is an example with a context: jpayne@69: * jpayne@69: * \code jpayne@69: * UErrorCode err = U_ZERO_ERROR; jpayne@69: * UConverter *myConverter = ucnv_open("ibm-949", &err); jpayne@69: * const void *oldContext; jpayne@69: * UConverterFromUCallback oldAction; jpayne@69: * jpayne@69: * jpayne@69: * if (U_SUCCESS(err)) jpayne@69: * { jpayne@69: * ucnv_setToUCallBack(myConverter, jpayne@69: * UCNV_TO_U_CALLBACK_SUBSTITUTE, jpayne@69: * UCNV_SUB_STOP_ON_ILLEGAL, jpayne@69: * &oldAction, jpayne@69: * &oldContext, jpayne@69: * &status); jpayne@69: * } jpayne@69: * \endcode jpayne@69: * jpayne@69: * The code above tells "myConverter" to stop when it encounters an jpayne@69: * ILLEGAL/TRUNCATED/INVALID sequences when it is used to convert from jpayne@69: * Codepage -> Unicode. Any unmapped and legal characters will be jpayne@69: * substituted to be the default substitution character. jpayne@69: */ jpayne@69: jpayne@69: #ifndef UCNV_ERR_H jpayne@69: #define UCNV_ERR_H jpayne@69: jpayne@69: #include "unicode/utypes.h" jpayne@69: jpayne@69: #if !UCONFIG_NO_CONVERSION jpayne@69: jpayne@69: /** Forward declaring the UConverter structure. @stable ICU 2.0 */ jpayne@69: struct UConverter; jpayne@69: jpayne@69: /** @stable ICU 2.0 */ jpayne@69: typedef struct UConverter UConverter; jpayne@69: jpayne@69: /** jpayne@69: * FROM_U, TO_U context options for sub callback jpayne@69: * @stable ICU 2.0 jpayne@69: */ jpayne@69: #define UCNV_SUB_STOP_ON_ILLEGAL "i" jpayne@69: jpayne@69: /** jpayne@69: * FROM_U, TO_U context options for skip callback jpayne@69: * @stable ICU 2.0 jpayne@69: */ jpayne@69: #define UCNV_SKIP_STOP_ON_ILLEGAL "i" jpayne@69: jpayne@69: /** jpayne@69: * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to ICU (%UXXXX) jpayne@69: * @stable ICU 2.0 jpayne@69: */ jpayne@69: #define UCNV_ESCAPE_ICU NULL jpayne@69: /** jpayne@69: * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to JAVA (\\uXXXX) jpayne@69: * @stable ICU 2.0 jpayne@69: */ jpayne@69: #define UCNV_ESCAPE_JAVA "J" jpayne@69: /** jpayne@69: * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX) jpayne@69: * TO_U_CALLBACK_ESCAPE option to escape the character value according to C (\\xXXXX) jpayne@69: * @stable ICU 2.0 jpayne@69: */ jpayne@69: #define UCNV_ESCAPE_C "C" jpayne@69: /** jpayne@69: * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly jpayne@69: * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly jpayne@69: * @stable ICU 2.0 jpayne@69: */ jpayne@69: #define UCNV_ESCAPE_XML_DEC "D" jpayne@69: /** jpayne@69: * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly jpayne@69: * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly jpayne@69: * @stable ICU 2.0 jpayne@69: */ jpayne@69: #define UCNV_ESCAPE_XML_HEX "X" jpayne@69: /** jpayne@69: * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to Unicode (U+XXXXX) jpayne@69: * @stable ICU 2.0 jpayne@69: */ jpayne@69: #define UCNV_ESCAPE_UNICODE "U" jpayne@69: jpayne@69: /** jpayne@69: * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to CSS2 conventions (\\HH..H, that is, jpayne@69: * a backslash, 1..6 hex digits, and a space) jpayne@69: * @stable ICU 4.0 jpayne@69: */ jpayne@69: #define UCNV_ESCAPE_CSS2 "S" jpayne@69: jpayne@69: /** jpayne@69: * The process condition code to be used with the callbacks. jpayne@69: * Codes which are greater than UCNV_IRREGULAR should be jpayne@69: * passed on to any chained callbacks. jpayne@69: * @stable ICU 2.0 jpayne@69: */ jpayne@69: typedef enum { jpayne@69: UCNV_UNASSIGNED = 0, /**< The code point is unassigned. jpayne@69: The error code U_INVALID_CHAR_FOUND will be set. */ jpayne@69: UCNV_ILLEGAL = 1, /**< The code point is illegal. For example, jpayne@69: \\x81\\x2E is illegal in SJIS because \\x2E jpayne@69: is not a valid trail byte for the \\x81 jpayne@69: lead byte. jpayne@69: Also, starting with Unicode 3.0.1, non-shortest byte sequences jpayne@69: in UTF-8 (like \\xC1\\xA1 instead of \\x61 for U+0061) jpayne@69: are also illegal, not just irregular. jpayne@69: The error code U_ILLEGAL_CHAR_FOUND will be set. */ jpayne@69: UCNV_IRREGULAR = 2, /**< The codepoint is not a regular sequence in jpayne@69: the encoding. For example, \\xED\\xA0\\x80..\\xED\\xBF\\xBF jpayne@69: are irregular UTF-8 byte sequences for single surrogate jpayne@69: code points. jpayne@69: The error code U_INVALID_CHAR_FOUND will be set. */ jpayne@69: UCNV_RESET = 3, /**< The callback is called with this reason when a jpayne@69: 'reset' has occurred. Callback should reset all jpayne@69: state. */ jpayne@69: UCNV_CLOSE = 4, /**< Called when the converter is closed. The jpayne@69: callback should release any allocated memory.*/ jpayne@69: UCNV_CLONE = 5 /**< Called when ucnv_safeClone() is called on the jpayne@69: converter. the pointer available as the jpayne@69: 'context' is an alias to the original converters' jpayne@69: context pointer. If the context must be owned jpayne@69: by the new converter, the callback must clone jpayne@69: the data and call ucnv_setFromUCallback jpayne@69: (or setToUCallback) with the correct pointer. jpayne@69: @stable ICU 2.2 jpayne@69: */ jpayne@69: } UConverterCallbackReason; jpayne@69: jpayne@69: jpayne@69: /** jpayne@69: * The structure for the fromUnicode callback function parameter. jpayne@69: * @stable ICU 2.0 jpayne@69: */ jpayne@69: typedef struct { jpayne@69: uint16_t size; /**< The size of this struct. @stable ICU 2.0 */ jpayne@69: UBool flush; /**< The internal state of converter will be reset and data flushed if set to TRUE. @stable ICU 2.0 */ jpayne@69: UConverter *converter; /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */ jpayne@69: const UChar *source; /**< Pointer to the source source buffer. @stable ICU 2.0 */ jpayne@69: const UChar *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */ jpayne@69: char *target; /**< Pointer to the target buffer. @stable ICU 2.0 */ jpayne@69: const char *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */ jpayne@69: int32_t *offsets; /**< Pointer to the buffer that receives the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */ jpayne@69: } UConverterFromUnicodeArgs; jpayne@69: jpayne@69: jpayne@69: /** jpayne@69: * The structure for the toUnicode callback function parameter. jpayne@69: * @stable ICU 2.0 jpayne@69: */ jpayne@69: typedef struct { jpayne@69: uint16_t size; /**< The size of this struct @stable ICU 2.0 */ jpayne@69: UBool flush; /**< The internal state of converter will be reset and data flushed if set to TRUE. @stable ICU 2.0 */ jpayne@69: UConverter *converter; /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */ jpayne@69: const char *source; /**< Pointer to the source source buffer. @stable ICU 2.0 */ jpayne@69: const char *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */ jpayne@69: UChar *target; /**< Pointer to the target buffer. @stable ICU 2.0 */ jpayne@69: const UChar *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */ jpayne@69: int32_t *offsets; /**< Pointer to the buffer that receives the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */ jpayne@69: } UConverterToUnicodeArgs; jpayne@69: jpayne@69: jpayne@69: /** jpayne@69: * DO NOT CALL THIS FUNCTION DIRECTLY! jpayne@69: * This From Unicode callback STOPS at the ILLEGAL_SEQUENCE, jpayne@69: * returning the error code back to the caller immediately. jpayne@69: * jpayne@69: * @param context Pointer to the callback's private data jpayne@69: * @param fromUArgs Information about the conversion in progress jpayne@69: * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence jpayne@69: * @param length Size (in bytes) of the concerned codepage sequence jpayne@69: * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint. jpayne@69: * @param reason Defines the reason the callback was invoked jpayne@69: * @param err This should always be set to a failure status prior to calling. jpayne@69: * @stable ICU 2.0 jpayne@69: */ jpayne@69: U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_STOP ( jpayne@69: const void *context, jpayne@69: UConverterFromUnicodeArgs *fromUArgs, jpayne@69: const UChar* codeUnits, jpayne@69: int32_t length, jpayne@69: UChar32 codePoint, jpayne@69: UConverterCallbackReason reason, jpayne@69: UErrorCode * err); jpayne@69: jpayne@69: jpayne@69: jpayne@69: /** jpayne@69: * DO NOT CALL THIS FUNCTION DIRECTLY! jpayne@69: * This To Unicode callback STOPS at the ILLEGAL_SEQUENCE, jpayne@69: * returning the error code back to the caller immediately. jpayne@69: * jpayne@69: * @param context Pointer to the callback's private data jpayne@69: * @param toUArgs Information about the conversion in progress jpayne@69: * @param codeUnits Points to 'length' bytes of the concerned codepage sequence jpayne@69: * @param length Size (in bytes) of the concerned codepage sequence jpayne@69: * @param reason Defines the reason the callback was invoked jpayne@69: * @param err This should always be set to a failure status prior to calling. jpayne@69: * @stable ICU 2.0 jpayne@69: */ jpayne@69: U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_STOP ( jpayne@69: const void *context, jpayne@69: UConverterToUnicodeArgs *toUArgs, jpayne@69: const char* codeUnits, jpayne@69: int32_t length, jpayne@69: UConverterCallbackReason reason, jpayne@69: UErrorCode * err); jpayne@69: jpayne@69: /** jpayne@69: * DO NOT CALL THIS FUNCTION DIRECTLY! jpayne@69: * This From Unicode callback skips any ILLEGAL_SEQUENCE, or jpayne@69: * skips only UNASSINGED_SEQUENCE depending on the context parameter jpayne@69: * simply ignoring those characters. jpayne@69: * jpayne@69: * @param context The function currently recognizes the callback options: jpayne@69: * UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE, jpayne@69: * returning the error code back to the caller immediately. jpayne@69: * NULL: Skips any ILLEGAL_SEQUENCE jpayne@69: * @param fromUArgs Information about the conversion in progress jpayne@69: * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence jpayne@69: * @param length Size (in bytes) of the concerned codepage sequence jpayne@69: * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint. jpayne@69: * @param reason Defines the reason the callback was invoked jpayne@69: * @param err Return value will be set to success if the callback was handled, jpayne@69: * otherwise this value will be set to a failure status. jpayne@69: * @stable ICU 2.0 jpayne@69: */ jpayne@69: U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_SKIP ( jpayne@69: const void *context, jpayne@69: UConverterFromUnicodeArgs *fromUArgs, jpayne@69: const UChar* codeUnits, jpayne@69: int32_t length, jpayne@69: UChar32 codePoint, jpayne@69: UConverterCallbackReason reason, jpayne@69: UErrorCode * err); jpayne@69: jpayne@69: /** jpayne@69: * DO NOT CALL THIS FUNCTION DIRECTLY! jpayne@69: * This From Unicode callback will Substitute the ILLEGAL SEQUENCE, or jpayne@69: * UNASSIGNED_SEQUENCE depending on context parameter, with the jpayne@69: * current substitution string for the converter. This is the default jpayne@69: * callback. jpayne@69: * jpayne@69: * @param context The function currently recognizes the callback options: jpayne@69: * UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE, jpayne@69: * returning the error code back to the caller immediately. jpayne@69: * NULL: Substitutes any ILLEGAL_SEQUENCE jpayne@69: * @param fromUArgs Information about the conversion in progress jpayne@69: * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence jpayne@69: * @param length Size (in bytes) of the concerned codepage sequence jpayne@69: * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint. jpayne@69: * @param reason Defines the reason the callback was invoked jpayne@69: * @param err Return value will be set to success if the callback was handled, jpayne@69: * otherwise this value will be set to a failure status. jpayne@69: * @see ucnv_setSubstChars jpayne@69: * @stable ICU 2.0 jpayne@69: */ jpayne@69: U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_SUBSTITUTE ( jpayne@69: const void *context, jpayne@69: UConverterFromUnicodeArgs *fromUArgs, jpayne@69: const UChar* codeUnits, jpayne@69: int32_t length, jpayne@69: UChar32 codePoint, jpayne@69: UConverterCallbackReason reason, jpayne@69: UErrorCode * err); jpayne@69: jpayne@69: /** jpayne@69: * DO NOT CALL THIS FUNCTION DIRECTLY! jpayne@69: * This From Unicode callback will Substitute the ILLEGAL SEQUENCE with the jpayne@69: * hexadecimal representation of the illegal codepoints jpayne@69: * jpayne@69: * @param context The function currently recognizes the callback options: jpayne@69: * jpayne@69: * @param fromUArgs Information about the conversion in progress jpayne@69: * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence jpayne@69: * @param length Size (in bytes) of the concerned codepage sequence jpayne@69: * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint. jpayne@69: * @param reason Defines the reason the callback was invoked jpayne@69: * @param err Return value will be set to success if the callback was handled, jpayne@69: * otherwise this value will be set to a failure status. jpayne@69: * @stable ICU 2.0 jpayne@69: */ jpayne@69: U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_ESCAPE ( jpayne@69: const void *context, jpayne@69: UConverterFromUnicodeArgs *fromUArgs, jpayne@69: const UChar* codeUnits, jpayne@69: int32_t length, jpayne@69: UChar32 codePoint, jpayne@69: UConverterCallbackReason reason, jpayne@69: UErrorCode * err); jpayne@69: jpayne@69: jpayne@69: /** jpayne@69: * DO NOT CALL THIS FUNCTION DIRECTLY! jpayne@69: * This To Unicode callback skips any ILLEGAL_SEQUENCE, or jpayne@69: * skips only UNASSINGED_SEQUENCE depending on the context parameter jpayne@69: * simply ignoring those characters. jpayne@69: * jpayne@69: * @param context The function currently recognizes the callback options: jpayne@69: * UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE, jpayne@69: * returning the error code back to the caller immediately. jpayne@69: * NULL: Skips any ILLEGAL_SEQUENCE jpayne@69: * @param toUArgs Information about the conversion in progress jpayne@69: * @param codeUnits Points to 'length' bytes of the concerned codepage sequence jpayne@69: * @param length Size (in bytes) of the concerned codepage sequence jpayne@69: * @param reason Defines the reason the callback was invoked jpayne@69: * @param err Return value will be set to success if the callback was handled, jpayne@69: * otherwise this value will be set to a failure status. jpayne@69: * @stable ICU 2.0 jpayne@69: */ jpayne@69: U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_SKIP ( jpayne@69: const void *context, jpayne@69: UConverterToUnicodeArgs *toUArgs, jpayne@69: const char* codeUnits, jpayne@69: int32_t length, jpayne@69: UConverterCallbackReason reason, jpayne@69: UErrorCode * err); jpayne@69: jpayne@69: /** jpayne@69: * DO NOT CALL THIS FUNCTION DIRECTLY! jpayne@69: * This To Unicode callback will Substitute the ILLEGAL SEQUENCE,or jpayne@69: * UNASSIGNED_SEQUENCE depending on context parameter, with the jpayne@69: * Unicode substitution character, U+FFFD. jpayne@69: * jpayne@69: * @param context The function currently recognizes the callback options: jpayne@69: * UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE, jpayne@69: * returning the error code back to the caller immediately. jpayne@69: * NULL: Substitutes any ILLEGAL_SEQUENCE jpayne@69: * @param toUArgs Information about the conversion in progress jpayne@69: * @param codeUnits Points to 'length' bytes of the concerned codepage sequence jpayne@69: * @param length Size (in bytes) of the concerned codepage sequence jpayne@69: * @param reason Defines the reason the callback was invoked jpayne@69: * @param err Return value will be set to success if the callback was handled, jpayne@69: * otherwise this value will be set to a failure status. jpayne@69: * @stable ICU 2.0 jpayne@69: */ jpayne@69: U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_SUBSTITUTE ( jpayne@69: const void *context, jpayne@69: UConverterToUnicodeArgs *toUArgs, jpayne@69: const char* codeUnits, jpayne@69: int32_t length, jpayne@69: UConverterCallbackReason reason, jpayne@69: UErrorCode * err); jpayne@69: jpayne@69: /** jpayne@69: * DO NOT CALL THIS FUNCTION DIRECTLY! jpayne@69: * This To Unicode callback will Substitute the ILLEGAL SEQUENCE with the jpayne@69: * hexadecimal representation of the illegal bytes jpayne@69: * (in the format %XNN, e.g. "%XFF%X0A%XC8%X03"). jpayne@69: * jpayne@69: * @param context This function currently recognizes the callback options: jpayne@69: * UCNV_ESCAPE_ICU, UCNV_ESCAPE_JAVA, UCNV_ESCAPE_C, UCNV_ESCAPE_XML_DEC, jpayne@69: * UCNV_ESCAPE_XML_HEX and UCNV_ESCAPE_UNICODE. jpayne@69: * @param toUArgs Information about the conversion in progress jpayne@69: * @param codeUnits Points to 'length' bytes of the concerned codepage sequence jpayne@69: * @param length Size (in bytes) of the concerned codepage sequence jpayne@69: * @param reason Defines the reason the callback was invoked jpayne@69: * @param err Return value will be set to success if the callback was handled, jpayne@69: * otherwise this value will be set to a failure status. jpayne@69: * @stable ICU 2.0 jpayne@69: */ jpayne@69: jpayne@69: U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_ESCAPE ( jpayne@69: const void *context, jpayne@69: UConverterToUnicodeArgs *toUArgs, jpayne@69: const char* codeUnits, jpayne@69: int32_t length, jpayne@69: UConverterCallbackReason reason, jpayne@69: UErrorCode * err); jpayne@69: jpayne@69: #endif jpayne@69: jpayne@69: #endif jpayne@69: jpayne@69: /*UCNV_ERR_H*/