jpayne@69: // © 2016 and later: Unicode, Inc. and others. jpayne@69: // License & terms of use: http://www.unicode.org/copyright.html jpayne@69: /* jpayne@69: ****************************************************************************** jpayne@69: * Copyright (C) 1997-2014, International Business Machines jpayne@69: * Corporation and others. All Rights Reserved. jpayne@69: ****************************************************************************** jpayne@69: */ jpayne@69: jpayne@69: /** jpayne@69: * \file jpayne@69: * \brief C++ API: Collation Element Iterator. jpayne@69: */ jpayne@69: jpayne@69: /** jpayne@69: * File coleitr.h jpayne@69: * jpayne@69: * Created by: Helena Shih jpayne@69: * jpayne@69: * Modification History: jpayne@69: * jpayne@69: * Date Name Description jpayne@69: * jpayne@69: * 8/18/97 helena Added internal API documentation. jpayne@69: * 08/03/98 erm Synched with 1.2 version CollationElementIterator.java jpayne@69: * 12/10/99 aliu Ported Thai collation support from Java. jpayne@69: * 01/25/01 swquek Modified into a C++ wrapper calling C APIs (ucoliter.h) jpayne@69: * 02/19/01 swquek Removed CollationElementsIterator() since it is jpayne@69: * private constructor and no calls are made to it jpayne@69: * 2012-2014 markus Rewritten in C++ again. jpayne@69: */ jpayne@69: jpayne@69: #ifndef COLEITR_H jpayne@69: #define COLEITR_H jpayne@69: jpayne@69: #include "unicode/utypes.h" jpayne@69: jpayne@69: #if U_SHOW_CPLUSPLUS_API jpayne@69: jpayne@69: #if !UCONFIG_NO_COLLATION jpayne@69: jpayne@69: #include "unicode/unistr.h" jpayne@69: #include "unicode/uobject.h" jpayne@69: jpayne@69: struct UCollationElements; jpayne@69: struct UHashtable; jpayne@69: jpayne@69: U_NAMESPACE_BEGIN jpayne@69: jpayne@69: struct CollationData; jpayne@69: jpayne@69: class CharacterIterator; jpayne@69: class CollationIterator; jpayne@69: class RuleBasedCollator; jpayne@69: class UCollationPCE; jpayne@69: class UVector32; jpayne@69: jpayne@69: /** jpayne@69: * The CollationElementIterator class is used as an iterator to walk through jpayne@69: * each character of an international string. Use the iterator to return the jpayne@69: * ordering priority of the positioned character. The ordering priority of a jpayne@69: * character, which we refer to as a key, defines how a character is collated in jpayne@69: * the given collation object. jpayne@69: * For example, consider the following in Slovak and in traditional Spanish collation: jpayne@69: *
jpayne@69: *        "ca" -> the first key is key('c') and second key is key('a').
jpayne@69: *        "cha" -> the first key is key('ch') and second key is key('a').
jpayne@69: * And in German phonebook collation, jpayne@69: *
 \htmlonly       "æb"-> the first key is key('a'), the second key is key('e'), and
jpayne@69: *        the third key is key('b'). \endhtmlonly 
jpayne@69: * The key of a character, is an integer composed of primary order(short), jpayne@69: * secondary order(char), and tertiary order(char). Java strictly defines the jpayne@69: * size and signedness of its primitive data types. Therefore, the static jpayne@69: * functions primaryOrder(), secondaryOrder(), and tertiaryOrder() return jpayne@69: * int32_t to ensure the correctness of the key value. jpayne@69: *

Example of the iterator usage: (without error checking) jpayne@69: *

jpayne@69: * \code
jpayne@69: *   void CollationElementIterator_Example()
jpayne@69: *   {
jpayne@69: *       UnicodeString str = "This is a test";
jpayne@69: *       UErrorCode success = U_ZERO_ERROR;
jpayne@69: *       RuleBasedCollator* rbc =
jpayne@69: *           (RuleBasedCollator*) RuleBasedCollator::createInstance(success);
jpayne@69: *       CollationElementIterator* c =
jpayne@69: *           rbc->createCollationElementIterator( str );
jpayne@69: *       int32_t order = c->next(success);
jpayne@69: *       c->reset();
jpayne@69: *       order = c->previous(success);
jpayne@69: *       delete c;
jpayne@69: *       delete rbc;
jpayne@69: *   }
jpayne@69: * \endcode
jpayne@69: * 
jpayne@69: *

jpayne@69: * The method next() returns the collation order of the next character based on jpayne@69: * the comparison level of the collator. The method previous() returns the jpayne@69: * collation order of the previous character based on the comparison level of jpayne@69: * the collator. The Collation Element Iterator moves only in one direction jpayne@69: * between calls to reset(), setOffset(), or setText(). That is, next() jpayne@69: * and previous() can not be inter-used. Whenever previous() is to be called after jpayne@69: * next() or vice versa, reset(), setOffset() or setText() has to be called first jpayne@69: * to reset the status, shifting pointers to either the end or the start of jpayne@69: * the string (reset() or setText()), or the specified position (setOffset()). jpayne@69: * Hence at the next call of next() or previous(), the first or last collation order, jpayne@69: * or collation order at the spefcifieid position will be returned. If a change of jpayne@69: * direction is done without one of these calls, the result is undefined. jpayne@69: *

jpayne@69: * The result of a forward iterate (next()) and reversed result of the backward jpayne@69: * iterate (previous()) on the same string are equivalent, if collation orders jpayne@69: * with the value 0 are ignored. jpayne@69: * Character based on the comparison level of the collator. A collation order jpayne@69: * consists of primary order, secondary order and tertiary order. The data jpayne@69: * type of the collation order is int32_t. jpayne@69: * jpayne@69: * Note, CollationElementIterator should not be subclassed. jpayne@69: * @see Collator jpayne@69: * @see RuleBasedCollator jpayne@69: * @version 1.8 Jan 16 2001 jpayne@69: */ jpayne@69: class U_I18N_API CollationElementIterator U_FINAL : public UObject { jpayne@69: public: jpayne@69: jpayne@69: // CollationElementIterator public data member ------------------------------ jpayne@69: jpayne@69: enum { jpayne@69: /** jpayne@69: * NULLORDER indicates that an error has occured while processing jpayne@69: * @stable ICU 2.0 jpayne@69: */ jpayne@69: NULLORDER = (int32_t)0xffffffff jpayne@69: }; jpayne@69: jpayne@69: // CollationElementIterator public constructor/destructor ------------------- jpayne@69: jpayne@69: /** jpayne@69: * Copy constructor. jpayne@69: * jpayne@69: * @param other the object to be copied from jpayne@69: * @stable ICU 2.0 jpayne@69: */ jpayne@69: CollationElementIterator(const CollationElementIterator& other); jpayne@69: jpayne@69: /** jpayne@69: * Destructor jpayne@69: * @stable ICU 2.0 jpayne@69: */ jpayne@69: virtual ~CollationElementIterator(); jpayne@69: jpayne@69: // CollationElementIterator public methods ---------------------------------- jpayne@69: jpayne@69: /** jpayne@69: * Returns true if "other" is the same as "this" jpayne@69: * jpayne@69: * @param other the object to be compared jpayne@69: * @return true if "other" is the same as "this" jpayne@69: * @stable ICU 2.0 jpayne@69: */ jpayne@69: UBool operator==(const CollationElementIterator& other) const; jpayne@69: jpayne@69: /** jpayne@69: * Returns true if "other" is not the same as "this". jpayne@69: * jpayne@69: * @param other the object to be compared jpayne@69: * @return true if "other" is not the same as "this" jpayne@69: * @stable ICU 2.0 jpayne@69: */ jpayne@69: UBool operator!=(const CollationElementIterator& other) const; jpayne@69: jpayne@69: /** jpayne@69: * Resets the cursor to the beginning of the string. jpayne@69: * @stable ICU 2.0 jpayne@69: */ jpayne@69: void reset(void); jpayne@69: jpayne@69: /** jpayne@69: * Gets the ordering priority of the next character in the string. jpayne@69: * @param status the error code status. jpayne@69: * @return the next character's ordering. otherwise returns NULLORDER if an jpayne@69: * error has occured or if the end of string has been reached jpayne@69: * @stable ICU 2.0 jpayne@69: */ jpayne@69: int32_t next(UErrorCode& status); jpayne@69: jpayne@69: /** jpayne@69: * Get the ordering priority of the previous collation element in the string. jpayne@69: * @param status the error code status. jpayne@69: * @return the previous element's ordering. otherwise returns NULLORDER if an jpayne@69: * error has occured or if the start of string has been reached jpayne@69: * @stable ICU 2.0 jpayne@69: */ jpayne@69: int32_t previous(UErrorCode& status); jpayne@69: jpayne@69: /** jpayne@69: * Gets the primary order of a collation order. jpayne@69: * @param order the collation order jpayne@69: * @return the primary order of a collation order. jpayne@69: * @stable ICU 2.0 jpayne@69: */ jpayne@69: static inline int32_t primaryOrder(int32_t order); jpayne@69: jpayne@69: /** jpayne@69: * Gets the secondary order of a collation order. jpayne@69: * @param order the collation order jpayne@69: * @return the secondary order of a collation order. jpayne@69: * @stable ICU 2.0 jpayne@69: */ jpayne@69: static inline int32_t secondaryOrder(int32_t order); jpayne@69: jpayne@69: /** jpayne@69: * Gets the tertiary order of a collation order. jpayne@69: * @param order the collation order jpayne@69: * @return the tertiary order of a collation order. jpayne@69: * @stable ICU 2.0 jpayne@69: */ jpayne@69: static inline int32_t tertiaryOrder(int32_t order); jpayne@69: jpayne@69: /** jpayne@69: * Return the maximum length of any expansion sequences that end with the jpayne@69: * specified comparison order. jpayne@69: * @param order a collation order returned by previous or next. jpayne@69: * @return maximum size of the expansion sequences ending with the collation jpayne@69: * element or 1 if collation element does not occur at the end of any jpayne@69: * expansion sequence jpayne@69: * @stable ICU 2.0 jpayne@69: */ jpayne@69: int32_t getMaxExpansion(int32_t order) const; jpayne@69: jpayne@69: /** jpayne@69: * Gets the comparison order in the desired strength. Ignore the other jpayne@69: * differences. jpayne@69: * @param order The order value jpayne@69: * @stable ICU 2.0 jpayne@69: */ jpayne@69: int32_t strengthOrder(int32_t order) const; jpayne@69: jpayne@69: /** jpayne@69: * Sets the source string. jpayne@69: * @param str the source string. jpayne@69: * @param status the error code status. jpayne@69: * @stable ICU 2.0 jpayne@69: */ jpayne@69: void setText(const UnicodeString& str, UErrorCode& status); jpayne@69: jpayne@69: /** jpayne@69: * Sets the source string. jpayne@69: * @param str the source character iterator. jpayne@69: * @param status the error code status. jpayne@69: * @stable ICU 2.0 jpayne@69: */ jpayne@69: void setText(CharacterIterator& str, UErrorCode& status); jpayne@69: jpayne@69: /** jpayne@69: * Checks if a comparison order is ignorable. jpayne@69: * @param order the collation order. jpayne@69: * @return TRUE if a character is ignorable, FALSE otherwise. jpayne@69: * @stable ICU 2.0 jpayne@69: */ jpayne@69: static inline UBool isIgnorable(int32_t order); jpayne@69: jpayne@69: /** jpayne@69: * Gets the offset of the currently processed character in the source string. jpayne@69: * @return the offset of the character. jpayne@69: * @stable ICU 2.0 jpayne@69: */ jpayne@69: int32_t getOffset(void) const; jpayne@69: jpayne@69: /** jpayne@69: * Sets the offset of the currently processed character in the source string. jpayne@69: * @param newOffset the new offset. jpayne@69: * @param status the error code status. jpayne@69: * @return the offset of the character. jpayne@69: * @stable ICU 2.0 jpayne@69: */ jpayne@69: void setOffset(int32_t newOffset, UErrorCode& status); jpayne@69: jpayne@69: /** jpayne@69: * ICU "poor man's RTTI", returns a UClassID for the actual class. jpayne@69: * jpayne@69: * @stable ICU 2.2 jpayne@69: */ jpayne@69: virtual UClassID getDynamicClassID() const; jpayne@69: jpayne@69: /** jpayne@69: * ICU "poor man's RTTI", returns a UClassID for this class. jpayne@69: * jpayne@69: * @stable ICU 2.2 jpayne@69: */ jpayne@69: static UClassID U_EXPORT2 getStaticClassID(); jpayne@69: jpayne@69: #ifndef U_HIDE_INTERNAL_API jpayne@69: /** @internal */ jpayne@69: static inline CollationElementIterator *fromUCollationElements(UCollationElements *uc) { jpayne@69: return reinterpret_cast(uc); jpayne@69: } jpayne@69: /** @internal */ jpayne@69: static inline const CollationElementIterator *fromUCollationElements(const UCollationElements *uc) { jpayne@69: return reinterpret_cast(uc); jpayne@69: } jpayne@69: /** @internal */ jpayne@69: inline UCollationElements *toUCollationElements() { jpayne@69: return reinterpret_cast(this); jpayne@69: } jpayne@69: /** @internal */ jpayne@69: inline const UCollationElements *toUCollationElements() const { jpayne@69: return reinterpret_cast(this); jpayne@69: } jpayne@69: #endif // U_HIDE_INTERNAL_API jpayne@69: jpayne@69: private: jpayne@69: friend class RuleBasedCollator; jpayne@69: friend class UCollationPCE; jpayne@69: jpayne@69: /** jpayne@69: * CollationElementIterator constructor. This takes the source string and the jpayne@69: * collation object. The cursor will walk thru the source string based on the jpayne@69: * predefined collation rules. If the source string is empty, NULLORDER will jpayne@69: * be returned on the calls to next(). jpayne@69: * @param sourceText the source string. jpayne@69: * @param order the collation object. jpayne@69: * @param status the error code status. jpayne@69: */ jpayne@69: CollationElementIterator(const UnicodeString& sourceText, jpayne@69: const RuleBasedCollator* order, UErrorCode& status); jpayne@69: // Note: The constructors should take settings & tailoring, not a collator, jpayne@69: // to avoid circular dependencies. jpayne@69: // However, for operator==() we would need to be able to compare tailoring data for equality jpayne@69: // without making CollationData or CollationTailoring depend on TailoredSet. jpayne@69: // (See the implementation of RuleBasedCollator::operator==().) jpayne@69: // That might require creating an intermediate class that would be used jpayne@69: // by both CollationElementIterator and RuleBasedCollator jpayne@69: // but only contain the part of RBC== related to data and rules. jpayne@69: jpayne@69: /** jpayne@69: * CollationElementIterator constructor. This takes the source string and the jpayne@69: * collation object. The cursor will walk thru the source string based on the jpayne@69: * predefined collation rules. If the source string is empty, NULLORDER will jpayne@69: * be returned on the calls to next(). jpayne@69: * @param sourceText the source string. jpayne@69: * @param order the collation object. jpayne@69: * @param status the error code status. jpayne@69: */ jpayne@69: CollationElementIterator(const CharacterIterator& sourceText, jpayne@69: const RuleBasedCollator* order, UErrorCode& status); jpayne@69: jpayne@69: /** jpayne@69: * Assignment operator jpayne@69: * jpayne@69: * @param other the object to be copied jpayne@69: */ jpayne@69: const CollationElementIterator& jpayne@69: operator=(const CollationElementIterator& other); jpayne@69: jpayne@69: CollationElementIterator(); // default constructor not implemented jpayne@69: jpayne@69: /** Normalizes dir_=1 (just after setOffset()) to dir_=0 (just after reset()). */ jpayne@69: inline int8_t normalizeDir() const { return dir_ == 1 ? 0 : dir_; } jpayne@69: jpayne@69: static UHashtable *computeMaxExpansions(const CollationData *data, UErrorCode &errorCode); jpayne@69: jpayne@69: static int32_t getMaxExpansion(const UHashtable *maxExpansions, int32_t order); jpayne@69: jpayne@69: // CollationElementIterator private data members ---------------------------- jpayne@69: jpayne@69: CollationIterator *iter_; // owned jpayne@69: const RuleBasedCollator *rbc_; // aliased jpayne@69: uint32_t otherHalf_; jpayne@69: /** jpayne@69: * <0: backwards; 0: just after reset() (previous() begins from end); jpayne@69: * 1: just after setOffset(); >1: forward jpayne@69: */ jpayne@69: int8_t dir_; jpayne@69: /** jpayne@69: * Stores offsets from expansions and from unsafe-backwards iteration, jpayne@69: * so that getOffset() returns intermediate offsets for the CEs jpayne@69: * that are consistent with forward iteration. jpayne@69: */ jpayne@69: UVector32 *offsets_; jpayne@69: jpayne@69: UnicodeString string_; jpayne@69: }; jpayne@69: jpayne@69: // CollationElementIterator inline method definitions -------------------------- jpayne@69: jpayne@69: inline int32_t CollationElementIterator::primaryOrder(int32_t order) jpayne@69: { jpayne@69: return (order >> 16) & 0xffff; jpayne@69: } jpayne@69: jpayne@69: inline int32_t CollationElementIterator::secondaryOrder(int32_t order) jpayne@69: { jpayne@69: return (order >> 8) & 0xff; jpayne@69: } jpayne@69: jpayne@69: inline int32_t CollationElementIterator::tertiaryOrder(int32_t order) jpayne@69: { jpayne@69: return order & 0xff; jpayne@69: } jpayne@69: jpayne@69: inline UBool CollationElementIterator::isIgnorable(int32_t order) jpayne@69: { jpayne@69: return (order & 0xffff0000) == 0; jpayne@69: } jpayne@69: jpayne@69: U_NAMESPACE_END jpayne@69: jpayne@69: #endif /* #if !UCONFIG_NO_COLLATION */ jpayne@69: jpayne@69: #endif /* U_SHOW_CPLUSPLUS_API */ jpayne@69: jpayne@69: #endif