jpayne@69: // © 2019 and later: Unicode, Inc. and others. jpayne@69: // License & terms of use: http://www.unicode.org/copyright.html#License jpayne@69: jpayne@69: // localematcher.h jpayne@69: // created: 2019may08 Markus W. Scherer jpayne@69: jpayne@69: #ifndef __LOCALEMATCHER_H__ jpayne@69: #define __LOCALEMATCHER_H__ jpayne@69: jpayne@69: #include "unicode/utypes.h" jpayne@69: jpayne@69: #if U_SHOW_CPLUSPLUS_API jpayne@69: jpayne@69: #include "unicode/locid.h" jpayne@69: #include "unicode/stringpiece.h" jpayne@69: #include "unicode/uobject.h" jpayne@69: jpayne@69: /** jpayne@69: * \file jpayne@69: * \brief C++ API: Locale matcher: User's desired locales vs. application's supported locales. jpayne@69: */ jpayne@69: jpayne@69: #ifndef U_FORCE_HIDE_DRAFT_API jpayne@69: jpayne@69: /** jpayne@69: * Builder option for whether the language subtag or the script subtag is most important. jpayne@69: * jpayne@69: * @see Builder#setFavorSubtag(ULocMatchFavorSubtag) jpayne@69: * @draft ICU 65 jpayne@69: */ jpayne@69: enum ULocMatchFavorSubtag { jpayne@69: /** jpayne@69: * Language differences are most important, then script differences, then region differences. jpayne@69: * (This is the default behavior.) jpayne@69: * jpayne@69: * @draft ICU 65 jpayne@69: */ jpayne@69: ULOCMATCH_FAVOR_LANGUAGE, jpayne@69: /** jpayne@69: * Makes script differences matter relatively more than language differences. jpayne@69: * jpayne@69: * @draft ICU 65 jpayne@69: */ jpayne@69: ULOCMATCH_FAVOR_SCRIPT jpayne@69: }; jpayne@69: #ifndef U_IN_DOXYGEN jpayne@69: typedef enum ULocMatchFavorSubtag ULocMatchFavorSubtag; jpayne@69: #endif jpayne@69: jpayne@69: /** jpayne@69: * Builder option for whether all desired locales are treated equally or jpayne@69: * earlier ones are preferred. 
jpayne@69: * jpayne@69: * @see Builder#setDemotionPerDesiredLocale(ULocMatchDemotion) jpayne@69: * @draft ICU 65 jpayne@69: */ jpayne@69: enum ULocMatchDemotion { jpayne@69: /** jpayne@69: * All desired locales are treated equally. jpayne@69: * jpayne@69: * @draft ICU 65 jpayne@69: */ jpayne@69: ULOCMATCH_DEMOTION_NONE, jpayne@69: /** jpayne@69: * Earlier desired locales are preferred. jpayne@69: * jpayne@69: *
From each desired locale to the next, jpayne@69: * the distance to any supported locale is increased by an additional amount jpayne@69: * which is at least as large as most region mismatches. jpayne@69: * A later desired locale has to have a better match with some supported locale jpayne@69: * due to more than merely having the same region subtag. jpayne@69: * jpayne@69: *
For example: Supported={en, sv} desired=[en-GB, sv]
jpayne@69: * yields Result(en-GB, en)
because
jpayne@69: * with the demotion of sv its perfect match is no better than
jpayne@69: * the region distance between the earlier desired locale en-GB and en=en-US.
jpayne@69: *
jpayne@69: *
Notes: jpayne@69: *
For example, consider a web application with the UI in a given language, jpayne@69: * with a link to another, related web app. jpayne@69: * The link should include the UI language, and the target server may also use jpayne@69: * the client’s Accept-Language header data. jpayne@69: * The target server has its own list of supported languages. jpayne@69: * One may want to favor UI language consistency, that is, jpayne@69: * if there is a decent match for the original UI language, we want to use it, jpayne@69: * but not if it is merely a fallback. jpayne@69: * jpayne@69: * @see Builder#setDirection(ULocMatchDirection) jpayne@69: * @draft ICU 67 jpayne@69: */ jpayne@69: enum ULocMatchDirection { jpayne@69: /** jpayne@69: * Locale matching includes one-way matches such as Breton→French. (default) jpayne@69: * jpayne@69: * @draft ICU 67 jpayne@69: */ jpayne@69: ULOCMATCH_DIRECTION_WITH_ONE_WAY, jpayne@69: /** jpayne@69: * Locale matching limited to two-way matches including e.g. Danish↔Norwegian jpayne@69: * but ignoring one-way matches. jpayne@69: * jpayne@69: * @draft ICU 67 jpayne@69: */ jpayne@69: ULOCMATCH_DIRECTION_ONLY_TWO_WAY jpayne@69: }; jpayne@69: #ifndef U_IN_DOXYGEN jpayne@69: typedef enum ULocMatchDirection ULocMatchDirection; jpayne@69: #endif jpayne@69: jpayne@69: struct UHashtable; jpayne@69: jpayne@69: U_NAMESPACE_BEGIN jpayne@69: jpayne@69: struct LSR; jpayne@69: jpayne@69: class LocaleDistance; jpayne@69: class LocaleLsrIterator; jpayne@69: class UVector; jpayne@69: class XLikelySubtags; jpayne@69: jpayne@69: /** jpayne@69: * Immutable class that picks the best match between a user's desired locales and jpayne@69: * an application's supported locales. jpayne@69: * Movable but not copyable. jpayne@69: * jpayne@69: *
Example: jpayne@69: *
jpayne@69: * UErrorCode errorCode = U_ZERO_ERROR; jpayne@69: * LocaleMatcher matcher = LocaleMatcher::Builder().setSupportedLocalesFromListString("fr, en-GB, en").build(errorCode); jpayne@69: * const Locale *bestSupported = matcher.getBestLocale(Locale::getUS(), errorCode); // "en" jpayne@69: *jpayne@69: * jpayne@69: *
A matcher takes into account when languages are close to one another, jpayne@69: * such as Danish and Norwegian, jpayne@69: * and when regional variants are close, like en-GB and en-AU as opposed to en-US. jpayne@69: * jpayne@69: *
If there are multiple supported locales with the same (language, script, region) jpayne@69: * likely subtags, then the current implementation returns the first of those locales. jpayne@69: * It ignores variant subtags (except for pseudolocale variants) and extensions. jpayne@69: * This may change in future versions. jpayne@69: * jpayne@69: *
For example, the current implementation does not distinguish between jpayne@69: * de, de-DE, de-Latn, de-1901, de-u-co-phonebk. jpayne@69: * jpayne@69: *
If you prefer one equivalent locale over another, then provide only the preferred one, jpayne@69: * or place it earlier in the list of supported locales. jpayne@69: * jpayne@69: *
Otherwise, the order of supported locales may have no effect on the best-match results. jpayne@69: * The current implementation compares each desired locale with supported locales jpayne@69: * in the following order: jpayne@69: * 1. Default locale, if supported; jpayne@69: * 2. CLDR "paradigm locales" like en-GB and es-419; jpayne@69: * 3. other supported locales. jpayne@69: * This may change in future versions. jpayne@69: * jpayne@69: *
Often a product will just need one matcher instance, built with the languages jpayne@69: * that it supports. However, it may want multiple instances with different jpayne@69: * default languages based on additional information, such as the domain. jpayne@69: * jpayne@69: *
This class is not intended for public subclassing. jpayne@69: * jpayne@69: * @draft ICU 65 jpayne@69: */ jpayne@69: class U_COMMON_API LocaleMatcher : public UMemory { jpayne@69: public: jpayne@69: /** jpayne@69: * Data for the best-matching pair of a desired and a supported locale. jpayne@69: * Movable but not copyable. jpayne@69: * jpayne@69: * @draft ICU 65 jpayne@69: */ jpayne@69: class U_COMMON_API Result : public UMemory { jpayne@69: public: jpayne@69: /** jpayne@69: * Move constructor; might modify the source. jpayne@69: * This object will have the same contents that the source object had. jpayne@69: * jpayne@69: * @param src Result to move contents from. jpayne@69: * @draft ICU 65 jpayne@69: */ jpayne@69: Result(Result &&src) U_NOEXCEPT; jpayne@69: jpayne@69: /** jpayne@69: * Destructor. jpayne@69: * jpayne@69: * @draft ICU 65 jpayne@69: */ jpayne@69: ~Result(); jpayne@69: jpayne@69: /** jpayne@69: * Move assignment; might modify the source. jpayne@69: * This object will have the same contents that the source object had. jpayne@69: * jpayne@69: * @param src Result to move contents from. jpayne@69: * @draft ICU 65 jpayne@69: */ jpayne@69: Result &operator=(Result &&src) U_NOEXCEPT; jpayne@69: jpayne@69: #ifndef U_HIDE_DRAFT_API jpayne@69: /** jpayne@69: * Returns the best-matching desired locale. jpayne@69: * nullptr if the list of desired locales is empty or if none matched well enough. jpayne@69: * jpayne@69: * @return the best-matching desired locale, or nullptr. jpayne@69: * @draft ICU 65 jpayne@69: */ jpayne@69: inline const Locale *getDesiredLocale() const { return desiredLocale; } jpayne@69: jpayne@69: /** jpayne@69: * Returns the best-matching supported locale. jpayne@69: * If none matched well enough, this is the default locale. jpayne@69: * The default locale is nullptr if the list of supported locales is empty and jpayne@69: * no explicit default locale is set. jpayne@69: * jpayne@69: * @return the best-matching supported locale, or nullptr. 
jpayne@69: * @draft ICU 65 jpayne@69: */ jpayne@69: inline const Locale *getSupportedLocale() const { return supportedLocale; } jpayne@69: jpayne@69: /** jpayne@69: * Returns the index of the best-matching desired locale in the input Iterable order. jpayne@69: * -1 if the list of desired locales is empty or if none matched well enough. jpayne@69: * jpayne@69: * @return the index of the best-matching desired locale, or -1. jpayne@69: * @draft ICU 65 jpayne@69: */ jpayne@69: inline int32_t getDesiredIndex() const { return desiredIndex; } jpayne@69: jpayne@69: /** jpayne@69: * Returns the index of the best-matching supported locale in the jpayne@69: * constructor’s or builder’s input order (“set” Collection plus “added” locales). jpayne@69: * If the matcher was built from a locale list string, then the iteration order is that jpayne@69: * of a LocalePriorityList built from the same string. jpayne@69: * -1 if the list of supported locales is empty or if none matched well enough. jpayne@69: * jpayne@69: * @return the index of the best-matching supported locale, or -1. jpayne@69: * @draft ICU 65 jpayne@69: */ jpayne@69: inline int32_t getSupportedIndex() const { return supportedIndex; } jpayne@69: jpayne@69: /** jpayne@69: * Takes the best-matching supported locale and adds relevant fields of the jpayne@69: * best-matching desired locale, such as the -t- and -u- extensions. jpayne@69: * May replace some fields of the supported locale. jpayne@69: * The result is the locale that should be used for date and number formatting, collation, etc. jpayne@69: * Returns the root locale if getSupportedLocale() returns nullptr. jpayne@69: * jpayne@69: *
Example: desired=ar-SA-u-nu-latn, supported=ar-EG, resolved locale=ar-SA-u-nu-latn
jpayne@69: *
jpayne@69: * @return a locale combining the best-matching desired and supported locales.
jpayne@69: * @draft ICU 65
jpayne@69: */
jpayne@69: Locale makeResolvedLocale(UErrorCode &errorCode) const;
jpayne@69: #endif // U_HIDE_DRAFT_API
jpayne@69:
jpayne@69: private:
jpayne@69: Result(const Locale *desired, const Locale *supported,  // private constructor; LocaleMatcher is declared a friend of Result
jpayne@69: int32_t desIndex, int32_t suppIndex, UBool owned) :  // indexes of the pair, plus the value for desiredIsOwned
jpayne@69: desiredLocale(desired), supportedLocale(supported),  // pointers are stored as given; no copy is made here
jpayne@69: desiredIndex(desIndex), supportedIndex(suppIndex),
jpayne@69: desiredIsOwned(owned) {}  // empty body: the constructor only initializes the five data members
jpayne@69:
jpayne@69: Result(const Result &other) = delete;  // no copy construction: Result is documented as movable but not copyable
jpayne@69: Result &operator=(const Result &other) = delete;  // no copy assignment either; use the move operations
jpayne@69:
jpayne@69: const Locale *desiredLocale;  // returned by getDesiredLocale(); nullptr if no desired locale matched well enough
jpayne@69: const Locale *supportedLocale;  // returned by getSupportedLocale(); the default locale (possibly nullptr) if none matched
jpayne@69: int32_t desiredIndex;  // returned by getDesiredIndex(); -1 if no desired locale matched well enough
jpayne@69: int32_t supportedIndex;  // returned by getSupportedIndex(); -1 if no supported locale matched well enough
jpayne@69: UBool desiredIsOwned;  // set from the constructor's `owned` argument; presumably means this Result must delete desiredLocale -- confirm in localematcher.cpp
jpayne@69:
jpayne@69: friend class LocaleMatcher;
jpayne@69: };
jpayne@69:
jpayne@69: /**
jpayne@69: * LocaleMatcher builder.
jpayne@69: * Movable but not copyable.
jpayne@69: *
jpayne@69: * @see LocaleMatcher#builder()
jpayne@69: * @draft ICU 65
jpayne@69: */
jpayne@69: class U_COMMON_API Builder : public UMemory {
jpayne@69: public:
jpayne@69: /**
jpayne@69: * Constructs a builder used in chaining parameters for building a LocaleMatcher.
jpayne@69: *
jpayne@69: * @return a new Builder object
jpayne@69: * @draft ICU 65
jpayne@69: */
jpayne@69: Builder() {}
jpayne@69:
jpayne@69: /**
jpayne@69: * Move constructor; might modify the source.
jpayne@69: * This builder will have the same contents that the source builder had.
jpayne@69: *
jpayne@69: * @param src Builder to move contents from.
jpayne@69: * @draft ICU 65
jpayne@69: */
jpayne@69: Builder(Builder &&src) U_NOEXCEPT;
jpayne@69:
jpayne@69: /**
jpayne@69: * Destructor.
jpayne@69: *
jpayne@69: * @draft ICU 65
jpayne@69: */
jpayne@69: ~Builder();
jpayne@69:
jpayne@69: /**
jpayne@69: * Move assignment; might modify the source.
jpayne@69: * This builder will have the same contents that the source builder had.
jpayne@69: *
jpayne@69: * @param src Builder to move contents from.
jpayne@69: * @draft ICU 65
jpayne@69: */
jpayne@69: Builder &operator=(Builder &&src) U_NOEXCEPT;
jpayne@69:
jpayne@69: #ifndef U_HIDE_DRAFT_API
jpayne@69: /**
jpayne@69: * Parses an Accept-Language string
jpayne@69: * (RFC 2616 Section 14.4),
jpayne@69: * such as "af, en, fr;q=0.9", and sets the supported locales accordingly.
jpayne@69: * Allows whitespace in more places but does not allow "*".
jpayne@69: * Clears any previously set/added supported locales first.
jpayne@69: *
jpayne@69: * @param locales the Accept-Language string of locales to set
jpayne@69: * @return this Builder object
jpayne@69: * @draft ICU 65
jpayne@69: */
jpayne@69: Builder &setSupportedLocalesFromListString(StringPiece locales);
jpayne@69:
jpayne@69: /**
jpayne@69: * Copies the supported locales, preserving iteration order.
jpayne@69: * Clears any previously set/added supported locales first.
jpayne@69: * Duplicates are allowed, and are not removed.
jpayne@69: *
jpayne@69: * @param locales the list of locales
jpayne@69: * @return this Builder object
jpayne@69: * @draft ICU 65
jpayne@69: */
jpayne@69: Builder &setSupportedLocales(Locale::Iterator &locales);
jpayne@69:
jpayne@69: /**
jpayne@69: * Copies the supported locales from the begin/end range, preserving iteration order.
jpayne@69: * Clears any previously set/added supported locales first.
jpayne@69: * Duplicates are allowed, and are not removed.
jpayne@69: *
jpayne@69: * Each of the iterator parameter values must be an
jpayne@69: * input iterator whose value is convertible to const Locale &.
jpayne@69: *
jpayne@69: * @param begin Start of range.
jpayne@69: * @param end Exclusive end of range.
jpayne@69: * @return this Builder object
jpayne@69: * @draft ICU 65
jpayne@69: */
jpayne@69: template This is mostly an implementation detail, and the precise values may change over time.
jpayne@69: * The implementation may use either the maximized forms or the other ones, or both.
jpayne@69: * The implementation may or may not rely on the forms to be consistent with each other.
jpayne@69: *
jpayne@69: * Callers should construct and use a matcher rather than match pairs of locales directly.
jpayne@69: *
jpayne@69: * @param desired Desired locale.
jpayne@69: * @param supported Supported locale.
jpayne@69: * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
jpayne@69: * or else the function returns immediately. Check for U_FAILURE()
jpayne@69: * on output or use with function chaining. (See User Guide for details.)
jpayne@69: * @return value between 0 and 1, inclusive.
jpayne@69: * @internal (has a known user)
jpayne@69: */
jpayne@69: double internalMatch(const Locale &desired, const Locale &supported, UErrorCode &errorCode) const;
jpayne@69: #endif // U_HIDE_INTERNAL_API
jpayne@69:
jpayne@69: private:
jpayne@69: LocaleMatcher(const Builder &builder, UErrorCode &errorCode);  // private; the class-level example obtains a matcher via Builder's build(errorCode)
jpayne@69: LocaleMatcher(const LocaleMatcher &other) = delete;  // no copy construction: the class is documented as movable but not copyable
jpayne@69: LocaleMatcher &operator=(const LocaleMatcher &other) = delete;  // no copy assignment either
jpayne@69:
jpayne@69: int32_t putIfAbsent(const LSR &lsr, int32_t i, int32_t suppLength, UErrorCode &errorCode);  // NOTE(review): presumably records lsr -> i in supportedLsrToIndex unless lsr is already present; meaning of the return value is not visible here -- confirm in localematcher.cpp
jpayne@69:
jpayne@69: int32_t getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remainingIter, UErrorCode &errorCode) const;  // NOTE(review): presumably returns the index of the best supported locale for desiredLSR and any further desired locales from remainingIter -- confirm in localematcher.cpp
jpayne@69:
jpayne@69: const XLikelySubtags &likelySubtags;  // likely-subtags data, held by reference
jpayne@69: const LocaleDistance &localeDistance;  // locale-distance data, held by reference
jpayne@69: int32_t thresholdDistance;  // NOTE(review): presumably the largest distance still accepted as "matched well enough" -- confirm
jpayne@69: int32_t demotionPerDesiredLocale;  // a numeric amount, not the ULocMatchDemotion enum; presumably the distance added per later desired locale -- confirm
jpayne@69: ULocMatchFavorSubtag favorSubtag;  // whether language or script differences matter most (see ULocMatchFavorSubtag)
jpayne@69: ULocMatchDirection direction;  // whether one-way matches are included (see ULocMatchDirection)
jpayne@69:
jpayne@69: // These are in input order.
jpayne@69: const Locale ** supportedLocales;  // array of pointers to the supported locales
jpayne@69: LSR *lsrs;  // NOTE(review): presumably one LSR per entry of supportedLocales -- confirm
jpayne@69: int32_t supportedLocalesLength;  // NOTE(review): presumably the element count of the two arrays above -- confirm
jpayne@69: // These are in preference order: 1. Default locale 2. paradigm locales 3. others.
jpayne@69: UHashtable *supportedLsrToIndex; // Map: LSR -> index (presumably a supported-locale index; confirm against putIfAbsent())