jpayne@69: // © 2016 and later: Unicode, Inc. and others.
jpayne@69: // License & terms of use: http://www.unicode.org/copyright.html
jpayne@69: /*
jpayne@69: *******************************************************************************
jpayne@69: * Copyright (C) 1997-2015, International Business Machines Corporation and others.
jpayne@69: * All Rights Reserved.
jpayne@69: *******************************************************************************
jpayne@69: */
jpayne@69: 
jpayne@69: #ifndef RBNF_H
jpayne@69: #define RBNF_H
jpayne@69: 
jpayne@69: #include "unicode/utypes.h"
jpayne@69: 
jpayne@69: #if U_SHOW_CPLUSPLUS_API
jpayne@69: 
jpayne@69: /**
jpayne@69:  * \file
jpayne@69:  * \brief C++ API: Rule Based Number Format
jpayne@69:  */
jpayne@69: 
jpayne@69: /**
jpayne@69:  * \def U_HAVE_RBNF
jpayne@69:  * This will be 0 if RBNF support is not included in ICU
jpayne@69:  * and 1 if it is.
jpayne@69:  *
jpayne@69:  * @stable ICU 2.4
jpayne@69:  */
jpayne@69: #if UCONFIG_NO_FORMATTING
jpayne@69: #define U_HAVE_RBNF 0
jpayne@69: #else
jpayne@69: #define U_HAVE_RBNF 1
jpayne@69: 
jpayne@69: #include "unicode/dcfmtsym.h"
jpayne@69: #include "unicode/fmtable.h"
jpayne@69: #include "unicode/locid.h"
jpayne@69: #include "unicode/numfmt.h"
jpayne@69: #include "unicode/unistr.h"
jpayne@69: #include "unicode/strenum.h"
jpayne@69: #include "unicode/brkiter.h"
jpayne@69: #include "unicode/upluralrules.h"
jpayne@69: 
jpayne@69: U_NAMESPACE_BEGIN
jpayne@69: 
jpayne@69: class NFRule;
jpayne@69: class NFRuleSet;
jpayne@69: class LocalizationInfo;
jpayne@69: class PluralFormat;
jpayne@69: class RuleBasedCollator;
jpayne@69: 
jpayne@69: /**
jpayne@69:  * Tags for the predefined rulesets.
jpayne@69:  *
jpayne@69:  * @stable ICU 2.2
jpayne@69:  */
jpayne@69: enum URBNFRuleSetTag {
jpayne@69:     URBNF_SPELLOUT,  /**< Predefined "spellout" ruleset: spells out a value in words (123 is "one hundred twenty-three"). */
jpayne@69:     URBNF_ORDINAL,  /**< Predefined "ordinal" ruleset: appends an ordinal suffix to the end of a numeral (123 is "123rd"). */
jpayne@69:     URBNF_DURATION,  /**< Predefined "duration" ruleset: shows a duration in seconds as hours, minutes, and seconds (123 is "2:03"). */
jpayne@69:     URBNF_NUMBERING_SYSTEM,  /**< Predefined numbering-system ruleset. NOTE(review): not described in the class documentation; confirm exact semantics against the implementation. */
jpayne@69: #ifndef U_HIDE_DEPRECATED_API
jpayne@69:     /**
jpayne@69:      * One more than the highest normal URBNFRuleSetTag value.
jpayne@69:      * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
jpayne@69:      */
jpayne@69:     URBNF_COUNT
jpayne@69: #endif  // U_HIDE_DEPRECATED_API
jpayne@69: };
jpayne@69: 
jpayne@69: /**
jpayne@69:  * The RuleBasedNumberFormat class formats numbers according to a set of rules. This number formatter is
jpayne@69:  * typically used for spelling out numeric values in words (e.g., 25,376 as
jpayne@69:  * &quot;twenty-five thousand three hundred seventy-six&quot; or &quot;vingt-cinq mille trois
jpayne@69:  * cents soixante-seize&quot; or
jpayne@69:  * &quot;f&uuml;nfundzwanzigtausenddreihundertsechsundsiebzig&quot;), but can also be used for
jpayne@69:  * other complicated formatting tasks, such as formatting a number of seconds as hours,
jpayne@69:  * minutes and seconds (e.g., 3,730 as &quot;1:02:10&quot;).
jpayne@69:  *
jpayne@69:  * <p>The resources contain three predefined formatters for each locale: spellout, which
jpayne@69:  * spells out a value in words (123 is &quot;one hundred twenty-three&quot;); ordinal, which
jpayne@69:  * appends an ordinal suffix to the end of a numeral (123 is &quot;123rd&quot;); and
jpayne@69:  * duration, which shows a duration in seconds as hours, minutes, and seconds (123 is
jpayne@69:  * &quot;2:03&quot;).&nbsp; The client can also define more specialized <tt>RuleBasedNumberFormat</tt>s
jpayne@69:  * by supplying programmer-defined rule sets.</p>
jpayne@69:  *
jpayne@69:  * <p>The behavior of a <tt>RuleBasedNumberFormat</tt> is specified by a textual description
jpayne@69:  * that is either passed to the constructor as a <tt>UnicodeString</tt> or loaded from a resource
jpayne@69:  * bundle. In its simplest form, the description consists of a semicolon-delimited list of <em>rules.</em>
jpayne@69:  * Each rule has a string of output text and a value or range of values it is applicable to.
jpayne@69:  * In a typical spellout rule set, the first twenty rules are the words for the numbers from
jpayne@69:  * 0 to 19:</p>
jpayne@69:  *
jpayne@69:  * <pre>zero; one; two; three; four; five; six; seven; eight; nine;
jpayne@69:  * ten; eleven; twelve; thirteen; fourteen; fifteen; sixteen; seventeen; eighteen; nineteen;</pre>
jpayne@69:  *
jpayne@69:  * <p>For larger numbers, we can use the preceding set of rules to format the ones place, and
jpayne@69:  * we only have to supply the words for the multiples of 10:</p>
jpayne@69:  *
jpayne@69:  * <pre> 20: twenty[-&gt;&gt;];
jpayne@69:  * 30: thirty[-&gt;&gt;];
jpayne@69:  * 40: forty[-&gt;&gt;];
jpayne@69:  * 50: fifty[-&gt;&gt;];
jpayne@69:  * 60: sixty[-&gt;&gt;];
jpayne@69:  * 70: seventy[-&gt;&gt;];
jpayne@69:  * 80: eighty[-&gt;&gt;];
jpayne@69:  * 90: ninety[-&gt;&gt;];</pre>
jpayne@69:  *
jpayne@69:  * <p>In these rules, the <em>base value</em> is spelled out explicitly and set off from the
jpayne@69:  * rule's output text with a colon. The rules are in a sorted list, and a rule is applicable
jpayne@69:  * to all numbers from its own base value to one less than the next rule's base value. The
jpayne@69:  * &quot;&gt;&gt;&quot; token is called a <em>substitution</em> and tells the formatter to
jpayne@69:  * isolate the number's ones digit, format it using this same set of rules, and place the
jpayne@69:  * result at the position of the &quot;&gt;&gt;&quot; token. Text in brackets is omitted if
jpayne@69:  * the number being formatted is an even multiple of 10 (the hyphen is a literal hyphen; 24
jpayne@69:  * is &quot;twenty-four,&quot; not &quot;twenty four&quot;).</p>
jpayne@69:  *
jpayne@69:  * <p>For even larger numbers, we can actually look up several parts of the number in the
jpayne@69:  * list:</p>
jpayne@69:  *
jpayne@69:  * <pre>100: &lt;&lt; hundred[ &gt;&gt;];</pre>
jpayne@69:  *
jpayne@69:  * <p>The &quot;&lt;&lt;&quot; represents a new kind of substitution. The &lt;&lt; isolates
jpayne@69:  * the hundreds digit (and any digits to its left), formats it using this same rule set, and
jpayne@69:  * places the result where the &quot;&lt;&lt;&quot; was. Notice also that the meaning of
jpayne@69:  * &gt;&gt; has changed: it now refers to both the tens and the ones digits. The meaning of
jpayne@69:  * both substitutions depends on the rule's base value. The base value determines the rule's <em>divisor,</em>
jpayne@69:  * which is the highest power of 10 that is less than or equal to the base value (the user
jpayne@69:  * can change this). To fill in the substitutions, the formatter divides the number being
jpayne@69:  * formatted by the divisor. The integral quotient is used to fill in the &lt;&lt;
jpayne@69:  * substitution, and the remainder is used to fill in the &gt;&gt; substitution. The meaning
jpayne@69:  * of the brackets changes similarly: text in brackets is omitted if the value being
jpayne@69:  * formatted is an even multiple of the rule's divisor. The rules are applied recursively, so
jpayne@69:  * if a substitution is filled in with text that includes another substitution, that
jpayne@69:  * substitution is also filled in.</p>
jpayne@69:  *
jpayne@69:  * <p>This rule covers values up to 999, at which point we add another rule:</p>
jpayne@69:  *
jpayne@69:  * <pre>1000: &lt;&lt; thousand[ &gt;&gt;];</pre>
jpayne@69:  *
jpayne@69:  * <p>Again, the meanings of the brackets and substitution tokens shift because the rule's
jpayne@69:  * base value is a higher power of 10, changing the rule's divisor. This rule can actually be
jpayne@69:  * used all the way up to 999,999. This allows us to finish out the rules as follows:</p>
jpayne@69:  *
jpayne@69:  * <pre> 1,000,000: &lt;&lt; million[ &gt;&gt;];
jpayne@69:  * 1,000,000,000: &lt;&lt; billion[ &gt;&gt;];
jpayne@69:  * 1,000,000,000,000: &lt;&lt; trillion[ &gt;&gt;];
jpayne@69:  * 1,000,000,000,000,000: OUT OF RANGE!;</pre>
jpayne@69:  *
jpayne@69:  * <p>Commas, periods, and spaces can be used in the base values to improve legibility and
jpayne@69:  * are ignored by the rule parser. The last rule in the list is customarily treated as an
jpayne@69:  * &quot;overflow rule,&quot; applying to everything from its base value on up, and often (as
jpayne@69:  * in this example) being used to print out an error message or default representation.
jpayne@69:  * Notice also that the size of the major groupings in large numbers is controlled by the
jpayne@69:  * spacing of the rules: because in English we group numbers by thousand, the higher rules
jpayne@69:  * are separated from each other by a factor of 1,000.</p>
jpayne@69:  *
jpayne@69:  * <p>To see how these rules actually work in practice, consider the following example:
jpayne@69:  * Formatting 25,340 with this rule set would work like this:</p>
jpayne@69:  *
jpayne@69:  * <table border="0" width="100%">
jpayne@69:  *   <tr>
jpayne@69:  *     <td><strong>&lt;&lt; thousand &gt;&gt;</strong></td>
jpayne@69:  *     <td>[the rule whose base value is 1,000 is applicable to 25,340]</td>
jpayne@69:  *   </tr>
jpayne@69:  *   <tr>
jpayne@69:  *     <td><strong>twenty-&gt;&gt;</strong> thousand &gt;&gt;</td>
jpayne@69:  *     <td>[25,340 over 1,000 is 25. The rule for 20 applies.]</td>
jpayne@69:  *   </tr>
jpayne@69:  *   <tr>
jpayne@69:  *     <td>twenty-<strong>five</strong> thousand &gt;&gt;</td>
jpayne@69:  *     <td>[25 mod 10 is 5. The rule for 5 is &quot;five.&quot;]</td>
jpayne@69:  *   </tr>
jpayne@69:  *   <tr>
jpayne@69:  *     <td>twenty-five thousand <strong>&lt;&lt; hundred &gt;&gt;</strong></td>
jpayne@69:  *     <td>[25,340 mod 1,000 is 340. The rule for 100 applies.]</td>
jpayne@69:  *   </tr>
jpayne@69:  *   <tr>
jpayne@69:  *     <td>twenty-five thousand <strong>three</strong> hundred &gt;&gt;</td>
jpayne@69:  *     <td>[340 over 100 is 3. The rule for 3 is &quot;three.&quot;]</td>
jpayne@69:  *   </tr>
jpayne@69:  *   <tr>
jpayne@69:  *     <td>twenty-five thousand three hundred <strong>forty</strong></td>
jpayne@69:  *     <td>[340 mod 100 is 40. The rule for 40 applies. Since 40 divides
jpayne@69:  *     evenly by 10, the hyphen and substitution in the brackets are omitted.]</td>
jpayne@69:  *   </tr>
jpayne@69:  * </table>
jpayne@69:  *
jpayne@69:  * <p>The above syntax suffices only to format positive integers. To format negative numbers,
jpayne@69:  * we add a special rule:</p>
jpayne@69:  *
jpayne@69:  * <pre>-x: minus &gt;&gt;;</pre>
jpayne@69:  *
jpayne@69:  * <p>This is called a <em>negative-number rule,</em> and is identified by &quot;-x&quot;
jpayne@69:  * where the base value would be. This rule is used to format all negative numbers. The
jpayne@69:  * &gt;&gt; token here means &quot;find the number's absolute value, format it with these
jpayne@69:  * rules, and put the result here.&quot;</p>
jpayne@69:  *
jpayne@69:  * <p>We also add a special rule called a <em>fraction rule </em>for numbers with fractional
jpayne@69:  * parts:</p>
jpayne@69:  *
jpayne@69:  * <pre>x.x: &lt;&lt; point &gt;&gt;;</pre>
jpayne@69:  *
jpayne@69:  * <p>This rule is used for all positive non-integers (negative non-integers pass through the
jpayne@69:  * negative-number rule first and then through this rule). Here, the &lt;&lt; token refers to
jpayne@69:  * the number's integral part, and the &gt;&gt; to the number's fractional part. The
jpayne@69:  * fractional part is formatted as a series of single-digit numbers (e.g., 123.456 would be
jpayne@69:  * formatted as &quot;one hundred twenty-three point four five six&quot;).</p>
jpayne@69:  *
jpayne@69:  * <p>To see how this rule syntax is applied to various languages, examine the resource data.</p>
jpayne@69:  *
jpayne@69:  * <p>There is actually much more flexibility built into the rule language than the
jpayne@69:  * description above shows. A formatter may own multiple rule sets, which can be selected by
jpayne@69:  * the caller, and which can use each other to fill in their substitutions. Substitutions can
jpayne@69:  * also be filled in with digits, using a DecimalFormat object. There is syntax that can be
jpayne@69:  * used to alter a rule's divisor in various ways. And there is provision for much more
jpayne@69:  * flexible fraction handling. A complete description of the rule syntax follows:</p>
jpayne@69:  *
jpayne@69:  * <hr>
jpayne@69:  *
jpayne@69:  * <p>The description of a <tt>RuleBasedNumberFormat</tt>'s behavior consists of one or more <em>rule
jpayne@69:  * sets.</em> Each rule set consists of a name, a colon, and a list of <em>rules.</em> A rule
jpayne@69:  * set name must begin with a % sign. Rule sets with names that begin with a single % sign
jpayne@69:  * are <em>public:</em> the caller can specify that they be used to format and parse numbers.
jpayne@69:  * Rule sets with names that begin with %% are <em>private:</em> they exist only for the use
jpayne@69:  * of other rule sets. If a formatter only has one rule set, the name may be omitted.</p>
jpayne@69:  *
jpayne@69:  * <p>The user can also specify a special &quot;rule set&quot; named <tt>%%lenient-parse</tt>.
jpayne@69:  * The body of <tt>%%lenient-parse</tt> isn't a set of number-formatting rules, but a <tt>RuleBasedCollator</tt>
jpayne@69:  * description which is used to define equivalences for lenient parsing. For more information
jpayne@69:  * on the syntax, see <tt>RuleBasedCollator</tt>. For more information on lenient parsing,
jpayne@69:  * see <tt>setLenientParse()</tt>.  <em>Note:</em> symbols that have syntactic meaning
jpayne@69:  * in collation rules, such as '&amp;', have no particular meaning when appearing outside
jpayne@69:  * of the <tt>lenient-parse</tt> rule set.</p>
jpayne@69:  *
jpayne@69:  * <p>The body of a rule set consists of an ordered, semicolon-delimited list of <em>rules.</em>
jpayne@69:  * Internally, every rule has a base value, a divisor, rule text, and zero, one, or two <em>substitutions.</em>
jpayne@69:  * These parameters are controlled by the description syntax, which consists of a <em>rule
jpayne@69:  * descriptor,</em> a colon, and a <em>rule body.</em></p>
jpayne@69:  *
jpayne@69:  * <p>A rule descriptor can take one of the following forms (text in <em>italics</em> is the
jpayne@69:  * name of a token):</p>
jpayne@69:  *
jpayne@69:  * <table border="0" width="100%">
jpayne@69:  *   <tr>
jpayne@69:  *     <td><em>bv</em>:</td>
jpayne@69:  *     <td><em>bv</em> specifies the rule's base value. <em>bv</em> is a decimal
jpayne@69:  *     number expressed using ASCII digits. <em>bv</em> may contain spaces, periods, and commas,
jpayne@69:  *     which are ignored. The rule's divisor is the highest power of 10 less than or equal to
jpayne@69:  *     the base value.</td>
jpayne@69:  *   </tr>
jpayne@69:  *   <tr>
jpayne@69:  *     <td><em>bv</em>/<em>rad</em>:</td>
jpayne@69:  *     <td><em>bv</em> specifies the rule's base value. The rule's divisor is the
jpayne@69:  *     highest power of <em>rad</em> less than or equal to the base value.</td>
jpayne@69:  *   </tr>
jpayne@69:  *   <tr>
jpayne@69:  *     <td><em>bv</em>&gt;:</td>
jpayne@69:  *     <td><em>bv</em> specifies the rule's base value. To calculate the divisor,
jpayne@69:  *     let the radix be 10, and the exponent be the highest exponent of the radix that yields a
jpayne@69:  *     result less than or equal to the base value. Every &gt; character after the base value
jpayne@69:  *     decreases the exponent by 1. If the exponent is positive or 0, the divisor is the radix
jpayne@69:  *     raised to the power of the exponent; otherwise, the divisor is 1.</td>
jpayne@69:  *   </tr>
jpayne@69:  *   <tr>
jpayne@69:  *     <td><em>bv</em>/<em>rad</em>&gt;:</td>
jpayne@69:  *     <td><em>bv</em> specifies the rule's base value. To calculate the divisor,
jpayne@69:  *     let the radix be <em>rad</em>, and the exponent be the highest exponent of the radix that
jpayne@69:  *     yields a result less than or equal to the base value. Every &gt; character after the radix
jpayne@69:  *     decreases the exponent by 1. If the exponent is positive or 0, the divisor is the radix
jpayne@69:  *     raised to the power of the exponent; otherwise, the divisor is 1.</td>
jpayne@69:  *   </tr>
jpayne@69:  *   <tr>
jpayne@69:  *     <td>-x:</td>
jpayne@69:  *     <td>The rule is a negative-number rule.</td>
jpayne@69:  *   </tr>
jpayne@69:  *   <tr>
jpayne@69:  *     <td>x.x:</td>
jpayne@69:  *     <td>The rule is an <em>improper fraction rule</em>. If the full stop in
jpayne@69:  *     the middle of the rule name is replaced with the decimal point
jpayne@69:  *     that is used in the language or DecimalFormatSymbols, then that rule will
jpayne@69:  *     have precedence when formatting and parsing this rule. For example, some
jpayne@69:  *     languages use the comma, and can thus be written as x,x instead. For example,
jpayne@69:  *     you can use "x.x: &lt;&lt; point &gt;&gt;;x,x: &lt;&lt; comma &gt;&gt;;" to
jpayne@69:  *     handle the decimal point that matches the language's natural spelling of
jpayne@69:  *     the punctuation of either the full stop or comma.</td>
jpayne@69:  *   </tr>
jpayne@69:  *   <tr>
jpayne@69:  *     <td>0.x:</td>
jpayne@69:  *     <td>The rule is a <em>proper fraction rule</em>. If the full stop in
jpayne@69:  *     the middle of the rule name is replaced with the decimal point
jpayne@69:  *     that is used in the language or DecimalFormatSymbols, then that rule will
jpayne@69:  *     have precedence when formatting and parsing this rule. For example, some
jpayne@69:  *     languages use the comma, and can thus be written as 0,x instead. For example,
jpayne@69:  *     you can use "0.x: point &gt;&gt;;0,x: comma &gt;&gt;;" to
jpayne@69:  *     handle the decimal point that matches the language's natural spelling of
jpayne@69:  *     the punctuation of either the full stop or comma.</td>
jpayne@69:  *   </tr>
jpayne@69:  *   <tr>
jpayne@69:  *     <td>x.0:</td>
jpayne@69:  *     <td>The rule is a <em>master rule</em>. If the full stop in
jpayne@69:  *     the middle of the rule name is replaced with the decimal point
jpayne@69:  *     that is used in the language or DecimalFormatSymbols, then that rule will
jpayne@69:  *     have precedence when formatting and parsing this rule. For example, some
jpayne@69:  *     languages use the comma, and can thus be written as x,0 instead. For example,
jpayne@69:  *     you can use "x.0: &lt;&lt; point;x,0: &lt;&lt; comma;" to
jpayne@69:  *     handle the decimal point that matches the language's natural spelling of
jpayne@69:  *     the punctuation of either the full stop or comma.</td>
jpayne@69:  *   </tr>
jpayne@69:  *   <tr>
jpayne@69:  *     <td>Inf:</td>
jpayne@69:  *     <td>The rule for infinity.</td>
jpayne@69:  *   </tr>
jpayne@69:  *   <tr>
jpayne@69:  *     <td>NaN:</td>
jpayne@69:  *     <td>The rule for an IEEE 754 NaN (not a number).</td>
jpayne@69:  *   </tr>
jpayne@69:  *   <tr>
jpayne@69:  *     <td><em>nothing</em></td>
jpayne@69:  *     <td>If the rule's rule descriptor is left out, the base value is one plus the
jpayne@69:  *     preceding rule's base value (or zero if this is the first rule in the list) in a normal
jpayne@69:  *     rule set.&nbsp; In a fraction rule set, the base value is the same as the preceding rule's
jpayne@69:  *     base value.</td>
jpayne@69:  *   </tr>
jpayne@69:  * </table>
jpayne@69:  *
jpayne@69:  * <p>A rule set may be either a regular rule set or a <em>fraction rule set,</em> depending
jpayne@69:  * on whether it is used to format a number's integral part (or the whole number) or a
jpayne@69:  * number's fractional part. Using a rule set to format a rule's fractional part makes it a
jpayne@69:  * fraction rule set.</p>
jpayne@69:  *
jpayne@69:  * <p>Which rule is used to format a number is defined according to one of the following
jpayne@69:  * algorithms: If the rule set is a regular rule set, do the following:
jpayne@69:  *
jpayne@69:  * <ul>
jpayne@69:  *   <li>If the rule set includes a master rule (and the number was passed in as a <tt>double</tt>),
jpayne@69:  *     use the master rule.&nbsp; (If the number being formatted was passed in as a <tt>long</tt>,
jpayne@69:  *     the master rule is ignored.)</li>
jpayne@69:  *   <li>If the number is negative, use the negative-number rule.</li>
jpayne@69:  *   <li>If the number has a fractional part and is greater than 1, use the improper fraction
jpayne@69:  *     rule.</li>
jpayne@69:  *   <li>If the number has a fractional part and is between 0 and 1, use the proper fraction
jpayne@69:  *     rule.</li>
jpayne@69:  *   <li>Binary-search the rule list for the rule with the highest base value less than or equal
jpayne@69:  *     to the number. If that rule has two substitutions, its base value is not an even multiple
jpayne@69:  *     of its divisor, and the number <em>is</em> an even multiple of the rule's divisor, use the
jpayne@69:  *     rule that precedes it in the rule list. Otherwise, use the rule itself.</li>
jpayne@69:  * </ul>
jpayne@69:  *
jpayne@69:  * <p>If the rule set is a fraction rule set, do the following:
jpayne@69:  *
jpayne@69:  * <ul>
jpayne@69:  *   <li>Ignore negative-number and fraction rules.</li>
jpayne@69:  *   <li>For each rule in the list, multiply the number being formatted (which will always be
jpayne@69:  *     between 0 and 1) by the rule's base value. Keep track of the distance between the result
jpayne@69:  *     and the nearest integer.</li>
jpayne@69:  *   <li>Use the rule that produced the result closest to zero in the above calculation. In the
jpayne@69:  *     event of a tie or a direct hit, use the first matching rule encountered. (The idea here is
jpayne@69:  *     to try each rule's base value as a possible denominator of a fraction. Whichever
jpayne@69:  *     denominator produces the fraction closest in value to the number being formatted wins.) If
jpayne@69:  *     the rule following the matching rule has the same base value, use it if the numerator of
jpayne@69:  *     the fraction is anything other than 1; if the numerator is 1, use the original matching
jpayne@69:  *     rule. (This is to allow singular and plural forms of the rule text without a lot of extra
jpayne@69:  *     hassle.)</li>
jpayne@69:  * </ul>
jpayne@69:  *
jpayne@69:  * <p>A rule's body consists of a string of characters terminated by a semicolon. The rule
jpayne@69:  * may include zero, one, or two <em>substitution tokens,</em> and a range of text in
jpayne@69:  * brackets. The brackets denote optional text (and may also include one or both
jpayne@69:  * substitutions). The exact meanings of the substitution tokens, and under what conditions
jpayne@69:  * optional text is omitted, depend on the syntax of the substitution token and the context.
jpayne@69:  * The rest of the text in a rule body is literal text that is output when the rule matches
jpayne@69:  * the number being formatted.</p>
jpayne@69:  *
jpayne@69:  * <p>A substitution token begins and ends with a <em>token character.</em> The token
jpayne@69:  * character and the context together specify a mathematical operation to be performed on the
jpayne@69:  * number being formatted. An optional <em>substitution descriptor </em>specifies how the
jpayne@69:  * value resulting from that operation is used to fill in the substitution. The position of
jpayne@69:  * the substitution token in the rule body specifies the location of the resultant text in
jpayne@69:  * the original rule text.</p>
jpayne@69:  *
jpayne@69:  * <p>The meanings of the substitution token characters are as follows:</p>
jpayne@69:  *
jpayne@69:  * <table border="0" width="100%">
jpayne@69:  *   <tr>
jpayne@69:  *     <td>&gt;&gt;</td>
jpayne@69:  *     <td>in normal rule</td>
jpayne@69:  *     <td>Divide the number by the rule's divisor and format the remainder</td>
jpayne@69:  *   </tr>
jpayne@69:  *   <tr>
jpayne@69:  *     <td></td>
jpayne@69:  *     <td>in negative-number rule</td>
jpayne@69:  *     <td>Find the absolute value of the number and format the result</td>
jpayne@69:  *   </tr>
jpayne@69:  *   <tr>
jpayne@69:  *     <td></td>
jpayne@69:  *     <td>in fraction or master rule</td>
jpayne@69:  *     <td>Isolate the number's fractional part and format it.</td>
jpayne@69:  *   </tr>
jpayne@69:  *   <tr>
jpayne@69:  *     <td></td>
jpayne@69:  *     <td>in rule in fraction rule set</td>
jpayne@69:  *     <td>Not allowed.</td>
jpayne@69:  *   </tr>
jpayne@69:  *   <tr>
jpayne@69:  *     <td>&gt;&gt;&gt;</td>
jpayne@69:  *     <td>in normal rule</td>
jpayne@69:  *     <td>Divide the number by the rule's divisor and format the remainder,
jpayne@69:  *       but bypass the normal rule-selection process and just use the
jpayne@69:  *       rule that precedes this one in this rule list.</td>
jpayne@69:  *   </tr>
jpayne@69:  *   <tr>
jpayne@69:  *     <td></td>
jpayne@69:  *     <td>in all other rules</td>
jpayne@69:  *     <td>Not allowed.</td>
jpayne@69:  *   </tr>
jpayne@69:  *   <tr>
jpayne@69:  *     <td>&lt;&lt;</td>
jpayne@69:  *     <td>in normal rule</td>
jpayne@69:  *     <td>Divide the number by the rule's divisor and format the quotient</td>
jpayne@69:  *   </tr>
jpayne@69:  *   <tr>
jpayne@69:  *     <td></td>
jpayne@69:  *     <td>in negative-number rule</td>
jpayne@69:  *     <td>Not allowed.</td>
jpayne@69:  *   </tr>
jpayne@69:  *   <tr>
jpayne@69:  *     <td></td>
jpayne@69:  *     <td>in fraction or master rule</td>
jpayne@69:  *     <td>Isolate the number's integral part and format it.</td>
jpayne@69:  *   </tr>
jpayne@69:  *   <tr>
jpayne@69:  *     <td></td>
jpayne@69:  *     <td>in rule in fraction rule set</td>
jpayne@69:  *     <td>Multiply the number by the rule's base value and format the result.</td>
jpayne@69:  *   </tr>
jpayne@69:  *   <tr>
jpayne@69:  *     <td>==</td>
jpayne@69:  *     <td>in all rule sets</td>
jpayne@69:  *     <td>Format the number unchanged</td>
jpayne@69:  *   </tr>
jpayne@69:  *   <tr>
jpayne@69:  *     <td>[]</td>
jpayne@69:  *     <td>in normal rule</td>
jpayne@69:  *     <td>Omit the optional text if the number is an even multiple of the rule's divisor</td>
jpayne@69:  *   </tr>
jpayne@69:  *   <tr>
jpayne@69:  *     <td></td>
jpayne@69:  *     <td>in negative-number rule</td>
jpayne@69:  *     <td>Not allowed.</td>
jpayne@69:  *   </tr>
jpayne@69:  *   <tr>
jpayne@69:  *     <td></td>
jpayne@69:  *     <td>in improper-fraction rule</td>
jpayne@69:  *     <td>Omit the optional text if the number is between 0 and 1 (same as specifying both an
jpayne@69:  *     x.x rule and a 0.x rule)</td>
jpayne@69:  *   </tr>
jpayne@69:  *   <tr>
jpayne@69:  *     <td></td>
jpayne@69:  *     <td>in master rule</td>
jpayne@69:  *     <td>Omit the optional text if the number is an integer (same as specifying both an x.x
jpayne@69:  *     rule and an x.0 rule)</td>
jpayne@69:  *   </tr>
jpayne@69:  *   <tr>
jpayne@69:  *     <td></td>
jpayne@69:  *     <td>in proper-fraction rule</td>
jpayne@69:  *     <td>Not allowed.</td>
jpayne@69:  *   </tr>
jpayne@69:  *   <tr>
jpayne@69:  *     <td></td>
jpayne@69:  *     <td>in rule in fraction rule set</td>
jpayne@69:  *     <td>Omit the optional text if multiplying the number by the rule's base value yields 1.</td>
jpayne@69:  *   </tr>
jpayne@69:  *   <tr>
jpayne@69:  *     <td width="37">$(cardinal,<i>plural syntax</i>)$</td>
jpayne@69:  *     <td width="23"></td>
jpayne@69:  *     <td width="165" valign="top">in all rule sets</td>
jpayne@69:  *     <td>This provides the ability to choose a word based on the number divided by the radix to the power of the
jpayne@69:  *     exponent of the base value for the specified locale, which is normally equivalent to the &lt;&lt; value.
jpayne@69:  *     This uses the cardinal plural rules from PluralFormat. All strings used in the plural format are treated
jpayne@69:  *     as the same base value for parsing.</td>
jpayne@69:  *   </tr>
jpayne@69:  *   <tr>
jpayne@69:  *     <td width="37">$(ordinal,<i>plural syntax</i>)$</td>
jpayne@69:  *     <td width="23"></td>
jpayne@69:  *     <td width="165" valign="top">in all rule sets</td>
jpayne@69:  *     <td>This provides the ability to choose a word based on the number divided by the radix to the power of the
jpayne@69:  *     exponent of the base value for the specified locale, which is normally equivalent to the &lt;&lt; value.
jpayne@69:  *     This uses the ordinal plural rules from PluralFormat. All strings used in the plural format are treated
jpayne@69:  *     as the same base value for parsing.</td>
jpayne@69:  *   </tr>
jpayne@69:  * </table>
jpayne@69:  *
jpayne@69:  * <p>The substitution descriptor (i.e., the text between the token characters) may take one
jpayne@69:  * of three forms:</p>
jpayne@69:  *
jpayne@69:  * <table border="0" width="100%">
jpayne@69:  *   <tr>
jpayne@69:  *     <td>a rule set name</td>
jpayne@69:  *     <td>Perform the mathematical operation on the number, and format the result using the
jpayne@69:  *     named rule set.</td>
jpayne@69:  *   </tr>
jpayne@69:  *   <tr>
jpayne@69:  *     <td>a DecimalFormat pattern</td>
jpayne@69:  *     <td>Perform the mathematical operation on the number, and format the result using a
jpayne@69:  *     DecimalFormat with the specified pattern.&nbsp; The pattern must begin with 0 or #.</td>
jpayne@69:  *   </tr>
jpayne@69:  *   <tr>
jpayne@69:  *     <td>nothing</td>
jpayne@69:  *     <td>Perform the mathematical operation on the number, and format the result using the rule
jpayne@69:  *     set containing the current rule, except:
jpayne@69:  *     <ul>
jpayne@69:  *       <li>You can't have an empty substitution descriptor with a == substitution.</li>
jpayne@69:  *       <li>If you omit the substitution descriptor in a &gt;&gt; substitution in a fraction rule,
jpayne@69:  *         format the result one digit at a time using the rule set containing the current rule.</li>
jpayne@69:  *       <li>If you omit the substitution descriptor in a &lt;&lt; substitution in a rule in a
jpayne@69:  *         fraction rule set, format the result using the default rule set for this formatter.</li>
jpayne@69:  *     </ul>
jpayne@69:  *     </td>
jpayne@69:  *   </tr>
jpayne@69:  * </table>
jpayne@69:  *
jpayne@69:  * <p>Whitespace is ignored between a rule set name and a rule set body, between a rule
jpayne@69:  * descriptor and a rule body, or between rules. If a rule body begins with an apostrophe,
jpayne@69:  * the apostrophe is ignored, but all text after it becomes significant (this is how you can
jpayne@69:  * have a rule's rule text begin with whitespace). There is no escape function: the semicolon
jpayne@69:  * is not allowed in rule set names or in rule text, and the colon is not allowed in rule set
jpayne@69:  * names. The characters beginning a substitution token are always treated as the beginning
jpayne@69:  * of a substitution token.</p>
jpayne@69:  *
jpayne@69:  * <p>See the resource data and the demo program for annotated examples of real rule sets
jpayne@69:  * using these features.</p>
jpayne@69:  *
jpayne@69:  * <p><em>User subclasses are not supported.</em> While clients may write
jpayne@69:  * subclasses, such code will not necessarily work and will not be
jpayne@69:  * guaranteed to work stably from release to release.</p>
jpayne@69:  *
jpayne@69:  * <p><b>Localizations</b></p>
jpayne@69:  * <p>Constructors are available that allow the specification of localizations for the
jpayne@69:  * public rule sets (and also allow more control over what public rule sets are available).
jpayne@69:  * Localization data is represented as a textual description.  The description represents
jpayne@69:  * an array of arrays of string.  The first element is an array of the public rule set names,
jpayne@69:  * each of these must be one of the public rule set names that appear in the rules.  Only
jpayne@69:  * names in this array will be treated as public rule set names by the API.  Each subsequent
jpayne@69:  * element is an array of localizations of these names.  The first element of one of these
jpayne@69:  * subarrays is the locale name, and the remaining elements are localizations of the
jpayne@69:  * public rule set names, in the same order as they were listed in the first array.</p>
jpayne@69:  * <p>In the syntax, angle brackets '<', '>' are used to delimit the arrays, and comma ',' is used
jpayne@69:  * to separate elements of an array.  Whitespace is ignored, unless quoted.</p>
jpayne@69:  * <p>For example:<pre>
jpayne@69:  * < < %foo, %bar, %baz >,
jpayne@69:  *   < en, Foo, Bar, Baz >,
jpayne@69:  *   < fr, 'le Foo', 'le Bar', 'le Baz' >,
jpayne@69:  *   < zh, \\u7532, \\u4e59, \\u4e19 > >
jpayne@69:  * </pre></p>
jpayne@69:  * @author Richard Gillam
jpayne@69:  * @see NumberFormat
jpayne@69:  * @see DecimalFormat
jpayne@69:  * @see PluralFormat
jpayne@69:  * @see PluralRules
jpayne@69:  * @stable ICU 2.0
jpayne@69:  */
jpayne@69: class U_I18N_API RuleBasedNumberFormat : public NumberFormat {
jpayne@69: public:
jpayne@69: 
jpayne@69:   //-----------------------------------------------------------------------
jpayne@69:   // constructors
jpayne@69:   //-----------------------------------------------------------------------
jpayne@69: 
jpayne@69:     /**
jpayne@69:      * Creates a RuleBasedNumberFormat that behaves according to the description
jpayne@69:      * passed in.  The formatter uses the default locale.
jpayne@69:      * @param rules A description of the formatter's desired behavior.
jpayne@69:      * See the class documentation for a complete explanation of the description
jpayne@69:      * syntax.
jpayne@69:      * @param perror The parse error if an error was encountered.
jpayne@69:      * @param status The status indicating whether the constructor succeeded.
jpayne@69:      * @stable ICU 3.2
jpayne@69:      */
jpayne@69:     RuleBasedNumberFormat(const UnicodeString& rules, UParseError& perror, UErrorCode& status);
jpayne@69: 
jpayne@69:     /**
jpayne@69:      * Creates a RuleBasedNumberFormat that behaves according to the description
jpayne@69:      * passed in.  The formatter uses the default locale.
jpayne@69:      * <p>
jpayne@69:      * The localizations data provides information about the public
jpayne@69:      * rule sets and their localized display names for different
jpayne@69:      * locales. The first element in the list is an array of the names
jpayne@69:      * of the public rule sets.  The first element in this array is
jpayne@69:      * the initial default ruleset.  The remaining elements in the
jpayne@69:      * list are arrays of localizations of the names of the public
jpayne@69:      * rule sets.  Each of these is one longer than the initial array,
jpayne@69:      * with the first String being the ULocale ID, and the remaining
jpayne@69:      * Strings being the localizations of the rule set names, in the
jpayne@69:      * same order as the initial array.  Arrays are NULL-terminated.
jpayne@69:      * @param rules A description of the formatter's desired behavior.
jpayne@69:      * See the class documentation for a complete explanation of the description
jpayne@69:      * syntax.
jpayne@69:      * @param localizations a list of localizations for the rule set
jpayne@69:      * names in the description.  These will be copied by the constructor.
jpayne@69:      * @param perror The parse error if an error was encountered.
jpayne@69:      * @param status The status indicating whether the constructor succeeded.
jpayne@69:      * @stable ICU 3.2
jpayne@69:      */
jpayne@69:     RuleBasedNumberFormat(const UnicodeString& rules, const UnicodeString& localizations,
jpayne@69:                         UParseError& perror, UErrorCode& status);
jpayne@69: 
jpayne@69:   /**
jpayne@69:    * Creates a RuleBasedNumberFormat that behaves according to the rules
jpayne@69:    * passed in.  The formatter uses the specified locale to determine the
jpayne@69:    * characters to use when formatting numerals, and to define equivalences
jpayne@69:    * for lenient parsing.
jpayne@69:    * @param rules The formatter rules.
jpayne@69:    * See the class documentation for a complete explanation of the rule
jpayne@69:    * syntax.
jpayne@69:    * @param locale A locale that governs which characters are used for
jpayne@69:    * formatting values in numerals and which characters are equivalent in
jpayne@69:    * lenient parsing.
jpayne@69:    * @param perror The parse error if an error was encountered.
jpayne@69:    * @param status The status indicating whether the constructor succeeded.
jpayne@69:    * @stable ICU 2.0
jpayne@69:    */
jpayne@69:   RuleBasedNumberFormat(const UnicodeString& rules, const Locale& locale,
jpayne@69:                         UParseError& perror, UErrorCode& status);
jpayne@69: 
jpayne@69:     /**
jpayne@69:      * Creates a RuleBasedNumberFormat that behaves according to the description
jpayne@69:      * passed in.  The formatter uses the specified locale.
jpayne@69:      * <p>
jpayne@69:      * The localizations data provides information about the public
jpayne@69:      * rule sets and their localized display names for different
jpayne@69:      * locales. The first element in the list is an array of the names
jpayne@69:      * of the public rule sets.  The first element in this array is
jpayne@69:      * the initial default ruleset.  The remaining elements in the
jpayne@69:      * list are arrays of localizations of the names of the public
jpayne@69:      * rule sets.  Each of these is one longer than the initial array,
jpayne@69:      * with the first String being the ULocale ID, and the remaining
jpayne@69:      * Strings being the localizations of the rule set names, in the
jpayne@69:      * same order as the initial array.  Arrays are NULL-terminated.
jpayne@69:      * @param rules A description of the formatter's desired behavior.
jpayne@69:      * See the class documentation for a complete explanation of the description
jpayne@69:      * syntax.
jpayne@69:      * @param localizations a list of localizations for the rule set
jpayne@69:      * names in the description.  These will be copied by the constructor.
jpayne@69:      * @param locale A locale that governs which characters are used for
jpayne@69:      * formatting values in numerals and which characters are equivalent in
jpayne@69:      * lenient parsing.
jpayne@69:      * @param perror The parse error if an error was encountered.
jpayne@69:      * @param status The status indicating whether the constructor succeeded.
jpayne@69:      * @stable ICU 3.2
jpayne@69:      */
jpayne@69:     RuleBasedNumberFormat(const UnicodeString& rules, const UnicodeString& localizations,
jpayne@69:                         const Locale& locale, UParseError& perror, UErrorCode& status);
jpayne@69: 
jpayne@69:   /**
jpayne@69:    * Creates a RuleBasedNumberFormat from a predefined ruleset.  The selector
jpayne@69:    * code chooses among four possible predefined formats: spellout, ordinal,
jpayne@69:    * duration, and numbering system.
jpayne@69:    * @param tag A selector code specifying which kind of formatter to create for that
jpayne@69:    * locale.  There are four legal values: URBNF_SPELLOUT, which creates a formatter that
jpayne@69:    * spells out a value in words in the desired language, URBNF_ORDINAL, which attaches
jpayne@69:    * an ordinal suffix from the desired language to the end of a number (e.g. "123rd"),
jpayne@69:    * URBNF_DURATION, which formats a duration in seconds as hours, minutes, and seconds always rounding down,
jpayne@69:    * and URBNF_NUMBERING_SYSTEM, which is used to invoke rules for alternate numbering
jpayne@69:    * systems such as the Hebrew numbering system, or for Roman Numerals, etc.
jpayne@69:    * @param locale The locale for the formatter.
jpayne@69:    * @param status The status indicating whether the constructor succeeded.
jpayne@69:    * @stable ICU 2.0
jpayne@69:    */
jpayne@69:   RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& locale, UErrorCode& status);
jpayne@69: 
jpayne@69:   //-----------------------------------------------------------------------
jpayne@69:   // boilerplate
jpayne@69:   //-----------------------------------------------------------------------
jpayne@69: 
jpayne@69:   /**
jpayne@69:    * Copy constructor
jpayne@69:    * @param rhs    the object to be copied from.
jpayne@69:    * @stable ICU 2.6
jpayne@69:    */
jpayne@69:   RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs);
jpayne@69: 
jpayne@69:   /**
jpayne@69:    * Assignment operator
jpayne@69:    * @param rhs    the object to be copied from.
jpayne@69:    * @stable ICU 2.6
jpayne@69:    */
jpayne@69:   RuleBasedNumberFormat& operator=(const RuleBasedNumberFormat& rhs);
jpayne@69: 
jpayne@69:   /**
jpayne@69:    * Release memory allocated for a RuleBasedNumberFormat when you are finished with it.
jpayne@69:    * @stable ICU 2.6
jpayne@69:    */
jpayne@69:   virtual ~RuleBasedNumberFormat();
jpayne@69: 
jpayne@69:   /**
jpayne@69:    * Clone this object polymorphically.  The caller is responsible
jpayne@69:    * for deleting the result when done.
jpayne@69:    * @return  A copy of the object.
jpayne@69:    * @stable ICU 2.6
jpayne@69:    */
jpayne@69:   virtual RuleBasedNumberFormat* clone() const;
jpayne@69: 
jpayne@69:   /**
jpayne@69:    * Return true if the given Format objects are semantically equal.
jpayne@69:    * Objects of different subclasses are considered unequal.
jpayne@69:    * @param other    the object to be compared with.
jpayne@69:    * @return        true if the given Format objects are semantically equal.
jpayne@69:    * @stable ICU 2.6
jpayne@69:    */
jpayne@69:   virtual UBool operator==(const Format& other) const;
jpayne@69: 
jpayne@69: //-----------------------------------------------------------------------
jpayne@69: // public API functions
jpayne@69: //-----------------------------------------------------------------------
jpayne@69: 
jpayne@69:   /**
jpayne@69:    * Return the rules that were provided to the RuleBasedNumberFormat.
jpayne@69:    * @return the result String that was passed in
jpayne@69:    * @stable ICU 2.0
jpayne@69:    */
jpayne@69:   virtual UnicodeString getRules() const;
jpayne@69: 
jpayne@69:   /**
jpayne@69:    * Return the number of public rule set names.
jpayne@69:    * @return the number of public rule set names.
jpayne@69:    * @stable ICU 2.0
jpayne@69:    */
jpayne@69:   virtual int32_t getNumberOfRuleSetNames() const;
jpayne@69: 
jpayne@69:   /**
jpayne@69:    * Return the name of the index'th public ruleSet.  If index is not valid,
jpayne@69:    * the function returns null.
jpayne@69:    * NOTE(review): the return type is a UnicodeString value, so "null" here
jpayne@69:    * presumably means an empty or bogus string -- confirm against the implementation.
jpayne@69:    * @param index the index of the ruleset
jpayne@69:    * @return the name of the index'th public ruleSet.
jpayne@69:    * @stable ICU 2.0
jpayne@69:    */
jpayne@69:   virtual UnicodeString getRuleSetName(int32_t index) const;
jpayne@69: 
jpayne@69:   /**
jpayne@69:    * Return the number of locales for which we have localized rule set display names.
jpayne@69:    * @return the number of locales for which we have localized rule set display names.
jpayne@69:    * @stable ICU 3.2
jpayne@69:    */
jpayne@69:   virtual int32_t getNumberOfRuleSetDisplayNameLocales(void) const;
jpayne@69: 
jpayne@69:   /**
jpayne@69:    * Return the index'th display name locale.
jpayne@69:    * @param index the index of the locale
jpayne@69:    * @param status set to a failure code when this function fails
jpayne@69:    * @return the locale
jpayne@69:    * @see #getNumberOfRuleSetDisplayNameLocales
jpayne@69:    * @stable ICU 3.2
jpayne@69:    */
jpayne@69:   virtual Locale getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const;
jpayne@69: 
jpayne@69:     /**
jpayne@69:      * Return the rule set display names for the provided locale.  These are in the same order
jpayne@69:      * as those returned by getRuleSetName.  The locale is matched against the locales for
jpayne@69:      * which there is display name data, using normal fallback rules.  If no locale matches,
jpayne@69:      * the default display names are returned.  (These are the internal rule set names minus
jpayne@69:      * the leading '%'.)
jpayne@69:      * @param index the index of the rule set
jpayne@69:      * @param locale the locale (returned by getRuleSetDisplayNameLocale) for which the localized
jpayne@69:      * display name is desired
jpayne@69:      * @return the display name for the given index, which might be bogus if there is an error
jpayne@69:      * @see #getRuleSetName
jpayne@69:      * @stable ICU 3.2
jpayne@69:      */
jpayne@69:   virtual UnicodeString getRuleSetDisplayName(int32_t index,
jpayne@69:                           const Locale& locale = Locale::getDefault());
jpayne@69: 
jpayne@69:     /**
jpayne@69:      * Return the rule set display name for the provided rule set and locale.
jpayne@69:      * The locale is matched against the locales for which there is display name data, using
jpayne@69:      * normal fallback rules.  If no locale matches, the default display name is returned.
jpayne@69:      * @param ruleSetName the name of the rule set
jpayne@69:      * @param locale the locale for which the localized display name is desired
jpayne@69:      * @return the display name for the rule set
jpayne@69:      * @stable ICU 3.2
jpayne@69:      * @see #getRuleSetDisplayName
jpayne@69:      */
jpayne@69:   virtual UnicodeString getRuleSetDisplayName(const UnicodeString& ruleSetName,
jpayne@69:                           const Locale& locale = Locale::getDefault());
jpayne@69: 
jpayne@69: 
jpayne@69:   using NumberFormat::format;
jpayne@69: 
jpayne@69:   /**
jpayne@69:    * Formats the specified 32-bit number using the default ruleset.
jpayne@69:    * @param number The number to format.
jpayne@69:    * @param toAppendTo the string that will hold the (appended) result
jpayne@69:    * @param pos the fieldposition
jpayne@69:    * @return A textual representation of the number.
jpayne@69:    * @stable ICU 2.0
jpayne@69:    */
jpayne@69:   virtual UnicodeString& format(int32_t number,
jpayne@69:                                 UnicodeString& toAppendTo,
jpayne@69:                                 FieldPosition& pos) const;
jpayne@69: 
jpayne@69:   /**
jpayne@69:    * Formats the specified 64-bit number using the default ruleset.
jpayne@69:    * @param number The number to format.
jpayne@69:    * @param toAppendTo the string that will hold the (appended) result
jpayne@69:    * @param pos the fieldposition
jpayne@69:    * @return A textual representation of the number.
jpayne@69:    * @stable ICU 2.1
jpayne@69:    */
jpayne@69:   virtual UnicodeString& format(int64_t number,
jpayne@69:                                 UnicodeString& toAppendTo,
jpayne@69:                                 FieldPosition& pos) const;
jpayne@69:   /**
jpayne@69:    * Formats the specified number using the default ruleset.
jpayne@69:    * @param number The number to format.
jpayne@69:    * @param toAppendTo the string that will hold the (appended) result
jpayne@69:    * @param pos the fieldposition
jpayne@69:    * @return A textual representation of the number.
jpayne@69:    * @stable ICU 2.0
jpayne@69:    */
jpayne@69:   virtual UnicodeString& format(double number,
jpayne@69:                                 UnicodeString& toAppendTo,
jpayne@69:                                 FieldPosition& pos) const;
jpayne@69: 
jpayne@69:   /**
jpayne@69:    * Formats the specified number using the named ruleset.
jpayne@69:    * @param number The number to format.
jpayne@69:    * @param ruleSetName The name of the rule set to format the number with.
jpayne@69:    * This must be the name of a valid public rule set for this formatter.
jpayne@69:    * @param toAppendTo the string that will hold the (appended) result
jpayne@69:    * @param pos the fieldposition
jpayne@69:    * @param status the status
jpayne@69:    * @return A textual representation of the number.
jpayne@69:    * @stable ICU 2.0
jpayne@69:    */
jpayne@69:   virtual UnicodeString& format(int32_t number,
jpayne@69:                                 const UnicodeString& ruleSetName,
jpayne@69:                                 UnicodeString& toAppendTo,
jpayne@69:                                 FieldPosition& pos,
jpayne@69:                                 UErrorCode& status) const;
jpayne@69:   /**
jpayne@69:    * Formats the specified 64-bit number using the named ruleset.
jpayne@69:    * @param number The number to format.
jpayne@69:    * @param ruleSetName The name of the rule set to format the number with.
jpayne@69:    * This must be the name of a valid public rule set for this formatter.
jpayne@69:    * @param toAppendTo the string that will hold the (appended) result
jpayne@69:    * @param pos the fieldposition
jpayne@69:    * @param status the status
jpayne@69:    * @return A textual representation of the number.
jpayne@69:    * @stable ICU 2.1
jpayne@69:    */
jpayne@69:   virtual UnicodeString& format(int64_t number,
jpayne@69:                                 const UnicodeString& ruleSetName,
jpayne@69:                                 UnicodeString& toAppendTo,
jpayne@69:                                 FieldPosition& pos,
jpayne@69:                                 UErrorCode& status) const;
jpayne@69:   /**
jpayne@69:    * Formats the specified number using the named ruleset.
jpayne@69:    * @param number The number to format.
jpayne@69:    * @param ruleSetName The name of the rule set to format the number with.
jpayne@69:    * This must be the name of a valid public rule set for this formatter.
jpayne@69:    * @param toAppendTo the string that will hold the (appended) result
jpayne@69:    * @param pos the fieldposition
jpayne@69:    * @param status the status
jpayne@69:    * @return A textual representation of the number.
jpayne@69:    * @stable ICU 2.0
jpayne@69:    */
jpayne@69:   virtual UnicodeString& format(double number,
jpayne@69:                                 const UnicodeString& ruleSetName,
jpayne@69:                                 UnicodeString& toAppendTo,
jpayne@69:                                 FieldPosition& pos,
jpayne@69:                                 UErrorCode& status) const;
jpayne@69: 
jpayne@69: protected:
jpayne@69:     /**
jpayne@69:      * Format a decimal number.
jpayne@69:      * The number is passed as a number::impl::DecimalQuantity.
jpayne@69:      * The default implementation in NumberFormat converts the decimal number
jpayne@69:      * to a double and formats that.  Subclasses of NumberFormat that want
jpayne@69:      * to specifically handle big decimal numbers must override this method.
jpayne@69:      * class DecimalFormat does so.
jpayne@69:      *
jpayne@69:      * @param number    The number, as a DecimalQuantity.
jpayne@69:      * @param appendTo  Output parameter to receive result.
jpayne@69:      *                  Result is appended to existing contents.
jpayne@69:      * @param pos       On input: an alignment field, if desired.
jpayne@69:      *                  On output: the offsets of the alignment field.
jpayne@69:      * @param status    Output param filled with success/failure status.
jpayne@69:      * @return          Reference to 'appendTo' parameter.
jpayne@69:      * @internal
jpayne@69:      */
jpayne@69:     virtual UnicodeString& format(const number::impl::DecimalQuantity &number,
jpayne@69:                                   UnicodeString& appendTo,
jpayne@69:                                   FieldPosition& pos,
jpayne@69:                                   UErrorCode& status) const;
jpayne@69: public:
jpayne@69: 
jpayne@69:   using NumberFormat::parse;
jpayne@69: 
jpayne@69:   /**
jpayne@69:    * Parses the specified string, beginning at the specified position, according
jpayne@69:    * to this formatter's rules.  This will match the string against all of the
jpayne@69:    * formatter's public rule sets and return the value corresponding to the longest
jpayne@69:    * parseable substring.  This function's behavior is affected by the lenient
jpayne@69:    * parse mode.
jpayne@69:    * @param text The string to parse
jpayne@69:    * @param result the result of the parse, either a double or a long.
jpayne@69:    * @param parsePosition On entry, contains the position of the first character
jpayne@69:    * in "text" to examine.  On exit, has been updated to contain the position
jpayne@69:    * of the first character in "text" that wasn't consumed by the parse.
jpayne@69:    * @see #setLenient
jpayne@69:    * @stable ICU 2.0
jpayne@69:    */
jpayne@69:   virtual void parse(const UnicodeString& text,
jpayne@69:                      Formattable& result,
jpayne@69:                      ParsePosition& parsePosition) const;
jpayne@69: 
jpayne@69: #if !UCONFIG_NO_COLLATION
jpayne@69: 
jpayne@69:   /**
jpayne@69:    * Turns lenient parse mode on and off.
jpayne@69:    *
jpayne@69:    * When in lenient parse mode, the formatter uses a Collator for parsing the text.
jpayne@69:    * Only primary differences are treated as significant.  This means that case
jpayne@69:    * differences, accent differences, alternate spellings of the same letter
jpayne@69:    * (e.g., ae and a-umlaut in German), ignorable characters, etc. are ignored in
jpayne@69:    * matching the text.  In many cases, numerals will be accepted in place of words
jpayne@69:    * or phrases as well.
jpayne@69:    *
jpayne@69:    * For example, all of the following will correctly parse as 255 in English in
jpayne@69:    * lenient-parse mode:
jpayne@69:    * <br>"two hundred fifty-five"
jpayne@69:    * <br>"two hundred fifty five"
jpayne@69:    * <br>"TWO HUNDRED FIFTY-FIVE"
jpayne@69:    * <br>"twohundredfiftyfive"
jpayne@69:    * <br>"2 hundred fifty-5"
jpayne@69:    *
jpayne@69:    * The Collator used is determined by the locale that was
jpayne@69:    * passed to this object on construction.  The description passed to this object
jpayne@69:    * on construction may supply additional collation rules that are appended to the
jpayne@69:    * end of the default collator for the locale, enabling additional equivalences
jpayne@69:    * (such as adding more ignorable characters or permitting spelled-out version of
jpayne@69:    * symbols; see the demo program for examples).
jpayne@69:    *
jpayne@69:    * It's important to emphasize that even strict parsing is relatively lenient: it
jpayne@69:    * will accept some text that it won't produce as output.  In English, for example,
jpayne@69:    * it will correctly parse "two hundred zero" and "fifteen hundred".
jpayne@69:    *
jpayne@69:    * @param enabled If true, turns lenient-parse mode on; if false, turns it off.
jpayne@69:    * @see RuleBasedCollator
jpayne@69:    * @stable ICU 2.0
jpayne@69:    */
jpayne@69:   virtual void setLenient(UBool enabled);
jpayne@69: 
jpayne@69:   /**
jpayne@69:    * Returns true if lenient-parse mode is turned on.  Lenient parsing is off
jpayne@69:    * by default.
jpayne@69:    * @return true if lenient-parse mode is turned on.
jpayne@69:    * @see #setLenient
jpayne@69:    * @stable ICU 2.0
jpayne@69:    */
jpayne@69:   virtual inline UBool isLenient(void) const;
jpayne@69: 
jpayne@69: #endif
jpayne@69: 
jpayne@69:   /**
jpayne@69:    * Override the default rule set to use.  If ruleSetName is null, reset
jpayne@69:    * to the initial default rule set.  If the rule set is not a public rule set name,
jpayne@69:    * U_ILLEGAL_ARGUMENT_ERROR is returned in status.
jpayne@69:    * NOTE(review): ruleSetName is a reference, so "null" here presumably means
jpayne@69:    * an empty or bogus string -- confirm against the implementation.
jpayne@69:    * @param ruleSetName the name of the rule set, or null to reset the initial default.
jpayne@69:    * @param status set to failure code when a problem occurs.
jpayne@69:    * @stable ICU 2.6
jpayne@69:    */
jpayne@69:   virtual void setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status);
jpayne@69: 
jpayne@69:   /**
jpayne@69:    * Return the name of the current default rule set.  If the current rule set is
jpayne@69:    * not public, returns a bogus (and empty) UnicodeString.
jpayne@69:    * @return the name of the current default rule set
jpayne@69:    * @stable ICU 3.0
jpayne@69:    */
jpayne@69:   virtual UnicodeString getDefaultRuleSetName() const;
jpayne@69: 
jpayne@69:   /**
jpayne@69:    * Set a particular UDisplayContext value in the formatter, such as
jpayne@69:    * UDISPCTX_CAPITALIZATION_FOR_STANDALONE. Note: For getContext, see
jpayne@69:    * NumberFormat.
jpayne@69:    * @param value The UDisplayContext value to set.
jpayne@69:    * @param status Input/output status. If at entry this indicates a failure
jpayne@69:    *               status, the function will do nothing; otherwise this will be
jpayne@69:    *               updated with any new status from the function.
jpayne@69:    * @stable ICU 53
jpayne@69:    */
jpayne@69:   virtual void setContext(UDisplayContext value, UErrorCode& status);
jpayne@69: 
jpayne@69:     /**
jpayne@69:      * Get the rounding mode.
jpayne@69:      * @return A rounding mode
jpayne@69:      * @stable ICU 60
jpayne@69:      */
jpayne@69:     virtual ERoundingMode getRoundingMode(void) const;
jpayne@69: 
jpayne@69:     /**
jpayne@69:      * Set the rounding mode.
jpayne@69:      * @param roundingMode A rounding mode
jpayne@69:      * @stable ICU 60
jpayne@69:      */
jpayne@69:     virtual void setRoundingMode(ERoundingMode roundingMode);
jpayne@69: 
jpayne@69: public:
jpayne@69:     /**
jpayne@69:      * ICU "poor man's RTTI", returns a UClassID for this class.
jpayne@69:      *
jpayne@69:      * @stable ICU 2.8
jpayne@69:      */
jpayne@69:     static UClassID U_EXPORT2 getStaticClassID(void);
jpayne@69: 
jpayne@69:     /**
jpayne@69:      * ICU "poor man's RTTI", returns a UClassID for the actual class.
jpayne@69:      *
jpayne@69:      * @stable ICU 2.8
jpayne@69:      */
jpayne@69:     virtual UClassID getDynamicClassID(void) const;
jpayne@69: 
jpayne@69:     /**
jpayne@69:      * Sets the decimal format symbols, which is generally not changed
jpayne@69:      * by the programmer or user. The formatter takes ownership of
jpayne@69:      * symbolsToAdopt; the client must not delete it.
jpayne@69:      *
jpayne@69:      * @param symbolsToAdopt DecimalFormatSymbols to be adopted.
jpayne@69:      * @stable ICU 49
jpayne@69:      */
jpayne@69:     virtual void adoptDecimalFormatSymbols(DecimalFormatSymbols* symbolsToAdopt);
jpayne@69: 
jpayne@69:     /**
jpayne@69:      * Sets the decimal format symbols, which is generally not changed
jpayne@69:      * by the programmer or user. A clone of the symbols is created and
jpayne@69:      * the symbols are not adopted; the client is still responsible for
jpayne@69:      * deleting them.
jpayne@69:      *
jpayne@69:      * @param symbols DecimalFormatSymbols.
jpayne@69:      * @stable ICU 49
jpayne@69:      */
jpayne@69:     virtual void setDecimalFormatSymbols(const DecimalFormatSymbols& symbols);
jpayne@69: 
jpayne@69: private:
jpayne@69:     RuleBasedNumberFormat(); // default constructor not implemented
jpayne@69: 
jpayne@69:     // this will ref the localizations if they are not NULL
jpayne@69:     // caller must deref to get adoption
jpayne@69:     RuleBasedNumberFormat(const UnicodeString& description, LocalizationInfo* localizations,
jpayne@69:               const Locale& locale, UParseError& perror, UErrorCode& status);
jpayne@69: 
jpayne@69:     void init(const UnicodeString& rules, LocalizationInfo* localizations, UParseError& perror, UErrorCode& status);
jpayne@69:     void initCapitalizationContextInfo(const Locale& thelocale);
jpayne@69:     void dispose();
jpayne@69:     void stripWhitespace(UnicodeString& src);
jpayne@69:     void initDefaultRuleSet();
jpayne@69:     NFRuleSet* findRuleSet(const UnicodeString& name, UErrorCode& status) const;
jpayne@69: 
jpayne@69:     /* friend access */
jpayne@69:     friend class NFSubstitution;
jpayne@69:     friend class NFRule;
jpayne@69:     friend class NFRuleSet;
jpayne@69:     friend class FractionalPartSubstitution;
jpayne@69: 
jpayne@69:     inline NFRuleSet * getDefaultRuleSet() const;
jpayne@69:     const RuleBasedCollator * getCollator() const;
jpayne@69:     DecimalFormatSymbols * initializeDecimalFormatSymbols(UErrorCode &status);
jpayne@69:     const DecimalFormatSymbols * getDecimalFormatSymbols() const;
jpayne@69:     NFRule * initializeDefaultInfinityRule(UErrorCode &status);
jpayne@69:     const NFRule * getDefaultInfinityRule() const;
jpayne@69:     NFRule * initializeDefaultNaNRule(UErrorCode &status);
jpayne@69:     const NFRule * getDefaultNaNRule() const;
jpayne@69:     PluralFormat *createPluralFormat(UPluralType pluralType, const UnicodeString &pattern, UErrorCode& status) const;
jpayne@69:     UnicodeString& adjustForCapitalizationContext(int32_t startPos, UnicodeString& currentResult, UErrorCode& status) const;
jpayne@69:     UnicodeString& format(int64_t number, NFRuleSet *ruleSet, UnicodeString& toAppendTo, UErrorCode& status) const;
jpayne@69:     void format(double number, NFRuleSet& rs, UnicodeString& toAppendTo, UErrorCode& status) const;
jpayne@69: 
jpayne@69: private:
jpayne@69:     NFRuleSet **fRuleSets;
jpayne@69:     UnicodeString* ruleSetDescriptions;
jpayne@69:     int32_t numRuleSets;
jpayne@69:     NFRuleSet *defaultRuleSet;  // returned by getDefaultRuleSet()
jpayne@69:     Locale locale;
jpayne@69:     RuleBasedCollator* collator;
jpayne@69:     DecimalFormatSymbols* decimalFormatSymbols;
jpayne@69:     NFRule *defaultInfinityRule;
jpayne@69:     NFRule *defaultNaNRule;
jpayne@69:     ERoundingMode fRoundingMode;
jpayne@69:     UBool lenient;  // returned by isLenient()
jpayne@69:     UnicodeString* lenientParseRules;
jpayne@69:     LocalizationInfo* localizations;
jpayne@69:     UnicodeString originalDescription;
jpayne@69:     UBool capitalizationInfoSet;
jpayne@69:     UBool capitalizationForUIListMenu;
jpayne@69:     UBool capitalizationForStandAlone;
jpayne@69:     BreakIterator* capitalizationBrkIter;
jpayne@69: };
jpayne@69: 
jpayne@69: // ---------------
jpayne@69: 
jpayne@69: #if !UCONFIG_NO_COLLATION
jpayne@69: 
jpayne@69: /** Returns the value of the lenient member. */
jpayne@69: inline UBool RuleBasedNumberFormat::isLenient(void) const
jpayne@69: {
jpayne@69:     return this->lenient;
jpayne@69: }
jpayne@69: 
jpayne@69: #endif
jpayne@69: 
jpayne@69: /** Returns the defaultRuleSet member pointer; no ownership is transferred by this accessor itself. */
jpayne@69: inline NFRuleSet* RuleBasedNumberFormat::getDefaultRuleSet() const
jpayne@69: {
jpayne@69:     return this->defaultRuleSet;
jpayne@69: }
jpayne@69: 
jpayne@69: U_NAMESPACE_END
jpayne@69: 
jpayne@69: /* U_HAVE_RBNF */
jpayne@69: #endif
jpayne@69: 
jpayne@69: #endif /* U_SHOW_CPLUSPLUS_API */
jpayne@69: 
jpayne@69: /* RBNF_H */
jpayne@69: #endif
<< thousand >>	[the rule whose base value is 1,000 is applicable to 25,340]
twenty->> thousand >>	[25,340 over 1,000 is 25. The rule for 20 applies.]
twenty-five thousand >>	[25 mod 10 is 5. The rule for 5 is "five."]
twenty-five thousand << hundred >>	[25,340 mod 1,000 is 340. The rule for 100 applies.]
twenty-five thousand three hundred >>	[340 over 100 is 3. The rule for 3 is "three."]
twenty-five thousand three hundred forty	[340 mod 100 is 40. The rule for 40 applies. Since 40 divides evenly by 10, the hyphen and substitution in the brackets are omitted.]
bv:	bv specifies the rule's base value. bv is a decimal number expressed using ASCII digits. bv may contain spaces, periods, and commas, which are ignored. The rule's divisor is the highest power of 10 less than or equal to the base value.
bv/rad:	bv specifies the rule's base value. The rule's divisor is the highest power of rad less than or equal to the base value.
bv>:	bv specifies the rule's base value. To calculate the divisor, let the radix be 10, and the exponent be the highest exponent of the radix that yields a result less than or equal to the base value. Every > character after the base value decreases the exponent by 1. If the exponent is positive or 0, the divisor is the radix raised to the power of the exponent; otherwise, the divisor is 1.
bv/rad>:	bv specifies the rule's base value. To calculate the divisor, let the radix be rad, and the exponent be the highest exponent of the radix that yields a result less than or equal to the base value. Every > character after the radix decreases the exponent by 1. If the exponent is positive or 0, the divisor is the radix raised to the power of the exponent; otherwise, the divisor is 1.
-x:	The rule is a negative-number rule.
x.x:	The rule is an improper fraction rule. If the full stop in the middle of the rule name is replaced with the decimal point that is used in the language or DecimalFormatSymbols, then that rule will have precedence when formatting and parsing this rule. For example, some languages use the comma as the decimal point, so the rule can be written as x,x instead. You can use "x.x: << point >>;x,x: << comma >>;" to handle the decimal point that matches the language's natural spelling of the punctuation of either the full stop or comma.
0.x:	The rule is a proper fraction rule. If the full stop in the middle of the rule name is replaced with the decimal point that is used in the language or DecimalFormatSymbols, then that rule will have precedence when formatting and parsing this rule. For example, some languages use the comma as the decimal point, so the rule can be written as 0,x instead. You can use "0.x: point >>;0,x: comma >>;" to handle the decimal point that matches the language's natural spelling of the punctuation of either the full stop or comma.
x.0:	The rule is a master rule. If the full stop in the middle of the rule name is replaced with the decimal point that is used in the language or DecimalFormatSymbols, then that rule will have precedence when formatting and parsing this rule. For example, some languages use the comma as the decimal point, so the rule can be written as x,0 instead. You can use "x.0: << point;x,0: << comma;" to handle the decimal point that matches the language's natural spelling of the punctuation of either the full stop or comma.
Inf:	The rule for infinity.
NaN:	The rule for an IEEE 754 NaN (not a number).
nothing	If the rule's rule descriptor is left out, the base value is one plus the preceding rule's base value (or zero if this is the first rule in the list) in a normal rule set. In a fraction rule set, the base value is the same as the preceding rule's base value.
>>	in normal rule	Divide the number by the rule's divisor and format the remainder
	in negative-number rule	Find the absolute value of the number and format the result
	in fraction or master rule	Isolate the number's fractional part and format it.
	in rule in fraction rule set	Not allowed.
>>>	in normal rule	Divide the number by the rule's divisor and format the remainder, but bypass the normal rule-selection process and just use the rule that precedes this one in this rule list.
	in all other rules	Not allowed.
<<	in normal rule	Divide the number by the rule's divisor and format the quotient
	in negative-number rule	Not allowed.
	in fraction or master rule	Isolate the number's integral part and format it.
	in rule in fraction rule set	Multiply the number by the rule's base value and format the result.
==	in all rule sets	Format the number unchanged
[]	in normal rule	Omit the optional text if the number is an exact multiple of the rule's divisor
	in negative-number rule	Not allowed.
	in improper-fraction rule	Omit the optional text if the number is between 0 and 1 (same as specifying both an x.x rule and a 0.x rule)
	in master rule	Omit the optional text if the number is an integer (same as specifying both an x.x rule and an x.0 rule)
	in proper-fraction rule	Not allowed.
	in rule in fraction rule set	Omit the optional text if multiplying the number by the rule's base value yields 1.
$(cardinal,plural syntax)$		in all rule sets	This provides the ability to choose a word based on the number divided by the radix to the power of the exponent of the base value for the specified locale, which is normally equivalent to the << value. This uses the cardinal plural rules from PluralFormat. All strings used in the plural format are treated as the same base value for parsing.
$(ordinal,plural syntax)$		in all rule sets	This provides the ability to choose a word based on the number divided by the radix to the power of the exponent of the base value for the specified locale, which is normally equivalent to the << value. This uses the ordinal plural rules from PluralFormat. All strings used in the plural format are treated as the same base value for parsing.
a rule set name	Perform the mathematical operation on the number, and format the result using the named rule set.
a DecimalFormat pattern	Perform the mathematical operation on the number, and format the result using a DecimalFormat with the specified pattern. The pattern must begin with 0 or #.
nothing	Perform the mathematical operation on the number, and format the result using the rule set containing the current rule, except: (1) You can't have an empty substitution descriptor with a == substitution. (2) If you omit the substitution descriptor in a >> substitution in a fraction rule, format the result one digit at a time using the rule set containing the current rule. (3) If you omit the substitution descriptor in a << substitution in a rule in a fraction rule set, format the result using the default rule set for this formatter.