annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/include/unicode/rbnf.h @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
rev   line source
jpayne@69 1 // © 2016 and later: Unicode, Inc. and others.
jpayne@69 2 // License & terms of use: http://www.unicode.org/copyright.html
jpayne@69 3 /*
jpayne@69 4 *******************************************************************************
jpayne@69 5 * Copyright (C) 1997-2015, International Business Machines Corporation and others.
jpayne@69 6 * All Rights Reserved.
jpayne@69 7 *******************************************************************************
jpayne@69 8 */
jpayne@69 9
jpayne@69 10 #ifndef RBNF_H
jpayne@69 11 #define RBNF_H
jpayne@69 12
jpayne@69 13 #include "unicode/utypes.h"
jpayne@69 14
jpayne@69 15 #if U_SHOW_CPLUSPLUS_API
jpayne@69 16
jpayne@69 17 /**
jpayne@69 18 * \file
jpayne@69 19 * \brief C++ API: Rule Based Number Format
jpayne@69 20 */
jpayne@69 21
jpayne@69 22 /**
jpayne@69 23 * \def U_HAVE_RBNF
jpayne@69 24 * This will be 0 if RBNF support is not included in ICU
jpayne@69 25 * and 1 if it is.
jpayne@69 26 *
jpayne@69 27 * @stable ICU 2.4
jpayne@69 28 */
jpayne@69 29 #if UCONFIG_NO_FORMATTING
jpayne@69 30 #define U_HAVE_RBNF 0
jpayne@69 31 #else
jpayne@69 32 #define U_HAVE_RBNF 1
jpayne@69 33
jpayne@69 34 #include "unicode/dcfmtsym.h"
jpayne@69 35 #include "unicode/fmtable.h"
jpayne@69 36 #include "unicode/locid.h"
jpayne@69 37 #include "unicode/numfmt.h"
jpayne@69 38 #include "unicode/unistr.h"
jpayne@69 39 #include "unicode/strenum.h"
jpayne@69 40 #include "unicode/brkiter.h"
jpayne@69 41 #include "unicode/upluralrules.h"
jpayne@69 42
jpayne@69 43 U_NAMESPACE_BEGIN
jpayne@69 44
jpayne@69 45 class NFRule;
jpayne@69 46 class NFRuleSet;
jpayne@69 47 class LocalizationInfo;
jpayne@69 48 class PluralFormat;
jpayne@69 49 class RuleBasedCollator;
jpayne@69 50
jpayne@69 51 /**
jpayne@69 52 * Tags for the predefined rulesets.
jpayne@69 53 *
jpayne@69 54 * @stable ICU 2.2
jpayne@69 55 */
jpayne@69 56 enum URBNFRuleSetTag {
jpayne@69 57 URBNF_SPELLOUT, /**< Spellout rule set: spells out a value in words (123 is "one hundred twenty-three"). */
jpayne@69 58 URBNF_ORDINAL, /**< Ordinal rule set: appends an ordinal suffix to the end of a numeral (123 is "123rd"). */
jpayne@69 59 URBNF_DURATION, /**< Duration rule set: shows a duration in seconds as hours, minutes, and seconds (123 is "2:03"). */
jpayne@69 60 URBNF_NUMBERING_SYSTEM, /**< NOTE(review): not covered by the class documentation; presumably selects numbering-system rule sets -- confirm. */
jpayne@69 61 #ifndef U_HIDE_DEPRECATED_API
jpayne@69 62 /**
jpayne@69 63 * One more than the highest normal URBNFRuleSetTag value.
jpayne@69 64 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
jpayne@69 65 */
jpayne@69 66 URBNF_COUNT
jpayne@69 67 #endif // U_HIDE_DEPRECATED_API
jpayne@69 68 };
jpayne@69 69
jpayne@69 70 /**
jpayne@69 71 * The RuleBasedNumberFormat class formats numbers according to a set of rules. This number formatter is
jpayne@69 72 * typically used for spelling out numeric values in words (e.g., 25,376 as
jpayne@69 73 * "twenty-five thousand three hundred seventy-six" or "vingt-cinq mille trois
jpayne@69 74 * cents soixante-seize" or
jpayne@69 75 * "fünfundzwanzigtausenddreihundertsechsundsiebzig"), but can also be used for
jpayne@69 76 * other complicated formatting tasks, such as formatting a number of seconds as hours,
jpayne@69 77 * minutes and seconds (e.g., 3,730 as "1:02:10").
jpayne@69 78 *
jpayne@69 79 * <p>The resources contain three predefined formatters for each locale: spellout, which
jpayne@69 80 * spells out a value in words (123 is &quot;one hundred twenty-three&quot;); ordinal, which
jpayne@69 81 * appends an ordinal suffix to the end of a numeral (123 is &quot;123rd&quot;); and
jpayne@69 82 * duration, which shows a duration in seconds as hours, minutes, and seconds (123 is
jpayne@69 83 * &quot;2:03&quot;).&nbsp; The client can also define more specialized <tt>RuleBasedNumberFormat</tt>s
jpayne@69 84 * by supplying programmer-defined rule sets.</p>
jpayne@69 85 *
jpayne@69 86 * <p>The behavior of a <tt>RuleBasedNumberFormat</tt> is specified by a textual description
jpayne@69 87 * that is either passed to the constructor as a <tt>String</tt> or loaded from a resource
jpayne@69 88 * bundle. In its simplest form, the description consists of a semicolon-delimited list of <em>rules.</em>
jpayne@69 89 * Each rule has a string of output text and a value or range of values it is applicable to.
jpayne@69 90 * In a typical spellout rule set, the first twenty rules are the words for the numbers from
jpayne@69 91 * 0 to 19:</p>
jpayne@69 92 *
jpayne@69 93 * <pre>zero; one; two; three; four; five; six; seven; eight; nine;
jpayne@69 94 * ten; eleven; twelve; thirteen; fourteen; fifteen; sixteen; seventeen; eighteen; nineteen;</pre>
jpayne@69 95 *
jpayne@69 96 * <p>For larger numbers, we can use the preceding set of rules to format the ones place, and
jpayne@69 97 * we only have to supply the words for the multiples of 10:</p>
jpayne@69 98 *
jpayne@69 99 * <pre> 20: twenty[-&gt;&gt;];
jpayne@69 100 * 30: thirty[-&gt;&gt;];
jpayne@69 101 * 40: forty[-&gt;&gt;];
jpayne@69 102 * 50: fifty[-&gt;&gt;];
jpayne@69 103 * 60: sixty[-&gt;&gt;];
jpayne@69 104 * 70: seventy[-&gt;&gt;];
jpayne@69 105 * 80: eighty[-&gt;&gt;];
jpayne@69 106 * 90: ninety[-&gt;&gt;];</pre>
jpayne@69 107 *
jpayne@69 108 * <p>In these rules, the <em>base value</em> is spelled out explicitly and set off from the
jpayne@69 109 * rule's output text with a colon. The rules are in a sorted list, and a rule is applicable
jpayne@69 110 * to all numbers from its own base value to one less than the next rule's base value. The
jpayne@69 111 * &quot;&gt;&gt;&quot; token is called a <em>substitution</em> and tells the formatter to
jpayne@69 112 * isolate the number's ones digit, format it using this same set of rules, and place the
jpayne@69 113 * result at the position of the &quot;&gt;&gt;&quot; token. Text in brackets is omitted if
jpayne@69 114 * the number being formatted is an even multiple of 10 (the hyphen is a literal hyphen; 24
jpayne@69 115 * is &quot;twenty-four,&quot; not &quot;twenty four&quot;).</p>
jpayne@69 116 *
jpayne@69 117 * <p>For even larger numbers, we can actually look up several parts of the number in the
jpayne@69 118 * list:</p>
jpayne@69 119 *
jpayne@69 120 * <pre>100: &lt;&lt; hundred[ &gt;&gt;];</pre>
jpayne@69 121 *
jpayne@69 122 * <p>The &quot;&lt;&lt;&quot; represents a new kind of substitution. The &lt;&lt; isolates
jpayne@69 123 * the hundreds digit (and any digits to its left), formats it using this same rule set, and
jpayne@69 124 * places the result where the &quot;&lt;&lt;&quot; was. Notice also that the meaning of
jpayne@69 125 * &gt;&gt; has changed: it now refers to both the tens and the ones digits. The meaning of
jpayne@69 126 * both substitutions depends on the rule's base value. The base value determines the rule's <em>divisor,</em>
jpayne@69 127 * which is the highest power of 10 that is less than or equal to the base value (the user
jpayne@69 128 * can change this). To fill in the substitutions, the formatter divides the number being
jpayne@69 129 * formatted by the divisor. The integral quotient is used to fill in the &lt;&lt;
jpayne@69 130 * substitution, and the remainder is used to fill in the &gt;&gt; substitution. The meaning
jpayne@69 131 * of the brackets changes similarly: text in brackets is omitted if the value being
jpayne@69 132 * formatted is an even multiple of the rule's divisor. The rules are applied recursively, so
jpayne@69 133 * if a substitution is filled in with text that includes another substitution, that
jpayne@69 134 * substitution is also filled in.</p>
jpayne@69 135 *
jpayne@69 136 * <p>This rule covers values up to 999, at which point we add another rule:</p>
jpayne@69 137 *
jpayne@69 138 * <pre>1000: &lt;&lt; thousand[ &gt;&gt;];</pre>
jpayne@69 139 *
jpayne@69 140 * <p>Again, the meanings of the brackets and substitution tokens shift because the rule's
jpayne@69 141 * base value is a higher power of 10, changing the rule's divisor. This rule can actually be
jpayne@69 142 * used all the way up to 999,999. This allows us to finish out the rules as follows:</p>
jpayne@69 143 *
jpayne@69 144 * <pre> 1,000,000: &lt;&lt; million[ &gt;&gt;];
jpayne@69 145 * 1,000,000,000: &lt;&lt; billion[ &gt;&gt;];
jpayne@69 146 * 1,000,000,000,000: &lt;&lt; trillion[ &gt;&gt;];
jpayne@69 147 * 1,000,000,000,000,000: OUT OF RANGE!;</pre>
jpayne@69 148 *
jpayne@69 149 * <p>Commas, periods, and spaces can be used in the base values to improve legibility and
jpayne@69 150 * are ignored by the rule parser. The last rule in the list is customarily treated as an
jpayne@69 151 * &quot;overflow rule,&quot; applying to everything from its base value on up, and often (as
jpayne@69 152 * in this example) being used to print out an error message or default representation.
jpayne@69 153 * Notice also that the size of the major groupings in large numbers is controlled by the
jpayne@69 154 * spacing of the rules: because in English we group numbers by thousand, the higher rules
jpayne@69 155 * are separated from each other by a factor of 1,000.</p>
jpayne@69 156 *
jpayne@69 157 * <p>To see how these rules actually work in practice, consider the following example:
jpayne@69 158 * Formatting 25,340 with this rule set would work like this:</p>
jpayne@69 159 *
jpayne@69 160 * <table border="0" width="100%">
jpayne@69 161 * <tr>
jpayne@69 162 * <td><strong>&lt;&lt; thousand &gt;&gt;</strong></td>
jpayne@69 163 * <td>[the rule whose base value is 1,000 is applicable to 25,340]</td>
jpayne@69 164 * </tr>
jpayne@69 165 * <tr>
jpayne@69 166 * <td><strong>twenty-&gt;&gt;</strong> thousand &gt;&gt;</td>
jpayne@69 167 * <td>[25,340 over 1,000 is 25. The rule for 20 applies.]</td>
jpayne@69 168 * </tr>
jpayne@69 169 * <tr>
jpayne@69 170 * <td>twenty-<strong>five</strong> thousand &gt;&gt;</td>
jpayne@69 171 * <td>[25 mod 10 is 5. The rule for 5 is &quot;five.&quot;]</td>
jpayne@69 172 * </tr>
jpayne@69 173 * <tr>
jpayne@69 174 * <td>twenty-five thousand <strong>&lt;&lt; hundred &gt;&gt;</strong></td>
jpayne@69 175 * <td>[25,340 mod 1,000 is 340. The rule for 100 applies.]</td>
jpayne@69 176 * </tr>
jpayne@69 177 * <tr>
jpayne@69 178 * <td>twenty-five thousand <strong>three</strong> hundred &gt;&gt;</td>
jpayne@69 179 * <td>[340 over 100 is 3. The rule for 3 is &quot;three.&quot;]</td>
jpayne@69 180 * </tr>
jpayne@69 181 * <tr>
jpayne@69 182 * <td>twenty-five thousand three hundred <strong>forty</strong></td>
jpayne@69 183 * <td>[340 mod 100 is 40. The rule for 40 applies. Since 40 divides
jpayne@69 184 * evenly by 10, the hyphen and substitution in the brackets are omitted.]</td>
jpayne@69 185 * </tr>
jpayne@69 186 * </table>
jpayne@69 187 *
jpayne@69 188 * <p>The above syntax suffices only to format positive integers. To format negative numbers,
jpayne@69 189 * we add a special rule:</p>
jpayne@69 190 *
jpayne@69 191 * <pre>-x: minus &gt;&gt;;</pre>
jpayne@69 192 *
jpayne@69 193 * <p>This is called a <em>negative-number rule,</em> and is identified by &quot;-x&quot;
jpayne@69 194 * where the base value would be. This rule is used to format all negative numbers. The
jpayne@69 195 * &gt;&gt; token here means &quot;find the number's absolute value, format it with these
jpayne@69 196 * rules, and put the result here.&quot;</p>
jpayne@69 197 *
jpayne@69 198 * <p>We also add a special rule called a <em>fraction rule </em>for numbers with fractional
jpayne@69 199 * parts:</p>
jpayne@69 200 *
jpayne@69 201 * <pre>x.x: &lt;&lt; point &gt;&gt;;</pre>
jpayne@69 202 *
jpayne@69 203 * <p>This rule is used for all positive non-integers (negative non-integers pass through the
jpayne@69 204 * negative-number rule first and then through this rule). Here, the &lt;&lt; token refers to
jpayne@69 205 * the number's integral part, and the &gt;&gt; to the number's fractional part. The
jpayne@69 206 * fractional part is formatted as a series of single-digit numbers (e.g., 123.456 would be
jpayne@69 207 * formatted as &quot;one hundred twenty-three point four five six&quot;).</p>
jpayne@69 208 *
jpayne@69 209 * <p>To see how this rule syntax is applied to various languages, examine the resource data.</p>
jpayne@69 210 *
jpayne@69 211 * <p>There is actually much more flexibility built into the rule language than the
jpayne@69 212 * description above shows. A formatter may own multiple rule sets, which can be selected by
jpayne@69 213 * the caller, and which can use each other to fill in their substitutions. Substitutions can
jpayne@69 214 * also be filled in with digits, using a DecimalFormat object. There is syntax that can be
jpayne@69 215 * used to alter a rule's divisor in various ways. And there is provision for much more
jpayne@69 216 * flexible fraction handling. A complete description of the rule syntax follows:</p>
jpayne@69 217 *
jpayne@69 218 * <hr>
jpayne@69 219 *
jpayne@69 220 * <p>The description of a <tt>RuleBasedNumberFormat</tt>'s behavior consists of one or more <em>rule
jpayne@69 221 * sets.</em> Each rule set consists of a name, a colon, and a list of <em>rules.</em> A rule
jpayne@69 222 * set name must begin with a % sign. Rule sets with names that begin with a single % sign
jpayne@69 223 * are <em>public:</em> the caller can specify that they be used to format and parse numbers.
jpayne@69 224 * Rule sets with names that begin with %% are <em>private:</em> they exist only for the use
jpayne@69 225 * of other rule sets. If a formatter only has one rule set, the name may be omitted.</p>
jpayne@69 226 *
jpayne@69 227 * <p>The user can also specify a special &quot;rule set&quot; named <tt>%%lenient-parse</tt>.
jpayne@69 228 * The body of <tt>%%lenient-parse</tt> isn't a set of number-formatting rules, but a <tt>RuleBasedCollator</tt>
jpayne@69 229 * description which is used to define equivalences for lenient parsing. For more information
jpayne@69 230 * on the syntax, see <tt>RuleBasedCollator</tt>. For more information on lenient parsing,
jpayne@69 231 * see <tt>setLenientParse()</tt>. <em>Note:</em> symbols that have syntactic meaning
jpayne@69 232 * in collation rules, such as '&amp;', have no particular meaning when appearing outside
jpayne@69 233 * of the <tt>lenient-parse</tt> rule set.</p>
jpayne@69 234 *
jpayne@69 235 * <p>The body of a rule set consists of an ordered, semicolon-delimited list of <em>rules.</em>
jpayne@69 236 * Internally, every rule has a base value, a divisor, rule text, and zero, one, or two <em>substitutions.</em>
jpayne@69 237 * These parameters are controlled by the description syntax, which consists of a <em>rule
jpayne@69 238 * descriptor,</em> a colon, and a <em>rule body.</em></p>
jpayne@69 239 *
jpayne@69 240 * <p>A rule descriptor can take one of the following forms (text in <em>italics</em> is the
jpayne@69 241 * name of a token):</p>
jpayne@69 242 *
jpayne@69 243 * <table border="0" width="100%">
jpayne@69 244 * <tr>
jpayne@69 245 * <td><em>bv</em>:</td>
jpayne@69 246 * <td><em>bv</em> specifies the rule's base value. <em>bv</em> is a decimal
jpayne@69 247 * number expressed using ASCII digits. <em>bv</em> may contain spaces, periods, and commas,
jpayne@69 248 * which are ignored. The rule's divisor is the highest power of 10 less than or equal to
jpayne@69 249 * the base value.</td>
jpayne@69 250 * </tr>
jpayne@69 251 * <tr>
jpayne@69 252 * <td><em>bv</em>/<em>rad</em>:</td>
jpayne@69 253 * <td><em>bv</em> specifies the rule's base value. The rule's divisor is the
jpayne@69 254 * highest power of <em>rad</em> less than or equal to the base value.</td>
jpayne@69 255 * </tr>
jpayne@69 256 * <tr>
jpayne@69 257 * <td><em>bv</em>&gt;:</td>
jpayne@69 258 * <td><em>bv</em> specifies the rule's base value. To calculate the divisor,
jpayne@69 259 * let the radix be 10, and the exponent be the highest exponent of the radix that yields a
jpayne@69 260 * result less than or equal to the base value. Every &gt; character after the base value
jpayne@69 261 * decreases the exponent by 1. If the exponent is positive or 0, the divisor is the radix
jpayne@69 262 * raised to the power of the exponent; otherwise, the divisor is 1.</td>
jpayne@69 263 * </tr>
jpayne@69 264 * <tr>
jpayne@69 265 * <td><em>bv</em>/<em>rad</em>&gt;:</td>
jpayne@69 266 * <td><em>bv</em> specifies the rule's base value. To calculate the divisor,
jpayne@69 267 * let the radix be <em>rad</em>, and the exponent be the highest exponent of the radix that
jpayne@69 268 * yields a result less than or equal to the base value. Every &gt; character after the radix
jpayne@69 269 * decreases the exponent by 1. If the exponent is positive or 0, the divisor is the radix
jpayne@69 270 * raised to the power of the exponent; otherwise, the divisor is 1.</td>
jpayne@69 271 * </tr>
jpayne@69 272 * <tr>
jpayne@69 273 * <td>-x:</td>
jpayne@69 274 * <td>The rule is a negative-number rule.</td>
jpayne@69 275 * </tr>
jpayne@69 276 * <tr>
jpayne@69 277 * <td>x.x:</td>
jpayne@69 278 * <td>The rule is an <em>improper fraction rule</em>. If the full stop in
jpayne@69 279 * the middle of the rule name is replaced with the decimal point
jpayne@69 280 * that is used in the language or DecimalFormatSymbols, then that rule will
jpayne@69 281 * have precedence when formatting and parsing this rule. For example, some
jpayne@69 282 * languages use the comma, and can thus be written as x,x instead. For example,
jpayne@69 283 * you can use "x.x: &lt;&lt; point &gt;&gt;;x,x: &lt;&lt; comma &gt;&gt;;" to
jpayne@69 284 * handle the decimal point that matches the language's natural spelling of
jpayne@69 285 * the punctuation of either the full stop or comma.</td>
jpayne@69 286 * </tr>
jpayne@69 287 * <tr>
jpayne@69 288 * <td>0.x:</td>
jpayne@69 289 * <td>The rule is a <em>proper fraction rule</em>. If the full stop in
jpayne@69 290 * the middle of the rule name is replaced with the decimal point
jpayne@69 291 * that is used in the language or DecimalFormatSymbols, then that rule will
jpayne@69 292 * have precedence when formatting and parsing this rule. For example, some
jpayne@69 293 * languages use the comma, and can thus be written as 0,x instead. For example,
jpayne@69 294 * you can use "0.x: point &gt;&gt;;0,x: comma &gt;&gt;;" to
jpayne@69 295 * handle the decimal point that matches the language's natural spelling of
jpayne@69 296 * the punctuation of either the full stop or comma.</td>
jpayne@69 297 * </tr>
jpayne@69 298 * <tr>
jpayne@69 299 * <td>x.0:</td>
jpayne@69 300 * <td>The rule is a <em>master rule</em>. If the full stop in
jpayne@69 301 * the middle of the rule name is replaced with the decimal point
jpayne@69 302 * that is used in the language or DecimalFormatSymbols, then that rule will
jpayne@69 303 * have precedence when formatting and parsing this rule. For example, some
jpayne@69 304 * languages use the comma, and can thus be written as x,0 instead. For example,
jpayne@69 305 * you can use "x.0: &lt;&lt; point;x,0: &lt;&lt; comma;" to
jpayne@69 306 * handle the decimal point that matches the language's natural spelling of
jpayne@69 307 * the punctuation of either the full stop or comma.</td>
jpayne@69 308 * </tr>
jpayne@69 309 * <tr>
jpayne@69 310 * <td>Inf:</td>
jpayne@69 311 * <td>The rule for infinity.</td>
jpayne@69 312 * </tr>
jpayne@69 313 * <tr>
jpayne@69 314 * <td>NaN:</td>
jpayne@69 315 * <td>The rule for an IEEE 754 NaN (not a number).</td>
jpayne@69 316 * </tr>
jpayne@69 317 * <tr>
jpayne@69 318 * <td><em>nothing</em></td>
jpayne@69 319 * <td>If the rule's rule descriptor is left out, the base value is one plus the
jpayne@69 320 * preceding rule's base value (or zero if this is the first rule in the list) in a normal
jpayne@69 321 * rule set.&nbsp; In a fraction rule set, the base value is the same as the preceding rule's
jpayne@69 322 * base value.</td>
jpayne@69 323 * </tr>
jpayne@69 324 * </table>
jpayne@69 325 *
jpayne@69 326 * <p>A rule set may be either a regular rule set or a <em>fraction rule set,</em> depending
jpayne@69 327 * on whether it is used to format a number's integral part (or the whole number) or a
jpayne@69 328 * number's fractional part. Using a rule set to format a rule's fractional part makes it a
jpayne@69 329 * fraction rule set.</p>
jpayne@69 330 *
jpayne@69 331 * <p>Which rule is used to format a number is defined according to one of the following
jpayne@69 332 * algorithms: If the rule set is a regular rule set, do the following:
jpayne@69 333 *
jpayne@69 334 * <ul>
jpayne@69 335 * <li>If the rule set includes a master rule (and the number was passed in as a <tt>double</tt>),
jpayne@69 336 * use the master rule.&nbsp; (If the number being formatted was passed in as a <tt>long</tt>,
jpayne@69 337 * the master rule is ignored.)</li>
jpayne@69 338 * <li>If the number is negative, use the negative-number rule.</li>
jpayne@69 339 * <li>If the number has a fractional part and is greater than 1, use the improper fraction
jpayne@69 340 * rule.</li>
jpayne@69 341 * <li>If the number has a fractional part and is between 0 and 1, use the proper fraction
jpayne@69 342 * rule.</li>
jpayne@69 343 * <li>Binary-search the rule list for the rule with the highest base value less than or equal
jpayne@69 344 * to the number. If that rule has two substitutions, its base value is not an even multiple
jpayne@69 345 * of its divisor, and the number <em>is</em> an even multiple of the rule's divisor, use the
jpayne@69 346 * rule that precedes it in the rule list. Otherwise, use the rule itself.</li>
jpayne@69 347 * </ul>
jpayne@69 348 *
jpayne@69 349 * <p>If the rule set is a fraction rule set, do the following:
jpayne@69 350 *
jpayne@69 351 * <ul>
jpayne@69 352 * <li>Ignore negative-number and fraction rules.</li>
jpayne@69 353 * <li>For each rule in the list, multiply the number being formatted (which will always be
jpayne@69 354 * between 0 and 1) by the rule's base value. Keep track of the distance between the result
jpayne@69 355 * and the nearest integer.</li>
jpayne@69 356 * <li>Use the rule that produced the result closest to zero in the above calculation. In the
jpayne@69 357 * event of a tie or a direct hit, use the first matching rule encountered. (The idea here is
jpayne@69 358 * to try each rule's base value as a possible denominator of a fraction. Whichever
jpayne@69 359 * denominator produces the fraction closest in value to the number being formatted wins.) If
jpayne@69 360 * the rule following the matching rule has the same base value, use it if the numerator of
jpayne@69 361 * the fraction is anything other than 1; if the numerator is 1, use the original matching
jpayne@69 362 * rule. (This is to allow singular and plural forms of the rule text without a lot of extra
jpayne@69 363 * hassle.)</li>
jpayne@69 364 * </ul>
jpayne@69 365 *
jpayne@69 366 * <p>A rule's body consists of a string of characters terminated by a semicolon. The rule
jpayne@69 367 * may include zero, one, or two <em>substitution tokens,</em> and a range of text in
jpayne@69 368 * brackets. The brackets denote optional text (and may also include one or both
jpayne@69 369 * substitutions). The exact meanings of the substitution tokens, and under what conditions
jpayne@69 370 * optional text is omitted, depend on the syntax of the substitution token and the context.
jpayne@69 371 * The rest of the text in a rule body is literal text that is output when the rule matches
jpayne@69 372 * the number being formatted.</p>
jpayne@69 373 *
jpayne@69 374 * <p>A substitution token begins and ends with a <em>token character.</em> The token
jpayne@69 375 * character and the context together specify a mathematical operation to be performed on the
jpayne@69 376 * number being formatted. An optional <em>substitution descriptor </em>specifies how the
jpayne@69 377 * value resulting from that operation is used to fill in the substitution. The position of
jpayne@69 378 * the substitution token in the rule body specifies the location of the resultant text in
jpayne@69 379 * the original rule text.</p>
jpayne@69 380 *
jpayne@69 381 * <p>The meanings of the substitution token characters are as follows:</p>
jpayne@69 382 *
jpayne@69 383 * <table border="0" width="100%">
jpayne@69 384 * <tr>
jpayne@69 385 * <td>&gt;&gt;</td>
jpayne@69 386 * <td>in normal rule</td>
jpayne@69 387 * <td>Divide the number by the rule's divisor and format the remainder</td>
jpayne@69 388 * </tr>
jpayne@69 389 * <tr>
jpayne@69 390 * <td></td>
jpayne@69 391 * <td>in negative-number rule</td>
jpayne@69 392 * <td>Find the absolute value of the number and format the result</td>
jpayne@69 393 * </tr>
jpayne@69 394 * <tr>
jpayne@69 395 * <td></td>
jpayne@69 396 * <td>in fraction or master rule</td>
jpayne@69 397 * <td>Isolate the number's fractional part and format it.</td>
jpayne@69 398 * </tr>
jpayne@69 399 * <tr>
jpayne@69 400 * <td></td>
jpayne@69 401 * <td>in rule in fraction rule set</td>
jpayne@69 402 * <td>Not allowed.</td>
jpayne@69 403 * </tr>
jpayne@69 404 * <tr>
jpayne@69 405 * <td>&gt;&gt;&gt;</td>
jpayne@69 406 * <td>in normal rule</td>
jpayne@69 407 * <td>Divide the number by the rule's divisor and format the remainder,
jpayne@69 408 * but bypass the normal rule-selection process and just use the
jpayne@69 409 * rule that precedes this one in this rule list.</td>
jpayne@69 410 * </tr>
jpayne@69 411 * <tr>
jpayne@69 412 * <td></td>
jpayne@69 413 * <td>in all other rules</td>
jpayne@69 414 * <td>Not allowed.</td>
jpayne@69 415 * </tr>
jpayne@69 416 * <tr>
jpayne@69 417 * <td>&lt;&lt;</td>
jpayne@69 418 * <td>in normal rule</td>
jpayne@69 419 * <td>Divide the number by the rule's divisor and format the quotient</td>
jpayne@69 420 * </tr>
jpayne@69 421 * <tr>
jpayne@69 422 * <td></td>
jpayne@69 423 * <td>in negative-number rule</td>
jpayne@69 424 * <td>Not allowed.</td>
jpayne@69 425 * </tr>
jpayne@69 426 * <tr>
jpayne@69 427 * <td></td>
jpayne@69 428 * <td>in fraction or master rule</td>
jpayne@69 429 * <td>Isolate the number's integral part and format it.</td>
jpayne@69 430 * </tr>
jpayne@69 431 * <tr>
jpayne@69 432 * <td></td>
jpayne@69 433 * <td>in rule in fraction rule set</td>
jpayne@69 434 * <td>Multiply the number by the rule's base value and format the result.</td>
jpayne@69 435 * </tr>
jpayne@69 436 * <tr>
jpayne@69 437 * <td>==</td>
jpayne@69 438 * <td>in all rule sets</td>
jpayne@69 439 * <td>Format the number unchanged</td>
jpayne@69 440 * </tr>
jpayne@69 441 * <tr>
jpayne@69 442 * <td>[]</td>
jpayne@69 443 * <td>in normal rule</td>
jpayne@69 444 * <td>Omit the optional text if the number is an even multiple of the rule's divisor</td>
jpayne@69 445 * </tr>
jpayne@69 446 * <tr>
jpayne@69 447 * <td></td>
jpayne@69 448 * <td>in negative-number rule</td>
jpayne@69 449 * <td>Not allowed.</td>
jpayne@69 450 * </tr>
jpayne@69 451 * <tr>
jpayne@69 452 * <td></td>
jpayne@69 453 * <td>in improper-fraction rule</td>
jpayne@69 454 * <td>Omit the optional text if the number is between 0 and 1 (same as specifying both an
jpayne@69 455 * x.x rule and a 0.x rule)</td>
jpayne@69 456 * </tr>
jpayne@69 457 * <tr>
jpayne@69 458 * <td></td>
jpayne@69 459 * <td>in master rule</td>
jpayne@69 460 * <td>Omit the optional text if the number is an integer (same as specifying both an x.x
jpayne@69 461 * rule and an x.0 rule)</td>
jpayne@69 462 * </tr>
jpayne@69 463 * <tr>
jpayne@69 464 * <td></td>
jpayne@69 465 * <td>in proper-fraction rule</td>
jpayne@69 466 * <td>Not allowed.</td>
jpayne@69 467 * </tr>
jpayne@69 468 * <tr>
jpayne@69 469 * <td></td>
jpayne@69 470 * <td>in rule in fraction rule set</td>
jpayne@69 471 * <td>Omit the optional text if multiplying the number by the rule's base value yields 1.</td>
jpayne@69 472 * </tr>
jpayne@69 473 * <tr>
jpayne@69 474 * <td width="37">$(cardinal,<i>plural syntax</i>)$</td>
jpayne@69 475 * <td width="23"></td>
jpayne@69 476 * <td width="165" valign="top">in all rule sets</td>
jpayne@69 477 * <td>This provides the ability to choose a word based on the number divided by the radix to the power of the
jpayne@69 478 * exponent of the base value for the specified locale, which is normally equivalent to the &lt;&lt; value.
jpayne@69 479 * This uses the cardinal plural rules from PluralFormat. All strings used in the plural format are treated
jpayne@69 480 * as the same base value for parsing.</td>
jpayne@69 481 * </tr>
jpayne@69 482 * <tr>
jpayne@69 483 * <td width="37">$(ordinal,<i>plural syntax</i>)$</td>
jpayne@69 484 * <td width="23"></td>
jpayne@69 485 * <td width="165" valign="top">in all rule sets</td>
jpayne@69 486 * <td>This provides the ability to choose a word based on the number divided by the radix to the power of the
jpayne@69 487 * exponent of the base value for the specified locale, which is normally equivalent to the &lt;&lt; value.
jpayne@69 488 * This uses the ordinal plural rules from PluralFormat. All strings used in the plural format are treated
jpayne@69 489 * as the same base value for parsing.</td>
jpayne@69 490 * </tr>
jpayne@69 491 * </table>
jpayne@69 492 *
jpayne@69 493 * <p>The substitution descriptor (i.e., the text between the token characters) may take one
jpayne@69 494 * of three forms:</p>
jpayne@69 495 *
jpayne@69 496 * <table border="0" width="100%">
jpayne@69 497 * <tr>
jpayne@69 498 * <td>a rule set name</td>
jpayne@69 499 * <td>Perform the mathematical operation on the number, and format the result using the
jpayne@69 500 * named rule set.</td>
jpayne@69 501 * </tr>
jpayne@69 502 * <tr>
jpayne@69 503 * <td>a DecimalFormat pattern</td>
jpayne@69 504 * <td>Perform the mathematical operation on the number, and format the result using a
jpayne@69 505 * DecimalFormat with the specified pattern.&nbsp; The pattern must begin with 0 or #.</td>
jpayne@69 506 * </tr>
jpayne@69 507 * <tr>
jpayne@69 508 * <td>nothing</td>
jpayne@69 509 * <td>Perform the mathematical operation on the number, and format the result using the rule
jpayne@69 510 * set containing the current rule, except:
jpayne@69 511 * <ul>
jpayne@69 512 * <li>You can't have an empty substitution descriptor with a == substitution.</li>
jpayne@69 513 * <li>If you omit the substitution descriptor in a &gt;&gt; substitution in a fraction rule,
jpayne@69 514 * format the result one digit at a time using the rule set containing the current rule.</li>
jpayne@69 515 * <li>If you omit the substitution descriptor in a &lt;&lt; substitution in a rule in a
jpayne@69 516 * fraction rule set, format the result using the default rule set for this formatter.</li>
jpayne@69 517 * </ul>
jpayne@69 518 * </td>
jpayne@69 519 * </tr>
jpayne@69 520 * </table>
jpayne@69 521 *
jpayne@69 522 * <p>Whitespace is ignored between a rule set name and a rule set body, between a rule
jpayne@69 523 * descriptor and a rule body, or between rules. If a rule body begins with an apostrophe,
jpayne@69 524 * the apostrophe is ignored, but all text after it becomes significant (this is how you can
jpayne@69 525 * have a rule's rule text begin with whitespace). There is no escape function: the semicolon
jpayne@69 526 * is not allowed in rule set names or in rule text, and the colon is not allowed in rule set
jpayne@69 527 * names. The characters beginning a substitution token are always treated as the beginning
jpayne@69 528 * of a substitution token.</p>
jpayne@69 529 *
jpayne@69 530 * <p>See the resource data and the demo program for annotated examples of real rule sets
jpayne@69 531 * using these features.</p>
jpayne@69 532 *
jpayne@69 533 * <p><em>User subclasses are not supported.</em> While clients may write
jpayne@69 534 * subclasses, such code will not necessarily work and will not be
jpayne@69 535 * guaranteed to work stably from release to release.
jpayne@69 536 *
jpayne@69 537 * <p><b>Localizations</b></p>
jpayne@69 538 * <p>Constructors are available that allow the specification of localizations for the
jpayne@69 539 * public rule sets (and also allow more control over what public rule sets are available).
jpayne@69 540 * Localization data is represented as a textual description. The description represents
jpayne@69 541 * an array of arrays of string. The first element is an array of the public rule set names,
jpayne@69 542 * each of these must be one of the public rule set names that appear in the rules. Only
jpayne@69 543 * names in this array will be treated as public rule set names by the API. Each subsequent
jpayne@69 544 * element is an array of localizations of these names. The first element of one of these
jpayne@69 545 * subarrays is the locale name, and the remaining elements are localizations of the
jpayne@69 546 * public rule set names, in the same order as they were listed in the first array.</p>
jpayne@69 547 * <p>In the syntax, angle brackets '<', '>' are used to delimit the arrays, and comma ',' is used
jpayne@69 548 * to separate elements of an array. Whitespace is ignored, unless quoted.</p>
jpayne@69 549 * <p>For example:<pre>
jpayne@69 550 * < < %foo, %bar, %baz >,
jpayne@69 551 * < en, Foo, Bar, Baz >,
jpayne@69 552 * < fr, 'le Foo', 'le Bar', 'le Baz' >,
jpayne@69 553 * < zh, \\u7532, \\u4e59, \\u4e19 > >
jpayne@69 554 * </pre></p>
jpayne@69 555 * @author Richard Gillam
jpayne@69 556 * @see NumberFormat
jpayne@69 557 * @see DecimalFormat
jpayne@69 558 * @see PluralFormat
jpayne@69 559 * @see PluralRules
jpayne@69 560 * @stable ICU 2.0
jpayne@69 561 */
jpayne@69 562 class U_I18N_API RuleBasedNumberFormat : public NumberFormat {
jpayne@69 563 public:
jpayne@69 564
jpayne@69 565 //-----------------------------------------------------------------------
jpayne@69 566 // constructors
jpayne@69 567 //-----------------------------------------------------------------------
jpayne@69 568
jpayne@69 569 /**
jpayne@69 570 * Creates a RuleBasedNumberFormat that behaves according to the description
jpayne@69 571 * passed in. The formatter uses the default locale.
jpayne@69 572 * @param rules A description of the formatter's desired behavior.
jpayne@69 573 * See the class documentation for a complete explanation of the description
jpayne@69 574 * syntax.
jpayne@69 575 * @param perror The parse error if an error was encountered.
jpayne@69 576 * @param status The status indicating whether the constructor succeeded.
jpayne@69 577 * @stable ICU 3.2
jpayne@69 578 */
jpayne@69 579 RuleBasedNumberFormat(const UnicodeString& rules, UParseError& perror, UErrorCode& status);
jpayne@69 580
jpayne@69 581 /**
jpayne@69 582 * Creates a RuleBasedNumberFormat that behaves according to the description
jpayne@69 583 * passed in. The formatter uses the default locale.
jpayne@69 584 * <p>
jpayne@69 585 * The localizations data provides information about the public
jpayne@69 586 * rule sets and their localized display names for different
jpayne@69 587 * locales. The first element in the list is an array of the names
jpayne@69 588 * of the public rule sets. The first element in this array is
jpayne@69 589 * the initial default ruleset. The remaining elements in the
jpayne@69 590 * list are arrays of localizations of the names of the public
jpayne@69 591 * rule sets. Each of these is one longer than the initial array,
jpayne@69 592 * with the first String being the ULocale ID, and the remaining
jpayne@69 593 * Strings being the localizations of the rule set names, in the
jpayne@69 594 * same order as the initial array. Arrays are NULL-terminated.
jpayne@69 595 * @param rules A description of the formatter's desired behavior.
jpayne@69 596 * See the class documentation for a complete explanation of the description
jpayne@69 597 * syntax.
jpayne@69 598 * @param localizations a list of localizations for the rule set
jpayne@69 599 * names in the description. These will be copied by the constructor.
jpayne@69 600 * @param perror The parse error if an error was encountered.
jpayne@69 601 * @param status The status indicating whether the constructor succeeded.
jpayne@69 602 * @stable ICU 3.2
jpayne@69 603 */
jpayne@69 604 RuleBasedNumberFormat(const UnicodeString& rules, const UnicodeString& localizations,
jpayne@69 605 UParseError& perror, UErrorCode& status);
jpayne@69 606
jpayne@69 607 /**
jpayne@69 608 * Creates a RuleBasedNumberFormat that behaves according to the rules
jpayne@69 609 * passed in. The formatter uses the specified locale to determine the
jpayne@69 610 * characters to use when formatting numerals, and to define equivalences
jpayne@69 611 * for lenient parsing.
jpayne@69 612 * @param rules The formatter rules.
jpayne@69 613 * See the class documentation for a complete explanation of the rule
jpayne@69 614 * syntax.
jpayne@69 615 * @param locale A locale that governs which characters are used for
jpayne@69 616 * formatting values in numerals and which characters are equivalent in
jpayne@69 617 * lenient parsing.
jpayne@69 618 * @param perror The parse error if an error was encountered.
jpayne@69 619 * @param status The status indicating whether the constructor succeeded.
jpayne@69 620 * @stable ICU 2.0
jpayne@69 621 */
jpayne@69 622 RuleBasedNumberFormat(const UnicodeString& rules, const Locale& locale,
jpayne@69 623 UParseError& perror, UErrorCode& status);
jpayne@69 624
jpayne@69 625 /**
jpayne@69 626 * Creates a RuleBasedNumberFormat that behaves according to the description
jpayne@69 627 * passed in. The formatter uses the specified locale.
jpayne@69 628 * <p>
jpayne@69 629 * The localizations data provides information about the public
jpayne@69 630 * rule sets and their localized display names for different
jpayne@69 631 * locales. The first element in the list is an array of the names
jpayne@69 632 * of the public rule sets. The first element in this array is
jpayne@69 633 * the initial default ruleset. The remaining elements in the
jpayne@69 634 * list are arrays of localizations of the names of the public
jpayne@69 635 * rule sets. Each of these is one longer than the initial array,
jpayne@69 636 * with the first String being the ULocale ID, and the remaining
jpayne@69 637 * Strings being the localizations of the rule set names, in the
jpayne@69 638 * same order as the initial array. Arrays are NULL-terminated.
jpayne@69 639 * @param rules A description of the formatter's desired behavior.
jpayne@69 640 * See the class documentation for a complete explanation of the description
jpayne@69 641 * syntax.
jpayne@69 642 * @param localizations a list of localizations for the rule set
jpayne@69 643 * names in the description. These will be copied by the constructor.
jpayne@69 644 * @param locale A locale that governs which characters are used for
jpayne@69 645 * formatting values in numerals and which characters are equivalent in
jpayne@69 646 * lenient parsing.
jpayne@69 647 * @param perror The parse error if an error was encountered.
jpayne@69 648 * @param status The status indicating whether the constructor succeeded.
jpayne@69 649 * @stable ICU 3.2
jpayne@69 650 */
jpayne@69 651 RuleBasedNumberFormat(const UnicodeString& rules, const UnicodeString& localizations,
jpayne@69 652 const Locale& locale, UParseError& perror, UErrorCode& status);
jpayne@69 653
jpayne@69 654 /**
jpayne@69 655 * Creates a RuleBasedNumberFormat from a predefined ruleset. The selector
jpayne@69 656 * code chooses among four possible predefined formats: spellout, ordinal,
jpayne@69 657 * duration, and numbering system.
jpayne@69 658 * @param tag A selector code specifying which kind of formatter to create for that
jpayne@69 659 * locale. There are four legal values: URBNF_SPELLOUT, which creates a formatter that
jpayne@69 660 * spells out a value in words in the desired language, URBNF_ORDINAL, which attaches
jpayne@69 661 * an ordinal suffix from the desired language to the end of a number (e.g. "123rd"),
jpayne@69 662 * URBNF_DURATION, which formats a duration in seconds as hours, minutes, and seconds always rounding down,
jpayne@69 663 * and URBNF_NUMBERING_SYSTEM, which is used to invoke rules for alternate numbering
jpayne@69 664 * systems such as the Hebrew numbering system, or for Roman Numerals, etc.
jpayne@69 665 * @param locale The locale for the formatter.
jpayne@69 666 * @param status The status indicating whether the constructor succeeded.
jpayne@69 667 * @stable ICU 2.0
jpayne@69 668 */
jpayne@69 669 RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& locale, UErrorCode& status);
jpayne@69 670
jpayne@69 671 //-----------------------------------------------------------------------
jpayne@69 672 // boilerplate
jpayne@69 673 //-----------------------------------------------------------------------
jpayne@69 674
jpayne@69 675 /**
jpayne@69 676 * Copy constructor
jpayne@69 677 * @param rhs the object to be copied from.
jpayne@69 678 * @stable ICU 2.6
jpayne@69 679 */
jpayne@69 680 RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs);
jpayne@69 681
jpayne@69 682 /**
jpayne@69 683 * Assignment operator
jpayne@69 684 * @param rhs the object to be copied from.
jpayne@69 685 * @stable ICU 2.6
jpayne@69 686 */
jpayne@69 687 RuleBasedNumberFormat& operator=(const RuleBasedNumberFormat& rhs);
jpayne@69 688
jpayne@69 689 /**
jpayne@69 690 * Release memory allocated for a RuleBasedNumberFormat when you are finished with it.
jpayne@69 691 * @stable ICU 2.6
jpayne@69 692 */
jpayne@69 693 virtual ~RuleBasedNumberFormat();
jpayne@69 694
jpayne@69 695 /**
jpayne@69 696 * Clone this object polymorphically. The caller is responsible
jpayne@69 697 * for deleting the result when done.
jpayne@69 698 * @return A copy of the object.
jpayne@69 699 * @stable ICU 2.6
jpayne@69 700 */
jpayne@69 701 virtual RuleBasedNumberFormat* clone() const;
jpayne@69 702
jpayne@69 703 /**
jpayne@69 704 * Return true if the given Format objects are semantically equal.
jpayne@69 705 * Objects of different subclasses are considered unequal.
jpayne@69 706 * @param other the object to be compared with.
jpayne@69 707 * @return true if the given Format objects are semantically equal.
jpayne@69 708 * @stable ICU 2.6
jpayne@69 709 */
jpayne@69 710 virtual UBool operator==(const Format& other) const;
jpayne@69 711
jpayne@69 712 //-----------------------------------------------------------------------
jpayne@69 713 // public API functions
jpayne@69 714 //-----------------------------------------------------------------------
jpayne@69 715
jpayne@69 716 /**
jpayne@69 717 * Return the rules that were provided to the RuleBasedNumberFormat.
jpayne@69 718 * @return the result String that was passed in
jpayne@69 719 * @stable ICU 2.0
jpayne@69 720 */
jpayne@69 721 virtual UnicodeString getRules() const;
jpayne@69 722
jpayne@69 723 /**
jpayne@69 724 * Return the number of public rule set names.
jpayne@69 725 * @return the number of public rule set names.
jpayne@69 726 * @stable ICU 2.0
jpayne@69 727 */
jpayne@69 728 virtual int32_t getNumberOfRuleSetNames() const;
jpayne@69 729
jpayne@69 730 /**
jpayne@69 731 * Return the name of the index'th public ruleSet. If index is not valid,
jpayne@69 732 * the function returns null (NOTE(review): returned by value; presumably empty or bogus - confirm).
jpayne@69 733 * @param index the index of the ruleset
jpayne@69 734 * @return the name of the index'th public ruleSet.
jpayne@69 735 * @stable ICU 2.0
jpayne@69 736 */
jpayne@69 737 virtual UnicodeString getRuleSetName(int32_t index) const;
jpayne@69 738
jpayne@69 739 /**
jpayne@69 740 * Return the number of locales for which we have localized rule set display names.
jpayne@69 741 * @return the number of locales for which we have localized rule set display names.
jpayne@69 742 * @stable ICU 3.2
jpayne@69 743 */
jpayne@69 744 virtual int32_t getNumberOfRuleSetDisplayNameLocales(void) const;
jpayne@69 745
jpayne@69 746 /**
jpayne@69 747 * Return the index'th display name locale.
jpayne@69 748 * @param index the index of the locale
jpayne@69 749 * @param status set to a failure code when this function fails
jpayne@69 750 * @return the locale
jpayne@69 751 * @see #getNumberOfRuleSetDisplayNameLocales
jpayne@69 752 * @stable ICU 3.2
jpayne@69 753 */
jpayne@69 754 virtual Locale getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const;
jpayne@69 755
jpayne@69 756 /**
jpayne@69 757 * Return the rule set display names for the provided locale. These are in the same order
jpayne@69 758 * as those returned by getRuleSetName. The locale is matched against the locales for
jpayne@69 759 * which there is display name data, using normal fallback rules. If no locale matches,
jpayne@69 760 * the default display names are returned. (These are the internal rule set names minus
jpayne@69 761 * the leading '%'.)
jpayne@69 762 * @param index the index of the rule set
jpayne@69 763 * @param locale the locale (returned by getRuleSetDisplayNameLocale) for which the localized
jpayne@69 764 * display name is desired
jpayne@69 765 * @return the display name for the given index, which might be bogus if there is an error
jpayne@69 766 * @see #getRuleSetName
jpayne@69 767 * @stable ICU 3.2
jpayne@69 768 */
jpayne@69 769 virtual UnicodeString getRuleSetDisplayName(int32_t index,
jpayne@69 770 const Locale& locale = Locale::getDefault());
jpayne@69 771
jpayne@69 772 /**
jpayne@69 773 * Return the rule set display name for the provided rule set and locale.
jpayne@69 774 * The locale is matched against the locales for which there is display name data, using
jpayne@69 775 * normal fallback rules. If no locale matches, the default display name is returned.
jpayne@69 776 * @return the display name for the rule set
jpayne@69 777 * @stable ICU 3.2
jpayne@69 778 * @see #getRuleSetDisplayName
jpayne@69 779 */
jpayne@69 780 virtual UnicodeString getRuleSetDisplayName(const UnicodeString& ruleSetName,
jpayne@69 781 const Locale& locale = Locale::getDefault());
jpayne@69 782
jpayne@69 783
jpayne@69 784 using NumberFormat::format;
jpayne@69 785
jpayne@69 786 /**
jpayne@69 787 * Formats the specified 32-bit number using the default ruleset.
jpayne@69 788 * @param number The number to format.
jpayne@69 789 * @param toAppendTo the string that will hold the (appended) result
jpayne@69 790 * @param pos the fieldposition
jpayne@69 791 * @return A textual representation of the number.
jpayne@69 792 * @stable ICU 2.0
jpayne@69 793 */
jpayne@69 794 virtual UnicodeString& format(int32_t number,
jpayne@69 795 UnicodeString& toAppendTo,
jpayne@69 796 FieldPosition& pos) const;
jpayne@69 797
jpayne@69 798 /**
jpayne@69 799 * Formats the specified 64-bit number using the default ruleset.
jpayne@69 800 * @param number The number to format.
jpayne@69 801 * @param toAppendTo the string that will hold the (appended) result
jpayne@69 802 * @param pos the fieldposition
jpayne@69 803 * @return A textual representation of the number.
jpayne@69 804 * @stable ICU 2.1
jpayne@69 805 */
jpayne@69 806 virtual UnicodeString& format(int64_t number,
jpayne@69 807 UnicodeString& toAppendTo,
jpayne@69 808 FieldPosition& pos) const;
jpayne@69 809 /**
jpayne@69 810 * Formats the specified number using the default ruleset.
jpayne@69 811 * @param number The number to format.
jpayne@69 812 * @param toAppendTo the string that will hold the (appended) result
jpayne@69 813 * @param pos the fieldposition
jpayne@69 814 * @return A textual representation of the number.
jpayne@69 815 * @stable ICU 2.0
jpayne@69 816 */
jpayne@69 817 virtual UnicodeString& format(double number,
jpayne@69 818 UnicodeString& toAppendTo,
jpayne@69 819 FieldPosition& pos) const;
jpayne@69 820
jpayne@69 821 /**
jpayne@69 822 * Formats the specified number using the named ruleset.
jpayne@69 823 * @param number The number to format.
jpayne@69 824 * @param ruleSetName The name of the rule set to format the number with.
jpayne@69 825 * This must be the name of a valid public rule set for this formatter.
jpayne@69 826 * @param toAppendTo the string that will hold the (appended) result
jpayne@69 827 * @param pos the fieldposition
jpayne@69 828 * @param status the status
jpayne@69 829 * @return A textual representation of the number.
jpayne@69 830 * @stable ICU 2.0
jpayne@69 831 */
jpayne@69 832 virtual UnicodeString& format(int32_t number,
jpayne@69 833 const UnicodeString& ruleSetName,
jpayne@69 834 UnicodeString& toAppendTo,
jpayne@69 835 FieldPosition& pos,
jpayne@69 836 UErrorCode& status) const;
jpayne@69 837 /**
jpayne@69 838 * Formats the specified 64-bit number using the named ruleset.
jpayne@69 839 * @param number The number to format.
jpayne@69 840 * @param ruleSetName The name of the rule set to format the number with.
jpayne@69 841 * This must be the name of a valid public rule set for this formatter.
jpayne@69 842 * @param toAppendTo the string that will hold the (appended) result
jpayne@69 843 * @param pos the fieldposition
jpayne@69 844 * @param status the status
jpayne@69 845 * @return A textual representation of the number.
jpayne@69 846 * @stable ICU 2.1
jpayne@69 847 */
jpayne@69 848 virtual UnicodeString& format(int64_t number,
jpayne@69 849 const UnicodeString& ruleSetName,
jpayne@69 850 UnicodeString& toAppendTo,
jpayne@69 851 FieldPosition& pos,
jpayne@69 852 UErrorCode& status) const;
jpayne@69 853 /**
jpayne@69 854 * Formats the specified number using the named ruleset.
jpayne@69 855 * @param number The number to format.
jpayne@69 856 * @param ruleSetName The name of the rule set to format the number with.
jpayne@69 857 * This must be the name of a valid public rule set for this formatter.
jpayne@69 858 * @param toAppendTo the string that will hold the (appended) result
jpayne@69 859 * @param pos the fieldposition
jpayne@69 860 * @param status the status
jpayne@69 861 * @return A textual representation of the number.
jpayne@69 862 * @stable ICU 2.0
jpayne@69 863 */
jpayne@69 864 virtual UnicodeString& format(double number,
jpayne@69 865 const UnicodeString& ruleSetName,
jpayne@69 866 UnicodeString& toAppendTo,
jpayne@69 867 FieldPosition& pos,
jpayne@69 868 UErrorCode& status) const;
jpayne@69 869
jpayne@69 870 protected:
jpayne@69 871 /**
jpayne@69 872 * Format a decimal number.
jpayne@69 873 * The number is a DecimalQuantity wrapper onto a floating point decimal number.
jpayne@69 874 * The default implementation in NumberFormat converts the decimal number
jpayne@69 875 * to a double and formats that. Subclasses of NumberFormat that want
jpayne@69 876 * to specifically handle big decimal numbers must override this method.
jpayne@69 877 * class DecimalFormat does so.
jpayne@69 878 *
jpayne@69 879 * @param number The number, a DecimalQuantity format Decimal Floating Point.
jpayne@69 880 * @param appendTo Output parameter to receive result.
jpayne@69 881 * Result is appended to existing contents.
jpayne@69 882 * @param pos On input: an alignment field, if desired.
jpayne@69 883 * On output: the offsets of the alignment field.
jpayne@69 884 * @param status Output param filled with success/failure status.
jpayne@69 885 * @return Reference to 'appendTo' parameter.
jpayne@69 886 * @internal
jpayne@69 887 */
jpayne@69 888 virtual UnicodeString& format(const number::impl::DecimalQuantity &number,
jpayne@69 889 UnicodeString& appendTo,
jpayne@69 890 FieldPosition& pos,
jpayne@69 891 UErrorCode& status) const;
jpayne@69 892 public:
jpayne@69 893
jpayne@69 894 using NumberFormat::parse;
jpayne@69 895
jpayne@69 896 /**
jpayne@69 897 * Parses the specified string, beginning at the specified position, according
jpayne@69 898 * to this formatter's rules. This will match the string against all of the
jpayne@69 899 * formatter's public rule sets and return the value corresponding to the longest
jpayne@69 900 * parseable substring. This function's behavior is affected by the lenient
jpayne@69 901 * parse mode.
jpayne@69 902 * @param text The string to parse
jpayne@69 903 * @param result the result of the parse, either a double or a long.
jpayne@69 904 * @param parsePosition On entry, contains the position of the first character
jpayne@69 905 * in "text" to examine. On exit, has been updated to contain the position
jpayne@69 906 * of the first character in "text" that wasn't consumed by the parse.
jpayne@69 907 * @see #setLenient
jpayne@69 908 * @stable ICU 2.0
jpayne@69 909 */
jpayne@69 910 virtual void parse(const UnicodeString& text,
jpayne@69 911 Formattable& result,
jpayne@69 912 ParsePosition& parsePosition) const;
jpayne@69 913
jpayne@69 914 #if !UCONFIG_NO_COLLATION
jpayne@69 915
jpayne@69 916 /**
jpayne@69 917 * Turns lenient parse mode on and off.
jpayne@69 918 *
jpayne@69 919 * When in lenient parse mode, the formatter uses a Collator for parsing the text.
jpayne@69 920 * Only primary differences are treated as significant. This means that case
jpayne@69 921 * differences, accent differences, alternate spellings of the same letter
jpayne@69 922 * (e.g., ae and a-umlaut in German), ignorable characters, etc. are ignored in
jpayne@69 923 * matching the text. In many cases, numerals will be accepted in place of words
jpayne@69 924 * or phrases as well.
jpayne@69 925 *
jpayne@69 926 * For example, all of the following will correctly parse as 255 in English in
jpayne@69 927 * lenient-parse mode:
jpayne@69 928 * <br>"two hundred fifty-five"
jpayne@69 929 * <br>"two hundred fifty five"
jpayne@69 930 * <br>"TWO HUNDRED FIFTY-FIVE"
jpayne@69 931 * <br>"twohundredfiftyfive"
jpayne@69 932 * <br>"2 hundred fifty-5"
jpayne@69 933 *
jpayne@69 934 * The Collator used is determined by the locale that was
jpayne@69 935 * passed to this object on construction. The description passed to this object
jpayne@69 936 * on construction may supply additional collation rules that are appended to the
jpayne@69 937 * end of the default collator for the locale, enabling additional equivalences
jpayne@69 938 * (such as adding more ignorable characters or permitting spelled-out version of
jpayne@69 939 * symbols; see the demo program for examples).
jpayne@69 940 *
jpayne@69 941 * It's important to emphasize that even strict parsing is relatively lenient: it
jpayne@69 942 * will accept some text that it won't produce as output. In English, for example,
jpayne@69 943 * it will correctly parse "two hundred zero" and "fifteen hundred".
jpayne@69 944 *
jpayne@69 945 * @param enabled If true, turns lenient-parse mode on; if false, turns it off.
jpayne@69 946 * @see RuleBasedCollator
jpayne@69 947 * @stable ICU 2.0
jpayne@69 948 */
jpayne@69 949 virtual void setLenient(UBool enabled);
jpayne@69 950
jpayne@69 951 /**
jpayne@69 952 * Returns true if lenient-parse mode is turned on. Lenient parsing is off
jpayne@69 953 * by default.
jpayne@69 954 * @return true if lenient-parse mode is turned on.
jpayne@69 955 * @see #setLenient
jpayne@69 956 * @stable ICU 2.0
jpayne@69 957 */
jpayne@69 958 virtual inline UBool isLenient(void) const;
jpayne@69 959
jpayne@69 960 #endif
jpayne@69 961
jpayne@69 962 /**
jpayne@69 963 * Override the default rule set to use. If ruleSetName is null, reset
jpayne@69 964 * to the initial default rule set. If the rule set is not a public rule set name,
jpayne@69 965 * U_ILLEGAL_ARGUMENT_ERROR is returned in status.
jpayne@69 966 * @param ruleSetName the name of the rule set, or null to reset the initial default.
jpayne@69 967 * @param status set to failure code when a problem occurs.
jpayne@69 968 * @stable ICU 2.6
jpayne@69 969 */
jpayne@69 970 virtual void setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status);
jpayne@69 971
jpayne@69 972 /**
jpayne@69 973 * Return the name of the current default rule set. If the current rule set is
jpayne@69 974 * not public, returns a bogus (and empty) UnicodeString.
jpayne@69 975 * @return the name of the current default rule set
jpayne@69 976 * @stable ICU 3.0
jpayne@69 977 */
jpayne@69 978 virtual UnicodeString getDefaultRuleSetName() const;
jpayne@69 979
jpayne@69 980 /**
jpayne@69 981 * Set a particular UDisplayContext value in the formatter, such as
jpayne@69 982 * UDISPCTX_CAPITALIZATION_FOR_STANDALONE. Note: For getContext, see
jpayne@69 983 * NumberFormat.
jpayne@69 984 * @param value The UDisplayContext value to set.
jpayne@69 985 * @param status Input/output status. If at entry this indicates a failure
jpayne@69 986 * status, the function will do nothing; otherwise this will be
jpayne@69 987 * updated with any new status from the function.
jpayne@69 988 * @stable ICU 53
jpayne@69 989 */
jpayne@69 990 virtual void setContext(UDisplayContext value, UErrorCode& status);
jpayne@69 991
jpayne@69 992 /**
jpayne@69 993 * Get the rounding mode.
jpayne@69 994 * @return A rounding mode
jpayne@69 995 * @stable ICU 60
jpayne@69 996 */
jpayne@69 997 virtual ERoundingMode getRoundingMode(void) const;
jpayne@69 998
jpayne@69 999 /**
jpayne@69 1000 * Set the rounding mode.
jpayne@69 1001 * @param roundingMode A rounding mode
jpayne@69 1002 * @stable ICU 60
jpayne@69 1003 */
jpayne@69 1004 virtual void setRoundingMode(ERoundingMode roundingMode);
jpayne@69 1005
jpayne@69 1006 public:
jpayne@69 1007 /**
jpayne@69 1008 * ICU "poor man's RTTI", returns a UClassID for this class.
jpayne@69 1009 *
jpayne@69 1010 * @stable ICU 2.8
jpayne@69 1011 */
jpayne@69 1012 static UClassID U_EXPORT2 getStaticClassID(void);
jpayne@69 1013
jpayne@69 1014 /**
jpayne@69 1015 * ICU "poor man's RTTI", returns a UClassID for the actual class.
jpayne@69 1016 *
jpayne@69 1017 * @stable ICU 2.8
jpayne@69 1018 */
jpayne@69 1019 virtual UClassID getDynamicClassID(void) const;
jpayne@69 1020
jpayne@69 1021 /**
jpayne@69 1022 * Sets the decimal format symbols, which are generally not changed
jpayne@69 1023 * by the programmer or user. The formatter takes ownership of
jpayne@69 1024 * symbolsToAdopt; the client must not delete it.
jpayne@69 1025 *
jpayne@69 1026 * @param symbolsToAdopt DecimalFormatSymbols to be adopted.
jpayne@69 1027 * @stable ICU 49
jpayne@69 1028 */
jpayne@69 1029 virtual void adoptDecimalFormatSymbols(DecimalFormatSymbols* symbolsToAdopt);
jpayne@69 1030
jpayne@69 1031 /**
jpayne@69 1032 * Sets the decimal format symbols, which are generally not changed
jpayne@69 1033 * by the programmer or user. A clone of the symbols is created and
jpayne@69 1034 * the symbols are _not_ adopted; the client is still responsible for
jpayne@69 1035 * deleting them.
jpayne@69 1036 *
jpayne@69 1037 * @param symbols DecimalFormatSymbols.
jpayne@69 1038 * @stable ICU 49
jpayne@69 1039 */
jpayne@69 1040 virtual void setDecimalFormatSymbols(const DecimalFormatSymbols& symbols);
jpayne@69 1041
jpayne@69 1042 private:
jpayne@69 1043 RuleBasedNumberFormat(); // default constructor not implemented
jpayne@69 1044
jpayne@69 1045 // this will ref the localizations if they are not NULL
jpayne@69 1046 // caller must deref to get adoption
jpayne@69 1047 RuleBasedNumberFormat(const UnicodeString& description, LocalizationInfo* localizations,
jpayne@69 1048 const Locale& locale, UParseError& perror, UErrorCode& status);
jpayne@69 1049
jpayne@69 1050 void init(const UnicodeString& rules, LocalizationInfo* localizations, UParseError& perror, UErrorCode& status);
jpayne@69 1051 void initCapitalizationContextInfo(const Locale& thelocale);
jpayne@69 1052 void dispose();
jpayne@69 1053 void stripWhitespace(UnicodeString& src);
jpayne@69 1054 void initDefaultRuleSet();
jpayne@69 1055 NFRuleSet* findRuleSet(const UnicodeString& name, UErrorCode& status) const;
jpayne@69 1056
jpayne@69 1057 /* friend access */
jpayne@69 1058 friend class NFSubstitution;
jpayne@69 1059 friend class NFRule;
jpayne@69 1060 friend class NFRuleSet;
jpayne@69 1061 friend class FractionalPartSubstitution;
jpayne@69 1062
jpayne@69 1063 inline NFRuleSet * getDefaultRuleSet() const;
jpayne@69 1064 const RuleBasedCollator * getCollator() const;
jpayne@69 1065 DecimalFormatSymbols * initializeDecimalFormatSymbols(UErrorCode &status);
jpayne@69 1066 const DecimalFormatSymbols * getDecimalFormatSymbols() const;
jpayne@69 1067 NFRule * initializeDefaultInfinityRule(UErrorCode &status);
jpayne@69 1068 const NFRule * getDefaultInfinityRule() const;
jpayne@69 1069 NFRule * initializeDefaultNaNRule(UErrorCode &status);
jpayne@69 1070 const NFRule * getDefaultNaNRule() const;
jpayne@69 1071 PluralFormat *createPluralFormat(UPluralType pluralType, const UnicodeString &pattern, UErrorCode& status) const;
jpayne@69 1072 UnicodeString& adjustForCapitalizationContext(int32_t startPos, UnicodeString& currentResult, UErrorCode& status) const;
jpayne@69 1073 UnicodeString& format(int64_t number, NFRuleSet *ruleSet, UnicodeString& toAppendTo, UErrorCode& status) const;
jpayne@69 1074 void format(double number, NFRuleSet& rs, UnicodeString& toAppendTo, UErrorCode& status) const;
jpayne@69 1075
jpayne@69 1076 private:
jpayne@69 1077 NFRuleSet **fRuleSets;
jpayne@69 1078 UnicodeString* ruleSetDescriptions;
jpayne@69 1079 int32_t numRuleSets;
jpayne@69 1080 NFRuleSet *defaultRuleSet; // value handed back by the inline getDefaultRuleSet()
jpayne@69 1081 Locale locale;
jpayne@69 1082 RuleBasedCollator* collator;
jpayne@69 1083 DecimalFormatSymbols* decimalFormatSymbols;
jpayne@69 1084 NFRule *defaultInfinityRule;
jpayne@69 1085 NFRule *defaultNaNRule;
jpayne@69 1086 ERoundingMode fRoundingMode;
jpayne@69 1087 UBool lenient; // lenient-parse flag reported by isLenient()
jpayne@69 1088 UnicodeString* lenientParseRules;
jpayne@69 1089 LocalizationInfo* localizations;
jpayne@69 1090 UnicodeString originalDescription;
jpayne@69 1091 UBool capitalizationInfoSet;
jpayne@69 1092 UBool capitalizationForUIListMenu;
jpayne@69 1093 UBool capitalizationForStandAlone;
jpayne@69 1094 BreakIterator* capitalizationBrkIter;
jpayne@69 1095 };
jpayne@69 1096
jpayne@69 1097 // ---------------
jpayne@69 1098
jpayne@69 1099 #if !UCONFIG_NO_COLLATION
jpayne@69 1100
jpayne@69 1101 // Accessor for the lenient-parse flag; hands back the stored member unchanged.
jpayne@69 1102 inline UBool RuleBasedNumberFormat::isLenient(void) const {
jpayne@69 1103 return this->lenient;
jpayne@69 1104 }
jpayne@69 1105
jpayne@69 1106 #endif
jpayne@69 1107
jpayne@69 1108 // Private accessor: yields the rule set pointer currently stored as the default.
jpayne@69 1109 inline NFRuleSet* RuleBasedNumberFormat::getDefaultRuleSet() const {
jpayne@69 1110 return this->defaultRuleSet;
jpayne@69 1111 }
jpayne@69 1112
jpayne@69 1113 U_NAMESPACE_END
jpayne@69 1114
jpayne@69 1115 /* U_HAVE_RBNF */
jpayne@69 1116 #endif
jpayne@69 1117
jpayne@69 1118 #endif /* U_SHOW_CPLUSPLUS_API */
jpayne@69 1119
jpayne@69 1120 /* RBNF_H */
jpayne@69 1121 #endif