annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/include/unicode/stringtriebuilder.h @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
rev   line source
jpayne@69 1 // © 2016 and later: Unicode, Inc. and others.
jpayne@69 2 // License & terms of use: http://www.unicode.org/copyright.html
jpayne@69 3 /*
jpayne@69 4 *******************************************************************************
jpayne@69 5 * Copyright (C) 2010-2012,2014, International Business Machines
jpayne@69 6 * Corporation and others. All Rights Reserved.
jpayne@69 7 *******************************************************************************
jpayne@69 8 * file name: stringtriebuilder.h
jpayne@69 9 * encoding: UTF-8
jpayne@69 10 * tab size: 8 (not used)
jpayne@69 11 * indentation:4
jpayne@69 12 *
jpayne@69 13 * created on: 2010dec24
jpayne@69 14 * created by: Markus W. Scherer
jpayne@69 15 */
jpayne@69 16
jpayne@69 17 #ifndef __STRINGTRIEBUILDER_H__
jpayne@69 18 #define __STRINGTRIEBUILDER_H__
jpayne@69 19
jpayne@69 20 #include "unicode/utypes.h"
jpayne@69 21
jpayne@69 22 #if U_SHOW_CPLUSPLUS_API
jpayne@69 23
jpayne@69 24 #include "unicode/uobject.h"
jpayne@69 25
jpayne@69 26 /**
jpayne@69 27 * \file
jpayne@69 28 * \brief C++ API: Builder API for trie builders
jpayne@69 29 */
jpayne@69 30
jpayne@69 31 // Forward declaration.
jpayne@69 32 /// \cond
jpayne@69 33 struct UHashtable;
jpayne@69 34 typedef struct UHashtable UHashtable;
jpayne@69 35 /// \endcond
jpayne@69 36
jpayne@69 37 /**
jpayne@69 38 * Build options for BytesTrieBuilder and CharsTrieBuilder.
jpayne@69 39 * @stable ICU 4.8
jpayne@69 40 */
jpayne@69 41 enum UStringTrieBuildOption {
jpayne@69 42 /**
jpayne@69 43 * Builds a trie quickly.
jpayne@69 44 * @stable ICU 4.8
jpayne@69 45 */
jpayne@69 46 USTRINGTRIE_BUILD_FAST,
jpayne@69 47 /**
jpayne@69 48 * Builds a trie more slowly, attempting to generate
jpayne@69 49 * a shorter but equivalent serialization.
jpayne@69 50 * This build option also uses more memory.
jpayne@69 51 *
jpayne@69 52 * This option can be effective when many integer values are the same
jpayne@69 53 * and string/byte sequence suffixes can be shared.
jpayne@69 54 * Runtime speed is not expected to improve.
jpayne@69 55 * @stable ICU 4.8
jpayne@69 56 */
jpayne@69 57 USTRINGTRIE_BUILD_SMALL
jpayne@69 58 };
jpayne@69 59
jpayne@69 60 U_NAMESPACE_BEGIN
jpayne@69 61
jpayne@69 62 /**
jpayne@69 63 * Base class for string trie builder classes.
jpayne@69 64 *
jpayne@69 65 * This class is not intended for public subclassing.
jpayne@69 66 * @stable ICU 4.8
jpayne@69 67 */
jpayne@69 68 class U_COMMON_API StringTrieBuilder : public UObject {
jpayne@69 69 public:
jpayne@69 70 #ifndef U_HIDE_INTERNAL_API
jpayne@69 71 /** @internal */
jpayne@69 72 static int32_t hashNode(const void *node);
jpayne@69 73 /** @internal */
jpayne@69 74 static UBool equalNodes(const void *left, const void *right);
jpayne@69 75 #endif /* U_HIDE_INTERNAL_API */
jpayne@69 76
jpayne@69 77 protected:
jpayne@69 78 // Do not enclose the protected default constructor with #ifndef U_HIDE_INTERNAL_API
jpayne@69 79 // or else the compiler will create a public default constructor.
jpayne@69 80 /** @internal */
jpayne@69 81 StringTrieBuilder();
jpayne@69 82 /** @internal */
jpayne@69 83 virtual ~StringTrieBuilder();
jpayne@69 84
jpayne@69 85 #ifndef U_HIDE_INTERNAL_API
jpayne@69 86 /** @internal */
jpayne@69 87 void createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode);
jpayne@69 88 /** @internal */
jpayne@69 89 void deleteCompactBuilder();
jpayne@69 90
jpayne@69 91 /** @internal */
jpayne@69 92 void build(UStringTrieBuildOption buildOption, int32_t elementsLength, UErrorCode &errorCode);
jpayne@69 93
jpayne@69 94 /** @internal */
jpayne@69 95 int32_t writeNode(int32_t start, int32_t limit, int32_t unitIndex);
jpayne@69 96 /** @internal */
jpayne@69 97 int32_t writeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, int32_t length);
jpayne@69 98 #endif /* U_HIDE_INTERNAL_API */
jpayne@69 99
jpayne@69 100 class Node;
jpayne@69 101
jpayne@69 102 #ifndef U_HIDE_INTERNAL_API
jpayne@69 103 /** @internal */
jpayne@69 104 Node *makeNode(int32_t start, int32_t limit, int32_t unitIndex, UErrorCode &errorCode);
jpayne@69 105 /** @internal */
jpayne@69 106 Node *makeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex,
jpayne@69 107 int32_t length, UErrorCode &errorCode);
jpayne@69 108 #endif /* U_HIDE_INTERNAL_API */
jpayne@69 109
jpayne@69 110 /** @internal */
jpayne@69 111 virtual int32_t getElementStringLength(int32_t i) const = 0;
jpayne@69 112 /** @internal */
jpayne@69 113 virtual char16_t getElementUnit(int32_t i, int32_t unitIndex) const = 0;
jpayne@69 114 /** @internal */
jpayne@69 115 virtual int32_t getElementValue(int32_t i) const = 0;
jpayne@69 116
jpayne@69 117 // Finds the first unit index after this one where
jpayne@69 118 // the first and last element have different units again.
jpayne@69 119 /** @internal */
jpayne@69 120 virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t unitIndex) const = 0;
jpayne@69 121
jpayne@69 122 // Number of different units at unitIndex.
jpayne@69 123 /** @internal */
jpayne@69 124 virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t unitIndex) const = 0;
jpayne@69 125 /** @internal */
jpayne@69 126 virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t count) const = 0;
jpayne@69 127 /** @internal */
jpayne@69 128 virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, char16_t unit) const = 0;
jpayne@69 129
jpayne@69 130 /** @internal */
jpayne@69 131 virtual UBool matchNodesCanHaveValues() const = 0;
jpayne@69 132
jpayne@69 133 /** @internal */
jpayne@69 134 virtual int32_t getMaxBranchLinearSubNodeLength() const = 0;
jpayne@69 135 /** @internal */
jpayne@69 136 virtual int32_t getMinLinearMatch() const = 0;
jpayne@69 137 /** @internal */
jpayne@69 138 virtual int32_t getMaxLinearMatchLength() const = 0;
jpayne@69 139
jpayne@69 140 #ifndef U_HIDE_INTERNAL_API
jpayne@69 141 // max(BytesTrie::kMaxBranchLinearSubNodeLength, UCharsTrie::kMaxBranchLinearSubNodeLength).
jpayne@69 142 /** @internal */
jpayne@69 143 static const int32_t kMaxBranchLinearSubNodeLength=5;
jpayne@69 144
jpayne@69 145 // Maximum number of nested split-branch levels for a branch on all 2^16 possible char16_t units.
jpayne@69 146 // log2(2^16/kMaxBranchLinearSubNodeLength) rounded up.
jpayne@69 147 /** @internal */
jpayne@69 148 static const int32_t kMaxSplitBranchLevels=14;
jpayne@69 149
jpayne@69 150 /**
jpayne@69 151 * Makes sure that there is only one unique node registered that is
jpayne@69 152 * equivalent to newNode.
jpayne@69 153 * @param newNode Input node. The builder takes ownership.
jpayne@69 154 * @param errorCode ICU in/out UErrorCode.
jpayne@69 155 Set to U_MEMORY_ALLOCATION_ERROR if it was success but newNode==NULL.
jpayne@69 156 * @return newNode if it is the first of its kind, or
jpayne@69 157 * an equivalent node if newNode is a duplicate.
jpayne@69 158 * @internal
jpayne@69 159 */
jpayne@69 160 Node *registerNode(Node *newNode, UErrorCode &errorCode);
jpayne@69 161 /**
jpayne@69 162 * Makes sure that there is only one unique FinalValueNode registered
jpayne@69 163 * with this value.
jpayne@69 164 * Avoids creating a node if the value is a duplicate.
jpayne@69 165 * @param value A final value.
jpayne@69 166 * @param errorCode ICU in/out UErrorCode.
jpayne@69 167 Set to U_MEMORY_ALLOCATION_ERROR if it was success but newNode==NULL.
jpayne@69 168 * @return A FinalValueNode with the given value.
jpayne@69 169 * @internal
jpayne@69 170 */
jpayne@69 171 Node *registerFinalValue(int32_t value, UErrorCode &errorCode);
jpayne@69 172 #endif /* U_HIDE_INTERNAL_API */
jpayne@69 173
jpayne@69 174 /*
jpayne@69 175 * C++ note:
jpayne@69 176 * registerNode() and registerFinalValue() take ownership of their input nodes,
jpayne@69 177 * and only return owned nodes.
jpayne@69 178 * If they see a failure UErrorCode, they will delete the input node.
jpayne@69 179 * If they get a NULL pointer, they will record a U_MEMORY_ALLOCATION_ERROR.
jpayne@69 180 * If there is a failure, they return NULL.
jpayne@69 181 *
jpayne@69 182 * NULL Node pointers can be safely passed into other Nodes because
jpayne@69 183 * they call the static Node::hashCode() which checks for a NULL pointer first.
jpayne@69 184 *
jpayne@69 185 * Therefore, as long as builder functions register a new node,
jpayne@69 186 * they need to check for failures only before explicitly dereferencing
jpayne@69 187 * a Node pointer, or before setting a new UErrorCode.
jpayne@69 188 */
jpayne@69 189
jpayne@69 190 // Hash set of nodes, maps from nodes to integer 1.
jpayne@69 191 /** @internal */
jpayne@69 192 UHashtable *nodes;
jpayne@69 193
jpayne@69 194 // Do not conditionalize the following with #ifndef U_HIDE_INTERNAL_API,
jpayne@69 195 // it is needed for layout of other objects.
jpayne@69 196 /**
jpayne@69 197 * @internal
jpayne@69 198 * \cond
jpayne@69 199 */
jpayne@69 200 class Node : public UObject {
jpayne@69 201 public:
jpayne@69 202 Node(int32_t initialHash) : hash(initialHash), offset(0) {}
jpayne@69 203 inline int32_t hashCode() const { return hash; }
jpayne@69 204 // Handles node==NULL.
jpayne@69 205 static inline int32_t hashCode(const Node *node) { return node==NULL ? 0 : node->hashCode(); }
jpayne@69 206 // Base class operator==() compares the actual class types.
jpayne@69 207 virtual UBool operator==(const Node &other) const;
jpayne@69 208 inline UBool operator!=(const Node &other) const { return !operator==(other); }
jpayne@69 209 /**
jpayne@69 210 * Traverses the Node graph and numbers branch edges, with rightmost edges first.
jpayne@69 211 * This is to avoid writing a duplicate node twice.
jpayne@69 212 *
jpayne@69 213 * Branch nodes in this trie data structure are not symmetric.
jpayne@69 214 * Most branch edges "jump" to other nodes but the rightmost branch edges
jpayne@69 215 * just continue without a jump.
jpayne@69 216 * Therefore, write() must write the rightmost branch edge last
jpayne@69 217 * (trie units are written backwards), and must write it at that point even if
jpayne@69 218 * it is a duplicate of a node previously written elsewhere.
jpayne@69 219 *
jpayne@69 220 * This function visits and marks right branch edges first.
jpayne@69 221 * Edges are numbered with increasingly negative values because we share the
jpayne@69 222 * offset field which gets positive values when nodes are written.
jpayne@69 223 * A branch edge also remembers the first number for any of its edges.
jpayne@69 224 *
jpayne@69 225 * When a further-left branch edge has a number in the range of the rightmost
jpayne@69 226 * edge's numbers, then it will be written as part of the required right edge
jpayne@69 227 * and we can avoid writing it first.
jpayne@69 228 *
jpayne@69 229 * After root.markRightEdgesFirst(-1) the offsets of all nodes are negative
jpayne@69 230 * edge numbers.
jpayne@69 231 *
jpayne@69 232 * @param edgeNumber The first edge number for this node and its sub-nodes.
jpayne@69 233 * @return An edge number that is at least the maximum-negative
jpayne@69 234 * of the input edge number and the numbers of this node and all of its sub-nodes.
jpayne@69 235 */
jpayne@69 236 virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
jpayne@69 237 // write() must set the offset to a positive value.
jpayne@69 238 virtual void write(StringTrieBuilder &builder) = 0;
jpayne@69 239 // See markRightEdgesFirst.
jpayne@69 240 inline void writeUnlessInsideRightEdge(int32_t firstRight, int32_t lastRight,
jpayne@69 241 StringTrieBuilder &builder) {
jpayne@69 242 // Note: Edge numbers are negative, lastRight<=firstRight.
jpayne@69 243 // If offset>0 then this node and its sub-nodes have been written already
jpayne@69 244 // and we need not write them again.
jpayne@69 245 // If this node is part of the unwritten right branch edge,
jpayne@69 246 // then we wait until that is written.
jpayne@69 247 if(offset<0 && (offset<lastRight || firstRight<offset)) {
jpayne@69 248 write(builder);
jpayne@69 249 }
jpayne@69 250 }
jpayne@69 251 inline int32_t getOffset() const { return offset; }
jpayne@69 252 protected:
jpayne@69 253 int32_t hash;
jpayne@69 254 int32_t offset;
jpayne@69 255 };
jpayne@69 256
jpayne@69 257 #ifndef U_HIDE_INTERNAL_API
jpayne@69 258 // This class should not be overridden because
jpayne@69 259 // registerFinalValue() compares a stack-allocated FinalValueNode
jpayne@69 260 // (stack-allocated so that we don't unnecessarily create lots of duplicate nodes)
jpayne@69 261 // with the input node, and the
jpayne@69 262 // !Node::operator==(other) used inside FinalValueNode::operator==(other)
jpayne@69 263 // will be false if the typeid's are different.
jpayne@69 264 /** @internal */
jpayne@69 265 class FinalValueNode : public Node {
jpayne@69 266 public:
jpayne@69 267 FinalValueNode(int32_t v) : Node(0x111111u*37u+v), value(v) {}
jpayne@69 268 virtual UBool operator==(const Node &other) const;
jpayne@69 269 virtual void write(StringTrieBuilder &builder);
jpayne@69 270 protected:
jpayne@69 271 int32_t value;
jpayne@69 272 };
jpayne@69 273 #endif /* U_HIDE_INTERNAL_API */
jpayne@69 274
jpayne@69 275 // Do not conditionalize the following with #ifndef U_HIDE_INTERNAL_API,
jpayne@69 276 // it is needed for layout of other objects.
jpayne@69 277 /**
jpayne@69 278 * @internal
jpayne@69 279 */
jpayne@69 280 class ValueNode : public Node {
jpayne@69 281 public:
jpayne@69 282 ValueNode(int32_t initialHash) : Node(initialHash), hasValue(FALSE), value(0) {}
jpayne@69 283 virtual UBool operator==(const Node &other) const;
jpayne@69 284 void setValue(int32_t v) {
jpayne@69 285 hasValue=TRUE;
jpayne@69 286 value=v;
jpayne@69 287 hash=hash*37u+v;
jpayne@69 288 }
jpayne@69 289 protected:
jpayne@69 290 UBool hasValue;
jpayne@69 291 int32_t value;
jpayne@69 292 };
jpayne@69 293
jpayne@69 294 #ifndef U_HIDE_INTERNAL_API
jpayne@69 295 /**
jpayne@69 296 * @internal
jpayne@69 297 */
jpayne@69 298 class IntermediateValueNode : public ValueNode {
jpayne@69 299 public:
jpayne@69 300 IntermediateValueNode(int32_t v, Node *nextNode)
jpayne@69 301 : ValueNode(0x222222u*37u+hashCode(nextNode)), next(nextNode) { setValue(v); }
jpayne@69 302 virtual UBool operator==(const Node &other) const;
jpayne@69 303 virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
jpayne@69 304 virtual void write(StringTrieBuilder &builder);
jpayne@69 305 protected:
jpayne@69 306 Node *next;
jpayne@69 307 };
jpayne@69 308 #endif /* U_HIDE_INTERNAL_API */
jpayne@69 309
jpayne@69 310 // Do not conditionalize the following with #ifndef U_HIDE_INTERNAL_API,
jpayne@69 311 // it is needed for layout of other objects.
jpayne@69 312 /**
jpayne@69 313 * @internal
jpayne@69 314 */
jpayne@69 315 class LinearMatchNode : public ValueNode {
jpayne@69 316 public:
jpayne@69 317 LinearMatchNode(int32_t len, Node *nextNode)
jpayne@69 318 : ValueNode((0x333333u*37u+len)*37u+hashCode(nextNode)),
jpayne@69 319 length(len), next(nextNode) {}
jpayne@69 320 virtual UBool operator==(const Node &other) const;
jpayne@69 321 virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
jpayne@69 322 protected:
jpayne@69 323 int32_t length;
jpayne@69 324 Node *next;
jpayne@69 325 };
jpayne@69 326
jpayne@69 327 #ifndef U_HIDE_INTERNAL_API
jpayne@69 328 /**
jpayne@69 329 * @internal
jpayne@69 330 */
jpayne@69 331 class BranchNode : public Node {
jpayne@69 332 public:
jpayne@69 333 BranchNode(int32_t initialHash) : Node(initialHash) {}
jpayne@69 334 protected:
jpayne@69 335 int32_t firstEdgeNumber;
jpayne@69 336 };
jpayne@69 337
jpayne@69 338 /**
jpayne@69 339 * @internal
jpayne@69 340 */
jpayne@69 341 class ListBranchNode : public BranchNode {
jpayne@69 342 public:
jpayne@69 343 ListBranchNode() : BranchNode(0x444444), length(0) {}
jpayne@69 344 virtual UBool operator==(const Node &other) const;
jpayne@69 345 virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
jpayne@69 346 virtual void write(StringTrieBuilder &builder);
jpayne@69 347 // Adds a unit with a final value.
jpayne@69 348 void add(int32_t c, int32_t value) {
jpayne@69 349 units[length]=(char16_t)c;
jpayne@69 350 equal[length]=NULL;
jpayne@69 351 values[length]=value;
jpayne@69 352 ++length;
jpayne@69 353 hash=(hash*37u+c)*37u+value;
jpayne@69 354 }
jpayne@69 355 // Adds a unit which leads to another match node.
jpayne@69 356 void add(int32_t c, Node *node) {
jpayne@69 357 units[length]=(char16_t)c;
jpayne@69 358 equal[length]=node;
jpayne@69 359 values[length]=0;
jpayne@69 360 ++length;
jpayne@69 361 hash=(hash*37u+c)*37u+hashCode(node);
jpayne@69 362 }
jpayne@69 363 protected:
jpayne@69 364 Node *equal[kMaxBranchLinearSubNodeLength]; // NULL means "has final value".
jpayne@69 365 int32_t length;
jpayne@69 366 int32_t values[kMaxBranchLinearSubNodeLength];
jpayne@69 367 char16_t units[kMaxBranchLinearSubNodeLength];
jpayne@69 368 };
jpayne@69 369
jpayne@69 370 /**
jpayne@69 371 * @internal
jpayne@69 372 */
jpayne@69 373 class SplitBranchNode : public BranchNode {
jpayne@69 374 public:
jpayne@69 375 SplitBranchNode(char16_t middleUnit, Node *lessThanNode, Node *greaterOrEqualNode)
jpayne@69 376 : BranchNode(((0x555555u*37u+middleUnit)*37u+
jpayne@69 377 hashCode(lessThanNode))*37u+hashCode(greaterOrEqualNode)),
jpayne@69 378 unit(middleUnit), lessThan(lessThanNode), greaterOrEqual(greaterOrEqualNode) {}
jpayne@69 379 virtual UBool operator==(const Node &other) const;
jpayne@69 380 virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
jpayne@69 381 virtual void write(StringTrieBuilder &builder);
jpayne@69 382 protected:
jpayne@69 383 char16_t unit;
jpayne@69 384 Node *lessThan;
jpayne@69 385 Node *greaterOrEqual;
jpayne@69 386 };
jpayne@69 387
jpayne@69 388 // Branch head node, for writing the actual node lead unit.
jpayne@69 389 /** @internal */
jpayne@69 390 class BranchHeadNode : public ValueNode {
jpayne@69 391 public:
jpayne@69 392 BranchHeadNode(int32_t len, Node *subNode)
jpayne@69 393 : ValueNode((0x666666u*37u+len)*37u+hashCode(subNode)),
jpayne@69 394 length(len), next(subNode) {}
jpayne@69 395 virtual UBool operator==(const Node &other) const;
jpayne@69 396 virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
jpayne@69 397 virtual void write(StringTrieBuilder &builder);
jpayne@69 398 protected:
jpayne@69 399 int32_t length;
jpayne@69 400 Node *next; // A branch sub-node.
jpayne@69 401 };
jpayne@69 402
jpayne@69 403 #endif /* U_HIDE_INTERNAL_API */
jpayne@69 404 /// \endcond
jpayne@69 405
jpayne@69 406 /** @internal */
jpayne@69 407 virtual Node *createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t length,
jpayne@69 408 Node *nextNode) const = 0;
jpayne@69 409
jpayne@69 410 /** @internal */
jpayne@69 411 virtual int32_t write(int32_t unit) = 0;
jpayne@69 412 /** @internal */
jpayne@69 413 virtual int32_t writeElementUnits(int32_t i, int32_t unitIndex, int32_t length) = 0;
jpayne@69 414 /** @internal */
jpayne@69 415 virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal) = 0;
jpayne@69 416 /** @internal */
jpayne@69 417 virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node) = 0;
jpayne@69 418 /** @internal */
jpayne@69 419 virtual int32_t writeDeltaTo(int32_t jumpTarget) = 0;
jpayne@69 420 };
jpayne@69 421
jpayne@69 422 U_NAMESPACE_END
jpayne@69 423
jpayne@69 424 #endif /* U_SHOW_CPLUSPLUS_API */
jpayne@69 425
jpayne@69 426 #endif // __STRINGTRIEBUILDER_H__