tclStringRep.h 4.1 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697
  1. /*
  2. * tclStringRep.h --
  3. *
  4. * This file contains the definition of the Unicode string internal
  5. * representation and macros to access it.
  6. *
  7. * A Unicode string is an internationalized string. Conceptually, a
  8. * Unicode string is an array of 16-bit quantities organized as a
  9. * sequence of properly formed UTF-8 characters. There is a one-to-one
  10. * map between Unicode and UTF characters. Because Unicode characters
  11. * have a fixed width, operations such as indexing operate on Unicode
  12. * data. The String object is optimized for the case where each UTF char
  13. * in a string is only one byte. In this case, we store the value of
  14. * numChars, but we don't store the Unicode data (unless Tcl_GetUnicode
  15. * is explicitly called).
  16. *
  17. * The String object type stores one or both formats. The default
  18. * behavior is to store UTF. Once Unicode is calculated by a function, it
  19. * is stored in the internal rep for future access (without an additional
  20. * O(n) cost).
  21. *
  22. * To allow many appends to be done to an object without constantly
  23. * reallocating the space for the string or Unicode representation, we
  24. * allocate double the space for the string or Unicode and use the
  25. * internal representation to keep track of how much space is used vs.
  26. * allocated.
  27. *
  28. * Copyright (c) 1995-1997 Sun Microsystems, Inc.
  29. * Copyright (c) 1999 by Scriptics Corporation.
  30. *
  31. * See the file "license.terms" for information on usage and redistribution of
  32. * this file, and for a DISCLAIMER OF ALL WARRANTIES.
  33. */
  34. /*
  35. * The following structure is the internal rep for a String object. It keeps
  36. * track of how much memory has been used and how much has been allocated for
  37. * the Unicode and UTF string to enable growing and shrinking of the UTF and
  38. * Unicode reps of the String object with fewer mallocs. To optimize string
  39. * length and indexing operations, this structure also stores the number of
  40. * characters (same for UTF and Unicode!) once that value has been computed.
  41. *
  42. * Under normal configurations, what Tcl calls "Unicode" is actually UTF-16
  43. * restricted to the Basic Multilingual Plane (i.e. U+0000 to U+FFFF). This
  44. * can be officially modified by altering the definition of Tcl_UniChar in
  45. * tcl.h, but do not do that unless you are sure what you're doing!
  46. */
  47. typedef struct String {
  48. int numChars; /* The number of chars in the string. -1 means
  49. * this value has not been calculated. >= 0
  50. * means that there is a valid Unicode rep, or
  51. * that the number of UTF bytes == the number
  52. * of chars. */
  53. int allocated; /* The amount of space actually allocated for
  54. * the UTF string (minus 1 byte for the
  55. * termination char). */
  56. int maxChars; /* Max number of chars that can fit in the
  57. * space allocated for the unicode array. */
  58. int hasUnicode; /* Boolean determining whether the string has
  59. * a Unicode representation. */
  60. Tcl_UniChar unicode[TCLFLEXARRAY]; /* The array of Unicode chars. The actual size
  61. * of this field depends on the 'maxChars'
  62. * field above. */
  63. } String;
  64. #define STRING_MAXCHARS \
  65. (int)(((size_t)UINT_MAX - 1 - TclOffset(String, unicode))/sizeof(Tcl_UniChar))
  66. #define STRING_SIZE(numChars) \
  67. (TclOffset(String, unicode) + ((numChars + 1) * sizeof(Tcl_UniChar)))
  68. #define stringCheckLimits(numChars) \
  69. do { \
  70. if ((numChars) < 0 || (numChars) > STRING_MAXCHARS) { \
  71. Tcl_Panic("max length for a Tcl unicode value (%d chars) exceeded", \
  72. STRING_MAXCHARS); \
  73. } \
  74. } while (0)
  75. #define stringAttemptAlloc(numChars) \
  76. (String *) attemptckalloc((unsigned) STRING_SIZE(numChars))
  77. #define stringAlloc(numChars) \
  78. (String *) ckalloc((unsigned) STRING_SIZE(numChars))
  79. #define stringRealloc(ptr, numChars) \
  80. (String *) ckrealloc((ptr), (unsigned) STRING_SIZE(numChars))
  81. #define stringAttemptRealloc(ptr, numChars) \
  82. (String *) attemptckrealloc((ptr), (unsigned) STRING_SIZE(numChars))
  83. #define GET_STRING(objPtr) \
  84. ((String *) (objPtr)->internalRep.twoPtrValue.ptr1)
  85. #define SET_STRING(objPtr, stringPtr) \
  86. ((objPtr)->internalRep.twoPtrValue.ptr1 = (void *) (stringPtr))
  87. /*
  88. * Local Variables:
  89. * mode: c
  90. * c-basic-offset: 4
  91. * fill-column: 78
  92. * End:
  93. */