ICU 52.1  52.1
unistr.h
Go to the documentation of this file.
1 /*
2 **********************************************************************
3 * Copyright (C) 1998-2013, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 *
7 * File unistr.h
8 *
9 * Modification History:
10 *
11 * Date Name Description
12 * 09/25/98 stephen Creation.
13 * 11/11/98 stephen Changed per 11/9 code review.
14 * 04/20/99 stephen Overhauled per 4/16 code review.
15 * 11/18/99 aliu Made to inherit from Replaceable. Added method
16 * handleReplaceBetween(); other methods unchanged.
17 * 06/25/01 grhoten Remove dependency on iostream.
18 ******************************************************************************
19 */
20 
21 #ifndef UNISTR_H
22 #define UNISTR_H
23 
29 #include "unicode/utypes.h"
30 #include "unicode/rep.h"
31 #include "unicode/std_string.h"
32 #include "unicode/stringpiece.h"
33 #include "unicode/bytestream.h"
34 #include "unicode/ucasemap.h"
35 
36 struct UConverter; // unicode/ucnv.h
37 class StringThreadTest;
38 
39 #ifndef U_COMPARE_CODE_POINT_ORDER
40 /* see also ustring.h and unorm.h */
46 #define U_COMPARE_CODE_POINT_ORDER 0x8000
47 #endif
48 
49 #ifndef USTRING_H
50 
53 U_STABLE int32_t U_EXPORT2
54 u_strlen(const UChar *s);
55 #endif
56 
61 #ifndef U_STRING_CASE_MAPPER_DEFINED
62 #define U_STRING_CASE_MAPPER_DEFINED
63 
68 typedef int32_t U_CALLCONV
70  UChar *dest, int32_t destCapacity,
71  const UChar *src, int32_t srcLength,
72  UErrorCode *pErrorCode);
73 
74 #endif
75 
77 
78 class BreakIterator; // unicode/brkiter.h
79 class Locale; // unicode/locid.h
80 class StringCharacterIterator;
81 class UnicodeStringAppendable; // unicode/appendable.h
82 
83 /* The <iostream> include has been moved to unicode/ustream.h */
84 
95 #define US_INV icu::UnicodeString::kInvariant
96 
114 #if defined(U_DECLARE_UTF16)
115 # define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length)
116 #elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
117 # define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)L ## cs, _length)
118 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
119 # define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)cs, _length)
120 #else
121 # define UNICODE_STRING(cs, _length) icu::UnicodeString(cs, _length, US_INV)
122 #endif
123 
137 #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
138 
146 #ifndef UNISTR_FROM_CHAR_EXPLICIT
147 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
148  // Auto-"explicit" in ICU library code.
149 # define UNISTR_FROM_CHAR_EXPLICIT explicit
150 # else
151  // Empty by default for source code compatibility.
152 # define UNISTR_FROM_CHAR_EXPLICIT
153 # endif
154 #endif
155 
166 #ifndef UNISTR_FROM_STRING_EXPLICIT
167 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
168  // Auto-"explicit" in ICU library code.
169 # define UNISTR_FROM_STRING_EXPLICIT explicit
170 # else
171  // Empty by default for source code compatibility.
172 # define UNISTR_FROM_STRING_EXPLICIT
173 # endif
174 #endif
175 
246 {
247 public:
248 
/**
 * Tag type that selects the "invariant character" overloads, i.e. the
 * constructor UnicodeString(const char *, int32_t, EInvariant) and
 * extract(int32_t, int32_t, char *, int32_t, EInvariant).
 * The US_INV macro expands to the single enumerator.
 */
enum EInvariant {
  /** The only value; passed to pick the invariant-character overload. */
  kInvariant
};
264 
265  //========================================
266  // Read-only operations
267  //========================================
268 
269  /* Comparison - bitwise only - for international comparison use collation */
270 
278  inline UBool operator== (const UnicodeString& text) const;
279 
287  inline UBool operator!= (const UnicodeString& text) const;
288 
296  inline UBool operator> (const UnicodeString& text) const;
297 
305  inline UBool operator< (const UnicodeString& text) const;
306 
314  inline UBool operator>= (const UnicodeString& text) const;
315 
323  inline UBool operator<= (const UnicodeString& text) const;
324 
336  inline int8_t compare(const UnicodeString& text) const;
337 
353  inline int8_t compare(int32_t start,
354  int32_t length,
355  const UnicodeString& text) const;
356 
374  inline int8_t compare(int32_t start,
375  int32_t length,
376  const UnicodeString& srcText,
377  int32_t srcStart,
378  int32_t srcLength) const;
379 
392  inline int8_t compare(const UChar *srcChars,
393  int32_t srcLength) const;
394 
409  inline int8_t compare(int32_t start,
410  int32_t length,
411  const UChar *srcChars) const;
412 
430  inline int8_t compare(int32_t start,
431  int32_t length,
432  const UChar *srcChars,
433  int32_t srcStart,
434  int32_t srcLength) const;
435 
453  inline int8_t compareBetween(int32_t start,
454  int32_t limit,
455  const UnicodeString& srcText,
456  int32_t srcStart,
457  int32_t srcLimit) const;
458 
476  inline int8_t compareCodePointOrder(const UnicodeString& text) const;
477 
497  inline int8_t compareCodePointOrder(int32_t start,
498  int32_t length,
499  const UnicodeString& srcText) const;
500 
522  inline int8_t compareCodePointOrder(int32_t start,
523  int32_t length,
524  const UnicodeString& srcText,
525  int32_t srcStart,
526  int32_t srcLength) const;
527 
546  inline int8_t compareCodePointOrder(const UChar *srcChars,
547  int32_t srcLength) const;
548 
568  inline int8_t compareCodePointOrder(int32_t start,
569  int32_t length,
570  const UChar *srcChars) const;
571 
593  inline int8_t compareCodePointOrder(int32_t start,
594  int32_t length,
595  const UChar *srcChars,
596  int32_t srcStart,
597  int32_t srcLength) const;
598 
620  inline int8_t compareCodePointOrderBetween(int32_t start,
621  int32_t limit,
622  const UnicodeString& srcText,
623  int32_t srcStart,
624  int32_t srcLimit) const;
625 
644  inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
645 
666  inline int8_t caseCompare(int32_t start,
667  int32_t length,
668  const UnicodeString& srcText,
669  uint32_t options) const;
670 
693  inline int8_t caseCompare(int32_t start,
694  int32_t length,
695  const UnicodeString& srcText,
696  int32_t srcStart,
697  int32_t srcLength,
698  uint32_t options) const;
699 
719  inline int8_t caseCompare(const UChar *srcChars,
720  int32_t srcLength,
721  uint32_t options) const;
722 
743  inline int8_t caseCompare(int32_t start,
744  int32_t length,
745  const UChar *srcChars,
746  uint32_t options) const;
747 
770  inline int8_t caseCompare(int32_t start,
771  int32_t length,
772  const UChar *srcChars,
773  int32_t srcStart,
774  int32_t srcLength,
775  uint32_t options) const;
776 
799  inline int8_t caseCompareBetween(int32_t start,
800  int32_t limit,
801  const UnicodeString& srcText,
802  int32_t srcStart,
803  int32_t srcLimit,
804  uint32_t options) const;
805 
813  inline UBool startsWith(const UnicodeString& text) const;
814 
825  inline UBool startsWith(const UnicodeString& srcText,
826  int32_t srcStart,
827  int32_t srcLength) const;
828 
837  inline UBool startsWith(const UChar *srcChars,
838  int32_t srcLength) const;
839 
849  inline UBool startsWith(const UChar *srcChars,
850  int32_t srcStart,
851  int32_t srcLength) const;
852 
860  inline UBool endsWith(const UnicodeString& text) const;
861 
872  inline UBool endsWith(const UnicodeString& srcText,
873  int32_t srcStart,
874  int32_t srcLength) const;
875 
884  inline UBool endsWith(const UChar *srcChars,
885  int32_t srcLength) const;
886 
897  inline UBool endsWith(const UChar *srcChars,
898  int32_t srcStart,
899  int32_t srcLength) const;
900 
901 
902  /* Searching - bitwise only */
903 
912  inline int32_t indexOf(const UnicodeString& text) const;
913 
923  inline int32_t indexOf(const UnicodeString& text,
924  int32_t start) const;
925 
937  inline int32_t indexOf(const UnicodeString& text,
938  int32_t start,
939  int32_t length) const;
940 
957  inline int32_t indexOf(const UnicodeString& srcText,
958  int32_t srcStart,
959  int32_t srcLength,
960  int32_t start,
961  int32_t length) const;
962 
974  inline int32_t indexOf(const UChar *srcChars,
975  int32_t srcLength,
976  int32_t start) const;
977 
990  inline int32_t indexOf(const UChar *srcChars,
991  int32_t srcLength,
992  int32_t start,
993  int32_t length) const;
994 
1011  int32_t indexOf(const UChar *srcChars,
1012  int32_t srcStart,
1013  int32_t srcLength,
1014  int32_t start,
1015  int32_t length) const;
1016 
1024  inline int32_t indexOf(UChar c) const;
1025 
1034  inline int32_t indexOf(UChar32 c) const;
1035 
1044  inline int32_t indexOf(UChar c,
1045  int32_t start) const;
1046 
1056  inline int32_t indexOf(UChar32 c,
1057  int32_t start) const;
1058 
1069  inline int32_t indexOf(UChar c,
1070  int32_t start,
1071  int32_t length) const;
1072 
1084  inline int32_t indexOf(UChar32 c,
1085  int32_t start,
1086  int32_t length) const;
1087 
1096  inline int32_t lastIndexOf(const UnicodeString& text) const;
1097 
1107  inline int32_t lastIndexOf(const UnicodeString& text,
1108  int32_t start) const;
1109 
1121  inline int32_t lastIndexOf(const UnicodeString& text,
1122  int32_t start,
1123  int32_t length) const;
1124 
1141  inline int32_t lastIndexOf(const UnicodeString& srcText,
1142  int32_t srcStart,
1143  int32_t srcLength,
1144  int32_t start,
1145  int32_t length) const;
1146 
1157  inline int32_t lastIndexOf(const UChar *srcChars,
1158  int32_t srcLength,
1159  int32_t start) const;
1160 
1173  inline int32_t lastIndexOf(const UChar *srcChars,
1174  int32_t srcLength,
1175  int32_t start,
1176  int32_t length) const;
1177 
1194  int32_t lastIndexOf(const UChar *srcChars,
1195  int32_t srcStart,
1196  int32_t srcLength,
1197  int32_t start,
1198  int32_t length) const;
1199 
1207  inline int32_t lastIndexOf(UChar c) const;
1208 
1217  inline int32_t lastIndexOf(UChar32 c) const;
1218 
1227  inline int32_t lastIndexOf(UChar c,
1228  int32_t start) const;
1229 
1239  inline int32_t lastIndexOf(UChar32 c,
1240  int32_t start) const;
1241 
1252  inline int32_t lastIndexOf(UChar c,
1253  int32_t start,
1254  int32_t length) const;
1255 
1267  inline int32_t lastIndexOf(UChar32 c,
1268  int32_t start,
1269  int32_t length) const;
1270 
1271 
1272  /* Character access */
1273 
1282  inline UChar charAt(int32_t offset) const;
1283 
1291  inline UChar operator[] (int32_t offset) const;
1292 
1304  UChar32 char32At(int32_t offset) const;
1305 
1321  int32_t getChar32Start(int32_t offset) const;
1322 
1339  int32_t getChar32Limit(int32_t offset) const;
1340 
1391  int32_t moveIndex32(int32_t index, int32_t delta) const;
1392 
1393  /* Substring extraction */
1394 
1410  inline void extract(int32_t start,
1411  int32_t length,
1412  UChar *dst,
1413  int32_t dstStart = 0) const;
1414 
1436  int32_t
1437  extract(UChar *dest, int32_t destCapacity,
1438  UErrorCode &errorCode) const;
1439 
1450  inline void extract(int32_t start,
1451  int32_t length,
1452  UnicodeString& target) const;
1453 
1465  inline void extractBetween(int32_t start,
1466  int32_t limit,
1467  UChar *dst,
1468  int32_t dstStart = 0) const;
1469 
1479  virtual void extractBetween(int32_t start,
1480  int32_t limit,
1481  UnicodeString& target) const;
1482 
1504  int32_t extract(int32_t start,
1505  int32_t startLength,
1506  char *target,
1507  int32_t targetCapacity,
1508  enum EInvariant inv) const;
1509 
1510 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
1511 
1531  int32_t extract(int32_t start,
1532  int32_t startLength,
1533  char *target,
1534  uint32_t targetLength) const;
1535 
1536 #endif
1537 
1538 #if !UCONFIG_NO_CONVERSION
1539 
1565  inline int32_t extract(int32_t start,
1566  int32_t startLength,
1567  char *target,
1568  const char *codepage = 0) const;
1569 
1599  int32_t extract(int32_t start,
1600  int32_t startLength,
1601  char *target,
1602  uint32_t targetLength,
1603  const char *codepage) const;
1604 
1622  int32_t extract(char *dest, int32_t destCapacity,
1623  UConverter *cnv,
1624  UErrorCode &errorCode) const;
1625 
1626 #endif
1627 
1641  UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
1642 
1653  inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
1654 
1666  void toUTF8(ByteSink &sink) const;
1667 
1668 #if U_HAVE_STD_STRING
1669 
1682  template<typename StringClass>
1683  StringClass &toUTF8String(StringClass &result) const {
1684  StringByteSink<StringClass> sbs(&result);
1685  toUTF8(sbs);
1686  return result;
1687  }
1688 
1689 #endif
1690 
1706  int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
1707 
1708  /* Length operations */
1709 
1718  inline int32_t length(void) const;
1719 
1733  int32_t
1734  countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
1735 
1759  UBool
1760  hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
1761 
1767  inline UBool isEmpty(void) const;
1768 
1778  inline int32_t getCapacity(void) const;
1779 
1780  /* Other operations */
1781 
1787  inline int32_t hashCode(void) const;
1788 
1801  inline UBool isBogus(void) const;
1802 
1803 
1804  //========================================
1805  // Write operations
1806  //========================================
1807 
1808  /* Assignment operations */
1809 
1817  UnicodeString &operator=(const UnicodeString &srcText);
1818 
1841  UnicodeString &fastCopyFrom(const UnicodeString &src);
1842 
1850  inline UnicodeString& operator= (UChar ch);
1851 
1859  inline UnicodeString& operator= (UChar32 ch);
1860 
1872  inline UnicodeString& setTo(const UnicodeString& srcText,
1873  int32_t srcStart);
1874 
1888  inline UnicodeString& setTo(const UnicodeString& srcText,
1889  int32_t srcStart,
1890  int32_t srcLength);
1891 
1900  inline UnicodeString& setTo(const UnicodeString& srcText);
1901 
1910  inline UnicodeString& setTo(const UChar *srcChars,
1911  int32_t srcLength);
1912 
1921  UnicodeString& setTo(UChar srcChar);
1922 
1931  UnicodeString& setTo(UChar32 srcChar);
1932 
1956  UnicodeString &setTo(UBool isTerminated,
1957  const UChar *text,
1958  int32_t textLength);
1959 
1979  UnicodeString &setTo(UChar *buffer,
1980  int32_t buffLength,
1981  int32_t buffCapacity);
1982 
2023  void setToBogus();
2024 
2032  UnicodeString& setCharAt(int32_t offset,
2033  UChar ch);
2034 
2035 
2036  /* Append operations */
2037 
2045  inline UnicodeString& operator+= (UChar ch);
2046 
2054  inline UnicodeString& operator+= (UChar32 ch);
2055 
2063  inline UnicodeString& operator+= (const UnicodeString& srcText);
2064 
2079  inline UnicodeString& append(const UnicodeString& srcText,
2080  int32_t srcStart,
2081  int32_t srcLength);
2082 
2090  inline UnicodeString& append(const UnicodeString& srcText);
2091 
2105  inline UnicodeString& append(const UChar *srcChars,
2106  int32_t srcStart,
2107  int32_t srcLength);
2108 
2118  inline UnicodeString& append(const UChar *srcChars,
2119  int32_t srcLength);
2120 
2127  inline UnicodeString& append(UChar srcChar);
2128 
2135  UnicodeString& append(UChar32 srcChar);
2136 
2137 
2138  /* Insert operations */
2139 
2153  inline UnicodeString& insert(int32_t start,
2154  const UnicodeString& srcText,
2155  int32_t srcStart,
2156  int32_t srcLength);
2157 
2166  inline UnicodeString& insert(int32_t start,
2167  const UnicodeString& srcText);
2168 
2182  inline UnicodeString& insert(int32_t start,
2183  const UChar *srcChars,
2184  int32_t srcStart,
2185  int32_t srcLength);
2186 
2196  inline UnicodeString& insert(int32_t start,
2197  const UChar *srcChars,
2198  int32_t srcLength);
2199 
2208  inline UnicodeString& insert(int32_t start,
2209  UChar srcChar);
2210 
2219  inline UnicodeString& insert(int32_t start,
2220  UChar32 srcChar);
2221 
2222 
2223  /* Replace operations */
2224 
2242  UnicodeString& replace(int32_t start,
2243  int32_t length,
2244  const UnicodeString& srcText,
2245  int32_t srcStart,
2246  int32_t srcLength);
2247 
2260  UnicodeString& replace(int32_t start,
2261  int32_t length,
2262  const UnicodeString& srcText);
2263 
2281  UnicodeString& replace(int32_t start,
2282  int32_t length,
2283  const UChar *srcChars,
2284  int32_t srcStart,
2285  int32_t srcLength);
2286 
2299  inline UnicodeString& replace(int32_t start,
2300  int32_t length,
2301  const UChar *srcChars,
2302  int32_t srcLength);
2303 
2315  inline UnicodeString& replace(int32_t start,
2316  int32_t length,
2317  UChar srcChar);
2318 
2330  UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar);
2331 
2341  inline UnicodeString& replaceBetween(int32_t start,
2342  int32_t limit,
2343  const UnicodeString& srcText);
2344 
2359  inline UnicodeString& replaceBetween(int32_t start,
2360  int32_t limit,
2361  const UnicodeString& srcText,
2362  int32_t srcStart,
2363  int32_t srcLimit);
2364 
2375  virtual void handleReplaceBetween(int32_t start,
2376  int32_t limit,
2377  const UnicodeString& text);
2378 
2384  virtual UBool hasMetaData() const;
2385 
2401  virtual void copy(int32_t start, int32_t limit, int32_t dest);
2402 
2403  /* Search and replace operations */
2404 
2413  inline UnicodeString& findAndReplace(const UnicodeString& oldText,
2414  const UnicodeString& newText);
2415 
2427  inline UnicodeString& findAndReplace(int32_t start,
2428  int32_t length,
2429  const UnicodeString& oldText,
2430  const UnicodeString& newText);
2431 
2449  UnicodeString& findAndReplace(int32_t start,
2450  int32_t length,
2451  const UnicodeString& oldText,
2452  int32_t oldStart,
2453  int32_t oldLength,
2454  const UnicodeString& newText,
2455  int32_t newStart,
2456  int32_t newLength);
2457 
2458 
2459  /* Remove operations */
2460 
2466  inline UnicodeString& remove(void);
2467 
2476  inline UnicodeString& remove(int32_t start,
2477  int32_t length = (int32_t)INT32_MAX);
2478 
2487  inline UnicodeString& removeBetween(int32_t start,
2488  int32_t limit = (int32_t)INT32_MAX);
2489 
2499  inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
2500 
2501  /* Length operations */
2502 
2514  UBool padLeading(int32_t targetLength,
2515  UChar padChar = 0x0020);
2516 
2528  UBool padTrailing(int32_t targetLength,
2529  UChar padChar = 0x0020);
2530 
2537  inline UBool truncate(int32_t targetLength);
2538 
2544  UnicodeString& trim(void);
2545 
2546 
2547  /* Miscellaneous operations */
2548 
2554  inline UnicodeString& reverse(void);
2555 
2564  inline UnicodeString& reverse(int32_t start,
2565  int32_t length);
2566 
2573  UnicodeString& toUpper(void);
2574 
2582  UnicodeString& toUpper(const Locale& locale);
2583 
2590  UnicodeString& toLower(void);
2591 
2599  UnicodeString& toLower(const Locale& locale);
2600 
2601 #if !UCONFIG_NO_BREAK_ITERATION
2602 
2629  UnicodeString &toTitle(BreakIterator *titleIter);
2630 
2658  UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
2659 
2691  UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
2692 
2693 #endif
2694 
2708  UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
2709 
2710  //========================================
2711  // Access to the internal buffer
2712  //========================================
2713 
2757  UChar *getBuffer(int32_t minCapacity);
2758 
2779  void releaseBuffer(int32_t newLength=-1);
2780 
2811  inline const UChar *getBuffer() const;
2812 
2846  const UChar *getTerminatedBuffer();
2847 
2848  //========================================
2849  // Constructors
2850  //========================================
2851 
2855  inline UnicodeString();
2856 
2868  UnicodeString(int32_t capacity, UChar32 c, int32_t count);
2869 
2880 
2891 
2903 
2911  UnicodeString(const UChar *text,
2912  int32_t textLength);
2913 
2936  UnicodeString(UBool isTerminated,
2937  const UChar *text,
2938  int32_t textLength);
2939 
2958  UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);
2959 
2960 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
2961 
2981  UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData);
2982 
2991  UnicodeString(const char *codepageData, int32_t dataLength);
2992 
2993 #endif
2994 
2995 #if !UCONFIG_NO_CONVERSION
2996 
3014  UnicodeString(const char *codepageData, const char *codepage);
3015 
3033  UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
3034 
3056  UnicodeString(
3057  const char *src, int32_t srcLength,
3058  UConverter *cnv,
3059  UErrorCode &errorCode);
3060 
3061 #endif
3062 
3087  UnicodeString(const char *src, int32_t length, enum EInvariant inv);
3088 
3089 
3095  UnicodeString(const UnicodeString& that);
3096 
3103  UnicodeString(const UnicodeString& src, int32_t srcStart);
3104 
3112  UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
3113 
3130  virtual Replaceable *clone() const;
3131 
3135  virtual ~UnicodeString();
3136 
3150  static UnicodeString fromUTF8(const StringPiece &utf8);
3151 
3163  static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
3164 
3165  /* Miscellaneous operations */
3166 
3201  UnicodeString unescape() const;
3202 
3222  UChar32 unescapeAt(int32_t &offset) const;
3223 
3229  static UClassID U_EXPORT2 getStaticClassID();
3230 
3236  virtual UClassID getDynamicClassID() const;
3237 
3238  //========================================
3239  // Implementation methods
3240  //========================================
3241 
3242 protected:
3247  virtual int32_t getLength() const;
3248 
3254  virtual UChar getCharAt(int32_t offset) const;
3255 
3261  virtual UChar32 getChar32At(int32_t offset) const;
3262 
3263 private:
3264  // For char* constructors. Could be made public.
3265  UnicodeString &setToUTF8(const StringPiece &utf8);
3266  // For extract(char*).
3267  // We could make a toUTF8(target, capacity, errorCode) public but not
3268  // this version: New API will be cleaner if we make callers create substrings
3269  // rather than having start+length on every method,
3270  // and it should take a UErrorCode&.
3271  int32_t
3272  toUTF8(int32_t start, int32_t len,
3273  char *target, int32_t capacity) const;
3274 
3279  UBool doEquals(const UnicodeString &text, int32_t len) const;
3280 
3281  inline int8_t
3282  doCompare(int32_t start,
3283  int32_t length,
3284  const UnicodeString& srcText,
3285  int32_t srcStart,
3286  int32_t srcLength) const;
3287 
3288  int8_t doCompare(int32_t start,
3289  int32_t length,
3290  const UChar *srcChars,
3291  int32_t srcStart,
3292  int32_t srcLength) const;
3293 
3294  inline int8_t
3295  doCompareCodePointOrder(int32_t start,
3296  int32_t length,
3297  const UnicodeString& srcText,
3298  int32_t srcStart,
3299  int32_t srcLength) const;
3300 
3301  int8_t doCompareCodePointOrder(int32_t start,
3302  int32_t length,
3303  const UChar *srcChars,
3304  int32_t srcStart,
3305  int32_t srcLength) const;
3306 
3307  inline int8_t
3308  doCaseCompare(int32_t start,
3309  int32_t length,
3310  const UnicodeString &srcText,
3311  int32_t srcStart,
3312  int32_t srcLength,
3313  uint32_t options) const;
3314 
3315  int8_t
3316  doCaseCompare(int32_t start,
3317  int32_t length,
3318  const UChar *srcChars,
3319  int32_t srcStart,
3320  int32_t srcLength,
3321  uint32_t options) const;
3322 
3323  int32_t doIndexOf(UChar c,
3324  int32_t start,
3325  int32_t length) const;
3326 
3327  int32_t doIndexOf(UChar32 c,
3328  int32_t start,
3329  int32_t length) const;
3330 
3331  int32_t doLastIndexOf(UChar c,
3332  int32_t start,
3333  int32_t length) const;
3334 
3335  int32_t doLastIndexOf(UChar32 c,
3336  int32_t start,
3337  int32_t length) const;
3338 
3339  void doExtract(int32_t start,
3340  int32_t length,
3341  UChar *dst,
3342  int32_t dstStart) const;
3343 
3344  inline void doExtract(int32_t start,
3345  int32_t length,
3346  UnicodeString& target) const;
3347 
3348  inline UChar doCharAt(int32_t offset) const;
3349 
3350  UnicodeString& doReplace(int32_t start,
3351  int32_t length,
3352  const UnicodeString& srcText,
3353  int32_t srcStart,
3354  int32_t srcLength);
3355 
3356  UnicodeString& doReplace(int32_t start,
3357  int32_t length,
3358  const UChar *srcChars,
3359  int32_t srcStart,
3360  int32_t srcLength);
3361 
3362  UnicodeString& doReverse(int32_t start,
3363  int32_t length);
3364 
3365  // calculate hash code
3366  int32_t doHashCode(void) const;
3367 
3368  // get pointer to start of array
3369  // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
3370  inline UChar* getArrayStart(void);
3371  inline const UChar* getArrayStart(void) const;
3372 
3373  // A UnicodeString object (not necessarily its current buffer)
3374  // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
3375  inline UBool isWritable() const;
3376 
3377  // Is the current buffer writable?
3378  inline UBool isBufferWritable() const;
3379 
3380  // None of the following does releaseArray().
3381  inline void setLength(int32_t len); // sets only fShortLength and fLength
3382  inline void setToEmpty(); // sets fFlags=kShortString
3383  inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags
3384 
3385  // allocate the array; result may be fStackBuffer
3386  // sets refCount to 1 if appropriate
3387  // sets fArray, fCapacity, and fFlags
3388  // returns boolean for success or failure
3389  UBool allocate(int32_t capacity);
3390 
3391  // release the array if owned
3392  void releaseArray(void);
3393 
3394  // turn a bogus string into an empty one
3395  void unBogus();
3396 
3397  // implements assignment operator, copy constructor, and fastCopyFrom()
3398  UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);
3399 
3400  // Pin start and limit to acceptable values.
3401  inline void pinIndex(int32_t& start) const;
3402  inline void pinIndices(int32_t& start,
3403  int32_t& length) const;
3404 
3405 #if !UCONFIG_NO_CONVERSION
3406 
3407  /* Internal extract() using UConverter. */
3408  int32_t doExtract(int32_t start, int32_t length,
3409  char *dest, int32_t destCapacity,
3410  UConverter *cnv,
3411  UErrorCode &errorCode) const;
3412 
3413  /*
3414  * Real constructor for converting from codepage data.
3415  * It assumes that it is called with !fRefCounted.
3416  *
3417  * If <code>codepage==0</code>, then the default converter
3418  * is used for the platform encoding.
3419  * If <code>codepage</code> is an empty string (<code>""</code>),
3420  * then a simple conversion is performed on the codepage-invariant
3421  * subset ("invariant characters") of the platform encoding. See utypes.h.
3422  */
3423  void doCodepageCreate(const char *codepageData,
3424  int32_t dataLength,
3425  const char *codepage);
3426 
3427  /*
3428  * Worker function for creating a UnicodeString from
3429  * a codepage string using a UConverter.
3430  */
3431  void
3432  doCodepageCreate(const char *codepageData,
3433  int32_t dataLength,
3434  UConverter *converter,
3435  UErrorCode &status);
3436 
3437 #endif
3438 
3439  /*
3440  * This function is called when write access to the array
3441  * is necessary.
3442  *
3443  * We need to make a copy of the array if
3444  * the buffer is read-only, or
3445  * the buffer is refCounted (shared), and refCount>1, or
3446  * the buffer is too small.
3447  *
3448  * Return FALSE if memory could not be allocated.
3449  */
3450  UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
3451  int32_t growCapacity = -1,
3452  UBool doCopyArray = TRUE,
3453  int32_t **pBufferToDelete = 0,
3454  UBool forceClone = FALSE);
3455 
3461  UnicodeString &
3462  caseMap(const UCaseMap *csm, UStringCaseMapper *stringCaseMapper);
3463 
3464  // ref counting
3465  void addRef(void);
3466  int32_t removeRef(void);
3467  int32_t refCount(void) const;
3468 
3469  // constants
// Implementation constants and the bit values stored in fFlags.
enum {
  // Set the stack buffer size so that sizeof(UnicodeString) is,
  // naturally (without padding), a multiple of sizeof(pointer).
  US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size of stack buffer for short strings
  kInvalidUChar=0xffff, // invalid UChar index
  kGrowSize=128, // grow size for this buffer
  kInvalidHashCode=0, // invalid hash code
  kEmptyHashCode=1, // hash code for empty string

  // bit flag values for fFlags
  kIsBogus=1,          // this string is bogus, i.e., not valid or NULL
  kUsingStackBuffer=2, // using fUnion.fStackBuffer instead of fUnion.fFields
  kRefCounted=4,       // there is a refCount field before the characters in fArray
  kBufferIsReadonly=8, // do not write to this buffer
  kOpenGetBuffer=16,   // getBuffer(minCapacity) was called (is "open"),
                       // and releaseBuffer(newLength) must be called

  // combined values for convenience
  kShortString=kUsingStackBuffer,
  kLongString=kRefCounted,
  kReadonlyAlias=kBufferIsReadonly,
  kWritableAlias=0
};
3493 
3494  friend class StringThreadTest;
3495  friend class UnicodeStringAppendable;
3496 
3497  union StackBufferOrFields; // forward declaration necessary before friend declaration
3498  friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
3499 
3500  /*
3501  * The following are all the class fields that are stored
3502  * in each UnicodeString object.
3503  * Note that UnicodeString has virtual functions,
3504  * therefore there is an implicit vtable pointer
3505  * as the first real field.
3506  * The fields should be aligned such that no padding is necessary.
3507  * On 32-bit machines, the size should be 32 bytes,
3508  * on 64-bit machines (8-byte pointers), it should be 40 bytes.
3509  *
3510  * We use a hack to achieve this.
3511  *
3512  * With at least some compilers, each of the following is forced to
3513  * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
3514  * rounded up with additional padding if the fields do not already fit that requirement:
3515  * - sizeof(class UnicodeString)
3516  * - offsetof(UnicodeString, fUnion)
3517  * - sizeof(fUnion)
3518  * - sizeof(fFields)
3519  *
3520  * In order to avoid padding, we make sizeof(fStackBuffer)=16 (=8 UChars)
3521  * which is at least as large as sizeof(fFields) on 32-bit and 64-bit machines.
3522  * (Padding at the end of fFields is ok:
3523  * As long as there is no padding after fStackBuffer, it is not wasted space.)
3524  *
3525  * We further assume that the compiler does not reorder the fields,
3526  * so that fRestOfStackBuffer (which holds a few more UChars) immediately follows after fUnion,
3527  * with at most some padding (but no other field) in between.
3528  * (Padding there would be wasted space, but functionally harmless.)
3529  *
3530  * We use a few more sizeof(pointer)'s chunks of space with
3531  * fRestOfStackBuffer, fShortLength and fFlags,
3532  * to get up exactly to the intended sizeof(UnicodeString).
3533  */
3534  // (implicit) *vtable;
3535  union StackBufferOrFields {
3536  // fStackBuffer is used iff (fFlags&kUsingStackBuffer)
3537  // else fFields is used
3538  UChar fStackBuffer[8]; // buffer for short strings, together with fRestOfStackBuffer
3539  struct {
3540  UChar *fArray; // the Unicode data
3541  int32_t fCapacity; // capacity of fArray (in UChars)
3542  int32_t fLength; // number of characters in fArray if >127; else undefined
3543  } fFields;
3544  } fUnion;
3545  UChar fRestOfStackBuffer[US_STACKBUF_SIZE-8];
3546  int8_t fShortLength; // 0..127: length <0: real length is in fUnion.fFields.fLength
3547  uint8_t fFlags; // bit flags: see constants above
3548 };
3549 
3558 U_COMMON_API UnicodeString U_EXPORT2
3559 operator+ (const UnicodeString &s1, const UnicodeString &s2);
3560 
3561 //========================================
3562 // Inline members
3563 //========================================
3564 
3565 //========================================
3566 // Privates
3567 //========================================
3568 
3569 inline void
3570 UnicodeString::pinIndex(int32_t& start) const
3571 {
3572  // pin index
3573  if(start < 0) {
3574  start = 0;
3575  } else if(start > length()) {
3576  start = length();
3577  }
3578 }
3579 
3580 inline void
3581 UnicodeString::pinIndices(int32_t& start,
3582  int32_t& _length) const
3583 {
3584  // pin indices
3585  int32_t len = length();
3586  if(start < 0) {
3587  start = 0;
3588  } else if(start > len) {
3589  start = len;
3590  }
3591  if(_length < 0) {
3592  _length = 0;
3593  } else if(_length > (len - start)) {
3594  _length = (len - start);
3595  }
3596 }
3597 
3598 inline UChar*
3599 UnicodeString::getArrayStart()
3600 { return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
3601 
3602 inline const UChar*
3603 UnicodeString::getArrayStart() const
3604 { return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
3605 
3606 //========================================
3607 // Default constructor
3608 //========================================
3609 
3610 inline
3612  : fShortLength(0),
3613  fFlags(kShortString)
3614 {}
3615 
3616 //========================================
3617 // Read-only implementation methods
3618 //========================================
3619 inline int32_t
3621 { return fShortLength>=0 ? fShortLength : fUnion.fFields.fLength; }
3622 
3623 inline int32_t
3625 { return (fFlags&kUsingStackBuffer) ? US_STACKBUF_SIZE : fUnion.fFields.fCapacity; }
3626 
3627 inline int32_t
3629 { return doHashCode(); }
3630 
3631 inline UBool
3633 { return (UBool)(fFlags & kIsBogus); }
3634 
3635 inline UBool
3636 UnicodeString::isWritable() const
3637 { return (UBool)!(fFlags&(kOpenGetBuffer|kIsBogus)); }
3638 
3639 inline UBool
3640 UnicodeString::isBufferWritable() const
3641 {
3642  return (UBool)(
3643  !(fFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
3644  (!(fFlags&kRefCounted) || refCount()==1));
3645 }
3646 
3647 inline const UChar *
3649  if(fFlags&(kIsBogus|kOpenGetBuffer)) {
3650  return 0;
3651  } else if(fFlags&kUsingStackBuffer) {
3652  return fUnion.fStackBuffer;
3653  } else {
3654  return fUnion.fFields.fArray;
3655  }
3656 }
3657 
3658 //========================================
3659 // Read-only alias methods
3660 //========================================
3661 inline int8_t
3662 UnicodeString::doCompare(int32_t start,
3663  int32_t thisLength,
3664  const UnicodeString& srcText,
3665  int32_t srcStart,
3666  int32_t srcLength) const
3667 {
3668  if(srcText.isBogus()) {
3669  return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3670  } else {
3671  srcText.pinIndices(srcStart, srcLength);
3672  return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
3673  }
3674 }
3675 
3676 inline UBool
3678 {
3679  if(isBogus()) {
3680  return text.isBogus();
3681  } else {
3682  int32_t len = length(), textLength = text.length();
3683  return !text.isBogus() && len == textLength && doEquals(text, len);
3684  }
3685 }
3686 
3687 inline UBool
3689 { return (! operator==(text)); }
3690 
3691 inline UBool
3693 { return doCompare(0, length(), text, 0, text.length()) == 1; }
3694 
3695 inline UBool
3697 { return doCompare(0, length(), text, 0, text.length()) == -1; }
3698 
3699 inline UBool
3701 { return doCompare(0, length(), text, 0, text.length()) != -1; }
3702 
3703 inline UBool
3705 { return doCompare(0, length(), text, 0, text.length()) != 1; }
3706 
3707 inline int8_t
3709 { return doCompare(0, length(), text, 0, text.length()); }
3710 
3711 inline int8_t
3713  int32_t _length,
3714  const UnicodeString& srcText) const
3715 { return doCompare(start, _length, srcText, 0, srcText.length()); }
3716 
3717 inline int8_t
3719  int32_t srcLength) const
3720 { return doCompare(0, length(), srcChars, 0, srcLength); }
3721 
3722 inline int8_t
3724  int32_t _length,
3725  const UnicodeString& srcText,
3726  int32_t srcStart,
3727  int32_t srcLength) const
3728 { return doCompare(start, _length, srcText, srcStart, srcLength); }
3729 
3730 inline int8_t
3732  int32_t _length,
3733  const UChar *srcChars) const
3734 { return doCompare(start, _length, srcChars, 0, _length); }
3735 
3736 inline int8_t
3738  int32_t _length,
3739  const UChar *srcChars,
3740  int32_t srcStart,
3741  int32_t srcLength) const
3742 { return doCompare(start, _length, srcChars, srcStart, srcLength); }
3743 
3744 inline int8_t
3746  int32_t limit,
3747  const UnicodeString& srcText,
3748  int32_t srcStart,
3749  int32_t srcLimit) const
3750 { return doCompare(start, limit - start,
3751  srcText, srcStart, srcLimit - srcStart); }
3752 
3753 inline int8_t
3754 UnicodeString::doCompareCodePointOrder(int32_t start,
3755  int32_t thisLength,
3756  const UnicodeString& srcText,
3757  int32_t srcStart,
3758  int32_t srcLength) const
3759 {
3760  if(srcText.isBogus()) {
3761  return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3762  } else {
3763  srcText.pinIndices(srcStart, srcLength);
3764  return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
3765  }
3766 }
3767 
3768 inline int8_t
3770 { return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
3771 
3772 inline int8_t
3774  int32_t _length,
3775  const UnicodeString& srcText) const
3776 { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
3777 
3778 inline int8_t
3780  int32_t srcLength) const
3781 { return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
3782 
3783 inline int8_t
3785  int32_t _length,
3786  const UnicodeString& srcText,
3787  int32_t srcStart,
3788  int32_t srcLength) const
3789 { return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
3790 
3791 inline int8_t
3793  int32_t _length,
3794  const UChar *srcChars) const
3795 { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
3796 
3797 inline int8_t
3799  int32_t _length,
3800  const UChar *srcChars,
3801  int32_t srcStart,
3802  int32_t srcLength) const
3803 { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
3804 
3805 inline int8_t
3807  int32_t limit,
3808  const UnicodeString& srcText,
3809  int32_t srcStart,
3810  int32_t srcLimit) const
3811 { return doCompareCodePointOrder(start, limit - start,
3812  srcText, srcStart, srcLimit - srcStart); }
3813 
3814 inline int8_t
3815 UnicodeString::doCaseCompare(int32_t start,
3816  int32_t thisLength,
3817  const UnicodeString &srcText,
3818  int32_t srcStart,
3819  int32_t srcLength,
3820  uint32_t options) const
3821 {
3822  if(srcText.isBogus()) {
3823  return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3824  } else {
3825  srcText.pinIndices(srcStart, srcLength);
3826  return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
3827  }
3828 }
3829 
3830 inline int8_t
3831 UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
3832  return doCaseCompare(0, length(), text, 0, text.length(), options);
3833 }
3834 
3835 inline int8_t
3837  int32_t _length,
3838  const UnicodeString &srcText,
3839  uint32_t options) const {
3840  return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
3841 }
3842 
3843 inline int8_t
3845  int32_t srcLength,
3846  uint32_t options) const {
3847  return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
3848 }
3849 
3850 inline int8_t
3852  int32_t _length,
3853  const UnicodeString &srcText,
3854  int32_t srcStart,
3855  int32_t srcLength,
3856  uint32_t options) const {
3857  return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
3858 }
3859 
3860 inline int8_t
3862  int32_t _length,
3863  const UChar *srcChars,
3864  uint32_t options) const {
3865  return doCaseCompare(start, _length, srcChars, 0, _length, options);
3866 }
3867 
3868 inline int8_t
3870  int32_t _length,
3871  const UChar *srcChars,
3872  int32_t srcStart,
3873  int32_t srcLength,
3874  uint32_t options) const {
3875  return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
3876 }
3877 
3878 inline int8_t
3880  int32_t limit,
3881  const UnicodeString &srcText,
3882  int32_t srcStart,
3883  int32_t srcLimit,
3884  uint32_t options) const {
3885  return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
3886 }
3887 
3888 inline int32_t
3890  int32_t srcStart,
3891  int32_t srcLength,
3892  int32_t start,
3893  int32_t _length) const
3894 {
3895  if(!srcText.isBogus()) {
3896  srcText.pinIndices(srcStart, srcLength);
3897  if(srcLength > 0) {
3898  return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
3899  }
3900  }
3901  return -1;
3902 }
3903 
3904 inline int32_t
3906 { return indexOf(text, 0, text.length(), 0, length()); }
3907 
3908 inline int32_t
3910  int32_t start) const {
3911  pinIndex(start);
3912  return indexOf(text, 0, text.length(), start, length() - start);
3913 }
3914 
3915 inline int32_t
3917  int32_t start,
3918  int32_t _length) const
3919 { return indexOf(text, 0, text.length(), start, _length); }
3920 
3921 inline int32_t
3923  int32_t srcLength,
3924  int32_t start) const {
3925  pinIndex(start);
3926  return indexOf(srcChars, 0, srcLength, start, length() - start);
3927 }
3928 
3929 inline int32_t
3931  int32_t srcLength,
3932  int32_t start,
3933  int32_t _length) const
3934 { return indexOf(srcChars, 0, srcLength, start, _length); }
3935 
3936 inline int32_t
3938  int32_t start,
3939  int32_t _length) const
3940 { return doIndexOf(c, start, _length); }
3941 
3942 inline int32_t
3944  int32_t start,
3945  int32_t _length) const
3946 { return doIndexOf(c, start, _length); }
3947 
3948 inline int32_t
3950 { return doIndexOf(c, 0, length()); }
3951 
3952 inline int32_t
3954 { return indexOf(c, 0, length()); }
3955 
3956 inline int32_t
3958  int32_t start) const {
3959  pinIndex(start);
3960  return doIndexOf(c, start, length() - start);
3961 }
3962 
3963 inline int32_t
3965  int32_t start) const {
3966  pinIndex(start);
3967  return indexOf(c, start, length() - start);
3968 }
3969 
3970 inline int32_t
3972  int32_t srcLength,
3973  int32_t start,
3974  int32_t _length) const
3975 { return lastIndexOf(srcChars, 0, srcLength, start, _length); }
3976 
3977 inline int32_t
3979  int32_t srcLength,
3980  int32_t start) const {
3981  pinIndex(start);
3982  return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
3983 }
3984 
3985 inline int32_t
3987  int32_t srcStart,
3988  int32_t srcLength,
3989  int32_t start,
3990  int32_t _length) const
3991 {
3992  if(!srcText.isBogus()) {
3993  srcText.pinIndices(srcStart, srcLength);
3994  if(srcLength > 0) {
3995  return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
3996  }
3997  }
3998  return -1;
3999 }
4000 
4001 inline int32_t
4003  int32_t start,
4004  int32_t _length) const
4005 { return lastIndexOf(text, 0, text.length(), start, _length); }
4006 
4007 inline int32_t
4009  int32_t start) const {
4010  pinIndex(start);
4011  return lastIndexOf(text, 0, text.length(), start, length() - start);
4012 }
4013 
4014 inline int32_t
4016 { return lastIndexOf(text, 0, text.length(), 0, length()); }
4017 
4018 inline int32_t
4020  int32_t start,
4021  int32_t _length) const
4022 { return doLastIndexOf(c, start, _length); }
4023 
4024 inline int32_t
4026  int32_t start,
4027  int32_t _length) const {
4028  return doLastIndexOf(c, start, _length);
4029 }
4030 
4031 inline int32_t
4033 { return doLastIndexOf(c, 0, length()); }
4034 
4035 inline int32_t
4037  return lastIndexOf(c, 0, length());
4038 }
4039 
4040 inline int32_t
4042  int32_t start) const {
4043  pinIndex(start);
4044  return doLastIndexOf(c, start, length() - start);
4045 }
4046 
4047 inline int32_t
4049  int32_t start) const {
4050  pinIndex(start);
4051  return lastIndexOf(c, start, length() - start);
4052 }
4053 
4054 inline UBool
4056 { return compare(0, text.length(), text, 0, text.length()) == 0; }
4057 
4058 inline UBool
4060  int32_t srcStart,
4061  int32_t srcLength) const
4062 { return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; }
4063 
4064 inline UBool
4065 UnicodeString::startsWith(const UChar *srcChars, int32_t srcLength) const {
4066  if(srcLength < 0) {
4067  srcLength = u_strlen(srcChars);
4068  }
4069  return doCompare(0, srcLength, srcChars, 0, srcLength) == 0;
4070 }
4071 
4072 inline UBool
4073 UnicodeString::startsWith(const UChar *srcChars, int32_t srcStart, int32_t srcLength) const {
4074  if(srcLength < 0) {
4075  srcLength = u_strlen(srcChars);
4076  }
4077  return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;
4078 }
4079 
4080 inline UBool
4082 { return doCompare(length() - text.length(), text.length(),
4083  text, 0, text.length()) == 0; }
4084 
4085 inline UBool
4087  int32_t srcStart,
4088  int32_t srcLength) const {
4089  srcText.pinIndices(srcStart, srcLength);
4090  return doCompare(length() - srcLength, srcLength,
4091  srcText, srcStart, srcLength) == 0;
4092 }
4093 
4094 inline UBool
4096  int32_t srcLength) const {
4097  if(srcLength < 0) {
4098  srcLength = u_strlen(srcChars);
4099  }
4100  return doCompare(length() - srcLength, srcLength,
4101  srcChars, 0, srcLength) == 0;
4102 }
4103 
4104 inline UBool
4106  int32_t srcStart,
4107  int32_t srcLength) const {
4108  if(srcLength < 0) {
4109  srcLength = u_strlen(srcChars + srcStart);
4110  }
4111  return doCompare(length() - srcLength, srcLength,
4112  srcChars, srcStart, srcLength) == 0;
4113 }
4114 
4115 //========================================
4116 // replace
4117 //========================================
4118 inline UnicodeString&
4120  int32_t _length,
4121  const UnicodeString& srcText)
4122 { return doReplace(start, _length, srcText, 0, srcText.length()); }
4123 
4124 inline UnicodeString&
4126  int32_t _length,
4127  const UnicodeString& srcText,
4128  int32_t srcStart,
4129  int32_t srcLength)
4130 { return doReplace(start, _length, srcText, srcStart, srcLength); }
4131 
4132 inline UnicodeString&
4134  int32_t _length,
4135  const UChar *srcChars,
4136  int32_t srcLength)
4137 { return doReplace(start, _length, srcChars, 0, srcLength); }
4138 
4139 inline UnicodeString&
4141  int32_t _length,
4142  const UChar *srcChars,
4143  int32_t srcStart,
4144  int32_t srcLength)
4145 { return doReplace(start, _length, srcChars, srcStart, srcLength); }
4146 
4147 inline UnicodeString&
4149  int32_t _length,
4150  UChar srcChar)
4151 { return doReplace(start, _length, &srcChar, 0, 1); }
4152 
4153 inline UnicodeString&
4155  int32_t limit,
4156  const UnicodeString& srcText)
4157 { return doReplace(start, limit - start, srcText, 0, srcText.length()); }
4158 
4159 inline UnicodeString&
4161  int32_t limit,
4162  const UnicodeString& srcText,
4163  int32_t srcStart,
4164  int32_t srcLimit)
4165 { return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
4166 
4167 inline UnicodeString&
4169  const UnicodeString& newText)
4170 { return findAndReplace(0, length(), oldText, 0, oldText.length(),
4171  newText, 0, newText.length()); }
4172 
4173 inline UnicodeString&
4175  int32_t _length,
4176  const UnicodeString& oldText,
4177  const UnicodeString& newText)
4178 { return findAndReplace(start, _length, oldText, 0, oldText.length(),
4179  newText, 0, newText.length()); }
4180 
4181 // ============================
4182 // extract
4183 // ============================
4184 inline void
4185 UnicodeString::doExtract(int32_t start,
4186  int32_t _length,
4187  UnicodeString& target) const
4188 { target.replace(0, target.length(), *this, start, _length); }
4189 
4190 inline void
4192  int32_t _length,
4193  UChar *target,
4194  int32_t targetStart) const
4195 { doExtract(start, _length, target, targetStart); }
4196 
4197 inline void
4199  int32_t _length,
4200  UnicodeString& target) const
4201 { doExtract(start, _length, target); }
4202 
4203 #if !UCONFIG_NO_CONVERSION
4204 
4205 inline int32_t
4207  int32_t _length,
4208  char *dst,
4209  const char *codepage) const
4210 
4211 {
4212  // This dstSize value will be checked explicitly
4213  return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);
4214 }
4215 
4216 #endif
4217 
4218 inline void
4220  int32_t limit,
4221  UChar *dst,
4222  int32_t dstStart) const {
4223  pinIndex(start);
4224  pinIndex(limit);
4225  doExtract(start, limit - start, dst, dstStart);
4226 }
4227 
4228 inline UnicodeString
4229 UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
4230  return tempSubString(start, limit - start);
4231 }
4232 
4233 inline UChar
4234 UnicodeString::doCharAt(int32_t offset) const
4235 {
4236  if((uint32_t)offset < (uint32_t)length()) {
4237  return getArrayStart()[offset];
4238  } else {
4239  return kInvalidUChar;
4240  }
4241 }
4242 
4243 inline UChar
4244 UnicodeString::charAt(int32_t offset) const
4245 { return doCharAt(offset); }
4246 
4247 inline UChar
4248 UnicodeString::operator[] (int32_t offset) const
4249 { return doCharAt(offset); }
4250 
4251 inline UBool
4253  return fShortLength == 0;
4254 }
4255 
4256 //========================================
4257 // Write implementation methods
4258 //========================================
4259 inline void
4260 UnicodeString::setLength(int32_t len) {
4261  if(len <= 127) {
4262  fShortLength = (int8_t)len;
4263  } else {
4264  fShortLength = (int8_t)-1;
4265  fUnion.fFields.fLength = len;
4266  }
4267 }
4268 
4269 inline void
4270 UnicodeString::setToEmpty() {
4271  fShortLength = 0;
4272  fFlags = kShortString;
4273 }
4274 
4275 inline void
4276 UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) {
4277  setLength(len);
4278  fUnion.fFields.fArray = array;
4279  fUnion.fFields.fCapacity = capacity;
4280 }
4281 
4282 inline UnicodeString&
4284 { return doReplace(0, length(), &ch, 0, 1); }
4285 
4286 inline UnicodeString&
4288 { return replace(0, length(), ch); }
4289 
4290 inline UnicodeString&
4292  int32_t srcStart,
4293  int32_t srcLength)
4294 {
4295  unBogus();
4296  return doReplace(0, length(), srcText, srcStart, srcLength);
4297 }
4298 
4299 inline UnicodeString&
4301  int32_t srcStart)
4302 {
4303  unBogus();
4304  srcText.pinIndex(srcStart);
4305  return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
4306 }
4307 
4308 inline UnicodeString&
4310 {
4311  return copyFrom(srcText);
4312 }
4313 
4314 inline UnicodeString&
4315 UnicodeString::setTo(const UChar *srcChars,
4316  int32_t srcLength)
4317 {
4318  unBogus();
4319  return doReplace(0, length(), srcChars, 0, srcLength);
4320 }
4321 
4322 inline UnicodeString&
4324 {
4325  unBogus();
4326  return doReplace(0, length(), &srcChar, 0, 1);
4327 }
4328 
4329 inline UnicodeString&
4331 {
4332  unBogus();
4333  return replace(0, length(), srcChar);
4334 }
4335 
4336 inline UnicodeString&
4338  int32_t srcStart,
4339  int32_t srcLength)
4340 { return doReplace(length(), 0, srcText, srcStart, srcLength); }
4341 
4342 inline UnicodeString&
4344 { return doReplace(length(), 0, srcText, 0, srcText.length()); }
4345 
4346 inline UnicodeString&
4348  int32_t srcStart,
4349  int32_t srcLength)
4350 { return doReplace(length(), 0, srcChars, srcStart, srcLength); }
4351 
4352 inline UnicodeString&
4354  int32_t srcLength)
4355 { return doReplace(length(), 0, srcChars, 0, srcLength); }
4356 
4357 inline UnicodeString&
4359 { return doReplace(length(), 0, &srcChar, 0, 1); }
4360 
4361 inline UnicodeString&
4363 { return doReplace(length(), 0, &ch, 0, 1); }
4364 
4365 inline UnicodeString&
4367  return append(ch);
4368 }
4369 
4370 inline UnicodeString&
4372 { return doReplace(length(), 0, srcText, 0, srcText.length()); }
4373 
4374 inline UnicodeString&
4376  const UnicodeString& srcText,
4377  int32_t srcStart,
4378  int32_t srcLength)
4379 { return doReplace(start, 0, srcText, srcStart, srcLength); }
4380 
4381 inline UnicodeString&
4383  const UnicodeString& srcText)
4384 { return doReplace(start, 0, srcText, 0, srcText.length()); }
4385 
4386 inline UnicodeString&
4388  const UChar *srcChars,
4389  int32_t srcStart,
4390  int32_t srcLength)
4391 { return doReplace(start, 0, srcChars, srcStart, srcLength); }
4392 
4393 inline UnicodeString&
4395  const UChar *srcChars,
4396  int32_t srcLength)
4397 { return doReplace(start, 0, srcChars, 0, srcLength); }
4398 
4399 inline UnicodeString&
4401  UChar srcChar)
4402 { return doReplace(start, 0, &srcChar, 0, 1); }
4403 
4404 inline UnicodeString&
4406  UChar32 srcChar)
4407 { return replace(start, 0, srcChar); }
4408 
4409 
4410 inline UnicodeString&
4412 {
4413  // remove() of a bogus string makes the string empty and non-bogus
4414  if(isBogus()) {
4415  setToEmpty();
4416  } else {
4417  fShortLength = 0;
4418  }
4419  return *this;
4420 }
4421 
4422 inline UnicodeString&
4424  int32_t _length)
4425 {
4426  if(start <= 0 && _length == INT32_MAX) {
4427  // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
4428  return remove();
4429  }
4430  return doReplace(start, _length, NULL, 0, 0);
4431 }
4432 
4433 inline UnicodeString&
4435  int32_t limit)
4436 { return doReplace(start, limit - start, NULL, 0, 0); }
4437 
4438 inline UnicodeString &
4439 UnicodeString::retainBetween(int32_t start, int32_t limit) {
4440  truncate(limit);
4441  return doReplace(0, start, NULL, 0, 0);
4442 }
4443 
4444 inline UBool
4445 UnicodeString::truncate(int32_t targetLength)
4446 {
4447  if(isBogus() && targetLength == 0) {
4448  // truncate(0) of a bogus string makes the string empty and non-bogus
4449  unBogus();
4450  return FALSE;
4451  } else if((uint32_t)targetLength < (uint32_t)length()) {
4452  setLength(targetLength);
4453  return TRUE;
4454  } else {
4455  return FALSE;
4456  }
4457 }
4458 
4459 inline UnicodeString&
4461 { return doReverse(0, length()); }
4462 
4463 inline UnicodeString&
4465  int32_t _length)
4466 { return doReverse(start, _length); }
4467 
4469 
4470 #endif