Commit 9206a08a, authored by Eike Rathke

Upgrade to ICU 60.1

Change-Id: I6d90f51ee88c4e1005edbaa93d23cfb94cb2acfb
Reviewed-on: https://gerrit.libreoffice.org/44871
Reviewed-by: Eike Rathke <erack@redhat.com>
Tested-by: Eike Rathke <erack@redhat.com>
parent a8687041
......@@ -8827,7 +8827,7 @@ SYSTEM_GENBRK=
SYSTEM_GENCCODE=
SYSTEM_GENCMN=
ICU_MAJOR=59
ICU_MAJOR=60
ICU_MINOR=1
ICU_RECLASSIFIED_PREPEND_SET_EMPTY="TRUE"
ICU_RECLASSIFIED_CONDITIONAL_JAPANESE_STARTER="TRUE"
......
......@@ -92,10 +92,10 @@ export HUNSPELL_SHA256SUM := 3cd9ceb062fe5814f668e4f22b2fa6e3ba0b339b921739541ce
export HUNSPELL_TARBALL := hunspell-1.6.2.tar.gz
export HYPHEN_SHA256SUM := 304636d4eccd81a14b6914d07b84c79ebb815288c76fe027b9ebff6ff24d5705
export HYPHEN_TARBALL := 5ade6ae2a99bc1e9e57031ca88d36dad-hyphen-2.8.8.tar.gz
export ICU_SHA256SUM := 7132fdaf9379429d004005217f10e00b7d2319d0fea22bdfddef8991c45b75fe
export ICU_TARBALL := icu4c-59_1-src.tgz
export ICU_DATA_SHA256SUM := 38a84c1289c32a96924ff286a2f33edc97671b4cc90e8c99708553a6b5a9e70a
export ICU_DATA_TARBALL := icu4c-59_1-data.zip
export ICU_SHA256SUM := f8f5a6c8fbf32c015a467972bdb1477dc5f5d5dfea908b6ed218715eeb5ee225
export ICU_TARBALL := icu4c-60_1-src.tgz
export ICU_DATA_SHA256SUM := bbb920c6e67de8d9dcc0f0ec5554870170d0cb5eff6f0bc9881efc605f68210c
export ICU_DATA_TARBALL := icu4c-60_1-data.zip
export JFREEREPORT_FLOW_ENGINE_SHA256SUM := 233f66e8d25c5dd971716d4200203a612a407649686ef3b52075d04b4c9df0dd
export JFREEREPORT_FLOW_ENGINE_TARBALL := ba2930200c9f019c2d93a8c88c651a0f-flow-engine-0.9.4.zip
export JFREEREPORT_FLUTE_SHA256SUM := 1b5b24f7bc543c0362b667692f78db8bab4ed6dafc6172f104d0bd3757d8a133
......
......@@ -26,6 +26,10 @@ ifneq ($(OS),WNT)
INVOKE_FPA:="CPU=\$$(EMPTY) $${FB_CPU_ARG}"
endif
ifeq ($(COM_IS_CLANG),TRUE)
firebird_NO_CXX11_NARROWING := -Wno-c++11-narrowing
endif
MAKE_PRE=$(call gb_Helper_extend_ld_path,$(call gb_UnpackedTarball_get_dir,icu)/source/lib) LC_ALL=C
MAKE_POST=$(if $(filter MACOSX,$(OS)),&& $(PERL) \
......@@ -70,6 +74,8 @@ $(call gb_ExternalProject_get_state_target,firebird,build):
$(if $(SYSTEM_LIBTOMMATH),$(LIBTOMMATH_CFLAGS), \
-L$(call gb_UnpackedTarball_get_dir,libtommath) \
) \
$(CXXFLAGS_CXX11) \
$(firebird_NO_CXX11_NARROWING) \
" \
&& export LDFLAGS=" \
$(if $(SYSTEM_ICU),$(ICU_LIBS), \
......
......@@ -34,10 +34,7 @@ $(eval $(call gb_UnpackedTarball_add_patches,icu,\
external/icu/icu4c-clang-cl.patch.1 \
$(if $(filter-out ANDROID,$(OS)),external/icu/icu4c-icudata-stdlibs.patch.1) \
external/icu/icu4c-khmerbreakengine.patch.1 \
external/icu/icu4c-59-werror-undef.patch.1 \
external/icu/icu4c-59-werror-shadow.patch.1 \
external/icu/icu4c-59-icu13329-xlocale.patch.1 \
external/icu/icu4c-changeset-40324.patch.1 \
))
$(eval $(call gb_UnpackedTarball_add_file,icu,source/data/brkitr/khmerdict.dict,external/icu/khmerdict.dict))
......
# xlocale.h is gone with glibc 2.26 and should never have been included.
# https://ssl.icu-project.org/trac/ticket/13329
diff -ur icu.org/source/i18n/digitlst.cpp icu/source/i18n/digitlst.cpp
--- icu.org/source/i18n/digitlst.cpp 2017-01-20 01:20:31.000000000 +0100
+++ icu/source/i18n/digitlst.cpp 2017-09-13 13:25:59.980160603 +0200
@@ -63,6 +63,8 @@
#if U_USE_STRTOD_L
# if U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_CYGWIN
# include <locale.h>
+# elif U_PLATFORM_IS_LINUX_BASED
+# include <ctype.h> // should be for all POSIX.2008 compliant platforms, but..
# else
# include <xlocale.h>
# endif
diff -ur icu.org/source/common/unicode/char16ptr.h icu/source/common/unicode/char16ptr.h
# https://ssl.icu-project.org/trac/ticket/13176
--- icu.org/source/common/unicode/char16ptr.h 2017-03-29 06:44:37.000000000 +0200
+++ icu/source/common/unicode/char16ptr.h 2017-04-24 11:56:47.707572355 +0200
@@ -107,14 +107,14 @@
#ifdef U_ALIASING_BARRIER
-Char16Ptr::Char16Ptr(char16_t *p) : p(p) {}
+Char16Ptr::Char16Ptr(char16_t *p_) : p(p_) {}
#if !U_CHAR16_IS_TYPEDEF
-Char16Ptr::Char16Ptr(uint16_t *p) : p(cast(p)) {}
+Char16Ptr::Char16Ptr(uint16_t *p_) : p(cast(p_)) {}
#endif
#if U_SIZEOF_WCHAR_T==2
-Char16Ptr::Char16Ptr(wchar_t *p) : p(cast(p)) {}
+Char16Ptr::Char16Ptr(wchar_t *p_) : p(cast(p_)) {}
#endif
-Char16Ptr::Char16Ptr(std::nullptr_t p) : p(p) {}
+Char16Ptr::Char16Ptr(std::nullptr_t p_) : p(p_) {}
Char16Ptr::~Char16Ptr() {
U_ALIASING_BARRIER(p);
}
@@ -215,14 +215,14 @@
#ifdef U_ALIASING_BARRIER
-ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) : p(p) {}
+ConstChar16Ptr::ConstChar16Ptr(const char16_t *p_) : p(p_) {}
#if !U_CHAR16_IS_TYPEDEF
-ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) : p(cast(p)) {}
+ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p_) : p(cast(p_)) {}
#endif
#if U_SIZEOF_WCHAR_T==2
-ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) : p(cast(p)) {}
+ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p_) : p(cast(p_)) {}
#endif
-ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) : p(p) {}
+ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p_) : p(p_) {}
ConstChar16Ptr::~ConstChar16Ptr() {
U_ALIASING_BARRIER(p);
}
diff -ur icu.org/source/common/unicode/unistr.h icu/source/common/unicode/unistr.h
--- icu.org/source/common/unicode/unistr.h 2017-03-29 06:44:37.000000000 +0200
+++ icu/source/common/unicode/unistr.h 2017-04-24 11:59:51.782076511 +0200
......
diff -ur icu.org/source/common/unicode/uvernum.h icu/source/common/unicode/uvernum.h
# https://ssl.icu-project.org/trac/ticket/13175
--- icu.org/source/common/unicode/uvernum.h 2017-03-21 02:03:49.000000000 +0100
+++ icu/source/common/unicode/uvernum.h 2017-04-21 23:44:55.123597927 +0200
@@ -125,7 +125,7 @@
* This value will change in the subsequent releases of ICU
* @stable ICU 2.6
*/
-#if U_PLATFORM_HAS_WINUWP_API == 0
+#if !defined(U_PLATFORM_HAS_WINUWP_API) || (U_PLATFORM_HAS_WINUWP_API == 0)
#define U_ICU_VERSION_SHORT "59"
#else
// U_DISABLE_RENAMING does not impact dat file name
Index: /trunk/icu4c/source/common/utext.cpp
===================================================================
--- icu.orig/source/common/utext.cpp
+++ icu/source/common/utext.cpp
@@ -848,7 +848,13 @@
// Chunk size.
-// Must be less than 85, because of byte mapping from UChar indexes to native indexes.
-// Worst case is three native bytes to one UChar. (Supplemenaries are 4 native bytes
-// to two UChars.)
+// Must be less than 42 (256/6), because of byte mapping from UChar indexes to native indexes.
+// Worst case there are six UTF-8 bytes per UChar.
+// obsolete 6 byte form fd + 5 trails maps to fffd
+// obsolete 5 byte form fc + 4 trails maps to fffd
+// non-shortest 4 byte forms maps to fffd
+// normal supplementaries map to a pair of utf-16, two utf8 bytes per utf-16 unit
+// mapToUChars array size must allow for the worst case, 6.
+// This could be brought down to 4, by treating fd and fc as pure illegal,
+// rather than obsolete lead bytes. But that is not compatible with the utf-8 access macros.
//
enum { UTF8_TEXT_CHUNK_SIZE=32 };
@@ -890,5 +896,5 @@
// one for a supplementary starting in the last normal position,
// and one for an entry for the buffer limit position.
- uint8_t mapToUChars[UTF8_TEXT_CHUNK_SIZE*3+6]; // Map native offset from bufNativeStart to
+ uint8_t mapToUChars[UTF8_TEXT_CHUNK_SIZE*6+6]; // Map native offset from bufNativeStart to
// correspoding offset in filled part of buf.
int32_t align;
@@ -1033,4 +1039,5 @@
u8b = (UTF8Buf *)ut->p; // the current buffer
mapIndex = ix - u8b->toUCharsMapStart;
+ U_ASSERT(mapIndex < (int32_t)sizeof(UTF8Buf::mapToUChars));
ut->chunkOffset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;
return TRUE;
@@ -1299,4 +1306,8 @@
// If index is at the end, there is no character there to look at.
if (ix != ut->b) {
+ // Note: this function will only move the index back if it is on a trail byte
+ // and there is a preceding lead byte and the sequence from the lead
+ // through this trail could be part of a valid UTF-8 sequence
+ // Otherwise the index remains unchanged.
U8_SET_CP_START(s8, 0, ix);
}
@@ -1312,5 +1323,8 @@
uint8_t *mapToNative = u8b->mapToNative;
uint8_t *mapToUChars = u8b->mapToUChars;
- int32_t toUCharsMapStart = ix - (UTF8_TEXT_CHUNK_SIZE*3 + 1);
+ int32_t toUCharsMapStart = ix - sizeof(UTF8Buf::mapToUChars) + 1;
+ // Note that toUCharsMapStart can be negative. Happens when the remaining
+ // text from current position to the beginning is less than the buffer size.
+ // + 1 because mapToUChars must have a slot at the end for the bufNativeLimit entry.
int32_t destIx = UTF8_TEXT_CHUNK_SIZE+2; // Start in the overflow region
// at end of buffer to leave room
@@ -1339,4 +1353,5 @@
// Special case ASCII range for speed.
buf[destIx] = (UChar)c;
+ U_ASSERT(toUCharsMapStart <= srcIx);
mapToUChars[srcIx - toUCharsMapStart] = (uint8_t)destIx;
mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);
@@ -1368,4 +1383,5 @@
mapToUChars[sIx-- - toUCharsMapStart] = (uint8_t)destIx;
} while (sIx >= srcIx);
+ U_ASSERT(toUCharsMapStart <= (srcIx+1));
// Set native indexing limit to be the current position.
@@ -1542,4 +1558,5 @@
U_ASSERT(index<=ut->chunkNativeLimit);
int32_t mapIndex = index - u8b->toUCharsMapStart;
+ U_ASSERT(mapIndex < (int32_t)sizeof(UTF8Buf::mapToUChars));
int32_t offset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;
U_ASSERT(offset>=0 && offset<=ut->chunkLength);
Index: /trunk/icu4c/source/test/intltest/utxttest.cpp
===================================================================
--- icu.orig/source/test/intltest/utxttest.cpp
+++ icu/source/test/intltest/utxttest.cpp
@@ -68,4 +68,6 @@
case 7: name = "Ticket12130";
if (exec) Ticket12130(); break;
+ case 8: name = "Ticket12888";
+ if (exec) Ticket12888(); break;
default: name = ""; break;
}
@@ -1584,2 +1586,62 @@
utext_close(&ut);
}
+
+// Ticket 12888: bad handling of illegal utf-8 containing many instances of the archaic, now illegal,
+// six byte utf-8 forms. Original implementation had an assumption that
+// there would be at most three utf-8 bytes per UTF-16 code unit.
+// The five and six byte sequences map to a single replacement character.
+
+void UTextTest::Ticket12888() {
+ const char *badString =
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80";
+
+ UErrorCode status = U_ZERO_ERROR;
+ LocalUTextPointer ut(utext_openUTF8(NULL, badString, -1, &status));
+ TEST_SUCCESS(status);
+ for (;;) {
+ UChar32 c = utext_next32(ut.getAlias());
+ if (c == U_SENTINEL) {
+ break;
+ }
+ }
+ int32_t endIdx = utext_getNativeIndex(ut.getAlias());
+ if (endIdx != (int32_t)strlen(badString)) {
+ errln("%s:%d expected=%d, actual=%d", __FILE__, __LINE__, strlen(badString), endIdx);
+ return;
+ }
+
+ for (int32_t prevIndex = endIdx; prevIndex>0;) {
+ UChar32 c = utext_previous32(ut.getAlias());
+ int32_t currentIndex = utext_getNativeIndex(ut.getAlias());
+ if (c != 0xfffd) {
+ errln("%s:%d (expected, actual, index) = (%d, %d, %d)\n",
+ __FILE__, __LINE__, 0xfffd, c, currentIndex);
+ break;
+ }
+ if (currentIndex != prevIndex - 6) {
+ errln("%s:%d: wrong index. Expected, actual = %d, %d",
+ __FILE__, __LINE__, prevIndex - 6, currentIndex);
+ break;
+ }
+ prevIndex = currentIndex;
+ }
+}
Index: /trunk/icu4c/source/test/intltest/utxttest.h
===================================================================
--- icu.orig/source/test/intltest/utxttest.h
+++ icu/source/test/intltest/utxttest.h
@@ -39,4 +39,5 @@
void Ticket10983();
void Ticket12130();
+ void Ticket12888();
private:
# https://ssl.icu-project.org/trac/changeset/40324/trunk/icu4c/source/i18n/zonemeta.cpp
Index: trunk/icu4c/source/i18n/zonemeta.cpp
===================================================================
--- icu.orig/source/i18n/zonemeta.cpp
+++ icu/source/i18n/zonemeta.cpp
@@ -691,5 +691,4 @@
if (U_FAILURE(status)) {
delete mzMappings;
- deleteOlsonToMetaMappingEntry(entry);
uprv_free(entry);
break;
......@@ -310,7 +310,7 @@ diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp
@@ -886,180 +1035,204 @@
int32_t rangeStart,
int32_t rangeEnd,
UStack &foundBreaks ) const {
UVector32 &foundBreaks ) const {
- if ((rangeEnd - rangeStart) < KHMER_MIN_WORD_SPAN) {
- return 0; // Not enough characters for two words
- }
......@@ -760,7 +760,7 @@ diff -ur icu.org/source/common/dictbe.h icu/source/common/dictbe.h
* @param text A UText representing the text. The iterator is left at
- * the end of the run of characters which the engine is capable of handling
+ * the end of the run of characters which the engine is capable of handling
* that starts from the first (or last) character in the range.
* that starts from the first character in the range.
* @param startPos The start of the run within the supplied text.
* @param endPos The end of the run within the supplied text.
@@ -245,118 +306,120 @@
......@@ -819,7 +819,7 @@ diff -ur icu.org/source/common/dictbe.h icu/source/common/dictbe.h
- virtual int32_t divideUpDictionaryRange( UText *text,
- int32_t rangeStart,
- int32_t rangeEnd,
- UStack &foundBreaks ) const;
- UVector32 &foundBreaks ) const;
-
-};
-
......@@ -875,7 +875,7 @@ diff -ur icu.org/source/common/dictbe.h icu/source/common/dictbe.h
- virtual int32_t divideUpDictionaryRange( UText *text,
- int32_t rangeStart,
- int32_t rangeEnd,
- UStack &foundBreaks ) const;
- UVector32 &foundBreaks ) const;
-
-};
-
......@@ -931,7 +931,7 @@ diff -ur icu.org/source/common/dictbe.h icu/source/common/dictbe.h
+ virtual int32_t divideUpDictionaryRange( UText *text,
+ int32_t rangeStart,
+ int32_t rangeEnd,
+ UStack &foundBreaks ) const;
+ UVector32 &foundBreaks ) const;
+
+};
+
......@@ -989,7 +989,7 @@ diff -ur icu.org/source/common/dictbe.h icu/source/common/dictbe.h
+ virtual int32_t divideUpDictionaryRange( UText *text,
+ int32_t rangeStart,
+ int32_t rangeEnd,
+ UStack &foundBreaks ) const;
+ UVector32 &foundBreaks ) const;
+
+};
+
......
......@@ -65,18 +65,6 @@ diff -ur icu.org/source/common/ubidiimp.h icu/source/common/ubidiimp.h
#define REMOVE_ODD_BIT(x) ((x)&=~INDEX_ODD_BIT)
#define GET_INDEX(x) ((x)&~INDEX_ODD_BIT)
diff -ur icu.org/source/common/ucharstriebuilder.cpp icu/source/common/ucharstriebuilder.cpp
--- icu.org/source/common/ucharstriebuilder.cpp 2017-02-03 19:57:23.000000000 +0100
+++ icu/source/common/ucharstriebuilder.cpp 2017-04-21 22:46:25.375651159 +0200
@@ -287,7 +287,7 @@
UCharsTrieBuilder::UCTLinearMatchNode::UCTLinearMatchNode(const UChar *units, int32_t len, Node *nextNode)
: LinearMatchNode(len, nextNode), s(units) {
- hash=hash*37+ustr_hashUCharsN(units, len);
+ hash=hash*37U+ustr_hashUCharsN(units, len);
}
UBool
diff -ur icu.org/source/common/ucmndata.cpp icu/source/common/ucmndata.cpp
--- icu.org/source/common/ucmndata.cpp 2017-03-08 16:34:47.000000000 +0100
+++ icu/source/common/ucmndata.cpp 2017-04-21 22:46:25.376651159 +0200
......@@ -101,71 +89,3 @@ diff -ur icu.org/source/common/ucmndata.h icu/source/common/ucmndata.h
} UDataOffsetTOC;
/**
diff -ur icu.org/source/common/unicode/stringtriebuilder.h icu/source/common/unicode/stringtriebuilder.h
--- icu.org/source/common/unicode/stringtriebuilder.h 2017-03-10 23:01:34.000000000 +0100
+++ icu/source/common/unicode/stringtriebuilder.h 2017-04-21 22:47:43.395634383 +0200
@@ -276,7 +276,7 @@
void setValue(int32_t v) {
hasValue=TRUE;
value=v;
- hash=hash*37+v;
+ hash=hash*37U+v;
}
protected:
UBool hasValue;
@@ -307,7 +307,7 @@
class LinearMatchNode : public ValueNode {
public:
LinearMatchNode(int32_t len, Node *nextNode)
- : ValueNode((0x333333*37+len)*37+hashCode(nextNode)),
+ : ValueNode((0x333333U*37+len)*37+hashCode(nextNode)),
length(len), next(nextNode) {}
virtual UBool operator==(const Node &other) const;
virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
@@ -342,7 +342,7 @@
equal[length]=NULL;
values[length]=value;
++length;
- hash=(hash*37+c)*37+value;
+ hash=(hash*37U+c)*37+value;
}
// Adds a unit which leads to another match node.
void add(int32_t c, Node *node) {
@@ -350,7 +350,7 @@
equal[length]=node;
values[length]=0;
++length;
- hash=(hash*37+c)*37+hashCode(node);
+ hash=(hash*37U+c)*37+hashCode(node);
}
protected:
Node *equal[kMaxBranchLinearSubNodeLength]; // NULL means "has final value".
@@ -365,7 +365,7 @@
class SplitBranchNode : public BranchNode {
public:
SplitBranchNode(char16_t middleUnit, Node *lessThanNode, Node *greaterOrEqualNode)
- : BranchNode(((0x555555*37+middleUnit)*37+
+ : BranchNode(((0x555555U*37+middleUnit)*37+
hashCode(lessThanNode))*37+hashCode(greaterOrEqualNode)),
unit(middleUnit), lessThan(lessThanNode), greaterOrEqual(greaterOrEqualNode) {}
virtual UBool operator==(const Node &other) const;
@@ -382,7 +382,7 @@
class BranchHeadNode : public ValueNode {
public:
BranchHeadNode(int32_t len, Node *subNode)
- : ValueNode((0x666666*37+len)*37+hashCode(subNode)),
+ : ValueNode((0x666666U*37+len)*37+hashCode(subNode)),
length(len), next(subNode) {}
virtual UBool operator==(const Node &other) const;
virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
diff -ur icu.org/source/i18n/collationdatareader.cpp icu/source/i18n/collationdatareader.cpp
--- icu.org/source/i18n/collationdatareader.cpp 2017-01-20 01:20:31.000000000 +0100
+++ icu/source/i18n/collationdatareader.cpp 2017-04-21 22:46:25.380651158 +0200
@@ -419,6 +419,7 @@
tailoring.data, ts, fastLatinPrimaries, UPRV_LENGTHOF(fastLatinPrimaries));
if(options == ts.options && ts.variableTop != 0 &&
reorderCodesLength == ts.reorderCodesLength &&
+ reorderCodesLength != 0 &&
uprv_memcmp(reorderCodes, ts.reorderCodes, reorderCodesLength * 4) == 0 &&
fastLatinOptions == ts.fastLatinOptions &&
(fastLatinOptions < 0 ||
......@@ -544,6 +544,27 @@ LineBreakResults SAL_CALL BreakIterator_Unicode::getLineBreak(
} else { //word boundary break
lbr.breakIndex = pLineBI->preceding(nStartPos);
lbr.breakType = BreakType::WORDBOUNDARY;
// Special case for Slash U+002F SOLIDUS in URI and path names.
// TR14 defines that as SY: Symbols Allowing Break After (A).
// This is unwanted in paths, see also i#17155
if (lbr.breakIndex > 0 && Text[lbr.breakIndex-1] == '/')
{
// Look backward and take any whitespace before as a break
// opportunity. This also glues something like "w/o".
// Avoid an overly long path and break it as was indicated.
// Overly long here is arbitrarily defined.
const sal_Int32 nOverlyLong = 66;
sal_Int32 nPos = lbr.breakIndex - 1;
while (nPos > 0 && lbr.breakIndex - nPos < nOverlyLong)
{
if (u_isWhitespace(Text.iterateCodePoints( &nPos, -1)))
{
lbr.breakIndex = nPos + 1;
break;
}
}
}
}
#define WJ 0x2060 // Word Joiner
......
......@@ -715,6 +715,17 @@ OString SAL_CALL unicode::getExemplarLanguageForUScriptCode(UScriptCode eScript)
case USCRIPT_SYMBOLS_EMOJI:
sRet = "mis"; // Zsye - Emoji variant
break;
#endif
#if (U_ICU_VERSION_MAJOR_NUM >= 60)
case USCRIPT_MASARAM_GONDI:
sRet = "gon-Gonm"; // macro language code, could be wsg,esg,gno
break;
case USCRIPT_SOYOMBO:
sRet = "mn-Soyo"; // abugida to write Mongolian, also Tibetan and Sanskrit
break;
case USCRIPT_ZANABAZAR_SQUARE:
sRet = "mn-Zanb"; // abugida to write Mongolian
break;
#endif
}
return sRet;
......
......@@ -1645,6 +1645,13 @@
#define RID_SUBSETSTR_OSAGE NC_("RID_SUBSETMAP", "Osage")
#define RID_SUBSETSTR_TANGUT NC_("RID_SUBSETMAP", "Tangut")
#define RID_SUBSETSTR_TANGUT_COMPONENTS NC_("RID_SUBSETMAP", "Tangut Components")
#define RID_SUBSETSTR_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F NC_("RID_SUBSETMAP", "CJK Unified Ideographs Extension F")
#define RID_SUBSETSTR_KANA_EXTENDED_A NC_("RID_SUBSETMAP", "Kana Extended-A")
#define RID_SUBSETSTR_MASARAM_GONDI NC_("RID_SUBSETMAP", "Masaram Gondi")
#define RID_SUBSETSTR_NUSHU NC_("RID_SUBSETMAP", "Nushu")
#define RID_SUBSETSTR_SOYOMBO NC_("RID_SUBSETMAP", "Soyombo")
#define RID_SUBSETSTR_SYRIAC_SUPPLEMENT NC_("RID_SUBSETMAP", "Syriac Supplement")
#define RID_SUBSETSTR_ZANABAZAR_SQUARE NC_("RID_SUBSETMAP", "Zanabazar Square")
// TSCP Classification
#define RID_CLASSIFICATION_CHANGE_CATEGORY NC_("RID_CLASSIFICATION_CHANGE_CATEGORY", "Do you really want to change the classification category?")
......
......@@ -1822,6 +1822,29 @@ void SubsetMap::InitList()
aAllSubsets.emplace_back( 0x18800, 0x18AFF, SvxResId(RID_SUBSETSTR_TANGUT_COMPONENTS) );
break;
#endif
#if (U_ICU_VERSION_MAJOR_NUM >= 60)
case UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F:
aAllSubsets.emplace_back( 0x2CEB0, 0x2EBE0, SvxResId(RID_SUBSETSTR_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F) );
break;
case UBLOCK_KANA_EXTENDED_A:
aAllSubsets.emplace_back( 0x1B100, 0x1B12F, SvxResId(RID_SUBSETSTR_KANA_EXTENDED_A) );
break;
case UBLOCK_MASARAM_GONDI:
aAllSubsets.emplace_back( 0x11D00, 0x11D5F, SvxResId(RID_SUBSETSTR_MASARAM_GONDI) );
break;
case UBLOCK_NUSHU:
aAllSubsets.emplace_back( 0x1B170, 0x1B2FF, SvxResId(RID_SUBSETSTR_NUSHU) );
break;
case UBLOCK_SOYOMBO:
aAllSubsets.emplace_back( 0x11A50, 0x11AAF, SvxResId(RID_SUBSETSTR_SOYOMBO) );
break;
case UBLOCK_SYRIAC_SUPPLEMENT:
aAllSubsets.emplace_back( 0x0860, 0x086f, SvxResId(RID_SUBSETSTR_SYRIAC_SUPPLEMENT) );
break;
case UBLOCK_ZANABAZAR_SQUARE:
aAllSubsets.emplace_back( 0x11A00, 0x11A4F, SvxResId(RID_SUBSETSTR_ZANABAZAR_SQUARE) );
break;
#endif
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment