Commit f0b38738 authored by Caolán McNamara

Related: tdf#105426 upgrade to hunspell-1.6.0

Change-Id: I275222d1a7d78cbfb1ca046562fc8a3c314b0fae
Reviewed-on: https://gerrit.libreoffice.org/33454
Reviewed-by: Caolán McNamara <caolanm@redhat.com>
Tested-by: Caolán McNamara <caolanm@redhat.com>
parent 44cb1d16
......@@ -9543,7 +9543,7 @@ if test "$with_system_hunspell" = "yes"; then
HUNSPELL_LIBS=-lhunspell
fi
AC_LANG_POP([C++])
libo_MINGW_CHECK_DLL([libhunspell-1.5])
libo_MINGW_CHECK_DLL([libhunspell-1.6])
HUNSPELL_CFLAGS=$(printf '%s' "$HUNSPELL_CFLAGS" | sed -e "s/-I/${ISYSTEM?}/g")
FilterLibs "${HUNSPELL_LIBS}"
HUNSPELL_LIBS="${filteredlibs}"
......@@ -9554,7 +9554,7 @@ else
if test "$COM" = "MSC"; then
HUNSPELL_LIBS="${WORKDIR}/LinkTarget/StaticLibrary/hunspell.lib"
else
HUNSPELL_LIBS="-L${WORKDIR}/UnpackedTarball/hunspell/src/hunspell/.libs -lhunspell-1.5"
HUNSPELL_LIBS="-L${WORKDIR}/UnpackedTarball/hunspell/src/hunspell/.libs -lhunspell-1.6"
fi
BUILD_TYPE="$BUILD_TYPE HUNSPELL"
fi
......
......@@ -60,7 +60,7 @@ export GRAPHITE_TARBALL := 3069842a88b8f40c6b83ad2850cda293-graphite2-minimal-1.
export HARFBUZZ_MD5SUM := 5986e1bfcd983d1f6caa53ef64c4abc5
export HARFBUZZ_TARBALL := harfbuzz-1.3.2.tar.bz2
export HSQLDB_TARBALL := 17410483b5b5f267aa18b7e00b65e6e0-hsqldb_1_8_0.zip
export HUNSPELL_TARBALL := 9849a2381bacbeb2714034ad825bede8-hunspell-1.5.4.tar.gz
export HUNSPELL_TARBALL := 047c3feb121261b76dc16cdb62f54483-hunspell-1.6.0.tar.gz
export HYPHEN_TARBALL := 5ade6ae2a99bc1e9e57031ca88d36dad-hyphen-2.8.8.tar.gz
export ICU_TARBALL := 1901302aaff1c1633ef81862663d2917-icu4c-58_1-src.tgz
export JFREEREPORT_FLOW_ENGINE_TARBALL := ba2930200c9f019c2d93a8c88c651a0f-flow-engine-0.9.4.zip
......
From bf05e232805f6c1fae5dea3c223de8bdaab425e9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm@redhat.com>
Date: Mon, 23 Jan 2017 13:26:53 +0000
Subject: [PATCH 1/3] unroll this a bit
---
src/hunspell/csutil.cxx | 49 ++++++++++++++++++++++++++++---------------------
1 file changed, 28 insertions(+), 21 deletions(-)
diff --git a/src/hunspell/csutil.cxx b/src/hunspell/csutil.cxx
index ac5cd98..c1666a5 100644
--- a/src/hunspell/csutil.cxx
+++ b/src/hunspell/csutil.cxx
@@ -518,18 +518,20 @@ unsigned char ccase(const struct cs_info* csconv, int nIndex) {
w_char upper_utf(w_char u, int langnum) {
unsigned short idx = (u.h << 8) + u.l;
- if (idx != unicodetoupper(idx, langnum)) {
- u.h = (unsigned char)(unicodetoupper(idx, langnum) >> 8);
- u.l = (unsigned char)(unicodetoupper(idx, langnum) & 0x00FF);
+ unsigned short upridx = unicodetoupper(idx, langnum);
+ if (idx != upridx) {
+ u.h = (unsigned char)(upridx >> 8);
+ u.l = (unsigned char)(upridx & 0x00FF);
}
return u;
}
w_char lower_utf(w_char u, int langnum) {
unsigned short idx = (u.h << 8) + u.l;
- if (idx != unicodetolower(idx, langnum)) {
- u.h = (unsigned char)(unicodetolower(idx, langnum) >> 8);
- u.l = (unsigned char)(unicodetolower(idx, langnum) & 0x00FF);
+ unsigned short lwridx = unicodetolower(idx, langnum);
+ if (idx != lwridx) {
+ u.h = (unsigned char)(lwridx >> 8);
+ u.l = (unsigned char)(lwridx & 0x00FF);
}
return u;
}
@@ -551,12 +553,13 @@ std::string& mkallsmall(std::string& s, const struct cs_info* csconv) {
}
std::vector<w_char>& mkallsmall_utf(std::vector<w_char>& u,
- int langnum) {
+ int langnum) {
for (size_t i = 0; i < u.size(); ++i) {
unsigned short idx = (u[i].h << 8) + u[i].l;
- if (idx != unicodetolower(idx, langnum)) {
- u[i].h = (unsigned char)(unicodetolower(idx, langnum) >> 8);
- u[i].l = (unsigned char)(unicodetolower(idx, langnum) & 0x00FF);
+ unsigned short lwridx = unicodetolower(idx, langnum);
+ if (idx != lwridx) {
+ u[i].h = (unsigned char)(lwridx >> 8);
+ u[i].l = (unsigned char)(lwridx & 0x00FF);
}
}
return u;
@@ -565,9 +568,10 @@ std::vector<w_char>& mkallsmall_utf(std::vector<w_char>& u,
std::vector<w_char>& mkallcap_utf(std::vector<w_char>& u, int langnum) {
for (size_t i = 0; i < u.size(); i++) {
unsigned short idx = (u[i].h << 8) + u[i].l;
- if (idx != unicodetoupper(idx, langnum)) {
- u[i].h = (unsigned char)(unicodetoupper(idx, langnum) >> 8);
- u[i].l = (unsigned char)(unicodetoupper(idx, langnum) & 0x00FF);
+ unsigned short upridx = unicodetoupper(idx, langnum);
+ if (idx != upridx) {
+ u[i].h = (unsigned char)(upridx >> 8);
+ u[i].l = (unsigned char)(upridx & 0x00FF);
}
}
return u;
@@ -583,9 +587,10 @@ std::string& mkinitcap(std::string& s, const struct cs_info* csconv) {
std::vector<w_char>& mkinitcap_utf(std::vector<w_char>& u, int langnum) {
if (!u.empty()) {
unsigned short idx = (u[0].h << 8) + u[0].l;
- if (idx != unicodetoupper(idx, langnum)) {
- u[0].h = (unsigned char)(unicodetoupper(idx, langnum) >> 8);
- u[0].l = (unsigned char)(unicodetoupper(idx, langnum) & 0x00FF);
+ unsigned short upridx = unicodetoupper(idx, langnum);
+ if (idx != upridx) {
+ u[0].h = (unsigned char)(upridx >> 8);
+ u[0].l = (unsigned char)(upridx & 0x00FF);
}
}
return u;
@@ -601,9 +606,10 @@ std::string& mkinitsmall(std::string& s, const struct cs_info* csconv) {
std::vector<w_char>& mkinitsmall_utf(std::vector<w_char>& u, int langnum) {
if (!u.empty()) {
unsigned short idx = (u[0].h << 8) + u[0].l;
- if (idx != unicodetolower(idx, langnum)) {
- u[0].h = (unsigned char)(unicodetolower(idx, langnum) >> 8);
- u[0].l = (unsigned char)(unicodetolower(idx, langnum) & 0x00FF);
+ unsigned short lwridx = unicodetolower(idx, langnum);
+ if (idx != lwridx) {
+ u[0].h = (unsigned char)(lwridx >> 8);
+ u[0].l = (unsigned char)(lwridx & 0x00FF);
}
}
return u;
@@ -2533,9 +2539,10 @@ int get_captype_utf8(const std::vector<w_char>& word, int langnum) {
size_t firstcap = 0;
for (size_t i = 0; i < word.size(); ++i) {
unsigned short idx = (word[i].h << 8) + word[i].l;
- if (idx != unicodetolower(idx, langnum))
+ unsigned short lwridx = unicodetolower(idx, langnum);
+ if (idx != lwridx)
ncap++;
- if (unicodetoupper(idx, langnum) == unicodetolower(idx, langnum))
+ if (unicodetoupper(idx, langnum) == lwridx)
nneutral++;
}
if (ncap) {
--
2.9.3
From 7c7f56e1c6fe510a2c5e826cc49aeae3f6614f86 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm@redhat.com>
Date: Mon, 23 Jan 2017 13:36:20 +0000
Subject: [PATCH 3/3] Related: hunspell#406 use a basic_string<w_char> instead
of vector
kcachegrind reports 1,066,887,723 -> 894,015,631 on
echo Hollo | valgrind --tool=callgrind ./src/tools/.libs/hunspell -d nl_NL
---
src/hunspell/w_char.hxx | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/src/hunspell/w_char.hxx b/src/hunspell/w_char.hxx
index 84ae13c..e112b5c 100644
--- a/src/hunspell/w_char.hxx
+++ b/src/hunspell/w_char.hxx
@@ -42,7 +42,6 @@
#define W_CHAR_HXX_
#include <string>
-#include <vector>
#ifndef GCC
struct w_char {
@@ -75,7 +74,7 @@ struct replentry {
namespace wide
{
- typedef std::vector<w_char> string;
+ typedef std::basic_string<w_char> string;
}
#endif
--
2.9.3
From 1393bd64581d6010a65d368e1031641391bdb154 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm@redhat.com>
Date: Mon, 23 Jan 2017 14:30:13 +0000
Subject: [PATCH 1/2] use a per-hashmgr persistent wide string scratch buffer
kcachegrind reports 894,015,631 -> 845,183,693
---
src/hunspell/hashmgr.cxx | 40 ++++++++++++++++++----------------------
src/hunspell/hashmgr.hxx | 1 +
2 files changed, 19 insertions(+), 22 deletions(-)
diff --git a/src/hunspell/hashmgr.cxx b/src/hunspell/hashmgr.cxx
index 6d92e9b..8d6189b 100644
--- a/src/hunspell/hashmgr.cxx
+++ b/src/hunspell/hashmgr.cxx
@@ -345,11 +345,10 @@ int HashMgr::add_hidden_capitalized_word(const std::string& word,
flags2[flagslen] = ONLYUPCASEFLAG;
if (utf8) {
std::string st;
- wide::string w;
- u8_u16(w, word);
- mkallsmall_utf(w, langnum);
- mkinitcap_utf(w, langnum);
- u16_u8(st, w);
+ u8_u16(workbuf, word);
+ mkallsmall_utf(workbuf, langnum);
+ mkinitcap_utf(workbuf, langnum);
+ u16_u8(st, workbuf);
return add_word(st, wcl, flags2, flagslen + 1, dp, true);
} else {
std::string new_word(word);
@@ -366,9 +365,8 @@ int HashMgr::add_hidden_capitalized_word(const std::string& word,
int HashMgr::get_clen_and_captype(const std::string& word, int* captype) {
int len;
if (utf8) {
- wide::string dest_utf;
- len = u8_u16(dest_utf, word);
- *captype = get_captype_utf8(dest_utf, langnum);
+ len = u8_u16(workbuf, word);
+ *captype = get_captype_utf8(workbuf, langnum);
} else {
len = word.size();
*captype = get_captype(word, csconv);
@@ -688,13 +686,12 @@ int HashMgr::decode_flags(unsigned short** result, const std::string& flags, Fil
break;
}
case FLAG_UNI: { // UTF-8 characters
- wide::string w;
- u8_u16(w, flags);
- len = w.size();
+ u8_u16(workbuf, flags);
+ len = workbuf.size();
*result = (unsigned short*)malloc(len * sizeof(unsigned short));
if (!*result)
return -1;
- memcpy(*result, &w[0], len * sizeof(short));
+ memcpy(*result, &workbuf[0], len * sizeof(short));
break;
}
default: { // Ispell's one-character flags (erfg -> e r f g)
@@ -760,12 +757,11 @@ bool HashMgr::decode_flags(std::vector<unsigned short>& result, const std::strin
break;
}
case FLAG_UNI: { // UTF-8 characters
- wide::string w;
- u8_u16(w, flags);
- size_t len = w.size();
+ u8_u16(workbuf, flags);
+ size_t len = workbuf.size();
size_t origsize = result.size();
result.resize(origsize + len);
- memcpy(&result[origsize], &w[0], len * sizeof(short));
+ memcpy(&result[origsize], &workbuf[0], len * sizeof(short));
break;
}
default: { // Ispell's one-character flags (erfg -> e r f g)
@@ -793,10 +789,9 @@ unsigned short HashMgr::decode_flag(const char* f) const {
s = (unsigned short)i;
break;
case FLAG_UNI: {
- wide::string w;
- u8_u16(w, f);
- if (!w.empty())
- memcpy(&s, &w[0], 1 * sizeof(short));
+ u8_u16(workbuf, f);
+ if (!workbuf.empty())
+ memcpy(&s, &workbuf[0], 1 * sizeof(short));
break;
}
default:
@@ -820,8 +815,9 @@ char* HashMgr::encode_flag(unsigned short f) const {
ch = stream.str();
} else if (flag_mode == FLAG_UNI) {
const w_char* w_c = (const w_char*)&f;
- wide::string w(w_c, w_c + 1);
- u16_u8(ch, w);
+ workbuf.clear();
+ workbuf.push_back(*w_c);
+ u16_u8(ch, workbuf);
} else {
ch.push_back((unsigned char)(f));
}
diff --git a/src/hunspell/hashmgr.hxx b/src/hunspell/hashmgr.hxx
index 312c8ba..78ffb44 100644
--- a/src/hunspell/hashmgr.hxx
+++ b/src/hunspell/hashmgr.hxx
@@ -97,6 +97,7 @@ class HashMgr {
struct cs_info* csconv;
std::string ignorechars;
wide::string ignorechars_utf16;
+ mutable wide::string workbuf;
int numaliasf; // flag vector `compression' with aliases
unsigned short** aliasf;
unsigned short* aliasflen;
--
2.9.3
From 5c7bfa8d36b87a0649f6f88b20624c38a3a5f0ae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm@redhat.com>
Date: Mon, 23 Jan 2017 14:43:19 +0000
Subject: [PATCH 2/2] use a per-hashmgr persistent wide string scratch buffer
kcachegrind reports 845,183,693 -> 812,760,392
---
src/hunspell/suggestmgr.cxx | 55 ++++++++++++++++++++-------------------------
src/hunspell/suggestmgr.hxx | 3 +++
2 files changed, 27 insertions(+), 31 deletions(-)
diff --git a/src/hunspell/suggestmgr.cxx b/src/hunspell/suggestmgr.cxx
index 1deec96..f5ea01b 100644
--- a/src/hunspell/suggestmgr.cxx
+++ b/src/hunspell/suggestmgr.cxx
@@ -491,10 +491,11 @@ int SuggestMgr::doubletwochars_utf(std::vector<std::string>& wlst,
if (word[i] == word[i - 2]) {
state++;
if (state == 3) {
- wide::string candidate_utf(word, word + i - 1);
- candidate_utf.insert(candidate_utf.end(), word + i + 1, word + wl);
+ workbuf1.clear();
+ workbuf1.insert(workbuf1.end(), word, word + i - 1);
+ workbuf1.insert(workbuf1.end(), word + i + 1, word + wl);
std::string candidate;
- u16_u8(candidate, candidate_utf);
+ u16_u8(candidate, workbuf1);
testsug(wlst, candidate, cpdsuggest, NULL, NULL);
state = 0;
}
@@ -1050,12 +1051,11 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
phonetable* ph = (pAMgr) ? pAMgr->get_phonetable() : NULL;
std::string target;
std::string candidate;
- wide::string w_candidate;
if (ph) {
if (utf8) {
- u8_u16(w_candidate, word);
- mkallcap_utf(w_candidate, langnum);
- u16_u8(candidate, w_candidate);
+ u8_u16(workbuf1, word);
+ mkallcap_utf(workbuf1, langnum);
+ u16_u8(candidate, workbuf1);
} else {
candidate.assign(word);
if (!nonbmp)
@@ -1121,10 +1121,9 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
int scphon = -20000;
if (ph && (sc > 2) && (abs(n - (int)hp->clen) <= 3)) {
if (utf8) {
- w_candidate.clear();
- u8_u16(w_candidate, HENTRY_WORD(hp));
- mkallcap_utf(w_candidate, langnum);
- u16_u8(candidate, w_candidate);
+ u8_u16(workbuf1, HENTRY_WORD(hp));
+ mkallcap_utf(workbuf1, langnum);
+ u16_u8(candidate, workbuf1);
} else {
candidate = HENTRY_WORD(hp);
mkallcap(candidate, csconv);
@@ -1804,11 +1803,10 @@ int SuggestMgr::ngram(int n,
return 0;
// lowering dictionary word
const wide::string* p_su2 = &su2;
- wide::string su2_copy;
if (opt & NGRAM_LOWERING) {
- su2_copy = su2;
- mkallsmall_utf(su2_copy, langnum);
- p_su2 = &su2_copy;
+ workbuf1 = su2;
+ mkallsmall_utf(workbuf1, langnum);
+ p_su2 = &workbuf1;
}
for (int j = 1; j <= n; j++) {
ns = 0;
@@ -1948,22 +1946,20 @@ int SuggestMgr::commoncharacterpositions(const char* s1,
int diffpos[2];
*is_swap = 0;
if (utf8) {
- wide::string su1;
- wide::string su2;
- int l1 = u8_u16(su1, s1);
- int l2 = u8_u16(su2, s2);
+ int l1 = u8_u16(workbuf1, s1);
+ int l2 = u8_u16(workbuf2, s2);
if (l1 <= 0 || l2 <= 0)
return 0;
// decapitalize dictionary word
if (complexprefixes) {
- su2[l2 - 1] = lower_utf(su2[l2 - 1], langnum);
+ workbuf2[l2 - 1] = lower_utf(workbuf2[l2 - 1], langnum);
} else {
- su2[0] = lower_utf(su2[0], langnum);
+ workbuf2[0] = lower_utf(workbuf2[0], langnum);
}
for (int i = 0; (i < l1) && (i < l2); i++) {
- if (su1[i] == su2[i]) {
+ if (workbuf1[i] == workbuf2[i]) {
num++;
} else {
if (diff < 2)
@@ -1972,8 +1968,8 @@ int SuggestMgr::commoncharacterpositions(const char* s1,
}
}
if ((diff == 2) && (l1 == l2) &&
- (su1[diffpos[0]] == su2[diffpos[1]]) &&
- (su1[diffpos[1]] == su2[diffpos[0]]))
+ (workbuf1[diffpos[0]] == workbuf2[diffpos[1]]) &&
+ (workbuf1[diffpos[1]] == workbuf2[diffpos[0]]))
*is_swap = 1;
} else {
size_t i;
@@ -2004,8 +2000,7 @@ int SuggestMgr::commoncharacterpositions(const char* s1,
int SuggestMgr::mystrlen(const char* word) {
if (utf8) {
- wide::string w;
- return u8_u16(w, word);
+ return u8_u16(workbuf1, word);
} else
return strlen(word);
}
@@ -2044,15 +2039,13 @@ void SuggestMgr::lcs(const char* s,
int* l2,
char** result) {
int n, m;
- wide::string su;
- wide::string su2;
char* b;
char* c;
int i;
int j;
if (utf8) {
- m = u8_u16(su, s);
- n = u8_u16(su2, s2);
+ m = u8_u16(workbuf1, s);
+ n = u8_u16(workbuf2, s2);
} else {
m = strlen(s);
n = strlen(s2);
@@ -2073,7 +2066,7 @@ void SuggestMgr::lcs(const char* s,
c[j] = 0;
for (i = 1; i <= m; i++) {
for (j = 1; j <= n; j++) {
- if (((utf8) && (su[i - 1] == su2[j - 1])) ||
+ if (((utf8) && (workbuf1[i - 1] == workbuf2[j - 1])) ||
((!utf8) && (s[i - 1] == s2[j - 1]))) {
c[i * (n + 1) + j] = c[(i - 1) * (n + 1) + j - 1] + 1;
b[i * (n + 1) + j] = LCS_UPLEFT;
diff --git a/src/hunspell/suggestmgr.hxx b/src/hunspell/suggestmgr.hxx
index 9bfa933..80b0fd7 100644
--- a/src/hunspell/suggestmgr.hxx
+++ b/src/hunspell/suggestmgr.hxx
@@ -105,6 +105,9 @@ class SuggestMgr {
SuggestMgr& operator=(const SuggestMgr&);
private:
+ mutable wide::string workbuf1;
+ mutable wide::string workbuf2;
+
char* ckey;
size_t ckeyl;
wide::string ckey_utf;
--
2.9.3
......@@ -27,8 +27,10 @@ endif
$(call gb_ExternalProject_get_state_target,hunspell,build):
$(call gb_ExternalProject_run,build,\
$(if $(filter IOS MACOSX,$(OS)),ACLOCAL="aclocal -I $(SRCDIR)/m4/mac") \
LIBS="$(gb_STDLIBS) $(LIBS)" \
./configure --disable-shared --disable-nls --with-pic \
autoreconf && \
$(SHELL) ./configure --disable-shared --disable-nls --with-pic \
$(if $(CROSS_COMPILING),--build=$(BUILD_PLATFORM) --host=$(HOST_PLATFORM))\
$(if $(filter AIX,$(OS)),CFLAGS="-D_LINUX_SOURCE_COMPAT") \
$(if $(filter-out WNTGCC,$(OS)$(COM)),,LDFLAGS="-Wl,--enable-runtime-pseudo-reloc-v2") \
......
......@@ -17,10 +17,15 @@ $(eval $(call gb_UnpackedTarball_set_post_action,hunspell,\
))
endif
$(eval $(call gb_UnpackedTarball_set_patchlevel,hunspell,0))
$(eval $(call gb_UnpackedTarball_set_patchlevel,hunspell,1))
$(eval $(call gb_UnpackedTarball_add_patches,hunspell, \
external/hunspell/clangcl-werror.patch \
external/hunspell/0001-Revert-Remove-autotools-autogenerated-files.patch \
external/hunspell/0001-unroll-this-a-bit.patch \
external/hunspell/0002-rename-std-vector-w_char-to-wide-string.patch \
external/hunspell/0003-Related-hunspell-406-use-a-basic_string-w_char-inste.patch \
external/hunspell/0004-use-a-per-hashmgr-persistent-wide-string-scratch-buf.patch \
external/hunspell/0005-use-a-per-hashmgr-persistent-wide-string-scratch-buf.patch \
))
# vim: set noet sw=4 ts=4:
--- src/hunspell/hunspell.hxx
+++ src/hunspell/hunspell.hxx
@@ -85,7 +85,7 @@
#define MAXSHARPS 5
#define MAXWORDLEN 176
-#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)
+#if defined __GNUC__ && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
# define H_DEPRECATED __attribute__((__deprecated__))
#elif defined(_MSC_VER) && (_MSC_VER >= 1300)
# define H_DEPRECATED __declspec(deprecated)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment