Kaydet (Commit) 0b6fb1f4 authored tarafından Tor Lillqvist's avatar Tor Lillqvist

Put the dict_ja and _zh data in files instead of code for iOS

Map the file(s) into memory on demand. The executable file of an app
needs to be as small as possible. Including additional data files in
an app bundle is fine.

Change-Id: Ife9bfe99a2cf0473d459f38f50dfa3304b39e282
üst c0f2dc13
......@@ -11,6 +11,20 @@ $(eval $(call gb_CustomTarget_CustomTarget,i18npool/breakiterator))
i18npool_BIDIR := $(call gb_CustomTarget_get_workdir,i18npool/breakiterator)
ifeq ($(OS),IOS)
$(call gb_CustomTarget_get_target,i18npool/breakiterator) : \
$(i18npool_BIDIR)/dict_ja.data $(i18npool_BIDIR)/dict_zh.data $(i18npool_BIDIR)/OpenOffice_dat.c
# Generate the dictionary data file dict_<lang>.data from <lang>.dic by
# running gendict with: <input .dic> <output file> <lang>.
# The language argument is derived from the target name, so the pattern must
# use the .data suffix of this rule's targets; with the .cxx suffix the
# patsubst never matches and the whole target path is passed instead.
$(i18npool_BIDIR)/dict_%.data : \
		$(SRCDIR)/i18npool/source/breakiterator/data/%.dic \
		$(call gb_Executable_get_runtime_dependencies,gendict) \
		| $(i18npool_BIDIR)/.dir
	$(call gb_Output_announce,$(subst $(WORKDIR)/,,$@),$(true),DIC,1)
	$(call gb_Helper_abbreviate_dirs,\
		$(call gb_Helper_execute,gendict) $< $@ $(patsubst $(i18npool_BIDIR)/dict_%.data,%,$@))
else
$(call gb_CustomTarget_get_target,i18npool/breakiterator) : \
$(i18npool_BIDIR)/dict_ja.cxx $(i18npool_BIDIR)/dict_zh.cxx $(i18npool_BIDIR)/OpenOffice_dat.c
......@@ -22,6 +36,8 @@ $(i18npool_BIDIR)/dict_%.cxx : \
$(call gb_Helper_abbreviate_dirs,\
$(call gb_Helper_execute,gendict) $< $@ $(patsubst $(i18npool_BIDIR)/dict_%.cxx,%,$@))
endif
i18npool_BRKTXTS := \
char_in.brk \
char.brk \
......
......@@ -14,6 +14,14 @@ $(eval $(call gb_Executable_use_libraries,gendict,\
$(gb_UWINAPI) \
))
# Only on the build side: if config_host.mk contains the text "OS=IOS",
# compile gendict with DICT_JA_ZH_IN_DATAFILE defined so that it writes the
# dictionary tables as a data file rather than as C++ source.
# NOTE(review): the grep matches any line containing "OS=IOS"; presumably
# that is only the host OS setting -- confirm against config_host.mk.
ifeq ($(gb_Side),build)
ifneq ($(shell grep OS=IOS $(BUILDDIR)/config_host.mk),)
$(eval $(call gb_Executable_add_cxxflags,gendict,\
-DDICT_JA_ZH_IN_DATAFILE \
))
endif
endif
$(eval $(call gb_Executable_add_exception_objects,gendict,\
i18npool/source/breakiterator/gendict \
))
......
......@@ -35,6 +35,12 @@ $(eval $(call gb_Library_use_externals,i18npool,\
icu_headers \
))
# When OS is IOS, build the i18npool library with DICT_JA_ZH_IN_DATAFILE
# defined, the same macro that is passed to gendict for iOS builds.
ifeq ($(OS),IOS)
$(eval $(call gb_Library_add_cxxflags,i18npool,\
-DDICT_JA_ZH_IN_DATAFILE \
))
endif
$(eval $(call gb_Library_add_exception_objects,i18npool,\
i18npool/source/breakiterator/breakiterator_cjk \
i18npool/source/breakiterator/breakiterator_ctl \
......
Internationalisation (i18npool) framework ensures that the suite is adaptable to the requirements of different
native languages, their local settings and customs, etc without source code modification.
native languages, their local settings and customs, etc without source code modification. (Wow, that is such marketing-speak...)
Specifically for locale data documentation please see i18npool/source/localedata/data/locale.dtd
See also [http://wiki.documentfoundation.org/Category:I18n]
On iOS we put the largest data generated here, the dict_ja and dict_zh
stuff, into separate files and not into code to keep the size of an
app binary down. Temporary test code:
static bool beenhere = false;
if (!beenhere) {
beenhere = true;
uno::Reference< uno::XComponentContext > xComponentContext(::cppu::defaultBootstrap_InitialComponentContext());
uno::Reference< lang::XMultiComponentFactory > xMultiComponentFactoryClient( xComponentContext->getServiceManager() );
uno::Reference< uno::XInterface > xInterface =
xMultiComponentFactoryClient->createInstanceWithContext( "com.sun.star.i18n.BreakIterator_ja", xComponentContext );
}
......@@ -17,7 +17,6 @@
* the License at http://www.apache.org/licenses/LICENSE-2.0 .
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
......@@ -32,6 +31,22 @@ using std::vector;
using namespace ::rtl;
// For iOS, where we must strive for a minimal executable size, we
// keep the data produced by this utility not as large const tables in
// source code but instead as separate data files, to be bundled with
// an app, and mmapped in at run time.
// To make this easier to test on a desktop OS, just make sure
// DICT_JA_ZH_IN_DATAFILE is defined when building i18npool.
#ifdef DICT_JA_ZH_IN_DATAFILE
// File positions of the five tables inside the generated data file. Each one
// is captured with ftell() by the corresponding print* function right before
// that table is written, and main() appends all five, in this order, to the
// end of the file.
static sal_Int64 dataAreaOffset = 0;
static sal_Int64 lenArrayOffset = 0;
static sal_Int64 index1Offset = 0;
static sal_Int64 index2Offset = 0;
static sal_Int64 existMarkOffset = 0;
#endif
/* Utility gendict:
"BreakIterator_CJK provides input string caching and dictionary searching for
......@@ -60,12 +75,17 @@ static inline void set_exists(sal_uInt32 index)
/* Writes the preamble of the generated C++ source: the "generated file"
   banner followed by the sal/types.h include.
   When DICT_JA_ZH_IN_DATAFILE is defined the output is a data file, which
   has no preamble, so nothing is written. */
static inline void printIncludes(FILE* source_fp)
{
#ifdef DICT_JA_ZH_IN_DATAFILE
    // Nothing to emit; only keep the compiler quiet about the unused parameter.
    (void) source_fp;
#else
    static const char* const aPreamble[] = {
        "/* !!!The file is generated automatically. DO NOT edit the file manually!!! */\n\n",
        "#include <sal/types.h>\n\n"
    };
    for (size_t nLine = 0; nLine != sizeof(aPreamble) / sizeof(aPreamble[0]); ++nLine)
        fputs(aPreamble[nLine], source_fp);
#endif
}
static inline void printFunctions(FILE* source_fp, const char *lang)
{
#ifndef DICT_JA_ZH_IN_DATAFILE
fputs ("#ifndef DISABLE_DYNLOADING\n", source_fp);
fputs ("SAL_DLLPUBLIC_EXPORT const sal_uInt8* getExistMark() { return existMark; }\n", source_fp);
fputs ("SAL_DLLPUBLIC_EXPORT const sal_Int16* getIndex1() { return index1; }\n", source_fp);
......@@ -79,12 +99,20 @@ static inline void printFunctions(FILE* source_fp, const char *lang)
fprintf (source_fp, "SAL_DLLPUBLIC_EXPORT const sal_Int32* getLenArray_%s() { return lenArray; }\n", lang);
fprintf (source_fp, "SAL_DLLPUBLIC_EXPORT const sal_Unicode* getDataArea_%s() { return dataArea; }\n", lang);
fputs ("#endif\n", source_fp);
#else
(void) source_fp;
(void) lang;
#endif
}
static inline void printDataArea(FILE *dictionary_fp, FILE *source_fp, vector<sal_uInt32>& lenArray)
{
// generate main dict. data array
#ifndef DICT_JA_ZH_IN_DATAFILE
fputs("static const sal_Unicode dataArea[] = {\n\t", source_fp);
#else
dataAreaOffset = ftell(source_fp);
#endif
sal_Char str[1024];
sal_uInt32 lenArrayCurr = 0;
sal_Unicode current = 0;
......@@ -114,28 +142,47 @@ static inline void printDataArea(FILE *dictionary_fp, FILE *source_fp, vector<sa
// first character is stored in charArray, so start from second
for (i = 1; i < len; i++, lenArrayCurr++) {
set_exists(u[i]);
#ifndef DICT_JA_ZH_IN_DATAFILE
fprintf(source_fp, "0x%04x, ", u[i]);
if ((lenArrayCurr & 0x0f) == 0x0f)
fputs("\n\t", source_fp);
#else
fwrite(&u[i], sizeof(u[i]), 1, source_fp);
#endif
}
}
lenArray.push_back( lenArrayCurr ); // store last ending pointer
charArray[current+1] = lenArray.size();
#ifndef DICT_JA_ZH_IN_DATAFILE
fputs("\n};\n", source_fp);
#endif
}
/* Writes the lenArray table: one leading zero entry followed by every
   element of lenArray.

   Without DICT_JA_ZH_IN_DATAFILE the table is emitted as the C++ array
   "lenArray" in the generated source, wrapped every 16 entries.

   With DICT_JA_ZH_IN_DATAFILE the entries are written as raw sal_uInt32
   values and the file position of the table is remembered in
   lenArrayOffset. No line-wrap characters may be written in this mode:
   the reader treats the table as a plain array of 32-bit integers, so any
   extra byte would corrupt it and shift everything behind it. */
static inline void printLenArray(FILE* source_fp, const vector<sal_uInt32>& lenArray)
{
#ifndef DICT_JA_ZH_IN_DATAFILE
    fprintf(source_fp, "static const sal_Int32 lenArray[] = {\n\t");
    fprintf(source_fp, "0x%x, ", 0); // insert one slot for skipping 0 in index2 array.
    for (size_t k = 0; k < lenArray.size(); k++)
    {
        // Cosmetic only: start a new source line every 16 entries.
        if( !(k & 0xf) )
            fputs("\n\t", source_fp);

        fprintf(source_fp, "0x%lx, ", static_cast<long unsigned int>(lenArray[k]));
    }
    fputs("\n};\n", source_fp );
#else
    lenArrayOffset = ftell(source_fp);
    sal_uInt32 zero(0); // the same leading slot as in the source form
    fwrite(&zero, sizeof(zero), 1, source_fp);
    for (size_t k = 0; k < lenArray.size(); k++)
        fwrite(&lenArray[k], sizeof(lenArray[k]), 1, source_fp);
#endif
}
/* FIXME?: what happens if in every range i there is at least one charArray != 0
......@@ -143,23 +190,40 @@ static inline void printLenArray(FILE* source_fp, const vector<sal_uInt32>& lenA
=> then in index2, the last range will be ignored incorrectly */
/* Writes the index1 table and fills set[] (0x100 entries) as a side effect.

   For every 256-character block i, set[i] becomes a running block number
   if charArray has at least one non-zero entry in that block, and 0xff
   otherwise. The assignment must happen exactly once per block: doing it
   twice would advance the running number twice and emit every entry twice.

   Without DICT_JA_ZH_IN_DATAFILE the values are emitted as the C++ array
   "index1"; with it they are written as raw sal_Int16 values and the file
   position of the table is remembered in index1Offset. */
static inline void printIndex1(FILE *source_fp, sal_Int16 *set)
{
#ifndef DICT_JA_ZH_IN_DATAFILE
    fprintf (source_fp, "static const sal_Int16 index1[] = {\n\t");
#else
    index1Offset = ftell(source_fp);
#endif

    sal_Int16 count = 0;
    for (sal_Int32 i = 0; i < 0x100; i++) {
        // Find the first used character in block i, if any.
        sal_Int32 j = 0;
        while( j < 0x100 && charArray[(i<<8) + j] == 0)
            j++;

        set[i] = (j < 0x100 ? count++ : 0xff);
#ifndef DICT_JA_ZH_IN_DATAFILE
        fprintf(source_fp, "0x%02x, ", set[i]);
        if ((i & 0x0f) == 0x0f)
            fputs ("\n\t", source_fp);
#else
        fwrite(&set[i], sizeof(set[i]), 1, source_fp);
#endif
    }

#ifndef DICT_JA_ZH_IN_DATAFILE
    fputs("};\n", source_fp);
#endif
}
static inline void printIndex2(FILE *source_fp, sal_Int16 *set)
{
#ifndef DICT_JA_ZH_IN_DATAFILE
fputs ("static const sal_Int32 index2[] = {\n\t", source_fp);
#else
index2Offset = ftell(source_fp);
#endif
sal_Int32 prev = 0;
for (sal_Int32 i = 0; i < 0x100; i++) {
if (set[i] != 0xff) {
......@@ -170,28 +234,48 @@ static inline void printIndex2(FILE *source_fp, sal_Int16 *set)
k++;
prev = charArray[(i<<8) + j];
#ifndef DICT_JA_ZH_IN_DATAFILE
fprintf(source_fp, "0x%lx, ", static_cast<long unsigned int>(k < 0x10000 ? charArray[k] + 1 : 0));
if ((j & 0x0f) == 0x0f)
fputs ("\n\t", source_fp);
#else
sal_uInt32 n = (k < 0x10000 ? charArray[k] + 1 : 0);
fwrite(&n, sizeof(n), 1, source_fp);
#endif
}
#ifndef DICT_JA_ZH_IN_DATAFILE
fputs ("\n\t", source_fp);
#endif
}
}
#ifndef DICT_JA_ZH_IN_DATAFILE
fputs ("\n};\n", source_fp);
#endif
}
/* Writes the bitmask recording which sal_Unicode values exist in the
   dictionary; 8 flags are packed into each sal_uInt8, and 0x2000 bytes of
   the exists table are written.

   With DICT_JA_ZH_IN_DATAFILE the bytes are written raw and the file
   position of the table is remembered in existMarkOffset; otherwise the
   table is emitted as the C++ array "existMark", 16 entries per line. */
static inline void printExistsMask(FILE *source_fp)
{
    const unsigned int nMaskBytes = 0x2000;

#ifdef DICT_JA_ZH_IN_DATAFILE
    existMarkOffset = ftell(source_fp);
    for (unsigned int nPos = 0; nPos != nMaskBytes; ++nPos)
        fwrite(&exists[nPos], sizeof(exists[nPos]), 1, source_fp);
#else
    fprintf (source_fp, "static const sal_uInt8 existMark[] = {\n\t");
    for (unsigned int nPos = 0; nPos != nMaskBytes; ++nPos)
    {
        fprintf(source_fp, "0x%02x, ", exists[nPos]);

        const bool bRowComplete = (nPos % 16 == 15);
        if ( bRowComplete )
            fputs("\n\t", source_fp);
    }
    fputs("\n};\n", source_fp);
#endif
}
SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv)
......@@ -228,14 +312,25 @@ SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv)
sal_Int16 set[0x100];
printIncludes(source_fp);
#ifndef DICT_JA_ZH_IN_DATAFILE
fputs("extern \"C\" {\n", source_fp);
printDataArea(dictionary_fp, source_fp, lenArray);
printLenArray(source_fp, lenArray);
printIndex1(source_fp, set);
printIndex2(source_fp, set);
printExistsMask(source_fp);
printFunctions(source_fp, argv[3]);
#endif
printDataArea(dictionary_fp, source_fp, lenArray);
printLenArray(source_fp, lenArray);
printIndex1(source_fp, set);
printIndex2(source_fp, set);
printExistsMask(source_fp);
printFunctions(source_fp, argv[3]);
#ifndef DICT_JA_ZH_IN_DATAFILE
fputs("}\n", source_fp);
#else
// Put pointers to the tables at the end of the file...
fwrite(&dataAreaOffset, sizeof(dataAreaOffset), 1, source_fp);
fwrite(&lenArrayOffset, sizeof(lenArrayOffset), 1, source_fp);
fwrite(&index1Offset, sizeof(index1Offset), 1, source_fp);
fwrite(&index2Offset, sizeof(index2Offset), 1, source_fp);
fwrite(&existMarkOffset, sizeof(existMarkOffset), 1, source_fp);
#endif
fclose(dictionary_fp);
fclose(source_fp);
......
......@@ -17,28 +17,22 @@
* the License at http://www.apache.org/licenses/LICENSE-2.0 .
*/
#include <config_folders.h>
// xdictionary.cpp: implementation of the xdictionary class.
#include <osl/file.h>
#include <rtl/ustrbuf.hxx>
#include <rtl/bootstrap.hxx>
#include <com/sun/star/i18n/WordType.hpp>
#include <xdictionary.hxx>
#include <unicode/uchar.h>
#include <string.h>
#include <breakiteratorImpl.hxx>
// Construction/Destruction
namespace com { namespace sun { namespace star { namespace i18n {
#ifndef DISABLE_DYNLOADING
#ifdef DICT_JA_ZH_IN_DATAFILE
#elif !defined DISABLE_DYNLOADING
extern "C" { static void SAL_CALL thisModule() {} }
......@@ -74,8 +68,44 @@ xdictionary::xdictionary(const sal_Char *lang) :
boundary(),
japaneseWordBreak( sal_False )
{
index1 = 0;
#ifndef DISABLE_DYNLOADING
existMark = NULL;
index1 = NULL;
index2 = NULL;
lenArray = NULL;
dataArea = NULL;
#ifdef DICT_JA_ZH_IN_DATAFILE
// Map the dictionary data file for Japanese or Chinese and point the table
// members into the mapping. If the file is missing, unreadable, too small or
// carries implausible offsets, the table members are left untouched.
if( strcmp( lang, "ja" ) == 0 || strcmp( lang, "zh" ) == 0 )
{
    OUString sUrl( "$BRAND_BASE_DIR/" LIBO_SHARE_FOLDER "/dict_" );
    rtl::Bootstrap::expandMacros(sUrl);

    // lang is known to be either "ja" or "zh" at this point.
    if( strcmp( lang, "ja" ) == 0 )
        sUrl += "ja.data";
    else
        sUrl += "zh.data";

    oslFileHandle aFileHandle = NULL;
    if( osl_openFile( sUrl.pData, &aFileHandle, osl_File_OpenFlag_Read ) == osl_File_E_None )
    {
        // gendict appends the five table offsets (dataArea, lenArray, index1,
        // index2, existMark; one sal_Int64 each) to the end of the file.
        const sal_uInt64 nTrailerSize = 5 * sizeof(sal_Int64);

        sal_uInt64 nFileSize = 0;
        char *pMapping = NULL;
        bool bUsable = false;

        if( osl_getFileSize( aFileHandle, &nFileSize) == osl_File_E_None &&
            nFileSize > nTrailerSize && // otherwise the trailer would be read from before the mapping
            osl_mapFile( aFileHandle, (void **) &pMapping, nFileSize, 0, osl_File_MapFlag_RandomAccess ) == osl_File_E_None )
        {
            // Copy the trailer out instead of dereferencing it in place: the
            // end of the file is not guaranteed to be 8-byte aligned.
            sal_Int64 aOffsets[5];
            memcpy( aOffsets, pMapping + nFileSize - nTrailerSize, sizeof(aOffsets) );

            // Every table has to start inside the payload in front of the trailer.
            const sal_uInt64 nPayloadSize = nFileSize - nTrailerSize;
            bUsable = true;
            for( int i = 0; i < 5; i++ )
            {
                if( aOffsets[i] < 0 || static_cast<sal_uInt64>(aOffsets[i]) >= nPayloadSize )
                    bUsable = false;
            }

            if( bUsable )
            {
                dataArea = (sal_Unicode*) (pMapping + aOffsets[0]);
                lenArray = (sal_Int32*) (pMapping + aOffsets[1]);
                index1 = (sal_Int16*) (pMapping + aOffsets[2]);
                index2 = (sal_Int32*) (pMapping + aOffsets[3]);
                existMark = (sal_uInt8*) (pMapping + aOffsets[4]);
                // NOTE(review): the tables themselves are still accessed in
                // place; their alignment depends on how gendict lays out the
                // file -- confirm this is safe on the target CPU.
            }
            else
            {
                osl_unmapFile( pMapping, nFileSize );
            }
        }

        // On success the mapping (and the handle, as before) is kept for the
        // lifetime of the process; on any failure do not leak the handle.
        if( !bUsable )
            osl_closeFile( aFileHandle );
    }
}
#elif !defined DISABLE_DYNLOADING
#ifdef SAL_DLLPREFIX
OUStringBuffer aBuf( strlen(lang) + 7 + 6 ); // mostly "lib*.so" (with * == dict_zh)
aBuf.appendAscii( SAL_DLLPREFIX );
......@@ -97,16 +127,9 @@ xdictionary::xdictionary(const sal_Char *lang) :
func = (sal_IntPtr(*)()) osl_getFunctionSymbol( hModule, OUString("getDataArea").pData );
dataArea = (sal_Unicode*) (*func)();
}
else
{
existMark = NULL;
index1 = NULL;
index2 = NULL;
lenArray = NULL;
dataArea = NULL;
}
#else
if( strcmp( lang, "ja" ) == 0 ) {
existMark = getExistMark_ja();
index1 = getIndex1_ja();
......@@ -121,14 +144,7 @@ xdictionary::xdictionary(const sal_Char *lang) :
lenArray = getLenArray_zh();
dataArea = getDataArea_zh();
}
else
{
existMark = NULL;
index1 = NULL;
index2 = NULL;
lenArray = NULL;
dataArea = NULL;
}
#endif
for (sal_Int32 i = 0; i < CACHE_MAX; i++)
......
......@@ -59,6 +59,9 @@ TiledLibreOffice_setup:
mkdir -p $(TiledLibreOffice_resource)/share/config
cp -R $(INSTDIR)/share/config/soffice.cfg $(TiledLibreOffice_resource)/share/config
# Japanese and Chinese dict files
cp $(WORKDIR)/CustomTarget/i18npool/breakiterator/dict_*.data $(TiledLibreOffice_resource)/share
# "registry"
cp -R $(INSTDIR)/share/registry $(TiledLibreOffice_resource)/share
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment