Commit bfeeed3e authored by Caolán McNamara

implement a new ISCII (Devanagari) <-> Unicode converter

this time with support for the multi-byte encodings possible
in ISCII

Change-Id: I1dc09e8836676ab614b531e8dc10f91a90b7c4fd
parent 29315d77
......@@ -41,6 +41,7 @@ $(eval $(call gb_Library_add_exception_objects,sal_textenc,\
sal/textenc/convertbig5hkscs \
sal/textenc/converteuctw \
sal/textenc/convertgb18030 \
sal/textenc/convertisciidevangari \
sal/textenc/convertiso2022cn \
sal/textenc/convertiso2022jp \
sal/textenc/convertiso2022kr \
......
......@@ -509,8 +509,8 @@ namespace
CPPUNIT_TEST( MimeCharsetFromTextEncoding_BIG5_HKSCS );
CPPUNIT_TEST( MimeCharsetFromTextEncoding_TIS_620 );
CPPUNIT_TEST( MimeCharsetFromTextEncoding_KOI8_U );
#if 0
CPPUNIT_TEST( MimeCharsetFromTextEncoding_ISCII_DEVANAGARI );
#if 0
CPPUNIT_TEST( MimeCharsetFromTextEncoding_JAVA_UTF8 );
#endif
......
......@@ -134,12 +134,13 @@ void testSingleByteCharSet(SingleByteCharSet const & rSet) {
| RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
| RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR),
&nInfo, &nConverted);
sal_uInt32 nExpectedInfo = (RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_UNDEFINED);
CPPUNIT_ASSERT_MESSAGE(
"failure #9",
(nSize == 0
&& (nInfo
== (RTL_TEXTTOUNICODE_INFO_ERROR
| RTL_TEXTTOUNICODE_INFO_UNDEFINED))
&& (nInfo == nExpectedInfo)
&& nConverted == 0));
rtl_destroyTextToUnicodeContext(aConverter, aContext);
rtl_destroyTextToUnicodeConverter(aConverter);
......@@ -1098,41 +1099,6 @@ void Test::testSingleByte() {
0x0425,0x0418,0x0419,0x041A,0x041B,0x041C,0x041D,0x041E,
0x041F,0x042F,0x0420,0x0421,0x0422,0x0423,0x0416,0x0412,
0x042C,0x042B,0x0417,0x0428,0x042D,0x0429,0x0427,0x042A } },
#if 0
{ RTL_TEXTENCODING_ISCII_DEVANAGARI,
{ 0x0000,0x0001,0x0002,0x0003,0x0004,0x0005,0x0006,0x0007,
0x0008,0x0009,0x000A,0x000B,0x000C,0x000D,0x000E,0x000F,
0x0010,0x0011,0x0012,0x0013,0x0014,0x0015,0x0016,0x0017,
0x0018,0x0019,0x001A,0x001B,0x001C,0x001D,0x001E,0x001F,
0x0020,0x0021,0x0022,0x0023,0x0024,0x0025,0x0026,0x0027,
0x0028,0x0029,0x002A,0x002B,0x002C,0x002D,0x002E,0x002F,
0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,
0x0038,0x0039,0x003A,0x003B,0x003C,0x003D,0x003E,0x003F,
0x0040,0x0041,0x0042,0x0043,0x0044,0x0045,0x0046,0x0047,
0x0048,0x0049,0x004A,0x004B,0x004C,0x004D,0x004E,0x004F,
0x0050,0x0051,0x0052,0x0053,0x0054,0x0055,0x0056,0x0057,
0x0058,0x0059,0x005A,0x005B,0x005C,0x005D,0x005E,0x005F,
0x0060,0x0061,0x0062,0x0063,0x0064,0x0065,0x0066,0x0067,
0x0068,0x0069,0x006A,0x006B,0x006C,0x006D,0x006E,0x006F,
0x0070,0x0071,0x0072,0x0073,0x0074,0x0075,0x0076,0x0077,
0x0078,0x0079,0x007A,0x007B,0x007C,0x007D,0x007E,0x007F,
0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,
0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,
0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,
0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,
0xFFFF,0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,
0x0909,0x090A,0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,
0x0913,0x0914,0x0911,0x0915,0x0916,0x0917,0x0918,0x0919,
0x091A,0x091B,0x091C,0x091D,0x091E,0x091F,0x0920,0x0921,
0x0922,0x0923,0x0924,0x0925,0x0926,0x0927,0x0928,0x0929,
0x092A,0x092B,0x092C,0x092D,0x092E,0x092F,0x095F,0x0930,
0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937,0x0938,
0x0939,0xFFFF,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943,
0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,
0x094D,0x093C,0x0964,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,
0xFFFF,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C,
0x096D,0x096E,0x096F,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF } },
#endif
{ RTL_TEXTENCODING_ADOBE_STANDARD,
{ 0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,
0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,
......@@ -2498,6 +2464,43 @@ void Test::testComplex() {
false,
true,
false,
RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR },
{ RTL_TEXTENCODING_ISCII_DEVANAGARI,
RTL_CONSTASCII_STRINGPARAM(
"\xD7\xE6\x20\xD4\xCF\xE8\xD6\x20"
"\xC8\xD8\xD1\xE1\x20\xB3\xCA\xDC"
"\xCF\xC4\xDA\xD7\x20\xD8\xDB\xA2"
"\xC4\xDE\x20\xB1\xCF\x20\xCC\xDD"
"\xD7\xD1\xCC\xDA\xC6\x20\xC4\xE5"
"\xC6\xE5\xA2\x20\xB3\xE1\x20\xB3"
"\xBD\xE8\xBD\xCF\xC8\xC6\x20\xB3"
"\xE5\x20\xC9\xBD\xB3\xDA\xCF\x20"
"\xB8\xDD\xB3\xE1\x20\xC3\xE1\x20"
"\xEA"),
{ 0x0938, 0x094C, 0x0020, 0x0935, 0x0930, 0x094D, 0x0937, 0x0020,
0x092A, 0x0939, 0x0932, 0x0947, 0x0020, 0x0915, 0x092C, 0x0940,
0x0930, 0x0926, 0x093E, 0x0938, 0x0020, 0x0939, 0x093F, 0x0902,
0x0926, 0x0942, 0x0020, 0x0914, 0x0930, 0x0020, 0x092E, 0x0941,
0x0938, 0x0932, 0x092E, 0x093E, 0x0928, 0x0020, 0x0926, 0x094B,
0x0928, 0x094B, 0x0902, 0x0020, 0x0915, 0x0947, 0x0020, 0x0915,
0x091F, 0x094D, 0x091F, 0x0930, 0x092A, 0x0928, 0x0020, 0x0915,
0x094B, 0x0020, 0x092B, 0x091F, 0x0915, 0x093E, 0x0930, 0x0020,
0x091A, 0x0941, 0x0915, 0x0947, 0x0020, 0x0925, 0x0947, 0x0020,
0x0964 },
73,
false,
true,
true,
false,
RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR },
{ RTL_TEXTENCODING_ISCII_DEVANAGARI,
RTL_CONSTASCII_STRINGPARAM("\xE8\xE8\xE8\xE9\xA1\xE9\xEA\xE9"),
{ 0x094D, 0x200C, 0x094D, 0x200D, 0x0950, 0x93D },
6,
false,
true,
true,
false,
RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR }
};
for (std::size_t i = 0; i < SAL_N_ELEMENTS(data); ++i) {
......@@ -2870,10 +2873,8 @@ void Test::testInfo() {
{ RTL_TEXTENCODING_IBM_861, RTL_TEXTENCODING_INFO_MIME, true },
{ RTL_TEXTENCODING_IBM_863, RTL_TEXTENCODING_INFO_MIME, true },
{ RTL_TEXTENCODING_IBM_865, RTL_TEXTENCODING_INFO_MIME, true },
#if 0
{ RTL_TEXTENCODING_ISCII_DEVANAGARI, RTL_TEXTENCODING_INFO_ASCII, true },
{ RTL_TEXTENCODING_ISCII_DEVANAGARI, RTL_TEXTENCODING_INFO_MIME, false },
#endif
{ RTL_TEXTENCODING_ADOBE_STANDARD, RTL_TEXTENCODING_INFO_ASCII, false },
{ RTL_TEXTENCODING_ADOBE_STANDARD, RTL_TEXTENCODING_INFO_MIME, true },
{ RTL_TEXTENCODING_ADOBE_STANDARD, RTL_TEXTENCODING_INFO_SYMBOL, false },
......
This diff is collapsed.
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
#include <sal/types.h>
/* ------------------------------------------------------------------------
 * ISCII (Devanagari) -> Unicode
 *
 * Conversion entry point plus the create/destroy/reset hooks for the opaque
 * context it is handed via pContext.
 * NOTE(review): the definitions are not visible from this header; parameter
 * semantics presumably follow the other Impl* text converters -- confirm
 * against the implementation file.
 * ---------------------------------------------------------------------- */
sal_Size ImplConvertIsciiDevanagariToUnicode(
    void const * pData,
    void * pContext,
    char const * pSrcBuf,
    sal_Size nSrcBytes,
    sal_Unicode * pDestBuf,
    sal_Size nDestChars,
    sal_uInt32 nFlags,
    sal_uInt32 * pInfo,
    sal_Size * pSrcCvtBytes);

void * ImplCreateIsciiDevanagariToUnicodeContext();

void ImplDestroyIsciiDevanagariToUnicodeContext(
    void * pContext);

void ImplResetIsciiDevanagariToUnicodeContext(
    void * pContext);

/* ------------------------------------------------------------------------
 * Unicode -> ISCII (Devanagari)
 *
 * Mirror image of the group above: conversion entry point plus the
 * create/destroy/reset hooks for its own opaque context.
 * ---------------------------------------------------------------------- */
sal_Size ImplConvertUnicodeToIsciiDevanagari(
    void const * pData,
    void * pContext,
    sal_Unicode const * pSrcBuf,
    sal_Size nSrcChars,
    char * pDestBuf,
    sal_Size nDestBytes,
    sal_uInt32 nFlags,
    sal_uInt32 * pInfo,
    sal_Size * pSrcCvtChars);

void * ImplCreateUnicodeToIsciiDevanagariContext();

void ImplDestroyUnicodeToIsciiDevanagariContext(
    void * pContext);

void ImplResetUnicodeToIsciiDevanagariContext(
    void * pContext);
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
#include "sal/config.h"
#include "rtl/tencinfo.h"
#include "rtl/textenc.h"
#include "convertisciidevangari.hxx"
/*
 * Encoding descriptor for ISCII (Devanagari): wires the dedicated converter
 * functions and their context hooks into an ImplTextEncodingData record.
 *
 * The context hooks are listed in the same Create / Destroy / Reset order for
 * both directions.  Destroy and Reset share the signature void(void *), so
 * listing them in the wrong slots compiles silently but makes a "reset" free
 * the context and a "destroy" merely reset it.
 * NOTE(review): slot order assumed to be Create, Destroy, Reset for both
 * directions (matches the ISCII->Unicode triple below) -- confirm against the
 * ImplTextConverter definition.
 */
static ImplTextEncodingData const aImplIsciiDevanagariTextEncodingData
    = { { NULL, /* no table data; conversion is done by the functions below */
          &ImplConvertIsciiDevanagariToUnicode,
          &ImplConvertUnicodeToIsciiDevanagari,
          /* ISCII -> Unicode context: create, destroy, reset */
          &ImplCreateIsciiDevanagariToUnicodeContext,
          &ImplDestroyIsciiDevanagariToUnicodeContext,
          &ImplResetIsciiDevanagariToUnicodeContext,
          /* Unicode -> ISCII context: create, destroy, reset */
          &ImplCreateUnicodeToIsciiDevanagariContext,
          &ImplDestroyUnicodeToIsciiDevanagariContext,
          &ImplResetUnicodeToIsciiDevanagariContext },
        /* NOTE(review): presumably min / max / average bytes per character */
        1,
        2,
        2,
        1,
        NULL,
        /* NOTE(review): the trailing space in this name looks unintentional;
           left byte-for-byte as found -- confirm before changing. */
        "x-iscii-de ",
        RTL_TEXTENCODING_INFO_ASCII | RTL_TEXTENCODING_INFO_MULTIBYTE };
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
......@@ -100,13 +100,14 @@ static sal_uInt16 const aImplDoubleByteIdentifierTab[1] = { 0 };
#include "tcvttcn6.tab"
#include "tcvtuni1.tab"
#include "convertadobe.tab"
#include "convertbig5hkscs.tab"
#include "converteuctw.tab"
#include "convertgb18030.tab"
#include "convertisciidevangari.tab"
#include "convertiso2022cn.tab"
#include "convertiso2022jp.tab"
#include "convertiso2022kr.tab"
#include "convertadobe.tab"
extern "C" SAL_DLLPUBLIC_EXPORT ImplTextEncodingData const *
sal_getFullTextEncodingData( rtl_TextEncoding nEncoding )
......@@ -208,7 +209,7 @@ extern "C" SAL_DLLPUBLIC_EXPORT ImplTextEncodingData const *
&aImplBig5HkscsTextEncodingData, /* BIG5_HKSCS */
&aImplTis620TextEncodingData, /* TIS_620 */
&aImplKoi8UTextEncodingData, /* KOI8_U */
NULL, /* TODO! ISCII_DEVANAGARI */
&aImplIsciiDevanagariTextEncodingData, /* ISCII_DEVANAGARI */
NULL, /* JAVA_UTF8, see above */
&adobeStandardEncodingData, /* ADOBE_STANDARD */
&adobeSymbolEncodingData, /* ADOBE_SYMBOL */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment