Kaydet (Commit) 6b678c13 authored tarafından Eike Rathke

added/checked/aligned MS-LangIDs and mappings from new MS-LCID.pdf

... found at http://msdn.microsoft.com/library/cc233965.aspx (the 5th or
6th place I'm aware of where MS defines LCIDs, all different ...)
* a bunch of new definitions up to Windows 8.1
* lots of cross-checks done with SIL, Ethnologue, ...

Change-Id: Ifae8e676558c6712fe752856acca600d05d0a63f
üst 315a0e27
......@@ -197,7 +197,7 @@ static const CountryEntry pTable[] =
{ COUNTRY_BELARUS, LANGUAGE_BELARUSIAN, false },
{ COUNTRY_MONACO, LANGUAGE_FRENCH_MONACO, true },
{ COUNTRY_UKRAINE, LANGUAGE_UKRAINIAN, false },
{ COUNTRY_SERBIA, LANGUAGE_SERBIAN_LATIN, false },
{ COUNTRY_SERBIA, LANGUAGE_SERBIAN_LATIN_SAM, false },
{ COUNTRY_CROATIA, LANGUAGE_CROATIAN, true }, // sub type of LANGUAGE_SERBIAN
{ COUNTRY_SLOVENIA, LANGUAGE_SLOVENIAN, false },
{ COUNTRY_MACEDONIA, LANGUAGE_MACEDONIAN, false },
......@@ -235,7 +235,7 @@ static const CountryEntry pTable[] =
{ COUNTRY_ISRAEL, LANGUAGE_HEBREW, false },
{ COUNTRY_BAHRAIN, LANGUAGE_ARABIC_BAHRAIN, true },
{ COUNTRY_QATAR, LANGUAGE_ARABIC_QATAR, true },
{ COUNTRY_MONGOLIA, LANGUAGE_MONGOLIAN, false },
{ COUNTRY_MONGOLIA, LANGUAGE_MONGOLIAN_CYRILLIC_MONGOLIA, false },
{ COUNTRY_NEPAL, LANGUAGE_NEPALI, false },
{ COUNTRY_IRAN, LANGUAGE_FARSI, false },
{ COUNTRY_TAJIKISTAN, LANGUAGE_TAJIK, false },
......
......@@ -154,7 +154,7 @@ void TestLanguageTag::testAllTags()
CPPUNIT_ASSERT( aLocale.Language == "qlt" );
CPPUNIT_ASSERT( aLocale.Country == "CS" );
CPPUNIT_ASSERT( aLocale.Variant == s_sr_Latn_CS );
CPPUNIT_ASSERT( sr_Latn_CS.getLanguageType() == LANGUAGE_SERBIAN_LATIN );
CPPUNIT_ASSERT( sr_Latn_CS.getLanguageType() == LANGUAGE_SERBIAN_LATIN_SAM );
CPPUNIT_ASSERT( sr_Latn_CS.isValidBcp47() == true );
CPPUNIT_ASSERT( sr_Latn_CS.isIsoLocale() == false );
CPPUNIT_ASSERT( sr_Latn_CS.isIsoODF() == true );
......@@ -352,14 +352,15 @@ void TestLanguageTag::testAllTags()
}
// "no", "nb" and "nn" share the same primary language ID, which even is
// assigned to "no-NO" for legacy so none gets it assigned, all on-the-fly.
// assigned to "no-NO" for legacy so none gets it assigned, all on-the-fly
// except if there is a defined MS-LCID for LanguageScriptOnly (LSO).
{
LanguageTag no( "no", true );
CPPUNIT_ASSERT( LanguageTag::isOnTheFlyID( no.getLanguageType()) );
LanguageTag nb( "nb", true );
CPPUNIT_ASSERT( LanguageTag::isOnTheFlyID( nb.getLanguageType()) );
CPPUNIT_ASSERT( nb.getLanguageType() == LANGUAGE_NORWEGIAN_BOKMAL_LSO );
LanguageTag nn( "nn", true );
CPPUNIT_ASSERT( LanguageTag::isOnTheFlyID( nn.getLanguageType()) );
CPPUNIT_ASSERT( nn.getLanguageType() == LANGUAGE_NORWEGIAN_NYNORSK_LSO );
LanguageTag no_NO( "no-NO", true );
CPPUNIT_ASSERT( no_NO.getLanguageType() == LANGUAGE_NORWEGIAN );
}
......@@ -410,6 +411,29 @@ void TestLanguageTag::testAllTags()
// 'en-oed' is not a valid fallback!
}
#if USE_LIBLANGTAG
// 'zh-yue-HK' uses extlang and should be preferred 'yue-HK'
{
OUString s_zh_yue_HK( "zh-yue-HK" );
LanguageTag zh_yue_HK( s_zh_yue_HK );
lang::Locale aLocale = zh_yue_HK.getLocale();
CPPUNIT_ASSERT( zh_yue_HK.getBcp47() == "yue-HK" );
CPPUNIT_ASSERT( aLocale.Language == "yue" );
CPPUNIT_ASSERT( aLocale.Country == "HK" );
CPPUNIT_ASSERT( aLocale.Variant == "" );
CPPUNIT_ASSERT( zh_yue_HK.getLanguageType() == LANGUAGE_YUE_CHINESE_HONGKONG );
CPPUNIT_ASSERT( zh_yue_HK.isValidBcp47() == true );
CPPUNIT_ASSERT( zh_yue_HK.isIsoLocale() == true );
CPPUNIT_ASSERT( zh_yue_HK.isIsoODF() == true );
CPPUNIT_ASSERT( zh_yue_HK.getLanguageAndScript() == "yue" );
CPPUNIT_ASSERT( zh_yue_HK.getVariants() == "" );
::std::vector< OUString > zh_yue_HK_Fallbacks( zh_yue_HK.getFallbackStrings( true));
CPPUNIT_ASSERT( zh_yue_HK_Fallbacks.size() == 2);
CPPUNIT_ASSERT( zh_yue_HK_Fallbacks[0] == "yue-HK");
CPPUNIT_ASSERT( zh_yue_HK_Fallbacks[1] == "yue");
}
#endif
// 'qtz' is a local use known pseudolocale for key ID resource
{
OUString s_qtz( "qtz" );
......
......@@ -252,7 +252,8 @@ bool MsLangId::isTraditionalChinese( const ::com::sun::star::lang::Locale & rLoc
//static
bool MsLangId::isChinese( LanguageType nLang )
{
return MsLangId::getPrimaryLanguage(nLang) == MsLangId::getPrimaryLanguage(LANGUAGE_CHINESE);
return MsLangId::getPrimaryLanguage(nLang) == MsLangId::getPrimaryLanguage(LANGUAGE_CHINESE) ||
MsLangId::getPrimaryLanguage(nLang) == MsLangId::getPrimaryLanguage(LANGUAGE_YUE_CHINESE_HONGKONG);
}
//static
......@@ -266,9 +267,10 @@ bool MsLangId::isCJK( LanguageType nLang )
{
switch (nLang & LANGUAGE_MASK_PRIMARY)
{
case LANGUAGE_CHINESE & LANGUAGE_MASK_PRIMARY:
case LANGUAGE_JAPANESE & LANGUAGE_MASK_PRIMARY:
case LANGUAGE_KOREAN & LANGUAGE_MASK_PRIMARY:
case LANGUAGE_CHINESE & LANGUAGE_MASK_PRIMARY:
case LANGUAGE_YUE_CHINESE_HONGKONG & LANGUAGE_MASK_PRIMARY:
case LANGUAGE_JAPANESE & LANGUAGE_MASK_PRIMARY:
case LANGUAGE_KOREAN & LANGUAGE_MASK_PRIMARY:
return true;
default:
break;
......@@ -313,7 +315,9 @@ sal_Int16 MsLangId::getScriptType( LanguageType nLang )
switch( nLang )
{
// CTL
case LANGUAGE_MONGOLIAN_MONGOLIAN:
case LANGUAGE_MONGOLIAN_MONGOLIAN_MONGOLIA:
case LANGUAGE_MONGOLIAN_MONGOLIAN_CHINA:
case LANGUAGE_MONGOLIAN_MONGOLIAN_LSO:
case LANGUAGE_USER_KURDISH_IRAN:
case LANGUAGE_USER_KURDISH_IRAQ:
case LANGUAGE_USER_KYRGYZ_CHINA:
......@@ -321,7 +325,8 @@ sal_Int16 MsLangId::getScriptType( LanguageType nLang )
break;
// "Western"
case LANGUAGE_MONGOLIAN:
case LANGUAGE_MONGOLIAN_CYRILLIC_MONGOLIA:
case LANGUAGE_MONGOLIAN_CYRILLIC_LSO:
case LANGUAGE_USER_KURDISH_SYRIA:
case LANGUAGE_USER_KURDISH_TURKEY:
nScript = ::com::sun::star::i18n::ScriptType::LATIN;
......@@ -341,9 +346,10 @@ sal_Int16 MsLangId::getScriptType( LanguageType nLang )
switch ( nLang & LANGUAGE_MASK_PRIMARY )
{
// CJK catcher
case LANGUAGE_CHINESE & LANGUAGE_MASK_PRIMARY:
case LANGUAGE_JAPANESE & LANGUAGE_MASK_PRIMARY:
case LANGUAGE_KOREAN & LANGUAGE_MASK_PRIMARY:
case LANGUAGE_CHINESE & LANGUAGE_MASK_PRIMARY:
case LANGUAGE_YUE_CHINESE_HONGKONG & LANGUAGE_MASK_PRIMARY:
case LANGUAGE_JAPANESE & LANGUAGE_MASK_PRIMARY:
case LANGUAGE_KOREAN & LANGUAGE_MASK_PRIMARY:
nScript = ::com::sun::star::i18n::ScriptType::ASIAN;
break;
......@@ -433,11 +439,32 @@ LanguageType MsLangId::getReplacementForObsoleteLanguage( LanguageType nLang, bo
nLang = LANGUAGE_LUXEMBOURGISH_LUXEMBOURG;
break;
case LANGUAGE_OBSOLETE_USER_KABYLE:
nLang = LANGUAGE_TAMAZIGHT_LATIN;
nLang = LANGUAGE_TAMAZIGHT_LATIN_ALGERIA;
break;
case LANGUAGE_OBSOLETE_USER_CATALAN_VALENCIAN:
nLang = LANGUAGE_CATALAN_VALENCIAN;
break;
case LANGUAGE_OBSOLETE_USER_MALAGASY_PLATEAU:
nLang = LANGUAGE_MALAGASY_PLATEAU;
break;
case LANGUAGE_GAELIC_SCOTLAND_LEGACY:
nLang = LANGUAGE_GAELIC_SCOTLAND;
break;
case LANGUAGE_OBSOLETE_USER_TSWANA_BOTSWANA:
nLang = LANGUAGE_TSWANA_BOTSWANA;
break;
case LANGUAGE_OBSOLETE_USER_SERBIAN_LATIN_SERBIA:
nLang = LANGUAGE_SERBIAN_LATIN_SERBIA;
break;
case LANGUAGE_OBSOLETE_USER_SERBIAN_LATIN_MONTENEGRO:
nLang = LANGUAGE_SERBIAN_LATIN_MONTENEGRO;
break;
case LANGUAGE_OBSOLETE_USER_SERBIAN_CYRILLIC_SERBIA:
nLang = LANGUAGE_SERBIAN_CYRILLIC_SERBIA;
break;
case LANGUAGE_OBSOLETE_USER_SERBIAN_CYRILLIC_MONTENEGRO:
nLang = LANGUAGE_SERBIAN_CYRILLIC_MONTENEGRO;
break;
// The following are not strictly obsolete but should be mapped to a
// replacement locale when encountered.
......
......@@ -1174,7 +1174,7 @@ bool LanguageTagImpl::canonicalize()
return bChanged; // that's it
}
meIsLiblangtagNeeded = DECISION_YES;
SAL_INFO( "i18nlangtag", "LanguageTagImpl::canonicalize: using liblangtag for " << maBcp47);
SAL_INFO( "i18nlangtag", "LanguageTagImpl::canonicalize: using liblangtag for '" << maBcp47 << "'");
if (!mpImplLangtag)
{
......@@ -1187,7 +1187,7 @@ bool LanguageTagImpl::canonicalize()
if (lt_tag_parse( mpImplLangtag, OUStringToOString( maBcp47, RTL_TEXTENCODING_UTF8).getStr(), &aError.p))
{
char* pTag = lt_tag_canonicalize( mpImplLangtag, &aError.p);
SAL_WARN_IF( !pTag, "i18nlangtag", "LanguageTagImpl::canonicalize: could not canonicalize " << maBcp47);
SAL_WARN_IF( !pTag, "i18nlangtag", "LanguageTagImpl::canonicalize: could not canonicalize '" << maBcp47 << "'");
if (pTag)
{
OUString aOld( maBcp47);
......@@ -1201,7 +1201,7 @@ bool LanguageTagImpl::canonicalize()
meIsIsoODF = DECISION_DONTKNOW;
if (!lt_tag_parse( mpImplLangtag, pTag, &aError.p))
{
SAL_WARN( "i18nlangtag", "LanguageTagImpl::canonicalize: could not reparse " << maBcp47);
SAL_WARN( "i18nlangtag", "LanguageTagImpl::canonicalize: could not reparse '" << maBcp47 << "'");
free( pTag);
meIsValid = DECISION_NO;
return bChanged;
......@@ -1214,7 +1214,7 @@ bool LanguageTagImpl::canonicalize()
}
else
{
SAL_INFO( "i18nlangtag", "LanguageTagImpl::canonicalize: could not parse " << maBcp47);
SAL_INFO( "i18nlangtag", "LanguageTagImpl::canonicalize: could not parse '" << maBcp47 << "'");
}
meIsValid = DECISION_NO;
return bChanged;
......@@ -1536,11 +1536,13 @@ OUString LanguageTagImpl::getLanguageFromLangtag()
if (mpImplLangtag)
{
const lt_lang_t* pLangT = lt_tag_get_language( mpImplLangtag);
SAL_WARN_IF( !pLangT, "i18nlangtag", "LanguageTag::getLanguageFromLangtag: pLangT==NULL");
SAL_WARN_IF( !pLangT, "i18nlangtag",
"LanguageTag::getLanguageFromLangtag: pLangT==NULL for '" << maBcp47 << "'");
if (!pLangT)
return aLanguage;
const char* pLang = lt_lang_get_tag( pLangT);
SAL_WARN_IF( !pLang, "i18nlangtag", "LanguageTag::getLanguageFromLangtag: pLang==NULL");
SAL_WARN_IF( !pLang, "i18nlangtag",
"LanguageTag::getLanguageFromLangtag: pLang==NULL for '" << maBcp47 << "'");
if (pLang)
aLanguage = OUString::createFromAscii( pLang);
}
......@@ -1595,11 +1597,12 @@ OUString LanguageTagImpl::getRegionFromLangtag()
SAL_WARN_IF( !pRegionT &&
maBcp47.getLength() != 2 && maBcp47.getLength() != 3 &&
maBcp47.getLength() != 7 && maBcp47.getLength() != 8,
"i18nlangtag", "LanguageTag::getRegionFromLangtag: pRegionT==NULL");
"i18nlangtag", "LanguageTag::getRegionFromLangtag: pRegionT==NULL for '" << maBcp47 << "'");
if (!pRegionT)
return aRegion;
const char* pRegion = lt_region_get_tag( pRegionT);
SAL_WARN_IF( !pRegion, "i18nlangtag", "LanguageTag::getRegionFromLangtag: pRegion==NULL");
SAL_WARN_IF( !pRegion, "i18nlangtag",
"LanguageTag::getRegionFromLangtag: pRegion==NULL for'" << maBcp47 << "'");
if (pRegion)
aRegion = OUString::createFromAscii( pRegion);
}
......@@ -2399,7 +2402,10 @@ LanguageTagImpl::Extraction LanguageTagImpl::simpleExtract( const OUString& rBcp
}
}
if (eRet == EXTRACTED_NONE)
{
SAL_INFO( "i18nlangtag", "LanguageTagImpl::simpleExtract: did not extract '" << rBcp47 << "'");
rLanguage = rScript = rCountry = rVariants = OUString();
}
return eRet;
}
......
This diff is collapsed.
......@@ -152,8 +152,8 @@ StringArray STR_ARR_SVT_LANGUAGE_TABLE
< "Romanian (Moldova)" ; LANGUAGE_ROMANIAN_MOLDOVA ; > ;
< "Russian" ; LANGUAGE_RUSSIAN ; > ;
< "Sanskrit" ; LANGUAGE_SANSKRIT ; > ;
< "Serbian Cyrillic (Serbia and Montenegro)" ; LANGUAGE_SERBIAN_CYRILLIC ; > ;
< "Serbian Latin (Serbia and Montenegro)" ; LANGUAGE_SERBIAN_LATIN ; > ;
< "Serbian Cyrillic (Serbia and Montenegro)" ; LANGUAGE_SERBIAN_CYRILLIC_SAM ; > ;
< "Serbian Latin (Serbia and Montenegro)" ; LANGUAGE_SERBIAN_LATIN_SAM ; > ;
< "Serbian Cyrillic (Serbia)" ; LANGUAGE_USER_SERBIAN_CYRILLIC_SERBIA ; > ;
< "Serbian Latin (Serbia)" ; LANGUAGE_USER_SERBIAN_LATIN_SERBIA ; > ;
< "Serbian Cyrillic (Montenegro)" ; LANGUAGE_USER_SERBIAN_CYRILLIC_MONTENEGRO ; > ;
......@@ -205,8 +205,8 @@ StringArray STR_ARR_SVT_LANGUAGE_TABLE
< "Dhivehi" ; LANGUAGE_DHIVEHI ; > ;
< "Northern Sotho" ; LANGUAGE_SEPEDI ; > ;
< "Gaelic (Scotland)" ; LANGUAGE_GAELIC_SCOTLAND ; > ;
< "Mongolian Cyrillic" ; LANGUAGE_MONGOLIAN ; > ;
< "Mongolian Mongolian" ; LANGUAGE_MONGOLIAN_MONGOLIAN ; > ;
< "Mongolian Cyrillic" ; LANGUAGE_MONGOLIAN_CYRILLIC_MONGOLIA ; > ;
< "Mongolian Mongolian" ; LANGUAGE_MONGOLIAN_MONGOLIAN_MONGOLIA ; > ;
< "Interlingua" ; LANGUAGE_USER_INTERLINGUA ; > ;
< "Bosnian" ; LANGUAGE_BOSNIAN_BOSNIA_HERZEGOVINA ; > ;
< "Bengali (Bangladesh)" ; LANGUAGE_BENGALI_BANGLADESH ; > ;
......@@ -315,7 +315,7 @@ StringArray STR_ARR_SVT_LANGUAGE_TABLE
< "Yoruba" ; LANGUAGE_YORUBA ; > ;
< "Rusyn (Ukraine)" ; LANGUAGE_USER_RUSYN_UKRAINE ; > ;
< "Rusyn (Slovakia)" ; LANGUAGE_USER_RUSYN_SLOVAKIA ; > ;
< "Kabyle Latin" ; LANGUAGE_TAMAZIGHT_LATIN ; > ;
< "Kabyle Latin" ; LANGUAGE_TAMAZIGHT_LATIN_ALGERIA ; > ;
< "Yiddish (USA)" ; LANGUAGE_USER_YIDDISH_US ; > ;
< "Hawaiian" ; LANGUAGE_HAWAIIAN_UNITED_STATES ; > ;
< "Limbu" ; LANGUAGE_USER_LIMBU ; > ;
......
......@@ -586,9 +586,17 @@ OUString makeRepresentativeTextForLanguage(LanguageType eLang)
case LANGUAGE_KHMER & LANGUAGE_MASK_PRIMARY:
sRet = makeRepresentativeTextForScript(USCRIPT_KHMER);
break;
case LANGUAGE_MONGOLIAN & LANGUAGE_MASK_PRIMARY:
if (eLang == LANGUAGE_MONGOLIAN_MONGOLIAN)
sRet = makeRepresentativeTextForScript(USCRIPT_MONGOLIAN);
case LANGUAGE_MONGOLIAN_MONGOLIAN_LSO & LANGUAGE_MASK_PRIMARY:
switch (eLang)
{
case LANGUAGE_MONGOLIAN_MONGOLIAN_MONGOLIA:
case LANGUAGE_MONGOLIAN_MONGOLIAN_CHINA:
case LANGUAGE_MONGOLIAN_MONGOLIAN_LSO:
sRet = makeRepresentativeTextForScript(USCRIPT_MONGOLIAN);
break;
default:
break;
}
break;
case LANGUAGE_JAPANESE & LANGUAGE_MASK_PRIMARY:
sRet = makeRepresentativeTextForScript(USCRIPT_JAPANESE);
......
......@@ -287,11 +287,18 @@ VCL_DLLPUBLIC sal_UCS4 GetLocalizedChar( sal_UCS4 nChar, LanguageType eLang )
case LANGUAGE_MALAYALAM & LANGUAGE_MASK_PRIMARY:
nOffset = 0x0D66 - '0'; // malayalam
break;
case LANGUAGE_MONGOLIAN & LANGUAGE_MASK_PRIMARY:
if (eLang == LANGUAGE_MONGOLIAN_MONGOLIAN)
nOffset = 0x1810 - '0'; // mongolian
else
nOffset = 0; // mongolian cyrillic
case LANGUAGE_MONGOLIAN_MONGOLIAN_LSO & LANGUAGE_MASK_PRIMARY:
switch (eLang)
{
case LANGUAGE_MONGOLIAN_MONGOLIAN_MONGOLIA:
case LANGUAGE_MONGOLIAN_MONGOLIAN_CHINA:
case LANGUAGE_MONGOLIAN_MONGOLIAN_LSO:
nOffset = 0x1810 - '0'; // mongolian
break;
default:
nOffset = 0; // mongolian cyrillic
break;
}
break;
case LANGUAGE_BURMESE & LANGUAGE_MASK_PRIMARY:
nOffset = 0x1040 - '0'; // myanmar
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment