Commit 5dc52ee0, authored by Khaled Hosny, committed by Eike Rathke

tdf#113694 Fix BreakIterator_CTL surrogate pairs

In every mode other than CharacterIteratorMode::SKIPCELL, BreakIterator_CTL
did not handle UTF-16 surrogate pairs at all, so backspace deleted only one
half of a pair and left a lone surrogate behind, which is really bad. Fix
this by copying the corresponding code from BreakIterator_Unicode.

Additionally, BreakIterator_th was not correctly skipping non-Thai text
and always treating one character as Thai.

Change-Id: Ia379327e042ff602fc19a485c4cbd1a3683f9230
Reviewed-on: https://gerrit.libreoffice.org/54631
Tested-by: Jenkins <ci@libreoffice.org>
Reviewed-by: Eike Rathke <erack@redhat.com>
parent 93aa646d
......@@ -919,6 +919,29 @@ void TestBreakIterator::testThai()
}
while (nPos > 0);
}
// tdf#113694
{
const sal_Unicode NON_BMP[] = { 0xD800, 0xDC00 };
OUString aTest(NON_BMP, SAL_N_ELEMENTS(NON_BMP));
sal_Int32 nDone=0;
sal_Int32 nPos;
nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full surrogate pair", static_cast<sal_Int32>(SAL_N_ELEMENTS(NON_BMP)), nPos);
nPos = m_xBreak->previousCharacters(aTest, SAL_N_ELEMENTS(NON_BMP), aLocale,
i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full surrogate pair", static_cast<sal_Int32>(0), nPos);
nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
i18n::CharacterIteratorMode::SKIPCHARACTER, 1, nDone);
CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full surrogate pair", static_cast<sal_Int32>(SAL_N_ELEMENTS(NON_BMP)), nPos);
nPos = m_xBreak->previousCharacters(aTest, SAL_N_ELEMENTS(NON_BMP), aLocale,
i18n::CharacterIteratorMode::SKIPCHARACTER, 1, nDone);
CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full surrogate pair", static_cast<sal_Int32>(0), nPos);
}
}
#ifdef TODO
......
......@@ -71,8 +71,8 @@ sal_Int32 SAL_CALL BreakIterator_CTL::previousCharacters( const OUString& Text,
} else
nStartPos = 0;
} else { // for BS to delete one char.
-nDone = std::min(nStartPos, nCount);
-nStartPos -= nDone;
+for (nDone = 0; nDone < nCount && nStartPos > 0; nDone++)
+Text.iterateCodePoints(&nStartPos, -1);
}
return nStartPos;
......@@ -98,8 +98,8 @@ sal_Int32 SAL_CALL BreakIterator_CTL::nextCharacters(const OUString& Text,
} else
nStartPos = len;
} else {
-nDone = std::min(len - nStartPos, nCount);
-nStartPos += nDone;
+for (nDone = 0; nDone < nCount && nStartPos < Text.getLength(); nDone++)
+Text.iterateCodePoints(&nStartPos);
}
return nStartPos;
......
......@@ -127,7 +127,7 @@ void BreakIterator_th::makeIndex(const OUString& Text, sal_Int32 const nStartPos
sal_Int32 startPos = nStartPos;
while (startPos > 0 && is_Thai(str[startPos-1])) startPos--;
-sal_Int32 endPos = std::min(len, nStartPos+1);
+sal_Int32 endPos = nStartPos;
while (endPos < len && is_Thai(str[endPos])) endPos++;
sal_Int32 start, end, pos;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment