Commit 3f0fba00, authored by Stephan Bergmann

New rtl::isUnicodeScalarValue, rtl::isSurrogate

There are apparently various places that want to check for a Unicode scalar
value rather than for a Unicode code point.  Changed those uses of
rtl::isUnicodeCodePoint where that was obvious.  (For changing
svtools/source/svrtf/svparser.cxx see 8e0fb74d
"Revert 'svtools: HTML import: don't put lone surrogates in OUString'".)  Other
uses of rtl::isUnicodeCodePoint might also want to use rtl::isUnicodeScalarValue
instead.

As a side effect, this change also introduces rtl::isSurrogate, which is useful
in a few places as well.

Change-Id: I9245f4f98b83877145a4d392f0ddb7c5d824a535
üst 2d22b39f
......@@ -113,8 +113,8 @@ bool isValidName(OUString const & name, bool setMember) {
for (sal_Int32 i = 0; i != name.getLength();) {
sal_uInt32 c = name.iterateCodePoints(&i);
if ((c < 0x20 && !(c == 0x09 || c == 0x0A || c == 0x0D))
|| rtl::isHighSurrogate(c) || rtl::isLowSurrogate(c) || c == 0xFFFE
|| c == 0xFFFF || (!setMember && c == '/'))
|| rtl::isSurrogate(c) || c == 0xFFFE || c == 0xFFFF
|| (!setMember && c == '/'))
{
return false;
}
......
......@@ -335,6 +335,20 @@ sal_uInt32 const surrogatesLowLast = 0xDFFF;
}
/// @endcond
/** Tell whether a code point is a surrogate.

    @param code  A Unicode code point.

    @return  True if code is a surrogate code point (0xD800--0xDFFF).

    @since LibreOffice 6.0
*/
inline bool isSurrogate(sal_uInt32 code) {
    // Precondition: callers must pass a Unicode code point.
    assert(isUnicodeCodePoint(code));
    if (code < detail::surrogatesHighFirst) {
        // Below the first high surrogate.
        return false;
    }
    // At or above the first high surrogate: a surrogate only if it does not
    // exceed the last low surrogate.
    return !(code > detail::surrogatesLowLast);
}
/** Check for high surrogate.
@param code A Unicode code point.
......@@ -433,6 +447,19 @@ inline std::size_t splitSurrogates(sal_uInt32 code, sal_Unicode * output) {
}
}
/** Tell whether an integer is a Unicode scalar value.

    A value qualifies when it is a Unicode code point and is not a surrogate
    code point.

    @param code  An integer.

    @return  True if code is a Unicode scalar value.

    @since LibreOffice 6.0
*/
inline bool isUnicodeScalarValue(sal_uInt32 code)
{
    // The code point check has to come first: isSurrogate asserts that its
    // argument is a Unicode code point.
    if (!isUnicodeCodePoint(code)) {
        return false;
    }
    return !isSurrogate(code);
}
}
#endif
......
......@@ -133,9 +133,8 @@ sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
p += 3;
nEncoded |= ((nWeight1 & 3) << 4 | nWeight2) << nShift;
}
if (bUTF8 && rtl::isUnicodeCodePoint(nEncoded)
&& nEncoded >= nMin && !rtl::isHighSurrogate(nEncoded)
&& !rtl::isLowSurrogate(nEncoded))
if (bUTF8 && rtl::isUnicodeScalarValue(nEncoded)
&& nEncoded >= nMin)
{
*pBegin = p;
*pType = EscapeChar;
......
......@@ -168,7 +168,7 @@ void rtl_uStringbuffer_insertUtf32(
{
sal_Unicode buf[2];
sal_Int32 len;
OSL_ASSERT(rtl::isUnicodeCodePoint(c) && !(c >= 0xD800 && c <= 0xDFFF));
OSL_ASSERT(rtl::isUnicodeScalarValue(c));
if (c <= 0xFFFF) {
buf[0] = (sal_Unicode) c;
len = 1;
......
......@@ -170,10 +170,7 @@ sal_Size ImplConvertUtf8ToUnicode(
}
break;
case 3:
if (nUtf32 < 0x800
|| (!bJavaUtf8
&& (rtl::isHighSurrogate(nUtf32)
|| rtl::isLowSurrogate(nUtf32))))
if (nUtf32 < 0x800 || (!bJavaUtf8 && rtl::isSurrogate(nUtf32)))
{
goto bad_input;
}
......
......@@ -406,7 +406,7 @@ inline bool SaxWriterHelper::convertToXML( const sal_Unicode * pStr,
OSL_ENSURE( nSurrogate != 0, "lone 2nd Unicode surrogate" );
nSurrogate = ( nSurrogate << 10 ) | ( c & 0x03ff );
if( rtl::isUnicodeCodePoint(nSurrogate) && nSurrogate >= 0x00010000 )
if( rtl::isUnicodeScalarValue(nSurrogate) && nSurrogate >= 0x00010000 )
{
sal_Int8 aBytes[] = { sal_Int8(0xF0 | ((nSurrogate >> 18) & 0x0F)),
sal_Int8(0x80 | ((nSurrogate >> 12) & 0x3F)),
......@@ -851,7 +851,7 @@ inline sal_Int32 calcXMLByteLength( const OUString& rStr,
{
// 2. surrogate: write as UTF-8 (if range is OK
nSurrogate = ( nSurrogate << 10 ) | ( c & 0x03ff );
if( rtl::isUnicodeCodePoint(nSurrogate) && nSurrogate >= 0x00010000 )
if( rtl::isUnicodeScalarValue(nSurrogate) && nSurrogate >= 0x00010000 )
nOutputLength += 4;
nSurrogate = 0;
}
......
......@@ -110,8 +110,8 @@ OUString parsePart(
}
encoded |= (n & 0x3F) << shift;
}
if (!utf8 || !rtl::isUnicodeCodePoint(encoded) || encoded < min
|| (encoded >= 0xD800 && encoded <= 0xDFFF))
if (!utf8 || !rtl::isUnicodeScalarValue(encoded)
|| encoded < min)
{
break;
}
......
......@@ -423,7 +423,7 @@ sal_uInt32 SvParser<T>::GetNextChar()
while( 0 == nChars && !bErr );
}
if ( ! rtl::isUnicodeCodePoint( c ) )
if ( ! rtl::isUnicodeScalarValue( c ) )
c = '?' ;
if( bErr )
......
......@@ -4745,10 +4745,8 @@ sal_uInt32 INetURLObject::getUTF32(sal_Unicode const *& rBegin,
break;
nShift -= 6;
}
if (bUTF8 && rtl::isUnicodeCodePoint(nEncoded)
&& nEncoded >= nMin
&& !rtl::isHighSurrogate(nEncoded)
&& !rtl::isLowSurrogate(nEncoded))
if (bUTF8 && rtl::isUnicodeScalarValue(nEncoded)
&& nEncoded >= nMin)
{
rBegin = p;
nUTF32 = nEncoded;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment