Commit f677885f, authored by Eike Rathke

Resolves: tdf#125279 do not double _x005F_ escapement

When reading OOXML, _x005F_ escaped content may not get unescaped,
so when writing back to OOXML do not attempt to escape it again,
i.e. write _x005F_xHHHH_ as is and not as _x005F_x005F_xHHHH_.

This is more of a workaround; the proper fix would be to unescape
_x005F_ content upon read. But then the entire rat's tail of
"invalid XML character" escapement and control character handling
would come into play.

Change-Id: I3d31dc84a362753c23a8c89f7a5d7bfd06e4367b
Reviewed-on: https://gerrit.libreoffice.org/73187
Tested-by: Jenkins
Reviewed-by: Eike Rathke <erack@redhat.com>
parent 63c4e82c
...@@ -199,6 +199,7 @@ namespace sax_fastparser { ...@@ -199,6 +199,7 @@ namespace sax_fastparser {
default: default:
if (mbXescape) if (mbXescape)
{ {
char c1, c2, c3, c4;
// Escape characters not valid in XML 1.0 as // Escape characters not valid in XML 1.0 as
// _xHHHH_. A literal "_xHHHH_" has to be // _xHHHH_. A literal "_xHHHH_" has to be
// escaped as _x005F_xHHHH_ (effectively // escaped as _x005F_xHHHH_ (effectively
...@@ -209,22 +210,44 @@ namespace sax_fastparser { ...@@ -209,22 +210,44 @@ namespace sax_fastparser {
if (c == '_' && i >= nNextXescape && i <= nLen - kXescapeLen && if (c == '_' && i >= nNextXescape && i <= nLen - kXescapeLen &&
pStr[i+6] == '_' && pStr[i+6] == '_' &&
((pStr[i+1] | 0x20) == 'x') && ((pStr[i+1] | 0x20) == 'x') &&
isHexDigit( pStr[i+2] ) && isHexDigit( c1 = pStr[i+2] ) &&
isHexDigit( pStr[i+3] ) && isHexDigit( c2 = pStr[i+3] ) &&
isHexDigit( pStr[i+4] ) && isHexDigit( c3 = pStr[i+4] ) &&
isHexDigit( pStr[i+5] )) isHexDigit( c4 = pStr[i+5] ))
{ {
// OOXML has the odd habit to write some // OOXML has the odd habit to write some
// names using this that when re-saving // names using this that when re-saving
// should *not* be escaped, specifically // should *not* be escaped, specifically
// _x0020_ for blanks in w:xpath values. // _x0020_ for blanks in w:xpath values.
if (strncmp( pStr+i+2, "0020", 4) != 0) if (!(c1 == '0' && c2 == '0' && c3 == '2' && c4 == '0'))
{ {
writeBytes( "_x005F_", kXescapeLen); // When encountering "_x005F_xHHHH_"
// Remember this escapement so in // assume that is an already escaped
// _xHHHH_xHHHH_ only the first '_' is // sequence that was not unescaped and
// escaped. // shall be written as is, to not end
nNextXescape = i + kXescapeLen; // up with "_x005F_x005F_xHHHH_" and
// repeated..
if (c1 == '0' && c2 == '0' && c3 == '5' && (c4 | 0x20) == 'f' &&
i + kXescapeLen <= nLen - 6 &&
pStr[i+kXescapeLen+5] == '_' &&
((pStr[i+kXescapeLen+0] | 0x20) == 'x') &&
isHexDigit( pStr[i+kXescapeLen+1] ) &&
isHexDigit( pStr[i+kXescapeLen+2] ) &&
isHexDigit( pStr[i+kXescapeLen+3] ) &&
isHexDigit( pStr[i+kXescapeLen+4] ))
{
writeBytes( &c, 1 );
// Remember this fake escapement.
nNextXescape = i + kXescapeLen + 6;
}
else
{
writeBytes( "_x005F_", kXescapeLen);
// Remember this escapement so in
// _xHHHH_xHHHH_ only the first '_'
// is escaped.
nNextXescape = i + kXescapeLen;
}
break; break;
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment