Kaydet (Commit) ce51bf1a authored tarafından Zolnai Tamás's avatar Zolnai Tamás

Make a bit cleaner transformation of help strings

*Do not escape tags and double quotes in tags,
 but find tags (icu regexp) when merging and
 use this information to make strings valid.
*Define a new Quot function for helpex,
 which works with icu UnicodeCharacter.
*Move tag search to xmlparse.cxx and use icu
 just in helpex.
*QuotHTML does not unescape, it just replaces xml characters.
 (unescaping is also useless in uimerge.cxx)
*Move UnQuotHTML() to helper.
 (it was used in xmlparse.cxx and cfgmerge.cxx)
*Use UnQuotHTML() in uimerge.cxx too.

Change-Id: Ice8940ef69279709a1c5d84c6ae1b0d62a71ca76
üst 6ea8d4a5
......@@ -49,8 +49,7 @@ $(eval $(call gb_Executable_add_exception_objects,cfgex,\
$(eval $(call gb_Executable_use_externals,cfgex,\
boost_headers \
icuuc \
icui18n \
libxml2 \
))
# vim:set noet sw=4 ts=4:
......@@ -45,8 +45,6 @@ $(eval $(call gb_Executable_add_exception_objects,localize,\
$(eval $(call gb_Executable_use_externals,localize,\
boost_headers \
icuuc \
icui18n \
))
# vim:set noet sw=4 ts=4:
......@@ -30,8 +30,6 @@ $(eval $(call gb_Executable_add_exception_objects,propex,\
$(eval $(call gb_Executable_use_externals,propex,\
boost_headers \
icuuc \
icui18n \
))
# vim: set noet sw=4 ts=4:
......@@ -31,9 +31,6 @@ $(eval $(call gb_Executable_add_exception_objects,stringex,\
$(eval $(call gb_Executable_use_externals,stringex,\
boost_headers \
libxml2 \
icuuc \
icui18n \
icu_headers \
))
# vim: set noet sw=4 ts=4:
......@@ -51,9 +51,6 @@ $(eval $(call gb_Executable_add_exception_objects,transex3,\
$(eval $(call gb_Executable_use_externals,transex3,\
boost_headers \
icuuc \
icui18n \
icu_headers \
libxml2 \
))
......
......@@ -31,9 +31,6 @@ $(eval $(call gb_Executable_add_exception_objects,treex,\
$(eval $(call gb_Executable_use_externals,treex,\
boost_headers \
libxml2 \
icuuc \
icui18n \
icu_headers \
))
# vim: set noet sw=4 ts=4:
......@@ -31,9 +31,6 @@ $(eval $(call gb_Executable_use_externals,uiex,\
libxml2 \
libxslt \
boost_headers \
icuuc \
icui18n \
icu_headers \
))
# vim: set noet sw=4 ts=4:
......@@ -46,8 +46,6 @@ $(eval $(call gb_Executable_add_exception_objects,ulfex,\
$(eval $(call gb_Executable_use_externals,ulfex,\
boost_headers \
icuuc \
icui18n \
))
# vim:set noet sw=4 ts=4:
......@@ -39,7 +39,6 @@ $(eval $(call gb_StaticLibrary_set_include,transex,\
$(eval $(call gb_StaticLibrary_use_externals,transex,\
boost_headers \
icu_headers \
libxml2 \
))
......
......@@ -25,7 +25,8 @@
namespace helper {
OString QuotHTML(const rtl::OString &rString);
OString QuotHTML( const OString &rString );
OString UnQuotHTML( const OString& rString );
bool isWellFormedXML( OString const & text );
......
......@@ -229,11 +229,11 @@ protected:
class XMLUtil{
public:
/// Quot the XML characters and replace \n \t
static void QuotHTML( rtl::OUString &rString );
/// Quot the XML characters
static OUString QuotHTML( const OUString& rString );
/// UnQuot the XML characters and restore \n \t
static void UnQuotHTML ( rtl::OUString &rString );
/// UnQuot the XML characters
static OUString UnQuotHTML( const OUString &rString );
};
......
......@@ -27,6 +27,7 @@
#include "boost/scoped_ptr.hpp"
#include "rtl/strbuf.hxx"
#include "helper.hxx"
#include "export.hxx"
#include "cfgmerge.hxx"
#include "tokens.h"
......@@ -136,33 +137,6 @@ static OString lcl_QuoteHTML( const OString& rString )
return sReturn.makeStringAndClear();
}
/// Replace the five predefined XML entities (&amp;amp; &amp;lt; &amp;gt; &amp;quot;
/// &amp;apos;) in rString by the characters they stand for.
///
/// @param rString text that may contain XML entities
/// @return a copy of rString with every recognised entity replaced; all
///         other characters are copied unchanged
static OString lcl_UnquoteHTML( const OString& rString )
{
    rtl::OStringBuffer sReturn;
    // The index is advanced manually: by the full entity length after a
    // match, by one otherwise. Each step therefore stays inside the string
    // and the "!=" termination test is reached exactly.
    for (sal_Int32 i = 0; i != rString.getLength();) {
        if (rString.match("&amp;", i)) {
            sReturn.append('&');
            i += RTL_CONSTASCII_LENGTH("&amp;");
        } else if (rString.match("&lt;", i)) {
            // The matched text and the skipped length must name the same
            // entity, otherwise following characters would be dropped.
            sReturn.append('<');
            i += RTL_CONSTASCII_LENGTH("&lt;");
        } else if (rString.match("&gt;", i)) {
            sReturn.append('>');
            i += RTL_CONSTASCII_LENGTH("&gt;");
        } else if (rString.match("&quot;", i)) {
            sReturn.append('"');
            i += RTL_CONSTASCII_LENGTH("&quot;");
        } else if (rString.match("&apos;", i)) {
            sReturn.append('\'');
            i += RTL_CONSTASCII_LENGTH("&apos;");
        } else {
            sReturn.append(rString[i]);
            ++i;
        }
    }
    return sReturn.makeStringAndClear();
}
} // anonymous namespace
//
......@@ -489,7 +463,7 @@ void CfgExport::WorkOnResourceEnd()
if ( sText.isEmpty())
sText = sFallback;
sText = lcl_UnquoteHTML( sText );
sText = helper::UnQuotHTML( sText );
common::writePoEntry(
"Cfgex", pOutputStream, sPath, pStackData->sResTyp,
......@@ -504,7 +478,7 @@ void CfgExport::WorkOnText(
const rtl::OString &rIsoLang
)
{
if( rIsoLang.getLength() ) rText = lcl_UnquoteHTML( rText );
if( rIsoLang.getLength() ) rText = helper::UnQuotHTML( rText );
}
......
......@@ -11,45 +11,58 @@
namespace helper {
rtl::OString QuotHTML(const rtl::OString &rString)
OString QuotHTML(const OString &rString)
{
rtl::OStringBuffer sReturn;
for (sal_Int32 i = 0; i < rString.getLength(); ++i) {
switch (rString[i]) {
case '\\':
if (i < rString.getLength()) {
switch (rString[i + 1]) {
case '"':
case '<':
case '>':
case '\\':
++i;
break;
}
}
// fall through
default:
sReturn.append(rString[i]);
break;
OStringBuffer sReturn;
for (sal_Int32 i = 0; i < rString.getLength(); ++i)
{
switch (rString[i])
{
case '<':
sReturn.append("&lt;");
break;
case '>':
sReturn.append("&gt;");
break;
case '"':
sReturn.append("&quot;");
break;
case '&':
if (rString.match("&amp;", i))
sReturn.append('&');
else
sReturn.append("&amp;");
break;
default:
sReturn.append(rString[i]);
break;
}
}
return sReturn.makeStringAndClear();
}
OString UnQuotHTML( const OString& rString )
{
OStringBuffer sReturn;
for (sal_Int32 i = 0; i != rString.getLength();) {
if (rString.match("&amp;", i)) {
sReturn.append('&');
i += RTL_CONSTASCII_LENGTH("&amp;");
} else if (rString.match("&lt;", i)) {
sReturn.append('<');
i += RTL_CONSTASCII_LENGTH("&lt;");
} else if (rString.match("&gt;", i)) {
sReturn.append('>');
i += RTL_CONSTASCII_LENGTH("&gt;");
} else if (rString.match("&quot;", i)) {
sReturn.append('"');
i += RTL_CONSTASCII_LENGTH("&quot;");
} else if (rString.match("&apos;", i)) {
sReturn.append('\'');
i += RTL_CONSTASCII_LENGTH("&apos;");
} else {
sReturn.append(rString[i]);
++i;
}
}
return sReturn.makeStringAndClear();
......
......@@ -242,10 +242,10 @@ void HelpParser::ProcessHelp( LangHashMap* aLangHM , const rtl::OString& sCur ,
nPreSpaces++;
pEntrys->GetText( sNewText, STRING_TYP_TEXT, sCur , true );
OUString sNewdata;
if (helper::isWellFormedXML(helper::QuotHTML(sNewText)))
OUString sTemp = OStringToOUString(sNewText, RTL_TEXTENCODING_UTF8);
if (helper::isWellFormedXML(OUStringToOString(XMLUtil::QuotHTML(sTemp),RTL_TEXTENCODING_UTF8)))
{
sNewdata = sSourceText.copy(0,nPreSpaces) +
rtl::OStringToOUString(sNewText, RTL_TEXTENCODING_UTF8);
sNewdata = sSourceText.copy(0,nPreSpaces) + sTemp;
}
else
{
......
......@@ -17,15 +17,12 @@
#include <string>
#include <boost/crc.hpp>
#include <unicode/regex.h>
#include "po.hxx"
#define POESCAPED OString("\\n\\t\\r\\\\\\\"")
#define POUNESCAPED OString("\n\t\r\\\"")
using namespace U_ICU_NAMESPACE;
/** Container of po entry
Provide all file operations related to LibreOffice specific
......@@ -282,92 +279,17 @@ namespace
const OString& rText,const bool bHelpText = false )
{
if ( bHelpText )
return lcl_UnEscapeText(rText,"\\<\\>\\\"\\\\","<>\"\\");
return rText;
else
return lcl_UnEscapeText(rText,"\\n\\t\\r","\n\t\r");
}
//Find all special tag in a string using a regular expression
static void lcl_FindAllTag(
const OString& rText,std::vector<OString>& o_vFoundTags )
{
UErrorCode nIcuErr = U_ZERO_ERROR;
static const sal_uInt32 nSearchFlags =
UREGEX_DOTALL | UREGEX_CASE_INSENSITIVE;
OUString sLocaleText( OStringToOUString(rText,RTL_TEXTENCODING_UTF8) );
static const OUString sPattern(
"<[/]\?\?[a-z_-]+?(?:| +[a-z]+?=\".*?\") *[/]\?\?>");
static const UnicodeString sSearchPat(
reinterpret_cast<const UChar*>(sPattern.getStr()),
sPattern.getLength() );
UnicodeString sSource(
reinterpret_cast<const UChar*>(
sLocaleText.getStr()), sLocaleText.getLength() );
RegexMatcher aRegexMatcher( sSearchPat, nSearchFlags, nIcuErr );
aRegexMatcher.reset( sSource );
int64_t nStartPos = 0;
while( aRegexMatcher.find(nStartPos, nIcuErr) &&
nIcuErr == U_ZERO_ERROR )
{
UnicodeString sMatch =
aRegexMatcher.group(nIcuErr);
o_vFoundTags.push_back(
OUStringToOString(
OUString(
reinterpret_cast<const sal_Unicode*>(
sMatch.getBuffer()),sMatch.length()),
RTL_TEXTENCODING_UTF8));
nStartPos = aRegexMatcher.start(nIcuErr)+1;
}
}
//Escape special tags
static OString lcl_EscapeTags( const OString& rText )
{
typedef std::vector<OString> StrVec_t;
static const OString vInitializer[] = {
"ahelp", "link", "item", "emph", "defaultinline",
"switchinline", "caseinline", "variable",
"bookmark_value", "image", "embedvar", "alt" };
static const StrVec_t vTagsForEscape( vInitializer,
vInitializer + sizeof(vInitializer) / sizeof(vInitializer[0]) );
StrVec_t vFoundTags;
lcl_FindAllTag(rText,vFoundTags);
OString sResult = rText;
for(StrVec_t::const_iterator pFound = vFoundTags.begin();
pFound != vFoundTags.end(); ++pFound)
{
bool bEscapeThis = false;
for(StrVec_t::const_iterator pEscape = vTagsForEscape.begin();
pEscape != vTagsForEscape.end(); ++pEscape)
{
if (pFound->startsWith("<" + *pEscape) ||
*pFound == "</" + *pEscape + ">")
{
bEscapeThis = true;
break;
}
}
if( bEscapeThis || *pFound=="<br/>" ||
*pFound =="<help-id-missing/>")
{
OString sToReplace = "\\<" +
pFound->copy(1,pFound->getLength()-2).
replaceAll("\"","\\\"") + "\\>";
sResult = sResult.replaceAll(*pFound, sToReplace);
}
}
return sResult;
}
//Escape to get merge string
static OString lcl_EscapeMergeText(
const OString& rText,const bool bHelpText = false )
{
if ( bHelpText )
return lcl_EscapeTags(rText.replaceAll("\\","\\\\"));
return rText;
else
return lcl_EscapeText(rText,"\n\t\r","\\n\\t\\r");
}
......
......@@ -64,7 +64,7 @@ int extractTranslations()
vIDs.push_back(helper::xmlStrToOString(content));
xmlFree(content);
}
OString sText = helper::xmlStrToOString(xmlNodeGetContent(nodeLevel2));
OString sText = helper::UnQuotHTML(helper::xmlStrToOString(xmlNodeGetContent(nodeLevel2)));
common::writePoEntry(
"Uiex", aPOStream, sInputFileName, vIDs[0],
(vIDs.size()>=2) ? vIDs[1] : OString(),
......
......@@ -20,6 +20,7 @@
#include <iterator> /* std::iterator*/
#include <cassert>
#include <stdio.h>
#include <sal/alloca.h>
......@@ -32,7 +33,9 @@
#include <osl/thread.hxx>
#include <osl/process.h>
#include <rtl/strbuf.hxx>
#include <unicode/regex.h>
using namespace U_ICU_NAMESPACE;
using namespace std;
using namespace osl;
......@@ -195,12 +198,10 @@ sal_Bool XMLFile::Write( ofstream &rStream , XMLNode *pCur )
for ( size_t j = 0; j < pElement->GetAttributeList()->size(); j++ ) {
rStream << " ";
rtl::OUString sData( (*pElement->GetAttributeList())[ j ]->GetName() );
XMLUtil::QuotHTML( sData );
WriteString( rStream , sData );
WriteString( rStream , XMLUtil::QuotHTML( sData ) );
rStream << "=\"";
sData = (*pElement->GetAttributeList())[ j ]->GetValue();
XMLUtil::QuotHTML( sData );
WriteString( rStream , sData );
WriteString( rStream , XMLUtil::QuotHTML( sData ) );
rStream << "\"";
}
if ( !pElement->GetChildList())
......@@ -218,8 +219,7 @@ sal_Bool XMLFile::Write( ofstream &rStream , XMLNode *pCur )
case XML_NODE_TYPE_DATA: {
XMLData *pData = ( XMLData * ) pCur;
rtl::OUString sData( pData->GetData());
XMLUtil::QuotHTML( sData );
WriteString( rStream, sData );
WriteString( rStream, XMLUtil::QuotHTML( sData ) );
}
break;
case XML_NODE_TYPE_COMMENT: {
......@@ -717,7 +717,7 @@ void XMLElement::Print(XMLNode *pCur, OUStringBuffer& buffer , bool rootelement
XMLElement *pElement = ( XMLElement * ) pCur;
if( !pElement->GetName().equalsIgnoreAsciiCase("comment") ){
buffer.append( OUString("\\<") );
buffer.append( OUString("<") );
buffer.append( pElement->GetName() );
if ( pElement->GetAttributeList()){
for ( size_t j = 0; j < pElement->GetAttributeList()->size(); j++ ){
......@@ -727,24 +727,24 @@ void XMLElement::Print(XMLNode *pCur, OUStringBuffer& buffer , bool rootelement
buffer.append( OUString(" ") );
buffer.append( aAttrName );
buffer.append( OUString("=") );
buffer.append( OUString("\\\"") );
buffer.append( OUString("\"") );
buffer.append( (*pElement->GetAttributeList())[ j ]->GetValue() );
buffer.append( OUString("\\\"") );
buffer.append( OUString("\"") );
}
}
}
if ( !pElement->GetChildList())
buffer.append( OUString("/\\>") );
buffer.append( OUString("/>") );
else {
buffer.append( OUString("\\>") );
buffer.append( OUString(">") );
XMLChildNode* tmp=NULL;
for ( size_t k = 0; k < pElement->GetChildList()->size(); k++ ){
tmp = (*pElement->GetChildList())[ k ];
Print( tmp, buffer , false);
}
buffer.append( OUString("\\</") );
buffer.append( OUString("</") );
buffer.append( pElement->GetName() );
buffer.append( OUString("\\>") );
buffer.append( OUString(">") );
}
}
}
......@@ -1172,41 +1172,114 @@ XMLFile *SimpleXMLParser::Execute( const rtl::OUString &rFileName, XMLFile* pXML
return pXMLFile;
}
namespace
{
void XMLUtil::QuotHTML( OUString &rString )
/// Copy the characters rString[nStart] .. rString[nEnd] (both indices are
/// visited, see the loop condition) and replace XML special characters:
///   '<' -> "&lt;", '>' -> "&gt;",
///   '"' -> "&quot;" unless bInsideTag is set, in which case it is kept,
///   '&' -> a single '&' when the startsWith() test below succeeds,
///          "&amp;" otherwise.
/// Every other character is copied unchanged.
///
/// @param rString     text to read from
/// @param nStart      index of the first character to process
/// @param nEnd        index of the last character to process
/// @param bInsideTag  when true, double quotes are not replaced
/// @return the quoted copy of the requested range
static icu::UnicodeString lcl_QuotRange(
const icu::UnicodeString& rString, const sal_Int32 nStart,
const sal_Int32 nEnd, bool bInsideTag = false )
{
// NOTE(review): the next two statements look like leftovers of the previous
// OString based XMLUtil::QuotHTML body (they treat rString as an OUString and
// assign to the const reference parameter) - confirm they are stale.
const OString sString(OUStringToOString(rString, RTL_TEXTENCODING_UTF8));
rString = OStringToOUString(helper::QuotHTML( sString ), RTL_TEXTENCODING_UTF8);
icu::UnicodeString sReturn;
// NOTE(review): both asserts demand strictly positive indices, but
// XMLUtil::QuotHTML passes nEndPos (initially 0) as nStart and nStartPos-1
// (-1 for a match at offset 0) as nEnd - confirm the intended bounds.
assert( nStart > 0 && nStart < rString.length() );
assert( nEnd > 0 && nEnd < rString.length() );
// The loop body is skipped entirely when nEnd < nStart.
for (sal_Int32 i = nStart; i <= nEnd; ++i)
{
switch (rString[i])
{
case '<':
sReturn.append("&lt;");
break;
case '>':
sReturn.append("&gt;");
break;
case '"':
// Quotes that delimit attribute values inside a tag are left alone.
if( !bInsideTag )
sReturn.append("&quot;");
else
sReturn.append(rString[i]);
break;
case '&':
// Intended to leave an already quoted "&amp;" untouched: only the '&' is
// written here, the following "amp;" is copied by the default branch.
// NOTE(review): ICU documents startsWith(srcText, srcStart, srcLength) as
// taking an offset and length into srcText, not into this string - confirm
// that this really tests for "&amp;" at position i.
if (rString.startsWith("&amp;", i, 5))
sReturn.append('&');
else
sReturn.append("&amp;");
break;
default:
sReturn.append(rString[i]);
break;
}
}
return sReturn;
}
void XMLUtil::UnQuotHTML( rtl::OUString &rString ){
rtl::OStringBuffer sReturn;
rtl::OString sString(rtl::OUStringToOString(rString, RTL_TEXTENCODING_UTF8));
for (sal_Int32 i = 0; i != sString.getLength();) {
if (sString[i] == '\\') {
sReturn.append(RTL_CONSTASCII_STRINGPARAM("\\\\"));
++i;
} else if (sString.match("&amp;", i)) {
sReturn.append('&');
i += RTL_CONSTASCII_LENGTH("&amp;");
} else if (sString.match("&lt;", i)) {
sReturn.append('<');
i += RTL_CONSTASCII_LENGTH("&lt;");
} else if (sString.match("&gt;", i)) {
sReturn.append('>');
i += RTL_CONSTASCII_LENGTH("&gt;");
} else if (sString.match("&quot;", i)) {
sReturn.append('"');
i += RTL_CONSTASCII_LENGTH("&quot;");
} else if (sString.match("&apos;", i)) {
sReturn.append('\'');
i += RTL_CONSTASCII_LENGTH("&apos;");
} else {
sReturn.append(sString[i]);
++i;
/// Decide whether rString is one of the help markup tags that must be kept
/// as a tag: either one of the two stand-alone tags compared literally, or
/// text that begins with "<name" or equals "</name>" for a listed name.
///
/// @param rString  text of a single regex match
/// @return true if rString is recognised as a known tag
static bool lcl_isTag( const icu::UnicodeString& rString )
{
    // Stand-alone tags are accepted only when they match exactly.
    if( rString == "<br/>" || rString =="<help-id-missing/>" )
        return true;

    static const icu::UnicodeString aKnownNames[] = {
        "ahelp", "link", "item", "emph", "defaultinline",
        "switchinline", "caseinline", "variable",
        "bookmark_value", "image", "embedvar", "alt" };
    const int nCount =
        static_cast<int>( sizeof(aKnownNames) / sizeof(aKnownNames[0]) );

    for( int nPos = 0; nPos != nCount; ++nPos )
    {
        const icu::UnicodeString& rName = aKnownNames[nPos];

        // Opening tag: only the prefix is checked, attributes may follow.
        const icu::UnicodeString sOpening = "<" + rName;
        if( rString.startsWith(sOpening) )
            return true;

        // Closing tag: has to match completely.
        const icu::UnicodeString sClosing = "</" + rName + ">";
        if( rString == sClosing )
            return true;
    }
    return false;
}
} /// anonymous namespace
/// Quote the XML special characters of rString while keeping recognised
/// help tags as tags.
///
/// The text is scanned with a regular expression for tag-like pieces. Text
/// between matches is passed through lcl_QuotRange. A match accepted by
/// lcl_isTag keeps its outer '<' and '>' and has only its inside quoted
/// (double quotes preserved); any other match is quoted completely.
///
/// @param rString  text to quote
/// @return the quoted text; rString itself when it is empty after trim()
OUString XMLUtil::QuotHTML( const OUString &rString )
{
// Nothing to quote in an empty or whitespace-only string.
if( rString.trim().isEmpty() )
return rString;
UErrorCode nIcuErr = U_ZERO_ERROR;
static const sal_uInt32 nSearchFlags =
UREGEX_DOTALL | UREGEX_CASE_INSENSITIVE;
// The "\?" escapes produce plain '?' characters, so the pattern contains
// "[/]??" (presumably spelled this way to avoid the "??>" trigraph).
static const OUString sPattern(
"<[/]\?\?[a-z_-]+?(?:| +[a-z]+?=\".*?\") *[/]\?\?>");
static const UnicodeString sSearchPat(
reinterpret_cast<const UChar*>(sPattern.getStr()),
sPattern.getLength() );
icu::UnicodeString sSource(
reinterpret_cast<const UChar*>(
rString.getStr()), rString.getLength() );
RegexMatcher aRegexMatcher( sSearchPat, nSearchFlags, nIcuErr );
aRegexMatcher.reset( sSource );
icu::UnicodeString sReturn;
// nEndPos: value of end() for the previous match (0 before the first one).
// nStartPos: position from which the next find() starts.
int32_t nEndPos = 0;
int32_t nStartPos = 0;
while( aRegexMatcher.find(nStartPos, nIcuErr) && nIcuErr == U_ZERO_ERROR )
{
nStartPos = aRegexMatcher.start(nIcuErr);
// Quote the plain text between the previous match and this one.
sReturn.append(lcl_QuotRange(sSource, nEndPos, nStartPos-1));
nEndPos = aRegexMatcher.end(nIcuErr);
icu::UnicodeString sMatch = aRegexMatcher.group(nIcuErr);
if( lcl_isTag(sMatch) )
{
// Known tag: write the delimiters as they are and quote only the part
// between them (nStartPos+1 .. nEndPos-2), keeping double quotes.
sReturn.append("<");
sReturn.append(lcl_QuotRange(sSource, nStartPos+1, nEndPos-2, true));
sReturn.append(">");
}
else
sReturn.append(lcl_QuotRange(sSource, nStartPos, nEndPos-1));
// NOTE(review): the search resumes one character after the start of this
// match, not at nEndPos, so a later match may begin inside the text that
// was just written - confirm this cannot emit parts of the input twice.
++nStartPos;
}
// NOTE(review): the next statement looks like a leftover of the removed
// XMLUtil::UnQuotHTML body (it assigns to the const parameter rString) -
// confirm it is stale.
rString = rtl::OStringToOUString(sReturn.makeStringAndClear(), RTL_TEXTENCODING_UTF8);
// Quote whatever follows the last match.
sReturn.append(lcl_QuotRange(sSource, nEndPos, sSource.length()-1));
// The OUString below is built from the bare buffer pointer without a
// length, so the buffer is explicitly NUL terminated first.
sReturn.append('\0');
return OUString(reinterpret_cast<const sal_Unicode*>(sReturn.getBuffer()));
}
/// UnQuot the XML characters of rString.
///
/// Thin wrapper: the text is converted to UTF-8, handed to
/// helper::UnQuotHTML, and the result is converted back to an OUString.
///
/// @param rString  text that may contain XML entities
/// @return the text as returned by helper::UnQuotHTML, as an OUString
OUString XMLUtil::UnQuotHTML( const OUString& rString )
{
    const OString sUnquotedUtf8(
        helper::UnQuotHTML(
            OUStringToOString(rString, RTL_TEXTENCODING_UTF8)));
    return OStringToOUString(sUnquotedUtf8, RTL_TEXTENCODING_UTF8);
}
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment