Kaydet (Commit) 8dec85a3 authored tarafından Eike Rathke's avatar Eike Rathke

Resolves: tdf#113977 implement REGEX() spreadsheet function

REGEX( Text ; Expression [ ; Replacement ] )

Using ICU regular expressions
http://userguide.icu-project.org/strings/regexp

Change-Id: I4cb9b8ba77cfb5b8faab93037aa0d947609383d7
Reviewed-on: https://gerrit.libreoffice.org/62332Reviewed-by: 's avatarEike Rathke <erack@redhat.com>
Tested-by: Jenkins
üst ae081cc1
......@@ -465,6 +465,7 @@ const std::pair<const char*, int> RID_STRLIST_FUNCTION_NAMES_ENGLISH_ODFF[] =
{ "COM.MICROSOFT.ENCODEURL" , SC_OPCODE_ENCODEURL },
{ "ORG.LIBREOFFICE.RAWSUBTRACT" , SC_OPCODE_RAWSUBTRACT },
{ "ORG.LIBREOFFICE.ROUNDSIG" , SC_OPCODE_ROUNDSIG },
{ "ORG.LIBREOFFICE.REGEX" , SC_OPCODE_REGEX },
{ nullptr, -1 }
};
......@@ -907,6 +908,7 @@ const std::pair<const char*, int> RID_STRLIST_FUNCTION_NAMES_ENGLISH_OOXML[] =
{ "_xlfn.ENCODEURL" , SC_OPCODE_ENCODEURL },
{ "_xlfn.ORG.LIBREOFFICE.RAWSUBTRACT" , SC_OPCODE_RAWSUBTRACT },
{ "_xlfn.ORG.LIBREOFFICE.ROUNDSIG" , SC_OPCODE_ROUNDSIG },
{ "_xlfn.ORG.LIBREOFFICE.REGEX" , SC_OPCODE_REGEX },
{ nullptr, -1 }
};
......@@ -1354,6 +1356,7 @@ const std::pair<const char*, int> RID_STRLIST_FUNCTION_NAMES_ENGLISH_PODF[] =
{ "ENCODEURL" , SC_OPCODE_ENCODEURL },
{ "RAWSUBTRACT" , SC_OPCODE_RAWSUBTRACT },
{ "ROUNDSIG" , SC_OPCODE_ROUNDSIG },
{ "REGEX" , SC_OPCODE_REGEX },
{ nullptr, -1 }
};
......@@ -1800,6 +1803,7 @@ const std::pair<const char*, int> RID_STRLIST_FUNCTION_NAMES_ENGLISH_API[] =
{ "ENCODEURL" , SC_OPCODE_ENCODEURL },
{ "RAWSUBTRACT" , SC_OPCODE_RAWSUBTRACT },
{ "ROUNDSIG" , SC_OPCODE_ROUNDSIG },
{ "REGEX" , SC_OPCODE_REGEX },
{ nullptr, -1 }
};
......@@ -2245,6 +2249,7 @@ const std::pair<const char*, int> RID_STRLIST_FUNCTION_NAMES_ENGLISH[] =
{ "ENCODEURL" , SC_OPCODE_ENCODEURL },
{ "RAWSUBTRACT" , SC_OPCODE_RAWSUBTRACT },
{ "ROUNDSIG" , SC_OPCODE_ROUNDSIG },
{ "REGEX" , SC_OPCODE_REGEX },
{ nullptr, -1 }
};
......@@ -2674,6 +2679,7 @@ const std::pair<const char*, int> RID_STRLIST_FUNCTION_NAMES[] =
{ NC_("RID_STRLIST_FUNCTION_NAMES", "ROUNDSIG") , SC_OPCODE_ROUNDSIG },
{ NC_("RID_STRLIST_FUNCTION_NAMES", "FINDB") , SC_OPCODE_FINDB },
{ NC_("RID_STRLIST_FUNCTION_NAMES", "SEARCHB") , SC_OPCODE_SEARCHB },
{ NC_("RID_STRLIST_FUNCTION_NAMES", "REGEX") , SC_OPCODE_REGEX },
{ nullptr, -1 }
};
......
......@@ -505,7 +505,8 @@
#define SC_OPCODE_REPLACEB 494
#define SC_OPCODE_FINDB 495
#define SC_OPCODE_SEARCHB 496
#define SC_OPCODE_STOP_2_PAR 497 /* last function with two or more parameters' OpCode + 1 */
#define SC_OPCODE_REGEX 497
#define SC_OPCODE_STOP_2_PAR 498 /* last function with two or more parameters' OpCode + 1 */
#define SC_OPCODE_STOP_FUNCTION SC_OPCODE_STOP_2_PAR /* last function's OpCode + 1 */
#define SC_OPCODE_LAST_OPCODE_ID (SC_OPCODE_STOP_FUNCTION - 1) /* last OpCode */
......
......@@ -343,6 +343,7 @@ enum OpCode : sal_uInt16
ocFindB = SC_OPCODE_FINDB,
ocSearchB = SC_OPCODE_SEARCHB,
ocNumberValue = SC_OPCODE_NUMBERVALUE,
ocRegex = SC_OPCODE_REGEX,
// Matrix functions
ocMatValue = SC_OPCODE_MAT_VALUE,
ocMatDet = SC_OPCODE_MAT_DET,
......@@ -808,6 +809,7 @@ inline std::string OpCodeEnumToString(OpCode eCode)
case ocText: return "Text";
case ocSubstitute: return "Substitute";
case ocRept: return "Rept";
case ocRegex: return "Regex";
case ocConcat: return "Concat";
case ocConcat_MS: return "Concat_MS";
case ocTextJoin_MS: return "TextJoin_MS";
......
......@@ -577,6 +577,7 @@
#define HID_FUNC_REPLACEB "SC_HID_FUNC_REPLACEB"
#define HID_FUNC_FINDB "SC_HID_FUNC_FINDB"
#define HID_FUNC_SEARCHB "SC_HID_FUNC_SEARCHB"
#define HID_FUNC_REGEX "SC_HID_FUNC_REGEX"
#endif
......
......@@ -3816,6 +3816,18 @@ const char* SC_OPCODE_SUBSTITUTE_ARY[] =
NC_("SC_OPCODE_SUBSTITUTE", "Which occurrence of the old text is to be replaced.")
};
// -=*# Resource for function REGEX #*=-
// Translatable description strings for the REGEX spreadsheet function.
// Layout as visible from the entries below: the first string describes the
// function itself, followed by one (name, description) pair per parameter in
// call order: Text, Expression, Replacement.
// NOTE(review): "SC_OPCODE_REGEX" is presumably the translation context passed
// to NC_ - confirm against the NC_ macro definition.
const char* SC_OPCODE_REGEX_ARY[] =
{
// Function description.
NC_("SC_OPCODE_REGEX", "Matches and optionally replaces text using regular expressions."),
// Parameter 1: name, then description.
NC_("SC_OPCODE_REGEX", "Text"),
NC_("SC_OPCODE_REGEX", "The text to be operated on."),
// Parameter 2: name, then description.
NC_("SC_OPCODE_REGEX", "Expression"),
NC_("SC_OPCODE_REGEX", "The regular expression to be matched."),
// Parameter 3: name, then description.
NC_("SC_OPCODE_REGEX", "Replacement"),
NC_("SC_OPCODE_REGEX", "The replacement text and expression.")
};
// -=*# Resource for function BASE #*=-
const char* SC_OPCODE_BASE_ARY[] =
{
......
......@@ -2654,6 +2654,7 @@ void Test::testFunctionLists()
"MIDB",
"NUMBERVALUE",
"PROPER",
"REGEX",
"REPLACE",
"REPLACEB",
"REPT",
......
......@@ -807,7 +807,8 @@ ScFunctionList::ScFunctionList()
{ SC_OPCODE_ROUNDSIG, ENTRY(SC_OPCODE_ROUNDSIG_ARY), 0, ID_FUNCTION_GRP_MATH, HID_FUNC_ROUNDSIG, 2, { 0, 0 } },
{ SC_OPCODE_REPLACEB, ENTRY(SC_OPCODE_REPLACEB_ARY), 0, ID_FUNCTION_GRP_TEXT, HID_FUNC_REPLACEB, 4, { 0, 0, 0, 0 } },
{ SC_OPCODE_FINDB, ENTRY(SC_OPCODE_FINDB_ARY), 0, ID_FUNCTION_GRP_TEXT, HID_FUNC_FINDB, 3, { 0, 0, 1 } },
{ SC_OPCODE_SEARCHB, ENTRY(SC_OPCODE_SEARCHB_ARY), 0, ID_FUNCTION_GRP_TEXT, HID_FUNC_SEARCHB, 3, { 0, 0, 1 } }
{ SC_OPCODE_SEARCHB, ENTRY(SC_OPCODE_SEARCHB_ARY), 0, ID_FUNCTION_GRP_TEXT, HID_FUNC_SEARCHB, 3, { 0, 0, 1 } },
{ SC_OPCODE_REGEX, ENTRY(SC_OPCODE_REGEX_ARY), 0, ID_FUNCTION_GRP_TEXT, HID_FUNC_REGEX, 3, { 0, 0, 1 } }
};
ScFuncDesc* pDesc = nullptr;
......
......@@ -665,6 +665,7 @@ private:
void ScText();
void ScSubstitute();
void ScRept();
void ScRegex();
void ScConcat();
void ScConcat_MS();
void ScTextJoin_MS();
......
......@@ -9222,6 +9222,61 @@ void ScInterpreter::ScSearch()
}
}
void ScInterpreter::ScRegex()
{
sal_uInt8 nParamCount = GetByte();
if (MustHaveParamCount( nParamCount, 2, 3))
{
bool bReplacement = false;
OUString aReplacement;
if (nParamCount == 3)
{
// A missing argument is not an empty string to replace the match.
if (IsMissing())
Pop();
else
{
aReplacement = GetString().getString();
bReplacement = true;
}
}
OUString aExpression = GetString().getString();
OUString aText = GetString().getString();
if (nGlobalError != FormulaError::NONE)
{
PushError( nGlobalError);
return;
}
sal_Int32 nPos = 0;
sal_Int32 nEndPos = aText.getLength();
utl::SearchParam aParam( aExpression, utl::SearchParam::SearchType::Regexp);
css::util::SearchResult aResult;
utl::TextSearch aSearch( aParam, *ScGlobal::pCharClass);
const bool bMatch = aSearch.SearchForward( aText, &nPos, &nEndPos, &aResult);
if (!bMatch)
PushNoValue();
else
{
assert(aResult.subRegExpressions >= 1);
if (!bReplacement)
PushString( aText.copy( aResult.startOffset[0], aResult.endOffset[0] - aResult.startOffset[0]));
else
{
/* TODO: global replacement of multiple occurrences, introduce
* extra parameter with flag 'g'? Loop over positions after
* nEndPos until none left? How to keep the offsets in sync
* after replacement? That should be done by
* ReplaceBackReferences(). */
aSearch.ReplaceBackReferences( aReplacement, aText, aResult);
PushString( aReplacement);
}
}
}
}
void ScInterpreter::ScMid()
{
if ( MustHaveParamCount( GetByte(), 3 ) )
......
......@@ -4215,6 +4215,7 @@ StackVar ScInterpreter::Interpret()
case ocMid : ScMid(); break;
case ocText : ScText(); break;
case ocSubstitute : ScSubstitute(); break;
case ocRegex : ScRegex(); break;
case ocRept : ScRept(); break;
case ocConcat : ScConcat(); break;
case ocConcat_MS : ScConcat_MS(); break;
......
......@@ -639,7 +639,8 @@ static const XclFunctionInfo saFuncTable_OOoLO[] =
EXC_FUNCENTRY_OOO( ocForecast_ETS_MUL, 3, 6, 0, "ORG.LIBREOFFICE.FORECAST.ETS.MULT" ),
EXC_FUNCENTRY_OOO( ocForecast_ETS_PIM, 3, 7, 0, "ORG.LIBREOFFICE.FORECAST.ETS.PI.MULT" ),
EXC_FUNCENTRY_OOO( ocForecast_ETS_STM, 3, 6, 0, "ORG.LIBREOFFICE.FORECAST.ETS.STAT.MULT" ),
EXC_FUNCENTRY_OOO( ocRoundSig, 2, 2, 0, "ORG.LIBREOFFICE.ROUNDSIG" )
EXC_FUNCENTRY_OOO( ocRoundSig, 2, 2, 0, "ORG.LIBREOFFICE.ROUNDSIG" ),
EXC_FUNCENTRY_OOO( ocRegex, 2, 3, 0, "ORG.LIBREOFFICE.REGEX" )
};
#undef EXC_FUNCENTRY_OOO_IBR
......
......@@ -910,7 +910,8 @@ static const FunctionData saFuncTableOOoLO[] =
{ "ORG.LIBREOFFICE.FORECAST.ETS.MULT", "ORG.LIBREOFFICE.FORECAST.ETS.MULT", NOID, NOID, 3, 6, V, { VR, VA, VR }, FuncFlags::MACROCALL_NEW },
{ "ORG.LIBREOFFICE.FORECAST.ETS.PI.MULT", "ORG.LIBREOFFICE.FORECAST.ETS.PI.MULT", NOID, NOID, 4, 7, V, { VR, VA, VR }, FuncFlags::MACROCALL_NEW },
{ "ORG.LIBREOFFICE.FORECAST.ETS.STAT.MULT", "ORG.LIBREOFFICE.FORECAST.ETS.STAT.MULT", NOID, NOID, 3, 6, V, { VR, VA, VR }, FuncFlags::MACROCALL_NEW },
{ "ORG.LIBREOFFICE.ROUNDSIG", "ORG.LIBREOFFICE.ROUNDSIG", NOID, NOID, 2, 2, V, { RX }, FuncFlags::MACROCALL_NEW }
{ "ORG.LIBREOFFICE.ROUNDSIG", "ORG.LIBREOFFICE.ROUNDSIG", NOID, NOID, 2, 2, V, { RX }, FuncFlags::MACROCALL_NEW },
{ "ORG.LIBREOFFICE.REGEX", "ORG.LIBREOFFICE.REGEX", NOID, NOID, 2, 3, V, { RX }, FuncFlags::MACROCALL_NEW }
};
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment