test_textsearch.cxx 12.6 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */

#include <cppuhelper/bootstrap.hxx>
#include <cppuhelper/basemutex.hxx>
#include <com/sun/star/util/SearchFlags.hpp>
#include <com/sun/star/util/SearchOptions.hpp>
24 25
#include <com/sun/star/util/SearchAlgorithms2.hpp>
#include <com/sun/star/util/XTextSearch2.hpp>
26
#include <com/sun/star/i18n/Transliteration.hpp>
27
#include <unotest/bootstrapfixturebase.hxx>
28
#include <i18nutil/transliteration.hxx>
29 30 31 32 33 34 35 36 37 38 39 40 41

#include <unicode/regex.h>

#include <rtl/strbuf.hxx>
#include <rtl/ustrbuf.hxx>

using namespace ::com::sun::star;
using namespace U_ICU_NAMESPACE;
typedef U_ICU_NAMESPACE::UnicodeString IcuUniString;

class TestTextSearch : public test::BootstrapFixtureBase
{
public:
42 43
    virtual void setUp() override;
    virtual void tearDown() override;
44 45 46

    void testICU();
    void testSearches();
47
    void testWildcardSearch();
48 49 50 51

    CPPUNIT_TEST_SUITE(TestTextSearch);
    CPPUNIT_TEST(testICU);
    CPPUNIT_TEST(testSearches);
52
    CPPUNIT_TEST(testWildcardSearch);
53 54 55
    CPPUNIT_TEST_SUITE_END();
private:
    uno::Reference<util::XTextSearch> m_xSearch;
56
    uno::Reference<util::XTextSearch2> m_xSearch2;
57 58 59 60 61 62 63 64 65 66
};

// Sanity check our ICU first ...
void TestTextSearch::testICU()
{
    UErrorCode nErr = U_ZERO_ERROR;
    sal_uInt32 nSearchFlags = UREGEX_UWORD | UREGEX_CASE_INSENSITIVE;

    OUString aString( "abcdefgh" );
    OUString aPattern( "e" );
67
    IcuUniString aSearchPat( reinterpret_cast<const UChar*>(aPattern.getStr()), aPattern.getLength() );
68

69
    std::unique_ptr<RegexMatcher> pRegexMatcher(new RegexMatcher( aSearchPat, nSearchFlags, nErr ));
70

71
    IcuUniString aSource( reinterpret_cast<const UChar*>(aString.getStr()), aString.getLength() );
72 73 74
    pRegexMatcher->reset( aSource );

    CPPUNIT_ASSERT( pRegexMatcher->find( 0, nErr ) );
75 76 77 78 79
    CPPUNIT_ASSERT_EQUAL( U_ZERO_ERROR, nErr );
    CPPUNIT_ASSERT_EQUAL( static_cast<int32_t>(4), pRegexMatcher->start( nErr ) );
    CPPUNIT_ASSERT_EQUAL( U_ZERO_ERROR, nErr );
    CPPUNIT_ASSERT_EQUAL( static_cast<int32_t>(5), pRegexMatcher->end( nErr ) );
    CPPUNIT_ASSERT_EQUAL( U_ZERO_ERROR, nErr );
80 81 82 83

    OUString aString2( "acababaabcababadcdaa" );
    OUString aPattern2( "a" );

84
    IcuUniString aSearchPat2( reinterpret_cast<const UChar*>(aPattern2.getStr()), aPattern2.getLength() );
85
    pRegexMatcher.reset(new RegexMatcher( aSearchPat2, nSearchFlags, nErr ));
86

87
    IcuUniString aSource2( reinterpret_cast<const UChar*>(aString2.getStr()), aString2.getLength() );
88 89 90
    pRegexMatcher->reset( aSource2 );

    CPPUNIT_ASSERT( pRegexMatcher->find( 0, nErr ) );
91 92 93 94 95
    CPPUNIT_ASSERT_EQUAL( U_ZERO_ERROR, nErr );
    CPPUNIT_ASSERT_EQUAL( static_cast<int32_t>(0), pRegexMatcher->start( nErr ) );
    CPPUNIT_ASSERT_EQUAL( U_ZERO_ERROR, nErr );
    CPPUNIT_ASSERT_EQUAL( static_cast<int32_t>(1), pRegexMatcher->end( nErr ) );
    CPPUNIT_ASSERT_EQUAL( U_ZERO_ERROR, nErr );
96 97 98 99 100 101
}

void TestTextSearch::testSearches()
{
    OUString str( "acababaabcababadcdaa" );
    sal_Int32 startPos = 2, endPos = 20 ;
102 103 104
    OUString const searchStr( "(ab)*a(c|d)+" );
    sal_Int32 const fStartRes = 10, fEndRes = 18 ;
    sal_Int32 const bStartRes = 18, bEndRes = 10 ;
105 106 107 108 109 110 111 112 113 114 115 116 117

    // set options
    util::SearchOptions aOptions;
    aOptions.algorithmType = util::SearchAlgorithms_REGEXP ;
    aOptions.searchFlag = util::SearchFlags::ALL_IGNORE_CASE;
    aOptions.searchString = searchStr;
    m_xSearch->setOptions( aOptions );

    util::SearchResult aRes;

    // search forward
    aRes = m_xSearch->searchForward( str, startPos, endPos );
    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
118 119
    CPPUNIT_ASSERT_EQUAL( fStartRes, aRes.startOffset[0] );
    CPPUNIT_ASSERT_EQUAL( fEndRes, aRes.endOffset[0] );
120 121 122 123

    // search backwards
    aRes = m_xSearch->searchBackward( str, endPos, startPos );
    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
124 125
    CPPUNIT_ASSERT_EQUAL( bStartRes, aRes.startOffset[0] );
    CPPUNIT_ASSERT_EQUAL( bEndRes, aRes.endOffset[0] );
126

127
    aOptions.transliterateFlags = static_cast<int>(TransliterationFlags::IGNORE_CASE
128
                                | TransliterationFlags::IGNORE_WIDTH);
129 130 131
    aOptions.searchString = "([^ ]*)[ ]*([^ ]*)";
    m_xSearch->setOptions(aOptions);
    aRes = m_xSearch->searchForward("11 22 33", 2, 7);
132
    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(3), aRes.subRegExpressions);
133 134 135
    CPPUNIT_ASSERT((aRes.startOffset[0] == 2) && (aRes.endOffset[0] == 5));
    CPPUNIT_ASSERT((aRes.startOffset[1] == 2) && (aRes.endOffset[1] == 2));
    CPPUNIT_ASSERT((aRes.startOffset[2] == 3) && (aRes.endOffset[2] == 5));
136 137
}

138 139 140 141 142 143 144
void TestTextSearch::testWildcardSearch()
{
    util::SearchOptions2 aOptions;
    OUString aText;
    util::SearchResult aRes;

    aOptions.AlgorithmType2 = util::SearchAlgorithms2::WILDCARD ;
145 146 147
    aOptions.WildcardEscapeCharacter = '~';
    // aOptions.searchFlag = ::css::util::SearchFlags::WILD_MATCH_SELECTION;
    // is not set, so substring match is allowed.
148
    aOptions.transliterateFlags = sal_Int32(::css::i18n::TransliterationModules::TransliterationModules_IGNORE_CASE);
149 150 151 152 153 154
    aText = "abAca";

    aOptions.searchString = "a";
    m_xSearch2->setOptions2( aOptions );
    // match first "a", [0,1)
    aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
155
    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
156 157 158
    CPPUNIT_ASSERT((aRes.startOffset[0] == 0) && (aRes.endOffset[0] == 1));
    // match last "a", (5,4]
    aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
159
    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
160 161 162 163 164 165
    CPPUNIT_ASSERT((aRes.startOffset[0] == 5) && (aRes.endOffset[0] == 4));

    aOptions.searchString = "a?";
    m_xSearch2->setOptions2( aOptions );
    // match "ab", [0,2)
    aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
166
    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
167 168 169
    CPPUNIT_ASSERT((aRes.startOffset[0] == 0) && (aRes.endOffset[0] == 2));
    // match "ac", (4,2]
    aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
170
    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
171 172 173 174 175 176
    CPPUNIT_ASSERT((aRes.startOffset[0] == 4) && (aRes.endOffset[0] == 2));

    aOptions.searchString = "a*c";
    m_xSearch2->setOptions2( aOptions );
    // match "abac", [0,4) XXX NOTE: first match forward
    aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
177
    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
178 179 180
    CPPUNIT_ASSERT((aRes.startOffset[0] == 0) && (aRes.endOffset[0] == 4));
    // match "ac", (4,2] XXX NOTE: first match backward, not greedy
    aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
181
    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
182 183 184 185 186 187
    CPPUNIT_ASSERT((aRes.startOffset[0] == 4) && (aRes.endOffset[0] == 2));

    aOptions.searchString = "b*a";
    m_xSearch2->setOptions2( aOptions );
    // match "ba", [1,3) XXX NOTE: first match forward, not greedy
    aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
188
    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
189 190 191
    CPPUNIT_ASSERT((aRes.startOffset[0] == 1) && (aRes.endOffset[0] == 3));
    // match "baca", (5,1] XXX NOTE: first match backward
    aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
192
    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
193 194 195 196 197 198 199 200
    CPPUNIT_ASSERT((aRes.startOffset[0] == 5) && (aRes.endOffset[0] == 1));

    aText = "ab?ca";

    aOptions.searchString = "?~??";
    m_xSearch2->setOptions2( aOptions );
    // match "b?c", [1,4)
    aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
201
    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
202 203 204
    CPPUNIT_ASSERT((aRes.startOffset[0] == 1) && (aRes.endOffset[0] == 4));
    // match "b?c", (4,1]
    aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
205
    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
206 207 208 209 210 211 212 213
    CPPUNIT_ASSERT((aRes.startOffset[0] == 4) && (aRes.endOffset[0] == 1));

    aText = "ab*ca";

    aOptions.searchString = "?~*?";
    m_xSearch2->setOptions2( aOptions );
    // match "b?c", [1,4)
    aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
214
    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
215 216 217
    CPPUNIT_ASSERT((aRes.startOffset[0] == 1) && (aRes.endOffset[0] == 4));
    // match "b?c", (4,1]
    aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
218
    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
219 220 221 222 223 224
    CPPUNIT_ASSERT((aRes.startOffset[0] == 4) && (aRes.endOffset[0] == 1));

    aOptions.searchString = "ca?";
    m_xSearch2->setOptions2( aOptions );
    // no match
    aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
225
    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.subRegExpressions);
226 227
    // no match
    aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
228
    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.subRegExpressions);
229 230 231 232 233

    aOptions.searchString = "ca*";
    m_xSearch2->setOptions2( aOptions );
    // match "ca", [3,5)
    aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
234
    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
235 236 237
    CPPUNIT_ASSERT((aRes.startOffset[0] == 3) && (aRes.endOffset[0] == 5));
    // match "ca", (5,3]
    aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
238
    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
239 240 241 242 243 244
    CPPUNIT_ASSERT((aRes.startOffset[0] == 5) && (aRes.endOffset[0] == 3));

    aOptions.searchString = "*ca*";
    m_xSearch2->setOptions2( aOptions );
    // match "abaca", [0,5)
    aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
245
    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
246 247 248
    CPPUNIT_ASSERT((aRes.startOffset[0] == 0) && (aRes.endOffset[0] == 5));
    // match "abaca", (5,0]
    aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
249
    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
250
    CPPUNIT_ASSERT((aRes.startOffset[0] == 5) && (aRes.endOffset[0] == 0));
Eike Rathke's avatar
Eike Rathke committed
251 252 253 254 255 256

    aText = "123123";
    aOptions.searchString = "*2?";
    m_xSearch2->setOptions2( aOptions );
    // match first "123", [0,3)
    aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
257
    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
Eike Rathke's avatar
Eike Rathke committed
258 259 260
    CPPUNIT_ASSERT((aRes.startOffset[0] == 0) && (aRes.endOffset[0] == 3));
    // match "123123", (6,0]    Yes this looks odd, but it is as searching "?2*" forward.
    aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
261
    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
Eike Rathke's avatar
Eike Rathke committed
262 263 264 265 266 267
    CPPUNIT_ASSERT((aRes.startOffset[0] == 6) && (aRes.endOffset[0] == 0));

    aOptions.searchFlag |= util::SearchFlags::WILD_MATCH_SELECTION;
    m_xSearch2->setOptions2( aOptions );
    // match "123123", [0,6) with greedy '*'
    aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
268
    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
Eike Rathke's avatar
Eike Rathke committed
269 270 271
    CPPUNIT_ASSERT((aRes.startOffset[0] == 0) && (aRes.endOffset[0] == 6));
    // match "123123", (6,0]
    aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
272
    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
Eike Rathke's avatar
Eike Rathke committed
273
    CPPUNIT_ASSERT((aRes.startOffset[0] == 6) && (aRes.endOffset[0] == 0));
274 275
}

276 277 278
void TestTextSearch::setUp()
{
    BootstrapFixtureBase::setUp();
279
    m_xSearch.set(m_xSFactory->createInstance("com.sun.star.util.TextSearch"), uno::UNO_QUERY_THROW);
280
    m_xSearch2.set(m_xSFactory->createInstance("com.sun.star.util.TextSearch2"), uno::UNO_QUERY_THROW);
281 282 283 284 285
}

void TestTextSearch::tearDown()
{
    m_xSearch.clear();
286
    m_xSearch2.clear();
287 288 289 290 291 292 293 294
    BootstrapFixtureBase::tearDown();
}

CPPUNIT_TEST_SUITE_REGISTRATION(TestTextSearch);

CPPUNIT_PLUGIN_IMPLEMENT();

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */