You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@openoffice.apache.org by hd...@apache.org on 2012/09/07 15:27:55 UTC
svn commit: r1382018 - /incubator/ooo/trunk/main/i18npool/source/search/textsearch.cxx

Author: hdu
Date: Fri Sep  7 13:27:55 2012
New Revision: 1382018

URL: http://svn.apache.org/viewvc?rev=1382018&view=rev
Log:
#i120598# better emulation of regexp word-start and word-end operators

The emulation of the word-start and word-end operators provided by
the previous regexp engine can be approximated much better
by using the ICU regexp engine's powerful look-around feature.

Patch-by: Herbert Duerr
Found-by: ldgolds33@yahoo.com

Modified:
    incubator/ooo/trunk/main/i18npool/source/search/textsearch.cxx

Modified: incubator/ooo/trunk/main/i18npool/source/search/textsearch.cxx
URL: http://svn.apache.org/viewvc/incubator/ooo/trunk/main/i18npool/source/search/textsearch.cxx?rev=1382018&r1=1382017&r2=1382018&view=diff
==============================================================================
--- incubator/ooo/trunk/main/i18npool/source/search/textsearch.cxx (original)
+++ incubator/ooo/trunk/main/i18npool/source/search/textsearch.cxx Fri Sep  7 13:27:55 2012
@@ -723,13 +723,20 @@ void TextSearch::RESrchPrepare( const ::
 	IcuUniString aIcuSearchPatStr( (const UChar*)rPatternStr.getStr(), rPatternStr.getLength());
 #ifndef DISABLE_WORDBOUND_EMULATION
 	// for conveniance specific syntax elements of the old regex engine are emulated
-	// by using regular word boundary matching \b to replace \< and \>
-	static const IcuUniString aChevronPattern( "\\\\<|\\\\>", -1, IcuUniString::kInvariant);
-	static const IcuUniString aChevronReplace( "\\\\b", -1, IcuUniString::kInvariant);
-	static RegexMatcher aChevronMatcher( aChevronPattern, 0, nIcuErr);
-	aChevronMatcher.reset( aIcuSearchPatStr);
-	aIcuSearchPatStr = aChevronMatcher.replaceAll( aChevronReplace, nIcuErr);
-	aChevronMatcher.reset();
+	// - by replacing \< with "word-break followed by a look-ahead word-char"
+	static const IcuUniString aChevronPatternB( "\\\\<", -1, IcuUniString::kInvariant);
+	static const IcuUniString aChevronReplaceB( "\\\\b(?=\\\\w)", -1, IcuUniString::kInvariant);
+	static RegexMatcher aChevronMatcherB( aChevronPatternB, 0, nIcuErr);
+	aChevronMatcherB.reset( aIcuSearchPatStr);
+	aIcuSearchPatStr = aChevronMatcherB.replaceAll( aChevronReplaceB, nIcuErr);
+	aChevronMatcherB.reset();
+	// - by replacing \> with "look-behind word-char followed by a word-break"
+	static const IcuUniString aChevronPatternE( "\\\\>", -1, IcuUniString::kInvariant);
+	static const IcuUniString aChevronReplaceE( "(?<=\\\\w)\\\\b", -1, IcuUniString::kInvariant);
+	static RegexMatcher aChevronMatcherE( aChevronPatternE, 0, nIcuErr);
+	aChevronMatcherE.reset( aIcuSearchPatStr);
+	aIcuSearchPatStr = aChevronMatcherE.replaceAll( aChevronReplaceE, nIcuErr);
+	aChevronMatcherE.reset();
 #endif
 	pRegexMatcher = new RegexMatcher( aIcuSearchPatStr, nIcuSearchFlags, nIcuErr);
 	if( nIcuErr)