You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2010/05/04 13:18:50 UTC
svn commit: r940806 [1/5] - in /lucene/dev/branches/branch_3x: ./ lucene/
lucene/backwards/src/ lucene/backwards/src/java/org/apache/lucene/search/
lucene/backwards/src/test/org/apache/lucene/analysis/
lucene/backwards/src/test/org/apache/lucene/analys...
Author: uschindler
Date: Tue May 4 11:18:46 2010
New Revision: 940806
URL: http://svn.apache.org/viewvc?rev=940806&view=rev
Log:
Merge CharTermAttribute changes and related ones to stable branch, these are revisions:
- dev/trunk: 932163,932369,932698,932747,932749,932773,932862,935521,940451
- lucene/branches/flex: 924791,924850
The move of WikipediaTokenizer was not merged, so the jflex updates to 1.5 are not yet merged here. Robert, when you have done this, simply copy the trunk version of WikipediaTokenizer and its build files over the merged ones.
Added:
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl31.java
- copied, changed from r932163, lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl31.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl31.jflex
- copied unchanged from r932163, lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl31.jflex
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImplOrig.java
- copied, changed from r932163, lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImplOrig.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImplOrig.jflex
- copied unchanged from r932163, lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImplOrig.jflex
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerInterface.java
- copied unchanged from r932163, lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerInterface.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttribute.java
- copied, changed from r924791, lucene/java/branches/flex_1458/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttribute.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java
- copied, changed from r924791, lucene/java/branches/flex_1458/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestCharTermAttributeImpl.java
- copied, changed from r924791, lucene/java/branches/flex_1458/src/test/org/apache/lucene/analysis/tokenattributes/TestCharTermAttributeImpl.java
Removed:
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex
Modified:
lucene/dev/branches/branch_3x/ (props changed)
lucene/dev/branches/branch_3x/lucene/ (props changed)
lucene/dev/branches/branch_3x/lucene/CHANGES.txt (contents, props changed)
lucene/dev/branches/branch_3x/lucene/backwards/src/ (props changed)
lucene/dev/branches/branch_3x/lucene/backwards/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java (props changed)
lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/analysis/TestAnalyzers.java
lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java (props changed)
lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/analysis/TestToken.java
lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/analysis/tokenattributes/TestTermAttributeImpl.java
lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/document/TestDateTools.java (props changed)
lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/document/TestNumberTools.java (props changed)
lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (props changed)
lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/util/TestAttributeSource.java (contents, props changed)
lucene/dev/branches/branch_3x/lucene/build.xml (contents, props changed)
lucene/dev/branches/branch_3x/lucene/common-build.xml
lucene/dev/branches/branch_3x/lucene/contrib/ (props changed)
lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt (contents, props changed)
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java
lucene/dev/branches/branch_3x/lucene/contrib/highlighter/src/test/ (props changed)
lucene/dev/branches/branch_3x/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java (props changed)
lucene/dev/branches/branch_3x/lucene/contrib/queryparser/build.xml
lucene/dev/branches/branch_3x/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/JavaCharStream.java
lucene/dev/branches/branch_3x/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/ParseException.java
lucene/dev/branches/branch_3x/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParser.java
lucene/dev/branches/branch_3x/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParserTokenManager.java
lucene/dev/branches/branch_3x/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/Token.java
lucene/dev/branches/branch_3x/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/TokenMgrError.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/ASCIIFoldingFilter.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/Analyzer.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/CharTokenizer.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/ISOLatin1AccentFilter.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/KeywordAnalyzer.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/KeywordMarkerTokenFilter.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/KeywordTokenizer.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/LengthFilter.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/LowerCaseFilter.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/PerFieldAnalyzerWrapper.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/PorterStemFilter.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/ReusableAnalyzerBase.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/StopFilter.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/Token.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/Tokenizer.java (props changed)
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/READ_BEFORE_REGENERATING.txt
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardFilter.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/tokenattributes/TermAttribute.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/tokenattributes/TermAttributeImpl.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/collation/CollationKeyFilter.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/DocInverterPerThread.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/TermsHashPerField.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/queryParser/QueryParser.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java (props changed)
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/QueryTermVector.java
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestASCIIFoldingFilter.java
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestAnalyzers.java
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java (contents, props changed)
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestKeywordMarkerTokenFilter.java
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestLengthFilter.java
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestNumericTokenStream.java
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestPerFieldAnalzyerWrapper.java
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestStandardAnalyzer.java
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestStopAnalyzer.java
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestStopFilter.java
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestTeeSinkTokenFilter.java
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestToken.java
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestSimpleAttributeImpls.java
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestTermAttributeImpl.java
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/document/TestDateTools.java (props changed)
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/document/TestNumberTools.java (props changed)
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (props changed)
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestDocumentWriter.java
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestPayloads.java
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestTermdocPerf.java
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/queryParser/TestMultiAnalyzer.java
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/search/TestPositionIncrement.java
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/search/TestTermRangeQuery.java
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/util/TestAttributeSource.java (contents, props changed)
lucene/dev/branches/branch_3x/solr/ (props changed)
lucene/dev/branches/branch_3x/solr/CHANGES.txt
lucene/dev/branches/branch_3x/solr/build.xml
lucene/dev/branches/branch_3x/solr/contrib/clustering/build.xml
lucene/dev/branches/branch_3x/solr/contrib/dataimporthandler/build.xml
lucene/dev/branches/branch_3x/solr/contrib/extraction/build.xml
lucene/dev/branches/branch_3x/solr/contrib/velocity/build.xml
lucene/dev/branches/branch_3x/solr/lib/commons-httpclient-3.1.jar (props changed)
lucene/dev/branches/branch_3x/solr/lib/jcl-over-slf4j-1.5.5.jar (props changed)
lucene/dev/branches/branch_3x/solr/src/common/org/apache/solr/common/ (props changed)
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/BufferedTokenStream.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/CapitalizationFilterFactory.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/CommonGramsFilter.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/DoubleMetaphoneFilter.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilter.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/KeepWordFilter.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/PatternReplaceFilter.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/PatternTokenizer.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/PhoneticFilter.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilter.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/ReversedWildcardFilter.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/SynonymFilter.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/SynonymFilterFactory.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/TokenizerChain.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/TrimFilter.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/WordDelimiterFilter.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/AnalysisRequestHandler.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/component/QueryElevationComponent.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/schema/BoolField.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/schema/FieldType.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/schema/TextField.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/SpellingQueryConverter.java
lucene/dev/branches/branch_3x/solr/src/maven/solr-core-pom.xml.template (props changed)
lucene/dev/branches/branch_3x/solr/src/maven/solr-solrj-pom.xml.template (props changed)
lucene/dev/branches/branch_3x/solr/src/solrj/org/ (props changed)
lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/CommonGramsFilterTest.java
lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/DoubleMetaphoneFilterFactoryTest.java
lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestBufferedTokenStream.java
lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestCollationKeyFilterFactory.java
lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestLuceneMatchVersion.java
lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestPatternTokenizerFactory.java
lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilter.java
lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestSynonymFilter.java
lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestTrimFilter.java
lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestWordDelimiterFilter.java
lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/client/ (props changed)
lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/spelling/SimpleQueryConverter.java
lucene/dev/branches/branch_3x/solr/src/webapp/src/org/apache/solr/client/solrj/embedded/ (props changed)
lucene/dev/branches/branch_3x/solr/src/webapp/web/admin/analysis.jsp
Propchange: lucene/dev/branches/branch_3x/
------------------------------------------------------------------------------
--- svn:mergeinfo (added)
+++ svn:mergeinfo Tue May 4 11:18:46 2010
@@ -0,0 +1 @@
+/lucene/dev/trunk:932163,932369,932698,932747,932749,932773,932862,935521,940451
Propchange: lucene/dev/branches/branch_3x/lucene/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue May 4 11:18:46 2010
@@ -1,3 +1,6 @@
+/lucene/dev/trunk:932749
+/lucene/dev/trunk/lucene:932163,932369,932698,932747,932749,932773,932862,935521,940451
+/lucene/java/branches/flex_1458:924791,924850
/lucene/java/branches/lucene_2_4:748824
/lucene/java/branches/lucene_2_9:817269-818600,825998,829134,829881,831036,896850,909334
/lucene/java/branches/lucene_2_9_back_compat_tests:818601-821336
Modified: lucene/dev/branches/branch_3x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/CHANGES.txt?rev=940806&r1=940805&r2=940806&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/lucene/CHANGES.txt Tue May 4 11:18:46 2010
@@ -1,6 +1,6 @@
Lucene Change Log
-======================= Trunk (not yet released) =======================
+======================= Lucene 3.x (not yet released) =======================
Changes in backwards compatibility policy
@@ -40,6 +40,17 @@ Changes in backwards compatibility polic
FSDirectory.FSIndexInput. Anyone extending this class will have to
fix their code on upgrading. (Earwin Burrfoot via Mike McCandless)
+* LUCENE-2372: StandardAnalyzer, KeywordAnalyzer, PerFieldAnalyzerWrapper
+ are now final. Also removed the now obsolete and deprecated
+ Analyzer.setOverridesTokenStreamMethod(). (Uwe Schindler)
+
+* LUCENE-2302: The new interface for term attributes, CharTermAttribute,
+ now implements CharSequence. This requires the toString() methods of
+ CharTermAttribute, deprecated TermAttribute, and Token to return only
+ the term text and no other attribute contents.
+ TODO: Point to new attribute inspection API coming with LUCENE-2374.
+ (Uwe Schindler, Robert Muir)
+
Changes in runtime behavior
* LUCENE-1923: Made IndexReader.toString() produce something
@@ -118,6 +129,17 @@ API Changes
FSDirectory to see a sample of how such tracking might look like, if needed
in your custom Directories. (Earwin Burrfoot via Mike McCandless)
+* LUCENE-2302: Deprecated TermAttribute and replaced by a new
+ CharTermAttribute. The change is backwards compatible, so
+ mixed new/old TokenStreams all work on the same char[] buffer
+ independent of which interface they use. CharTermAttribute
+ has shorter method names and implements CharSequence and
+ Appendable. This allows usage like Java's StringBuilder in
+ addition to direct char[] access. Also terms can directly be
+ used in places where CharSequence is allowed (e.g. regular
+ expressions).
+ (Uwe Schindler, Robert Muir)
+
Bug fixes
* LUCENE-2119: Don't throw NegativeArraySizeException if you pass
@@ -161,6 +183,9 @@ Bug fixes
* LUCENE-2365: IndexWriter.newestSegment (used normally for testing)
is fixed to return null if there are no segments. (Karthick
Sankarachary via Mike McCandless)
+
+* LUCENE-2074: Reduce buffer size of lexer back to default on reset.
+ (Ruben Laguna, Shai Erera via Uwe Schindler)
New features
@@ -226,6 +251,9 @@ New features
files between FSDirectory instances. (Earwin Burrfoot via Mike
McCandless).
+* LUCENE-2074: Make StandardTokenizer fit for Unicode 4.0, if the
+ matchVersion parameter is Version.LUCENE_31. (Uwe Schindler)
+
Optimizations
* LUCENE-2075: Terms dict cache is now shared across threads instead
@@ -300,9 +328,12 @@ Build
into core, and moved the ICU-based collation support into contrib/icu.
(Robert Muir)
-* LUCENE-2326: Removed SVN checkouts for backwards tests. The backwards branch
- is now included in the svn repository using "svn copy" after release.
- (Uwe Schindler)
+* LUCENE-2326: Removed SVN checkouts for backwards tests. The backwards
+ branch is now included in the svn repository using "svn copy"
+ after release. (Uwe Schindler)
+
+* LUCENE-2074: Regenerating StandardTokenizerImpl files now needs
+ JFlex 1.5 (currently only available on SVN). (Uwe Schindler)
Test Cases
Propchange: lucene/dev/branches/branch_3x/lucene/CHANGES.txt
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue May 4 11:18:46 2010
@@ -1,2 +1,4 @@
+/lucene/dev/trunk/lucene/CHANGES.txt:932163,932369,932698,932747,932749,932773,932862,935521,940451
+/lucene/java/branches/flex_1458/CHANGES.txt:924791,924850
/lucene/java/branches/lucene_2_9/CHANGES.txt:896850,909334
/lucene/java/trunk/CHANGES.txt:924483-925561
Propchange: lucene/dev/branches/branch_3x/lucene/backwards/src/
------------------------------------------------------------------------------
--- svn:mergeinfo (added)
+++ svn:mergeinfo Tue May 4 11:18:46 2010
@@ -0,0 +1,3 @@
+/lucene/dev/trunk/lucene/backwards/src:932163,932369,932698,932747,932749,932773,932862,935521,940451
+/lucene/java/branches/flex_1458/backwards/src:924850
+/lucene/java/trunk/backwards/src:924781
Propchange: lucene/dev/branches/branch_3x/lucene/backwards/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue May 4 11:18:46 2010
@@ -1,3 +1,5 @@
+/lucene/dev/trunk/lucene/backwards/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java:932163,932369,932698,932747,932749,932773,932862,935521,940451
+/lucene/java/branches/flex_1458/backwards/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java:924791,924850
/lucene/java/branches/lucene_2_4/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java:748824
/lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java:817269-818600,825998,829134,829881,831036
/lucene/java/branches/lucene_2_9_back_compat_tests/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java:818601-821336
Modified: lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/analysis/TestAnalyzers.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/analysis/TestAnalyzers.java?rev=940806&r1=940805&r2=940806&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/analysis/TestAnalyzers.java (original)
+++ lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/analysis/TestAnalyzers.java Tue May 4 11:18:46 2010
@@ -120,6 +120,7 @@ public class TestAnalyzers extends BaseT
String[] y = StandardTokenizer.TOKEN_TYPES;
}
+ /* StandardAnalyzer was made final in 3.1:
private static class MyStandardAnalyzer extends StandardAnalyzer {
public MyStandardAnalyzer() {
super(org.apache.lucene.util.Version.LUCENE_CURRENT);
@@ -139,6 +140,7 @@ public class TestAnalyzers extends BaseT
assertTrue(ts.incrementToken());
assertFalse(ts.incrementToken());
}
+ */
}
class PayloadSetter extends TokenFilter {
Propchange: lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue May 4 11:18:46 2010
@@ -1,3 +1,5 @@
+/lucene/dev/trunk/lucene/backwards/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java:932163,932369,932698,932747,932749,932773,932862,935521,940451
+/lucene/java/branches/flex_1458/backwards/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java:924791,924850
/lucene/java/branches/lucene_2_4/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java:748824
/lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java:825998,829134,829881,831036
/lucene/java/branches/lucene_3_0/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java:880754,880793,880823,881216,881317,881376,881473,881549,881820,882467,882890,883076,883080,912383
Modified: lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/analysis/TestToken.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/analysis/TestToken.java?rev=940806&r1=940805&r2=940806&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/analysis/TestToken.java (original)
+++ lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/analysis/TestToken.java Tue May 4 11:18:46 2010
@@ -31,6 +31,7 @@ public class TestToken extends LuceneTes
super(name);
}
+ /* toString changed in 3.1:
public void testCtor() throws Exception {
Token t = new Token();
char[] content = "hello".toCharArray();
@@ -60,6 +61,7 @@ public class TestToken extends LuceneTes
assertEquals("(hello,6,22,type=junk)", t.toString());
assertEquals(0, t.getFlags());
}
+ */
public void testResize() {
Token t = new Token();
@@ -139,6 +141,7 @@ public class TestToken extends LuceneTes
assertEquals(20000, t.termLength());
}
+ /* toString changed in 3.1:
public void testToString() throws Exception {
char[] b = {'a', 'l', 'o', 'h', 'a'};
Token t = new Token("", 0, 5);
@@ -148,6 +151,7 @@ public class TestToken extends LuceneTes
t.setTermBuffer("hi there");
assertEquals("(hi there,0,5)", t.toString());
}
+ */
public void testTermBufferEquals() throws Exception {
Token t1a = new Token();
Modified: lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/analysis/tokenattributes/TestTermAttributeImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/analysis/tokenattributes/TestTermAttributeImpl.java?rev=940806&r1=940805&r2=940806&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/analysis/tokenattributes/TestTermAttributeImpl.java (original)
+++ lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/analysis/tokenattributes/TestTermAttributeImpl.java Tue May 4 11:18:46 2010
@@ -107,10 +107,10 @@ public class TestTermAttributeImpl exten
char[] b = {'a', 'l', 'o', 'h', 'a'};
TermAttributeImpl t = new TermAttributeImpl();
t.setTermBuffer(b, 0, 5);
- assertEquals("term=aloha", t.toString());
+ assertEquals("aloha", t.toString());
t.setTermBuffer("hi there");
- assertEquals("term=hi there", t.toString());
+ assertEquals("hi there", t.toString());
}
public void testMixedStringArray() throws Exception {
Propchange: lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/document/TestDateTools.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue May 4 11:18:46 2010
@@ -1,3 +1,5 @@
+/lucene/dev/trunk/lucene/backwards/src/test/org/apache/lucene/document/TestDateTools.java:932163,932369,932698,932747,932749,932773,932862,935521,940451
+/lucene/java/branches/flex_1458/backwards/src/test/org/apache/lucene/document/TestDateTools.java:924791,924850
/lucene/java/branches/lucene_2_4/src/test/org/apache/lucene/document/TestDateTools.java:748824
/lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/document/TestDateTools.java:825998,829134,829881,831036
/lucene/java/branches/lucene_3_0/src/test/org/apache/lucene/document/TestDateTools.java:880754,880793,880823,881216,881317,881376,881473,881549,881820,882467,882890,883076,883080,912383
Propchange: lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/document/TestNumberTools.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue May 4 11:18:46 2010
@@ -1,3 +1,5 @@
+/lucene/dev/trunk/lucene/backwards/src/test/org/apache/lucene/document/TestNumberTools.java:932163,932369,932698,932747,932749,932773,932862,935521,940451
+/lucene/java/branches/flex_1458/backwards/src/test/org/apache/lucene/document/TestNumberTools.java:924791,924850
/lucene/java/branches/lucene_2_4/src/test/org/apache/lucene/document/TestNumberTools.java:748824
/lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/document/TestNumberTools.java:825998,829134,829881,831036
/lucene/java/branches/lucene_3_0/src/test/org/apache/lucene/document/TestNumberTools.java:880754,880793,880823,881216,881317,881376,881473,881549,881820,882467,882890,883076,883080,912383
Propchange: lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue May 4 11:18:46 2010
@@ -1,3 +1,5 @@
+/lucene/dev/trunk/lucene/backwards/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java:932163,932369,932698,932747,932749,932773,932862,935521,940451
+/lucene/java/branches/flex_1458/backwards/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java:924791,924850
/lucene/java/branches/lucene_2_4/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java:748824
/lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java:825998,829134,829881,831036
/lucene/java/branches/lucene_3_0/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java:880754,880793,880823,881216,881317,881376,881473,881549,881820,882467,882890,883076,883080,912383
Modified: lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/util/TestAttributeSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/util/TestAttributeSource.java?rev=940806&r1=940805&r2=940806&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/util/TestAttributeSource.java (original)
+++ lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/util/TestAttributeSource.java Tue May 4 11:18:46 2010
@@ -78,22 +78,22 @@ public class TestAttributeSource extends
public void testCloneAttributes() {
final AttributeSource src = new AttributeSource();
- final TermAttribute termAtt = src.addAttribute(TermAttribute.class);
+ final FlagsAttribute flagsAtt = src.addAttribute(FlagsAttribute.class);
final TypeAttribute typeAtt = src.addAttribute(TypeAttribute.class);
- termAtt.setTermBuffer("TestTerm");
+ flagsAtt.setFlags(1234);
typeAtt.setType("TestType");
final AttributeSource clone = src.cloneAttributes();
final Iterator<Class<? extends Attribute>> it = clone.getAttributeClassesIterator();
- assertEquals("TermAttribute must be the first attribute", TermAttribute.class, it.next());
+ assertEquals("FlagsAttribute must be the first attribute", FlagsAttribute.class, it.next());
assertEquals("TypeAttribute must be the second attribute", TypeAttribute.class, it.next());
assertFalse("No more attributes", it.hasNext());
- final TermAttribute termAtt2 = clone.getAttribute(TermAttribute.class);
+ final FlagsAttribute flagsAtt2 = clone.getAttribute(FlagsAttribute.class);
final TypeAttribute typeAtt2 = clone.getAttribute(TypeAttribute.class);
- assertNotSame("TermAttribute of original and clone must be different instances", termAtt2, termAtt);
+ assertNotSame("FlagsAttribute of original and clone must be different instances", flagsAtt2, flagsAtt);
assertNotSame("TypeAttribute of original and clone must be different instances", typeAtt2, typeAtt);
- assertEquals("TermAttribute of original and clone must be equal", termAtt2, termAtt);
+ assertEquals("FlagsAttribute of original and clone must be equal", flagsAtt2, flagsAtt);
assertEquals("TypeAttribute of original and clone must be equal", typeAtt2, typeAtt);
}
Propchange: lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/util/TestAttributeSource.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue May 4 11:18:46 2010
@@ -1,3 +1,5 @@
+/lucene/dev/trunk/lucene/backwards/src/test/org/apache/lucene/util/TestAttributeSource.java:932163,932369,932698,932747,932749,932773,932862,935521,940451
+/lucene/java/branches/flex_1458/backwards/src/test/org/apache/lucene/util/TestAttributeSource.java:924791,924850
/lucene/java/branches/lucene_2_4/src/test/org/apache/lucene/util/TestAttributeSource.java:748824
/lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/util/TestAttributeSource.java:817269-818600,825998,829134,829881,831036
/lucene/java/branches/lucene_2_9_back_compat_tests/src/test/org/apache/lucene/util/TestAttributeSource.java:818601-821336
Modified: lucene/dev/branches/branch_3x/lucene/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/build.xml?rev=940806&r1=940805&r2=940806&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/build.xml (original)
+++ lucene/dev/branches/branch_3x/lucene/build.xml Tue May 4 11:18:46 2010
@@ -232,7 +232,8 @@ The source distribution does not contain
</sequential>
</target>
-
+ <target name="compile-core" depends="jflex-notice, javacc-notice, common.compile-core"/>
+
<!-- ================================================================== -->
<!-- B U I L D D E M O -->
<!-- ================================================================== -->
@@ -603,26 +604,6 @@ The source distribution does not contain
<!-- ================================================================== -->
<!-- Build the JavaCC files into the source tree -->
<!-- ================================================================== -->
- <target name="jjdoc">
- <mkdir dir="${build.dir}/docs/grammars"/>
- <jjdoc target="src/java/org/apache/lucene/queryParser/QueryParser.jj"
- outputfile="${build.dir}/docs/grammars/QueryParser.html"
- javacchome="${javacc.home}"
- />
- <jjdoc target="src/demo/org/apache/lucene/demo/html/HTMLParser.jj"
- outputfile="${build.dir}/docs/grammars/HTMLParser.html"
- javacchome="${javacc.home}"
- />
- <jjdoc target="contrib/surround/src/java/org/apache/lucene/queryParser/surround/parser/QueryParser.jj"
- outputfile="${build.dir}/docs/grammars/Surround_QueryParser.html"
- javacchome="${javacc.home}"
- />
- <jjdoc target="contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParser.jj"
- outputfile="${build.dir}/docs/grammars/StandardSyntaxParser.html"
- javacchome="${javacc.home}"
- />
- </target>
-
<target name="clean-javacc">
<delete>
<fileset dir="src/java/org/apache/lucene/analysis/standard" includes="*.java">
@@ -644,7 +625,7 @@ The source distribution does not contain
</delete>
</target>
- <target name="javacc" depends="clean-javacc,javacc-QueryParser,javacc-HTMLParser,javacc-contrib-queryparser, javacc-contrib-surround, javacc-contrib-precedence"/>
+ <target name="javacc" depends="init,javacc-check,clean-javacc,javacc-QueryParser,javacc-HTMLParser,javacc-contrib-queryparser,javacc-contrib-surround"/>
<target name="javacc-QueryParser" depends="init,javacc-check" if="javacc.present">
<sequential>
@@ -684,25 +665,21 @@ The source distribution does not contain
/>
</target>
- <target name="javacc-contrib-precedence" depends="init,javacc-check" if="javacc.present">
- <ant target="javacc"
- dir="contrib/misc"
- antfile="build.xml"
- />
- </target>
-
<!-- ================================================================== -->
<!-- Build the JFlex files into the source tree -->
<!-- ================================================================== -->
- <target name="jflex" depends="clean-jflex,jflex-StandardAnalyzer" />
+ <target name="jflex" depends="jflex-check, clean-jflex,jflex-StandardAnalyzer" />
<target name="jflex-StandardAnalyzer" depends="init,jflex-check" if="jflex.present">
- <taskdef classname="JFlex.anttask.JFlexTask" name="jflex">
- <classpath location="${jflex.home}/lib/JFlex.jar" />
+ <taskdef classname="jflex.anttask.JFlexTask" name="jflex">
+ <classpath refid="jflex.classpath"/>
</taskdef>
- <jflex file="src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex"
+ <jflex file="src/java/org/apache/lucene/analysis/standard/StandardTokenizerImplOrig.jflex"
+ outdir="src/java/org/apache/lucene/analysis/standard"
+ nobak="on" />
+ <jflex file="src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl31.jflex"
outdir="src/java/org/apache/lucene/analysis/standard"
nobak="on" />
</target>
Propchange: lucene/dev/branches/branch_3x/lucene/build.xml
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue May 4 11:18:46 2010
@@ -1,2 +1,5 @@
+/lucene/dev/trunk/build.xml:932749
+/lucene/dev/trunk/lucene/build.xml:932163,932369,932698,932747,932749,932773,932862,935521,940451
+/lucene/java/branches/flex_1458/build.xml:924791,924850
/lucene/java/branches/lucene_2_9/build.xml:896850,909334
/lucene/java/trunk/build.xml:924483-925561
Modified: lucene/dev/branches/branch_3x/lucene/common-build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/common-build.xml?rev=940806&r1=940805&r2=940806&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/common-build.xml (original)
+++ lucene/dev/branches/branch_3x/lucene/common-build.xml Tue May 4 11:18:46 2010
@@ -92,6 +92,21 @@
<property name="javacc.home" location="${common.dir}"/>
<property name="jflex.home" location="${common.dir}"/>
+ <path id="jflex.classpath">
+ <fileset dir="${jflex.home}/">
+ <!-- for a JFlex trunk checkout: -->
+ <include name="jflex/target/*.jar"/>
+ <!-- for a JFlex distribution (not yet available): -->
+ <include name="lib/*.jar"/>
+ </fileset>
+ </path>
+
+ <path id="javacc.classpath">
+ <fileset dir="${javacc.home}/">
+ <include name="bin/lib/*.jar"/>
+ </fileset>
+ </path>
+
<property name="backwards.dir" location="backwards"/>
<property name="build.dir.backwards" location="${build.dir}/backwards"/>
@@ -141,18 +156,6 @@
</condition>
<available
- property="javacc.present"
- classname="org.javacc.parser.Main"
- classpath="${javacc.home}/bin/lib/javacc.jar"
- />
-
- <available
- property="jflex.present"
- classname="JFlex.anttask.JFlexTask"
- classpath="${jflex.home}/lib/JFlex.jar"
- />
-
- <available
property="maven.ant.tasks.present"
classname="org.apache.maven.artifact.ant.Pom"
/>
@@ -169,29 +172,30 @@
-->
<target name="javacc-uptodate-check">
<uptodate property="javacc.files.uptodate">
- <srcfiles dir="src" includes="**/*.jj" />
+ <srcfiles dir="src/java" includes="**/*.jj" />
<mapper type="glob" from="*.jj" to="*.java"/>
</uptodate>
</target>
- <target name="javacc-notice" unless="javacc.files.uptodate">
+ <target name="javacc-notice" depends="javacc-uptodate-check" unless="javacc.files.uptodate">
<echo>
One or more of the JavaCC .jj files is newer than its corresponding
.java file. Run the "javacc" target to regenerate the artifacts.
</echo>
</target>
- <target name="init" depends="javacc-uptodate-check, javacc-notice, jflex-uptodate-check, jflex-notice">
+ <target name="init">
+ <!-- currently empty -->
</target>
<target name="jflex-uptodate-check">
<uptodate property="jflex.files.uptodate">
- <srcfiles dir="src" includes="**/*.jflex" />
+ <srcfiles dir="src/java" includes="**/*.jflex" />
<mapper type="glob" from="*.jflex" to="*.java"/>
</uptodate>
</target>
- <target name="jflex-notice" unless="jflex.files.uptodate">
+ <target name="jflex-notice" depends="jflex-uptodate-check" unless="jflex.files.uptodate">
<echo>
One or more of the JFlex .jflex files is newer than its corresponding
.java file. Run the "jflex" target to regenerate the artifacts.
@@ -199,13 +203,15 @@
</target>
<target name="javacc-check">
+ <available property="javacc.present" classname="org.javacc.parser.Main">
+ <classpath refid="javacc.classpath"/>
+ </available>
<fail unless="javacc.present">
##################################################################
JavaCC not found.
JavaCC Home: ${javacc.home}
- JavaCC JAR: ${javacc.jar}
- Please download and install JavaCC from:
+ Please download and install JavaCC 4.1 from:
<http://javacc.dev.java.net>
@@ -227,22 +233,25 @@
</target>
<target name="jflex-check">
+ <available property="jflex.present" classname="jflex.anttask.JFlexTask">
+ <classpath refid="jflex.classpath"/>
+ </available>
<fail unless="jflex.present">
##################################################################
JFlex not found.
JFlex Home: ${jflex.home}
- Please download and install JFlex from:
+ Please install the jFlex 1.5 version (currently not released)
+ from its SVN repository:
- <http://jflex.de/download.html>
+ svn co http://jflex.svn.sourceforge.net/svnroot/jflex/trunk jflex
+ cd jflex
+ mvn install
Then, create a build.properties file either in your home
directory, or within the Lucene directory and set the jflex.home
- property to the path where JFlex is installed. For example,
- if you installed JFlex in /usr/local/java/jflex-1.4.1, then set the
- jflex.home property to:
-
- jflex.home=/usr/local/java/jflex-1.4.1
+ property to the path where the JFlex trunk checkout is located
+ (in the above example its the directory called "jflex").
##################################################################
</fail>
Propchange: lucene/dev/branches/branch_3x/lucene/contrib/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue May 4 11:18:46 2010
@@ -1,3 +1,5 @@
+/lucene/dev/trunk/lucene/contrib:932163,932369,932698,932747,932749,932773,932862,935521,940451
+/lucene/java/branches/flex_1458/contrib:924791,924850
/lucene/java/branches/lucene_2_4/contrib:748824
/lucene/java/branches/lucene_2_9/contrib:817269-818600,825998,829134,829816,829881,831036,896850,909334
/lucene/java/branches/lucene_2_9_back_compat_tests/contrib:818601-821336
Modified: lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt?rev=940806&r1=940805&r2=940806&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt Tue May 4 11:18:46 2010
@@ -115,6 +115,10 @@ New features
the ability to override any stemmer with a custom dictionary map.
(Robert Muir, Uwe Schindler, Simon Willnauer)
+ * LUCENE-2400: ShingleFilter was changed to don't output all-filler shingles and
+ unigrams, and uses a more performant algorithm to build grams using a linked list
+ of AttributeSource.cloneAttributes() instances and the new copyTo() method.
+
Build
* LUCENE-2124: Moved the JDK-based collation support from contrib/collation
Propchange: lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue May 4 11:18:46 2010
@@ -1,3 +1,5 @@
+/lucene/dev/trunk/lucene/contrib/CHANGES.txt:932163,932369,932698,932747,932749,932773,932862,935521,940451
+/lucene/java/branches/flex_1458/contrib/CHANGES.txt:924791,924850
/lucene/java/branches/lucene_2_4/contrib/CHANGES.txt:748824
/lucene/java/branches/lucene_2_9/contrib/CHANGES.txt:817269-818600,825998,826775,829134,829816,829881,831036,896850,909334
/lucene/java/branches/lucene_2_9_back_compat_tests/contrib/CHANGES.txt:818601-821336
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java?rev=940806&r1=940805&r2=940806&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java Tue May 4 11:18:46 2010
@@ -18,14 +18,16 @@ package org.apache.lucene.analysis.shing
*/
import java.io.IOException;
+import java.util.Iterator;
import java.util.LinkedList;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.util.AttributeSource;
/**
@@ -66,12 +68,12 @@ public final class ShingleFilter extends
*/
public static final String TOKEN_SEPARATOR = " ";
-
/**
* The sequence of input stream tokens (or filler tokens, if necessary)
* that will be composed to form output shingles.
*/
- private LinkedList<State> inputWindow = new LinkedList<State>();
+ private LinkedList<InputWindowToken> inputWindow
+ = new LinkedList<InputWindowToken>();
/**
* The number of input tokens in the next output token. This is the "n" in
@@ -80,9 +82,9 @@ public final class ShingleFilter extends
private CircularSequence gramSize;
/**
- * Shingle text is composed here.
+ * Shingle and unigram text is composed here.
*/
- private StringBuilder shingleBuilder = new StringBuilder();
+ private StringBuilder gramBuilder = new StringBuilder();
/**
* The token type attribute value to use - default is "shingle"
@@ -111,18 +113,31 @@ public final class ShingleFilter extends
private int minShingleSize;
/**
- * The remaining number of filler tokens inserted into the input stream
+ * The remaining number of filler tokens to be inserted into the input stream
* from which shingles are composed, to handle position increments greater
* than one.
*/
private int numFillerTokensToInsert;
/**
- * The next input stream token.
+ * When the next input stream token has a position increment greater than
+ * one, it is stored in this field until sufficient filler tokens have been
+ * inserted to account for the position increment.
+ */
+ private AttributeSource nextInputStreamToken;
+
+ /**
+ * Whether or not there is a next input stream token.
*/
- private State nextInputStreamToken;
+ private boolean isNextInputStreamToken = false;
+
+ /**
+ * Whether at least one unigram or shingle has been output at the current
+ * position.
+ */
+ private boolean isOutputHere = false;
- private final TermAttribute termAtt;
+ private final CharTermAttribute termAtt;
private final OffsetAttribute offsetAtt;
private final PositionIncrementAttribute posIncrAtt;
private final TypeAttribute typeAtt;
@@ -140,7 +155,7 @@ public final class ShingleFilter extends
super(input);
setMaxShingleSize(maxShingleSize);
setMinShingleSize(minShingleSize);
- this.termAtt = addAttribute(TermAttribute.class);
+ this.termAtt = addAttribute(CharTermAttribute.class);
this.offsetAtt = addAttribute(OffsetAttribute.class);
this.posIncrAtt = addAttribute(PositionIncrementAttribute.class);
this.typeAtt = addAttribute(TypeAttribute.class);
@@ -241,23 +256,49 @@ public final class ShingleFilter extends
this.tokenSeparator = null == tokenSeparator ? "" : tokenSeparator;
}
- /* (non-Javadoc)
- * @see org.apache.lucene.analysis.TokenStream#next()
- */
@Override
public final boolean incrementToken() throws IOException {
- boolean tokenAvailable = false;
+ boolean tokenAvailable = false;
+ int builtGramSize = 0;
if (gramSize.atMinValue() || inputWindow.size() < gramSize.getValue()) {
shiftInputWindow();
+ gramBuilder.setLength(0);
+ } else {
+ builtGramSize = gramSize.getPreviousValue();
}
- if ( ! inputWindow.isEmpty()) {
- restoreState(inputWindow.getFirst());
- if (1 == gramSize.getValue()) {
- posIncrAtt.setPositionIncrement(1);
- gramSize.advance();
- tokenAvailable = true;
- } else if (inputWindow.size() >= gramSize.getValue()) {
- getNextShingle();
+ if (inputWindow.size() >= gramSize.getValue()) {
+ boolean isAllFiller = true;
+ InputWindowToken nextToken = null;
+ Iterator<InputWindowToken> iter = inputWindow.iterator();
+ for (int gramNum = 1 ;
+ iter.hasNext() && builtGramSize < gramSize.getValue() ;
+ ++gramNum) {
+ nextToken = iter.next();
+ if (builtGramSize < gramNum) {
+ if (builtGramSize > 0) {
+ gramBuilder.append(tokenSeparator);
+ }
+ gramBuilder.append(nextToken.termAtt.buffer(), 0,
+ nextToken.termAtt.length());
+ ++builtGramSize;
+ }
+ if (isAllFiller && nextToken.isFiller) {
+ if (gramNum == gramSize.getValue()) {
+ gramSize.advance();
+ }
+ } else {
+ isAllFiller = false;
+ }
+ }
+ if ( ! isAllFiller && builtGramSize == gramSize.getValue()) {
+ inputWindow.getFirst().attSource.copyTo(this);
+ posIncrAtt.setPositionIncrement(isOutputHere ? 0 : 1);
+ termAtt.setEmpty().append(gramBuilder);
+ if (gramSize.getValue() > 1) {
+ typeAtt.setType(tokenType);
+ }
+ offsetAtt.setOffset(offsetAtt.startOffset(), nextToken.offsetAtt.endOffset());
+ isOutputHere = true;
gramSize.advance();
tokenAvailable = true;
}
@@ -266,82 +307,68 @@ public final class ShingleFilter extends
}
/**
- * <p>Makes the next token a shingle of length {@link #gramSize},
- * composed of tokens taken from {@link #inputWindow}.
- * <p>Callers of this method must first insure that there are at least
- * <code>gramSize</code> tokens available in <code>inputWindow</code>.
- */
- private void getNextShingle() {
- int startOffset = offsetAtt.startOffset();
-
- int minTokNum = gramSize.getValue() - 1; // zero-based inputWindow position
- if (gramSize.getValue() == minShingleSize) {
- // Clear the shingle text buffer if this is the first shingle
- // at the current position in the input stream.
- shingleBuilder.setLength(0);
- minTokNum = 0;
- }
- for (int tokNum = minTokNum ; tokNum < gramSize.getValue() ; ++tokNum) {
- if (tokNum > 0) {
- shingleBuilder.append(tokenSeparator);
- }
- restoreState(inputWindow.get(tokNum));
- shingleBuilder.append(termAtt.termBuffer(), 0, termAtt.termLength());
- }
- char[] termBuffer = termAtt.termBuffer();
- int termLength = shingleBuilder.length();
- if (termBuffer.length < termLength) {
- termBuffer = termAtt.resizeTermBuffer(termLength);
- }
- shingleBuilder.getChars(0, termLength, termBuffer, 0);
- termAtt.setTermLength(termLength);
- posIncrAtt.setPositionIncrement(gramSize.atMinValue() ? 1 : 0);
- typeAtt.setType(tokenType);
- offsetAtt.setOffset(startOffset, offsetAtt.endOffset());
- }
-
- /**
* <p>Get the next token from the input stream.
* <p>If the next token has <code>positionIncrement > 1</code>,
* <code>positionIncrement - 1</code> {@link #FILLER_TOKEN}s are
* inserted first.
- * @return false for end of stream; true otherwise
+ * @param target Where to put the new token; if null, a new instance is created.
+ * @return On success, the populated token; null otherwise
* @throws IOException if the input stream has a problem
*/
- private boolean getNextToken() throws IOException {
- boolean success = false;
+ private InputWindowToken getNextToken(InputWindowToken target)
+ throws IOException {
+ InputWindowToken newTarget = target;
if (numFillerTokensToInsert > 0) {
- insertFillerToken();
- success = true;
- } else if (null != nextInputStreamToken) {
- restoreState(nextInputStreamToken);
- nextInputStreamToken = null;
- success = true;
+ if (null == target) {
+ newTarget = new InputWindowToken(nextInputStreamToken.cloneAttributes());
+ } else {
+ nextInputStreamToken.copyTo(target.attSource);
+ }
+ // A filler token occupies no space
+ newTarget.offsetAtt.setOffset(newTarget.offsetAtt.startOffset(),
+ newTarget.offsetAtt.startOffset());
+ newTarget.termAtt.copyBuffer(FILLER_TOKEN, 0, FILLER_TOKEN.length);
+ newTarget.isFiller = true;
+ --numFillerTokensToInsert;
+ } else if (isNextInputStreamToken) {
+ if (null == target) {
+ newTarget = new InputWindowToken(nextInputStreamToken.cloneAttributes());
+ } else {
+ nextInputStreamToken.copyTo(target.attSource);
+ }
+ isNextInputStreamToken = false;
+ newTarget.isFiller = false;
} else if (input.incrementToken()) {
+ if (null == target) {
+ newTarget = new InputWindowToken(cloneAttributes());
+ } else {
+ this.copyTo(target.attSource);
+ }
if (posIncrAtt.getPositionIncrement() > 1) {
- numFillerTokensToInsert = posIncrAtt.getPositionIncrement() - 1;
- insertFillerToken();
+ // Each output shingle must contain at least one input token,
+ // so no more than (maxShingleSize - 1) filler tokens will be inserted.
+ numFillerTokensToInsert
+ = Math.min(posIncrAtt.getPositionIncrement() - 1, maxShingleSize - 1);
+ // Save the current token as the next input stream token
+ if (null == nextInputStreamToken) {
+ nextInputStreamToken = cloneAttributes();
+ } else {
+ this.copyTo(nextInputStreamToken);
+ }
+ isNextInputStreamToken = true;
+ // A filler token occupies no space
+ newTarget.offsetAtt.setOffset(offsetAtt.startOffset(), offsetAtt.startOffset());
+ newTarget.termAtt.copyBuffer(FILLER_TOKEN, 0, FILLER_TOKEN.length);
+ newTarget.isFiller = true;
+ --numFillerTokensToInsert;
+ } else {
+ newTarget.isFiller = false;
}
- success = true;
- }
- return success;
- }
-
- /**
- * Inserts a {@link #FILLER_TOKEN} and decrements
- * {@link #numFillerTokensToInsert}.
- */
- private void insertFillerToken() {
- if (null == nextInputStreamToken) {
- nextInputStreamToken = captureState();
} else {
- restoreState(nextInputStreamToken);
+ newTarget = null;
}
- --numFillerTokensToInsert;
- // A filler token occupies no space
- offsetAtt.setOffset(offsetAtt.startOffset(), offsetAtt.startOffset());
- termAtt.setTermBuffer(FILLER_TOKEN, 0, FILLER_TOKEN.length);
- }
+ return newTarget;
+ }
/**
* <p>Fills {@link #inputWindow} with input stream tokens, if available,
@@ -351,16 +378,29 @@ public final class ShingleFilter extends
* @throws IOException if there's a problem getting the next token
*/
private void shiftInputWindow() throws IOException {
+ InputWindowToken firstToken = null;
if (inputWindow.size() > 0) {
- inputWindow.removeFirst();
+ firstToken = inputWindow.removeFirst();
}
- while (getNextToken()) {
- inputWindow.add(captureState());
- if (inputWindow.size() == maxShingleSize) {
- break;
+ while (inputWindow.size() < maxShingleSize) {
+ if (null != firstToken) { // recycle the firstToken, if available
+ if (null != getNextToken(firstToken)) {
+ inputWindow.add(firstToken); // the firstToken becomes the last
+ firstToken = null;
+ } else {
+ break; // end of input stream
+ }
+ } else {
+ InputWindowToken nextToken = getNextToken(null);
+ if (null != nextToken) {
+ inputWindow.add(nextToken);
+ } else {
+ break; // end of input stream
+ }
}
}
gramSize.reset();
+ isOutputHere = false;
}
@Override
@@ -369,6 +409,7 @@ public final class ShingleFilter extends
gramSize.reset();
inputWindow.clear();
numFillerTokensToInsert = 0;
+ isOutputHere = false;
}
@@ -383,6 +424,7 @@ public final class ShingleFilter extends
*/
private class CircularSequence {
private int value;
+ private int previousValue;
private int minValue;
public CircularSequence() {
@@ -405,10 +447,9 @@ public final class ShingleFilter extends
* <b>{ [ 1, ] {@link #minShingleSize} [ , ... , {@link #maxShingleSize} ] }</b>.
* <p>1 is included in the circular sequence only if
* {@link #outputUnigrams} = true.
- *
- * @return the next member in the circular sequence
*/
- public int advance() {
+ public void advance() {
+ previousValue = value;
if (value == 1) {
value = minShingleSize;
} else if (value == maxShingleSize) {
@@ -416,7 +457,6 @@ public final class ShingleFilter extends
} else {
++value;
}
- return value;
}
/**
@@ -428,7 +468,7 @@ public final class ShingleFilter extends
* {@link #outputUnigrams} = true.
*/
public void reset() {
- value = minValue;
+ previousValue = value = minValue;
}
/**
@@ -443,5 +483,25 @@ public final class ShingleFilter extends
public boolean atMinValue() {
return value == minValue;
}
+
+ /**
+ * @return the value this instance had before the last advance() call
+ */
+ public int getPreviousValue() {
+ return previousValue;
+ }
+ }
+
+ private class InputWindowToken {
+ final AttributeSource attSource;
+ final CharTermAttribute termAtt;
+ final OffsetAttribute offsetAtt;
+ boolean isFiller = false;
+
+ public InputWindowToken(AttributeSource attSource) {
+ this.attSource = attSource;
+ this.termAtt = attSource.getAttribute(CharTermAttribute.class);
+ this.offsetAtt = attSource.getAttribute(OffsetAttribute.class);
+ }
}
}
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java?rev=940806&r1=940805&r2=940806&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java Tue May 4 11:18:46 2010
@@ -34,7 +34,7 @@ public class ShingleFilterTest extends B
protected int index = 0;
protected Token[] testToken;
- private TermAttribute termAtt;
+ private CharTermAttribute termAtt;
private OffsetAttribute offsetAtt;
private PositionIncrementAttribute posIncrAtt;
private TypeAttribute typeAtt;
@@ -42,7 +42,7 @@ public class ShingleFilterTest extends B
public TestTokenStream(Token[] testToken) {
super();
this.testToken = testToken;
- this.termAtt = addAttribute(TermAttribute.class);
+ this.termAtt = addAttribute(CharTermAttribute.class);
this.offsetAtt = addAttribute(OffsetAttribute.class);
this.posIncrAtt = addAttribute(PositionIncrementAttribute.class);
this.typeAtt = addAttribute(TypeAttribute.class);
@@ -53,7 +53,7 @@ public class ShingleFilterTest extends B
clearAttributes();
if (index < testToken.length) {
Token t = testToken[index++];
- termAtt.setTermBuffer(t.termBuffer(), 0, t.termLength());
+ termAtt.copyBuffer(t.buffer(), 0, t.length());
offsetAtt.setOffset(t.startOffset(), t.endOffset());
posIncrAtt.setPositionIncrement(t.getPositionIncrement());
typeAtt.setType(TypeAttributeImpl.DEFAULT_TYPE);
@@ -103,17 +103,20 @@ public class ShingleFilterTest extends B
createToken("please divide", 0, 13),
createToken("divide", 7, 13),
createToken("divide _", 7, 19),
- createToken("_", 19, 19),
createToken("_ sentence", 19, 27),
createToken("sentence", 19, 27),
createToken("sentence _", 19, 33),
- createToken("_", 33, 33),
createToken("_ shingles", 33, 39),
createToken("shingles", 33, 39),
};
public static final int[] BI_GRAM_POSITION_INCREMENTS_WITH_HOLES = new int[] {
- 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
+ 1, 0, 1, 0, 1, 1, 0, 1, 1
+ };
+
+ private static final String[] BI_GRAM_TYPES_WITH_HOLES = {
+ "word", "shingle",
+ "word", "shingle", "shingle", "word", "shingle", "shingle", "word"
};
public static final Token[] BI_GRAM_TOKENS_WITHOUT_UNIGRAMS = new Token[] {
@@ -642,18 +645,157 @@ public class ShingleFilterTest extends B
"word"
};
+ public static final Token[] TEST_TOKEN_POS_INCR_EQUAL_TO_N = new Token[] {
+ createToken("please", 0, 6),
+ createToken("divide", 7, 13),
+ createToken("this", 14, 18),
+ createToken("sentence", 29, 37, 3),
+ createToken("into", 38, 42),
+ createToken("shingles", 43, 49),
+ };
+
+ public static final Token[] TRI_GRAM_TOKENS_POS_INCR_EQUAL_TO_N = new Token[] {
+ createToken("please", 0, 6),
+ createToken("please divide", 0, 13),
+ createToken("please divide this", 0, 18),
+ createToken("divide", 7, 13),
+ createToken("divide this", 7, 18),
+ createToken("divide this _", 7, 29),
+ createToken("this", 14, 18),
+ createToken("this _", 14, 29),
+ createToken("this _ _", 14, 29),
+ createToken("_ _ sentence", 29, 37),
+ createToken("_ sentence", 29, 37),
+ createToken("_ sentence into", 29, 42),
+ createToken("sentence", 29, 37),
+ createToken("sentence into", 29, 42),
+ createToken("sentence into shingles", 29, 49),
+ createToken("into", 38, 42),
+ createToken("into shingles", 38, 49),
+ createToken("shingles", 43, 49)
+ };
+
+ public static final int[] TRI_GRAM_POSITION_INCREMENTS_POS_INCR_EQUAL_TO_N = new int[] {
+ 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1
+ };
+
+ public static final String[] TRI_GRAM_TYPES_POS_INCR_EQUAL_TO_N = new String[] {
+ "word", "shingle", "shingle",
+ "word", "shingle", "shingle",
+ "word", "shingle", "shingle",
+ "shingle", "shingle", "shingle", "word", "shingle", "shingle",
+ "word", "shingle",
+ "word"
+ };
+
+ public static final Token[] TRI_GRAM_TOKENS_POS_INCR_EQUAL_TO_N_WITHOUT_UNIGRAMS = new Token[] {
+ createToken("please divide", 0, 13),
+ createToken("please divide this", 0, 18),
+ createToken("divide this", 7, 18),
+ createToken("divide this _", 7, 29),
+ createToken("this _", 14, 29),
+ createToken("this _ _", 14, 29),
+ createToken("_ _ sentence", 29, 37),
+ createToken("_ sentence", 29, 37),
+ createToken("_ sentence into", 29, 42),
+ createToken("sentence into", 29, 42),
+ createToken("sentence into shingles", 29, 49),
+ createToken("into shingles", 38, 49),
+ };
+
+ public static final int[] TRI_GRAM_POSITION_INCREMENTS_POS_INCR_EQUAL_TO_N_WITHOUT_UNIGRAMS = new int[] {
+ 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1
+ };
+
+ public static final String[] TRI_GRAM_TYPES_POS_INCR_EQUAL_TO_N_WITHOUT_UNIGRAMS = new String[] {
+ "shingle", "shingle",
+ "shingle", "shingle",
+ "shingle", "shingle",
+ "shingle", "shingle", "shingle",
+ "shingle", "shingle",
+ "shingle",
+ };
+
+ public static final Token[] TEST_TOKEN_POS_INCR_GREATER_THAN_N = new Token[] {
+ createToken("please", 0, 6),
+ createToken("divide", 57, 63, 8),
+ createToken("this", 64, 68),
+ createToken("sentence", 69, 77),
+ createToken("into", 78, 82),
+ createToken("shingles", 83, 89),
+ };
+
+ public static final Token[] TRI_GRAM_TOKENS_POS_INCR_GREATER_THAN_N = new Token[] {
+ createToken("please", 0, 6),
+ createToken("please _", 0, 57),
+ createToken("please _ _", 0, 57),
+ createToken("_ _ divide", 57, 63),
+ createToken("_ divide", 57, 63),
+ createToken("_ divide this", 57, 68),
+ createToken("divide", 57, 63),
+ createToken("divide this", 57, 68),
+ createToken("divide this sentence", 57, 77),
+ createToken("this", 64, 68),
+ createToken("this sentence", 64, 77),
+ createToken("this sentence into", 64, 82),
+ createToken("sentence", 69, 77),
+ createToken("sentence into", 69, 82),
+ createToken("sentence into shingles", 69, 89),
+ createToken("into", 78, 82),
+ createToken("into shingles", 78, 89),
+ createToken("shingles", 83, 89)
+ };
+
+ public static final int[] TRI_GRAM_POSITION_INCREMENTS_POS_INCR_GREATER_THAN_N = new int[] {
+ 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1
+ };
+ public static final String[] TRI_GRAM_TYPES_POS_INCR_GREATER_THAN_N = new String[] {
+ "word", "shingle", "shingle",
+ "shingle",
+ "shingle", "shingle",
+ "word", "shingle", "shingle",
+ "word", "shingle", "shingle",
+ "word", "shingle", "shingle",
+ "word", "shingle",
+ "word"
+ };
+
+ public static final Token[] TRI_GRAM_TOKENS_POS_INCR_GREATER_THAN_N_WITHOUT_UNIGRAMS = new Token[] {
+ createToken("please _", 0, 57),
+ createToken("please _ _", 0, 57),
+ createToken("_ _ divide", 57, 63),
+ createToken("_ divide", 57, 63),
+ createToken("_ divide this", 57, 68),
+ createToken("divide this", 57, 68),
+ createToken("divide this sentence", 57, 77),
+ createToken("this sentence", 64, 77),
+ createToken("this sentence into", 64, 82),
+ createToken("sentence into", 69, 82),
+ createToken("sentence into shingles", 69, 89),
+ createToken("into shingles", 78, 89),
+ };
+
+ public static final int[] TRI_GRAM_POSITION_INCREMENTS_POS_INCR_GREATER_THAN_N_WITHOUT_UNIGRAMS = new int[] {
+ 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1
+ };
+
+ public static final String[] TRI_GRAM_TYPES_POS_INCR_GREATER_THAN_N_WITHOUT_UNIGRAMS = new String[] {
+ "shingle", "shingle",
+ "shingle", "shingle",
+ "shingle", "shingle",
+ "shingle", "shingle", "shingle", "shingle", "shingle",
+ "shingle",
+ };
+
@Override
protected void setUp() throws Exception {
super.setUp();
testTokenWithHoles = new Token[] {
createToken("please", 0, 6),
createToken("divide", 7, 13),
- createToken("sentence", 19, 27),
- createToken("shingles", 33, 39),
+ createToken("sentence", 19, 27, 2),
+ createToken("shingles", 33, 39, 2),
};
-
- testTokenWithHoles[2].setPositionIncrement(2);
- testTokenWithHoles[3].setPositionIncrement(2);
}
/*
@@ -667,7 +809,8 @@ public class ShingleFilterTest extends B
public void testBiGramFilterWithHoles() throws IOException {
this.shingleFilterTest(2, testTokenWithHoles, BI_GRAM_TOKENS_WITH_HOLES,
- BI_GRAM_POSITION_INCREMENTS, BI_GRAM_TYPES,
+ BI_GRAM_POSITION_INCREMENTS_WITH_HOLES,
+ BI_GRAM_TYPES_WITH_HOLES,
true);
}
@@ -832,8 +975,32 @@ public class ShingleFilterTest extends B
TRI_GRAM_POSITION_INCREMENTS_NULL_SEPARATOR,
TRI_GRAM_TYPES_NULL_SEPARATOR, true);
}
+
+ public void testPositionIncrementEqualToN() throws IOException {
+ this.shingleFilterTest(2, 3, TEST_TOKEN_POS_INCR_EQUAL_TO_N, TRI_GRAM_TOKENS_POS_INCR_EQUAL_TO_N,
+ TRI_GRAM_POSITION_INCREMENTS_POS_INCR_EQUAL_TO_N,
+ TRI_GRAM_TYPES_POS_INCR_EQUAL_TO_N, true);
+ }
+
+ public void testPositionIncrementEqualToNWithoutUnigrams() throws IOException {
+ this.shingleFilterTest(2, 3, TEST_TOKEN_POS_INCR_EQUAL_TO_N, TRI_GRAM_TOKENS_POS_INCR_EQUAL_TO_N_WITHOUT_UNIGRAMS,
+ TRI_GRAM_POSITION_INCREMENTS_POS_INCR_EQUAL_TO_N_WITHOUT_UNIGRAMS,
+ TRI_GRAM_TYPES_POS_INCR_EQUAL_TO_N_WITHOUT_UNIGRAMS, false);
+ }
+ public void testPositionIncrementGreaterThanN() throws IOException {
+ this.shingleFilterTest(2, 3, TEST_TOKEN_POS_INCR_GREATER_THAN_N, TRI_GRAM_TOKENS_POS_INCR_GREATER_THAN_N,
+ TRI_GRAM_POSITION_INCREMENTS_POS_INCR_GREATER_THAN_N,
+ TRI_GRAM_TYPES_POS_INCR_GREATER_THAN_N, true);
+ }
+
+ public void testPositionIncrementGreaterThanNWithoutUnigrams() throws IOException {
+ this.shingleFilterTest(2, 3, TEST_TOKEN_POS_INCR_GREATER_THAN_N, TRI_GRAM_TOKENS_POS_INCR_GREATER_THAN_N_WITHOUT_UNIGRAMS,
+ TRI_GRAM_POSITION_INCREMENTS_POS_INCR_GREATER_THAN_N_WITHOUT_UNIGRAMS,
+ TRI_GRAM_TYPES_POS_INCR_GREATER_THAN_N_WITHOUT_UNIGRAMS, false);
+ }
+
public void testReset() throws Exception {
Tokenizer wsTokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("please divide this sentence"));
TokenStream filter = new ShingleFilter(wsTokenizer, 2);
@@ -896,18 +1063,24 @@ public class ShingleFilterTest extends B
int endOffsets[] = new int[tokensToCompare.length];
for (int i = 0; i < tokensToCompare.length; i++) {
- text[i] = tokensToCompare[i].term();
+ text[i] = new String(tokensToCompare[i].buffer(),0, tokensToCompare[i].length());
startOffsets[i] = tokensToCompare[i].startOffset();
endOffsets[i] = tokensToCompare[i].endOffset();
}
assertTokenStreamContents(filter, text, startOffsets, endOffsets, types, positionIncrements);
}
+
+ private static Token createToken(String term, int start, int offset) {
+ return createToken(term, start, offset, 1);
+ }
- private static Token createToken(String term, int start, int offset)
+ private static Token createToken
+ (String term, int start, int offset, int positionIncrement)
{
Token token = new Token(start, offset);
- token.setTermBuffer(term);
+ token.copyBuffer(term.toCharArray(), 0, term.length());
+ token.setPositionIncrement(positionIncrement);
return token;
}
}
Propchange: lucene/dev/branches/branch_3x/lucene/contrib/highlighter/src/test/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue May 4 11:18:46 2010
@@ -1,3 +1,5 @@
+/lucene/dev/trunk/lucene/contrib/highlighter/src/test:932163,932369,932698,932747,932749,932773,932862,935521,940451
+/lucene/java/branches/flex_1458/contrib/highlighter/src/test:924791,924850
/lucene/java/branches/lucene_2_4/contrib/highlighter/src/test:748824
/lucene/java/branches/lucene_2_9/contrib/highlighter/src/test:817269-818600,825998,826775,829134,829816,829881,831036,896850,909334
/lucene/java/branches/lucene_2_9_back_compat_tests/contrib/highlighter/src/test:818601-821336
Propchange: lucene/dev/branches/branch_3x/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue May 4 11:18:46 2010
@@ -1,2 +1,4 @@
+/lucene/dev/trunk/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java:932163,932369,932698,932747,932749,932773,932862,935521,940451
+/lucene/java/branches/flex_1458/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java:924791,924850
/lucene/java/branches/lucene_2_9/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java:896850,909334
/lucene/java/trunk/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java:924483-925561
Modified: lucene/dev/branches/branch_3x/lucene/contrib/queryparser/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/queryparser/build.xml?rev=940806&r1=940805&r2=940806&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/queryparser/build.xml (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/queryparser/build.xml Tue May 4 11:18:46 2010
@@ -25,73 +25,79 @@
<import file="../contrib-build.xml"/>
+ <target name="compile-core" depends="javacc-notice, common.compile-core"/>
+
<!--
NOTE: see the README.javacc for details on how to fully regenerate the parser
-->
<target name="javacc" depends="javacc-flexible,javacc-precedence"/>
- <target name="javacc-flexible" depends="init,javacc-check" if="javacc.present">
+ <target name="javacc-flexible" depends="javacc-check">
+ <delete>
+ <fileset dir="src/java/org/apache/lucene/queryParser/standard/parser" includes="*.java">
+ <containsregexp expression="Generated.*By.*JavaCC"/>
+ </fileset>
+ </delete>
<invoke-javacc target="src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParser.jj"
outputDir="src/java/org/apache/lucene/queryParser/standard/parser"
/>
- <replaceregexp file="src/java/org/apache/lucene/queryParser/standard/parser/ParseException.java"
- match="public class ParseException extends Exception"
- replace="public class ParseException extends QueryNodeParseException"
- flags="g"
- byline="false"/>
- <replaceregexp file="src/java/org/apache/lucene/queryParser/standard/parser/ParseException.java"
- match="package org.apache.lucene.queryParser.standard.parser;"
- replace="package org.apache.lucene.queryParser.standard.parser;
-
- import org.apache.lucene.messages.Message;
- import org.apache.lucene.messages.MessageImpl;
- import org.apache.lucene.queryParser.core.*;
- import org.apache.lucene.queryParser.core.messages.*;"
- flags="g"
- byline="false"/>
- <replaceregexp file="src/java/org/apache/lucene/queryParser/standard/parser/ParseException.java"
- match="^ public ParseException\(Token currentTokenVal.*$(\s\s[^}].*\n)* \}"
- replace=" public ParseException(Token currentTokenVal,
- int[][] expectedTokenSequencesVal, String[] tokenImageVal) {
- super(new MessageImpl(QueryParserMessages.INVALID_SYNTAX, initialise(
- currentTokenVal, expectedTokenSequencesVal, tokenImageVal)));
- this.currentToken = currentTokenVal;
- this.expectedTokenSequences = expectedTokenSequencesVal;
- this.tokenImage = tokenImageVal;
- }"
- flags="gm"
- byline="false"/>
- <replaceregexp file="src/java/org/apache/lucene/queryParser/standard/parser/ParseException.java"
- match="^ public ParseException\(String message.*$(\s\s[^}].*\n)* \}"
- replace=" public ParseException(Message message) {
- super(message);
- }"
- flags="gm"
- byline="false"/>
- <replaceregexp file="src/java/org/apache/lucene/queryParser/standard/parser/ParseException.java"
- match="^ public ParseException\(\).*$(\s\s[^}].*\n)* \}"
- replace=" public ParseException() {
- super(new MessageImpl(QueryParserMessages.INVALID_SYNTAX, "Error"));
- }"
- flags="gm"
- byline="false"/>
- <replaceregexp file="src/java/org/apache/lucene/queryParser/standard/parser/ParseException.java"
- match="^ public String getMessage\(\).*$(\s\s\s\s[^}].*\n)* \}"
- replace=" private static String initialise(Token currentToken,
- int[][] expectedTokenSequences, String[] tokenImage) {
- String eol = System.getProperty("line.separator", "\n");"
- flags="gm"
- byline="false"/>
- <replaceregexp file="src/java/org/apache/lucene/queryParser/standard/parser/ParseException.java"
- match="\s*protected String add_escapes.*"
- replace=" static private String add_escapes(String str) {"
- flags="g"
- byline="false"/>
+ <replaceregexp file="src/java/org/apache/lucene/queryParser/standard/parser/ParseException.java"
+ match="public class ParseException extends Exception"
+ replace="public class ParseException extends QueryNodeParseException"
+ flags="g"
+ byline="false"/>
+ <replaceregexp file="src/java/org/apache/lucene/queryParser/standard/parser/ParseException.java"
+ match="package org.apache.lucene.queryParser.standard.parser;"
+ replace="package org.apache.lucene.queryParser.standard.parser;${line.separator}
+${line.separator}
+import org.apache.lucene.messages.Message;${line.separator}
+import org.apache.lucene.messages.MessageImpl;${line.separator}
+import org.apache.lucene.queryParser.core.*;${line.separator}
+import org.apache.lucene.queryParser.core.messages.*;"
+ flags="g"
+ byline="false"/>
+ <replaceregexp file="src/java/org/apache/lucene/queryParser/standard/parser/ParseException.java"
+ match="^ public ParseException\(Token currentTokenVal.*$(\s\s[^}].*\n)* \}"
+ replace=" public ParseException(Token currentTokenVal,${line.separator}
+ int[][] expectedTokenSequencesVal, String[] tokenImageVal) {${line.separator}
+ super(new MessageImpl(QueryParserMessages.INVALID_SYNTAX, initialise(${line.separator}
+ currentTokenVal, expectedTokenSequencesVal, tokenImageVal)));${line.separator}
+ this.currentToken = currentTokenVal;${line.separator}
+ this.expectedTokenSequences = expectedTokenSequencesVal;${line.separator}
+ this.tokenImage = tokenImageVal;${line.separator}
+ }"
+ flags="gm"
+ byline="false"/>
+ <replaceregexp file="src/java/org/apache/lucene/queryParser/standard/parser/ParseException.java"
+ match="^ public ParseException\(String message.*$(\s\s[^}].*\n)* \}"
+ replace=" public ParseException(Message message) {${line.separator}
+ super(message);${line.separator}
+ }"
+ flags="gm"
+ byline="false"/>
+ <replaceregexp file="src/java/org/apache/lucene/queryParser/standard/parser/ParseException.java"
+ match="^ public ParseException\(\).*$(\s\s[^}].*\n)* \}"
+ replace=" public ParseException() {${line.separator}
+ super(new MessageImpl(QueryParserMessages.INVALID_SYNTAX, "Error"));${line.separator}
+ }"
+ flags="gm"
+ byline="false"/>
+ <replaceregexp file="src/java/org/apache/lucene/queryParser/standard/parser/ParseException.java"
+ match="^ public String getMessage\(\).*$(\s\s\s\s[^}].*\n)* \}"
+ replace=" private static String initialise(Token currentToken, int[][] expectedTokenSequences, String[] tokenImage) {${line.separator}
+ String eol = System.getProperty("line.separator", "\n");"
+ flags="gm"
+ byline="false"/>
+ <replaceregexp file="src/java/org/apache/lucene/queryParser/standard/parser/ParseException.java"
+ match="\s*protected String add_escapes.*"
+ replace=" static private String add_escapes(String str) {"
+ flags="g"
+ byline="true"/>
</target>
<property name="javacc.precedence.path" location="src/java/org/apache/lucene/queryParser/precedence"/>
- <target name="javacc-precedence" depends="javacc-check" description="generate precedence query parser from jj (requires javacc 3.2)">
+ <target name="javacc-precedence" depends="javacc-check" description="generate precedence query parser from jj (requires javacc 4.1)">
<delete>
<fileset dir="${javacc.precedence.path}" includes="*.java">
<containsregexp expression="Generated.*By.*JavaCC"/>