You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2010/05/04 13:18:50 UTC

svn commit: r940806 [1/5] - in /lucene/dev/branches/branch_3x: ./ lucene/ lucene/backwards/src/ lucene/backwards/src/java/org/apache/lucene/search/ lucene/backwards/src/test/org/apache/lucene/analysis/ lucene/backwards/src/test/org/apache/lucene/analys...

Author: uschindler
Date: Tue May  4 11:18:46 2010
New Revision: 940806

URL: http://svn.apache.org/viewvc?rev=940806&view=rev
Log:
Merge CharTermAttribute changes and related ones to the stable branch. These are the merged revisions:
- dev/trunk: 932163,932369,932698,932747,932749,932773,932862,935521,940451
- lucene/branches/flex: 924791,924850

The move of WikipediaTokenizer was not merged, so the JFlex updates to 1.5 are not yet merged here. Robert, when you have done this, simply copy the trunk version of WikipediaTokenizer and its build files over the merged ones.

Added:
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl31.java
      - copied, changed from r932163, lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl31.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl31.jflex
      - copied unchanged from r932163, lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl31.jflex
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImplOrig.java
      - copied, changed from r932163, lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImplOrig.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImplOrig.jflex
      - copied unchanged from r932163, lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImplOrig.jflex
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerInterface.java
      - copied unchanged from r932163, lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerInterface.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttribute.java
      - copied, changed from r924791, lucene/java/branches/flex_1458/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttribute.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java
      - copied, changed from r924791, lucene/java/branches/flex_1458/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestCharTermAttributeImpl.java
      - copied, changed from r924791, lucene/java/branches/flex_1458/src/test/org/apache/lucene/analysis/tokenattributes/TestCharTermAttributeImpl.java
Removed:
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex
Modified:
    lucene/dev/branches/branch_3x/   (props changed)
    lucene/dev/branches/branch_3x/lucene/   (props changed)
    lucene/dev/branches/branch_3x/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_3x/lucene/backwards/src/   (props changed)
    lucene/dev/branches/branch_3x/lucene/backwards/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java   (props changed)
    lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/analysis/TestAnalyzers.java
    lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java   (props changed)
    lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/analysis/TestToken.java
    lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/analysis/tokenattributes/TestTermAttributeImpl.java
    lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/document/TestDateTools.java   (props changed)
    lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/document/TestNumberTools.java   (props changed)
    lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java   (props changed)
    lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/util/TestAttributeSource.java   (contents, props changed)
    lucene/dev/branches/branch_3x/lucene/build.xml   (contents, props changed)
    lucene/dev/branches/branch_3x/lucene/common-build.xml
    lucene/dev/branches/branch_3x/lucene/contrib/   (props changed)
    lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java
    lucene/dev/branches/branch_3x/lucene/contrib/highlighter/src/test/   (props changed)
    lucene/dev/branches/branch_3x/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java   (props changed)
    lucene/dev/branches/branch_3x/lucene/contrib/queryparser/build.xml
    lucene/dev/branches/branch_3x/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/JavaCharStream.java
    lucene/dev/branches/branch_3x/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/ParseException.java
    lucene/dev/branches/branch_3x/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParser.java
    lucene/dev/branches/branch_3x/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParserTokenManager.java
    lucene/dev/branches/branch_3x/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/Token.java
    lucene/dev/branches/branch_3x/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/TokenMgrError.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/ASCIIFoldingFilter.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/Analyzer.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/CharTokenizer.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/ISOLatin1AccentFilter.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/KeywordAnalyzer.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/KeywordMarkerTokenFilter.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/KeywordTokenizer.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/LengthFilter.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/LowerCaseFilter.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/PerFieldAnalyzerWrapper.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/PorterStemFilter.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/ReusableAnalyzerBase.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/StopFilter.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/Token.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/Tokenizer.java   (props changed)
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/READ_BEFORE_REGENERATING.txt
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardFilter.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/tokenattributes/TermAttribute.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/tokenattributes/TermAttributeImpl.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/collation/CollationKeyFilter.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/DocInverterPerThread.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/TermsHashPerField.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/queryParser/QueryParser.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java   (props changed)
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/QueryTermVector.java
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestASCIIFoldingFilter.java
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestAnalyzers.java
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java   (contents, props changed)
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestKeywordMarkerTokenFilter.java
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestLengthFilter.java
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestNumericTokenStream.java
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestPerFieldAnalzyerWrapper.java
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestStandardAnalyzer.java
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestStopAnalyzer.java
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestStopFilter.java
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestTeeSinkTokenFilter.java
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestToken.java
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestSimpleAttributeImpls.java
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestTermAttributeImpl.java
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/document/TestDateTools.java   (props changed)
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/document/TestNumberTools.java   (props changed)
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java   (props changed)
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestDocumentWriter.java
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestPayloads.java
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestTermdocPerf.java
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/queryParser/TestMultiAnalyzer.java
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/search/TestPositionIncrement.java
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/search/TestTermRangeQuery.java
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/util/TestAttributeSource.java   (contents, props changed)
    lucene/dev/branches/branch_3x/solr/   (props changed)
    lucene/dev/branches/branch_3x/solr/CHANGES.txt
    lucene/dev/branches/branch_3x/solr/build.xml
    lucene/dev/branches/branch_3x/solr/contrib/clustering/build.xml
    lucene/dev/branches/branch_3x/solr/contrib/dataimporthandler/build.xml
    lucene/dev/branches/branch_3x/solr/contrib/extraction/build.xml
    lucene/dev/branches/branch_3x/solr/contrib/velocity/build.xml
    lucene/dev/branches/branch_3x/solr/lib/commons-httpclient-3.1.jar   (props changed)
    lucene/dev/branches/branch_3x/solr/lib/jcl-over-slf4j-1.5.5.jar   (props changed)
    lucene/dev/branches/branch_3x/solr/src/common/org/apache/solr/common/   (props changed)
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/BufferedTokenStream.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/CapitalizationFilterFactory.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/CommonGramsFilter.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/DoubleMetaphoneFilter.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilter.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/KeepWordFilter.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/PatternReplaceFilter.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/PatternTokenizer.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/PhoneticFilter.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilter.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/ReversedWildcardFilter.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/SynonymFilter.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/SynonymFilterFactory.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/TokenizerChain.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/TrimFilter.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/WordDelimiterFilter.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/AnalysisRequestHandler.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/component/QueryElevationComponent.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/schema/BoolField.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/schema/FieldType.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/schema/TextField.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/SpellingQueryConverter.java
    lucene/dev/branches/branch_3x/solr/src/maven/solr-core-pom.xml.template   (props changed)
    lucene/dev/branches/branch_3x/solr/src/maven/solr-solrj-pom.xml.template   (props changed)
    lucene/dev/branches/branch_3x/solr/src/solrj/org/   (props changed)
    lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/CommonGramsFilterTest.java
    lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/DoubleMetaphoneFilterFactoryTest.java
    lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestBufferedTokenStream.java
    lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestCollationKeyFilterFactory.java
    lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestLuceneMatchVersion.java
    lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestPatternTokenizerFactory.java
    lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilter.java
    lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestSynonymFilter.java
    lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestTrimFilter.java
    lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestWordDelimiterFilter.java
    lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/client/   (props changed)
    lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/spelling/SimpleQueryConverter.java
    lucene/dev/branches/branch_3x/solr/src/webapp/src/org/apache/solr/client/solrj/embedded/   (props changed)
    lucene/dev/branches/branch_3x/solr/src/webapp/web/admin/analysis.jsp

Propchange: lucene/dev/branches/branch_3x/
------------------------------------------------------------------------------
--- svn:mergeinfo (added)
+++ svn:mergeinfo Tue May  4 11:18:46 2010
@@ -0,0 +1 @@
+/lucene/dev/trunk:932163,932369,932698,932747,932749,932773,932862,935521,940451

Propchange: lucene/dev/branches/branch_3x/lucene/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue May  4 11:18:46 2010
@@ -1,3 +1,6 @@
+/lucene/dev/trunk:932749
+/lucene/dev/trunk/lucene:932163,932369,932698,932747,932749,932773,932862,935521,940451
+/lucene/java/branches/flex_1458:924791,924850
 /lucene/java/branches/lucene_2_4:748824
 /lucene/java/branches/lucene_2_9:817269-818600,825998,829134,829881,831036,896850,909334
 /lucene/java/branches/lucene_2_9_back_compat_tests:818601-821336

Modified: lucene/dev/branches/branch_3x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/CHANGES.txt?rev=940806&r1=940805&r2=940806&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/lucene/CHANGES.txt Tue May  4 11:18:46 2010
@@ -1,6 +1,6 @@
 Lucene Change Log
 
-======================= Trunk (not yet released) =======================
+======================= Lucene 3.x (not yet released) =======================
 
 Changes in backwards compatibility policy
 
@@ -40,6 +40,17 @@ Changes in backwards compatibility polic
   FSDirectory.FSIndexInput. Anyone extending this class will have to
   fix their code on upgrading. (Earwin Burrfoot via Mike McCandless)
 
+* LUCENE-2372: StandardAnalyzer, KeywordAnalyzer, PerFieldAnalyzerWrapper
+  are now final.  Also removed the now obsolete and deprecated
+  Analyzer.setOverridesTokenStreamMethod().  (Uwe Schindler)
+
+* LUCENE-2302: The new interface for term attributes, CharTermAttribute,
+  now implements CharSequence. This requires the toString() methods of
+  CharTermAttribute, deprecated TermAttribute, and Token to return only
+  the term text and no other attribute contents.
+  TODO: Point to new attribute inspection API coming with LUCENE-2374.
+  (Uwe Schindler, Robert Muir)
+
 Changes in runtime behavior
 
 * LUCENE-1923: Made IndexReader.toString() produce something
@@ -118,6 +129,17 @@ API Changes
   FSDirectory to see a sample of how such tracking might look like, if needed
   in your custom Directories.  (Earwin Burrfoot via Mike McCandless)
 
+* LUCENE-2302: Deprecated TermAttribute and replaced by a new
+  CharTermAttribute. The change is backwards compatible, so
+  mixed new/old TokenStreams all work on the same char[] buffer
+  independent of which interface they use. CharTermAttribute
+  has shorter method names and implements CharSequence and
+  Appendable. This allows usage like Java's StringBuilder in
+  addition to direct char[] access. Also terms can directly be
+  used in places where CharSequence is allowed (e.g. regular
+  expressions).
+  (Uwe Schindler, Robert Muir)
+
 Bug fixes
 
 * LUCENE-2119: Don't throw NegativeArraySizeException if you pass
@@ -161,6 +183,9 @@ Bug fixes
 * LUCENE-2365: IndexWriter.newestSegment (used normally for testing)
   is fixed to return null if there are no segments.  (Karthick
   Sankarachary via Mike McCandless)
+
+* LUCENE-2074: Reduce buffer size of lexer back to default on reset.
+  (Ruben Laguna, Shai Erera via Uwe Schindler)
   
 New features
 
@@ -226,6 +251,9 @@ New features
   files between FSDirectory instances.  (Earwin Burrfoot via Mike
   McCandless).
   
+* LUCENE-2074: Make StandardTokenizer fit for Unicode 4.0, if the
+  matchVersion parameter is Version.LUCENE_31. (Uwe Schindler)
+
 Optimizations
 
 * LUCENE-2075: Terms dict cache is now shared across threads instead
@@ -300,9 +328,12 @@ Build
   into core, and moved the ICU-based collation support into contrib/icu.  
   (Robert Muir)
 
-* LUCENE-2326: Removed SVN checkouts for backwards tests. The backwards branch
-  is now included in the svn repository using "svn copy" after release.
-  (Uwe Schindler)
+* LUCENE-2326: Removed SVN checkouts for backwards tests. The backwards
+  branch is now included in the svn repository using "svn copy"
+  after release. (Uwe Schindler)
+
+* LUCENE-2074: Regenerating StandardTokenizerImpl files now needs
+  JFlex 1.5 (currently only available on SVN). (Uwe Schindler)
 
 Test Cases
 

Propchange: lucene/dev/branches/branch_3x/lucene/CHANGES.txt
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue May  4 11:18:46 2010
@@ -1,2 +1,4 @@
+/lucene/dev/trunk/lucene/CHANGES.txt:932163,932369,932698,932747,932749,932773,932862,935521,940451
+/lucene/java/branches/flex_1458/CHANGES.txt:924791,924850
 /lucene/java/branches/lucene_2_9/CHANGES.txt:896850,909334
 /lucene/java/trunk/CHANGES.txt:924483-925561

Propchange: lucene/dev/branches/branch_3x/lucene/backwards/src/
------------------------------------------------------------------------------
--- svn:mergeinfo (added)
+++ svn:mergeinfo Tue May  4 11:18:46 2010
@@ -0,0 +1,3 @@
+/lucene/dev/trunk/lucene/backwards/src:932163,932369,932698,932747,932749,932773,932862,935521,940451
+/lucene/java/branches/flex_1458/backwards/src:924850
+/lucene/java/trunk/backwards/src:924781

Propchange: lucene/dev/branches/branch_3x/lucene/backwards/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue May  4 11:18:46 2010
@@ -1,3 +1,5 @@
+/lucene/dev/trunk/lucene/backwards/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java:932163,932369,932698,932747,932749,932773,932862,935521,940451
+/lucene/java/branches/flex_1458/backwards/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java:924791,924850
 /lucene/java/branches/lucene_2_4/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java:748824
 /lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java:817269-818600,825998,829134,829881,831036
 /lucene/java/branches/lucene_2_9_back_compat_tests/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java:818601-821336

Modified: lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/analysis/TestAnalyzers.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/analysis/TestAnalyzers.java?rev=940806&r1=940805&r2=940806&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/analysis/TestAnalyzers.java (original)
+++ lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/analysis/TestAnalyzers.java Tue May  4 11:18:46 2010
@@ -120,6 +120,7 @@ public class TestAnalyzers extends BaseT
     String[] y = StandardTokenizer.TOKEN_TYPES;
   }
 
+  /* StandardAnalyzer was made final in 3.1:
   private static class MyStandardAnalyzer extends StandardAnalyzer {
     public MyStandardAnalyzer() {
       super(org.apache.lucene.util.Version.LUCENE_CURRENT);
@@ -139,6 +140,7 @@ public class TestAnalyzers extends BaseT
     assertTrue(ts.incrementToken());
     assertFalse(ts.incrementToken());
   }
+  */
 }
 
 class PayloadSetter extends TokenFilter {

Propchange: lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue May  4 11:18:46 2010
@@ -1,3 +1,5 @@
+/lucene/dev/trunk/lucene/backwards/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java:932163,932369,932698,932747,932749,932773,932862,935521,940451
+/lucene/java/branches/flex_1458/backwards/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java:924791,924850
 /lucene/java/branches/lucene_2_4/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java:748824
 /lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java:825998,829134,829881,831036
 /lucene/java/branches/lucene_3_0/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java:880754,880793,880823,881216,881317,881376,881473,881549,881820,882467,882890,883076,883080,912383

Modified: lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/analysis/TestToken.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/analysis/TestToken.java?rev=940806&r1=940805&r2=940806&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/analysis/TestToken.java (original)
+++ lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/analysis/TestToken.java Tue May  4 11:18:46 2010
@@ -31,6 +31,7 @@ public class TestToken extends LuceneTes
     super(name);
   }
 
+  /* toString changed in 3.1:
   public void testCtor() throws Exception {
     Token t = new Token();
     char[] content = "hello".toCharArray();
@@ -60,6 +61,7 @@ public class TestToken extends LuceneTes
     assertEquals("(hello,6,22,type=junk)", t.toString());
     assertEquals(0, t.getFlags());
   }
+  */
 
   public void testResize() {
     Token t = new Token();
@@ -139,6 +141,7 @@ public class TestToken extends LuceneTes
     assertEquals(20000, t.termLength());
   }
 
+  /* toString changed in 3.1:
   public void testToString() throws Exception {
     char[] b = {'a', 'l', 'o', 'h', 'a'};
     Token t = new Token("", 0, 5);
@@ -148,6 +151,7 @@ public class TestToken extends LuceneTes
     t.setTermBuffer("hi there");
     assertEquals("(hi there,0,5)", t.toString());
   }
+  */
 
   public void testTermBufferEquals() throws Exception {
     Token t1a = new Token();

Modified: lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/analysis/tokenattributes/TestTermAttributeImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/analysis/tokenattributes/TestTermAttributeImpl.java?rev=940806&r1=940805&r2=940806&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/analysis/tokenattributes/TestTermAttributeImpl.java (original)
+++ lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/analysis/tokenattributes/TestTermAttributeImpl.java Tue May  4 11:18:46 2010
@@ -107,10 +107,10 @@ public class TestTermAttributeImpl exten
     char[] b = {'a', 'l', 'o', 'h', 'a'};
     TermAttributeImpl t = new TermAttributeImpl();
     t.setTermBuffer(b, 0, 5);
-    assertEquals("term=aloha", t.toString());
+    assertEquals("aloha", t.toString());
 
     t.setTermBuffer("hi there");
-    assertEquals("term=hi there", t.toString());
+    assertEquals("hi there", t.toString());
   }
 
   public void testMixedStringArray() throws Exception {

Propchange: lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/document/TestDateTools.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue May  4 11:18:46 2010
@@ -1,3 +1,5 @@
+/lucene/dev/trunk/lucene/backwards/src/test/org/apache/lucene/document/TestDateTools.java:932163,932369,932698,932747,932749,932773,932862,935521,940451
+/lucene/java/branches/flex_1458/backwards/src/test/org/apache/lucene/document/TestDateTools.java:924791,924850
 /lucene/java/branches/lucene_2_4/src/test/org/apache/lucene/document/TestDateTools.java:748824
 /lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/document/TestDateTools.java:825998,829134,829881,831036
 /lucene/java/branches/lucene_3_0/src/test/org/apache/lucene/document/TestDateTools.java:880754,880793,880823,881216,881317,881376,881473,881549,881820,882467,882890,883076,883080,912383

Propchange: lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/document/TestNumberTools.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue May  4 11:18:46 2010
@@ -1,3 +1,5 @@
+/lucene/dev/trunk/lucene/backwards/src/test/org/apache/lucene/document/TestNumberTools.java:932163,932369,932698,932747,932749,932773,932862,935521,940451
+/lucene/java/branches/flex_1458/backwards/src/test/org/apache/lucene/document/TestNumberTools.java:924791,924850
 /lucene/java/branches/lucene_2_4/src/test/org/apache/lucene/document/TestNumberTools.java:748824
 /lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/document/TestNumberTools.java:825998,829134,829881,831036
 /lucene/java/branches/lucene_3_0/src/test/org/apache/lucene/document/TestNumberTools.java:880754,880793,880823,881216,881317,881376,881473,881549,881820,882467,882890,883076,883080,912383

Propchange: lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue May  4 11:18:46 2010
@@ -1,3 +1,5 @@
+/lucene/dev/trunk/lucene/backwards/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java:932163,932369,932698,932747,932749,932773,932862,935521,940451
+/lucene/java/branches/flex_1458/backwards/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java:924791,924850
 /lucene/java/branches/lucene_2_4/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java:748824
 /lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java:825998,829134,829881,831036
 /lucene/java/branches/lucene_3_0/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java:880754,880793,880823,881216,881317,881376,881473,881549,881820,882467,882890,883076,883080,912383

Modified: lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/util/TestAttributeSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/util/TestAttributeSource.java?rev=940806&r1=940805&r2=940806&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/util/TestAttributeSource.java (original)
+++ lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/util/TestAttributeSource.java Tue May  4 11:18:46 2010
@@ -78,22 +78,22 @@ public class TestAttributeSource extends
   
   public void testCloneAttributes() {
     final AttributeSource src = new AttributeSource();
-    final TermAttribute termAtt = src.addAttribute(TermAttribute.class);
+    final FlagsAttribute flagsAtt = src.addAttribute(FlagsAttribute.class);
     final TypeAttribute typeAtt = src.addAttribute(TypeAttribute.class);
-    termAtt.setTermBuffer("TestTerm");
+    flagsAtt.setFlags(1234);
     typeAtt.setType("TestType");
     
     final AttributeSource clone = src.cloneAttributes();
     final Iterator<Class<? extends Attribute>> it = clone.getAttributeClassesIterator();
-    assertEquals("TermAttribute must be the first attribute", TermAttribute.class, it.next());
+    assertEquals("FlagsAttribute must be the first attribute", FlagsAttribute.class, it.next());
     assertEquals("TypeAttribute must be the second attribute", TypeAttribute.class, it.next());
     assertFalse("No more attributes", it.hasNext());
     
-    final TermAttribute termAtt2 = clone.getAttribute(TermAttribute.class);
+    final FlagsAttribute flagsAtt2 = clone.getAttribute(FlagsAttribute.class);
     final TypeAttribute typeAtt2 = clone.getAttribute(TypeAttribute.class);
-    assertNotSame("TermAttribute of original and clone must be different instances", termAtt2, termAtt);
+    assertNotSame("FlagsAttribute of original and clone must be different instances", flagsAtt2, flagsAtt);
     assertNotSame("TypeAttribute of original and clone must be different instances", typeAtt2, typeAtt);
-    assertEquals("TermAttribute of original and clone must be equal", termAtt2, termAtt);
+    assertEquals("FlagsAttribute of original and clone must be equal", flagsAtt2, flagsAtt);
     assertEquals("TypeAttribute of original and clone must be equal", typeAtt2, typeAtt);
   }
   

Propchange: lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/util/TestAttributeSource.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue May  4 11:18:46 2010
@@ -1,3 +1,5 @@
+/lucene/dev/trunk/lucene/backwards/src/test/org/apache/lucene/util/TestAttributeSource.java:932163,932369,932698,932747,932749,932773,932862,935521,940451
+/lucene/java/branches/flex_1458/backwards/src/test/org/apache/lucene/util/TestAttributeSource.java:924791,924850
 /lucene/java/branches/lucene_2_4/src/test/org/apache/lucene/util/TestAttributeSource.java:748824
 /lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/util/TestAttributeSource.java:817269-818600,825998,829134,829881,831036
 /lucene/java/branches/lucene_2_9_back_compat_tests/src/test/org/apache/lucene/util/TestAttributeSource.java:818601-821336

Modified: lucene/dev/branches/branch_3x/lucene/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/build.xml?rev=940806&r1=940805&r2=940806&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/build.xml (original)
+++ lucene/dev/branches/branch_3x/lucene/build.xml Tue May  4 11:18:46 2010
@@ -232,7 +232,8 @@ The source distribution does not contain
     </sequential>
   </target>
 
-
+  <target name="compile-core" depends="jflex-notice, javacc-notice, common.compile-core"/>
+  
   <!-- ================================================================== -->
   <!-- B U I L D  D E M O                                                 -->
   <!-- ================================================================== -->
@@ -603,26 +604,6 @@ The source distribution does not contain
   <!-- ================================================================== -->
   <!-- Build the JavaCC files into the source tree                        -->
   <!-- ================================================================== -->
-  <target name="jjdoc">
-    <mkdir dir="${build.dir}/docs/grammars"/>
-    <jjdoc target="src/java/org/apache/lucene/queryParser/QueryParser.jj"
-           outputfile="${build.dir}/docs/grammars/QueryParser.html"
-           javacchome="${javacc.home}"
-    />
-    <jjdoc target="src/demo/org/apache/lucene/demo/html/HTMLParser.jj"
-           outputfile="${build.dir}/docs/grammars/HTMLParser.html"
-           javacchome="${javacc.home}"
-    />
-    <jjdoc target="contrib/surround/src/java/org/apache/lucene/queryParser/surround/parser/QueryParser.jj"
-           outputfile="${build.dir}/docs/grammars/Surround_QueryParser.html"
-           javacchome="${javacc.home}"
-    />
-    <jjdoc target="contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParser.jj"
-           outputfile="${build.dir}/docs/grammars/StandardSyntaxParser.html"
-           javacchome="${javacc.home}"
-    />
-  </target>
-
   <target name="clean-javacc">
     <delete>
       <fileset dir="src/java/org/apache/lucene/analysis/standard" includes="*.java">
@@ -644,7 +625,7 @@ The source distribution does not contain
     </delete>
   </target>
 
-  <target name="javacc" depends="clean-javacc,javacc-QueryParser,javacc-HTMLParser,javacc-contrib-queryparser, javacc-contrib-surround, javacc-contrib-precedence"/>
+  <target name="javacc" depends="init,javacc-check,clean-javacc,javacc-QueryParser,javacc-HTMLParser,javacc-contrib-queryparser,javacc-contrib-surround"/>
 
   <target name="javacc-QueryParser" depends="init,javacc-check" if="javacc.present">
     <sequential>
@@ -684,25 +665,21 @@ The source distribution does not contain
     />
   </target>
   
-  <target name="javacc-contrib-precedence" depends="init,javacc-check" if="javacc.present">
-    <ant target="javacc"
-      dir="contrib/misc"
-      antfile="build.xml" 
-    />
-  </target>
-  
   <!-- ================================================================== -->
   <!-- Build the JFlex files into the source tree                         -->
   <!-- ================================================================== -->
 
-  <target name="jflex" depends="clean-jflex,jflex-StandardAnalyzer" />
+  <target name="jflex" depends="jflex-check, clean-jflex,jflex-StandardAnalyzer" />
 
   <target name="jflex-StandardAnalyzer" depends="init,jflex-check" if="jflex.present">
-    <taskdef classname="JFlex.anttask.JFlexTask" name="jflex">
-      <classpath location="${jflex.home}/lib/JFlex.jar" />
+    <taskdef classname="jflex.anttask.JFlexTask" name="jflex">
+			<classpath refid="jflex.classpath"/>
     </taskdef>
 
-    <jflex file="src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex"
+    <jflex file="src/java/org/apache/lucene/analysis/standard/StandardTokenizerImplOrig.jflex"
+           outdir="src/java/org/apache/lucene/analysis/standard"
+           nobak="on" />
+    <jflex file="src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl31.jflex"
            outdir="src/java/org/apache/lucene/analysis/standard"
            nobak="on" />
   </target>

Propchange: lucene/dev/branches/branch_3x/lucene/build.xml
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue May  4 11:18:46 2010
@@ -1,2 +1,5 @@
+/lucene/dev/trunk/build.xml:932749
+/lucene/dev/trunk/lucene/build.xml:932163,932369,932698,932747,932749,932773,932862,935521,940451
+/lucene/java/branches/flex_1458/build.xml:924791,924850
 /lucene/java/branches/lucene_2_9/build.xml:896850,909334
 /lucene/java/trunk/build.xml:924483-925561

Modified: lucene/dev/branches/branch_3x/lucene/common-build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/common-build.xml?rev=940806&r1=940805&r2=940806&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/common-build.xml (original)
+++ lucene/dev/branches/branch_3x/lucene/common-build.xml Tue May  4 11:18:46 2010
@@ -92,6 +92,21 @@
   <property name="javacc.home" location="${common.dir}"/>
   <property name="jflex.home" location="${common.dir}"/>
 
+  <path id="jflex.classpath">
+    <fileset dir="${jflex.home}/">
+      <!-- for a JFlex trunk checkout: -->
+      <include name="jflex/target/*.jar"/>
+      <!-- for a JFlex distribution (not yet available): -->
+      <include name="lib/*.jar"/>
+    </fileset>
+  </path>
+
+  <path id="javacc.classpath">
+    <fileset dir="${javacc.home}/">
+      <include name="bin/lib/*.jar"/>
+    </fileset>
+  </path>
+
   <property name="backwards.dir" location="backwards"/>
   <property name="build.dir.backwards" location="${build.dir}/backwards"/>
 
@@ -141,18 +156,6 @@
    </condition>
 
   <available
-    property="javacc.present"
-    classname="org.javacc.parser.Main"
-    classpath="${javacc.home}/bin/lib/javacc.jar"
-    />
-
-   <available
-    property="jflex.present"
-    classname="JFlex.anttask.JFlexTask"
-    classpath="${jflex.home}/lib/JFlex.jar"
-    />
-
-  <available
     property="maven.ant.tasks.present"
     classname="org.apache.maven.artifact.ant.Pom"
   />
@@ -169,29 +172,30 @@
   -->
   <target name="javacc-uptodate-check">
     <uptodate property="javacc.files.uptodate">
-      <srcfiles dir="src" includes="**/*.jj" />
+      <srcfiles dir="src/java" includes="**/*.jj" />
       <mapper type="glob" from="*.jj" to="*.java"/>
     </uptodate>
   </target>
 
-  <target name="javacc-notice" unless="javacc.files.uptodate">
+  <target name="javacc-notice" depends="javacc-uptodate-check" unless="javacc.files.uptodate">
     <echo>
       One or more of the JavaCC .jj files is newer than its corresponding
       .java file.  Run the "javacc" target to regenerate the artifacts.
     </echo>
   </target>
 
-  <target name="init" depends="javacc-uptodate-check, javacc-notice, jflex-uptodate-check, jflex-notice">
+  <target name="init">
+    <!-- currently empty -->
   </target>
 
   <target name="jflex-uptodate-check">
     <uptodate property="jflex.files.uptodate">
-      <srcfiles dir="src" includes="**/*.jflex" />
+      <srcfiles dir="src/java" includes="**/*.jflex" />
       <mapper type="glob" from="*.jflex" to="*.java"/>
     </uptodate>
   </target>
  
-  <target name="jflex-notice" unless="jflex.files.uptodate">
+  <target name="jflex-notice" depends="jflex-uptodate-check" unless="jflex.files.uptodate">
     <echo>
       One or more of the JFlex .jflex files is newer than its corresponding
       .java file.  Run the "jflex" target to regenerate the artifacts.
@@ -199,13 +203,15 @@
   </target>
 
   <target name="javacc-check">
+    <available property="javacc.present" classname="org.javacc.parser.Main">
+      <classpath refid="javacc.classpath"/>
+    </available>
     <fail unless="javacc.present">
       ##################################################################
       JavaCC not found.
       JavaCC Home: ${javacc.home}
-      JavaCC JAR: ${javacc.jar}
 
-      Please download and install JavaCC from:
+      Please download and install JavaCC 4.1 from:
 
       &lt;http://javacc.dev.java.net&gt;
 
@@ -227,22 +233,25 @@
   </target>
 	
   <target name="jflex-check">
+    <available property="jflex.present" classname="jflex.anttask.JFlexTask">
+      <classpath refid="jflex.classpath"/>
+    </available>
     <fail unless="jflex.present">
       ##################################################################
       JFlex not found.
       JFlex Home: ${jflex.home}
 
-      Please download and install JFlex from:
+      Please install the jFlex 1.5 version (currently not released)
+      from its SVN repository:
 
-      &lt;http://jflex.de/download.html&gt;
+       svn co http://jflex.svn.sourceforge.net/svnroot/jflex/trunk jflex
+       cd jflex
+       mvn install
 
       Then, create a build.properties file either in your home
       directory, or within the Lucene directory and set the jflex.home
-      property to the path where JFlex is installed. For example,
-      if you installed JFlex in /usr/local/java/jflex-1.4.1, then set the
-      jflex.home property to:
-
-      jflex.home=/usr/local/java/jflex-1.4.1
+      property to the path where the JFlex trunk checkout is located
+      (in the above example it's the directory called "jflex").
 
       ##################################################################
     </fail>

Propchange: lucene/dev/branches/branch_3x/lucene/contrib/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue May  4 11:18:46 2010
@@ -1,3 +1,5 @@
+/lucene/dev/trunk/lucene/contrib:932163,932369,932698,932747,932749,932773,932862,935521,940451
+/lucene/java/branches/flex_1458/contrib:924791,924850
 /lucene/java/branches/lucene_2_4/contrib:748824
 /lucene/java/branches/lucene_2_9/contrib:817269-818600,825998,829134,829816,829881,831036,896850,909334
 /lucene/java/branches/lucene_2_9_back_compat_tests/contrib:818601-821336

Modified: lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt?rev=940806&r1=940805&r2=940806&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt Tue May  4 11:18:46 2010
@@ -115,6 +115,10 @@ New features
    the ability to override any stemmer with a custom dictionary map.
    (Robert Muir, Uwe Schindler, Simon Willnauer)
 
+ * LUCENE-2400: ShingleFilter was changed to no longer output all-filler shingles and 
+   unigrams, and uses a more performant algorithm to build grams using a linked list
+   of AttributeSource.cloneAttributes() instances and the new copyTo() method.
+
 Build
 
  * LUCENE-2124: Moved the JDK-based collation support from contrib/collation 

Propchange: lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue May  4 11:18:46 2010
@@ -1,3 +1,5 @@
+/lucene/dev/trunk/lucene/contrib/CHANGES.txt:932163,932369,932698,932747,932749,932773,932862,935521,940451
+/lucene/java/branches/flex_1458/contrib/CHANGES.txt:924791,924850
 /lucene/java/branches/lucene_2_4/contrib/CHANGES.txt:748824
 /lucene/java/branches/lucene_2_9/contrib/CHANGES.txt:817269-818600,825998,826775,829134,829816,829881,831036,896850,909334
 /lucene/java/branches/lucene_2_9_back_compat_tests/contrib/CHANGES.txt:818601-821336

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java?rev=940806&r1=940805&r2=940806&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java Tue May  4 11:18:46 2010
@@ -18,14 +18,16 @@ package org.apache.lucene.analysis.shing
  */
 
 import java.io.IOException;
+import java.util.Iterator;
 import java.util.LinkedList;
 
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.util.AttributeSource;
 
 
 /**
@@ -66,12 +68,12 @@ public final class ShingleFilter extends
    */
   public static final String TOKEN_SEPARATOR = " ";
 
-
   /**
    * The sequence of input stream tokens (or filler tokens, if necessary)
    * that will be composed to form output shingles.
    */
-  private LinkedList<State> inputWindow = new LinkedList<State>();
+  private LinkedList<InputWindowToken> inputWindow
+    = new LinkedList<InputWindowToken>();
   
   /**
    * The number of input tokens in the next output token.  This is the "n" in
@@ -80,9 +82,9 @@ public final class ShingleFilter extends
   private CircularSequence gramSize;
 
   /**
-   * Shingle text is composed here.
+   * Shingle and unigram text is composed here.
    */
-  private StringBuilder shingleBuilder = new StringBuilder();
+  private StringBuilder gramBuilder = new StringBuilder();
 
   /**
    * The token type attribute value to use - default is "shingle"
@@ -111,18 +113,31 @@ public final class ShingleFilter extends
   private int minShingleSize;
 
   /**
-   * The remaining number of filler tokens inserted into the input stream
+   * The remaining number of filler tokens to be inserted into the input stream
    * from which shingles are composed, to handle position increments greater
    * than one.
    */
   private int numFillerTokensToInsert;
 
   /**
-   * The next input stream token.
+   * When the next input stream token has a position increment greater than
+   * one, it is stored in this field until sufficient filler tokens have been
+   * inserted to account for the position increment. 
+   */
+  private AttributeSource nextInputStreamToken;
+
+  /**
+   * Whether or not there is a next input stream token.
    */
-  private State nextInputStreamToken;
+  private boolean isNextInputStreamToken = false;
+
+  /**
+   * Whether at least one unigram or shingle has been output at the current 
+   * position.
+   */
+  private boolean isOutputHere = false;
   
-  private final TermAttribute termAtt;
+  private final CharTermAttribute termAtt;
   private final OffsetAttribute offsetAtt;
   private final PositionIncrementAttribute posIncrAtt;
   private final TypeAttribute typeAtt;
@@ -140,7 +155,7 @@ public final class ShingleFilter extends
     super(input);
     setMaxShingleSize(maxShingleSize);
     setMinShingleSize(minShingleSize);
-    this.termAtt = addAttribute(TermAttribute.class);
+    this.termAtt = addAttribute(CharTermAttribute.class);
     this.offsetAtt = addAttribute(OffsetAttribute.class);
     this.posIncrAtt = addAttribute(PositionIncrementAttribute.class);
     this.typeAtt = addAttribute(TypeAttribute.class);
@@ -241,23 +256,49 @@ public final class ShingleFilter extends
     this.tokenSeparator = null == tokenSeparator ? "" : tokenSeparator;
   }
 
-  /* (non-Javadoc)
-   * @see org.apache.lucene.analysis.TokenStream#next()
-   */
   @Override
   public final boolean incrementToken() throws IOException {
-    boolean tokenAvailable = false; 
+    boolean tokenAvailable = false;
+    int builtGramSize = 0;
     if (gramSize.atMinValue() || inputWindow.size() < gramSize.getValue()) {
       shiftInputWindow();
+      gramBuilder.setLength(0);
+    } else {
+      builtGramSize = gramSize.getPreviousValue();
     }
-    if ( ! inputWindow.isEmpty()) {
-      restoreState(inputWindow.getFirst());
-      if (1 == gramSize.getValue()) {
-        posIncrAtt.setPositionIncrement(1);
-        gramSize.advance();
-        tokenAvailable = true;
-      } else if (inputWindow.size() >= gramSize.getValue()) {
-        getNextShingle();
+    if (inputWindow.size() >= gramSize.getValue()) {
+      boolean isAllFiller = true;
+      InputWindowToken nextToken = null;
+      Iterator<InputWindowToken> iter = inputWindow.iterator();
+      for (int gramNum = 1 ;
+           iter.hasNext() && builtGramSize < gramSize.getValue() ;
+           ++gramNum) {
+        nextToken = iter.next();
+        if (builtGramSize < gramNum) {
+          if (builtGramSize > 0) {
+            gramBuilder.append(tokenSeparator);
+          }
+          gramBuilder.append(nextToken.termAtt.buffer(), 0, 
+                             nextToken.termAtt.length());
+          ++builtGramSize;
+        }
+        if (isAllFiller && nextToken.isFiller) {
+          if (gramNum == gramSize.getValue()) {
+            gramSize.advance();
+          }
+        } else { 
+          isAllFiller = false;
+        }
+      }
+      if ( ! isAllFiller && builtGramSize == gramSize.getValue()) {
+        inputWindow.getFirst().attSource.copyTo(this);
+        posIncrAtt.setPositionIncrement(isOutputHere ? 0 : 1);
+        termAtt.setEmpty().append(gramBuilder);
+        if (gramSize.getValue() > 1) {
+          typeAtt.setType(tokenType);
+        }
+        offsetAtt.setOffset(offsetAtt.startOffset(), nextToken.offsetAtt.endOffset());
+        isOutputHere = true;
         gramSize.advance();
         tokenAvailable = true;
       }
@@ -266,82 +307,68 @@ public final class ShingleFilter extends
   }
 
   /**
-   * <p>Makes the next token a shingle of length {@link #gramSize}, 
-   * composed of tokens taken from {@link #inputWindow}.
-   * <p>Callers of this method must first insure that there are at least 
-   * <code>gramSize</code> tokens available in <code>inputWindow</code>.
-   */
-  private void getNextShingle() {
-    int startOffset = offsetAtt.startOffset();
-
-    int minTokNum = gramSize.getValue() - 1; // zero-based inputWindow position
-    if (gramSize.getValue() == minShingleSize) {
-      // Clear the shingle text buffer if this is the first shingle
-      // at the current position in the input stream.
-      shingleBuilder.setLength(0);
-      minTokNum = 0;
-    }
-    for (int tokNum = minTokNum ; tokNum < gramSize.getValue() ; ++tokNum) {
-      if (tokNum > 0) {
-        shingleBuilder.append(tokenSeparator);
-      }
-      restoreState(inputWindow.get(tokNum));
-      shingleBuilder.append(termAtt.termBuffer(), 0, termAtt.termLength());
-    }
-    char[] termBuffer = termAtt.termBuffer();
-    int termLength = shingleBuilder.length();
-    if (termBuffer.length < termLength) {
-      termBuffer = termAtt.resizeTermBuffer(termLength);
-    }
-    shingleBuilder.getChars(0, termLength, termBuffer, 0);
-    termAtt.setTermLength(termLength);
-    posIncrAtt.setPositionIncrement(gramSize.atMinValue() ? 1 : 0);
-    typeAtt.setType(tokenType);
-    offsetAtt.setOffset(startOffset, offsetAtt.endOffset());
-  }
-  
-  /**
    * <p>Get the next token from the input stream.
    * <p>If the next token has <code>positionIncrement > 1</code>,
    * <code>positionIncrement - 1</code> {@link #FILLER_TOKEN}s are
    * inserted first.
-   * @return false for end of stream; true otherwise
+   * @param target Where to put the new token; if null, a new instance is created.
+   * @return On success, the populated token; null otherwise
    * @throws IOException if the input stream has a problem
    */
-  private boolean getNextToken() throws IOException {
-    boolean success = false;
+  private InputWindowToken getNextToken(InputWindowToken target) 
+    throws IOException {
+    InputWindowToken newTarget = target;
     if (numFillerTokensToInsert > 0) {
-      insertFillerToken();
-      success = true;
-    } else if (null != nextInputStreamToken) {
-      restoreState(nextInputStreamToken);
-      nextInputStreamToken = null;
-      success = true;
+      if (null == target) {
+        newTarget = new InputWindowToken(nextInputStreamToken.cloneAttributes());
+      } else {
+        nextInputStreamToken.copyTo(target.attSource);
+      }
+      // A filler token occupies no space
+      newTarget.offsetAtt.setOffset(newTarget.offsetAtt.startOffset(), 
+                                    newTarget.offsetAtt.startOffset());
+      newTarget.termAtt.copyBuffer(FILLER_TOKEN, 0, FILLER_TOKEN.length);
+      newTarget.isFiller = true;
+      --numFillerTokensToInsert;
+    } else if (isNextInputStreamToken) {
+      if (null == target) {
+        newTarget = new InputWindowToken(nextInputStreamToken.cloneAttributes());
+      } else {
+        nextInputStreamToken.copyTo(target.attSource);
+      }
+      isNextInputStreamToken = false;
+      newTarget.isFiller = false;
     } else if (input.incrementToken()) {
+      if (null == target) {
+        newTarget = new InputWindowToken(cloneAttributes());
+      } else {
+        this.copyTo(target.attSource);
+      }
       if (posIncrAtt.getPositionIncrement() > 1) {
-        numFillerTokensToInsert = posIncrAtt.getPositionIncrement() - 1;
-        insertFillerToken();
+        // Each output shingle must contain at least one input token, 
+        // so no more than (maxShingleSize - 1) filler tokens will be inserted.
+        numFillerTokensToInsert 
+          = Math.min(posIncrAtt.getPositionIncrement() - 1, maxShingleSize - 1);
+        // Save the current token as the next input stream token
+        if (null == nextInputStreamToken) {
+          nextInputStreamToken = cloneAttributes();
+        } else {
+          this.copyTo(nextInputStreamToken);
+        }
+        isNextInputStreamToken = true;
+        // A filler token occupies no space
+        newTarget.offsetAtt.setOffset(offsetAtt.startOffset(), offsetAtt.startOffset());
+        newTarget.termAtt.copyBuffer(FILLER_TOKEN, 0, FILLER_TOKEN.length);
+        newTarget.isFiller = true;
+        --numFillerTokensToInsert;
+      } else {
+        newTarget.isFiller = false;
       }
-      success = true;
-    }
-    return success;
-	}
-
-  /**
-   * Inserts a {@link #FILLER_TOKEN} and decrements
-   * {@link #numFillerTokensToInsert}.
-   */
-  private void insertFillerToken() {
-    if (null == nextInputStreamToken) {
-      nextInputStreamToken = captureState();
     } else {
-      restoreState(nextInputStreamToken);
+      newTarget = null;
     }
-    --numFillerTokensToInsert;
-    // A filler token occupies no space
-    offsetAtt.setOffset(offsetAtt.startOffset(), offsetAtt.startOffset());
-    termAtt.setTermBuffer(FILLER_TOKEN, 0, FILLER_TOKEN.length);
-  }
+    return newTarget;
+	}
 
   /**
    * <p>Fills {@link #inputWindow} with input stream tokens, if available, 
@@ -351,16 +378,29 @@ public final class ShingleFilter extends
    * @throws IOException if there's a problem getting the next token
    */
   private void shiftInputWindow() throws IOException {
+    InputWindowToken firstToken = null;
     if (inputWindow.size() > 0) {
-      inputWindow.removeFirst();
+      firstToken = inputWindow.removeFirst();
     }
-    while (getNextToken()) {
-      inputWindow.add(captureState());
-      if (inputWindow.size() == maxShingleSize) {
-        break;
+    while (inputWindow.size() < maxShingleSize) {
+      if (null != firstToken) {  // recycle the firstToken, if available
+        if (null != getNextToken(firstToken)) {
+          inputWindow.add(firstToken); // the firstToken becomes the last
+          firstToken = null;
+        } else {
+          break; // end of input stream
+        }
+      } else {
+        InputWindowToken nextToken = getNextToken(null);
+        if (null != nextToken) {
+          inputWindow.add(nextToken);
+        } else {
+          break; // end of input stream
+        }
       }
     }
     gramSize.reset();
+    isOutputHere = false;
   }
 
   @Override
@@ -369,6 +409,7 @@ public final class ShingleFilter extends
     gramSize.reset();
     inputWindow.clear();
     numFillerTokensToInsert = 0;
+    isOutputHere = false;
   }
 
 
@@ -383,6 +424,7 @@ public final class ShingleFilter extends
    */
   private class CircularSequence {
     private int value;
+    private int previousValue;
     private int minValue;
     
     public CircularSequence() {
@@ -405,10 +447,9 @@ public final class ShingleFilter extends
      * <b>{ [ 1, ] {@link #minShingleSize} [ , ... , {@link #maxShingleSize} ] }</b>.
      * <p>1 is included in the circular sequence only if 
      * {@link #outputUnigrams} = true.
-     * 
-     * @return the next member in the circular sequence
      */
-    public int advance() {
+    public void advance() {
+      previousValue = value;
       if (value == 1) {
         value = minShingleSize;
       } else if (value == maxShingleSize) {
@@ -416,7 +457,6 @@ public final class ShingleFilter extends
       } else {
         ++value;
       }
-      return value;
     }
 
     /**
@@ -428,7 +468,7 @@ public final class ShingleFilter extends
      * {@link #outputUnigrams} = true.
      */
     public void reset() {
-      value = minValue;
+      previousValue = value = minValue;
     }
 
     /**
@@ -443,5 +483,25 @@ public final class ShingleFilter extends
     public boolean atMinValue() {
       return value == minValue;
     }
+
+    /**
+     * @return the value this instance had before the last advance() call
+     */
+    public int getPreviousValue() {
+      return previousValue;
+    }
+  }
+    
+  private class InputWindowToken {
+    final AttributeSource attSource;
+    final CharTermAttribute termAtt;
+    final OffsetAttribute offsetAtt;
+    boolean isFiller = false;
+      
+    public InputWindowToken(AttributeSource attSource) {
+      this.attSource = attSource;
+      this.termAtt = attSource.getAttribute(CharTermAttribute.class);
+      this.offsetAtt = attSource.getAttribute(OffsetAttribute.class);
+    }
   }
 }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java?rev=940806&r1=940805&r2=940806&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java Tue May  4 11:18:46 2010
@@ -34,7 +34,7 @@ public class ShingleFilterTest extends B
     protected int index = 0;
     protected Token[] testToken;
     
-    private TermAttribute termAtt;
+    private CharTermAttribute termAtt;
     private OffsetAttribute offsetAtt;
     private PositionIncrementAttribute posIncrAtt;
     private TypeAttribute typeAtt;
@@ -42,7 +42,7 @@ public class ShingleFilterTest extends B
     public TestTokenStream(Token[] testToken) {
       super();
       this.testToken = testToken;
-      this.termAtt = addAttribute(TermAttribute.class);
+      this.termAtt = addAttribute(CharTermAttribute.class);
       this.offsetAtt = addAttribute(OffsetAttribute.class);
       this.posIncrAtt = addAttribute(PositionIncrementAttribute.class);
       this.typeAtt = addAttribute(TypeAttribute.class);
@@ -53,7 +53,7 @@ public class ShingleFilterTest extends B
       clearAttributes();
       if (index < testToken.length) {
         Token t = testToken[index++];
-        termAtt.setTermBuffer(t.termBuffer(), 0, t.termLength());
+        termAtt.copyBuffer(t.buffer(), 0, t.length());
         offsetAtt.setOffset(t.startOffset(), t.endOffset());
         posIncrAtt.setPositionIncrement(t.getPositionIncrement());
         typeAtt.setType(TypeAttributeImpl.DEFAULT_TYPE);
@@ -103,17 +103,20 @@ public class ShingleFilterTest extends B
     createToken("please divide", 0, 13),
     createToken("divide", 7, 13),
     createToken("divide _", 7, 19),
-    createToken("_", 19, 19),
     createToken("_ sentence", 19, 27),
     createToken("sentence", 19, 27),
     createToken("sentence _", 19, 33),
-    createToken("_", 33, 33),
     createToken("_ shingles", 33, 39),
     createToken("shingles", 33, 39),
   };
 
   public static final int[] BI_GRAM_POSITION_INCREMENTS_WITH_HOLES = new int[] {
-    1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
+    1, 0, 1, 0, 1, 1, 0, 1, 1
+  };
+
+  private static final String[] BI_GRAM_TYPES_WITH_HOLES = {
+    "word", "shingle", 
+    "word", "shingle", "shingle", "word", "shingle", "shingle", "word"
   };
 
   public static final Token[] BI_GRAM_TOKENS_WITHOUT_UNIGRAMS = new Token[] {
@@ -642,18 +645,157 @@ public class ShingleFilterTest extends B
     "word"
   };
   
+  public static final Token[] TEST_TOKEN_POS_INCR_EQUAL_TO_N = new Token[] {
+    createToken("please", 0, 6),
+    createToken("divide", 7, 13),
+    createToken("this", 14, 18),
+    createToken("sentence", 29, 37, 3),
+    createToken("into", 38, 42),
+    createToken("shingles", 43, 49),
+  };
+
+  public static final Token[] TRI_GRAM_TOKENS_POS_INCR_EQUAL_TO_N = new Token[] {
+    createToken("please", 0, 6),
+    createToken("please divide", 0, 13),
+    createToken("please divide this", 0, 18),
+    createToken("divide", 7, 13),
+    createToken("divide this", 7, 18),
+    createToken("divide this _", 7, 29),
+    createToken("this", 14, 18),
+    createToken("this _", 14, 29),
+    createToken("this _ _", 14, 29),
+    createToken("_ _ sentence", 29, 37),
+    createToken("_ sentence", 29, 37),
+    createToken("_ sentence into", 29, 42),
+    createToken("sentence", 29, 37),
+    createToken("sentence into", 29, 42),
+    createToken("sentence into shingles", 29, 49),
+    createToken("into", 38, 42),
+    createToken("into shingles", 38, 49),
+    createToken("shingles", 43, 49)
+  };
+  
+  public static final int[] TRI_GRAM_POSITION_INCREMENTS_POS_INCR_EQUAL_TO_N = new int[] {
+    1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1
+  };
+  
+  public static final String[] TRI_GRAM_TYPES_POS_INCR_EQUAL_TO_N = new String[] {
+    "word", "shingle", "shingle",
+    "word", "shingle", "shingle",
+    "word", "shingle", "shingle",
+    "shingle", "shingle", "shingle", "word", "shingle", "shingle",
+    "word", "shingle",
+    "word"
+  };
+  
+  public static final Token[] TRI_GRAM_TOKENS_POS_INCR_EQUAL_TO_N_WITHOUT_UNIGRAMS = new Token[] {
+    createToken("please divide", 0, 13),
+    createToken("please divide this", 0, 18),
+    createToken("divide this", 7, 18),
+    createToken("divide this _", 7, 29),
+    createToken("this _", 14, 29),
+    createToken("this _ _", 14, 29),
+    createToken("_ _ sentence", 29, 37),
+    createToken("_ sentence", 29, 37),
+    createToken("_ sentence into", 29, 42),
+    createToken("sentence into", 29, 42),
+    createToken("sentence into shingles", 29, 49),
+    createToken("into shingles", 38, 49),
+  };
+
+  public static final int[] TRI_GRAM_POSITION_INCREMENTS_POS_INCR_EQUAL_TO_N_WITHOUT_UNIGRAMS = new int[] {
+    1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1
+  };
+
+  public static final String[] TRI_GRAM_TYPES_POS_INCR_EQUAL_TO_N_WITHOUT_UNIGRAMS = new String[] {
+    "shingle", "shingle",
+    "shingle", "shingle",
+    "shingle", "shingle",
+    "shingle", "shingle", "shingle",
+    "shingle", "shingle",
+    "shingle",
+  };
+
+  public static final Token[] TEST_TOKEN_POS_INCR_GREATER_THAN_N = new Token[] {
+    createToken("please", 0, 6),
+    createToken("divide", 57, 63, 8),
+    createToken("this", 64, 68),
+    createToken("sentence", 69, 77),
+    createToken("into", 78, 82),
+    createToken("shingles", 83, 89),
+  };
+  
+  public static final Token[] TRI_GRAM_TOKENS_POS_INCR_GREATER_THAN_N = new Token[] {
+    createToken("please", 0, 6),
+    createToken("please _", 0, 57),
+    createToken("please _ _", 0, 57),
+    createToken("_ _ divide", 57, 63),
+    createToken("_ divide", 57, 63),
+    createToken("_ divide this", 57, 68),
+    createToken("divide", 57, 63),
+    createToken("divide this", 57, 68),
+    createToken("divide this sentence", 57, 77),
+    createToken("this", 64, 68),
+    createToken("this sentence", 64, 77),
+    createToken("this sentence into", 64, 82),
+    createToken("sentence", 69, 77),
+    createToken("sentence into", 69, 82),
+    createToken("sentence into shingles", 69, 89),
+    createToken("into", 78, 82),
+    createToken("into shingles", 78, 89),
+    createToken("shingles", 83, 89)
+  };
+  
+  public static final int[] TRI_GRAM_POSITION_INCREMENTS_POS_INCR_GREATER_THAN_N = new int[] {
+    1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1
+  };
+  public static final String[] TRI_GRAM_TYPES_POS_INCR_GREATER_THAN_N = new String[] {
+    "word", "shingle", "shingle",
+    "shingle",
+    "shingle", "shingle", 
+    "word", "shingle", "shingle",
+    "word", "shingle", "shingle",
+    "word", "shingle", "shingle",
+    "word", "shingle",
+    "word"
+  };
+  
+  public static final Token[] TRI_GRAM_TOKENS_POS_INCR_GREATER_THAN_N_WITHOUT_UNIGRAMS = new Token[] {
+    createToken("please _", 0, 57),
+    createToken("please _ _", 0, 57),
+    createToken("_ _ divide", 57, 63),
+    createToken("_ divide", 57, 63),
+    createToken("_ divide this", 57, 68),
+    createToken("divide this", 57, 68),
+    createToken("divide this sentence", 57, 77),
+    createToken("this sentence", 64, 77),
+    createToken("this sentence into", 64, 82),
+    createToken("sentence into", 69, 82),
+    createToken("sentence into shingles", 69, 89),
+    createToken("into shingles", 78, 89),
+  };
+
+  public static final int[] TRI_GRAM_POSITION_INCREMENTS_POS_INCR_GREATER_THAN_N_WITHOUT_UNIGRAMS = new int[] {
+    1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1
+  };
+
+  public static final String[] TRI_GRAM_TYPES_POS_INCR_GREATER_THAN_N_WITHOUT_UNIGRAMS = new String[] {
+    "shingle", "shingle",
+    "shingle", "shingle",
+    "shingle", "shingle",
+    "shingle", "shingle", "shingle", "shingle", "shingle",
+    "shingle",
+  };
+
   @Override
   protected void setUp() throws Exception {
     super.setUp();
     testTokenWithHoles = new Token[] {
       createToken("please", 0, 6),
       createToken("divide", 7, 13),
-      createToken("sentence", 19, 27),
-      createToken("shingles", 33, 39),
+      createToken("sentence", 19, 27, 2),
+      createToken("shingles", 33, 39, 2),
     };
-
-    testTokenWithHoles[2].setPositionIncrement(2);
-    testTokenWithHoles[3].setPositionIncrement(2);
   }
 
   /*
@@ -667,7 +809,8 @@ public class ShingleFilterTest extends B
 
   public void testBiGramFilterWithHoles() throws IOException {
     this.shingleFilterTest(2, testTokenWithHoles, BI_GRAM_TOKENS_WITH_HOLES,
-                           BI_GRAM_POSITION_INCREMENTS, BI_GRAM_TYPES,
+                           BI_GRAM_POSITION_INCREMENTS_WITH_HOLES, 
+                           BI_GRAM_TYPES_WITH_HOLES, 
                            true);
   }
 
@@ -832,8 +975,32 @@ public class ShingleFilterTest extends B
                            TRI_GRAM_POSITION_INCREMENTS_NULL_SEPARATOR, 
                            TRI_GRAM_TYPES_NULL_SEPARATOR, true);
   }
+
+  public void testPositionIncrementEqualToN() throws IOException {
+    this.shingleFilterTest(2, 3, TEST_TOKEN_POS_INCR_EQUAL_TO_N, TRI_GRAM_TOKENS_POS_INCR_EQUAL_TO_N,
+                           TRI_GRAM_POSITION_INCREMENTS_POS_INCR_EQUAL_TO_N, 
+                           TRI_GRAM_TYPES_POS_INCR_EQUAL_TO_N, true);
+  }
+  
+  public void testPositionIncrementEqualToNWithoutUnigrams() throws IOException {
+    this.shingleFilterTest(2, 3, TEST_TOKEN_POS_INCR_EQUAL_TO_N, TRI_GRAM_TOKENS_POS_INCR_EQUAL_TO_N_WITHOUT_UNIGRAMS,
+                           TRI_GRAM_POSITION_INCREMENTS_POS_INCR_EQUAL_TO_N_WITHOUT_UNIGRAMS, 
+                           TRI_GRAM_TYPES_POS_INCR_EQUAL_TO_N_WITHOUT_UNIGRAMS, false);
+  }
   
   
+  public void testPositionIncrementGreaterThanN() throws IOException {
+    this.shingleFilterTest(2, 3, TEST_TOKEN_POS_INCR_GREATER_THAN_N, TRI_GRAM_TOKENS_POS_INCR_GREATER_THAN_N,
+                           TRI_GRAM_POSITION_INCREMENTS_POS_INCR_GREATER_THAN_N, 
+                           TRI_GRAM_TYPES_POS_INCR_GREATER_THAN_N, true);
+  }
+  
+  public void testPositionIncrementGreaterThanNWithoutUnigrams() throws IOException {
+    this.shingleFilterTest(2, 3, TEST_TOKEN_POS_INCR_GREATER_THAN_N, TRI_GRAM_TOKENS_POS_INCR_GREATER_THAN_N_WITHOUT_UNIGRAMS,
+                           TRI_GRAM_POSITION_INCREMENTS_POS_INCR_GREATER_THAN_N_WITHOUT_UNIGRAMS, 
+                           TRI_GRAM_TYPES_POS_INCR_GREATER_THAN_N_WITHOUT_UNIGRAMS, false);
+  }
+  
   public void testReset() throws Exception {
     Tokenizer wsTokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("please divide this sentence"));
     TokenStream filter = new ShingleFilter(wsTokenizer, 2);
@@ -896,18 +1063,24 @@ public class ShingleFilterTest extends B
     int endOffsets[] = new int[tokensToCompare.length];
     
     for (int i = 0; i < tokensToCompare.length; i++) {
-      text[i] = tokensToCompare[i].term();
+      text[i] = new String(tokensToCompare[i].buffer(),0, tokensToCompare[i].length());
       startOffsets[i] = tokensToCompare[i].startOffset();
       endOffsets[i] = tokensToCompare[i].endOffset();
     }
     
     assertTokenStreamContents(filter, text, startOffsets, endOffsets, types, positionIncrements);
   }
+  
+  private static Token createToken(String term, int start, int offset) {
+    return createToken(term, start, offset, 1);
+  }
 
-  private static Token createToken(String term, int start, int offset)
+  private static Token createToken
+    (String term, int start, int offset, int positionIncrement)
   {
     Token token = new Token(start, offset);
-    token.setTermBuffer(term);
+    token.copyBuffer(term.toCharArray(), 0, term.length());
+    token.setPositionIncrement(positionIncrement);
     return token;
   }
 }

Propchange: lucene/dev/branches/branch_3x/lucene/contrib/highlighter/src/test/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue May  4 11:18:46 2010
@@ -1,3 +1,5 @@
+/lucene/dev/trunk/lucene/contrib/highlighter/src/test:932163,932369,932698,932747,932749,932773,932862,935521,940451
+/lucene/java/branches/flex_1458/contrib/highlighter/src/test:924791,924850
 /lucene/java/branches/lucene_2_4/contrib/highlighter/src/test:748824
 /lucene/java/branches/lucene_2_9/contrib/highlighter/src/test:817269-818600,825998,826775,829134,829816,829881,831036,896850,909334
 /lucene/java/branches/lucene_2_9_back_compat_tests/contrib/highlighter/src/test:818601-821336

Propchange: lucene/dev/branches/branch_3x/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue May  4 11:18:46 2010
@@ -1,2 +1,4 @@
+/lucene/dev/trunk/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java:932163,932369,932698,932747,932749,932773,932862,935521,940451
+/lucene/java/branches/flex_1458/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java:924791,924850
 /lucene/java/branches/lucene_2_9/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java:896850,909334
 /lucene/java/trunk/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java:924483-925561

Modified: lucene/dev/branches/branch_3x/lucene/contrib/queryparser/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/queryparser/build.xml?rev=940806&r1=940805&r2=940806&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/queryparser/build.xml (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/queryparser/build.xml Tue May  4 11:18:46 2010
@@ -25,73 +25,79 @@
 
   <import file="../contrib-build.xml"/>
  
+  <target name="compile-core" depends="javacc-notice, common.compile-core"/>
+
   <!--
     NOTE: see the README.javacc for details on how to fully regenerate the parser
   -->
   <target name="javacc" depends="javacc-flexible,javacc-precedence"/>
 
-  <target name="javacc-flexible" depends="init,javacc-check" if="javacc.present">
+  <target name="javacc-flexible" depends="javacc-check">
+    <delete>
+      <fileset dir="src/java/org/apache/lucene/queryParser/standard/parser" includes="*.java">
+        <containsregexp expression="Generated.*By.*JavaCC"/>
+      </fileset>
+    </delete>
     <invoke-javacc target="src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParser.jj"
                    outputDir="src/java/org/apache/lucene/queryParser/standard/parser"
     />
-  	    <replaceregexp file="src/java/org/apache/lucene/queryParser/standard/parser/ParseException.java"
-  	                         match="public class ParseException extends Exception"
-  	                         replace="public class ParseException extends QueryNodeParseException"
-  	                         flags="g"
-  	                         byline="false"/>
-  	    <replaceregexp file="src/java/org/apache/lucene/queryParser/standard/parser/ParseException.java"
-  	                         match="package org.apache.lucene.queryParser.standard.parser;"
-  	                         replace="package org.apache.lucene.queryParser.standard.parser;&#10;
-  	&#10;
-  	import org.apache.lucene.messages.Message;&#10;
-  	import org.apache.lucene.messages.MessageImpl;&#10;
-  	import org.apache.lucene.queryParser.core.*;&#10;
-  	import org.apache.lucene.queryParser.core.messages.*;"
-  	                         flags="g"
-  	                         byline="false"/>
-  	    <replaceregexp file="src/java/org/apache/lucene/queryParser/standard/parser/ParseException.java"
-  	                         match="^  public ParseException\(Token currentTokenVal.*$(\s\s[^}].*\n)*  \}"
-  	                         replace="  public ParseException(Token currentTokenVal,&#10;
-  	        int[][] expectedTokenSequencesVal, String[] tokenImageVal) {&#10;
-  	    super(new MessageImpl(QueryParserMessages.INVALID_SYNTAX, initialise(&#10;
-  	      currentTokenVal, expectedTokenSequencesVal, tokenImageVal)));&#10;
-  	    this.currentToken = currentTokenVal;&#10;
-  	    this.expectedTokenSequences = expectedTokenSequencesVal;&#10;
-  	    this.tokenImage = tokenImageVal;&#10;
-  	  }"
-  	                         flags="gm"
-  	                         byline="false"/>
-  	    <replaceregexp file="src/java/org/apache/lucene/queryParser/standard/parser/ParseException.java"
-  	                         match="^  public ParseException\(String message.*$(\s\s[^}].*\n)*  \}"
-  	                         replace="  public ParseException(Message message) {&#10;
-  	    super(message);&#10;                      
-  	  }"
-  	                         flags="gm"
-  	                         byline="false"/>
-  	    <replaceregexp file="src/java/org/apache/lucene/queryParser/standard/parser/ParseException.java"
-  	                         match="^  public ParseException\(\).*$(\s\s[^}].*\n)*  \}"
-  	                         replace="  public ParseException() {&#10;
-  	    super(new MessageImpl(QueryParserMessages.INVALID_SYNTAX, &quot;Error&quot;));&#10;
-  	  }"
-  	                         flags="gm"
-  	                         byline="false"/>
-  	    <replaceregexp file="src/java/org/apache/lucene/queryParser/standard/parser/ParseException.java"
-  	                         match="^  public String getMessage\(\).*$(\s\s\s\s[^}].*\n)*    \}"
-  	                         replace="  private static String initialise(Token currentToken, &#10;
-  	      int[][] expectedTokenSequences, String[] tokenImage) {&#10;
-  	    String eol = System.getProperty(&quot;line.separator&quot;, &quot;\n&quot;);"
-  	                         flags="gm"
-  	                         byline="false"/>
-  	    <replaceregexp file="src/java/org/apache/lucene/queryParser/standard/parser/ParseException.java"
-  	                         match="\s*protected String add_escapes.*"
-  	                         replace="  static private String add_escapes(String str) {"
-  	                         flags="g"
-  	                         byline="false"/>
+        <replaceregexp file="src/java/org/apache/lucene/queryParser/standard/parser/ParseException.java"
+                             match="public class ParseException extends Exception"
+                             replace="public class ParseException extends QueryNodeParseException"
+                             flags="g"
+                             byline="false"/>
+        <replaceregexp file="src/java/org/apache/lucene/queryParser/standard/parser/ParseException.java"
+                             match="package org.apache.lucene.queryParser.standard.parser;"
+                             replace="package org.apache.lucene.queryParser.standard.parser;${line.separator}
+${line.separator}
+import org.apache.lucene.messages.Message;${line.separator}
+import org.apache.lucene.messages.MessageImpl;${line.separator}
+import org.apache.lucene.queryParser.core.*;${line.separator}
+import org.apache.lucene.queryParser.core.messages.*;"
+                             flags="g"
+                             byline="false"/>
+        <replaceregexp file="src/java/org/apache/lucene/queryParser/standard/parser/ParseException.java"
+                             match="^  public ParseException\(Token currentTokenVal.*$(\s\s[^}].*\n)*  \}"
+                             replace="  public ParseException(Token currentTokenVal,${line.separator}
+    int[][] expectedTokenSequencesVal, String[] tokenImageVal) {${line.separator}
+    super(new MessageImpl(QueryParserMessages.INVALID_SYNTAX, initialise(${line.separator}
+    currentTokenVal, expectedTokenSequencesVal, tokenImageVal)));${line.separator}
+    this.currentToken = currentTokenVal;${line.separator}
+    this.expectedTokenSequences = expectedTokenSequencesVal;${line.separator}
+    this.tokenImage = tokenImageVal;${line.separator}
+  }"
+                             flags="gm"
+                             byline="false"/>
+        <replaceregexp file="src/java/org/apache/lucene/queryParser/standard/parser/ParseException.java"
+                             match="^  public ParseException\(String message.*$(\s\s[^}].*\n)*  \}"
+                             replace="  public ParseException(Message message) {${line.separator}
+    super(message);${line.separator}                      
+  }"
+                             flags="gm"
+                             byline="false"/>
+        <replaceregexp file="src/java/org/apache/lucene/queryParser/standard/parser/ParseException.java"
+                             match="^  public ParseException\(\).*$(\s\s[^}].*\n)*  \}"
+                             replace="  public ParseException() {${line.separator}
+    super(new MessageImpl(QueryParserMessages.INVALID_SYNTAX, &quot;Error&quot;));${line.separator}
+  }"
+                             flags="gm"
+                             byline="false"/>
+        <replaceregexp file="src/java/org/apache/lucene/queryParser/standard/parser/ParseException.java"
+                             match="^  public String getMessage\(\).*$(\s\s\s\s[^}].*\n)*    \}"
+                             replace="  private static String initialise(Token currentToken, int[][] expectedTokenSequences, String[] tokenImage) {${line.separator}
+    String eol = System.getProperty(&quot;line.separator&quot;, &quot;\n&quot;);"
+                             flags="gm"
+                             byline="false"/>
+        <replaceregexp file="src/java/org/apache/lucene/queryParser/standard/parser/ParseException.java"
+                             match="\s*protected String add_escapes.*"
+                             replace="  static private String add_escapes(String str) {"
+                             flags="g"
+                             byline="true"/>
   </target>
   
   <property name="javacc.precedence.path" location="src/java/org/apache/lucene/queryParser/precedence"/>
 
-  <target name="javacc-precedence" depends="javacc-check" description="generate precedence query parser from jj (requires javacc 3.2)">
+  <target name="javacc-precedence" depends="javacc-check" description="generate precedence query parser from jj (requires javacc 4.1)">
     <delete>
       <fileset dir="${javacc.precedence.path}" includes="*.java">
         <containsregexp expression="Generated.*By.*JavaCC"/>