Posted to commits@lucene.apache.org by rm...@apache.org on 2013/10/21 20:58:44 UTC

svn commit: r1534320 [24/39] - in /lucene/dev/branches/lucene4956: ./ dev-tools/ dev-tools/idea/.idea/ dev-tools/idea/lucene/expressions/ dev-tools/idea/solr/contrib/velocity/ dev-tools/maven/ dev-tools/maven/lucene/ dev-tools/maven/lucene/expressions/...

Modified: lucene/dev/branches/lucene4956/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java (original)
+++ lucene/dev/branches/lucene4956/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java Mon Oct 21 18:58:24 2013
@@ -41,8 +41,8 @@ import org.apache.lucene.analysis.Tokeni
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.search.suggest.Lookup.LookupResult;
-import org.apache.lucene.search.suggest.TermFreq;
-import org.apache.lucene.search.suggest.TermFreqArrayIterator;
+import org.apache.lucene.search.suggest.Input;
+import org.apache.lucene.search.suggest.InputArrayIterator;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.LuceneTestCase;
@@ -54,16 +54,16 @@ import org.apache.lucene.util.fst.Util;
 public class FuzzySuggesterTest extends LuceneTestCase {
   
   public void testRandomEdits() throws IOException {
-    List<TermFreq> keys = new ArrayList<TermFreq>();
+    List<Input> keys = new ArrayList<Input>();
     int numTerms = atLeast(100);
     for (int i = 0; i < numTerms; i++) {
-      keys.add(new TermFreq("boo" + _TestUtil.randomSimpleString(random()), 1 + random().nextInt(100)));
+      keys.add(new Input("boo" + _TestUtil.randomSimpleString(random()), 1 + random().nextInt(100)));
     }
-    keys.add(new TermFreq("foo bar boo far", 12));
+    keys.add(new Input("foo bar boo far", 12));
     MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
     FuzzySuggester suggester = new FuzzySuggester(analyzer, analyzer, FuzzySuggester.EXACT_FIRST | FuzzySuggester.PRESERVE_SEP, 256, -1, FuzzySuggester.DEFAULT_MAX_EDITS, FuzzySuggester.DEFAULT_TRANSPOSITIONS,
                                                   0, FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH, FuzzySuggester.DEFAULT_UNICODE_AWARE);
-    suggester.build(new TermFreqArrayIterator(keys));
+    suggester.build(new InputArrayIterator(keys));
     int numIters = atLeast(10);
     for (int i = 0; i < numIters; i++) {
       String addRandomEdit = addRandomEdit("foo bar boo", FuzzySuggester.DEFAULT_NON_FUZZY_PREFIX);
@@ -75,16 +75,16 @@ public class FuzzySuggesterTest extends 
   }
   
   public void testNonLatinRandomEdits() throws IOException {
-    List<TermFreq> keys = new ArrayList<TermFreq>();
+    List<Input> keys = new ArrayList<Input>();
     int numTerms = atLeast(100);
     for (int i = 0; i < numTerms; i++) {
-      keys.add(new TermFreq("буу" + _TestUtil.randomSimpleString(random()), 1 + random().nextInt(100)));
+      keys.add(new Input("буу" + _TestUtil.randomSimpleString(random()), 1 + random().nextInt(100)));
     }
-    keys.add(new TermFreq("фуу бар буу фар", 12));
+    keys.add(new Input("фуу бар буу фар", 12));
     MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
     FuzzySuggester suggester = new FuzzySuggester(analyzer, analyzer, FuzzySuggester.EXACT_FIRST | FuzzySuggester.PRESERVE_SEP, 256, -1, FuzzySuggester.DEFAULT_MAX_EDITS, FuzzySuggester.DEFAULT_TRANSPOSITIONS,
         0, FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH, true);
-    suggester.build(new TermFreqArrayIterator(keys));
+    suggester.build(new InputArrayIterator(keys));
     int numIters = atLeast(10);
     for (int i = 0; i < numIters; i++) {
       String addRandomEdit = addRandomEdit("фуу бар буу", 0);
@@ -97,15 +97,15 @@ public class FuzzySuggesterTest extends 
 
   /** this is basically the WFST test ported to KeywordAnalyzer. so it acts the same */
   public void testKeyword() throws Exception {
-    TermFreq keys[] = new TermFreq[] {
-        new TermFreq("foo", 50),
-        new TermFreq("bar", 10),
-        new TermFreq("barbar", 12),
-        new TermFreq("barbara", 6)
+    Input keys[] = new Input[] {
+        new Input("foo", 50),
+        new Input("bar", 10),
+        new Input("barbar", 12),
+        new Input("barbara", 6)
     };
     
     FuzzySuggester suggester = new FuzzySuggester(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false));
-    suggester.build(new TermFreqArrayIterator(keys));
+    suggester.build(new InputArrayIterator(keys));
     
     List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("bariar", random()), false, 2);
     assertEquals(2, results.size());
@@ -172,14 +172,14 @@ public class FuzzySuggesterTest extends 
    * basic "standardanalyzer" test with stopword removal
    */
   public void testStandard() throws Exception {
-    TermFreq keys[] = new TermFreq[] {
-        new TermFreq("the ghost of christmas past", 50),
+    Input keys[] = new Input[] {
+        new Input("the ghost of christmas past", 50),
     };
     
     Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
     FuzzySuggester suggester = new FuzzySuggester(standard);
     suggester.setPreservePositionIncrements(false);
-    suggester.build(new TermFreqArrayIterator(keys));
+    suggester.build(new InputArrayIterator(keys));
     
     List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("the ghost of chris", random()), false, 1);
     assertEquals(1, results.size());
@@ -200,16 +200,16 @@ public class FuzzySuggesterTest extends 
   }
 
   public void testNoSeps() throws Exception {
-    TermFreq[] keys = new TermFreq[] {
-      new TermFreq("ab cd", 0),
-      new TermFreq("abcd", 1),
+    Input[] keys = new Input[] {
+      new Input("ab cd", 0),
+      new Input("abcd", 1),
     };
 
     int options = 0;
 
     Analyzer a = new MockAnalyzer(random());
     FuzzySuggester suggester = new FuzzySuggester(a, a, options, 256, -1, 1, true, 1, 3, false);
-    suggester.build(new TermFreqArrayIterator(keys));
+    suggester.build(new InputArrayIterator(keys));
     // TODO: would be nice if "ab " would allow the test to
     // pass, and more generally if the analyzer can know
     // that the user's current query has ended at a word, 
@@ -270,12 +270,12 @@ public class FuzzySuggesterTest extends 
       }
     };
 
-    TermFreq keys[] = new TermFreq[] {
-        new TermFreq("wifi network is slow", 50),
-        new TermFreq("wi fi network is fast", 10),
+    Input keys[] = new Input[] {
+        new Input("wifi network is slow", 50),
+        new Input("wi fi network is fast", 10),
     };
     FuzzySuggester suggester = new FuzzySuggester(analyzer);
-    suggester.build(new TermFreqArrayIterator(keys));
+    suggester.build(new InputArrayIterator(keys));
     
     List<LookupResult> results = suggester.lookup("wifi network", false, 10);
     if (VERBOSE) {
@@ -290,7 +290,7 @@ public class FuzzySuggesterTest extends 
 
   public void testEmpty() throws Exception {
     FuzzySuggester suggester = new FuzzySuggester(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false));
-    suggester.build(new TermFreqArrayIterator(new TermFreq[0]));
+    suggester.build(new InputArrayIterator(new Input[0]));
 
     List<LookupResult> result = suggester.lookup("a", false, 20);
     assertTrue(result.isEmpty());
@@ -344,12 +344,12 @@ public class FuzzySuggesterTest extends 
       }
     };
 
-    TermFreq keys[] = new TermFreq[] {
-        new TermFreq("ab xc", 50),
-        new TermFreq("ba xd", 50),
+    Input keys[] = new Input[] {
+        new Input("ab xc", 50),
+        new Input("ba xd", 50),
     };
     FuzzySuggester suggester = new FuzzySuggester(analyzer);
-    suggester.build(new TermFreqArrayIterator(keys));
+    suggester.build(new InputArrayIterator(keys));
     List<LookupResult> results = suggester.lookup("ab x", false, 1);
     assertTrue(results.size() == 1);
   }
@@ -418,11 +418,11 @@ public class FuzzySuggesterTest extends 
 
     Analyzer a = getUnusualAnalyzer();
     FuzzySuggester suggester = new FuzzySuggester(a, a, AnalyzingSuggester.EXACT_FIRST | AnalyzingSuggester.PRESERVE_SEP, 256, -1, 1, true, 1, 3, false);
-    suggester.build(new TermFreqArrayIterator(new TermFreq[] {
-          new TermFreq("x y", 1),
-          new TermFreq("x y z", 3),
-          new TermFreq("x", 2),
-          new TermFreq("z z z", 20),
+    suggester.build(new InputArrayIterator(new Input[] {
+          new Input("x y", 1),
+          new Input("x y z", 3),
+          new Input("x", 2),
+          new Input("z z z", 20),
         }));
 
     //System.out.println("ALL: " + suggester.lookup("x y", false, 6));
@@ -458,11 +458,11 @@ public class FuzzySuggesterTest extends 
     Analyzer a = getUnusualAnalyzer();
     FuzzySuggester suggester = new FuzzySuggester(a, a, AnalyzingSuggester.PRESERVE_SEP, 256, -1, 1, true, 1, 3, false);
 
-    suggester.build(new TermFreqArrayIterator(new TermFreq[] {
-          new TermFreq("x y", 1),
-          new TermFreq("x y z", 3),
-          new TermFreq("x", 2),
-          new TermFreq("z z z", 20),
+    suggester.build(new InputArrayIterator(new Input[] {
+          new Input("x y", 1),
+          new Input("x y z", 3),
+          new Input("x", 2),
+          new Input("z z z", 20),
         }));
 
     for(int topN=1;topN<6;topN++) {
@@ -491,19 +491,19 @@ public class FuzzySuggesterTest extends 
   }
   
   // Holds surface form separately:
-  private static class TermFreq2 implements Comparable<TermFreq2> {
+  private static class TermFreqPayload2 implements Comparable<TermFreqPayload2> {
     public final String surfaceForm;
     public final String analyzedForm;
     public final long weight;
 
-    public TermFreq2(String surfaceForm, String analyzedForm, long weight) {
+    public TermFreqPayload2(String surfaceForm, String analyzedForm, long weight) {
       this.surfaceForm = surfaceForm;
       this.analyzedForm = analyzedForm;
       this.weight = weight;
     }
 
     @Override
-    public int compareTo(TermFreq2 other) {
+    public int compareTo(TermFreqPayload2 other) {
       int cmp = analyzedForm.compareTo(other.analyzedForm);
       if (cmp != 0) {
         return cmp;
@@ -596,11 +596,11 @@ public class FuzzySuggesterTest extends 
 
     int numQueries = atLeast(100);
     
-    final List<TermFreq2> slowCompletor = new ArrayList<TermFreq2>();
+    final List<TermFreqPayload2> slowCompletor = new ArrayList<TermFreqPayload2>();
     final TreeSet<String> allPrefixes = new TreeSet<String>();
     final Set<String> seen = new HashSet<String>();
     
-    TermFreq[] keys = new TermFreq[numQueries];
+    Input[] keys = new Input[numQueries];
 
     boolean preserveSep = random().nextBoolean();
     boolean unicodeAware = random().nextBoolean();
@@ -666,17 +666,17 @@ public class FuzzySuggesterTest extends 
       }
       // we can probably do Integer.MAX_VALUE here, but why worry.
       int weight = random().nextInt(1<<24);
-      keys[i] = new TermFreq(key, weight);
+      keys[i] = new Input(key, weight);
 
-      slowCompletor.add(new TermFreq2(key, analyzedKey, weight));
+      slowCompletor.add(new TermFreqPayload2(key, analyzedKey, weight));
     }
 
     if (VERBOSE) {
       // Don't just sort original list, to avoid VERBOSE
       // altering the test:
-      List<TermFreq2> sorted = new ArrayList<TermFreq2>(slowCompletor);
+      List<TermFreqPayload2> sorted = new ArrayList<TermFreqPayload2>(slowCompletor);
       Collections.sort(sorted);
-      for(TermFreq2 ent : sorted) {
+      for(TermFreqPayload2 ent : sorted) {
         System.out.println("  surface='" + ent.surfaceForm + " analyzed='" + ent.analyzedForm + "' weight=" + ent.weight);
       }
     }
@@ -684,7 +684,7 @@ public class FuzzySuggesterTest extends 
     Analyzer a = new MockTokenEatingAnalyzer(numStopChars, preserveHoles);
     FuzzySuggester suggester = new FuzzySuggester(a, a,
                                                   preserveSep ? AnalyzingSuggester.PRESERVE_SEP : 0, 256, -1, 1, false, 1, 3, unicodeAware);
-    suggester.build(new TermFreqArrayIterator(keys));
+    suggester.build(new InputArrayIterator(keys));
 
     for (String prefix : allPrefixes) {
 
@@ -756,7 +756,7 @@ public class FuzzySuggesterTest extends 
       assertTrue(automaton.isDeterministic());
       // TODO: could be faster... but its slowCompletor for a reason
       BytesRef spare = new BytesRef();
-      for (TermFreq2 e : slowCompletor) {
+      for (TermFreqPayload2 e : slowCompletor) {
         spare.copyChars(e.analyzedForm);
         Set<IntsRef> finiteStrings = suggester.toFiniteStrings(spare, tokenStreamToAutomaton);
         for (IntsRef intsRef : finiteStrings) {
@@ -825,14 +825,14 @@ public class FuzzySuggesterTest extends 
     Analyzer a = new MockAnalyzer(random());
     FuzzySuggester suggester = new FuzzySuggester(a, a, 0, 2, -1, 1, true, 1, 3, false);
 
-    List<TermFreq> keys = Arrays.asList(new TermFreq[] {
-        new TermFreq("a", 40),
-        new TermFreq("a ", 50),
-        new TermFreq(" a", 60),
+    List<Input> keys = Arrays.asList(new Input[] {
+        new Input("a", 40),
+        new Input("a ", 50),
+        new Input(" a", 60),
       });
 
     Collections.shuffle(keys, random());
-    suggester.build(new TermFreqArrayIterator(keys));
+    suggester.build(new InputArrayIterator(keys));
 
     List<LookupResult> results = suggester.lookup("a", false, 5);
     assertEquals(2, results.size());
@@ -846,15 +846,15 @@ public class FuzzySuggesterTest extends 
     Analyzer a = new MockAnalyzer(random());
     FuzzySuggester suggester = new FuzzySuggester(a, a, FuzzySuggester.PRESERVE_SEP, 2, -1, 2, true, 1, 3, false);
 
-    List<TermFreq> keys = Arrays.asList(new TermFreq[] {
-        new TermFreq("foo bar", 40),
-        new TermFreq("foo bar baz", 50),
-        new TermFreq("barbaz", 60),
-        new TermFreq("barbazfoo", 10),
+    List<Input> keys = Arrays.asList(new Input[] {
+        new Input("foo bar", 40),
+        new Input("foo bar baz", 50),
+        new Input("barbaz", 60),
+        new Input("barbazfoo", 10),
       });
 
     Collections.shuffle(keys, random());
-    suggester.build(new TermFreqArrayIterator(keys));
+    suggester.build(new InputArrayIterator(keys));
 
     assertEquals("[foo bar baz/50, foo bar/40]", suggester.lookup("foobar", false, 5).toString());
     assertEquals("[foo bar baz/50]", suggester.lookup("foobarbaz", false, 5).toString());
@@ -929,25 +929,25 @@ public class FuzzySuggesterTest extends 
 
   public void testRandom2() throws Throwable {
     final int NUM = atLeast(200);
-    final List<TermFreq> answers = new ArrayList<TermFreq>();
+    final List<Input> answers = new ArrayList<Input>();
     final Set<String> seen = new HashSet<String>();
     for(int i=0;i<NUM;i++) {
       final String s = randomSimpleString(8);
       if (!seen.contains(s)) {
-        answers.add(new TermFreq(s, random().nextInt(1000)));
+        answers.add(new Input(s, random().nextInt(1000)));
         seen.add(s);
       }
     }
 
-    Collections.sort(answers, new Comparator<TermFreq>() {
+    Collections.sort(answers, new Comparator<Input>() {
         @Override
-        public int compare(TermFreq a, TermFreq b) {
+        public int compare(Input a, Input b) {
           return a.term.compareTo(b.term);
         }
       });
     if (VERBOSE) {
       System.out.println("\nTEST: targets");
-      for(TermFreq tf : answers) {
+      for(Input tf : answers) {
         System.out.println("  " + tf.term.utf8ToString() + " freq=" + tf.v);
       }
     }
@@ -965,7 +965,7 @@ public class FuzzySuggesterTest extends 
     }
 
     Collections.shuffle(answers, random());
-    suggest.build(new TermFreqArrayIterator(answers.toArray(new TermFreq[answers.size()])));
+    suggest.build(new InputArrayIterator(answers.toArray(new Input[answers.size()])));
 
     final int ITERS = atLeast(100);
     for(int iter=0;iter<ITERS;iter++) {
@@ -1004,10 +1004,10 @@ public class FuzzySuggesterTest extends 
     }
   }
 
-  private List<LookupResult> slowFuzzyMatch(int prefixLen, int maxEdits, boolean allowTransposition, List<TermFreq> answers, String frag) {
+  private List<LookupResult> slowFuzzyMatch(int prefixLen, int maxEdits, boolean allowTransposition, List<Input> answers, String frag) {
     final List<LookupResult> results = new ArrayList<LookupResult>();
     final int fragLen = frag.length();
-    for(TermFreq tf : answers) {
+    for(Input tf : answers) {
       //System.out.println("  check s=" + tf.term.utf8ToString());
       boolean prefixMatches = true;
       for(int i=0;i<prefixLen;i++) {
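
The FuzzySuggesterTest hunks above are a mechanical rename: TermFreq becomes Input and TermFreqArrayIterator becomes InputArrayIterator, with the (term, weight) shape unchanged. A minimal sketch of the renamed API, assuming it lives inside a LuceneTestCase subclass (the test method name is hypothetical; random() comes from the test framework):

    import java.util.Arrays;
    import java.util.List;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.MockAnalyzer;
    import org.apache.lucene.search.suggest.Input;
    import org.apache.lucene.search.suggest.InputArrayIterator;
    import org.apache.lucene.search.suggest.Lookup.LookupResult;
    import org.apache.lucene.search.suggest.analyzing.FuzzySuggester;

    public void testRenamedSuggestApi() throws Exception {
      // (term, weight) pairs; Input replaces the old TermFreq one-for-one.
      List<Input> keys = Arrays.asList(
          new Input("foo bar", 40),
          new Input("foo bar baz", 50));

      Analyzer a = new MockAnalyzer(random());
      FuzzySuggester suggester = new FuzzySuggester(a);
      suggester.build(new InputArrayIterator(keys));  // was: TermFreqArrayIterator

      // "foobar" is within the default edit budget of the stored keys:
      List<LookupResult> results = suggester.lookup("foobar", false, 5);
    }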

Modified: lucene/dev/branches/lucene4956/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java (original)
+++ lucene/dev/branches/lucene4956/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java Mon Oct 21 18:58:24 2013
@@ -28,8 +28,8 @@ import org.apache.lucene.util.*;
  * Unit tests for {@link FSTCompletion}.
  */
 public class FSTCompletionTest extends LuceneTestCase {
-  public static TermFreq tf(String t, int v) {
-    return new TermFreq(t, v);
+  public static Input tf(String t, int v) {
+    return new Input(t, v);
   }
 
   private FSTCompletion completion;
@@ -40,15 +40,15 @@ public class FSTCompletionTest extends L
     super.setUp();
 
     FSTCompletionBuilder builder = new FSTCompletionBuilder();
-    for (TermFreq tf : evalKeys()) {
+    for (Input tf : evalKeys()) {
       builder.add(tf.term, (int) tf.v);
     }
     completion = builder.build();
     completionAlphabetical = new FSTCompletion(completion.getFST(), false, true);
   }
 
-  private TermFreq[] evalKeys() {
-    final TermFreq[] keys = new TermFreq[] {
+  private Input[] evalKeys() {
+    final Input[] keys = new Input[] {
         tf("one", 0),
         tf("oneness", 1),
         tf("onerous", 1),
@@ -157,17 +157,17 @@ public class FSTCompletionTest extends L
     FSTCompletionLookup lookup = new FSTCompletionLookup(10, true);
     
     Random r = random();
-    List<TermFreq> keys = new ArrayList<TermFreq>();
+    List<Input> keys = new ArrayList<Input>();
     for (int i = 0; i < 5000; i++) {
-      keys.add(new TermFreq(_TestUtil.randomSimpleString(r), -1));
+      keys.add(new Input(_TestUtil.randomSimpleString(r), -1));
     }
 
-    lookup.build(new TermFreqArrayIterator(keys));
+    lookup.build(new InputArrayIterator(keys));
 
     // All the weights were constant, so all returned buckets must be constant, whatever they
     // are.
     Long previous = null; 
-    for (TermFreq tf : keys) {
+    for (Input tf : keys) {
       Long current = ((Number)lookup.get(_TestUtil.bytesToCharSequence(tf.term, random()))).longValue();
       if (previous != null) {
         assertEquals(previous, current);
@@ -177,11 +177,11 @@ public class FSTCompletionTest extends L
   }  
 
   public void testMultilingualInput() throws Exception {
-    List<TermFreq> input = LookupBenchmarkTest.readTop50KWiki();
+    List<Input> input = LookupBenchmarkTest.readTop50KWiki();
 
     FSTCompletionLookup lookup = new FSTCompletionLookup();
-    lookup.build(new TermFreqArrayIterator(input));
-    for (TermFreq tf : input) {
+    lookup.build(new InputArrayIterator(input));
+    for (Input tf : input) {
       assertNotNull("Not found: " + tf.term.toString(), lookup.get(_TestUtil.bytesToCharSequence(tf.term, random())));
       assertEquals(tf.term.utf8ToString(), lookup.lookup(_TestUtil.bytesToCharSequence(tf.term, random()), true, 1).get(0).key.toString());
     }
@@ -198,17 +198,17 @@ public class FSTCompletionTest extends L
   }
 
   public void testRandom() throws Exception {
-    List<TermFreq> freqs = new ArrayList<TermFreq>();
+    List<Input> freqs = new ArrayList<Input>();
     Random rnd = random();
     for (int i = 0; i < 2500 + rnd.nextInt(2500); i++) {
       int weight = rnd.nextInt(100); 
-      freqs.add(new TermFreq("" + rnd.nextLong(), weight));
+      freqs.add(new Input("" + rnd.nextLong(), weight));
     }
 
     FSTCompletionLookup lookup = new FSTCompletionLookup();
-    lookup.build(new TermFreqArrayIterator(freqs.toArray(new TermFreq[freqs.size()])));
+    lookup.build(new InputArrayIterator(freqs.toArray(new Input[freqs.size()])));
 
-    for (TermFreq tf : freqs) {
+    for (Input tf : freqs) {
       final String term = tf.term.utf8ToString();
       for (int i = 1; i < term.length(); i++) {
         String prefix = term.substring(0, i);
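
FSTCompletionTest gets the same rename; at the builder level, the pattern its setUp() drives is the following, as a sketch (Input.term is a BytesRef and Input.v the weight, exactly as the hunks use them; keys is assumed built as in evalKeys() above):

    import org.apache.lucene.search.suggest.Input;
    import org.apache.lucene.search.suggest.fst.FSTCompletion;
    import org.apache.lucene.search.suggest.fst.FSTCompletionBuilder;

    FSTCompletionBuilder builder = new FSTCompletionBuilder();
    for (Input tf : keys) {               // keys: the Input[] from evalKeys()
      builder.add(tf.term, (int) tf.v);   // raw term bytes plus a bucketed weight
    }
    FSTCompletion completion = builder.build();

    // Reuse the same FST, returning ties alphabetically instead of by weight:
    FSTCompletion alphabetical = new FSTCompletion(completion.getFST(), false, true);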

Modified: lucene/dev/branches/lucene4956/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java (original)
+++ lucene/dev/branches/lucene4956/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java Mon Oct 21 18:58:24 2013
@@ -20,8 +20,8 @@ package org.apache.lucene.search.suggest
 import java.util.*;
 
 import org.apache.lucene.search.suggest.Lookup.LookupResult;
-import org.apache.lucene.search.suggest.TermFreq;
-import org.apache.lucene.search.suggest.TermFreqArrayIterator;
+import org.apache.lucene.search.suggest.Input;
+import org.apache.lucene.search.suggest.InputArrayIterator;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util._TestUtil;
@@ -29,16 +29,16 @@ import org.apache.lucene.util._TestUtil;
 public class WFSTCompletionTest extends LuceneTestCase {
   
   public void testBasic() throws Exception {
-    TermFreq keys[] = new TermFreq[] {
-        new TermFreq("foo", 50),
-        new TermFreq("bar", 10),
-        new TermFreq("barbar", 12),
-        new TermFreq("barbara", 6)
+    Input keys[] = new Input[] {
+        new Input("foo", 50),
+        new Input("bar", 10),
+        new Input("barbar", 12),
+        new Input("barbara", 6)
     };
     
     Random random = new Random(random().nextLong());
     WFSTCompletionLookup suggester = new WFSTCompletionLookup();
-    suggester.build(new TermFreqArrayIterator(keys));
+    suggester.build(new InputArrayIterator(keys));
     
     // top N of 2, but only foo is available
     List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("f", random), false, 2);
@@ -81,9 +81,9 @@ public class WFSTCompletionTest extends 
 
     WFSTCompletionLookup suggester = new WFSTCompletionLookup(true);
 
-    suggester.build(new TermFreqArrayIterator(new TermFreq[] {
-          new TermFreq("x y", 20),
-          new TermFreq("x", 2),
+    suggester.build(new InputArrayIterator(new Input[] {
+          new Input("x y", 20),
+          new Input("x", 2),
         }));
 
     for(int topN=1;topN<4;topN++) {
@@ -105,9 +105,9 @@ public class WFSTCompletionTest extends 
 
     WFSTCompletionLookup suggester = new WFSTCompletionLookup(false);
 
-    suggester.build(new TermFreqArrayIterator(new TermFreq[] {
-          new TermFreq("x y", 20),
-          new TermFreq("x", 2),
+    suggester.build(new InputArrayIterator(new Input[] {
+          new Input("x y", 20),
+          new Input("x", 2),
         }));
 
     for(int topN=1;topN<4;topN++) {
@@ -131,7 +131,7 @@ public class WFSTCompletionTest extends 
     final TreeMap<String,Long> slowCompletor = new TreeMap<String,Long>();
     final TreeSet<String> allPrefixes = new TreeSet<String>();
     
-    TermFreq[] keys = new TermFreq[numWords];
+    Input[] keys = new Input[numWords];
     
     for (int i = 0; i < numWords; i++) {
       String s;
@@ -150,11 +150,11 @@ public class WFSTCompletionTest extends 
       // we can probably do Integer.MAX_VALUE here, but why worry.
       int weight = random().nextInt(1<<24);
       slowCompletor.put(s, (long)weight);
-      keys[i] = new TermFreq(s, weight);
+      keys[i] = new Input(s, weight);
     }
 
     WFSTCompletionLookup suggester = new WFSTCompletionLookup(false);
-    suggester.build(new TermFreqArrayIterator(keys));
+    suggester.build(new InputArrayIterator(keys));
 
     Random random = new Random(random().nextLong());
     for (String prefix : allPrefixes) {
@@ -205,16 +205,16 @@ public class WFSTCompletionTest extends 
 
     WFSTCompletionLookup suggester = new WFSTCompletionLookup(false);
 
-    suggester.build(new TermFreqArrayIterator(new TermFreq[] {
-          new TermFreq(key1, 50),
-          new TermFreq(key2, 50),
+    suggester.build(new InputArrayIterator(new Input[] {
+          new Input(key1, 50),
+          new Input(key2, 50),
         }));
   }
 
   public void testEmpty() throws Exception {
     WFSTCompletionLookup suggester = new WFSTCompletionLookup(false);
 
-    suggester.build(new TermFreqArrayIterator(new TermFreq[0]));
+    suggester.build(new InputArrayIterator(new Input[0]));
     List<LookupResult> result = suggester.lookup("a", false, 20);
     assertTrue(result.isEmpty());
   }
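
WFSTCompletionTest follows suit; the exactFirst flag its two lookup hunks exercise behaves roughly as sketched below (the ordering claim is inferred from the keys and weights the test builds):

    WFSTCompletionLookup suggester = new WFSTCompletionLookup(true);  // exactFirst
    suggester.build(new InputArrayIterator(new Input[] {
        new Input("x y", 20),
        new Input("x", 2),
    }));
    // exactFirst=true: lookup("x", false, 2) puts the exact key "x" (weight 2)
    // ahead of the higher-weighted completion "x y"; with exactFirst=false the
    // results come back purely by descending weight.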

Modified: lucene/dev/branches/lucene4956/lucene/test-framework/ivy.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/ivy.xml?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/ivy.xml (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/ivy.xml Mon Oct 21 18:58:24 2013
@@ -29,11 +29,11 @@
     </configurations>
 
     <dependencies defaultconf="default">
-      <dependency org="org.apache.ant" name="ant" rev="1.8.2" transitive="false" />
+      <dependency org="org.apache.ant" name="ant" rev="${/org.apache.ant/ant}" transitive="false" />
 
-      <dependency org="junit" name="junit" rev="4.10" transitive="false" conf="default->*;junit4-stdalone->*" />
-      <dependency org="com.carrotsearch.randomizedtesting" name="junit4-ant" rev="2.0.10" transitive="false" conf="default->*;junit4-stdalone->*" />
-      <dependency org="com.carrotsearch.randomizedtesting" name="randomizedtesting-runner" rev="2.0.10" transitive="false" conf="default->*;junit4-stdalone->*" />
+      <dependency org="junit" name="junit" rev="${/junit/junit}" transitive="false" conf="default->*;junit4-stdalone->*" />
+      <dependency org="com.carrotsearch.randomizedtesting" name="junit4-ant" rev="${/com.carrotsearch.randomizedtesting/junit4-ant}" transitive="false" conf="default->*;junit4-stdalone->*" />
+      <dependency org="com.carrotsearch.randomizedtesting" name="randomizedtesting-runner" rev="${/com.carrotsearch.randomizedtesting/randomizedtesting-runner}" transitive="false" conf="default->*;junit4-stdalone->*" />
 
       <exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}"/> 
     </dependencies>
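
The hard-coded revs above become ${/org/name}-style properties, so each dependency version is declared once per checkout rather than per module; on this branch the values presumably resolve from the shared lucene/ivy-versions.properties file, where the first entry would read /org.apache.ant/ant = 1.8.2.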

Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java Mon Oct 21 18:58:24 2013
@@ -111,7 +111,8 @@ public abstract class BaseTokenStreamTes
   //     arriving to pos Y have the same endOffset)
   //   - offsets only move forwards (startOffset >=
   //     lastStartOffset)
-  public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[], int posLengths[], Integer finalOffset,
+  public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[],
+                                               int posLengths[], Integer finalOffset, Integer finalPosInc, boolean[] keywordAtts,
                                                boolean offsetsAreCorrect) throws IOException {
     assertNotNull(output);
     CheckClearAttributesAttribute checkClearAtt = ts.addAttribute(CheckClearAttributesAttribute.class);
@@ -135,7 +136,7 @@ public abstract class BaseTokenStreamTes
     }
     
     PositionIncrementAttribute posIncrAtt = null;
-    if (posIncrements != null) {
+    if (posIncrements != null || finalPosInc != null) {
       assertTrue("has no PositionIncrementAttribute", ts.hasAttribute(PositionIncrementAttribute.class));
       posIncrAtt = ts.getAttribute(PositionIncrementAttribute.class);
     }
@@ -145,6 +146,12 @@ public abstract class BaseTokenStreamTes
       assertTrue("has no PositionLengthAttribute", ts.hasAttribute(PositionLengthAttribute.class));
       posLengthAtt = ts.getAttribute(PositionLengthAttribute.class);
     }
+
+    KeywordAttribute keywordAtt = null;
+    if (keywordAtts != null) {
+      assertTrue("has no KeywordAttribute", ts.hasAttribute(KeywordAttribute.class));
+      keywordAtt = ts.getAttribute(KeywordAttribute.class);
+    }
     
     // Maps position to the start/end offset:
     final Map<Integer,Integer> posToStartOffset = new HashMap<Integer,Integer>();
@@ -161,22 +168,31 @@ public abstract class BaseTokenStreamTes
       if (typeAtt != null) typeAtt.setType("bogusType");
       if (posIncrAtt != null) posIncrAtt.setPositionIncrement(45987657);
       if (posLengthAtt != null) posLengthAtt.setPositionLength(45987653);
+      if (keywordAtt != null) keywordAtt.setKeyword((i&1) == 0);
       
       checkClearAtt.getAndResetClearCalled(); // reset it, because we called clearAttribute() before
       assertTrue("token "+i+" does not exist", ts.incrementToken());
       assertTrue("clearAttributes() was not called correctly in TokenStream chain", checkClearAtt.getAndResetClearCalled());
       
       assertEquals("term "+i, output[i], termAtt.toString());
-      if (startOffsets != null)
+      if (startOffsets != null) {
         assertEquals("startOffset "+i, startOffsets[i], offsetAtt.startOffset());
-      if (endOffsets != null)
+      }
+      if (endOffsets != null) {
         assertEquals("endOffset "+i, endOffsets[i], offsetAtt.endOffset());
-      if (types != null)
+      }
+      if (types != null) {
         assertEquals("type "+i, types[i], typeAtt.type());
-      if (posIncrements != null)
+      }
+      if (posIncrements != null) {
         assertEquals("posIncrement "+i, posIncrements[i], posIncrAtt.getPositionIncrement());
-      if (posLengths != null)
+      }
+      if (posLengths != null) {
         assertEquals("posLength "+i, posLengths[i], posLengthAtt.getPositionLength());
+      }
+      if (keywordAtts != null) {
+        assertEquals("keywordAtt " + i, keywordAtts[i], keywordAtt.isKeyword());
+      }
       
       // we can enforce some basic things about a few attributes even if the caller doesn't check:
       if (offsetAtt != null) {
@@ -239,17 +255,47 @@ public abstract class BaseTokenStreamTes
         assertTrue("posLength must be >= 1", posLengthAtt.getPositionLength() >= 1);
       }
     }
-    assertFalse("TokenStream has more tokens than expected (expected count=" + output.length + ")", ts.incrementToken());
+
+    if (ts.incrementToken()) {
+      fail("TokenStream has more tokens than expected (expected count=" + output.length + "); extra token=" + termAtt.toString());
+    }
+
+    // repeat our extra safety checks for end()
+    ts.clearAttributes();
+    if (termAtt != null) termAtt.setEmpty().append("bogusTerm");
+    if (offsetAtt != null) offsetAtt.setOffset(14584724,24683243);
+    if (typeAtt != null) typeAtt.setType("bogusType");
+    if (posIncrAtt != null) posIncrAtt.setPositionIncrement(45987657);
+    if (posLengthAtt != null) posLengthAtt.setPositionLength(45987653);
+    
+    checkClearAtt.getAndResetClearCalled(); // reset it, because we called clearAttribute() before
+
     ts.end();
+    assertTrue("super.end()/clearAttributes() was not called correctly in end()", checkClearAtt.getAndResetClearCalled());
+    
     if (finalOffset != null) {
-      assertEquals("finalOffset ", finalOffset.intValue(), offsetAtt.endOffset());
+      assertEquals("finalOffset", finalOffset.intValue(), offsetAtt.endOffset());
     }
     if (offsetAtt != null) {
       assertTrue("finalOffset must be >= 0", offsetAtt.endOffset() >= 0);
     }
+    if (finalPosInc != null) {
+      assertEquals("finalPosInc", finalPosInc.intValue(), posIncrAtt.getPositionIncrement());
+    }
+
     ts.close();
   }
   
+  public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[],
+                                               int posLengths[], Integer finalOffset, boolean[] keywordAtts,
+                                               boolean offsetsAreCorrect) throws IOException {
+    assertTokenStreamContents(ts, output, startOffsets, endOffsets, types, posIncrements, posLengths, finalOffset, null, null, offsetsAreCorrect);
+  }
+
+  public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[], int posLengths[], Integer finalOffset, boolean offsetsAreCorrect) throws IOException {
+    assertTokenStreamContents(ts, output, startOffsets, endOffsets, types, posIncrements, posLengths, finalOffset, null, offsetsAreCorrect);
+  }
+
   public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[], int posLengths[], Integer finalOffset) throws IOException {
     assertTokenStreamContents(ts, output, startOffsets, endOffsets, types, posIncrements, posLengths, finalOffset, true);
   }
@@ -295,14 +341,17 @@ public abstract class BaseTokenStreamTes
   }
   
   public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[]) throws IOException {
+    checkResetException(a, input);
     assertTokenStreamContents(a.tokenStream("dummy", input), output, startOffsets, endOffsets, types, posIncrements, null, input.length());
   }
   
   public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[], int posLengths[]) throws IOException {
+    checkResetException(a, input);
     assertTokenStreamContents(a.tokenStream("dummy", input), output, startOffsets, endOffsets, types, posIncrements, posLengths, input.length());
   }
 
   public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[], int posLengths[], boolean offsetsAreCorrect) throws IOException {
+    checkResetException(a, input);
     assertTokenStreamContents(a.tokenStream("dummy", input), output, startOffsets, endOffsets, types, posIncrements, posLengths, input.length(), offsetsAreCorrect);
   }
   
@@ -329,30 +378,43 @@ public abstract class BaseTokenStreamTes
   public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], int[] posIncrements) throws IOException {
     assertAnalyzesTo(a, input, output, startOffsets, endOffsets, null, posIncrements, null);
   }
-  
 
-  public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[]) throws IOException {
-    assertTokenStreamContents(a.tokenStream("dummy", input), output, startOffsets, endOffsets, types, posIncrements, null, input.length());
-  }
-  
-  public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output) throws IOException {
-    assertAnalyzesToReuse(a, input, output, null, null, null, null);
-  }
-  
-  public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output, String[] types) throws IOException {
-    assertAnalyzesToReuse(a, input, output, null, null, types, null);
-  }
-  
-  public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output, int[] posIncrements) throws IOException {
-    assertAnalyzesToReuse(a, input, output, null, null, null, posIncrements);
-  }
-  
-  public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[]) throws IOException {
-    assertAnalyzesToReuse(a, input, output, startOffsets, endOffsets, null, null);
-  }
-  
-  public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], int[] posIncrements) throws IOException {
-    assertAnalyzesToReuse(a, input, output, startOffsets, endOffsets, null, posIncrements);
+  static void checkResetException(Analyzer a, String input) throws IOException {
+    TokenStream ts = a.tokenStream("bogus", input);
+    try {
+      if (ts.incrementToken()) {
+        //System.out.println(ts.reflectAsString(false));
+        fail("didn't get expected exception when reset() not called");
+      }
+    } catch (IllegalStateException expected) {
+      // ok
+    } catch (AssertionError expected) {
+      // ok: MockTokenizer
+      assertTrue(expected.getMessage(), expected.getMessage() != null && expected.getMessage().contains("wrong state"));
+    } catch (Exception unexpected) {
+      unexpected.printStackTrace(System.err);
+      fail("got wrong exception when reset() not called: " + unexpected);
+    } finally {
+      // consume correctly
+      ts.reset();
+      while (ts.incrementToken()) {}
+      ts.end();
+      ts.close();
+    }
+    
+    // check for a missing close()
+    ts = a.tokenStream("bogus", input);
+    ts.reset();
+    while (ts.incrementToken()) {}
+    ts.end();
+    try {
+      ts = a.tokenStream("bogus", input);
+      fail("didn't get expected exception when close() not called");
+    } catch (IllegalStateException expected) {
+      // ok
+    } finally {
+      ts.close();
+    }
   }
 
   // simple utility method for testing stemmers
@@ -361,10 +423,6 @@ public abstract class BaseTokenStreamTes
     assertAnalyzesTo(a, input, new String[]{expected});
   }
   
-  public static void checkOneTermReuse(Analyzer a, final String input, final String expected) throws IOException {
-    assertAnalyzesToReuse(a, input, new String[]{expected});
-  }
-  
   /** utility method for blasting tokenstreams with data to make sure they don't do anything crazy */
   public static void checkRandomData(Random random, Analyzer a, int iterations) throws IOException {
     checkRandomData(random, a, iterations, 20, false, true);
@@ -430,6 +488,7 @@ public abstract class BaseTokenStreamTes
   }
 
   public static void checkRandomData(Random random, Analyzer a, int iterations, int maxWordLength, boolean simple, boolean offsetsAreCorrect) throws IOException {
+    checkResetException(a, "best effort");
     long seed = random.nextLong();
     boolean useCharFilter = random.nextBoolean();
     Directory dir = null;
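
The new checkResetException hook pins down the TokenStream workflow contract before any content assertions run. The contract it enforces, sketched (field and text values are arbitrary):

    TokenStream ts = analyzer.tokenStream("field", "some text");
    try {
      ts.reset();                   // required before the first incrementToken()
      while (ts.incrementToken()) {
        // examine attributes for each token here
      }
      ts.end();                     // reports final offset and final posInc
    } finally {
      ts.close();                   // required; see below
    }
    // Calling incrementToken() without reset() must raise IllegalStateException
    // (or MockTokenizer's "wrong state" assertion), and requesting another
    // tokenStream() from the analyzer without close() must fail the same way.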

Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/CannedTokenStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/CannedTokenStream.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/CannedTokenStream.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/CannedTokenStream.java Mon Oct 21 18:58:24 2013
@@ -17,6 +17,8 @@ package org.apache.lucene.analysis;
  * limitations under the License.
  */
 
+import java.io.IOException;
+
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
@@ -34,9 +36,28 @@ public final class CannedTokenStream ext
   private final PositionLengthAttribute posLengthAtt = addAttribute(PositionLengthAttribute.class);
   private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
   private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class);
-  
+  private final int finalOffset;
+  private final int finalPosInc;
+
   public CannedTokenStream(Token... tokens) {
     this.tokens = tokens;
+    finalOffset = 0;
+    finalPosInc = 0;
+  }
+
+  /** If you want trailing holes, pass a non-zero
+   *  finalPosInc. */
+  public CannedTokenStream(int finalPosInc, int finalOffset, Token... tokens) {
+    this.tokens = tokens;
+    this.finalOffset = finalOffset;
+    this.finalPosInc = finalPosInc;
+  }
+
+  @Override
+  public void end() throws IOException {
+    super.end();
+    posIncrAtt.setPositionIncrement(finalPosInc);
+    offsetAtt.setOffset(finalOffset, finalOffset);
   }
   
   @Override
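
The new (finalPosInc, finalOffset) constructor lets a canned stream model trailing holes. For the text "ghost story of" with the trailing "of" removed, a sketch:

    import org.apache.lucene.analysis.Token;

    // "ghost"(0,5) and "story"(6,11) survive; "of"(12,14) was dropped, so end()
    // must report one skipped trailing position and the true final offset:
    TokenStream ts = new CannedTokenStream(
        /* finalPosInc */ 1, /* finalOffset */ 14,
        new Token("ghost", 0, 5),
        new Token("story", 6, 11));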

Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/CollationTestBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/CollationTestBase.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/CollationTestBase.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/CollationTestBase.java Mon Oct 21 18:58:24 2013
@@ -258,14 +258,17 @@ public abstract class CollationTestBase 
 
     for (int i = 0; i < numTestPoints; i++) {
       String term = _TestUtil.randomSimpleString(random());
-      TokenStream ts = analyzer.tokenStream("fake", term);
-      TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
-      BytesRef bytes = termAtt.getBytesRef();
-      ts.reset();
-      assertTrue(ts.incrementToken());
-      termAtt.fillBytesRef();
-      // ensure we make a copy of the actual bytes too
-      map.put(term, BytesRef.deepCopyOf(bytes));
+      try (TokenStream ts = analyzer.tokenStream("fake", term)) {
+        TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
+        BytesRef bytes = termAtt.getBytesRef();
+        ts.reset();
+        assertTrue(ts.incrementToken());
+        termAtt.fillBytesRef();
+        // ensure we make a copy of the actual bytes too
+        map.put(term, BytesRef.deepCopyOf(bytes));
+        assertFalse(ts.incrementToken());
+        ts.end();
+      }
     }
     
     Thread threads[] = new Thread[numThreads];
@@ -277,13 +280,16 @@ public abstract class CollationTestBase 
             for (Map.Entry<String,BytesRef> mapping : map.entrySet()) {
               String term = mapping.getKey();
               BytesRef expected = mapping.getValue();
-              TokenStream ts = analyzer.tokenStream("fake", term);
-              TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
-              BytesRef bytes = termAtt.getBytesRef();
-              ts.reset();
-              assertTrue(ts.incrementToken());
-              termAtt.fillBytesRef();
-              assertEquals(expected, bytes);
+              try (TokenStream ts = analyzer.tokenStream("fake", term)) {
+                TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
+                BytesRef bytes = termAtt.getBytesRef();
+                ts.reset();
+                assertTrue(ts.incrementToken());
+                termAtt.fillBytesRef();
+                assertEquals(expected, bytes);
+                assertFalse(ts.incrementToken());
+                ts.end();
+              }
             }
           } catch (IOException e) {
             throw new RuntimeException(e);
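
The try-with-resources rewrite also highlights the shared-buffer semantics in play here: getBytesRef() returns one BytesRef that every later fillBytesRef() call overwrites, which is why the first loop stores a deep copy. Compressed:

    BytesRef shared = termAtt.getBytesRef();      // one buffer, reused per token
    ts.reset();
    assertTrue(ts.incrementToken());
    termAtt.fillBytesRef();                       // writes current term into 'shared'
    map.put(term, BytesRef.deepCopyOf(shared));   // snapshot; storing 'shared'
                                                  // itself would be mutated later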

Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/LookaheadTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/LookaheadTokenFilter.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/LookaheadTokenFilter.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/LookaheadTokenFilter.java Mon Oct 21 18:58:24 2013
@@ -106,7 +106,7 @@ public abstract class LookaheadTokenFilt
 
   /** This is called when all input tokens leaving a given
    *  position have been returned.  Override this and
-   *  call createToken and then set whichever token's
+   *  call insertToken and then set whichever token's
    *  attributes you want, if you want to inject
    *  a token starting from this position. */
   protected void afterPosition() throws IOException {
@@ -222,6 +222,18 @@ public abstract class LookaheadTokenFilt
           if (DEBUG) {
             System.out.println("  END");
           }
+          afterPosition();
+          if (insertPending) {
+            // Subclass inserted a token at this same
+            // position:
+            if (DEBUG) {
+              System.out.println("  return inserted token");
+            }
+            assert insertedTokenConsistent();
+            insertPending = false;
+            return true;
+          }
+
           return false;
         }
       } else {
@@ -260,7 +272,7 @@ public abstract class LookaheadTokenFilt
     final int posLen = posLenAtt.getPositionLength();
     final Position endPosData = positions.get(outputPos + posLen);
     assert endPosData.endOffset != -1;
-    assert offsetAtt.endOffset() == endPosData.endOffset;
+    assert offsetAtt.endOffset() == endPosData.endOffset: "offsetAtt.endOffset=" + offsetAtt.endOffset() + " vs expected=" + endPosData.endOffset;
     return true;
   }
 

Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/MockAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/MockAnalyzer.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/MockAnalyzer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/MockAnalyzer.java Mon Oct 21 18:58:24 2013
@@ -60,7 +60,7 @@ public final class MockAnalyzer extends 
    * @param filter DFA describing how terms should be filtered (set of stopwords, etc)
    */
   public MockAnalyzer(Random random, CharacterRunAutomaton runAutomaton, boolean lowerCase, CharacterRunAutomaton filter) {
-    super(new PerFieldReuseStrategy());
+    super(PER_FIELD_REUSE_STRATEGY);
     // TODO: this should be solved in a different way; Random should not be shared (!).
     this.random = new Random(random.nextLong());
     this.runAutomaton = runAutomaton;

Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/MockGraphTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/MockGraphTokenFilter.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/MockGraphTokenFilter.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/MockGraphTokenFilter.java Mon Oct 21 18:58:24 2013
@@ -105,10 +105,19 @@ public final class MockGraphTokenFilter 
   }
 
   @Override
+  public void close() throws IOException {
+    super.close();
+    this.random = null;
+  }
+
+  @Override
   public boolean incrementToken() throws IOException {
     if (DEBUG) {
       System.out.println("MockGraphTF.incr inputPos=" + inputPos + " outputPos=" + outputPos);
     }
+    if (random == null) {
+      throw new IllegalStateException("incrementToken called in wrong state!");
+    }
     return nextToken();
   }
 }

Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenFilter.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenFilter.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenFilter.java Mon Oct 21 18:58:24 2013
@@ -58,7 +58,8 @@ public final class MockTokenFilter exten
 
   private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
   private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
-  
+  private int skippedPositions;
+
   /**
    * Create a new MockTokenFilter.
    * 
@@ -76,7 +77,7 @@ public final class MockTokenFilter exten
     // initial token with posInc=0 ever
     
     // return the first non-stop word found
-    int skippedPositions = 0;
+    skippedPositions = 0;
     while (input.incrementToken()) {
       if (!filter.run(termAtt.buffer(), 0, termAtt.length())) {
         posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
@@ -87,4 +88,16 @@ public final class MockTokenFilter exten
     // reached EOS -- return false
     return false;
   }
+
+  @Override
+  public void end() throws IOException {
+    super.end();
+    posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
+  }
+
+  @Override
+  public void reset() throws IOException {
+    super.reset();
+    skippedPositions = 0;
+  }
 }
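
Promoting skippedPositions to a field is what lets end() account for stopwords dropped at the very end of the stream. A worked example against the English stopset:

    // input positions:  "ghost"(0)  "of"(1)  "christmas"(2)  "the"(3)
    // the stopset drops "of" and the trailing "the":
    //   emitted: "ghost" posInc=1, then "christmas" posInc=2  (hole from "of")
    //   end():   skippedPositions=1 is folded into the final position increment,
    //            so the trailing hole from "the" is no longer silently lost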

Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java Mon Oct 21 18:58:24 2013
@@ -64,6 +64,11 @@ public class MockTokenizer extends Token
   private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
   private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
   int off = 0;
+  
+  // buffered state (previous codepoint and offset). we replay this once we
+  // hit a reject state in case its permissible as the start of a new term.
+  int bufferedCodePoint = -1; // -1 indicates empty buffer
+  int bufferedOff = -1;
 
   // TODO: "register" with LuceneTestCase to ensure all streams are closed() ?
   // currently, we can only check that the lifecycle is correct if someone is reusing,
@@ -121,8 +126,16 @@ public class MockTokenizer extends Token
                             : "incrementToken() called while in wrong state: " + streamState;
     clearAttributes();
     for (;;) {
-      int startOffset = off;
-      int cp = readCodePoint();
+      int startOffset;
+      int cp;
+      if (bufferedCodePoint >= 0) {
+        cp = bufferedCodePoint;
+        startOffset = bufferedOff;
+        bufferedCodePoint = -1;
+      } else {
+        startOffset = off;
+        cp = readCodePoint();
+      }
       if (cp < 0) {
         break;
       } else if (isTokenChar(cp)) {
@@ -138,6 +151,14 @@ public class MockTokenizer extends Token
           cp = readCodePoint();
         } while (cp >= 0 && isTokenChar(cp));
         
+        if (termAtt.length() < maxTokenLength) {
+          // buffer up, in case the "rejected" char can start a new word of its own
+          bufferedCodePoint = cp;
+          bufferedOff = endOffset;
+        } else {
+          // otherwise, its because we hit term limit.
+          bufferedCodePoint = -1;
+        }
         int correctedStartOffset = correctOffset(startOffset);
         int correctedEndOffset = correctOffset(endOffset);
         assert correctedStartOffset >= 0;
@@ -146,8 +167,11 @@ public class MockTokenizer extends Token
         lastOffset = correctedStartOffset;
         assert correctedEndOffset >= correctedStartOffset;
         offsetAtt.setOffset(correctedStartOffset, correctedEndOffset);
-        streamState = State.INCREMENT;
-        return true;
+        if (state == -1 || runAutomaton.isAccept(state)) {
+          // either we hit a reject state (longest match), or end-of-text, but in an accept state
+          streamState = State.INCREMENT;
+          return true;
+        }
       }
     }
     streamState = State.INCREMENT_FALSE;
@@ -203,9 +227,11 @@ public class MockTokenizer extends Token
   }
 
   protected boolean isTokenChar(int c) {
-    state = runAutomaton.step(state, c);
     if (state < 0) {
       state = runAutomaton.getInitialState();
+    }
+    state = runAutomaton.step(state, c);
+    if (state < 0) {
       return false;
     } else {
       return true;
@@ -221,6 +247,7 @@ public class MockTokenizer extends Token
     super.reset();
     state = runAutomaton.getInitialState();
     lastOffset = off = 0;
+    bufferedCodePoint = -1;
     assert !enableChecks || streamState != State.RESET : "double reset()";
     streamState = State.RESET;
   }
@@ -244,6 +271,7 @@ public class MockTokenizer extends Token
 
   @Override
   public void end() throws IOException {
+    super.end();
     int finalOffset = correctOffset(off);
     offsetAtt.setOffset(finalOffset, finalOffset);
     // some tokenizers, such as limiting tokenizers, call end() before incrementToken() returns false.

Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/VocabularyAssert.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/VocabularyAssert.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/VocabularyAssert.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/VocabularyAssert.java Mon Oct 21 18:58:24 2013
@@ -41,7 +41,7 @@ public class VocabularyAssert {
     while ((inputWord = vocReader.readLine()) != null) {
       String expectedWord = outputReader.readLine();
       Assert.assertNotNull(expectedWord);
-      BaseTokenStreamTestCase.checkOneTermReuse(a, inputWord, expectedWord);
+      BaseTokenStreamTestCase.checkOneTerm(a, inputWord, expectedWord);
     }
   }
   
@@ -55,7 +55,7 @@ public class VocabularyAssert {
       if (inputLine.startsWith("#") || inputLine.trim().length() == 0)
         continue; /* comment */
       String words[] = inputLine.split("\t");
-      BaseTokenStreamTestCase.checkOneTermReuse(a, words[0], words[1]);
+      BaseTokenStreamTestCase.checkOneTerm(a, words[0], words[1]);
     }
   }
   

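This commit replaces calls to the removed checkOneTermReuse variant with checkOneTerm, which analyzes a single input and asserts that exactly one token with the expected term text is produced (the three-argument call shape is visible in the hunks above). A hedged usage sketch in a test extending BaseTokenStreamTestCase; the analyzer choice is illustrative:

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.BaseTokenStreamTestCase;
    import org.apache.lucene.analysis.MockAnalyzer;

    public class VocabularySketchTest extends BaseTokenStreamTestCase {
      public void testOnePair() throws Exception {
        // MockAnalyzer lowercases by default, so "Walking" should yield "walking"
        Analyzer a = new MockAnalyzer(random());
        checkOneTerm(a, "Walking", "walking");
      }
    }
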
Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingCodec.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingCodec.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingCodec.java Mon Oct 21 18:58:24 2013
@@ -23,10 +23,10 @@ import org.apache.lucene.codecs.NormsFor
 import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.codecs.StoredFieldsFormat;
 import org.apache.lucene.codecs.TermVectorsFormat;
-import org.apache.lucene.codecs.lucene42.Lucene42Codec;
+import org.apache.lucene.codecs.lucene46.Lucene46Codec;
 
 /**
- * Acts like {@link Lucene42Codec} but with additional asserts.
+ * Acts like {@link Lucene46Codec} but with additional asserts.
  */
 public final class AssertingCodec extends FilterCodec {
 
@@ -37,7 +37,7 @@ public final class AssertingCodec extend
   private final NormsFormat norms = new AssertingNormsFormat();
 
   public AssertingCodec() {
-    super("Asserting", new Lucene42Codec());
+    super("Asserting", new Lucene46Codec());
   }
 
   @Override

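The pattern here is FilterCodec delegation: the asserting codec keeps its own SPI name but forwards everything to the current default codec, so bumping the default is the one-line change seen above (Lucene42Codec to Lucene46Codec). A hedged sketch of the same pattern, under the assumption that only one format needs replacing; "MySketchCodec" is a hypothetical name:

    import org.apache.lucene.codecs.FilterCodec;
    import org.apache.lucene.codecs.StoredFieldsFormat;
    import org.apache.lucene.codecs.lucene46.Lucene46Codec;

    /** Wraps the default codec, overriding only what needs to change. */
    public final class MySketchCodec extends FilterCodec {
      public MySketchCodec() {
        super("MySketchCodec", new Lucene46Codec());
      }

      @Override
      public StoredFieldsFormat storedFieldsFormat() {
        // delegate is FilterCodec's wrapped codec; a real subclass would
        // typically return a wrapping or asserting format here instead
        return delegate.storedFieldsFormat();
      }
    }
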
Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDocValuesFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDocValuesFormat.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDocValuesFormat.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDocValuesFormat.java Mon Oct 21 18:58:24 2013
@@ -24,7 +24,7 @@ import java.util.NoSuchElementException;
 import org.apache.lucene.codecs.DocValuesConsumer;
 import org.apache.lucene.codecs.DocValuesFormat;
 import org.apache.lucene.codecs.DocValuesProducer;
-import org.apache.lucene.codecs.lucene42.Lucene42DocValuesFormat;
+import org.apache.lucene.codecs.lucene45.Lucene45DocValuesFormat;
 import org.apache.lucene.index.AssertingAtomicReader;
 import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.index.FieldInfo;
@@ -33,15 +33,17 @@ import org.apache.lucene.index.SegmentRe
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.index.SortedDocValues;
 import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.OpenBitSet;
+import org.apache.lucene.util.RamUsageEstimator;
 
 /**
- * Just like {@link Lucene42DocValuesFormat} but with additional asserts.
+ * Just like {@link Lucene45DocValuesFormat} but with additional asserts.
  */
 public class AssertingDocValuesFormat extends DocValuesFormat {
-  private final DocValuesFormat in = new Lucene42DocValuesFormat();
+  private final DocValuesFormat in = new Lucene45DocValuesFormat();
   
   public AssertingDocValuesFormat() {
     super("Asserting");
@@ -75,11 +77,10 @@ public class AssertingDocValuesFormat ex
     public void addNumericField(FieldInfo field, Iterable<Number> values) throws IOException {
       int count = 0;
       for (Number v : values) {
-        assert v != null;
         count++;
       }
       assert count == maxDoc;
-      checkIterator(values.iterator(), maxDoc);
+      checkIterator(values.iterator(), maxDoc, true);
       in.addNumericField(field, values);
     }
     
@@ -87,12 +88,11 @@ public class AssertingDocValuesFormat ex
     public void addBinaryField(FieldInfo field, Iterable<BytesRef> values) throws IOException {
       int count = 0;
       for (BytesRef b : values) {
-        assert b != null;
-        assert b.isValid();
+        assert b == null || b.isValid();
         count++;
       }
       assert count == maxDoc;
-      checkIterator(values.iterator(), maxDoc);
+      checkIterator(values.iterator(), maxDoc, true);
       in.addBinaryField(field, values);
     }
     
@@ -117,15 +117,17 @@ public class AssertingDocValuesFormat ex
       for (Number v : docToOrd) {
         assert v != null;
         int ord = v.intValue();
-        assert ord >= 0 && ord < valueCount;
-        seenOrds.set(ord);
+        assert ord >= -1 && ord < valueCount;
+        if (ord >= 0) {
+          seenOrds.set(ord);
+        }
         count++;
       }
       
       assert count == maxDoc;
       assert seenOrds.cardinality() == valueCount;
-      checkIterator(values.iterator(), valueCount);
-      checkIterator(docToOrd.iterator(), maxDoc);
+      checkIterator(values.iterator(), valueCount, false);
+      checkIterator(docToOrd.iterator(), maxDoc, false);
       in.addSortedField(field, values, docToOrd);
     }
     
@@ -169,38 +171,80 @@ public class AssertingDocValuesFormat ex
       
       assert docCount == maxDoc;
       assert seenOrds.cardinality() == valueCount;
-      checkIterator(values.iterator(), valueCount);
-      checkIterator(docToOrdCount.iterator(), maxDoc);
-      checkIterator(ords.iterator(), ordCount);
+      checkIterator(values.iterator(), valueCount, false);
+      checkIterator(docToOrdCount.iterator(), maxDoc, false);
+      checkIterator(ords.iterator(), ordCount, false);
       in.addSortedSetField(field, values, docToOrdCount, ords);
     }
+    
+    @Override
+    public void close() throws IOException {
+      in.close();
+    }
+  }
+  
+  static class AssertingNormsConsumer extends DocValuesConsumer {
+    private final DocValuesConsumer in;
+    private final int maxDoc;
+    
+    AssertingNormsConsumer(DocValuesConsumer in, int maxDoc) {
+      this.in = in;
+      this.maxDoc = maxDoc;
+    }
 
-    private <T> void checkIterator(Iterator<T> iterator, long expectedSize) {
-      for (long i = 0; i < expectedSize; i++) {
-        boolean hasNext = iterator.hasNext();
-        assert hasNext;
-        T v = iterator.next();
+    @Override
+    public void addNumericField(FieldInfo field, Iterable<Number> values) throws IOException {
+      int count = 0;
+      for (Number v : values) {
         assert v != null;
-        try {
-          iterator.remove();
-          throw new AssertionError("broken iterator (supports remove): " + iterator);
-        } catch (UnsupportedOperationException expected) {
-          // ok
-        }
-      }
-      assert !iterator.hasNext();
-      try {
-        iterator.next();
-        throw new AssertionError("broken iterator (allows next() when hasNext==false) " + iterator);
-      } catch (NoSuchElementException expected) {
-        // ok
+        count++;
       }
+      assert count == maxDoc;
+      checkIterator(values.iterator(), maxDoc, false);
+      in.addNumericField(field, values);
     }
-    
+
     @Override
     public void close() throws IOException {
       in.close();
     }
+
+    @Override
+    public void addBinaryField(FieldInfo field, Iterable<BytesRef> values) throws IOException {
+      throw new IllegalStateException();
+    }
+
+    @Override
+    public void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) throws IOException {
+      throw new IllegalStateException();
+    }
+
+    @Override
+    public void addSortedSetField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrdCount, Iterable<Number> ords) throws IOException {
+      throw new IllegalStateException();
+    }
+  }
+  
+  private static <T> void checkIterator(Iterator<T> iterator, long expectedSize, boolean allowNull) {
+    for (long i = 0; i < expectedSize; i++) {
+      boolean hasNext = iterator.hasNext();
+      assert hasNext;
+      T v = iterator.next();
+      assert allowNull || v != null;
+      try {
+        iterator.remove();
+        throw new AssertionError("broken iterator (supports remove): " + iterator);
+      } catch (UnsupportedOperationException expected) {
+        // ok
+      }
+    }
+    assert !iterator.hasNext();
+    try {
+      iterator.next();
+      throw new AssertionError("broken iterator (allows next() when hasNext==false) " + iterator);
+    } catch (NoSuchElementException expected) {
+      // ok
+    }
   }
   
   static class AssertingDocValuesProducer extends DocValuesProducer {
@@ -244,10 +288,24 @@ public class AssertingDocValuesFormat ex
       assert values != null;
       return new AssertingAtomicReader.AssertingSortedSetDocValues(values, maxDoc);
     }
+    
+    @Override
+    public Bits getDocsWithField(FieldInfo field) throws IOException {
+      assert field.getDocValuesType() != null;
+      Bits bits = in.getDocsWithField(field);
+      assert bits != null;
+      assert bits.length() == maxDoc;
+      return new AssertingAtomicReader.AssertingBits(bits);
+    }
 
     @Override
     public void close() throws IOException {
       in.close();
     }
+
+    @Override
+    public long ramBytesUsed() {
+      return in.ramBytesUsed();
+    }
   }
 }

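checkIterator, now hoisted to a static helper with an allowNull flag (docvalues iterators above may yield nulls for missing values; norms iterators may not), enforces the Iterator contract plus an exact element count. A standalone sketch of the same contract check, runnable with assertions enabled (java -ea); the demo list is illustrative:

    import java.util.Arrays;
    import java.util.Collections;
    import java.util.Iterator;
    import java.util.NoSuchElementException;

    public class IteratorContractSketch {
      /** Asserts the iterator yields exactly expectedSize elements, rejects
       *  remove(), throws NoSuchElementException once exhausted, and, unless
       *  allowNull is set, never yields null. */
      static <T> void checkIterator(Iterator<T> it, long expectedSize, boolean allowNull) {
        for (long i = 0; i < expectedSize; i++) {
          assert it.hasNext();
          T v = it.next();
          assert allowNull || v != null;
          try {
            it.remove();
            throw new AssertionError("broken iterator (supports remove): " + it);
          } catch (UnsupportedOperationException expected) {
            // ok: a well-behaved read-only iterator refuses remove()
          }
        }
        assert !it.hasNext();
        try {
          it.next();
          throw new AssertionError("broken iterator (allows next() when hasNext==false): " + it);
        } catch (NoSuchElementException expected) {
          // ok
        }
      }

      public static void main(String[] args) {
        // unmodifiable iterators are guaranteed to reject remove()
        checkIterator(Collections.unmodifiableList(Arrays.asList("a", "b", "c")).iterator(), 3, false);
        System.out.println("iterator contract ok");
      }
    }
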
Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingNormsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingNormsFormat.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingNormsFormat.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingNormsFormat.java Mon Oct 21 18:58:24 2013
@@ -22,7 +22,7 @@ import java.io.IOException;
 import org.apache.lucene.codecs.DocValuesConsumer;
 import org.apache.lucene.codecs.DocValuesProducer;
 import org.apache.lucene.codecs.NormsFormat;
-import org.apache.lucene.codecs.asserting.AssertingDocValuesFormat.AssertingDocValuesConsumer;
+import org.apache.lucene.codecs.asserting.AssertingDocValuesFormat.AssertingNormsConsumer;
 import org.apache.lucene.codecs.asserting.AssertingDocValuesFormat.AssertingDocValuesProducer;
 import org.apache.lucene.codecs.lucene42.Lucene42NormsFormat;
 import org.apache.lucene.index.SegmentReadState;
@@ -38,7 +38,7 @@ public class AssertingNormsFormat extend
   public DocValuesConsumer normsConsumer(SegmentWriteState state) throws IOException {
     DocValuesConsumer consumer = in.normsConsumer(state);
     assert consumer != null;
-    return new AssertingDocValuesConsumer(consumer, state.segmentInfo.getDocCount());
+    return new AssertingNormsConsumer(consumer, state.segmentInfo.getDocCount());
   }
 
   @Override

Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java Mon Oct 21 18:58:24 2013
@@ -18,24 +18,22 @@ package org.apache.lucene.codecs.asserti
  */
 
 import java.io.IOException;
-import java.util.Comparator;
 import java.util.Iterator;
 
 import org.apache.lucene.codecs.FieldsConsumer;
 import org.apache.lucene.codecs.FieldsProducer;
-import org.apache.lucene.codecs.PostingsConsumer;
 import org.apache.lucene.codecs.PostingsFormat;
-import org.apache.lucene.codecs.TermStats;
-import org.apache.lucene.codecs.TermsConsumer;
 import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat;
 import org.apache.lucene.index.AssertingAtomicReader;
+import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.FieldInfo;
-import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.OpenBitSet;
 
 /**
  * Just like {@link Lucene41PostingsFormat} but with additional asserts.
@@ -49,7 +47,7 @@ public final class AssertingPostingsForm
   
   @Override
   public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
-    return new AssertingFieldsConsumer(in.fieldsConsumer(state));
+    return new AssertingFieldsConsumer(state, in.fieldsConsumer(state));
   }
 
   @Override
@@ -86,165 +84,125 @@ public final class AssertingPostingsForm
     public int size() {
       return in.size();
     }
-  }
-  
-  static class AssertingFieldsConsumer extends FieldsConsumer {
-    private final FieldsConsumer in;
-    
-    AssertingFieldsConsumer(FieldsConsumer in) {
-      this.in = in;
-    }
-    
-    @Override
-    public TermsConsumer addField(FieldInfo field) throws IOException {
-      TermsConsumer consumer = in.addField(field);
-      assert consumer != null;
-      return new AssertingTermsConsumer(consumer, field);
-    }
 
     @Override
-    public void close() throws IOException {
-      in.close();
+    public long ramBytesUsed() {
+      return in.ramBytesUsed();
     }
   }
-  
-  static enum TermsConsumerState { INITIAL, START, FINISHED };
-  static class AssertingTermsConsumer extends TermsConsumer {
-    private final TermsConsumer in;
-    private final FieldInfo fieldInfo;
-    private BytesRef lastTerm = null;
-    private TermsConsumerState state = TermsConsumerState.INITIAL;
-    private AssertingPostingsConsumer lastPostingsConsumer = null;
-    private long sumTotalTermFreq = 0;
-    private long sumDocFreq = 0;
-    private OpenBitSet visitedDocs = new OpenBitSet();
-    
-    AssertingTermsConsumer(TermsConsumer in, FieldInfo fieldInfo) {
-      this.in = in;
-      this.fieldInfo = fieldInfo;
-    }
-    
-    @Override
-    public PostingsConsumer startTerm(BytesRef text) throws IOException {
-      assert state == TermsConsumerState.INITIAL || state == TermsConsumerState.START && lastPostingsConsumer.docFreq == 0;
-      state = TermsConsumerState.START;
-      assert lastTerm == null || in.getComparator().compare(text, lastTerm) > 0;
-      lastTerm = BytesRef.deepCopyOf(text);
-      return lastPostingsConsumer = new AssertingPostingsConsumer(in.startTerm(text), fieldInfo, visitedDocs);
-    }
 
-    @Override
-    public void finishTerm(BytesRef text, TermStats stats) throws IOException {
-      assert state == TermsConsumerState.START;
-      state = TermsConsumerState.INITIAL;
-      assert text.equals(lastTerm);
-      assert stats.docFreq > 0; // otherwise, this method should not be called.
-      assert stats.docFreq == lastPostingsConsumer.docFreq;
-      sumDocFreq += stats.docFreq;
-      if (fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY) {
-        assert stats.totalTermFreq == -1;
-      } else {
-        assert stats.totalTermFreq == lastPostingsConsumer.totalTermFreq;
-        sumTotalTermFreq += stats.totalTermFreq;
-      }
-      in.finishTerm(text, stats);
-    }
-
-    @Override
-    public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException {
-      assert state == TermsConsumerState.INITIAL || state == TermsConsumerState.START && lastPostingsConsumer.docFreq == 0;
-      state = TermsConsumerState.FINISHED;
-      assert docCount >= 0;
-      assert docCount == visitedDocs.cardinality();
-      assert sumDocFreq >= docCount;
-      assert sumDocFreq == this.sumDocFreq;
-      if (fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY) {
-        assert sumTotalTermFreq == -1;
-      } else {
-        assert sumTotalTermFreq >= sumDocFreq;
-        assert sumTotalTermFreq == this.sumTotalTermFreq;
-      }
-      in.finish(sumTotalTermFreq, sumDocFreq, docCount);
-    }
+  static class AssertingFieldsConsumer extends FieldsConsumer {
+    private final FieldsConsumer in;
+    private final SegmentWriteState writeState;
 
-    @Override
-    public Comparator<BytesRef> getComparator() throws IOException {
-      return in.getComparator();
-    }
-  }
-  
-  static enum PostingsConsumerState { INITIAL, START };
-  static class AssertingPostingsConsumer extends PostingsConsumer {
-    private final PostingsConsumer in;
-    private final FieldInfo fieldInfo;
-    private final OpenBitSet visitedDocs;
-    private PostingsConsumerState state = PostingsConsumerState.INITIAL;
-    private int freq;
-    private int positionCount;
-    private int lastPosition = 0;
-    private int lastStartOffset = 0;
-    int docFreq = 0;
-    long totalTermFreq = 0;
-    
-    AssertingPostingsConsumer(PostingsConsumer in, FieldInfo fieldInfo, OpenBitSet visitedDocs) {
+    AssertingFieldsConsumer(SegmentWriteState writeState, FieldsConsumer in) {
+      this.writeState = writeState;
       this.in = in;
-      this.fieldInfo = fieldInfo;
-      this.visitedDocs = visitedDocs;
     }
-
-    @Override
-    public void startDoc(int docID, int freq) throws IOException {
-      assert state == PostingsConsumerState.INITIAL;
-      state = PostingsConsumerState.START;
-      assert docID >= 0;
-      if (fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY) {
-        assert freq == -1;
-        this.freq = 0; // we don't expect any positions here
-      } else {
-        assert freq > 0;
-        this.freq = freq;
-        totalTermFreq += freq;
-      }
-      this.positionCount = 0;
-      this.lastPosition = 0;
-      this.lastStartOffset = 0;
-      docFreq++;
-      visitedDocs.set(docID);
-      in.startDoc(docID, freq);
-    }
-
+    
     @Override
-    public void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException {
-      assert state == PostingsConsumerState.START;
-      assert positionCount < freq;
-      positionCount++;
-      assert position >= lastPosition || position == -1; /* we still allow -1 from old 3.x indexes */
-      lastPosition = position;
-      if (fieldInfo.getIndexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) {
-        assert startOffset >= 0;
-        assert startOffset >= lastStartOffset;
-        lastStartOffset = startOffset;
-        assert endOffset >= startOffset;
-      } else {
-        assert startOffset == -1;
-        assert endOffset == -1;
-      }
-      if (payload != null) {
-        assert fieldInfo.hasPayloads();
-      }
-      in.addPosition(position, payload, startOffset, endOffset);
-    }
+    public void write(Fields fields) throws IOException {
+      in.write(fields);
 
-    @Override
-    public void finishDoc() throws IOException {
-      assert state == PostingsConsumerState.START;
-      state = PostingsConsumerState.INITIAL;
-      if (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
-        assert positionCount == 0; // we should not have fed any positions!
-      } else {
-        assert positionCount == freq;
+      // TODO: more asserts?  can we somehow run a
+      // "limited" CheckIndex here???  Or ... can we improve
+      // AssertingFieldsProducer and use it also to wrap the
+      // incoming Fields here?
+ 
+      String lastField = null;
+      TermsEnum termsEnum = null;
+
+      for(String field : fields) {
+
+        FieldInfo fieldInfo = writeState.fieldInfos.fieldInfo(field);
+        assert fieldInfo != null;
+        assert lastField == null || lastField.compareTo(field) < 0;
+        lastField = field;
+
+        Terms terms = fields.terms(field);
+        if (terms == null) {
+          continue;
+        }
+        assert terms != null;
+
+        termsEnum = terms.iterator(termsEnum);
+        BytesRef lastTerm = null;
+        DocsEnum docsEnum = null;
+        DocsAndPositionsEnum posEnum = null;
+
+        boolean hasFreqs = fieldInfo.getIndexOptions().compareTo(FieldInfo.IndexOptions.DOCS_AND_FREQS) >= 0;
+        boolean hasPositions = fieldInfo.getIndexOptions().compareTo(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+        boolean hasOffsets = fieldInfo.getIndexOptions().compareTo(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+        boolean hasPayloads = terms.hasPayloads();
+
+        assert hasPositions == terms.hasPositions();
+        assert hasOffsets == terms.hasOffsets();
+
+        while(true) {
+          BytesRef term = termsEnum.next();
+          if (term == null) {
+            break;
+          }
+          assert lastTerm == null || lastTerm.compareTo(term) < 0;
+          if (lastTerm == null) {
+            lastTerm = BytesRef.deepCopyOf(term);
+          } else {
+            lastTerm.copyBytes(term);
+          }
+
+          int flags = 0;
+          if (hasPositions == false) {
+            if (hasFreqs) {
+              flags = flags | DocsEnum.FLAG_FREQS;
+            }
+            docsEnum = termsEnum.docs(null, docsEnum, flags);
+          } else {
+            if (hasPayloads) {
+              flags |= DocsAndPositionsEnum.FLAG_PAYLOADS;
+            }
+            if (hasOffsets) {
+              flags = flags | DocsAndPositionsEnum.FLAG_OFFSETS;
+            }
+            posEnum = termsEnum.docsAndPositions(null, posEnum, flags);
+            docsEnum = posEnum;
+          }
+
+          assert docsEnum != null : "termsEnum=" + termsEnum + " hasPositions=" + hasPositions;
+
+          int lastDocID = -1;
+
+          while(true) {
+            int docID = docsEnum.nextDoc();
+            if (docID == DocsEnum.NO_MORE_DOCS) {
+              break;
+            }
+            assert docID > lastDocID;
+            lastDocID = docID;
+            if (hasFreqs) {
+              int freq = docsEnum.freq();
+              assert freq > 0;
+
+              if (hasPositions) {
+                int lastPos = -1;
+                int lastStartOffset = -1;
+                for(int i=0;i<freq;i++) {
+                  int pos = posEnum.nextPosition();
+                  assert pos >= lastPos: "pos=" + pos + " vs lastPos=" + lastPos + " i=" + i + " freq=" + freq;
+                  lastPos = pos;
+
+                  if (hasOffsets) {
+                    int startOffset = posEnum.startOffset();
+                    int endOffset = posEnum.endOffset();
+                    assert endOffset >= startOffset;
+                    assert startOffset >= lastStartOffset;
+                    lastStartOffset = startOffset;
+                  }
+                }
+              }
+            }
+          }
+        }
       }
-      in.finishDoc();
     }
   }
 }

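AssertingPostingsFormat here moves from the old push API (TermsConsumer/PostingsConsumer callbacks) to the pull API: the codec receives a Fields instance and walks fields, terms, docs, and positions itself, asserting ordering invariants along the way. The same traversal shape applies on the read side; below is a hedged sketch against an AtomicReader, reusing enums across terms as the consumer above does (standard Lucene 4.x calls, with error handling elided):

    import java.io.IOException;
    import org.apache.lucene.index.AtomicReader;
    import org.apache.lucene.index.DocsEnum;
    import org.apache.lucene.index.Fields;
    import org.apache.lucene.index.Terms;
    import org.apache.lucene.index.TermsEnum;
    import org.apache.lucene.util.BytesRef;

    public class PullApiSketch {
      /** Counts postings by walking fields -> terms -> docs, pull-style. */
      static long countPostings(AtomicReader reader) throws IOException {
        long postings = 0;
        Fields fields = reader.fields();
        if (fields == null) {
          return 0;
        }
        TermsEnum termsEnum = null;
        DocsEnum docsEnum = null;
        for (String field : fields) {
          Terms terms = fields.terms(field);
          if (terms == null) {
            continue; // a field can exist without inverted content
          }
          termsEnum = terms.iterator(termsEnum);
          BytesRef term;
          while ((term = termsEnum.next()) != null) {
            docsEnum = termsEnum.docs(null, docsEnum, DocsEnum.FLAG_FREQS);
            for (int doc = docsEnum.nextDoc(); doc != DocsEnum.NO_MORE_DOCS; doc = docsEnum.nextDoc()) {
              postings++; // term occurs docsEnum.freq() times in doc
            }
          }
        }
        return postings;
      }
    }
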
Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingStoredFieldsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingStoredFieldsFormat.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingStoredFieldsFormat.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingStoredFieldsFormat.java Mon Oct 21 18:58:24 2013
@@ -71,6 +71,11 @@ public class AssertingStoredFieldsFormat
     public StoredFieldsReader clone() {
       return new AssertingStoredFieldsReader(in.clone(), maxDoc);
     }
+
+    @Override
+    public long ramBytesUsed() {
+      return in.ramBytesUsed();
+    }
   }
 
   enum Status {

Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingTermVectorsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingTermVectorsFormat.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingTermVectorsFormat.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingTermVectorsFormat.java Mon Oct 21 18:58:24 2013
@@ -18,7 +18,6 @@ package org.apache.lucene.codecs.asserti
  */
 
 import java.io.IOException;
-import java.util.Comparator;
 
 import org.apache.lucene.codecs.TermVectorsFormat;
 import org.apache.lucene.codecs.TermVectorsReader;
@@ -71,6 +70,11 @@ public class AssertingTermVectorsFormat 
     public TermVectorsReader clone() {
       return new AssertingTermVectorsReader(in.clone());
     }
+
+    @Override
+    public long ramBytesUsed() {
+      return in.ramBytesUsed();
+    }
   }
 
   enum Status {
@@ -176,11 +180,6 @@ public class AssertingTermVectorsFormat 
     }
 
     @Override
-    public Comparator<BytesRef> getComparator() throws IOException {
-      return in.getComparator();
-    }
-
-    @Override
     public void close() throws IOException {
       in.close();
     }

Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/bloom/TestBloomFilteredLucene41Postings.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/bloom/TestBloomFilteredLucene41Postings.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/bloom/TestBloomFilteredLucene41Postings.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/bloom/TestBloomFilteredLucene41Postings.java Mon Oct 21 18:58:24 2013
@@ -71,4 +71,9 @@ public final class TestBloomFilteredLuce
       throws IOException {
     return delegate.fieldsProducer(state);
   }
+
+  @Override
+  public String toString() {
+    return "TestBloomFilteredLucene41Postings(" + delegate + ")";
+  }
 }

Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardCodec.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardCodec.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardCodec.java Mon Oct 21 18:58:24 2013
@@ -23,10 +23,12 @@ import org.apache.lucene.codecs.NormsFor
 import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.codecs.StoredFieldsFormat;
 import org.apache.lucene.codecs.TermVectorsFormat;
+import org.apache.lucene.codecs.diskdv.DiskDocValuesFormat;
+import org.apache.lucene.codecs.diskdv.DiskNormsFormat;
 import org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsFormat;
 import org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat;
 import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat;
-import org.apache.lucene.codecs.lucene42.Lucene42Codec;
+import org.apache.lucene.codecs.lucene46.Lucene46Codec;
 
 /** Codec that tries to use as little ram as possible because he spent all his money on beer */
 // TODO: better name :) 
@@ -39,13 +41,14 @@ public class CheapBastardCodec extends F
   private final StoredFieldsFormat storedFields = new Lucene40StoredFieldsFormat();
   private final TermVectorsFormat termVectors = new Lucene40TermVectorsFormat();
   // these go to disk for all docvalues/norms datastructures
-  private final DocValuesFormat docValues = new CheapBastardDocValuesFormat();
-  private final NormsFormat norms = new CheapBastardNormsFormat();
+  private final DocValuesFormat docValues = new DiskDocValuesFormat();
+  private final NormsFormat norms = new DiskNormsFormat();
 
   public CheapBastardCodec() {
-    super("CheapBastard", new Lucene42Codec());
+    super("CheapBastard", new Lucene46Codec());
   }
   
+  @Override
   public PostingsFormat postingsFormat() {
     return postings;
   }

Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/CompressingCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/CompressingCodec.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/CompressingCodec.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/CompressingCodec.java Mon Oct 21 18:58:24 2013
@@ -23,13 +23,13 @@ import org.apache.lucene.codecs.FilterCo
 import org.apache.lucene.codecs.StoredFieldsFormat;
 import org.apache.lucene.codecs.TermVectorsFormat;
 import org.apache.lucene.codecs.compressing.dummy.DummyCompressingCodec;
-import org.apache.lucene.codecs.lucene42.Lucene42Codec;
+import org.apache.lucene.codecs.lucene46.Lucene46Codec;
 
 import com.carrotsearch.randomizedtesting.generators.RandomInts;
 
 /**
  * A codec that uses {@link CompressingStoredFieldsFormat} for its stored
- * fields and delegates to {@link Lucene42Codec} for everything else.
+ * fields and delegates to {@link Lucene46Codec} for everything else.
  */
 public abstract class CompressingCodec extends FilterCodec {
 
@@ -73,7 +73,7 @@ public abstract class CompressingCodec e
    * Creates a compressing codec with a given segment suffix
    */
   public CompressingCodec(String name, String segmentSuffix, CompressionMode compressionMode, int chunkSize) {
-    super(name, new Lucene42Codec());
+    super(name, new Lucene46Codec());
     this.storedFieldsFormat = new CompressingStoredFieldsFormat(name, segmentSuffix, compressionMode, chunkSize);
     this.termVectorsFormat = new CompressingTermVectorsFormat(name, segmentSuffix, compressionMode, chunkSize);
   }

Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/FastCompressingCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/FastCompressingCodec.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/FastCompressingCodec.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/FastCompressingCodec.java Mon Oct 21 18:58:24 2013
@@ -1,8 +1,6 @@
 package org.apache.lucene.codecs.compressing;
 
-import org.apache.lucene.codecs.DocValuesFormat;
 import org.apache.lucene.codecs.NormsFormat;
-import org.apache.lucene.codecs.lucene42.Lucene42DocValuesFormat;
 import org.apache.lucene.codecs.lucene42.Lucene42NormsFormat;
 import org.apache.lucene.util.packed.PackedInts;
 
@@ -42,9 +40,4 @@ public class FastCompressingCodec extend
   public NormsFormat normsFormat() {
     return new Lucene42NormsFormat(PackedInts.FAST);
   }
-
-  @Override
-  public DocValuesFormat docValuesFormat() {
-    return new Lucene42DocValuesFormat(PackedInts.FAST);
-  }
 }