Posted to commits@lucene.apache.org by rm...@apache.org on 2013/10/21 20:58:44 UTC

svn commit: r1534320 [6/39] - in /lucene/dev/branches/lucene4956: ./ dev-tools/ dev-tools/idea/.idea/ dev-tools/idea/lucene/expressions/ dev-tools/idea/solr/contrib/velocity/ dev-tools/maven/ dev-tools/maven/lucene/ dev-tools/maven/lucene/expressions/ ...

Modified: lucene/dev/branches/lucene4956/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java Mon Oct 21 18:58:24 2013
@@ -44,16 +44,16 @@ public class TestMorfologikAnalyzer exte
   /** Test stemming of single tokens with Morfologik library. */
   public final void testSingleTokens() throws IOException {
     Analyzer a = getTestAnalyzer();
-    assertAnalyzesToReuse(a, "a", new String[] { "a" });
-    assertAnalyzesToReuse(a, "liście", new String[] { "liście", "liść", "list", "lista" });
-    assertAnalyzesToReuse(a, "danych", new String[] { "dany", "dana", "dane", "dać" });
-    assertAnalyzesToReuse(a, "ęóąśłżźćń", new String[] { "ęóąśłżźćń" });
+    assertAnalyzesTo(a, "a", new String[] { "a" });
+    assertAnalyzesTo(a, "liście", new String[] { "liście", "liść", "list", "lista" });
+    assertAnalyzesTo(a, "danych", new String[] { "dany", "dana", "dane", "dać" });
+    assertAnalyzesTo(a, "ęóąśłżźćń", new String[] { "ęóąśłżźćń" });
   }
 
   /** Test stemming of multiple tokens and proper term metrics. */
   public final void testMultipleTokens() throws IOException {
     Analyzer a = getTestAnalyzer();
-    assertAnalyzesToReuse(
+    assertAnalyzesTo(
       a,
       "liście danych",
       new String[] { "liście", "liść", "list", "lista", "dany", "dana", "dane", "dać" },
@@ -61,7 +61,7 @@ public class TestMorfologikAnalyzer exte
       new int[] { 6, 6, 6, 6, 13, 13, 13, 13 },
       new int[] { 1, 0, 0, 0, 1, 0, 0, 0 });
 
-    assertAnalyzesToReuse(
+    assertAnalyzesTo(
         a,
         "T. Gl\u00FCcksberg",
         new String[] { "tom", "tona", "Gl\u00FCcksberg" },
@@ -72,50 +72,52 @@ public class TestMorfologikAnalyzer exte
 
   @SuppressWarnings("unused")
   private void dumpTokens(String input) throws IOException {
-    TokenStream ts = getTestAnalyzer().tokenStream("dummy", input);
-    ts.reset();
+    try (TokenStream ts = getTestAnalyzer().tokenStream("dummy", input)) {
+      ts.reset();
 
-    MorphosyntacticTagsAttribute attribute = ts.getAttribute(MorphosyntacticTagsAttribute.class);
-    CharTermAttribute charTerm = ts.getAttribute(CharTermAttribute.class);
-    while (ts.incrementToken()) {
-      System.out.println(charTerm.toString() + " => " + attribute.getTags());
+      MorphosyntacticTagsAttribute attribute = ts.getAttribute(MorphosyntacticTagsAttribute.class);
+      CharTermAttribute charTerm = ts.getAttribute(CharTermAttribute.class);
+      while (ts.incrementToken()) {
+        System.out.println(charTerm.toString() + " => " + attribute.getTags());
+      }
+      ts.end();
     }
   }
 
   /** Test reuse of MorfologikFilter with leftover stems. */
   public final void testLeftoverStems() throws IOException {
     Analyzer a = getTestAnalyzer();
-    TokenStream ts_1 = a.tokenStream("dummy", "liście");
-    CharTermAttribute termAtt_1 = ts_1.getAttribute(CharTermAttribute.class);
-    ts_1.reset();
-    ts_1.incrementToken();
-    assertEquals("first stream", "liście", termAtt_1.toString());
-    ts_1.end();
-    ts_1.close();
-
-    TokenStream ts_2 = a.tokenStream("dummy", "danych");
-    CharTermAttribute termAtt_2 = ts_2.getAttribute(CharTermAttribute.class);
-    ts_2.reset();
-    ts_2.incrementToken();
-    assertEquals("second stream", "dany", termAtt_2.toString());
-    ts_2.end();
-    ts_2.close();
+    try (TokenStream ts_1 = a.tokenStream("dummy", "liście")) {
+      CharTermAttribute termAtt_1 = ts_1.getAttribute(CharTermAttribute.class);
+      ts_1.reset();
+      ts_1.incrementToken();
+      assertEquals("first stream", "liście", termAtt_1.toString());
+      ts_1.end();
+    }
+
+    try (TokenStream ts_2 = a.tokenStream("dummy", "danych")) {
+      CharTermAttribute termAtt_2 = ts_2.getAttribute(CharTermAttribute.class);
+      ts_2.reset();
+      ts_2.incrementToken();
+      assertEquals("second stream", "dany", termAtt_2.toString());
+      ts_2.end();
+    }
   }
 
   /** Test stemming of mixed-case tokens. */
   public final void testCase() throws IOException {
     Analyzer a = getTestAnalyzer();
 
-    assertAnalyzesToReuse(a, "AGD",      new String[] { "AGD", "artykuły gospodarstwa domowego" });
-    assertAnalyzesToReuse(a, "agd",      new String[] { "artykuły gospodarstwa domowego" });
+    assertAnalyzesTo(a, "AGD",      new String[] { "AGD", "artykuły gospodarstwa domowego" });
+    assertAnalyzesTo(a, "agd",      new String[] { "artykuły gospodarstwa domowego" });
 
-    assertAnalyzesToReuse(a, "Poznania", new String[] { "Poznań" });
-    assertAnalyzesToReuse(a, "poznania", new String[] { "poznanie", "poznać" });
+    assertAnalyzesTo(a, "Poznania", new String[] { "Poznań" });
+    assertAnalyzesTo(a, "poznania", new String[] { "poznanie", "poznać" });
 
-    assertAnalyzesToReuse(a, "Aarona",   new String[] { "Aaron" });
-    assertAnalyzesToReuse(a, "aarona",   new String[] { "aarona" });
+    assertAnalyzesTo(a, "Aarona",   new String[] { "Aaron" });
+    assertAnalyzesTo(a, "aarona",   new String[] { "aarona" });
 
-    assertAnalyzesToReuse(a, "Liście",   new String[] { "liście", "liść", "list", "lista" });
+    assertAnalyzesTo(a, "Liście",   new String[] { "liście", "liść", "list", "lista" });
   }
 
   private void assertPOSToken(TokenStream ts, String term, String... tags) throws IOException {
@@ -140,28 +142,27 @@ public class TestMorfologikAnalyzer exte
 
   /** Test morphosyntactic annotations. */
   public final void testPOSAttribute() throws IOException {
-    TokenStream ts = getTestAnalyzer().tokenStream("dummy", "liście");
-
-    ts.reset();
-    assertPOSToken(ts, "liście",  
+    try (TokenStream ts = getTestAnalyzer().tokenStream("dummy", "liście")) {
+      ts.reset();
+      assertPOSToken(ts, "liście",  
         "subst:sg:acc:n2",
         "subst:sg:nom:n2",
         "subst:sg:voc:n2");
 
-    assertPOSToken(ts, "liść",  
+      assertPOSToken(ts, "liść",  
         "subst:pl:acc:m3",
         "subst:pl:nom:m3",
         "subst:pl:voc:m3");
 
-    assertPOSToken(ts, "list",  
+      assertPOSToken(ts, "list",  
         "subst:sg:loc:m3",
         "subst:sg:voc:m3");
 
-    assertPOSToken(ts, "lista", 
+      assertPOSToken(ts, "lista", 
         "subst:sg:dat:f",
         "subst:sg:loc:f");
-    ts.end();
-    ts.close();
+      ts.end();
+    }
   }
 
   /** */
@@ -183,7 +184,7 @@ public class TestMorfologikAnalyzer exte
       }
     };
 
-    assertAnalyzesToReuse(
+    assertAnalyzesTo(
       a,
       "liście danych",
       new String[] { "liście", "dany", "dana", "dane", "dać" },
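
All of the changes above follow the same two patterns: assertAnalyzesToReuse collapses into plain assertAnalyzesTo, and manual stream cleanup moves into try-with-resources so close() runs even when an assertion fails. A minimal sketch of the consumption contract the migrated code follows (the analyzer and field names here are placeholders, not part of the commit):

    try (TokenStream ts = analyzer.tokenStream("field", "some input")) {
      CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
      ts.reset();                  // required before the first incrementToken()
      while (ts.incrementToken()) {
        System.out.println(term.toString());
      }
      ts.end();                    // records end-of-stream state (final offset)
    }                              // close() happens implicitly here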

Modified: lucene/dev/branches/lucene4956/lucene/analysis/phonetic/ivy.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/phonetic/ivy.xml?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/phonetic/ivy.xml (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/phonetic/ivy.xml Mon Oct 21 18:58:24 2013
@@ -19,7 +19,7 @@
 <ivy-module version="2.0">
     <info organisation="org.apache.lucene" module="analyzers-phonetic"/>
     <dependencies>
-      <dependency org="commons-codec" name="commons-codec" rev="1.7" transitive="false"/>
+      <dependency org="commons-codec" name="commons-codec" rev="${/commons-codec/commons-codec}" transitive="false"/>
       <exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}"/> 
     </dependencies>
 </ivy-module>
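
This hunk, together with the uima and benchmark ivy.xml hunks below, replaces hard-coded revs with ${/org/name} properties resolved against the centralized lucene/ivy-versions.properties file; the new check-lib-versions target added to lucene/build.xml (below) verifies that every ivy.xml uses this form. The corresponding entry in the properties file would look something like the following (illustrative; the properties file itself is not part of this mail):

    /commons-codec/commons-codec = 1.7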

Modified: lucene/dev/branches/lucene4956/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilterTest.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilterTest.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilterTest.java Mon Oct 21 18:58:24 2013
@@ -105,6 +105,6 @@ public class DoubleMetaphoneFilterTest e
         return new TokenStreamComponents(tokenizer, new DoubleMetaphoneFilter(tokenizer, 8, random().nextBoolean()));
       }
     };
-    checkOneTermReuse(a, "", "");
+    checkOneTerm(a, "", "");
   }
 }

Modified: lucene/dev/branches/lucene4956/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java Mon Oct 21 18:58:24 2013
@@ -106,7 +106,7 @@ public class TestBeiderMorseFilter exten
         return new TokenStreamComponents(tokenizer, new BeiderMorseFilter(tokenizer, new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true)));
       }
     };
-    checkOneTermReuse(a, "", "");
+    checkOneTerm(a, "", "");
   }
   
   public void testCustomAttribute() throws IOException {

Modified: lucene/dev/branches/lucene4956/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestDoubleMetaphoneFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestDoubleMetaphoneFilterFactory.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestDoubleMetaphoneFilterFactory.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestDoubleMetaphoneFilterFactory.java Mon Oct 21 18:58:24 2013
@@ -51,27 +51,6 @@ public class TestDoubleMetaphoneFilterFa
     assertTokenStreamContents(filteredStream, new String[] { "ANTRNXNL" });
   }
   
-  /**
-   * Ensure that reset() removes any state (buffered tokens)
-   */
-  public void testReset() throws Exception {
-    DoubleMetaphoneFilterFactory factory = new DoubleMetaphoneFilterFactory(new HashMap<String, String>());
-    TokenStream inputStream = new MockTokenizer(new StringReader("international"), MockTokenizer.WHITESPACE, false);
-
-    TokenStream filteredStream = factory.create(inputStream);
-    CharTermAttribute termAtt = filteredStream.addAttribute(CharTermAttribute.class);
-    assertEquals(DoubleMetaphoneFilter.class, filteredStream.getClass());
-    
-    filteredStream.reset();
-    assertTrue(filteredStream.incrementToken());
-    assertEquals(13, termAtt.length());
-    assertEquals("international", termAtt.toString());
-    filteredStream.reset();
-    
-    // ensure there are no more tokens, such as ANTRNXNL
-    assertFalse(filteredStream.incrementToken());
-  }
-  
   /** Test that bogus arguments result in exception */
   public void testBogusArguments() throws Exception {
     try {

Modified: lucene/dev/branches/lucene4956/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java Mon Oct 21 18:58:24 2013
@@ -113,7 +113,7 @@ public class TestPhoneticFilter extends 
           return new TokenStreamComponents(tokenizer, new PhoneticFilter(tokenizer, e, random().nextBoolean()));
         }
       };
-      checkOneTermReuse(a, "", "");
+      checkOneTerm(a, "", "");
     }
   }
 }

Modified: lucene/dev/branches/lucene4956/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java Mon Oct 21 18:58:24 2013
@@ -108,11 +108,13 @@ public final class SentenceTokenizer ext
 
   @Override
   public void reset() throws IOException {
+    super.reset();
     tokenStart = tokenEnd = 0;
   }
 
   @Override
-  public void end() {
+  public void end() throws IOException {
+    super.end();
     // set final offset
     final int finalOffset = correctOffset(tokenEnd);
     offsetAtt.setOffset(finalOffset, finalOffset);
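
The super.reset()/super.end() calls added here follow the tightened TokenStream contract: overrides must chain to the base class, and because the base end() resets attribute state, any attribute writes (such as the final offset) have to come after the super call. The same reordering is applied to the UIMA tokenizers further down. A conforming end() override, sketched generically rather than copied from any one file:

    @Override
    public void end() throws IOException {
      super.end();  // resets attribute state; write the final offset afterwards
      final int finalOffset = correctOffset(tokenEnd);
      offsetAtt.setOffset(finalOffset, finalOffset);
    }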

Modified: lucene/dev/branches/lucene4956/lucene/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java Mon Oct 21 18:58:24 2013
@@ -79,7 +79,7 @@ public class TestSmartChineseAnalyzer ex
     String result[] = { "我", "购买", "了", "道具", "和", "服装", "," };
     for (Analyzer analyzer : analyzers) {
       assertAnalyzesTo(analyzer, sentence, result);
-      assertAnalyzesToReuse(analyzer, sentence, result);
+      assertAnalyzesTo(analyzer, sentence, result);
     }
   }
   
@@ -167,11 +167,11 @@ public class TestSmartChineseAnalyzer ex
   
   public void testReusableTokenStream() throws Exception {
     Analyzer a = new SmartChineseAnalyzer(Version.LUCENE_CURRENT);
-    assertAnalyzesToReuse(a, "我购买 Tests 了道具和服装", 
+    assertAnalyzesTo(a, "我购买 Tests 了道具和服装", 
         new String[] { "我", "购买", "test", "了", "道具", "和", "服装"},
         new int[] { 0, 1, 4, 10, 11, 13, 14 },
         new int[] { 1, 3, 9, 11, 13, 14, 16 });
-    assertAnalyzesToReuse(a, "我购买了道具和服装。",
+    assertAnalyzesTo(a, "我购买了道具和服装。",
         new String[] { "我", "购买", "了", "道具", "和", "服装" },
         new int[] { 0, 1, 3, 4, 6, 7 },
         new int[] { 1, 3, 4, 6, 7, 9 });
@@ -184,9 +184,11 @@ public class TestSmartChineseAnalyzer ex
       sb.append("我购买了道具和服装。");
     }
     Analyzer analyzer = new SmartChineseAnalyzer(TEST_VERSION_CURRENT);
-    TokenStream stream = analyzer.tokenStream("", sb.toString());
-    stream.reset();
-    while (stream.incrementToken()) {
+    try (TokenStream stream = analyzer.tokenStream("", sb.toString())) {
+      stream.reset();
+      while (stream.incrementToken()) {
+      }
+      stream.end();
     }
   }
   
@@ -197,9 +199,11 @@ public class TestSmartChineseAnalyzer ex
       sb.append("我购买了道具和服装");
     }
     Analyzer analyzer = new SmartChineseAnalyzer(TEST_VERSION_CURRENT);
-    TokenStream stream = analyzer.tokenStream("", sb.toString());
-    stream.reset();
-    while (stream.incrementToken()) {
+    try (TokenStream stream = analyzer.tokenStream("", sb.toString())) {
+      stream.reset();
+      while (stream.incrementToken()) {
+      }
+      stream.end();
     }
   }
   

Modified: lucene/dev/branches/lucene4956/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java Mon Oct 21 18:58:24 2013
@@ -34,8 +34,8 @@ public class TestPolishAnalyzer extends 
   public void testBasics() throws IOException {
     Analyzer a = new PolishAnalyzer(TEST_VERSION_CURRENT);
     // stemming
-    checkOneTermReuse(a, "studenta", "student");
-    checkOneTermReuse(a, "studenci", "student");
+    checkOneTerm(a, "studenta", "student");
+    checkOneTerm(a, "studenci", "student");
     // stopword
     assertAnalyzesTo(a, "był", new String[] {});
   }
@@ -45,8 +45,8 @@ public class TestPolishAnalyzer extends 
     CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("studenta"), false);;
     Analyzer a = new PolishAnalyzer(TEST_VERSION_CURRENT, 
         PolishAnalyzer.getDefaultStopSet(), exclusionSet);
-    checkOneTermReuse(a, "studenta", "studenta");
-    checkOneTermReuse(a, "studenci", "student");
+    checkOneTerm(a, "studenta", "studenta");
+    checkOneTerm(a, "studenci", "student");
   }
   
   /** blast some random strings through the analyzer */

Modified: lucene/dev/branches/lucene4956/lucene/analysis/uima/ivy.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/uima/ivy.xml?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/uima/ivy.xml (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/uima/ivy.xml Mon Oct 21 18:58:24 2013
@@ -19,9 +19,9 @@
 <ivy-module version="2.0">
     <info organisation="org.apache.lucene" module="analyzers-uima"/>
     <dependencies>
-      <dependency org="org.apache.uima" name="Tagger" rev="2.3.1" transitive="false"/>
-      <dependency org="org.apache.uima" name="WhitespaceTokenizer" rev="2.3.1" transitive="false"/>
-      <dependency org="org.apache.uima" name="uimaj-core" rev="2.3.1" transitive="false"/>
+      <dependency org="org.apache.uima" name="Tagger" rev="${/org.apache.uima/Tagger}" transitive="false"/>
+      <dependency org="org.apache.uima" name="WhitespaceTokenizer" rev="${/org.apache.uima/WhitespaceTokenizer}" transitive="false"/>
+      <dependency org="org.apache.uima" name="uimaj-core" rev="${/org.apache.uima/uimaj-core}" transitive="false"/>
       <exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}"/> 
     </dependencies>
 </ivy-module>

Modified: lucene/dev/branches/lucene4956/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java Mon Oct 21 18:58:24 2013
@@ -89,11 +89,7 @@ public abstract class BaseUIMATokenizer 
 
   @Override
   public void reset() throws IOException {
-    iterator = null;
-  }
-
-  @Override
-  public void end() throws IOException {
+    super.reset();
     iterator = null;
   }
 }

Modified: lucene/dev/branches/lucene4956/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java Mon Oct 21 18:58:24 2013
@@ -86,7 +86,7 @@ public final class UIMAAnnotationsTokeni
 
   @Override
   public void end() throws IOException {
-    offsetAttr.setOffset(finalOffset, finalOffset);
     super.end();
+    offsetAttr.setOffset(finalOffset, finalOffset);
   }
 }

Modified: lucene/dev/branches/lucene4956/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java Mon Oct 21 18:58:24 2013
@@ -107,8 +107,8 @@ public final class UIMATypeAwareAnnotati
 
   @Override
   public void end() throws IOException {
-    offsetAttr.setOffset(finalOffset, finalOffset);
     super.end();
+    offsetAttr.setOffset(finalOffset, finalOffset);
   }
 
 

Modified: lucene/dev/branches/lucene4956/lucene/benchmark/ivy.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/benchmark/ivy.xml?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/benchmark/ivy.xml (original)
+++ lucene/dev/branches/lucene4956/lucene/benchmark/ivy.xml Mon Oct 21 18:58:24 2013
@@ -19,9 +19,9 @@
 <ivy-module version="2.0">
     <info organisation="org.apache.lucene" module="benchmark"/>
     <dependencies>
-      <dependency org="org.apache.commons" name="commons-compress" rev="1.4.1" transitive="false"/>
-      <dependency org="xerces" name="xercesImpl" rev="2.9.1" transitive="false"/>
-      <dependency org="net.sourceforge.nekohtml" name="nekohtml" rev="1.9.17" transitive="false"/>
+      <dependency org="org.apache.commons" name="commons-compress" rev="${/org.apache.commons/commons-compress}" transitive="false"/>
+      <dependency org="xerces" name="xercesImpl" rev="${/xerces/xercesImpl}" transitive="false"/>
+      <dependency org="net.sourceforge.nekohtml" name="nekohtml" rev="${/net.sourceforge.nekohtml/nekohtml}" transitive="false"/>
       <exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}"/> 
     </dependencies>
 </ivy-module>

Modified: lucene/dev/branches/lucene4956/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java (original)
+++ lucene/dev/branches/lucene4956/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java Mon Oct 21 18:58:24 2013
@@ -90,6 +90,8 @@ public class ReadTokensTask extends Perf
         termAtt.fillBytesRef();
         tokenCount++;
       }
+      stream.end();
+      stream.close();
     }
     totalTokenCount += tokenCount;
     return tokenCount;

Modified: lucene/dev/branches/lucene4956/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (original)
+++ lucene/dev/branches/lucene4956/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java Mon Oct 21 18:58:24 2013
@@ -342,7 +342,7 @@ public class TestPerfTasksLogic extends 
     Benchmark benchmark = execBenchmark(algLines);
 
     DirectoryReader r = DirectoryReader.open(benchmark.getRunData().getDirectory());
-    SortedDocValues idx = FieldCache.DEFAULT.getTermsIndex(new SlowCompositeReaderWrapper(r), "country");
+    SortedDocValues idx = FieldCache.DEFAULT.getTermsIndex(SlowCompositeReaderWrapper.wrap(r), "country");
     final int maxDoc = r.maxDoc();
     assertEquals(1000, maxDoc);
     for(int i=0;i<1000;i++) {

Modified: lucene/dev/branches/lucene4956/lucene/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/build.xml?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/build.xml (original)
+++ lucene/dev/branches/lucene4956/lucene/build.xml Mon Oct 21 18:58:24 2013
@@ -169,6 +169,11 @@
     </license-check-macro>
   </target>
 
+  <target name="check-lib-versions" depends="compile-tools,resolve,load-custom-tasks"
+          description="Verify that the '/org/name' keys in ivy-versions.properties are sorted lexically and are neither duplicates nor orphans, and that all dependencies in all ivy.xml files use rev=&quot;$${/org/name}&quot; format.">
+    <lib-versions-check-macro dir="${common.dir}/.." centralized.versions.file="${common.dir}/ivy-versions.properties"/>
+  </target>
+
   <target name="check-forbidden-apis" depends="compile-tools,compile-test,install-forbidden-apis,-forbidden-apis-classpath,-check-forbidden-jdk-apis,-check-forbidden-test-apis,-check-system-out" description="Check forbidden API calls in compiled class files"/>
 
   <!-- TODO: Make the forbidden API checks per module! -->
@@ -283,6 +288,7 @@
     <!-- codecs: problems -->
     <!-- core: problems -->
     <check-missing-javadocs dir="build/docs/demo" level="method"/>
+    <check-missing-javadocs dir="build/docs/expressions" level="method"/>
     <!-- facet: problems -->
     <!-- grouping: problems -->
     <!-- highlighter: problems -->
@@ -597,6 +603,12 @@
     <modules-crawl target="jar-core"/>
   </target>
 
+  <target name="jar-src" description="create source jars for all modules">
+    <ant dir="${common.dir}/core" target="jar-src" inheritAll="false" />
+    <ant dir="${common.dir}/test-framework" target="jar-src" inheritAll="false" />
+    <modules-crawl target="jar-src"/>
+  </target>
+
   <target name="get-jenkins-line-docs" unless="enwiki.exists">
     <sequential>
       <!-- TODO: can get .lzma instead (it's ~17% smaller) but there's no builtin ant support...? -->

Modified: lucene/dev/branches/lucene4956/lucene/classification/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/classification/build.xml?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/classification/build.xml (original)
+++ lucene/dev/branches/lucene4956/lucene/classification/build.xml Mon Oct 21 18:58:24 2013
@@ -28,7 +28,6 @@
     <path refid="base.classpath"/>
     <pathelement path="${lucene-core.jar}"/>
     <pathelement path="${queries.jar}"/>
-    <pathelement path="${project.classpath}"/>
     <pathelement location="${build.dir}/classes/java" />
   </path>
 

Modified: lucene/dev/branches/lucene4956/lucene/classification/src/java/org/apache/lucene/classification/Classifier.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/classification/src/java/org/apache/lucene/classification/Classifier.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/classification/src/java/org/apache/lucene/classification/Classifier.java (original)
+++ lucene/dev/branches/lucene4956/lucene/classification/src/java/org/apache/lucene/classification/Classifier.java Mon Oct 21 18:58:24 2013
@@ -18,6 +18,7 @@ package org.apache.lucene.classification
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.index.AtomicReader;
+import org.apache.lucene.search.Query;
 
 import java.io.IOException;
 
@@ -47,4 +48,16 @@ public interface Classifier<T> {
   public void train(AtomicReader atomicReader, String textFieldName, String classFieldName, Analyzer analyzer)
       throws IOException;
 
+  /**
+   * Train the classifier using the underlying Lucene index
+   * @param atomicReader the reader to use to access the Lucene index
+   * @param textFieldName the name of the field used to compare documents
+   * @param classFieldName the name of the field containing the class assigned to documents
+   * @param analyzer the analyzer used to tokenize / filter the unseen text
+   * @param query the query to filter which documents to use for training
+   * @throws IOException If there is a low-level I/O error.
+   */
+  public void train(AtomicReader atomicReader, String textFieldName, String classFieldName, Analyzer analyzer, Query query)
+      throws IOException;
+
 }
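
The new overload lets callers restrict training to the subset of documents matching a Query; the four-argument train delegates with a null query, as the implementations below show. A hedged usage sketch, with the reader, analyzer, and field names as placeholders (and assuming the usual org.apache.lucene.search imports):

    Classifier<BytesRef> classifier = new KNearestNeighborClassifier(1);
    // only documents matching the filter query contribute to training;
    // passing null instead trains on the whole index
    classifier.train(atomicReader, "text", "category", analyzer,
        new TermQuery(new Term("text", "it")));
    ClassificationResult<BytesRef> result = classifier.assignClass("some unseen text");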

Modified: lucene/dev/branches/lucene4956/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java (original)
+++ lucene/dev/branches/lucene4956/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java Mon Oct 21 18:58:24 2013
@@ -19,6 +19,8 @@ package org.apache.lucene.classification
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.index.AtomicReader;
 import org.apache.lucene.queries.mlt.MoreLikeThis;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreDoc;
@@ -43,6 +45,7 @@ public class KNearestNeighborClassifier 
   private String classFieldName;
   private IndexSearcher indexSearcher;
   private int k;
+  private Query query;
 
   /**
    * Create a {@link Classifier} using kNN algorithm
@@ -59,9 +62,18 @@ public class KNearestNeighborClassifier 
   @Override
   public ClassificationResult<BytesRef> assignClass(String text) throws IOException {
     if (mlt == null) {
-      throw new IOException("You must first call Classifier#train first");
+      throw new IOException("You must first call Classifier#train");
+    }
+    Query q;
+    if (query != null) {
+      Query mltQuery = mlt.like(new StringReader(text), textFieldName);
+      BooleanQuery bq = new BooleanQuery();
+      bq.add(query, BooleanClause.Occur.MUST);
+      bq.add(mltQuery, BooleanClause.Occur.MUST);
+      q = bq;
+    } else {
+      q = mlt.like(new StringReader(text), textFieldName);
     }
-    Query q = mlt.like(new StringReader(text), textFieldName);
     TopDocs topDocs = indexSearcher.search(q, k);
     return selectClassFromNeighbors(topDocs);
   }
@@ -71,13 +83,11 @@ public class KNearestNeighborClassifier 
     Map<BytesRef, Integer> classCounts = new HashMap<BytesRef, Integer>();
     for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
       BytesRef cl = new BytesRef(indexSearcher.doc(scoreDoc.doc).getField(classFieldName).stringValue());
-      if (cl != null) {
-        Integer count = classCounts.get(cl);
-        if (count != null) {
-          classCounts.put(cl, count + 1);
-        } else {
-          classCounts.put(cl, 1);
-        }
+      Integer count = classCounts.get(cl);
+      if (count != null) {
+        classCounts.put(cl, count + 1);
+      } else {
+        classCounts.put(cl, 1);
       }
     }
     double max = 0;
@@ -98,11 +108,20 @@ public class KNearestNeighborClassifier 
    */
   @Override
   public void train(AtomicReader atomicReader, String textFieldName, String classFieldName, Analyzer analyzer) throws IOException {
+    train(atomicReader, textFieldName, classFieldName, analyzer, null);
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public void train(AtomicReader atomicReader, String textFieldName, String classFieldName, Analyzer analyzer, Query query) throws IOException {
     this.textFieldName = textFieldName;
     this.classFieldName = classFieldName;
     mlt = new MoreLikeThis(atomicReader);
     mlt.setAnalyzer(analyzer);
     mlt.setFieldNames(new String[]{textFieldName});
     indexSearcher = new IndexSearcher(atomicReader);
+    this.query = query;
   }
 }

Modified: lucene/dev/branches/lucene4956/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java (original)
+++ lucene/dev/branches/lucene4956/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java Mon Oct 21 18:58:24 2013
@@ -27,6 +27,7 @@ import org.apache.lucene.index.TermsEnum
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TotalHitCountCollector;
 import org.apache.lucene.search.WildcardQuery;
@@ -49,6 +50,7 @@ public class SimpleNaiveBayesClassifier 
   private int docsWithClassSize;
   private Analyzer analyzer;
   private IndexSearcher indexSearcher;
+  private Query query;
 
   /**
    * Creates a new NaiveBayes classifier.
@@ -62,7 +64,7 @@ public class SimpleNaiveBayesClassifier 
    * {@inheritDoc}
    */
   @Override
-  public void train(AtomicReader atomicReader, String textFieldName, String classFieldName, Analyzer analyzer)
+  public void train(AtomicReader atomicReader, String textFieldName, String classFieldName, Analyzer analyzer, Query query)
       throws IOException {
     this.atomicReader = atomicReader;
     this.indexSearcher = new IndexSearcher(this.atomicReader);
@@ -70,13 +72,29 @@ public class SimpleNaiveBayesClassifier 
     this.classFieldName = classFieldName;
     this.analyzer = analyzer;
     this.docsWithClassSize = countDocsWithClass();
+    this.query = query;
+  }
+
+  @Override
+  public void train(AtomicReader atomicReader, String textFieldName, String classFieldName, Analyzer analyzer) throws IOException {
+    train(atomicReader, textFieldName, classFieldName, analyzer, null);
   }
 
   private int countDocsWithClass() throws IOException {
     int docCount = MultiFields.getTerms(this.atomicReader, this.classFieldName).getDocCount();
     if (docCount == -1) { // in case codec doesn't support getDocCount
       TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
-      indexSearcher.search(new WildcardQuery(new Term(classFieldName, String.valueOf(WildcardQuery.WILDCARD_STRING))),
+      Query q;
+      if (query != null) {
+        BooleanQuery bq = new BooleanQuery();
+        WildcardQuery wq = new WildcardQuery(new Term(classFieldName, String.valueOf(WildcardQuery.WILDCARD_STRING)));
+        bq.add(wq, BooleanClause.Occur.MUST);
+        bq.add(query, BooleanClause.Occur.MUST);
+        q = bq;
+      } else {
+        q = new WildcardQuery(new Term(classFieldName, String.valueOf(WildcardQuery.WILDCARD_STRING)));
+      }
+      indexSearcher.search(q,
           totalHitCountCollector);
       docCount = totalHitCountCollector.getTotalHits();
     }
@@ -85,14 +103,14 @@ public class SimpleNaiveBayesClassifier 
 
   private String[] tokenizeDoc(String doc) throws IOException {
     Collection<String> result = new LinkedList<String>();
-    TokenStream tokenStream = analyzer.tokenStream(textFieldName, doc);
-    CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
-    tokenStream.reset();
-    while (tokenStream.incrementToken()) {
-      result.add(charTermAttribute.toString());
+    try (TokenStream tokenStream = analyzer.tokenStream(textFieldName, doc)) {
+      CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
+      tokenStream.reset();
+      while (tokenStream.incrementToken()) {
+        result.add(charTermAttribute.toString());
+      }
+      tokenStream.end();
     }
-    tokenStream.end();
-    tokenStream.close();
     return result.toArray(new String[result.size()]);
   }
 
@@ -102,7 +120,7 @@ public class SimpleNaiveBayesClassifier 
   @Override
   public ClassificationResult<BytesRef> assignClass(String inputDocument) throws IOException {
     if (atomicReader == null) {
-      throw new IOException("You must first call Classifier#train first");
+      throw new IOException("You must first call Classifier#train");
     }
     double max = 0d;
     BytesRef foundClass = new BytesRef();
@@ -157,6 +175,9 @@ public class SimpleNaiveBayesClassifier 
     BooleanQuery booleanQuery = new BooleanQuery();
     booleanQuery.add(new BooleanClause(new TermQuery(new Term(textFieldName, word)), BooleanClause.Occur.MUST));
     booleanQuery.add(new BooleanClause(new TermQuery(new Term(classFieldName, c)), BooleanClause.Occur.MUST));
+    if (query != null) {
+      booleanQuery.add(query, BooleanClause.Occur.MUST);
+    }
     TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
     indexSearcher.search(booleanQuery, totalHitCountCollector);
     return totalHitCountCollector.getTotalHits();

Modified: lucene/dev/branches/lucene4956/lucene/classification/src/java/org/apache/lucene/classification/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/classification/src/java/org/apache/lucene/classification/package.html?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/classification/src/java/org/apache/lucene/classification/package.html (original)
+++ lucene/dev/branches/lucene4956/lucene/classification/src/java/org/apache/lucene/classification/package.html Mon Oct 21 18:58:24 2013
@@ -17,7 +17,7 @@
 <html>
 <body>
 Uses already seen data (the indexed documents) to classify new documents.
-Currently only contains a (simplistic) Lucene based Naive Bayes classifier 
-and a k-Nearest Neighbor classifier
+Currently only contains a (simplistic) Lucene based Naive Bayes classifier,
+a k-Nearest Neighbor classifier and a Perceptron based classifier
 </body>
 </html>

Modified: lucene/dev/branches/lucene4956/lucene/classification/src/test/org/apache/lucene/classification/ClassificationTestBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/classification/src/test/org/apache/lucene/classification/ClassificationTestBase.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/classification/src/test/org/apache/lucene/classification/ClassificationTestBase.java (original)
+++ lucene/dev/branches/lucene4956/lucene/classification/src/test/org/apache/lucene/classification/ClassificationTestBase.java Mon Oct 21 18:58:24 2013
@@ -21,14 +21,20 @@ import org.apache.lucene.document.Docume
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.AtomicReader;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.SlowCompositeReaderWrapper;
+import org.apache.lucene.search.Query;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
 import org.junit.After;
 import org.junit.Before;
 
+import java.io.IOException;
+import java.util.Random;
+
 /**
  * Base class for testing {@link Classifier}s
  */
@@ -40,8 +46,9 @@ public abstract class ClassificationTest
   public static final BytesRef TECHNOLOGY_RESULT = new BytesRef("technology");
 
   private RandomIndexWriter indexWriter;
-  private String textFieldName;
   private Directory dir;
+
+  String textFieldName;
   String categoryFieldName;
   String booleanFieldName;
 
@@ -64,83 +71,145 @@ public abstract class ClassificationTest
     dir.close();
   }
 
+  protected void checkCorrectClassification(Classifier<T> classifier, String inputDoc, T expectedResult, Analyzer analyzer, String textFieldName, String classFieldName) throws Exception {
+     checkCorrectClassification(classifier, inputDoc, expectedResult, analyzer, textFieldName, classFieldName, null);
+  }
 
-  protected void checkCorrectClassification(Classifier<T> classifier, String inputDoc, T expectedResult, Analyzer analyzer, String classFieldName) throws Exception {
-    SlowCompositeReaderWrapper compositeReaderWrapper = null;
+  protected void checkCorrectClassification(Classifier<T> classifier, String inputDoc, T expectedResult, Analyzer analyzer, String textFieldName, String classFieldName, Query query) throws Exception {
+    AtomicReader atomicReader = null;
     try {
-      populateIndex(analyzer);
-      compositeReaderWrapper = new SlowCompositeReaderWrapper(indexWriter.getReader());
-      classifier.train(compositeReaderWrapper, textFieldName, classFieldName, analyzer);
+      populateSampleIndex(analyzer);
+      atomicReader = SlowCompositeReaderWrapper.wrap(indexWriter.getReader());
+      classifier.train(atomicReader, textFieldName, classFieldName, analyzer, query);
       ClassificationResult<T> classificationResult = classifier.assignClass(inputDoc);
       assertNotNull(classificationResult.getAssignedClass());
       assertEquals("got an assigned class of " + classificationResult.getAssignedClass(), expectedResult, classificationResult.getAssignedClass());
       assertTrue("got a not positive score " + classificationResult.getScore(), classificationResult.getScore() > 0);
     } finally {
-      if (compositeReaderWrapper != null)
-        compositeReaderWrapper.close();
+      if (atomicReader != null)
+        atomicReader.close();
+    }
+  }
+
+  protected void checkPerformance(Classifier<T> classifier, Analyzer analyzer, String classFieldName) throws Exception {
+    AtomicReader atomicReader = null;
+    long trainStart = System.currentTimeMillis();
+    long trainEnd = 0l;
+    try {
+      populatePerformanceIndex(analyzer);
+      atomicReader = SlowCompositeReaderWrapper.wrap(indexWriter.getReader());
+      classifier.train(atomicReader, textFieldName, classFieldName, analyzer);
+      trainEnd = System.currentTimeMillis();
+      long trainTime = trainEnd - trainStart;
+      assertTrue("training took more than 2 mins : " + trainTime / 1000 + "s", trainTime < 120000);
+    } finally {
+      if (atomicReader != null)
+        atomicReader.close();
+    }
+  }
+
+  private void populatePerformanceIndex(Analyzer analyzer) throws IOException {
+    indexWriter.deleteAll();
+    indexWriter.commit();
+
+    FieldType ft = new FieldType(TextField.TYPE_STORED);
+    ft.setStoreTermVectors(true);
+    ft.setStoreTermVectorOffsets(true);
+    ft.setStoreTermVectorPositions(true);
+    int docs = 1000;
+    Random random = random();
+    for (int i = 0; i < docs; i++) {
+      boolean b = random.nextBoolean();
+      Document doc = new Document();
+      doc.add(new Field(textFieldName, createRandomString(random), ft));
+      doc.add(new Field(categoryFieldName, b ? "technology" : "politics", ft));
+      doc.add(new Field(booleanFieldName, String.valueOf(b), ft));
+      indexWriter.addDocument(doc, analyzer);
+    }
+    indexWriter.commit();
+  }
+
+  private String createRandomString(Random random) {
+    StringBuilder builder = new StringBuilder();
+    for (int i = 0; i < 20; i++) {
+      builder.append(_TestUtil.randomSimpleString(random, 5));
+      builder.append(" ");
     }
+    return builder.toString();
   }
 
-  private void populateIndex(Analyzer analyzer) throws Exception {
+  private void populateSampleIndex(Analyzer analyzer) throws Exception {
+
+    indexWriter.deleteAll();
+    indexWriter.commit();
 
     FieldType ft = new FieldType(TextField.TYPE_STORED);
     ft.setStoreTermVectors(true);
     ft.setStoreTermVectorOffsets(true);
     ft.setStoreTermVectorPositions(true);
 
+    String text;
+
     Document doc = new Document();
-    doc.add(new Field(textFieldName, "The traveling press secretary for Mitt Romney lost his cool and cursed at reporters " +
+    text = "The traveling press secretary for Mitt Romney lost his cool and cursed at reporters " +
         "who attempted to ask questions of the Republican presidential candidate in a public plaza near the Tomb of " +
-        "the Unknown Soldier in Warsaw Tuesday.", ft));
+        "the Unknown Soldier in Warsaw Tuesday.";
+    doc.add(new Field(textFieldName, text, ft));
     doc.add(new Field(categoryFieldName, "politics", ft));
-    doc.add(new Field(booleanFieldName, "false", ft));
+    doc.add(new Field(booleanFieldName, "true", ft));
 
     indexWriter.addDocument(doc, analyzer);
 
     doc = new Document();
-    doc.add(new Field(textFieldName, "Mitt Romney seeks to assure Israel and Iran, as well as Jewish voters in the United" +
-        " States, that he will be tougher against Iran's nuclear ambitions than President Barack Obama.", ft));
+    text = "Mitt Romney seeks to assure Israel and Iran, as well as Jewish voters in the United" +
+        " States, that he will be tougher against Iran's nuclear ambitions than President Barack Obama.";
+    doc.add(new Field(textFieldName, text, ft));
     doc.add(new Field(categoryFieldName, "politics", ft));
-    doc.add(new Field(booleanFieldName, "false", ft));
+    doc.add(new Field(booleanFieldName, "true", ft));
     indexWriter.addDocument(doc, analyzer);
 
     doc = new Document();
-    doc.add(new Field(textFieldName, "And there's a threshold question that he has to answer for the American people and " +
+    text = "And there's a threshold question that he has to answer for the American people and " +
         "that's whether he is prepared to be commander-in-chief,\" she continued. \"As we look to the past events, we " +
-        "know that this raises some questions about his preparedness and we'll see how the rest of his trip goes.\"", ft));
+        "know that this raises some questions about his preparedness and we'll see how the rest of his trip goes.\"";
+    doc.add(new Field(textFieldName, text, ft));
     doc.add(new Field(categoryFieldName, "politics", ft));
-    doc.add(new Field(booleanFieldName, "false", ft));
+    doc.add(new Field(booleanFieldName, "true", ft));
     indexWriter.addDocument(doc, analyzer);
 
     doc = new Document();
-    doc.add(new Field(textFieldName, "Still, when it comes to gun policy, many congressional Democrats have \"decided to " +
+    text = "Still, when it comes to gun policy, many congressional Democrats have \"decided to " +
         "keep quiet and not go there,\" said Alan Lizotte, dean and professor at the State University of New York at " +
-        "Albany's School of Criminal Justice.", ft));
+        "Albany's School of Criminal Justice.";
+    doc.add(new Field(textFieldName, text, ft));
     doc.add(new Field(categoryFieldName, "politics", ft));
-    doc.add(new Field(booleanFieldName, "false", ft));
+    doc.add(new Field(booleanFieldName, "true", ft));
     indexWriter.addDocument(doc, analyzer);
 
     doc = new Document();
-    doc.add(new Field(textFieldName, "Standing amongst the thousands of people at the state Capitol, Jorstad, director of " +
+    text = "Standing amongst the thousands of people at the state Capitol, Jorstad, director of " +
         "technology at the University of Wisconsin-La Crosse, documented the historic moment and shared it with the " +
-        "world through the Internet.", ft));
+        "world through the Internet.";
+    doc.add(new Field(textFieldName, text, ft));
     doc.add(new Field(categoryFieldName, "technology", ft));
-    doc.add(new Field(booleanFieldName, "true", ft));
+    doc.add(new Field(booleanFieldName, "false", ft));
     indexWriter.addDocument(doc, analyzer);
 
     doc = new Document();
-    doc.add(new Field(textFieldName, "So, about all those experts and analysts who've spent the past year or so saying " +
-        "Facebook was going to make a phone. A new expert has stepped forward to say it's not going to happen.", ft));
+    text = "So, about all those experts and analysts who've spent the past year or so saying " +
+        "Facebook was going to make a phone. A new expert has stepped forward to say it's not going to happen.";
+    doc.add(new Field(textFieldName, text, ft));
     doc.add(new Field(categoryFieldName, "technology", ft));
-    doc.add(new Field(booleanFieldName, "true", ft));
+    doc.add(new Field(booleanFieldName, "false", ft));
     indexWriter.addDocument(doc, analyzer);
 
     doc = new Document();
-    doc.add(new Field(textFieldName, "More than 400 million people trust Google with their e-mail, and 50 million store files" +
+    text = "More than 400 million people trust Google with their e-mail, and 50 million store files" +
         " in the cloud using the Dropbox service. People manage their bank accounts, pay bills, trade stocks and " +
-        "generally transfer or store huge volumes of personal data online.", ft));
+        "generally transfer or store huge volumes of personal data online.";
+    doc.add(new Field(textFieldName, text, ft));
     doc.add(new Field(categoryFieldName, "technology", ft));
-    doc.add(new Field(booleanFieldName, "true", ft));
+    doc.add(new Field(booleanFieldName, "false", ft));
     indexWriter.addDocument(doc, analyzer);
 
     indexWriter.commit();

Modified: lucene/dev/branches/lucene4956/lucene/classification/src/test/org/apache/lucene/classification/KNearestNeighborClassifierTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/classification/src/test/org/apache/lucene/classification/KNearestNeighborClassifierTest.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/classification/src/test/org/apache/lucene/classification/KNearestNeighborClassifierTest.java (original)
+++ lucene/dev/branches/lucene4956/lucene/classification/src/test/org/apache/lucene/classification/KNearestNeighborClassifierTest.java Mon Oct 21 18:58:24 2013
@@ -17,6 +17,8 @@
 package org.apache.lucene.classification;
 
 import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.util.BytesRef;
 import org.junit.Test;
 
@@ -27,7 +29,17 @@ public class KNearestNeighborClassifierT
 
   @Test
   public void testBasicUsage() throws Exception {
-    checkCorrectClassification(new KNearestNeighborClassifier(1), TECHNOLOGY_INPUT, TECHNOLOGY_RESULT, new MockAnalyzer(random()), categoryFieldName);
+    checkCorrectClassification(new KNearestNeighborClassifier(1), TECHNOLOGY_INPUT, TECHNOLOGY_RESULT, new MockAnalyzer(random()), textFieldName, categoryFieldName);
+  }
+
+  @Test
+  public void testBasicUsageWithQuery() throws Exception {
+    checkCorrectClassification(new KNearestNeighborClassifier(1), TECHNOLOGY_INPUT, TECHNOLOGY_RESULT, new MockAnalyzer(random()), textFieldName, categoryFieldName, new TermQuery(new Term(textFieldName, "it")));
+  }
+
+  @Test
+  public void testPerformance() throws Exception {
+    checkPerformance(new KNearestNeighborClassifier(100), new MockAnalyzer(random()), categoryFieldName);
   }
 
 }

Modified: lucene/dev/branches/lucene4956/lucene/classification/src/test/org/apache/lucene/classification/SimpleNaiveBayesClassifierTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/classification/src/test/org/apache/lucene/classification/SimpleNaiveBayesClassifierTest.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/classification/src/test/org/apache/lucene/classification/SimpleNaiveBayesClassifierTest.java (original)
+++ lucene/dev/branches/lucene4956/lucene/classification/src/test/org/apache/lucene/classification/SimpleNaiveBayesClassifierTest.java Mon Oct 21 18:58:24 2013
@@ -21,11 +21,11 @@ import org.apache.lucene.analysis.MockAn
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
 import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
-import org.apache.lucene.analysis.ngram.EdgeNGramTokenizer;
 import org.apache.lucene.analysis.reverse.ReverseStringFilter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.Version;
 import org.junit.Test;
 
 import java.io.Reader;
@@ -39,13 +39,18 @@ public class SimpleNaiveBayesClassifierT
 
   @Test
   public void testBasicUsage() throws Exception {
-    checkCorrectClassification(new SimpleNaiveBayesClassifier(), TECHNOLOGY_INPUT, TECHNOLOGY_RESULT, new MockAnalyzer(random()), categoryFieldName);
-    checkCorrectClassification(new SimpleNaiveBayesClassifier(), POLITICS_INPUT, POLITICS_RESULT, new MockAnalyzer(random()), categoryFieldName);
+    checkCorrectClassification(new SimpleNaiveBayesClassifier(), TECHNOLOGY_INPUT, TECHNOLOGY_RESULT, new MockAnalyzer(random()), textFieldName, categoryFieldName);
+    checkCorrectClassification(new SimpleNaiveBayesClassifier(), POLITICS_INPUT, POLITICS_RESULT, new MockAnalyzer(random()), textFieldName, categoryFieldName);
+  }
+
+  @Test
+  public void testBasicUsageWithQuery() throws Exception {
+    checkCorrectClassification(new SimpleNaiveBayesClassifier(), TECHNOLOGY_INPUT, TECHNOLOGY_RESULT, new MockAnalyzer(random()), textFieldName, categoryFieldName, new TermQuery(new Term(textFieldName, "it")));
   }
 
   @Test
   public void testNGramUsage() throws Exception {
-    checkCorrectClassification(new SimpleNaiveBayesClassifier(), TECHNOLOGY_INPUT, TECHNOLOGY_RESULT, new NGramAnalyzer(), categoryFieldName);
+    checkCorrectClassification(new SimpleNaiveBayesClassifier(), TECHNOLOGY_INPUT, TECHNOLOGY_RESULT, new NGramAnalyzer(), textFieldName, categoryFieldName);
   }
 
   private class NGramAnalyzer extends Analyzer {
@@ -56,4 +61,9 @@ public class SimpleNaiveBayesClassifierT
     }
   }
 
+  @Test
+  public void testPerformance() throws Exception {
+    checkPerformance(new SimpleNaiveBayesClassifier(), new MockAnalyzer(random()), categoryFieldName);
+  }
+
 }
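
Both classifier tests also gain a testPerformance case. The checkPerformance
helper lives in the shared test base class and is not part of this diff; the
following is only a guess at its shape (the atomicReader/textFieldName members
and the 60s budget are assumptions, not the real implementation):

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.classification.Classifier;
    import org.apache.lucene.index.AtomicReader;
    import org.apache.lucene.util.BytesRef;

    abstract class PerformanceCheckSketch {
      // Hypothetical stand-ins for base-class members not shown here:
      protected AtomicReader atomicReader;
      protected String textFieldName = "text";

      protected void checkPerformance(Classifier<BytesRef> classifier,
          Analyzer analyzer, String classFieldName) throws Exception {
        long start = System.currentTimeMillis();
        classifier.train(atomicReader, textFieldName, classFieldName, analyzer);
        long elapsed = System.currentTimeMillis() - start;
        // Generous budget: catch pathological slowdowns, not micro-regressions.
        if (elapsed > 60000) {
          throw new AssertionError("training took " + elapsed + "ms");
        }
      }
    }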

Modified: lucene/dev/branches/lucene4956/lucene/classification/src/test/org/apache/lucene/classification/utils/DataSplitterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/classification/src/test/org/apache/lucene/classification/utils/DataSplitterTest.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/classification/src/test/org/apache/lucene/classification/utils/DataSplitterTest.java (original)
+++ lucene/dev/branches/lucene4956/lucene/classification/src/test/org/apache/lucene/classification/utils/DataSplitterTest.java Mon Oct 21 18:58:24 2013
@@ -131,9 +131,15 @@ public class DataSplitterTest extends Lu
       closeQuietly(testReader);
       closeQuietly(cvReader);
     } finally {
-      trainingIndex.close();
-      testIndex.close();
-      crossValidationIndex.close();
+      if (trainingIndex != null) {
+        trainingIndex.close();
+      }
+      if (testIndex != null) {
+        testIndex.close();
+      }
+      if (crossValidationIndex != null) {
+        crossValidationIndex.close();
+      }
     }
   }
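
The added null checks guard against a setup failure that leaves one of the
three directories unassigned. Lucene's own IOUtils offers an equivalent
one-liner, sketched here assuming the indexes are plain Directory instances
as elsewhere in this test:

    import org.apache.lucene.store.Directory;
    import org.apache.lucene.util.IOUtils;

    class CleanupSketch {
      // closeWhileHandlingException ignores null arguments and suppresses
      // secondary exceptions, so the three null checks collapse to one call.
      static void closeIndexes(Directory trainingIndex, Directory testIndex,
          Directory crossValidationIndex) {
        IOUtils.closeWhileHandlingException(trainingIndex, testIndex,
            crossValidationIndex);
      }
    }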
 

Modified: lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java (original)
+++ lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java Mon Oct 21 18:58:24 2013
@@ -19,7 +19,6 @@ package org.apache.lucene.codecs.blockte
 
 import java.io.IOException;
 import java.util.Collections;
-import java.util.Comparator;
 import java.util.Iterator;
 import java.util.TreeMap;
 
@@ -142,6 +141,7 @@ public class BlockTermsReader extends Fi
         final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY ? -1 : in.readVLong();
         final long sumDocFreq = in.readVLong();
         final int docCount = in.readVInt();
+        final int longsSize = version >= BlockTermsWriter.VERSION_META_ARRAY ? in.readVInt() : 0;
         if (docCount < 0 || docCount > info.getDocCount()) { // #docs with field must be <= #docs
           throw new CorruptIndexException("invalid docCount: " + docCount + " maxDoc: " + info.getDocCount() + " (resource=" + in + ")");
         }
@@ -151,7 +151,7 @@ public class BlockTermsReader extends Fi
         if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
           throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq + " (resource=" + in + ")");
         }
-        FieldReader previous = fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq, docCount));
+        FieldReader previous = fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq, docCount, longsSize));
         if (previous != null) {
           throw new CorruptIndexException("duplicate fields: " + fieldInfo.name + " (resource=" + in + ")");
         }
@@ -230,8 +230,9 @@ public class BlockTermsReader extends Fi
     final long sumTotalTermFreq;
     final long sumDocFreq;
     final int docCount;
+    final int longsSize;
 
-    FieldReader(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq, int docCount) {
+    FieldReader(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) {
       assert numTerms > 0;
       this.fieldInfo = fieldInfo;
       this.numTerms = numTerms;
@@ -239,16 +240,17 @@ public class BlockTermsReader extends Fi
       this.sumTotalTermFreq = sumTotalTermFreq;
       this.sumDocFreq = sumDocFreq;
       this.docCount = docCount;
+      this.longsSize = longsSize;
     }
 
     @Override
-    public Comparator<BytesRef> getComparator() {
-      return BytesRef.getUTF8SortedAsUnicodeComparator();
+    public TermsEnum iterator(TermsEnum reuse) throws IOException {
+      return new SegmentTermsEnum();
     }
 
     @Override
-    public TermsEnum iterator(TermsEnum reuse) throws IOException {
-      return new SegmentTermsEnum();
+    public boolean hasFreqs() {
+      return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
     }
 
     @Override
@@ -326,6 +328,10 @@ public class BlockTermsReader extends Fi
       private final ByteArrayDataInput freqReader = new ByteArrayDataInput();
       private int metaDataUpto;
 
+      private long[] longs;
+      private byte[] bytes;
+      private ByteArrayDataInput bytesReader;
+
       public SegmentTermsEnum() throws IOException {
         in = BlockTermsReader.this.in.clone();
         in.seek(termsStartPointer);
@@ -339,11 +345,7 @@ public class BlockTermsReader extends Fi
         termSuffixes = new byte[128];
         docFreqBytes = new byte[64];
         //System.out.println("BTR.enum init this=" + this + " postingsReader=" + postingsReader);
-      }
-
-      @Override
-      public Comparator<BytesRef> getComparator() {
-        return BytesRef.getUTF8SortedAsUnicodeComparator();
+        longs = new long[longsSize];
       }
 
       // TODO: we may want an alternate mode here which is
@@ -415,7 +417,7 @@ public class BlockTermsReader extends Fi
           assert result;
 
           indexIsCurrent = true;
-          didIndexNext = false;      
+          didIndexNext = false;
 
           if (doOrd) {
             state.ord = indexEnum.ord()-1;
@@ -789,11 +791,20 @@ public class BlockTermsReader extends Fi
         //System.out.println("  freq bytes len=" + len);
         in.readBytes(docFreqBytes, 0, len);
         freqReader.reset(docFreqBytes, 0, len);
-        metaDataUpto = 0;
 
-        state.termBlockOrd = 0;
+        // metadata
+        len = in.readVInt();
+        if (bytes == null) {
+          bytes = new byte[ArrayUtil.oversize(len, 1)];
+          bytesReader = new ByteArrayDataInput();
+        } else if (bytes.length < len) {
+          bytes = new byte[ArrayUtil.oversize(len, 1)];
+        }
+        in.readBytes(bytes, 0, len);
+        bytesReader.reset(bytes, 0, len);
 
-        postingsReader.readTermsBlock(in, fieldInfo, state);
+        metaDataUpto = 0;
+        state.termBlockOrd = 0;
 
         indexIsCurrent = false;
         //System.out.println("  indexIsCurrent=" + indexIsCurrent);
@@ -811,9 +822,7 @@ public class BlockTermsReader extends Fi
 
           // lazily catch up on metadata decode:
           final int limit = state.termBlockOrd;
-          // We must set/incr state.termCount because
-          // postings impl can look at this
-          state.termBlockOrd = metaDataUpto;
+          boolean absolute = metaDataUpto == 0;
           // TODO: better API would be "jump straight to term=N"???
           while (metaDataUpto < limit) {
             //System.out.println("  decode mdUpto=" + metaDataUpto);
@@ -825,16 +834,21 @@ public class BlockTermsReader extends Fi
 
             // TODO: if docFreq were bulk decoded we could
             // just skipN here:
+
+            // docFreq, totalTermFreq
             state.docFreq = freqReader.readVInt();
             //System.out.println("    dF=" + state.docFreq);
             if (fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
               state.totalTermFreq = state.docFreq + freqReader.readVLong();
               //System.out.println("    totTF=" + state.totalTermFreq);
             }
-
-            postingsReader.nextTerm(fieldInfo, state);
+            // metadata
+            for (int i = 0; i < longs.length; i++) {
+              longs[i] = bytesReader.readVLong();
+            }
+            postingsReader.decodeTerm(longs, bytesReader, fieldInfo, state, absolute);
             metaDataUpto++;
-            state.termBlockOrd++;
+            absolute = false;
           }
         } else {
           //System.out.println("  skip! seekPending");
@@ -842,4 +856,11 @@ public class BlockTermsReader extends Fi
       }
     }
   }
+
+  @Override
+  public long ramBytesUsed() {
+    long sizeInBytes = (postingsReader!=null) ? postingsReader.ramBytesUsed() : 0;
+    sizeInBytes += (indexReader!=null) ? indexReader.ramBytesUsed() : 0;
+    return sizeInBytes;
+  }
 }
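
The reader now pulls a per-block metadata byte[] alongside the doc-freq bytes
and hands each term's vLongs to postingsReader.decodeTerm, with absolute=true
only for the first term of a block. A toy model of that absolute/delta
contract (the real delta bookkeeping lives inside each PostingsReaderBase
implementation; this only illustrates the framing):

    import org.apache.lucene.store.ByteArrayDataInput;

    class MetadataDecodeSketch {
      // The first term of a block carries absolute values, each later term
      // a delta against the previous one. Returns the last term's longs.
      static long[] decodeBlock(ByteArrayDataInput metaIn, int longsSize,
          int termCount) {
        long[] current = new long[longsSize];
        boolean absolute = true;
        for (int term = 0; term < termCount; term++) {
          for (int i = 0; i < longsSize; i++) {
            long v = metaIn.readVLong();
            current[i] = absolute ? v : current[i] + v;
          }
          absolute = false; // only the first term is absolute
        }
        return current;
      }
    }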

Modified: lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java (original)
+++ lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java Mon Oct 21 18:58:24 2013
@@ -17,26 +17,29 @@ package org.apache.lucene.codecs.blockte
  * limitations under the License.
  */
 
+import java.io.Closeable;
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Comparator;
 import java.util.List;
 
+import org.apache.lucene.codecs.BlockTermState;
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.FieldsConsumer;
-import org.apache.lucene.codecs.PostingsConsumer;
 import org.apache.lucene.codecs.PostingsWriterBase;
 import org.apache.lucene.codecs.TermStats;
-import org.apache.lucene.codecs.TermsConsumer;
-import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.store.RAMOutputStream;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.RamUsageEstimator;
 
@@ -52,14 +55,15 @@ import org.apache.lucene.util.RamUsageEs
  * @lucene.experimental
  */
 
-public class BlockTermsWriter extends FieldsConsumer {
+public class BlockTermsWriter extends FieldsConsumer implements Closeable {
 
   final static String CODEC_NAME = "BLOCK_TERMS_DICT";
 
   // Initial format
   public static final int VERSION_START = 0;
   public static final int VERSION_APPEND_ONLY = 1;
-  public static final int VERSION_CURRENT = VERSION_APPEND_ONLY;
+  public static final int VERSION_META_ARRAY = 2;
+  public static final int VERSION_CURRENT = VERSION_META_ARRAY;
 
   /** Extension of terms file */
   static final String TERMS_EXTENSION = "tib";
@@ -69,6 +73,7 @@ public class BlockTermsWriter extends Fi
   final FieldInfos fieldInfos;
   FieldInfo currentField;
   private final TermsIndexWriterBase termsIndexWriter;
+  private final int maxDoc;
 
   private static class FieldMetaData {
     public final FieldInfo fieldInfo;
@@ -77,8 +82,9 @@ public class BlockTermsWriter extends Fi
     public final long sumTotalTermFreq;
     public final long sumDocFreq;
     public final int docCount;
+    public final int longsSize;
 
-    public FieldMetaData(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq, int docCount) {
+    public FieldMetaData(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) {
       assert numTerms > 0;
       this.fieldInfo = fieldInfo;
       this.termsStartPointer = termsStartPointer;
@@ -86,6 +92,7 @@ public class BlockTermsWriter extends Fi
       this.sumTotalTermFreq = sumTotalTermFreq;
       this.sumDocFreq = sumDocFreq;
       this.docCount = docCount;
+      this.longsSize = longsSize;
     }
   }
 
@@ -98,6 +105,7 @@ public class BlockTermsWriter extends Fi
       throws IOException {
     final String termsFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TERMS_EXTENSION);
     this.termsIndexWriter = termsIndexWriter;
+    maxDoc = state.segmentInfo.getDocCount();
     out = state.directory.createOutput(termsFileName, state.context);
     boolean success = false;
     try {
@@ -109,7 +117,7 @@ public class BlockTermsWriter extends Fi
       
       //System.out.println("BTW.init seg=" + state.segmentName);
       
-      postingsWriter.start(out); // have consumer write its format/header
+      postingsWriter.init(out); // have consumer write its format/header
       success = true;
     } finally {
       if (!success) {
@@ -123,7 +131,43 @@ public class BlockTermsWriter extends Fi
   }
 
   @Override
-  public TermsConsumer addField(FieldInfo field) throws IOException {
+  public void write(Fields fields) throws IOException {
+
+    boolean success = false;
+    try {
+      for(String field : fields) {
+
+        Terms terms = fields.terms(field);
+        if (terms == null) {
+          continue;
+        }
+
+        TermsEnum termsEnum = terms.iterator(null);
+
+        TermsWriter termsWriter = addField(fieldInfos.fieldInfo(field));
+
+        while (true) {
+          BytesRef term = termsEnum.next();
+          if (term == null) {
+            break;
+          }
+
+          termsWriter.write(term, termsEnum);
+        }
+
+        termsWriter.finish();
+      }
+      success = true;
+    } finally {
+      if (success) {
+        IOUtils.close(this);
+      } else {
+        IOUtils.closeWhileHandlingException(this);
+      }
+    }
+  }
+
+  private TermsWriter addField(FieldInfo field) throws IOException {
     //System.out.println("\nBTW.addField seg=" + segment + " field=" + field.name);
     assert currentField == null || currentField.name.compareTo(field.name) < 0;
     currentField = field;
@@ -131,11 +175,8 @@ public class BlockTermsWriter extends Fi
     return new TermsWriter(fieldIndexWriter, field, postingsWriter);
   }
 
-  @Override
   public void close() throws IOException {
-
     try {
-      
       final long dirStart = out.getFilePointer();
 
       out.writeVInt(fields.size());
@@ -148,6 +189,9 @@ public class BlockTermsWriter extends Fi
         }
         out.writeVLong(field.sumDocFreq);
         out.writeVInt(field.docCount);
+        if (VERSION_CURRENT >= VERSION_META_ARRAY) {
+          out.writeVInt(field.longsSize);
+        }
       }
       writeTrailer(dirStart);
     } finally {
@@ -161,18 +205,20 @@ public class BlockTermsWriter extends Fi
   
   private static class TermEntry {
     public final BytesRef term = new BytesRef();
-    public TermStats stats;
+    public BlockTermState state;
   }
 
-  class TermsWriter extends TermsConsumer {
+  class TermsWriter {
     private final FieldInfo fieldInfo;
     private final PostingsWriterBase postingsWriter;
     private final long termsStartPointer;
     private long numTerms;
     private final TermsIndexWriterBase.FieldWriter fieldIndexWriter;
+    private final FixedBitSet docsSeen;
     long sumTotalTermFreq;
     long sumDocFreq;
     int docCount;
+    int longsSize;
 
     private TermEntry[] pendingTerms;
 
@@ -185,35 +231,32 @@ public class BlockTermsWriter extends Fi
     {
       this.fieldInfo = fieldInfo;
       this.fieldIndexWriter = fieldIndexWriter;
+      this.docsSeen = new FixedBitSet(maxDoc);
       pendingTerms = new TermEntry[32];
       for(int i=0;i<pendingTerms.length;i++) {
         pendingTerms[i] = new TermEntry();
       }
       termsStartPointer = out.getFilePointer();
-      postingsWriter.setField(fieldInfo);
       this.postingsWriter = postingsWriter;
+      this.longsSize = postingsWriter.setField(fieldInfo);
     }
     
-    @Override
-    public Comparator<BytesRef> getComparator() {
-      return BytesRef.getUTF8SortedAsUnicodeComparator();
-    }
-
-    @Override
-    public PostingsConsumer startTerm(BytesRef text) throws IOException {
-      //System.out.println("BTW: startTerm term=" + fieldInfo.name + ":" + text.utf8ToString() + " " + text + " seg=" + segment);
-      postingsWriter.startTerm();
-      return postingsWriter;
-    }
-
     private final BytesRef lastPrevTerm = new BytesRef();
 
-    @Override
-    public void finishTerm(BytesRef text, TermStats stats) throws IOException {
+    void write(BytesRef text, TermsEnum termsEnum) throws IOException {
 
-      assert stats.docFreq > 0;
+      BlockTermState state = postingsWriter.writeTerm(text, termsEnum, docsSeen);
+      if (state == null) {
+        // No docs for this term:
+        return;
+      }
+      sumDocFreq += state.docFreq;
+      sumTotalTermFreq += state.totalTermFreq;
+
+      assert state.docFreq > 0;
       //System.out.println("BTW: finishTerm term=" + fieldInfo.name + ":" + text.utf8ToString() + " " + text + " seg=" + segment + " df=" + stats.docFreq);
 
+      TermStats stats = new TermStats(state.docFreq, state.totalTermFreq);
       final boolean isIndexTerm = fieldIndexWriter.checkIndexTerm(text, stats);
 
       if (isIndexTerm) {
@@ -237,17 +280,14 @@ public class BlockTermsWriter extends Fi
       }
       final TermEntry te = pendingTerms[pendingCount];
       te.term.copyBytes(text);
-      te.stats = stats;
+      te.state = state;
 
       pendingCount++;
-
-      postingsWriter.finishTerm(stats);
       numTerms++;
     }
 
     // Finishes all terms in this field
-    @Override
-    public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException {
+    void finish() throws IOException {
       if (pendingCount > 0) {
         flushBlock();
       }
@@ -262,9 +302,10 @@ public class BlockTermsWriter extends Fi
         fields.add(new FieldMetaData(fieldInfo,
                                      numTerms,
                                      termsStartPointer,
-                                     sumTotalTermFreq,
+                                     fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0 ? sumTotalTermFreq : -1,
                                      sumDocFreq,
-                                     docCount));
+                                     docsSeen.cardinality(),
+                                     longsSize));
       }
     }
 
@@ -285,6 +326,7 @@ public class BlockTermsWriter extends Fi
     }
 
     private final RAMOutputStream bytesWriter = new RAMOutputStream();
+    private final RAMOutputStream bufferWriter = new RAMOutputStream();
 
     private void flushBlock() throws IOException {
       //System.out.println("BTW.flushBlock seg=" + segment + " pendingCount=" + pendingCount + " fp=" + out.getFilePointer());
@@ -318,19 +360,34 @@ public class BlockTermsWriter extends Fi
       // TODO: cutover to better intblock codec.  simple64?
       // write prefix, suffix first:
       for(int termCount=0;termCount<pendingCount;termCount++) {
-        final TermStats stats = pendingTerms[termCount].stats;
-        assert stats != null;
-        bytesWriter.writeVInt(stats.docFreq);
+        final BlockTermState state = pendingTerms[termCount].state;
+        assert state != null;
+        bytesWriter.writeVInt(state.docFreq);
         if (fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
-          bytesWriter.writeVLong(stats.totalTermFreq-stats.docFreq);
+          bytesWriter.writeVLong(state.totalTermFreq-state.docFreq);
         }
       }
+      out.writeVInt((int) bytesWriter.getFilePointer());
+      bytesWriter.writeTo(out);
+      bytesWriter.reset();
 
+      // 4th pass: write the metadata 
+      long[] longs = new long[longsSize];
+      boolean absolute = true;
+      for(int termCount=0;termCount<pendingCount;termCount++) {
+        final BlockTermState state = pendingTerms[termCount].state;
+        postingsWriter.encodeTerm(longs, bufferWriter, fieldInfo, state, absolute);
+        for (int i = 0; i < longsSize; i++) {
+          bytesWriter.writeVLong(longs[i]);
+        }
+        bufferWriter.writeTo(bytesWriter);
+        bufferWriter.reset();
+        absolute = false;
+      }
       out.writeVInt((int) bytesWriter.getFilePointer());
       bytesWriter.writeTo(out);
       bytesWriter.reset();
 
-      postingsWriter.flushTermsBlock(pendingCount, pendingCount);
       lastPrevTerm.copyBytes(pendingTerms[pendingCount-1].term);
       pendingCount = 0;
     }
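
The "4th pass" above is the write side of the same contract: per-term longs go
out as vLongs, absolute for the first term of a block and deltas afterwards,
followed by each term's opaque metadata bytes. A mirror sketch of the vLong
part (it assumes the longs are monotonic file pointers, so each delta fits a
non-negative vLong):

    import java.io.IOException;
    import org.apache.lucene.store.RAMOutputStream;

    class MetadataEncodeSketch {
      static void encodeBlock(RAMOutputStream metaOut, long[][] perTermLongs)
          throws IOException {
        if (perTermLongs.length == 0) {
          return;
        }
        long[] last = new long[perTermLongs[0].length];
        boolean absolute = true;
        for (long[] longs : perTermLongs) {
          for (int i = 0; i < longs.length; i++) {
            // Absolute value for the block's first term, delta afterwards:
            metaOut.writeVLong(absolute ? longs[i] : longs[i] - last[i]);
            last[i] = longs[i];
          }
          absolute = false;
        }
      }
    }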

Modified: lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexReader.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexReader.java (original)
+++ lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexReader.java Mon Oct 21 18:58:24 2013
@@ -256,6 +256,12 @@ public class FixedGapTermsIndexReader ex
         clone.close();
       }
     }
+    
+    /** Returns approximate RAM bytes used */
+    public long ramBytesUsed() {
+      return ((termOffsets!=null)? termOffsets.ramBytesUsed() : 0) + 
+          ((termsDictOffsets!=null)? termsDictOffsets.ramBytesUsed() : 0);
+    }
   }
 
   @Override
@@ -271,4 +277,15 @@ public class FixedGapTermsIndexReader ex
     dirOffset = input.readLong();
     input.seek(dirOffset);
   }
+
+  @Override
+  public long ramBytesUsed() {
+    long sizeInBytes = ((termBytes!=null) ? termBytes.ramBytesUsed() : 0) + 
+        ((termBytesReader!=null)? termBytesReader.ramBytesUsed() : 0);
+    
+    for(FieldIndexData entry : fields.values()) {
+      sizeInBytes += entry.ramBytesUsed();
+    }
+    return sizeInBytes;
+  }
 }

Modified: lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/TermsIndexReaderBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/TermsIndexReaderBase.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/TermsIndexReaderBase.java (original)
+++ lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/TermsIndexReaderBase.java Mon Oct 21 18:58:24 2013
@@ -70,4 +70,7 @@ public abstract class TermsIndexReaderBa
     /** Only implemented if {@link TermsIndexReaderBase#supportsOrd()} returns true. */
     public abstract long ord();
   }
+
+  /** Returns approximate RAM bytes used */
+  public abstract long ramBytesUsed();
 }
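
Both concrete index readers implement this new method the same way: null-safe
sums of per-component footprints plus a loop over per-field entries. The
pattern in isolation (illustrative only; the RamAccountable interface here is
hypothetical, as these Lucene classes declare ramBytesUsed directly rather
than through a shared interface):

    import java.util.List;

    class RamAccountingSketch {
      // Hypothetical interface standing in for "anything that can report
      // its approximate RAM footprint".
      interface RamAccountable {
        long ramBytesUsed();
      }

      static long totalRam(RamAccountable top,
          List<? extends RamAccountable> fields) {
        long size = top == null ? 0 : top.ramBytesUsed(); // absent part = 0
        for (RamAccountable f : fields) {
          size += f.ramBytesUsed();
        }
        return size;
      }
    }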

Modified: lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexReader.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexReader.java (original)
+++ lucene/dev/branches/lucene4956/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexReader.java Mon Oct 21 18:58:24 2013
@@ -169,6 +169,11 @@ public class VariableGapTermsIndexReader
       w.close();
       */
     }
+
+    /** Returns approximate RAM bytes used */
+    public long ramBytesUsed() {
+      return fst == null ? 0 : fst.sizeInBytes();
+    }
   }
 
   @Override
@@ -191,4 +196,13 @@ public class VariableGapTermsIndexReader
     }
     input.seek(dirOffset);
   }
+
+  @Override
+  public long ramBytesUsed() {
+    long sizeInBytes = 0;
+    for(FieldIndexData entry : fields.values()) {
+      sizeInBytes += entry.ramBytesUsed();
+    }
+    return sizeInBytes;
+  }
 }