Posted to commits@lucene.apache.org by ja...@apache.org on 2013/05/30 09:53:46 UTC

svn commit: r1487777 [14/50] - in /lucene/dev/branches/security: ./ dev-tools/ dev-tools/eclipse/dot.settings/ dev-tools/idea/.idea/ dev-tools/idea/.idea/libraries/ dev-tools/idea/lucene/replicator/ dev-tools/maven/ dev-tools/maven/lucene/ dev-tools/ma...

Modified: lucene/dev/branches/security/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilterFactory.java Thu May 30 07:53:18 2013
@@ -18,7 +18,6 @@ package org.apache.lucene.analysis.phone
  */
 
 import java.io.StringReader;
-import java.util.Collections;
 import java.util.HashMap;
 import java.util.Map;
 
@@ -29,10 +28,7 @@ import org.apache.lucene.analysis.TokenS
 /** Simple tests for {@link BeiderMorseFilterFactory} */
 public class TestBeiderMorseFilterFactory extends BaseTokenStreamTestCase {
   public void testBasics() throws Exception {
-    BeiderMorseFilterFactory factory = new BeiderMorseFilterFactory();
-    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
-    Map<String, String> args = Collections.emptyMap();
-    factory.init(args);
+    BeiderMorseFilterFactory factory = new BeiderMorseFilterFactory(new HashMap<String,String>());
     TokenStream ts = factory.create(new MockTokenizer(new StringReader("Weinberg"), MockTokenizer.WHITESPACE, false));
     assertTokenStreamContents(ts,
         new String[] { "vDnbirk", "vanbirk", "vinbirk", "wDnbirk", "wanbirk", "winbirk" },
@@ -42,10 +38,9 @@ public class TestBeiderMorseFilterFactor
   }
   
   public void testLanguageSet() throws Exception {
-    BeiderMorseFilterFactory factory = new BeiderMorseFilterFactory();
     Map<String,String> args = new HashMap<String,String>();
     args.put("languageSet", "polish");
-    factory.init(args);
+    BeiderMorseFilterFactory factory = new BeiderMorseFilterFactory(args);
     TokenStream ts = factory.create(new MockTokenizer(new StringReader("Weinberg"), MockTokenizer.WHITESPACE, false));
     assertTokenStreamContents(ts,
         new String[] { "vDmbYrk", "vDmbirk", "vambYrk", "vambirk", "vimbYrk", "vimbirk" },
@@ -55,11 +50,10 @@ public class TestBeiderMorseFilterFactor
   }
   
   public void testOptions() throws Exception {
-    BeiderMorseFilterFactory factory = new BeiderMorseFilterFactory();
     Map<String,String> args = new HashMap<String,String>();
     args.put("nameType", "ASHKENAZI");
     args.put("ruleType", "EXACT");
-    factory.init(args);
+    BeiderMorseFilterFactory factory = new BeiderMorseFilterFactory(args);
     TokenStream ts = factory.create(new MockTokenizer(new StringReader("Weinberg"), MockTokenizer.WHITESPACE, false));
     assertTokenStreamContents(ts,
         new String[] { "vajnberk" },
@@ -67,4 +61,16 @@ public class TestBeiderMorseFilterFactor
         new int[] { 8 },
         new int[] { 1 });
   }
+  
+  /** Test that bogus arguments result in an exception */
+  public void testBogusArguments() throws Exception {
+    try {
+      new BeiderMorseFilterFactory(new HashMap<String,String>() {{
+        put("bogusArg", "bogusValue");
+      }});
+      fail();
+    } catch (IllegalArgumentException expected) {
+      assertTrue(expected.getMessage().contains("Unknown parameters"));
+    }
+  }
 }

Modified: lucene/dev/branches/security/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestDoubleMetaphoneFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestDoubleMetaphoneFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestDoubleMetaphoneFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestDoubleMetaphoneFilterFactory.java Thu May 30 07:53:18 2013
@@ -30,8 +30,7 @@ import org.apache.lucene.analysis.tokena
 public class TestDoubleMetaphoneFilterFactory extends BaseTokenStreamTestCase {
 
   public void testDefaults() throws Exception {
-    DoubleMetaphoneFilterFactory factory = new DoubleMetaphoneFilterFactory();
-    factory.init(new HashMap<String, String>());
+    DoubleMetaphoneFilterFactory factory = new DoubleMetaphoneFilterFactory(new HashMap<String, String>());
     TokenStream inputStream = new MockTokenizer(new StringReader("international"), MockTokenizer.WHITESPACE, false);
 
     TokenStream filteredStream = factory.create(inputStream);
@@ -40,11 +39,10 @@ public class TestDoubleMetaphoneFilterFa
   }
 
   public void testSettingSizeAndInject() throws Exception {
-    DoubleMetaphoneFilterFactory factory = new DoubleMetaphoneFilterFactory();
-    Map<String, String> parameters = new HashMap<String, String>();
+    Map<String,String> parameters = new HashMap<String,String>();
     parameters.put("inject", "false");
     parameters.put("maxCodeLength", "8");
-    factory.init(parameters);
+    DoubleMetaphoneFilterFactory factory = new DoubleMetaphoneFilterFactory(parameters);
 
     TokenStream inputStream = new MockTokenizer(new StringReader("international"), MockTokenizer.WHITESPACE, false);
 
@@ -57,8 +55,7 @@ public class TestDoubleMetaphoneFilterFa
    * Ensure that reset() removes any state (buffered tokens)
    */
   public void testReset() throws Exception {
-    DoubleMetaphoneFilterFactory factory = new DoubleMetaphoneFilterFactory();
-    factory.init(new HashMap<String, String>());
+    DoubleMetaphoneFilterFactory factory = new DoubleMetaphoneFilterFactory(new HashMap<String, String>());
     TokenStream inputStream = new MockTokenizer(new StringReader("international"), MockTokenizer.WHITESPACE, false);
 
     TokenStream filteredStream = factory.create(inputStream);
@@ -74,4 +71,16 @@ public class TestDoubleMetaphoneFilterFa
     // ensure there are no more tokens, such as ANTRNXNL
     assertFalse(filteredStream.incrementToken());
   }
+  
+  /** Test that bogus arguments result in an exception */
+  public void testBogusArguments() throws Exception {
+    try {
+      new DoubleMetaphoneFilterFactory(new HashMap<String,String>() {{
+        put("bogusArg", "bogusValue");
+      }});
+      fail();
+    } catch (IllegalArgumentException expected) {
+      assertTrue(expected.getMessage().contains("Unknown parameters"));
+    }
+  }
 }

Modified: lucene/dev/branches/security/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilterFactory.java Thu May 30 07:53:18 2013
@@ -29,102 +29,107 @@ import org.apache.lucene.analysis.MockTo
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.util.ClasspathResourceLoader;
-import org.apache.lucene.util.LuceneTestCase.Slow;
 
-
-/**
- *
- */
-@Slow
 public class TestPhoneticFilterFactory extends BaseTokenStreamTestCase {
   
-  private static final int REPEATS = 100000;
-
   /**
    * Case: default
    */
-  public void testFactory() throws IOException {
-    Map<String,String> args = new HashMap<String, String>();
-    
-    PhoneticFilterFactory ff = new PhoneticFilterFactory();
-    
-    args.put( PhoneticFilterFactory.ENCODER, "Metaphone" );
-    ff.init( args );
-    ff.inform(new ClasspathResourceLoader(ff.getClass()));
-    assertTrue( ff.getEncoder() instanceof Metaphone );
-    assertTrue( ff.inject ); // default
-
-    args.put( PhoneticFilterFactory.INJECT, "false" );
-    ff.init( args );
-    ff.inform(new ClasspathResourceLoader(ff.getClass()));
-    assertFalse( ff.inject );
-
-    args.put( PhoneticFilterFactory.MAX_CODE_LENGTH, "2");
-    ff.init(args);
-    ff.inform(new ClasspathResourceLoader(ff.getClass()));
-    assertEquals(2, ((Metaphone) ff.getEncoder()).getMaxCodeLen());
+  public void testFactoryDefaults() throws IOException {
+    Map<String,String> args = new HashMap<String,String>();
+    args.put(PhoneticFilterFactory.ENCODER, "Metaphone");
+    PhoneticFilterFactory factory = new PhoneticFilterFactory(args);
+    factory.inform(new ClasspathResourceLoader(factory.getClass()));
+    assertTrue(factory.getEncoder() instanceof Metaphone);
+    assertTrue(factory.inject); // default
+  }
+  
+  public void testInjectFalse() throws IOException {
+    Map<String,String> args = new HashMap<String,String>();
+    args.put(PhoneticFilterFactory.ENCODER, "Metaphone");
+    args.put(PhoneticFilterFactory.INJECT, "false");
+    PhoneticFilterFactory factory = new PhoneticFilterFactory(args);
+    factory.inform(new ClasspathResourceLoader(factory.getClass()));
+    assertFalse(factory.inject);
+  }
+  
+  public void testMaxCodeLength() throws IOException {
+    Map<String,String> args = new HashMap<String,String>();
+    args.put(PhoneticFilterFactory.ENCODER, "Metaphone");
+    args.put(PhoneticFilterFactory.MAX_CODE_LENGTH, "2");
+    PhoneticFilterFactory factory = new PhoneticFilterFactory(args);
+    factory.inform(new ClasspathResourceLoader(factory.getClass()));
+    assertEquals(2, ((Metaphone) factory.getEncoder()).getMaxCodeLen());
   }
   
   /**
    * Case: Failures and Exceptions
    */
-  public void testFactoryCaseFailure() throws IOException {
-    Map<String,String> args = new HashMap<String, String>();
-    
-    PhoneticFilterFactory ff = new PhoneticFilterFactory();
-    ClasspathResourceLoader loader = new ClasspathResourceLoader(ff.getClass());
-
+  public void testMissingEncoder() throws IOException {
     try {
-      ff.init( args );
-      ff.inform( loader );
-      fail( "missing encoder parameter" );
+      new PhoneticFilterFactory(new HashMap<String,String>());
+      fail();
+    } catch (IllegalArgumentException expected) {
+      assertTrue(expected.getMessage().contains("Configuration Error: missing parameter 'encoder'"));
     }
-    catch( Exception ex ) {}
-    args.put( PhoneticFilterFactory.ENCODER, "XXX" );
+  }
+  
+  public void testUnknownEncoder() throws IOException {
     try {
-      ff.init( args );
-      ff.inform( loader );
-      fail( "unknown encoder parameter" );
+      Map<String,String> args = new HashMap<String,String>();
+      args.put("encoder", "XXX");
+      PhoneticFilterFactory factory = new PhoneticFilterFactory(args);
+      factory.inform(new ClasspathResourceLoader(factory.getClass()));
+      fail();
+    } catch (IllegalArgumentException expected) {
+      assertTrue(expected.getMessage().contains("Error loading encoder"));
     }
-    catch( Exception ex ) {}
-    args.put( PhoneticFilterFactory.ENCODER, "org.apache.commons.codec.language.NonExistence" );
+  }
+  
+  public void testUnknownEncoderReflection() throws IOException {
     try {
-      ff.init( args );
-      ff.inform( loader );
-      fail( "unknown encoder parameter" );
+      Map<String,String> args = new HashMap<String,String>();
+      args.put("encoder", "org.apache.commons.codec.language.NonExistence");
+      PhoneticFilterFactory factory = new PhoneticFilterFactory(args);
+      factory.inform(new ClasspathResourceLoader(factory.getClass()));
+      fail();
+    } catch (IllegalArgumentException expected) {
+      assertTrue(expected.getMessage().contains("Error loading encoder"));
     }
-    catch( Exception ex ) {}
   }
   
   /**
    * Case: Reflection
    */
-  public void testFactoryCaseReflection() throws IOException {
+  public void testFactoryReflection() throws IOException {
     Map<String,String> args = new HashMap<String, String>();
-    
-    PhoneticFilterFactory ff = new PhoneticFilterFactory();
-    ClasspathResourceLoader loader = new ClasspathResourceLoader(ff.getClass());
-
-    args.put( PhoneticFilterFactory.ENCODER, "org.apache.commons.codec.language.Metaphone" );
-    ff.init( args );
-    ff.inform( loader );
-    assertTrue( ff.getEncoder() instanceof Metaphone );
-    assertTrue( ff.inject ); // default
+    args.put(PhoneticFilterFactory.ENCODER, "org.apache.commons.codec.language.Metaphone");
+    PhoneticFilterFactory factory = new PhoneticFilterFactory(args);
+    factory.inform(new ClasspathResourceLoader(factory.getClass()));
+    assertTrue(factory.getEncoder() instanceof Metaphone);
+    assertTrue(factory.inject); // default
+  }
 
-    // we use "Caverphone2" as it is registered in the REGISTRY as Caverphone,
-    // so this effectively tests reflection without package name
-    args.put( PhoneticFilterFactory.ENCODER, "Caverphone2" );
-    ff.init( args );
-    ff.inform( loader );
-    assertTrue( ff.getEncoder() instanceof Caverphone2 );
-    assertTrue( ff.inject ); // default
-    
-    // cross check with registry
-    args.put( PhoneticFilterFactory.ENCODER, "Caverphone" );
-    ff.init( args );
-    ff.inform( loader );
-    assertTrue( ff.getEncoder() instanceof Caverphone2 );
-    assertTrue( ff.inject ); // default
+  /** 
+   * we use "Caverphone2" as it is registered in the REGISTRY as Caverphone,
+   * so this effectively tests reflection without package name
+   */
+  public void testFactoryReflectionCaverphone2() throws IOException {
+    Map<String,String> args = new HashMap<String, String>();
+    args.put(PhoneticFilterFactory.ENCODER, "Caverphone2");
+    PhoneticFilterFactory factory = new PhoneticFilterFactory(args);
+    factory.inform(new ClasspathResourceLoader(factory.getClass()));
+    assertTrue(factory.getEncoder() instanceof Caverphone2);
+    assertTrue(factory.inject); // default
+  }
+  
+  public void testFactoryReflectionCaverphone() throws IOException {
+    Map<String,String> args = new HashMap<String, String>();
+    args.put(PhoneticFilterFactory.ENCODER, "Caverphone");
+    PhoneticFilterFactory factory = new PhoneticFilterFactory(args);
+    factory.inform(new ClasspathResourceLoader(factory.getClass()));
+    assertTrue(factory.getEncoder() instanceof Caverphone2);
+    assertTrue(factory.inject); // default
   }
   
   public void testAlgorithms() throws Exception {
@@ -161,37 +166,28 @@ public class TestPhoneticFilterFactory e
         new String[] { "67", "862", "67", "862" });
   }
   
+  /** Test that bogus arguments result in an exception */
+  public void testBogusArguments() throws Exception {
+    try {
+      new PhoneticFilterFactory(new HashMap<String,String>() {{
+        put("encoder", "Metaphone");
+        put("bogusArg", "bogusValue");
+      }});
+      fail();
+    } catch (IllegalArgumentException expected) {
+      assertTrue(expected.getMessage().contains("Unknown parameters"));
+    }
+  }
+  
   static void assertAlgorithm(String algName, String inject, String input,
       String[] expected) throws Exception {
     Tokenizer tokenizer = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
     Map<String,String> args = new HashMap<String,String>();
     args.put("encoder", algName);
     args.put("inject", inject);
-    PhoneticFilterFactory factory = new PhoneticFilterFactory();
-    factory.init(args);
+    PhoneticFilterFactory factory = new PhoneticFilterFactory(args);
     factory.inform(new ClasspathResourceLoader(factory.getClass()));
     TokenStream stream = factory.create(tokenizer);
     assertTokenStreamContents(stream, expected);
   }
-  
-  public void testSpeed() throws Exception {
-    checkSpeedEncoding("Metaphone", "easgasg", "ESKS");
-    checkSpeedEncoding("DoubleMetaphone", "easgasg", "ASKS");
-    checkSpeedEncoding("Soundex", "easgasg", "E220");
-    checkSpeedEncoding("RefinedSoundex", "easgasg", "E034034");
-    checkSpeedEncoding("Caverphone", "Carlene", "KLN1111111");
-    checkSpeedEncoding("ColognePhonetic", "Schmitt", "862");
-  }
-  
-  private void checkSpeedEncoding(String encoder, String toBeEncoded, String estimated) throws Exception {
-    long start = System.currentTimeMillis();
-    for ( int i=0; i<REPEATS; i++) {
-        assertAlgorithm(encoder, "false", toBeEncoded,
-                new String[] { estimated });
-    }
-    long duration = System.currentTimeMillis()-start;
-    if (VERBOSE)
-      System.out.println(encoder + " encodings per msec: "+(REPEATS/duration));
-  }
-  
 }

Modified: lucene/dev/branches/security/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java (original)
+++ lucene/dev/branches/security/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java Thu May 30 07:53:18 2013
@@ -52,10 +52,6 @@ public final class SentenceTokenizer ext
     super(reader);
   }
 
-  public SentenceTokenizer(AttributeSource source, Reader reader) {
-    super(source, reader);
-  }
-
   public SentenceTokenizer(AttributeFactory factory, Reader reader) {
     super(factory, reader);
   }

Modified: lucene/dev/branches/security/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseSentenceTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseSentenceTokenizerFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseSentenceTokenizerFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseSentenceTokenizerFactory.java Thu May 30 07:53:18 2013
@@ -18,18 +18,27 @@ package org.apache.lucene.analysis.cn.sm
  */
 
 import java.io.Reader;
+import java.util.Map;
 
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.cn.smart.SentenceTokenizer;
 import org.apache.lucene.analysis.util.TokenizerFactory;
+import org.apache.lucene.util.AttributeSource.AttributeFactory;
 
 /**
  * Factory for the SmartChineseAnalyzer {@link SentenceTokenizer}
  * @lucene.experimental
  */
 public class SmartChineseSentenceTokenizerFactory extends TokenizerFactory {
+  
+  /** Creates a new SmartChineseSentenceTokenizerFactory */
+  public SmartChineseSentenceTokenizerFactory(Map<String,String> args) {
+    super(args);
+    if (!args.isEmpty()) {
+      throw new IllegalArgumentException("Unknown parameters: " + args);
+    }
+  }
+  
   @Override
-  public Tokenizer create(Reader input) {
-    return new SentenceTokenizer(input);
+  public SentenceTokenizer create(AttributeFactory factory, Reader input) {
+    return new SentenceTokenizer(factory, input);
   }
 }

Modified: lucene/dev/branches/security/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseWordTokenFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseWordTokenFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseWordTokenFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseWordTokenFilterFactory.java Thu May 30 07:53:18 2013
@@ -17,6 +17,8 @@ package org.apache.lucene.analysis.cn.sm
  * limitations under the License.
  */
 
+import java.util.Map;
+
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.cn.smart.WordTokenFilter;
@@ -32,6 +34,15 @@ import org.apache.lucene.analysis.util.T
  * @lucene.experimental
  */
 public class SmartChineseWordTokenFilterFactory extends TokenFilterFactory {
+  
+  /** Creates a new SmartChineseWordTokenFilterFactory */
+  public SmartChineseWordTokenFilterFactory(Map<String,String> args) {
+    super(args);
+    if (!args.isEmpty()) {
+      throw new IllegalArgumentException("Unknown parameters: " + args);
+    }
+  }
+  
   @Override
   public TokenFilter create(TokenStream input) {
       return new WordTokenFilter(input);

Modified: lucene/dev/branches/security/lucene/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseFactories.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseFactories.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseFactories.java (original)
+++ lucene/dev/branches/security/lucene/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseFactories.java Thu May 30 07:53:18 2013
@@ -17,12 +17,13 @@ package org.apache.lucene.analysis.cn.sm
  * limitations under the License.
  */
 
+import java.io.Reader;
 import java.io.StringReader;
+import java.util.HashMap;
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 /** 
  * Tests for {@link SmartChineseSentenceTokenizerFactory} and 
@@ -31,28 +32,49 @@ import org.apache.lucene.analysis.core.W
 public class TestSmartChineseFactories extends BaseTokenStreamTestCase {
   /** Test showing the behavior with whitespace */
   public void testSimple() throws Exception {
-    String sentence = "我购买了道具和服装。";
-    WhitespaceTokenizer ws = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(sentence));
-    SmartChineseWordTokenFilterFactory factory = new SmartChineseWordTokenFilterFactory();
-    TokenStream ts = factory.create(ws);
+    Reader reader = new StringReader("我购买了道具和服装。");
+    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    SmartChineseWordTokenFilterFactory factory = new SmartChineseWordTokenFilterFactory(new HashMap<String,String>());
+    stream = factory.create(stream);
     // TODO: fix smart chinese to not emit punctuation tokens
     // at the moment: you have to clean up with WDF, or use the stoplist, etc
-    assertTokenStreamContents(ts, 
+    assertTokenStreamContents(stream, 
        new String[] { "我", "购买", "了", "道具", "和", "服装", "," });
   }
   
   /** Test showing the behavior with whitespace */
   public void testTokenizer() throws Exception {
-    String sentence = "我购买了道具和服装。我购买了道具和服装。";
-    SmartChineseSentenceTokenizerFactory tokenizerFactory = new SmartChineseSentenceTokenizerFactory();
-    Tokenizer tokenizer = tokenizerFactory.create(new StringReader(sentence));
-    SmartChineseWordTokenFilterFactory factory = new SmartChineseWordTokenFilterFactory();
-    TokenStream ts = factory.create(tokenizer);
+    Reader reader = new StringReader("我购买了道具和服装。我购买了道具和服装。");
+    SmartChineseSentenceTokenizerFactory tokenizerFactory = new SmartChineseSentenceTokenizerFactory(new HashMap<String,String>());
+    TokenStream stream = tokenizerFactory.create(reader);
+    SmartChineseWordTokenFilterFactory factory = new SmartChineseWordTokenFilterFactory(new HashMap<String,String>());
+    stream = factory.create(stream);
     // TODO: fix smart chinese to not emit punctuation tokens
     // at the moment: you have to clean up with WDF, or use the stoplist, etc
-    assertTokenStreamContents(ts, 
+    assertTokenStreamContents(stream, 
        new String[] { "我", "购买", "了", "道具", "和", "服装", ",", 
         "我", "购买", "了", "道具", "和", "服装", ","
         });
   }
+  
+  /** Test that bogus arguments result in an exception */
+  public void testBogusArguments() throws Exception {
+    try {
+      new SmartChineseSentenceTokenizerFactory(new HashMap<String,String>() {{
+        put("bogusArg", "bogusValue");
+      }});
+      fail();
+    } catch (IllegalArgumentException expected) {
+      assertTrue(expected.getMessage().contains("Unknown parameters"));
+    }
+    
+    try {
+      new SmartChineseWordTokenFilterFactory(new HashMap<String,String>() {{
+        put("bogusArg", "bogusValue");
+      }});
+      fail();
+    } catch (IllegalArgumentException expected) {
+      assertTrue(expected.getMessage().contains("Unknown parameters"));
+    }
+  }
 }

Modified: lucene/dev/branches/security/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java (original)
+++ lucene/dev/branches/security/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java Thu May 30 07:53:18 2013
@@ -23,7 +23,7 @@ import java.io.Reader;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.core.LowerCaseFilter;
 import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.standard.StandardFilter;
@@ -112,7 +112,7 @@ public final class PolishAnalyzer extend
 
   /**
    * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
-   * provided this analyzer will add a {@link KeywordMarkerFilter} before
+   * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
    * stemming.
    * 
    * @param matchVersion lucene compatibility version
@@ -135,7 +135,7 @@ public final class PolishAnalyzer extend
    *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
    *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
-   *         , {@link KeywordMarkerFilter} if a stem exclusion set is
+   *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided and {@link StempelFilter}.
    */
   @Override
@@ -146,7 +146,7 @@ public final class PolishAnalyzer extend
     result = new LowerCaseFilter(matchVersion, result);
     result = new StopFilter(matchVersion, result, stopwords);
     if(!stemExclusionSet.isEmpty())
-      result = new KeywordMarkerFilter(result, stemExclusionSet);
+      result = new SetKeywordMarkerFilter(result, stemExclusionSet);
     result = new StempelFilter(result, new StempelStemmer(stemTable));
     return new TokenStreamComponents(source, result);
   }

Modified: lucene/dev/branches/security/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/stempel/StempelPolishStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/stempel/StempelPolishStemFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/stempel/StempelPolishStemFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/stempel/StempelPolishStemFilterFactory.java Thu May 30 07:53:18 2013
@@ -17,11 +17,12 @@ package org.apache.lucene.analysis.stemp
  * limitations under the License.
  */
 
+import java.util.Map;
+
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.pl.PolishAnalyzer;
 import org.apache.lucene.analysis.stempel.StempelFilter;
 import org.apache.lucene.analysis.stempel.StempelStemmer;
-import org.apache.lucene.analysis.util.AbstractAnalysisFactory; // javadocs
 import org.apache.lucene.analysis.util.TokenFilterFactory;
 
 /**
@@ -29,8 +30,13 @@ import org.apache.lucene.analysis.util.T
  */
 public class StempelPolishStemFilterFactory extends TokenFilterFactory {  
   
-  /** Sole constructor. See {@link AbstractAnalysisFactory} for initialization lifecycle. */
-  public StempelPolishStemFilterFactory() {}
+  /** Creates a new StempelPolishStemFilterFactory */
+  public StempelPolishStemFilterFactory(Map<String,String> args) {
+    super(args);
+    if (!args.isEmpty()) {
+      throw new IllegalArgumentException("Unknown parameters: " + args);
+    }
+  }
 
   @Override
   public TokenStream create(TokenStream input) {

Modified: lucene/dev/branches/security/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/stempel/TestStempelPolishStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/stempel/TestStempelPolishStemFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/stempel/TestStempelPolishStemFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/stempel/TestStempelPolishStemFilterFactory.java Thu May 30 07:53:18 2013
@@ -17,21 +17,36 @@ package org.apache.lucene.analysis.stemp
  * limitations under the License.
  */
 
+import java.io.Reader;
 import java.io.StringReader;
+import java.util.HashMap;
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 /**
  * Tests for {@link StempelPolishStemFilterFactory}
  */
 public class TestStempelPolishStemFilterFactory extends BaseTokenStreamTestCase {
   public void testBasics() throws Exception {
-    StringReader document = new StringReader("studenta studenci");
-    StempelPolishStemFilterFactory factory = new StempelPolishStemFilterFactory();
-    TokenStream ts = factory.create(new WhitespaceTokenizer(TEST_VERSION_CURRENT, document));
-    assertTokenStreamContents(ts,
+    Reader reader = new StringReader("studenta studenci");
+    StempelPolishStemFilterFactory factory = new StempelPolishStemFilterFactory(new HashMap<String,String>());
+    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    stream = factory.create(stream);
+    assertTokenStreamContents(stream,
         new String[] { "student", "student" });
   }
+  
+  /** Test that bogus arguments result in an exception */
+  public void testBogusArguments() throws Exception {
+    try {
+      new StempelPolishStemFilterFactory(new HashMap<String,String>() {{
+        put("bogusArg", "bogusValue");
+      }});
+      fail();
+    } catch (IllegalArgumentException expected) {
+      assertTrue(expected.getMessage().contains("Unknown parameters"));
+    }
+  }
 }

Modified: lucene/dev/branches/security/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java (original)
+++ lucene/dev/branches/security/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java Thu May 30 07:53:18 2013
@@ -44,8 +44,9 @@ public abstract class BaseUIMATokenizer 
   protected AnalysisEngine ae;
   protected CAS cas;
 
-  protected BaseUIMATokenizer(Reader reader, String descriptorPath, Map<String, Object> configurationParameters) {
-    super(reader);
+  protected BaseUIMATokenizer
+      (AttributeFactory factory, Reader reader, String descriptorPath, Map<String, Object> configurationParameters) {
+    super(factory, reader);
     this.descriptorPath = descriptorPath;
     this.configurationParameters = configurationParameters;
   }

Modified: lucene/dev/branches/security/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java (original)
+++ lucene/dev/branches/security/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java Thu May 30 07:53:18 2013
@@ -43,7 +43,12 @@ public final class UIMAAnnotationsTokeni
   private int finalOffset = 0;
 
   public UIMAAnnotationsTokenizer(String descriptorPath, String tokenType, Map<String, Object> configurationParameters, Reader input) {
-    super(input, descriptorPath, configurationParameters);
+    this(descriptorPath, tokenType, configurationParameters, AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input);
+  }
+
+  public UIMAAnnotationsTokenizer(String descriptorPath, String tokenType, Map<String, Object> configurationParameters, 
+                                  AttributeFactory factory, Reader input) {
+    super(factory, input, descriptorPath, configurationParameters);
     this.tokenTypeString = tokenType;
     this.termAttr = addAttribute(CharTermAttribute.class);
     this.offsetAttr = addAttribute(OffsetAttribute.class);

Modified: lucene/dev/branches/security/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizerFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizerFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizerFactory.java Thu May 30 07:53:18 2013
@@ -17,9 +17,8 @@ package org.apache.lucene.analysis.uima;
  * limitations under the License.
  */
 
-import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.util.TokenizerFactory;
-import org.apache.lucene.analysis.uima.UIMAAnnotationsTokenizer;
+import org.apache.lucene.util.AttributeSource.AttributeFactory;
 
 import java.io.Reader;
 import java.util.HashMap;
@@ -32,29 +31,18 @@ public class UIMAAnnotationsTokenizerFac
 
   private String descriptorPath;
   private String tokenType;
-  private Map<String, Object> configurationParameters;
-
-  @Override
-  public void init(Map<String, String> args) {
-    super.init(args);
-    configurationParameters = new HashMap<String, Object>();
-    for (String k : args.keySet()) {
-      if (k.equals("tokenType")) {
-        tokenType = args.get("tokenType");
-      } else if (k.equals("descriptorPath")) {
-        descriptorPath = args.get("descriptorPath");
-      } else {
-        configurationParameters.put(k, args.get(k));
-      }
-    }
-    if (descriptorPath == null || tokenType == null ) {
-      throw new IllegalArgumentException("descriptorPath and tokenType are mandatory");
-    }
+  private final Map<String,Object> configurationParameters = new HashMap<String,Object>();
 
+  /** Creates a new UIMAAnnotationsTokenizerFactory */
+  public UIMAAnnotationsTokenizerFactory(Map<String,String> args) {
+    super(args);
+    tokenType = require(args, "tokenType");
+    descriptorPath = require(args, "descriptorPath");
+    configurationParameters.putAll(args);
   }
 
   @Override
-  public Tokenizer create(Reader input) {
-    return new UIMAAnnotationsTokenizer(descriptorPath, tokenType, configurationParameters, input);
+  public UIMAAnnotationsTokenizer create(AttributeFactory factory, Reader input) {
+    return new UIMAAnnotationsTokenizer(descriptorPath, tokenType, configurationParameters, factory, input);
   }
 }

Modified: lucene/dev/branches/security/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java (original)
+++ lucene/dev/branches/security/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java Thu May 30 07:53:18 2013
@@ -53,7 +53,12 @@ public final class UIMATypeAwareAnnotati
   private int finalOffset = 0;
 
   public UIMATypeAwareAnnotationsTokenizer(String descriptorPath, String tokenType, String typeAttributeFeaturePath, Map<String, Object> configurationParameters, Reader input) {
-    super(input, descriptorPath, configurationParameters);
+    this(descriptorPath, tokenType, typeAttributeFeaturePath, configurationParameters, AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input);
+  }
+
+  public UIMATypeAwareAnnotationsTokenizer(String descriptorPath, String tokenType, String typeAttributeFeaturePath, 
+                                           Map<String, Object> configurationParameters, AttributeFactory factory, Reader input) {
+    super(factory, input, descriptorPath, configurationParameters);
     this.tokenTypeString = tokenType;
     this.termAttr = addAttribute(CharTermAttribute.class);
     this.typeAttr = addAttribute(TypeAttribute.class);

Modified: lucene/dev/branches/security/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizerFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizerFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizerFactory.java Thu May 30 07:53:18 2013
@@ -17,8 +17,8 @@ package org.apache.lucene.analysis.uima;
  * limitations under the License.
  */
 
-import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.util.TokenizerFactory;
+import org.apache.lucene.util.AttributeSource.AttributeFactory;
 
 import java.io.Reader;
 import java.util.HashMap;
@@ -32,30 +32,20 @@ public class UIMATypeAwareAnnotationsTok
   private String descriptorPath;
   private String tokenType;
   private String featurePath;
-  private Map<String, Object> configurationParameters;
+  private final Map<String,Object> configurationParameters = new HashMap<String,Object>();
 
-  @Override
-  public void init(Map<String, String> args) {
-    super.init(args);
-    configurationParameters = new HashMap<String, Object>();
-    for (String k : args.keySet()) {
-      if (k.equals("featurePath")) {
-        featurePath = args.get("featurePath");
-      } else if (k.equals("tokenType")) {
-        tokenType = args.get("tokenType");
-      } else if (k.equals("descriptorPath")) {
-        descriptorPath = args.get("descriptorPath");
-      } else {
-        configurationParameters.put(k, args.get(k));
-      }
-    }
-    if (descriptorPath == null || tokenType == null || featurePath == null) {
-      throw new IllegalArgumentException("descriptorPath, tokenType, and featurePath are mandatory");
-    }
+  /** Creates a new UIMATypeAwareAnnotationsTokenizerFactory */
+  public UIMATypeAwareAnnotationsTokenizerFactory(Map<String,String> args) {
+    super(args);
+    featurePath = require(args, "featurePath");
+    tokenType = require(args, "tokenType");
+    descriptorPath = require(args, "descriptorPath");
+    configurationParameters.putAll(args);
   }
 
   @Override
-  public Tokenizer create(Reader input) {
-    return new UIMATypeAwareAnnotationsTokenizer(descriptorPath, tokenType, featurePath, configurationParameters, input);
+  public UIMATypeAwareAnnotationsTokenizer create(AttributeFactory factory, Reader input) {
+    return new UIMATypeAwareAnnotationsTokenizer
+        (descriptorPath, tokenType, featurePath, configurationParameters, factory, input);
   }
 }

Modified: lucene/dev/branches/security/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java (original)
+++ lucene/dev/branches/security/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java Thu May 30 07:53:18 2013
@@ -81,7 +81,7 @@ public class UIMABaseAnalyzerTest extend
 
     // try the search over the first doc
     DirectoryReader directoryReader = DirectoryReader.open(dir);
-    IndexSearcher indexSearcher = new IndexSearcher(directoryReader);
+    IndexSearcher indexSearcher = newSearcher(directoryReader);
     TopDocs result = indexSearcher.search(new MatchAllDocsQuery(), 1);
     assertTrue(result.totalHits > 0);
     StoredDocument d = indexSearcher.doc(result.scoreDocs[0].doc);
@@ -102,7 +102,7 @@ public class UIMABaseAnalyzerTest extend
 
     directoryReader.close();
     directoryReader = DirectoryReader.open(dir);
-    indexSearcher = new IndexSearcher(directoryReader);
+    indexSearcher = newSearcher(directoryReader);
     result = indexSearcher.search(new MatchAllDocsQuery(), 2);
     StoredDocument d1 = indexSearcher.doc(result.scoreDocs[1].doc);
     assertNotNull(d1);
@@ -112,7 +112,7 @@ public class UIMABaseAnalyzerTest extend
     assertEquals(dogmasContents, d1.getField("contents").stringValue());
 
     // do a matchalldocs query to retrieve both docs
-    indexSearcher = new IndexSearcher(directoryReader);
+    indexSearcher = newSearcher(directoryReader);
     result = indexSearcher.search(new MatchAllDocsQuery(), 2);
     assertEquals(2, result.totalHits);
     writer.close();

Modified: lucene/dev/branches/security/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AnalyzerFactoryTask.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AnalyzerFactoryTask.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AnalyzerFactoryTask.java (original)
+++ lucene/dev/branches/security/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AnalyzerFactoryTask.java Thu May 30 07:53:18 2013
@@ -287,13 +287,6 @@ public class AnalyzerFactoryTask extends
    */
   private void createAnalysisPipelineComponent
       (StreamTokenizer stok, Class<? extends AbstractAnalysisFactory> clazz) {
-    final AbstractAnalysisFactory instance;
-    try {
-     instance = clazz.newInstance();
-    } catch (Exception e) {
-      throw new RuntimeException("Line #" + lineno(stok) + ": ", e);
-    }
-    Version luceneMatchVersion = null;
     Map<String,String> argMap = new HashMap<String,String>();
     boolean parenthetical = false;
     try {
@@ -347,16 +340,7 @@ public class AnalyzerFactoryTask extends
               case '"':
               case '\'':
               case StreamTokenizer.TT_WORD: {
-                if (argName.equalsIgnoreCase("luceneMatchVersion")) {
-                  try {
-                    luceneMatchVersion = Version.parseLeniently(argValue);
-                  } catch (IllegalArgumentException e) {
-                    throw new RuntimeException
-                        ("Line #" + lineno(stok) + ": Unrecognized luceneMatchVersion '" + argValue + "'", e);
-                  }
-                } else {
-                  argMap.put(argName, argValue);
-                }
+                argMap.put(argName, argValue);
                 break;
               }
               case StreamTokenizer.TT_EOF: {
@@ -370,12 +354,20 @@ public class AnalyzerFactoryTask extends
           }
         }
       }
-
-      instance.setLuceneMatchVersion
-          (null == luceneMatchVersion ? Version.LUCENE_CURRENT : luceneMatchVersion);
-      instance.init(argMap);
+      if (!argMap.containsKey("luceneMatchVersion")) {
+        argMap.put("luceneMatchVersion", Version.LUCENE_CURRENT.toString());
+      }
+      final AbstractAnalysisFactory instance;
+      try {
+        instance = clazz.getConstructor(Map.class).newInstance(argMap);
+      } catch (Exception e) {
+        throw new RuntimeException("Line #" + lineno(stok) + ": ", e);
+      }
       if (instance instanceof ResourceLoaderAware) {
         File baseDir = new File(getRunData().getConfig().get("work.dir", "work")).getAbsoluteFile();
+        if ( ! baseDir.isDirectory()) {
+          baseDir = new File(".").getAbsoluteFile();
+        }
         ((ResourceLoaderAware)instance).inform(new FilesystemResourceLoader(baseDir));
       }
       if (CharFilterFactory.class.isAssignableFrom(clazz)) {

Modified: lucene/dev/branches/security/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CommitIndexTask.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CommitIndexTask.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CommitIndexTask.java (original)
+++ lucene/dev/branches/security/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CommitIndexTask.java Thu May 30 07:53:18 2013
@@ -1,4 +1,5 @@
 package org.apache.lucene.benchmark.byTask.tasks;
+
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -49,7 +50,9 @@ public class CommitIndexTask extends Per
   public int doLogic() throws Exception {
     IndexWriter iw = getRunData().getIndexWriter();
     if (iw != null) {
-      iw.setCommitData(commitUserData);
+      if (commitUserData != null) {
+        iw.setCommitData(commitUserData);
+      }
       iw.commit();
     }
     

Modified: lucene/dev/branches/security/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/DocMakerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/DocMakerTest.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/DocMakerTest.java (original)
+++ lucene/dev/branches/security/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/DocMakerTest.java Thu May 30 07:53:18 2013
@@ -90,7 +90,7 @@ public class DocMakerTest extends Benchm
     tasks.doLogic();
     
     IndexReader reader = DirectoryReader.open(runData.getDirectory());
-    IndexSearcher searcher = new IndexSearcher(reader);
+    IndexSearcher searcher = newSearcher(reader);
     TopDocs td = searcher.search(new TermQuery(new Term("key", "value")), 10);
     assertEquals(numExpectedResults, td.totalHits);
     reader.close();

Modified: lucene/dev/branches/security/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocSourceTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocSourceTest.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocSourceTest.java (original)
+++ lucene/dev/branches/security/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocSourceTest.java Thu May 30 07:53:18 2013
@@ -151,7 +151,7 @@ public class LineDocSourceTest extends B
       }
       
       reader = DirectoryReader.open(runData.getDirectory());
-      searcher = new IndexSearcher(reader);
+      searcher = newSearcher(reader);
       TopDocs td = searcher.search(new TermQuery(new Term("body", "body")), 10);
       assertEquals(numAdds, td.totalHits);
       assertNotNull(td.scoreDocs[0]);

Modified: lucene/dev/branches/security/lucene/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/build.xml?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/build.xml (original)
+++ lucene/dev/branches/security/lucene/build.xml Thu May 30 07:53:18 2013
@@ -41,7 +41,6 @@
               excludes="build/**,site/**,tools/**,**/lib/*servlet-api*.jar"
   />
 
-
   <!-- ================================================================== -->
   <!-- Prepares the build directory                                       -->
   <!-- ================================================================== -->
@@ -160,7 +159,14 @@
   </target>
 
   <target name="check-licenses" depends="compile-tools,resolve,load-custom-tasks" description="Validate license stuff.">
-    <license-check-macro dir="${basedir}" licensedir="${common.dir}/licenses" />
+    <license-check-macro dir="${basedir}" licensedir="${common.dir}/licenses">
+      <additional-filters>
+        <replaceregex pattern="jetty([^/]+)$" replace="jetty" flags="gi" />
+        <replaceregex pattern="slf4j-([^/]+)$" replace="slf4j" flags="gi" />
+        <replaceregex pattern="javax\.servlet([^/]+)$" replace="javax.servlet" flags="gi" />
+        <replaceregex pattern="(bcmail|bcprov)-([^/]+)$" replace="\1" flags="gi" />
+      </additional-filters>
+    </license-check-macro>
   </target>
 
   <target name="check-forbidden-apis" depends="compile-tools,compile-test,install-forbidden-apis,-forbidden-apis-classpath,-check-forbidden-jdk-apis,-check-forbidden-test-apis,-check-system-out" description="Check forbidden API calls in compiled class files"/>
@@ -296,7 +302,7 @@
     <check-missing-javadocs dir="build/docs/core/org/apache/lucene/codecs" level="method"/>
   </target>
   
-  <target name="-ecj-javadoc-lint" depends="compile,compile-test,-ecj-resolve">
+  <target name="-ecj-javadoc-lint" depends="compile,compile-test,-ecj-javadoc-lint-unsupported,-ecj-resolve" if="ecj-javadoc-lint.supported">
     <subant target="-ecj-javadoc-lint" failonerror="true" inheritall="false">
       <propertyset refid="uptodate.and.compiled.properties"/>
       <fileset dir="core" includes="build.xml"/>
@@ -439,7 +445,7 @@
     <svn-export-source source.dir="."/>
 
     <!-- Exclude javadoc package-list files under licenses incompatible with the ASL -->
-    <delete dir="${svn.export.dir}/tools/javadoc/java6"/>
+    <delete dir="${svn.export.dir}/tools/javadoc/java7"/>
     <!-- Exclude clover license files incompatible with the ASL -->
     <delete dir="${svn.export.dir}/tools/clover"/>
 

Modified: lucene/dev/branches/security/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java (original)
+++ lucene/dev/branches/security/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java Thu May 30 07:53:18 2013
@@ -58,6 +58,9 @@ public class KNearestNeighborClassifier 
    */
   @Override
   public ClassificationResult<BytesRef> assignClass(String text) throws IOException {
+    if (mlt == null) {
+      throw new IOException("You must first call Classifier#train first");
+    }
     Query q = mlt.like(new StringReader(text), textFieldName);
     TopDocs topDocs = indexSearcher.search(q, k);
     return selectClassFromNeighbors(topDocs);

Modified: lucene/dev/branches/security/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java (original)
+++ lucene/dev/branches/security/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java Thu May 30 07:53:18 2013
@@ -103,7 +103,7 @@ public class SimpleNaiveBayesClassifier 
   @Override
   public ClassificationResult<BytesRef> assignClass(String inputDocument) throws IOException {
     if (atomicReader == null) {
-      throw new RuntimeException("need to train the classifier first");
+      throw new IOException("You must first call Classifier#train first");
     }
     double max = 0d;
     BytesRef foundClass = new BytesRef();
@@ -117,7 +117,7 @@ public class SimpleNaiveBayesClassifier 
       double clVal = calculatePrior(next) * calculateLikelihood(tokenizedDoc, next);
       if (clVal > max) {
         max = clVal;
-        foundClass = next.clone();
+        foundClass = BytesRef.deepCopyOf(next);
       }
     }
     return new ClassificationResult<BytesRef>(foundClass, max);

Modified: lucene/dev/branches/security/lucene/classification/src/test/org/apache/lucene/classification/ClassificationTestBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/classification/src/test/org/apache/lucene/classification/ClassificationTestBase.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/classification/src/test/org/apache/lucene/classification/ClassificationTestBase.java (original)
+++ lucene/dev/branches/security/lucene/classification/src/test/org/apache/lucene/classification/ClassificationTestBase.java Thu May 30 07:53:18 2013
@@ -32,12 +32,18 @@ import org.junit.Before;
 /**
  * Base class for testing {@link Classifier}s
  */
-public abstract class ClassificationTestBase extends LuceneTestCase {
+public abstract class ClassificationTestBase<T> extends LuceneTestCase {
+  public static final String POLITICS_INPUT = "Here are some interesting questions and answers about Mitt Romney. If you don't know the answer to the question about Mitt Romney, then simply click on the answer below the question section.";
+  public static final BytesRef POLITICS_RESULT = new BytesRef("politics");
+
+  public static final String TECHNOLOGY_INPUT = "Much is made of what the likes of Facebook, Google and Apple know about users. Truth is, Amazon may know more.";
+  public static final BytesRef TECHNOLOGY_RESULT = new BytesRef("technology");
 
   private RandomIndexWriter indexWriter;
   private String textFieldName;
-  private String classFieldName;
   private Directory dir;
+  String categoryFieldName;
+  String booleanFieldName;
 
   @Override
   @Before
@@ -46,7 +52,8 @@ public abstract class ClassificationTest
     dir = newDirectory();
     indexWriter = new RandomIndexWriter(random(), dir);
     textFieldName = "text";
-    classFieldName = "cat";
+    categoryFieldName = "cat";
+    booleanFieldName = "bool";
   }
 
   @Override
@@ -58,17 +65,16 @@ public abstract class ClassificationTest
   }
 
 
-  protected void checkCorrectClassification(Classifier<BytesRef> classifier, Analyzer analyzer) throws Exception {
+  protected void checkCorrectClassification(Classifier<T> classifier, String inputDoc, T expectedResult, Analyzer analyzer, String classFieldName) throws Exception {
     SlowCompositeReaderWrapper compositeReaderWrapper = null;
     try {
       populateIndex(analyzer);
       compositeReaderWrapper = new SlowCompositeReaderWrapper(indexWriter.getReader());
       classifier.train(compositeReaderWrapper, textFieldName, classFieldName, analyzer);
-      String newText = "Much is made of what the likes of Facebook, Google and Apple know about users. Truth is, Amazon may know more.";
-      ClassificationResult<BytesRef> classificationResult = classifier.assignClass(newText);
+      ClassificationResult<T> classificationResult = classifier.assignClass(inputDoc);
       assertNotNull(classificationResult.getAssignedClass());
-      assertEquals(new BytesRef("technology"), classificationResult.getAssignedClass());
-      assertTrue(classificationResult.getScore() > 0);
+      assertEquals("got an assigned class of " + classificationResult.getAssignedClass(), expectedResult, classificationResult.getAssignedClass());
+      assertTrue("got a not positive score " + classificationResult.getScore(), classificationResult.getScore() > 0);
     } finally {
       if (compositeReaderWrapper != null)
         compositeReaderWrapper.close();
@@ -86,48 +92,55 @@ public abstract class ClassificationTest
     doc.add(new Field(textFieldName, "The traveling press secretary for Mitt Romney lost his cool and cursed at reporters " +
         "who attempted to ask questions of the Republican presidential candidate in a public plaza near the Tomb of " +
         "the Unknown Soldier in Warsaw Tuesday.", ft));
-    doc.add(new Field(classFieldName, "politics", ft));
+    doc.add(new Field(categoryFieldName, "politics", ft));
+    doc.add(new Field(booleanFieldName, "false", ft));
 
     indexWriter.addDocument(doc, analyzer);
 
     doc = new Document();
     doc.add(new Field(textFieldName, "Mitt Romney seeks to assure Israel and Iran, as well as Jewish voters in the United" +
         " States, that he will be tougher against Iran's nuclear ambitions than President Barack Obama.", ft));
-    doc.add(new Field(classFieldName, "politics", ft));
+    doc.add(new Field(categoryFieldName, "politics", ft));
+    doc.add(new Field(booleanFieldName, "false", ft));
     indexWriter.addDocument(doc, analyzer);
 
     doc = new Document();
     doc.add(new Field(textFieldName, "And there's a threshold question that he has to answer for the American people and " +
         "that's whether he is prepared to be commander-in-chief,\" she continued. \"As we look to the past events, we " +
         "know that this raises some questions about his preparedness and we'll see how the rest of his trip goes.\"", ft));
-    doc.add(new Field(classFieldName, "politics", ft));
+    doc.add(new Field(categoryFieldName, "politics", ft));
+    doc.add(new Field(booleanFieldName, "false", ft));
     indexWriter.addDocument(doc, analyzer);
 
     doc = new Document();
     doc.add(new Field(textFieldName, "Still, when it comes to gun policy, many congressional Democrats have \"decided to " +
         "keep quiet and not go there,\" said Alan Lizotte, dean and professor at the State University of New York at " +
         "Albany's School of Criminal Justice.", ft));
-    doc.add(new Field(classFieldName, "politics", ft));
+    doc.add(new Field(categoryFieldName, "politics", ft));
+    doc.add(new Field(booleanFieldName, "false", ft));
     indexWriter.addDocument(doc, analyzer);
 
     doc = new Document();
     doc.add(new Field(textFieldName, "Standing amongst the thousands of people at the state Capitol, Jorstad, director of " +
         "technology at the University of Wisconsin-La Crosse, documented the historic moment and shared it with the " +
         "world through the Internet.", ft));
-    doc.add(new Field(classFieldName, "technology", ft));
+    doc.add(new Field(categoryFieldName, "technology", ft));
+    doc.add(new Field(booleanFieldName, "true", ft));
     indexWriter.addDocument(doc, analyzer);
 
     doc = new Document();
     doc.add(new Field(textFieldName, "So, about all those experts and analysts who've spent the past year or so saying " +
         "Facebook was going to make a phone. A new expert has stepped forward to say it's not going to happen.", ft));
-    doc.add(new Field(classFieldName, "technology", ft));
+    doc.add(new Field(categoryFieldName, "technology", ft));
+    doc.add(new Field(booleanFieldName, "true", ft));
     indexWriter.addDocument(doc, analyzer);
 
     doc = new Document();
     doc.add(new Field(textFieldName, "More than 400 million people trust Google with their e-mail, and 50 million store files" +
         " in the cloud using the Dropbox service. People manage their bank accounts, pay bills, trade stocks and " +
         "generally transfer or store huge volumes of personal data online.", ft));
-    doc.add(new Field(classFieldName, "technology", ft));
+    doc.add(new Field(categoryFieldName, "technology", ft));
+    doc.add(new Field(booleanFieldName, "true", ft));
     indexWriter.addDocument(doc, analyzer);
 
     indexWriter.commit();

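Making the base class generic in the assigned-class type, and indexing a boolean label alongside the category, lets the same harness drive classifiers whose result is not a BytesRef. A sketch of such a subclass (the classifier named here is hypothetical, not part of this commit):

    public class SomeBooleanClassifierTest extends ClassificationTestBase<Boolean> {
      @Test
      public void testBasicUsage() throws Exception {
        // booleanFieldName carries the "true"/"false" labels indexed above
        checkCorrectClassification(new SomeBooleanClassifier(), TECHNOLOGY_INPUT,
            Boolean.TRUE, new MockAnalyzer(random()), booleanFieldName);
      }
    }
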
Modified: lucene/dev/branches/security/lucene/classification/src/test/org/apache/lucene/classification/KNearestNeighborClassifierTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/classification/src/test/org/apache/lucene/classification/KNearestNeighborClassifierTest.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/classification/src/test/org/apache/lucene/classification/KNearestNeighborClassifierTest.java (original)
+++ lucene/dev/branches/security/lucene/classification/src/test/org/apache/lucene/classification/KNearestNeighborClassifierTest.java Thu May 30 07:53:18 2013
@@ -17,16 +17,17 @@
 package org.apache.lucene.classification;
 
 import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.util.BytesRef;
 import org.junit.Test;
 
 /**
  * Testcase for {@link KNearestNeighborClassifier}
  */
-public class KNearestNeighborClassifierTest extends ClassificationTestBase {
+public class KNearestNeighborClassifierTest extends ClassificationTestBase<BytesRef> {
 
   @Test
   public void testBasicUsage() throws Exception {
-     checkCorrectClassification(new KNearestNeighborClassifier(1), new MockAnalyzer(random()));
+    checkCorrectClassification(new KNearestNeighborClassifier(1), TECHNOLOGY_INPUT, TECHNOLOGY_RESULT, new MockAnalyzer(random()), categoryFieldName);
   }
 
 }

Modified: lucene/dev/branches/security/lucene/classification/src/test/org/apache/lucene/classification/SimpleNaiveBayesClassifierTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/classification/src/test/org/apache/lucene/classification/SimpleNaiveBayesClassifierTest.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/classification/src/test/org/apache/lucene/classification/SimpleNaiveBayesClassifierTest.java (original)
+++ lucene/dev/branches/security/lucene/classification/src/test/org/apache/lucene/classification/SimpleNaiveBayesClassifierTest.java Thu May 30 07:53:18 2013
@@ -18,8 +18,14 @@ package org.apache.lucene.classification
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.KeywordTokenizer;
+import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
 import org.apache.lucene.analysis.ngram.EdgeNGramTokenizer;
+import org.apache.lucene.analysis.reverse.ReverseStringFilter;
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.Version;
 import org.junit.Test;
 
 import java.io.Reader;
@@ -29,23 +35,24 @@ import java.io.Reader;
  */
 // TODO: eventually remove this if/when fallback methods exist for all unsupportable codec methods (see LUCENE-4872)
 @LuceneTestCase.SuppressCodecs("Lucene3x")
-public class SimpleNaiveBayesClassifierTest extends ClassificationTestBase {
+public class SimpleNaiveBayesClassifierTest extends ClassificationTestBase<BytesRef> {
 
   @Test
   public void testBasicUsage() throws Exception {
-    checkCorrectClassification(new SimpleNaiveBayesClassifier(), new MockAnalyzer(random()));
+    checkCorrectClassification(new SimpleNaiveBayesClassifier(), TECHNOLOGY_INPUT, TECHNOLOGY_RESULT, new MockAnalyzer(random()), categoryFieldName);
+    checkCorrectClassification(new SimpleNaiveBayesClassifier(), POLITICS_INPUT, POLITICS_RESULT, new MockAnalyzer(random()), categoryFieldName);
   }
 
   @Test
   public void testNGramUsage() throws Exception {
-    checkCorrectClassification(new SimpleNaiveBayesClassifier(), new NGramAnalyzer());
+    checkCorrectClassification(new SimpleNaiveBayesClassifier(), TECHNOLOGY_INPUT, TECHNOLOGY_RESULT, new NGramAnalyzer(), categoryFieldName);
   }
 
   private class NGramAnalyzer extends Analyzer {
     @Override
     protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
-      return new TokenStreamComponents(new EdgeNGramTokenizer(reader, EdgeNGramTokenizer.Side.BACK,
-          10, 20));
+      final Tokenizer tokenizer = new KeywordTokenizer(reader);
+      return new TokenStreamComponents(tokenizer,
+          new ReverseStringFilter(TEST_VERSION_CURRENT,
+              new EdgeNGramTokenFilter(TEST_VERSION_CURRENT,
+                  new ReverseStringFilter(TEST_VERSION_CURRENT, tokenizer), 10, 20)));
     }
   }
 

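The rewritten NGramAnalyzer emulates the removed EdgeNGramTokenizer.Side.BACK: reverse the token, take front edge n-grams, then reverse each gram back into original character order. Worked through on a short token (gram sizes 2..3 here for readability; the test uses 10..20):

    Tokenizer source = new KeywordTokenizer(reader);
    TokenStream backGrams = new ReverseStringFilter(TEST_VERSION_CURRENT,
        new EdgeNGramTokenFilter(TEST_VERSION_CURRENT,
            new ReverseStringFilter(TEST_VERSION_CURRENT, source), 2, 3));
    // "weinberg" -> reversed: "grebniew" -> front grams: "gr", "gre"
    //            -> reversed back: "rg", "erg"
    // i.e. exactly the last 2 and 3 characters of the original token,
    // which is what Side.BACK produced.
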
Modified: lucene/dev/branches/security/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesConsumer.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesConsumer.java (original)
+++ lucene/dev/branches/security/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesConsumer.java Thu May 30 07:53:18 2013
@@ -18,6 +18,8 @@ package org.apache.lucene.codecs.diskdv;
  */
 
 import java.io.IOException;
+import java.util.HashMap;
+import java.util.HashSet;
 
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.DocValuesConsumer;
@@ -27,6 +29,7 @@ import org.apache.lucene.index.SegmentWr
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.MathUtil;
 import org.apache.lucene.util.packed.BlockPackedWriter;
 import org.apache.lucene.util.packed.MonotonicBlockPackedWriter;
 import org.apache.lucene.util.packed.PackedInts;
@@ -36,6 +39,13 @@ public class DiskDocValuesConsumer exten
 
   static final int BLOCK_SIZE = 16384;
 
+  /** Compressed using packed blocks of ints. */
+  public static final int DELTA_COMPRESSED = 0;
+  /** Compressed by computing the GCD. */
+  public static final int GCD_COMPRESSED = 1;
+  /** Compressed by giving IDs to unique values. */
+  public static final int TABLE_COMPRESSED = 2;
+
   final IndexOutput data, meta;
   final int maxDoc;
   
@@ -59,23 +69,107 @@ public class DiskDocValuesConsumer exten
   
   @Override
   public void addNumericField(FieldInfo field, Iterable<Number> values) throws IOException {
+    addNumericField(field, values, true);
+  }
+
+  void addNumericField(FieldInfo field, Iterable<Number> values, boolean optimizeStorage) throws IOException {
     long count = 0;
-    for (@SuppressWarnings("unused") Number nv : values) {
-      ++count;
+    long minValue = Long.MAX_VALUE;
+    long maxValue = Long.MIN_VALUE;
+    long gcd = 0;
+    // TODO: more efficient?
+    HashSet<Long> uniqueValues = null;
+    if (optimizeStorage) {
+      uniqueValues = new HashSet<>();
+
+      for (Number nv : values) {
+        final long v = nv.longValue();
+
+        if (gcd != 1) {
+          if (v < Long.MIN_VALUE / 2 || v > Long.MAX_VALUE / 2) {
+            // in that case v - minValue might overflow and make the GCD computation return
+            // wrong results. Since these extreme values are unlikely, we just discard
+            // GCD computation for them
+            gcd = 1;
+          } else if (count != 0) { // minValue needs to be set first
+            gcd = MathUtil.gcd(gcd, v - minValue);
+          }
+        }
+
+        minValue = Math.min(minValue, v);
+        maxValue = Math.max(maxValue, v);
+
+        if (uniqueValues != null) {
+          if (uniqueValues.add(v)) {
+            if (uniqueValues.size() > 256) {
+              uniqueValues = null;
+            }
+          }
+        }
+
+        ++count;
+      }
+    } else {
+      for (@SuppressWarnings("unused") Number nv : values) {
+        ++count;
+      }
     }
+    
+    final long delta = maxValue - minValue;
 
+    final int format;
+    if (uniqueValues != null
+        && (delta < 0L || PackedInts.bitsRequired(uniqueValues.size() - 1) < PackedInts.bitsRequired(delta))
+        && count <= Integer.MAX_VALUE) {
+      format = TABLE_COMPRESSED;
+    } else if (gcd != 0 && gcd != 1) {
+      format = GCD_COMPRESSED;
+    } else {
+      format = DELTA_COMPRESSED;
+    }
     meta.writeVInt(field.number);
     meta.writeByte(DiskDocValuesFormat.NUMERIC);
+    meta.writeVInt(format);
     meta.writeVInt(PackedInts.VERSION_CURRENT);
     meta.writeLong(data.getFilePointer());
     meta.writeVLong(count);
     meta.writeVInt(BLOCK_SIZE);
 
-    final BlockPackedWriter writer = new BlockPackedWriter(data, BLOCK_SIZE);
-    for (Number nv : values) {
-      writer.add(nv.longValue());
+    switch (format) {
+      case GCD_COMPRESSED:
+        meta.writeLong(minValue);
+        meta.writeLong(gcd);
+        final BlockPackedWriter quotientWriter = new BlockPackedWriter(data, BLOCK_SIZE);
+        for (Number nv : values) {
+          quotientWriter.add((nv.longValue() - minValue) / gcd);
+        }
+        quotientWriter.finish();
+        break;
+      case DELTA_COMPRESSED:
+        final BlockPackedWriter writer = new BlockPackedWriter(data, BLOCK_SIZE);
+        for (Number nv : values) {
+          writer.add(nv.longValue());
+        }
+        writer.finish();
+        break;
+      case TABLE_COMPRESSED:
+        final Long[] decode = uniqueValues.toArray(new Long[uniqueValues.size()]);
+        final HashMap<Long,Integer> encode = new HashMap<Long,Integer>();
+        meta.writeVInt(decode.length);
+        for (int i = 0; i < decode.length; i++) {
+          meta.writeLong(decode[i]);
+          encode.put(decode[i], i);
+        }
+        final int bitsRequired = PackedInts.bitsRequired(uniqueValues.size() - 1);
+        final PackedInts.Writer ordsWriter = PackedInts.getWriterNoHeader(data, PackedInts.Format.PACKED, (int) count, bitsRequired, PackedInts.DEFAULT_BUFFER_SIZE);
+        for (Number nv : values) {
+          ordsWriter.add(encode.get(nv.longValue()));
+        }
+        ordsWriter.finish();
+        break;
+      default:
+        throw new AssertionError();
     }
-    writer.finish();
   }
 
   @Override
@@ -120,7 +214,7 @@ public class DiskDocValuesConsumer exten
     meta.writeVInt(field.number);
     meta.writeByte(DiskDocValuesFormat.SORTED);
     addBinaryField(field, values);
-    addNumericField(field, docToOrd);
+    addNumericField(field, docToOrd, false);
   }
   
   @Override
@@ -131,11 +225,12 @@ public class DiskDocValuesConsumer exten
     addBinaryField(field, values);
     // write the stream of ords as a numeric field
     // NOTE: we could return an iterator that delta-encodes these within a doc
-    addNumericField(field, ords);
+    addNumericField(field, ords, false);
     
     // write the doc -> ord count as an absolute index to the stream
     meta.writeVInt(field.number);
     meta.writeByte(DiskDocValuesFormat.NUMERIC);
+    meta.writeVInt(DELTA_COMPRESSED);
     meta.writeVInt(PackedInts.VERSION_CURRENT);
     meta.writeLong(data.getFilePointer());
     meta.writeVLong(maxDoc);

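The single delta-packed write path is now chosen per field: DELTA_COMPRESSED packs the raw values in blocks, GCD_COMPRESSED divides a common divisor out of the deltas, and TABLE_COMPRESSED replaces up to 256 distinct values with packed ordinals into a lookup table. A worked example of the GCD case (values are illustrative):

    // Millisecond timestamps that all fall on whole-second boundaries:
    long[] values = { 1000L, 5000L, 3000L };
    long minValue = 1000L;  // smallest value seen
    long gcd      = 2000L;  // gcd of the differences between the values
    // Written per document: (v - minValue) / gcd  ->  0, 2, 1, i.e. 2 bits per
    // value instead of the 12 bits the raw deltas would need; the reader
    // reconstructs each value as minValue + gcd * quotient.
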
Modified: lucene/dev/branches/security/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesProducer.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesProducer.java (original)
+++ lucene/dev/branches/security/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesProducer.java Thu May 30 07:53:18 2013
@@ -17,6 +17,10 @@ package org.apache.lucene.codecs.diskdv;
  * limitations under the License.
  */
 
+import static org.apache.lucene.codecs.diskdv.DiskDocValuesConsumer.DELTA_COMPRESSED;
+import static org.apache.lucene.codecs.diskdv.DiskDocValuesConsumer.GCD_COMPRESSED;
+import static org.apache.lucene.codecs.diskdv.DiskDocValuesConsumer.TABLE_COMPRESSED;
+
 import java.io.IOException;
 import java.util.HashMap;
 import java.util.Map;
@@ -37,6 +41,7 @@ import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.packed.BlockPackedReader;
 import org.apache.lucene.util.packed.MonotonicBlockPackedReader;
+import org.apache.lucene.util.packed.PackedInts;
 
 class DiskDocValuesProducer extends DocValuesProducer {
   private final Map<Integer,NumericEntry> numerics;
@@ -55,15 +60,17 @@ class DiskDocValuesProducer extends DocV
     // read in the entries from the metadata file.
     IndexInput in = state.directory.openInput(metaName, state.context);
     boolean success = false;
+    final int version;
     try {
-      CodecUtil.checkHeader(in, metaCodec, 
-                                DiskDocValuesFormat.VERSION_START,
-                                DiskDocValuesFormat.VERSION_START);
+      version = CodecUtil.checkHeader(in, metaCodec, 
+                                      DiskDocValuesFormat.VERSION_START,
+                                      DiskDocValuesFormat.VERSION_CURRENT);
       numerics = new HashMap<Integer,NumericEntry>();
       ords = new HashMap<Integer,NumericEntry>();
       ordIndexes = new HashMap<Integer,NumericEntry>();
       binaries = new HashMap<Integer,BinaryEntry>();
       readFields(in, state.fieldInfos);
+
       success = true;
     } finally {
       if (success) {
@@ -72,12 +79,24 @@ class DiskDocValuesProducer extends DocV
         IOUtils.closeWhileHandlingException(in);
       }
     }
-    
-    String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
-    data = state.directory.openInput(dataName, state.context);
-    CodecUtil.checkHeader(data, dataCodec, 
-                                DiskDocValuesFormat.VERSION_START,
-                                DiskDocValuesFormat.VERSION_START);
+
+    success = false;
+    try {
+      String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
+      data = state.directory.openInput(dataName, state.context);
+      final int version2 = CodecUtil.checkHeader(data, dataCodec, 
+                                                 DiskDocValuesFormat.VERSION_START,
+                                                 DiskDocValuesFormat.VERSION_CURRENT);
+      if (version != version2) {
+        throw new CorruptIndexException("Format versions mismatch");
+      }
+
+      success = true;
+    } finally {
+      if (!success) {
+        IOUtils.closeWhileHandlingException(this.data);
+      }
+    }
   }
   
   private void readFields(IndexInput meta, FieldInfos infos) throws IOException {
@@ -92,47 +111,47 @@ class DiskDocValuesProducer extends DocV
       } else if (type == DiskDocValuesFormat.SORTED) {
         // sorted = binary + numeric
         if (meta.readVInt() != fieldNumber) {
-          throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt");
+          throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
         }
         if (meta.readByte() != DiskDocValuesFormat.BINARY) {
-          throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt");
+          throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
         }
         BinaryEntry b = readBinaryEntry(meta);
         binaries.put(fieldNumber, b);
         
         if (meta.readVInt() != fieldNumber) {
-          throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt");
+          throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
         }
         if (meta.readByte() != DiskDocValuesFormat.NUMERIC) {
-          throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt");
+          throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
         }
         NumericEntry n = readNumericEntry(meta);
         ords.put(fieldNumber, n);
       } else if (type == DiskDocValuesFormat.SORTED_SET) {
         // sortedset = binary + numeric + ordIndex
         if (meta.readVInt() != fieldNumber) {
-          throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt");
+          throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
         }
         if (meta.readByte() != DiskDocValuesFormat.BINARY) {
-          throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt");
+          throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
         }
         BinaryEntry b = readBinaryEntry(meta);
         binaries.put(fieldNumber, b);
         
         if (meta.readVInt() != fieldNumber) {
-          throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt");
+          throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
         }
         if (meta.readByte() != DiskDocValuesFormat.NUMERIC) {
-          throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt");
+          throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
         }
         NumericEntry n1 = readNumericEntry(meta);
         ords.put(fieldNumber, n1);
         
         if (meta.readVInt() != fieldNumber) {
-          throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt");
+          throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
         }
         if (meta.readByte() != DiskDocValuesFormat.NUMERIC) {
-          throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt");
+          throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
         }
         NumericEntry n2 = readNumericEntry(meta);
         ordIndexes.put(fieldNumber, n2);
@@ -145,10 +164,34 @@ class DiskDocValuesProducer extends DocV
   
   static NumericEntry readNumericEntry(IndexInput meta) throws IOException {
     NumericEntry entry = new NumericEntry();
+    entry.format = meta.readVInt();
     entry.packedIntsVersion = meta.readVInt();
     entry.offset = meta.readLong();
     entry.count = meta.readVLong();
     entry.blockSize = meta.readVInt();
+    switch(entry.format) {
+      case GCD_COMPRESSED:
+        entry.minValue = meta.readLong();
+        entry.gcd = meta.readLong();
+        break;
+      case TABLE_COMPRESSED:
+        if (entry.count > Integer.MAX_VALUE) {
+          throw new CorruptIndexException("Cannot use TABLE_COMPRESSED with more than MAX_VALUE values, input=" + meta);
+        }
+        final int uniqueValues = meta.readVInt();
+        if (uniqueValues > 256) {
+          throw new CorruptIndexException("TABLE_COMPRESSED cannot have more than 256 distinct values, input=" + meta);
+        }
+        entry.table = new long[uniqueValues];
+        for (int i = 0; i < uniqueValues; ++i) {
+          entry.table[i] = meta.readLong();
+        }
+        break;
+      case DELTA_COMPRESSED:
+        break;
+      default:
+        throw new CorruptIndexException("Unknown format: " + entry.format + ", input=" + meta);
+    }
     return entry;
   }
   
@@ -176,13 +219,38 @@ class DiskDocValuesProducer extends DocV
     final IndexInput data = this.data.clone();
     data.seek(entry.offset);
 
-    final BlockPackedReader reader = new BlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, entry.count, true);
-    return new LongNumericDocValues() {
-      @Override
-      public long get(long id) {
-        return reader.get(id);
-      }
-    };
+    switch (entry.format) {
+      case DELTA_COMPRESSED:
+        final BlockPackedReader reader = new BlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, entry.count, true);
+        return new LongNumericDocValues() {
+          @Override
+          public long get(long id) {
+            return reader.get(id);
+          }
+        };
+      case GCD_COMPRESSED:
+        final long min = entry.minValue;
+        final long mult = entry.gcd;
+        final BlockPackedReader quotientReader = new BlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, entry.count, true);
+        return new LongNumericDocValues() {
+          @Override
+          public long get(long id) {
+            return min + mult * quotientReader.get(id);
+          }
+        };
+      case TABLE_COMPRESSED:
+        final long table[] = entry.table;
+        final int bitsRequired = PackedInts.bitsRequired(table.length - 1);
+        final PackedInts.Reader ords = PackedInts.getDirectReaderNoHeader(data, PackedInts.Format.PACKED, entry.packedIntsVersion, (int) entry.count, bitsRequired);
+        return new LongNumericDocValues() {
+          @Override
+          public long get(long id) {
+            return table[(int) ords.get((int) id)];
+          }
+        };
+      default:
+        throw new AssertionError();
+    }
   }
 
   @Override
@@ -350,9 +418,14 @@ class DiskDocValuesProducer extends DocV
   static class NumericEntry {
     long offset;
 
+    int format;
     int packedIntsVersion;
     long count;
     int blockSize;
+    
+    long minValue;
+    long gcd;
+    long table[];
   }
   
   static class BinaryEntry {

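Besides decoding the new formats, the producer now guards the data-file open with the same success/finally idiom already used for the meta file, so a corrupt or mismatched header no longer leaks the file handle. The pattern in general form (a sketch; the names are placeholders):

    IndexInput in = dir.openInput(fileName, context);
    boolean success = false;
    try {
      CodecUtil.checkHeader(in, codecName, VERSION_START, VERSION_CURRENT);
      // ... further validation ...
      success = true;
    } finally {
      if (!success) {
        IOUtils.closeWhileHandlingException(in);  // close the half-opened input
      }
    }
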
Modified: lucene/dev/branches/security/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java (original)
+++ lucene/dev/branches/security/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java Thu May 30 07:53:18 2013
@@ -1481,12 +1481,15 @@ public final class DirectPostingsFormat 
     }
 
     @Override
-    public int advance(int target) {
+    public int advance(int target) throws IOException {
       // Linear scan, but this is a low-freq term so it won't
       // be costly:
-      while(nextDoc() < target) {
-      }
-      return docID();
+      return slowAdvance(target);
+    }
+    
+    @Override
+    public long cost() {
+      return postings.length;
     }
   }
 
@@ -1546,12 +1549,15 @@ public final class DirectPostingsFormat 
     }
 
     @Override
-    public int advance(int target) {
+    public int advance(int target) throws IOException {
       // Linear scan, but this is a low-freq term so it won't
       // be costly:
-      while(nextDoc() < target) {
-      }
-      return docID();
+      return slowAdvance(target);
+    }
+    
+    @Override
+    public long cost() {
+      return postings.length / 2;
     }
   }
 
@@ -1627,12 +1633,16 @@ public final class DirectPostingsFormat 
     }
 
     @Override
-    public int advance(int target) {
+    public int advance(int target) throws IOException {
       // Linear scan, but this is a low-freq term so it won't
       // be costly:
-      while(nextDoc() < target) {
-      }
-      return docID();
+      return slowAdvance(target);
+    }
+    
+    @Override
+    public long cost() {
+      // TODO: could do a better estimate
+      return postings.length / 2;
     }
   }
 
@@ -1767,12 +1777,8 @@ public final class DirectPostingsFormat 
     }
 
     @Override
-    public int advance(int target) {
-      // Linear scan, but this is low-freq term so it won't
-      // be costly:
-      while (nextDoc() < target) {
-      }
-      return docID;
+    public int advance(int target) throws IOException {
+      return slowAdvance(target);
     }
 
     @Override
@@ -1786,6 +1792,12 @@ public final class DirectPostingsFormat 
         return null;
       }
     }
+    
+    @Override
+    public long cost() {
+      // TODO: could do a better estimate
+      return postings.length / 2;
+    }
   }
 
   // Docs + freqs:
@@ -1959,6 +1971,11 @@ public final class DirectPostingsFormat 
         return docID = docIDs[upto];
       }
     }
+    
+    @Override
+    public long cost() {
+      return docIDs.length;
+    }
   }
 
   // TODO: specialize offsets and not
@@ -2192,5 +2209,10 @@ public final class DirectPostingsFormat 
         return payload;
       }
     }
+    
+    @Override
+    public long cost() {
+      return docIDs.length;
+    }
   }
 }
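
The postings enums above now delegate advance() to DocIdSetIterator#slowAdvance instead of repeating the hand-rolled linear scan, and each implements the new cost() method (the array length, or length / 2 where doc and freq entries are interleaved in the same array) so scorers can order clauses cheaply. slowAdvance is roughly the following (a sketch; the real implementation lives in DocIdSetIterator):

    protected final int slowAdvance(int target) throws IOException {
      int doc;
      do {
        doc = nextDoc();  // linear scan; acceptable for low-frequency terms
      } while (doc < target);
      return doc;         // first docID >= target, or NO_MORE_DOCS
    }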