You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/05/09 17:24:23 UTC

svn commit: r1101062 [17/21] - in /lucene/dev/branches/bulkpostings: ./ dev-tools/ dev-tools/eclipse/ dev-tools/idea/.idea/ dev-tools/idea/lucene/contrib/ant/ dev-tools/idea/lucene/contrib/db/bdb-je/ dev-tools/idea/lucene/contrib/db/bdb/ dev-tools/idea...

Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java Mon May  9 15:24:04 2011
@@ -50,4 +50,9 @@ public class TestRomanianAnalyzer extend
     checkOneTermReuse(a, "absenţa", "absenţa");
     checkOneTermReuse(a, "absenţi", "absenţ");
   }
+  
+  /** blast some random strings through the analyzer */
+  public void testRandomStrings() throws Exception {
+    checkRandomData(random, new RomanianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+  }
 }

Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java Mon May  9 15:24:04 2011
@@ -64,4 +64,9 @@ public class TestRussianAnalyzer extends
           new String[] { "вмест", "сил", "электромагнитн", "энерг", "имел", "представление" });
      
     }
+    
+    /** blast some random strings through the analyzer */
+    public void testRandomStrings() throws Exception {
+      checkRandomData(random, new RussianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+    }
 }

Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java Mon May  9 15:24:04 2011
@@ -45,4 +45,9 @@ public class TestRussianLightStemFilter 
   public void testVocabulary() throws IOException {
     assertVocabulary(analyzer, getDataFile("rulighttestdata.zip"), "rulight.txt");
   }
+  
+  /** blast some random strings through the analyzer */
+  public void testRandomStrings() throws Exception {
+    checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+  }
 }

Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java Mon May  9 15:24:04 2011
@@ -23,6 +23,7 @@ import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.hu.HungarianAnalyzer;
 
 public class TestSwedishAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the 
@@ -50,4 +51,9 @@ public class TestSwedishAnalyzer extends
     checkOneTermReuse(a, "jaktkarlarne", "jaktkarlarne");
     checkOneTermReuse(a, "jaktkarlens", "jaktkarl");
   }
+  
+  /** blast some random strings through the analyzer */
+  public void testRandomStrings() throws Exception {
+    checkRandomData(random, new SwedishAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+  }
 }

Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java Mon May  9 15:24:04 2011
@@ -45,4 +45,9 @@ public class TestSwedishLightStemFilter 
   public void testVocabulary() throws IOException {
     assertVocabulary(analyzer, getDataFile("svlighttestdata.zip"), "svlight.txt");
   }
+  
+  /** blast some random strings through the analyzer */
+  public void testRandomStrings() throws Exception {
+    checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+  }
 }

Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java Mon May  9 15:24:04 2011
@@ -17,7 +17,11 @@ package org.apache.lucene.analysis.th;
  * limitations under the License.
  */
 
+import java.io.StringReader;
+
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
 import org.apache.lucene.util.Version;
 
 /**
@@ -142,5 +146,23 @@ public class TestThaiAnalyzer extends Ba
             analyzer,
             "บริษัทชื่อ XY&Z - คุยกับ xyz@demo.com",
             new String[] { "บริษัท", "ชื่อ", "xy&z", "คุย", "กับ", "xyz@demo.com" });
-    }
+  }
+	
+  /** blast some random strings through the analyzer */
+  public void testRandomStrings() throws Exception {
+    checkRandomData(random, new ThaiAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+  }
+  
+  // LUCENE-3044
+  public void testAttributeReuse() throws Exception {
+    assumeTrue("JRE does not support Thai dictionary-based BreakIterator", ThaiWordFilter.DBBI_AVAILABLE);
+    ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_30);
+    // just consume
+    TokenStream ts = analyzer.reusableTokenStream("dummy", new StringReader("ภาษาไทย"));
+    assertTokenStreamContents(ts, new String[] { "ภาษา", "ไทย" });
+    // this consumer adds flagsAtt, which this analyzer does not use. 
+    ts = analyzer.reusableTokenStream("dummy", new StringReader("ภาษาไทย"));
+    ts.addAttribute(FlagsAttribute.class);
+    assertTokenStreamContents(ts, new String[] { "ภาษา", "ไทย" });
+  }
 }

Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java Mon May  9 15:24:04 2011
@@ -50,4 +50,9 @@ public class TestTurkishAnalyzer extends
     checkOneTermReuse(a, "ağacı", "ağacı");
     checkOneTermReuse(a, "ağaç", "ağaç");
   }
+  
+  /** blast some random strings through the analyzer */
+  public void testRandomStrings() throws Exception {
+    checkRandomData(random, new TurkishAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+  }
 }

Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/collation/CollationTestBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/collation/CollationTestBase.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/collation/CollationTestBase.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/collation/CollationTestBase.java Mon May  9 15:24:04 2011
@@ -186,7 +186,7 @@ public abstract class CollationTestBase 
                                    String dkResult) throws Exception {
     RAMDirectory indexStore = new RAMDirectory();
     IndexWriter writer = new IndexWriter(indexStore, new IndexWriterConfig(
-        TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)));
+        TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)));
 
     // document data:
     // the tracer field is used to determine which document was hit
@@ -257,27 +257,6 @@ public abstract class CollationTestBase 
     }
     assertEquals(expectedResult, buff.toString());
   }
-  
-  private String randomString() {
-    // ideally we could do this!
-    // return _TestUtil.randomUnicodeString(random);
-    //
-    // http://bugs.icu-project.org/trac/ticket/8060
-    // http://bugs.icu-project.org/trac/ticket/7732
-    // ...
-    // 
-    // as a workaround, just test the BMP for now (and avoid 0xFFFF etc)
-    int length = _TestUtil.nextInt(random, 0, 10);
-    char chars[] = new char[length];
-    for (int i = 0; i < length; i++) {
-      if (random.nextBoolean()) {
-        chars[i] = (char) _TestUtil.nextInt(random, 0, 0xD7FF);
-      } else {
-        chars[i] = (char) _TestUtil.nextInt(random, 0xE000, 0xFFFD);
-      }
-    }
-    return new String(chars, 0, length);
-  }
 
   public void assertThreadSafe(final Analyzer analyzer) throws Exception {
     int numTestPoints = 100;
@@ -289,7 +268,7 @@ public abstract class CollationTestBase 
     // and ensure they are the same as the ones we produced in serial fashion.
 
     for (int i = 0; i < numTestPoints; i++) {
-      String term = randomString();
+      String term = _TestUtil.randomSimpleString(random);
       TokenStream ts = analyzer.reusableTokenStream("fake", new StringReader(term));
       TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
       BytesRef bytes = termAtt.getBytesRef();

Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java Mon May  9 15:24:04 2011
@@ -87,8 +87,7 @@ public class TestCollationKeyAnalyzer ex
   public void testThreadSafe() throws Exception {
     int iters = 20 * RANDOM_MULTIPLIER;
     for (int i = 0; i < iters; i++) {
-      Locale locale = randomLocale(random);
-      Collator collator = Collator.getInstance(locale);
+      Collator collator = Collator.getInstance(Locale.GERMAN);
       collator.setStrength(Collator.PRIMARY);
       assertThreadSafe(new CollationKeyAnalyzer(TEST_VERSION_CURRENT, collator));
     }

Modified: lucene/dev/branches/bulkpostings/modules/analysis/icu/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/icu/build.xml?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/icu/build.xml (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/icu/build.xml Mon May  9 15:24:04 2011
@@ -141,14 +141,6 @@ are part of the ICU4C package. See http:
   <target name="javadocs" depends="compile-core">
    	<sequential>
        <mkdir dir="${javadoc.dir}/contrib-${name}"/>
-       <path id="javadoc.classpath">
-         <path refid="classpath"/>
-         <pathelement location="${ant.home}/lib/ant.jar"/>
-         <fileset dir=".">
-           <exclude name="build/**/*.jar"/>
-           <include name="**/lib/*.jar"/>
-         </fileset>
-       </path>
        <invoke-javadoc
          destdir="${javadoc.dir}/contrib-${name}"
        	 title="${Name} ${version} contrib-${name} API">

Modified: lucene/dev/branches/bulkpostings/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java Mon May  9 15:24:04 2011
@@ -29,15 +29,14 @@ import org.apache.lucene.analysis.core.W
  * Tests ICUFoldingFilter
  */
 public class TestICUFoldingFilter extends BaseTokenStreamTestCase {
+  Analyzer a = new Analyzer() {
+    @Override
+    public TokenStream tokenStream(String fieldName, Reader reader) {
+      return new ICUFoldingFilter(
+          new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader));
+    }
+  };
   public void testDefaults() throws IOException {
-    Analyzer a = new Analyzer() {
-      @Override
-      public TokenStream tokenStream(String fieldName, Reader reader) {
-        return new ICUFoldingFilter(
-            new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader));
-      }
-    };
-
     // case folding
     assertAnalyzesTo(a, "This is a test", new String[] { "this", "is", "a", "test" });
 
@@ -76,4 +75,9 @@ public class TestICUFoldingFilter extend
     // handling of decomposed combining-dot-above
     assertAnalyzesTo(a, "eli\u0307f", new String[] { "elif" });
   }
+  
+  /** blast some random strings through the analyzer */
+  public void testRandomStrings() throws Exception {
+    checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
+  }
 }

Modified: lucene/dev/branches/bulkpostings/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2Filter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2Filter.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2Filter.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2Filter.java Mon May  9 15:24:04 2011
@@ -31,16 +31,15 @@ import com.ibm.icu.text.Normalizer2;
  * Tests the ICUNormalizer2Filter
  */
 public class TestICUNormalizer2Filter extends BaseTokenStreamTestCase {
+  Analyzer a = new Analyzer() {
+    @Override
+    public TokenStream tokenStream(String fieldName, Reader reader) {
+      return new ICUNormalizer2Filter(
+          new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader));
+    }
+  };
 
   public void testDefaults() throws IOException {
-    Analyzer a = new Analyzer() {
-      @Override
-      public TokenStream tokenStream(String fieldName, Reader reader) {
-        return new ICUNormalizer2Filter(
-            new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader));
-      }
-    };
-
     // case folding
     assertAnalyzesTo(a, "This is a test", new String[] { "this", "is", "a", "test" });
 
@@ -75,4 +74,9 @@ public class TestICUNormalizer2Filter ex
     // decompose EAcute into E + combining Acute
     assertAnalyzesTo(a, "\u00E9", new String[] { "\u0065\u0301" });
   }
+  
+  /** blast some random strings through the analyzer */
+  public void testRandomStrings() throws Exception {
+    checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
+  }
 }

Modified: lucene/dev/branches/bulkpostings/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUTransformFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUTransformFilter.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUTransformFilter.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUTransformFilter.java Mon May  9 15:24:04 2011
@@ -18,10 +18,15 @@ package org.apache.lucene.analysis.icu;
  */
 
 import java.io.IOException;
+import java.io.Reader;
 import java.io.StringReader;
 
+import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 
 import com.ibm.icu.text.Transliterator;
@@ -83,4 +88,17 @@ public class TestICUTransformFilter exte
     TokenStream ts = new ICUTransformFilter(new KeywordTokenizer((new StringReader(input))), transform);
     assertTokenStreamContents(ts, new String[] { expected });
   }
+  
+  /** blast some random strings through the analyzer */
+  public void testRandomStrings() throws Exception {
+    final Transliterator transform = Transliterator.getInstance("Any-Latin");
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+        return new TokenStreamComponents(tokenizer, new ICUTransformFilter(tokenizer, transform));
+      }
+    };
+    checkRandomData(random, a, 1000*RANDOM_MULTIPLIER);
+  }
 }

Modified: lucene/dev/branches/bulkpostings/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java Mon May  9 15:24:04 2011
@@ -232,4 +232,9 @@ public class TestICUTokenizer extends Ba
         new String[] { "仮", "名", "遣", "い", "カタカナ" },
         new String[] { "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<HIRAGANA>", "<KATAKANA>" });
   }
+  
+  /** blast some random strings through the analyzer */
+  public void testRandomStrings() throws Exception {
+    checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
+  }
 }

Modified: lucene/dev/branches/bulkpostings/modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyAnalyzer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyAnalyzer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyAnalyzer.java Mon May  9 15:24:04 2011
@@ -19,6 +19,7 @@ package org.apache.lucene.collation;
 
 
 import com.ibm.icu.text.Collator;
+
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.index.codecs.CodecProvider;
 import org.apache.lucene.util.BytesRef;
@@ -88,7 +89,7 @@ public class TestICUCollationKeyAnalyzer
   public void testThreadSafe() throws Exception {
     int iters = 20 * RANDOM_MULTIPLIER;
     for (int i = 0; i < iters; i++) {
-      Locale locale = randomLocale(random);
+      Locale locale = Locale.GERMAN;
       Collator collator = Collator.getInstance(locale);
       collator.setStrength(Collator.IDENTICAL);
       assertThreadSafe(new ICUCollationKeyAnalyzer(TEST_VERSION_CURRENT, collator));

Modified: lucene/dev/branches/bulkpostings/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/SegGraph.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/SegGraph.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/SegGraph.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/SegGraph.java Mon May  9 15:24:04 2011
@@ -75,7 +75,7 @@ class SegGraph {
     List<SegToken> result = new ArrayList<SegToken>();
     int s = -1, count = 0, size = tokenListTable.size();
     List<SegToken> tokenList;
-    short index = 0;
+    int index = 0;
     while (count < size) {
       if (isStartExist(s)) {
         tokenList = tokenListTable.get(s);

Modified: lucene/dev/branches/bulkpostings/modules/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java Mon May  9 15:24:04 2011
@@ -17,8 +17,11 @@
 
 package org.apache.lucene.analysis.cn.smart;
 
+import java.io.StringReader;
+
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.util.Version;
 
 public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase {
@@ -166,4 +169,35 @@ public class TestSmartChineseAnalyzer ex
         new int[] { 0, 1, 3, 4, 6, 7 },
         new int[] { 1, 3, 4, 6, 7, 9 });
   }
+  
+  // LUCENE-3026
+  public void testLargeDocument() throws Exception {
+    StringBuilder sb = new StringBuilder();
+    for (int i = 0; i < 5000; i++) {
+      sb.append("我购买了道具和服装。");
+    }
+    Analyzer analyzer = new SmartChineseAnalyzer(TEST_VERSION_CURRENT);
+    TokenStream stream = analyzer.reusableTokenStream("", new StringReader(sb.toString()));
+    stream.reset();
+    while (stream.incrementToken()) {
+    }
+  }
+  
+  // LUCENE-3026
+  public void testLargeSentence() throws Exception {
+    StringBuilder sb = new StringBuilder();
+    for (int i = 0; i < 5000; i++) {
+      sb.append("我购买了道具和服装");
+    }
+    Analyzer analyzer = new SmartChineseAnalyzer(TEST_VERSION_CURRENT);
+    TokenStream stream = analyzer.reusableTokenStream("", new StringReader(sb.toString()));
+    stream.reset();
+    while (stream.incrementToken()) {
+    }
+  }
+  
+  /** blast some random strings through the analyzer */
+  public void testRandomStrings() throws Exception {
+    checkRandomData(random, new SmartChineseAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+  }
 }

Modified: lucene/dev/branches/bulkpostings/modules/analysis/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java Mon May  9 15:24:04 2011
@@ -50,4 +50,9 @@ public class TestPolishAnalyzer extends 
     checkOneTermReuse(a, "studenta", "studenta");
     checkOneTermReuse(a, "studenci", "student");
   }
+  
+  /** blast some random strings through the analyzer */
+  public void testRandomStrings() throws Exception {
+    checkRandomData(random, new PolishAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+  }
 }

Modified: lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocParser.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocParser.java (original)
+++ lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocParser.java Mon May  9 15:24:04 2011
@@ -1,136 +1,136 @@
-package org.apache.lucene.benchmark.byTask.feeds;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.File;
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.Locale;
-import java.util.Map;
-
-/** 
- * Parser for trec doc content, invoked on doc text excluding <DOC> and <DOCNO>
- * which are handled in TrecContentSource. Required to be stateless and hence thread safe. 
- */
-public abstract class TrecDocParser {
-
-  /** Types of trec parse paths, */
-  public enum ParsePathType { GOV2, FBIS, FT, FR94, LATIMES }
-  
-  /** trec parser type used for unknown extensions */
-  public static final ParsePathType DEFAULT_PATH_TYPE  = ParsePathType.GOV2;
-
-  static final Map<ParsePathType,TrecDocParser> pathType2parser = new HashMap<ParsePathType,TrecDocParser>();
-  static {
-    pathType2parser.put(ParsePathType.GOV2, new TrecGov2Parser());
-    pathType2parser.put(ParsePathType.FBIS, new TrecFBISParser());
-    pathType2parser.put(ParsePathType.FR94, new TrecFR94Parser());
-    pathType2parser.put(ParsePathType.FT, new TrecFTParser());
-    pathType2parser.put(ParsePathType.LATIMES, new TrecLATimesParser());
-  }
-
-  static final Map<String,ParsePathType> pathName2Type = new HashMap<String,ParsePathType>();
-  static {
-    for (ParsePathType ppt : ParsePathType.values()) {
-      pathName2Type.put(ppt.name().toUpperCase(Locale.ENGLISH),ppt);
-    }
-  }
-  
-  /** max length of walk up from file to its ancestors when looking for a known path type */ 
-  private static final int MAX_PATH_LENGTH = 10;
-  
-  /**
-   * Compute the path type of a file by inspecting name of file and its parents
-   */
-  public static ParsePathType pathType(File f) {
-    int pathLength = 0;
-    while (f != null && ++pathLength < MAX_PATH_LENGTH) {
-      ParsePathType ppt = pathName2Type.get(f.getName().toUpperCase(Locale.ENGLISH));
-      if (ppt!=null) {
-        return ppt;
-      }
-      f = f.getParentFile();
-    }
-    return DEFAULT_PATH_TYPE;
-  }
-  
-  /** 
-   * parse the text prepared in docBuf into a result DocData, 
-   * no synchronization is required.
-   * @param docData reusable result
-   * @param name name that should be set to the result
-   * @param trecSrc calling trec content source  
-   * @param docBuf text to parse  
-   * @param pathType type of parsed file, or null if unknown - may be used by 
-   * parsers to alter their behavior according to the file path type. 
-   */  
-  public abstract DocData parse(DocData docData, String name, TrecContentSource trecSrc, 
-      StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException;
-  
-  /** 
-   * strip tags from <code>buf</code>: each tag is replaced by a single blank.
-   * @return text obtained when stripping all tags from <code>buf</code> (Input StringBuilder is unmodified).
-   */
-  public static String stripTags(StringBuilder buf, int start) {
-    return stripTags(buf.substring(start),0);
-  }
-
-  /** 
-   * strip tags from input.
-   * @see #stripTags(StringBuilder, int)
-   */
-  public static String stripTags(String buf, int start) {
-    if (start>0) {
-      buf = buf.substring(0);
-    }
-    return buf.replaceAll("<[^>]*>", " ");
-  }
-  
-  /**
-   * Extract from <code>buf</code> the text of interest within specified tags
-   * @param buf entire input text
-   * @param startTag tag marking start of text of interest 
-   * @param endTag tag marking end of text of interest
-   * @param maxPos if &ge; 0 sets a limit on start of text of interest
-   * @return text of interest or null if not found
-   */
-  public static String extract(StringBuilder buf, String startTag, String endTag, int maxPos, String noisePrefixes[]) {
-    int k1 = buf.indexOf(startTag);
-    if (k1>=0 && (maxPos<0 || k1<maxPos)) {
-      k1 += startTag.length();
-      int k2 = buf.indexOf(endTag,k1);
-      if (k2>=0 && (maxPos<0 || k2<maxPos)) { // found end tag with allowed range
-        if (noisePrefixes != null) {
-          for (String noise : noisePrefixes) {
-            int k1a = buf.indexOf(noise,k1);
-            if (k1a>=0 && k1a<k2) {
-              k1 = k1a + noise.length();
-            }
-          }          
-        }
-        return buf.substring(k1,k2).trim();
-      }
-    }
-    return null;
-  }
-
-  //public static void main(String[] args) {
-  //  System.out.println(stripTags("is it true that<space>2<<second space>><almost last space>1<one more space>?",0));
-  //}
-
-}
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Locale;
+import java.util.Map;
+
+/** 
+ * Parser for trec doc content, invoked on doc text excluding <DOC> and <DOCNO>
+ * which are handled in TrecContentSource. Required to be stateless and hence thread safe. 
+ */
+public abstract class TrecDocParser {
+
+  /** Types of trec parse paths, */
+  public enum ParsePathType { GOV2, FBIS, FT, FR94, LATIMES }
+  
+  /** trec parser type used for unknown extensions */
+  public static final ParsePathType DEFAULT_PATH_TYPE  = ParsePathType.GOV2;
+
+  static final Map<ParsePathType,TrecDocParser> pathType2parser = new HashMap<ParsePathType,TrecDocParser>();
+  static {
+    pathType2parser.put(ParsePathType.GOV2, new TrecGov2Parser());
+    pathType2parser.put(ParsePathType.FBIS, new TrecFBISParser());
+    pathType2parser.put(ParsePathType.FR94, new TrecFR94Parser());
+    pathType2parser.put(ParsePathType.FT, new TrecFTParser());
+    pathType2parser.put(ParsePathType.LATIMES, new TrecLATimesParser());
+  }
+
+  static final Map<String,ParsePathType> pathName2Type = new HashMap<String,ParsePathType>();
+  static {
+    for (ParsePathType ppt : ParsePathType.values()) {
+      pathName2Type.put(ppt.name().toUpperCase(Locale.ENGLISH),ppt);
+    }
+  }
+  
+  /** max length of walk up from file to its ancestors when looking for a known path type */ 
+  private static final int MAX_PATH_LENGTH = 10;
+  
+  /**
+   * Compute the path type of a file by inspecting name of file and its parents
+   */
+  public static ParsePathType pathType(File f) {
+    int pathLength = 0;
+    while (f != null && ++pathLength < MAX_PATH_LENGTH) {
+      ParsePathType ppt = pathName2Type.get(f.getName().toUpperCase(Locale.ENGLISH));
+      if (ppt!=null) {
+        return ppt;
+      }
+      f = f.getParentFile();
+    }
+    return DEFAULT_PATH_TYPE;
+  }
+  
+  /** 
+   * parse the text prepared in docBuf into a result DocData, 
+   * no synchronization is required.
+   * @param docData reusable result
+   * @param name name that should be set to the result
+   * @param trecSrc calling trec content source  
+   * @param docBuf text to parse  
+   * @param pathType type of parsed file, or null if unknown - may be used by 
+   * parsers to alter their behavior according to the file path type. 
+   */  
+  public abstract DocData parse(DocData docData, String name, TrecContentSource trecSrc, 
+      StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException;
+  
+  /** 
+   * strip tags from <code>buf</code>: each tag is replaced by a single blank.
+   * @return text obtained when stripping all tags from <code>buf</code> (Input StringBuilder is unmodified).
+   */
+  public static String stripTags(StringBuilder buf, int start) {
+    return stripTags(buf.substring(start),0);
+  }
+
+  /** 
+   * strip tags from input.
+   * @see #stripTags(StringBuilder, int)
+   */
+  public static String stripTags(String buf, int start) {
+    if (start>0) {
+      buf = buf.substring(0);
+    }
+    return buf.replaceAll("<[^>]*>", " ");
+  }
+  
+  /**
+   * Extract from <code>buf</code> the text of interest within specified tags
+   * @param buf entire input text
+   * @param startTag tag marking start of text of interest 
+   * @param endTag tag marking end of text of interest
+   * @param maxPos if &ge; 0 sets a limit on start of text of interest
+   * @return text of interest or null if not found
+   */
+  public static String extract(StringBuilder buf, String startTag, String endTag, int maxPos, String noisePrefixes[]) {
+    int k1 = buf.indexOf(startTag);
+    if (k1>=0 && (maxPos<0 || k1<maxPos)) {
+      k1 += startTag.length();
+      int k2 = buf.indexOf(endTag,k1);
+      if (k2>=0 && (maxPos<0 || k2<maxPos)) { // found end tag with allowed range
+        if (noisePrefixes != null) {
+          for (String noise : noisePrefixes) {
+            int k1a = buf.indexOf(noise,k1);
+            if (k1a>=0 && k1a<k2) {
+              k1 = k1a + noise.length();
+            }
+          }          
+        }
+        return buf.substring(k1,k2).trim();
+      }
+    }
+    return null;
+  }
+
+  //public static void main(String[] args) {
+  //  System.out.println(stripTags("is it true that<space>2<<second space>><almost last space>1<one more space>?",0));
+  //}
+
+}

Modified: lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFBISParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFBISParser.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFBISParser.java (original)
+++ lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFBISParser.java Mon May  9 15:24:04 2011
@@ -1,65 +1,65 @@
-package org.apache.lucene.benchmark.byTask.feeds;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.util.Date;
-
-/**
- * Parser for the FBIS docs in trec disks 4+5 collection format
- */
-public class TrecFBISParser extends TrecDocParser {
-
-  private static final String HEADER = "<HEADER>";
-  private static final String HEADER_END = "</HEADER>";
-  private static final int HEADER_END_LENGTH = HEADER_END.length();
-  
-  private static final String DATE1 = "<DATE1>";
-  private static final String DATE1_END = "</DATE1>";
-  
-  private static final String TI = "<TI>";
-  private static final String TI_END = "</TI>";
-
-  @Override
-  public DocData parse(DocData docData, String name, TrecContentSource trecSrc, 
-      StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException {
-    int mark = 0; // that much is skipped
-    // optionally skip some of the text, set date, title
-    Date date = null;
-    String title = null;
-    int h1 = docBuf.indexOf(HEADER);
-    if (h1>=0) {
-      int h2 = docBuf.indexOf(HEADER_END,h1);
-      mark = h2+HEADER_END_LENGTH;
-      // date...
-      String dateStr = extract(docBuf, DATE1, DATE1_END, h2, null);
-      if (dateStr != null) {
-        date = trecSrc.parseDate(dateStr);
-      }
-      // title...
-      title = extract(docBuf, TI, TI_END, h2, null);
-    }
-    docData.clear();
-    docData.setName(name);
-    docData.setDate(date);
-    docData.setTitle(title);
-    docData.setBody(stripTags(docBuf, mark).toString());
-    return docData;
-  }
-
-}
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Date;
+
+/**
+ * Parser for the FBIS docs in trec disks 4+5 collection format
+ */
+public class TrecFBISParser extends TrecDocParser {
+
+  private static final String HEADER = "<HEADER>";
+  private static final String HEADER_END = "</HEADER>";
+  private static final int HEADER_END_LENGTH = HEADER_END.length();
+  
+  private static final String DATE1 = "<DATE1>";
+  private static final String DATE1_END = "</DATE1>";
+  
+  private static final String TI = "<TI>";
+  private static final String TI_END = "</TI>";
+
+  @Override
+  public DocData parse(DocData docData, String name, TrecContentSource trecSrc, 
+      StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException {
+    int mark = 0; // that much is skipped
+    // optionally skip some of the text, set date, title
+    Date date = null;
+    String title = null;
+    int h1 = docBuf.indexOf(HEADER);
+    if (h1>=0) {
+      int h2 = docBuf.indexOf(HEADER_END,h1);
+      mark = h2+HEADER_END_LENGTH;
+      // date...
+      String dateStr = extract(docBuf, DATE1, DATE1_END, h2, null);
+      if (dateStr != null) {
+        date = trecSrc.parseDate(dateStr);
+      }
+      // title...
+      title = extract(docBuf, TI, TI_END, h2, null);
+    }
+    docData.clear();
+    docData.setName(name);
+    docData.setDate(date);
+    docData.setTitle(title);
+    docData.setBody(stripTags(docBuf, mark).toString());
+    return docData;
+  }
+
+}

Modified: lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFR94Parser.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFR94Parser.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFR94Parser.java (original)
+++ lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFR94Parser.java Mon May  9 15:24:04 2011
@@ -1,66 +1,66 @@
-package org.apache.lucene.benchmark.byTask.feeds;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.util.Date;
-
-/**
- * Parser for the FR94 docs in trec disks 4+5 collection format
- */
-public class TrecFR94Parser extends TrecDocParser {
-
-  private static final String TEXT = "<TEXT>";
-  private static final int TEXT_LENGTH = TEXT.length();
-  private static final String TEXT_END = "</TEXT>";
-  
-  private static final String DATE = "<DATE>";
-  private static final String[] DATE_NOISE_PREFIXES = {
-    "DATE:",
-    "date:", //TODO improve date extraction for this format
-    "t.c.",
-  };
-  private static final String DATE_END = "</DATE>";
-  
-  //TODO can we also extract title for this format?
-  
-  @Override
-  public DocData parse(DocData docData, String name, TrecContentSource trecSrc, 
-      StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException {
-    int mark = 0; // that much is skipped
-    // optionally skip some of the text, set date (no title?)
-    Date date = null;
-    int h1 = docBuf.indexOf(TEXT);
-    if (h1>=0) {
-      int h2 = docBuf.indexOf(TEXT_END,h1);
-      mark = h1+TEXT_LENGTH;
-      // date...
-      String dateStr = extract(docBuf, DATE, DATE_END, h2, DATE_NOISE_PREFIXES);
-      if (dateStr != null) {
-        dateStr = stripTags(dateStr,0).toString();
-        date = trecSrc.parseDate(dateStr.trim());
-      }
-    }
-    docData.clear();
-    docData.setName(name);
-    docData.setDate(date);
-    docData.setBody(stripTags(docBuf, mark).toString());
-    return docData;
-  }
-
-}
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Date;
+
+/**
+ * Parser for the FR94 docs in trec disks 4+5 collection format
+ */
+public class TrecFR94Parser extends TrecDocParser {
+
+  private static final String TEXT = "<TEXT>";
+  private static final int TEXT_LENGTH = TEXT.length();
+  private static final String TEXT_END = "</TEXT>";
+  
+  private static final String DATE = "<DATE>";
+  private static final String[] DATE_NOISE_PREFIXES = {
+    "DATE:",
+    "date:", //TODO improve date extraction for this format
+    "t.c.",
+  };
+  private static final String DATE_END = "</DATE>";
+  
+  //TODO can we also extract title for this format?
+  
+  @Override
+  public DocData parse(DocData docData, String name, TrecContentSource trecSrc, 
+      StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException {
+    int mark = 0; // that much is skipped
+    // optionally skip some of the text, set date (no title?)
+    Date date = null;
+    int h1 = docBuf.indexOf(TEXT);
+    if (h1>=0) {
+      int h2 = docBuf.indexOf(TEXT_END,h1);
+      mark = h1+TEXT_LENGTH;
+      // date...
+      String dateStr = extract(docBuf, DATE, DATE_END, h2, DATE_NOISE_PREFIXES);
+      if (dateStr != null) {
+        dateStr = stripTags(dateStr,0).toString();
+        date = trecSrc.parseDate(dateStr.trim());
+      }
+    }
+    docData.clear();
+    docData.setName(name);
+    docData.setDate(date);
+    docData.setBody(stripTags(docBuf, mark).toString());
+    return docData;
+  }
+
+}

Modified: lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFTParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFTParser.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFTParser.java (original)
+++ lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFTParser.java Mon May  9 15:24:04 2011
@@ -1,57 +1,57 @@
-package org.apache.lucene.benchmark.byTask.feeds;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.util.Date;
-
-/**
- * Parser for the FT docs in trec disks 4+5 collection format
- */
-public class TrecFTParser extends TrecDocParser {
-
-  private static final String DATE = "<DATE>";
-  private static final String DATE_END = "</DATE>";
-  
-  private static final String HEADLINE = "<HEADLINE>";
-  private static final String HEADLINE_END = "</HEADLINE>";
-
-  @Override
-  public DocData parse(DocData docData, String name, TrecContentSource trecSrc, 
-      StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException {
-    int mark = 0; // that much is skipped
-
-    // date...
-    Date date = null;
-    String dateStr = extract(docBuf, DATE, DATE_END, -1, null);
-    if (dateStr != null) {
-      date = trecSrc.parseDate(dateStr);
-    }
-     
-    // title...
-    String title = extract(docBuf, HEADLINE, HEADLINE_END, -1, null);
-
-    docData.clear();
-    docData.setName(name);
-    docData.setDate(date);
-    docData.setTitle(title);
-    docData.setBody(stripTags(docBuf, mark).toString());
-    return docData;
-  }
-
-}
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Date;
+
+/**
+ * Parser for the FT docs in trec disks 4+5 collection format
+ */
+public class TrecFTParser extends TrecDocParser {
+
+  private static final String DATE = "<DATE>";
+  private static final String DATE_END = "</DATE>";
+  
+  private static final String HEADLINE = "<HEADLINE>";
+  private static final String HEADLINE_END = "</HEADLINE>";
+
+  @Override
+  public DocData parse(DocData docData, String name, TrecContentSource trecSrc, 
+      StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException {
+    int mark = 0; // that much is skipped
+
+    // date...
+    Date date = null;
+    String dateStr = extract(docBuf, DATE, DATE_END, -1, null);
+    if (dateStr != null) {
+      date = trecSrc.parseDate(dateStr);
+    }
+     
+    // title...
+    String title = extract(docBuf, HEADLINE, HEADLINE_END, -1, null);
+
+    docData.clear();
+    docData.setName(name);
+    docData.setDate(date);
+    docData.setTitle(title);
+    docData.setBody(stripTags(docBuf, mark).toString());
+    return docData;
+  }
+
+}

Modified: lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecLATimesParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecLATimesParser.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecLATimesParser.java (original)
+++ lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecLATimesParser.java Mon May  9 15:24:04 2011
@@ -1,71 +1,71 @@
-package org.apache.lucene.benchmark.byTask.feeds;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.util.Date;
-
-/**
- * Parser for the FT docs in trec disks 4+5 collection format
- */
-public class TrecLATimesParser extends TrecDocParser {
-
-  private static final String DATE = "<DATE>";
-  private static final String DATE_END = "</DATE>";
-  private static final String DATE_NOISE = "day,"; // anything aftre the ',' 
-
-  private static final String SUBJECT = "<SUBJECT>";
-  private static final String SUBJECT_END = "</SUBJECT>";
-  private static final String HEADLINE = "<HEADLINE>";
-  private static final String HEADLINE_END = "</HEADLINE>";
-  
-  @Override
-  public DocData parse(DocData docData, String name, TrecContentSource trecSrc, 
-      StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException {
-    int mark = 0; // that much is skipped
-
-    // date...
-    Date date = null;
-    String dateStr = extract(docBuf, DATE, DATE_END, -1, null);
-    if (dateStr != null) {
-      int d2a = dateStr.indexOf(DATE_NOISE);
-      if (d2a > 0) {
-        dateStr = dateStr.substring(0,d2a+3); // we need the "day" part
-      }
-      dateStr = stripTags(dateStr,0).toString();
-      date = trecSrc.parseDate(dateStr.trim());
-    }
-     
-    // title... first try with SUBJECT, them with HEADLINE
-    String title = extract(docBuf, SUBJECT, SUBJECT_END, -1, null);
-    if (title==null) {
-      title = extract(docBuf, HEADLINE, HEADLINE_END, -1, null);
-    }
-    if (title!=null) {
-      title = stripTags(title,0).toString().trim();
-    }
-    
-    docData.clear();
-    docData.setName(name);
-    docData.setDate(date);
-    docData.setTitle(title);
-    docData.setBody(stripTags(docBuf, mark).toString());
-    return docData;
-  }
-
-}
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Date;
+
+/**
+ * Parser for the FT docs in trec disks 4+5 collection format
+ */
+public class TrecLATimesParser extends TrecDocParser {
+
+  private static final String DATE = "<DATE>";
+  private static final String DATE_END = "</DATE>";
+  private static final String DATE_NOISE = "day,"; // anything aftre the ',' 
+
+  private static final String SUBJECT = "<SUBJECT>";
+  private static final String SUBJECT_END = "</SUBJECT>";
+  private static final String HEADLINE = "<HEADLINE>";
+  private static final String HEADLINE_END = "</HEADLINE>";
+  
+  @Override
+  public DocData parse(DocData docData, String name, TrecContentSource trecSrc, 
+      StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException {
+    int mark = 0; // that much is skipped
+
+    // date...
+    Date date = null;
+    String dateStr = extract(docBuf, DATE, DATE_END, -1, null);
+    if (dateStr != null) {
+      int d2a = dateStr.indexOf(DATE_NOISE);
+      if (d2a > 0) {
+        dateStr = dateStr.substring(0,d2a+3); // we need the "day" part
+      }
+      dateStr = stripTags(dateStr,0).toString();
+      date = trecSrc.parseDate(dateStr.trim());
+    }
+     
+    // title... first try with SUBJECT, them with HEADLINE
+    String title = extract(docBuf, SUBJECT, SUBJECT_END, -1, null);
+    if (title==null) {
+      title = extract(docBuf, HEADLINE, HEADLINE_END, -1, null);
+    }
+    if (title!=null) {
+      title = stripTags(title,0).toString().trim();
+    }
+    
+    docData.clear();
+    docData.setName(name);
+    docData.setDate(date);
+    docData.setTitle(title);
+    docData.setBody(stripTags(docBuf, mark).toString());
+    return docData;
+  }
+
+}

Modified: lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecParserByPath.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecParserByPath.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecParserByPath.java (original)
+++ lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecParserByPath.java Mon May  9 15:24:04 2011
@@ -1,33 +1,33 @@
-package org.apache.lucene.benchmark.byTask.feeds;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-/**
- * Parser for trec docs which selects the parser to apply according 
- * to the source files path, defaulting to {@link TrecGov2Parser}.
- */
-public class TrecParserByPath extends TrecDocParser {
-
-  @Override
-  public DocData parse(DocData docData, String name, TrecContentSource trecSrc, 
-      StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException {
-    return pathType2parser.get(pathType).parse(docData, name, trecSrc, docBuf, pathType);
-  }
-
-}
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+/**
+ * Parser for trec docs which selects the parser to apply according 
+ * to the source files path, defaulting to {@link TrecGov2Parser}.
+ */
+public class TrecParserByPath extends TrecDocParser {
+
+  @Override
+  public DocData parse(DocData docData, String name, TrecContentSource trecSrc, 
+      StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException {
+    return pathType2parser.get(pathType).parse(docData, name, trecSrc, docBuf, pathType);
+  }
+
+}

Modified: lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java (original)
+++ lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java Mon May  9 15:24:04 2011
@@ -25,6 +25,7 @@ import org.apache.lucene.index.IndexDele
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.LogMergePolicy;
+import org.apache.lucene.index.TieredMergePolicy;
 import org.apache.lucene.index.MergeScheduler;
 import org.apache.lucene.index.ConcurrentMergeScheduler;
 import org.apache.lucene.index.MergePolicy;
@@ -150,6 +151,9 @@ public class CreateIndexTask extends Per
         LogMergePolicy logMergePolicy = (LogMergePolicy) iwConf.getMergePolicy();
         logMergePolicy.setUseCompoundFile(isCompound);
         logMergePolicy.setMergeFactor(config.get("merge.factor",OpenIndexTask.DEFAULT_MERGE_PFACTOR));
+      } else if(iwConf.getMergePolicy() instanceof TieredMergePolicy) {
+        TieredMergePolicy tieredMergePolicy = (TieredMergePolicy) iwConf.getMergePolicy();
+        tieredMergePolicy.setUseCompoundFile(isCompound);
       }
     }
     final double ramBuffer = config.get("ram.flush.mb",OpenIndexTask.DEFAULT_RAM_FLUSH_MB);

Modified: lucene/dev/branches/bulkpostings/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (original)
+++ lucene/dev/branches/bulkpostings/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java Mon May  9 15:24:04 2011
@@ -96,7 +96,7 @@ public class TestPerfTasksLogic extends 
     assertTrue("Index does not exist?...!", IndexReader.indexExists(benchmark.getRunData().getDirectory()));
     // now we should be able to open the index for write. 
     IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(),
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())
+        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
             .setOpenMode(OpenMode.APPEND));
     iw.close();
     IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
@@ -183,7 +183,7 @@ public class TestPerfTasksLogic extends 
 
     assertTrue("Index does not exist?...!", IndexReader.indexExists(benchmark.getRunData().getDirectory()));
     // now we should be able to open the index for write.
-    IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(), new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND));
+    IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(), new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND));
     iw.close();
     IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
     assertEquals("100 docs were added to the index, this is what we expect to find!",100,ir.numDocs());
@@ -222,7 +222,7 @@ public class TestPerfTasksLogic extends 
 
     assertTrue("Index does not exist?...!", IndexReader.indexExists(benchmark.getRunData().getDirectory()));
     // now we should be able to open the index for write.
-    IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(), new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND));
+    IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(), new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND));
     iw.close();
     IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
     assertEquals("1000 docs were added to the index, this is what we expect to find!",1000,ir.numDocs());
@@ -295,7 +295,7 @@ public class TestPerfTasksLogic extends 
     assertEquals("TestSearchTask was supposed to be called!",139,CountingSearchTestTask.numSearches);
     assertTrue("Index does not exist?...!", IndexReader.indexExists(benchmark.getRunData().getDirectory()));
     // now we should be able to open the index for write. 
-    IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(), new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND));
+    IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(), new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND));
     iw.close();
     IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
     assertEquals("1 docs were added to the index, this is what we expect to find!",1,ir.numDocs());
@@ -407,7 +407,7 @@ public class TestPerfTasksLogic extends 
     // Index the line docs
     String algLines2[] = {
       "# ----- properties ",
-      "analyzer=org.apache.lucene.analysis.MockAnalyzer",
+      "analyzer=org.apache.lucene.analysis.core.WhitespaceAnalyzer",
       "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
       "docs.file=" + lineFile.getAbsolutePath().replace('\\', '/'),
       "content.source.forever=false",
@@ -425,7 +425,7 @@ public class TestPerfTasksLogic extends 
 
     // now we should be able to open the index for write. 
     IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(),
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())
+        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
             .setOpenMode(OpenMode.APPEND));
     iw.close();
 
@@ -448,7 +448,7 @@ public class TestPerfTasksLogic extends 
     // then build index from the same docs
     String algLines1[] = {
       "# ----- properties ",
-      "analyzer=org.apache.lucene.analysis.MockAnalyzer",
+      "analyzer=org.apache.lucene.analysis.core.WhitespaceAnalyzer",
       "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
       "docs.file=" + getReuters20LinesFile(),
       "# ----- alg ",
@@ -1021,18 +1021,18 @@ public class TestPerfTasksLogic extends 
                                       "two three four", "three four", 
                                       "three four five", "four five",
                                       "four five six", "five six" });
-    // MockAnalyzer, default maxShingleSize and outputUnigrams
+    // WhitespaceAnalyzer, default maxShingleSize and outputUnigrams
     benchmark = execBenchmark
-      (getShingleConfig("analyzer:MockAnalyzer"));
+      (getShingleConfig("analyzer:WhitespaceAnalyzer"));
     assertEqualShingle(benchmark.getRunData().getAnalyzer(), text,
                        new String[] { "one,two,three,", "one,two,three, four",
                                       "four", "four five", "five", "five six", 
                                       "six" });
     
-    // MockAnalyzer, maxShingleSize=3 and outputUnigrams=false
+    // WhitespaceAnalyzer, maxShingleSize=3 and outputUnigrams=false
     benchmark = execBenchmark
       (getShingleConfig
-        ("outputUnigrams:false,maxShingleSize:3,analyzer:MockAnalyzer"));
+        ("outputUnigrams:false,maxShingleSize:3,analyzer:WhitespaceAnalyzer"));
     assertEqualShingle(benchmark.getRunData().getAnalyzer(), text,
                        new String[] { "one,two,three, four", 
                                       "one,two,three, four five",

Modified: lucene/dev/branches/bulkpostings/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/DocMakerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/DocMakerTest.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/DocMakerTest.java (original)
+++ lucene/dev/branches/bulkpostings/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/DocMakerTest.java Mon May  9 15:24:04 2011
@@ -21,6 +21,7 @@ import java.io.IOException;
 import java.util.Properties;
 
 import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
 import org.apache.lucene.benchmark.BenchmarkTestCase;
 import org.apache.lucene.benchmark.byTask.PerfRunData;
 import org.apache.lucene.benchmark.byTask.tasks.AddDocTask;
@@ -70,7 +71,7 @@ public class DocMakerTest extends Benchm
     Properties props = new Properties();
     
     // Indexing configuration.
-    props.setProperty("analyzer", MockAnalyzer.class.getName());
+    props.setProperty("analyzer", WhitespaceAnalyzer.class.getName());
     props.setProperty("content.source", OneDocSource.class.getName());
     props.setProperty("directory", "RAMDirectory");
     if (setIndexProps) {
@@ -99,7 +100,7 @@ public class DocMakerTest extends Benchm
     Properties props = new Properties();
     
     // Indexing configuration.
-    props.setProperty("analyzer", MockAnalyzer.class.getName());
+    props.setProperty("analyzer", WhitespaceAnalyzer.class.getName());
     props.setProperty("content.source", OneDocSource.class.getName());
     props.setProperty("directory", "RAMDirectory");
     if (setNormsProp) {

Modified: lucene/dev/branches/bulkpostings/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocSourceTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocSourceTest.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocSourceTest.java (original)
+++ lucene/dev/branches/bulkpostings/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocSourceTest.java Mon May  9 15:24:04 2011
@@ -27,6 +27,7 @@ import java.util.Properties;
 
 import org.apache.commons.compress.compressors.CompressorStreamFactory;
 import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
 import org.apache.lucene.benchmark.BenchmarkTestCase;
 import org.apache.lucene.benchmark.byTask.PerfRunData;
 import org.apache.lucene.benchmark.byTask.feeds.LineDocSource.HeaderLineParser;
@@ -121,7 +122,7 @@ public class LineDocSourceTest extends B
     }
     
     // Indexing configuration.
-    props.setProperty("analyzer", MockAnalyzer.class.getName());
+    props.setProperty("analyzer", WhitespaceAnalyzer.class.getName());
     props.setProperty("content.source", LineDocSource.class.getName());
     props.setProperty("directory", "RAMDirectory");
     props.setProperty("doc.stored", "true");

Modified: lucene/dev/branches/bulkpostings/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/alt/AltTestTask.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/alt/AltTestTask.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/alt/AltTestTask.java (original)
+++ lucene/dev/branches/bulkpostings/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/alt/AltTestTask.java Mon May  9 15:24:04 2011
@@ -1,37 +1,37 @@
-package org.apache.lucene.benchmark.byTask.tasks.alt;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.benchmark.byTask.PerfRunData;
-import org.apache.lucene.benchmark.byTask.tasks.PerfTask;
-
-/**
- * {@link PerfTask} which does nothing, but is in a different package 
- */
-public class AltTestTask extends PerfTask {
-  
-  public AltTestTask(PerfRunData runData) {
-    super(runData);
-  }
-
-  @Override
-  public int doLogic() throws Exception {
-    return 0;
-  }
-  
-}
+package org.apache.lucene.benchmark.byTask.tasks.alt;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.benchmark.byTask.PerfRunData;
+import org.apache.lucene.benchmark.byTask.tasks.PerfTask;
+
+/**
+ * {@link PerfTask} which does nothing, but is in a different package 
+ */
+public class AltTestTask extends PerfTask {
+  
+  public AltTestTask(PerfRunData runData) {
+    super(runData);
+  }
+
+  @Override
+  public int doLogic() throws Exception {
+    return 0;
+  }
+  
+}

Modified: lucene/dev/branches/bulkpostings/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/CHANGES.txt?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/CHANGES.txt (original)
+++ lucene/dev/branches/bulkpostings/solr/CHANGES.txt Mon May  9 15:24:04 2011
@@ -60,6 +60,11 @@ Detailed Change List
 
 New Features
 ----------------------
+
+* SOLR-2378: A new, automaton-based, implementation of suggest (autocomplete) 
+  component, offering an order of magnitude smaller memory consumption
+  compared to ternary trees and jaspell and very fast lookups at runtime.
+  (Dawid Weiss)
   
 * SOLR-571: The autowarmCount for LRUCaches (LRUCache and FastLRUCache) now 
   supports "percentages" which get evaluated  relative the current size of 
@@ -75,7 +80,7 @@ New Features
 
 * SOLR-1682: (SOLR-236, SOLR-237, SOLR-1773, SOLR-1311) Search grouping / Field collapsing.
   (Martijn van Groningen, Emmanuel Keller, Shalin Shekhar Mangar,
-   Koji Sekiguchi, Iv�n de Prado, Ryan McKinley, Marc Sturlese, Peter Karich,
+   Koji Sekiguchi, Iván de Prado, Ryan McKinley, Marc Sturlese, Peter Karich,
    Bojan Smid, Charles Hornberger, Dieter Grad, Dmitry Lihachev, Doug Steigerwald,
    Karsten Sperling, Michael Gundlach, Oleg Gnatovskiy, Thomas Traeger,
    Harish Agarwal, yonik)
@@ -100,8 +105,10 @@ New Features
   levenshtein automata.  (rmuir)
    
 * SOLR-1873: SolrCloud - added shared/central config and core/shard managment via zookeeper,
-  built-in load balancing, and infrastructure for future SolrCloud work. 
-  (yonik, Mark Miller)
+  built-in load balancing, and infrastructure for future SolrCloud work. (yonik, Mark Miller)
+  Additional Work:
+  SOLR-2324: SolrCloud solr.xml parameters are not persisted by CoreContainer.
+  (Massimo Schiavon, Mark Miller)
 
 * SOLR-1729: Evaluation of NOW for date math is done only once per request for
   consistency, and is also propagated to shards in distributed search.
@@ -110,7 +117,7 @@ New Features
 
 * SOLR-1566: Transforming documents in the ResponseWriters.  This will allow
   for more complex results in responses and open the door for function queries
-  as results. (ryan with patches from grant, noble, cmale, yonik)
+  as results. (ryan with patches from grant, noble, cmale, yonik, Jan Høydahl)
 
 * SOLR-2417: Add explain info directly to return documents using ?fl=_explain_ (ryan)
 
@@ -121,6 +128,16 @@ New Features
 * SOLR-2338: Add support for using <similarity/> in a schema's fieldType,
   for customizing scoring on a per-field basis. (hossman, yonik, rmuir)
   
+* SOLR-2335: New 'field("...")' function syntax for refering to complex 
+  field names (containing whitespace or special characters) in functions.
+
+* SOLR-1709: Distributed support for Date and Numeric Range Faceting
+  (Peter Sturge, David Smiley, hossman)
+
+* SOLR-2383: /browse improvements: generalize range and date facet display
+  (Jan Høydahl via yonik)
+
+
 
 Optimizations
 ----------------------
@@ -174,6 +191,9 @@ Bug Fixes
   initialization if the schema.xml contains an analyzer configuration
   for a fieldType that does not use TextField.  (hossman) 
 
+* SOLR-2467: Fix <analyzer class="..." /> initialization so any errors 
+  are logged properly.  (hossman)
+
 Other Changes
 ----------------------
 
@@ -211,6 +231,8 @@ Other Changes
   and publish binary, javadoc, and source test-framework jars. 
   (Drew Farris, Robert Muir, Steve Rowe)
 
+* SOLR-2461: QuerySenderListener and AbstractSolrEventListener are 
+  now public (hossman)
 
 Documentation
 ----------------------
@@ -230,6 +252,10 @@ Carrot2 3.4.2
 
 Upgrading from Solr 3.1
 ----------------------
+  
+* The updateRequestProcessorChain for a RequestHandler is now defined
+  with update.chain rather than update.processor. The latter still works,
+  but has been deprecated.
 
 Detailed Change List
 ----------------------
@@ -246,9 +272,52 @@ Bug Fixes
 * SOLR-2445: Change the default qt to blank in form.jsp, because there is no "standard"
   request handler unless you have it in your solrconfig.xml explicitly. (koji)
 
+* SOLR-2455: Prevent double submit of forms in admin interface.
+  (Jeffrey Chang via uschindler)
+
+* SOLR-2464: Fix potential slowness in QueryValueSource (the query() function) when
+  the query is very sparse and may not match any documents in a segment. (yonik)
+
+* SOLR-2469: When using java replication with replicateAfter=startup, the first
+  commit point on server startup is never removed. (yonik)
+
+* SOLR-2466: SolrJ's CommonsHttpSolrServer would retry requests on failure, regardless
+  of the configured maxRetries, due to HttpClient having it's own retry mechanism
+  by default.  The retryCount of HttpClient is now set to 0, and SolrJ does
+  the retry.  (yonik)
+
+* SOLR-2409: edismax parser - treat the text of a fielded query as a literal if the
+  fieldname does not exist.  For example Mission: Impossible should not search on
+  the "Mission" field unless it's a valid field in the schema.  (Ryan McKinley, yonik)
+
+* SOLR-2403: facet.sort=index reported incorrect results for distributed search
+  in a number of scenarios when facet.mincount>0.  This patch also adds some
+  performance/algorithmic improvements when (facet.sort=count && facet.mincount=1
+  && facet.limit=-1) and when (facet.sort=index && facet.mincount>0)  (yonik)
+
+* SOLR-2333: The "rename" core admin action does not persist the new name to solr.xml
+  (Rasmus Hahn, Paul R. Brown via Mark Miller)
+  
+* SOLR-2390: Performance of usePhraseHighlighter is terrible on very large Documents, 
+  regardless of hl.maxDocCharsToAnalyze. (Mark Miller)
+
+* SOLR-2474: The helper TokenStreams in analysis.jsp and AnalysisRequestHandlerBase
+  did not clear all attributes so they displayed incorrect attribute values for tokens
+  in later filter stages. (uschindler, rmuir, yonik)
+
+* SOLR-2493: SolrQueryParser was fixed to not parse the SolrConfig DOM tree on each
+  instantiation which is a huge slowdown.  (Stephane Bailliez via uschindler)
+
+* SOLR-2495: The JSON parser could hang on corrupted input and could fail
+  to detect numbers that were too large to fit in a long.  (yonik)
+
+
 Other Changes
 ----------------------
 
+* SOLR-2105: Rename RequestHandler param 'update.processor' to 'update.chain'.
+	(Jan Høydahl via Mark Miller)
+
 Build
 ----------------------
 

Modified: lucene/dev/branches/bulkpostings/solr/README.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/README.txt?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/README.txt (original)
+++ lucene/dev/branches/bulkpostings/solr/README.txt Mon May  9 15:24:04 2011
@@ -64,18 +64,18 @@ docs/api/index.html
 Instructions for Building Apache Solr from Source
 -------------------------------------------------
 
-1. Download the Java SE 6 JDK (Java Development Kit) or later from http://java.sun.com.
-   You will need the JDK installed, and the %JAVA_HOME%\bin directory included
-   on your command path.  To test this, issue a "java -version" command from your
-   shell and verify that the Java version is 1.6 or later.
-
-2. Download the Apache Ant binary distribution (1.7.0 or greater) from http://ant.apache.org.
-   You will need Ant installed and the %ANT_HOME%\bin directory included on your
-   command path.  To test this, issue a "ant -version" command from your
-   shell and verify that Ant is available.
+1. Download the Java SE 6 JDK (Java Development Kit) or later from http://java.sun.com/
+   You will need the JDK installed, and the $JAVA_HOME/bin (Windows: %JAVA_HOME%\bin) 
+   folder included on your command path. To test this, issue a "java -version" command 
+   from your shell (command prompt) and verify that the Java version is 1.6 or later.
+
+2. Download the Apache Ant binary distribution (1.7.0 or greater) from http://ant.apache.org/
+   You will need Ant installed and the $ANT_HOME/bin (Windows: %ANT_HOME%\bin) folder 
+   included on your command path. To test this, issue a "ant -version" command from your
+   shell (command prompt) and verify that Ant is available.
 
-3. Download the Apache Solr distribution, linked from the above 
-   web site.  Expand the distribution to a folder of your choice, e.g. c:\solr.   
+3. Download the Apache Solr distribution, linked from the above web site. 
+   Unzip the distribution to a folder of your choice, e.g. C:\solr or ~/solr
    Alternately, you can obtain a copy of the latest Apache Solr source code
    directly from the Subversion repository: