You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2016/06/11 15:49:34 UTC

[18/21] lucene-solr:branch_6x: LUCENE-6766: more IW.infoStream logging around sorting; fix test bug

LUCENE-6766: more IW.infoStream logging around sorting; fix test bug


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/0dd65f61
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/0dd65f61
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/0dd65f61

Branch: refs/heads/branch_6x
Commit: 0dd65f6130dbcb1a9caae7963fed246c1068ebe0
Parents: 4740056
Author: Mike McCandless <mi...@apache.org>
Authored: Thu May 12 15:25:11 2016 -0400
Committer: Mike McCandless <mi...@apache.org>
Committed: Sat Jun 11 11:48:40 2016 -0400

----------------------------------------------------------------------
 .../org/apache/lucene/index/CheckIndex.java     |  2 +-
 .../org/apache/lucene/index/DocIDMerger.java    |  3 ++-
 .../org/apache/lucene/index/MergeState.java     | 17 +++++++++---
 .../apache/lucene/index/TestIndexSorting.java   | 27 +++++++++++++++++---
 4 files changed, 40 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0dd65f61/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
index 1031d22..d752c25 100644
--- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
+++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
@@ -831,7 +831,7 @@ public final class CheckIndex implements Closeable {
 
     if (sort != null) {
       if (infoStream != null) {
-        infoStream.print("    test: check index sort.....");
+        infoStream.print("    test: index sort..........");
       }
 
       SortField fields[] = sort.getSort();

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0dd65f61/lucene/core/src/java/org/apache/lucene/index/DocIDMerger.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/DocIDMerger.java b/lucene/core/src/java/org/apache/lucene/index/DocIDMerger.java
index e8ffc6c..84f08c7 100644
--- a/lucene/core/src/java/org/apache/lucene/index/DocIDMerger.java
+++ b/lucene/core/src/java/org/apache/lucene/index/DocIDMerger.java
@@ -57,7 +57,8 @@ public class DocIDMerger<T extends DocIDMerger.Sub> {
   public DocIDMerger(List<T> subs, int maxCount, boolean indexIsSorted) {
     this.subs = subs;
 
-    if (indexIsSorted) {
+    // nocommit safe?
+    if (indexIsSorted && maxCount > 1) {
       queue = new PriorityQueue<T>(maxCount) {
         @Override
         protected boolean lessThan(Sub a, Sub b) {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0dd65f61/lucene/core/src/java/org/apache/lucene/index/MergeState.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/MergeState.java b/lucene/core/src/java/org/apache/lucene/index/MergeState.java
index 7737ff2..12310c6 100644
--- a/lucene/core/src/java/org/apache/lucene/index/MergeState.java
+++ b/lucene/core/src/java/org/apache/lucene/index/MergeState.java
@@ -21,6 +21,7 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
+import java.util.Locale;
 
 import org.apache.lucene.codecs.DocValuesProducer;
 import org.apache.lucene.codecs.FieldsProducer;
@@ -185,7 +186,13 @@ public class MergeState {
 
     } else {
       // do a merge sort of the incoming leaves:
-      return MultiSorter.sort(indexSort, readers);
+      long t0 = System.nanoTime();
+      DocMap[] result = MultiSorter.sort(indexSort, readers);
+      long t1 = System.nanoTime();
+      if (infoStream.isEnabled("SM")) {
+        infoStream.message("SM", String.format(Locale.ROOT, "%.2f msec to build merge sorted DocMaps", (t1-t0)/1000000.0));
+      }
+      return result;
     }
   }
 
@@ -218,10 +225,14 @@ public class MergeState {
         // to their index files on each indexed document:
 
         // This segment was written by flush, so documents are not yet sorted, so we sort them now:
+        long t0 = System.nanoTime();
         Sorter.DocMap sortDocMap = sorter.sort(leaf);
+        long t1 = System.nanoTime();
+        double msec = (t1-t0)/1000000.0;
+        
         if (sortDocMap != null) {
           if (infoStream.isEnabled("SM")) {
-            infoStream.message("SM", "segment " + leaf + " is not sorted; wrapping for sort " + indexSort + " now");
+            infoStream.message("SM", String.format(Locale.ROOT, "segment %s is not sorted; wrapping for sort %s now (%.2f msec to sort)", leaf, indexSort, msec));
           }
           leaf = SlowCodecReaderWrapper.wrap(SortingLeafReader.wrap(new MergeReaderWrapper(leaf), sortDocMap));
           leafDocMaps[readers.size()] = new DocMap() {
@@ -232,7 +243,7 @@ public class MergeState {
             };
         } else {
           if (infoStream.isEnabled("SM")) {
-            infoStream.message("SM", "segment " + leaf + " is not sorted, but is already accidentally in sort " + indexSort + " order");
+            infoStream.message("SM", String.format(Locale.ROOT, "segment %s is not sorted, but is already accidentally in sort %s order (%.2f msec to sort)", leaf, indexSort, msec));
           }
         }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0dd65f61/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
index 4deadd3..4e775f3 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
@@ -30,8 +30,11 @@ import java.util.Set;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.atomic.AtomicInteger;
 
+import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
@@ -1053,6 +1056,14 @@ public class TestIndexSorting extends LuceneTestCase {
     TERM_VECTORS_TYPE.setStoreTermVectors(true);
     TERM_VECTORS_TYPE.freeze();
 
+    Analyzer a = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName) {
+        Tokenizer tokenizer = new MockTokenizer();
+        return new TokenStreamComponents(tokenizer, tokenizer);
+      }
+    };
+
     List<Document> docs = new ArrayList<>();
     for (int i=0;i<numDocs;i++) {
       int id = i * 10;
@@ -1085,14 +1096,15 @@ public class TestIndexSorting extends LuceneTestCase {
     // We add document alread in ID order for the first writer:
     Directory dir1 = newFSDirectory(createTempDir());
     
-    IndexWriterConfig iwc1 = newIndexWriterConfig(new MockAnalyzer(random()));
+    Random random1 = new Random(seed);
+    IndexWriterConfig iwc1 = newIndexWriterConfig(random1, a);
     iwc1.setSimilarity(new NormsSimilarity(iwc1.getSimilarity())); // for testing norms field
     // preserve docIDs
     iwc1.setMergePolicy(newLogMergePolicy());
     if (VERBOSE) {
       System.out.println("TEST: now index pre-sorted");
     }
-    RandomIndexWriter w1 = new RandomIndexWriter(new Random(seed), dir1, iwc1);
+    RandomIndexWriter w1 = new RandomIndexWriter(random1, dir1, iwc1);
     for(Document doc : docs) {
       ((PositionsTokenStream) ((Field) doc.getField("positions")).tokenStreamValue()).setId(Integer.parseInt(doc.get("id")));
       w1.addDocument(doc);
@@ -1101,7 +1113,8 @@ public class TestIndexSorting extends LuceneTestCase {
     // We shuffle documents, but set index sort, for the second writer:
     Directory dir2 = newFSDirectory(createTempDir());
     
-    IndexWriterConfig iwc2 = newIndexWriterConfig(new MockAnalyzer(random()));
+    Random random2 = new Random(seed);
+    IndexWriterConfig iwc2 = newIndexWriterConfig(random2, a);
     iwc2.setSimilarity(new NormsSimilarity(iwc2.getSimilarity())); // for testing norms field
 
     Sort sort = new Sort(new SortField("numeric", SortField.Type.INT));
@@ -1111,7 +1124,7 @@ public class TestIndexSorting extends LuceneTestCase {
     if (VERBOSE) {
       System.out.println("TEST: now index with index-time sorting");
     }
-    RandomIndexWriter w2 = new RandomIndexWriter(new Random(seed), dir2, iwc2);
+    RandomIndexWriter w2 = new RandomIndexWriter(random2, dir2, iwc2);
     int count = 0;
     int commitAtCount = TestUtil.nextInt(random(), 1, numDocs-1);
     for(Document doc : docs) {
@@ -1122,10 +1135,16 @@ public class TestIndexSorting extends LuceneTestCase {
       }
       w2.addDocument(doc);
     }
+    if (VERBOSE) {
+      System.out.println("TEST: now force merge");
+    }
     w2.forceMerge(1);
 
     DirectoryReader r1 = w1.getReader();
     DirectoryReader r2 = w2.getReader();
+    if (VERBOSE) {
+      System.out.println("TEST: now compare r1=" + r1 + " r2=" + r2);
+    }
     assertEquals(sort, getOnlyLeafReader(r2).getIndexSort());
     assertReaderEquals("left: sorted by hand; right: sorted by Lucene", r1, r2);
     IOUtils.close(w1, w2, r1, r2, dir1, dir2);