You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jb...@apache.org on 2020/07/13 13:33:25 UTC

[lucene-solr] branch jira/SOLR-14608-export updated: SOLR-14608: Wire-up the MergeIterator

This is an automated email from the ASF dual-hosted git repository.

jbernste pushed a commit to branch jira/SOLR-14608-export
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/jira/SOLR-14608-export by this push:
     new 970c6cf  SOLR-14608: Wire-up the MergeIterator
970c6cf is described below

commit 970c6cf4f5abad4248ff5c686c8ec65061dfa949
Author: Joel Bernstein <jb...@apache.org>
AuthorDate: Mon Jul 13 09:32:52 2020 -0400

    SOLR-14608: Wire-up the MergeIterator
---
 .../apache/solr/handler/export/DoubleValue.java    |  7 +-
 .../solr/handler/export/DoubleValueSortDoc.java    |  7 ++
 .../apache/solr/handler/export/ExportBuffers.java  |  4 +-
 .../apache/solr/handler/export/ExportWriter.java   | 89 +++++++++++++++++++---
 .../org/apache/solr/handler/export/FloatValue.java |  6 ++
 .../org/apache/solr/handler/export/IntValue.java   |  6 +-
 .../org/apache/solr/handler/export/LongValue.java  |  4 +
 .../solr/handler/export/QuadValueSortDoc.java      | 17 ++++-
 .../solr/handler/export/SingleValueSortDoc.java    |  7 +-
 .../org/apache/solr/handler/export/SortDoc.java    |  8 ++
 .../org/apache/solr/handler/export/SortValue.java  |  1 +
 .../apache/solr/handler/export/StringValue.java    | 15 +++-
 .../solr/handler/export/TripleValueSortDoc.java    | 15 +++-
 13 files changed, 167 insertions(+), 19 deletions(-)

diff --git a/solr/core/src/java/org/apache/solr/handler/export/DoubleValue.java b/solr/core/src/java/org/apache/solr/handler/export/DoubleValue.java
index d85bbc3..47c7b52 100644
--- a/solr/core/src/java/org/apache/solr/handler/export/DoubleValue.java
+++ b/solr/core/src/java/org/apache/solr/handler/export/DoubleValue.java
@@ -78,7 +78,12 @@ class DoubleValue implements SortValue {
     }
   }
 
-  @Override
+  public void toGlobalValue(SortValue previousValue) {
+    // Empty body: the call ignores previousValue and leaves this value unchanged.
+  }
+
+
+    @Override
   public boolean isPresent() {
     return present;
   }
diff --git a/solr/core/src/java/org/apache/solr/handler/export/DoubleValueSortDoc.java b/solr/core/src/java/org/apache/solr/handler/export/DoubleValueSortDoc.java
index 8c2a92a..99f56ca 100644
--- a/solr/core/src/java/org/apache/solr/handler/export/DoubleValueSortDoc.java
+++ b/solr/core/src/java/org/apache/solr/handler/export/DoubleValueSortDoc.java
@@ -55,6 +55,13 @@ class DoubleValueSortDoc extends SingleValueSortDoc {
     value2.setCurrentValue(docId);
   }
 
+  @Override
+  public void setGlobalValues(SortDoc previous) { // previous must be a DoubleValueSortDoc, otherwise the cast throws
+    final DoubleValueSortDoc prior = (DoubleValueSortDoc) previous;
+    this.value1.toGlobalValue(prior.value1); // each value is paired with its same-numbered counterpart
+    this.value2.toGlobalValue(prior.value2);
+  }
+
   public void setValues(SortDoc sortDoc) {
     this.docId = sortDoc.docId;
     this.ord = sortDoc.ord;
diff --git a/solr/core/src/java/org/apache/solr/handler/export/ExportBuffers.java b/solr/core/src/java/org/apache/solr/handler/export/ExportBuffers.java
index 72382d2..e314e6a 100644
--- a/solr/core/src/java/org/apache/solr/handler/export/ExportBuffers.java
+++ b/solr/core/src/java/org/apache/solr/handler/export/ExportBuffers.java
@@ -96,13 +96,13 @@ class ExportBuffers {
     filler = () -> {
       try {
         // log.debug("--- filler start {}", Thread.currentThread());
+        SortDoc[] outDocs = new SortDoc[30000];
         SortDoc sortDoc = exportWriter.getSortDoc(searcher, sort.getSort());
         Buffer buffer = getFillBuffer();
-        SortQueue queue = new SortQueue(queueSize, sortDoc);
         long lastOutputCounter = 0;
         for (int count = 0; count < totalHits; ) {
           // log.debug("--- filler fillOutDocs in {}", fillBuffer);
-          exportWriter.fillOutDocs(leaves, sortDoc, queue, buffer);
+          exportWriter.fillOutDocs(leaves, sortDoc, outDocs, buffer);
           count += (buffer.outDocsIndex + 1);
           // log.debug("--- filler count={}, exchange buffer from {}", count, buffer);
           Timer.Context timerContext = getFillerWaitTimer().time();
diff --git a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java
index d56607d..f9a31c1 100644
--- a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java
+++ b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java
@@ -303,8 +303,27 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
     return tupleStream;
   }
 
+  private void identifyLowestSortingUnexportedDocs(List<LeafReaderContext> leaves, SortDoc sortDoc, SortDoc[] outDocs) throws IOException {
+    Timer.Context timerContext = identifyLowestSortingDocTimer.time(); //stopped in finally, so failures are timed too
+    try {
+      MergeIterator mergeIterator = getMergeIterator(leaves, sets, sortDoc); //a fresh iterator is built on every call
+      for(int i=0; i<outDocs.length; i++) {
+        SortDoc sdoc = mergeIterator.next();
+        if(sdoc == null) {
+          outDocs[i] = null; //iterator exhausted: an empty slot marks the end of the batch
+        } else {
+          if(outDocs[i] == null) { outDocs[i] = sortDoc.copy(); } //slots may be empty (never filled, or cleared above)
+          outDocs[i].setValues(sdoc);
+        }
+      }
+    } finally {
+      timerContext.stop();
+    }
+  }
+
   private void identifyLowestSortingUnexportedDocs(List<LeafReaderContext> leaves, SortDoc sortDoc, SortQueue queue) throws IOException {
     Timer.Context timerContext = identifyLowestSortingDocTimer.time();
+
     try {
       queue.reset();
       SortDoc top = queue.top();
@@ -325,6 +344,33 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
     }
   }
 
+
+  private void transferBatchToBufferForOutput(SortDoc[] outDocs,
+                                              ExportBuffers.Buffer destination) throws IOException {
+    Timer.Context timerContext = transferBatchToBufferTimer.time();
+    try {
+      // Copy the leading run of non-null docs; the first null slot ends the batch.
+      int copied = 0;
+      while (copied < outDocs.length && outDocs[copied] != null) {
+        destination.outDocs[copied].setValues(outDocs[copied]);
+        copied++;
+      }
+      // Publish the index of the last filled slot (-1 when nothing was copied).
+      destination.outDocsIndex = copied - 1;
+    } catch (Throwable failure) {
+      // Log here, restore the interrupt flag if applicable, then let the caller see the failure.
+      log.error("transfer", failure);
+      if (failure instanceof InterruptedException) {
+        Thread.currentThread().interrupt();
+      }
+      throw failure;
+    } finally {
+      timerContext.stop();
+    }
+  }
+
+
+
   private void transferBatchToBufferForOutput(SortQueue queue,
                                                 List<LeafReaderContext> leaves,
                                                 ExportBuffers.Buffer destination) throws IOException {
@@ -436,12 +482,22 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
     }
   }
 
+
   void fillOutDocs(List<LeafReaderContext> leaves, SortDoc sortDoc,
                           SortQueue sortQueue, ExportBuffers.Buffer buffer) throws IOException {
     identifyLowestSortingUnexportedDocs(leaves, sortDoc, sortQueue);
     transferBatchToBufferForOutput(sortQueue, leaves, buffer);
   }
 
+  /**
+   * Fills one batch: runs the identify step into outDocs, then the transfer step from outDocs into buffer.
+   */
+  void fillOutDocs(List<LeafReaderContext> leaves, SortDoc sortDoc,
+                   SortDoc[] outDocs, ExportBuffers.Buffer buffer) throws IOException {
+    identifyLowestSortingUnexportedDocs(leaves, sortDoc, outDocs);
+    transferBatchToBufferForOutput(outDocs, buffer);
+  }
+
   void writeDoc(SortDoc sortDoc,
                           List<LeafReaderContext> leaves,
                           EntryWriter ew, FieldWriter[] writers) throws IOException {
@@ -613,8 +669,6 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
     return new SortDoc(sortValues);
   }
 
-
-
   private static class MergeIterator {
 
     private TreeSet<SortDoc> set = new TreeSet();
@@ -636,8 +690,14 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
     */
 
     public SortDoc next() {
-      //This method is free
+
       SortDoc sortDoc = set.pollFirst();
+
+      //We've exhausted all documents
+      if(sortDoc == null) {
+        return null;
+      }
+
       SortDoc nextDoc = segmentIterators[sortDoc.ord].next();
       if(nextDoc != null) {
         //The entire expense of the operation is here
@@ -647,7 +707,7 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
     }
   }
 
-  private MergeIterator getMergeIterator(List<LeafReaderContext> leafs, FixedBitSet[] bits, SortDoc sortDoc) {
+  private MergeIterator getMergeIterator(List<LeafReaderContext> leafs, FixedBitSet[] bits, SortDoc sortDoc) throws IOException {
 
     long totalDocs = 0;
     for(int i=0; i< leafs.size(); i++) {
@@ -664,7 +724,7 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
     SegmentIterator[] segmentIterators = new SegmentIterator[bits.length];
     for(int i=0; i<segmentIterators.length; i++) {
       SortQueue sortQueue = new SortQueue(sizes[i], sortDoc);
-      segmentIterators[i] = new SegmentIterator(bits[i], leafs.get(i), sortQueue, sortDoc);
+      segmentIterators[i] = new SegmentIterator(bits[i], leafs.get(i), sortQueue, sortDoc.copy());
     }
 
     return new MergeIterator(segmentIterators);
@@ -673,15 +733,16 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
   private static class SegmentIterator {
 
     private FixedBitSet bits;
-    private LeafReaderContext context;
     private SortQueue queue;
     private SortDoc sortDoc;
+    private SortDoc lastDoc;
 
-    public SegmentIterator(FixedBitSet bits, LeafReaderContext context, SortQueue sortQueue, SortDoc sortDoc) {
+    public SegmentIterator(FixedBitSet bits, LeafReaderContext context, SortQueue sortQueue, SortDoc sortDoc) throws IOException {
       this.bits = bits;
-      this.context = context;
       this.queue = sortQueue;
       this.sortDoc = sortDoc;
+      this.sortDoc.setNextReader(context); // context is consumed here and not stored on the iterator
+      this.lastDoc = sortDoc.copy(); // separate SortDoc, copied after the reader was set
       topDocs();
     }
 
@@ -690,10 +751,16 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
       if(sortDoc == null) {
         topDocs();
         sortDoc = queue.pop();
-        return sortDoc;
-      } else {
-        return sortDoc;
       }
+
+      if(sortDoc != null) {
+        //Load the global ordinals which will be used
+        bits.clear(sortDoc.docId);
+        sortDoc.setGlobalValues(lastDoc);
+        lastDoc.setValues(sortDoc);
+      }
+
+      return sortDoc;
     }
 
     private void topDocs()  {
diff --git a/solr/core/src/java/org/apache/solr/handler/export/FloatValue.java b/solr/core/src/java/org/apache/solr/handler/export/FloatValue.java
index 6d0d73d..2d5e821 100644
--- a/solr/core/src/java/org/apache/solr/handler/export/FloatValue.java
+++ b/solr/core/src/java/org/apache/solr/handler/export/FloatValue.java
@@ -44,6 +44,10 @@ class FloatValue implements SortValue {
     return currentValue;
   }
 
+  public void toGlobalValue(SortValue previousValue) {
+    // Empty body: the call ignores previousValue and leaves this value unchanged.
+  }
+
   public String getField() {
     return field;
   }
@@ -57,6 +61,8 @@ class FloatValue implements SortValue {
     lastDocID = 0;
   }
 
+
+
   public void setCurrentValue(int docId) throws IOException {
     if (docId < lastDocID) {
       throw new AssertionError("docs were sent out-of-order: lastDocID=" + lastDocID + " vs doc=" + docId);
diff --git a/solr/core/src/java/org/apache/solr/handler/export/IntValue.java b/solr/core/src/java/org/apache/solr/handler/export/IntValue.java
index bae23f9..736f433 100644
--- a/solr/core/src/java/org/apache/solr/handler/export/IntValue.java
+++ b/solr/core/src/java/org/apache/solr/handler/export/IntValue.java
@@ -75,7 +75,11 @@ public class IntValue implements SortValue {
     }
   }
 
-  @Override
+  public void toGlobalValue(SortValue previousValue) {
+    // Empty body: the call ignores previousValue and leaves this value unchanged.
+  }
+
+    @Override
   public boolean isPresent() {
     return this.present;
   }
diff --git a/solr/core/src/java/org/apache/solr/handler/export/LongValue.java b/solr/core/src/java/org/apache/solr/handler/export/LongValue.java
index 63794ad..21dc376 100644
--- a/solr/core/src/java/org/apache/solr/handler/export/LongValue.java
+++ b/solr/core/src/java/org/apache/solr/handler/export/LongValue.java
@@ -52,6 +52,10 @@ public class LongValue implements SortValue {
     return new LongValue(field, comp);
   }
 
+  public void toGlobalValue(SortValue previousValue) {
+    // Empty body: the call ignores previousValue and leaves this value unchanged.
+  }
+
   public void setNextReader(LeafReaderContext context) throws IOException {
     this.vals = DocValues.getNumeric(context.reader(), field);
     lastDocID = 0;
diff --git a/solr/core/src/java/org/apache/solr/handler/export/QuadValueSortDoc.java b/solr/core/src/java/org/apache/solr/handler/export/QuadValueSortDoc.java
index b8599c7..b4ccc93 100644
--- a/solr/core/src/java/org/apache/solr/handler/export/QuadValueSortDoc.java
+++ b/solr/core/src/java/org/apache/solr/handler/export/QuadValueSortDoc.java
@@ -47,6 +47,16 @@ class QuadValueSortDoc extends TripleValueSortDoc {
     value4.setNextReader(context);
   }
 
+
+  @Override
+  public void setGlobalValues(SortDoc previous) { // previous must be a QuadValueSortDoc, otherwise the cast throws
+    final QuadValueSortDoc prior = (QuadValueSortDoc) previous;
+    this.value1.toGlobalValue(prior.value1); // each value is paired with its same-numbered counterpart
+    this.value2.toGlobalValue(prior.value2);
+    this.value3.toGlobalValue(prior.value3);
+    this.value4.toGlobalValue(prior.value4);
+  }
+
   public void reset() {
     this.docId = -1;
     this.docBase = -1;
@@ -126,7 +136,12 @@ class QuadValueSortDoc extends TripleValueSortDoc {
       if(comp == 0) {
         comp = value3.compareTo(sd.value3);
         if(comp == 0) {
-          return value4.compareTo(sd.value4);
+          comp = value4.compareTo(sd.value4);
+          if(comp == 0) {
+            return docId+docBase - sd.docId+sd.docBase;
+          } else {
+            return comp;
+          }
         } else {
           return comp;
         }
diff --git a/solr/core/src/java/org/apache/solr/handler/export/SingleValueSortDoc.java b/solr/core/src/java/org/apache/solr/handler/export/SingleValueSortDoc.java
index 0bd3a4a..913daf9 100644
--- a/solr/core/src/java/org/apache/solr/handler/export/SingleValueSortDoc.java
+++ b/solr/core/src/java/org/apache/solr/handler/export/SingleValueSortDoc.java
@@ -80,7 +80,12 @@ class SingleValueSortDoc extends SortDoc {
 
   public int compareTo(Object o) {
     SingleValueSortDoc sd = (SingleValueSortDoc)o;
-    return value1.compareTo(sd.value1);
+    int comp = value1.compareTo(sd.value1); // primary order: the single sort value
+    if(comp == 0) {
+      return Integer.compare(docId+docBase, sd.docId+sd.docBase); // tie-break on the two sums, so a doc compares equal to itself
+    } else {
+      return comp;
+    }
   }
 
   public String toString() {
diff --git a/solr/core/src/java/org/apache/solr/handler/export/SortDoc.java b/solr/core/src/java/org/apache/solr/handler/export/SortDoc.java
index d893bd1..c4bd018 100644
--- a/solr/core/src/java/org/apache/solr/handler/export/SortDoc.java
+++ b/solr/core/src/java/org/apache/solr/handler/export/SortDoc.java
@@ -34,6 +34,7 @@ class SortDoc {
   }
 
   public SortDoc() {
+
   }
 
   public SortValue getSortValue(String field) {
@@ -69,6 +70,13 @@ class SortDoc {
     }
   }
 
+  public void setGlobalValues(SortDoc previous) { // values are paired by array position
+    final SortValue[] priorValues = previous.sortValues; // fetched once, before the loop starts
+    for (int slot = 0; slot != this.sortValues.length; ++slot) {
+      this.sortValues[slot].toGlobalValue(priorValues[slot]);
+    }
+  }
+
   public void setValues(SortDoc sortDoc) {
     this.docId = sortDoc.docId;
     this.ord = sortDoc.ord;
diff --git a/solr/core/src/java/org/apache/solr/handler/export/SortValue.java b/solr/core/src/java/org/apache/solr/handler/export/SortValue.java
index ad958c0..93f342e 100644
--- a/solr/core/src/java/org/apache/solr/handler/export/SortValue.java
+++ b/solr/core/src/java/org/apache/solr/handler/export/SortValue.java
@@ -25,6 +25,7 @@ public interface SortValue extends Comparable<SortValue> {
   public void setCurrentValue(int docId) throws IOException;
   public void setNextReader(LeafReaderContext context) throws IOException;
   public void setCurrentValue(SortValue value);
+  public void toGlobalValue(SortValue previousValue);
   public void reset();
   public SortValue copy();
   public Object getCurrentValue() throws IOException;
diff --git a/solr/core/src/java/org/apache/solr/handler/export/StringValue.java b/solr/core/src/java/org/apache/solr/handler/export/StringValue.java
index e3a36d4..d9588bb 100644
--- a/solr/core/src/java/org/apache/solr/handler/export/StringValue.java
+++ b/solr/core/src/java/org/apache/solr/handler/export/StringValue.java
@@ -45,6 +45,7 @@ class StringValue implements SortValue {
   private BytesRef lastBytes;
   private String lastString;
   private int lastOrd = -1;
+  private int leafOrd = -1;
 
   public StringValue(SortedDocValues globalDocValues, String field, IntComp comp)  {
     this.globalDocValues = globalDocValues;
@@ -85,7 +86,7 @@ class StringValue implements SortValue {
     }
     if (docId == docValues.docID()) {
       present = true;
-      currentOrd = (int) toGlobal.get(docValues.ordValue());
+      currentOrd = docValues.ordValue();
     } else {
       present = false;
       currentOrd = -1;
@@ -101,6 +102,7 @@ class StringValue implements SortValue {
     StringValue v = (StringValue)sv;
     this.currentOrd = v.currentOrd;
     this.present = v.present;
+    this.leafOrd = v.leafOrd;
   }
 
   public Object getCurrentValue() throws IOException {
@@ -113,11 +115,22 @@ class StringValue implements SortValue {
     return lastBytes;
   }
 
+  public void toGlobalValue(SortValue previousValue) {
+    lastOrd = currentOrd; // keep the pre-translation ordinal before currentOrd is overwritten below
+    StringValue sv = (StringValue)previousValue; // previousValue must be a StringValue, otherwise the cast throws
+    if(currentOrd >= 0 && sv.lastOrd == currentOrd) {
+      this.currentOrd = sv.currentOrd; // same pre-translation ordinal as previousValue recorded: reuse its result
+    } else if(currentOrd >= 0) { // -1 means "no value for this doc": leave it as is, never pass it to toGlobal.get
+      this.currentOrd = (int) toGlobal.get(this.currentOrd);
+    }
+  }
+
   public String getField() {
     return field;
   }
 
   public void setNextReader(LeafReaderContext context) throws IOException {
+    leafOrd = context.ord;
     if (ordinalMap != null) {
       toGlobal = ordinalMap.getGlobalOrds(context.ord);
     }
diff --git a/solr/core/src/java/org/apache/solr/handler/export/TripleValueSortDoc.java b/solr/core/src/java/org/apache/solr/handler/export/TripleValueSortDoc.java
index c68c1a8..09c357c 100644
--- a/solr/core/src/java/org/apache/solr/handler/export/TripleValueSortDoc.java
+++ b/solr/core/src/java/org/apache/solr/handler/export/TripleValueSortDoc.java
@@ -60,6 +60,14 @@ class TripleValueSortDoc extends DoubleValueSortDoc {
     value3.setCurrentValue(docId);
   }
 
+  @Override
+  public void setGlobalValues(SortDoc previous) { // previous must be a TripleValueSortDoc, otherwise the cast throws
+    final TripleValueSortDoc prior = (TripleValueSortDoc) previous;
+    this.value1.toGlobalValue(prior.value1); // each value is paired with its same-numbered counterpart
+    this.value2.toGlobalValue(prior.value2);
+    this.value3.toGlobalValue(prior.value3);
+  }
+
   public void setValues(SortDoc sortDoc) {
     this.docId = sortDoc.docId;
     this.ord = sortDoc.ord;
@@ -111,7 +119,12 @@ class TripleValueSortDoc extends DoubleValueSortDoc {
     if (comp == 0) {
       comp = value2.compareTo(sd.value2);
       if (comp == 0) {
-        return value3.compareTo(sd.value3);
+        comp = value3.compareTo(sd.value3);
+        if(comp == 0) {
+          return docId+docBase - sd.docId+sd.docBase;
+        } else {
+          return comp;
+        }
       } else {
         return comp;
       }