You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by dw...@apache.org on 2021/03/10 09:58:56 UTC
[lucene] 01/05: SOLR-14608: Faster sorting for the /export handler
Squashed commit of the following:
This is an automated email from the ASF dual-hosted git repository.
dweiss pushed a commit to branch jira/SOLR-14608-export-merge
in repository https://gitbox.apache.org/repos/asf/lucene.git
commit 4f691b8bb4492bec44440c1db65cb45ab83bec1c
Author: Joel Bernstein <jb...@apache.org>
AuthorDate: Fri Jan 8 11:49:19 2021 -0500
SOLR-14608: Faster sorting for the /export handler
Squashed commit of the following:
commit 66f85c550691fcb3b4ee1959c7ed1695aed3cb78
Author: Joel Bernstein <jb...@apache.org>
Date: Thu Jan 7 11:04:18 2021 -0500
SOLR-14608: Fix tie-break in SortDoc, TestExportWriter now passing.
commit a183d29ea84dbd4a76517f27a87ef78d56bc935f
Author: Joel Bernstein <jb...@apache.org>
Date: Tue Jan 5 16:07:18 2021 -0500
SOLR-14608: Fix failing TestExportWriter tests
commit d7e81e8197a4b9e06c8d387d34e941a1365ad163
Author: Joel Bernstein <jb...@apache.org>
Date: Tue Dec 29 12:34:02 2020 -0500
SOLR-14608: Tone down debug logging
commit cae61336f86295014a1c373774bc56c5b9f21670
Author: Joel Bernstein <jb...@apache.org>
Date: Tue Dec 29 09:40:35 2020 -0500
SOLR-14608: Fix nanoTime to millis calculation and more code cleanup
commit 4e2cd9aaeaeeeed835af6b638faeefab231d7b9a
Author: Joel Bernstein <jb...@apache.org>
Date: Mon Dec 28 15:00:12 2020 -0500
SOLR-14608: Code clean up
commit 894141b3c9461880917de471285d3b8a96c0a4fe
Author: Joel Bernstein <jb...@apache.org>
Date: Sun Dec 27 13:56:25 2020 -0500
SOLR-14608: Fix bug when caching docvalues objects related to the leafreader ord
commit 0a7ea0ef20d7b280b7d4381ad851024096addf27
Author: Joel Bernstein <jb...@apache.org>
Date: Wed Dec 23 16:12:59 2020 -0500
SOLR-14608: Reuse docvalues when possible
commit 6af848b086c2002b031ea159e485f4b2f30df7c0
Author: Joel Bernstein <jb...@apache.org>
Date: Mon Dec 21 14:13:31 2020 -0500
SOLR-14608: Suppress Broken pipe logging
commit f40001700778bcac4390b2f00c014ad0bf19d091
Author: Joel Bernstein <jb...@apache.org>
Date: Sun Dec 6 10:36:01 2020 -0500
Test commit 2
commit 8373f3a6e1383028a517831bc561ac2f491c6ce3
Author: Joel Bernstein <jb...@apache.org>
Date: Sun Dec 6 10:34:26 2020 -0500
Test commit
commit 8e9a7afddde80080150e3fd078005a8add29ac7f
Author: Andrzej Bialecki <ab...@apache.org>
Date: Thu Jul 30 15:48:28 2020 +0200
SOLR-14608: More cleanups. Fix a bug in compareTo. Add SortDoc.equals() / hashCode().
commit 536d962d6e016573cafd2f420511e4f7083e0468
Author: Andrzej Bialecki <ab...@apache.org>
Date: Wed Jul 29 13:13:09 2020 +0200
SOLR-14608: Fix generics / raw types, move around the timer metrics so that they make
sense.
commit ebd5bcaab8c917b16164bcd059276558002b09a6
Author: Andrzej Bialecki <ab...@apache.org>
Date: Tue Jul 28 11:38:44 2020 +0200
SOLR-14608: Fix code formatting.
commit bf8d954ca1289d82eb5334719fb97bbabacacb09
Merge: b610ddae2f8 6bf5f4a87f4
Author: Andrzej Bialecki <ab...@apache.org>
Date: Mon Jul 27 15:42:04 2020 +0200
Merge branch 'master' into jira/SOLR-14608-export
commit b610ddae2f8a4258f1d6e7c842f480f2b8c46fa9
Author: Joel Bernstein <jb...@apache.org>
Date: Fri Jul 17 10:32:27 2020 -0400
SOLR-14608: Cache output bytesref
commit ee9c3d083c60850c3e48d301356329cf0f017c86
Author: Joel Bernstein <jb...@apache.org>
Date: Wed Jul 15 15:55:48 2020 -0400
SOLR-14608: Works with one string value sort field.
commit 32e92c5025637e4b6cd940628a491fe6b7e7cb13
Author: Joel Bernstein <jb...@apache.org>
Date: Mon Jul 13 11:26:56 2020 -0400
SOLR-14608: Wire-up the MergeIterator part three
commit f747562ca60907dd542cd9bc141a5806f156db1c
Author: Joel Bernstein <jb...@apache.org>
Date: Mon Jul 13 10:41:39 2020 -0400
SOLR-14608: Wire-up the MergeIterator part two
commit 970c6cf4f5abad4248ff5c686c8ec65061dfa949
Author: Joel Bernstein <jb...@apache.org>
Date: Mon Jul 13 09:32:52 2020 -0400
SOLR-14608: Wire-up the MergeIterator
commit 95e706abc425003d79a037500b9887f2c8a7798c
Author: Joel Bernstein <jb...@apache.org>
Date: Fri Jul 10 09:53:22 2020 -0400
SOLR-14608: Size segment level sort queues based on segment maxdoc
commit e0fc38f1b1093cd761da03e561df6395d3a79fc1
Author: Joel Bernstein <jb...@apache.org>
Date: Thu Jul 9 16:40:38 2020 -0400
SOLR-14608: Add method for creating the MergeIterator
commit 9b01320ddd3800607fa0197df6ac66bfd27e148a
Author: Joel Bernstein <jb...@apache.org>
Date: Thu Jul 9 14:23:17 2020 -0400
SOLR-14608: Add skeleton algorithm for segment level iterator
commit bb4ae51c1c3e54c976bd1d449d5264afa3d74ec2
Author: Joel Bernstein <jb...@apache.org>
Date: Thu Jul 9 13:04:39 2020 -0400
SOLR-14608: Add basic top level merge sort iterator
---
.../solr/handler/export/BoolFieldWriter.java | 34 +--
.../solr/handler/export/DateFieldWriter.java | 23 +-
.../solr/handler/export/DoubleFieldWriter.java | 21 +-
.../apache/solr/handler/export/DoubleValue.java | 8 +-
.../solr/handler/export/DoubleValueSortDoc.java | 38 ++-
.../apache/solr/handler/export/ExportBuffers.java | 72 +++---
.../apache/solr/handler/export/ExportWriter.java | 279 ++++++++++++++++-----
.../solr/handler/export/ExportWriterStream.java | 30 +--
.../apache/solr/handler/export/FieldWriter.java | 4 +-
.../solr/handler/export/FloatFieldWriter.java | 22 +-
.../org/apache/solr/handler/export/FloatValue.java | 8 +-
.../apache/solr/handler/export/IntFieldWriter.java | 22 +-
.../org/apache/solr/handler/export/IntValue.java | 8 +-
.../solr/handler/export/LongFieldWriter.java | 23 +-
.../org/apache/solr/handler/export/LongValue.java | 4 +
.../solr/handler/export/MultiFieldWriter.java | 57 ++++-
.../solr/handler/export/QuadValueSortDoc.java | 47 +++-
.../solr/handler/export/SingleValueSortDoc.java | 33 ++-
.../org/apache/solr/handler/export/SortDoc.java | 35 ++-
.../org/apache/solr/handler/export/SortValue.java | 19 +-
.../solr/handler/export/StringFieldWriter.java | 91 ++++---
.../apache/solr/handler/export/StringValue.java | 33 ++-
.../solr/handler/export/TripleValueSortDoc.java | 44 +++-
.../java/org/apache/solr/util/SolrLogPostTool.java | 27 +-
24 files changed, 691 insertions(+), 291 deletions(-)
diff --git a/solr/core/src/java/org/apache/solr/handler/export/BoolFieldWriter.java b/solr/core/src/java/org/apache/solr/handler/export/BoolFieldWriter.java
index 20b1598..92da7a3 100644
--- a/solr/core/src/java/org/apache/solr/handler/export/BoolFieldWriter.java
+++ b/solr/core/src/java/org/apache/solr/handler/export/BoolFieldWriter.java
@@ -19,45 +19,17 @@ package org.apache.solr.handler.export;
import java.io.IOException;
-import org.apache.lucene.index.DocValues;
-import org.apache.lucene.index.LeafReader;
-import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.CharsRefBuilder;
import org.apache.solr.common.MapWriter;
import org.apache.solr.schema.FieldType;
-class BoolFieldWriter extends FieldWriter {
- private String field;
- private FieldType fieldType;
- private CharsRefBuilder cref = new CharsRefBuilder();
-
+class BoolFieldWriter extends StringFieldWriter {
public BoolFieldWriter(String field, FieldType fieldType) {
- this.field = field;
- this.fieldType = fieldType;
+ super(field, fieldType);
}
- public boolean write(SortDoc sortDoc, LeafReader reader, MapWriter.EntryWriter ew, int fieldIndex) throws IOException {
- BytesRef ref;
- SortValue sortValue = sortDoc.getSortValue(this.field);
- if (sortValue != null) {
- if (sortValue.isPresent()) {
- ref = (BytesRef) sortValue.getCurrentValue();
- } else { //empty-value
- return false;
- }
- } else {
- // field is not part of 'sort' param, but part of 'fl' param
- SortedDocValues vals = DocValues.getSorted(reader, this.field);
- if (vals.advance(sortDoc.docId) != sortDoc.docId) {
- return false;
- }
- int ord = vals.ordValue();
- ref = vals.lookupOrd(ord);
- }
-
+ protected void writeBytes(MapWriter.EntryWriter ew, BytesRef ref, FieldType fieldType) throws IOException {
fieldType.indexedToReadable(ref, cref);
ew.put(this.field, "true".equals(cref.toString()));
- return true;
}
}
diff --git a/solr/core/src/java/org/apache/solr/handler/export/DateFieldWriter.java b/solr/core/src/java/org/apache/solr/handler/export/DateFieldWriter.java
index e585a06..e44ba08 100644
--- a/solr/core/src/java/org/apache/solr/handler/export/DateFieldWriter.java
+++ b/solr/core/src/java/org/apache/solr/handler/export/DateFieldWriter.java
@@ -20,19 +20,22 @@ package org.apache.solr.handler.export;
import java.io.IOException;
import java.util.Date;
+import com.carrotsearch.hppc.IntObjectHashMap;
import org.apache.lucene.index.DocValues;
-import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.solr.common.MapWriter;
class DateFieldWriter extends FieldWriter {
private String field;
+ private IntObjectHashMap<NumericDocValues> docValuesCache = new IntObjectHashMap<>();
+
public DateFieldWriter(String field) {
this.field = field;
}
- public boolean write(SortDoc sortDoc, LeafReader reader, MapWriter.EntryWriter ew, int fieldIndex) throws IOException {
+ public boolean write(SortDoc sortDoc, LeafReaderContext readerContext, MapWriter.EntryWriter ew, int fieldIndex) throws IOException {
Long val;
SortValue sortValue = sortDoc.getSortValue(this.field);
if (sortValue != null) {
@@ -43,7 +46,21 @@ class DateFieldWriter extends FieldWriter {
}
} else {
// field is not part of 'sort' param, but part of 'fl' param
- NumericDocValues vals = DocValues.getNumeric(reader, this.field);
+ int readerOrd = readerContext.ord;
+ NumericDocValues vals = null;
+ if(docValuesCache.containsKey(readerOrd)) {
+ NumericDocValues numericDocValues = docValuesCache.get(readerOrd);
+ if(numericDocValues.docID() < sortDoc.docId) {
+ //We have not advanced beyond the current docId so we can use this docValues.
+ vals = numericDocValues;
+ }
+ }
+
+ if(vals == null) {
+ vals = DocValues.getNumeric(readerContext.reader(), this.field);
+ docValuesCache.put(readerOrd, vals);
+ }
+
if (vals.advance(sortDoc.docId) == sortDoc.docId) {
val = vals.longValue();
} else {
diff --git a/solr/core/src/java/org/apache/solr/handler/export/DoubleFieldWriter.java b/solr/core/src/java/org/apache/solr/handler/export/DoubleFieldWriter.java
index 8211269..cb36a75 100644
--- a/solr/core/src/java/org/apache/solr/handler/export/DoubleFieldWriter.java
+++ b/solr/core/src/java/org/apache/solr/handler/export/DoubleFieldWriter.java
@@ -19,19 +19,21 @@ package org.apache.solr.handler.export;
import java.io.IOException;
+import com.carrotsearch.hppc.IntObjectHashMap;
import org.apache.lucene.index.DocValues;
-import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.solr.common.MapWriter;
class DoubleFieldWriter extends FieldWriter {
private String field;
+ private IntObjectHashMap<NumericDocValues> docValuesCache = new IntObjectHashMap<>();
public DoubleFieldWriter(String field) {
this.field = field;
}
- public boolean write(SortDoc sortDoc, LeafReader reader, MapWriter.EntryWriter ew, int fieldIndex) throws IOException {
+ public boolean write(SortDoc sortDoc, LeafReaderContext readerContext, MapWriter.EntryWriter ew, int fieldIndex) throws IOException {
SortValue sortValue = sortDoc.getSortValue(this.field);
if (sortValue != null) {
if (sortValue.isPresent()) {
@@ -43,7 +45,20 @@ class DoubleFieldWriter extends FieldWriter {
}
} else {
// field is not part of 'sort' param, but part of 'fl' param
- NumericDocValues vals = DocValues.getNumeric(reader, this.field);
+ int readerOrd = readerContext.ord;
+ NumericDocValues vals = null;
+ if(docValuesCache.containsKey(readerOrd)) {
+ NumericDocValues numericDocValues = docValuesCache.get(readerOrd);
+ if(numericDocValues.docID() < sortDoc.docId) {
+ //We have not advanced beyond the current docId so we can use this docValues.
+ vals = numericDocValues;
+ }
+ }
+
+ if(vals == null) {
+ vals = DocValues.getNumeric(readerContext.reader(), this.field);
+ docValuesCache.put(readerOrd, vals);
+ }
if (vals.advance(sortDoc.docId) == sortDoc.docId) {
long val = vals.longValue();
ew.put(this.field, Double.longBitsToDouble(val));
diff --git a/solr/core/src/java/org/apache/solr/handler/export/DoubleValue.java b/solr/core/src/java/org/apache/solr/handler/export/DoubleValue.java
index d85bbc3..db1c04c 100644
--- a/solr/core/src/java/org/apache/solr/handler/export/DoubleValue.java
+++ b/solr/core/src/java/org/apache/solr/handler/export/DoubleValue.java
@@ -78,13 +78,17 @@ class DoubleValue implements SortValue {
}
}
+ public void toGlobalValue(SortValue previousValue) {
+
+ }
+
@Override
public boolean isPresent() {
return present;
}
public void setCurrentValue(SortValue sv) {
- DoubleValue dv = (DoubleValue)sv;
+ DoubleValue dv = (DoubleValue) sv;
this.currentValue = dv.currentValue;
this.present = dv.present;
}
@@ -95,7 +99,7 @@ class DoubleValue implements SortValue {
}
public int compareTo(SortValue o) {
- DoubleValue dv = (DoubleValue)o;
+ DoubleValue dv = (DoubleValue) o;
return comp.compare(currentValue, dv.currentValue);
}
}
diff --git a/solr/core/src/java/org/apache/solr/handler/export/DoubleValueSortDoc.java b/solr/core/src/java/org/apache/solr/handler/export/DoubleValueSortDoc.java
index 8c2a92a..82338c9 100644
--- a/solr/core/src/java/org/apache/solr/handler/export/DoubleValueSortDoc.java
+++ b/solr/core/src/java/org/apache/solr/handler/export/DoubleValueSortDoc.java
@@ -25,6 +25,7 @@ class DoubleValueSortDoc extends SingleValueSortDoc {
protected SortValue value2;
+ @Override
public SortValue getSortValue(String field) {
if (value1.getField().equals(field)) {
return value1;
@@ -34,6 +35,7 @@ class DoubleValueSortDoc extends SingleValueSortDoc {
return null;
}
+ @Override
public void setNextReader(LeafReaderContext context) throws IOException {
this.ord = context.ord;
this.docBase = context.docBase;
@@ -41,6 +43,7 @@ class DoubleValueSortDoc extends SingleValueSortDoc {
value2.setNextReader(context);
}
+ @Override
public void reset() {
this.docId = -1;
this.docBase = -1;
@@ -49,18 +52,27 @@ class DoubleValueSortDoc extends SingleValueSortDoc {
value2.reset();
}
+ @Override
public void setValues(int docId) throws IOException {
this.docId = docId;
value1.setCurrentValue(docId);
value2.setCurrentValue(docId);
}
+ @Override
+ public void setGlobalValues(SortDoc previous) {
+ DoubleValueSortDoc doubleValueSortDoc = (DoubleValueSortDoc) previous;
+ value1.toGlobalValue(doubleValueSortDoc.value1);
+ value2.toGlobalValue(doubleValueSortDoc.value2);
+ }
+
+ @Override
public void setValues(SortDoc sortDoc) {
this.docId = sortDoc.docId;
this.ord = sortDoc.ord;
this.docBase = sortDoc.docBase;
- value1.setCurrentValue(((DoubleValueSortDoc)sortDoc).value1);
- value2.setCurrentValue(((DoubleValueSortDoc)sortDoc).value2);
+ value1.setCurrentValue(((DoubleValueSortDoc) sortDoc).value1);
+ value2.setCurrentValue(((DoubleValueSortDoc) sortDoc).value2);
}
public DoubleValueSortDoc(SortValue value1, SortValue value2) {
@@ -68,34 +80,42 @@ class DoubleValueSortDoc extends SingleValueSortDoc {
this.value2 = value2;
}
+ @Override
public SortDoc copy() {
return new DoubleValueSortDoc(value1.copy(), value2.copy());
}
+ @Override
public boolean lessThan(Object o) {
- DoubleValueSortDoc sd = (DoubleValueSortDoc)o;
+ DoubleValueSortDoc sd = (DoubleValueSortDoc) o;
int comp = value1.compareTo(sd.value1);
- if(comp == -1) {
+ if (comp == -1) {
return true;
} else if (comp == 1) {
return false;
} else {
comp = value2.compareTo(sd.value2);
- if(comp == -1) {
+ if (comp == -1) {
return true;
} else if (comp == 1) {
return false;
} else {
- return docId+docBase > sd.docId+sd.docBase;
+ return docId + docBase > sd.docId + sd.docBase;
}
}
}
- public int compareTo(Object o) {
- DoubleValueSortDoc sd = (DoubleValueSortDoc)o;
+ @Override
+ public int compareTo(SortDoc o) {
+ DoubleValueSortDoc sd = (DoubleValueSortDoc) o;
int comp = value1.compareTo(sd.value1);
if (comp == 0) {
- return value2.compareTo(sd.value2);
+ comp = value2.compareTo(sd.value2);
+ if (comp == 0) {
+ return (sd.docId + sd.docBase) - (docId + docBase);
+ } else {
+ return comp;
+ }
} else {
return comp;
}
diff --git a/solr/core/src/java/org/apache/solr/handler/export/ExportBuffers.java b/solr/core/src/java/org/apache/solr/handler/export/ExportBuffers.java
index 72382d2..696013f 100644
--- a/solr/core/src/java/org/apache/solr/handler/export/ExportBuffers.java
+++ b/solr/core/src/java/org/apache/solr/handler/export/ExportBuffers.java
@@ -26,14 +26,16 @@ import java.util.concurrent.CyclicBarrier;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.LongAdder;
+import java.util.concurrent.BrokenBarrierException;
-import com.codahale.metrics.Timer;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Sort;
+import org.apache.lucene.util.FixedBitSet;
import org.apache.solr.common.IteratorWriter;
import org.apache.solr.common.util.ExecutorUtil;
import org.apache.solr.common.util.SolrNamedThreadFactory;
import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.handler.export.ExportWriter.MergeIterator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -51,9 +53,6 @@ class ExportBuffers {
final List<LeafReaderContext> leaves;
final ExportWriter exportWriter;
final OutputStream os;
- final Timer writeOutputBufferTimer;
- final Timer fillerWaitTimer;
- final Timer writerWaitTimer;
final IteratorWriter.ItemWriter rawWriter;
final IteratorWriter.ItemWriter writer;
final CyclicBarrier barrier;
@@ -68,7 +67,7 @@ class ExportBuffers {
ExportBuffers(ExportWriter exportWriter, List<LeafReaderContext> leaves, SolrIndexSearcher searcher,
OutputStream os, IteratorWriter.ItemWriter rawWriter, Sort sort, int queueSize, int totalHits,
- Timer writeOutputBufferTimer, Timer fillerWaitTimer, Timer writerWaitTimer) throws IOException {
+ FixedBitSet[] sets) throws IOException {
this.exportWriter = exportWriter;
this.leaves = leaves;
this.os = os;
@@ -81,55 +80,60 @@ class ExportBuffers {
return this;
}
};
- this.writeOutputBufferTimer = writeOutputBufferTimer;
- this.fillerWaitTimer = fillerWaitTimer;
- this.writerWaitTimer = writerWaitTimer;
+
this.bufferOne = new Buffer(queueSize);
this.bufferTwo = new Buffer(queueSize);
this.totalHits = totalHits;
fillBuffer = bufferOne;
outputBuffer = bufferTwo;
SortDoc writerSortDoc = exportWriter.getSortDoc(searcher, sort.getSort());
+
+ MergeIterator mergeIterator = exportWriter.getMergeIterator(leaves, sets, writerSortDoc);
+
bufferOne.initialize(writerSortDoc);
bufferTwo.initialize(writerSortDoc);
barrier = new CyclicBarrier(2, () -> swapBuffers());
filler = () -> {
try {
// log.debug("--- filler start {}", Thread.currentThread());
- SortDoc sortDoc = exportWriter.getSortDoc(searcher, sort.getSort());
Buffer buffer = getFillBuffer();
- SortQueue queue = new SortQueue(queueSize, sortDoc);
long lastOutputCounter = 0;
for (int count = 0; count < totalHits; ) {
// log.debug("--- filler fillOutDocs in {}", fillBuffer);
- exportWriter.fillOutDocs(leaves, sortDoc, queue, buffer);
+ exportWriter.fillOutDocs(mergeIterator, buffer);
count += (buffer.outDocsIndex + 1);
// log.debug("--- filler count={}, exchange buffer from {}", count, buffer);
- Timer.Context timerContext = getFillerWaitTimer().time();
try {
+ long startBufferWait = System.nanoTime();
exchangeBuffers();
+ long endBufferWait = System.nanoTime();
+ log.debug("Waited for writer thread:"+Long.toString(((endBufferWait-startBufferWait)/1000000)));
} finally {
- timerContext.stop();
+
}
+
buffer = getFillBuffer();
if (outputCounter.longValue() > lastOutputCounter) {
lastOutputCounter = outputCounter.longValue();
flushOutput();
}
- // log.debug("--- filler got empty buffer {}", buffer);
}
buffer.outDocsIndex = Buffer.NO_MORE_DOCS;
- // log.debug("--- filler final exchange buffer from {}", buffer);
- Timer.Context timerContext = getFillerWaitTimer().time();
try {
exchangeBuffers();
} finally {
- timerContext.stop();
+
}
buffer = getFillBuffer();
// log.debug("--- filler final got buffer {}", buffer);
} catch (Throwable e) {
- log.error("filler", e);
+ if(!(e instanceof InterruptedException) && !(e instanceof BrokenBarrierException)) {
+ /*
+ Don't log the interrupt or BrokenBarrierException as it creates noise during early client disconnects and
+ doesn't log anything particularly useful in other situations.
+ */
+ log.error("filler", e);
+ }
error(e);
if (e instanceof InterruptedException) {
Thread.currentThread().interrupt();
@@ -155,7 +159,7 @@ class ExportBuffers {
}
private void swapBuffers() {
- log.debug("--- swap buffers");
+ //log.debug("--- swap buffers");
Buffer one = fillBuffer;
fillBuffer = outputBuffer;
outputBuffer = one;
@@ -174,18 +178,6 @@ class ExportBuffers {
return fillBuffer;
}
- public Timer getWriteOutputBufferTimer() {
- return writeOutputBufferTimer;
- }
-
- public Timer getFillerWaitTimer() {
- return fillerWaitTimer;
- }
-
- public Timer getWriterWaitTimer() {
- return writerWaitTimer;
- }
-
// decorated writer that keeps track of number of writes
public IteratorWriter.ItemWriter getWriter() {
return writer;
@@ -231,7 +223,23 @@ class ExportBuffers {
// allDone.join();
log.debug("-- finished.");
} catch (Exception e) {
- log.error("Exception running filler / writer", e);
+ Throwable ex = e;
+ boolean ignore = false;
+ while (ex != null) {
+ String m = ex.getMessage();
+ if (m != null && m.contains("Broken pipe")) {
+ ignore = true;
+ break;
+ }
+ ex = ex.getCause();
+ }
+ if(!ignore) {
+ /*
+ Ignore Broken pipes. Broken pipes occur normally when using the export handler for
+ merge joins when the join is complete before both sides of the join are fully read.
+ */
+ log.error("Exception running filler / writer", e);
+ }
error(e);
//
} finally {
diff --git a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java
index 8d07239..4cfb594 100644
--- a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java
+++ b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java
@@ -25,8 +25,8 @@ import java.io.PrintWriter;
import java.lang.invoke.MethodHandles;
import java.nio.charset.StandardCharsets;
import java.util.List;
+import java.util.TreeSet;
-import com.codahale.metrics.Timer;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedDocValues;
@@ -74,6 +74,7 @@ import org.apache.solr.schema.StrField;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SortSpec;
import org.apache.solr.search.SyntaxError;
+import org.eclipse.jetty.util.IO;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -114,11 +115,7 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
FixedBitSet[] sets = null;
PushWriter writer;
private String wt;
- final Timer identifyLowestSortingDocTimer;
- final Timer transferBatchToBufferTimer;
- final Timer writeOutputBufferTimer;
- final Timer writerWaitTimer;
- final Timer fillerWaitTimer;
+
public ExportWriter(SolrQueryRequest req, SolrQueryResponse res, String wt,
@@ -130,12 +127,9 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
this.initialStreamContext = initialStreamContext;
this.solrMetricsContext = solrMetricsContext;
this.metricsPath = metricsPath;
- this.batchSize = req.getParams().getInt(BATCH_SIZE_PARAM, DEFAULT_BATCH_SIZE);
- identifyLowestSortingDocTimer = solrMetricsContext.timer("identifyLowestSortingDoc", metricsPath);
- transferBatchToBufferTimer = solrMetricsContext.timer("transferBatchToBuffer", metricsPath);
- writeOutputBufferTimer = solrMetricsContext.timer("writeOutputBuffer", metricsPath);
- writerWaitTimer = solrMetricsContext.timer("writerWaitTimer", metricsPath);
- fillerWaitTimer = solrMetricsContext.timer("fillerWaitTimer", metricsPath);
+ // may be too tricky to get this right? always use default for now
+ //this.batchSize = req.getParams().getInt(BATCH_SIZE_PARAM, DEFAULT_BATCH_SIZE);
+ this.batchSize = DEFAULT_BATCH_SIZE;
}
@Override
@@ -147,10 +141,20 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
@Override
public void close() throws IOException {
- if (writer != null) writer.close();
+ if (writer != null) {
+ try {
+ writer.close();
+ } catch (Throwable t) {
+ //We're going to sit on this.
+ }
+ }
if (respWriter != null) {
- respWriter.flush();
- respWriter.close();
+ try {
+ respWriter.flush();
+ respWriter.close();
+ } catch (Throwable t) {
+
+ }
}
}
@@ -168,6 +172,14 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
}
public void write(OutputStream os) throws IOException {
+ try {
+ _write(os);
+ } finally {
+
+ }
+ }
+
+ private void _write(OutputStream os) throws IOException {
QueryResponseWriter rw = req.getCore().getResponseWriters().get(wt);
if (rw instanceof BinaryResponseWriter) {
//todo add support for other writers after testing
@@ -302,41 +314,16 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
return tupleStream;
}
- private void identifyLowestSortingUnexportedDocs(List<LeafReaderContext> leaves, SortDoc sortDoc, SortQueue queue) throws IOException {
- Timer.Context timerContext = identifyLowestSortingDocTimer.time();
- try {
- queue.reset();
- SortDoc top = queue.top();
- for (int i = 0; i < leaves.size(); i++) {
- sortDoc.setNextReader(leaves.get(i));
- DocIdSetIterator it = new BitSetIterator(sets[i], 0); // cost is not useful here
- int docId;
- while ((docId = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
- sortDoc.setValues(docId);
- if (top.lessThan(sortDoc)) {
- top.setValues(sortDoc);
- top = queue.updateTop();
- }
- }
- }
- } finally {
- timerContext.stop();
- }
- }
-
- private void transferBatchToBufferForOutput(SortQueue queue,
- List<LeafReaderContext> leaves,
- ExportBuffers.Buffer destination) throws IOException {
- Timer.Context timerContext = transferBatchToBufferTimer.time();
+ private void transferBatchToBufferForOutput(MergeIterator mergeIterator,
+ ExportBuffers.Buffer destination) throws IOException {
try {
int outDocsIndex = -1;
- for (int i = 0; i < queue.maxSize; i++) {
- SortDoc s = queue.pop();
- if (s.docId > -1) {
- destination.outDocs[++outDocsIndex].setValues(s);
- // remove this doc id from the matching bitset, it's been exported
- sets[s.ord].clear(s.docId);
- s.reset(); // reuse
+ for (int i = 0; i < batchSize; i++) {
+ SortDoc sortDoc = mergeIterator.next();
+ if (sortDoc != null) {
+ destination.outDocs[++outDocsIndex].setValues(sortDoc);
+ } else {
+ break;
}
}
destination.outDocsIndex = outDocsIndex;
@@ -347,7 +334,7 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
}
throw t;
} finally {
- timerContext.stop();
+
}
}
@@ -355,8 +342,17 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
List<LeafReaderContext> leaves = req.getSearcher().getTopReaderContext().leaves();
final int queueSize = Math.min(batchSize, totalHits);
- ExportBuffers buffers = new ExportBuffers(this, leaves, req.getSearcher(), os, writer, sort, queueSize, totalHits,
- writeOutputBufferTimer, fillerWaitTimer, writerWaitTimer);
+
+ ExportBuffers buffers = new ExportBuffers(this,
+ leaves,
+ req.getSearcher(),
+ os,
+ writer,
+ sort,
+ queueSize,
+ totalHits,
+ sets);
+
if (streamExpression != null) {
streamContext.put(ExportBuffers.EXPORT_BUFFERS_KEY, buffers);
@@ -408,9 +404,8 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
log.debug("--- writer interrupted");
break;
}
- Timer.Context timerContext = writeOutputBufferTimer.time();
try {
- for (int i = buffer.outDocsIndex; i >= 0; --i) {
+ for (int i = 0; i <= buffer.outDocsIndex; ++i) {
// we're using the raw writer here because there's no potential
// reduction in the number of output items, unlike when using
// streaming expressions
@@ -418,37 +413,36 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
writer.add((MapWriter) ew -> writeDoc(currentDoc, leaves, ew, fieldWriters));
}
} finally {
- timerContext.stop();
}
- log.debug("--- writer exchanging from {}", buffer);
- timerContext = writerWaitTimer.time();
+ //log.debug("--- writer exchanging from {}", buffer);
try {
+ long startExchangeBuffers = System.nanoTime();
buffers.exchangeBuffers();
+ long endExchangeBuffers = System.nanoTime();
+ log.debug("Waited for reader thread:"+Long.toString(((endExchangeBuffers-startExchangeBuffers)/1000000)));
} finally {
- timerContext.stop();
}
buffer = buffers.getOutputBuffer();
- log.debug("--- writer got {}", buffer);
+ //log.debug("--- writer got {}", buffer);
}
return true;
});
}
}
- void fillOutDocs(List<LeafReaderContext> leaves, SortDoc sortDoc,
- SortQueue sortQueue, ExportBuffers.Buffer buffer) throws IOException {
- identifyLowestSortingUnexportedDocs(leaves, sortDoc, sortQueue);
- transferBatchToBufferForOutput(sortQueue, leaves, buffer);
+ void fillOutDocs(MergeIterator mergeIterator,
+ ExportBuffers.Buffer buffer) throws IOException {
+ transferBatchToBufferForOutput(mergeIterator, buffer);
}
void writeDoc(SortDoc sortDoc,
- List<LeafReaderContext> leaves,
- EntryWriter ew, FieldWriter[] writers) throws IOException {
+ List<LeafReaderContext> leaves,
+ EntryWriter ew, FieldWriter[] writers) throws IOException {
int ord = sortDoc.ord;
LeafReaderContext context = leaves.get(ord);
int fieldIndex = 0;
for (FieldWriter fieldWriter : writers) {
- if (fieldWriter.write(sortDoc, context.reader(), ew, fieldIndex)) {
+ if (fieldWriter.write(sortDoc, context, ew, fieldIndex)) {
++fieldIndex;
}
}
@@ -612,6 +606,160 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
return new SortDoc(sortValues);
}
+
+ /**
+  * Merges the per-segment {@link SegmentIterator}s into one sorted stream of documents.
+  * The current head document of every non-exhausted segment is kept in a {@link TreeSet};
+  * each call to {@link #next()} removes the last (greatest) element of that set and
+  * replaces it with the following document from the same segment.
+  */
+ static class MergeIterator {
+   private TreeSet<SortDoc> set = new TreeSet<>();
+   private SegmentIterator[] segmentIterators;
+   private SortDoc outDoc;
+
+   /**
+    * @param segmentIterators one iterator per segment, indexed by segment ordinal
+    * @param proto prototype document; a copy of it is used as the reusable return value of {@link #next()}
+    * @throws IOException if reading the first document of any segment fails
+    */
+   public MergeIterator(SegmentIterator[] segmentIterators, SortDoc proto) throws IOException {
+     this.outDoc = proto.copy();
+     this.segmentIterators = segmentIterators;
+     // Prime the set with the first document of every segment that has one.
+     for (SegmentIterator segmentIterator : segmentIterators) {
+       try {
+         SortDoc first = segmentIterator.next();
+         if (first != null) {
+           set.add(first);
+         }
+       } catch (IOException e) {
+         log.error("Error in MergeIterator: ", e);
+         throw e;
+       }
+     }
+   }
+
+   /**
+    * Merge sorts the SortDocs from the segment iterators.
+    *
+    * @return the next document, or null when all docs are iterated. The same instance is
+    *         returned (and overwritten) on every call, so it must be consumed before the next call.
+    * @throws IOException if advancing the owning segment iterator fails
+    */
+   public SortDoc next() throws IOException {
+     SortDoc head = set.pollLast();
+     if (head == null) {
+       // We've exhausted all documents
+       return null;
+     }
+
+     // Copy first: SegmentIterator.next() overwrites and returns its own reusable
+     // instance, which is the very object that was just polled from the set.
+     outDoc.setValues(head);
+
+     SortDoc refill = segmentIterators[head.ord].next();
+     if (refill != null) {
+       // The entire expense of the operation is here
+       set.add(refill);
+     }
+     return outDoc;
+   }
+ }
+
+ /**
+  * Builds a {@link MergeIterator} over all segments.
+  * Each segment gets its own sort queue whose capacity is proportional to the segment's
+  * share of the total document count (out of a budget of 200000), capped at {@code batchSize}.
+  *
+  * @param leaves the segments, in segment-ordinal order
+  * @param bits per-segment bit sets of matching documents, parallel to {@code leaves}
+  * @param sortDoc prototype sort document; copies are handed to every queue and iterator
+  * @return a merge iterator positioned before the first document
+  * @throws IOException if a segment iterator fails while loading its first batch
+  */
+ public MergeIterator getMergeIterator(List<LeafReaderContext> leaves, FixedBitSet[] bits, SortDoc sortDoc) throws IOException {
+   long totalDocs = 0;
+   for (LeafReaderContext leaf : leaves) {
+     totalDocs += leaf.reader().maxDoc();
+   }
+
+   SegmentIterator[] segmentIterators = new SegmentIterator[leaves.size()];
+   for (int i = 0; i < segmentIterators.length; i++) {
+     LeafReaderContext leaf = leaves.get(i);
+     long maxDoc = leaf.reader().maxDoc();
+     int proportionalSize = (int) (((double) maxDoc / (double) totalDocs) * 200000);
+     // A segment holding a tiny fraction of the index (or an index with totalDocs == 0)
+     // would otherwise truncate to a capacity of 0; an iterator with no capacity can
+     // never hand out that segment's matching documents. Always allow at least one slot.
+     int sortQueueSize = Math.max(1, Math.min(proportionalSize, batchSize));
+
+     SortQueue sortQueue = new SortQueue(sortQueueSize, sortDoc.copy());
+     segmentIterators[i] = new SegmentIterator(bits[i], leaf, sortQueue, sortDoc.copy());
+   }
+
+   return new MergeIterator(segmentIterators, sortDoc);
+ }
+
+ /**
+  * Iterates the matching documents of a single segment.
+  * Documents are collected in batches: {@link #topDocs()} scans the remaining set bits,
+  * keeps up to {@code sortQueue.maxSize} documents in the sort queue and pops them into
+  * {@code outDocs}; {@link #next()} then hands them out from the end of that array and
+  * clears each document's bit so that later batches do not load it again.
+  */
+ private static class SegmentIterator {
+
+   private final FixedBitSet bits;
+   private final SortQueue queue;
+   private final SortDoc sortDoc;
+   private final LeafReaderContext context;
+   private final SortDoc[] outDocs;
+
+   // Reusable instance returned by next(); null once the segment is exhausted.
+   private SortDoc nextDoc;
+   // Position of the next entry to hand out from outDocs; -1 when the batch is used up.
+   private int index;
+
+   /**
+    * Creates the iterator and eagerly loads the first batch.
+    *
+    * @throws IOException if loading the first batch fails
+    */
+   public SegmentIterator(FixedBitSet bits, LeafReaderContext context, SortQueue sortQueue, SortDoc sortDoc) throws IOException {
+     this.bits = bits;
+     this.queue = sortQueue;
+     this.sortDoc = sortDoc;
+     this.nextDoc = sortDoc.copy();
+     this.context = context;
+     this.outDocs = new SortDoc[sortQueue.maxSize];
+     topDocs();
+   }
+
+   /**
+    * @return the next document of this segment, or null when none are left. The same
+    *         instance is overwritten and returned on every call.
+    * @throws IOException if loading the next batch fails
+    */
+   public SortDoc next() throws IOException {
+     if (index < 0) {
+       // Current batch is used up, try to load another one.
+       topDocs();
+     }
+
+     SortDoc current = index > -1 ? outDocs[index--] : null;
+     if (current == null) {
+       nextDoc = null;
+       return null;
+     }
+
+     // Clear the bit so it's not loaded again.
+     bits.clear(current.docId);
+
+     // Load the global ordinal (only matters for strings). nextDoc still holds the
+     // previously returned doc, so the lookup can be skipped when the segment ordinal repeats.
+     current.setGlobalValues(nextDoc);
+
+     nextDoc.setValues(current);
+     // We are now done with this doc.
+     current.reset();
+     return nextDoc;
+   }
+
+   /**
+    * Loads the next batch: scans every bit still set, keeps the documents that survive in
+    * the sort queue and pops the queue into {@code outDocs}. Leaves {@code index} at -1
+    * when no document was found.
+    *
+    * @throws IOException on read failure; unchecked failures are wrapped in an IOException
+    */
+   private void topDocs() throws IOException {
+     try {
+       queue.reset();
+       SortDoc top = queue.top();
+       sortDoc.setNextReader(context);
+       DocIdSetIterator it = new BitSetIterator(bits, 0); // cost is not useful here
+       int docId;
+       while ((docId = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+         sortDoc.setValues(docId);
+         if (top.lessThan(sortDoc)) {
+           top.setValues(sortDoc);
+           top = queue.updateTop();
+         }
+       }
+
+       // Pop the queue and load up the array.
+       index = -1;
+
+       SortDoc popped;
+       while ((popped = queue.pop()) != null) {
+         // Entries with docId -1 are unused queue slots, skip them.
+         if (popped.docId > -1) {
+           outDocs[++index] = popped;
+         }
+       }
+     } catch (IOException e) {
+       // Already the declared type: rethrow as-is instead of nesting it in another IOException.
+       log.error("Segment Iterator Error:", e);
+       throw e;
+     } catch (RuntimeException e) {
+       log.error("Segment Iterator Error:", e);
+       throw new IOException(e);
+     }
+   }
+ }
+
public static class IgnoreException extends IOException {
public void printStackTrace(PrintWriter pw) {
pw.print("Early Client Disconnect");
@@ -621,5 +769,4 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
return "Early Client Disconnect";
}
}
-
}
diff --git a/solr/core/src/java/org/apache/solr/handler/export/ExportWriterStream.java b/solr/core/src/java/org/apache/solr/handler/export/ExportWriterStream.java
index f096810..c83897d 100644
--- a/solr/core/src/java/org/apache/solr/handler/export/ExportWriterStream.java
+++ b/solr/core/src/java/org/apache/solr/handler/export/ExportWriterStream.java
@@ -25,7 +25,6 @@ import java.util.Map;
import java.util.concurrent.BrokenBarrierException;
import java.util.concurrent.TimeoutException;
-import com.codahale.metrics.Timer;
import org.apache.solr.client.solrj.io.Tuple;
import org.apache.solr.client.solrj.io.comp.ComparatorOrder;
import org.apache.solr.client.solrj.io.comp.FieldComparator;
@@ -56,9 +55,9 @@ public class ExportWriterStream extends TupleStream implements Expressible {
StreamContext context;
StreamComparator streamComparator;
int pos = -1;
+ int index = -1;
ExportBuffers exportBuffers;
ExportBuffers.Buffer buffer;
- Timer.Context writeOutputTimerContext;
private static final class TupleEntryWriter implements EntryWriter {
Tuple tuple;
@@ -131,9 +130,7 @@ public class ExportWriterStream extends TupleStream implements Expressible {
@Override
public void close() throws IOException {
- if (writeOutputTimerContext != null) {
- writeOutputTimerContext.stop();
- }
+
exportBuffers = null;
}
@@ -141,18 +138,17 @@ public class ExportWriterStream extends TupleStream implements Expressible {
public Tuple read() throws IOException {
Tuple res = null;
if (pos < 0) {
- if (writeOutputTimerContext != null) {
- writeOutputTimerContext.stop();
- writeOutputTimerContext = null;
- }
+
try {
buffer.outDocsIndex = ExportBuffers.Buffer.EMPTY;
- log.debug("--- ews exchange empty buffer {}", buffer);
+ //log.debug("--- ews exchange empty buffer {}", buffer);
boolean exchanged = false;
while (!exchanged) {
- Timer.Context timerContext = exportBuffers.getWriterWaitTimer().time();
try {
+ long startExchangeBuffers = System.nanoTime();
exportBuffers.exchangeBuffers();
+ long endExchangeBuffers = System.nanoTime();
+ log.debug("Waited for reader thread:"+Long.toString(((endExchangeBuffers-startExchangeBuffers)/1000000)));
exchanged = true;
} catch (TimeoutException e) {
log.debug("--- ews timeout loop");
@@ -175,7 +171,6 @@ public class ExportWriterStream extends TupleStream implements Expressible {
}
break;
} finally {
- timerContext.stop();
}
}
} catch (InterruptedException e) {
@@ -196,6 +191,7 @@ public class ExportWriterStream extends TupleStream implements Expressible {
res = Tuple.EOF();
} else {
pos = buffer.outDocsIndex;
+ index = -1; //restart index.
log.debug("--- ews new pos={}", pos);
}
}
@@ -205,15 +201,11 @@ public class ExportWriterStream extends TupleStream implements Expressible {
}
if (res != null) {
// only errors or EOF assigned result so far
- if (writeOutputTimerContext != null) {
- writeOutputTimerContext.stop();
- }
+
return res;
}
- if (writeOutputTimerContext == null) {
- writeOutputTimerContext = exportBuffers.getWriteOutputBufferTimer().time();
- }
- SortDoc sortDoc = buffer.outDocs[pos];
+
+ SortDoc sortDoc = buffer.outDocs[++index];
tupleEntryWriter.tuple = new Tuple();
exportBuffers.exportWriter.writeDoc(sortDoc, exportBuffers.leaves, tupleEntryWriter, exportBuffers.exportWriter.fieldWriters);
pos--;
diff --git a/solr/core/src/java/org/apache/solr/handler/export/FieldWriter.java b/solr/core/src/java/org/apache/solr/handler/export/FieldWriter.java
index 9c1361c..fd8cd58 100644
--- a/solr/core/src/java/org/apache/solr/handler/export/FieldWriter.java
+++ b/solr/core/src/java/org/apache/solr/handler/export/FieldWriter.java
@@ -19,9 +19,9 @@ package org.apache.solr.handler.export;
import java.io.IOException;
-import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
import org.apache.solr.common.MapWriter;
abstract class FieldWriter {
- public abstract boolean write(SortDoc sortDoc, LeafReader reader, MapWriter.EntryWriter out, int fieldIndex) throws IOException;
+ public abstract boolean write(SortDoc sortDoc, LeafReaderContext readerContext, MapWriter.EntryWriter out, int fieldIndex) throws IOException;
}
\ No newline at end of file
diff --git a/solr/core/src/java/org/apache/solr/handler/export/FloatFieldWriter.java b/solr/core/src/java/org/apache/solr/handler/export/FloatFieldWriter.java
index e482ebc..9a498f6 100644
--- a/solr/core/src/java/org/apache/solr/handler/export/FloatFieldWriter.java
+++ b/solr/core/src/java/org/apache/solr/handler/export/FloatFieldWriter.java
@@ -19,19 +19,21 @@ package org.apache.solr.handler.export;
import java.io.IOException;
+import com.carrotsearch.hppc.IntObjectHashMap;
import org.apache.lucene.index.DocValues;
-import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.solr.common.MapWriter;
class FloatFieldWriter extends FieldWriter {
private String field;
+ private IntObjectHashMap<NumericDocValues> docValuesCache = new IntObjectHashMap<>();
public FloatFieldWriter(String field) {
this.field = field;
}
- public boolean write(SortDoc sortDoc, LeafReader reader, MapWriter.EntryWriter ew, int fieldIndex) throws IOException {
+ public boolean write(SortDoc sortDoc, LeafReaderContext readerContext, MapWriter.EntryWriter ew, int fieldIndex) throws IOException {
SortValue sortValue = sortDoc.getSortValue(this.field);
if (sortValue != null) {
if (sortValue.isPresent()) {
@@ -43,7 +45,21 @@ class FloatFieldWriter extends FieldWriter {
}
} else {
// field is not part of 'sort' param, but part of 'fl' param
- NumericDocValues vals = DocValues.getNumeric(reader, this.field);
+ int readerOrd = readerContext.ord;
+ NumericDocValues vals = null;
+ if(docValuesCache.containsKey(readerOrd)) {
+ NumericDocValues numericDocValues = docValuesCache.get(readerOrd);
+ if(numericDocValues.docID() < sortDoc.docId) {
+ //We have not advanced beyond the current docId so we can use this docValues.
+ vals = numericDocValues;
+ }
+ }
+
+ if(vals == null) {
+ vals = DocValues.getNumeric(readerContext.reader(), this.field);
+ docValuesCache.put(readerOrd, vals);
+ }
+
if (vals.advance(sortDoc.docId) == sortDoc.docId) {
int val = (int) vals.longValue();
ew.put(this.field, Float.intBitsToFloat(val));
diff --git a/solr/core/src/java/org/apache/solr/handler/export/FloatValue.java b/solr/core/src/java/org/apache/solr/handler/export/FloatValue.java
index 6d0d73d..f6ee024 100644
--- a/solr/core/src/java/org/apache/solr/handler/export/FloatValue.java
+++ b/solr/core/src/java/org/apache/solr/handler/export/FloatValue.java
@@ -44,6 +44,10 @@ class FloatValue implements SortValue {
return currentValue;
}
+ /**
+  * No-op for float values: there is nothing to translate between segments.
+  *
+  * @param previousValue value of the previously returned document; unused here
+  */
+ @Override
+ public void toGlobalValue(SortValue previousValue) {
+   // Intentionally empty.
+ }
+
public String getField() {
return field;
}
@@ -81,7 +85,7 @@ class FloatValue implements SortValue {
}
public void setCurrentValue(SortValue sv) {
- FloatValue fv = (FloatValue)sv;
+ FloatValue fv = (FloatValue) sv;
this.currentValue = fv.currentValue;
this.present = fv.present;
}
@@ -92,7 +96,7 @@ class FloatValue implements SortValue {
}
public int compareTo(SortValue o) {
- FloatValue fv = (FloatValue)o;
+ FloatValue fv = (FloatValue) o;
return comp.compare(currentValue, fv.currentValue);
}
}
diff --git a/solr/core/src/java/org/apache/solr/handler/export/IntFieldWriter.java b/solr/core/src/java/org/apache/solr/handler/export/IntFieldWriter.java
index f78f098..1ce5426 100644
--- a/solr/core/src/java/org/apache/solr/handler/export/IntFieldWriter.java
+++ b/solr/core/src/java/org/apache/solr/handler/export/IntFieldWriter.java
@@ -19,19 +19,21 @@ package org.apache.solr.handler.export;
import java.io.IOException;
+import com.carrotsearch.hppc.IntObjectHashMap;
import org.apache.lucene.index.DocValues;
-import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.solr.common.MapWriter;
class IntFieldWriter extends FieldWriter {
private String field;
+ private IntObjectHashMap<NumericDocValues> docValuesCache = new IntObjectHashMap<>();
public IntFieldWriter(String field) {
this.field = field;
}
- public boolean write(SortDoc sortDoc, LeafReader reader, MapWriter.EntryWriter ew, int fieldIndex) throws IOException {
+ public boolean write(SortDoc sortDoc, LeafReaderContext readerContext, MapWriter.EntryWriter ew, int fieldIndex) throws IOException {
int val;
SortValue sortValue = sortDoc.getSortValue(this.field);
if (sortValue != null) {
@@ -42,7 +44,21 @@ class IntFieldWriter extends FieldWriter {
}
} else {
// field is not part of 'sort' param, but part of 'fl' param
- NumericDocValues vals = DocValues.getNumeric(reader, this.field);
+ int readerOrd = readerContext.ord;
+ NumericDocValues vals = null;
+ if(docValuesCache.containsKey(readerOrd)) {
+ NumericDocValues numericDocValues = docValuesCache.get(readerOrd);
+ if(numericDocValues.docID() < sortDoc.docId) {
+ //We have not advanced beyond the current docId so we can use this docValues.
+ vals = numericDocValues;
+ }
+ }
+
+ if(vals == null) {
+ vals = DocValues.getNumeric(readerContext.reader(), this.field);
+ docValuesCache.put(readerOrd, vals);
+ }
+
if (vals.advance(sortDoc.docId) == sortDoc.docId) {
val = (int) vals.longValue();
} else {
diff --git a/solr/core/src/java/org/apache/solr/handler/export/IntValue.java b/solr/core/src/java/org/apache/solr/handler/export/IntValue.java
index bae23f9..876b2cb 100644
--- a/solr/core/src/java/org/apache/solr/handler/export/IntValue.java
+++ b/solr/core/src/java/org/apache/solr/handler/export/IntValue.java
@@ -75,18 +75,22 @@ public class IntValue implements SortValue {
}
}
+ /**
+  * No-op for int values: there is nothing to translate between segments.
+  *
+  * @param previousValue value of the previously returned document; unused here
+  */
+ @Override
+ public void toGlobalValue(SortValue previousValue) {
+   // Intentionally empty.
+ }
+
@Override
public boolean isPresent() {
return this.present;
}
public int compareTo(SortValue o) {
- IntValue iv = (IntValue)o;
+ IntValue iv = (IntValue) o;
return comp.compare(currentValue, iv.currentValue);
}
public void setCurrentValue(SortValue sv) {
- IntValue iv = (IntValue)sv;
+ IntValue iv = (IntValue) sv;
this.currentValue = iv.currentValue;
this.present = iv.present;
}
diff --git a/solr/core/src/java/org/apache/solr/handler/export/LongFieldWriter.java b/solr/core/src/java/org/apache/solr/handler/export/LongFieldWriter.java
index 6c1b4fb..457adf4 100644
--- a/solr/core/src/java/org/apache/solr/handler/export/LongFieldWriter.java
+++ b/solr/core/src/java/org/apache/solr/handler/export/LongFieldWriter.java
@@ -19,19 +19,22 @@ package org.apache.solr.handler.export;
import java.io.IOException;
+import com.carrotsearch.hppc.IntObjectHashMap;
import org.apache.lucene.index.DocValues;
-import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.solr.common.MapWriter;
class LongFieldWriter extends FieldWriter {
private String field;
+ private IntObjectHashMap<NumericDocValues> docValuesCache = new IntObjectHashMap<>();
+
public LongFieldWriter(String field) {
this.field = field;
}
- public boolean write(SortDoc sortDoc, LeafReader reader, MapWriter.EntryWriter ew, int fieldIndex) throws IOException {
+ public boolean write(SortDoc sortDoc, LeafReaderContext readerContext, MapWriter.EntryWriter ew, int fieldIndex) throws IOException {
long val;
SortValue sortValue = sortDoc.getSortValue(this.field);
if (sortValue != null) {
@@ -42,7 +45,21 @@ class LongFieldWriter extends FieldWriter {
}
} else {
// field is not part of 'sort' param, but part of 'fl' param
- NumericDocValues vals = DocValues.getNumeric(reader, this.field);
+ int readerOrd = readerContext.ord;
+ NumericDocValues vals = null;
+ if(docValuesCache.containsKey(readerOrd)) {
+ NumericDocValues numericDocValues = docValuesCache.get(readerOrd);
+ if(numericDocValues.docID() < sortDoc.docId) {
+ //We have not advanced beyond the current docId so we can use this docValues.
+ vals = numericDocValues;
+ }
+ }
+
+ if(vals == null) {
+ vals = DocValues.getNumeric(readerContext.reader(), this.field);
+ docValuesCache.put(readerOrd, vals);
+ }
+
if (vals.advance(sortDoc.docId) == sortDoc.docId) {
val = vals.longValue();
} else {
diff --git a/solr/core/src/java/org/apache/solr/handler/export/LongValue.java b/solr/core/src/java/org/apache/solr/handler/export/LongValue.java
index 63794ad..21dc376 100644
--- a/solr/core/src/java/org/apache/solr/handler/export/LongValue.java
+++ b/solr/core/src/java/org/apache/solr/handler/export/LongValue.java
@@ -52,6 +52,10 @@ public class LongValue implements SortValue {
return new LongValue(field, comp);
}
+ /**
+  * No-op for long values: there is nothing to translate between segments.
+  *
+  * @param previousValue value of the previously returned document; unused here
+  */
+ @Override
+ public void toGlobalValue(SortValue previousValue) {
+   // Intentionally empty.
+ }
+
public void setNextReader(LeafReaderContext context) throws IOException {
this.vals = DocValues.getNumeric(context.reader(), field);
lastDocID = 0;
diff --git a/solr/core/src/java/org/apache/solr/handler/export/MultiFieldWriter.java b/solr/core/src/java/org/apache/solr/handler/export/MultiFieldWriter.java
index ae4a6cd..2748b71 100644
--- a/solr/core/src/java/org/apache/solr/handler/export/MultiFieldWriter.java
+++ b/solr/core/src/java/org/apache/solr/handler/export/MultiFieldWriter.java
@@ -21,11 +21,8 @@ import java.io.IOException;
import java.util.Date;
import java.util.function.LongFunction;
-import org.apache.lucene.index.DocValues;
-import org.apache.lucene.index.IndexableField;
-import org.apache.lucene.index.LeafReader;
-import org.apache.lucene.index.SortedNumericDocValues;
-import org.apache.lucene.index.SortedSetDocValues;
+import com.carrotsearch.hppc.IntObjectHashMap;
+import org.apache.lucene.index.*;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.NumericUtils;
@@ -41,6 +38,8 @@ class MultiFieldWriter extends FieldWriter {
private boolean numeric;
private CharsRefBuilder cref = new CharsRefBuilder();
private final LongFunction<Object> bitsToValue;
+ private IntObjectHashMap<Object> docValuesCache = new IntObjectHashMap<>();
+
public MultiFieldWriter(String field, FieldType fieldType, SchemaField schemaField, boolean numeric) {
this.field = field;
@@ -54,25 +53,59 @@ class MultiFieldWriter extends FieldWriter {
}
}
- public boolean write(SortDoc sortDoc, LeafReader reader, MapWriter.EntryWriter out, int fieldIndex) throws IOException {
+ public boolean write(SortDoc sortDoc, LeafReaderContext readerContext, MapWriter.EntryWriter out, int fieldIndex) throws IOException {
if (this.fieldType.isPointField()) {
- SortedNumericDocValues vals = DocValues.getSortedNumeric(reader, this.field);
+ int readerOrd = readerContext.ord;
+ SortedNumericDocValues vals = null;
+ if(docValuesCache.containsKey(readerOrd)) {
+ SortedNumericDocValues sortedNumericDocValues = (SortedNumericDocValues) docValuesCache.get(readerOrd);
+ if(sortedNumericDocValues.docID() < sortDoc.docId) {
+ //We have not advanced beyond the current docId so we can use this docValues.
+ vals = sortedNumericDocValues;
+ }
+ }
+
+ if(vals == null) {
+ vals = DocValues.getSortedNumeric(readerContext.reader(), this.field);
+ docValuesCache.put(readerOrd, vals);
+ }
+
if (!vals.advanceExact(sortDoc.docId)) return false;
+
+ final SortedNumericDocValues docVals = vals;
+
out.put(this.field,
(IteratorWriter) w -> {
- for (int i = 0, count = vals.docValueCount(); i < count; i++) {
- w.add(bitsToValue.apply(vals.nextValue()));
+ for (int i = 0, count = docVals.docValueCount(); i < count; i++) {
+ w.add(bitsToValue.apply(docVals.nextValue()));
}
});
return true;
} else {
- SortedSetDocValues vals = DocValues.getSortedSet(reader, this.field);
+ int readerOrd = readerContext.ord;
+ SortedSetDocValues vals = null;
+ if(docValuesCache.containsKey(readerOrd)) {
+ SortedSetDocValues sortedSetDocValues = (SortedSetDocValues) docValuesCache.get(readerOrd);
+ if(sortedSetDocValues.docID() < sortDoc.docId) {
+ //We have not advanced beyond the current docId so we can use this docValues.
+ vals = sortedSetDocValues;
+ }
+ }
+
+ if(vals == null) {
+ vals = DocValues.getSortedSet(readerContext.reader(), this.field);
+ docValuesCache.put(readerOrd, vals);
+ }
+
if (vals.advance(sortDoc.docId) != sortDoc.docId) return false;
+
+ final SortedSetDocValues docVals = vals;
+
out.put(this.field,
(IteratorWriter) w -> {
long o;
- while((o = vals.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
- BytesRef ref = vals.lookupOrd(o);
+ while((o = docVals.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
+ BytesRef ref = docVals.lookupOrd(o);
fieldType.indexedToReadable(ref, cref);
IndexableField f = fieldType.createField(schemaField, cref.toString());
if (f == null) w.add(cref.toString());
diff --git a/solr/core/src/java/org/apache/solr/handler/export/QuadValueSortDoc.java b/solr/core/src/java/org/apache/solr/handler/export/QuadValueSortDoc.java
index b8599c7..17cc53a 100644
--- a/solr/core/src/java/org/apache/solr/handler/export/QuadValueSortDoc.java
+++ b/solr/core/src/java/org/apache/solr/handler/export/QuadValueSortDoc.java
@@ -25,6 +25,7 @@ class QuadValueSortDoc extends TripleValueSortDoc {
protected SortValue value4;
+ @Override
public SortValue getSortValue(String field) {
if (value1.getField().equals(field)) {
return value1;
@@ -38,6 +39,7 @@ class QuadValueSortDoc extends TripleValueSortDoc {
return null;
}
+ @Override
public void setNextReader(LeafReaderContext context) throws IOException {
this.ord = context.ord;
this.docBase = context.docBase;
@@ -47,6 +49,17 @@ class QuadValueSortDoc extends TripleValueSortDoc {
value4.setNextReader(context);
}
+
+ /**
+  * Converts each of the four sort values to its global form, passing along the
+  * corresponding value of the previously returned document.
+  *
+  * @param previous the previously returned document; must be a {@link QuadValueSortDoc}
+  */
+ @Override
+ public void setGlobalValues(SortDoc previous) {
+   final QuadValueSortDoc prev = (QuadValueSortDoc) previous;
+   this.value1.toGlobalValue(prev.value1);
+   this.value2.toGlobalValue(prev.value2);
+   this.value3.toGlobalValue(prev.value3);
+   this.value4.toGlobalValue(prev.value4);
+ }
+
+ @Override
public void reset() {
this.docId = -1;
this.docBase = -1;
@@ -57,6 +70,7 @@ class QuadValueSortDoc extends TripleValueSortDoc {
value4.reset();
}
+ @Override
public void setValues(int docId) throws IOException {
this.docId = docId;
value1.setCurrentValue(docId);
@@ -65,6 +79,7 @@ class QuadValueSortDoc extends TripleValueSortDoc {
value4.setCurrentValue(docId);
}
+ @Override
public void setValues(SortDoc sortDoc) {
this.docId = sortDoc.docId;
this.ord = sortDoc.ord;
@@ -80,53 +95,61 @@ class QuadValueSortDoc extends TripleValueSortDoc {
this.value4 = value4;
}
+ @Override
public SortDoc copy() {
return new QuadValueSortDoc(value1.copy(), value2.copy(), value3.copy(), value4.copy());
}
+ @Override
public boolean lessThan(Object o) {
- QuadValueSortDoc sd = (QuadValueSortDoc)o;
+ QuadValueSortDoc sd = (QuadValueSortDoc) o;
int comp = value1.compareTo(sd.value1);
- if(comp == -1) {
+ if (comp == -1) {
return true;
} else if (comp == 1) {
return false;
} else {
comp = value2.compareTo(sd.value2);
- if(comp == -1) {
+ if (comp == -1) {
return true;
} else if (comp == 1) {
return false;
} else {
comp = value3.compareTo(sd.value3);
- if(comp == -1) {
+ if (comp == -1) {
return true;
} else if (comp == 1) {
return false;
} else {
comp = value4.compareTo(sd.value4);
- if(comp == -1) {
+ if (comp == -1) {
return true;
} else if (comp == 1) {
return false;
} else {
- return docId+docBase > sd.docId+sd.docBase;
+ return docId + docBase > sd.docId + sd.docBase;
}
}
}
}
}
- public int compareTo(Object o) {
- QuadValueSortDoc sd = (QuadValueSortDoc)o;
+ @Override
+ public int compareTo(SortDoc o) {
+ QuadValueSortDoc sd = (QuadValueSortDoc) o;
int comp = value1.compareTo(sd.value1);
- if(comp == 0) {
+ if (comp == 0) {
comp = value2.compareTo(sd.value2);
- if(comp == 0) {
+ if (comp == 0) {
comp = value3.compareTo(sd.value3);
- if(comp == 0) {
- return value4.compareTo(sd.value4);
+ if (comp == 0) {
+ comp = value4.compareTo(sd.value4);
+ if (comp == 0) {
+ return (sd.docId + sd.docBase) - (docId + docBase);
+ } else {
+ return comp;
+ }
} else {
return comp;
}
diff --git a/solr/core/src/java/org/apache/solr/handler/export/SingleValueSortDoc.java b/solr/core/src/java/org/apache/solr/handler/export/SingleValueSortDoc.java
index 0bd3a4a..04d3135 100644
--- a/solr/core/src/java/org/apache/solr/handler/export/SingleValueSortDoc.java
+++ b/solr/core/src/java/org/apache/solr/handler/export/SingleValueSortDoc.java
@@ -25,6 +25,7 @@ class SingleValueSortDoc extends SortDoc {
protected SortValue value1;
+ @Override
public SortValue getSortValue(String field) {
if (value1.getField().equals(field)) {
return value1;
@@ -32,12 +33,14 @@ class SingleValueSortDoc extends SortDoc {
return null;
}
+ @Override
public void setNextReader(LeafReaderContext context) throws IOException {
this.ord = context.ord;
this.docBase = context.docBase;
value1.setNextReader(context);
}
+ @Override
public void reset() {
this.docId = -1;
this.docBase = -1;
@@ -45,16 +48,18 @@ class SingleValueSortDoc extends SortDoc {
this.value1.reset();
}
+ @Override
public void setValues(int docId) throws IOException {
this.docId = docId;
value1.setCurrentValue(docId);
}
+ @Override
public void setValues(SortDoc sortDoc) {
this.docId = sortDoc.docId;
this.ord = sortDoc.ord;
this.docBase = sortDoc.docBase;
- value1.setCurrentValue(((SingleValueSortDoc)sortDoc).value1);
+ value1.setCurrentValue(((SingleValueSortDoc) sortDoc).value1);
}
public SingleValueSortDoc(SortValue value1) {
@@ -62,25 +67,39 @@ class SingleValueSortDoc extends SortDoc {
this.value1 = value1;
}
+ /**
+  * Converts the single sort value to its global form, passing along the value of the
+  * previously returned document.
+  *
+  * @param previous the previously returned document; must be a {@link SingleValueSortDoc}
+  */
+ @Override
+ public void setGlobalValues(SortDoc previous) {
+   final SingleValueSortDoc prev = (SingleValueSortDoc) previous;
+   this.value1.toGlobalValue(prev.value1);
+ }
+
+ @Override
public SortDoc copy() {
return new SingleValueSortDoc(value1.copy());
}
+ @Override
public boolean lessThan(Object o) {
- SingleValueSortDoc sd = (SingleValueSortDoc)o;
+ SingleValueSortDoc sd = (SingleValueSortDoc) o;
int comp = value1.compareTo(sd.value1);
- if(comp == -1) {
+ if (comp == -1) {
return true;
} else if (comp == 1) {
return false;
} else {
- return docId+docBase > sd.docId+sd.docBase;
+ return docId + docBase > sd.docId + sd.docBase;
}
}
- public int compareTo(Object o) {
- SingleValueSortDoc sd = (SingleValueSortDoc)o;
- return value1.compareTo(sd.value1);
+ @Override
+ public int compareTo(SortDoc o) {
+ SingleValueSortDoc sd = (SingleValueSortDoc) o;
+ int comp = value1.compareTo(sd.value1);
+ if (comp == 0) {
+ return (sd.docId + sd.docBase) - (docId + docBase);
+ } else {
+ return comp;
+ }
}
public String toString() {
diff --git a/solr/core/src/java/org/apache/solr/handler/export/SortDoc.java b/solr/core/src/java/org/apache/solr/handler/export/SortDoc.java
index d893bd1..377266a1 100644
--- a/solr/core/src/java/org/apache/solr/handler/export/SortDoc.java
+++ b/solr/core/src/java/org/apache/solr/handler/export/SortDoc.java
@@ -18,10 +18,11 @@
package org.apache.solr.handler.export;
import java.io.IOException;
+import java.util.Objects;
import org.apache.lucene.index.LeafReaderContext;
-class SortDoc {
+class SortDoc implements Comparable<SortDoc> {
protected int docId = -1;
protected int ord = -1;
@@ -34,6 +35,21 @@ class SortDoc {
}
public SortDoc() {
+
+ }
+
+ /**
+  * Two sort docs are equal when they are of exactly the same class and
+  * {@link #compareTo(SortDoc)} reports no difference between them.
+  *
+  * @param obj the object to compare with; may be null
+  * @return true if {@code obj} is an equal sort doc, false otherwise (including for null)
+  */
+ @Override
+ public boolean equals(Object obj) {
+   // equals(null) must return false rather than throw; subclasses are not equal
+   if (obj == null || !obj.getClass().equals(getClass())) {
+     return false;
+   }
+   return compareTo((SortDoc) obj) == 0;
+ }
+
+ /**
+  * Hashes the global document id ({@code docId + docBase}).
+  * equals() is defined through compareTo(), whose tie-break looks only at that sum, so
+  * hashing docId, ord and docBase individually could give equal objects different hashes.
+  */
+ @Override
+ public int hashCode() {
+ return Objects.hash(docId + docBase);
}
public SortValue getSortValue(String field) {
@@ -69,6 +85,13 @@ class SortDoc {
}
}
+ /**
+  * Converts every sort value to its global form, passing along the value at the same
+  * position in the previously returned document.
+  *
+  * @param previous the previously returned document; its sort values are read positionally
+  */
+ public void setGlobalValues(SortDoc previous) {
+   final SortValue[] prior = previous.sortValues;
+   int position = 0;
+   for (SortValue value : sortValues) {
+     value.toGlobalValue(prior[position++]);
+   }
+ }
+
public void setValues(SortDoc sortDoc) {
this.docId = sortDoc.docId;
this.ord = sortDoc.ord;
@@ -84,7 +107,6 @@ class SortDoc {
for (int i = 0; i < sortValues.length; i++) {
svs[i] = sortValues[i].copy();
}
-
return new SortDoc(svs);
}
@@ -92,7 +114,7 @@ class SortDoc {
if (docId == -1) {
return true;
}
- SortDoc sd = (SortDoc)o;
+ SortDoc sd = (SortDoc) o;
SortValue[] sortValues1 = sd.sortValues;
for (int i = 0; i < sortValues.length; i++) {
int comp = sortValues[i].compareTo(sortValues1[i]);
@@ -105,18 +127,17 @@ class SortDoc {
return docId + docBase > sd.docId + sd.docBase; //index order
}
- public int compareTo(Object o) {
- SortDoc sd = (SortDoc)o;
+ @Override
+ public int compareTo(SortDoc sd) {
for (int i = 0; i < sortValues.length; i++) {
int comp = sortValues[i].compareTo(sd.sortValues[i]);
if (comp != 0) {
return comp;
}
}
- return 0;
+ return (sd.docId + sd.docBase) - (docId + docBase);
}
-
public String toString() {
StringBuilder builder = new StringBuilder();
builder.append(ord).append(':').append(docBase).append(':').append(docId).append("; ");
diff --git a/solr/core/src/java/org/apache/solr/handler/export/SortValue.java b/solr/core/src/java/org/apache/solr/handler/export/SortValue.java
index ad958c0..da42be0 100644
--- a/solr/core/src/java/org/apache/solr/handler/export/SortValue.java
+++ b/solr/core/src/java/org/apache/solr/handler/export/SortValue.java
@@ -21,18 +21,19 @@ import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
-public interface SortValue extends Comparable<SortValue> {
- public void setCurrentValue(int docId) throws IOException;
- public void setNextReader(LeafReaderContext context) throws IOException;
- public void setCurrentValue(SortValue value);
- public void reset();
- public SortValue copy();
- public Object getCurrentValue() throws IOException;
- public String getField();
+interface SortValue extends Comparable<SortValue> {
+ void setCurrentValue(int docId) throws IOException;
+ void setNextReader(LeafReaderContext context) throws IOException;
+ void setCurrentValue(SortValue value);
+ void toGlobalValue(SortValue previousValue);
+ void reset();
+ SortValue copy();
+ Object getCurrentValue() throws IOException;
+ String getField();
/**
*
* @return true if document has a value for the specified field
*/
- public boolean isPresent();
+ boolean isPresent();
}
\ No newline at end of file
diff --git a/solr/core/src/java/org/apache/solr/handler/export/StringFieldWriter.java b/solr/core/src/java/org/apache/solr/handler/export/StringFieldWriter.java
index 846eb08..5d43803 100644
--- a/solr/core/src/java/org/apache/solr/handler/export/StringFieldWriter.java
+++ b/solr/core/src/java/org/apache/solr/handler/export/StringFieldWriter.java
@@ -18,11 +18,9 @@
package org.apache.solr.handler.export;
import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
-
+import com.carrotsearch.hppc.IntObjectHashMap;
import org.apache.lucene.index.DocValues;
-import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRefBuilder;
@@ -32,10 +30,13 @@ import org.apache.solr.common.util.JavaBinCodec;
import org.apache.solr.schema.FieldType;
class StringFieldWriter extends FieldWriter {
- private String field;
+ protected String field;
private FieldType fieldType;
- private Map<Integer, SortedDocValues> lastDocValues = new HashMap<>();
- private CharsRefBuilder cref = new CharsRefBuilder();
+ private BytesRef lastRef;
+ private int lastOrd = -1;
+ private IntObjectHashMap<SortedDocValues> docValuesCache = new IntObjectHashMap<>();
+
+ protected CharsRefBuilder cref = new CharsRefBuilder();
final ByteArrayUtf8CharSequence utf8 = new ByteArrayUtf8CharSequence(new byte[0], 0, 0) {
@Override
public String toString() {
@@ -53,48 +54,64 @@ class StringFieldWriter extends FieldWriter {
this.fieldType = fieldType;
}
- public boolean write(SortDoc sortDoc, LeafReader reader, MapWriter.EntryWriter ew, int fieldIndex) throws IOException {
- BytesRef ref;
- SortValue sortValue = sortDoc.getSortValue(this.field);
- if (sortValue != null) {
- if (sortValue.isPresent()) {
- ref = (BytesRef) sortValue.getCurrentValue();
- } else { //empty-value
- return false;
+ public boolean write(SortDoc sortDoc, LeafReaderContext readerContext, MapWriter.EntryWriter ew, int fieldIndex) throws IOException {
+ StringValue stringValue = (StringValue) sortDoc.getSortValue(this.field);
+ BytesRef ref = null;
+
+ if (stringValue != null) {
+ /*
+ We already have the top level ordinal used for sorting.
+ Now let's use it for caching the BytesRef so we don't have to look it up.
+ When we have long runs of repeated values due to the sort order of the docs this is a huge win.
+ */
+
+ if (this.lastOrd == stringValue.currentOrd) {
+ ref = lastRef;
}
- } else {
- // field is not part of 'sort' param, but part of 'fl' param
- SortedDocValues vals = lastDocValues.get(sortDoc.ord);
- if (vals == null || vals.docID() >= sortDoc.docId) {
- vals = DocValues.getSorted(reader, this.field);
- lastDocValues.put(sortDoc.ord, vals);
+
+ this.lastOrd = stringValue.currentOrd;
+ }
+
+ if (ref == null) {
+ //Reuse the last DocValues object if possible
+ int readerOrd = readerContext.ord;
+ SortedDocValues vals = null;
+ if(docValuesCache.containsKey(readerOrd)) {
+ SortedDocValues sortedDocValues = docValuesCache.get(readerOrd);
+ if(sortedDocValues.docID() < sortDoc.docId) {
+ //We have not advanced beyond the current docId so we can use this docValues.
+ vals = sortedDocValues;
+ }
+ }
+
+ if(vals == null) {
+ vals = DocValues.getSorted(readerContext.reader(), this.field);
+ docValuesCache.put(readerOrd, vals);
}
+
if (vals.advance(sortDoc.docId) != sortDoc.docId) {
return false;
}
+
int ord = vals.ordValue();
ref = vals.lookupOrd(ord);
+
+ if(stringValue != null) {
+ //Don't need to set the lastRef if it's not a sort value.
+ lastRef = ref.clone();
+ }
}
+ writeBytes(ew, ref, fieldType);
+ return true;
+ }
+
+ protected void writeBytes(MapWriter.EntryWriter ew, BytesRef ref, FieldType fieldType) throws IOException {
if (ew instanceof JavaBinCodec.BinEntryWriter) {
ew.put(this.field, utf8.reset(ref.bytes, ref.offset, ref.length, null));
} else {
- String v = null;
- if (sortValue != null) {
- v = ((StringValue) sortValue).getLastString();
- if (v == null) {
- fieldType.indexedToReadable(ref, cref);
- v = cref.toString();
- ((StringValue) sortValue).setLastString(v);
- }
- } else {
- fieldType.indexedToReadable(ref, cref);
- v = cref.toString();
- }
-
- ew.put(this.field, v);
-
+ fieldType.indexedToReadable(ref, cref);
+ ew.put(this.field, cref.toString());
}
- return true;
}
}
\ No newline at end of file
diff --git a/solr/core/src/java/org/apache/solr/handler/export/StringValue.java b/solr/core/src/java/org/apache/solr/handler/export/StringValue.java
index e3a36d4..2550c07 100644
--- a/solr/core/src/java/org/apache/solr/handler/export/StringValue.java
+++ b/solr/core/src/java/org/apache/solr/handler/export/StringValue.java
@@ -38,13 +38,14 @@ class StringValue implements SortValue {
protected LongValues toGlobal = LongValues.IDENTITY; // this segment to global ordinal. NN;
protected SortedDocValues docValues;
- protected int currentOrd;
+ public int currentOrd;
protected int lastDocID;
private boolean present;
private BytesRef lastBytes;
private String lastString;
private int lastOrd = -1;
+ private int leafOrd = -1;
public StringValue(SortedDocValues globalDocValues, String field, IntComp comp) {
this.globalDocValues = globalDocValues;
@@ -74,18 +75,22 @@ class StringValue implements SortValue {
}
public void setCurrentValue(int docId) throws IOException {
+ //System.out.println(docId +":"+lastDocID);
+ /*
if (docId < lastDocID) {
throw new AssertionError("docs were sent out-of-order: lastDocID=" + lastDocID + " vs doc=" + docId);
}
lastDocID = docId;
+ */
if (docId > docValues.docID()) {
docValues.advance(docId);
}
+
if (docId == docValues.docID()) {
present = true;
- currentOrd = (int) toGlobal.get(docValues.ordValue());
+ currentOrd = docValues.ordValue();
} else {
present = false;
currentOrd = -1;
@@ -98,9 +103,12 @@ class StringValue implements SortValue {
}
public void setCurrentValue(SortValue sv) {
- StringValue v = (StringValue)sv;
+ StringValue v = (StringValue) sv;
this.currentOrd = v.currentOrd;
this.present = v.present;
+ this.leafOrd = v.leafOrd;
+ this.lastOrd = v.lastOrd;
+ this.toGlobal = v.toGlobal;
}
public Object getCurrentValue() throws IOException {
@@ -113,11 +121,27 @@ class StringValue implements SortValue {
return lastBytes;
}
+ public void toGlobalValue(SortValue previousValue) {
+ lastOrd = currentOrd;
+ StringValue sv = (StringValue) previousValue;
+ if (sv.lastOrd == currentOrd) {
+ //Take the global ord from the previousValue unless our ord is -1, which is the same in both global and leaf ordinals
+ if(this.currentOrd != -1) {
+ this.currentOrd = sv.currentOrd;
+ }
+ } else {
+ if(this.currentOrd > -1) {
+ this.currentOrd = (int) toGlobal.get(this.currentOrd);
+ }
+ }
+ }
+
public String getField() {
return field;
}
public void setNextReader(LeafReaderContext context) throws IOException {
+ leafOrd = context.ord;
if (ordinalMap != null) {
toGlobal = ordinalMap.getGlobalOrds(context.ord);
}
@@ -128,6 +152,7 @@ class StringValue implements SortValue {
public void reset() {
this.currentOrd = comp.resetValue();
this.present = false;
+ lastDocID = 0;
}
public int compareTo(SortValue o) {
@@ -136,6 +161,6 @@ class StringValue implements SortValue {
}
public String toString() {
- return Integer.toString(this.currentOrd);
+ return "HERE:"+Integer.toString(this.currentOrd);
}
}
diff --git a/solr/core/src/java/org/apache/solr/handler/export/TripleValueSortDoc.java b/solr/core/src/java/org/apache/solr/handler/export/TripleValueSortDoc.java
index c68c1a8..c990d5d 100644
--- a/solr/core/src/java/org/apache/solr/handler/export/TripleValueSortDoc.java
+++ b/solr/core/src/java/org/apache/solr/handler/export/TripleValueSortDoc.java
@@ -25,6 +25,7 @@ class TripleValueSortDoc extends DoubleValueSortDoc {
protected SortValue value3;
+ @Override
public SortValue getSortValue(String field) {
if (value1.getField().equals(field)) {
return value1;
@@ -36,6 +37,7 @@ class TripleValueSortDoc extends DoubleValueSortDoc {
return null;
}
+ @Override
public void setNextReader(LeafReaderContext context) throws IOException {
this.ord = context.ord;
this.docBase = context.docBase;
@@ -44,6 +46,7 @@ class TripleValueSortDoc extends DoubleValueSortDoc {
value3.setNextReader(context);
}
+ @Override
public void reset() {
this.docId = -1;
this.docBase = -1;
@@ -53,6 +56,7 @@ class TripleValueSortDoc extends DoubleValueSortDoc {
value3.reset();
}
+ @Override
public void setValues(int docId) throws IOException {
this.docId = docId;
value1.setCurrentValue(docId);
@@ -60,13 +64,22 @@ class TripleValueSortDoc extends DoubleValueSortDoc {
value3.setCurrentValue(docId);
}
+ @Override
+ public void setGlobalValues(SortDoc previous) {
+ TripleValueSortDoc tripleValueSortDoc = (TripleValueSortDoc) previous;
+ value1.toGlobalValue(tripleValueSortDoc.value1);
+ value2.toGlobalValue(tripleValueSortDoc.value2);
+ value3.toGlobalValue(tripleValueSortDoc.value3);
+ }
+
+ @Override
public void setValues(SortDoc sortDoc) {
this.docId = sortDoc.docId;
this.ord = sortDoc.ord;
this.docBase = sortDoc.docBase;
- value1.setCurrentValue(((TripleValueSortDoc)sortDoc).value1);
- value2.setCurrentValue(((TripleValueSortDoc)sortDoc).value2);
- value3.setCurrentValue(((TripleValueSortDoc)sortDoc).value3);
+ value1.setCurrentValue(((TripleValueSortDoc) sortDoc).value1);
+ value2.setCurrentValue(((TripleValueSortDoc) sortDoc).value2);
+ value3.setCurrentValue(((TripleValueSortDoc) sortDoc).value3);
}
public TripleValueSortDoc(SortValue value1, SortValue value2, SortValue value3) {
@@ -74,44 +87,51 @@ class TripleValueSortDoc extends DoubleValueSortDoc {
this.value3 = value3;
}
+ @Override
public SortDoc copy() {
return new TripleValueSortDoc(value1.copy(), value2.copy(), value3.copy());
}
+ @Override
public boolean lessThan(Object o) {
-
- TripleValueSortDoc sd = (TripleValueSortDoc)o;
+ TripleValueSortDoc sd = (TripleValueSortDoc) o;
int comp = value1.compareTo(sd.value1);
- if(comp == -1) {
+ if (comp == -1) {
return true;
} else if (comp == 1) {
return false;
} else {
comp = value2.compareTo(sd.value2);
- if(comp == -1) {
+ if (comp == -1) {
return true;
} else if (comp == 1) {
return false;
} else {
comp = value3.compareTo(sd.value3);
- if(comp == -1) {
+ if (comp == -1) {
return true;
} else if (comp == 1) {
return false;
} else {
- return docId+docBase > sd.docId+sd.docBase;
+ return docId + docBase > sd.docId + sd.docBase;
}
}
}
}
- public int compareTo(Object o) {
- TripleValueSortDoc sd = (TripleValueSortDoc)o;
+ @Override
+ public int compareTo(SortDoc o) {
+ TripleValueSortDoc sd = (TripleValueSortDoc) o;
int comp = value1.compareTo(sd.value1);
if (comp == 0) {
comp = value2.compareTo(sd.value2);
if (comp == 0) {
- return value3.compareTo(sd.value3);
+ comp = value3.compareTo(sd.value3);
+ if (comp == 0) {
+ return (sd.docId + sd.docBase) - (docId + docBase);
+ } else {
+ return comp;
+ }
} else {
return comp;
}
diff --git a/solr/core/src/java/org/apache/solr/util/SolrLogPostTool.java b/solr/core/src/java/org/apache/solr/util/SolrLogPostTool.java
index 286d6d3..ec9cfcc 100644
--- a/solr/core/src/java/org/apache/solr/util/SolrLogPostTool.java
+++ b/solr/core/src/java/org/apache/solr/util/SolrLogPostTool.java
@@ -503,55 +503,60 @@ public class SolrLogPostTool {
private void addParams(SolrInputDocument doc, String params) {
String[] pairs = params.split("&");
- for(String pair : pairs) {
+ for (String pair : pairs) {
String[] parts = pair.split("=");
- if(parts.length == 2 && parts[0].equals("q")) {
+ if (parts.length == 2 && parts[0].equals("q")) {
String dq = URLDecoder.decode(parts[1], Charset.defaultCharset());
setFieldIfUnset(doc, "q_s", dq);
setFieldIfUnset(doc, "q_t", dq);
}
- if(parts[0].equals("rows")) {
+ if (parts[0].equals("rows")) {
String dr = URLDecoder.decode(parts[1], Charset.defaultCharset());
setFieldIfUnset(doc, "rows_i", dr);
}
- if(parts[0].equals("distrib")) {
+ if (parts[0].equals("start")) {
+ String dr = URLDecoder.decode(parts[1], Charset.defaultCharset());
+ setFieldIfUnset(doc, "start_i", dr);
+ }
+
+ if (parts[0].equals("distrib")) {
String dr = URLDecoder.decode(parts[1], Charset.defaultCharset());
setFieldIfUnset(doc, "distrib_s", dr);
}
- if(parts[0].equals("shards")) {
+ if (parts[0].equals("shards")) {
setFieldIfUnset(doc, "shards_s", "true");
}
- if(parts[0].equals("ids") && !isRTGRequest(doc)) {
+ if (parts[0].equals("ids") && !isRTGRequest(doc)) {
setFieldIfUnset(doc, "ids_s", "true");
}
- if(parts[0].equals("isShard")) {
+ if (parts[0].equals("isShard")) {
String dr = URLDecoder.decode(parts[1], Charset.defaultCharset());
setFieldIfUnset(doc, "isShard_s", dr);
}
- if(parts[0].equals("wt")) {
+ if (parts[0].equals("wt")) {
String dr = URLDecoder.decode(parts[1], Charset.defaultCharset());
setFieldIfUnset(doc, "wt_s", dr);
}
- if(parts[0].equals("facet")) {
+ if (parts[0].equals("facet")) {
String dr = URLDecoder.decode(parts[1], Charset.defaultCharset());
setFieldIfUnset(doc, "facet_s", dr);
}
- if(parts[0].equals("shards.purpose")) {
+ if (parts[0].equals("shards.purpose")) {
try {
int purpose = Integer.parseInt(parts[1]);
String[] purposes = getRequestPurposeNames(purpose);
for (String p : purposes) {
doc.addField("purpose_ss", p);
}
- } catch(Throwable e) {
+ } catch (Throwable e) {
//We'll just sit on this for now and not interrupt the load for this one field.
}
}