You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2014/06/20 14:22:32 UTC
svn commit: r1604158 - in /lucene/dev/trunk: ./ lucene/ lucene/CHANGES.txt
lucene/core/ lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
Author: jpountz
Date: Fri Jun 20 12:22:31 2014
New Revision: 1604158
URL: http://svn.apache.org/r1604158
Log:
LUCENE-5780: Make OrdinalMap more memory-efficient.
Modified:
lucene/dev/trunk/ (props changed)
lucene/dev/trunk/lucene/ (props changed)
lucene/dev/trunk/lucene/CHANGES.txt (contents, props changed)
lucene/dev/trunk/lucene/core/ (props changed)
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1604158&r1=1604157&r2=1604158&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Fri Jun 20 12:22:31 2014
@@ -96,12 +96,15 @@ Other
======================= Lucene 4.10.0 ======================
-(No Changes)
-
API Changes
* LUCENE-5752: Simplified Automaton API to be immutable. (Mike McCandless)
+Optimizations
+
+* LUCENE-5780: Make OrdinalMap more memory-efficient, especially in case the
+ first segment has all values. (Adrien Grand, Robert Muir)
+
======================= Lucene 4.9.0 =======================
Changes in Runtime Behavior
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java?rev=1604158&r1=1604157&r2=1604158&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java Fri Jun 20 12:22:31 2014
@@ -427,14 +427,18 @@ public class MultiDocValues {
long globalOrd = 0;
while (mte.next() != null) {
TermsEnumWithSlice matches[] = mte.getMatchArray();
+ int firstSegmentIndex = Integer.MAX_VALUE;
+ long globalOrdDelta = Long.MAX_VALUE;
for (int i = 0; i < mte.getMatchCount(); i++) {
int segmentIndex = matches[i].index;
long segmentOrd = matches[i].terms.ord();
long delta = globalOrd - segmentOrd;
- // for each unique term, just mark the first segment index/delta where it occurs
- if (i == 0) {
- firstSegments.add(segmentIndex);
- globalOrdDeltas.add(delta);
+ // We track the lowest segment index in which the term occurs. When the
+ // first segment contains most (or, better, all) of the values, this
+ // saves a significant amount of memory.
+ if (segmentIndex < firstSegmentIndex) {
+ firstSegmentIndex = segmentIndex;
+ globalOrdDelta = delta;
}
// for each per-segment ord, map it back to the global term.
while (segmentOrds[segmentIndex] <= segmentOrd) {
@@ -443,6 +447,10 @@ public class MultiDocValues {
segmentOrds[segmentIndex]++;
}
}
+ // for each unique term, just mark the first segment index/delta where it occurs
+ assert firstSegmentIndex < segmentOrds.length;
+ firstSegments.add(firstSegmentIndex);
+ globalOrdDeltas.add(globalOrdDelta);
globalOrd++;
}
firstSegments.freeze();