You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sh...@apache.org on 2013/10/03 15:02:30 UTC
svn commit: r1528837 - in /lucene/dev/trunk/lucene/core/src:
java/org/apache/lucene/index/ test/org/apache/lucene/index/
Author: shaie
Date: Thu Oct 3 13:02:29 2013
New Revision: 1528837
URL: http://svn.apache.org/r1528837
Log:
LUCENE-5189: fix updates-order and docsWithField bugs
Modified:
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/BufferedDeletes.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/ReadersAndLiveDocs.java
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestNumericDocValuesUpdates.java
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/BufferedDeletes.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/BufferedDeletes.java?rev=1528837&r1=1528836&r2=1528837&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/BufferedDeletes.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/BufferedDeletes.java Thu Oct 3 13:02:29 2013
@@ -66,29 +66,32 @@ class BufferedDeletes { // TODO (DVU_REN
/* Rough logic: NumericUpdate calculates its actual size,
* including the update Term and DV field (String). The
- * per-term map holds a reference to the update Term, and
+ * per-field map holds a reference to the updated field, and
* therefore we only account for the object reference and
* map space itself. This is incremented when we first see
- * an update Term.
- * LinkedHashMap has an array[Entry] w/ varying load factor
- * (say 2*POINTER). Entry is an object w/ Term key, Map val,
- * int hash, Entry next, Entry before, Entry after (OBJ_HEADER + 5*POINTER + INT).
- * Term (key) is counted only as POINTER.
- * Map (val) is counted as OBJ_HEADER, array[Entry] ref + header, 4*INT, 1*FLOAT,
+ * an updated field.
+ *
+ * HashMap has an array[Entry] w/ varying load
+ * factor (say 2*POINTER). Entry is an object w/ String key,
+ * LinkedHashMap val, int hash, Entry next (OBJ_HEADER + 3*POINTER + INT).
+ *
+ * LinkedHashMap (val) is counted as OBJ_HEADER, array[Entry] ref + header, 4*INT, 1*FLOAT,
* Set (entrySet) (2*OBJ_HEADER + ARRAY_HEADER + 2*POINTER + 4*INT + FLOAT)
*/
- final static int BYTES_PER_NUMERIC_UPDATE_TERM_ENTRY =
- 9*RamUsageEstimator.NUM_BYTES_OBJECT_REF + 3*RamUsageEstimator.NUM_BYTES_OBJECT_HEADER +
+ final static int BYTES_PER_NUMERIC_FIELD_ENTRY =
+ 7*RamUsageEstimator.NUM_BYTES_OBJECT_REF + 3*RamUsageEstimator.NUM_BYTES_OBJECT_HEADER +
RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + 5*RamUsageEstimator.NUM_BYTES_INT + RamUsageEstimator.NUM_BYTES_FLOAT;
-
- /* Rough logic: Incremented when we see another field for an already updated
- * Term.
- * HashMap has an array[Entry] w/ varying load
- * factor (say 2*POINTER). Entry is an object w/ String key,
- * NumericUpdate val, int hash, Entry next (OBJ_HEADER + 3*POINTER + INT).
- * NumericUpdate returns its own size, and therefore isn't accounted for here.
+
+ /* Rough logic: Incremented when we see another Term for an already updated
+ * field.
+ * LinkedHashMap has an array[Entry] w/ varying load factor
+ * (say 2*POINTER). Entry is an object w/ Term key, NumericUpdate val,
+ * int hash, Entry next, Entry before, Entry after (OBJ_HEADER + 5*POINTER + INT).
+ *
+ * Term (key) is counted only as POINTER.
+ * NumericUpdate (val) counts its own size and isn't accounted for here.
*/
- final static int BYTES_PER_NUMERIC_UPDATE_ENTRY = 5*RamUsageEstimator.NUM_BYTES_OBJECT_REF + RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + RamUsageEstimator.NUM_BYTES_INT;
+ final static int BYTES_PER_NUMERIC_UPDATE_ENTRY = 7*RamUsageEstimator.NUM_BYTES_OBJECT_REF + RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + RamUsageEstimator.NUM_BYTES_INT;
final AtomicInteger numTermDeletes = new AtomicInteger();
final AtomicInteger numNumericUpdates = new AtomicInteger();
@@ -96,12 +99,14 @@ class BufferedDeletes { // TODO (DVU_REN
final Map<Query,Integer> queries = new HashMap<Query,Integer>();
final List<Integer> docIDs = new ArrayList<Integer>();
- // Map<updateTerm,Map<dvField,NumericUpdate>>
- // LinkedHashMap because we need to preserve the order of the updates. That
- // is, if two terms update the same document and same DV field, whoever came
- // in last should win. LHM guarantees we iterate on the map in insertion
- // order.
- final Map<Term,Map<String,NumericUpdate>> numericUpdates = new LinkedHashMap<Term,Map<String,NumericUpdate>>();
+ // Map<dvField,Map<updateTerm,NumericUpdate>>
+ // For each field we keep an ordered list of NumericUpdates, key'd by the
+ // update Term. LinkedHashMap guarantees we will later traverse the map in
+ // insertion order (so that if two terms affect the same document, the last
+ // one that came in wins), and helps us detect faster if the same Term is
+ // used to update the same field multiple times (so we later traverse it
+ // only once).
+ final Map<String,LinkedHashMap<Term,NumericUpdate>> numericUpdates = new HashMap<String,LinkedHashMap<Term,NumericUpdate>>();
public static final Integer MAX_INT = Integer.valueOf(Integer.MAX_VALUE);
@@ -180,13 +185,13 @@ class BufferedDeletes { // TODO (DVU_REN
}
public void addNumericUpdate(NumericUpdate update, int docIDUpto) {
- Map<String,NumericUpdate> termUpdates = numericUpdates.get(update.term);
- if (termUpdates == null) {
- termUpdates = new HashMap<String,NumericUpdate>();
- numericUpdates.put(update.term, termUpdates);
- bytesUsed.addAndGet(BYTES_PER_NUMERIC_UPDATE_TERM_ENTRY);
+ LinkedHashMap<Term,NumericUpdate> fieldUpdates = numericUpdates.get(update.field);
+ if (fieldUpdates == null) {
+ fieldUpdates = new LinkedHashMap<Term,NumericUpdate>();
+ numericUpdates.put(update.field, fieldUpdates);
+ bytesUsed.addAndGet(BYTES_PER_NUMERIC_FIELD_ENTRY);
}
- final NumericUpdate current = termUpdates.get(update.field);
+ final NumericUpdate current = fieldUpdates.get(update.term);
if (current != null && docIDUpto < current.docIDUpto) {
// Only record the new number if it's greater than or equal to the current
// one. This is important because if multiple threads are replacing the
@@ -196,7 +201,12 @@ class BufferedDeletes { // TODO (DVU_REN
}
update.docIDUpto = docIDUpto;
- termUpdates.put(update.field, update);
+ // since it's a LinkedHashMap, we must first remove the Term entry so that
+ // it's added last (we're interested in insertion-order).
+ if (current != null) {
+ fieldUpdates.remove(update.term);
+ }
+ fieldUpdates.put(update.term, update);
numNumericUpdates.incrementAndGet();
if (current == null) {
bytesUsed.addAndGet(BYTES_PER_NUMERIC_UPDATE_ENTRY + update.sizeInBytes());
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java?rev=1528837&r1=1528836&r2=1528837&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java Thu Oct 3 13:02:29 2013
@@ -19,6 +19,7 @@ package org.apache.lucene.index;
import java.util.ArrayList;
import java.util.Iterator;
+import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
@@ -78,9 +79,13 @@ class FrozenBufferedDeletes { // TODO (D
upto++;
}
+ // TODO if a Term affects multiple fields, we could keep the updates key'd by Term
+ // so that it maps to all fields it affects, sorted by their docUpto, and traverse
+ // that Term only once, applying the update to all fields that still need to be
+ // updated.
List<NumericUpdate> allUpdates = new ArrayList<NumericUpdate>();
int numericUpdatesSize = 0;
- for (Map<String,NumericUpdate> fieldUpdates : deletes.numericUpdates.values()) {
+ for (LinkedHashMap<Term,NumericUpdate> fieldUpdates : deletes.numericUpdates.values()) {
for (NumericUpdate update : fieldUpdates.values()) {
allUpdates.add(update);
numericUpdatesSize += update.sizeInBytes();
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/ReadersAndLiveDocs.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/ReadersAndLiveDocs.java?rev=1528837&r1=1528836&r2=1528837&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/ReadersAndLiveDocs.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/ReadersAndLiveDocs.java Thu Oct 3 13:02:29 2013
@@ -409,6 +409,7 @@ class ReadersAndLiveDocs { // TODO (DVU_
fieldsConsumer.addNumericField(fieldInfo, new Iterable<Number>() {
@SuppressWarnings("synthetic-access")
final NumericDocValues currentValues = reader.getNumericDocValues(field);
+ final Bits docsWithField = reader.getDocsWithField(field);
@Override
public Iterator<Number> iterator() {
return new Iterator<Number>() {
@@ -429,7 +430,10 @@ class ReadersAndLiveDocs { // TODO (DVU_
}
Long updatedValue = updates.get(curDoc);
if (updatedValue == null) {
- updatedValue = Long.valueOf(currentValues.get(curDoc));
+ // only read the current value if the document had a value before
+ if (currentValues != null && docsWithField.get(curDoc)) {
+ updatedValue = currentValues.get(curDoc);
+ }
} else if (updatedValue == NumericUpdate.MISSING) {
updatedValue = null;
}
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestNumericDocValuesUpdates.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestNumericDocValuesUpdates.java?rev=1528837&r1=1528836&r2=1528837&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestNumericDocValuesUpdates.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestNumericDocValuesUpdates.java Thu Oct 3 13:02:29 2013
@@ -512,7 +512,7 @@ public class TestNumericDocValuesUpdates
}
@Test
- public void testUpdateNonDocValueField() throws Exception {
+ public void testUpdateNonNumericDocValuesField() throws Exception {
// we don't support adding new fields or updating existing non-numeric-dv
// fields through numeric updates
Directory dir = newDirectory();
@@ -811,7 +811,10 @@ public class TestNumericDocValuesUpdates
// first segment with NDV
Document doc = new Document();
doc.add(new StringField("id", "doc0", Store.NO));
- doc.add(new NumericDocValuesField("ndv", 5));
+ doc.add(new NumericDocValuesField("ndv", 3));
+ writer.addDocument(doc);
+ doc = new Document();
+ doc.add(new StringField("id", "doc4", Store.NO)); // document without 'ndv' field
writer.addDocument(doc);
writer.commit();
@@ -819,9 +822,17 @@ public class TestNumericDocValuesUpdates
doc = new Document();
doc.add(new StringField("id", "doc1", Store.NO));
writer.addDocument(doc);
+ doc = new Document();
+ doc.add(new StringField("id", "doc2", Store.NO)); // document that isn't updated
+ writer.addDocument(doc);
writer.commit();
- // update document in the second segment
+ // update document in the first segment - should not affect docsWithField of
+ // the document without NDV field
+ writer.updateNumericDocValue(new Term("id", "doc0"), "ndv", 5L);
+
+ // update document in the second segment - field should be added and we should
+ // be able to handle the other document correctly (e.g. no NPE)
writer.updateNumericDocValue(new Term("id", "doc1"), "ndv", 5L);
writer.close();
@@ -829,9 +840,12 @@ public class TestNumericDocValuesUpdates
for (AtomicReaderContext context : reader.leaves()) {
AtomicReader r = context.reader();
NumericDocValues ndv = r.getNumericDocValues("ndv");
- for (int i = 0; i < r.maxDoc(); i++) {
- assertEquals(5L, ndv.get(i));
- }
+ Bits docsWithField = r.getDocsWithField("ndv");
+ assertNotNull(docsWithField);
+ assertTrue(docsWithField.get(0));
+ assertEquals(5L, ndv.get(0));
+ assertFalse(docsWithField.get(1));
+ assertEquals(0L, ndv.get(1));
}
reader.close();
@@ -1236,4 +1250,31 @@ public class TestNumericDocValuesUpdates
dir.close();
}
+ @Test
+ public void testUpdatesOrder() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+ IndexWriter writer = new IndexWriter(dir, conf);
+
+ Document doc = new Document();
+ doc.add(new StringField("upd", "t1", Store.NO));
+ doc.add(new StringField("upd", "t2", Store.NO));
+ doc.add(new NumericDocValuesField("f1", 1L));
+ doc.add(new NumericDocValuesField("f2", 1L));
+ writer.addDocument(doc);
+ writer.updateNumericDocValue(new Term("upd", "t1"), "f1", 2L); // update f1 to 2
+ writer.updateNumericDocValue(new Term("upd", "t1"), "f2", 2L); // update f2 to 2
+ writer.updateNumericDocValue(new Term("upd", "t2"), "f1", 3L); // update f1 to 3
+ writer.updateNumericDocValue(new Term("upd", "t2"), "f2", 3L); // update f2 to 3
+ writer.updateNumericDocValue(new Term("upd", "t1"), "f1", 4L); // update f1 to 4 (but not f2)
+ writer.close();
+
+ DirectoryReader reader = DirectoryReader.open(dir);
+ assertEquals(4, reader.leaves().get(0).reader().getNumericDocValues("f1").get(0));
+ assertEquals(3, reader.leaves().get(0).reader().getNumericDocValues("f2").get(0));
+ reader.close();
+
+ dir.close();
+ }
+
}