Posted to commits@lucene.apache.org by sa...@apache.org on 2016/06/16 16:25:33 UTC

[4/5] lucene-solr:branch_6_0: LUCENE-7301: ensure multiple doc values updates to one document within one update batch are applied in the correct order

LUCENE-7301: ensure multiple doc values updates to one document within one update batch are applied in the correct order
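
In short: IndexWriter could apply two doc values updates targeting the same
document out of order when both were buffered in the same update batch, so the
older value could win.  Below is a condensed sketch of the scenario (not the
full test), based on the testMultipleUpdatesSameDoc test added in this commit;
the real test forces a mid-batch flush with setMaxBufferedDocs(3) to trigger
the bad ordering, and scaffolding like newDirectory() and random() comes from
LuceneTestCase:

    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));

    Document doc = new Document();
    doc.add(new StringField("id", "doc-1", Store.NO));
    doc.add(new NumericDocValuesField("val", 1000000000L));
    writer.addDocument(doc);

    // two updates to the same doc within one update batch: the later one must win
    writer.updateNumericDocValue(new Term("id", "doc-1"), "val", 1000001111L);
    writer.updateNumericDocValue(new Term("id", "doc-1"), "val", 1111111111L);
    writer.commit();
    // before this fix, "val" could come back as 1000001111 instead of 1111111111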


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/e9ccc822
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/e9ccc822
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/e9ccc822

Branch: refs/heads/branch_6_0
Commit: e9ccc822bb8d606dba5385c409a5ea2804d6282c
Parents: 240140d
Author: Mike McCandless <mi...@apache.org>
Authored: Wed Jun 1 14:27:53 2016 -0400
Committer: Steve Rowe <sa...@apache.org>
Committed: Thu Jun 16 12:22:10 2016 -0400

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |   4 +
 .../lucene/index/BufferedUpdatesStream.java     |  30 +++--
 .../apache/lucene/index/CoalescedUpdates.java   |  14 ++-
 .../apache/lucene/index/DocValuesUpdate.java    |   2 +-
 .../index/TestNumericDocValuesUpdates.java      | 112 ++++++++++++++++++-
 5 files changed, 146 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e9ccc822/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 6c8d3d0..90862ce 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -48,6 +48,10 @@ Bug Fixes
 * LUCENE-7293: Don't try to highlight GeoPoint queries (Britta Weber,
   Nick Knize, Mike McCandless, Uwe Schindler)
 
+* LUCENE-7301: Multiple doc values updates to the same document within
+  one update batch could be applied in the wrong order, resulting in
+  the wrong updated value (Ishan Chattopadhyaya, hossman, Mike McCandless)
+
 Documentation
 
 * LUCENE-7223: Improve XXXPoint javadocs to make it clear that you

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e9ccc822/lucene/core/src/java/org/apache/lucene/index/BufferedUpdatesStream.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/BufferedUpdatesStream.java b/lucene/core/src/java/org/apache/lucene/index/BufferedUpdatesStream.java
index bbf83e4..9da1e09 100644
--- a/lucene/core/src/java/org/apache/lucene/index/BufferedUpdatesStream.java
+++ b/lucene/core/src/java/org/apache/lucene/index/BufferedUpdatesStream.java
@@ -228,14 +228,19 @@ class BufferedUpdatesStream implements Accountable {
           assert pool.infoIsLive(info);
           int delCount = 0;
           final DocValuesFieldUpdates.Container dvUpdates = new DocValuesFieldUpdates.Container();
-          if (coalescedUpdates != null) {
-            delCount += applyQueryDeletes(coalescedUpdates.queriesIterable(), segState);
-            applyDocValuesUpdates(coalescedUpdates.numericDVUpdates, segState, dvUpdates);
-            applyDocValuesUpdates(coalescedUpdates.binaryDVUpdates, segState, dvUpdates);
-          }
+
+          // first apply segment-private deletes/updates
           delCount += applyQueryDeletes(packet.queriesIterable(), segState);
           applyDocValuesUpdates(Arrays.asList(packet.numericDVUpdates), segState, dvUpdates);
           applyDocValuesUpdates(Arrays.asList(packet.binaryDVUpdates), segState, dvUpdates);
+
+          // ... then the coalesced deletes/updates, so that if the same update appears in both, the coalesced updates (carried from
+          // packets ahead of the segment-private ones) win:
+          if (coalescedUpdates != null) {
+            delCount += applyQueryDeletes(coalescedUpdates.queriesIterable(), segState);
+            applyDocValuesUpdatesList(coalescedUpdates.numericDVUpdates, segState, dvUpdates);
+            applyDocValuesUpdatesList(coalescedUpdates.binaryDVUpdates, segState, dvUpdates);
+          }
           if (dvUpdates.any()) {
             segState.rld.writeFieldUpdates(info.info.dir, dvUpdates);
           }
@@ -261,8 +266,8 @@ class BufferedUpdatesStream implements Accountable {
             int delCount = 0;
             delCount += applyQueryDeletes(coalescedUpdates.queriesIterable(), segState);
             DocValuesFieldUpdates.Container dvUpdates = new DocValuesFieldUpdates.Container();
-            applyDocValuesUpdates(coalescedUpdates.numericDVUpdates, segState, dvUpdates);
-            applyDocValuesUpdates(coalescedUpdates.binaryDVUpdates, segState, dvUpdates);
+            applyDocValuesUpdatesList(coalescedUpdates.numericDVUpdates, segState, dvUpdates);
+            applyDocValuesUpdatesList(coalescedUpdates.binaryDVUpdates, segState, dvUpdates);
             if (dvUpdates.any()) {
               segState.rld.writeFieldUpdates(info.info.dir, dvUpdates);
             }
@@ -599,8 +604,17 @@ class BufferedUpdatesStream implements Accountable {
     return delTermVisitedCount;
   }
 
+  private synchronized void applyDocValuesUpdatesList(List<List<DocValuesUpdate>> updates, 
+      SegmentState segState, DocValuesFieldUpdates.Container dvUpdatesContainer) throws IOException {
+    // we walk backwards through the segments, appending each update packet to the coalesced updates as we go, so the packets in this
+    // list are ordered newest first; apply them in reverse so that newer packets override older ones:
+    for (int idx = updates.size() - 1; idx >= 0; idx--) {
+      applyDocValuesUpdates(updates.get(idx), segState, dvUpdatesContainer);
+    }
+  }
+
   // DocValues updates
-  private synchronized void applyDocValuesUpdates(Iterable<? extends DocValuesUpdate> updates, 
+  private synchronized void applyDocValuesUpdates(List<DocValuesUpdate> updates, 
       SegmentState segState, DocValuesFieldUpdates.Container dvUpdatesContainer) throws IOException {
     Fields fields = segState.reader.fields();
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e9ccc822/lucene/core/src/java/org/apache/lucene/index/CoalescedUpdates.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/CoalescedUpdates.java b/lucene/core/src/java/org/apache/lucene/index/CoalescedUpdates.java
index e908a99..bf92ac1 100644
--- a/lucene/core/src/java/org/apache/lucene/index/CoalescedUpdates.java
+++ b/lucene/core/src/java/org/apache/lucene/index/CoalescedUpdates.java
@@ -32,8 +32,8 @@ import org.apache.lucene.util.BytesRef;
 class CoalescedUpdates {
   final Map<Query,Integer> queries = new HashMap<>();
   final List<PrefixCodedTerms> terms = new ArrayList<>();
-  final List<NumericDocValuesUpdate> numericDVUpdates = new ArrayList<>();
-  final List<BinaryDocValuesUpdate> binaryDVUpdates = new ArrayList<>();
+  final List<List<DocValuesUpdate>> numericDVUpdates = new ArrayList<>();
+  final List<List<DocValuesUpdate>> binaryDVUpdates = new ArrayList<>();
   long totalTermCount;
   
   @Override
@@ -53,17 +53,21 @@ class CoalescedUpdates {
       final Query query = in.queries[queryIdx];
       queries.put(query, BufferedUpdates.MAX_INT);
     }
-    
+
+    List<DocValuesUpdate> numericPacket = new ArrayList<>();
+    numericDVUpdates.add(numericPacket);
     for (NumericDocValuesUpdate nu : in.numericDVUpdates) {
       NumericDocValuesUpdate clone = new NumericDocValuesUpdate(nu.term, nu.field, (Long) nu.value);
       clone.docIDUpto = Integer.MAX_VALUE;
-      numericDVUpdates.add(clone);
+      numericPacket.add(clone);
     }
     
+    List<DocValuesUpdate> binaryPacket = new ArrayList<>();
+    binaryDVUpdates.add(binaryPacket);
     for (BinaryDocValuesUpdate bu : in.binaryDVUpdates) {
       BinaryDocValuesUpdate clone = new BinaryDocValuesUpdate(bu.term, bu.field, (BytesRef) bu.value);
       clone.docIDUpto = Integer.MAX_VALUE;
-      binaryDVUpdates.add(clone);
+      binaryPacket.add(clone);
     }
   }
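
The structural change in this file: numericDVUpdates and binaryDVUpdates used
to be flat lists that merged every coalesced packet together, losing the
ordering between packets.  They are now lists of lists, one inner "packet"
list per update() call, so BufferedUpdatesStream can replay whole packets in a
controlled order.  A sketch of the difference (u1..u4 are illustrative update
names, not values from the patch):

    // before: one flat list, packet boundaries lost:
    //   numericDVUpdates = [u1, u2, u3, u4]
    // after: one inner list per coalesced packet, boundaries preserved:
    //   numericDVUpdates = [ [u1, u2] /* packet A */, [u3, u4] /* packet B */ ]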
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e9ccc822/lucene/core/src/java/org/apache/lucene/index/DocValuesUpdate.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/DocValuesUpdate.java b/lucene/core/src/java/org/apache/lucene/index/DocValuesUpdate.java
index c972964..1c85f33 100644
--- a/lucene/core/src/java/org/apache/lucene/index/DocValuesUpdate.java
+++ b/lucene/core/src/java/org/apache/lucene/index/DocValuesUpdate.java
@@ -68,7 +68,7 @@ abstract class DocValuesUpdate {
   
   @Override
   public String toString() {
-    return "term=" + term + ",field=" + field + ",value=" + value;
+    return "term=" + term + ",field=" + field + ",value=" + value + ",docIDUpto=" + docIDUpto;
   }
   
   /** An in-place update to a binary DocValues field */

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e9ccc822/lucene/core/src/test/org/apache/lucene/index/TestNumericDocValuesUpdates.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestNumericDocValuesUpdates.java b/lucene/core/src/test/org/apache/lucene/index/TestNumericDocValuesUpdates.java
index 15ecc0f..71b8284 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestNumericDocValuesUpdates.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestNumericDocValuesUpdates.java
@@ -18,6 +18,8 @@ package org.apache.lucene.index;
 
 import java.io.IOException;
 import java.util.HashSet;
+import java.util.HashMap;
+import java.util.Map;
 import java.util.Random;
 import java.util.Set;
 import java.util.concurrent.CountDownLatch;
@@ -58,12 +60,118 @@ import com.carrotsearch.randomizedtesting.generators.RandomPicks;
 public class TestNumericDocValuesUpdates extends LuceneTestCase {
 
   private Document doc(int id) {
+    // make sure we don't set the doc's value to 0, so it isn't confused with a document that's missing values
+    return doc(id, id + 1);
+  }
+  
+  private Document doc(int id, long val) {
     Document doc = new Document();
     doc.add(new StringField("id", "doc-" + id, Store.NO));
-    // make sure we don't set the doc's value to 0, to not confuse with a document that's missing values
-    doc.add(new NumericDocValuesField("val", id + 1));
+    doc.add(new NumericDocValuesField("val", val));
     return doc;
   }
+
+  public void testMultipleUpdatesSameDoc() throws Exception {
+
+    Directory dir = newDirectory();
+    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
+    
+    conf.setMaxBufferedDocs(3); // the test indexes only a handful of docs, so use a tiny maxBufferedDocs to force a flush mid-batch
+
+    IndexWriter writer = new IndexWriter(dir, conf);
+
+    writer.updateDocument       (new Term("id","doc-1"), doc(1, 1000000000L ));
+    writer.updateNumericDocValue(new Term("id","doc-1"), "val", 1000001111L );
+    writer.updateDocument       (new Term("id","doc-2"), doc(2, 2000000000L ));
+    writer.updateDocument       (new Term("id","doc-2"), doc(2, 2222222222L ));
+    writer.updateNumericDocValue(new Term("id","doc-1"), "val", 1111111111L );
+    writer.commit();
+    
+    final DirectoryReader reader = DirectoryReader.open(dir);
+    final IndexSearcher searcher = new IndexSearcher(reader);
+    TopFieldDocs td;
+    
+    td = searcher.search(new TermQuery(new Term("id", "doc-1")), 1, 
+                         new Sort(new SortField("val", SortField.Type.LONG)));
+    assertEquals("doc-1 missing?", 1, td.scoreDocs.length);
+    assertEquals("doc-1 value", 1111111111L, ((FieldDoc)td.scoreDocs[0]).fields[0]);
+    
+    td = searcher.search(new TermQuery(new Term("id", "doc-2")), 1, 
+                        new Sort(new SortField("val", SortField.Type.LONG)));
+    assertEquals("doc-2 missing?", 1, td.scoreDocs.length);
+    assertEquals("doc-2 value", 2222222222L, ((FieldDoc)td.scoreDocs[0]).fields[0]);
+    
+    IOUtils.close(reader, writer, dir);
+  }
+
+  public void testBiasedMixOfRandomUpdates() throws Exception {
+    // 3 types of operations: add, update, updateDV.
+    // rather than randomizing equally, we pick (random) cutoffs so each test run is biased
+    // toward some ops happening more often than others
+    final int ADD_CUTOFF = TestUtil.nextInt(random(), 1, 98);
+    final int UPD_CUTOFF = TestUtil.nextInt(random(), ADD_CUTOFF+1, 99);
+
+    Directory dir = newDirectory();
+    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
+
+    IndexWriter writer = new IndexWriter(dir, conf);
+    
+    final int numOperations = atLeast(1000);
+    final Map<Integer,Long> expected = new HashMap<>(numOperations / 3);
+
+    // start with at least one doc before any chance of updates
+    final int numSeedDocs = atLeast(1); 
+    for (int i = 0; i < numSeedDocs; i++) {
+      final long val = random().nextLong();
+      expected.put(i, val);
+      writer.addDocument(doc(i, val));
+    }
+
+    int numDocUpdates = 0;
+    int numValueUpdates = 0;
+
+    //System.out.println("TEST: numOperations=" + numOperations + " ADD_CUTOFF=" + ADD_CUTOFF + " UPD_CUTOFF=" + UPD_CUTOFF);
+
+    for (int i = 0; i < numOperations; i++) {
+      final int op = TestUtil.nextInt(random(), 1, 100);
+      final long val = random().nextLong();
+      if (op <= ADD_CUTOFF) {
+        final int id = expected.size();
+        //System.out.println("TEST i=" + i + ": addDocument id=" + id + " val=" + val);
+        expected.put(id, val);
+        writer.addDocument(doc(id, val));
+      } else {
+        final int id = TestUtil.nextInt(random(), 0, expected.size()-1);
+        expected.put(id, val);
+        if (op <= UPD_CUTOFF) {
+          numDocUpdates++;
+          //System.out.println("TEST i=" + i + ": updateDocument id=" + id + " val=" + val);
+          writer.updateDocument(new Term("id","doc-" + id), doc(id, val));
+        } else {
+          numValueUpdates++;
+          //System.out.println("TEST i=" + i + ": updateDV id=" + id + " val=" + val);
+          writer.updateNumericDocValue(new Term("id","doc-" + id), "val", val);
+        }
+      }
+    }
+
+    writer.commit();
+    
+    final DirectoryReader reader = DirectoryReader.open(dir);
+    final IndexSearcher searcher = new IndexSearcher(reader);
+
+    // TODO: make this more efficient if the max numOperations is going to be increased much
+    for (Map.Entry<Integer,Long> expect : expected.entrySet()) {
+      String id = "doc-" + expect.getKey();
+      TopFieldDocs td = searcher.search(new TermQuery(new Term("id", id)), 1, 
+                                        new Sort(new SortField("val", SortField.Type.LONG)));
+      assertEquals(id + " missing?", 1, td.totalHits);
+      assertEquals(id + " value", expect.getValue(), ((FieldDoc)td.scoreDocs[0]).fields[0]);
+    }
+    
+    IOUtils.close(reader, writer, dir);
+  }
+
   
   @Test
   public void testUpdatesAreFlushed() throws IOException {