You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ho...@apache.org on 2017/01/12 19:06:14 UTC

lucene-solr:jira/solr-5944: SOLR-5944: add a final check of the model vs RTG and search

Repository: lucene-solr
Updated Branches:
  refs/heads/jira/solr-5944 5e7c2543b -> 619d1283c


SOLR-5944: add a final check of the model vs RTG and search

this uncovered several bugs in how the model was being updated, realted to the use of 'magic' values (ex: -1, Integer.MIN_VALUE, 0) -- particularly when solr reported version conflicts -- that were then causing threads to make bad decisions about when to update the model

still a lot of room for improvement here (see new nocommits) but i want to hammer on the test like this for a bit first before making any more changes


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/619d1283
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/619d1283
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/619d1283

Branch: refs/heads/jira/solr-5944
Commit: 619d1283cf355feaaf562e3e7f41bce1a12cb4c5
Parents: 5e7c254
Author: Chris Hostetter <ho...@apache.org>
Authored: Thu Jan 12 12:05:53 2017 -0700
Committer: Chris Hostetter <ho...@apache.org>
Committed: Thu Jan 12 12:05:53 2017 -0700

----------------------------------------------------------------------
 .../solr/cloud/TestStressInPlaceUpdates.java    | 184 +++++++++++++++----
 1 file changed, 147 insertions(+), 37 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/619d1283/solr/core/src/test/org/apache/solr/cloud/TestStressInPlaceUpdates.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestStressInPlaceUpdates.java b/solr/core/src/test/org/apache/solr/cloud/TestStressInPlaceUpdates.java
index db4e7f5..8f758c9 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestStressInPlaceUpdates.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestStressInPlaceUpdates.java
@@ -36,6 +36,7 @@ import org.apache.solr.client.solrj.impl.HttpSolrClient;
 import org.apache.solr.client.solrj.request.UpdateRequest;
 import org.apache.solr.client.solrj.response.QueryResponse;
 import org.apache.solr.client.solrj.response.UpdateResponse;
+import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.cloud.ClusterState;
 import org.apache.solr.common.cloud.Replica;
@@ -83,7 +84,9 @@ public class TestStressInPlaceUpdates extends AbstractFullDistribZkTestBase {
 
   private void initModel(int ndocs) {
     for (int i = 0; i < ndocs; i++) {
-      model.put(i, new DocInfo(0l, 0, 0));
+      // seed versions w/-1 so "from scratch" adds/updates will fail optimistic concurrency checks
+      // if some other thread beats us to adding the id
+      model.put(i, new DocInfo(-1L, 0, 0));
     }
     committedModel.putAll(model);
   }
@@ -102,7 +105,7 @@ public class TestStressInPlaceUpdates extends AbstractFullDistribZkTestBase {
     final int softCommitPercent = 30 + random().nextInt(75); // what percent of the commits are soft
     final int deletePercent = 4 + random().nextInt(25);
     final int deleteByQueryPercent = random().nextInt(8);
-    final int ndocs = 5 + (random().nextBoolean() ? random().nextInt(25) : random().nextInt(200));
+    final int ndocs = atLeast(5);
     int nWriteThreads = 5 + random().nextInt(25);
     int fullUpdatePercent = 5 + random().nextInt(50);
 
@@ -209,7 +212,7 @@ public class TestStressInPlaceUpdates extends AbstractFullDistribZkTestBase {
               if (oper < commitPercent + deletePercent + deleteByQueryPercent) {
                 log.info("deleting id {}: {}",id,info);
                 
-                long returnedVersion;
+                Long returnedVersion = null;
 
                 try {
                   final boolean dbq = (oper >= commitPercent + deletePercent);
@@ -221,15 +224,24 @@ public class TestStressInPlaceUpdates extends AbstractFullDistribZkTestBase {
                       || e.getMessage() != null && e.getMessage().contains("Conflict")) {
                     // Its okay for a leader to reject a concurrent request
                     log.warn("Conflict during partial update, rejected id=" + id + ", " + e);
-                    returnedVersion = -1;
+                    returnedVersion = null;
                   } else throw e;
                 }
 
-                // only update model if the version is newer
+                // only update model if update had no conflict & the version is newer
                 synchronized (model) {
                   DocInfo currInfo = model.get(id);
-                  if (Math.abs(returnedVersion) > Math.abs(currInfo.version)) {
-                    model.put(id, new DocInfo(returnedVersion, 0, 0));
+                  if (null != returnedVersion &&
+                      (Math.abs(returnedVersion.longValue()) > Math.abs(currInfo.version))) {
+
+                    // nocommit: CHANGE TO THIS...
+                    //
+                    // Preserve the values from the old model in the deleted doc.
+                    // This way the values will always be increasing if/when the doc is re-added, and the 
+                    // read threads can always assert that if a doc is found with a newer version then expected
+                    // then the field values must be greater then the ones expected.
+
+                    model.put(id, new DocInfo(returnedVersion.longValue(), 0, 0));
                   }
                 }
                 
@@ -240,7 +252,7 @@ public class TestStressInPlaceUpdates extends AbstractFullDistribZkTestBase {
                 long nextVal2 = val2;
 
                 int addOper = rand.nextInt(100);
-                long returnedVersion;
+                Long returnedVersion;
                 if (addOper < fullUpdatePercent || info.version <= 0) { // if document was never indexed or was deleted
                   // FULL UPDATE
                   nextVal1 = Primes.nextPrime(val1 + 1);
@@ -254,7 +266,7 @@ public class TestStressInPlaceUpdates extends AbstractFullDistribZkTestBase {
                         || e.getMessage() != null && e.getMessage().contains("Conflict")) {
                       // Its okay for a leader to reject a concurrent request
                       log.warn("Conflict during partial update, rejected id=" + id + ", " + e);
-                      returnedVersion = Integer.MIN_VALUE;
+                      returnedVersion = null;
                     } else throw e;
                   }
                 } else {
@@ -268,20 +280,20 @@ public class TestStressInPlaceUpdates extends AbstractFullDistribZkTestBase {
                         || e.getMessage() != null && e.getMessage().contains("Conflict")) {
                       // Its okay for a leader to reject a concurrent request
                       log.warn("Conflict during full update, rejected id=" + id + ", " + e);
-                      returnedVersion = -1;
                     } else if (e.getMessage() != null && e.getMessage().contains("Document not found for update.") 
                         && e.getMessage().contains("id="+id)) {
                       log.warn("Attempting a partial update for a recently deleted document, rejected id=" + id + ", " + e);
-                      returnedVersion = Integer.MIN_VALUE;
                     } else throw e;
+                    returnedVersion = null;
                   }
                 }
 
-                // only update model if the version is newer
+                // only update model if update had no conflict & the version is newer
                 synchronized (model) {
                   DocInfo currInfo = model.get(id);
-                  if (returnedVersion > currInfo.version) {
-                    model.put(id, new DocInfo(returnedVersion, nextVal1, nextVal2));
+                  if (null != returnedVersion &&
+                      (Math.abs(returnedVersion.longValue()) > Math.abs(currInfo.version))) {
+                    model.put(id, new DocInfo(returnedVersion.longValue(), nextVal1, nextVal2));
                   }
 
                 }
@@ -301,7 +313,6 @@ public class TestStressInPlaceUpdates extends AbstractFullDistribZkTestBase {
 
       threads.add(thread);
 
-      // nocommit: need a final pass over the model to check every doc
     }
 
     // Read threads
@@ -321,13 +332,13 @@ public class TestStressInPlaceUpdates extends AbstractFullDistribZkTestBase {
               // so when querying, we should first check the model, and then the index
 
               boolean realTime = rand.nextInt(100) < percentRealtimeQuery;
-              DocInfo info;
+              DocInfo expected;
 
               if (realTime) {
-                info = model.get(id);
+                expected = model.get(id);
               } else {
                 synchronized (TestStressInPlaceUpdates.this) {
-                  info = committedModel.get(id);
+                  expected = committedModel.get(id);
                 }
               }
 
@@ -352,29 +363,42 @@ public class TestStressInPlaceUpdates extends AbstractFullDistribZkTestBase {
               if (response.getResults().size() == 0) {
                 // there's no info we can get back with a delete, so not much we can check without further synchronization
               } else if (response.getResults().size() == 1) {
-                assertNotNull("Realtime=" + realTime + ", Response is: " + response + ", model: " + info,
-                    response.getResults().get(0).get("val2_l_dvo"));
-                assertNotNull("Realtime=" + realTime + ", Response is: " + response.getResults().get(0) + ", model: " + info + ", client="+((HttpSolrClient)clients.get(clientId)).getBaseURL()+", leaderClient="+((HttpSolrClient)leaderClient).getBaseURL(),
-                    response.getResults().get(0).get("val1_i_dvo"));
+                final SolrDocument actual = response.getResults().get(0);
+                final String msg = "Realtime=" + realTime + ", expected=" + expected + ", actual=" + actual;
+                assertNotNull(msg, actual);
+
+                final Long foundVersion = (Long) actual.getFieldValue("_version_");
+                assertNotNull(msg, foundVersion);
+                assertTrue(msg + "... solr doc has non-positive version???",
+                           0 < foundVersion.longValue());
+                final Integer intVal = (Integer) actual.getFieldValue("val1_i_dvo");
+                assertNotNull(msg, intVal);
+                
+                final Long longVal = (Long) actual.getFieldValue("val2_l_dvo");
+                assertNotNull(msg, longVal);
+
+                assertTrue(msg + " ...solr returned older version then model. " +
+                           "should not be possible given the order of operations in writer threads",
+                           Math.abs(expected.version) <= foundVersion.longValue());
 
-                Object obj1 = response.getResults().get(0).getFirstValue("val1_i_dvo");
-                int val1 = (Integer) obj1;
-                Object obj2 = response.getResults().get(0).getFirstValue("val2_l_dvo");
-                long val2 = (Long) obj2;
-                Object objVer = response.getResults().get(0).getFirstValue("_version_");
-                long foundVer = (Long) objVer;
+                if (foundVersion.longValue() == expected.version) {
+                  assertEquals(msg, expected.intFieldValue, intVal.intValue());
+                  assertEquals(msg, expected.longFieldValue, longVal.longValue());
+                }
 
+                // Some things we can assert about any Doc returned from solr,
+                // even if it's newer then our (expected) model information...
 
-                if (!(val1 == 0 && val2 == 0 || val2 % val1 == 0)) {
-                  assertTrue("Vals are: " + val1 + ", " + val2 + ", id=" + id + ", clientId=" + clients.get(clientId) + ", Doc retrived is: " + response.toString(),
-                      val1 == 0 && val2 == 0 || val2 % val1 == 0);
+                assertTrue(msg + " ...how did a doc in solr get a non positive intVal?",
+                           0 < intVal);
+                assertTrue(msg + " ...how did a doc in solr get a non positive longVal?",
+                           0 < longVal);
+                assertEquals(msg + " ...intVal and longVal in solr doc are internally (modulo) inconsistent w/eachother",
+                             0, (longVal % intVal));
 
-                }
-                if (foundVer < Math.abs(info.version)
-                    || (foundVer == info.version && (val1 != info.intFieldValue || val2 != info.longFieldValue))) {    // if the version matches, the val must
-                  log.error("Realtime=" + realTime + ", ERROR, id=" + id + " found=" + response + " model=" + info + ", client="+((HttpSolrClient)clients.get(clientId)).getBaseURL()+", leaderClient="+((HttpSolrClient)leaderClient).getBaseURL());
-                  assertTrue("Realtime=" + realTime + ", ERROR, id=" + id + " found=" + response + " model=" + info, false);
-                }
+                // nocommit: for expected.version < foundVersion, assert expected.values < doc.values
+                // nocommit: (see previous nocommit in writer thread)
+                
               } else {
                 fail(String.format(Locale.ENGLISH, "There were more than one result: {}", response));
               }
@@ -397,6 +421,91 @@ public class TestStressInPlaceUpdates extends AbstractFullDistribZkTestBase {
     for (Thread thread : threads) {
       thread.join();
     }
+
+    { // final pass over uncommitted model with RTG
+
+      for (SolrClient client : clients) {
+        for (Map.Entry<Integer,DocInfo> entry : model.entrySet()) {
+          final Integer id = entry.getKey();
+          final DocInfo expected = entry.getValue();
+          final SolrDocument actual = client.getById(id.toString());
+
+          String msg = "RTG: " + id + "=" + expected;
+          if (null == actual) {
+            // a deleted or non-existent document
+            // sanity check of the model agrees...
+            assertTrue(msg + " is deleted/non-existent in Solr, but model has non-neg version",
+                       expected.version < 0);
+            // nocommit: (see related nocommit in writer thread about updating model for deletes)
+            // nocommit: if deleted deleted docs preserve old model and never have 0,0 values, change asserts...
+            assertEquals(msg + " is deleted/non-existent in Solr", expected.intFieldValue, 0);
+            assertEquals(msg + " is deleted/non-existent in Solr", expected.longFieldValue, 0);
+            // nocommit: ...to this...
+            // assertEquals(msg + " is deleted/non-existent in Solr, but model has inconsistent (modulo) values",
+            //             0, (expected.longFieldValue % expected.intFieldValue));
+          } else {
+            msg = msg + " <==VS==> " + actual;
+            assertEquals(msg, expected.intFieldValue, actual.getFieldValue("val1_i_dvo"));
+            assertEquals(msg, expected.longFieldValue, actual.getFieldValue("val2_l_dvo"));
+            assertEquals(msg, expected.version, actual.getFieldValue("_version_"));
+            assertTrue(msg + " doc exists in solr, but version is negative???",
+                       0 < expected.version);
+          }
+        }
+      }
+    }
+    
+    { // do a final search and compare every result with the model
+      
+      // nocommit: START: better way to check the original committedModel????
+      // nocommit: do we really need to wait for shards to catchup, or is this just a red herring for another bug?
+      waitForThingsToLevelOut(30); // NOTE: this does an automatic commit...
+      committedModel = new HashMap<>(model);
+      // nocommit: END
+
+      // first, prune the model of any docs that have negative versions
+      // ie: were never actually added, or were ultimately deleted.
+      for (int i = 0; i < ndocs; i++) {
+        DocInfo info = committedModel.get(i);
+        if (info.version < 0) {
+          // first, a quick sanity check of the model itself...
+
+          // nocommit: (see related nocommit in writer thread about updating model for deletes)
+          // nocommit: if deleted deleted docs preserve old model and never have 0,0 values, change asserts...
+          assertEquals("Inconsistent int value in model for deleted doc" + i + "=" + info,
+                       0, info.intFieldValue);
+          assertEquals("Inconsistent long value in model for deleted doc" + i + "=" + info,
+                       0L, info.longFieldValue);
+          // nocommit: ...to this...
+          // assertEquals("Inconsistent (modulo) values in model for deleted doc" + i + "=" + info,
+          //             0, (info.longFieldValue % info.intFieldValue));
+
+          committedModel.remove(i);
+        }
+      }
+
+      for (SolrClient client : clients) {
+        QueryResponse rsp = client.query(params("q","*:*", "sort", "id asc", "rows", ndocs+""));
+        for (SolrDocument actual : rsp.getResults()) {
+          final Integer id = Integer.parseInt(actual.getFieldValue("id").toString());
+          final DocInfo expected = committedModel.get(id); 
+          
+          assertNotNull("Doc found but missing/deleted from model: " + actual, expected);
+          
+          final String msg = "Search: " + id + "=" + expected + " <==VS==> " + actual;
+          assertEquals(msg, expected.intFieldValue, actual.getFieldValue("val1_i_dvo"));
+          assertEquals(msg, expected.longFieldValue, actual.getFieldValue("val2_l_dvo"));
+          assertEquals(msg, expected.version, actual.getFieldValue("_version_"));
+          assertTrue(msg + " doc exists in solr, but version is negative???",
+                     0 < expected.version);
+
+          // also sanity check the model (which we already know matches the doc)
+          assertEquals("Inconsistent (modulo) values in model for id " + id + "=" + expected,
+                       0, (expected.longFieldValue % expected.intFieldValue));
+        }
+        assertEquals(committedModel.size(), rsp.getResults().getNumFound());
+      }
+    }
   }
 
   /**
@@ -408,6 +517,7 @@ public class TestStressInPlaceUpdates extends AbstractFullDistribZkTestBase {
     long longFieldValue;
 
     public DocInfo(long version, int val1, long val2) {
+      assert version != 0; // must either be real positive version, or negative deleted version/indicator
       this.version = version;
       this.intFieldValue = val1;
       this.longFieldValue = val2;