You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by th...@apache.org on 2015/05/20 20:23:07 UTC

svn commit: r1680648 - in /lucene/dev/branches/branch_5x: ./ solr/ solr/core/ solr/core/src/java/org/apache/solr/core/ solr/core/src/java/org/apache/solr/update/ solr/core/src/test-files/solr/collection1/conf/ solr/core/src/test/org/apache/solr/cloud/ ...

Author: thelabdude
Date: Wed May 20 18:23:06 2015
New Revision: 1680648

URL: http://svn.apache.org/r1680648
Log:
SOLR-7332: Initialize the highest value for all version buckets with the max value from the index or recent updates to avoid unnecessary lookups to the index to check for reordered updates when processing new documents.

Added:
    lucene/dev/branches/branch_5x/solr/core/src/test-files/solr/collection1/conf/schema-version-dv.xml
      - copied unchanged from r1680639, lucene/dev/trunk/solr/core/src/test-files/solr/collection1/conf/schema-version-dv.xml
    lucene/dev/branches/branch_5x/solr/core/src/test-files/solr/collection1/conf/schema-version-indexed.xml
      - copied unchanged from r1680639, lucene/dev/trunk/solr/core/src/test-files/solr/collection1/conf/schema-version-indexed.xml
    lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/cloud/DistributedVersionInfoTest.java
      - copied unchanged from r1680639, lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/DistributedVersionInfoTest.java
    lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/update/VersionInfoTest.java
      - copied unchanged from r1680639, lucene/dev/trunk/solr/core/src/test/org/apache/solr/update/VersionInfoTest.java
Modified:
    lucene/dev/branches/branch_5x/   (props changed)
    lucene/dev/branches/branch_5x/solr/   (props changed)
    lucene/dev/branches/branch_5x/solr/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_5x/solr/core/   (props changed)
    lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/core/SolrCore.java
    lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/update/UpdateLog.java
    lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/update/VersionInfo.java

Modified: lucene/dev/branches/branch_5x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/CHANGES.txt?rev=1680648&r1=1680647&r2=1680648&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/solr/CHANGES.txt Wed May 20 18:23:06 2015
@@ -250,6 +250,10 @@ Optimizations
   configurable and use knowledge that a batch is being processed to poll efficiently.
   (Timothy Potter)
 
+* SOLR-7332: Initialize the highest value for all version buckets with the max value from
+  the index or recent updates to avoid unnecessary lookups to the index to check for reordered
+  updates when processing new documents. (Timothy Potter, yonik)
+
 Other Changes
 ----------------------
 

Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/core/SolrCore.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/core/SolrCore.java?rev=1680648&r1=1680647&r2=1680648&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/core/SolrCore.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/core/SolrCore.java Wed May 20 18:23:06 2015
@@ -1806,6 +1806,10 @@ public final class SolrCore implements S
         }
 
         if (currSearcher == null) {
+          if (updateHandler != null && updateHandler.getUpdateLog() != null) {
+            updateHandler.getUpdateLog().onFirstSearcher(newSearcher);
+          }
+
           future = searcherExecutor.submit(new Callable() {
             @Override
             public Object call() throws Exception {

Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/update/UpdateLog.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/update/UpdateLog.java?rev=1680648&r1=1680647&r2=1680648&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/update/UpdateLog.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/update/UpdateLog.java Wed May 20 18:23:06 2015
@@ -145,6 +145,7 @@ public class UpdateLog implements Plugin
   protected int numRecordsToKeep;
   protected int maxNumLogsToKeep;
   protected int numVersionBuckets; // This should only be used to initialize VersionInfo... the actual number of buckets may be rounded up to a power of two.
+  protected Long maxVersionFromIndex = null;
 
   // keep track of deletes only... this is not updated on an add
   protected LinkedHashMap<BytesRef, LogPtr> oldDeletes = new LinkedHashMap<BytesRef, LogPtr>(numDeletesToKeep) {
@@ -703,6 +704,7 @@ public class UpdateLog implements Plugin
         SolrCore.verbose("TLOG: postSoftCommit: disposing of prevMap="+ System.identityHashCode(prevMap) + ", prevMap2=" + System.identityHashCode(prevMap2));
       }
       clearOldMaps();
+
     }
   }
 
@@ -1052,6 +1054,15 @@ public class UpdateLog implements Plugin
         log.decref();
       }
     }
+
+    public long getMaxRecentVersion() {
+      long maxRecentVersion = 0L;
+      if (updates != null) {
+        for (Long key : updates.keySet())
+          maxRecentVersion = Math.max(maxRecentVersion, Math.abs(key.longValue()));
+      }
+      return maxRecentVersion;
+    }
   }
 
   /** The RecentUpdates object returned must be closed after use */
@@ -1257,6 +1268,12 @@ public class UpdateLog implements Plugin
         // change the state while updates are still blocked to prevent races
         state = State.ACTIVE;
         if (finishing) {
+
+          // after replay, update the max from the index
+          log.info("Re-computing max version from index after log re-play.");
+          maxVersionFromIndex = null;
+          getMaxVersionFromIndex();
+
           versionInfo.unblockUpdates();
         }
 
@@ -1527,6 +1544,69 @@ public class UpdateLog implements Plugin
       }
     }
   }
-  
+
+  // Package-private (not part of the public API): lazily computes and caches the max version; used by unit tests and re-run after log replay
+  Long getMaxVersionFromIndex() {
+    if (maxVersionFromIndex == null && versionInfo != null) {
+      RefCounted<SolrIndexSearcher> newestSearcher = (uhandler != null && uhandler.core != null)
+          ? uhandler.core.getRealtimeSearcher() : null;
+      if (newestSearcher == null)
+        throw new IllegalStateException("No searcher available to lookup max version from index!");
+
+      try {
+        maxVersionFromIndex = seedBucketsWithHighestVersion(newestSearcher.get(), versionInfo);
+      } finally {
+        newestSearcher.decref();
+      }
+    }
+    return maxVersionFromIndex;
+  }
+
+  /**
+   * Seeds all version buckets with the highest version found in the index or in recent updates (a new clock value if neither has one); returns null if an IOException occurs.
+   */
+  protected Long seedBucketsWithHighestVersion(SolrIndexSearcher newSearcher, VersionInfo versions) {
+    Long highestVersion = null;
+    long startMs = System.currentTimeMillis();
+
+    RecentUpdates recentUpdates = null;
+    try {
+      recentUpdates = getRecentUpdates();
+      long maxVersionFromRecent = recentUpdates.getMaxRecentVersion();
+      long maxVersionFromIndex = versions.getMaxVersionFromIndex(newSearcher);
+
+      long maxVersion = Math.max(maxVersionFromIndex, maxVersionFromRecent);
+      if (maxVersion == 0L) {
+        maxVersion = versions.getNewClock();
+        log.warn("Could not find max version in index or recent updates, using new clock {}", maxVersion);
+      }
+
+      // seed all version buckets with the highest value from recent and index
+      versions.seedBucketsWithHighestVersion(maxVersion);
+
+      highestVersion = maxVersion;
+    } catch (IOException ioExc) {
+      log.warn("Failed to determine the max value of the version field due to: "+ioExc, ioExc);
+    } finally {
+      if (recentUpdates != null)
+        recentUpdates.close();
+    }
+
+    long tookMs = (System.currentTimeMillis() - startMs);
+    log.info("Took {} ms to seed version buckets with highest version {}",
+        tookMs, String.valueOf(highestVersion));
+
+    return highestVersion;
+  }
+
+  public void onFirstSearcher(SolrIndexSearcher newSearcher) {
+    log.info("On first searcher opened, looking up max value of version field");
+    versionInfo.blockUpdates();
+    try {
+      maxVersionFromIndex = seedBucketsWithHighestVersion(newSearcher, versionInfo);
+    } finally {
+      versionInfo.unblockUpdates();
+    }
+  }
 }
 

Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/update/VersionInfo.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/update/VersionInfo.java?rev=1680648&r1=1680647&r2=1680648&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/update/VersionInfo.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/update/VersionInfo.java Wed May 20 18:23:06 2015
@@ -22,17 +22,29 @@ import java.util.Map;
 import java.util.concurrent.locks.ReadWriteLock;
 import java.util.concurrent.locks.ReentrantReadWriteLock;
 
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.Terms;
 import org.apache.lucene.queries.function.FunctionValues;
 import org.apache.lucene.queries.function.ValueSource;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.TopFieldDocs;
 import org.apache.lucene.util.BitUtil;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.NumericUtils;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.schema.IndexSchema;
 import org.apache.solr.schema.SchemaField;
 import org.apache.solr.search.SolrIndexSearcher;
 import org.apache.solr.util.RefCounted;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 public class VersionInfo {
+
+  public static Logger log = LoggerFactory.getLogger(VersionInfo.class);
+
   public static final String VERSION_FIELD="_version_";
 
   private final UpdateLog ulog;
@@ -88,7 +100,6 @@ public class VersionInfo {
   }
 
   public void reload() {
-
   }
 
   public SchemaField getVersionField() {
@@ -191,13 +202,13 @@ public class VersionInfo {
     try {
       SolrIndexSearcher searcher = newestSearcher.get();
       long lookup = searcher.lookupId(idBytes);
-      if (lookup < 0) return null;
+      if (lookup < 0) return null; // this means the doc doesn't exist in the index yet
 
       ValueSource vs = versionField.getType().getValueSource(versionField, null);
       Map context = ValueSource.newContext(searcher);
       vs.createWeight(context, searcher);
-      FunctionValues fv = vs.getValues(context, searcher.getTopReaderContext().leaves().get((int)(lookup>>32)));
-      long ver = fv.longVal((int)lookup);
+      FunctionValues fv = vs.getValues(context, searcher.getTopReaderContext().leaves().get((int) (lookup >> 32)));
+      long ver = fv.longVal((int) lookup);
       return ver;
 
     } catch (IOException e) {
@@ -209,4 +220,47 @@ public class VersionInfo {
     }
   }
 
+  public Long getMaxVersionFromIndex(SolrIndexSearcher searcher) throws IOException {
+
+    String versionFieldName = versionField.getName();
+
+    log.info("Refreshing highest value of {} for {} version buckets from index", versionFieldName, buckets.length);
+    long maxVersionInIndex = 0L;
+
+    // if indexed, then we have terms to get the max from
+    if (versionField.indexed()) {
+      Terms versionTerms = searcher.getLeafReader().terms(versionFieldName);
+      if (versionTerms != null) {
+        maxVersionInIndex = NumericUtils.getMaxLong(versionTerms);
+        log.info("Found MAX value {} from Terms for {} in index", maxVersionInIndex, versionFieldName);
+      } else {
+        log.warn("No terms found for {}, cannot seed version bucket highest value from index", versionFieldName);
+      }
+    } else {
+      ValueSource vs = versionField.getType().getValueSource(versionField, null);
+      Map funcContext = ValueSource.newContext(searcher);
+      vs.createWeight(funcContext, searcher);
+      // TODO: multi-thread this
+      for (LeafReaderContext ctx : searcher.getTopReaderContext().leaves()) {
+        int maxDoc = ctx.reader().maxDoc();
+        FunctionValues fv = vs.getValues(funcContext, ctx);
+        for (int doc = 0; doc < maxDoc; doc++) {
+          long v = fv.longVal(doc);
+          maxVersionInIndex = Math.max(v, maxVersionInIndex);
+        }
+      }
+    }
+
+    return maxVersionInIndex;
+  }
+
+  public void seedBucketsWithHighestVersion(long highestVersion) {
+    for (int i=0; i<buckets.length; i++) {
+      // should not happen, but in case other threads are calling updateHighest on the version bucket
+      synchronized (buckets[i]) {
+        if (buckets[i].highest < highestVersion)
+          buckets[i].highest = highestVersion;
+      }
+    }
+  }
 }