You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by st...@apache.org on 2019/06/26 11:10:09 UTC

svn commit: r1862124 - in /jackrabbit/oak/branches/1.10: ./ oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/ oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/

Author: stefanegli
Date: Wed Jun 26 11:10:09 2019
New Revision: 1862124

URL: http://svn.apache.org/viewvc?rev=1862124&view=rev
Log:
OAK-8351 : backported r1862044 from trunk to the 1.10 branch : split the revision GC (rgc) query into 2 simpler queries to work around a MongoDB bug where the wrong index was chosen, resulting in a long-running _id_ index scan

Added:
    jackrabbit/oak/branches/1.10/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSplitDocTest.java
      - copied unchanged from r1862044, jackrabbit/oak/trunk/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSplitDocTest.java
Modified:
    jackrabbit/oak/branches/1.10/   (props changed)
    jackrabbit/oak/branches/1.10/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java

Propchange: jackrabbit/oak/branches/1.10/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Wed Jun 26 11:10:09 2019
@@ -1,3 +1,3 @@
 /jackrabbit/oak/branches/1.0:1665962
-/jackrabbit/oak/trunk:1850874,1850882,1851236,1851253,1851451,1851533-1851535,1851619,1852052,1852084,1852120,1852135,1852451,1852492-1852493,1852528,1852582,1852584,1852601,1852920,1853083,1853141,1853229,1853393,1853429,1853433,1853441,1853866,1853868,1853870,1853893,1853969,1853997,1854034,1854044,1854055,1854058,1854113,1854373,1854377,1854380,1854385,1854401,1854403,1854455,1854461-1854462,1854466,1854468,1854515,1854533,1854539,1854701,1854773-1854774,1854827,1854848,1854859,1854930,1854990-1854991,1855032,1855221,1855477-1855478,1855776,1855993,1856049,1856056,1856538,1856545,1857000,1857010,1857104,1857159,1857212,1857221,1857238,1857247,1857253,1857294,1857314,1857463,1857480,1857577,1857589,1857592,1857627,1857634-1857635,1857638,1857640,1857687,1857936,1858032,1858053,1858123,1858139,1858385,1858424,1858571,1858578,1858810,1858926,1858931,1859020,1859231,1859292,1859294,1859359,1859533,1859609,1859612,1859619,1859711,1859716,1859772,1859776,1859780,1859843,1859854,1859881
 ,1860120,1860131,1860137,1860202,1860278,1860328,1860330,1860355,1860393,1860442,1860548,1860564-1860565,1861626
+/jackrabbit/oak/trunk:1850874,1850882,1851236,1851253,1851451,1851533-1851535,1851619,1852052,1852084,1852120,1852135,1852451,1852492-1852493,1852528,1852582,1852584,1852601,1852920,1853083,1853141,1853229,1853393,1853429,1853433,1853441,1853866,1853868,1853870,1853893,1853969,1853997,1854034,1854044,1854055,1854058,1854113,1854373,1854377,1854380,1854385,1854401,1854403,1854455,1854461-1854462,1854466,1854468,1854515,1854533,1854539,1854701,1854773-1854774,1854827,1854848,1854859,1854930,1854990-1854991,1855032,1855221,1855477-1855478,1855776,1855993,1856049,1856056,1856538,1856545,1857000,1857010,1857104,1857159,1857212,1857221,1857238,1857247,1857253,1857294,1857314,1857463,1857480,1857577,1857589,1857592,1857627,1857634-1857635,1857638,1857640,1857687,1857936,1858032,1858053,1858123,1858139,1858385,1858424,1858571,1858578,1858810,1858926,1858931,1859020,1859231,1859292,1859294,1859359,1859533,1859609,1859612,1859619,1859711,1859716,1859772,1859776,1859780,1859843,1859854,1859881
 ,1860120,1860131,1860137,1860202,1860278,1860328,1860330,1860355,1860393,1860442,1860548,1860564-1860565,1861626,1862044
 /jackrabbit/trunk:1345480

Modified: jackrabbit/oak/branches/1.10/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.10/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java?rev=1862124&r1=1862123&r2=1862124&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.10/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java (original)
+++ jackrabbit/oak/branches/1.10/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java Wed Jun 26 11:10:09 2019
@@ -19,25 +19,29 @@
 
 package org.apache.jackrabbit.oak.plugins.document.mongo;
 
+import static com.google.common.collect.Iterables.concat;
+import static com.google.common.collect.Iterables.filter;
+import static com.google.common.collect.Iterables.transform;
+import static java.util.Collections.emptyList;
+import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES;
+import static org.apache.jackrabbit.oak.plugins.document.Document.ID;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.DELETED_ONCE;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_IN_SECS;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.PATH;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SD_MAX_REV_TIME_IN_SECS;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SD_TYPE;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.getModifiedInSecs;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.DEFAULT_NO_BRANCH;
+
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Set;
 import java.util.concurrent.TimeUnit;
 import java.util.regex.Pattern;
 
-import com.google.common.base.Function;
-import com.google.common.base.Joiner;
-import com.google.common.base.Predicate;
-import com.google.common.base.StandardSystemProperty;
-import com.google.common.collect.Lists;
-import com.mongodb.BasicDBObject;
-import com.mongodb.Block;
-import com.mongodb.client.FindIterable;
-import com.mongodb.client.MongoCollection;
-import com.mongodb.client.model.Filters;
-
 import org.apache.jackrabbit.oak.plugins.document.Document;
 import org.apache.jackrabbit.oak.plugins.document.NodeDocument;
+import org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType;
 import org.apache.jackrabbit.oak.plugins.document.Revision;
 import org.apache.jackrabbit.oak.plugins.document.RevisionVector;
 import org.apache.jackrabbit.oak.plugins.document.SplitDocumentCleanUp;
@@ -51,19 +55,16 @@ import org.jetbrains.annotations.NotNull
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import static com.google.common.collect.Iterables.filter;
-import static com.google.common.collect.Iterables.transform;
-import static java.util.Collections.singletonList;
-import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES;
-import static org.apache.jackrabbit.oak.plugins.document.Document.ID;
-import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.DELETED_ONCE;
-import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_IN_SECS;
-import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.PATH;
-import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SD_MAX_REV_TIME_IN_SECS;
-import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SD_TYPE;
-import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType;
-import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.DEFAULT_NO_BRANCH;
-import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.getModifiedInSecs;
+import com.google.common.base.Function;
+import com.google.common.base.Joiner;
+import com.google.common.base.Predicate;
+import com.google.common.base.StandardSystemProperty;
+import com.google.common.collect.Lists;
+import com.mongodb.BasicDBObject;
+import com.mongodb.Block;
+import com.mongodb.client.FindIterable;
+import com.mongodb.client.MongoCollection;
+import com.mongodb.client.model.Filters;
 
 /**
  * Mongo specific version of VersionGCSupport which uses mongo queries
@@ -122,19 +123,33 @@ public class MongoVersionGCSupport exten
     protected Iterable<NodeDocument> identifyGarbage(final Set<SplitDocType> gcTypes,
                                                      final RevisionVector sweepRevs,
                                                      final long oldestRevTimeStamp) {
-        return filter(transform(getNodeCollection().find(
-                createQuery(gcTypes, sweepRevs, oldestRevTimeStamp)),
-                new Function<BasicDBObject, NodeDocument>() {
-            @Override
-            public NodeDocument apply(BasicDBObject input) {
-                return store.convertFromDBObject(NODES, input);
-            }
-        }), new Predicate<NodeDocument>() {
-            @Override
-            public boolean apply(NodeDocument input) {
-                return !isDefaultNoBranchSplitNewerThan(input, sweepRevs);
-            }
-        });
+        // With OAK-8351 this switched from 1 to 2 queries (see createQueries)
+        // hence we iterate over the queries returned by createQueries
+        List<Bson> queries = createQueries(gcTypes, sweepRevs, oldestRevTimeStamp);
+        Iterable<NodeDocument> allResults = emptyList();
+        for (Bson query : queries) {
+            // This query uses a timeout of 15 minutes. Hitting the timeout
+            // results in an exception, which should show up in the log file.
+            // While this doesn't resolve the situation (the restructuring
+            // of the query as part of OAK-8351 does), it nevertheless
+            // makes any similar future problem more visible than long-running
+            // queries alone would (15 minutes is still long).
+            Iterable<NodeDocument> iterable = filter(transform(getNodeCollection().find(query)
+                    .maxTime(15, TimeUnit.MINUTES),
+                    new Function<BasicDBObject, NodeDocument>() {
+                @Override
+                public NodeDocument apply(BasicDBObject input) {
+                    return store.convertFromDBObject(NODES, input);
+                }
+            }), new Predicate<NodeDocument>() {
+                @Override
+                public boolean apply(NodeDocument input) {
+                    return !isDefaultNoBranchSplitNewerThan(input, sweepRevs);
+                }
+            });
+            allResults = concat(allResults, iterable);
+        }
+        return allResults;
     }
 
     @Override
@@ -162,32 +177,33 @@ public class MongoVersionGCSupport exten
         return result.get(0);
     }
 
-    private Bson createQuery(Set<SplitDocType> gcTypes,
+    private List<Bson> createQueries(Set<SplitDocType> gcTypes,
                                  RevisionVector sweepRevs,
                                  long oldestRevTimeStamp) {
-        List<Integer> gcTypeCodes = Lists.newArrayList();
+        List<Bson> result = Lists.newArrayList();
         List<Bson> orClauses = Lists.newArrayList();
         for(SplitDocType type : gcTypes) {
-            gcTypeCodes.add(type.typeCode());
-            for (Bson query : queriesForType(type, sweepRevs)) {
-                orClauses.add(query);
+            if (DEFAULT_NO_BRANCH != type) {
+                orClauses.add(Filters.eq(SD_TYPE, type.typeCode()));
+            } else {
+                result.add(queryForDefaultNoBranch(sweepRevs, getModifiedInSecs(oldestRevTimeStamp)));
             }
         }
-        return Filters.and(
-                Filters.in(SD_TYPE, gcTypeCodes),
+        // OAK-8351: this (last) query only contains SD_TYPE and SD_MAX_REV_TIME_IN_SECS
+        // so mongodb should really use that _sdType_1__sdMaxRevTime_1 index
+        result.add(Filters.and(
                 Filters.or(orClauses),
                 Filters.lt(SD_MAX_REV_TIME_IN_SECS, getModifiedInSecs(oldestRevTimeStamp))
-        );
+                ));
+
+        return result;
     }
 
     @NotNull
-    private Iterable<Bson> queriesForType(SplitDocType type, RevisionVector sweepRevs) {
-        if (type != DEFAULT_NO_BRANCH) {
-            return singletonList(Filters.eq(SD_TYPE, type.typeCode()));
-        }
+    private Bson queryForDefaultNoBranch(RevisionVector sweepRevs, long maxRevTimeInSecs) {
         // default_no_branch split type is special because we can
         // only remove those older than sweep rev
-        List<Bson> queries = Lists.newArrayList();
+        List<Bson> orClauses = Lists.newArrayList();
         for (Revision r : sweepRevs) {
             String idSuffix = Utils.getPreviousIdFor("/", r, 0);
             idSuffix = idSuffix.substring(idSuffix.lastIndexOf('-'));
@@ -202,13 +218,16 @@ public class MongoVersionGCSupport exten
                     )
             );
 
-            queries.add(Filters.and(
-                    Filters.eq(SD_TYPE, type.typeCode()),
+            orClauses.add(Filters.and(
                     idPathClause,
                     Filters.lt(SD_MAX_REV_TIME_IN_SECS, getModifiedInSecs(r.getTimestamp()))
             ));
         }
-        return queries;
+        return Filters.and(
+                Filters.eq(SD_TYPE, DEFAULT_NO_BRANCH.typeCode()),
+                Filters.lt(SD_MAX_REV_TIME_IN_SECS, maxRevTimeInSecs),
+                Filters.or(orClauses)
+                );
     }
 
     private void logSplitDocIdsTobeDeleted(Bson query) {
@@ -257,15 +276,21 @@ public class MongoVersionGCSupport exten
 
         @Override
         protected int deleteSplitDocuments() {
-            Bson query = createQuery(gcTypes, sweepRevs, oldestRevTimeStamp);
+            List<Bson> queries = createQueries(gcTypes, sweepRevs, oldestRevTimeStamp);
 
             if(LOG.isDebugEnabled()){
                 //if debug level logging is on then determine the id of documents to be deleted
                 //and log them
-                logSplitDocIdsTobeDeleted(query);
+                for (Bson query : queries) {
+                    logSplitDocIdsTobeDeleted(query);
+                }
             }
 
-            return (int) getNodeCollection().deleteMany(query).getDeletedCount();
+            int cnt = 0;
+            for (Bson query : queries) {
+                cnt += getNodeCollection().deleteMany(query).getDeletedCount();
+            }
+            return cnt;
         }
     }
 }