You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by st...@apache.org on 2019/06/26 11:10:09 UTC
svn commit: r1862124 - in /jackrabbit/oak/branches/1.10: ./
oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/
oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/
Author: stefanegli
Date: Wed Jun 26 11:10:09 2019
New Revision: 1862124
URL: http://svn.apache.org/viewvc?rev=1862124&view=rev
Log:
OAK-8351: backported r1862044 from trunk to the 1.10 branch: split the revision GC (RGC) query into two simpler queries to work around a MongoDB bug where the wrong index was chosen, resulting in a long-running _id_ index scan
Added:
jackrabbit/oak/branches/1.10/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSplitDocTest.java
- copied unchanged from r1862044, jackrabbit/oak/trunk/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSplitDocTest.java
Modified:
jackrabbit/oak/branches/1.10/ (props changed)
jackrabbit/oak/branches/1.10/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
Propchange: jackrabbit/oak/branches/1.10/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Wed Jun 26 11:10:09 2019
@@ -1,3 +1,3 @@
/jackrabbit/oak/branches/1.0:1665962
-/jackrabbit/oak/trunk:1850874,1850882,1851236,1851253,1851451,1851533-1851535,1851619,1852052,1852084,1852120,1852135,1852451,1852492-1852493,1852528,1852582,1852584,1852601,1852920,1853083,1853141,1853229,1853393,1853429,1853433,1853441,1853866,1853868,1853870,1853893,1853969,1853997,1854034,1854044,1854055,1854058,1854113,1854373,1854377,1854380,1854385,1854401,1854403,1854455,1854461-1854462,1854466,1854468,1854515,1854533,1854539,1854701,1854773-1854774,1854827,1854848,1854859,1854930,1854990-1854991,1855032,1855221,1855477-1855478,1855776,1855993,1856049,1856056,1856538,1856545,1857000,1857010,1857104,1857159,1857212,1857221,1857238,1857247,1857253,1857294,1857314,1857463,1857480,1857577,1857589,1857592,1857627,1857634-1857635,1857638,1857640,1857687,1857936,1858032,1858053,1858123,1858139,1858385,1858424,1858571,1858578,1858810,1858926,1858931,1859020,1859231,1859292,1859294,1859359,1859533,1859609,1859612,1859619,1859711,1859716,1859772,1859776,1859780,1859843,1859854,1859881,1860120,1860131,1860137,1860202,1860278,1860328,1860330,1860355,1860393,1860442,1860548,1860564-1860565,1861626
+/jackrabbit/oak/trunk:1850874,1850882,1851236,1851253,1851451,1851533-1851535,1851619,1852052,1852084,1852120,1852135,1852451,1852492-1852493,1852528,1852582,1852584,1852601,1852920,1853083,1853141,1853229,1853393,1853429,1853433,1853441,1853866,1853868,1853870,1853893,1853969,1853997,1854034,1854044,1854055,1854058,1854113,1854373,1854377,1854380,1854385,1854401,1854403,1854455,1854461-1854462,1854466,1854468,1854515,1854533,1854539,1854701,1854773-1854774,1854827,1854848,1854859,1854930,1854990-1854991,1855032,1855221,1855477-1855478,1855776,1855993,1856049,1856056,1856538,1856545,1857000,1857010,1857104,1857159,1857212,1857221,1857238,1857247,1857253,1857294,1857314,1857463,1857480,1857577,1857589,1857592,1857627,1857634-1857635,1857638,1857640,1857687,1857936,1858032,1858053,1858123,1858139,1858385,1858424,1858571,1858578,1858810,1858926,1858931,1859020,1859231,1859292,1859294,1859359,1859533,1859609,1859612,1859619,1859711,1859716,1859772,1859776,1859780,1859843,1859854,1859881,1860120,1860131,1860137,1860202,1860278,1860328,1860330,1860355,1860393,1860442,1860548,1860564-1860565,1861626,1862044
/jackrabbit/trunk:1345480
Modified: jackrabbit/oak/branches/1.10/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.10/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java?rev=1862124&r1=1862123&r2=1862124&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.10/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java (original)
+++ jackrabbit/oak/branches/1.10/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java Wed Jun 26 11:10:09 2019
@@ -19,25 +19,29 @@
package org.apache.jackrabbit.oak.plugins.document.mongo;
+import static com.google.common.collect.Iterables.concat;
+import static com.google.common.collect.Iterables.filter;
+import static com.google.common.collect.Iterables.transform;
+import static java.util.Collections.emptyList;
+import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES;
+import static org.apache.jackrabbit.oak.plugins.document.Document.ID;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.DELETED_ONCE;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_IN_SECS;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.PATH;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SD_MAX_REV_TIME_IN_SECS;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SD_TYPE;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.getModifiedInSecs;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.DEFAULT_NO_BRANCH;
+
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.regex.Pattern;
-import com.google.common.base.Function;
-import com.google.common.base.Joiner;
-import com.google.common.base.Predicate;
-import com.google.common.base.StandardSystemProperty;
-import com.google.common.collect.Lists;
-import com.mongodb.BasicDBObject;
-import com.mongodb.Block;
-import com.mongodb.client.FindIterable;
-import com.mongodb.client.MongoCollection;
-import com.mongodb.client.model.Filters;
-
import org.apache.jackrabbit.oak.plugins.document.Document;
import org.apache.jackrabbit.oak.plugins.document.NodeDocument;
+import org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType;
import org.apache.jackrabbit.oak.plugins.document.Revision;
import org.apache.jackrabbit.oak.plugins.document.RevisionVector;
import org.apache.jackrabbit.oak.plugins.document.SplitDocumentCleanUp;
@@ -51,19 +55,16 @@ import org.jetbrains.annotations.NotNull
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import static com.google.common.collect.Iterables.filter;
-import static com.google.common.collect.Iterables.transform;
-import static java.util.Collections.singletonList;
-import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES;
-import static org.apache.jackrabbit.oak.plugins.document.Document.ID;
-import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.DELETED_ONCE;
-import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_IN_SECS;
-import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.PATH;
-import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SD_MAX_REV_TIME_IN_SECS;
-import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SD_TYPE;
-import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType;
-import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.DEFAULT_NO_BRANCH;
-import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.getModifiedInSecs;
+import com.google.common.base.Function;
+import com.google.common.base.Joiner;
+import com.google.common.base.Predicate;
+import com.google.common.base.StandardSystemProperty;
+import com.google.common.collect.Lists;
+import com.mongodb.BasicDBObject;
+import com.mongodb.Block;
+import com.mongodb.client.FindIterable;
+import com.mongodb.client.MongoCollection;
+import com.mongodb.client.model.Filters;
/**
* Mongo specific version of VersionGCSupport which uses mongo queries
@@ -122,19 +123,33 @@ public class MongoVersionGCSupport exten
protected Iterable<NodeDocument> identifyGarbage(final Set<SplitDocType> gcTypes,
final RevisionVector sweepRevs,
final long oldestRevTimeStamp) {
- return filter(transform(getNodeCollection().find(
- createQuery(gcTypes, sweepRevs, oldestRevTimeStamp)),
- new Function<BasicDBObject, NodeDocument>() {
- @Override
- public NodeDocument apply(BasicDBObject input) {
- return store.convertFromDBObject(NODES, input);
- }
- }), new Predicate<NodeDocument>() {
- @Override
- public boolean apply(NodeDocument input) {
- return !isDefaultNoBranchSplitNewerThan(input, sweepRevs);
- }
- });
+ // With OAK-8351 this switched from 1 to 2 queries (see createQueries)
+ // hence we iterate over the queries returned by createQueries
+ List<Bson> queries = createQueries(gcTypes, sweepRevs, oldestRevTimeStamp);
+ Iterable<NodeDocument> allResults = emptyList();
+ for (Bson query : queries) {
+ // this query uses a timeout of 15min. hitting the timeout will
+ // result in an exception which should show up in the log file.
+ // while this doesn't resolve the situation (the restructuring
+ // of the query as part of OAK-8351 does), it nevertheless
+ // makes any future similar problem more visible than long running
+ // queries alone (15min is still long).
+ Iterable<NodeDocument> iterable = filter(transform(getNodeCollection().find(query)
+ .maxTime(15, TimeUnit.MINUTES),
+ new Function<BasicDBObject, NodeDocument>() {
+ @Override
+ public NodeDocument apply(BasicDBObject input) {
+ return store.convertFromDBObject(NODES, input);
+ }
+ }), new Predicate<NodeDocument>() {
+ @Override
+ public boolean apply(NodeDocument input) {
+ return !isDefaultNoBranchSplitNewerThan(input, sweepRevs);
+ }
+ });
+ allResults = concat(allResults, iterable);
+ }
+ return allResults;
}
@Override
@@ -162,32 +177,33 @@ public class MongoVersionGCSupport exten
return result.get(0);
}
- private Bson createQuery(Set<SplitDocType> gcTypes,
+ private List<Bson> createQueries(Set<SplitDocType> gcTypes,
RevisionVector sweepRevs,
long oldestRevTimeStamp) {
- List<Integer> gcTypeCodes = Lists.newArrayList();
+ List<Bson> result = Lists.newArrayList();
List<Bson> orClauses = Lists.newArrayList();
for(SplitDocType type : gcTypes) {
- gcTypeCodes.add(type.typeCode());
- for (Bson query : queriesForType(type, sweepRevs)) {
- orClauses.add(query);
+ if (DEFAULT_NO_BRANCH != type) {
+ orClauses.add(Filters.eq(SD_TYPE, type.typeCode()));
+ } else {
+ result.add(queryForDefaultNoBranch(sweepRevs, getModifiedInSecs(oldestRevTimeStamp)));
}
}
- return Filters.and(
- Filters.in(SD_TYPE, gcTypeCodes),
+ // OAK-8351: this (last) query only contains SD_TYPE and SD_MAX_REV_TIME_IN_SECS
+ // so mongodb should really use that _sdType_1__sdMaxRevTime_1 index
+ result.add(Filters.and(
Filters.or(orClauses),
Filters.lt(SD_MAX_REV_TIME_IN_SECS, getModifiedInSecs(oldestRevTimeStamp))
- );
+ ));
+
+ return result;
}
@NotNull
- private Iterable<Bson> queriesForType(SplitDocType type, RevisionVector sweepRevs) {
- if (type != DEFAULT_NO_BRANCH) {
- return singletonList(Filters.eq(SD_TYPE, type.typeCode()));
- }
+ private Bson queryForDefaultNoBranch(RevisionVector sweepRevs, long maxRevTimeInSecs) {
// default_no_branch split type is special because we can
// only remove those older than sweep rev
- List<Bson> queries = Lists.newArrayList();
+ List<Bson> orClauses = Lists.newArrayList();
for (Revision r : sweepRevs) {
String idSuffix = Utils.getPreviousIdFor("/", r, 0);
idSuffix = idSuffix.substring(idSuffix.lastIndexOf('-'));
@@ -202,13 +218,16 @@ public class MongoVersionGCSupport exten
)
);
- queries.add(Filters.and(
- Filters.eq(SD_TYPE, type.typeCode()),
+ orClauses.add(Filters.and(
idPathClause,
Filters.lt(SD_MAX_REV_TIME_IN_SECS, getModifiedInSecs(r.getTimestamp()))
));
}
- return queries;
+ return Filters.and(
+ Filters.eq(SD_TYPE, DEFAULT_NO_BRANCH.typeCode()),
+ Filters.lt(SD_MAX_REV_TIME_IN_SECS, maxRevTimeInSecs),
+ Filters.or(orClauses)
+ );
}
private void logSplitDocIdsTobeDeleted(Bson query) {
@@ -257,15 +276,21 @@ public class MongoVersionGCSupport exten
@Override
protected int deleteSplitDocuments() {
- Bson query = createQuery(gcTypes, sweepRevs, oldestRevTimeStamp);
+ List<Bson> queries = createQueries(gcTypes, sweepRevs, oldestRevTimeStamp);
if(LOG.isDebugEnabled()){
//if debug level logging is on then determine the id of documents to be deleted
//and log them
- logSplitDocIdsTobeDeleted(query);
+ for (Bson query : queries) {
+ logSplitDocIdsTobeDeleted(query);
+ }
}
- return (int) getNodeCollection().deleteMany(query).getDeletedCount();
+ int cnt = 0;
+ for (Bson query : queries) {
+ cnt += getNodeCollection().deleteMany(query).getDeletedCount();
+ }
+ return cnt;
}
}
}