You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by mk...@apache.org on 2021/09/09 06:30:31 UTC

[jackrabbit-oak] branch trunk updated: OAK-9552: Don't index except if it's oak:QueryIndexDefinition (#357)

This is an automated email from the ASF dual-hosted git repository.

mkataria pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git


The following commit(s) were added to refs/heads/trunk by this push:
     new 7758523  OAK-9552: Don't index except if it's oak:QueryIndexDefinition (#357)
7758523 is described below

commit 7758523789202bc987673d1b74560266af4341c4
Author: Mohit Kataria <mk...@apache.org>
AuthorDate: Thu Sep 9 12:00:25 2021 +0530

    OAK-9552: Don't index except if it's oak:QueryIndexDefinition (#357)
    
    * OAK-9552: Don't index except if it's oak:QueryIndexDefinition
---
 .../jackrabbit/oak/plugins/index/IndexUpdate.java  |  25 ++-
 .../oak/plugins/index/lucene/InvalidIndexTest.java | 198 +++++++++++++++++++++
 2 files changed, 222 insertions(+), 1 deletion(-)

diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/IndexUpdate.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/IndexUpdate.java
index b4cce77..80a18e0 100644
--- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/IndexUpdate.java
+++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/IndexUpdate.java
@@ -46,6 +46,7 @@ import java.util.Set;
 
 import com.google.common.collect.Iterables;
 
+import org.apache.jackrabbit.JcrConstants;
 import org.apache.jackrabbit.oak.api.CommitFailedException;
 import org.apache.jackrabbit.oak.api.PropertyState;
 import org.apache.jackrabbit.oak.api.Type;
@@ -73,6 +74,14 @@ public class IndexUpdate implements Editor, PathSource {
     private static final Logger log = LoggerFactory.getLogger(IndexUpdate.class);
     private static final String TYPE_ELASTICSEARCH = "elasticsearch";
 
+    //This is used so that wrong index definitions are sparsely logged. After every 1000 indexing cycles, index definitions
+    // with wrong nodetype will be logged.
+    public static final long INDEX_JCR_TYPE_INVALID_LOG_LIMITER = Long.parseLong(System.getProperty("oak.indexer.indexJcrTypeInvalidLogLimiter", "1000"));
+
+    // Initial value is set at indexJcrTypeInvalidLogLimiter so that first logging start on first cycle/update itself.
+    // This counter is cyclically incremented till indexJcrTypeInvalidLogLimiter and then reset to 0
+    private static volatile long cyclicExecutionCount = INDEX_JCR_TYPE_INVALID_LOG_LIMITER;
+
     /**
      * <p>
      * The value of this flag determines the behavior of the IndexUpdate when
@@ -124,7 +133,6 @@ public class IndexUpdate implements Editor, PathSource {
      */
     private final Map<String, Editor> reindex = new HashMap<String, Editor>();
 
-
     public IndexUpdate(
             IndexEditorProvider provider, String async,
             NodeState root, NodeBuilder builder,
@@ -280,10 +288,25 @@ public class IndexUpdate implements Editor, PathSource {
             NodeBuilder definition = definitions.getChildNode(name);
             if (isIncluded(rootState.async, definition)) {
                 String type = definition.getString(TYPE_PROPERTY_NAME);
+                String primaryType = definition.getName(JcrConstants.JCR_PRIMARYTYPE);
                 if (type == null) {
                     // probably not an index def
                     continue;
                 }
+                /*
+                 Log a warning after every indexJcrTypeInvalidLogLimiter cycles of indexer where nodeState changed.
+                 and skip further execution for invalid nodetype of index definition.
+                 */
+                if (!IndexConstants.INDEX_DEFINITIONS_NODE_TYPE.equals(primaryType)) {
+                    // It is a cyclic counter which reset back to 0 after INDEX_JCR_TYPE_INVALID_LOG_LIMITER
+                    // This is to sparsely log this warning.
+                    if ((cyclicExecutionCount >= INDEX_JCR_TYPE_INVALID_LOG_LIMITER)) {
+                        log.warn("jcr:primaryType of index {} should be {} instead of {}", name, IndexConstants.INDEX_DEFINITIONS_NODE_TYPE, primaryType);
+                        cyclicExecutionCount = 0;
+                    }
+                    cyclicExecutionCount++;
+                    continue;
+                }
 
                 boolean shouldReindex = shouldReindex(definition, before, name);
                 String indexPath = getIndexPath(getPath(), name);
diff --git a/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/InvalidIndexTest.java b/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/InvalidIndexTest.java
new file mode 100644
index 0000000..7a2563b
--- /dev/null
+++ b/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/InvalidIndexTest.java
@@ -0,0 +1,198 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.jackrabbit.oak.plugins.index.lucene;
+
+import ch.qos.logback.classic.Level;
+import org.apache.jackrabbit.oak.InitialContent;
+import org.apache.jackrabbit.oak.Oak;
+import org.apache.jackrabbit.oak.api.*;
+import org.apache.jackrabbit.oak.commons.junit.LogCustomizer;
+import org.apache.jackrabbit.oak.plugins.index.AsyncIndexUpdate;
+import org.apache.jackrabbit.oak.plugins.index.IndexUpdate;
+import org.apache.jackrabbit.oak.plugins.index.TrackingCorruptIndexHandler;
+import org.apache.jackrabbit.oak.plugins.index.counter.NodeCounterEditorProvider;
+import org.apache.jackrabbit.oak.plugins.index.nodetype.NodeTypeIndexProvider;
+import org.apache.jackrabbit.oak.plugins.index.property.PropertyIndexEditorProvider;
+import org.apache.jackrabbit.oak.plugins.index.property.PropertyIndexProvider;
+import org.apache.jackrabbit.oak.plugins.memory.MemoryNodeStore;
+import org.apache.jackrabbit.oak.spi.blob.MemoryBlobStore;
+import org.apache.jackrabbit.oak.spi.commit.Observer;
+import org.apache.jackrabbit.oak.spi.nodetype.NodeTypeConstants;
+import org.apache.jackrabbit.oak.spi.query.QueryIndexProvider;
+import org.apache.jackrabbit.oak.spi.security.OpenSecurityProvider;
+import org.apache.jackrabbit.oak.spi.state.NodeStore;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+
+import static com.google.common.collect.Lists.newArrayList;
+import static org.apache.jackrabbit.oak.plugins.index.CompositeIndexEditorProvider.compose;
+import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.INDEX_DEFINITIONS_NAME;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.assertFalse;
+
+/**
+ * Tests: Logging of invalid jcr:primaryType of index.
+ */
+public class InvalidIndexTest {
+
+    private final long INDEX_CORRUPT_INTERVAL_IN_MILLIS = 100;
+    private MemoryBlobStore blobStore;
+
+    protected Root root;
+
+    private AsyncIndexUpdate asyncIndexUpdate;
+
+    private static final String invalidJcrPrimaryTypeIndexName = "myTestIndexWithWrongJcrPrimaryType";
+    private static final String invalidJcrPrimaryTypeForIndex = NodeTypeConstants.NT_OAK_UNSTRUCTURED;
+
+    private NodeStore nodeStore;
+    private LogCustomizer customLogger;
+
+
+    // Setting contentCount to DEFAULT_indexJcrTypeInvalidLogLimiter + 1, so that invalid indexes are logged atleast once
+    private final long contentCount = IndexUpdate.INDEX_JCR_TYPE_INVALID_LOG_LIMITER + 1;
+
+    @Before
+    public void before() throws Exception {
+        ContentSession session = createRepository().login(null, null);
+        root = session.getLatestRoot();
+        customLogger = LogCustomizer
+                .forLogger(IndexUpdate.class.getName())
+                .enable(Level.WARN).create();
+        customLogger.starting();
+    }
+
+    @After
+    public void after() {
+        customLogger.finished();
+    }
+
+    private ContentRepository createRepository() {
+        nodeStore = new MemoryNodeStore();
+        blobStore = new MemoryBlobStore();
+        blobStore.setBlockSizeMin(48);//make it as small as possible
+
+        LuceneIndexEditorProvider luceneIndexEditorProvider = new LuceneIndexEditorProvider();
+        LuceneIndexProvider provider = new LuceneIndexProvider();
+        luceneIndexEditorProvider.setBlobStore(blobStore);
+
+        asyncIndexUpdate = new AsyncIndexUpdate("async", nodeStore, compose(newArrayList(
+                luceneIndexEditorProvider,
+                new NodeCounterEditorProvider()
+        )));
+        TrackingCorruptIndexHandler trackingCorruptIndexHandler = new TrackingCorruptIndexHandler();
+        trackingCorruptIndexHandler.setCorruptInterval(INDEX_CORRUPT_INTERVAL_IN_MILLIS, TimeUnit.MILLISECONDS);
+        asyncIndexUpdate.setCorruptIndexHandler(trackingCorruptIndexHandler);
+        return new Oak(nodeStore)
+                .with(new InitialContent())
+                .with(new OpenSecurityProvider())
+                .with((QueryIndexProvider) provider)
+                .with((Observer) provider)
+                .with(luceneIndexEditorProvider)
+                .with(new PropertyIndexEditorProvider())
+                .with(new NodeTypeIndexProvider())
+                .with(new PropertyIndexProvider())
+                .with(new PropertyIndexEditorProvider())
+                .createContentRepository();
+    }
+
+    private void runEmptyAsyncIndexerCyclesWithoutNewContent(long count) {
+        for (int i = 0; i < count; i++) {
+            asyncIndexUpdate.run();
+        }
+    }
+
+    private void runIndexerCyclesAfterEachNodeCommit(long count, boolean async) throws CommitFailedException {
+        for (int i = 0; i < count; i++) {
+            root.getTree("/").addChild("testNode" + i);
+            root.commit();
+            if (async) {
+                asyncIndexUpdate.run();
+            }
+        }
+    }
+
+    private boolean assertionLogPresent(List<String> logs, String assertionLog) {
+        for (String log : logs) {
+            if (log.equals(assertionLog)) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    private String invalidJcrPrimaryTypeLog() {
+        return "jcr:primaryType of index " + invalidJcrPrimaryTypeIndexName + " should be oak:QueryIndexDefinition instead of " + invalidJcrPrimaryTypeForIndex;
+    }
+
+    /**
+     * Logs warning as index is not a valid index definition
+     */
+    @Test
+    public void loggingInvalidJcrPrimaryType() throws Exception {
+        Tree test1 = root.getTree("/").addChild(INDEX_DEFINITIONS_NAME).addChild(invalidJcrPrimaryTypeIndexName);
+        test1.setProperty("jcr:primaryType", invalidJcrPrimaryTypeForIndex, Type.NAME);
+        test1.setProperty("type", "lucene");
+        test1.setProperty("reindex", true);
+        test1.setProperty("async", "async");
+        root.commit();
+        runIndexerCyclesAfterEachNodeCommit(contentCount, true);
+        runEmptyAsyncIndexerCyclesWithoutNewContent(contentCount);
+
+        List<String> logs = customLogger.getLogs();
+        assertTrue(assertionLogPresent(logs, invalidJcrPrimaryTypeLog()));
+    }
+
+    @Test
+    public void noLoggingIfNodeIsNotIndexDefinition() throws Exception {
+        // This is not a valid index definition. Refer method: IndexUpdate.isIncluded
+        Tree test1 = root.getTree("/").addChild(INDEX_DEFINITIONS_NAME).addChild(invalidJcrPrimaryTypeIndexName);
+        test1.setProperty("jcr:primaryType", invalidJcrPrimaryTypeForIndex, Type.NAME);
+        // Here index definition itself is not valid as there is no type property
+        // test1.setProperty("type", "lucene");
+        test1.setProperty("reindex", true);
+        test1.setProperty("async", "async");
+        root.commit();
+
+        runIndexerCyclesAfterEachNodeCommit(contentCount, true);
+        runEmptyAsyncIndexerCyclesWithoutNewContent(contentCount);
+
+        List<String> logs = customLogger.getLogs();
+        assertFalse(assertionLogPresent(logs, invalidJcrPrimaryTypeLog()));
+    }
+
+    @Test
+    public void loggingInCaseOfSyncLuceneIndexDefinition() throws Exception {
+        Tree test1 = root.getTree("/").addChild(INDEX_DEFINITIONS_NAME).addChild(invalidJcrPrimaryTypeIndexName);
+        test1.setProperty("jcr:primaryType", invalidJcrPrimaryTypeForIndex, Type.NAME);
+        test1.setProperty("type", "lucene");
+        test1.setProperty("reindex", true);
+        // test1.setProperty("async", "async");
+        root.commit();
+
+        runIndexerCyclesAfterEachNodeCommit(contentCount, false);
+        List<String> logs = customLogger.getLogs();
+        assertTrue(assertionLogPresent(logs, invalidJcrPrimaryTypeLog()));
+    }
+}