You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by mk...@apache.org on 2019/10/04 08:55:12 UTC

svn commit: r1867967 - in /jackrabbit/oak/trunk: oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/ oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/ oak-search-elastic/src/main/java/org/apache/jackrabbit/oa...

Author: mkataria
Date: Fri Oct  4 08:55:12 2019
New Revision: 1867967

URL: http://svn.apache.org/viewvc?rev=1867967&view=rev
Log:
OAK-8642: Add warn logs if we add/update a string property larger than 100KB (index)

Added:
    jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMakerLargeStringPropertiesLogTest.java
Modified:
    jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java
    jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocumentMaker.java
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java

Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java?rev=1867967&r1=1867966&r2=1867967&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java Fri Oct  4 08:55:12 2019
@@ -102,30 +102,24 @@ public class LuceneDocumentMaker extends
     }
 
     @Override
-    protected boolean indexTypedProperty(Document doc, PropertyState property, String pname, PropertyDefinition pd) {
+    protected void indexTypedProperty(Document doc, PropertyState property, String pname, PropertyDefinition pd, int i) {
         int tag = property.getType().tag();
-        boolean fieldAdded = false;
-        for (int i = 0; i < property.count(); i++) {
-            Field f;
-            if (tag == Type.LONG.tag()) {
-                f = new LongField(pname, property.getValue(Type.LONG, i), Field.Store.NO);
-            } else if (tag == Type.DATE.tag()) {
-                String date = property.getValue(Type.DATE, i);
-                f = new LongField(pname, FieldFactory.dateToLong(date), Field.Store.NO);
-            } else if (tag == Type.DOUBLE.tag()) {
-                f = new DoubleField(pname, property.getValue(Type.DOUBLE, i), Field.Store.NO);
-            } else if (tag == Type.BOOLEAN.tag()) {
-                f = new StringField(pname, property.getValue(Type.BOOLEAN, i).toString(), Field.Store.NO);
-            } else {
-                f = new StringField(pname, property.getValue(Type.STRING, i), Field.Store.NO);
-            }
 
-            if (includePropertyValue(property, i, pd)){
-                doc.add(f);
-                fieldAdded = true;
-            }
+        Field f;
+        if (tag == Type.LONG.tag()) {
+            f = new LongField(pname, property.getValue(Type.LONG, i), Field.Store.NO);
+        } else if (tag == Type.DATE.tag()) {
+            String date = property.getValue(Type.DATE, i);
+            f = new LongField(pname, FieldFactory.dateToLong(date), Field.Store.NO);
+        } else if (tag == Type.DOUBLE.tag()) {
+            f = new DoubleField(pname, property.getValue(Type.DOUBLE, i), Field.Store.NO);
+        } else if (tag == Type.BOOLEAN.tag()) {
+            f = new StringField(pname, property.getValue(Type.BOOLEAN, i).toString(), Field.Store.NO);
+        } else {
+            f = new StringField(pname, property.getValue(Type.STRING, i), Field.Store.NO);
         }
-        return fieldAdded;
+
+        doc.add(f);
     }
 
     @Override

Added: jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMakerLargeStringPropertiesLogTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMakerLargeStringPropertiesLogTest.java?rev=1867967&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMakerLargeStringPropertiesLogTest.java (added)
+++ jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMakerLargeStringPropertiesLogTest.java Fri Oct  4 08:55:12 2019
@@ -0,0 +1,189 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jackrabbit.oak.plugins.index.lucene;
+
+import ch.qos.logback.classic.LoggerContext;
+import ch.qos.logback.classic.spi.ILoggingEvent;
+import ch.qos.logback.core.read.ListAppender;
+import org.apache.jackrabbit.oak.api.Type;
+import org.apache.jackrabbit.oak.commons.junit.TemporarySystemProperty;
+import org.apache.jackrabbit.oak.plugins.index.lucene.util.IndexDefinitionBuilder;
+import org.apache.jackrabbit.oak.plugins.index.search.spi.editor.FulltextDocumentMaker;
+import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
+import org.apache.jackrabbit.oak.spi.state.NodeState;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.security.InvalidParameterException;
+
+import static java.util.Arrays.asList;
+import static org.apache.jackrabbit.oak.InitialContentHelper.INITIAL_CONTENT;
+import static org.apache.jackrabbit.oak.plugins.memory.EmptyNodeState.EMPTY_NODE;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+public class LuceneDocumentMakerLargeStringPropertiesLogTest {
+
+    ListAppender<ILoggingEvent> listAppender = null;
+    private final String nodeImplLogger = LuceneDocumentMaker.class.getName();
+    private final String warnMessage = "String length: {} for property: {} at Node: {} is greater than configured value {}";
+    private String customStringPropertyThresholdLimit = "9";
+    private String smallStringProperty = "1234567";
+    private String largeStringPropertyAsPerCustomThreshold = "1234567890";
+
+    @Rule
+    public TemporarySystemProperty temporarySystemProperty = new TemporarySystemProperty();
+
+    @Before
+    public void loggingAppenderStart() {
+        LoggerContext context = (LoggerContext) LoggerFactory.getILoggerFactory();
+        listAppender = new ListAppender<>();
+        listAppender.start();
+        context.getLogger(nodeImplLogger).addAppender(listAppender);
+    }
+
+    @After
+    public void loggingAppenderStop() {
+        listAppender.stop();
+    }
+
+    private void setThresholdLimit(String threshold) {
+        System.setProperty(FulltextDocumentMaker.WARN_LOG_STRING_SIZE_THRESHOLD_KEY, threshold);
+    }
+
+    private LuceneDocumentMaker addPropertyAccordingToType(NodeBuilder test, Type type, String... str) throws IOException {
+        NodeState root = INITIAL_CONTENT;
+        IndexDefinitionBuilder builder = new IndexDefinitionBuilder();
+        builder.indexRule("nt:base")
+                .property("foo")
+                .propertyIndex()
+                .analyzed()
+                .valueExcludedPrefixes("/jobs");
+
+        LuceneIndexDefinition defn = LuceneIndexDefinition.newBuilder(root, builder.build(), "/foo").build();
+        LuceneDocumentMaker docMaker = new LuceneDocumentMaker(defn,
+                defn.getApplicableIndexingRule("nt:base"), "/x");
+
+        if (Type.STRINGS == type) {
+            test.setProperty("foo", asList(str), Type.STRINGS);
+        } else if (Type.STRING == type && str.length == 1) {
+            test.setProperty("foo", str[0]);
+        } else {
+            throw new InvalidParameterException();
+        }
+        return docMaker;
+    }
+
+    @Test
+    public void testDefaultThreshold() throws IOException {
+        NodeBuilder test = EMPTY_NODE.builder();
+        LuceneDocumentMaker docMaker = addPropertyAccordingToType(test, Type.STRING, largeStringPropertyAsPerCustomThreshold);
+        assertNotNull(docMaker.makeDocument(test.getNodeState()));
+        assertFalse(isWarnMessagePresent(listAppender));
+    }
+
+    @Test
+    public void testNoLoggingOnAddingSmallStringWithCustomThreshold() throws IOException {
+        setThresholdLimit(customStringPropertyThresholdLimit);
+        NodeBuilder test = EMPTY_NODE.builder();
+        LuceneDocumentMaker docMaker = addPropertyAccordingToType(test, Type.STRING, smallStringProperty);
+        assertNotNull(docMaker.makeDocument(test.getNodeState()));
+        assertFalse(isWarnMessagePresent(listAppender));
+    }
+
+    @Test
+    public void testNoLoggingOnAddingSmallStringArrayWithCustomThreshold() throws IOException {
+        setThresholdLimit(customStringPropertyThresholdLimit);
+        NodeBuilder test = EMPTY_NODE.builder();
+        LuceneDocumentMaker docMaker = addPropertyAccordingToType(test, Type.STRINGS, smallStringProperty, smallStringProperty);
+        assertNotNull(docMaker.makeDocument(test.getNodeState()));
+        assertFalse(isWarnMessagePresent(listAppender));
+    }
+
+    @Test
+    public void testNoLoggingOnAddingSmallStringArrayWithoutCustomThreshold() throws IOException {
+        NodeBuilder test = EMPTY_NODE.builder();
+        LuceneDocumentMaker docMaker = addPropertyAccordingToType(test, Type.STRINGS, smallStringProperty, smallStringProperty);
+        assertNotNull(docMaker.makeDocument(test.getNodeState()));
+        assertFalse(isWarnMessagePresent(listAppender));
+    }
+
+    @Test
+    public void testLoggingOnAddingLargeStringWithCustomThreshold() throws IOException {
+        setThresholdLimit(customStringPropertyThresholdLimit);
+        NodeBuilder test = EMPTY_NODE.builder();
+        LuceneDocumentMaker docMaker = addPropertyAccordingToType(test, Type.STRING, largeStringPropertyAsPerCustomThreshold);
+        assertNotNull(docMaker.makeDocument(test.getNodeState()));
+        assertTrue(isWarnMessagePresent(listAppender));
+    }
+
+    @Test
+    public void testLoggingOnAddingLargeStringWithoutCustomThreshold() throws IOException {
+        //  setThresholdLimit(null);
+        NodeBuilder test = EMPTY_NODE.builder();
+        LuceneDocumentMaker docMaker = addPropertyAccordingToType(test, Type.STRING, smallStringProperty);
+        assertNotNull(docMaker.makeDocument(test.getNodeState()));
+        assertFalse(isWarnMessagePresent(listAppender));
+    }
+
+    @Test
+    public void testLoggingOnAddingLargeStringArrayOneLargePropertyWithoutCustomThreshold() throws IOException {
+        NodeBuilder test = EMPTY_NODE.builder();
+        LuceneDocumentMaker docMaker = addPropertyAccordingToType(test, Type.STRINGS, largeStringPropertyAsPerCustomThreshold, smallStringProperty);
+        assertNotNull(docMaker.makeDocument(test.getNodeState()));
+        assertFalse(isWarnMessagePresent(listAppender));
+    }
+
+    @Test
+    public void testLoggingOnAddingLargeStringArrayOneLargePropertyWithCustomThreshold() throws IOException {
+        setThresholdLimit(customStringPropertyThresholdLimit);
+        NodeBuilder test = EMPTY_NODE.builder();
+        LuceneDocumentMaker docMaker = addPropertyAccordingToType(test, Type.STRINGS, largeStringPropertyAsPerCustomThreshold, smallStringProperty);
+        assertNotNull(docMaker.makeDocument(test.getNodeState()));
+        assertTrue(isWarnMessagePresent(listAppender));
+        assertEquals(2, listAppender.list.size());
+    }
+
+    @Test
+    public void testLoggingOnAddingLargeStringArrayTwoLargePropertiesWithCustomThreshold() throws IOException {
+        setThresholdLimit(customStringPropertyThresholdLimit);
+        NodeBuilder test = EMPTY_NODE.builder();
+        LuceneDocumentMaker docMaker = addPropertyAccordingToType(test, Type.STRINGS, largeStringPropertyAsPerCustomThreshold, largeStringPropertyAsPerCustomThreshold);
+        assertNotNull(docMaker.makeDocument(test.getNodeState()));
+        assertTrue(isWarnMessagePresent(listAppender));
+        // number of logs equal twice the number of large properties once for fulltext indexing
+        // and once for property indexing.
+        assertEquals(4, listAppender.list.size());
+    }
+
+    private boolean isWarnMessagePresent(ListAppender<ILoggingEvent> listAppender) {
+        for (ILoggingEvent loggingEvent : listAppender.list) {
+            if (loggingEvent.getMessage().contains(warnMessage)) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+}
\ No newline at end of file

Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocumentMaker.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocumentMaker.java?rev=1867967&r1=1867966&r2=1867967&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocumentMaker.java (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocumentMaker.java Fri Oct  4 08:55:12 2019
@@ -115,29 +115,23 @@ public class ElasticsearchDocumentMaker
     }
 
     @Override
-    protected boolean indexTypedProperty(ElasticsearchDocument doc, PropertyState property, String pname, PropertyDefinition pd) {
+    protected void indexTypedProperty(ElasticsearchDocument doc, PropertyState property, String pname, PropertyDefinition pd, int i) {
         int tag = property.getType().tag();
-        boolean fieldAdded = false;
-        for (int i = 0; i < property.count(); i++) {
-            Object f;
-            if (tag == Type.LONG.tag()) {
-                f = property.getValue(Type.LONG, i);
-            } else if (tag == Type.DATE.tag()) {
-                f = property.getValue(Type.DATE, i);
-            } else if (tag == Type.DOUBLE.tag()) {
-                f = property.getValue(Type.DOUBLE, i);
-            } else if (tag == Type.BOOLEAN.tag()) {
-                f = property.getValue(Type.BOOLEAN, i).toString();
-            } else {
-                f = property.getValue(Type.STRING, i);
-            }
 
-            if (includePropertyValue(property, i, pd)){
-                doc.addProperty(pname, f);
-                fieldAdded = true;
-            }
+        Object f;
+        if (tag == Type.LONG.tag()) {
+            f = property.getValue(Type.LONG, i);
+        } else if (tag == Type.DATE.tag()) {
+            f = property.getValue(Type.DATE, i);
+        } else if (tag == Type.DOUBLE.tag()) {
+            f = property.getValue(Type.DOUBLE, i);
+        } else if (tag == Type.BOOLEAN.tag()) {
+            f = property.getValue(Type.BOOLEAN, i).toString();
+        } else {
+            f = property.getValue(Type.STRING, i);
         }
-        return fieldAdded;
+
+        doc.addProperty(pname, f);
     }
 
     @Override

Modified: jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java?rev=1867967&r1=1867966&r2=1867967&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java (original)
+++ jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java Fri Oct  4 08:55:12 2019
@@ -57,11 +57,14 @@ import static org.apache.jackrabbit.oak.
 public abstract class FulltextDocumentMaker<D> implements DocumentMaker<D> {
 
     private final Logger log = LoggerFactory.getLogger(getClass());
+    public static final String WARN_LOG_STRING_SIZE_THRESHOLD_KEY = "oak.repository.property.index.logWarnStringSizeThreshold";
+    private static final int DEFAULT_WARN_LOG_STRING_SIZE_THRESHOLD_VALUE = 102400;
 
     private final FulltextBinaryTextExtractor textExtractor;
     protected final IndexDefinition definition;
     protected final IndexDefinition.IndexingRule indexingRule;
     protected final String path;
+    private final int logWarnStringSizeThreshold;
 
     public FulltextDocumentMaker(@Nullable FulltextBinaryTextExtractor textExtractor,
                                @NotNull IndexDefinition definition,
@@ -71,6 +74,8 @@ public abstract class FulltextDocumentMa
         this.definition = checkNotNull(definition);
         this.indexingRule = checkNotNull(indexingRule);
         this.path = checkNotNull(path);
+        this.logWarnStringSizeThreshold = Integer.getInteger(WARN_LOG_STRING_SIZE_THRESHOLD_KEY,
+                DEFAULT_WARN_LOG_STRING_SIZE_THRESHOLD_VALUE);
     }
 
     protected abstract D initDoc();
@@ -93,7 +98,7 @@ public abstract class FulltextDocumentMa
 
     protected abstract void indexFulltextValue(D doc, String value);
 
-    protected abstract boolean indexTypedProperty(D doc, PropertyState property, String pname, PropertyDefinition pd);
+    protected abstract void indexTypedProperty(D doc, PropertyState property, String pname, PropertyDefinition pd, int index);
 
     protected abstract void indexAncestors(D doc, String path);
 
@@ -105,6 +110,13 @@ public abstract class FulltextDocumentMa
 
     protected abstract void indexNodeName(D doc, String value);
 
+    protected void logLargeStringProperties(String propertyName, String value) {
+        if (value.length() > logWarnStringSizeThreshold) {
+            log.warn("String length: {} for property: {} at Node: {} is greater than configured value {}",
+                    value.length(), propertyName, path, logWarnStringSizeThreshold);
+        }
+    }
+
     @Nullable
     public D makeDocument(NodeState state) throws IOException {
         return makeDocument(state, false, Collections.<PropertyState>emptyList());
@@ -231,6 +243,7 @@ public abstract class FulltextDocumentMa
             if (pd.fulltextEnabled() && includeTypeForFullText) {
                 for (String value : property.getValue(Type.STRINGS)) {
 
+                    logLargeStringProperties(property.getName(), value);
                     if (!includePropertyValue(value, pd)){
                         continue;
                     }
@@ -281,7 +294,22 @@ public abstract class FulltextDocumentMa
     protected abstract void indexSimilarityStrings(D doc, PropertyDefinition pd, String value) throws IOException;
 
     private boolean addTypedFields(D doc, PropertyState property, String pname, PropertyDefinition pd) {
-        return indexTypedProperty(doc, property, pname, pd);
+        int tag = property.getType().tag();
+        boolean fieldAdded = false;
+
+        for (int i = 0; i < property.count(); i++) {
+            if (!includePropertyValue(property, i, pd)) {
+                continue;
+            }
+
+            indexTypedProperty(doc, property, pname, pd, i);
+            fieldAdded = true;
+
+            if (tag == Type.STRING.tag()) {
+                logLargeStringProperties(property.getName(), property.getValue(Type.STRING, i));
+            }
+        }
+        return fieldAdded;
     }
 
     private boolean addTypedOrderedFields(D doc,