You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by mk...@apache.org on 2020/04/16 11:34:58 UTC

svn commit: r1876597 - in /jackrabbit/oak/trunk/oak-search-elastic: ./ src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ src/test/resources/org/ src/test/reso...

Author: mkataria
Date: Thu Apr 16 11:34:58 2020
New Revision: 1876597

URL: http://svn.apache.org/viewvc?rev=1876597&view=rev
Log:
OAK-9006:Elastic Search: Support text search of a text file via aggregation

Added:
    jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticIndexAggregationNtFileTest.java   (with props)
    jackrabbit/oak/trunk/oak-search-elastic/src/test/resources/org/
    jackrabbit/oak/trunk/oak-search-elastic/src/test/resources/org/apache/
    jackrabbit/oak/trunk/oak-search-elastic/src/test/resources/org/apache/jackrabbit/
    jackrabbit/oak/trunk/oak-search-elastic/src/test/resources/org/apache/jackrabbit/oak/
    jackrabbit/oak/trunk/oak-search-elastic/src/test/resources/org/apache/jackrabbit/oak/plugins/
    jackrabbit/oak/trunk/oak-search-elastic/src/test/resources/org/apache/jackrabbit/oak/plugins/index/
    jackrabbit/oak/trunk/oak-search-elastic/src/test/resources/org/apache/jackrabbit/oak/plugins/index/elasticsearch/
    jackrabbit/oak/trunk/oak-search-elastic/src/test/resources/org/apache/jackrabbit/oak/plugins/index/elasticsearch/test_nodetypes.cnd   (with props)
Modified:
    jackrabbit/oak/trunk/oak-search-elastic/pom.xml
    jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndexProvider.java

Modified: jackrabbit/oak/trunk/oak-search-elastic/pom.xml
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/pom.xml?rev=1876597&r1=1876596&r2=1876597&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/pom.xml (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/pom.xml Thu Apr 16 11:34:58 2020
@@ -134,6 +134,22 @@
       <artifactId>lucene-core</artifactId>
       <version>${lucene.version}</version>
     </dependency>
+    <dependency>
+      <groupId>org.apache.tika</groupId>
+      <artifactId>tika-parsers</artifactId>
+      <version>${tika.version}</version>
+      <scope>test</scope>
+      <exclusions>
+        <exclusion>
+          <groupId>commons-logging</groupId>
+          <artifactId>commons-logging</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.slf4j</groupId>
+          <artifactId>slf4j-log4j12</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
 
     <dependency>
       <groupId>org.apache.jackrabbit</groupId>

Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndexProvider.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndexProvider.java?rev=1876597&r1=1876596&r2=1876597&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndexProvider.java (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndexProvider.java Thu Apr 16 11:34:58 2020
@@ -36,4 +36,5 @@ public class ElasticsearchIndexProvider
     public @NotNull List<? extends QueryIndex> getQueryIndexes(NodeState nodeState) {
         return Collections.singletonList(new ElasticsearchIndex(elasticsearchConnection, nodeState));
     }
+
 }

Added: jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticIndexAggregationNtFileTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticIndexAggregationNtFileTest.java?rev=1876597&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticIndexAggregationNtFileTest.java (added)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticIndexAggregationNtFileTest.java Thu Apr 16 11:34:58 2020
@@ -0,0 +1,198 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.index.elasticsearch;
+
+import com.github.dockerjava.api.DockerClient;
+import com.google.common.collect.Lists;
+import org.apache.commons.io.FileUtils;
+import org.apache.jackrabbit.oak.InitialContent;
+import org.apache.jackrabbit.oak.Oak;
+import org.apache.jackrabbit.oak.api.CommitFailedException;
+import org.apache.jackrabbit.oak.api.ContentRepository;
+import org.apache.jackrabbit.oak.api.Root;
+import org.apache.jackrabbit.oak.api.Tree;
+import org.apache.jackrabbit.oak.api.Type;
+import org.apache.jackrabbit.oak.plugins.index.elasticsearch.index.ElasticsearchIndexEditorProvider;
+import org.apache.jackrabbit.oak.plugins.index.elasticsearch.query.ElasticsearchIndexProvider;
+import org.apache.jackrabbit.oak.plugins.index.search.ExtractedTextCache;
+import org.apache.jackrabbit.oak.plugins.index.search.FulltextIndexConstants;
+import org.apache.jackrabbit.oak.plugins.index.search.IndexFormatVersion;
+import org.apache.jackrabbit.oak.plugins.memory.MemoryNodeStore;
+import org.apache.jackrabbit.oak.plugins.name.NamespaceEditorProvider;
+import org.apache.jackrabbit.oak.plugins.nodetype.TypeEditorProvider;
+import org.apache.jackrabbit.oak.plugins.nodetype.write.NodeTypeRegistry;
+import org.apache.jackrabbit.oak.plugins.tree.factories.RootFactory;
+import org.apache.jackrabbit.oak.query.AbstractQueryTest;
+import org.apache.jackrabbit.oak.spi.commit.CompositeEditorProvider;
+import org.apache.jackrabbit.oak.spi.commit.EditorHook;
+import org.apache.jackrabbit.oak.spi.security.OpenSecurityProvider;
+import org.apache.jackrabbit.oak.spi.state.ApplyDiff;
+import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
+import org.apache.jackrabbit.oak.spi.state.NodeState;
+import org.apache.jackrabbit.oak.spi.state.NodeStore;
+import org.elasticsearch.Version;
+import org.jetbrains.annotations.NotNull;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.testcontainers.DockerClientFactory;
+import org.testcontainers.elasticsearch.ElasticsearchContainer;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Calendar;
+import java.util.Collections;
+import java.util.List;
+
+import static com.google.common.collect.Lists.newArrayList;
+import static org.apache.jackrabbit.JcrConstants.JCR_CONTENT;
+import static org.apache.jackrabbit.JcrConstants.JCR_DATA;
+import static org.apache.jackrabbit.JcrConstants.JCR_PRIMARYTYPE;
+import static org.apache.jackrabbit.JcrConstants.JCR_SYSTEM;
+import static org.apache.jackrabbit.JcrConstants.NT_FILE;
+import static org.apache.jackrabbit.oak.api.Type.NAME;
+import static org.apache.jackrabbit.oak.plugins.memory.BinaryPropertyState.binaryProperty;
+import static org.apache.jackrabbit.oak.spi.nodetype.NodeTypeConstants.JCR_NODE_TYPES;
+import static org.junit.Assert.fail;
+import static org.junit.Assume.assumeNotNull;
+
+public class ElasticIndexAggregationNtFileTest extends AbstractQueryTest {
+    private static final Logger LOG = LoggerFactory.getLogger(ElasticIndexAggregationNtFileTest.class);
+    private static final String NT_TEST_ASSET = "test:Asset";
+
+    @Rule
+    public final ElasticsearchContainer elastic =
+            new ElasticsearchContainer("docker.elastic.co/elasticsearch/elasticsearch:" + Version.CURRENT);
+
+    @BeforeClass
+    public static void beforeMethod() {
+        DockerClient client = null; // stays null when no Docker client can be obtained
+        try {
+            client = DockerClientFactory.instance().client();
+        } catch (Exception e) {
+            LOG.warn("Docker is not available, ElasticIndexAggregationNtFileTest will be skipped", e);
+        }
+        assumeNotNull(client); // null client => assumption fails => class is skipped, not failed
+    }
+
+    @Override
+    protected ContentRepository createRepository() {
+        ElasticsearchConnection coordinate = new ElasticsearchConnection( // targets the container started by the rule
+                ElasticsearchConnection.DEFAULT_SCHEME,
+                elastic.getContainerIpAddress(),
+                elastic.getMappedPort(ElasticsearchConnection.DEFAULT_PORT));
+        // The index editor provider and the query index provider are built on the same connection.
+        ElasticsearchIndexEditorProvider editorProvider = new ElasticsearchIndexEditorProvider(coordinate,
+                new ExtractedTextCache(10 * FileUtils.ONE_MB, 100));
+        ElasticsearchIndexProvider provider = new ElasticsearchIndexProvider(coordinate);
+        // Repository = customised initial content + open security + the two Elasticsearch providers.
+        return new Oak()
+                .with(new InitialContent() {
+                    // Adds the node types declared in test_nodetypes.cnd to the default initial content.
+                    @Override
+                    public void initialize(@NotNull NodeBuilder builder) {
+                        super.initialize(builder);
+                        // Register the additional node types (e.g. test:Asset) that the test content relies on.
+                        InputStream stream = null;
+                        try {
+                            stream = ElasticIndexAggregationNtFileTest.class
+                                    .getResourceAsStream("test_nodetypes.cnd");
+                            NodeState base = builder.getNodeState();
+                            NodeStore store = new MemoryNodeStore(base); // scratch store seeded with the current state
+
+                            Root root = RootFactory.createSystemRoot(store, new EditorHook(
+                                    new CompositeEditorProvider(new NamespaceEditorProvider(),
+                                            new TypeEditorProvider())), null, null, null);
+                            NodeTypeRegistry.register(root, stream, "testing node types");
+                            NodeState target = store.getRoot();
+                            target.compareAgainstBaseState(base, new ApplyDiff(builder)); // replay target-vs-base diff onto builder
+                        } catch (Exception e) {
+                            LOG.error("Error while registering required node types. Failing here", e);
+                            fail("Error while registering required node types");
+                        } finally {
+                            printNodeTypes(builder); // in finally: runs whether or not registration succeeded
+                            if (stream != null) {
+                                try {
+                                    stream.close();
+                                } catch (IOException e) {
+                                    LOG.debug("Ignoring exception on stream closing.", e);
+                                }
+                            }
+                        }
+                    }
+
+                })
+                .with(new OpenSecurityProvider())
+                .with(editorProvider)
+                .with(provider)
+                .createContentRepository();
+    }
+
+    /**
+     * Logs at debug level, in sorted order, the child node names found below JCR_SYSTEM/JCR_NODE_TYPES.
+     *
+     * @param builder builder whose JCR_SYSTEM/JCR_NODE_TYPES children are listed; nothing is logged unless debug is on
+     */
+    private static void printNodeTypes(NodeBuilder builder) {
+        if (!LOG.isDebugEnabled()) {
+            return;
+        }
+        List<String> names = Lists.newArrayList(builder.child(JCR_SYSTEM).child(JCR_NODE_TYPES).getChildNodeNames());
+        Collections.sort(names);
+        for (String name : names) {
+            LOG.debug(name);
+        }
+    }
+
+    @Override
+    protected void createTestIndexNode() throws Exception {
+        Tree rootTree = root.getTree("/");
+        Tree definition = createTestIndexNode(rootTree, ElasticsearchIndexDefinition.TYPE_ELASTICSEARCH);
+        definition.setProperty(FulltextIndexConstants.COMPAT_MODE, IndexFormatVersion.V2.getVersion());
+        Tree aggregates = definition.addChild(FulltextIndexConstants.AGGREGATES);
+        Tree include = aggregates.addChild(NT_TEST_ASSET).addChild("include10"); // aggregate rule for test:Asset
+        include.setProperty(FulltextIndexConstants.AGG_RELATIVE_NODE, true);
+        include.setProperty(FulltextIndexConstants.AGG_PATH, "jcr:content/renditions/dam.text.txt/jcr:content");
+        root.commit(); // persist the index definition
+    }
+
+    @Test
+    public void indexNtFileText() throws CommitFailedException, InterruptedException {
+        setTraversalEnabled(false);
+        // Content under test: /content/asset (test:Asset) holding an nt:file text rendition with a plain-text binary.
+        Tree asset = root.getTree("/").addChild("content").addChild("asset");
+        asset.setProperty(JCR_PRIMARYTYPE, NT_TEST_ASSET, NAME);
+        Tree renditions = asset.addChild(JCR_CONTENT).addChild("renditions");
+        Tree textFile = renditions.addChild("dam.text.txt");
+        textFile.setProperty(JCR_PRIMARYTYPE, NT_FILE, Type.NAME);
+        Tree fileContent = textFile.addChild(JCR_CONTENT);
+        fileContent.setProperty(JCR_PRIMARYTYPE, "nt:resource", Type.NAME);
+        fileContent.setProperty("jcr:lastModified", Calendar.getInstance());
+        fileContent.setProperty("jcr:encoding", "UTF-8");
+        fileContent.setProperty("jcr:mimeType", "text/plain");
+        fileContent.setProperty(binaryProperty(JCR_DATA, "the quick brown fox jumps over the lazy dog."));
+        root.commit();
+        Thread.sleep(5000); // presumably gives Elasticsearch time to index the commit - TODO confirm
+        String xpath = "//element(*, test:Asset)[ "
+                + "jcr:contains(jcr:content/renditions/dam.text.txt/jcr:content, 'quick') ]";
+        List<String> expected = newArrayList();
+        expected.add("/content/asset");
+        assertQuery(xpath, "xpath", expected);
+    }
+}

Propchange: jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticIndexAggregationNtFileTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: jackrabbit/oak/trunk/oak-search-elastic/src/test/resources/org/apache/jackrabbit/oak/plugins/index/elasticsearch/test_nodetypes.cnd
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/test/resources/org/apache/jackrabbit/oak/plugins/index/elasticsearch/test_nodetypes.cnd?rev=1876597&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/test/resources/org/apache/jackrabbit/oak/plugins/index/elasticsearch/test_nodetypes.cnd (added)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/test/resources/org/apache/jackrabbit/oak/plugins/index/elasticsearch/test_nodetypes.cnd Thu Apr 16 11:34:58 2020
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+<test='http://jackrabbit.apache.org/oak-test/ns/1.0'>
+
+
+[test:PageContent] > nt:unstructured
+  orderable
+
+[test:Page] > nt:hierarchyNode
+  orderable primaryitem jcr:content
+  + jcr:content (nt:base) = nt:unstructured
+  + * (nt:base) = nt:base version
+
+[test:AssetContent] > nt:unstructured
+ + metadata (nt:unstructured)
+
+[test:Asset] > nt:hierarchyNode
+ + jcr:content (test:AssetContent) = test:AssetContent copy primary
+ + * (nt:base) = nt:base version

Propchange: jackrabbit/oak/trunk/oak-search-elastic/src/test/resources/org/apache/jackrabbit/oak/plugins/index/elasticsearch/test_nodetypes.cnd
------------------------------------------------------------------------------
    svn:eol-style = native