You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by mk...@apache.org on 2020/04/16 11:34:58 UTC
svn commit: r1876597 - in /jackrabbit/oak/trunk/oak-search-elastic: ./
src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/
src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/
src/test/resources/org/ src/test/reso...
Author: mkataria
Date: Thu Apr 16 11:34:58 2020
New Revision: 1876597
URL: http://svn.apache.org/viewvc?rev=1876597&view=rev
Log:
OAK-9006:Elastic Search: Support text search of a text file via aggregation
Added:
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticIndexAggregationNtFileTest.java (with props)
jackrabbit/oak/trunk/oak-search-elastic/src/test/resources/org/
jackrabbit/oak/trunk/oak-search-elastic/src/test/resources/org/apache/
jackrabbit/oak/trunk/oak-search-elastic/src/test/resources/org/apache/jackrabbit/
jackrabbit/oak/trunk/oak-search-elastic/src/test/resources/org/apache/jackrabbit/oak/
jackrabbit/oak/trunk/oak-search-elastic/src/test/resources/org/apache/jackrabbit/oak/plugins/
jackrabbit/oak/trunk/oak-search-elastic/src/test/resources/org/apache/jackrabbit/oak/plugins/index/
jackrabbit/oak/trunk/oak-search-elastic/src/test/resources/org/apache/jackrabbit/oak/plugins/index/elasticsearch/
jackrabbit/oak/trunk/oak-search-elastic/src/test/resources/org/apache/jackrabbit/oak/plugins/index/elasticsearch/test_nodetypes.cnd (with props)
Modified:
jackrabbit/oak/trunk/oak-search-elastic/pom.xml
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndexProvider.java
Modified: jackrabbit/oak/trunk/oak-search-elastic/pom.xml
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/pom.xml?rev=1876597&r1=1876596&r2=1876597&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/pom.xml (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/pom.xml Thu Apr 16 11:34:58 2020
@@ -134,6 +134,22 @@
<artifactId>lucene-core</artifactId>
<version>${lucene.version}</version>
</dependency>
+ <dependency>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-parsers</artifactId>
+ <version>${tika.version}</version>
+ <scope>test</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>commons-logging</groupId>
+ <artifactId>commons-logging</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-log4j12</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
<dependency>
<groupId>org.apache.jackrabbit</groupId>
Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndexProvider.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndexProvider.java?rev=1876597&r1=1876596&r2=1876597&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndexProvider.java (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndexProvider.java Thu Apr 16 11:34:58 2020
@@ -36,4 +36,5 @@ public class ElasticsearchIndexProvider
public @NotNull List<? extends QueryIndex> getQueryIndexes(NodeState nodeState) {
return Collections.singletonList(new ElasticsearchIndex(elasticsearchConnection, nodeState));
}
+
}
Added: jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticIndexAggregationNtFileTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticIndexAggregationNtFileTest.java?rev=1876597&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticIndexAggregationNtFileTest.java (added)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticIndexAggregationNtFileTest.java Thu Apr 16 11:34:58 2020
@@ -0,0 +1,198 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.index.elasticsearch;
+
+import com.github.dockerjava.api.DockerClient;
+import com.google.common.collect.Lists;
+import org.apache.commons.io.FileUtils;
+import org.apache.jackrabbit.oak.InitialContent;
+import org.apache.jackrabbit.oak.Oak;
+import org.apache.jackrabbit.oak.api.CommitFailedException;
+import org.apache.jackrabbit.oak.api.ContentRepository;
+import org.apache.jackrabbit.oak.api.Root;
+import org.apache.jackrabbit.oak.api.Tree;
+import org.apache.jackrabbit.oak.api.Type;
+import org.apache.jackrabbit.oak.plugins.index.elasticsearch.index.ElasticsearchIndexEditorProvider;
+import org.apache.jackrabbit.oak.plugins.index.elasticsearch.query.ElasticsearchIndexProvider;
+import org.apache.jackrabbit.oak.plugins.index.search.ExtractedTextCache;
+import org.apache.jackrabbit.oak.plugins.index.search.FulltextIndexConstants;
+import org.apache.jackrabbit.oak.plugins.index.search.IndexFormatVersion;
+import org.apache.jackrabbit.oak.plugins.memory.MemoryNodeStore;
+import org.apache.jackrabbit.oak.plugins.name.NamespaceEditorProvider;
+import org.apache.jackrabbit.oak.plugins.nodetype.TypeEditorProvider;
+import org.apache.jackrabbit.oak.plugins.nodetype.write.NodeTypeRegistry;
+import org.apache.jackrabbit.oak.plugins.tree.factories.RootFactory;
+import org.apache.jackrabbit.oak.query.AbstractQueryTest;
+import org.apache.jackrabbit.oak.spi.commit.CompositeEditorProvider;
+import org.apache.jackrabbit.oak.spi.commit.EditorHook;
+import org.apache.jackrabbit.oak.spi.security.OpenSecurityProvider;
+import org.apache.jackrabbit.oak.spi.state.ApplyDiff;
+import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
+import org.apache.jackrabbit.oak.spi.state.NodeState;
+import org.apache.jackrabbit.oak.spi.state.NodeStore;
+import org.elasticsearch.Version;
+import org.jetbrains.annotations.NotNull;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.testcontainers.DockerClientFactory;
+import org.testcontainers.elasticsearch.ElasticsearchContainer;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Calendar;
+import java.util.Collections;
+import java.util.List;
+
+import static com.google.common.collect.Lists.newArrayList;
+import static org.apache.jackrabbit.JcrConstants.JCR_CONTENT;
+import static org.apache.jackrabbit.JcrConstants.JCR_DATA;
+import static org.apache.jackrabbit.JcrConstants.JCR_PRIMARYTYPE;
+import static org.apache.jackrabbit.JcrConstants.JCR_SYSTEM;
+import static org.apache.jackrabbit.JcrConstants.NT_FILE;
+import static org.apache.jackrabbit.oak.api.Type.NAME;
+import static org.apache.jackrabbit.oak.plugins.memory.BinaryPropertyState.binaryProperty;
+import static org.apache.jackrabbit.oak.spi.nodetype.NodeTypeConstants.JCR_NODE_TYPES;
+import static org.junit.Assert.fail;
+import static org.junit.Assume.assumeNotNull;
+
+/**
+ * Verifies that text stored in an {@code nt:file} below a {@code test:Asset} node can be found
+ * through a full-text query when the Elasticsearch index definition aggregates the file content.
+ * <p>
+ * Elasticsearch is provided by a Testcontainers {@link ElasticsearchContainer}; the whole class is
+ * skipped (not failed) when no Docker client can be obtained.
+ */
+public class ElasticIndexAggregationNtFileTest extends AbstractQueryTest {
+    private static final Logger LOG = LoggerFactory.getLogger(ElasticIndexAggregationNtFileTest.class);
+    private static final String NT_TEST_ASSET = "test:Asset";
+
+    /** Classpath resource (relative to this class) declaring the {@code test:*} node types. */
+    private static final String TEST_NODE_TYPES_CND = "test_nodetypes.cnd";
+
+    /** Upper bound on how long the test waits for the indexed content to become searchable. */
+    private static final long QUERY_TIMEOUT_MILLIS = 5000L;
+
+    /** Pause between two query attempts while waiting for the index to catch up. */
+    private static final long QUERY_POLL_INTERVAL_MILLIS = 250L;
+
+    @Rule
+    public final ElasticsearchContainer elastic =
+            new ElasticsearchContainer("docker.elastic.co/elasticsearch/elasticsearch:" + Version.CURRENT);
+
+    /**
+     * Skips every test of this class when a Docker client cannot be created, because the
+     * Elasticsearch container cannot be started in that case.
+     */
+    @BeforeClass
+    public static void beforeMethod() {
+        DockerClient client = null;
+        try {
+            client = DockerClientFactory.instance().client();
+        } catch (Exception e) {
+            LOG.warn("Docker is not available, ElasticIndexAggregationNtFileTest will be skipped", e);
+        }
+        assumeNotNull(client);
+    }
+
+    /**
+     * Builds a repository whose initial content additionally contains the test node types and
+     * which indexes and queries through the Elasticsearch container started for this test.
+     *
+     * @return the content repository used by the inherited query test infrastructure
+     */
+    @Override
+    protected ContentRepository createRepository() {
+        ElasticsearchConnection connection = new ElasticsearchConnection(
+                ElasticsearchConnection.DEFAULT_SCHEME,
+                elastic.getContainerIpAddress(),
+                elastic.getMappedPort(ElasticsearchConnection.DEFAULT_PORT));
+
+        ElasticsearchIndexEditorProvider editorProvider = new ElasticsearchIndexEditorProvider(connection,
+                new ExtractedTextCache(10 * FileUtils.ONE_MB, 100));
+        ElasticsearchIndexProvider indexProvider = new ElasticsearchIndexProvider(connection);
+
+        return new Oak()
+                .with(new InitialContent() {
+
+                    @Override
+                    public void initialize(@NotNull NodeBuilder builder) {
+                        super.initialize(builder);
+                        // registering additional node types for wider testing
+                        registerTestNodeTypes(builder);
+                    }
+
+                })
+                .with(new OpenSecurityProvider())
+                .with(editorProvider)
+                .with(indexProvider)
+                .createContentRepository();
+    }
+
+    /**
+     * Registers the node types declared in {@value #TEST_NODE_TYPES_CND} and applies the resulting
+     * changes to {@code builder}. Any problem fails the running test.
+     *
+     * @param builder builder of the initial repository content that receives the node types
+     */
+    private static void registerTestNodeTypes(NodeBuilder builder) {
+        try (InputStream stream = ElasticIndexAggregationNtFileTest.class
+                .getResourceAsStream(TEST_NODE_TYPES_CND)) {
+            if (stream == null) {
+                // AssertionError is not an Exception, so this is not swallowed by the catch below
+                fail("Node type definitions not found on the classpath: " + TEST_NODE_TYPES_CND);
+            }
+            NodeState base = builder.getNodeState();
+            // Register against a throw-away in-memory store, then replay the difference onto builder.
+            NodeStore store = new MemoryNodeStore(base);
+
+            Root systemRoot = RootFactory.createSystemRoot(store, new EditorHook(
+                    new CompositeEditorProvider(new NamespaceEditorProvider(),
+                            new TypeEditorProvider())), null, null, null);
+            NodeTypeRegistry.register(systemRoot, stream, "testing node types");
+            NodeState target = store.getRoot();
+            target.compareAgainstBaseState(base, new ApplyDiff(builder));
+        } catch (Exception e) {
+            LOG.error("Error while registering required node types. Failing here", e);
+            fail("Error while registering required node types");
+        } finally {
+            printNodeTypes(builder);
+        }
+    }
+
+    /**
+     * Convenience method that logs, at debug level and in sorted order, the names of the node
+     * types currently registered below {@code jcr:system/jcr:nodeTypes}.
+     *
+     * @param builder builder of the repository root to inspect; it is only read
+     */
+    private static void printNodeTypes(NodeBuilder builder) {
+        if (LOG.isDebugEnabled()) {
+            // getChildNode (unlike child) never creates missing nodes, so logging stays side-effect free
+            NodeBuilder nodeTypes = builder.getChildNode(JCR_SYSTEM).getChildNode(JCR_NODE_TYPES);
+            List<String> names = Lists.newArrayList(nodeTypes.getChildNodeNames());
+            Collections.sort(names);
+            for (String name : names) {
+                LOG.debug(name);
+            }
+        }
+    }
+
+    /**
+     * Creates the Elasticsearch test index definition with an aggregate rule that includes the
+     * {@code jcr:content} node of the {@code dam.text.txt} rendition for {@code test:Asset} nodes.
+     *
+     * @throws Exception if the index definition cannot be committed
+     */
+    @Override
+    protected void createTestIndexNode() throws Exception {
+        Tree rootTree = root.getTree("/");
+        Tree indexDefn = createTestIndexNode(rootTree, ElasticsearchIndexDefinition.TYPE_ELASTICSEARCH);
+        indexDefn.setProperty(FulltextIndexConstants.COMPAT_MODE, IndexFormatVersion.V2.getVersion());
+        Tree includeNtFileContent = indexDefn.addChild(FulltextIndexConstants.AGGREGATES)
+                .addChild(NT_TEST_ASSET).addChild("include10");
+        includeNtFileContent.setProperty(FulltextIndexConstants.AGG_RELATIVE_NODE, true);
+        includeNtFileContent.setProperty(FulltextIndexConstants.AGG_PATH, "jcr:content/renditions/dam.text.txt/jcr:content");
+        root.commit();
+    }
+
+    /**
+     * Stores a plain-text rendition below a {@code test:Asset} node and expects the asset to be
+     * returned by a {@code jcr:contains} query on the rendition content.
+     *
+     * @throws CommitFailedException if the test content cannot be committed
+     * @throws InterruptedException if the thread is interrupted while waiting for the index
+     */
+    @Test
+    public void indexNtFileText() throws CommitFailedException, InterruptedException {
+        setTraversalEnabled(false);
+        final String statement = "//element(*, test:Asset)[ " +
+                "jcr:contains(jcr:content/renditions/dam.text.txt/jcr:content, 'quick') ]";
+        List<String> expected = newArrayList();
+        Tree content = root.getTree("/").addChild("content");
+        Tree asset = content.addChild("asset");
+        asset.setProperty(JCR_PRIMARYTYPE, NT_TEST_ASSET, NAME);
+        Tree ntfile = asset.addChild(JCR_CONTENT).addChild("renditions").addChild("dam.text.txt");
+        ntfile.setProperty(JCR_PRIMARYTYPE, NT_FILE, Type.NAME);
+        Tree resource = ntfile.addChild(JCR_CONTENT);
+        resource.setProperty(JCR_PRIMARYTYPE, "nt:resource", Type.NAME);
+        resource.setProperty("jcr:lastModified", Calendar.getInstance());
+        resource.setProperty("jcr:encoding", "UTF-8");
+        resource.setProperty("jcr:mimeType", "text/plain");
+        resource.setProperty(binaryProperty(JCR_DATA,
+                "the quick brown fox jumps over the lazy dog."));
+        root.commit();
+        expected.add("/content/asset");
+        assertQueryEventually(statement, "xpath", expected);
+    }
+
+    /**
+     * Repeats {@code assertQuery} until it passes or {@link #QUERY_TIMEOUT_MILLIS} has elapsed,
+     * instead of always sleeping for the full period before a single attempt. The last attempt is
+     * made no earlier than the timeout, so the wait budget is the same as a fixed sleep.
+     * <p>
+     * NOTE(review): relies on the inherited {@code assertQuery} reporting a mismatch by throwing
+     * {@link AssertionError} - confirm against {@code AbstractQueryTest}.
+     *
+     * @param statement the query statement to execute
+     * @param language the query language of {@code statement}
+     * @param expected the paths the query is expected to return
+     * @throws InterruptedException if the thread is interrupted between two attempts
+     */
+    private void assertQueryEventually(String statement, String language, List<String> expected)
+            throws InterruptedException {
+        final long deadline = System.nanoTime() + QUERY_TIMEOUT_MILLIS * 1000000L;
+        while (true) {
+            try {
+                assertQuery(statement, language, expected);
+                return;
+            } catch (AssertionError e) {
+                if (System.nanoTime() - deadline >= 0) {
+                    throw e;
+                }
+                Thread.sleep(QUERY_POLL_INTERVAL_MILLIS);
+            }
+        }
+    }
+}
Propchange: jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticIndexAggregationNtFileTest.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: jackrabbit/oak/trunk/oak-search-elastic/src/test/resources/org/apache/jackrabbit/oak/plugins/index/elasticsearch/test_nodetypes.cnd
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/test/resources/org/apache/jackrabbit/oak/plugins/index/elasticsearch/test_nodetypes.cnd?rev=1876597&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/test/resources/org/apache/jackrabbit/oak/plugins/index/elasticsearch/test_nodetypes.cnd (added)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/test/resources/org/apache/jackrabbit/oak/plugins/index/elasticsearch/test_nodetypes.cnd Thu Apr 16 11:34:58 2020
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Namespace mapping for the "test" prefix used by the node types declared below.
+<test='http://jackrabbit.apache.org/oak-test/ns/1.0'>
+
+
+// Content node type for test pages: an nt:unstructured with orderable child nodes.
+[test:PageContent] > nt:unstructured
+ orderable
+
+// Test page: a hierarchy node whose primary item is jcr:content; any other child is allowed.
+[test:Page] > nt:hierarchyNode
+ orderable primaryitem jcr:content
+ + jcr:content (nt:base) = nt:unstructured
+ + * (nt:base) = nt:base version
+
+// Content node type for test assets, declaring a "metadata" child node.
+[test:AssetContent] > nt:unstructured
+ + metadata (nt:unstructured)
+
+// Test asset: the node type queried by ElasticIndexAggregationNtFileTest.
+[test:Asset] > nt:hierarchyNode
+ + jcr:content (test:AssetContent) = test:AssetContent copy primary
+ + * (nt:base) = nt:base version
Propchange: jackrabbit/oak/trunk/oak-search-elastic/src/test/resources/org/apache/jackrabbit/oak/plugins/index/elasticsearch/test_nodetypes.cnd
------------------------------------------------------------------------------
svn:eol-style = native