You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2021/07/26 20:58:29 UTC

[tika] branch main updated: TIKA-3495 -- and update opensearch child key to be the same as the one in Solr. Add an ignored es test.

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new e549e44  TIKA-3495 -- and update opensearch child key to be the same as the one in Solr.  Add an ignored es test.
e549e44 is described below

commit e549e445ce4985246b3b2d677c48a22d5b3372cd
Author: tallison <ta...@apache.org>
AuthorDate: Mon Jul 26 16:57:58 2021 -0400

    TIKA-3495 -- and update opensearch child key to be the same as the one in Solr.  Add an ignored es test.
---
 .../tika/pipes/es/tests/TikaPipesES7Test.java      | 35 ++++++++++++++++++++++
 .../opensearch/tests/TikaPipesOpenSearchTest.java  | 26 ++++++++++++----
 .../pipes/solr/tests/TikaPipesSolrTestBase.java    | 13 +++++---
 .../pipes/emitter/opensearch/OpenSearchClient.java |  7 +++--
 .../pipes/emitter/solr/SolrEmitterDevTest.java     |  2 +-
 5 files changed, 69 insertions(+), 14 deletions(-)

diff --git a/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/java/org/apache/tika/pipes/es/tests/TikaPipesES7Test.java b/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/java/org/apache/tika/pipes/es/tests/TikaPipesES7Test.java
new file mode 100644
index 0000000..062e85f
--- /dev/null
+++ b/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/java/org/apache/tika/pipes/es/tests/TikaPipesES7Test.java
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.es.tests;
+
+import org.junit.Ignore;
+
+import org.apache.tika.pipes.opensearch.tests.TikaPipesOpenSearchTest;
+
+@Ignore("until we figure out if we're supporting es")
+public class TikaPipesES7Test extends TikaPipesOpenSearchTest {
+
+    @Override
+    public String getOpenSearchImageName() {
+        return "docker.elastic.co/elasticsearch/elasticsearch:7.13.4";
+    }
+
+    @Override
+    public String getProtocol() {
+        return "http://";
+    }
+}
diff --git a/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/java/org/apache/tika/pipes/opensearch/tests/TikaPipesOpenSearchTest.java b/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/java/org/apache/tika/pipes/opensearch/tests/TikaPipesOpenSearchTest.java
index 17f2435..7037f5d 100644
--- a/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/java/org/apache/tika/pipes/opensearch/tests/TikaPipesOpenSearchTest.java
+++ b/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/java/org/apache/tika/pipes/opensearch/tests/TikaPipesOpenSearchTest.java
@@ -27,6 +27,7 @@ import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import com.fasterxml.jackson.databind.JsonNode;
 import org.apache.commons.io.IOUtils;
@@ -63,10 +64,14 @@ public class TikaPipesOpenSearchTest {
                     .withExposedPorts(9200)
                     .withEnv("discovery.type", "single-node");
 
-    private String getOpenSearchImageName() {
+    public String getOpenSearchImageName() {
         return "opensearchproject/opensearch:1.0.0";
     }
 
+    public String getProtocol() {
+        return "https://";
+    }
+
     @Before
     public void setupTest() throws Exception {
         setupOpenSearch(openSearchContainer);
@@ -142,8 +147,12 @@ public class TikaPipesOpenSearchTest {
                 results.getJson().get("hits").get("total").get("value").asInt());
         JsonNode source = results.getJson().get("hits").get("hits").get(0).get("_source");
 
-        assertEquals("test_recursive_embedded.docx-9",
-                results.getJson().get("hits").get("hits").get(0).get("_id").asText());
+        Matcher m = Pattern.compile("\\Atest_recursive_embedded" +
+                ".docx_[0-9a-f]{8}-[0-9a-f]{4}-" +
+                "[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\\Z").matcher(
+                results.getJson().get("hits").get("hits").get(0).get("_id").asText()
+        );
+        assertTrue("test_recursive_embedded.docx_$guid", m.find());
         assertEquals("test_recursive_embedded.docx",
                 results.getJson().get("hits").get("hits").get(0).get("_routing").asText());
         assertEquals("test_recursive_embedded.docx",
@@ -201,8 +210,13 @@ public class TikaPipesOpenSearchTest {
                 results.getJson().get("hits").get("total").get("value").asInt());
         JsonNode source = results.getJson().get("hits").get("hits").get(0).get("_source");
 
-        assertEquals("test_recursive_embedded.docx-9",
-                results.getJson().get("hits").get("hits").get(0).get("_id").asText());
+        Matcher m = Pattern.compile("\\Atest_recursive_embedded" +
+                ".docx_[0-9a-f]{8}-[0-9a-f]{4}-" +
+                "[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\\Z").matcher(
+                results.getJson().get("hits").get("hits").get(0).get("_id").asText()
+        );
+        assertTrue("test_recursive_embedded.docx_$guid", m.find());
+
         assertNull("test_recursive_embedded.docx",
                 results.getJson().get("hits").get("hits").get(0).get("_routing"));
         assertNull("test_recursive_embedded.docx",
@@ -293,7 +307,7 @@ public class TikaPipesOpenSearchTest {
         this.openSearch = openSearchContainer;
         openSearchHost = openSearch.getHost();
         openSearchPort = openSearch.getMappedPort(9200);
-        openSearchEndpointBase = "https://" + openSearchHost + ":" + openSearchPort + "/";
+        openSearchEndpointBase = getProtocol() + openSearchHost + ":" + openSearchPort + "/";
         HttpClientFactory httpClientFactory = new HttpClientFactory();
         httpClientFactory.setUserName("admin");
         httpClientFactory.setPassword("admin");
diff --git a/tika-integration-tests/tika-pipes-solr-integration-tests/src/test/java/org/apache/tika/pipes/solr/tests/TikaPipesSolrTestBase.java b/tika-integration-tests/tika-pipes-solr-integration-tests/src/test/java/org/apache/tika/pipes/solr/tests/TikaPipesSolrTestBase.java
index 1729961..38fd306 100644
--- a/tika-integration-tests/tika-pipes-solr-integration-tests/src/test/java/org/apache/tika/pipes/solr/tests/TikaPipesSolrTestBase.java
+++ b/tika-integration-tests/tika-pipes-solr-integration-tests/src/test/java/org/apache/tika/pipes/solr/tests/TikaPipesSolrTestBase.java
@@ -87,7 +87,7 @@ public abstract class TikaPipesSolrTestBase {
                     "<html><body>" + bodyContent + "</body></html>", StandardCharsets.UTF_8);
         }
         FileUtils.copyInputStreamToFile(this.getClass().getResourceAsStream("/embedded/embedded.docx"),
-                new File(testFileFolder, "test-embedded.doc"));
+                new File(testFileFolder, "test-embedded.docx"));
     }
 
     protected void setupSolr(GenericContainer<?> solr) throws Exception {
@@ -112,7 +112,7 @@ public abstract class TikaPipesSolrTestBase {
                 solrClient.add(collection, solrDoc);
             }
             SolrInputDocument embeddedDoc = new SolrInputDocument();
-            String filename = "test-embedded.doc";
+            String filename = "test-embedded.docx";
             embeddedDoc.setField("id", filename);
             embeddedDoc.setField("path", filename);
             solrClient.add(collection, embeddedDoc);
@@ -161,7 +161,6 @@ public abstract class TikaPipesSolrTestBase {
                         SolrEmitter.AttachmentStrategy.PARENT_CHILD,
                         HandlerConfig.PARSE_MODE.RMETA);
         FileUtils.writeStringToFile(tikaConfigFile, tikaConfigXml, StandardCharsets.UTF_8);
-
         TikaCLI.main(new String[] {"-a", "--config=" + tikaConfigFile.getAbsolutePath()});
 
         try (SolrClient solrClient = new LBHttpSolrClient.Builder().withBaseSolrUrls(solrEndpoint)
@@ -174,10 +173,16 @@ public abstract class TikaPipesSolrTestBase {
                     solrClient.query(collection, new SolrQuery("content_s:*initial*")).getResults()
                             .getNumFound());
             Assert.assertEquals(3,
-                    solrClient.query(collection, new SolrQuery("_root_:\"test-embedded.doc\"")).getResults()
+                    solrClient.query(collection, new SolrQuery("_root_:\"test-embedded.docx\"")).getResults()
                             .getNumFound());
+
+            //clean up test-embedded.docx and its children (matched via _root_) so that the
+            //iterator won't try to update them in the next test
+            solrClient.deleteByQuery(collection, "_root_:\"test-embedded.docx\"");
+            solrClient.commit(collection);
         }
 
+
         // update the documents with "update must exist" and run tika async again with "UPDATE_MUST_EXIST".
         // It should not fail, and docs should be updated.
         createTestFiles("updated");
diff --git a/tika-pipes/tika-emitters/tika-emitter-opensearch/src/main/java/org/apache/tika/pipes/emitter/opensearch/OpenSearchClient.java b/tika-pipes/tika-emitters/tika-emitter-opensearch/src/main/java/org/apache/tika/pipes/emitter/opensearch/OpenSearchClient.java
index face044..c3ec807 100644
--- a/tika-pipes/tika-emitters/tika-emitter-opensearch/src/main/java/org/apache/tika/pipes/emitter/opensearch/OpenSearchClient.java
+++ b/tika-pipes/tika-emitters/tika-emitter-opensearch/src/main/java/org/apache/tika/pipes/emitter/opensearch/OpenSearchClient.java
@@ -24,6 +24,7 @@ import java.io.StringWriter;
 import java.net.URLEncoder;
 import java.nio.charset.StandardCharsets;
 import java.util.List;
+import java.util.UUID;
 
 import com.fasterxml.jackson.core.JsonFactory;
 import com.fasterxml.jackson.core.JsonGenerator;
@@ -67,11 +68,11 @@ public class OpenSearchClient {
                 emitKey : null;
 
         for (Metadata metadata : metadataList) {
-            String id = emitKey;
+            StringBuilder id = new StringBuilder(emitKey);
             if (i > 0) {
-                id += "-" + i;
+                id.append("_").append(UUID.randomUUID());
             }
-            String indexJson = getBulkIndexJson(id, routing);
+            String indexJson = getBulkIndexJson(id.toString(), routing);
             sb.append(indexJson).append("\n");
             if (i == 0) {
                 sb.append(metadataToJsonContainer(metadata, attachmentStrategy));
diff --git a/tika-pipes/tika-emitters/tika-emitter-solr/src/test/java/org/apache/tika/pipes/emitter/solr/SolrEmitterDevTest.java b/tika-pipes/tika-emitters/tika-emitter-solr/src/test/java/org/apache/tika/pipes/emitter/solr/SolrEmitterDevTest.java
index 779f8bb..8e60daf 100644
--- a/tika-pipes/tika-emitters/tika-emitter-solr/src/test/java/org/apache/tika/pipes/emitter/solr/SolrEmitterDevTest.java
+++ b/tika-pipes/tika-emitters/tika-emitter-solr/src/test/java/org/apache/tika/pipes/emitter/solr/SolrEmitterDevTest.java
@@ -31,7 +31,7 @@ import org.apache.tika.metadata.filter.FieldNameMappingFilter;
 /**
  * This is meant only for one off development tests with a locally
  * running instance of Solr.  Please add unit tests to the
- * tika-integration-tests/solr-*
+ * tika-integration-tests/tika-pipes-solr-integration-tests
  */
 @Ignore
 public class SolrEmitterDevTest {