You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2021/07/26 20:58:29 UTC
[tika] branch main updated: TIKA-3495 -- and update opensearch
child key to be the same as the one in Solr. Add an ignored es test.
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new e549e44 TIKA-3495 -- and update opensearch child key to be the same as the one in Solr. Add an ignored es test.
e549e44 is described below
commit e549e445ce4985246b3b2d677c48a22d5b3372cd
Author: tallison <ta...@apache.org>
AuthorDate: Mon Jul 26 16:57:58 2021 -0400
TIKA-3495 -- and update opensearch child key to be the same as the one in Solr. Add an ignored es test.
---
.../tika/pipes/es/tests/TikaPipesES7Test.java | 35 ++++++++++++++++++++++
.../opensearch/tests/TikaPipesOpenSearchTest.java | 26 ++++++++++++----
.../pipes/solr/tests/TikaPipesSolrTestBase.java | 13 +++++---
.../pipes/emitter/opensearch/OpenSearchClient.java | 7 +++--
.../pipes/emitter/solr/SolrEmitterDevTest.java | 2 +-
5 files changed, 69 insertions(+), 14 deletions(-)
diff --git a/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/java/org/apache/tika/pipes/es/tests/TikaPipesES7Test.java b/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/java/org/apache/tika/pipes/es/tests/TikaPipesES7Test.java
new file mode 100644
index 0000000..062e85f
--- /dev/null
+++ b/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/java/org/apache/tika/pipes/es/tests/TikaPipesES7Test.java
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.es.tests;
+
+import org.junit.Ignore;
+
+import org.apache.tika.pipes.opensearch.tests.TikaPipesOpenSearchTest;
+
+@Ignore("until we figure out if we're supporting es") // disabled for now; the reason string records the open question
+public class TikaPipesES7Test extends TikaPipesOpenSearchTest { // inherits the OpenSearch pipes tests, overriding only the image name and protocol
+
+ @Override
+ public String getOpenSearchImageName() { // replaces the parent's "opensearchproject/opensearch:1.0.0" image name
+ return "docker.elastic.co/elasticsearch/elasticsearch:7.13.4";
+ }
+
+ @Override
+ public String getProtocol() { // the parent returns "https://" and prefixes the endpoint base with this value
+ return "http://";
+ }
+}
diff --git a/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/java/org/apache/tika/pipes/opensearch/tests/TikaPipesOpenSearchTest.java b/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/java/org/apache/tika/pipes/opensearch/tests/TikaPipesOpenSearchTest.java
index 17f2435..7037f5d 100644
--- a/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/java/org/apache/tika/pipes/opensearch/tests/TikaPipesOpenSearchTest.java
+++ b/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/java/org/apache/tika/pipes/opensearch/tests/TikaPipesOpenSearchTest.java
@@ -27,6 +27,7 @@ import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.regex.Matcher;
+import java.util.regex.Pattern;
import com.fasterxml.jackson.databind.JsonNode;
import org.apache.commons.io.IOUtils;
@@ -63,10 +64,14 @@ public class TikaPipesOpenSearchTest {
.withExposedPorts(9200)
.withEnv("discovery.type", "single-node");
- private String getOpenSearchImageName() {
+ public String getOpenSearchImageName() {
return "opensearchproject/opensearch:1.0.0";
}
+ public String getProtocol() {
+ return "https://";
+ }
+
@Before
public void setupTest() throws Exception {
setupOpenSearch(openSearchContainer);
@@ -142,8 +147,12 @@ public class TikaPipesOpenSearchTest {
results.getJson().get("hits").get("total").get("value").asInt());
JsonNode source = results.getJson().get("hits").get("hits").get(0).get("_source");
- assertEquals("test_recursive_embedded.docx-9",
- results.getJson().get("hits").get("hits").get(0).get("_id").asText());
+ Matcher m = Pattern.compile("\\Atest_recursive_embedded" +
+ ".docx_[0-9a-f]{8}-[0-9a-f]{4}-" +
+ "[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\\Z").matcher(
+ results.getJson().get("hits").get("hits").get(0).get("_id").asText()
+ );
+ assertTrue("test_recursive_embedded.docx_$guid", m.find());
assertEquals("test_recursive_embedded.docx",
results.getJson().get("hits").get("hits").get(0).get("_routing").asText());
assertEquals("test_recursive_embedded.docx",
@@ -201,8 +210,13 @@ public class TikaPipesOpenSearchTest {
results.getJson().get("hits").get("total").get("value").asInt());
JsonNode source = results.getJson().get("hits").get("hits").get(0).get("_source");
- assertEquals("test_recursive_embedded.docx-9",
- results.getJson().get("hits").get("hits").get(0).get("_id").asText());
+ Matcher m = Pattern.compile("\\Atest_recursive_embedded" +
+ ".docx_[0-9a-f]{8}-[0-9a-f]{4}-" +
+ "[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\\Z").matcher(
+ results.getJson().get("hits").get("hits").get(0).get("_id").asText()
+ );
+ assertTrue("test_recursive_embedded.docx_$guid", m.find());
+
assertNull("test_recursive_embedded.docx",
results.getJson().get("hits").get("hits").get(0).get("_routing"));
assertNull("test_recursive_embedded.docx",
@@ -293,7 +307,7 @@ public class TikaPipesOpenSearchTest {
this.openSearch = openSearchContainer;
openSearchHost = openSearch.getHost();
openSearchPort = openSearch.getMappedPort(9200);
- openSearchEndpointBase = "https://" + openSearchHost + ":" + openSearchPort + "/";
+ openSearchEndpointBase = getProtocol() + openSearchHost + ":" + openSearchPort + "/";
HttpClientFactory httpClientFactory = new HttpClientFactory();
httpClientFactory.setUserName("admin");
httpClientFactory.setPassword("admin");
diff --git a/tika-integration-tests/tika-pipes-solr-integration-tests/src/test/java/org/apache/tika/pipes/solr/tests/TikaPipesSolrTestBase.java b/tika-integration-tests/tika-pipes-solr-integration-tests/src/test/java/org/apache/tika/pipes/solr/tests/TikaPipesSolrTestBase.java
index 1729961..38fd306 100644
--- a/tika-integration-tests/tika-pipes-solr-integration-tests/src/test/java/org/apache/tika/pipes/solr/tests/TikaPipesSolrTestBase.java
+++ b/tika-integration-tests/tika-pipes-solr-integration-tests/src/test/java/org/apache/tika/pipes/solr/tests/TikaPipesSolrTestBase.java
@@ -87,7 +87,7 @@ public abstract class TikaPipesSolrTestBase {
"<html><body>" + bodyContent + "</body></html>", StandardCharsets.UTF_8);
}
FileUtils.copyInputStreamToFile(this.getClass().getResourceAsStream("/embedded/embedded.docx"),
- new File(testFileFolder, "test-embedded.doc"));
+ new File(testFileFolder, "test-embedded.docx"));
}
protected void setupSolr(GenericContainer<?> solr) throws Exception {
@@ -112,7 +112,7 @@ public abstract class TikaPipesSolrTestBase {
solrClient.add(collection, solrDoc);
}
SolrInputDocument embeddedDoc = new SolrInputDocument();
- String filename = "test-embedded.doc";
+ String filename = "test-embedded.docx";
embeddedDoc.setField("id", filename);
embeddedDoc.setField("path", filename);
solrClient.add(collection, embeddedDoc);
@@ -161,7 +161,6 @@ public abstract class TikaPipesSolrTestBase {
SolrEmitter.AttachmentStrategy.PARENT_CHILD,
HandlerConfig.PARSE_MODE.RMETA);
FileUtils.writeStringToFile(tikaConfigFile, tikaConfigXml, StandardCharsets.UTF_8);
-
TikaCLI.main(new String[] {"-a", "--config=" + tikaConfigFile.getAbsolutePath()});
try (SolrClient solrClient = new LBHttpSolrClient.Builder().withBaseSolrUrls(solrEndpoint)
@@ -174,10 +173,16 @@ public abstract class TikaPipesSolrTestBase {
solrClient.query(collection, new SolrQuery("content_s:*initial*")).getResults()
.getNumFound());
Assert.assertEquals(3,
- solrClient.query(collection, new SolrQuery("_root_:\"test-embedded.doc\"")).getResults()
+ solrClient.query(collection, new SolrQuery("_root_:\"test-embedded.docx\"")).getResults()
.getNumFound());
+
+ //clean up test-embedded.docx so that the iterator won't try to update its children
+ //in the next test
+ solrClient.deleteById(collection, "_root_:\"test-embedded.docx\"");
+ solrClient.commit(collection);
}
+
// update the documents with "update must exist" and run tika async again with "UPDATE_MUST_EXIST".
// It should not fail, and docs should be updated.
createTestFiles("updated");
diff --git a/tika-pipes/tika-emitters/tika-emitter-opensearch/src/main/java/org/apache/tika/pipes/emitter/opensearch/OpenSearchClient.java b/tika-pipes/tika-emitters/tika-emitter-opensearch/src/main/java/org/apache/tika/pipes/emitter/opensearch/OpenSearchClient.java
index face044..c3ec807 100644
--- a/tika-pipes/tika-emitters/tika-emitter-opensearch/src/main/java/org/apache/tika/pipes/emitter/opensearch/OpenSearchClient.java
+++ b/tika-pipes/tika-emitters/tika-emitter-opensearch/src/main/java/org/apache/tika/pipes/emitter/opensearch/OpenSearchClient.java
@@ -24,6 +24,7 @@ import java.io.StringWriter;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.List;
+import java.util.UUID;
import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonGenerator;
@@ -67,11 +68,11 @@ public class OpenSearchClient {
emitKey : null;
for (Metadata metadata : metadataList) {
- String id = emitKey;
+ StringBuilder id = new StringBuilder(emitKey);
if (i > 0) {
- id += "-" + i;
+ id.append("_").append(UUID.randomUUID());
}
- String indexJson = getBulkIndexJson(id, routing);
+ String indexJson = getBulkIndexJson(id.toString(), routing);
sb.append(indexJson).append("\n");
if (i == 0) {
sb.append(metadataToJsonContainer(metadata, attachmentStrategy));
diff --git a/tika-pipes/tika-emitters/tika-emitter-solr/src/test/java/org/apache/tika/pipes/emitter/solr/SolrEmitterDevTest.java b/tika-pipes/tika-emitters/tika-emitter-solr/src/test/java/org/apache/tika/pipes/emitter/solr/SolrEmitterDevTest.java
index 779f8bb..8e60daf 100644
--- a/tika-pipes/tika-emitters/tika-emitter-solr/src/test/java/org/apache/tika/pipes/emitter/solr/SolrEmitterDevTest.java
+++ b/tika-pipes/tika-emitters/tika-emitter-solr/src/test/java/org/apache/tika/pipes/emitter/solr/SolrEmitterDevTest.java
@@ -31,7 +31,7 @@ import org.apache.tika.metadata.filter.FieldNameMappingFilter;
/**
* This is meant only for one off development tests with a locally
* running instance of Solr. Please add unit tests to the
- * tika-integration-tests/solr-*
+ * tika-integration-tests/tika-pipes-solr-integration-tests
*/
@Ignore
public class SolrEmitterDevTest {