You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2021/07/29 18:48:08 UTC

[tika] 02/02: TIKA-3508 and update CHANGES.txt

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 4e151a73ec7ce1f5a8e08f4cf8d011117615d02c
Author: tallison <ta...@apache.org>
AuthorDate: Thu Jul 29 14:47:45 2021 -0400

    TIKA-3508 and update CHANGES.txt
---
 CHANGES.txt                                                      | 9 +++++++--
 .../tika/pipes/opensearch/tests/TikaPipesOpenSearchTest.java     | 6 +++---
 .../apache/tika/pipes/emitter/opensearch/OpenSearchClient.java   | 2 +-
 tika-pipes/tika-emitters/tika-emitter-s3/pom.xml                 | 1 -
 tika-pipes/tika-emitters/tika-emitter-solr/pom.xml               | 1 +
 .../java/org/apache/tika/pipes/emitter/solr/SolrEmitter.java     | 4 ++--
 tika-pipes/tika-pipes-iterators/tika-pipes-iterator-s3/pom.xml   | 1 +
 7 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index fe0ea2a..6898dee 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,5 +1,12 @@
 Release 2.0.1 - ???
 
+   * Fix serialization of embedded docs in OpenSearch emitter
+     and fix embedded documents not being indexed in some use
+     cases in the Solr emitter (TIKA-3490).
+
+   * Add pipesClientId system property to PipesServer so that each
+     forked process can log to its own logger (TIKA-3480).
+
    * Add DateNormalizingMetadataFilter to let users ensure that all dates
      emitted to Solr/OpenSearch are in UTC. Users can configure which
      timezone they'd like to use in cases where the file format does
@@ -9,8 +16,6 @@ Release 2.0.1 - ???
      the SKIP or CONCATENATE attachment strategy, modify the
      parseMode in the pipesiterators or in the FetchEmitTuple (TIKA-3494).
 
-   * Fix serialization of embedded docs in OpenSearch emitter (TIKA-3490).
-
 Release 2.0.0 - 07/07/2021
 
    * Cleanup of fetcher integration with tika-server.
diff --git a/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/java/org/apache/tika/pipes/opensearch/tests/TikaPipesOpenSearchTest.java b/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/java/org/apache/tika/pipes/opensearch/tests/TikaPipesOpenSearchTest.java
index 7037f5d..986ce7a 100644
--- a/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/java/org/apache/tika/pipes/opensearch/tests/TikaPipesOpenSearchTest.java
+++ b/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/java/org/apache/tika/pipes/opensearch/tests/TikaPipesOpenSearchTest.java
@@ -148,7 +148,7 @@ public class TikaPipesOpenSearchTest {
         JsonNode source = results.getJson().get("hits").get("hits").get(0).get("_source");
 
         Matcher m = Pattern.compile("\\Atest_recursive_embedded" +
-                ".docx_[0-9a-f]{8}-[0-9a-f]{4}-" +
+                ".docx-[0-9a-f]{8}-[0-9a-f]{4}-" +
                 "[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\\Z").matcher(
                 results.getJson().get("hits").get("hits").get(0).get("_id").asText()
         );
@@ -211,11 +211,11 @@ public class TikaPipesOpenSearchTest {
         JsonNode source = results.getJson().get("hits").get("hits").get(0).get("_source");
 
         Matcher m = Pattern.compile("\\Atest_recursive_embedded" +
-                ".docx_[0-9a-f]{8}-[0-9a-f]{4}-" +
+                ".docx-[0-9a-f]{8}-[0-9a-f]{4}-" +
                 "[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\\Z").matcher(
                 results.getJson().get("hits").get("hits").get(0).get("_id").asText()
         );
-        assertTrue("test_recursive_embedded.docx_$guid", m.find());
+        assertTrue("test_recursive_embedded.docx-$guid", m.find());
 
         assertNull("test_recursive_embedded.docx",
                 results.getJson().get("hits").get("hits").get(0).get("_routing"));
diff --git a/tika-pipes/tika-emitters/tika-emitter-opensearch/src/main/java/org/apache/tika/pipes/emitter/opensearch/OpenSearchClient.java b/tika-pipes/tika-emitters/tika-emitter-opensearch/src/main/java/org/apache/tika/pipes/emitter/opensearch/OpenSearchClient.java
index c3ec807..b30a648 100644
--- a/tika-pipes/tika-emitters/tika-emitter-opensearch/src/main/java/org/apache/tika/pipes/emitter/opensearch/OpenSearchClient.java
+++ b/tika-pipes/tika-emitters/tika-emitter-opensearch/src/main/java/org/apache/tika/pipes/emitter/opensearch/OpenSearchClient.java
@@ -70,7 +70,7 @@ public class OpenSearchClient {
         for (Metadata metadata : metadataList) {
             StringBuilder id = new StringBuilder(emitKey);
             if (i > 0) {
-                id.append("_").append(UUID.randomUUID());
+                id.append("-").append(UUID.randomUUID());
             }
             String indexJson = getBulkIndexJson(id.toString(), routing);
             sb.append(indexJson).append("\n");
diff --git a/tika-pipes/tika-emitters/tika-emitter-s3/pom.xml b/tika-pipes/tika-emitters/tika-emitter-s3/pom.xml
index d34112e..e51d37d 100644
--- a/tika-pipes/tika-emitters/tika-emitter-s3/pom.xml
+++ b/tika-pipes/tika-emitters/tika-emitter-s3/pom.xml
@@ -85,7 +85,6 @@
       <groupId>org.apache.logging.log4j</groupId>
       <artifactId>log4j-slf4j-impl</artifactId>
     </dependency>
-
   </dependencies>
 
   <build>
diff --git a/tika-pipes/tika-emitters/tika-emitter-solr/pom.xml b/tika-pipes/tika-emitters/tika-emitter-solr/pom.xml
index d613e57..142cd94 100644
--- a/tika-pipes/tika-emitters/tika-emitter-solr/pom.xml
+++ b/tika-pipes/tika-emitters/tika-emitter-solr/pom.xml
@@ -48,6 +48,7 @@
     <dependency>
       <groupId>org.apache.logging.log4j</groupId>
       <artifactId>log4j-slf4j-impl</artifactId>
+      <scope>provided</scope>
     </dependency>
     <dependency>
       <groupId>org.eclipse.jetty</groupId>
diff --git a/tika-pipes/tika-emitters/tika-emitter-solr/src/main/java/org/apache/tika/pipes/emitter/solr/SolrEmitter.java b/tika-pipes/tika-emitters/tika-emitter-solr/src/main/java/org/apache/tika/pipes/emitter/solr/SolrEmitter.java
index 12ec5a5..8e1615b 100644
--- a/tika-pipes/tika-emitters/tika-emitter-solr/src/main/java/org/apache/tika/pipes/emitter/solr/SolrEmitter.java
+++ b/tika-pipes/tika-emitters/tika-emitter-solr/src/main/java/org/apache/tika/pipes/emitter/solr/SolrEmitter.java
@@ -101,7 +101,7 @@ public class SolrEmitter extends AbstractEmitter implements Initializable {
                 SolrInputDocument childSolrInputDocument = new SolrInputDocument();
                 Metadata m = metadataList.get(i);
                 childSolrInputDocument
-                        .setField(idField, emitKey + "_" + UUID.randomUUID().toString());
+                        .setField(idField, emitKey + "-" + UUID.randomUUID().toString());
                 addMetadataToSolrInputDocument(m, childSolrInputDocument, updateStrategy);
                 solrInputDocument.addChildDocument(childSolrInputDocument);
             }
@@ -113,7 +113,7 @@ public class SolrEmitter extends AbstractEmitter implements Initializable {
                 SolrInputDocument childSolrInputDocument = new SolrInputDocument();
                 Metadata m = metadataList.get(i);
                 childSolrInputDocument.setField(idField,
-                        solrInputDocument.get(idField) + "-" + UUID.randomUUID().toString());
+                        solrInputDocument.get(idField).getValue() + "-" + UUID.randomUUID().toString());
                 addMetadataToSolrInputDocument(m, childSolrInputDocument, updateStrategy);
                 docsToUpdate.add(childSolrInputDocument);
             }
diff --git a/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-s3/pom.xml b/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-s3/pom.xml
index 8c40259..65917fb 100644
--- a/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-s3/pom.xml
+++ b/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-s3/pom.xml
@@ -80,6 +80,7 @@
       <groupId>org.apache.logging.log4j</groupId>
       <artifactId>log4j-slf4j-impl</artifactId>
       <version>${log4j2.version}</version>
+      <scope>provided</scope>
     </dependency>
   </dependencies>
   <build>