You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2021/07/29 18:48:08 UTC
[tika] 02/02: TIKA-3508 and update CHANGES.txt
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
commit 4e151a73ec7ce1f5a8e08f4cf8d011117615d02c
Author: tallison <ta...@apache.org>
AuthorDate: Thu Jul 29 14:47:45 2021 -0400
TIKA-3508 and update CHANGES.txt
---
CHANGES.txt | 9 +++++++--
.../tika/pipes/opensearch/tests/TikaPipesOpenSearchTest.java | 6 +++---
.../apache/tika/pipes/emitter/opensearch/OpenSearchClient.java | 2 +-
tika-pipes/tika-emitters/tika-emitter-s3/pom.xml | 1 -
tika-pipes/tika-emitters/tika-emitter-solr/pom.xml | 1 +
.../java/org/apache/tika/pipes/emitter/solr/SolrEmitter.java | 4 ++--
tika-pipes/tika-pipes-iterators/tika-pipes-iterator-s3/pom.xml | 1 +
7 files changed, 15 insertions(+), 9 deletions(-)
diff --git a/CHANGES.txt b/CHANGES.txt
index fe0ea2a..6898dee 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,5 +1,12 @@
Release 2.0.1 - ???
+ * Fix serialization of embedded docs in OpenSearch emitter
+ and fix embedded documents not being indexed in some use
+ cases in the Solr emitter (TIKA-3490).
+
+ * Add pipesClientId system property to PipesServer so that each
+ forked process can log to its own logger (TIKA-3480).
+
* Add DateNormalizingMetadataFilter to let users ensure that all dates
emitted to Solr/OpenSearch are in UTC. Users can configure which
timezone they'd like to use in cases where the file format does
@@ -9,8 +16,6 @@ Release 2.0.1 - ???
the SKIP or CONCATENATE attachment strategy, modify the
parseMode in the pipesiterators or in the FetchEmitTuple (TIKA-3494).
- * Fix serialization of embedded docs in OpenSearch emitter (TIKA-3490).
-
Release 2.0.0 - 07/07/2021
* Cleanup of fetcher integration with tika-server.
diff --git a/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/java/org/apache/tika/pipes/opensearch/tests/TikaPipesOpenSearchTest.java b/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/java/org/apache/tika/pipes/opensearch/tests/TikaPipesOpenSearchTest.java
index 7037f5d..986ce7a 100644
--- a/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/java/org/apache/tika/pipes/opensearch/tests/TikaPipesOpenSearchTest.java
+++ b/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/java/org/apache/tika/pipes/opensearch/tests/TikaPipesOpenSearchTest.java
@@ -148,7 +148,7 @@ public class TikaPipesOpenSearchTest {
JsonNode source = results.getJson().get("hits").get("hits").get(0).get("_source");
Matcher m = Pattern.compile("\\Atest_recursive_embedded" +
- ".docx_[0-9a-f]{8}-[0-9a-f]{4}-" +
+ ".docx-[0-9a-f]{8}-[0-9a-f]{4}-" +
"[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\\Z").matcher(
results.getJson().get("hits").get("hits").get(0).get("_id").asText()
);
@@ -211,11 +211,11 @@ public class TikaPipesOpenSearchTest {
JsonNode source = results.getJson().get("hits").get("hits").get(0).get("_source");
Matcher m = Pattern.compile("\\Atest_recursive_embedded" +
- ".docx_[0-9a-f]{8}-[0-9a-f]{4}-" +
+ ".docx-[0-9a-f]{8}-[0-9a-f]{4}-" +
"[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\\Z").matcher(
results.getJson().get("hits").get("hits").get(0).get("_id").asText()
);
- assertTrue("test_recursive_embedded.docx_$guid", m.find());
+ assertTrue("test_recursive_embedded.docx-$guid", m.find());
assertNull("test_recursive_embedded.docx",
results.getJson().get("hits").get("hits").get(0).get("_routing"));
diff --git a/tika-pipes/tika-emitters/tika-emitter-opensearch/src/main/java/org/apache/tika/pipes/emitter/opensearch/OpenSearchClient.java b/tika-pipes/tika-emitters/tika-emitter-opensearch/src/main/java/org/apache/tika/pipes/emitter/opensearch/OpenSearchClient.java
index c3ec807..b30a648 100644
--- a/tika-pipes/tika-emitters/tika-emitter-opensearch/src/main/java/org/apache/tika/pipes/emitter/opensearch/OpenSearchClient.java
+++ b/tika-pipes/tika-emitters/tika-emitter-opensearch/src/main/java/org/apache/tika/pipes/emitter/opensearch/OpenSearchClient.java
@@ -70,7 +70,7 @@ public class OpenSearchClient {
for (Metadata metadata : metadataList) {
StringBuilder id = new StringBuilder(emitKey);
if (i > 0) {
- id.append("_").append(UUID.randomUUID());
+ id.append("-").append(UUID.randomUUID());
}
String indexJson = getBulkIndexJson(id.toString(), routing);
sb.append(indexJson).append("\n");
diff --git a/tika-pipes/tika-emitters/tika-emitter-s3/pom.xml b/tika-pipes/tika-emitters/tika-emitter-s3/pom.xml
index d34112e..e51d37d 100644
--- a/tika-pipes/tika-emitters/tika-emitter-s3/pom.xml
+++ b/tika-pipes/tika-emitters/tika-emitter-s3/pom.xml
@@ -85,7 +85,6 @@
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j-impl</artifactId>
</dependency>
-
</dependencies>
<build>
diff --git a/tika-pipes/tika-emitters/tika-emitter-solr/pom.xml b/tika-pipes/tika-emitters/tika-emitter-solr/pom.xml
index d613e57..142cd94 100644
--- a/tika-pipes/tika-emitters/tika-emitter-solr/pom.xml
+++ b/tika-pipes/tika-emitters/tika-emitter-solr/pom.xml
@@ -48,6 +48,7 @@
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j-impl</artifactId>
+ <scope>provided</scope>
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>
diff --git a/tika-pipes/tika-emitters/tika-emitter-solr/src/main/java/org/apache/tika/pipes/emitter/solr/SolrEmitter.java b/tika-pipes/tika-emitters/tika-emitter-solr/src/main/java/org/apache/tika/pipes/emitter/solr/SolrEmitter.java
index 12ec5a5..8e1615b 100644
--- a/tika-pipes/tika-emitters/tika-emitter-solr/src/main/java/org/apache/tika/pipes/emitter/solr/SolrEmitter.java
+++ b/tika-pipes/tika-emitters/tika-emitter-solr/src/main/java/org/apache/tika/pipes/emitter/solr/SolrEmitter.java
@@ -101,7 +101,7 @@ public class SolrEmitter extends AbstractEmitter implements Initializable {
SolrInputDocument childSolrInputDocument = new SolrInputDocument();
Metadata m = metadataList.get(i);
childSolrInputDocument
- .setField(idField, emitKey + "_" + UUID.randomUUID().toString());
+ .setField(idField, emitKey + "-" + UUID.randomUUID().toString());
addMetadataToSolrInputDocument(m, childSolrInputDocument, updateStrategy);
solrInputDocument.addChildDocument(childSolrInputDocument);
}
@@ -113,7 +113,7 @@ public class SolrEmitter extends AbstractEmitter implements Initializable {
SolrInputDocument childSolrInputDocument = new SolrInputDocument();
Metadata m = metadataList.get(i);
childSolrInputDocument.setField(idField,
- solrInputDocument.get(idField) + "-" + UUID.randomUUID().toString());
+ solrInputDocument.get(idField).getValue() + "-" + UUID.randomUUID().toString());
addMetadataToSolrInputDocument(m, childSolrInputDocument, updateStrategy);
docsToUpdate.add(childSolrInputDocument);
}
diff --git a/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-s3/pom.xml b/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-s3/pom.xml
index 8c40259..65917fb 100644
--- a/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-s3/pom.xml
+++ b/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-s3/pom.xml
@@ -80,6 +80,7 @@
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j-impl</artifactId>
<version>${log4j2.version}</version>
+ <scope>provided</scope>
</dependency>
</dependencies>
<build>