You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2022/09/29 15:34:29 UTC

[tika] branch main updated: TIKA-3865 -- add a CompositePipesReporter

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 50d648c29 TIKA-3865 -- add a CompositePipesReporter
50d648c29 is described below

commit 50d648c294b80e5601f777e8b13284b8cc4494ee
Author: tallison <ta...@apache.org>
AuthorDate: Thu Sep 29 11:34:11 2022 -0400

    TIKA-3865 -- add a CompositePipesReporter
---
 .../java/org/apache/tika/config/ConfigBase.java    | 34 +++++++++++
 .../apache/tika/pipes/CompositePipesReporter.java  | 65 ++++++++++++++++++++++
 .../apache/tika/pipes/async/MockReporterTest.java  | 13 +++++
 .../org/apache/tika/pipes/async/TIKA-3865.xml      | 45 +++++++++++++++
 4 files changed, 157 insertions(+)

diff --git a/tika-core/src/main/java/org/apache/tika/config/ConfigBase.java b/tika-core/src/main/java/org/apache/tika/config/ConfigBase.java
index 0d145c18a..6af662dfc 100644
--- a/tika-core/src/main/java/org/apache/tika/config/ConfigBase.java
+++ b/tika-core/src/main/java/org/apache/tika/config/ConfigBase.java
@@ -294,6 +294,40 @@ public abstract class ConfigBase {
     }
 
     private static void tryToSetList(Object object, Node param) throws TikaConfigException {
+        if (param.hasAttributes() && param.getAttributes().getNamedItem("class") != null) {
+            tryToSetClassList(object, param);
+        } else {
+            tryToSetStringList(object, param);
+        }
+    }
+
+    /**
+     * Builds one object per element child of {@code node} and hands the resulting list
+     * to the matching {@code setXyz(List)} setter on {@code object}, where {@code xyz}
+     * is the local name of {@code node}.
+     * <p>
+     * The {@code class} attribute on {@code node} is loaded and passed to
+     * {@code buildClass} for every child element; each built item then has its own
+     * params applied via {@code setParams} before it is added to the list.
+     *
+     * @param object the object whose list setter is invoked
+     * @param node the list element; callers must ensure it carries a {@code class} attribute
+     * @throws TikaConfigException if the declared class cannot be loaded, or if the
+     *         setter cannot be found or invoked
+     */
+    private static void tryToSetClassList(Object object, Node node) throws TikaConfigException {
+        String name = node.getLocalName();
+        String className = node.getAttributes().getNamedItem("class").getTextContent();
+        String setter = "set" + name.substring(0, 1).toUpperCase(Locale.US) + name.substring(1);
+        try {
+            Class<?> interfaze = Class.forName(className);
+            List<Object> items = new ArrayList<>();
+            NodeList nodeList = node.getChildNodes();
+            for (int i = 0; i < nodeList.getLength(); i++) {
+                Node child = nodeList.item(i);
+                // only element children describe items; text, comments, etc. are skipped
+                if (child.getNodeType() == Node.ELEMENT_NODE) {
+                    Object item = buildClass(child, child.getLocalName(), interfaze);
+                    setParams(item, child, new HashSet<>());
+                    items.add(item);
+                }
+            }
+
+            Method m = object.getClass().getMethod(setter, List.class);
+            m.invoke(object, items);
+        } catch (ClassNotFoundException e) {
+            throw new TikaConfigException("couldn't find class: " + className, e);
+        } catch (InvocationTargetException | NoSuchMethodException | IllegalAccessException e) {
+            // keep setter problems distinguishable from a missing class
+            throw new TikaConfigException(
+                    "couldn't call " + setter + "(List) on " + object.getClass().getName(), e);
+        }
+    }
+
+    private static void tryToSetStringList(Object object, Node param) throws TikaConfigException {
         String name = param.getLocalName();
         List<String> strings = new ArrayList<>();
         NodeList nodeList = param.getChildNodes();
diff --git a/tika-core/src/main/java/org/apache/tika/pipes/CompositePipesReporter.java b/tika-core/src/main/java/org/apache/tika/pipes/CompositePipesReporter.java
new file mode 100644
index 000000000..da34f3f98
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/pipes/CompositePipesReporter.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes;
+
+import java.util.List;
+import java.util.Map;
+
+import org.apache.tika.config.Field;
+import org.apache.tika.config.Initializable;
+import org.apache.tika.config.InitializableProblemHandler;
+import org.apache.tika.config.Param;
+import org.apache.tika.exception.TikaConfigException;
+
+/**
+ * A {@link PipesReporter} that forwards every report to each reporter in a
+ * configured list, in list order.
+ * <p>
+ * The delegates are supplied through {@link #setPipesReporters(List)};
+ * {@link #checkInitialization(InitializableProblemHandler)} rejects a missing
+ * or empty list.
+ */
+public class CompositePipesReporter extends PipesReporter implements Initializable {
+
+    private List<PipesReporter> pipesReporters;
+
+    /**
+     * @return the list most recently passed to {@link #setPipesReporters(List)},
+     *         or {@code null} if none has been set
+     */
+    public List<PipesReporter> getPipesReporters() {
+        return pipesReporters;
+    }
+
+    /**
+     * @param pipesReporters the reporters that should receive every report
+     */
+    @Field
+    public void setPipesReporters(List<PipesReporter> pipesReporters) {
+        this.pipesReporters = pipesReporters;
+    }
+
+    /**
+     * Passes the arguments unchanged to each configured reporter in turn.
+     */
+    @Override
+    public void report(FetchEmitTuple t, PipesResult result, long elapsed) {
+        for (PipesReporter delegate : pipesReporters) {
+            delegate.report(t, result, elapsed);
+        }
+    }
+
+    @Override
+    public void initialize(Map<String, Param> params) throws TikaConfigException {
+        //nothing to do here; validation happens in checkInitialization
+    }
+
+    /**
+     * @throws TikaConfigException if no reporter list was set or the list is empty
+     */
+    @Override
+    public void checkInitialization(InitializableProblemHandler problemHandler)
+            throws TikaConfigException {
+        if (pipesReporters == null) {
+            throw new TikaConfigException("must specify 'pipesReporters'");
+        }
+        if (pipesReporters.isEmpty()) {
+            throw new TikaConfigException("must specify at least one pipes reporter");
+        }
+    }
+}
diff --git a/tika-core/src/test/java/org/apache/tika/pipes/async/MockReporterTest.java b/tika-core/src/test/java/org/apache/tika/pipes/async/MockReporterTest.java
index 599ac474d..9bfcd5591 100644
--- a/tika-core/src/test/java/org/apache/tika/pipes/async/MockReporterTest.java
+++ b/tika-core/src/test/java/org/apache/tika/pipes/async/MockReporterTest.java
@@ -21,9 +21,11 @@ import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import java.nio.file.Path;
 import java.nio.file.Paths;
+import java.util.List;
 
 import org.junit.jupiter.api.Test;
 
+import org.apache.tika.pipes.CompositePipesReporter;
 import org.apache.tika.pipes.PipesReporter;
 
 public class MockReporterTest {
@@ -36,4 +38,15 @@ public class MockReporterTest {
         assertTrue(reporter instanceof MockReporter);
         assertEquals("somethingOrOther", ((MockReporter)reporter).getEndpoint());
     }
+
+    @Test
+    public void testCompositePipesReporter() throws Exception {
+        //TIKA-3865.xml is resolved relative to this test class
+        Path xml = Paths.get(getClass().getResource("TIKA-3865.xml").toURI());
+        PipesReporter loaded = AsyncConfig.load(xml).getPipesReporter();
+        assertTrue(loaded instanceof CompositePipesReporter);
+
+        CompositePipesReporter composite = (CompositePipesReporter) loaded;
+        List<PipesReporter> delegates = composite.getPipesReporters();
+
+        //each delegate is checked in turn so a failure points at the first bad entry
+        MockReporter first = (MockReporter) delegates.get(0);
+        assertEquals("somethingOrOther1", first.getEndpoint());
+        MockReporter second = (MockReporter) delegates.get(1);
+        assertEquals("somethingOrOther2", second.getEndpoint());
+    }
 }
diff --git a/tika-core/src/test/resources/org/apache/tika/pipes/async/TIKA-3865.xml b/tika-core/src/test/resources/org/apache/tika/pipes/async/TIKA-3865.xml
new file mode 100644
index 000000000..04f7dfa6f
--- /dev/null
+++ b/tika-core/src/test/resources/org/apache/tika/pipes/async/TIKA-3865.xml
@@ -0,0 +1,45 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+<!-- Test config for TIKA-3865: an async section whose reporter is a composite of two mock reporters. -->
+<properties>
+  <async>
+    <params>
+      <maxForEmitBatchBytes>10000</maxForEmitBatchBytes>
+      <emitMaxEstimatedBytes>100000</emitMaxEstimatedBytes>
+      <emitWithinMillis>60000</emitWithinMillis>
+      <numEmitters>1</numEmitters>
+    </params>
+    <pipesReporter class="org.apache.tika.pipes.CompositePipesReporter">
+      <params>
+        <!-- here the class attribute names the common type of the list items,
+             not an implementation; each child element names its own implementation -->
+        <pipesReporters class="org.apache.tika.pipes.PipesReporter">
+          <!-- list order matters: MockReporterTest reads these back by index -->
+          <pipesReporter class="org.apache.tika.pipes.async.MockReporter">
+            <params>
+              <endpoint>somethingOrOther1</endpoint>
+            </params>
+          </pipesReporter>
+          <pipesReporter class="org.apache.tika.pipes.async.MockReporter">
+            <params>
+              <endpoint>somethingOrOther2</endpoint>
+            </params>
+          </pipesReporter>
+        </pipesReporters>
+      </params>
+    </pipesReporter>
+  </async>
+</properties>
\ No newline at end of file