You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2020/07/17 19:28:30 UTC

[tika] branch branch_1x updated: TIKA-3140 -- add the tika-eval metadata filter to a service file so that it loads automatically

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/branch_1x by this push:
     new 499394e  TIKA-3140 -- add the tika-eval metadata filter to a service file so that it loads automatically
499394e is described below

commit 499394e44a8cfc24cd170902020ec1c558b5f2d1
Author: tallison <ta...@apache.org>
AuthorDate: Fri Jul 17 15:22:16 2020 -0400

    TIKA-3140 -- add the tika-eval metadata filter to a service file
    so that it loads automatically
---
 .../org.apache.tika.metadata.filter.MetadataFilter | 16 +++++++
 .../eval/metadata/TikaEvalMetadataFilterTest.java  | 49 ++++++++++++----------
 2 files changed, 44 insertions(+), 21 deletions(-)

diff --git a/tika-eval/src/main/resources/META-INF/services/org.apache.tika.metadata.filter.MetadataFilter b/tika-eval/src/main/resources/META-INF/services/org.apache.tika.metadata.filter.MetadataFilter
new file mode 100644
index 0000000..b722f23
--- /dev/null
+++ b/tika-eval/src/main/resources/META-INF/services/org.apache.tika.metadata.filter.MetadataFilter
@@ -0,0 +1,16 @@
+#  Licensed to the Apache Software Foundation (ASF) under one or more
+#  contributor license agreements.  See the NOTICE file distributed with
+#  this work for additional information regarding copyright ownership.
+#  The ASF licenses this file to You under the Apache License, Version 2.0
+#  (the "License"); you may not use this file except in compliance with
+#  the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+org.apache.tika.eval.metadata.TikaEvalMetadataFilter
\ No newline at end of file
diff --git a/tika-eval/src/test/java/org/apache/tika/eval/metadata/TikaEvalMetadataFilterTest.java b/tika-eval/src/test/java/org/apache/tika/eval/metadata/TikaEvalMetadataFilterTest.java
index 1b3d006..894d663 100644
--- a/tika-eval/src/test/java/org/apache/tika/eval/metadata/TikaEvalMetadataFilterTest.java
+++ b/tika-eval/src/test/java/org/apache/tika/eval/metadata/TikaEvalMetadataFilterTest.java
@@ -17,6 +17,8 @@
 package org.apache.tika.eval.metadata;
 
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.filter.DefaultMetadataFilter;
+import org.apache.tika.metadata.filter.MetadataFilter;
 import org.apache.tika.sax.AbstractRecursiveParserWrapperHandler;
 import org.junit.Test;
 
@@ -26,26 +28,31 @@ public class TikaEvalMetadataFilterTest {
 
     @Test
     public void testBasic() throws Exception {
-        Metadata metadata = new Metadata();
-        String content = "the quick brown fox, Zothro 1234 1235, jumped over the lazy dog";
-        metadata.set(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT, content);
-        TikaEvalMetadataFilter filter = new TikaEvalMetadataFilter();
-        filter.filter(metadata);
-        assertEquals("eng", metadata.get(TikaEvalMetadataFilter.LANGUAGE));
-        assertEquals(12, (int)metadata.getInt(TikaEvalMetadataFilter.NUM_TOKENS));
-        assertEquals(11, (int)metadata.getInt(TikaEvalMetadataFilter.NUM_UNIQUE_TOKENS));
-        assertEquals(10, (int)metadata.getInt(TikaEvalMetadataFilter.NUM_ALPHA_TOKENS));
-        assertEquals(9, (int)metadata.getInt(TikaEvalMetadataFilter.NUM_UNIQUE_ALPHA_TOKENS));
-
-
-        assertEquals(0.0999,
-                Double.parseDouble(metadata.get(TikaEvalMetadataFilter.OUT_OF_VOCABULARY)),
-                0.1);
-        assertEquals("eng", metadata.get(TikaEvalMetadataFilter.LANGUAGE));
-
-        assertEquals(0.0196,
-                Double.parseDouble(metadata.get(TikaEvalMetadataFilter.LANGUAGE_CONFIDENCE)),
-                0.1);
-
+        for (MetadataFilter filter : new MetadataFilter[]{
+                new TikaEvalMetadataFilter(),
+                // DefaultMetadataFilter: verifies that TikaEvalMetadataFilter is loaded automatically via the service file
+                new DefaultMetadataFilter()
+        }) {
+            Metadata metadata = new Metadata();
+            String content = "the quick brown fox, Zothro 1234 1235, jumped over the lazy dog";
+            metadata.set(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT, content);
+
+            filter.filter(metadata);
+            assertEquals("eng", metadata.get(TikaEvalMetadataFilter.LANGUAGE));
+            assertEquals(12, (int) metadata.getInt(TikaEvalMetadataFilter.NUM_TOKENS));
+            assertEquals(11, (int) metadata.getInt(TikaEvalMetadataFilter.NUM_UNIQUE_TOKENS));
+            assertEquals(10, (int) metadata.getInt(TikaEvalMetadataFilter.NUM_ALPHA_TOKENS));
+            assertEquals(9, (int) metadata.getInt(TikaEvalMetadataFilter.NUM_UNIQUE_ALPHA_TOKENS));
+
+
+            assertEquals(0.0999,
+                    Double.parseDouble(metadata.get(TikaEvalMetadataFilter.OUT_OF_VOCABULARY)),
+                    0.1);
+            assertEquals("eng", metadata.get(TikaEvalMetadataFilter.LANGUAGE));
+
+            assertEquals(0.0196,
+                    Double.parseDouble(metadata.get(TikaEvalMetadataFilter.LANGUAGE_CONFIDENCE)),
+                    0.1);
+        }
     }
 }