You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2020/07/17 19:28:30 UTC
[tika] branch branch_1x updated: TIKA-3140 -- add the tika-eval
metadata filter to a service file so that it loads automatically
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/branch_1x by this push:
new 499394e TIKA-3140 -- add the tika-eval metadata filter to a service file so that it loads automatically
499394e is described below
commit 499394e44a8cfc24cd170902020ec1c558b5f2d1
Author: tallison <ta...@apache.org>
AuthorDate: Fri Jul 17 15:22:16 2020 -0400
TIKA-3140 -- add the tika-eval metadata filter to a service file
so that it loads automatically
---
.../org.apache.tika.metadata.filter.MetadataFilter | 16 +++++++
.../eval/metadata/TikaEvalMetadataFilterTest.java | 49 ++++++++++++----------
2 files changed, 44 insertions(+), 21 deletions(-)
diff --git a/tika-eval/src/main/resources/META-INF/services/org.apache.tika.metadata.filter.MetadataFilter b/tika-eval/src/main/resources/META-INF/services/org.apache.tika.metadata.filter.MetadataFilter
new file mode 100644
index 0000000..b722f23
--- /dev/null
+++ b/tika-eval/src/main/resources/META-INF/services/org.apache.tika.metadata.filter.MetadataFilter
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+org.apache.tika.eval.metadata.TikaEvalMetadataFilter
\ No newline at end of file
diff --git a/tika-eval/src/test/java/org/apache/tika/eval/metadata/TikaEvalMetadataFilterTest.java b/tika-eval/src/test/java/org/apache/tika/eval/metadata/TikaEvalMetadataFilterTest.java
index 1b3d006..894d663 100644
--- a/tika-eval/src/test/java/org/apache/tika/eval/metadata/TikaEvalMetadataFilterTest.java
+++ b/tika-eval/src/test/java/org/apache/tika/eval/metadata/TikaEvalMetadataFilterTest.java
@@ -17,6 +17,8 @@
package org.apache.tika.eval.metadata;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.filter.DefaultMetadataFilter;
+import org.apache.tika.metadata.filter.MetadataFilter;
import org.apache.tika.sax.AbstractRecursiveParserWrapperHandler;
import org.junit.Test;
@@ -26,26 +28,31 @@ public class TikaEvalMetadataFilterTest {
@Test
public void testBasic() throws Exception {
- Metadata metadata = new Metadata();
- String content = "the quick brown fox, Zothro 1234 1235, jumped over the lazy dog";
- metadata.set(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT, content);
- TikaEvalMetadataFilter filter = new TikaEvalMetadataFilter();
- filter.filter(metadata);
- assertEquals("eng", metadata.get(TikaEvalMetadataFilter.LANGUAGE));
- assertEquals(12, (int)metadata.getInt(TikaEvalMetadataFilter.NUM_TOKENS));
- assertEquals(11, (int)metadata.getInt(TikaEvalMetadataFilter.NUM_UNIQUE_TOKENS));
- assertEquals(10, (int)metadata.getInt(TikaEvalMetadataFilter.NUM_ALPHA_TOKENS));
- assertEquals(9, (int)metadata.getInt(TikaEvalMetadataFilter.NUM_UNIQUE_ALPHA_TOKENS));
-
-
- assertEquals(0.0999,
- Double.parseDouble(metadata.get(TikaEvalMetadataFilter.OUT_OF_VOCABULARY)),
- 0.1);
- assertEquals("eng", metadata.get(TikaEvalMetadataFilter.LANGUAGE));
-
- assertEquals(0.0196,
- Double.parseDouble(metadata.get(TikaEvalMetadataFilter.LANGUAGE_CONFIDENCE)),
- 0.1);
-
+ for (MetadataFilter filter : new MetadataFilter[]{
+ new TikaEvalMetadataFilter(),
+ //make sure that the TikaEvalMetadataFilter is loaded automatically
+ new DefaultMetadataFilter()
+ }) {
+ Metadata metadata = new Metadata();
+ String content = "the quick brown fox, Zothro 1234 1235, jumped over the lazy dog";
+ metadata.set(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT, content);
+
+ filter.filter(metadata);
+ assertEquals("eng", metadata.get(TikaEvalMetadataFilter.LANGUAGE));
+ assertEquals(12, (int) metadata.getInt(TikaEvalMetadataFilter.NUM_TOKENS));
+ assertEquals(11, (int) metadata.getInt(TikaEvalMetadataFilter.NUM_UNIQUE_TOKENS));
+ assertEquals(10, (int) metadata.getInt(TikaEvalMetadataFilter.NUM_ALPHA_TOKENS));
+ assertEquals(9, (int) metadata.getInt(TikaEvalMetadataFilter.NUM_UNIQUE_ALPHA_TOKENS));
+
+
+ assertEquals(0.0999,
+ Double.parseDouble(metadata.get(TikaEvalMetadataFilter.OUT_OF_VOCABULARY)),
+ 0.1);
+ assertEquals("eng", metadata.get(TikaEvalMetadataFilter.LANGUAGE));
+
+ assertEquals(0.0196,
+ Double.parseDouble(metadata.get(TikaEvalMetadataFilter.LANGUAGE_CONFIDENCE)),
+ 0.1);
+ }
}
}