You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2024/02/08 19:38:44 UTC

(tika) branch TIKA-4193 created (now d07fb16b1)

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a change to branch TIKA-4193
in repository https://gitbox.apache.org/repos/asf/tika.git


      at d07fb16b1 TIKA-4193 -- add num common tokens to TikaEvalMetadataFilter

This branch includes the following new commits:

     new d07fb16b1 TIKA-4193 -- add num common tokens to TikaEvalMetadataFilter

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



(tika) 01/01: TIKA-4193 -- add num common tokens to TikaEvalMetadataFilter

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-4193
in repository https://gitbox.apache.org/repos/asf/tika.git

commit d07fb16b132294ced01a9ce64ae7f8263149f3d8
Author: tallison <ta...@apache.org>
AuthorDate: Thu Feb 8 14:38:30 2024 -0500

    TIKA-4193 -- add num common tokens to TikaEvalMetadataFilter
---
 .../org/apache/tika/eval/core/metadata/TikaEvalMetadataFilter.java    | 4 ++++
 .../apache/tika/eval/core/metadata/TikaEvalMetadataFilterTest.java    | 1 +
 2 files changed, 5 insertions(+)

diff --git a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/metadata/TikaEvalMetadataFilter.java b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/metadata/TikaEvalMetadataFilter.java
index 0ac65d240..811958af4 100644
--- a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/metadata/TikaEvalMetadataFilter.java
+++ b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/metadata/TikaEvalMetadataFilter.java
@@ -48,6 +48,9 @@ public class TikaEvalMetadataFilter extends MetadataFilter {
     public static Property NUM_ALPHA_TOKENS =
             Property.externalInteger(TIKA_EVAL_NS + "numAlphaTokens");
 
+    public static Property NUM_COMMON_TOKENS =
+            Property.externalInteger(TIKA_EVAL_NS + "numCommonTokens");
+
     public static Property NUM_UNIQUE_ALPHA_TOKENS =
             Property.externalInteger(TIKA_EVAL_NS + "numUniqueAlphaTokens");
 
@@ -90,6 +93,7 @@ public class TikaEvalMetadataFilter extends MetadataFilter {
         CommonTokenResult commonTokenResult = (CommonTokenResult) results.get(CommonTokens.class);
         metadata.set(NUM_ALPHA_TOKENS, commonTokenResult.getAlphabeticTokens());
         metadata.set(NUM_UNIQUE_ALPHA_TOKENS, commonTokenResult.getUniqueAlphabeticTokens());
+        metadata.set(NUM_COMMON_TOKENS, commonTokenResult.getCommonTokens());
         if (commonTokenResult.getAlphabeticTokens() > 0) {
             metadata.set(OUT_OF_VOCABULARY, commonTokenResult.getOOV());
         } else {
diff --git a/tika-eval/tika-eval-core/src/test/java/org/apache/tika/eval/core/metadata/TikaEvalMetadataFilterTest.java b/tika-eval/tika-eval-core/src/test/java/org/apache/tika/eval/core/metadata/TikaEvalMetadataFilterTest.java
index 1961698b4..f1fd21c21 100644
--- a/tika-eval/tika-eval-core/src/test/java/org/apache/tika/eval/core/metadata/TikaEvalMetadataFilterTest.java
+++ b/tika-eval/tika-eval-core/src/test/java/org/apache/tika/eval/core/metadata/TikaEvalMetadataFilterTest.java
@@ -42,6 +42,7 @@ public class TikaEvalMetadataFilterTest {
             assertEquals(11, (int) metadata.getInt(TikaEvalMetadataFilter.NUM_UNIQUE_TOKENS));
             assertEquals(10, (int) metadata.getInt(TikaEvalMetadataFilter.NUM_ALPHA_TOKENS));
             assertEquals(9, (int) metadata.getInt(TikaEvalMetadataFilter.NUM_UNIQUE_ALPHA_TOKENS));
+            assertEquals(9, (int) metadata.getInt(TikaEvalMetadataFilter.NUM_COMMON_TOKENS));
 
 
             assertEquals(0.0999,