You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2017/04/11 20:02:28 UTC

[tika] branch master updated: TIKA-2323

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git

The following commit(s) were added to refs/heads/master by this push:
       new  2a2e631   TIKA-2323
2a2e631 is described below

commit 2a2e6310b668d9e1ada9c4db23411aa8475583f3
Author: tballison <ta...@mitre.org>
AuthorDate: Tue Apr 11 16:02:05 2017 -0400

    TIKA-2323
---
 .../src/main/java/org/apache/tika/eval/AbstractProfiler.java | 12 ++++++------
 .../src/main/java/org/apache/tika/eval/ExtractComparer.java  |  3 ---
 .../java/org/apache/tika/eval/batch/EvalConsumerBuilder.java | 12 ++++++------
 3 files changed, 12 insertions(+), 15 deletions(-)

diff --git a/tika-eval/src/main/java/org/apache/tika/eval/AbstractProfiler.java b/tika-eval/src/main/java/org/apache/tika/eval/AbstractProfiler.java
index b71c1d2..b24b27e 100644
--- a/tika-eval/src/main/java/org/apache/tika/eval/AbstractProfiler.java
+++ b/tika-eval/src/main/java/org/apache/tika/eval/AbstractProfiler.java
@@ -267,7 +267,7 @@ public abstract class AbstractProfiler extends FileResourceConsumer {
         data.put(Cols.ELAPSED_TIME_MILLIS,
                 getTime(m));
 
-        String content = getContent(m, maxContentLength);
+        String content = getContent(m);
         if (content == null || content.trim().length() == 0) {
             data.put(Cols.HAS_CONTENT, FALSE);
         } else {
@@ -458,15 +458,15 @@ public abstract class AbstractProfiler extends FileResourceConsumer {
      */
     protected static String getContent(Metadata metadata, int maxLength, Map<Cols, String> data) {
         data.put(Cols.CONTENT_TRUNCATED_AT_MAX_LEN, "FALSE");
-        String c = getContent(metadata, maxLength);
-        if (c.length() > maxLength) {
+        String c = getContent(metadata);
+        if (maxLength > -1 && c.length() > maxLength) {
             c = c.substring(0, maxLength);
             data.put(Cols.CONTENT_TRUNCATED_AT_MAX_LEN, "TRUE");
         }
         return c;
 
     }
-    protected static String getContent(Metadata metadata, int maxLength) {
+    protected static String getContent(Metadata metadata) {
         if (metadata == null) {
             return "";
         }
@@ -478,7 +478,7 @@ public abstract class AbstractProfiler extends FileResourceConsumer {
     }
 
     void unicodeBlocks(Metadata metadata, Map<Cols, String> data) {
-        String content = getContent(metadata, maxContentLengthForLangId);
+        String content = getContent(metadata);
         if (content.length() < 200) {
             return;
         }
@@ -531,7 +531,7 @@ public abstract class AbstractProfiler extends FileResourceConsumer {
     }
 
     void langid(Metadata metadata, Map<Cols, String> data) {
-        String content = getContent(metadata, maxContentLengthForLangId);
+        String content = getContent(metadata);
         if (content.length() < 50) {
             return;
         }
diff --git a/tika-eval/src/main/java/org/apache/tika/eval/ExtractComparer.java b/tika-eval/src/main/java/org/apache/tika/eval/ExtractComparer.java
index 9caef9f..65606d0 100644
--- a/tika-eval/src/main/java/org/apache/tika/eval/ExtractComparer.java
+++ b/tika-eval/src/main/java/org/apache/tika/eval/ExtractComparer.java
@@ -80,12 +80,9 @@ public class ExtractComparer extends AbstractProfiler {
                 .addOption("tablePrefixB", true, "EXPERT: optional prefix for table names for B")
                 .addOption("drop", true, "drop tables if they exist")
                 .addOption("maxFilesToAdd", true, "maximum number of files to add to the crawler")
-                .addOption("maxContentLength", true, "truncate content beyond this length for calculating 'contents' stats")
-                .addOption("maxContentLengthForLangId", true, "truncate content beyond this length for language id")
                 .addOption("maxTokens", true, "maximum tokens to process, default=200000")
                 .addOption("maxContentLength", true, "truncate content beyond this length for calculating 'contents' stats, default=1000000")
                 .addOption("maxContentLengthForLangId", true, "truncate content beyond this length for language id, default=50000")
-
         ;
     }
 
diff --git a/tika-eval/src/main/java/org/apache/tika/eval/batch/EvalConsumerBuilder.java b/tika-eval/src/main/java/org/apache/tika/eval/batch/EvalConsumerBuilder.java
index be0533a..694b05e 100644
--- a/tika-eval/src/main/java/org/apache/tika/eval/batch/EvalConsumerBuilder.java
+++ b/tika-eval/src/main/java/org/apache/tika/eval/batch/EvalConsumerBuilder.java
@@ -176,18 +176,18 @@ public abstract class EvalConsumerBuilder {
 
     FileResourceConsumer parameterizeProfiler(AbstractProfiler abstractProfiler) {
 
-        int maxContentLength = PropsUtil.getInt(localAttrs.get("maxContentLength"), -1);
-        if (maxContentLength > -1) {
+        int maxContentLength = PropsUtil.getInt(localAttrs.get("maxContentLength"), -2);
+        if (maxContentLength > -2) {
             abstractProfiler.setMaxContentLength(maxContentLength);
         }
 
-        int maxContentLengthForLangId = PropsUtil.getInt(localAttrs.get("maxContentLengthForLangId"), -1);
-        if (maxContentLengthForLangId > -1) {
+        int maxContentLengthForLangId = PropsUtil.getInt(localAttrs.get("maxContentLengthForLangId"), -2);
+        if (maxContentLengthForLangId > -2) {
             abstractProfiler.setMaxContentLengthForLangId(maxContentLengthForLangId);
         }
 
-        int maxTokens = PropsUtil.getInt(localAttrs.get("maxTokens"), -1);
-        if (maxTokens > -1) {
+        int maxTokens = PropsUtil.getInt(localAttrs.get("maxTokens"), -2);
+        if (maxTokens > -2) {
             abstractProfiler.setMaxTokens(maxTokens);
         }
 

-- 
To stop receiving notification emails like this one, please contact
"commits@tika.apache.org" <co...@tika.apache.org>.