You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2017/04/06 15:58:11 UTC

[tika] 01/03: bump maximum tokens to 1000000

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 1ea20c6354a7df8257837ff143b0dbee31cc6656
Author: tballison <ta...@mitre.org>
AuthorDate: Wed Apr 5 14:28:29 2017 -0400

    bump maximum token count (maxTokenCount) from 100000 to 1000000
---
 tika-eval/src/main/resources/lucene-analyzers.json                    | 2 +-
 tika-eval/src/test/java/org/apache/tika/eval/AnalyzerManagerTest.java | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tika-eval/src/main/resources/lucene-analyzers.json b/tika-eval/src/main/resources/lucene-analyzers.json
index 663ebe2..fd02fa7 100644
--- a/tika-eval/src/main/resources/lucene-analyzers.json
+++ b/tika-eval/src/main/resources/lucene-analyzers.json
@@ -27,7 +27,7 @@
         {
           "factory": "oala.miscellaneous.LimitTokenCountFilterFactory",
           "params": {
-            "maxTokenCount": "100000",
+            "maxTokenCount": "1000000",
             "consumeAllTokens": "false"
           }
         }
diff --git a/tika-eval/src/test/java/org/apache/tika/eval/AnalyzerManagerTest.java b/tika-eval/src/test/java/org/apache/tika/eval/AnalyzerManagerTest.java
index 7a8a8fb..7b27b5d 100644
--- a/tika-eval/src/test/java/org/apache/tika/eval/AnalyzerManagerTest.java
+++ b/tika-eval/src/test/java/org/apache/tika/eval/AnalyzerManagerTest.java
@@ -82,7 +82,7 @@ public class AnalyzerManagerTest {
     public void testTokenCountFilter() throws Exception {
         AnalyzerManager analyzerManager = AnalyzerManager.newInstance();
         StringBuilder sb = new StringBuilder();
-        for (int i = 0; i < 101000; i++) {
+        for (int i = 0; i < 1001000; i++) {
             sb.append("the ");
         }
         TokenStream ts = analyzerManager.getGeneralAnalyzer().tokenStream("f", sb.toString());
@@ -94,7 +94,7 @@ public class AnalyzerManagerTest {
             tokens++;
         }
 
-        assertEquals(100000, tokens);
+        assertEquals(1000000, tokens);
 
     }
 

-- 
To stop receiving notification emails like this one, please contact
"commits@tika.apache.org" <co...@tika.apache.org>.