You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2017/04/06 15:58:11 UTC
[tika] 01/03: bump maximum tokens to 1000000
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git
commit 1ea20c6354a7df8257837ff143b0dbee31cc6656
Author: tballison <ta...@mitre.org>
AuthorDate: Wed Apr 5 14:28:29 2017 -0400
bump maximum tokens to 1000000
---
tika-eval/src/main/resources/lucene-analyzers.json | 2 +-
tika-eval/src/test/java/org/apache/tika/eval/AnalyzerManagerTest.java | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/tika-eval/src/main/resources/lucene-analyzers.json b/tika-eval/src/main/resources/lucene-analyzers.json
index 663ebe2..fd02fa7 100644
--- a/tika-eval/src/main/resources/lucene-analyzers.json
+++ b/tika-eval/src/main/resources/lucene-analyzers.json
@@ -27,7 +27,7 @@
{
"factory": "oala.miscellaneous.LimitTokenCountFilterFactory",
"params": {
- "maxTokenCount": "100000",
+ "maxTokenCount": "1000000",
"consumeAllTokens": "false"
}
}
diff --git a/tika-eval/src/test/java/org/apache/tika/eval/AnalyzerManagerTest.java b/tika-eval/src/test/java/org/apache/tika/eval/AnalyzerManagerTest.java
index 7a8a8fb..7b27b5d 100644
--- a/tika-eval/src/test/java/org/apache/tika/eval/AnalyzerManagerTest.java
+++ b/tika-eval/src/test/java/org/apache/tika/eval/AnalyzerManagerTest.java
@@ -82,7 +82,7 @@ public class AnalyzerManagerTest {
public void testTokenCountFilter() throws Exception {
AnalyzerManager analyzerManager = AnalyzerManager.newInstance();
StringBuilder sb = new StringBuilder();
- for (int i = 0; i < 101000; i++) {
+ for (int i = 0; i < 1001000; i++) {
sb.append("the ");
}
TokenStream ts = analyzerManager.getGeneralAnalyzer().tokenStream("f", sb.toString());
@@ -94,7 +94,7 @@ public class AnalyzerManagerTest {
tokens++;
}
- assertEquals(100000, tokens);
+ assertEquals(1000000, tokens);
}
--
To stop receiving notification emails like this one, please contact
"commits@tika.apache.org" <co...@tika.apache.org>.