You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2014/04/01 06:43:02 UTC
svn commit: r1583530 - in /lucene/dev/trunk/lucene: ./
analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/
analysis/common/src/java/org/apache/lucene/analysis/sinks/
analysis/common/src/java/org/apache/lucene/analysis/standard/ analysis/...
Author: rmuir
Date: Tue Apr 1 04:43:01 2014
New Revision: 1583530
URL: http://svn.apache.org/r1583530
Log:
LUCENE-5559: Add missing checks to TokenFilters with numeric arguments
Added:
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilter.java (with props)
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountFilter.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenPositionFilter.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/TokenRangeSinkFilter.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilterFactory.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilterFactory.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenPositionFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenPositionFilterFactory.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizerFactory.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1583530&r1=1583529&r2=1583530&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Tue Apr 1 04:43:01 2014
@@ -215,6 +215,9 @@ Bug fixes
* LUCENE-5555: Fix SortedInputIterator to correctly encode/decode contexts in presence of payload (Areek Zillur)
+* LUCENE-5559: Add missing argument checks to tokenfilters taking
+ numeric arguments. (Ahmet Arslan via Robert Muir)
+
Test Framework
* LUCENE-5449: Rename _TestUtil and _TestHelper to remove the leading _.
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java?rev=1583530&r1=1583529&r2=1583530&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java Tue Apr 1 04:43:01 2014
@@ -32,7 +32,7 @@ public final class LengthFilter extends
private final int min;
private final int max;
-
+
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
/**
@@ -46,6 +46,12 @@ public final class LengthFilter extends
*/
public LengthFilter(Version version, TokenStream in, int min, int max) {
super(version, in);
+ if (min < 0) {
+ throw new IllegalArgumentException("minimum length must be greater than or equal to zero");
+ }
+ if (min > max) {
+ throw new IllegalArgumentException("maximum length must not be greater than minimum length");
+ }
this.min = min;
this.max = max;
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountFilter.java?rev=1583530&r1=1583529&r2=1583530&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountFilter.java Tue Apr 1 04:43:01 2014
@@ -61,6 +61,9 @@ public final class LimitTokenCountFilter
*/
public LimitTokenCountFilter(TokenStream in, int maxTokenCount, boolean consumeAllTokens) {
super(in);
+ if (maxTokenCount < 1) {
+ throw new IllegalArgumentException("maxTokenCount must be greater than zero");
+ }
this.maxTokenCount = maxTokenCount;
this.consumeAllTokens = consumeAllTokens;
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenPositionFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenPositionFilter.java?rev=1583530&r1=1583529&r2=1583530&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenPositionFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenPositionFilter.java Tue Apr 1 04:43:01 2014
@@ -67,6 +67,9 @@ public final class LimitTokenPositionFil
*/
public LimitTokenPositionFilter(TokenStream in, int maxTokenPosition, boolean consumeAllTokens) {
super(in);
+ if (maxTokenPosition < 1) {
+ throw new IllegalArgumentException("maxTokenPosition must be greater than zero");
+ }
this.maxTokenPosition = maxTokenPosition;
this.consumeAllTokens = consumeAllTokens;
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/TokenRangeSinkFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/TokenRangeSinkFilter.java?rev=1583530&r1=1583529&r2=1583530&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/TokenRangeSinkFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/TokenRangeSinkFilter.java Tue Apr 1 04:43:01 2014
@@ -31,6 +31,12 @@ public class TokenRangeSinkFilter extend
private int count;
public TokenRangeSinkFilter(int lower, int upper) {
+ if (lower < 1) {
+ throw new IllegalArgumentException("lower must be greater than zero");
+ }
+ if (lower > upper) {
+ throw new IllegalArgumentException("lower must not be greater than upper");
+ }
this.lower = lower;
this.upper = upper;
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java?rev=1583530&r1=1583529&r2=1583530&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java Tue Apr 1 04:43:01 2014
@@ -84,6 +84,9 @@ public final class ClassicTokenizer exte
/** Set the max allowed token length. Any token longer
* than this is skipped. */
public void setMaxTokenLength(int length) {
+ if (length < 1) {
+ throw new IllegalArgumentException("maxTokenLength must be greater than zero");
+ }
this.maxTokenLength = length;
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java?rev=1583530&r1=1583529&r2=1583530&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java Tue Apr 1 04:43:01 2014
@@ -98,6 +98,9 @@ public final class StandardTokenizer ext
/** Set the max allowed token length. Any token longer
* than this is skipped. */
public void setMaxTokenLength(int length) {
+ if (length < 1) {
+ throw new IllegalArgumentException("maxTokenLength must be greater than zero");
+ }
this.maxTokenLength = length;
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java?rev=1583530&r1=1583529&r2=1583530&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java Tue Apr 1 04:43:01 2014
@@ -84,6 +84,9 @@ public final class UAX29URLEmailTokenize
/** Set the max allowed token length. Any token longer
* than this is skipped. */
public void setMaxTokenLength(int length) {
+ if (length < 1) {
+ throw new IllegalArgumentException("maxTokenLength must be greater than zero");
+ }
this.maxTokenLength = length;
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java?rev=1583530&r1=1583529&r2=1583530&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java Tue Apr 1 04:43:01 2014
@@ -27,6 +27,7 @@ import org.apache.lucene.analysis.MockTo
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
+import org.junit.Test;
public class TestLengthFilter extends BaseTokenStreamTestCase {
@@ -50,4 +51,11 @@ public class TestLengthFilter extends Ba
checkOneTerm(a, "", "");
}
+ /**
+ * checking the validity of constructor arguments
+ */
+ @Test(expected = IllegalArgumentException.class)
+ public void testIllegalArguments() throws Exception {
+ new LengthFilter(TEST_VERSION_CURRENT, whitespaceMockTokenizer("accept only valid arguments"), -4, -1);
+ }
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilterFactory.java?rev=1583530&r1=1583529&r2=1583530&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilterFactory.java Tue Apr 1 04:43:01 2014
@@ -1,11 +1,12 @@
package org.apache.lucene.analysis.miscellaneous;
-/**
- * Copyright 2004 The Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
@@ -31,21 +32,36 @@ public class TestLengthFilterFactory ext
TokenStream stream = new MockTokenizer(MockTokenizer.WHITESPACE, false);
((Tokenizer)stream).setReader(reader);
stream = tokenFilterFactory("Length",
- "min", "4",
- "max", "10").create(stream);
+ LengthFilterFactory.MIN_KEY, "4",
+ LengthFilterFactory.MAX_KEY, "10").create(stream);
assertTokenStreamContents(stream, new String[] { "foobar" }, new int[] { 2 });
}
-
+
/** Test that bogus arguments result in exception */
public void testBogusArguments() throws Exception {
try {
- tokenFilterFactory("Length",
- "min", "4",
- "max", "5",
+ tokenFilterFactory("Length",
+ LengthFilterFactory.MIN_KEY, "4",
+ LengthFilterFactory.MAX_KEY, "5",
"bogusArg", "bogusValue");
fail();
} catch (IllegalArgumentException expected) {
assertTrue(expected.getMessage().contains("Unknown parameters"));
}
}
+
+ /** Test that invalid arguments result in exception */
+ public void testInvalidArguments() throws Exception {
+ try {
+ Reader reader = new StringReader("foo foobar super-duper-trooper");
+ TokenStream stream = new MockTokenizer(MockTokenizer.WHITESPACE, false);
+ ((Tokenizer)stream).setReader(reader);
+ tokenFilterFactory("Length",
+ LengthFilterFactory.MIN_KEY, "5",
+ LengthFilterFactory.MAX_KEY, "4").create(stream);
+ fail();
+ } catch (IllegalArgumentException expected) {
+ assertTrue(expected.getMessage().contains("maximum length must not be greater than minimum length"));
+ }
+ }
}
\ No newline at end of file
Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilter.java?rev=1583530&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilter.java (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilter.java Tue Apr 1 04:43:01 2014
@@ -0,0 +1,40 @@
+package org.apache.lucene.analysis.miscellaneous;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+import org.junit.Test;
+
+public class TestLimitTokenCountFilter extends BaseTokenStreamTestCase {
+
+ public void test() throws Exception {
+ for (final boolean consumeAll : new boolean[]{true, false}) {
+ MockTokenizer tokenizer = whitespaceMockTokenizer("A1 B2 C3 D4 E5 F6");
+ tokenizer.setEnableChecks(consumeAll);
+ TokenStream stream = new LimitTokenCountFilter(tokenizer, 3, consumeAll);
+ assertTokenStreamContents(stream, new String[]{"A1", "B2", "C3"});
+ }
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testIllegalArguments() throws Exception {
+ new LimitTokenCountFilter(whitespaceMockTokenizer("A1 B2 C3 D4 E5 F6"), -1);
+ }
+}
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilterFactory.java?rev=1583530&r1=1583529&r2=1583530&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilterFactory.java Tue Apr 1 04:43:01 2014
@@ -1,11 +1,12 @@
package org.apache.lucene.analysis.miscellaneous;
-/**
- * Copyright 2004 The Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
@@ -16,25 +17,28 @@ package org.apache.lucene.analysis.misce
* limitations under the License.
*/
-import java.io.Reader;
-import java.io.StringReader;
-
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+import java.io.Reader;
+import java.io.StringReader;
+
public class TestLimitTokenCountFilterFactory extends BaseTokenStreamFactoryTestCase {
public void test() throws Exception {
- Reader reader = new StringReader("A1 B2 C3 D4 E5 F6");
- MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
- tokenizer.setReader(reader);
- // LimitTokenCountFilter doesn't consume the entire stream that it wraps
- tokenizer.setEnableChecks(false);
- TokenStream stream = tokenizer;
- stream = tokenFilterFactory("LimitTokenCount",
- "maxTokenCount", "3").create(stream);
- assertTokenStreamContents(stream, new String[] { "A1", "B2", "C3" });
+ for (final boolean consumeAll : new boolean[]{true, false}) {
+ Reader reader = new StringReader("A1 B2 C3 D4 E5 F6");
+ MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
+ tokenizer.setReader(reader);
+ tokenizer.setEnableChecks(consumeAll);
+ TokenStream stream = tokenizer;
+ stream = tokenFilterFactory("LimitTokenCount",
+ LimitTokenCountFilterFactory.MAX_TOKEN_COUNT_KEY, "3",
+ LimitTokenCountFilterFactory.CONSUME_ALL_TOKENS_KEY, Boolean.toString(consumeAll)
+ ).create(stream);
+ assertTokenStreamContents(stream, new String[]{"A1", "B2", "C3"});
+ }
}
public void testRequired() throws Exception {
@@ -44,15 +48,17 @@ public class TestLimitTokenCountFilterFa
fail();
} catch (IllegalArgumentException e) {
assertTrue("exception doesn't mention param: " + e.getMessage(),
- 0 < e.getMessage().indexOf(LimitTokenCountFilterFactory.MAX_TOKEN_COUNT_KEY));
+ 0 < e.getMessage().indexOf(LimitTokenCountFilterFactory.MAX_TOKEN_COUNT_KEY));
}
}
-
- /** Test that bogus arguments result in exception */
+
+ /**
+ * Test that bogus arguments result in exception
+ */
public void testBogusArguments() throws Exception {
try {
- tokenFilterFactory("LimitTokenCount",
- "maxTokenCount", "3",
+ tokenFilterFactory("LimitTokenCount",
+ LimitTokenCountFilterFactory.MAX_TOKEN_COUNT_KEY, "3",
"bogusArg", "bogusValue");
fail();
} catch (IllegalArgumentException expected) {
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenPositionFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenPositionFilter.java?rev=1583530&r1=1583529&r2=1583530&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenPositionFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenPositionFilter.java Tue Apr 1 04:43:01 2014
@@ -16,10 +16,6 @@ package org.apache.lucene.analysis.misce
* limitations under the License.
*/
-import java.io.IOException;
-import java.io.Reader;
-import java.io.StringReader;
-
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
@@ -27,11 +23,15 @@ import org.apache.lucene.analysis.TokenS
import org.apache.lucene.analysis.synonym.SynonymFilter;
import org.apache.lucene.analysis.synonym.SynonymMap;
import org.apache.lucene.util.CharsRef;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.io.StringReader;
public class TestLimitTokenPositionFilter extends BaseTokenStreamTestCase {
public void testMaxPosition2() throws IOException {
- for (final boolean consumeAll : new boolean[] { true, false }) {
+ for (final boolean consumeAll : new boolean[]{true, false}) {
Analyzer a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
@@ -42,43 +42,50 @@ public class TestLimitTokenPositionFilte
}
};
- // dont use assertAnalyzesTo here, as the end offset is not the end of the string (unless consumeAll is true, in which case its correct)!
- assertTokenStreamContents(a.tokenStream("dummy", "1 2 3 4 5"),
- new String[] { "1", "2" }, new int[] { 0, 3 }, new int[] { 1, 4 }, consumeAll ? 16 : null);
- assertTokenStreamContents(a.tokenStream("dummy", new StringReader("1 2 3 4 5")),
- new String[] { "1", "2" }, new int[] { 0, 2 }, new int[] { 1, 3 }, consumeAll ? 9 : null);
+ // don't use assertAnalyzesTo here, as the end offset is not the end of the string (unless consumeAll is true, in which case it's correct)!
+ assertTokenStreamContents(a.tokenStream("dummy", "1 2 3 4 5"),
+ new String[]{"1", "2"}, new int[]{0, 3}, new int[]{1, 4}, consumeAll ? 16 : null);
+ assertTokenStreamContents(a.tokenStream("dummy", new StringReader("1 2 3 4 5")),
+ new String[]{"1", "2"}, new int[]{0, 2}, new int[]{1, 3}, consumeAll ? 9 : null);
// less than the limit, ensure we behave correctly
assertTokenStreamContents(a.tokenStream("dummy", "1 "),
- new String[] { "1" }, new int[] { 0 }, new int[] { 1 }, consumeAll ? 3 : null);
-
+ new String[]{"1"}, new int[]{0}, new int[]{1}, consumeAll ? 3 : null);
+
// equal to limit
- assertTokenStreamContents(a.tokenStream("dummy", "1 2 "),
- new String[] { "1", "2" }, new int[] { 0, 3 }, new int[] { 1, 4 }, consumeAll ? 6 : null);
+ assertTokenStreamContents(a.tokenStream("dummy", "1 2 "),
+ new String[]{"1", "2"}, new int[]{0, 3}, new int[]{1, 4}, consumeAll ? 6 : null);
}
}
-
+
public void testMaxPosition3WithSynomyms() throws IOException {
- MockTokenizer tokenizer = whitespaceMockTokenizer("one two three four five");
- tokenizer.setEnableChecks(false); // LimitTokenPositionFilter doesn't consume the entire stream that it wraps
-
- SynonymMap.Builder builder = new SynonymMap.Builder(true);
- builder.add(new CharsRef("one"), new CharsRef("first"), true);
- builder.add(new CharsRef("one"), new CharsRef("alpha"), true);
- builder.add(new CharsRef("one"), new CharsRef("beguine"), true);
- CharsRef multiWordCharsRef = new CharsRef();
- SynonymMap.Builder.join(new String[] { "and", "indubitably", "single", "only" }, multiWordCharsRef);
- builder.add(new CharsRef("one"), multiWordCharsRef, true);
- SynonymMap.Builder.join(new String[]{"dopple", "ganger"}, multiWordCharsRef);
- builder.add(new CharsRef("two"), multiWordCharsRef, true);
- SynonymMap synonymMap = builder.build();
- TokenStream stream = new SynonymFilter(tokenizer, synonymMap, true);
- stream = new LimitTokenPositionFilter(stream, 3); // consumeAllTokens defaults to false
-
- // "only", the 4th word of multi-word synonym "and indubitably single only" is not emitted, since its position is greater than 3.
- assertTokenStreamContents(stream,
- new String[] { "one", "first", "alpha", "beguine", "and", "two", "indubitably", "dopple", "three", "single", "ganger" },
- new int[] { 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0 });
-
+ for (final boolean consumeAll : new boolean[]{true, false}) {
+ MockTokenizer tokenizer = whitespaceMockTokenizer("one two three four five");
+ // if we are consuming all tokens, we can use the checks, otherwise we can't
+ tokenizer.setEnableChecks(consumeAll);
+
+ SynonymMap.Builder builder = new SynonymMap.Builder(true);
+ builder.add(new CharsRef("one"), new CharsRef("first"), true);
+ builder.add(new CharsRef("one"), new CharsRef("alpha"), true);
+ builder.add(new CharsRef("one"), new CharsRef("beguine"), true);
+ CharsRef multiWordCharsRef = new CharsRef();
+ SynonymMap.Builder.join(new String[]{"and", "indubitably", "single", "only"}, multiWordCharsRef);
+ builder.add(new CharsRef("one"), multiWordCharsRef, true);
+ SynonymMap.Builder.join(new String[]{"dopple", "ganger"}, multiWordCharsRef);
+ builder.add(new CharsRef("two"), multiWordCharsRef, true);
+ SynonymMap synonymMap = builder.build();
+ TokenStream stream = new SynonymFilter(tokenizer, synonymMap, true);
+ stream = new LimitTokenPositionFilter(stream, 3, consumeAll);
+
+ // "only", the 4th word of multi-word synonym "and indubitably single only" is not emitted, since its position is greater than 3.
+ assertTokenStreamContents(stream,
+ new String[]{"one", "first", "alpha", "beguine", "and", "two", "indubitably", "dopple", "three", "single", "ganger"},
+ new int[]{1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0});
+ }
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testIllegalArguments() throws Exception {
+ new LimitTokenPositionFilter(whitespaceMockTokenizer("one two three four five"), 0);
}
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenPositionFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenPositionFilterFactory.java?rev=1583530&r1=1583529&r2=1583530&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenPositionFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenPositionFilterFactory.java Tue Apr 1 04:43:01 2014
@@ -16,26 +16,30 @@ package org.apache.lucene.analysis.misce
* limitations under the License.
*/
-import java.io.Reader;
-import java.io.StringReader;
-
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+import java.io.Reader;
+import java.io.StringReader;
+
public class TestLimitTokenPositionFilterFactory extends BaseTokenStreamFactoryTestCase {
public void testMaxPosition1() throws Exception {
- Reader reader = new StringReader("A1 B2 C3 D4 E5 F6");
- MockTokenizer tokenizer = whitespaceMockTokenizer(reader);
- // LimitTokenPositionFilter doesn't consume the entire stream that it wraps
- tokenizer.setEnableChecks(false);
- TokenStream stream = tokenizer;
- stream = tokenFilterFactory("LimitTokenPosition",
- "maxTokenPosition", "1").create(stream);
- assertTokenStreamContents(stream, new String[] { "A1" });
+ for (final boolean consumeAll : new boolean[]{true, false}) {
+ Reader reader = new StringReader("A1 B2 C3 D4 E5 F6");
+ MockTokenizer tokenizer = whitespaceMockTokenizer(reader);
+ // if we are consuming all tokens, we can use the checks, otherwise we can't
+ tokenizer.setEnableChecks(consumeAll);
+ TokenStream stream = tokenizer;
+ stream = tokenFilterFactory("LimitTokenPosition",
+ LimitTokenPositionFilterFactory.MAX_TOKEN_POSITION_KEY, "1",
+ LimitTokenPositionFilterFactory.CONSUME_ALL_TOKENS_KEY, Boolean.toString(consumeAll)
+ ).create(stream);
+ assertTokenStreamContents(stream, new String[]{"A1"});
+ }
}
-
+
public void testMissingParam() throws Exception {
try {
tokenFilterFactory("LimitTokenPosition");
@@ -47,34 +51,31 @@ public class TestLimitTokenPositionFilte
}
public void testMaxPosition1WithShingles() throws Exception {
- Reader reader = new StringReader("one two three four five");
- MockTokenizer tokenizer = whitespaceMockTokenizer(reader);
- // LimitTokenPositionFilter doesn't consume the entire stream that it wraps
- tokenizer.setEnableChecks(false);
- TokenStream stream = tokenizer;
- stream = tokenFilterFactory("Shingle",
- "minShingleSize", "2",
- "maxShingleSize", "3",
- "outputUnigrams", "true").create(stream);
- stream = tokenFilterFactory("LimitTokenPosition",
- "maxTokenPosition", "1").create(stream);
- assertTokenStreamContents(stream, new String[] { "one", "one two", "one two three" });
- }
-
- public void testConsumeAllTokens() throws Exception {
- Reader reader = new StringReader("A1 B2 C3 D4 E5 F6");
- TokenStream stream = whitespaceMockTokenizer(reader);
- stream = tokenFilterFactory("LimitTokenPosition",
- "maxTokenPosition", "3",
- "consumeAllTokens", "true").create(stream);
- assertTokenStreamContents(stream, new String[] { "A1", "B2", "C3" });
+ for (final boolean consumeAll : new boolean[]{true, false}) {
+ Reader reader = new StringReader("one two three four five");
+ MockTokenizer tokenizer = whitespaceMockTokenizer(reader);
+ // if we are consuming all tokens, we can use the checks, otherwise we can't
+ tokenizer.setEnableChecks(consumeAll);
+ TokenStream stream = tokenizer;
+ stream = tokenFilterFactory("Shingle",
+ "minShingleSize", "2",
+ "maxShingleSize", "3",
+ "outputUnigrams", "true").create(stream);
+ stream = tokenFilterFactory("LimitTokenPosition",
+ LimitTokenPositionFilterFactory.MAX_TOKEN_POSITION_KEY, "1",
+ LimitTokenPositionFilterFactory.CONSUME_ALL_TOKENS_KEY, Boolean.toString(consumeAll)
+ ).create(stream);
+ assertTokenStreamContents(stream, new String[]{"one", "one two", "one two three"});
+ }
}
-
- /** Test that bogus arguments result in exception */
+
+ /**
+ * Test that bogus arguments result in exception
+ */
public void testBogusArguments() throws Exception {
try {
- tokenFilterFactory("LimitTokenPosition",
- "maxTokenPosition", "3",
+ tokenFilterFactory("LimitTokenPosition",
+ "maxTokenPosition", "3",
"bogusArg", "bogusValue");
fail();
} catch (IllegalArgumentException expected) {
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java?rev=1583530&r1=1583529&r2=1583530&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java Tue Apr 1 04:43:01 2014
@@ -1,11 +1,12 @@
package org.apache.lucene.analysis.sinks;
-/**
- * Copyright 2004 The Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
@@ -21,6 +22,7 @@ import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
+import org.junit.Test;
public class TokenRangeSinkTokenizerTest extends BaseTokenStreamTestCase {
@@ -29,20 +31,25 @@ public class TokenRangeSinkTokenizerTest
String test = "The quick red fox jumped over the lazy brown dogs";
TeeSinkTokenFilter tee = new TeeSinkTokenFilter(whitespaceMockTokenizer(test));
TeeSinkTokenFilter.SinkTokenStream rangeToks = tee.newSinkTokenStream(sinkFilter);
-
+
int count = 0;
tee.reset();
while(tee.incrementToken()) {
count++;
}
-
+
int sinkCount = 0;
rangeToks.reset();
while (rangeToks.incrementToken()) {
sinkCount++;
}
-
+
assertTrue(count + " does not equal: " + 10, count == 10);
assertTrue("rangeToks Size: " + sinkCount + " is not: " + 2, sinkCount == 2);
}
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testIllegalArguments() throws Exception {
+ new TokenRangeSinkFilter(4, 2);
+ }
}
\ No newline at end of file
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizerFactory.java?rev=1583530&r1=1583529&r2=1583530&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizerFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizerFactory.java Tue Apr 1 04:43:01 2014
@@ -172,4 +172,13 @@ public class TestUAX29URLEmailTokenizerF
assertTrue(expected.getMessage().contains("Unknown parameters"));
}
}
+
+ public void testIllegalArguments() throws Exception {
+ try {
+ tokenizerFactory("UAX29URLEmail", "maxTokenLength", "-1").create();
+ fail();
+ } catch (IllegalArgumentException expected) {
+ assertTrue(expected.getMessage().contains("maxTokenLength must be greater than zero"));
+ }
+ }
}