You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2021/01/24 13:07:04 UTC
[lucene-solr] branch master updated: LUCENE-9575: fix style violations so precommit passes again
This is an automated email from the ASF dual-hosted git repository.
mikemccand pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/master by this push:
new 93107d6 LUCENE-9575: fix style violations so precommit passes again
93107d6 is described below
commit 93107d6379bb5c3ed3f7a7c15afa0c2506dbc4fe
Author: Mike McCandless <mi...@apache.org>
AuthorDate: Sun Jan 24 08:06:50 2021 -0500
LUCENE-9575: fix style violations so precommit passes again
---
.../analysis/pattern/PatternTypingFilter.java | 27 ++++-----
.../pattern/PatternTypingFilterFactory.java | 59 +++++++++---------
.../analysis/pattern/TestPatternTypingFilter.java | 70 +++++++++++++---------
.../pattern/TestPatternTypingFilterFactory.java | 35 +++++++----
4 files changed, 109 insertions(+), 82 deletions(-)
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTypingFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTypingFilter.java
index fbdbdf9..c622bde 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTypingFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTypingFilter.java
@@ -17,22 +17,22 @@
package org.apache.lucene.analysis.pattern;
+import java.io.IOException;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
-import java.io.IOException;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
/**
- * Set a type attribute to a parameterized value when tokens are matched by any of a several regex patterns. The
- * value set in the type attribute is parameterized with the match groups of the regex used for matching.
- * In combination with TypeAsSynonymFilter and DropIfFlagged filter this can supply complex synonym patterns
- * that are protected from subsequent analysis, and optionally drop the original term based on the flag
- * set in this filter. See {@link PatternTypingFilterFactory} for full documentation.
+ * Set a type attribute to a parameterized value when tokens are matched by any of several regex
+ * patterns. The value set in the type attribute is parameterized with the match groups of the regex
+ * used for matching. In combination with TypeAsSynonymFilter and DropIfFlagged filter this can
+ * supply complex synonym patterns that are protected from subsequent analysis, and optionally drop
+ * the original term based on the flag set in this filter. See {@link PatternTypingFilterFactory}
+ * for full documentation.
*
* @see PatternTypingFilterFactory
* @since 8.8.0
@@ -44,7 +44,7 @@ public class PatternTypingFilter extends TokenFilter {
private final FlagsAttribute flagAtt = addAttribute(FlagsAttribute.class);
private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
- public PatternTypingFilter(TokenStream input, PatternTypingRule... replacementAndFlagByPattern) {
+ public PatternTypingFilter(TokenStream input, PatternTypingRule... replacementAndFlagByPattern) {
super(input);
this.replacementAndFlagByPattern = replacementAndFlagByPattern;
}
@@ -55,7 +55,8 @@ public class PatternTypingFilter extends TokenFilter {
for (PatternTypingRule rule : replacementAndFlagByPattern) {
Matcher matcher = rule.getPattern().matcher(termAtt);
if (matcher.find()) {
- // allow 2nd reset() and find() that occurs inside replaceFirst to avoid excess string creation
+ // allow 2nd reset() and find() that occurs inside replaceFirst to avoid excess string
+ // creation
typeAtt.setType(matcher.replaceFirst(rule.getTypeTemplate()));
flagAtt.setFlags(rule.getFlags());
return true;
@@ -66,9 +67,7 @@ public class PatternTypingFilter extends TokenFilter {
return false;
}
- /**
- * Value holding class for pattern typing rules.
- */
+ /** Value holding class for pattern typing rules. */
public static class PatternTypingRule {
private final Pattern pattern;
private final int flags;
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTypingFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTypingFilterFactory.java
index 3eb168b..4d4539e 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTypingFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTypingFilterFactory.java
@@ -17,22 +17,21 @@
package org.apache.lucene.analysis.pattern;
-import org.apache.lucene.analysis.TokenFilterFactory;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.pattern.PatternTypingFilter.PatternTypingRule;
-import org.apache.lucene.util.ResourceLoader;
-import org.apache.lucene.util.ResourceLoaderAware;
-
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
-
+import org.apache.lucene.analysis.TokenFilterFactory;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.pattern.PatternTypingFilter.PatternTypingRule;
+import org.apache.lucene.util.ResourceLoader;
+import org.apache.lucene.util.ResourceLoaderAware;
/**
- * Provides a filter that will analyze tokens with the analyzer from an arbitrary field type. By itself this
- * filter is not very useful. Normally it is combined with a filter that reacts to types or flags.
+ * Provides a filter that will analyze tokens with the analyzer from an arbitrary field type. By
+ * itself this filter is not very useful. Normally it is combined with a filter that reacts to types
+ * or flags.
*
* <pre class="prettyprint" >
* <fieldType name="text_taf" class="solr.TextField" positionIncrementGap="100">
@@ -44,37 +43,38 @@ import java.util.regex.Pattern;
* ignore="word,&lt;ALPHANUM&gt;,&lt;NUM&gt;,&lt;SOUTHEAST_ASIAN&gt;,&lt;IDEOGRAPHIC&gt;,&lt;HIRAGANA&gt;,&lt;KATAKANA&gt;,&lt;HANGUL&gt;,&lt;EMOJI&gt;"/>
* </analyzer>
* </fieldType></pre>
- * <p>
- * Note that a configuration such as above may interfere with multi-word synonyms. The patterns file has the format:
+ *
+ * <p>Note that a configuration such as above may interfere with multi-word synonyms. The patterns
+ * file has the format:
+ *
* <pre>
* (flags) (pattern) ::: (replacement)
* </pre>
- * Therefore to set the first 2 flag bits on the original token matching 401k or 401(k) and adding a type of
- * 'legal2_401_k' whenever either one is encountered one would use:
+ *
+ * Therefore to set the first 2 flag bits on the original token matching 401k or 401(k) and adding a
+ * type of 'legal2_401_k' whenever either one is encountered one would use:
+ *
* <pre>
* 3 (\d+)\(?([a-z])\)? ::: legal2_$1_$2
* </pre>
- * Note that the number indicating the flag bits to set must not have leading spaces and be followed by a single
- * space, and must be 0 if no flags should be set. The flags number should not contain commas or a decimal point.
- * Lines for which the first character is <code>#</code> will be ignored as comments. Does not support producing
- * a synonym textually identical to the original term.
+ *
+ * Note that the number indicating the flag bits to set must not have leading spaces and be followed
+ * by a single space, and must be 0 if no flags should be set. The flags number should not contain
+ * commas or a decimal point. Lines for which the first character is <code>#</code> will be ignored
+ * as comments. Does not support producing a synonym textually identical to the original term.
*
* @lucene.spi {@value #NAME}
* @since 8.8
*/
public class PatternTypingFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
- /**
- * SPI name
- */
+ /** SPI name */
public static final String NAME = "patternTyping";
private final String patternFile;
private PatternTypingRule[] rules;
- /**
- * Creates a new PatternTypingFilterFactory
- */
+ /** Creates a new PatternTypingFilterFactory */
public PatternTypingFilterFactory(Map<String, String> args) {
super(args);
patternFile = require(args, "patternFile");
@@ -83,9 +83,7 @@ public class PatternTypingFilterFactory extends TokenFilterFactory implements Re
}
}
- /**
- * Default ctor for compatibility with SPI
- */
+ /** Default ctor for compatibility with SPI */
public PatternTypingFilterFactory() {
throw defaultCtorException();
}
@@ -94,16 +92,19 @@ public class PatternTypingFilterFactory extends TokenFilterFactory implements Re
public void inform(ResourceLoader loader) throws IOException {
List<PatternTypingRule> ruleList = new ArrayList<>();
List<String> lines = getLines(loader, patternFile);
- // format: # regex ::: typename[_$1[_$2 ...]] (technically _$1 does not need the '_' but it usually makes sense)
+ // format: # regex ::: typename[_$1[_$2 ...]] (technically _$1 does not need the '_' but it
+ // usually makes sense)
// eg: 2 (\d+\(?([a-z])\)?\(?(\d+)\)? ::: legal3_$1_$2_3
// which yields legal3_501_c_3 for 501(c)(3) or 501c3 and sets the second lowest bit in flags
for (String line : lines) {
int firstSpace = line.indexOf(" "); // no leading spaces allowed
int flagsVal = Integer.parseInt(line.substring(0, firstSpace));
line = line.substring(firstSpace + 1);
- String[] split = line.split(" ::: "); // arbitrary, unlikely to occur in a useful regex easy to read
+ String[] split =
+ line.split(" ::: "); // arbitrary, unlikely to occur in a useful regex easy to read
if (split.length != 2) {
- throw new RuntimeException("The PatternTypingFilter: Always two there are, no more, no less, a pattern and a replacement (separated by ' ::: ' )");
+ throw new RuntimeException(
+ "The PatternTypingFilter: Always two there are, no more, no less, a pattern and a replacement (separated by ' ::: ' )");
}
Pattern compiled = Pattern.compile(split[0]);
ruleList.add(new PatternTypingRule(compiled, flagsVal, split[1]));
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTypingFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTypingFilter.java
index 7c206e5..2611ff2 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTypingFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTypingFilter.java
@@ -16,23 +16,18 @@
*/
package org.apache.lucene.analysis.pattern;
+import java.io.IOException;
+import java.util.regex.Pattern;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.pattern.PatternTypingFilter.PatternTypingRule;
-import java.io.IOException;
-import java.util.regex.Pattern;
-
-/**
- * Test that this filter sets a type for tokens matching patterns defined in a patterns.txt file
- */
+/** Test that this filter sets a type for tokens matching patterns defined in a patterns.txt file */
public class TestPatternTypingFilter extends BaseTokenStreamTestCase {
- /**
- * Test the straight forward cases. When all flags match the token should be dropped
- */
+ /** Test the straight forward cases. When all flags match the token should be dropped */
public void testPatterns() throws Exception {
Token tokenA1 = new Token("One", 0, 2);
@@ -43,15 +38,26 @@ public class TestPatternTypingFilter extends BaseTokenStreamTestCase {
TokenStream ts = new CannedTokenStream(tokenA1, tokenA2, tokenA3, tokenB1, tokenB2);
- //2 ^(\d+)\(?([a-z])\)?$ ::: legal2_$1_$2
- ts = new PatternTypingFilter(ts,
- new PatternTypingRule(Pattern.compile("^(\\d+)\\(?([a-z])\\)?$"),2,"legal2_$1_$2"));
+ // 2 ^(\d+)\(?([a-z])\)?$ ::: legal2_$1_$2
+ ts =
+ new PatternTypingFilter(
+ ts,
+ new PatternTypingRule(Pattern.compile("^(\\d+)\\(?([a-z])\\)?$"), 2, "legal2_$1_$2"));
- assertTokenStreamContents(ts, new String[]{
- "One", "401(k)", "two", "three", "401k"}, null, null,
- new String[]{"word", "legal2_401_k", "word", "word", "legal2_401_k"},
- null, null, null, null, null, false, null,
- new int[]{0, 2, 0, 0, 2});
+ assertTokenStreamContents(
+ ts,
+ new String[] {"One", "401(k)", "two", "three", "401k"},
+ null,
+ null,
+ new String[] {"word", "legal2_401_k", "word", "word", "legal2_401_k"},
+ null,
+ null,
+ null,
+ null,
+ null,
+ false,
+ null,
+ new int[] {0, 2, 0, 0, 2});
}
public void testFirstPatternWins() throws IOException {
@@ -61,17 +67,27 @@ public class TestPatternTypingFilter extends BaseTokenStreamTestCase {
TokenStream ts = new CannedTokenStream(tokenA1, tokenA3, tokenB1);
- //2 ^(\d+)\(?([a-z])\)?$ ::: legal2_$1_$2
- PatternTypingRule p1 = new PatternTypingRule(Pattern.compile("^(\\d+)-(\\d+)$"), 6, "$1_hnum_$2");
- PatternTypingRule p2 = new PatternTypingRule(Pattern.compile("^(\\w+)-(\\w+)$"), 2, "$1_hword_$2");
+ // 2 ^(\d+)\(?([a-z])\)?$ ::: legal2_$1_$2
+ PatternTypingRule p1 =
+ new PatternTypingRule(Pattern.compile("^(\\d+)-(\\d+)$"), 6, "$1_hnum_$2");
+ PatternTypingRule p2 =
+ new PatternTypingRule(Pattern.compile("^(\\w+)-(\\w+)$"), 2, "$1_hword_$2");
- ts = new PatternTypingFilter(ts, p1,p2); // 101
+ ts = new PatternTypingFilter(ts, p1, p2); // 101
- assertTokenStreamContents(ts, new String[]{
- "One", "forty-two", "4-2"}, null, null,
- new String[]{"word", "forty_hword_two", "4_hnum_2"},
- null, null, null, null, null, false, null,
- new int[]{0, 2, 6});
+ assertTokenStreamContents(
+ ts,
+ new String[] {"One", "forty-two", "4-2"},
+ null,
+ null,
+ new String[] {"word", "forty_hword_two", "4_hnum_2"},
+ null,
+ null,
+ null,
+ null,
+ null,
+ false,
+ null,
+ new int[] {0, 2, 6});
}
-
}
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTypingFilterFactory.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTypingFilterFactory.java
index 8d5d115..dce0b59 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTypingFilterFactory.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTypingFilterFactory.java
@@ -25,9 +25,7 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.StringMockResourceLoader;
import org.apache.lucene.util.Version;
-/**
- * This test just ensures the factory works
- */
+/** This test just ensures the factory works */
public class TestPatternTypingFilterFactory extends BaseTokenStreamFactoryTestCase {
public void testFactory() throws Exception {
@@ -37,16 +35,29 @@ public class TestPatternTypingFilterFactory extends BaseTokenStreamFactoryTestCa
TokenStream ts = new CannedTokenStream(tokenA1, tokenA3, tokenB1);
- TokenFilterFactory tokenFilterFactory = tokenFilterFactory("patternTyping", Version.LATEST, new StringMockResourceLoader(
- "6 \\b(\\d+)-(\\d+) ::: $1_hnum_$2\n" +
- "2 \\b(\\w+)-(\\w+) ::: $1_hword_$2"
- ), "patternFile", "patterns.txt");
+ TokenFilterFactory tokenFilterFactory =
+ tokenFilterFactory(
+ "patternTyping",
+ Version.LATEST,
+ new StringMockResourceLoader(
+ "6 \\b(\\d+)-(\\d+) ::: $1_hnum_$2\n" + "2 \\b(\\w+)-(\\w+) ::: $1_hword_$2"),
+ "patternFile",
+ "patterns.txt");
ts = tokenFilterFactory.create(ts);
- assertTokenStreamContents(ts, new String[]{
- "One", "forty-two", "4-2"}, null, null,
- new String[]{"word", "forty_hword_two", "4_hnum_2"},
- null, null, null, null, null, false, null,
- new int[]{0, 2, 6});
+ assertTokenStreamContents(
+ ts,
+ new String[] {"One", "forty-two", "4-2"},
+ null,
+ null,
+ new String[] {"word", "forty_hword_two", "4_hnum_2"},
+ null,
+ null,
+ null,
+ null,
+ null,
+ false,
+ null,
+ new int[] {0, 2, 6});
}
}