You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2016/12/30 09:54:57 UTC
[2/2] lucene-solr:branch_6x: LUCENE-7606: Normalization with
CustomAnalyzer would only apply the last token filter.
LUCENE-7606: Normalization with CustomAnalyzer would only apply the last token filter.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/26760fe0
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/26760fe0
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/26760fe0
Branch: refs/heads/branch_6x
Commit: 26760fe0f7e79e7c80af0364c23fe2db74e1e1f7
Parents: 6168a5d
Author: Adrien Grand <jp...@gmail.com>
Authored: Fri Dec 30 10:30:40 2016 +0100
Committer: Adrien Grand <jp...@gmail.com>
Committed: Fri Dec 30 10:45:47 2016 +0100
----------------------------------------------------------------------
lucene/CHANGES.txt | 3 +++
.../lucene/analysis/custom/CustomAnalyzer.java | 2 +-
.../analysis/custom/TestCustomAnalyzer.java | 21 ++++++++++++++++++++
.../apache/lucene/analysis/custom/mapping1.txt | 1 +
.../apache/lucene/analysis/custom/mapping2.txt | 1 +
5 files changed, 27 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/26760fe0/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 7a566ba..afa65d5 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -73,6 +73,9 @@ Bug Fixes
using helpers for exclusive bounds that are consistent with Double.compare.
(Adrien Grand, Dawid Weiss)
+* LUCENE-7606: Normalization with CustomAnalyzer would only apply the last
+ token filter. (Adrien Grand)
+
Improvements
* LUCENE-7532: Add back lost codec file format documentation
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/26760fe0/lucene/analysis/common/src/java/org/apache/lucene/analysis/custom/CustomAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/custom/CustomAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/custom/CustomAnalyzer.java
index 466642c..1cfdfe3 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/custom/CustomAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/custom/CustomAnalyzer.java
@@ -145,7 +145,7 @@ public final class CustomAnalyzer extends Analyzer {
for (TokenFilterFactory filter : tokenFilters) {
if (filter instanceof MultiTermAwareComponent) {
filter = (TokenFilterFactory) ((MultiTermAwareComponent) filter).getMultiTermComponent();
- result = filter.create(in);
+ result = filter.create(result);
}
}
return result;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/26760fe0/lucene/analysis/common/src/test/org/apache/lucene/analysis/custom/TestCustomAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/custom/TestCustomAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/custom/TestCustomAnalyzer.java
index 60633e4..044d2f9 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/custom/TestCustomAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/custom/TestCustomAnalyzer.java
@@ -30,6 +30,7 @@ import org.apache.lucene.analysis.CharFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.charfilter.HTMLStripCharFilterFactory;
+import org.apache.lucene.analysis.charfilter.MappingCharFilterFactory;
import org.apache.lucene.analysis.core.KeywordTokenizerFactory;
import org.apache.lucene.analysis.core.LowerCaseFilterFactory;
import org.apache.lucene.analysis.core.LowerCaseTokenizer;
@@ -479,4 +480,24 @@ public class TestCustomAnalyzer extends BaseTokenStreamTestCase {
assertEquals(new BytesRef("2A"), analyzer2.normalize("dummy", "0À"));
}
+ public void testNormalizationWithMultipleTokenFilters() throws IOException {
+ CustomAnalyzer analyzer = CustomAnalyzer.builder()
+ // both token filters are multi-term aware, so normalization must chain them (lowercase, then ASCII-fold); the tokenizer is not applied, so the spaces survive
+ .withTokenizer(WhitespaceTokenizerFactory.class, Collections.emptyMap())
+ .addTokenFilter(LowerCaseFilterFactory.class, Collections.emptyMap())
+ .addTokenFilter(ASCIIFoldingFilterFactory.class, Collections.emptyMap())
+ .build();
+ assertEquals(new BytesRef("a b e"), analyzer.normalize("dummy", "À B é"));
+ }
+
+ public void testNormalizationWithMultiplCharFilters() throws IOException {
+ CustomAnalyzer analyzer = CustomAnalyzer.builder()
+ // both mapping char filters are expected to apply during normalization (a => e, then b => f); the tokenizer is not applied, so the spaces survive
+ .withTokenizer(WhitespaceTokenizerFactory.class, Collections.emptyMap())
+ .addCharFilter(MappingCharFilterFactory.class, new HashMap<>(Collections.singletonMap("mapping", "org/apache/lucene/analysis/custom/mapping1.txt")))
+ .addCharFilter(MappingCharFilterFactory.class, new HashMap<>(Collections.singletonMap("mapping", "org/apache/lucene/analysis/custom/mapping2.txt")))
+ .build();
+ assertEquals(new BytesRef("e f c"), analyzer.normalize("dummy", "a b c"));
+ }
+
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/26760fe0/lucene/analysis/common/src/test/org/apache/lucene/analysis/custom/mapping1.txt
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/custom/mapping1.txt b/lucene/analysis/common/src/test/org/apache/lucene/analysis/custom/mapping1.txt
new file mode 100644
index 0000000..40aaf5a
--- /dev/null
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/custom/mapping1.txt
@@ -0,0 +1 @@
+"a" => "e"
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/26760fe0/lucene/analysis/common/src/test/org/apache/lucene/analysis/custom/mapping2.txt
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/custom/mapping2.txt b/lucene/analysis/common/src/test/org/apache/lucene/analysis/custom/mapping2.txt
new file mode 100644
index 0000000..cac0bea
--- /dev/null
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/custom/mapping2.txt
@@ -0,0 +1 @@
+"b" => "f"