You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by kr...@apache.org on 2016/10/27 20:09:53 UTC
[49/50] [abbrv] lucene-solr:jira/solr-8593: LUCENE-7429:
AnalyzerWrapper can now wrap the normalization analysis chain too.
LUCENE-7429: AnalyzerWrapper can now wrap the normalization analysis chain too.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/af600480
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/af600480
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/af600480
Branch: refs/heads/jira/solr-8593
Commit: af60048097a83220aae135b09d209a0f2d4ba3c6
Parents: 2172f3e
Author: Adrien Grand <jp...@gmail.com>
Authored: Thu Oct 27 16:27:45 2016 +0200
Committer: Adrien Grand <jp...@gmail.com>
Committed: Thu Oct 27 16:27:45 2016 +0200
----------------------------------------------------------------------
lucene/CHANGES.txt | 3 +
.../lucene/analysis/custom/CustomAnalyzer.java | 2 +-
.../lucene/collation/CollationKeyAnalyzer.java | 2 +-
.../org/apache/lucene/analysis/Analyzer.java | 9 +-
.../apache/lucene/analysis/AnalyzerWrapper.java | 50 ++++++++-
.../analysis/DelegatingAnalyzerWrapper.java | 14 ++-
.../analysis/TestDelegatingAnalyzerWrapper.java | 107 +++++++++++++++++++
.../lucene/analysis/MockBytesAnalyzer.java | 2 +-
.../apache/solr/analysis/TokenizerChain.java | 2 +-
9 files changed, 180 insertions(+), 11 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/af600480/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index d574a8a..5a6601b 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -107,6 +107,9 @@ Bug Fixes
allTermsRequired is false and context filters are specified (Mike
McCandless)
+* LUCENE-7429: AnalyzerWrapper can now modify the normalization chain too and
+ DelegatingAnalyzerWrapper does the right thing automatically. (Adrien Grand)
+
Improvements
* LUCENE-7439: FuzzyQuery now matches all terms within the specified
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/af600480/lucene/analysis/common/src/java/org/apache/lucene/analysis/custom/CustomAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/custom/CustomAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/custom/CustomAnalyzer.java
index b2de5e8..466642c 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/custom/CustomAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/custom/CustomAnalyzer.java
@@ -131,7 +131,7 @@ public final class CustomAnalyzer extends Analyzer {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer tk = tokenizer.create(attributeFactory());
+ final Tokenizer tk = tokenizer.create(attributeFactory(fieldName));
TokenStream ts = tk;
for (final TokenFilterFactory filter : tokenFilters) {
ts = filter.create(ts);
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/af600480/lucene/analysis/common/src/java/org/apache/lucene/collation/CollationKeyAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/collation/CollationKeyAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/collation/CollationKeyAnalyzer.java
index ea98731..4d0f039 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/collation/CollationKeyAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/collation/CollationKeyAnalyzer.java
@@ -85,7 +85,7 @@ public final class CollationKeyAnalyzer extends Analyzer {
}
@Override
- protected AttributeFactory attributeFactory() {
+ protected AttributeFactory attributeFactory(String fieldName) {
return factory;
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/af600480/lucene/core/src/java/org/apache/lucene/analysis/Analyzer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/analysis/Analyzer.java b/lucene/core/src/java/org/apache/lucene/analysis/Analyzer.java
index aa4b42d..3a5d41c 100644
--- a/lucene/core/src/java/org/apache/lucene/analysis/Analyzer.java
+++ b/lucene/core/src/java/org/apache/lucene/analysis/Analyzer.java
@@ -238,7 +238,7 @@ public abstract class Analyzer implements Closeable {
throw new IllegalStateException("Normalization threw an unexpected exeption", e);
}
- final AttributeFactory attributeFactory = attributeFactory();
+ final AttributeFactory attributeFactory = attributeFactory(fieldName);
try (TokenStream ts = normalize(fieldName,
new StringTokenStream(attributeFactory, filteredText, text.length()))) {
final TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
@@ -286,9 +286,10 @@ public abstract class Analyzer implements Closeable {
/** Return the {@link AttributeFactory} to be used for
* {@link #tokenStream analysis} and
- * {@link #normalize(String, String) normalization}. The default
- * implementation returns {@link TokenStream#DEFAULT_TOKEN_ATTRIBUTE_FACTORY}. */
- protected AttributeFactory attributeFactory() {
+ * {@link #normalize(String, String) normalization} on the given
+ * {@code fieldName}. The default implementation returns
+ * {@link TokenStream#DEFAULT_TOKEN_ATTRIBUTE_FACTORY}. */
+ protected AttributeFactory attributeFactory(String fieldName) {
return TokenStream.DEFAULT_TOKEN_ATTRIBUTE_FACTORY;
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/af600480/lucene/core/src/java/org/apache/lucene/analysis/AnalyzerWrapper.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/analysis/AnalyzerWrapper.java b/lucene/core/src/java/org/apache/lucene/analysis/AnalyzerWrapper.java
index 1e5640f..d23d004 100644
--- a/lucene/core/src/java/org/apache/lucene/analysis/AnalyzerWrapper.java
+++ b/lucene/core/src/java/org/apache/lucene/analysis/AnalyzerWrapper.java
@@ -19,6 +19,8 @@ package org.apache.lucene.analysis;
import java.io.Reader;
+import org.apache.lucene.util.AttributeFactory;
+
/**
* Extension to {@link Analyzer} suitable for Analyzers which wrap
* other Analyzers.
@@ -82,6 +84,22 @@ public abstract class AnalyzerWrapper extends Analyzer {
}
/**
+ * Wraps / alters the given TokenStream for normalization purposes, taken
+ * from the wrapped Analyzer, to form new components. It is through this
+ * method that new TokenFilters can be added by AnalyzerWrappers. By default,
+ * the given token stream is returned.
+ *
+ * @param fieldName
+ * Name of the field which is to be analyzed
+ * @param in
+ * TokenStream taken from the wrapped Analyzer
+ * @return Wrapped / altered TokenStream.
+ */
+ protected TokenStream wrapTokenStreamForNormalization(String fieldName, TokenStream in) {
+ return in;
+ }
+
+ /**
* Wraps / alters the given Reader. Through this method AnalyzerWrappers can
* implement {@link #initReader(String, Reader)}. By default, the given reader
* is returned.
@@ -95,13 +113,33 @@ public abstract class AnalyzerWrapper extends Analyzer {
protected Reader wrapReader(String fieldName, Reader reader) {
return reader;
}
-
+
+ /**
+ * Wraps / alters the given Reader. Through this method AnalyzerWrappers can
+ * implement {@link #initReaderForNormalization(String, Reader)}. By default,
+ * the given reader is returned.
+ *
+ * @param fieldName
+ * name of the field which is to be analyzed
+ * @param reader
+ * the reader to wrap
+ * @return the wrapped reader
+ */
+ protected Reader wrapReaderForNormalization(String fieldName, Reader reader) {
+ return reader;
+ }
+
@Override
protected final TokenStreamComponents createComponents(String fieldName) {
return wrapComponents(fieldName, getWrappedAnalyzer(fieldName).createComponents(fieldName));
}
@Override
+ protected final TokenStream normalize(String fieldName, TokenStream in) {
+ return wrapTokenStreamForNormalization(fieldName, getWrappedAnalyzer(fieldName).normalize(fieldName, in));
+ }
+
+ @Override
public int getPositionIncrementGap(String fieldName) {
return getWrappedAnalyzer(fieldName).getPositionIncrementGap(fieldName);
}
@@ -115,4 +153,14 @@ public abstract class AnalyzerWrapper extends Analyzer {
public final Reader initReader(String fieldName, Reader reader) {
return getWrappedAnalyzer(fieldName).initReader(fieldName, wrapReader(fieldName, reader));
}
+
+ @Override
+ protected final Reader initReaderForNormalization(String fieldName, Reader reader) {
+ return getWrappedAnalyzer(fieldName).initReaderForNormalization(fieldName, wrapReaderForNormalization(fieldName, reader));
+ }
+
+ @Override
+ protected final AttributeFactory attributeFactory(String fieldName) {
+ return getWrappedAnalyzer(fieldName).attributeFactory(fieldName);
+ }
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/af600480/lucene/core/src/java/org/apache/lucene/analysis/DelegatingAnalyzerWrapper.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/analysis/DelegatingAnalyzerWrapper.java b/lucene/core/src/java/org/apache/lucene/analysis/DelegatingAnalyzerWrapper.java
index 6f05d4d..edf5b2b 100644
--- a/lucene/core/src/java/org/apache/lucene/analysis/DelegatingAnalyzerWrapper.java
+++ b/lucene/core/src/java/org/apache/lucene/analysis/DelegatingAnalyzerWrapper.java
@@ -54,12 +54,22 @@ public abstract class DelegatingAnalyzerWrapper extends AnalyzerWrapper {
protected final TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
return super.wrapComponents(fieldName, components);
}
-
+
+ @Override
+ protected final TokenStream wrapTokenStreamForNormalization(String fieldName, TokenStream in) {
+ return super.wrapTokenStreamForNormalization(fieldName, in);
+ }
+
@Override
protected final Reader wrapReader(String fieldName, Reader reader) {
return super.wrapReader(fieldName, reader);
}
-
+
+ @Override
+ protected final Reader wrapReaderForNormalization(String fieldName, Reader reader) {
+ return super.wrapReaderForNormalization(fieldName, reader);
+ }
+
private static final class DelegatingReuseStrategy extends ReuseStrategy {
DelegatingAnalyzerWrapper wrapper;
private final ReuseStrategy fallbackStrategy;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/af600480/lucene/core/src/test/org/apache/lucene/analysis/TestDelegatingAnalyzerWrapper.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/analysis/TestDelegatingAnalyzerWrapper.java b/lucene/core/src/test/org/apache/lucene/analysis/TestDelegatingAnalyzerWrapper.java
new file mode 100644
index 0000000..1d6cf15
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/analysis/TestDelegatingAnalyzerWrapper.java
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.nio.charset.StandardCharsets;
+
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.LuceneTestCase;
+
+public class TestDelegatingAnalyzerWrapper extends LuceneTestCase {
+
+ public void testDelegatesNormalization() {
+ Analyzer analyzer1 = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
+ DelegatingAnalyzerWrapper w1 = new DelegatingAnalyzerWrapper(Analyzer.GLOBAL_REUSE_STRATEGY) {
+ @Override
+ protected Analyzer getWrappedAnalyzer(String fieldName) {
+ return analyzer1;
+ }
+ };
+ assertEquals(new BytesRef("Ab C"), w1.normalize("foo", "Ab C"));
+
+ Analyzer analyzer2 = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true);
+ DelegatingAnalyzerWrapper w2 = new DelegatingAnalyzerWrapper(Analyzer.GLOBAL_REUSE_STRATEGY) {
+ @Override
+ protected Analyzer getWrappedAnalyzer(String fieldName) {
+ return analyzer2;
+ }
+ };
+ assertEquals(new BytesRef("ab c"), w2.normalize("foo", "Ab C"));
+ }
+
+ public void testDelegatesAttributeFactory() throws Exception {
+ Analyzer analyzer1 = new MockBytesAnalyzer();
+ DelegatingAnalyzerWrapper w1 = new DelegatingAnalyzerWrapper(Analyzer.GLOBAL_REUSE_STRATEGY) {
+ @Override
+ protected Analyzer getWrappedAnalyzer(String fieldName) {
+ return analyzer1;
+ }
+ };
+ assertEquals(new BytesRef("Ab C".getBytes(StandardCharsets.UTF_16LE)), w1.normalize("foo", "Ab C"));
+ }
+
+ public void testDelegatesCharFilter() throws Exception {
+ Analyzer analyzer1 = new Analyzer() {
+ @Override
+ protected Reader initReaderForNormalization(String fieldName, Reader reader) {
+ return new DummyCharFilter(reader, 'b', 'z');
+ }
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new MockTokenizer(attributeFactory(fieldName));
+ return new TokenStreamComponents(tokenizer);
+ }
+ };
+ DelegatingAnalyzerWrapper w1 = new DelegatingAnalyzerWrapper(Analyzer.GLOBAL_REUSE_STRATEGY) {
+ @Override
+ protected Analyzer getWrappedAnalyzer(String fieldName) {
+ return analyzer1;
+ }
+ };
+ assertEquals(new BytesRef("az c"), w1.normalize("foo", "ab c"));
+ }
+
+ private static class DummyCharFilter extends CharFilter {
+
+ private final char match, repl;
+
+ public DummyCharFilter(Reader input, char match, char repl) {
+ super(input);
+ this.match = match;
+ this.repl = repl;
+ }
+
+ @Override
+ protected int correct(int currentOff) {
+ return currentOff;
+ }
+
+ @Override
+ public int read(char[] cbuf, int off, int len) throws IOException {
+ final int read = input.read(cbuf, off, len);
+ for (int i = 0; i < read; ++i) {
+ if (cbuf[off+i] == match) {
+ cbuf[off+i] = repl;
+ }
+ }
+ return read;
+ }
+
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/af600480/lucene/test-framework/src/java/org/apache/lucene/analysis/MockBytesAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/MockBytesAnalyzer.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/MockBytesAnalyzer.java
index b8cfc5b..4d51717 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/analysis/MockBytesAnalyzer.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/MockBytesAnalyzer.java
@@ -30,7 +30,7 @@ public final class MockBytesAnalyzer extends Analyzer {
}
@Override
- protected AttributeFactory attributeFactory() {
+ protected AttributeFactory attributeFactory(String fieldName) {
return MockUTF16TermAttributeImpl.UTF16_TERM_ATTRIBUTE_FACTORY;
}
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/af600480/solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java b/solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java
index a5afbec..ab5458c 100644
--- a/solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java
+++ b/solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java
@@ -99,7 +99,7 @@ public final class TokenizerChain extends SolrAnalyzer {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- Tokenizer tk = tokenizer.create(attributeFactory());
+ Tokenizer tk = tokenizer.create(attributeFactory(fieldName));
TokenStream ts = tk;
for (TokenFilterFactory filter : filters) {
ts = filter.create(ts);