You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sa...@apache.org on 2016/07/21 13:37:30 UTC
[08/51] [abbrv] lucene-solr:apiv2: LUCENE-7355: Add
Analyzer#normalize() and use it in query parsers.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/test-framework/src/java/org/apache/lucene/analysis/MockBytesAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/MockBytesAnalyzer.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/MockBytesAnalyzer.java
index 01f3d4d..b8cfc5b 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/analysis/MockBytesAnalyzer.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/MockBytesAnalyzer.java
@@ -16,6 +16,8 @@
*/
package org.apache.lucene.analysis;
+import org.apache.lucene.util.AttributeFactory;
+
/**
* Analyzer for testing that encodes terms as UTF-16 bytes.
*/
@@ -26,4 +28,9 @@ public final class MockBytesAnalyzer extends Analyzer {
MockTokenizer.KEYWORD, false, MockTokenizer.DEFAULT_MAX_TOKEN_LENGTH);
return new TokenStreamComponents(t);
}
+
+ /**
+ * Overrides the analyzer's attribute factory with the shared
+ * {@link MockUTF16TermAttributeImpl#UTF16_TERM_ATTRIBUTE_FACTORY} constant.
+ */
+ @Override
+ protected AttributeFactory attributeFactory() {
+ return MockUTF16TermAttributeImpl.UTF16_TERM_ATTRIBUTE_FACTORY;
+ }
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/test-framework/src/java/org/apache/lucene/analysis/MockLowerCaseFilter.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/MockLowerCaseFilter.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/MockLowerCaseFilter.java
new file mode 100644
index 0000000..b1aea3d
--- /dev/null
+++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/MockLowerCaseFilter.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis;
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+/**
+ * A {@link TokenFilter} that lowercases the term text of every token it passes through.
+ */
+public final class MockLowerCaseFilter extends TokenFilter {
+  /** Term attribute whose character buffer is rewritten in place. */
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+
+  /**
+   * Sole constructor.
+   *
+   * @param in the token stream to wrap
+   */
+  public MockLowerCaseFilter(TokenStream in) {
+    super(in);
+  }
+
+  /**
+   * Advances the wrapped stream and lowercases the current term buffer in place.
+   *
+   * @return {@code true} if a token was produced, {@code false} once the input is exhausted
+   * @throws IOException if the wrapped stream fails to advance
+   */
+  @Override
+  public final boolean incrementToken() throws IOException {
+    if (!input.incrementToken()) {
+      return false;
+    }
+    // Only the first length() chars of the buffer belong to the current term.
+    final char[] buffer = termAtt.buffer();
+    final int length = termAtt.length();
+    CharacterUtils.toLowerCase(buffer, 0, length);
+    return true;
+  }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java b/solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java
index c9f263d..a5afbec 100644
--- a/solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java
+++ b/solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java
@@ -18,6 +18,7 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.util.CharFilterFactory;
+import org.apache.lucene.analysis.util.MultiTermAwareComponent;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.analysis.util.TokenizerFactory;
@@ -84,8 +85,21 @@ public final class TokenizerChain extends SolrAnalyzer {
}
@Override
+ protected Reader initReaderForNormalization(String fieldName, Reader reader) {
+   // Wraps the reader with the multi-term variant of each configured char filter;
+   // char filters that are not MultiTermAwareComponent are skipped.
+   if (charFilters == null) {
+     return reader;
+   }
+   Reader wrapped = reader;
+   for (CharFilterFactory factory : charFilters) {
+     if (!(factory instanceof MultiTermAwareComponent)) {
+       continue;
+     }
+     CharFilterFactory multiTermFactory =
+         (CharFilterFactory) ((MultiTermAwareComponent) factory).getMultiTermComponent();
+     // Each factory wraps the reader produced by the previous one.
+     wrapped = multiTermFactory.create(wrapped);
+   }
+   return wrapped;
+ }
+
+ @Override
protected TokenStreamComponents createComponents(String fieldName) {
- Tokenizer tk = tokenizer.create();
+ Tokenizer tk = tokenizer.create(attributeFactory());
TokenStream ts = tk;
for (TokenFilterFactory filter : filters) {
ts = filter.create(ts);
@@ -94,6 +108,18 @@ public final class TokenizerChain extends SolrAnalyzer {
}
@Override
+ protected TokenStream normalize(String fieldName, TokenStream in) {
+   // Builds the normalization chain from the multi-term variants of the configured
+   // filters; filters that are not MultiTermAwareComponent are skipped. Returns
+   // `in` unchanged when no filter qualifies.
+   TokenStream result = in;
+   for (TokenFilterFactory filter : filters) {
+     if (filter instanceof MultiTermAwareComponent) {
+       TokenFilterFactory multiTermFilter =
+           (TokenFilterFactory) ((MultiTermAwareComponent) filter).getMultiTermComponent();
+       // Wrap the stream built so far, not the raw input: wrapping `in` on every
+       // iteration would discard all earlier filters and apply only the last one.
+       result = multiTermFilter.create(result);
+     }
+   }
+   return result;
+ }
+
+ @Override
public String toString() {
StringBuilder sb = new StringBuilder("TokenizerChain(");
for (CharFilterFactory filter: charFilters) {