You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sa...@apache.org on 2015/02/07 08:40:34 UTC
svn commit: r1658030 - in
/lucene/dev/branches/branch_5x/lucene/analysis/kuromoji/src:
java/org/apache/lucene/analysis/ja/ test/org/apache/lucene/analysis/ja/
Author: sarowe
Date: Sat Feb 7 07:40:33 2015
New Revision: 1658030
URL: http://svn.apache.org/r1658030
Log:
LUCENE-6044: Fixed backcompat support for JapanesePartOfSpeechStopFilter with enablePositionIncrements=false
Added:
lucene/dev/branches/branch_5x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/Lucene43JapanesePartOfSpeechStopFilter.java (with props)
Modified:
lucene/dev/branches/branch_5x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilterFactory.java
lucene/dev/branches/branch_5x/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapanesePartOfSpeechStopFilterFactory.java
Modified: lucene/dev/branches/branch_5x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilterFactory.java?rev=1658030&r1=1658029&r2=1658030&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilterFactory.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilterFactory.java Sat Feb 7 07:40:33 2015
@@ -27,6 +27,7 @@ import org.apache.lucene.analysis.util.C
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.TokenFilterFactory;
+import org.apache.lucene.util.Version;
/**
* Factory for {@link org.apache.lucene.analysis.ja.JapanesePartOfSpeechStopFilter}.
@@ -43,11 +44,23 @@ import org.apache.lucene.analysis.util.T
public class JapanesePartOfSpeechStopFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
private final String stopTagFiles;
private Set<String> stopTags;
+ private boolean enablePositionIncrements;
/** Creates a new JapanesePartOfSpeechStopFilterFactory */
public JapanesePartOfSpeechStopFilterFactory(Map<String,String> args) {
super(args);
stopTagFiles = get(args, "tags");
+
+ if (luceneMatchVersion.onOrAfter(Version.LUCENE_5_0_0) == false) {
+ boolean defaultValue = luceneMatchVersion.onOrAfter(Version.LUCENE_4_4_0);
+ enablePositionIncrements = getBoolean(args, "enablePositionIncrements", defaultValue);
+ if (enablePositionIncrements == false && luceneMatchVersion.onOrAfter(Version.LUCENE_4_4_0)) {
+ throw new IllegalArgumentException("enablePositionIncrements=false is not supported anymore as of Lucene 4.4");
+ }
+ } else if (args.containsKey("enablePositionIncrements")) {
+ throw new IllegalArgumentException("enablePositionIncrements is not a valid option as of Lucene 5.0");
+ }
+
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
@@ -70,8 +83,11 @@ public class JapanesePartOfSpeechStopFil
public TokenStream create(TokenStream stream) {
// if stoptags is null, it means the file is empty
if (stopTags != null) {
- final TokenStream filter = new JapanesePartOfSpeechStopFilter(stream, stopTags);
- return filter;
+ if (luceneMatchVersion.onOrAfter(Version.LUCENE_4_4_0)) {
+ return new JapanesePartOfSpeechStopFilter(stream, stopTags);
+ } else {
+ return new Lucene43JapanesePartOfSpeechStopFilter(enablePositionIncrements, stream, stopTags);
+ }
} else {
return stream;
}
Added: lucene/dev/branches/branch_5x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/Lucene43JapanesePartOfSpeechStopFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/Lucene43JapanesePartOfSpeechStopFilter.java?rev=1658030&view=auto
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/Lucene43JapanesePartOfSpeechStopFilter.java (added)
+++ lucene/dev/branches/branch_5x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/Lucene43JapanesePartOfSpeechStopFilter.java Sat Feb 7 07:40:33 2015
@@ -0,0 +1,50 @@
+package org.apache.lucene.analysis.ja;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Set;
+
+import org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.util.Lucene43FilteringTokenFilter;
+
+/**
+ * Backcompat JapanesePartOfSpeechStopFilter for versions 4.3 and before.
+ * @deprecated Use {@link org.apache.lucene.analysis.ja.JapanesePartOfSpeechStopFilter}
+ */
+@Deprecated
+public final class Lucene43JapanesePartOfSpeechStopFilter extends Lucene43FilteringTokenFilter {
+ private final Set<String> stopTags;
+ private final PartOfSpeechAttribute posAtt = addAttribute(PartOfSpeechAttribute.class);
+
+ /**
+ * Create a new {@link JapanesePartOfSpeechStopFilter}.
+ * @param input the {@link TokenStream} to consume
+ * @param stopTags the part-of-speech tags that should be removed
+ */
+ public Lucene43JapanesePartOfSpeechStopFilter(boolean enablePositionIncrements, TokenStream input, Set<String> stopTags) {
+ super(enablePositionIncrements, input);
+ this.stopTags = stopTags;
+ }
+
+ @Override
+ protected boolean accept() {
+ final String pos = posAtt.getPartOfSpeech();
+ return pos == null || !stopTags.contains(pos);
+ }
+}
Modified: lucene/dev/branches/branch_5x/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapanesePartOfSpeechStopFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapanesePartOfSpeechStopFilterFactory.java?rev=1658030&r1=1658029&r2=1658030&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapanesePartOfSpeechStopFilterFactory.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapanesePartOfSpeechStopFilterFactory.java Sat Feb 7 07:40:33 2015
@@ -17,7 +17,6 @@ package org.apache.lucene.analysis.ja;
* limitations under the License.
*/
-import java.io.IOException;
import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;
@@ -25,13 +24,14 @@ import java.util.Map;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.util.Version;
/**
* Simple tests for {@link JapanesePartOfSpeechStopFilterFactory}
*/
public class TestJapanesePartOfSpeechStopFilterFactory extends BaseTokenStreamTestCase {
- public void testBasics() throws IOException {
+ public void testBasics() throws Exception {
String tags =
"# verb-main:\n" +
"動詞-自立\n";
@@ -63,4 +63,66 @@ public class TestJapanesePartOfSpeechSto
assertTrue(expected.getMessage().contains("Unknown parameters"));
}
}
+
+ public void test43Backcompat() throws Exception {
+ String tags = "# particle-case-misc: Case particles.\n"
+ + "# e.g. から, が, で, と, に, へ, より, を, の, にて\n"
+ + "助詞-格助詞-一般";
+
+ JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory(new HashMap<String,String>());
+ tokenizerFactory.inform(new StringMockResourceLoader(""));
+ Tokenizer tokenizer = tokenizerFactory.create();
+ tokenizer.setReader(new StringReader("私は制限スピードを超える。"));
+ Map<String,String> args = new HashMap<>();
+ args.put("luceneMatchVersion", Version.LUCENE_4_3_1.toString());
+ args.put("enablePositionIncrements", "true");
+ args.put("tags", "stoptags.txt");
+ JapanesePartOfSpeechStopFilterFactory factory = new JapanesePartOfSpeechStopFilterFactory(args);
+ factory.inform(new StringMockResourceLoader(tags));
+ TokenStream stream = factory.create(tokenizer);
+ assertTrue(stream instanceof Lucene43JapanesePartOfSpeechStopFilter);
+ assertTokenStreamContents(stream, new String[] { "私", "は", "制限", "スピード", "超える" },
+ new int[] {1, 1, 1, 1, 2});
+
+ tokenizer = tokenizerFactory.create();
+ tokenizer.setReader(new StringReader("私は制限スピードを超える。"));
+ args = new HashMap<>();
+ args.put("luceneMatchVersion", Version.LUCENE_4_3_1.toString());
+ args.put("enablePositionIncrements", "false");
+ args.put("tags", "stoptags.txt");
+ factory = new JapanesePartOfSpeechStopFilterFactory(args);
+ factory.inform(new StringMockResourceLoader(tags));
+ stream = factory.create(tokenizer);
+ assertTrue(stream instanceof Lucene43JapanesePartOfSpeechStopFilter);
+ assertTokenStreamContents(stream, new String[]{"私", "は", "制限", "スピード", "超える"},
+ new int[] {1, 1, 1, 1, 1});
+
+ try {
+ args = new HashMap<>();
+ args.put("luceneMatchVersion", Version.LUCENE_4_4_0.toString());
+ args.put("enablePositionIncrements", "false");
+ args.put("tags", "stoptags.txt");
+ factory = new JapanesePartOfSpeechStopFilterFactory(args);
+ fail();
+ } catch (IllegalArgumentException expected) {
+ assertTrue(expected.getMessage().contains("enablePositionIncrements=false is not supported"));
+ }
+ args = new HashMap<>();
+ args.put("luceneMatchVersion", Version.LUCENE_4_4_0.toString());
+ args.put("enablePositionIncrements", "true");
+ args.put("tags", "stoptags.txt");
+ factory = new JapanesePartOfSpeechStopFilterFactory(args);
+
+ try {
+ args = new HashMap<>();
+ args.put("luceneMatchVersion", Version.LATEST.toString());
+ args.put("enablePositionIncrements", "false");
+ args.put("tags", "stoptags.txt");
+ factory = new JapanesePartOfSpeechStopFilterFactory(args);
+ fail();
+ } catch (IllegalArgumentException expected) {
+ assertTrue(expected.getMessage().contains("not a valid option"));
+ }
+ }
+
}