You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sa...@apache.org on 2015/02/07 08:40:34 UTC
svn commit: r1658030 - in /lucene/dev/branches/branch_5x/lucene/analysis/kuromoji/src: java/org/apache/lucene/analysis/ja/ test/org/apache/lucene/analysis/ja/

Author: sarowe
Date: Sat Feb  7 07:40:33 2015
New Revision: 1658030

URL: http://svn.apache.org/r1658030
Log:
LUCENE-6044: Fixed backcompat support for JapanesePartOfSpeechStopFilter with enablePositionIncrements=false

Added:
    lucene/dev/branches/branch_5x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/Lucene43JapanesePartOfSpeechStopFilter.java   (with props)
Modified:
    lucene/dev/branches/branch_5x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilterFactory.java
    lucene/dev/branches/branch_5x/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapanesePartOfSpeechStopFilterFactory.java

Modified: lucene/dev/branches/branch_5x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilterFactory.java?rev=1658030&r1=1658029&r2=1658030&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilterFactory.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilterFactory.java Sat Feb  7 07:40:33 2015
@@ -27,6 +27,7 @@ import org.apache.lucene.analysis.util.C
 import org.apache.lucene.analysis.util.ResourceLoader;
 import org.apache.lucene.analysis.util.ResourceLoaderAware;
 import org.apache.lucene.analysis.util.TokenFilterFactory;
+import org.apache.lucene.util.Version;
 
 /**
  * Factory for {@link org.apache.lucene.analysis.ja.JapanesePartOfSpeechStopFilter}.
@@ -43,11 +44,23 @@ import org.apache.lucene.analysis.util.T
 public class JapanesePartOfSpeechStopFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
   private final String stopTagFiles;
   private Set<String> stopTags;
+  private boolean enablePositionIncrements;
 
   /** Creates a new JapanesePartOfSpeechStopFilterFactory */
   public JapanesePartOfSpeechStopFilterFactory(Map<String,String> args) {
     super(args);
     stopTagFiles = get(args, "tags");
+
+    if (luceneMatchVersion.onOrAfter(Version.LUCENE_5_0_0) == false) {
+      boolean defaultValue = luceneMatchVersion.onOrAfter(Version.LUCENE_4_4_0);
+      enablePositionIncrements = getBoolean(args, "enablePositionIncrements", defaultValue);
+      if (enablePositionIncrements == false && luceneMatchVersion.onOrAfter(Version.LUCENE_4_4_0)) {
+        throw new IllegalArgumentException("enablePositionIncrements=false is not supported anymore as of Lucene 4.4");
+      }
+    } else if (args.containsKey("enablePositionIncrements")) {
+      throw new IllegalArgumentException("enablePositionIncrements is not a valid option as of Lucene 5.0");
+    }
+
     if (!args.isEmpty()) {
       throw new IllegalArgumentException("Unknown parameters: " + args);
     }
@@ -70,8 +83,11 @@ public class JapanesePartOfSpeechStopFil
   public TokenStream create(TokenStream stream) {
     // if stoptags is null, it means the file is empty
     if (stopTags != null) {
-      final TokenStream filter = new JapanesePartOfSpeechStopFilter(stream, stopTags);
-      return filter;
+      if (luceneMatchVersion.onOrAfter(Version.LUCENE_4_4_0)) {
+        return new JapanesePartOfSpeechStopFilter(stream, stopTags);
+      } else {
+        return new Lucene43JapanesePartOfSpeechStopFilter(enablePositionIncrements, stream, stopTags);
+      }
     } else {
       return stream;
     }

Added: lucene/dev/branches/branch_5x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/Lucene43JapanesePartOfSpeechStopFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/Lucene43JapanesePartOfSpeechStopFilter.java?rev=1658030&view=auto
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/Lucene43JapanesePartOfSpeechStopFilter.java (added)
+++ lucene/dev/branches/branch_5x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/Lucene43JapanesePartOfSpeechStopFilter.java Sat Feb  7 07:40:33 2015
@@ -0,0 +1,50 @@
+package org.apache.lucene.analysis.ja;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Set;
+
+import org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.util.Lucene43FilteringTokenFilter;
+
+/**
+ * Backcompat JapanesePartOfSpeechStopFilter for versions 4.3 and before.
+ * @deprecated Use {@link org.apache.lucene.analysis.ja.JapanesePartOfSpeechStopFilter} 
+ */
+@Deprecated
+public final class Lucene43JapanesePartOfSpeechStopFilter extends Lucene43FilteringTokenFilter {
+  private final Set<String> stopTags;
+  private final PartOfSpeechAttribute posAtt = addAttribute(PartOfSpeechAttribute.class);
+
+  /**
+   * Create a new {@link JapanesePartOfSpeechStopFilter}.
+   * @param input    the {@link TokenStream} to consume
+   * @param stopTags the part-of-speech tags that should be removed
+   */
+  public Lucene43JapanesePartOfSpeechStopFilter(boolean enablePositionIncrements, TokenStream input, Set<String> stopTags) {
+    super(enablePositionIncrements, input);
+    this.stopTags = stopTags;
+  }
+
+  @Override
+  protected boolean accept() {
+    final String pos = posAtt.getPartOfSpeech();
+    return pos == null || !stopTags.contains(pos);
+  }
+}

Modified: lucene/dev/branches/branch_5x/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapanesePartOfSpeechStopFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapanesePartOfSpeechStopFilterFactory.java?rev=1658030&r1=1658029&r2=1658030&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapanesePartOfSpeechStopFilterFactory.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapanesePartOfSpeechStopFilterFactory.java Sat Feb  7 07:40:33 2015
@@ -17,7 +17,6 @@ package org.apache.lucene.analysis.ja;
  * limitations under the License.
  */
 
-import java.io.IOException;
 import java.io.StringReader;
 import java.util.HashMap;
 import java.util.Map;
@@ -25,13 +24,14 @@ import java.util.Map;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
 import org.apache.lucene.util.Version;
 
 /**
  * Simple tests for {@link JapanesePartOfSpeechStopFilterFactory}
  */
 public class TestJapanesePartOfSpeechStopFilterFactory extends BaseTokenStreamTestCase {
-  public void testBasics() throws IOException {
+  public void testBasics() throws Exception {
     String tags = 
         "#  verb-main:\n" +
         "動詞-自立\n";
@@ -63,4 +63,66 @@ public class TestJapanesePartOfSpeechSto
       assertTrue(expected.getMessage().contains("Unknown parameters"));
     }
   }
+
+  public void test43Backcompat() throws Exception {
+    String tags = "#  particle-case-misc: Case particles.\n"
+                + "#  e.g. から, が, で, と, に, へ, より, を, の, にて\n"
+                + "助詞-格助詞-一般";
+
+    JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory(new HashMap<String,String>());
+    tokenizerFactory.inform(new StringMockResourceLoader(""));
+    Tokenizer tokenizer = tokenizerFactory.create();
+    tokenizer.setReader(new StringReader("私は制限スピードを超える。"));
+    Map<String,String> args = new HashMap<>();
+    args.put("luceneMatchVersion", Version.LUCENE_4_3_1.toString());
+    args.put("enablePositionIncrements", "true");
+    args.put("tags", "stoptags.txt");
+    JapanesePartOfSpeechStopFilterFactory factory = new JapanesePartOfSpeechStopFilterFactory(args);
+    factory.inform(new StringMockResourceLoader(tags));
+    TokenStream stream = factory.create(tokenizer);
+    assertTrue(stream instanceof Lucene43JapanesePartOfSpeechStopFilter);
+    assertTokenStreamContents(stream, new String[] { "私", "は", "制限", "スピード", "超える" }, 
+        new int[] {1, 1, 1, 1, 2});
+
+    tokenizer = tokenizerFactory.create();
+    tokenizer.setReader(new StringReader("私は制限スピードを超える。"));
+    args = new HashMap<>();
+    args.put("luceneMatchVersion", Version.LUCENE_4_3_1.toString());
+    args.put("enablePositionIncrements", "false");
+    args.put("tags", "stoptags.txt");
+    factory = new JapanesePartOfSpeechStopFilterFactory(args);
+    factory.inform(new StringMockResourceLoader(tags));
+    stream = factory.create(tokenizer);
+    assertTrue(stream instanceof Lucene43JapanesePartOfSpeechStopFilter);
+    assertTokenStreamContents(stream, new String[]{"私", "は", "制限", "スピード", "超える"},
+        new int[] {1, 1, 1, 1, 1});
+    
+    try {
+      args = new HashMap<>();
+      args.put("luceneMatchVersion", Version.LUCENE_4_4_0.toString());
+      args.put("enablePositionIncrements", "false");
+      args.put("tags", "stoptags.txt");
+      factory = new JapanesePartOfSpeechStopFilterFactory(args);
+      fail();
+    } catch (IllegalArgumentException expected) {
+      assertTrue(expected.getMessage().contains("enablePositionIncrements=false is not supported"));
+    }
+    args = new HashMap<>();
+    args.put("luceneMatchVersion", Version.LUCENE_4_4_0.toString());
+    args.put("enablePositionIncrements", "true");
+    args.put("tags", "stoptags.txt");
+    factory = new JapanesePartOfSpeechStopFilterFactory(args);
+
+    try {
+      args = new HashMap<>();
+      args.put("luceneMatchVersion", Version.LATEST.toString());
+      args.put("enablePositionIncrements", "false");
+      args.put("tags", "stoptags.txt");
+      factory = new JapanesePartOfSpeechStopFilterFactory(args);
+      fail();
+    } catch (IllegalArgumentException expected) {
+      assertTrue(expected.getMessage().contains("not a valid option"));
+    }
+  }
+
 }