You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2012/01/22 19:50:11 UTC

svn commit: r1234573 - in /lucene/dev/trunk/solr: ./ core/src/java/org/apache/solr/analysis/ core/src/test-files/solr/conf/ core/src/test/org/apache/solr/analysis/

Author: uschindler
Date: Sun Jan 22 18:50:10 2012
New Revision: 1234573

URL: http://svn.apache.org/viewvc?rev=1234573&view=rev
Log:
SOLR-3054, LUCENE-3121: Add TypeTokenFilterFactory that creates TypeTokenFilter that filters tokens based on their TypeAttribute

Added:
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TypeTokenFilterFactory.java   (with props)
    lucene/dev/trunk/solr/core/src/test-files/solr/conf/stoptypes-1.txt   (with props)
    lucene/dev/trunk/solr/core/src/test-files/solr/conf/stoptypes-2.txt   (with props)
    lucene/dev/trunk/solr/core/src/test/org/apache/solr/analysis/TestTypeTokenFilterFactory.java   (with props)
Modified:
    lucene/dev/trunk/solr/CHANGES.txt

Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1234573&r1=1234572&r2=1234573&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Sun Jan 22 18:50:10 2012
@@ -450,6 +450,10 @@ New Features
 * SOLR-1709: Distributed support for Date and Numeric Range Faceting
   (Peter Sturge, David Smiley, hossman, Simon Willnauer)
 
+* SOLR-3054, LUCENE-3121: Add TypeTokenFilterFactory that creates TypeTokenFilter
+  that filters tokens based on their TypeAttribute.  (Tommaso Teofili via
+  Uwe Schindler)
+
 Optimizations
 ----------------------
 * SOLR-1931: Speedup for LukeRequestHandler and admin/schema browser. New parameter

Added: lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TypeTokenFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TypeTokenFilterFactory.java?rev=1234573&view=auto
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TypeTokenFilterFactory.java (added)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TypeTokenFilterFactory.java Sun Jan 22 18:50:10 2012
@@ -0,0 +1,89 @@
+package org.apache.solr.analysis;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.core.TypeTokenFilter;
+import org.apache.solr.common.ResourceLoader;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.util.StrUtils;
+import org.apache.solr.util.plugin.ResourceLoaderAware;
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * Factory class for {@link TypeTokenFilter}.
+ * <p>
+ * The required <code>types</code> attribute names one or more resources (for example
+ * <code>"stoptypes-1.txt, stoptypes-2.txt"</code>); the lines of every listed resource are
+ * merged into a single set of token types that is handed to the filter. The optional
+ * <code>enablePositionIncrements</code> attribute defaults to <code>false</code>.
+ * <pre class="prettyprint" >
+ * &lt;fieldType name="chars" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.TypeTokenFilterFactory" types="stoptypes.txt" enablePositionIncrements="true"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ */
+public class TypeTokenFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
+
+  /** Token types collected by {@link #inform(ResourceLoader)}; null until at least one file name has been processed. */
+  private Set<String> stopTypes;
+
+  /** Value of the <code>enablePositionIncrements</code> attribute as read by {@link #inform(ResourceLoader)}. */
+  private boolean enablePositionIncrements;
+
+  /**
+   * Hands the configuration to the base class and then runs its match-version check.
+   *
+   * @param args the attributes declared on the filter element
+   */
+  @Override
+  public void init(Map<String, String> args) {
+    super.init(args);
+    assureMatchVersion();
+  }
+
+  /**
+   * Reads <code>enablePositionIncrements</code> and loads the token types from every resource
+   * listed in the <code>types</code> attribute.
+   *
+   * @param loader used to read the lines of each listed resource
+   * @throws SolrException if <code>types</code> is not configured, or if one of the listed
+   *         resources cannot be read (the underlying {@link IOException} is kept as the cause)
+   */
+  @Override
+  public void inform(ResourceLoader loader) {
+    String stopTypesFiles = args.get("types");
+    enablePositionIncrements = getBoolean("enablePositionIncrements", false);
+
+    if (stopTypesFiles == null) {
+      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Missing required parameter: types.");
+    }
+
+    try {
+      List<String> files = StrUtils.splitFileNames(stopTypesFiles);
+      if (files.size() > 0) {
+        stopTypes = new HashSet<String>();
+        for (String file : files) {
+          // file names may be written with blanks after the separator, e.g. "a.txt, b.txt"
+          stopTypes.addAll(loader.getLines(file.trim()));
+        }
+      }
+    } catch (IOException e) {
+      // Report I/O problems the same way as the missing-parameter case, and say which
+      // configuration value could not be loaded instead of throwing a bare RuntimeException.
+      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
+          "Unable to load token types from: " + stopTypesFiles, e);
+    }
+  }
+
+  /**
+   * @return the <code>enablePositionIncrements</code> setting read during {@link #inform(ResourceLoader)}
+   */
+  public boolean isEnablePositionIncrements() {
+    return enablePositionIncrements;
+  }
+
+  /**
+   * @return the token types loaded during {@link #inform(ResourceLoader)}; null if nothing has been loaded
+   */
+  public Set<String> getStopTypes() {
+    return stopTypes;
+  }
+
+  /**
+   * Wraps the given stream in a {@link TypeTokenFilter} configured with the loaded token types.
+   *
+   * @param input the stream to filter
+   * @return a new {@link TypeTokenFilter} around <code>input</code>
+   */
+  @Override
+  public TokenStream create(TokenStream input) {
+    return new TypeTokenFilter(enablePositionIncrements, input, stopTypes);
+  }
+}

Added: lucene/dev/trunk/solr/core/src/test-files/solr/conf/stoptypes-1.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test-files/solr/conf/stoptypes-1.txt?rev=1234573&view=auto
==============================================================================
--- lucene/dev/trunk/solr/core/src/test-files/solr/conf/stoptypes-1.txt (added)
+++ lucene/dev/trunk/solr/core/src/test-files/solr/conf/stoptypes-1.txt Sun Jan 22 18:50:10 2012
@@ -0,0 +1,17 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+<NUM>
+<EMAIL>

Added: lucene/dev/trunk/solr/core/src/test-files/solr/conf/stoptypes-2.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test-files/solr/conf/stoptypes-2.txt?rev=1234573&view=auto
==============================================================================
--- lucene/dev/trunk/solr/core/src/test-files/solr/conf/stoptypes-2.txt (added)
+++ lucene/dev/trunk/solr/core/src/test-files/solr/conf/stoptypes-2.txt Sun Jan 22 18:50:10 2012
@@ -0,0 +1,17 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+<HOST>
+<APOSTROPHE>

Added: lucene/dev/trunk/solr/core/src/test/org/apache/solr/analysis/TestTypeTokenFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/analysis/TestTypeTokenFilterFactory.java?rev=1234573&view=auto
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/analysis/TestTypeTokenFilterFactory.java (added)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/analysis/TestTypeTokenFilterFactory.java Sun Jan 22 18:50:10 2012
@@ -0,0 +1,86 @@
+package org.apache.solr.analysis;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import org.apache.lucene.analysis.NumericTokenStream;
+import org.apache.solr.common.ResourceLoader;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.core.SolrResourceLoader;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * Testcase for {@link TypeTokenFilterFactory}
+ */
+public class TestTypeTokenFilterFactory extends BaseTokenTestCase {
+
+  /**
+   * Loads one and then two type files and checks the merged set size and the parsed
+   * <code>enablePositionIncrements</code> flag.
+   */
+  @Test
+  public void testInform() throws Exception {
+    ResourceLoader loader = new SolrResourceLoader(null, null);
+    TypeTokenFilterFactory factory = new TypeTokenFilterFactory();
+    Map<String, String> args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
+    args.put("types", "stoptypes-1.txt");
+    args.put("enablePositionIncrements", "true");
+    factory.init(args);
+    factory.inform(loader);
+    Set<String> types = factory.getStopTypes();
+    assertNotNull("types is null and it shouldn't be", types);
+    assertEquals("unexpected number of types", 2, types.size());
+    assertTrue("enablePositionIncrements was set to true but not correctly parsed", factory.isEnablePositionIncrements());
+
+    factory = new TypeTokenFilterFactory();
+    args.put("types", "stoptypes-1.txt, stoptypes-2.txt");
+    args.put("enablePositionIncrements", "false");
+    factory.init(args);
+    factory.inform(loader);
+    types = factory.getStopTypes();
+    assertNotNull("types is null and it shouldn't be", types);
+    assertEquals("unexpected number of types", 4, types.size());
+    assertFalse("enablePositionIncrements was set to false but not correctly parsed", factory.isEnablePositionIncrements());
+  }
+
+  /**
+   * Creates a filter from a fully configured factory.
+   */
+  @Test
+  public void testCreation() throws Exception {
+    TypeTokenFilterFactory typeTokenFilterFactory = new TypeTokenFilterFactory();
+    Map<String, String> args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
+    args.put("types", "stoptypes-1.txt, stoptypes-2.txt");
+    args.put("enablePositionIncrements", "false");
+    typeTokenFilterFactory.init(args);
+    // load the type files first; otherwise the filter is built around a null set of types
+    typeTokenFilterFactory.inform(new SolrResourceLoader(null, null));
+    NumericTokenStream input = new NumericTokenStream();
+    input.setIntValue(123);
+    assertNotNull("create() returned null", typeTokenFilterFactory.create(input));
+  }
+
+  /**
+   * A factory without the <code>types</code> attribute must fail in <code>inform</code>.
+   */
+  @Test
+  public void testMissingTypesParameter() throws Exception {
+    try {
+      TypeTokenFilterFactory typeTokenFilterFactory = new TypeTokenFilterFactory();
+      Map<String, String> args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
+      args.put("enablePositionIncrements", "false");
+      typeTokenFilterFactory.init(args);
+      typeTokenFilterFactory.inform(new SolrResourceLoader(null, null));
+      fail("not supplying 'types' parameter should cause a SolrException");
+    } catch (SolrException e) {
+      // everything ok
+    }
+  }
+
+}