You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2012/01/22 19:50:11 UTC
svn commit: r1234573 - in /lucene/dev/trunk/solr: ./
core/src/java/org/apache/solr/analysis/ core/src/test-files/solr/conf/
core/src/test/org/apache/solr/analysis/
Author: uschindler
Date: Sun Jan 22 18:50:10 2012
New Revision: 1234573
URL: http://svn.apache.org/viewvc?rev=1234573&view=rev
Log:
SOLR-3054, LUCENE-3121: Add TypeTokenFilterFactory that creates TypeTokenFilter that filters tokens based on their TypeAttribute
Added:
lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TypeTokenFilterFactory.java (with props)
lucene/dev/trunk/solr/core/src/test-files/solr/conf/stoptypes-1.txt (with props)
lucene/dev/trunk/solr/core/src/test-files/solr/conf/stoptypes-2.txt (with props)
lucene/dev/trunk/solr/core/src/test/org/apache/solr/analysis/TestTypeTokenFilterFactory.java (with props)
Modified:
lucene/dev/trunk/solr/CHANGES.txt
Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1234573&r1=1234572&r2=1234573&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Sun Jan 22 18:50:10 2012
@@ -450,6 +450,10 @@ New Features
* SOLR-1709: Distributed support for Date and Numeric Range Faceting
(Peter Sturge, David Smiley, hossman, Simon Willnauer)
+* SOLR-3054, LUCENE-3121: Add TypeTokenFilterFactory that creates TypeTokenFilter
+ that filters tokens based on their TypeAttribute. (Tommaso Teofili via
+ Uwe Schindler)
+
Optimizations
----------------------
* SOLR-1931: Speedup for LukeRequestHandler and admin/schema browser. New parameter
Added: lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TypeTokenFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TypeTokenFilterFactory.java?rev=1234573&view=auto
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TypeTokenFilterFactory.java (added)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TypeTokenFilterFactory.java Sun Jan 22 18:50:10 2012
@@ -0,0 +1,89 @@
+package org.apache.solr.analysis;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.core.TypeTokenFilter;
+import org.apache.solr.common.ResourceLoader;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.util.StrUtils;
+import org.apache.solr.util.plugin.ResourceLoaderAware;
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * Factory that builds {@link TypeTokenFilter} instances from a Solr field type definition.
+ * <p>
+ * Recognised arguments:
+ * <ul>
+ *   <li><code>types</code> (required): one or more resource names, split with
+ *       {@link StrUtils#splitFileNames(String)}; every line of every named resource is added
+ *       to the set of token types handed to the filter.</li>
+ *   <li><code>enablePositionIncrements</code> (optional, defaults to <code>false</code>):
+ *       passed through to the filter unchanged.</li>
+ * </ul>
+ * Example configuration:
+ * <pre class="prettyprint" >
+ * <fieldType name="chars" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.StandardTokenizerFactory"/>
+ * <filter class="solr.TypeTokenFilterFactory" types="stoptypes.txt" enablePositionIncrements="true"/>
+ * </analyzer>
+ * </fieldType></pre>
+ */
+public class TypeTokenFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
+
+  /** Token types read from the configured resources; stays null until {@link #inform} has run. */
+  private Set<String> stopTypes;
+
+  /** Value of the <code>enablePositionIncrements</code> argument, read in {@link #inform}. */
+  private boolean enablePositionIncrements;
+
+  /**
+   * Stores the factory arguments and verifies that a match version was supplied.
+   *
+   * @param args the arguments declared on the filter element
+   */
+  @Override
+  public void init(Map<String, String> args) {
+    super.init(args);
+    assureMatchVersion();
+  }
+
+  /**
+   * Reads the factory arguments and loads the token types from the resources named by
+   * <code>types</code>.
+   *
+   * @param loader used to read the lines of each named resource
+   * @throws SolrException if the <code>types</code> argument is absent
+   * @throws RuntimeException wrapping the {@link IOException} if a resource cannot be read
+   */
+  @Override
+  public void inform(ResourceLoader loader) {
+    final String typesParam = args.get("types");
+    enablePositionIncrements = getBoolean("enablePositionIncrements", false);
+
+    // Guard clause: nothing can be loaded without the required argument.
+    if (typesParam == null) {
+      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Missing required parameter: types.");
+    }
+
+    try {
+      final List<String> resourceNames = StrUtils.splitFileNames(typesParam);
+      if (!resourceNames.isEmpty()) {
+        stopTypes = new HashSet<String>();
+        for (String resourceName : resourceNames) {
+          // Names are trimmed so that "a.txt, b.txt" resolves both resources.
+          stopTypes.addAll(loader.getLines(resourceName.trim()));
+        }
+      }
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  /**
+   * @return the value of the <code>enablePositionIncrements</code> argument as read by
+   *         {@link #inform}
+   */
+  public boolean isEnablePositionIncrements() {
+    return enablePositionIncrements;
+  }
+
+  /**
+   * @return the token types loaded by {@link #inform}, or null if nothing has been loaded
+   */
+  public Set<String> getStopTypes() {
+    return stopTypes;
+  }
+
+  /**
+   * Wraps the given stream in a {@link TypeTokenFilter} configured with the loaded token types.
+   *
+   * @param input the stream to filter
+   * @return the filtering stream
+   */
+  @Override
+  public TokenStream create(TokenStream input) {
+    return new TypeTokenFilter(enablePositionIncrements, input, stopTypes);
+  }
+}
Added: lucene/dev/trunk/solr/core/src/test-files/solr/conf/stoptypes-1.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test-files/solr/conf/stoptypes-1.txt?rev=1234573&view=auto
==============================================================================
--- lucene/dev/trunk/solr/core/src/test-files/solr/conf/stoptypes-1.txt (added)
+++ lucene/dev/trunk/solr/core/src/test-files/solr/conf/stoptypes-1.txt Sun Jan 22 18:50:10 2012
@@ -0,0 +1,17 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+<NUM>
+<EMAIL>
Added: lucene/dev/trunk/solr/core/src/test-files/solr/conf/stoptypes-2.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test-files/solr/conf/stoptypes-2.txt?rev=1234573&view=auto
==============================================================================
--- lucene/dev/trunk/solr/core/src/test-files/solr/conf/stoptypes-2.txt (added)
+++ lucene/dev/trunk/solr/core/src/test-files/solr/conf/stoptypes-2.txt Sun Jan 22 18:50:10 2012
@@ -0,0 +1,17 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+<HOST>
+<APOSTROPHE>
Added: lucene/dev/trunk/solr/core/src/test/org/apache/solr/analysis/TestTypeTokenFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/analysis/TestTypeTokenFilterFactory.java?rev=1234573&view=auto
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/analysis/TestTypeTokenFilterFactory.java (added)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/analysis/TestTypeTokenFilterFactory.java Sun Jan 22 18:50:10 2012
@@ -0,0 +1,86 @@
+package org.apache.solr.analysis;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import org.apache.lucene.analysis.NumericTokenStream;
+import org.apache.solr.common.ResourceLoader;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.core.SolrResourceLoader;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * Testcase for {@link TypeTokenFilterFactory}: argument parsing, loading of the type files
+ * through a resource loader, filter creation, and the error raised when the required
+ * <code>types</code> argument is missing.
+ */
+public class TestTypeTokenFilterFactory extends BaseTokenTestCase {
+
+  /**
+   * Checks that {@code inform} loads the types from one file and from two files, and that
+   * <code>enablePositionIncrements</code> is parsed for both "true" and "false".
+   */
+  @Test
+  public void testInform() throws Exception {
+    ResourceLoader loader = new SolrResourceLoader(null, null);
+    TypeTokenFilterFactory factory = new TypeTokenFilterFactory();
+    Map<String, String> args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
+    args.put("types", "stoptypes-1.txt");
+    args.put("enablePositionIncrements", "true");
+    factory.init(args);
+    factory.inform(loader);
+    Set<String> types = factory.getStopTypes();
+    assertTrue("types is null and it shouldn't be", types != null);
+    assertTrue("types Size: " + types.size() + " is not: " + 2, types.size() == 2);
+    assertTrue("enablePositionIncrements was set to true but not correctly parsed", factory.isEnablePositionIncrements());
+
+    // Second pass: a fresh factory fed with two files; the same args map is reused on purpose.
+    factory = new TypeTokenFilterFactory();
+    args.put("types", "stoptypes-1.txt, stoptypes-2.txt");
+    args.put("enablePositionIncrements", "false");
+    factory.init(args);
+    factory.inform(loader);
+    types = factory.getStopTypes();
+    assertTrue("types is null and it shouldn't be", types != null);
+    assertTrue("types Size: " + types.size() + " is not: " + 4, types.size() == 4);
+    assertTrue("enablePositionIncrements was set to false but not correctly parsed", !factory.isEnablePositionIncrements());
+  }
+
+  /**
+   * Checks that a fully initialised factory (init followed by inform) creates a filter.
+   * Without the inform call the type files are never read and the filter would be built
+   * from a null set of types.
+   */
+  @Test
+  public void testCreation() throws Exception {
+    TypeTokenFilterFactory typeTokenFilterFactory = new TypeTokenFilterFactory();
+    Map<String, String> args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
+    args.put("types", "stoptypes-1.txt, stoptypes-2.txt");
+    args.put("enablePositionIncrements", "false");
+    typeTokenFilterFactory.init(args);
+    typeTokenFilterFactory.inform(new SolrResourceLoader(null, null));
+    assertTrue("types were not loaded before creating the filter", typeTokenFilterFactory.getStopTypes() != null);
+    NumericTokenStream input = new NumericTokenStream();
+    input.setIntValue(123);
+    assertTrue("factory did not create a filter", typeTokenFilterFactory.create(input) != null);
+  }
+
+  /**
+   * Checks that leaving out the <code>types</code> argument is reported as a
+   * {@link SolrException}.
+   */
+  @Test
+  public void testMissingTypesParameter() throws Exception {
+    try {
+      TypeTokenFilterFactory typeTokenFilterFactory = new TypeTokenFilterFactory();
+      Map<String, String> args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
+      args.put("enablePositionIncrements", "false");
+      typeTokenFilterFactory.init(args);
+      typeTokenFilterFactory.inform(new SolrResourceLoader(null, null));
+      fail("not supplying 'types' parameter should cause a SolrException");
+    } catch (SolrException e) {
+      // everything ok
+    }
+  }
+
+}