You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by ho...@apache.org on 2008/07/13 20:15:44 UTC

svn commit: r676390 - in /lucene/solr/trunk: ./ src/java/org/apache/solr/analysis/

Author: hossman
Date: Sun Jul 13 11:15:43 2008
New Revision: 676390

URL: http://svn.apache.org/viewvc?rev=676390&view=rev
Log:
add factories for 'new' Lucene analysis classes where it makes sense

Added:
    lucene/solr/trunk/src/java/org/apache/solr/analysis/DictionaryCompoundWordTokenFilterFactory.java   (with props)
    lucene/solr/trunk/src/java/org/apache/solr/analysis/ElisionFilterFactory.java   (with props)
    lucene/solr/trunk/src/java/org/apache/solr/analysis/NumericPayloadTokenFilterFactory.java   (with props)
    lucene/solr/trunk/src/java/org/apache/solr/analysis/ShingleFilterFactory.java   (with props)
    lucene/solr/trunk/src/java/org/apache/solr/analysis/TokenOffsetPayloadTokenFilterFactory.java   (with props)
    lucene/solr/trunk/src/java/org/apache/solr/analysis/TypeAsPayloadTokenFilterFactory.java   (with props)
Modified:
    lucene/solr/trunk/build.xml

Modified: lucene/solr/trunk/build.xml
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/build.xml?rev=676390&r1=676389&r2=676390&view=diff
==============================================================================
--- lucene/solr/trunk/build.xml (original)
+++ lucene/solr/trunk/build.xml Sun Jul 13 11:15:43 2008
@@ -255,6 +255,18 @@
                  <regexp pattern="CachingTokenFilter"/>
               </linecontainsregexp>
               <linecontainsregexp negate="true">
+                 <!-- no way to leverage this in Solr -->
+                 <regexp pattern="HyphenationCompoundWordTokenFilter"/>
+              </linecontainsregexp>
+              <linecontainsregexp negate="true">
+                 <!-- no way to leverage these in Solr (yet) -->
+                 <regexp pattern="Sink\|Tee"/>
+              </linecontainsregexp>
+              <linecontainsregexp negate="true">
+                 <!-- Solr already has a different impl for this -->
+                 <regexp pattern="SynonymTokenFilter"/> 
+              </linecontainsregexp>
+              <linecontainsregexp negate="true">
                  <!-- solr and lucene both have one? ? ? ? -->
                  <regexp pattern="LengthFilter"/> 
               </linecontainsregexp>

Added: lucene/solr/trunk/src/java/org/apache/solr/analysis/DictionaryCompoundWordTokenFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/analysis/DictionaryCompoundWordTokenFilterFactory.java?rev=676390&view=auto
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/analysis/DictionaryCompoundWordTokenFilterFactory.java (added)
+++ lucene/solr/trunk/src/java/org/apache/solr/analysis/DictionaryCompoundWordTokenFilterFactory.java Sun Jul 13 11:15:43 2008
@@ -0,0 +1,59 @@
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.solr.analysis;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.compound.*;
+import org.apache.solr.util.plugin.ResourceLoaderAware;
+import org.apache.solr.common.ResourceLoader;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
+import java.util.List;
+import java.util.Set;
+import java.util.Map;
+import java.io.IOException;
+
+/**
+ * Factory for {@link DictionaryCompoundWordTokenFilter}.
+ * <p>
+ * Init args read by this factory:
+ * <ul>
+ *  <li><code>dictionary</code> (required) - name of the word list resource,
+ *      read line by line through the {@link ResourceLoader}</li>
+ *  <li><code>minWordSize</code> - defaults to
+ *      <code>CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE</code></li>
+ *  <li><code>minSubwordSize</code> - defaults to
+ *      <code>CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE</code></li>
+ *  <li><code>maxSubwordSize</code> - defaults to
+ *      <code>CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE</code></li>
+ *  <li><code>onlyLongestMatch</code> - defaults to <code>true</code></li>
+ * </ul>
+ */
+public class DictionaryCompoundWordTokenFilterFactory extends BaseTokenFilterFactory  implements ResourceLoaderAware {
+  /** Word set built from the dictionary resource; assigned in {@link #inform}. */
+  private Set dictionary;
+  /** Value of the <code>dictionary</code> init arg; never null once init() has returned. */
+  private String dictFile;
+  private int minWordSize;
+  private int minSubwordSize;
+  private int maxSubwordSize;
+  private boolean onlyLongestMatch;
+
+  /**
+   * Captures the filter settings from the init args.
+   *
+   * @param args init args for this factory
+   * @throws RuntimeException if the <code>dictionary</code> arg is missing
+   */
+  public void init(Map<String, String> args) {
+    super.init(args);
+    dictFile = args.get("dictionary");
+    // Fail here with a readable message rather than later inside inform(),
+    // mirroring how ElisionFilterFactory reports its missing "articles" arg.
+    if (dictFile == null) {
+      throw new RuntimeException("No dictionary specified for DictionaryCompoundWordTokenFilterFactory");
+    }
+    minWordSize= getInt("minWordSize",CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE);
+    minSubwordSize= getInt("minSubwordSize",CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE);
+    maxSubwordSize= getInt("maxSubwordSize",CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE);
+    onlyLongestMatch = getBoolean("onlyLongestMatch",true);
+  }
+
+  /**
+   * Loads the dictionary resource and builds the word set used by create().
+   *
+   * @param loader used to read the lines of the dictionary resource
+   * @throws RuntimeException wrapping any IOException raised while reading
+   */
+  public void inform(ResourceLoader loader) {
+    try {
+      List<String> wlist = loader.getLines(dictFile);
+      // second argument is makeStopSet's ignoreCase flag (per the Lucene StopFilter javadoc)
+      dictionary = StopFilter.makeStopSet(wlist.toArray(new String[0]), false);
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  /**
+   * Wraps <code>input</code> in a filter configured with the loaded dictionary
+   * and the size/match settings captured in init().
+   */
+  public DictionaryCompoundWordTokenFilter create(TokenStream input) {
+    return new DictionaryCompoundWordTokenFilter(input,dictionary,minWordSize,minSubwordSize,maxSubwordSize,onlyLongestMatch);
+  }
+}
+

Propchange: lucene/solr/trunk/src/java/org/apache/solr/analysis/DictionaryCompoundWordTokenFilterFactory.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/solr/trunk/src/java/org/apache/solr/analysis/DictionaryCompoundWordTokenFilterFactory.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Added: lucene/solr/trunk/src/java/org/apache/solr/analysis/ElisionFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/analysis/ElisionFilterFactory.java?rev=676390&view=auto
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/analysis/ElisionFilterFactory.java (added)
+++ lucene/solr/trunk/src/java/org/apache/solr/analysis/ElisionFilterFactory.java Sun Jul 13 11:15:43 2008
@@ -0,0 +1,62 @@
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.solr.analysis;
+
+import org.apache.solr.common.ResourceLoader;
+import org.apache.solr.util.plugin.ResourceLoaderAware;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.fr.*;
+import java.io.IOException;
+import java.util.Set;
+import java.util.HashSet;
+import java.util.Arrays;
+import java.util.Iterator;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.TokenFilter;
+import java.util.Map;
+import java.util.List;
+import java.util.Set;
+import java.io.IOException;
+
+/**
+ * Factory for {@link ElisionFilter}.
+ * <p>
+ * Requires an <code>articles</code> init arg naming a resource whose lines
+ * are turned into the article set handed to every filter it creates.
+ */
+public class ElisionFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
+
+  /** Article set built in {@link #inform}; passed to each ElisionFilter. */
+  private Set articles;
+
+  /**
+   * Reads the resource named by the <code>articles</code> init arg and
+   * builds the article set from its lines.
+   *
+   * @param loader used to read the lines of the articles resource
+   * @throws RuntimeException if no <code>articles</code> arg was given, or
+   *         wrapping any IOException raised while reading the resource
+   */
+  public void inform(ResourceLoader loader) {
+    final String resourceName = args.get("articles");
+
+    // Nothing sensible can be built without an article list.
+    if (resourceName == null) {
+      throw new RuntimeException("No articles specified for ElisionFilterFactory");
+    }
+
+    try {
+      final List<String> lines = loader.getLines(resourceName);
+      final String[] words = lines.toArray(new String[lines.size()]);
+      articles = StopFilter.makeStopSet(words, false);
+    } catch (IOException ioe) {
+      throw new RuntimeException(ioe);
+    }
+  }
+
+  /** Wraps <code>input</code> in an ElisionFilter that uses the loaded article set. */
+  public ElisionFilter create(TokenStream input) {
+    final ElisionFilter filter = new ElisionFilter(input, articles);
+    return filter;
+  }
+}
+

Propchange: lucene/solr/trunk/src/java/org/apache/solr/analysis/ElisionFilterFactory.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/solr/trunk/src/java/org/apache/solr/analysis/ElisionFilterFactory.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Added: lucene/solr/trunk/src/java/org/apache/solr/analysis/NumericPayloadTokenFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/analysis/NumericPayloadTokenFilterFactory.java?rev=676390&view=auto
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/analysis/NumericPayloadTokenFilterFactory.java (added)
+++ lucene/solr/trunk/src/java/org/apache/solr/analysis/NumericPayloadTokenFilterFactory.java Sun Jul 13 11:15:43 2008
@@ -0,0 +1,40 @@
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.solr.analysis;
+import org.apache.lucene.analysis.payloads.*;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.index.Payload;
+import java.io.IOException;
+import java.util.Map;
+/**
+ * Factory for {@link NumericPayloadTokenFilter}.
+ * <p>
+ * Init args read by this factory:
+ * <ul>
+ *  <li><code>payload</code> (required) - parsed with {@link Float#parseFloat}</li>
+ *  <li><code>typeMatch</code> - passed through to the filter unchanged</li>
+ * </ul>
+ */
+public class NumericPayloadTokenFilterFactory extends BaseTokenFilterFactory {
+  private float payload;
+  private String typeMatch;
+
+  /**
+   * Captures the <code>payload</code> and <code>typeMatch</code> init args.
+   *
+   * @param args init args for this factory
+   * @throws RuntimeException if the <code>payload</code> arg is missing
+   * @throws NumberFormatException if <code>payload</code> is not a parsable float
+   */
+  public void init(Map<String, String> args) {
+    super.init(args);
+    String payloadArg = args.get("payload");
+    // Float.parseFloat(null) would surface as a bare NullPointerException;
+    // report the actual configuration problem instead.
+    if (payloadArg == null) {
+      throw new RuntimeException("No payload specified for NumericPayloadTokenFilterFactory");
+    }
+    payload = Float.parseFloat(payloadArg);
+    typeMatch = args.get("typeMatch");
+  }
+
+  /** Wraps <code>input</code> in a filter built from the configured payload and typeMatch. */
+  public NumericPayloadTokenFilter create(TokenStream input) {
+    return new NumericPayloadTokenFilter(input,payload,typeMatch);
+  }
+}
+

Propchange: lucene/solr/trunk/src/java/org/apache/solr/analysis/NumericPayloadTokenFilterFactory.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/solr/trunk/src/java/org/apache/solr/analysis/NumericPayloadTokenFilterFactory.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Added: lucene/solr/trunk/src/java/org/apache/solr/analysis/ShingleFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/analysis/ShingleFilterFactory.java?rev=676390&view=auto
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/analysis/ShingleFilterFactory.java (added)
+++ lucene/solr/trunk/src/java/org/apache/solr/analysis/ShingleFilterFactory.java Sun Jul 13 11:15:43 2008
@@ -0,0 +1,44 @@
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.solr.analysis;
+import org.apache.lucene.analysis.shingle.*;
+import java.io.IOException;
+import java.util.LinkedList;
+import java.util.Iterator;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Token;
+import java.util.Map;
+/**
+ * Factory for {@link ShingleFilter}.
+ * <p>
+ * Init args read by this factory:
+ * <ul>
+ *  <li><code>maxShingleSize</code> - defaults to
+ *      <code>ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE</code></li>
+ *  <li><code>outputUnigrams</code> - defaults to <code>true</code></li>
+ * </ul>
+ */
+public class ShingleFilterFactory extends BaseTokenFilterFactory {
+  private int maxShingleSize;
+  private boolean outputUnigrams;
+
+  /** Captures the shingle size and unigram settings from the init args. */
+  public void init(Map<String, String> args) {
+    super.init(args);
+    final int fallbackSize = ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE;
+    maxShingleSize = getInt("maxShingleSize", fallbackSize);
+    outputUnigrams = getBoolean("outputUnigrams", true);
+  }
+
+  /**
+   * Wraps <code>input</code> in a ShingleFilter using the configured maximum
+   * shingle size, then applies the configured outputUnigrams setting.
+   */
+  public ShingleFilter create(TokenStream input) {
+    final ShingleFilter shingles = new ShingleFilter(input, maxShingleSize);
+    shingles.setOutputUnigrams(outputUnigrams);
+    return shingles;
+  }
+}
+

Propchange: lucene/solr/trunk/src/java/org/apache/solr/analysis/ShingleFilterFactory.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/solr/trunk/src/java/org/apache/solr/analysis/ShingleFilterFactory.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Added: lucene/solr/trunk/src/java/org/apache/solr/analysis/TokenOffsetPayloadTokenFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/analysis/TokenOffsetPayloadTokenFilterFactory.java?rev=676390&view=auto
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/analysis/TokenOffsetPayloadTokenFilterFactory.java (added)
+++ lucene/solr/trunk/src/java/org/apache/solr/analysis/TokenOffsetPayloadTokenFilterFactory.java Sun Jul 13 11:15:43 2008
@@ -0,0 +1,33 @@
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.solr.analysis;
+import org.apache.lucene.analysis.payloads.*;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.index.Payload;
+import java.io.IOException;
+import java.util.Map;
+/**
+ * Factory for {@link TokenOffsetPayloadTokenFilter}.
+ * <p>
+ * Reads no init args of its own; each created filter simply wraps the
+ * supplied stream.
+ */
+public class TokenOffsetPayloadTokenFilterFactory extends BaseTokenFilterFactory {
+  /** Wraps <code>input</code> in a new TokenOffsetPayloadTokenFilter. */
+  public TokenOffsetPayloadTokenFilter create(TokenStream input) {
+    final TokenOffsetPayloadTokenFilter filter = new TokenOffsetPayloadTokenFilter(input);
+    return filter;
+  }
+}
+

Propchange: lucene/solr/trunk/src/java/org/apache/solr/analysis/TokenOffsetPayloadTokenFilterFactory.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/solr/trunk/src/java/org/apache/solr/analysis/TokenOffsetPayloadTokenFilterFactory.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Added: lucene/solr/trunk/src/java/org/apache/solr/analysis/TypeAsPayloadTokenFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/analysis/TypeAsPayloadTokenFilterFactory.java?rev=676390&view=auto
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/analysis/TypeAsPayloadTokenFilterFactory.java (added)
+++ lucene/solr/trunk/src/java/org/apache/solr/analysis/TypeAsPayloadTokenFilterFactory.java Sun Jul 13 11:15:43 2008
@@ -0,0 +1,33 @@
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.solr.analysis;
+import org.apache.lucene.analysis.payloads.*;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.index.Payload;
+import java.io.IOException;
+import java.util.Map;
+/**
+ * Factory for {@link TypeAsPayloadTokenFilter}.
+ * <p>
+ * Reads no init args of its own; each created filter simply wraps the
+ * supplied stream.
+ */
+public class TypeAsPayloadTokenFilterFactory extends BaseTokenFilterFactory {
+  /** Wraps <code>input</code> in a new TypeAsPayloadTokenFilter. */
+  public TypeAsPayloadTokenFilter create(TokenStream input) {
+    final TypeAsPayloadTokenFilter filter = new TypeAsPayloadTokenFilter(input);
+    return filter;
+  }
+}
+

Propchange: lucene/solr/trunk/src/java/org/apache/solr/analysis/TypeAsPayloadTokenFilterFactory.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/solr/trunk/src/java/org/apache/solr/analysis/TypeAsPayloadTokenFilterFactory.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL