You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by ho...@apache.org on 2008/07/13 20:15:44 UTC
svn commit: r676390 - in /lucene/solr/trunk: ./
src/java/org/apache/solr/analysis/
Author: hossman
Date: Sun Jul 13 11:15:43 2008
New Revision: 676390
URL: http://svn.apache.org/viewvc?rev=676390&view=rev
Log:
add factories for 'new' Lucene analysis classes where it makes sense
Added:
lucene/solr/trunk/src/java/org/apache/solr/analysis/DictionaryCompoundWordTokenFilterFactory.java (with props)
lucene/solr/trunk/src/java/org/apache/solr/analysis/ElisionFilterFactory.java (with props)
lucene/solr/trunk/src/java/org/apache/solr/analysis/NumericPayloadTokenFilterFactory.java (with props)
lucene/solr/trunk/src/java/org/apache/solr/analysis/ShingleFilterFactory.java (with props)
lucene/solr/trunk/src/java/org/apache/solr/analysis/TokenOffsetPayloadTokenFilterFactory.java (with props)
lucene/solr/trunk/src/java/org/apache/solr/analysis/TypeAsPayloadTokenFilterFactory.java (with props)
Modified:
lucene/solr/trunk/build.xml
Modified: lucene/solr/trunk/build.xml
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/build.xml?rev=676390&r1=676389&r2=676390&view=diff
==============================================================================
--- lucene/solr/trunk/build.xml (original)
+++ lucene/solr/trunk/build.xml Sun Jul 13 11:15:43 2008
@@ -255,6 +255,18 @@
<regexp pattern="CachingTokenFilter"/>
</linecontainsregexp>
<linecontainsregexp negate="true">
+ <!-- no way to leverage this in Solr -->
+ <regexp pattern="HyphenationCompoundWordTokenFilter"/>
+ </linecontainsregexp>
+ <linecontainsregexp negate="true">
+ <!-- no way to leverage these in Solr (yet) -->
+ <regexp pattern="Sink\|Tee"/>
+ </linecontainsregexp>
+ <linecontainsregexp negate="true">
+ <!-- Solr already has a different impl for this -->
+ <regexp pattern="SynonymTokenFilter"/>
+ </linecontainsregexp>
+ <linecontainsregexp negate="true">
<!-- solr and lucene both have one? ? ? ? -->
<regexp pattern="LengthFilter"/>
</linecontainsregexp>
Added: lucene/solr/trunk/src/java/org/apache/solr/analysis/DictionaryCompoundWordTokenFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/analysis/DictionaryCompoundWordTokenFilterFactory.java?rev=676390&view=auto
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/analysis/DictionaryCompoundWordTokenFilterFactory.java (added)
+++ lucene/solr/trunk/src/java/org/apache/solr/analysis/DictionaryCompoundWordTokenFilterFactory.java Sun Jul 13 11:15:43 2008
@@ -0,0 +1,59 @@
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.solr.analysis;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.compound.*;
+import org.apache.solr.util.plugin.ResourceLoaderAware;
+import org.apache.solr.common.ResourceLoader;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
+import java.util.List;
+import java.util.Set;
+import java.util.Map;
+import java.io.IOException;
+
+/**
+ * Factory for {@link DictionaryCompoundWordTokenFilter}.
+ *
+ * Arguments read in {@link #init(Map)}:
+ * <ul>
+ *  <li><code>dictionary</code> (required) - name of the resource whose lines form the word list</li>
+ *  <li><code>minWordSize</code> - defaults to <code>CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE</code></li>
+ *  <li><code>minSubwordSize</code> - defaults to <code>CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE</code></li>
+ *  <li><code>maxSubwordSize</code> - defaults to <code>CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE</code></li>
+ *  <li><code>onlyLongestMatch</code> - defaults to <code>true</code></li>
+ * </ul>
+ */
+public class DictionaryCompoundWordTokenFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
+  /** Word set built in {@link #inform(ResourceLoader)} from the lines of {@link #dictFile}. */
+  private Set dictionary;
+  /** Value of the <code>dictionary</code> argument; never null once init has succeeded. */
+  private String dictFile;
+  private int minWordSize;
+  private int minSubwordSize;
+  private int maxSubwordSize;
+  private boolean onlyLongestMatch;
+
+  /**
+   * Reads the factory configuration.
+   *
+   * @param args configuration arguments; must contain <code>dictionary</code>
+   * @throws RuntimeException if the <code>dictionary</code> argument is missing
+   */
+  public void init(Map<String, String> args) {
+    super.init(args);
+    dictFile = args.get("dictionary");
+    if (dictFile == null) {
+      // Fail here with a clear message rather than handing null to the
+      // ResourceLoader in inform(); mirrors the check ElisionFilterFactory
+      // performs for its "articles" argument.
+      throw new RuntimeException("No dictionary specified for DictionaryCompoundWordTokenFilterFactory");
+    }
+    minWordSize = getInt("minWordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE);
+    minSubwordSize = getInt("minSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE);
+    maxSubwordSize = getInt("maxSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE);
+    onlyLongestMatch = getBoolean("onlyLongestMatch", true);
+  }
+
+  /**
+   * Loads the dictionary resource and builds the word set handed to every filter.
+   *
+   * @param loader used to read the lines of the configured dictionary resource
+   * @throws RuntimeException wrapping any IOException raised while reading the resource
+   */
+  public void inform(ResourceLoader loader) {
+    try {
+      List<String> wlist = loader.getLines(dictFile);
+      // NOTE(review): the 'false' argument is presumably makeStopSet's ignoreCase flag - confirm.
+      dictionary = StopFilter.makeStopSet(wlist.toArray(new String[0]), false);
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  /**
+   * Wraps <code>input</code> in a DictionaryCompoundWordTokenFilter using the configured
+   * dictionary and size limits.
+   */
+  public DictionaryCompoundWordTokenFilter create(TokenStream input) {
+    return new DictionaryCompoundWordTokenFilter(
+        input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
+  }
+}
+
+
Propchange: lucene/solr/trunk/src/java/org/apache/solr/analysis/DictionaryCompoundWordTokenFilterFactory.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/solr/trunk/src/java/org/apache/solr/analysis/DictionaryCompoundWordTokenFilterFactory.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Added: lucene/solr/trunk/src/java/org/apache/solr/analysis/ElisionFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/analysis/ElisionFilterFactory.java?rev=676390&view=auto
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/analysis/ElisionFilterFactory.java (added)
+++ lucene/solr/trunk/src/java/org/apache/solr/analysis/ElisionFilterFactory.java Sun Jul 13 11:15:43 2008
@@ -0,0 +1,62 @@
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.solr.analysis;
+
+import org.apache.solr.common.ResourceLoader;
+import org.apache.solr.util.plugin.ResourceLoaderAware;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.fr.*;
+import java.io.IOException;
+import java.util.Set;
+import java.util.HashSet;
+import java.util.Arrays;
+import java.util.Iterator;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.TokenFilter;
+import java.util.Map;
+import java.util.List;
+import java.util.Set;
+import java.io.IOException;
+
+/**
+ * Factory for {@link ElisionFilter}.
+ *
+ * The <code>articles</code> argument is required and names the resource whose
+ * lines are turned into the article set passed to each filter.
+ */
+public class ElisionFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
+
+  /** Article set built in {@link #inform(ResourceLoader)}. */
+  private Set articles;
+
+  /**
+   * Loads the article list named by the <code>articles</code> argument.
+   *
+   * @param loader used to read the lines of the articles resource
+   * @throws RuntimeException if no <code>articles</code> argument was given, or
+   *         wrapping any IOException raised while reading the resource
+   */
+  public void inform(ResourceLoader loader) {
+    final String resourceName = args.get("articles");
+
+    // Guard clause: nothing can be loaded without a resource name.
+    if (resourceName == null) {
+      throw new RuntimeException("No articles specified for ElisionFilterFactory");
+    }
+
+    try {
+      final List<String> lines = loader.getLines(resourceName);
+      final String[] words = lines.toArray(new String[0]);
+      articles = StopFilter.makeStopSet(words, false);
+    } catch (IOException ioe) {
+      throw new RuntimeException(ioe);
+    }
+  }
+
+  /** Wraps <code>input</code> in an ElisionFilter that uses the loaded article set. */
+  public ElisionFilter create(TokenStream input) {
+    final ElisionFilter filter = new ElisionFilter(input, articles);
+    return filter;
+  }
+}
+
+
Propchange: lucene/solr/trunk/src/java/org/apache/solr/analysis/ElisionFilterFactory.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/solr/trunk/src/java/org/apache/solr/analysis/ElisionFilterFactory.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Added: lucene/solr/trunk/src/java/org/apache/solr/analysis/NumericPayloadTokenFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/analysis/NumericPayloadTokenFilterFactory.java?rev=676390&view=auto
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/analysis/NumericPayloadTokenFilterFactory.java (added)
+++ lucene/solr/trunk/src/java/org/apache/solr/analysis/NumericPayloadTokenFilterFactory.java Sun Jul 13 11:15:43 2008
@@ -0,0 +1,40 @@
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.solr.analysis;
+import org.apache.lucene.analysis.payloads.*;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.index.Payload;
+import java.io.IOException;
+import java.util.Map;
+public class NumericPayloadTokenFilterFactory extends BaseTokenFilterFactory {
+ private float payload;
+ private String typeMatch;
+ public void init(Map<String, String> args) {
+ super.init(args);
+ payload = Float.parseFloat(args.get("payload"));
+ typeMatch = args.get("typeMatch");
+ }
+ public NumericPayloadTokenFilter create(TokenStream input) {
+ return new NumericPayloadTokenFilter(input,payload,typeMatch);
+ }
+}
+
Propchange: lucene/solr/trunk/src/java/org/apache/solr/analysis/NumericPayloadTokenFilterFactory.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/solr/trunk/src/java/org/apache/solr/analysis/NumericPayloadTokenFilterFactory.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Added: lucene/solr/trunk/src/java/org/apache/solr/analysis/ShingleFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/analysis/ShingleFilterFactory.java?rev=676390&view=auto
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/analysis/ShingleFilterFactory.java (added)
+++ lucene/solr/trunk/src/java/org/apache/solr/analysis/ShingleFilterFactory.java Sun Jul 13 11:15:43 2008
@@ -0,0 +1,44 @@
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.solr.analysis;
+import org.apache.lucene.analysis.shingle.*;
+import java.io.IOException;
+import java.util.LinkedList;
+import java.util.Iterator;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Token;
+import java.util.Map;
+/**
+ * Factory for {@link ShingleFilter}.
+ *
+ * Optional arguments: <code>maxShingleSize</code> (defaults to
+ * <code>ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE</code>) and
+ * <code>outputUnigrams</code> (defaults to <code>true</code>).
+ */
+public class ShingleFilterFactory extends BaseTokenFilterFactory {
+  private int maxShingleSize;
+  private boolean outputUnigrams;
+
+  /** Reads <code>maxShingleSize</code> and <code>outputUnigrams</code> from the arguments. */
+  public void init(Map<String, String> args) {
+    super.init(args);
+    final int defaultMaxSize = ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE;
+    maxShingleSize = getInt("maxShingleSize", defaultMaxSize);
+    outputUnigrams = getBoolean("outputUnigrams", true);
+  }
+
+  /** Wraps <code>input</code> in a ShingleFilter configured with the stored settings. */
+  public ShingleFilter create(TokenStream input) {
+    final ShingleFilter shingles = new ShingleFilter(input, maxShingleSize);
+    shingles.setOutputUnigrams(outputUnigrams);
+    return shingles;
+  }
+}
+
+
Propchange: lucene/solr/trunk/src/java/org/apache/solr/analysis/ShingleFilterFactory.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/solr/trunk/src/java/org/apache/solr/analysis/ShingleFilterFactory.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Added: lucene/solr/trunk/src/java/org/apache/solr/analysis/TokenOffsetPayloadTokenFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/analysis/TokenOffsetPayloadTokenFilterFactory.java?rev=676390&view=auto
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/analysis/TokenOffsetPayloadTokenFilterFactory.java (added)
+++ lucene/solr/trunk/src/java/org/apache/solr/analysis/TokenOffsetPayloadTokenFilterFactory.java Sun Jul 13 11:15:43 2008
@@ -0,0 +1,33 @@
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.solr.analysis;
+import org.apache.lucene.analysis.payloads.*;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.index.Payload;
+import java.io.IOException;
+import java.util.Map;
+/**
+ * Factory for {@link TokenOffsetPayloadTokenFilter}; reads no arguments of its own.
+ */
+public class TokenOffsetPayloadTokenFilterFactory extends BaseTokenFilterFactory {
+
+  /** Wraps <code>input</code> in a new TokenOffsetPayloadTokenFilter. */
+  public TokenOffsetPayloadTokenFilter create(TokenStream input) {
+    final TokenOffsetPayloadTokenFilter filter = new TokenOffsetPayloadTokenFilter(input);
+    return filter;
+  }
+}
+
+
Propchange: lucene/solr/trunk/src/java/org/apache/solr/analysis/TokenOffsetPayloadTokenFilterFactory.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/solr/trunk/src/java/org/apache/solr/analysis/TokenOffsetPayloadTokenFilterFactory.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Added: lucene/solr/trunk/src/java/org/apache/solr/analysis/TypeAsPayloadTokenFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/analysis/TypeAsPayloadTokenFilterFactory.java?rev=676390&view=auto
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/analysis/TypeAsPayloadTokenFilterFactory.java (added)
+++ lucene/solr/trunk/src/java/org/apache/solr/analysis/TypeAsPayloadTokenFilterFactory.java Sun Jul 13 11:15:43 2008
@@ -0,0 +1,33 @@
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.solr.analysis;
+import org.apache.lucene.analysis.payloads.*;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.index.Payload;
+import java.io.IOException;
+import java.util.Map;
+/**
+ * Factory for {@link TypeAsPayloadTokenFilter}; reads no arguments of its own.
+ */
+public class TypeAsPayloadTokenFilterFactory extends BaseTokenFilterFactory {
+
+  /** Wraps <code>input</code> in a new TypeAsPayloadTokenFilter. */
+  public TypeAsPayloadTokenFilter create(TokenStream input) {
+    final TypeAsPayloadTokenFilter filter = new TypeAsPayloadTokenFilter(input);
+    return filter;
+  }
+}
+
+
Propchange: lucene/solr/trunk/src/java/org/apache/solr/analysis/TypeAsPayloadTokenFilterFactory.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/solr/trunk/src/java/org/apache/solr/analysis/TypeAsPayloadTokenFilterFactory.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL