You are viewing a plain-text version of this content; the hyperlink to the canonical version was not preserved in this copy.
Posted to commits@lucene.apache.org by rm...@apache.org on 2010/05/04 09:50:42 UTC
svn commit: r940761 - in /lucene/dev/trunk: lucene/contrib/
lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/
lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/commongrams/
solr/src/java/org/apache/solr/...
Author: rmuir
Date: Tue May 4 07:50:41 2010
New Revision: 940761
URL: http://svn.apache.org/viewvc?rev=940761&view=rev
Log:
LUCENE-2413: consolidate commongrams into contrib/analyzers
Added:
lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/
lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java
- copied, changed from r940756, lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsFilter.java
lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsQueryFilter.java
- copied, changed from r940756, lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilter.java
lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/package.html (with props)
lucene/dev/trunk/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/commongrams/
lucene/dev/trunk/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java
- copied, changed from r940756, lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/CommonGramsFilterTest.java
Removed:
lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsFilter.java
lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilter.java
lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/CommonGramsFilterTest.java
Modified:
lucene/dev/trunk/lucene/contrib/CHANGES.txt
lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsFilterFactory.java
lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilterFactory.java
Modified: lucene/dev/trunk/lucene/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/CHANGES.txt?rev=940761&r1=940760&r2=940761&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/contrib/CHANGES.txt Tue May 4 07:50:41 2010
@@ -155,6 +155,12 @@ New features
of AttributeSource.cloneAttributes() instances and the new copyTo() method.
(Steven Rowe via Uwe Schindler)
+ * LUCENE-2413: Consolidated Solr analysis components into contrib/analyzers.
+ New features from Solr now available to Lucene users include:
+ - o.a.l.analysis.commongrams: Constructs n-grams for frequently occurring terms
+ and phrases.
+ (... in progress)
+
Build
* LUCENE-2124: Moved the JDK-based collation support from contrib/collation
Copied: lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java (from r940756, lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsFilter.java)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java?p2=lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java&p1=lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsFilter.java&r1=940756&r2=940761&rev=940761&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsFilter.java (original)
+++ lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java Tue May 4 07:50:41 2010
@@ -7,7 +7,7 @@
* See the License for the specific language governing permissions and limitations under the License.
*/
-package org.apache.solr.analysis;
+package org.apache.lucene.analysis.commongrams;
import java.io.IOException;
import java.util.Arrays;
Copied: lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsQueryFilter.java (from r940756, lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilter.java)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsQueryFilter.java?p2=lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsQueryFilter.java&p1=lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilter.java&r1=940756&r2=940761&rev=940761&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilter.java (original)
+++ lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsQueryFilter.java Tue May 4 07:50:41 2010
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.solr.analysis;
+package org.apache.lucene.analysis.commongrams;
import java.io.IOException;
@@ -22,7 +22,7 @@ import org.apache.lucene.analysis.TokenF
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
-import static org.apache.solr.analysis.CommonGramsFilter.GRAM_TYPE;
+import static org.apache.lucene.analysis.commongrams.CommonGramsFilter.GRAM_TYPE;
/**
* Wrap a CommonGramsFilter optimizing phrase queries by only returning single
Added: lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/package.html?rev=940761&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/package.html (added)
+++ lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/package.html Tue May 4 07:50:41 2010
@@ -0,0 +1,22 @@
+<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html><head></head>
+<body>
+Construct n-grams for frequently occurring terms and phrases.
+</body>
+</html>
Propchange: lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/package.html
------------------------------------------------------------------------------
svn:eol-style = native
Copied: lucene/dev/trunk/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java (from r940756, lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/CommonGramsFilterTest.java)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java?p2=lucene/dev/trunk/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java&p1=lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/CommonGramsFilterTest.java&r1=940756&r2=940761&rev=940761&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/CommonGramsFilterTest.java (original)
+++ lucene/dev/trunk/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java Tue May 4 07:50:41 2010
@@ -14,28 +14,29 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.solr.analysis;
+package org.apache.lucene.analysis.commongrams;
import java.io.Reader;
import java.io.StringReader;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
- * Tests CommonGramsQueryFilter
+ * Tests CommonGrams(Query)Filter
*/
-public class CommonGramsFilterTest extends BaseTokenTestCase {
+public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
private static final String[] commonWords = { "s", "a", "b", "c", "d", "the",
"of" };
public void testReset() throws Exception {
final String input = "How the s a brown s cow d like A B thing?";
- WhitespaceTokenizer wt = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input));
+ WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
CharTermAttribute term = cgf.addAttribute(CharTermAttribute.class);
@@ -56,7 +57,7 @@ public class CommonGramsFilterTest exten
public void testQueryReset() throws Exception {
final String input = "How the s a brown s cow d like A B thing?";
- WhitespaceTokenizer wt = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input));
+ WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
CommonGramsQueryFilter nsf = new CommonGramsQueryFilter(cgf);
@@ -88,7 +89,7 @@ public class CommonGramsFilterTest exten
@Override
public TokenStream tokenStream(String field, Reader in) {
return new CommonGramsQueryFilter(new CommonGramsFilter(
- new WhitespaceTokenizer(DEFAULT_VERSION, in), commonWords));
+ new WhitespaceTokenizer(TEST_VERSION_CURRENT, in), commonWords));
}
};
@@ -157,7 +158,7 @@ public class CommonGramsFilterTest exten
@Override
public TokenStream tokenStream(String field, Reader in) {
return new CommonGramsFilter(
- new WhitespaceTokenizer(DEFAULT_VERSION, in), commonWords);
+ new WhitespaceTokenizer(TEST_VERSION_CURRENT, in), commonWords);
}
};
@@ -243,7 +244,7 @@ public class CommonGramsFilterTest exten
*/
public void testCaseSensitive() throws Exception {
final String input = "How The s a brown s cow d like A B thing?";
- WhitespaceTokenizer wt = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input));
+ WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
Set common = CommonGramsFilter.makeCommonSet(commonWords);
TokenFilter cgf = new CommonGramsFilter(wt, common, false);
assertTokenStreamContents(cgf, new String[] {"How", "The", "The_s", "s",
@@ -256,7 +257,7 @@ public class CommonGramsFilterTest exten
*/
public void testLastWordisStopWord() throws Exception {
final String input = "dog the";
- WhitespaceTokenizer wt = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input));
+ WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
TokenFilter nsf = new CommonGramsQueryFilter(cgf);
assertTokenStreamContents(nsf, new String[] { "dog_the" });
@@ -267,7 +268,7 @@ public class CommonGramsFilterTest exten
*/
public void testFirstWordisStopWord() throws Exception {
final String input = "the dog";
- WhitespaceTokenizer wt = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input));
+ WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
TokenFilter nsf = new CommonGramsQueryFilter(cgf);
assertTokenStreamContents(nsf, new String[] { "the_dog" });
@@ -278,7 +279,7 @@ public class CommonGramsFilterTest exten
*/
public void testOneWordQueryStopWord() throws Exception {
final String input = "the";
- WhitespaceTokenizer wt = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input));
+ WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
TokenFilter nsf = new CommonGramsQueryFilter(cgf);
assertTokenStreamContents(nsf, new String[] { "the" });
@@ -289,7 +290,7 @@ public class CommonGramsFilterTest exten
*/
public void testOneWordQuery() throws Exception {
final String input = "monster";
- WhitespaceTokenizer wt = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input));
+ WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
TokenFilter nsf = new CommonGramsQueryFilter(cgf);
assertTokenStreamContents(nsf, new String[] { "monster" });
@@ -300,7 +301,7 @@ public class CommonGramsFilterTest exten
*/
public void TestFirstAndLastStopWord() throws Exception {
final String input = "the of";
- WhitespaceTokenizer wt = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input));
+ WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
TokenFilter nsf = new CommonGramsQueryFilter(cgf);
assertTokenStreamContents(nsf, new String[] { "the_of" });
Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsFilterFactory.java?rev=940761&r1=940760&r2=940761&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsFilterFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsFilterFactory.java Tue May 4 07:50:41 2010
@@ -22,6 +22,7 @@ import java.util.Set;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.commongrams.CommonGramsFilter;
import org.apache.solr.common.ResourceLoader;
import org.apache.solr.util.plugin.ResourceLoaderAware;
Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilterFactory.java?rev=940761&r1=940760&r2=940761&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilterFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilterFactory.java Tue May 4 07:50:41 2010
@@ -23,6 +23,8 @@ import java.util.Set;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.commongrams.CommonGramsFilter;
+import org.apache.lucene.analysis.commongrams.CommonGramsQueryFilter;
import org.apache.solr.common.ResourceLoader;
import org.apache.solr.util.plugin.ResourceLoaderAware;