You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2010/05/04 09:50:42 UTC

svn commit: r940761 - in /lucene/dev/trunk: lucene/contrib/ lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/ lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/commongrams/ solr/src/java/org/apache/solr/...

Author: rmuir
Date: Tue May  4 07:50:41 2010
New Revision: 940761

URL: http://svn.apache.org/viewvc?rev=940761&view=rev
Log:
LUCENE-2413: consolidate commongrams into contrib/analyzers

Added:
    lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/
    lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java
      - copied, changed from r940756, lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsFilter.java
    lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsQueryFilter.java
      - copied, changed from r940756, lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilter.java
    lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/package.html   (with props)
    lucene/dev/trunk/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/commongrams/
    lucene/dev/trunk/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java
      - copied, changed from r940756, lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/CommonGramsFilterTest.java
Removed:
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsFilter.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilter.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/CommonGramsFilterTest.java
Modified:
    lucene/dev/trunk/lucene/contrib/CHANGES.txt
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsFilterFactory.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilterFactory.java

Modified: lucene/dev/trunk/lucene/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/CHANGES.txt?rev=940761&r1=940760&r2=940761&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/contrib/CHANGES.txt Tue May  4 07:50:41 2010
@@ -155,6 +155,12 @@ New features
    of AttributeSource.cloneAttributes() instances and the new copyTo() method.
    (Steven Rowe via Uwe Schindler)
 
+ * LUCENE-2413: Consolidated Solr analysis components into contrib/analyzers. 
+   New features from Solr now available to Lucene users include:
+   - o.a.l.analysis.commongrams: Constructs n-grams for frequently occurring terms
+     and phrases. 
+   (... in progress)
+
 Build
 
  * LUCENE-2124: Moved the JDK-based collation support from contrib/collation 

Copied: lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java (from r940756, lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsFilter.java)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java?p2=lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java&p1=lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsFilter.java&r1=940756&r2=940761&rev=940761&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsFilter.java (original)
+++ lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java Tue May  4 07:50:41 2010
@@ -7,7 +7,7 @@
  * See the License for the specific language governing permissions and limitations under the License. 
  */
 
-package org.apache.solr.analysis;
+package org.apache.lucene.analysis.commongrams;
 
 import java.io.IOException;
 import java.util.Arrays;

Copied: lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsQueryFilter.java (from r940756, lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilter.java)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsQueryFilter.java?p2=lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsQueryFilter.java&p1=lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilter.java&r1=940756&r2=940761&rev=940761&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilter.java (original)
+++ lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsQueryFilter.java Tue May  4 07:50:41 2010
@@ -14,7 +14,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.solr.analysis;
+package org.apache.lucene.analysis.commongrams;
 
 import java.io.IOException;
 
@@ -22,7 +22,7 @@ import org.apache.lucene.analysis.TokenF
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 
-import static org.apache.solr.analysis.CommonGramsFilter.GRAM_TYPE;
+import static org.apache.lucene.analysis.commongrams.CommonGramsFilter.GRAM_TYPE;
 
 /**
  * Wrap a CommonGramsFilter optimizing phrase queries by only returning single

Added: lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/package.html?rev=940761&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/package.html (added)
+++ lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/package.html Tue May  4 07:50:41 2010
@@ -0,0 +1,22 @@
+<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html><head></head>
+<body>
+Construct n-grams for frequently occurring terms and phrases.
+</body>
+</html>

Propchange: lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/package.html
------------------------------------------------------------------------------
    svn:eol-style = native

Copied: lucene/dev/trunk/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java (from r940756, lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/CommonGramsFilterTest.java)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java?p2=lucene/dev/trunk/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java&p1=lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/CommonGramsFilterTest.java&r1=940756&r2=940761&rev=940761&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/CommonGramsFilterTest.java (original)
+++ lucene/dev/trunk/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java Tue May  4 07:50:41 2010
@@ -14,28 +14,29 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.solr.analysis;
+package org.apache.lucene.analysis.commongrams;
 
 import java.io.Reader;
 import java.io.StringReader;
 import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.WhitespaceTokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 
 /**
- * Tests CommonGramsQueryFilter
+ * Tests CommonGrams(Query)Filter
  */
-public class CommonGramsFilterTest extends BaseTokenTestCase {
+public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
   private static final String[] commonWords = { "s", "a", "b", "c", "d", "the",
       "of" };
   
   public void testReset() throws Exception {
     final String input = "How the s a brown s cow d like A B thing?";
-    WhitespaceTokenizer wt = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input));
+    WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
     CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
     
     CharTermAttribute term = cgf.addAttribute(CharTermAttribute.class);
@@ -56,7 +57,7 @@ public class CommonGramsFilterTest exten
   
   public void testQueryReset() throws Exception {
     final String input = "How the s a brown s cow d like A B thing?";
-    WhitespaceTokenizer wt = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input));
+    WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
     CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
     CommonGramsQueryFilter nsf = new CommonGramsQueryFilter(cgf);
     
@@ -88,7 +89,7 @@ public class CommonGramsFilterTest exten
       @Override
       public TokenStream tokenStream(String field, Reader in) {
         return new CommonGramsQueryFilter(new CommonGramsFilter(
-            new WhitespaceTokenizer(DEFAULT_VERSION, in), commonWords));
+            new WhitespaceTokenizer(TEST_VERSION_CURRENT, in), commonWords));
       } 
     };
 
@@ -157,7 +158,7 @@ public class CommonGramsFilterTest exten
       @Override
       public TokenStream tokenStream(String field, Reader in) {
         return new CommonGramsFilter(
-            new WhitespaceTokenizer(DEFAULT_VERSION, in), commonWords);
+            new WhitespaceTokenizer(TEST_VERSION_CURRENT, in), commonWords);
       } 
     };
 
@@ -243,7 +244,7 @@ public class CommonGramsFilterTest exten
    */
   public void testCaseSensitive() throws Exception {
     final String input = "How The s a brown s cow d like A B thing?";
-    WhitespaceTokenizer wt = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input));
+    WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
     Set common = CommonGramsFilter.makeCommonSet(commonWords);
     TokenFilter cgf = new CommonGramsFilter(wt, common, false);
     assertTokenStreamContents(cgf, new String[] {"How", "The", "The_s", "s",
@@ -256,7 +257,7 @@ public class CommonGramsFilterTest exten
    */
   public void testLastWordisStopWord() throws Exception {
     final String input = "dog the";
-    WhitespaceTokenizer wt = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input));
+    WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
     CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
     TokenFilter nsf = new CommonGramsQueryFilter(cgf);
     assertTokenStreamContents(nsf, new String[] { "dog_the" });
@@ -267,7 +268,7 @@ public class CommonGramsFilterTest exten
    */
   public void testFirstWordisStopWord() throws Exception {
     final String input = "the dog";
-    WhitespaceTokenizer wt = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input));
+    WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
     CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
     TokenFilter nsf = new CommonGramsQueryFilter(cgf);
     assertTokenStreamContents(nsf, new String[] { "the_dog" });
@@ -278,7 +279,7 @@ public class CommonGramsFilterTest exten
    */
   public void testOneWordQueryStopWord() throws Exception {
     final String input = "the";
-    WhitespaceTokenizer wt = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input));
+    WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
     CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
     TokenFilter nsf = new CommonGramsQueryFilter(cgf);
     assertTokenStreamContents(nsf, new String[] { "the" });
@@ -289,7 +290,7 @@ public class CommonGramsFilterTest exten
    */
   public void testOneWordQuery() throws Exception {
     final String input = "monster";
-    WhitespaceTokenizer wt = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input));
+    WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
     CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
     TokenFilter nsf = new CommonGramsQueryFilter(cgf);
     assertTokenStreamContents(nsf, new String[] { "monster" });
@@ -300,7 +301,7 @@ public class CommonGramsFilterTest exten
    */
   public void TestFirstAndLastStopWord() throws Exception {
     final String input = "the of";
-    WhitespaceTokenizer wt = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input));
+    WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
     CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
     TokenFilter nsf = new CommonGramsQueryFilter(cgf);
     assertTokenStreamContents(nsf, new String[] { "the_of" });

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsFilterFactory.java?rev=940761&r1=940760&r2=940761&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsFilterFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsFilterFactory.java Tue May  4 07:50:41 2010
@@ -22,6 +22,7 @@ import java.util.Set;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.StopAnalyzer;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.commongrams.CommonGramsFilter;
 import org.apache.solr.common.ResourceLoader;
 import org.apache.solr.util.plugin.ResourceLoaderAware;
 

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilterFactory.java?rev=940761&r1=940760&r2=940761&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilterFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilterFactory.java Tue May  4 07:50:41 2010
@@ -23,6 +23,8 @@ import java.util.Set;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.StopAnalyzer;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.commongrams.CommonGramsFilter;
+import org.apache.lucene.analysis.commongrams.CommonGramsQueryFilter;
 import org.apache.solr.common.ResourceLoader;
 import org.apache.solr.util.plugin.ResourceLoaderAware;