You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@archiva.apache.org by jd...@apache.org on 2008/12/05 07:35:49 UTC

svn commit: r723612 - in /archiva/branches/archiva-search-improvements/archiva-modules: archiva-base/archiva-consumers/archiva-lucene-consumers/src/main/java/org/apache/maven/archiva/consumers/lucene/ archiva-base/archiva-indexer/src/main/java/org/apac...

Author: jdumay
Date: Thu Dec  4 22:35:48 2008
New Revision: 723612

URL: http://svn.apache.org/viewvc?rev=723612&view=rev
Log:
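Search now returns much better results. We removed file-content indexing and added the artifact fields (groupId, artifactId and version, plus their exact variants) to the fields that are searched. The QueryParser now combines terms with AND instead of its default OR, so each additional term narrows the results. Very Google-like.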


Added:
    archiva/branches/archiva-search-improvements/archiva-modules/archiva-base/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/lucene/analyzers/ArtifactIdTokenizer.java
Modified:
    archiva/branches/archiva-search-improvements/archiva-modules/archiva-base/archiva-consumers/archiva-lucene-consumers/src/main/java/org/apache/maven/archiva/consumers/lucene/IndexContentConsumer.java
    archiva/branches/archiva-search-improvements/archiva-modules/archiva-base/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/filecontent/FileContentAnalyzer.java
    archiva/branches/archiva-search-improvements/archiva-modules/archiva-base/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/filecontent/FileContentConverter.java
    archiva/branches/archiva-search-improvements/archiva-modules/archiva-base/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/filecontent/FileContentHandlers.java
    archiva/branches/archiva-search-improvements/archiva-modules/archiva-base/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/filecontent/FileContentKeys.java
    archiva/branches/archiva-search-improvements/archiva-modules/archiva-base/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/filecontent/FileContentRecord.java
    archiva/branches/archiva-search-improvements/archiva-modules/archiva-web/archiva-xmlrpc/archiva-xmlrpc-services/src/test/java/org/apache/archiva/web/xmlrpc/services/SearchServiceImplTest.java

Modified: archiva/branches/archiva-search-improvements/archiva-modules/archiva-base/archiva-consumers/archiva-lucene-consumers/src/main/java/org/apache/maven/archiva/consumers/lucene/IndexContentConsumer.java
URL: http://svn.apache.org/viewvc/archiva/branches/archiva-search-improvements/archiva-modules/archiva-base/archiva-consumers/archiva-lucene-consumers/src/main/java/org/apache/maven/archiva/consumers/lucene/IndexContentConsumer.java?rev=723612&r1=723611&r2=723612&view=diff
==============================================================================
--- archiva/branches/archiva-search-improvements/archiva-modules/archiva-base/archiva-consumers/archiva-lucene-consumers/src/main/java/org/apache/maven/archiva/consumers/lucene/IndexContentConsumer.java (original)
+++ archiva/branches/archiva-search-improvements/archiva-modules/archiva-base/archiva-consumers/archiva-lucene-consumers/src/main/java/org/apache/maven/archiva/consumers/lucene/IndexContentConsumer.java Thu Dec  4 22:35:48 2008
@@ -159,10 +159,8 @@
         FileContentRecord record = new FileContentRecord();
         try
         {
-            File file = new File( repositoryDir, path );
             record.setRepositoryId( this.repository.getId() );
             record.setFilename( path );
-            record.setContents( FileUtils.readFileToString( file, null ) );
 
             // Test for possible artifact reference syntax.
             try
@@ -179,10 +177,6 @@
 
             index.modifyRecord( record );
         }
-        catch ( IOException e )
-        {
-            triggerConsumerError( READ_CONTENT, "Unable to read file contents: " + e.getMessage() );
-        }
         catch ( RepositoryIndexException e )
         {
             triggerConsumerError( INDEX_ERROR, "Unable to index file contents: " + e.getMessage() );

Modified: archiva/branches/archiva-search-improvements/archiva-modules/archiva-base/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/filecontent/FileContentAnalyzer.java
URL: http://svn.apache.org/viewvc/archiva/branches/archiva-search-improvements/archiva-modules/archiva-base/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/filecontent/FileContentAnalyzer.java?rev=723612&r1=723611&r2=723612&view=diff
==============================================================================
--- archiva/branches/archiva-search-improvements/archiva-modules/archiva-base/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/filecontent/FileContentAnalyzer.java (original)
+++ archiva/branches/archiva-search-improvements/archiva-modules/archiva-base/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/filecontent/FileContentAnalyzer.java Thu Dec  4 22:35:48 2008
@@ -23,6 +23,8 @@
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.maven.archiva.indexer.lucene.analyzers.FilenamesTokenizer;
+import org.apache.maven.archiva.indexer.lucene.analyzers.ArtifactIdTokenizer;
+import org.apache.maven.archiva.indexer.lucene.analyzers.GroupIdTokenizer;
 
 import java.io.Reader;
 
@@ -42,6 +44,16 @@
             return new FilenamesTokenizer( reader );
         }
 
+        if ( FileContentKeys.ARTIFACTID.equals( field ))
+        {
+            return new ArtifactIdTokenizer(reader);
+        }
+
+        if ( FileContentKeys.GROUPID.equals( field ) )
+        {
+            return new GroupIdTokenizer(reader);
+        }
+
         return STANDARD.tokenStream( field, reader );
     }
 }

Modified: archiva/branches/archiva-search-improvements/archiva-modules/archiva-base/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/filecontent/FileContentConverter.java
URL: http://svn.apache.org/viewvc/archiva/branches/archiva-search-improvements/archiva-modules/archiva-base/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/filecontent/FileContentConverter.java?rev=723612&r1=723611&r2=723612&view=diff
==============================================================================
--- archiva/branches/archiva-search-improvements/archiva-modules/archiva-base/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/filecontent/FileContentConverter.java (original)
+++ archiva/branches/archiva-search-improvements/archiva-modules/archiva-base/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/filecontent/FileContentConverter.java Thu Dec  4 22:35:48 2008
@@ -37,7 +37,6 @@
 public class FileContentConverter
     implements LuceneEntryConverter
 {
-
     public Document convert( LuceneRepositoryContentRecord record )
     {
         if ( !( record instanceof FileContentRecord ) )
@@ -62,9 +61,8 @@
             doc.addFieldTokenized( ArtifactKeys.TYPE, filecontent.getArtifact().getType() );
             doc.addFieldUntokenized( ArtifactKeys.CLASSIFIER, filecontent.getArtifact().getClassifier() );
         }
-        
+
         doc.addFieldTokenized( FileContentKeys.FILENAME, filecontent.getFilename() );
-        doc.addFieldTokenized( FileContentKeys.CONTENT, filecontent.getContents() );
 
         return doc.getDocument();
     }
@@ -91,7 +89,6 @@
 
         // Filecontent Specifics
         record.setFilename( document.get( FileContentKeys.FILENAME ) );
-        record.setContents( document.get( FileContentKeys.CONTENT ) );
 
         return record;
     }

Modified: archiva/branches/archiva-search-improvements/archiva-modules/archiva-base/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/filecontent/FileContentHandlers.java
URL: http://svn.apache.org/viewvc/archiva/branches/archiva-search-improvements/archiva-modules/archiva-base/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/filecontent/FileContentHandlers.java?rev=723612&r1=723611&r2=723612&view=diff
==============================================================================
--- archiva/branches/archiva-search-improvements/archiva-modules/archiva-base/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/filecontent/FileContentHandlers.java (original)
+++ archiva/branches/archiva-search-improvements/archiva-modules/archiva-base/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/filecontent/FileContentHandlers.java Thu Dec  4 22:35:48 2008
@@ -43,8 +43,17 @@
     {
         analyzer = new FileContentAnalyzer();
         converter = new FileContentConverter();
-        queryParser = new MultiFieldQueryParser( new String[] { FileContentKeys.FILENAME, FileContentKeys.CONTENT },
-                                                 analyzer );
+        queryParser = new MultiFieldQueryParser( new String[] {
+                                                FileContentKeys.FILENAME,
+                                                FileContentKeys.ARTIFACTID,
+                                                FileContentKeys.GROUPID,
+                                                FileContentKeys.ARTIFACTID_EXACT,
+                                                FileContentKeys.GROUPID_EXACT,
+                                                FileContentKeys.VERSION,
+                                                FileContentKeys.VERSION_EXACT},
+                                                analyzer );
+        // Require every term to match (AND): each additional search term narrows the results.
+        queryParser.setDefaultOperator(MultiFieldQueryParser.Operator.AND);
     }
 
     public String getId()

Modified: archiva/branches/archiva-search-improvements/archiva-modules/archiva-base/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/filecontent/FileContentKeys.java
URL: http://svn.apache.org/viewvc/archiva/branches/archiva-search-improvements/archiva-modules/archiva-base/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/filecontent/FileContentKeys.java?rev=723612&r1=723611&r2=723612&view=diff
==============================================================================
--- archiva/branches/archiva-search-improvements/archiva-modules/archiva-base/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/filecontent/FileContentKeys.java (original)
+++ archiva/branches/archiva-search-improvements/archiva-modules/archiva-base/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/filecontent/FileContentKeys.java Thu Dec  4 22:35:48 2008
@@ -32,6 +32,4 @@
     public static final String ID = "filecontent";
 
     public static final String FILENAME = "filename";
-
-    public static final String CONTENT = "content";
 }

Modified: archiva/branches/archiva-search-improvements/archiva-modules/archiva-base/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/filecontent/FileContentRecord.java
URL: http://svn.apache.org/viewvc/archiva/branches/archiva-search-improvements/archiva-modules/archiva-base/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/filecontent/FileContentRecord.java?rev=723612&r1=723611&r2=723612&view=diff
==============================================================================
--- archiva/branches/archiva-search-improvements/archiva-modules/archiva-base/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/filecontent/FileContentRecord.java (original)
+++ archiva/branches/archiva-search-improvements/archiva-modules/archiva-base/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/filecontent/FileContentRecord.java Thu Dec  4 22:35:48 2008
@@ -39,8 +39,6 @@
      */
     private ArchivaArtifact artifact;
 
-    private String contents;
-
     public String getRepositoryId()
     {
         return repositoryId;
@@ -51,16 +49,6 @@
         this.repositoryId = repositoryId;
     }
 
-    public String getContents()
-    {
-        return contents;
-    }
-
-    public void setContents( String contents )
-    {
-        this.contents = contents;
-    }
-
     public String getPrimaryKey()
     {
         return repositoryId + ":" + filename;

Added: archiva/branches/archiva-search-improvements/archiva-modules/archiva-base/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/lucene/analyzers/ArtifactIdTokenizer.java
URL: http://svn.apache.org/viewvc/archiva/branches/archiva-search-improvements/archiva-modules/archiva-base/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/lucene/analyzers/ArtifactIdTokenizer.java?rev=723612&view=auto
==============================================================================
--- archiva/branches/archiva-search-improvements/archiva-modules/archiva-base/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/lucene/analyzers/ArtifactIdTokenizer.java (added)
+++ archiva/branches/archiva-search-improvements/archiva-modules/archiva-base/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/lucene/analyzers/ArtifactIdTokenizer.java Thu Dec  4 22:35:48 2008
@@ -0,0 +1,45 @@
+package org.apache.maven.archiva.indexer.lucene.analyzers;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import java.io.Reader;
+import org.apache.lucene.analysis.CharTokenizer;
+
+/**
+ * Lucene Tokenizer for {@link ArtifactKeys#ARTIFACTID} fields.
+ * <p>
+ * Extends {@link CharTokenizer} and treats the hyphen as the only separator: every other
+ * character (for example '.', digits or whitespace) is kept as part of a token. This class
+ * does not override any other tokenizer behaviour, so no case normalization is added here.
+ * <p>
+ * The single constructor passes the supplied {@link Reader} straight to the superclass.
+ * <p>
+ * NOTE(review): {@code ArtifactKeys} is not imported in this file, so the link above may not
+ * resolve when Javadoc is generated - confirm, and use the fully qualified name if needed.
+ */
+public class ArtifactIdTokenizer extends CharTokenizer
+{
+    public ArtifactIdTokenizer( Reader reader )
+    {
+        super( reader );
+    }
+
+    /**
+     * Decides whether a character belongs to a token. Only '-' is rejected, so an artifactId
+     * such as "atlassian-plugins-core" is broken on each "-".
+     *
+     * @param c the character being examined
+     * @return {@code false} for '-', {@code true} for any other character
+     */
+    @Override
+    protected boolean isTokenChar(char c)
+    {
+        return (c != '-');
+    }
+}

Modified: archiva/branches/archiva-search-improvements/archiva-modules/archiva-web/archiva-xmlrpc/archiva-xmlrpc-services/src/test/java/org/apache/archiva/web/xmlrpc/services/SearchServiceImplTest.java
URL: http://svn.apache.org/viewvc/archiva/branches/archiva-search-improvements/archiva-modules/archiva-web/archiva-xmlrpc/archiva-xmlrpc-services/src/test/java/org/apache/archiva/web/xmlrpc/services/SearchServiceImplTest.java?rev=723612&r1=723611&r2=723612&view=diff
==============================================================================
--- archiva/branches/archiva-search-improvements/archiva-modules/archiva-web/archiva-xmlrpc/archiva-xmlrpc-services/src/test/java/org/apache/archiva/web/xmlrpc/services/SearchServiceImplTest.java (original)
+++ archiva/branches/archiva-search-improvements/archiva-modules/archiva-web/archiva-xmlrpc/archiva-xmlrpc-services/src/test/java/org/apache/archiva/web/xmlrpc/services/SearchServiceImplTest.java Thu Dec  4 22:35:48 2008
@@ -152,7 +152,6 @@
         FileContentRecord record = new FileContentRecord();
         record.setRepositoryId( "repo1.mirror" );
         record.setArtifact( artifact );
-        record.setContents( "org.apache.archiva:archiva-test:1.0:jar org.apache.archiva.test.MyClassName" );
         record.setFilename( "archiva-test-1.0.jar" );
                 
         results.addHit( record );
@@ -198,7 +197,6 @@
         FileContentRecord record = new FileContentRecord();
         record.setRepositoryId( "repo1.mirror" );
         record.setArtifact( artifact );
-        record.setContents( "org.apache.archiva:archiva-test:1.0:jar" );
         record.setFilename( "archiva-test-1.0.jar" );
                 
         results.addHit( record );