You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@archiva.apache.org by br...@apache.org on 2006/09/04 09:31:53 UTC
svn commit: r439966 - in /maven/archiva/trunk:
archiva-core/src/main/java/org/apache/maven/archiva/scheduler/task/
archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/
archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/lucene/ ar...
Author: brett
Date: Mon Sep 4 00:31:52 2006
New Revision: 439966
URL: http://svn.apache.org/viewvc?view=rev&rev=439966
Log:
[MRM-136] make the browse interface perform acceptably on large repositories
Modified:
maven/archiva/trunk/archiva-core/src/main/java/org/apache/maven/archiva/scheduler/task/IndexerTask.java
maven/archiva/trunk/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/RepositoryArtifactIndex.java
maven/archiva/trunk/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/lucene/LuceneRepositoryArtifactIndex.java
maven/archiva/trunk/archiva-webapp/src/main/java/org/apache/maven/archiva/web/action/BrowseAction.java
maven/archiva/trunk/archiva-webapp/src/main/webapp/WEB-INF/jsp/browseGroup.jsp
Modified: maven/archiva/trunk/archiva-core/src/main/java/org/apache/maven/archiva/scheduler/task/IndexerTask.java
URL: http://svn.apache.org/viewvc/maven/archiva/trunk/archiva-core/src/main/java/org/apache/maven/archiva/scheduler/task/IndexerTask.java?view=diff&rev=439966&r1=439965&r2=439966
==============================================================================
--- maven/archiva/trunk/archiva-core/src/main/java/org/apache/maven/archiva/scheduler/task/IndexerTask.java (original)
+++ maven/archiva/trunk/archiva-core/src/main/java/org/apache/maven/archiva/scheduler/task/IndexerTask.java Mon Sep 4 00:31:52 2006
@@ -45,7 +45,7 @@
* Task for discovering changes in the repository.
*
* @author <a href="mailto:brett@apache.org">Brett Porter</a>
- * @plexus.component role=org.apache.maven.archiva.scheduler.task.RepositoryTaskk" role-hint="indexer"
+ * @plexus.component role="org.apache.maven.archiva.scheduler.task.RepositoryTask" role-hint="indexer"
*/
public class IndexerTask
extends AbstractLogEnabled
Modified: maven/archiva/trunk/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/RepositoryArtifactIndex.java
URL: http://svn.apache.org/viewvc/maven/archiva/trunk/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/RepositoryArtifactIndex.java?view=diff&rev=439966&r1=439965&r2=439966
==============================================================================
--- maven/archiva/trunk/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/RepositoryArtifactIndex.java (original)
+++ maven/archiva/trunk/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/RepositoryArtifactIndex.java Mon Sep 4 00:31:52 2006
@@ -81,7 +81,7 @@
* Retrieve all primary keys of records in the index.
*
* @return the keys
- * @throws RepositoryIndexSearchException if there was an error searching the index
+ * @throws RepositoryIndexException if there was an error searching the index
*/
Collection getAllRecordKeys()
throws RepositoryIndexException;
@@ -97,4 +97,42 @@
*/
void indexArtifacts( List artifacts, RepositoryIndexRecordFactory factory )
throws RepositoryIndexException;
+
+ /**
+ * Get all the group IDs in the index.
+ *
+ * @return list of groups as strings
+ * @throws RepositoryIndexException if there is a problem searching for the group ID
+ */
+ List getAllGroupIds()
+ throws RepositoryIndexException;
+
+ /**
+ * Get the list of artifact IDs in a group in the index.
+ *
+ * @param groupId the group ID to search
+ * @return the list of artifact ID strings
+ * @throws RepositoryIndexSearchException if there is a problem searching for the group ID
+ */
+ List getArtifactIds( String groupId )
+ throws RepositoryIndexSearchException;
+
+ /**
+ * Get the list of available versions for a given artifact.
+ *
+ * @param groupId the group ID to search for
+ * @param artifactId the artifact ID to search for
+ * @return the list of version strings
+ * @throws RepositoryIndexSearchException if there is a problem searching for the artifact
+ */
+ List getVersions( String groupId, String artifactId )
+ throws RepositoryIndexSearchException;
+
+ /**
+ * Get the time when the index was last updated. Note that this does not monitor external processes or multiple
+ * instances of the index.
+ *
+ * @return the last updated time, or 0 if it has not been updated since the class was instantiated.
+ */
+ long getLastUpdatedTime();
}
Modified: maven/archiva/trunk/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/lucene/LuceneRepositoryArtifactIndex.java
URL: http://svn.apache.org/viewvc/maven/archiva/trunk/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/lucene/LuceneRepositoryArtifactIndex.java?view=diff&rev=439966&r1=439965&r2=439966
==============================================================================
--- maven/archiva/trunk/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/lucene/LuceneRepositoryArtifactIndex.java (original)
+++ maven/archiva/trunk/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/lucene/LuceneRepositoryArtifactIndex.java Mon Sep 4 00:31:52 2006
@@ -27,9 +27,12 @@
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.TermQuery;
import org.apache.maven.archiva.indexer.RepositoryArtifactIndex;
import org.apache.maven.archiva.indexer.RepositoryIndexException;
import org.apache.maven.archiva.indexer.RepositoryIndexSearchException;
@@ -47,8 +50,8 @@
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Collection;
-import java.util.HashSet;
import java.util.Iterator;
+import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
@@ -77,6 +80,8 @@
private MavenProjectBuilder projectBuilder;
+ private long lastUpdatedTime = 0;
+
public LuceneRepositoryArtifactIndex( File indexPath, LuceneIndexRecordConverter converter )
{
this.indexLocation = indexPath;
@@ -137,6 +142,7 @@
finally
{
closeQuietly( indexWriter );
+ lastUpdatedTime = System.currentTimeMillis();
}
}
@@ -276,7 +282,13 @@
public Collection getAllRecordKeys()
throws RepositoryIndexException
{
- Set keys = new HashSet();
+ return getAllFieldValues( FLD_PK );
+ }
+
+ private List getAllFieldValues( String fieldName )
+ throws RepositoryIndexException
+ {
+ List keys = new ArrayList();
if ( exists() )
{
@@ -286,8 +298,8 @@
{
indexReader = IndexReader.open( indexLocation );
- terms = indexReader.terms( new Term( FLD_PK, "" ) );
- while ( FLD_PK.equals( terms.term().field() ) )
+ terms = indexReader.terms( new Term( fieldName, "" ) );
+ while ( fieldName.equals( terms.term().field() ) )
{
keys.add( terms.term().text() );
@@ -353,7 +365,74 @@
finally
{
closeQuietly( indexModifier );
+ lastUpdatedTime = System.currentTimeMillis();
+ }
+ }
+
+ public List getAllGroupIds()
+ throws RepositoryIndexException
+ {
+ return getAllFieldValues( StandardIndexRecordFields.GROUPID_EXACT );
+ }
+
+ public List getArtifactIds( String groupId )
+ throws RepositoryIndexSearchException
+ {
+ return searchField( new TermQuery( new Term( StandardIndexRecordFields.GROUPID_EXACT, groupId ) ),
+ StandardIndexRecordFields.ARTIFACTID );
+ }
+
+ public List getVersions( String groupId, String artifactId )
+ throws RepositoryIndexSearchException
+ {
+ BooleanQuery query = new BooleanQuery();
+ query.add( new TermQuery( new Term( StandardIndexRecordFields.GROUPID_EXACT, groupId ) ),
+ BooleanClause.Occur.MUST );
+ query.add( new TermQuery( new Term( StandardIndexRecordFields.ARTIFACTID_EXACT, artifactId ) ),
+ BooleanClause.Occur.MUST );
+
+ return searchField( query, StandardIndexRecordFields.VERSION );
+ }
+
+ public long getLastUpdatedTime()
+ {
+ return lastUpdatedTime;
+ }
+
+ private List searchField( org.apache.lucene.search.Query luceneQuery, String fieldName )
+ throws RepositoryIndexSearchException
+ {
+ Set results = new LinkedHashSet();
+
+ IndexSearcher searcher;
+ try
+ {
+ searcher = new IndexSearcher( indexLocation.getAbsolutePath() );
+ }
+ catch ( IOException e )
+ {
+ throw new RepositoryIndexSearchException( "Unable to open index: " + e.getMessage(), e );
+ }
+
+ try
+ {
+ Hits hits = searcher.search( luceneQuery );
+ for ( int i = 0; i < hits.length(); i++ )
+ {
+ Document doc = hits.doc( i );
+
+ results.add( doc.get( fieldName ) );
+ }
+ }
+ catch ( IOException e )
+ {
+ throw new RepositoryIndexSearchException( "Unable to search index: " + e.getMessage(), e );
+ }
+ finally
+ {
+ closeQuietly( searcher );
}
+ return new ArrayList( results );
}
private void flushProjectBuilderCacheHack()
Modified: maven/archiva/trunk/archiva-webapp/src/main/java/org/apache/maven/archiva/web/action/BrowseAction.java
URL: http://svn.apache.org/viewvc/maven/archiva/trunk/archiva-webapp/src/main/java/org/apache/maven/archiva/web/action/BrowseAction.java?view=diff&rev=439966&r1=439965&r2=439966
==============================================================================
--- maven/archiva/trunk/archiva-webapp/src/main/java/org/apache/maven/archiva/web/action/BrowseAction.java (original)
+++ maven/archiva/trunk/archiva-webapp/src/main/java/org/apache/maven/archiva/web/action/BrowseAction.java Mon Sep 4 00:31:52 2006
@@ -16,12 +16,6 @@
* limitations under the License.
*/
-import com.opensymphony.xwork.ActionSupport;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.search.BooleanClause;
-import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.MatchAllDocsQuery;
-import org.apache.lucene.search.TermQuery;
import org.apache.maven.archiva.configuration.Configuration;
import org.apache.maven.archiva.configuration.ConfigurationStore;
import org.apache.maven.archiva.configuration.ConfigurationStoreException;
@@ -30,32 +24,27 @@
import org.apache.maven.archiva.indexer.RepositoryArtifactIndexFactory;
import org.apache.maven.archiva.indexer.RepositoryIndexException;
import org.apache.maven.archiva.indexer.RepositoryIndexSearchException;
-import org.apache.maven.archiva.indexer.lucene.LuceneQuery;
-import org.apache.maven.archiva.indexer.record.StandardArtifactIndexRecord;
-import org.apache.maven.archiva.indexer.record.StandardIndexRecordFields;
import org.codehaus.plexus.util.StringUtils;
+import org.codehaus.plexus.xwork.action.PlexusActionSupport;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
-import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
-import java.util.Set;
import java.util.StringTokenizer;
import java.util.TreeMap;
-import java.util.TreeSet;
/**
* Browse the repository.
*
- * @todo the tree part probably belongs in a browsing component, and the indexer could optimize how it retrieves the terms rather than querying everything!
+ * @todo cache should be a proper cache class that is a singleton requirement rather than static variables
* @plexus.component role="com.opensymphony.xwork.Action" role-hint="browseAction"
*/
public class BrowseAction
- extends ActionSupport
+ extends PlexusActionSupport
{
/**
* @plexus.requirement
@@ -84,8 +73,12 @@
private List versions;
+ private static GroupTreeNode rootNode;
+
+ private static long groupCacheTime;
+
public String browse()
- throws ConfigurationStoreException, RepositoryIndexException, IOException, RepositoryIndexSearchException
+ throws ConfigurationStoreException, RepositoryIndexException, IOException
{
RepositoryArtifactIndex index = getIndex();
@@ -130,6 +123,8 @@
if ( !rootNode.getChildren().containsKey( part ) )
{
// TODO: i18n
+ getLogger().debug(
+ "Can't find part: " + part + " for groupId " + groupId + " in children " + rootNode.getChildren() );
addActionError( "The group specified was not found" );
return ERROR;
}
@@ -141,16 +136,7 @@
this.groups = collateGroups( rootNode );
- List records = index.search(
- new LuceneQuery( new TermQuery( new Term( StandardIndexRecordFields.GROUPID_EXACT, groupId ) ) ) );
-
- Set artifactIds = new HashSet();
- for ( Iterator i = records.iterator(); i.hasNext(); )
- {
- StandardArtifactIndexRecord record = (StandardArtifactIndexRecord) i.next();
- artifactIds.add( record.getArtifactId() );
- }
- this.artifactIds = new ArrayList( artifactIds );
+ this.artifactIds = index.getArtifactIds( groupId );
Collections.sort( this.artifactIds );
return SUCCESS;
@@ -175,77 +161,66 @@
return ERROR;
}
- BooleanQuery query = new BooleanQuery();
- query.add( new TermQuery( new Term( StandardIndexRecordFields.GROUPID_EXACT, groupId ) ),
- BooleanClause.Occur.MUST );
- query.add( new TermQuery( new Term( StandardIndexRecordFields.ARTIFACTID_EXACT, artifactId ) ),
- BooleanClause.Occur.MUST );
-
- List records = index.search( new LuceneQuery( query ) );
+ this.versions = index.getVersions( groupId, artifactId );
+ Collections.sort( this.versions );
- if ( records.isEmpty() )
+ if ( versions.isEmpty() )
{
// TODO: i18n
addActionError( "Could not find any artifacts with the given group and artifact ID" );
return ERROR;
}
- Set versions = new HashSet();
- for ( Iterator i = records.iterator(); i.hasNext(); )
- {
- StandardArtifactIndexRecord record = (StandardArtifactIndexRecord) i.next();
- versions.add( record.getVersion() );
- }
-
- this.versions = new ArrayList( versions );
- Collections.sort( this.versions );
-
return SUCCESS;
}
private GroupTreeNode buildGroupTree( RepositoryArtifactIndex index )
- throws IOException, RepositoryIndexSearchException
+ throws IOException, RepositoryIndexException
{
// TODO: give action message if indexing is in progress
- // TODO: this will be inefficient over a very large number of artifacts, should be cached!
-
- List records = index.search( new LuceneQuery( new MatchAllDocsQuery() ) );
+ long lastUpdate = index.getLastUpdatedTime();
- Set groups = new TreeSet();
- for ( Iterator i = records.iterator(); i.hasNext(); )
+ if ( rootNode == null || lastUpdate > groupCacheTime )
{
- StandardArtifactIndexRecord record = (StandardArtifactIndexRecord) i.next();
- groups.add( record.getGroupId() );
- }
+ List groups = index.getAllGroupIds();
- GroupTreeNode rootNode = new GroupTreeNode();
+ getLogger().info( "Loaded " + groups.size() + " groups from index" );
- // build a tree structure
- for ( Iterator i = groups.iterator(); i.hasNext(); )
- {
- String groupId = (String) i.next();
+ rootNode = new GroupTreeNode();
- StringTokenizer tok = new StringTokenizer( groupId, GROUP_SEPARATOR );
+ // build a tree structure
+ for ( Iterator i = groups.iterator(); i.hasNext(); )
+ {
+ String groupId = (String) i.next();
- GroupTreeNode node = rootNode;
+ StringTokenizer tok = new StringTokenizer( groupId, GROUP_SEPARATOR );
- while ( tok.hasMoreTokens() )
- {
- String part = tok.nextToken();
+ GroupTreeNode node = rootNode;
- if ( !node.getChildren().containsKey( part ) )
+ while ( tok.hasMoreTokens() )
{
- GroupTreeNode newNode = new GroupTreeNode( part, node );
- node.addChild( newNode );
- node = newNode;
- }
- else
- {
- node = (GroupTreeNode) node.getChildren().get( part );
+ String part = tok.nextToken();
+
+ if ( !node.getChildren().containsKey( part ) )
+ {
+ GroupTreeNode newNode = new GroupTreeNode( part, node );
+ node.addChild( newNode );
+ node = newNode;
+ }
+ else
+ {
+ node = (GroupTreeNode) node.getChildren().get( part );
+ }
}
}
+ groupCacheTime = lastUpdate;
}
+ else
+ {
+ getLogger().debug( "Loaded groups from cache" );
+ }
+
return rootNode;
}
Modified: maven/archiva/trunk/archiva-webapp/src/main/webapp/WEB-INF/jsp/browseGroup.jsp
URL: http://svn.apache.org/viewvc/maven/archiva/trunk/archiva-webapp/src/main/webapp/WEB-INF/jsp/browseGroup.jsp?view=diff&rev=439966&r1=439965&r2=439966
==============================================================================
--- maven/archiva/trunk/archiva-webapp/src/main/webapp/WEB-INF/jsp/browseGroup.jsp (original)
+++ maven/archiva/trunk/archiva-webapp/src/main/webapp/WEB-INF/jsp/browseGroup.jsp Mon Sep 4 00:31:52 2006
@@ -58,7 +58,7 @@
<ww:set name="groups" value="groups"/>
<c:if test="${!empty(groups)}">
- <h2>Group / Artifact</h2>
+ <h2>Groups</h2>
<ul>
<c:forEach items="${groups}" var="groupId">
<c:set var="url">