You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@maven.apache.org by cs...@apache.org on 2022/04/25 15:24:36 UTC

[maven-indexer] branch MINDEXER-147-groups-in-mem created (now aae3194)

This is an automated email from the ASF dual-hosted git repository.

cstamas pushed a change to branch MINDEXER-147-groups-in-mem
in repository https://gitbox.apache.org/repos/asf/maven-indexer.git


      at aae3194  [MINDEXER-147] Move allGroups/rootGroups to in memory only

This branch includes the following new commits:

     new aae3194  [MINDEXER-147] Move allGroups/rootGroups to in memory only

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[maven-indexer] 01/01: [MINDEXER-147] Move allGroups/rootGroups to in memory only

Posted by cs...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

cstamas pushed a commit to branch MINDEXER-147-groups-in-mem
in repository https://gitbox.apache.org/repos/asf/maven-indexer.git

commit aae3194b70264404dcf511e6523b2f0ad1f21dda
Author: Tamas Cservenak <ta...@cservenak.net>
AuthorDate: Mon Apr 25 17:23:28 2022 +0200

    [MINDEXER-147] Move allGroups/rootGroups to in memory only
    
    Do not (mis)use the Lucene index to store a potentially huge
    dataset: this field is never even searched, it just contains
    the aggregated artifact groups that are present in the
    index.
---
 .../index/context/DefaultIndexingContext.java      | 64 +++-------------------
 .../maven/index/updater/IndexDataReader.java       |  6 +-
 .../maven/index/updater/IndexDataWriter.java       | 44 ++-------------
 .../index/packer/NEXUS4149TransferFormatTest.java  |  7 ++-
 4 files changed, 24 insertions(+), 97 deletions(-)

diff --git a/indexer-core/src/main/java/org/apache/maven/index/context/DefaultIndexingContext.java b/indexer-core/src/main/java/org/apache/maven/index/context/DefaultIndexingContext.java
index 4e3a4ce..243de94 100644
--- a/indexer-core/src/main/java/org/apache/maven/index/context/DefaultIndexingContext.java
+++ b/indexer-core/src/main/java/org/apache/maven/index/context/DefaultIndexingContext.java
@@ -26,13 +26,14 @@ import java.nio.channels.FileLock;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.StandardOpenOption;
-import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.Date;
+import java.util.HashSet;
 import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Set;
+import java.util.concurrent.atomic.AtomicReference;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.document.Document;
@@ -48,7 +49,6 @@ import org.apache.lucene.index.Term;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.SearcherManager;
 import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.search.TopScoreDocCollector;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
@@ -812,77 +812,29 @@ public class DefaultIndexingContext
     }
 
     public Set<String> getAllGroups()
-        throws IOException
     {
-        return getGroups( ArtifactInfo.ALL_GROUPS, ArtifactInfo.ALL_GROUPS_VALUE, ArtifactInfo.ALL_GROUPS_LIST );
+        return allGroups.get();
     }
 
     public synchronized void setAllGroups( Collection<String> groups )
-        throws IOException
     {
-        setGroups( groups, ArtifactInfo.ALL_GROUPS, ArtifactInfo.ALL_GROUPS_VALUE, ArtifactInfo.ALL_GROUPS_LIST );
-        commit();
+        allGroups.set( new HashSet<>( groups ) );
     }
 
     public Set<String> getRootGroups()
         throws IOException
     {
-        return getGroups( ArtifactInfo.ROOT_GROUPS, ArtifactInfo.ROOT_GROUPS_VALUE, ArtifactInfo.ROOT_GROUPS_LIST );
+        return rootGroups.get();
     }
 
     public synchronized void setRootGroups( Collection<String> groups )
-        throws IOException
     {
-        setGroups( groups, ArtifactInfo.ROOT_GROUPS, ArtifactInfo.ROOT_GROUPS_VALUE, ArtifactInfo.ROOT_GROUPS_LIST );
-        commit();
-    }
-
-    protected Set<String> getGroups( String field, String filedValue, String listField )
-        throws IOException, CorruptIndexException
-    {
-        final TopScoreDocCollector collector = TopScoreDocCollector.create( 1, Integer.MAX_VALUE );
-        final IndexSearcher indexSearcher = acquireIndexSearcher();
-        try
-        {
-            indexSearcher.search( new TermQuery( new Term( field, filedValue ) ), collector );
-            TopDocs topDocs = collector.topDocs();
-            // In Lucene 7 topDocs.totalHits is now a long, but we can safely cast this to an int because
-            // indexes are still bound to at most 2 billion (Integer.MAX_VALUE) documents
-            Set<String> groups = new LinkedHashSet<String>( (int) Math.max( 10L, topDocs.totalHits.value ) );
-            if ( topDocs.totalHits.value > 0 )
-            {
-                Document doc = indexSearcher.doc( topDocs.scoreDocs[0].doc );
-                String groupList = doc.get( listField );
-                if ( groupList != null )
-                {
-                    groups.addAll( Arrays.asList( groupList.split( "\\|" ) ) );
-                }
-            }
-            return groups;
-        }
-        finally
-        {
-            releaseIndexSearcher( indexSearcher );
-        }
+        rootGroups.set( new HashSet<>( groups ) );
     }
 
-    protected void setGroups( Collection<String> groups, String groupField, String groupFieldValue,
-                              String groupListField )
-        throws IOException, CorruptIndexException
-    {
-        final IndexWriter w = getIndexWriter();
-        w.updateDocument( new Term( groupField, groupFieldValue ),
-            createGroupsDocument( groups, groupField, groupFieldValue, groupListField ) );
-    }
+    private final AtomicReference<HashSet<String>> rootGroups = new AtomicReference<>( new HashSet<>() );
 
-    protected Document createGroupsDocument( Collection<String> groups, String field, String fieldValue,
-                                             String listField )
-    {
-        final Document groupDoc = new Document();
-        groupDoc.add( new Field( field, fieldValue, IndexerField.KEYWORD_STORED ) );
-        groupDoc.add( new StoredField( listField, ArtifactInfo.lst2str( groups ), IndexerField.KEYWORD_STORED ) );
-        return groupDoc;
-    }
+    private final AtomicReference<HashSet<String>> allGroups = new AtomicReference<>( new HashSet<>() );
 
     @Override
     public String toString()
diff --git a/indexer-core/src/main/java/org/apache/maven/index/updater/IndexDataReader.java b/indexer-core/src/main/java/org/apache/maven/index/updater/IndexDataReader.java
index d670cf0..b1c4237 100644
--- a/indexer-core/src/main/java/org/apache/maven/index/updater/IndexDataReader.java
+++ b/indexer-core/src/main/java/org/apache/maven/index/updater/IndexDataReader.java
@@ -100,7 +100,11 @@ public class IndexDataReader
 
                 rootGroups.add( ai.getRootGroup() );
                 allGroups.add( ai.getGroupId() );
-
+            }
+            else if ( doc.getField( ArtifactInfo.ALL_GROUPS ) != null
+                    || doc.getField( ArtifactInfo.ROOT_GROUPS ) != null )
+            {
+                // allGroups/rootGroups document: skip it, groups are collected from the artifact documents
             }
             else
             {
diff --git a/indexer-core/src/main/java/org/apache/maven/index/updater/IndexDataWriter.java b/indexer-core/src/main/java/org/apache/maven/index/updater/IndexDataWriter.java
index 1be10d5..9c00122 100644
--- a/indexer-core/src/main/java/org/apache/maven/index/updater/IndexDataWriter.java
+++ b/indexer-core/src/main/java/org/apache/maven/index/updater/IndexDataWriter.java
@@ -26,9 +26,7 @@ import java.io.IOException;
 import java.io.OutputStream;
 import java.util.ArrayList;
 import java.util.Date;
-import java.util.HashSet;
 import java.util.List;
-import java.util.Set;
 import java.util.zip.GZIPOutputStream;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
@@ -66,10 +64,6 @@ public class IndexDataWriter
 
     private final BufferedOutputStream bos;
 
-    private final Set<String> allGroups;
-
-    private final Set<String> rootGroups;
-
     private boolean descriptorWritten;
 
     public IndexDataWriter( OutputStream os )
@@ -79,8 +73,6 @@ public class IndexDataWriter
         gos = new GZIPOutputStream( bos, 1024 * 2 );
         dos = new DataOutputStream( gos );
 
-        this.allGroups = new HashSet<>();
-        this.rootGroups = new HashSet<>();
         this.descriptorWritten = false;
     }
 
@@ -91,7 +83,7 @@ public class IndexDataWriter
 
         int n = writeDocuments( indexReader, docIndexes );
 
-        writeGroupFields();
+        writeGroupFields( context );
 
         close();
 
@@ -118,15 +110,15 @@ public class IndexDataWriter
         dos.writeLong( timestamp == null ? -1 : timestamp.getTime() );
     }
 
-    public void writeGroupFields()
+    public void writeGroupFields( IndexingContext context )
         throws IOException
     {
         {
             List<IndexableField> allGroupsFields = new ArrayList<>( 2 );
             allGroupsFields.add( new Field( ArtifactInfo.ALL_GROUPS, ArtifactInfo.ALL_GROUPS_VALUE,
                                             IndexerField.KEYWORD_STORED ) );
-            allGroupsFields.add( new StoredField( ArtifactInfo.ALL_GROUPS_LIST, ArtifactInfo.lst2str( allGroups ),
-                                            IndexerField.KEYWORD_STORED ) );
+            allGroupsFields.add( new StoredField( ArtifactInfo.ALL_GROUPS_LIST,
+                    ArtifactInfo.lst2str( context.getAllGroups() ), IndexerField.KEYWORD_STORED ) );
             writeDocumentFields( allGroupsFields );
         }
 
@@ -134,8 +126,8 @@ public class IndexDataWriter
             List<IndexableField> rootGroupsFields = new ArrayList<>( 2 );
             rootGroupsFields.add( new Field( ArtifactInfo.ROOT_GROUPS, ArtifactInfo.ROOT_GROUPS_VALUE,
                                              IndexerField.KEYWORD_STORED ) );
-            rootGroupsFields.add( new StoredField( ArtifactInfo.ROOT_GROUPS_LIST, ArtifactInfo.lst2str( rootGroups ),
-                                             IndexerField.KEYWORD_STORED ) );
+            rootGroupsFields.add( new StoredField( ArtifactInfo.ROOT_GROUPS_LIST,
+                    ArtifactInfo.lst2str( context.getRootGroups() ), IndexerField.KEYWORD_STORED ) );
             writeDocumentFields( rootGroupsFields );
         }
     }
@@ -197,30 +189,6 @@ public class IndexDataWriter
                 }
             }
 
-            if ( ArtifactInfo.ALL_GROUPS.equals( field.name() ) )
-            {
-                final String groupList = document.get( ArtifactInfo.ALL_GROUPS_LIST );
-
-                if ( groupList != null && groupList.trim().length() > 0 )
-                {
-                    allGroups.addAll( ArtifactInfo.str2lst( groupList ) );
-                }
-
-                return false;
-            }
-
-            if ( ArtifactInfo.ROOT_GROUPS.equals( field.name() ) )
-            {
-                final String groupList = document.get( ArtifactInfo.ROOT_GROUPS_LIST );
-
-                if ( groupList != null && groupList.trim().length() > 0 )
-                {
-                    rootGroups.addAll( ArtifactInfo.str2lst( groupList ) );
-                }
-
-                return false;
-            }
-
             if ( field.fieldType().stored() )
             {
                 storedFields.add( field );
diff --git a/indexer-core/src/test/java/org/apache/maven/index/packer/NEXUS4149TransferFormatTest.java b/indexer-core/src/test/java/org/apache/maven/index/packer/NEXUS4149TransferFormatTest.java
index b77d6b0..df4d921 100644
--- a/indexer-core/src/test/java/org/apache/maven/index/packer/NEXUS4149TransferFormatTest.java
+++ b/indexer-core/src/test/java/org/apache/maven/index/packer/NEXUS4149TransferFormatTest.java
@@ -115,10 +115,13 @@ public class NEXUS4149TransferFormatTest
 
         for ( IndexingContext member : mctx.getMembers() )
         {
-            Assert.assertEquals( "Members should have one root group!", 1, member.getRootGroups().size() );
+            if ( !"repo4".equals( member.getId() ) ) // repo4 is empty
+            {
+                Assert.assertEquals( "Members should have one root group!", 1, member.getRootGroups().size() );
+            }
         }
 
-        Assert.assertEquals( "Merged should have one root multiply members count!", mctx.getMembers().size(),
+        Assert.assertEquals( "Merged should have one root multiply members count (sans repo4)!", 3,
             mctx.getRootGroups().size() );
     }