You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@solr.apache.org by br...@apache.org on 2023/08/14 08:57:59 UTC

[solr-sandbox] branch main updated: Fix EncryptionMergePolicy to reencrypt each segment separately. (#63)

This is an automated email from the ASF dual-hosted git repository.

broustant pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr-sandbox.git


The following commit(s) were added to refs/heads/main by this push:
     new 9163b31  Fix EncryptionMergePolicy to reencrypt each segment separately. (#63)
9163b31 is described below

commit 9163b31d82e3d4df2b7f7c3d7e735ff44b8c7d49
Author: Bruno Roustant <33...@users.noreply.github.com>
AuthorDate: Mon Aug 14 10:57:55 2023 +0200

    Fix EncryptionMergePolicy to reencrypt each segment separately. (#63)
---
 ENCRYPTION.md                                      |  8 +-----
 .../solr/encryption/EncryptionDirectory.java       | 22 +++++++++++++--
 .../solr/encryption/EncryptionMergePolicy.java     | 16 ++++++-----
 .../solr/encryption/EncryptionRequestHandler.java  | 32 ++++++++++++----------
 .../org/apache/solr/encryption/EncryptionUtil.java | 14 ++++++++--
 .../solr/encryption/EncryptionHeavyLoadTest.java   |  2 +-
 6 files changed, 59 insertions(+), 35 deletions(-)

diff --git a/ENCRYPTION.md b/ENCRYPTION.md
index 17ae2e3..312eb59 100644
--- a/ENCRYPTION.md
+++ b/ENCRYPTION.md
@@ -33,11 +33,6 @@ on performance: expect -20% on most queries, -60% on multi-term queries.
 - Currently, this encryption module does not encrypt TLogs.
 That means the update requests data that are stored in these logs are cleartext.
 
-- Currently, EncryptionMergePolicy does not fully work, so it is disabled.
-That means a call to /admin/encrypt to re-encrypt a Solr Core index will trigger
-an optimized commit which merges all index segments into one. This works but is
-heavyweight.
-
 ## Installing and Configuring the Encryption Plug-In
 
 1. Configure the sharedLib directory in solr.xml (e.g. sharedLIb=lib) and place
@@ -66,8 +61,7 @@ the Encryption plug-in jar file into the specified folder.
         <str name="encrypterFactory">org.apache.solr.encryption.crypto.CipherAesCtrEncrypter$Factory</str>
     </directoryFactory>
 
-    <updateHandler class="org.apache.solr.encryption.EncryptionUpdateHandler">
-    </updateHandler>
+    <updateHandler class="org.apache.solr.encryption.EncryptionUpdateHandler"/>
 
     <requestHandler name="/admin/encrypt" class="org.apache.solr.encryption.EncryptionRequestHandler"/>
 
diff --git a/encryption/src/main/java/org/apache/solr/encryption/EncryptionDirectory.java b/encryption/src/main/java/org/apache/solr/encryption/EncryptionDirectory.java
index 75fcc26..d771f74 100644
--- a/encryption/src/main/java/org/apache/solr/encryption/EncryptionDirectory.java
+++ b/encryption/src/main/java/org/apache/solr/encryption/EncryptionDirectory.java
@@ -44,6 +44,7 @@ import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
+import java.util.NoSuchElementException;
 import java.util.Objects;
 import java.util.stream.Collectors;
 
@@ -186,6 +187,13 @@ public class EncryptionDirectory extends FilterDirectory {
     out.writeByte((byte) i);
   }
 
+  /**
+   * Forces this {@link EncryptionDirectory} to read the user data of the latest commit, to refresh its cache.
+   */
+  public void forceReadCommitUserData() {
+    shouldReadCommitUserData = true;
+  }
+
   /**
    * Gets the user data from the latest commit, potentially reading the latest commit if the cache is stale.
    */
@@ -237,7 +245,13 @@ public class EncryptionDirectory extends FilterDirectory {
    * commit user data. Then, calls the {@link KeySupplier} to get the corresponding key secret.
    */
   protected byte[] getKeySecret(String keyRef) throws IOException {
-    String keyId = getKeyIdFromCommit(keyRef, getLatestCommitData().data);
+    String keyId;
+    try {
+      keyId = getKeyIdFromCommit(keyRef, getLatestCommitData().data);
+    } catch (NoSuchElementException e) {
+      shouldReadCommitUserData = true;
+      keyId = getKeyIdFromCommit(keyRef, getLatestCommitData().data);
+    }
     return keySupplier.getKeySecret(keyId, this::getKeyCookie);
   }
 
@@ -320,7 +334,8 @@ public class EncryptionDirectory extends FilterDirectory {
     throws IOException {
     List<SegmentCommitInfo> segmentsWithOldKeyId = null;
     if (log.isDebugEnabled()) {
-      log.debug("reading segments {} for key ids different from {}",
+      log.debug("{} reading segments {} for key ids different from {}",
+                ENCRYPTION_LOG_PREFIX,
                 segmentInfos.asList().stream().map(i -> i.info.name).collect(Collectors.toList()),
                 activeKeyId);
     }
@@ -330,7 +345,8 @@ public class EncryptionDirectory extends FilterDirectory {
           try (IndexInput fileInput = in.openInput(fileName, IOContext.READ)) {
             String keyRef = getKeyRefForReading(fileInput);
             String keyId = keyRef == null ? null : getKeyIdFromCommit(keyRef, segmentInfos.getUserData());
-            log.debug("reading file {} of segment {} => keyId={}", fileName, segmentCommitInfo.info.name, keyId);
+            log.debug("{} reading file {} of segment {} => keyId={}",
+                      ENCRYPTION_LOG_PREFIX, fileName, segmentCommitInfo.info.name, keyId);
             if (!Objects.equals(keyId, activeKeyId)) {
               if (segmentsWithOldKeyId == null) {
                 segmentsWithOldKeyId = new ArrayList<>();
diff --git a/encryption/src/main/java/org/apache/solr/encryption/EncryptionMergePolicy.java b/encryption/src/main/java/org/apache/solr/encryption/EncryptionMergePolicy.java
index 7f5a581..67b5214 100644
--- a/encryption/src/main/java/org/apache/solr/encryption/EncryptionMergePolicy.java
+++ b/encryption/src/main/java/org/apache/solr/encryption/EncryptionMergePolicy.java
@@ -60,20 +60,22 @@ public class EncryptionMergePolicy extends FilterMergePolicy {
     if (segmentInfos.size() == 0) {
       return null;
     }
-
-    //TODO: this does not seem to work correctly under heavy concurrent load.
-
     Directory dir = segmentInfos.info(0).info.dir;
     if (!(dir instanceof EncryptionDirectory)) {
       // This may happen if the DirectoryFactory configured is not the EncryptionDirectoryFactory,
-      // but this is a misconfiguration. Let's log a warning.
-      log.warn("{} is configured whereas {} is not set; check the DirectoryFactory configuration",
-               getClass().getName(), EncryptionDirectoryFactory.class.getName());
+      // but this is a misconfiguration. Let's log an error.
+      log.error("{} {} is configured whereas {} is not set; check the DirectoryFactory configuration",
+                ENCRYPTION_LOG_PREFIX, getClass().getName(), EncryptionDirectoryFactory.class.getName());
       return super.findForcedMerges(segmentInfos, maxSegmentCount, segmentsToMerge, mergeContext);
     }
     String keyRef = getActiveKeyRefFromCommit(segmentInfos.getUserData());
     String activeKeyId = keyRef == null ? null : getKeyIdFromCommit(keyRef, segmentInfos.getUserData());
-    List<SegmentCommitInfo> segmentsWithOldKeyId = ((EncryptionDirectory) dir).getSegmentsWithOldKeyId(segmentInfos, activeKeyId);
+    EncryptionDirectory encryptionDir = (EncryptionDirectory) dir;
+    // Make sure the EncryptionDirectory does not keep its cache for the commit user data.
+    // It must read the latest commit user data to get the latest active key, so below the
+    // segments with old key (to re-encrypt) are always accurate.
+    encryptionDir.forceReadCommitUserData();
+    List<SegmentCommitInfo> segmentsWithOldKeyId = encryptionDir.getSegmentsWithOldKeyId(segmentInfos, activeKeyId);
     if (segmentsWithOldKeyId.isEmpty()) {
       return null;
     }
diff --git a/encryption/src/main/java/org/apache/solr/encryption/EncryptionRequestHandler.java b/encryption/src/main/java/org/apache/solr/encryption/EncryptionRequestHandler.java
index fe1baf3..ba81608 100644
--- a/encryption/src/main/java/org/apache/solr/encryption/EncryptionRequestHandler.java
+++ b/encryption/src/main/java/org/apache/solr/encryption/EncryptionRequestHandler.java
@@ -176,6 +176,7 @@ public class EncryptionRequestHandler extends RequestHandlerBase {
                                 + EncryptionDirectoryFactory.class.getSimpleName()
                                 + " to use " + getClass().getSimpleName());
     }
+    log.debug("{} encrypt request for keyId={}", ENCRYPTION_LOG_PREFIX, keyId);
     boolean success = false;
     String encryptionState = STATE_PENDING;
     try {
@@ -189,7 +190,7 @@ public class EncryptionRequestHandler extends RequestHandlerBase {
 
       boolean encryptionComplete = false;
       if (isCommitActiveKeyId(keyId, segmentInfos)) {
-        log.debug("provided keyId={} is the current active key id", keyId);
+        log.debug("{} provided keyId={} is the current active key id", ENCRYPTION_LOG_PREFIX, keyId);
         if (Boolean.parseBoolean(segmentInfos.getUserData().get(COMMIT_ENCRYPTION_PENDING))) {
           encryptionComplete = areAllSegmentsEncryptedWithKeyId(keyId, req.getCore(), segmentInfos);
           if (encryptionComplete) {
@@ -209,11 +210,12 @@ public class EncryptionRequestHandler extends RequestHandlerBase {
         PendingKeyId pendingKeyId = pendingEncryptions.get(req.getCore().getName());
         if (pendingKeyId != null) {
           if (Objects.equals(pendingKeyId.keyId, keyId)) {
-            log.debug("ongoing encryption for keyId={}", keyId);
+            log.debug("{} ongoing encryption for keyId={}", ENCRYPTION_LOG_PREFIX, keyId);
             encryptionState = STATE_PENDING;
             success = true;
           } else {
-            log.debug("core busy encrypting for keyId={} different than requested keyId={}", pendingKeyId.keyId, keyId);
+            log.debug("{} core busy encrypting for keyId={} different than requested keyId={}",
+                      ENCRYPTION_LOG_PREFIX, pendingKeyId.keyId, keyId);
             encryptionState = STATE_BUSY;
           }
           return;
@@ -238,7 +240,8 @@ public class EncryptionRequestHandler extends RequestHandlerBase {
       } else {
         rsp.add(STATUS, STATUS_FAILURE);
       }
-      log.debug("responding encryption state={} success={} for keyId={}", encryptionState, success, keyId);
+      log.info("{} responding encryption state={} success={} for keyId={}",
+               ENCRYPTION_LOG_PREFIX, encryptionState, success, keyId);
       rsp.add(ENCRYPTION_STATE, encryptionState);
     }
   }
@@ -248,7 +251,7 @@ public class EncryptionRequestHandler extends RequestHandlerBase {
                                              SolrQueryRequest req,
                                              SolrQueryResponse rsp) throws IOException {
     // Commit no change, with the new active key id in the commit user data.
-    log.debug("commit on empty index for keyId={}", keyId);
+    log.debug("{} commit on empty index for keyId={}", ENCRYPTION_LOG_PREFIX, keyId);
     CommitUpdateCommand commitCmd = new CommitUpdateCommand(req, false);
     commitCmd.commitData = new HashMap<>(segmentInfos.getUserData());
     commitCmd.commitData.remove(COMMIT_ENCRYPTION_PENDING);
@@ -286,7 +289,7 @@ public class EncryptionRequestHandler extends RequestHandlerBase {
                                         SegmentInfos segmentInfos,
                                         SolrQueryRequest req) throws IOException {
     assert isCommitActiveKeyId(keyId, segmentInfos);
-    log.debug("commit encryption complete for keyId={}", keyId);
+    log.debug("{} commit encryption complete for keyId={}", ENCRYPTION_LOG_PREFIX, keyId);
     CommitUpdateCommand commitCmd = new CommitUpdateCommand(req, false);
     commitCmd.commitData = new HashMap<>(segmentInfos.getUserData());
     commitCmd.commitData.remove(COMMIT_ENCRYPTION_PENDING);
@@ -309,7 +312,7 @@ public class EncryptionRequestHandler extends RequestHandlerBase {
                                      SegmentInfos segmentInfos,
                                      SolrQueryRequest req,
                                      SolrQueryResponse rsp) throws IOException {
-    log.debug("commit encryption starting for keyId={}", keyId);
+    log.debug("{} commit encryption starting for keyId={}", ENCRYPTION_LOG_PREFIX, keyId);
     CommitUpdateCommand commitCmd = new CommitUpdateCommand(req, false);
     commitCmd.commitData = new HashMap<>(segmentInfos.getUserData());
     commitCmd.commitData.put(COMMIT_ENCRYPTION_PENDING, "true");
@@ -318,20 +321,18 @@ public class EncryptionRequestHandler extends RequestHandlerBase {
   }
 
   private void encryptAsync(SolrQueryRequest req, long startTimeMs) {
-    log.debug("submitting async encryption");
+    log.debug("{} submitting async encryption", ENCRYPTION_LOG_PREFIX);
     executor.submit(() -> {
       try {
-        log.debug("running async encryption");
+        log.debug("{} running async encryption", ENCRYPTION_LOG_PREFIX);
         CommitUpdateCommand commitCmd = new CommitUpdateCommand(req, true);
         // Trigger EncryptionMergePolicy.findForcedMerges() to re-encrypt
         // each segment which is not encrypted with the latest active key id.
-        // TODO: Set maxOptimizeSegments to Integer.MAX_VALUE to trigger EncryptionMergePolicy
-        //  when EncryptionHeavyLoadTest passes with it.
-        commitCmd.maxOptimizeSegments = 1;
+        commitCmd.maxOptimizeSegments = Integer.MAX_VALUE;
         req.getCore().getUpdateHandler().commit(commitCmd);
-        log.info("Successfully encrypted the index in " + elapsedTime(startTimeMs));
+        log.info("{} successfully encrypted the index in {}", ENCRYPTION_LOG_PREFIX, elapsedTime(startTimeMs));
       } catch (IOException e) {
-        log.error("Exception while encrypting the index after " + elapsedTime(startTimeMs), e);
+        log.error("{} exception while encrypting the index after {}", ENCRYPTION_LOG_PREFIX, elapsedTime(startTimeMs), e);
       } finally {
         synchronized (pendingEncryptionLock) {
           pendingEncryptions.remove(req.getCore().getName());
@@ -351,7 +352,8 @@ public class EncryptionRequestHandler extends RequestHandlerBase {
     try {
       EncryptionDirectory dir = (EncryptionDirectory) indexDir;
       List<SegmentCommitInfo> segmentsWithOldKeyId = dir.getSegmentsWithOldKeyId(segmentInfos, keyId);
-      log.debug("encryption is pending; {} segments do not have keyId={}", segmentsWithOldKeyId.size(), keyId);
+      log.debug("{} encryption is pending; {} segments do not have keyId={}",
+                ENCRYPTION_LOG_PREFIX, segmentsWithOldKeyId.size(), keyId);
       return segmentsWithOldKeyId.isEmpty();
     } finally {
       directoryFactory.release(indexDir);
diff --git a/encryption/src/main/java/org/apache/solr/encryption/EncryptionUtil.java b/encryption/src/main/java/org/apache/solr/encryption/EncryptionUtil.java
index f59b25a..ff353db 100644
--- a/encryption/src/main/java/org/apache/solr/encryption/EncryptionUtil.java
+++ b/encryption/src/main/java/org/apache/solr/encryption/EncryptionUtil.java
@@ -17,8 +17,11 @@
 package org.apache.solr.encryption;
 
 import org.apache.solr.common.util.Utils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import javax.annotation.Nullable;
+import java.lang.invoke.MethodHandles;
 import java.util.ArrayList;
 import java.util.Comparator;
 import java.util.HashMap;
@@ -31,6 +34,11 @@ import java.util.NoSuchElementException;
  */
 public class EncryptionUtil {
 
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  /** Log prefix for encryption, to ease log search. */
+  public static final String ENCRYPTION_LOG_PREFIX = "Encryption:";
+
   /**
    * Crypto parameter prefix, in the commit user data.
    * It includes the {@link EncryptionUpdateHandler#TRANSFERABLE_COMMIT_DATA} prefix to be transferred from a
@@ -62,7 +70,7 @@ public class EncryptionUtil {
    * Number of inactive key ids to keep when clearing the old inactive key ids.
    * @see #clearOldInactiveKeyIdsFromCommit
    */
-  private static final int INACTIVE_KEY_IDS_TO_KEEP = 5;
+  private static final int INACTIVE_KEY_IDS_TO_KEEP = 15;
 
   /**
    * Sets the new active encryption key id, and its optional cookie in the provided commit user data.
@@ -160,7 +168,8 @@ public class EncryptionUtil {
   /**
    * Clear the oldest inactive key ids to keep only the most recent ones.
    * We don't clear all the inactive key ids just in the improbable case there would be pending
-   * segment creations using previous key id(s) still in flight. This is really to be safe.
+   * segment creations using previous key id(s) still in flight. This helps during the
+   * heavy-load test where re-encryption has a crazy rate, and this is really safe in prod.
    */
   public static void clearOldInactiveKeyIdsFromCommit(Map<String, String> commitUserData) {
     // List the inactive key references.
@@ -180,6 +189,7 @@ public class EncryptionUtil {
       for (Integer keyRef : inactiveKeyRefs.subList(0, inactiveKeyRefs.size() - INACTIVE_KEY_IDS_TO_KEEP)) {
         commitUserData.remove(COMMIT_KEY_ID + keyRef);
         commitUserData.remove(COMMIT_KEY_COOKIE + keyRef);
+        log.info("{} removing inactive key ref={}", ENCRYPTION_LOG_PREFIX, keyRef);
       }
     }
   }
diff --git a/encryption/src/test/java/org/apache/solr/encryption/EncryptionHeavyLoadTest.java b/encryption/src/test/java/org/apache/solr/encryption/EncryptionHeavyLoadTest.java
index 15949c2..5a8608b 100644
--- a/encryption/src/test/java/org/apache/solr/encryption/EncryptionHeavyLoadTest.java
+++ b/encryption/src/test/java/org/apache/solr/encryption/EncryptionHeavyLoadTest.java
@@ -62,7 +62,7 @@ import static org.apache.solr.encryption.TestingKeySupplier.*;
 @RunWith(com.carrotsearch.randomizedtesting.RandomizedRunner.class)
 public class EncryptionHeavyLoadTest extends SolrCloudTestCase {
 
-  // Change the test duration manually to run longer, e.g. 3 minutes.
+  // Change the test duration manually to run longer, e.g. 20 minutes.
   private static final long TEST_DURATION_MS = TimeUnit.SECONDS.toMillis(10);
   private static final int RANDOM_DELAY_BETWEEN_INDEXING_BATCHES_MS = 50;
   private static final int RANDOM_NUM_DOCS_PER_BATCH = 200;