You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@accumulo.apache.org by kt...@apache.org on 2023/05/11 19:35:09 UTC

[accumulo] branch 2.1 updated: fixes race condition in metadata consistency check (#3392)

This is an automated email from the ASF dual-hosted git repository.

kturner pushed a commit to branch 2.1
in repository https://gitbox.apache.org/repos/asf/accumulo.git


The following commit(s) were added to refs/heads/2.1 by this push:
     new 617bfcbd36 fixes race condition in metadata consistency check (#3392)
617bfcbd36 is described below

commit 617bfcbd36ac05955343a2f77c3423274c04df74
Author: Keith Turner <kt...@apache.org>
AuthorDate: Thu May 11 15:35:01 2023 -0400

    fixes race condition in metadata consistency check (#3392)
    
    While looking into #3386 I noticed the Accumulo metadata consistency
    check was incrementing a counter in the incorrect place.  It should
    increment before writing to the metadata table, but it does not.
    This could cause the check to report false postives.  The false
    positive in the case would be transient and should not repeat on
    subsequent checks.
    
    Also noticed a redundant check when deciding if the file should be
    added to the set of in memory files.  AFICT this redundant check
    is harmless, but it could cause problems for future changes.
---
 .../accumulo/tserver/tablet/DatafileManager.java   | 103 +++++++++++----------
 1 file changed, 52 insertions(+), 51 deletions(-)

diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/DatafileManager.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/DatafileManager.java
index 3873d4efb0..5b349c3f67 100644
--- a/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/DatafileManager.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/DatafileManager.java
@@ -242,31 +242,30 @@ class DatafileManager {
     if (tablet.getExtent().isMeta()) {
       throw new IllegalArgumentException("Can not import files to a metadata tablet");
     }
-
-    synchronized (bulkFileImportLock) {
-
-      if (!paths.isEmpty()) {
-        long bulkTime = Long.MIN_VALUE;
-        if (setTime) {
-          for (DataFileValue dfv : paths.values()) {
-            long nextTime = tablet.getAndUpdateTime();
-            if (nextTime < bulkTime) {
-              throw new IllegalStateException(
-                  "Time went backwards unexpectedly " + nextTime + " " + bulkTime);
+    // increment start count before metadata update AND updating in memory map of files
+    metadataUpdateCount.updateAndGet(MetadataUpdateCount::incrementStart);
+    // do not place any code here between above stmt and try{}finally
+    try {
+      synchronized (bulkFileImportLock) {
+
+        if (!paths.isEmpty()) {
+          long bulkTime = Long.MIN_VALUE;
+          if (setTime) {
+            for (DataFileValue dfv : paths.values()) {
+              long nextTime = tablet.getAndUpdateTime();
+              if (nextTime < bulkTime) {
+                throw new IllegalStateException(
+                    "Time went backwards unexpectedly " + nextTime + " " + bulkTime);
+              }
+              bulkTime = nextTime;
+              dfv.setTime(bulkTime);
             }
-            bulkTime = nextTime;
-            dfv.setTime(bulkTime);
           }
-        }
 
-        newFiles = tablet.updatePersistedTime(bulkTime, paths, tid);
+          newFiles = tablet.updatePersistedTime(bulkTime, paths, tid);
+        }
       }
-    }
 
-    // increment start count before metadata update AND updating in memory map of files
-    metadataUpdateCount.updateAndGet(MetadataUpdateCount::incrementStart);
-    // do not place any code here between above stmt and try{}finally
-    try {
       synchronized (tablet) {
         for (Entry<StoredTabletFile,DataFileValue> tpath : newFiles.entrySet()) {
           if (datafileSizes.containsKey(tpath.getKey())) {
@@ -350,41 +349,43 @@ class DatafileManager {
         logFileOnly.add(unusedWalLog);
       }
     }
-    try {
-      // the order of writing to metadata and walog is important in the face of machine/process
-      // failures need to write to metadata before writing to walog, when things are done in the
-      // reverse order data could be lost... the minor compaction start even should be written
-      // before the following metadata write is made
-      newFile = tablet.updateTabletDataFile(commitSession.getMaxCommittedTime(), newDatafile, dfv,
-          unusedWalLogs, flushId);
-
-      // Mark that we have data we want to replicate
-      // This WAL could still be in use by other Tablets *from the same table*, so we can only mark
-      // that there is data to replicate,
-      // but it is *not* closed. We know it is not closed by the fact that this MinC triggered. A
-      // MinC cannot happen unless the
-      // tablet is online and thus these WALs are referenced by that tablet. Therefore, the WAL
-      // replication status cannot be 'closed'.
-      if (replicate) {
-        if (log.isDebugEnabled()) {
-          log.debug("Recording that data has been ingested into {} using {}", tablet.getExtent(),
-              logFileOnly);
-        }
-        for (String logFile : logFileOnly) {
-          @SuppressWarnings("deprecation")
-          Status status = org.apache.accumulo.server.replication.StatusUtil.openWithUnknownLength();
-          ReplicationTableUtil.updateFiles(tablet.getContext(), tablet.getExtent(), logFile,
-              status);
-        }
-      }
-    } finally {
-      tablet.finishClearingUnusedLogs();
-    }
 
     // increment start count before metadata update AND updating in memory map of files
     metadataUpdateCount.updateAndGet(MetadataUpdateCount::incrementStart);
     // do not place any code here between above stmt and try{}finally
     try {
+      try {
+        // the order of writing to metadata and walog is important in the face of machine/process
+        // failures need to write to metadata before writing to walog, when things are done in the
+        // reverse order data could be lost... the minor compaction start even should be written
+        // before the following metadata write is made
+        newFile = tablet.updateTabletDataFile(commitSession.getMaxCommittedTime(), newDatafile, dfv,
+            unusedWalLogs, flushId);
+
+        // Mark that we have data we want to replicate
+        // This WAL could still be in use by other Tablets *from the same table*, so we can only
+        // mark
+        // that there is data to replicate,
+        // but it is *not* closed. We know it is not closed by the fact that this MinC triggered. A
+        // MinC cannot happen unless the
+        // tablet is online and thus these WALs are referenced by that tablet. Therefore, the WAL
+        // replication status cannot be 'closed'.
+        if (replicate) {
+          if (log.isDebugEnabled()) {
+            log.debug("Recording that data has been ingested into {} using {}", tablet.getExtent(),
+                logFileOnly);
+          }
+          for (String logFile : logFileOnly) {
+            @SuppressWarnings("deprecation")
+            Status status =
+                org.apache.accumulo.server.replication.StatusUtil.openWithUnknownLength();
+            ReplicationTableUtil.updateFiles(tablet.getContext(), tablet.getExtent(), logFile,
+                status);
+          }
+        }
+      } finally {
+        tablet.finishClearingUnusedLogs();
+      }
 
       do {
         try {
@@ -403,7 +404,7 @@ class DatafileManager {
       synchronized (tablet) {
         t1 = System.currentTimeMillis();
 
-        if (dfv.getNumEntries() > 0 && newFile.isPresent()) {
+        if (newFile.isPresent()) {
           StoredTabletFile newFileStored = newFile.orElseThrow();
           if (datafileSizes.containsKey(newFileStored)) {
             log.error("Adding file that is already in set {}", newFileStored);