You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hdfs-commits@hadoop.apache.org by ar...@apache.org on 2019/10/22 02:32:50 UTC

[hadoop-ozone] branch master updated: HDDS-2333. Enable sync option for OM non-HA. (#61)

This is an automated email from the ASF dual-hosted git repository.

arp pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hadoop-ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new c6c9794  HDDS-2333. Enable sync option for OM non-HA. (#61)
c6c9794 is described below

commit c6c9794fc590371ad9c3b8fdcd7a36ed42909b40
Author: Bharat Viswanadham <bh...@apache.org>
AuthorDate: Mon Oct 21 19:32:41 2019 -0700

    HDDS-2333. Enable sync option for OM non-HA. (#61)
---
 .../hadoop/hdds/utils/db/DBStoreBuilder.java       | 21 ++++++++++++++++++---
 .../org/apache/hadoop/hdds/utils/db/RDBStore.java  |  8 ++++----
 .../hadoop/hdds/utils/db/RocksDBConfiguration.java | 16 ++++++++++++++++
 .../hadoop/ozone/om/OmMetadataManagerImpl.java     | 22 ++++++++++++++++++++--
 .../ozone/om/ratis/OzoneManagerDoubleBuffer.java   | 19 ++++++++++++-------
 5 files changed, 70 insertions(+), 16 deletions(-)

diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/DBStoreBuilder.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/DBStoreBuilder.java
index 263864f..5994252 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/DBStoreBuilder.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/DBStoreBuilder.java
@@ -32,6 +32,7 @@ import org.rocksdb.InfoLogLevel;
 import org.rocksdb.RocksDB;
 import org.rocksdb.Statistics;
 import org.rocksdb.StatsLevel;
+import org.rocksdb.WriteOptions;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -70,6 +71,10 @@ public final class DBStoreBuilder {
   private RocksDBConfiguration rocksDBConfiguration;
 
   private DBStoreBuilder(OzoneConfiguration configuration) {
+    this(configuration, configuration.getObject(RocksDBConfiguration.class));
+  }
+  private DBStoreBuilder(OzoneConfiguration configuration,
+      RocksDBConfiguration rocksDBConfiguration) {
     tables = new HashSet<>();
     tableNames = new LinkedList<>();
     this.configuration = configuration;
@@ -77,14 +82,19 @@ public final class DBStoreBuilder {
     this.rocksDbStat = configuration.getTrimmed(
         OZONE_METADATA_STORE_ROCKSDB_STATISTICS,
         OZONE_METADATA_STORE_ROCKSDB_STATISTICS_DEFAULT);
-    this.rocksDBConfiguration =
-        configuration.getObject(RocksDBConfiguration.class);
+    this.rocksDBConfiguration = rocksDBConfiguration;
   }
 
+
   public static DBStoreBuilder newBuilder(OzoneConfiguration configuration) {
     return new DBStoreBuilder(configuration);
   }
 
+  public static DBStoreBuilder newBuilder(OzoneConfiguration configuration,
+      RocksDBConfiguration rocksDBConfiguration) {
+    return new DBStoreBuilder(configuration, rocksDBConfiguration);
+  }
+
   public DBStoreBuilder setProfile(DBProfile profile) {
     dbProfile = profile;
     return this;
@@ -143,11 +153,16 @@ public final class DBStoreBuilder {
     processDBProfile();
     processTables();
     DBOptions options = getDbProfile();
+
+    WriteOptions writeOptions = new WriteOptions();
+    writeOptions.setSync(rocksDBConfiguration.getSyncOption());
+
+
     File dbFile = getDBFile();
     if (!dbFile.getParentFile().exists()) {
       throw new IOException("The DB destination directory should exist.");
     }
-    return new RDBStore(dbFile, options, tables, registry);
+    return new RDBStore(dbFile, options, writeOptions, tables, registry);
   }
 
   /**
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/RDBStore.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/RDBStore.java
index 53bd424..0e3c208 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/RDBStore.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/RDBStore.java
@@ -71,10 +71,11 @@ public class RDBStore implements DBStore {
   @VisibleForTesting
   public RDBStore(File dbFile, DBOptions options,
                   Set<TableConfig> families) throws IOException {
-    this(dbFile, options, families, new CodecRegistry());
+    this(dbFile, options, new WriteOptions(), families, new CodecRegistry());
   }
 
-  public RDBStore(File dbFile, DBOptions options, Set<TableConfig> families,
+  public RDBStore(File dbFile, DBOptions options,
+      WriteOptions writeOptions, Set<TableConfig> families,
                   CodecRegistry registry)
       throws IOException {
     Preconditions.checkNotNull(dbFile, "DB file location cannot be null");
@@ -92,8 +93,7 @@ public class RDBStore implements DBStore {
 
     dbOptions = options;
     dbLocation = dbFile;
-    // TODO: Read from the next Config.
-    writeOptions = new WriteOptions();
+    this.writeOptions = writeOptions;
 
     try {
       db = RocksDB.open(dbOptions, dbLocation.getAbsolutePath(),
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/RocksDBConfiguration.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/RocksDBConfiguration.java
index 1a8c846..63e355c 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/RocksDBConfiguration.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/RocksDBConfiguration.java
@@ -59,4 +59,20 @@ public class RocksDBConfiguration {
     return rocksdbLogLevel;
   }
 
+  private boolean syncOption;
+  @Config(key = "rocksdb.writeoption.sync",
+      type = ConfigType.BOOLEAN,
+      defaultValue = "false",
+      tags = {ConfigTag.OM},
+      description = "Enable/Disable Sync option. If true write will be " +
+          "considered complete, once flushed to persistent storage. If false," +
+          " writes are flushed asynchronously.")
+  public void setSyncOption(boolean enabled) {
+    this.syncOption = enabled;
+  }
+
+  public boolean getSyncOption() {
+    return syncOption;
+  }
+
 }
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataManagerImpl.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataManagerImpl.java
index 95f21ae..433b5c6 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataManagerImpl.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataManagerImpl.java
@@ -32,6 +32,7 @@ import org.apache.hadoop.hdds.client.BlockID;
 import org.apache.hadoop.hdds.conf.OzoneConfiguration;
 import org.apache.hadoop.hdds.utils.db.DBStore;
 import org.apache.hadoop.hdds.utils.db.DBStoreBuilder;
+import org.apache.hadoop.hdds.utils.db.RocksDBConfiguration;
 import org.apache.hadoop.hdds.utils.db.Table;
 import org.apache.hadoop.hdds.utils.db.Table.KeyValue;
 import org.apache.hadoop.hdds.utils.db.TableIterator;
@@ -244,9 +245,26 @@ public class OmMetadataManagerImpl implements OMMetadataManager {
     if (store == null) {
       File metaDir = OmUtils.getOmDbDir(configuration);
 
-      DBStoreBuilder dbStoreBuilder = DBStoreBuilder.newBuilder(configuration)
-          .setName(OM_DB_NAME)
+      RocksDBConfiguration rocksDBConfiguration =
+          configuration.getObject(RocksDBConfiguration.class);
+
+      // When Ratis is not enabled, we should turn on the sync flag for
+      // put/commit operations to RocksDB. Once we return a response to the
+      // client the write is considered complete, but after a power failure
+      // or machine crash the most recent writes would be lost. To avoid
+      // that kind of failure we need to enable sync. When Ratis is enabled,
+      // the Ratis log provides this guarantee. This check is needed until
+      // the HA code path becomes the default in OM.
+
+      // When Ratis is not enabled, override the config and enable sync.
+      if (!isRatisEnabled) {
+        rocksDBConfiguration.setSyncOption(true);
+      }
+
+      DBStoreBuilder dbStoreBuilder = DBStoreBuilder.newBuilder(configuration,
+          rocksDBConfiguration).setName(OM_DB_NAME)
           .setPath(Paths.get(metaDir.getPath()));
+
       this.store = addOMTablesAndCodecs(dbStoreBuilder).build();
       initializeOmTables();
     }
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerDoubleBuffer.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerDoubleBuffer.java
index e5cadff..58b057f 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerDoubleBuffer.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerDoubleBuffer.java
@@ -144,6 +144,18 @@ public class OzoneManagerDoubleBuffer {
           });
 
           omMetadataManager.getStore().commitBatchOperation(batchOperation);
+
+          // Complete futures first and then do other things, so that
+          // handler threads are released.
+          if (!isRatisEnabled) {
+            // Once all entries are flushed, we can complete their future.
+            readyFutureQueue.iterator().forEachRemaining((entry) -> {
+              entry.complete(null);
+            });
+
+            readyFutureQueue.clear();
+          }
+
           int flushedTransactionsSize = readyBuffer.size();
           flushedTransactionCount.addAndGet(flushedTransactionsSize);
           flushIterations.incrementAndGet();
@@ -173,14 +185,7 @@ public class OzoneManagerDoubleBuffer {
           // set metrics.
           updateMetrics(flushedTransactionsSize);
 
-          if (!isRatisEnabled) {
-            // Once all entries are flushed, we can complete their future.
-            readyFutureQueue.iterator().forEachRemaining((entry) -> {
-              entry.complete(null);
-            });
 
-            readyFutureQueue.clear();
-          }
         }
       } catch (InterruptedException ex) {
         Thread.currentThread().interrupt();


---------------------------------------------------------------------
To unsubscribe, e-mail: hdfs-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: hdfs-commits-help@hadoop.apache.org