You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@accumulo.apache.org by kt...@apache.org on 2021/01/12 14:25:51 UTC

[accumulo] branch main updated: Fixes #1321 makes metadata consistency level configurable for reads (#1864)

This is an automated email from the ASF dual-hosted git repository.

kturner pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/accumulo.git


The following commit(s) were added to refs/heads/main by this push:
     new 2f7c871  Fixes #1321 makes metadata consistency level configurable for reads (#1864)
2f7c871 is described below

commit 2f7c87170afa100b8add6206d76effe19e7518e4
Author: Keith Turner <kt...@apache.org>
AuthorDate: Tue Jan 12 09:25:39 2021 -0500

    Fixes #1321 makes metadata consistency level configurable for reads (#1864)
---
 .../accumulo/core/clientImpl/ClientContext.java    |  4 ++-
 .../core/clientImpl/RootTabletLocator.java         |  4 ++-
 .../accumulo/core/metadata/schema/Ample.java       | 36 ++++++++++++++++++-
 .../accumulo/core/metadata/schema/AmpleImpl.java   |  5 ++-
 .../core/metadata/schema/TabletsMetadata.java      | 42 +++++++++++++++++++---
 .../server/master/state/ZooTabletStateStore.java   |  3 +-
 .../accumulo/server/metadata/ServerAmpleImpl.java  | 11 ++++--
 .../master/state/RootTabletStateStoreTest.java     |  3 +-
 8 files changed, 96 insertions(+), 12 deletions(-)

diff --git a/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientContext.java b/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientContext.java
index 1a3b002..84d3056 100644
--- a/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientContext.java
+++ b/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientContext.java
@@ -60,6 +60,7 @@ import org.apache.accumulo.core.data.TableId;
 import org.apache.accumulo.core.master.state.tables.TableState;
 import org.apache.accumulo.core.metadata.RootTable;
 import org.apache.accumulo.core.metadata.schema.Ample;
+import org.apache.accumulo.core.metadata.schema.Ample.ReadConsistency;
 import org.apache.accumulo.core.metadata.schema.AmpleImpl;
 import org.apache.accumulo.core.metadata.schema.TabletMetadata.Location;
 import org.apache.accumulo.core.metadata.schema.TabletMetadata.LocationType;
@@ -337,7 +338,8 @@ public class ClientContext implements AccumuloClient {
       timer = new OpTimer().start();
     }
 
-    Location loc = getAmple().readTablet(RootTable.EXTENT, LOCATION).getLocation();
+    Location loc =
+        getAmple().readTablet(RootTable.EXTENT, ReadConsistency.EVENTUAL, LOCATION).getLocation();
 
     if (timer != null) {
       timer.stop();
diff --git a/core/src/main/java/org/apache/accumulo/core/clientImpl/RootTabletLocator.java b/core/src/main/java/org/apache/accumulo/core/clientImpl/RootTabletLocator.java
index 5641c1b..d91dbb5 100644
--- a/core/src/main/java/org/apache/accumulo/core/clientImpl/RootTabletLocator.java
+++ b/core/src/main/java/org/apache/accumulo/core/clientImpl/RootTabletLocator.java
@@ -33,6 +33,7 @@ import org.apache.accumulo.core.data.Mutation;
 import org.apache.accumulo.core.data.Range;
 import org.apache.accumulo.core.dataImpl.KeyExtent;
 import org.apache.accumulo.core.metadata.RootTable;
+import org.apache.accumulo.core.metadata.schema.Ample.ReadConsistency;
 import org.apache.accumulo.core.metadata.schema.TabletMetadata.Location;
 import org.apache.accumulo.core.metadata.schema.TabletMetadata.LocationType;
 import org.apache.accumulo.core.util.OpTimer;
@@ -106,7 +107,8 @@ public class RootTabletLocator extends TabletLocator {
       timer = new OpTimer().start();
     }
 
-    Location loc = context.getAmple().readTablet(RootTable.EXTENT, LOCATION).getLocation();
+    Location loc = context.getAmple()
+        .readTablet(RootTable.EXTENT, ReadConsistency.EVENTUAL, LOCATION).getLocation();
 
     if (timer != null) {
       timer.stop();
diff --git a/core/src/main/java/org/apache/accumulo/core/metadata/schema/Ample.java b/core/src/main/java/org/apache/accumulo/core/metadata/schema/Ample.java
index bb0839a..ae8c0f1 100644
--- a/core/src/main/java/org/apache/accumulo/core/metadata/schema/Ample.java
+++ b/core/src/main/java/org/apache/accumulo/core/metadata/schema/Ample.java
@@ -110,14 +110,48 @@ public interface Ample {
   }
 
   /**
+   * Controls how Accumulo metadata is read. Currently this only impacts reading the root tablet
+   * stored in Zookeeper. Reading data stored in the Accumulo metadata table is always immediate
+   * consistency.
+   */
+  public enum ReadConsistency {
+    /**
+     * Read data in a way that is slower, but should always yield the latest data. In addition to
+     * being slower, it is possible this read consistency can place higher load on shared
+     * resources, which can negatively impact an entire cluster.
+     */
+    IMMEDIATE,
+    /**
+     * Read data in a way that may be faster but may yield out of date data.
+     */
+    EVENTUAL
+  }
+
+  /**
+   * Read a single tablet's metadata. No checking is done for prev row, so it could differ. The
+   * method will read the data using {@link ReadConsistency#IMMEDIATE}.
+   *
+   * @param extent
+   *          Reads tablet metadata using the table id and end row from this extent.
+   * @param colsToFetch
+   *          What tablets columns to fetch. If empty, then everything is fetched.
+   */
+  default TabletMetadata readTablet(KeyExtent extent, ColumnType... colsToFetch) {
+    return readTablet(extent, ReadConsistency.IMMEDIATE, colsToFetch);
+  }
+
+  /**
    * Read a single tablets metadata. No checking is done for prev row, so it could differ.
    *
    * @param extent
    *          Reads tablet metadata using the table id and end row from this extent.
+   * @param readConsistency
+   *          Controls how the data is read.
    * @param colsToFetch
    *          What tablets columns to fetch. If empty, then everything is fetched.
    */
-  TabletMetadata readTablet(KeyExtent extent, ColumnType... colsToFetch);
+  TabletMetadata readTablet(KeyExtent extent, ReadConsistency readConsistency,
+      ColumnType... colsToFetch);
 
   /**
    * Initiates mutating a single tablets persistent metadata. No data is persisted until the
diff --git a/core/src/main/java/org/apache/accumulo/core/metadata/schema/AmpleImpl.java b/core/src/main/java/org/apache/accumulo/core/metadata/schema/AmpleImpl.java
index 2b78fcd..f5ea7d4 100644
--- a/core/src/main/java/org/apache/accumulo/core/metadata/schema/AmpleImpl.java
+++ b/core/src/main/java/org/apache/accumulo/core/metadata/schema/AmpleImpl.java
@@ -33,11 +33,14 @@ public class AmpleImpl implements Ample {
   }
 
   @Override
-  public TabletMetadata readTablet(KeyExtent extent, ColumnType... colsToFetch) {
+  public TabletMetadata readTablet(KeyExtent extent, ReadConsistency readConsistency,
+      ColumnType... colsToFetch) {
     Options builder = TabletsMetadata.builder().forTablet(extent);
     if (colsToFetch.length > 0)
       builder.fetch(colsToFetch);
 
+    builder.readConsistency(readConsistency);
+
     try (TabletsMetadata tablets = builder.build(client)) {
       return Iterables.getOnlyElement(tablets);
     }
diff --git a/core/src/main/java/org/apache/accumulo/core/metadata/schema/TabletsMetadata.java b/core/src/main/java/org/apache/accumulo/core/metadata/schema/TabletsMetadata.java
index f7d6a17..1471c39 100644
--- a/core/src/main/java/org/apache/accumulo/core/metadata/schema/TabletsMetadata.java
+++ b/core/src/main/java/org/apache/accumulo/core/metadata/schema/TabletsMetadata.java
@@ -18,7 +18,6 @@
  */
 package org.apache.accumulo.core.metadata.schema;
 
-import static org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.SuspendLocationColumn;
 import static org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ServerColumnFamily.COMPACT_COLUMN;
 import static org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ServerColumnFamily.DIRECTORY_COLUMN;
 import static org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ServerColumnFamily.FLUSH_COLUMN;
@@ -31,6 +30,7 @@ import java.util.EnumSet;
 import java.util.Iterator;
 import java.util.List;
 import java.util.NoSuchElementException;
+import java.util.Objects;
 import java.util.stream.Stream;
 import java.util.stream.StreamSupport;
 
@@ -45,6 +45,7 @@ import org.apache.accumulo.core.dataImpl.KeyExtent;
 import org.apache.accumulo.core.metadata.MetadataTable;
 import org.apache.accumulo.core.metadata.RootTable;
 import org.apache.accumulo.core.metadata.schema.Ample.DataLevel;
+import org.apache.accumulo.core.metadata.schema.Ample.ReadConsistency;
 import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection;
 import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.BulkFileColumnFamily;
 import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ClonedColumnFamily;
@@ -54,11 +55,14 @@ import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.Fu
 import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.LastLocationColumnFamily;
 import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.LogColumnFamily;
 import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ScanFileColumnFamily;
+import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.SuspendLocationColumn;
 import org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType;
 import org.apache.accumulo.core.security.Authorizations;
 import org.apache.accumulo.core.util.ColumnFQ;
 import org.apache.accumulo.fate.zookeeper.ZooCache;
+import org.apache.accumulo.fate.zookeeper.ZooReader;
 import org.apache.hadoop.io.Text;
+import org.apache.zookeeper.KeeperException;
 
 import com.google.common.base.Preconditions;
 
@@ -80,15 +84,14 @@ public class TabletsMetadata implements Iterable<TabletMetadata>, AutoCloseable
     private boolean checkConsistency = false;
     private boolean saveKeyValues;
     private TableId tableId;
+    private ReadConsistency readConsistency = ReadConsistency.IMMEDIATE;
 
     @Override
     public TabletsMetadata build(AccumuloClient client) {
       Preconditions.checkState(level == null ^ table == null);
       if (level == DataLevel.ROOT) {
         ClientContext ctx = ((ClientContext) client);
-        ZooCache zc = ctx.getZooCache();
-        String zkRoot = ctx.getZooKeeperRoot();
-        return new TabletsMetadata(getRootMetadata(zkRoot, zc));
+        return new TabletsMetadata(getRootMetadata(ctx, readConsistency));
       } else {
         return buildNonRoot(client);
       }
@@ -245,6 +248,12 @@ public class TabletsMetadata implements Iterable<TabletMetadata>, AutoCloseable
       this.range = TabletsSection.getRange();
       return this;
     }
+
+    @Override
+    public Options readConsistency(ReadConsistency readConsistency) {
+      this.readConsistency = Objects.requireNonNull(readConsistency);
+      return this;
+    }
   }
 
   public interface Options {
@@ -261,6 +270,12 @@ public class TabletsMetadata implements Iterable<TabletMetadata>, AutoCloseable
      * Saves the key values seen in the metadata table for each tablet.
      */
     Options saveKeyValues();
+
+    /**
+     * Controls how the data is read. If not set, then the default is
+     * {@link ReadConsistency#IMMEDIATE}.
+     */
+    Options readConsistency(ReadConsistency readConsistency);
   }
 
   public interface RangeOptions extends Options {
@@ -356,6 +371,25 @@ public class TabletsMetadata implements Iterable<TabletMetadata>, AutoCloseable
     return new Builder();
   }
 
+  private static TabletMetadata getRootMetadata(ClientContext ctx,
+      ReadConsistency readConsistency) {
+    String zkRoot = ctx.getZooKeeperRoot();
+    switch (readConsistency) {
+      case EVENTUAL:
+        return getRootMetadata(zkRoot, ctx.getZooCache());
+      case IMMEDIATE:
+        ZooReader zooReader = new ZooReader(ctx.getZooKeepers(), ctx.getZooKeepersSessionTimeOut());
+        try {
+          return RootTabletMetadata.fromJson(zooReader.getData(zkRoot + RootTable.ZROOT_TABLET))
+              .convertToTabletMetadata();
+        } catch (InterruptedException | KeeperException e) {
+          throw new RuntimeException(e);
+        }
+      default:
+        throw new IllegalArgumentException("Unknown consistency level " + readConsistency);
+    }
+  }
+
   public static TabletMetadata getRootMetadata(String zkRoot, ZooCache zc) {
     return RootTabletMetadata.fromJson(zc.get(zkRoot + RootTable.ZROOT_TABLET))
         .convertToTabletMetadata();
diff --git a/server/base/src/main/java/org/apache/accumulo/server/master/state/ZooTabletStateStore.java b/server/base/src/main/java/org/apache/accumulo/server/master/state/ZooTabletStateStore.java
index 5c0fd07..eb0dd22 100644
--- a/server/base/src/main/java/org/apache/accumulo/server/master/state/ZooTabletStateStore.java
+++ b/server/base/src/main/java/org/apache/accumulo/server/master/state/ZooTabletStateStore.java
@@ -28,6 +28,7 @@ import org.apache.accumulo.core.metadata.RootTable;
 import org.apache.accumulo.core.metadata.TServerInstance;
 import org.apache.accumulo.core.metadata.TabletLocationState;
 import org.apache.accumulo.core.metadata.schema.Ample;
+import org.apache.accumulo.core.metadata.schema.Ample.ReadConsistency;
 import org.apache.accumulo.core.metadata.schema.Ample.TabletMutator;
 import org.apache.accumulo.core.metadata.schema.TabletMetadata;
 import org.apache.accumulo.core.metadata.schema.TabletMetadata.Location;
@@ -62,7 +63,7 @@ class ZooTabletStateStore implements TabletStateStore {
         finished = true;
         try {
 
-          TabletMetadata rootMeta = ample.readTablet(RootTable.EXTENT);
+          TabletMetadata rootMeta = ample.readTablet(RootTable.EXTENT, ReadConsistency.EVENTUAL);
 
           TServerInstance currentSession = null;
           TServerInstance futureSession = null;
diff --git a/server/base/src/main/java/org/apache/accumulo/server/metadata/ServerAmpleImpl.java b/server/base/src/main/java/org/apache/accumulo/server/metadata/ServerAmpleImpl.java
index e735427..8e4b945 100644
--- a/server/base/src/main/java/org/apache/accumulo/server/metadata/ServerAmpleImpl.java
+++ b/server/base/src/main/java/org/apache/accumulo/server/metadata/ServerAmpleImpl.java
@@ -47,8 +47,10 @@ import org.apache.accumulo.core.metadata.schema.AmpleImpl;
 import org.apache.accumulo.core.metadata.schema.MetadataSchema.DeletesSection;
 import org.apache.accumulo.core.metadata.schema.MetadataSchema.DeletesSection.SkewedKeyValue;
 import org.apache.accumulo.core.security.Authorizations;
+import org.apache.accumulo.fate.zookeeper.ZooReader;
 import org.apache.accumulo.server.ServerContext;
 import org.apache.hadoop.io.Text;
+import org.apache.zookeeper.KeeperException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -160,8 +162,13 @@ public class ServerAmpleImpl extends AmpleImpl implements Ample {
   @Override
   public Iterator<String> getGcCandidates(DataLevel level, String continuePoint) {
     if (level == DataLevel.ROOT) {
-      byte[] json = context.getZooCache()
-          .get(context.getZooKeeperRoot() + RootTable.ZROOT_TABLET_GC_CANDIDATES);
+      var zooReader = new ZooReader(context.getZooKeepers(), context.getZooKeepersSessionTimeOut());
+      byte[] json;
+      try {
+        json = zooReader.getData(context.getZooKeeperRoot() + RootTable.ZROOT_TABLET_GC_CANDIDATES);
+      } catch (KeeperException | InterruptedException e) {
+        throw new RuntimeException(e);
+      }
       Stream<String> candidates = RootGcCandidates.fromJson(json).stream().sorted();
 
       if (continuePoint != null && !continuePoint.isEmpty()) {
diff --git a/server/base/src/test/java/org/apache/accumulo/server/master/state/RootTabletStateStoreTest.java b/server/base/src/test/java/org/apache/accumulo/server/master/state/RootTabletStateStoreTest.java
index 53c25e9..77a7e44 100644
--- a/server/base/src/test/java/org/apache/accumulo/server/master/state/RootTabletStateStoreTest.java
+++ b/server/base/src/test/java/org/apache/accumulo/server/master/state/RootTabletStateStoreTest.java
@@ -51,7 +51,8 @@ public class RootTabletStateStoreTest {
         RootTabletMetadata.getInitialJson("dir", "hdfs://nn/acc/tables/some/dir/0000.rf"), UTF_8);
 
     @Override
-    public TabletMetadata readTablet(KeyExtent extent, ColumnType... colsToFetch) {
+    public TabletMetadata readTablet(KeyExtent extent, ReadConsistency rc,
+        ColumnType... colsToFetch) {
       Preconditions.checkArgument(extent.equals(RootTable.EXTENT));
       return RootTabletMetadata.fromJson(json).convertToTabletMetadata();
     }