You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by bh...@apache.org on 2019/06/04 22:41:44 UTC
[hadoop] branch trunk updated: HDDS-1624 : Refactor operations
inside the bucket lock in OM key write. (#882)
This is an automated email from the ASF dual-hosted git repository.
bharat pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/trunk by this push:
new 1a78794 HDDS-1624 : Refactor operations inside the bucket lock in OM key write. (#882)
1a78794 is described below
commit 1a78794227167a1d14125d2385409390319529e4
Author: avijayanhwx <14...@users.noreply.github.com>
AuthorDate: Tue Jun 4 15:41:37 2019 -0700
HDDS-1624 : Refactor operations inside the bucket lock in OM key write. (#882)
---
.../java/org/apache/hadoop/utils/UniqueId.java | 69 +++++++++++++
.../java/org/apache/hadoop/utils/db/RDBTable.java | 10 ++
.../java/org/apache/hadoop/utils/db/Table.java | 10 ++
.../org/apache/hadoop/utils/db/TypedTable.java | 7 ++
.../apache/hadoop/utils/db/TestRDBTableStore.java | 21 +++-
.../hadoop/utils/db/TestTypedRDBTableStore.java | 37 ++++++-
.../hadoop/hdds/scm/block/BlockManagerImpl.java | 45 +--------
.../org/apache/hadoop/ozone/om/KeyManagerImpl.java | 107 +++++++++------------
8 files changed, 199 insertions(+), 107 deletions(-)
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/UniqueId.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/UniqueId.java
new file mode 100644
index 0000000..d26d063
--- /dev/null
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/UniqueId.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.hadoop.utils;
+
+import org.apache.hadoop.hdds.HddsUtils;
+
+/**
+ * This class uses system current time milliseconds to generate unique id.
+ */
+public final class UniqueId {
+ /*
+ * When we represent time in milliseconds using 'long' data type,
+ * the LSB bits are used. Currently we are only using 44 bits (LSB),
+ * 20 bits (MSB) are not used.
+ * We will exhaust this 44 bits only when we are in year 2525,
+ * until then we can safely use this 20 bits (MSB) for offset to generate
+ * unique id within millisecond.
+ *
+ * Year : Mon Dec 31 18:49:04 IST 2525
+ * TimeInMillis: 17545641544247
+ * Binary Representation:
+ * MSB (20 bits): 0000 0000 0000 0000 0000
+ * LSB (44 bits): 1111 1111 0101 0010 1001 1011 1011 0100 1010 0011 0111
+ *
+ * We have 20 bits to run counter, we should exclude the first bit (MSB)
+ * as we don't want to deal with negative values.
+ * To be on safer side we will use 'short' data type which is of length
+ * 16 bits and will give us 65,536 values for offset.
+ *
+ */
+
+ private static volatile short offset = 0;
+
+ /**
+ * Private constructor so that no one can instantiate this class.
+ */
+ private UniqueId() {}
+
+ /**
+ * Calculate and returns next unique id based on System#currentTimeMillis.
+ *
+ * @return unique long value
+ */
+ public static synchronized long next() {
+ long utcTime = HddsUtils.getUtcTime();
+ if ((utcTime & 0xFFFF000000000000L) == 0) {
+ return utcTime << Short.SIZE | (offset++ & 0x0000FFFF);
+ }
+ throw new RuntimeException("Got invalid UTC time," +
+ " cannot generate unique Id. UTC Time: " + utcTime);
+ }
+}
\ No newline at end of file
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/RDBTable.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/RDBTable.java
index 7bbe9d9..4213e2b 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/RDBTable.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/RDBTable.java
@@ -120,6 +120,16 @@ class RDBTable implements Table<byte[], byte[]> {
}
@Override
+ public boolean isExist(byte[] key) throws IOException {
+ try {
+ return db.get(handle, key) != null;
+ } catch (RocksDBException e) {
+ throw toIOException(
+ "Error in accessing DB. ", e);
+ }
+ }
+
+ @Override
public byte[] get(byte[] key) throws IOException {
try {
return db.get(handle, key);
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/Table.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/Table.java
index 905a68b..35243e8 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/Table.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/Table.java
@@ -59,6 +59,16 @@ public interface Table<KEY, VALUE> extends AutoCloseable {
boolean isEmpty() throws IOException;
/**
+ * Check if a given key exists in Metadata store.
+ * (Optimization to save on data deserialization)
+ * A lock on the key / bucket needs to be acquired before invoking this API.
+ * @param key metadata key
+ * @return true if the metadata store contains a key.
+ * @throws IOException on Failure
+ */
+ boolean isExist(KEY key) throws IOException;
+
+ /**
* Returns the value mapped to the given key in byte array or returns null
* if the key is not found.
*
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/TypedTable.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/TypedTable.java
index 6de6509..2562b1a 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/TypedTable.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/TypedTable.java
@@ -79,6 +79,13 @@ public class TypedTable<KEY, VALUE> implements Table<KEY, VALUE> {
return rawTable.isEmpty();
}
+ @Override
+ public boolean isExist(KEY key) throws IOException {
+ CacheValue<VALUE> cacheValue= cache.get(new CacheKey<>(key));
+ return (cacheValue != null && cacheValue.getValue() != null) ||
+ rawTable.isExist(codecRegistry.asRawData(key));
+ }
+
/**
* Returns the value mapped to the given key in byte array or returns null
* if the key is not found.
diff --git a/hadoop-hdds/common/src/test/java/org/apache/hadoop/utils/db/TestRDBTableStore.java b/hadoop-hdds/common/src/test/java/org/apache/hadoop/utils/db/TestRDBTableStore.java
index 38d30c1..6b6cd75 100644
--- a/hadoop-hdds/common/src/test/java/org/apache/hadoop/utils/db/TestRDBTableStore.java
+++ b/hadoop-hdds/common/src/test/java/org/apache/hadoop/utils/db/TestRDBTableStore.java
@@ -51,7 +51,7 @@ public class TestRDBTableStore {
Arrays.asList(DFSUtil.bytes2String(RocksDB.DEFAULT_COLUMN_FAMILY),
"First", "Second", "Third",
"Fourth", "Fifth",
- "Sixth");
+ "Sixth", "Seventh");
@Rule
public TemporaryFolder folder = new TemporaryFolder();
private RDBStore rdbStore = null;
@@ -228,4 +228,23 @@ public class TestRDBTableStore {
}
}
}
+
+ @Test
+ public void testIsExist() throws Exception {
+ try (Table<byte[], byte[]> testTable = rdbStore.getTable("Seventh")) {
+ byte[] key =
+ RandomStringUtils.random(10).getBytes(StandardCharsets.UTF_8);
+ byte[] value =
+ RandomStringUtils.random(10).getBytes(StandardCharsets.UTF_8);
+ testTable.put(key, value);
+ Assert.assertTrue(testTable.isExist(key));
+
+ testTable.delete(key);
+ Assert.assertFalse(testTable.isExist(key));
+
+ byte[] invalidKey =
+ RandomStringUtils.random(5).getBytes(StandardCharsets.UTF_8);
+ Assert.assertFalse(testTable.isExist(invalidKey));
+ }
+ }
}
diff --git a/hadoop-hdds/common/src/test/java/org/apache/hadoop/utils/db/TestTypedRDBTableStore.java b/hadoop-hdds/common/src/test/java/org/apache/hadoop/utils/db/TestTypedRDBTableStore.java
index adedcaf..e48a5aa 100644
--- a/hadoop-hdds/common/src/test/java/org/apache/hadoop/utils/db/TestTypedRDBTableStore.java
+++ b/hadoop-hdds/common/src/test/java/org/apache/hadoop/utils/db/TestTypedRDBTableStore.java
@@ -55,7 +55,7 @@ public class TestTypedRDBTableStore {
Arrays.asList(DFSUtil.bytes2String(RocksDB.DEFAULT_COLUMN_FAMILY),
"First", "Second", "Third",
"Fourth", "Fifth",
- "Sixth", "Seven");
+ "Sixth", "Seven", "Eighth");
@Rule
public TemporaryFolder folder = new TemporaryFolder();
private RDBStore rdbStore = null;
@@ -316,4 +316,39 @@ public class TestTypedRDBTableStore {
}
}
+
+ @Test
+ public void testIsExist() throws Exception {
+ try (Table<String, String> testTable = createTypedTable(
+ "Eighth")) {
+ String key =
+ RandomStringUtils.random(10);
+ String value = RandomStringUtils.random(10);
+ testTable.put(key, value);
+ Assert.assertTrue(testTable.isExist(key));
+
+ String invalidKey = key + RandomStringUtils.random(1);
+ Assert.assertFalse(testTable.isExist(invalidKey));
+
+ testTable.delete(key);
+ Assert.assertFalse(testTable.isExist(key));
+ }
+ }
+
+ @Test
+ public void testIsExistCache() throws Exception {
+ try (Table<String, String> testTable = createTypedTable(
+ "Eighth")) {
+ String key =
+ RandomStringUtils.random(10);
+ String value = RandomStringUtils.random(10);
+ testTable.addCacheEntry(new CacheKey<>(key),
+ new CacheValue<>(Optional.of(value), 1L));
+ Assert.assertTrue(testTable.isExist(key));
+
+ testTable.addCacheEntry(new CacheKey<>(key),
+ new CacheValue<>(Optional.absent(), 1L));
+ Assert.assertFalse(testTable.isExist(key));
+ }
+ }
}
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManagerImpl.java
index 1ffd01d..625cdd1 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManagerImpl.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManagerImpl.java
@@ -26,7 +26,6 @@ import java.util.concurrent.TimeUnit;
import javax.management.ObjectName;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.StorageUnit;
-import org.apache.hadoop.hdds.HddsUtils;
import org.apache.hadoop.hdds.client.BlockID;
import org.apache.hadoop.hdds.client.ContainerBlockID;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor;
@@ -46,6 +45,7 @@ import org.apache.hadoop.hdds.scm.pipeline.PipelineNotFoundException;
import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
import org.apache.hadoop.metrics2.util.MBeans;
import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.utils.UniqueId;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -354,47 +354,4 @@ public class BlockManagerImpl implements BlockManager, BlockmanagerMXBean {
/**
* This class uses system current time milliseconds to generate unique id.
*/
- public static final class UniqueId {
- /*
- * When we represent time in milliseconds using 'long' data type,
- * the LSB bits are used. Currently we are only using 44 bits (LSB),
- * 20 bits (MSB) are not used.
- * We will exhaust this 44 bits only when we are in year 2525,
- * until then we can safely use this 20 bits (MSB) for offset to generate
- * unique id within millisecond.
- *
- * Year : Mon Dec 31 18:49:04 IST 2525
- * TimeInMillis: 17545641544247
- * Binary Representation:
- * MSB (20 bits): 0000 0000 0000 0000 0000
- * LSB (44 bits): 1111 1111 0101 0010 1001 1011 1011 0100 1010 0011 0111
- *
- * We have 20 bits to run counter, we should exclude the first bit (MSB)
- * as we don't want to deal with negative values.
- * To be on safer side we will use 'short' data type which is of length
- * 16 bits and will give us 65,536 values for offset.
- *
- */
-
- private static volatile short offset = 0;
-
- /**
- * Private constructor so that no one can instantiate this class.
- */
- private UniqueId() {}
-
- /**
- * Calculate and returns next unique id based on System#currentTimeMillis.
- *
- * @return unique long value
- */
- public static synchronized long next() {
- long utcTime = HddsUtils.getUtcTime();
- if ((utcTime & 0xFFFF000000000000L) == 0) {
- return utcTime << Short.SIZE | (offset++ & 0x0000FFFF);
- }
- throw new RuntimeException("Got invalid UTC time," +
- " cannot generate unique Id. UTC Time: " + utcTime);
- }
- }
}
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java
index 895a47a..9a915d5 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java
@@ -84,6 +84,7 @@ import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos
.PartKeyInfo;
import org.apache.hadoop.util.Time;
import org.apache.hadoop.utils.BackgroundService;
+import org.apache.hadoop.utils.UniqueId;
import org.apache.hadoop.utils.db.BatchOperation;
import org.apache.hadoop.utils.db.DBStore;
import org.apache.hadoop.utils.db.CodecRegistry;
@@ -406,10 +407,29 @@ public class KeyManagerImpl implements KeyManager {
String keyName = args.getKeyName();
validateBucket(volumeName, bucketName);
- long currentTime = Time.monotonicNowNanos();
+ long currentTime = UniqueId.next();
OmKeyInfo keyInfo;
- String openKey;
long openVersion;
+ // NOTE size of a key is not a hard limit on anything, it is a value that
+ // client should expect, in terms of current size of key. If client sets
+ // a value, then this value is used, otherwise, we allocate a single
+ // block which is the current size, if read by the client.
+ final long size = args.getDataSize() >= 0 ?
+ args.getDataSize() : scmBlockSize;
+ final List<OmKeyLocationInfo> locations = new ArrayList<>();
+
+ ReplicationFactor factor = args.getFactor();
+ if (factor == null) {
+ factor = useRatis ? ReplicationFactor.THREE : ReplicationFactor.ONE;
+ }
+
+ ReplicationType type = args.getType();
+ if (type == null) {
+ type = useRatis ? ReplicationType.RATIS : ReplicationType.STAND_ALONE;
+ }
+
+ String dbKeyName = metadataManager.getOzoneKey(
+ args.getVolumeName(), args.getBucketName(), args.getKeyName());
FileEncryptionInfo encInfo;
@@ -417,37 +437,7 @@ public class KeyManagerImpl implements KeyManager {
metadataManager.getLock().acquireBucketLock(volumeName, bucketName);
OmBucketInfo bucketInfo = getBucketInfo(volumeName, bucketName);
encInfo = getFileEncryptionInfo(bucketInfo);
- // NOTE size of a key is not a hard limit on anything, it is a value that
- // client should expect, in terms of current size of key. If client sets
- // a value, then this value is used, otherwise, we allocate a single
- // block which is the current size, if read by the client.
- long size = args.getDataSize() >= 0 ? args.getDataSize() : scmBlockSize;
- List<OmKeyLocationInfo> locations = new ArrayList<>();
- if (args.getIsMultipartKey()) {
- keyInfo = prepareMultipartKeyInfo(args, size, locations, encInfo);
- //TODO args.getMetadata
- } else {
- keyInfo = prepareKeyInfo(args, size, locations, encInfo);
- }
-
- openVersion = keyInfo.getLatestVersionLocations().getVersion();
- openKey = metadataManager.getOpenKey(
- volumeName, bucketName, keyName, currentTime);
- if (metadataManager.getOpenKeyTable().get(openKey) != null) {
- // This should not happen. If this condition is satisfied, it means
- // that we have generated a same openKeyId (i.e. currentTime) for two
- // different client who are trying to write the same key at the same
- // time. The chance of this happening is very, very minimal.
-
- // Do we really need this check? Can we avoid this to gain some
- // minor performance improvement?
- LOG.warn("Cannot allocate key. The generated open key id is already" +
- "used for the same key which is currently being written.");
- throw new OMException("Cannot allocate key. Not able to get a valid" +
- "open key id.", ResultCodes.KEY_ALLOCATION_ERROR);
- }
- LOG.debug("Key {} allocated in volume {} bucket {}",
- keyName, volumeName, bucketName);
+ keyInfo = prepareKeyInfo(args, dbKeyName, size, locations, encInfo);
} catch (OMException e) {
throw e;
} catch (IOException ex) {
@@ -457,7 +447,14 @@ public class KeyManagerImpl implements KeyManager {
} finally {
metadataManager.getLock().releaseBucketLock(volumeName, bucketName);
}
-
+ if (keyInfo == null) {
+ // the key does not exist, create a new object, the new blocks are the
+ // version 0
+ keyInfo = createKeyInfo(args, locations, factor, type, size, encInfo);
+ }
+ openVersion = keyInfo.getLatestVersionLocations().getVersion();
+ LOG.debug("Key {} allocated in volume {} bucket {}",
+ keyName, volumeName, bucketName);
allocateBlockInKey(keyInfo, args.getDataSize(), currentTime);
return new OpenKeySession(currentTime, keyInfo, openVersion);
}
@@ -485,33 +482,21 @@ public class KeyManagerImpl implements KeyManager {
}
}
- private OmKeyInfo prepareKeyInfo(OmKeyArgs args, long size,
+ private OmKeyInfo prepareKeyInfo(
+ OmKeyArgs keyArgs, String dbKeyName, long size,
List<OmKeyLocationInfo> locations, FileEncryptionInfo encInfo)
throws IOException {
- ReplicationFactor factor = args.getFactor();
- ReplicationType type = args.getType();
- OmKeyInfo keyInfo;
- // If user does not specify a replication strategy or
- // replication factor, OM will use defaults.
- if (factor == null) {
- factor = useRatis ? ReplicationFactor.THREE : ReplicationFactor.ONE;
- }
- if (type == null) {
- type = useRatis ? ReplicationType.RATIS : ReplicationType.STAND_ALONE;
- }
- String objectKey = metadataManager.getOzoneKey(
- args.getVolumeName(), args.getBucketName(), args.getKeyName());
- keyInfo = metadataManager.getKeyTable().get(objectKey);
- if (keyInfo != null) {
+ OmKeyInfo keyInfo = null;
+ if (keyArgs.getIsMultipartKey()) {
+ keyInfo = prepareMultipartKeyInfo(keyArgs, size, locations, encInfo);
+ //TODO args.getMetadata
+ } else if (metadataManager.getKeyTable().isExist(dbKeyName)) {
+ keyInfo = metadataManager.getKeyTable().get(dbKeyName);
// the key already exist, the new blocks will be added as new version
// when locations.size = 0, the new version will have identical blocks
// as its previous version
keyInfo.addNewVersion(locations);
keyInfo.setDataSize(size + keyInfo.getDataSize());
- } else {
- // the key does not exist, create a new object, the new blocks are the
- // version 0
- keyInfo = createKeyInfo(args, locations, factor, type, size, encInfo);
}
return keyInfo;
}
@@ -618,13 +603,15 @@ public class KeyManagerImpl implements KeyManager {
String volumeName = args.getVolumeName();
String bucketName = args.getBucketName();
String keyName = args.getKeyName();
- metadataManager.getLock().acquireBucketLock(volumeName, bucketName);
+ List<OmKeyLocationInfo> locationInfoList = args.getLocationInfoList();
+ String objectKey = metadataManager
+ .getOzoneKey(volumeName, bucketName, keyName);
+ String openKey = metadataManager
+ .getOpenKey(volumeName, bucketName, keyName, clientID);
+ Preconditions.checkNotNull(locationInfoList);
try {
+ metadataManager.getLock().acquireBucketLock(volumeName, bucketName);
validateBucket(volumeName, bucketName);
- String openKey = metadataManager.getOpenKey(volumeName, bucketName,
- keyName, clientID);
- String objectKey = metadataManager.getOzoneKey(
- volumeName, bucketName, keyName);
OmKeyInfo keyInfo = metadataManager.getOpenKeyTable().get(openKey);
if (keyInfo == null) {
throw new OMException("Commit a key without corresponding entry " +
@@ -632,8 +619,6 @@ public class KeyManagerImpl implements KeyManager {
}
keyInfo.setDataSize(args.getDataSize());
keyInfo.setModificationTime(Time.now());
- List<OmKeyLocationInfo> locationInfoList = args.getLocationInfoList();
- Preconditions.checkNotNull(locationInfoList);
//update the block length for each block
keyInfo.updateLocationInfoList(locationInfoList);
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org