You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by zh...@apache.org on 2015/05/04 19:57:36 UTC
[02/50] hadoop git commit: HADOOP-11645. Erasure Codec API covering
the essential aspects for an erasure code ( Contributed by Kai Zheng)
HADOOP-11645. Erasure Codec API covering the essential aspects for an erasure code ( Contributed by Kai Zheng)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/c284ac0a
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/c284ac0a
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/c284ac0a
Branch: refs/heads/HDFS-7285
Commit: c284ac0a7f2ac3c2e38f7470694b1d4d2109c1b1
Parents: 8f01376
Author: Vinayakumar B <vi...@apache.org>
Authored: Tue Apr 7 16:05:22 2015 +0530
Committer: Zhe Zhang <zh...@apache.org>
Committed: Mon May 4 10:11:32 2015 -0700
----------------------------------------------------------------------
.../hadoop-common/CHANGES-HDFS-EC-7285.txt | 3 +
.../hadoop/io/erasurecode/ECBlockGroup.java | 18 ++++
.../erasurecode/codec/AbstractErasureCodec.java | 88 +++++++++++++++++++
.../io/erasurecode/codec/ErasureCodec.java | 56 ++++++++++++
.../io/erasurecode/codec/RSErasureCodec.java | 38 +++++++++
.../io/erasurecode/codec/XORErasureCodec.java | 45 ++++++++++
.../erasurecode/coder/AbstractErasureCoder.java | 7 ++
.../io/erasurecode/coder/ErasureCoder.java | 7 ++
.../io/erasurecode/grouper/BlockGrouper.java | 90 ++++++++++++++++++++
9 files changed, 352 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/c284ac0a/hadoop-common-project/hadoop-common/CHANGES-HDFS-EC-7285.txt
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/CHANGES-HDFS-EC-7285.txt b/hadoop-common-project/hadoop-common/CHANGES-HDFS-EC-7285.txt
index 7716728..c72394e 100644
--- a/hadoop-common-project/hadoop-common/CHANGES-HDFS-EC-7285.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES-HDFS-EC-7285.txt
@@ -37,3 +37,6 @@
HADOOP-11805 Better to rename some raw erasure coders. Contributed by Kai Zheng
( Kai Zheng )
+
+ HADOOP-11645. Erasure Codec API covering the essential aspects for an erasure code
+ ( Kai Zheng via vinayakumarb )
http://git-wip-us.apache.org/repos/asf/hadoop/blob/c284ac0a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ECBlockGroup.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ECBlockGroup.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ECBlockGroup.java
index 2c851a5..0a86907 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ECBlockGroup.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ECBlockGroup.java
@@ -79,4 +79,22 @@ public class ECBlockGroup {
return false;
}
+ /**
+  * Count the blocks of this group that are flagged as erased.
+  * @return how many data and parity blocks report themselves as erased
+  */
+ public int getErasedCount() {
+   int numErased = 0;
+
+   for (ECBlock block : dataBlocks) {
+     numErased += block.isErased() ? 1 : 0;
+   }
+
+   for (ECBlock block : parityBlocks) {
+     numErased += block.isErased() ? 1 : 0;
+   }
+
+   return numErased;
+ }
+
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/c284ac0a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/AbstractErasureCodec.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/AbstractErasureCodec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/AbstractErasureCodec.java
new file mode 100644
index 0000000..9993786
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/AbstractErasureCodec.java
@@ -0,0 +1,88 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.erasurecode.codec;
+
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.io.erasurecode.ECSchema;
+import org.apache.hadoop.io.erasurecode.coder.*;
+import org.apache.hadoop.io.erasurecode.grouper.BlockGrouper;
+
+/**
+ * Abstract Erasure Codec that implements {@link ErasureCodec}. It keeps the
+ * schema and hands out block groupers and coders set up from that schema.
+ */
+public abstract class AbstractErasureCodec extends Configured
+    implements ErasureCodec {
+
+  private ECSchema schema;
+
+  /** Store the schema handed to groupers and coders created afterwards. */
+  @Override
+  public void setSchema(ECSchema schema) {
+    this.schema = schema;
+  }
+
+  /** Get the codec name from the schema, which must have been set. */
+  public String getName() {
+    return schema.getCodecName();
+  }
+
+  /** Get the schema, or null if none has been set yet. */
+  protected ECSchema getSchema() {
+    return schema;
+  }
+
+  /** Create a block grouper and give it the current schema. */
+  @Override
+  public BlockGrouper createBlockGrouper() {
+    BlockGrouper blockGrouper = new BlockGrouper();
+    blockGrouper.setSchema(getSchema());
+    return blockGrouper;
+  }
+
+  /** Create an encoder that is configured and initialized with the schema. */
+  @Override
+  public ErasureCoder createEncoder() {
+    ErasureCoder encoder = doCreateEncoder();
+    prepareErasureCoder(encoder);
+    return encoder;
+  }
+
+  /** Create a new encoder instance to be initialized afterwards. */
+  protected abstract ErasureCoder doCreateEncoder();
+
+  /** Create a decoder that is configured and initialized with the schema. */
+  @Override
+  public ErasureCoder createDecoder() {
+    ErasureCoder decoder = doCreateDecoder();
+    prepareErasureCoder(decoder);
+    return decoder;
+  }
+
+  /** Create a new decoder instance to be initialized afterwards. */
+  protected abstract ErasureCoder doCreateDecoder();
+
+  /** Configure the coder; IllegalStateException if no schema is set. */
+  private void prepareErasureCoder(ErasureCoder erasureCoder) {
+    if (getSchema() == null) {
+      throw new IllegalStateException("No schema been set yet");
+    }
+    erasureCoder.setConf(getConf());
+    erasureCoder.initialize(getSchema());
+  }
+}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/c284ac0a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/ErasureCodec.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/ErasureCodec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/ErasureCodec.java
new file mode 100644
index 0000000..e639484
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/ErasureCodec.java
@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.erasurecode.codec;
+
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.io.erasurecode.ECSchema;
+import org.apache.hadoop.io.erasurecode.coder.ErasureCoder;
+import org.apache.hadoop.io.erasurecode.grouper.BlockGrouper;
+
+/**
+ * The codec-level API tying together what is specific to one erasure code.
+ * For now that is a block grouper and the erasure coders; further aspects
+ * may be added later so that more behaviors become customizable.
+ */
+public interface ErasureCodec extends Configurable {
+
+  /**
+   * Provide the EC schema this codec is to work with.
+   * @param schema the EC schema to use
+   */
+  void setSchema(ECSchema schema);
+
+  /**
+   * Build a block grouper.
+   * @return the block grouper
+   */
+  BlockGrouper createBlockGrouper();
+
+  /**
+   * Build an erasure encoder.
+   * @return the erasure encoder
+   */
+  ErasureCoder createEncoder();
+
+  /**
+   * Build an erasure decoder.
+   * @return the erasure decoder
+   */
+  ErasureCoder createDecoder();
+
+}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/c284ac0a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/RSErasureCodec.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/RSErasureCodec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/RSErasureCodec.java
new file mode 100644
index 0000000..9e91b60
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/RSErasureCodec.java
@@ -0,0 +1,38 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.erasurecode.codec;
+
+import org.apache.hadoop.io.erasurecode.coder.ErasureCoder;
+import org.apache.hadoop.io.erasurecode.coder.RSErasureDecoder;
+import org.apache.hadoop.io.erasurecode.coder.RSErasureEncoder;
+
+/**
+ * A Reed-Solomon erasure codec, creating RSErasureEncoder/RSErasureDecoder.
+ */
+public class RSErasureCodec extends AbstractErasureCodec {
+
+ @Override
+ protected ErasureCoder doCreateEncoder() {
+ return new RSErasureEncoder();
+ }
+
+ @Override
+ protected ErasureCoder doCreateDecoder() {
+ return new RSErasureDecoder();
+ }
+}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/c284ac0a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/XORErasureCodec.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/XORErasureCodec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/XORErasureCodec.java
new file mode 100644
index 0000000..0f726d7
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/XORErasureCodec.java
@@ -0,0 +1,45 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.erasurecode.codec;
+
+import org.apache.hadoop.io.erasurecode.ECSchema;
+import org.apache.hadoop.io.erasurecode.coder.ErasureCoder;
+import org.apache.hadoop.io.erasurecode.coder.XORErasureDecoder;
+import org.apache.hadoop.io.erasurecode.coder.XORErasureEncoder;
+
+/** A XOR erasure codec; it supports only schemas with one parity unit. */
+public class XORErasureCodec extends AbstractErasureCodec {
+  /** Sets the schema, rejecting any with a parity unit count other than 1. */
+  @Override
+  public void setSchema(ECSchema schema) {
+    if (schema != null && schema.getNumParityUnits() != 1) {
+      throw new IllegalArgumentException("XOR requires exactly 1 parity unit");
+    }
+    super.setSchema(schema);
+  }
+
+  @Override
+  protected ErasureCoder doCreateEncoder() {
+    return new XORErasureEncoder();
+  }
+
+  @Override
+  protected ErasureCoder doCreateDecoder() {
+    return new XORErasureDecoder();
+  }
+}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/c284ac0a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/AbstractErasureCoder.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/AbstractErasureCoder.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/AbstractErasureCoder.java
index 0e4de89..e5bf11a 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/AbstractErasureCoder.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/AbstractErasureCoder.java
@@ -19,6 +19,7 @@ package org.apache.hadoop.io.erasurecode.coder;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.io.erasurecode.ECSchema;
import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureCoder;
import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureCoderFactory;
import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureDecoder;
@@ -105,6 +106,12 @@ public abstract class AbstractErasureCoder
}
@Override
+ public void initialize(ECSchema schema) { // unpack for the 3-int overload
+   final int dataUnits = schema.getNumDataUnits();
+   initialize(dataUnits, schema.getNumParityUnits(), schema.getChunkSize());
+ }
+
+ @Override
public int getNumDataUnits() {
return numDataUnits;
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/c284ac0a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureCoder.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureCoder.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureCoder.java
index fb90156..64a82ea 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureCoder.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureCoder.java
@@ -19,6 +19,7 @@ package org.apache.hadoop.io.erasurecode.coder;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.io.erasurecode.ECBlockGroup;
+import org.apache.hadoop.io.erasurecode.ECSchema;
/**
* An erasure coder to perform encoding or decoding given a group. Generally it
@@ -45,6 +46,12 @@ public interface ErasureCoder extends Configurable {
public void initialize(int numDataUnits, int numParityUnits, int chunkSize);
/**
+ * Initialize with an EC schema.
+ * @param schema the EC schema to initialize this coder with
+ */
+ public void initialize(ECSchema schema);
+
+ /**
* The number of data input units for the coding. A unit can be a byte,
* chunk or buffer or even a block.
* @return count of data input units
http://git-wip-us.apache.org/repos/asf/hadoop/blob/c284ac0a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/grouper/BlockGrouper.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/grouper/BlockGrouper.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/grouper/BlockGrouper.java
new file mode 100644
index 0000000..bdc1624
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/grouper/BlockGrouper.java
@@ -0,0 +1,90 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.erasurecode.grouper;
+
+import org.apache.hadoop.io.erasurecode.ECBlock;
+import org.apache.hadoop.io.erasurecode.ECBlockGroup;
+import org.apache.hadoop.io.erasurecode.ECSchema;
+
+/**
+ * The part of a codec that decides how blocks are formed into a block group
+ * for encoding and tells whether erased blocks of a group can be recovered.
+ */
+public class BlockGrouper {
+
+  private ECSchema schema;
+
+  /**
+   * Assign the EC schema this grouper works with.
+   * @param schema the EC schema to use
+   */
+  public void setSchema(ECSchema schema) {
+    this.schema = schema;
+  }
+
+  /**
+   * Read back the EC schema.
+   * @return the schema last passed to setSchema, or null if never set
+   */
+  protected ECSchema getSchema() {
+    return schema;
+  }
+
+  /**
+   * Number of data blocks a BlockGroup needs, taken from the schema.
+   * @return the schema's data unit count
+   */
+  public int getRequiredNumDataBlocks() {
+    return schema.getNumDataUnits();
+  }
+
+  /**
+   * Number of parity blocks a BlockGroup needs, taken from the schema.
+   * @return the schema's parity unit count
+   */
+  public int getRequiredNumParityBlocks() {
+    return schema.getNumParityUnits();
+  }
+
+  /**
+   * Wrap the given blocks into a BlockGroup; meant to be called by ECManager.
+   * @param dataBlocks Data blocks to compute parity blocks against
+   * @param parityBlocks To be computed parity blocks
+   * @return a new block group holding the given data and parity blocks
+   */
+  public ECBlockGroup makeBlockGroup(ECBlock[] dataBlocks,
+                                     ECBlock[] parityBlocks) {
+    return new ECBlockGroup(dataBlocks, parityBlocks);
+  }
+
+  /**
+   * Decide whether a BlockGroup has erased blocks that can be recovered;
+   * meant to be called by ECManager.
+   * @param blockGroup a block group that may or may not contain erased blocks
+   * @return true if at least one block is erased and the erased count does
+   * not exceed the required parity block count, false otherwise
+   */
+  public boolean anyRecoverable(ECBlockGroup blockGroup) {
+    int numErased = blockGroup.getErasedCount();
+    if (numErased <= 0) {
+      return false;
+    }
+    return numErased <= getRequiredNumParityBlocks();
+  }
+
+}