You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by zh...@apache.org on 2015/05/04 19:57:36 UTC

[02/50] hadoop git commit: HADOOP-11645. Erasure Codec API covering the essential aspects for an erasure code ( Contributed by Kai Zheng)

HADOOP-11645. Erasure Codec API covering the essential aspects for an erasure code ( Contributed by Kai Zheng)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/c284ac0a
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/c284ac0a
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/c284ac0a

Branch: refs/heads/HDFS-7285
Commit: c284ac0a7f2ac3c2e38f7470694b1d4d2109c1b1
Parents: 8f01376
Author: Vinayakumar B <vi...@apache.org>
Authored: Tue Apr 7 16:05:22 2015 +0530
Committer: Zhe Zhang <zh...@apache.org>
Committed: Mon May 4 10:11:32 2015 -0700

----------------------------------------------------------------------
 .../hadoop-common/CHANGES-HDFS-EC-7285.txt      |  3 +
 .../hadoop/io/erasurecode/ECBlockGroup.java     | 18 ++++
 .../erasurecode/codec/AbstractErasureCodec.java | 88 +++++++++++++++++++
 .../io/erasurecode/codec/ErasureCodec.java      | 56 ++++++++++++
 .../io/erasurecode/codec/RSErasureCodec.java    | 38 +++++++++
 .../io/erasurecode/codec/XORErasureCodec.java   | 45 ++++++++++
 .../erasurecode/coder/AbstractErasureCoder.java |  7 ++
 .../io/erasurecode/coder/ErasureCoder.java      |  7 ++
 .../io/erasurecode/grouper/BlockGrouper.java    | 90 ++++++++++++++++++++
 9 files changed, 352 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/c284ac0a/hadoop-common-project/hadoop-common/CHANGES-HDFS-EC-7285.txt
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/CHANGES-HDFS-EC-7285.txt b/hadoop-common-project/hadoop-common/CHANGES-HDFS-EC-7285.txt
index 7716728..c72394e 100644
--- a/hadoop-common-project/hadoop-common/CHANGES-HDFS-EC-7285.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES-HDFS-EC-7285.txt
@@ -37,3 +37,6 @@
 
     HADOOP-11805 Better to rename some raw erasure coders. Contributed by Kai Zheng
     ( Kai Zheng )
+
+    HADOOP-11645. Erasure Codec API covering the essential aspects for an erasure code
+    ( Kai Zheng via vinayakumarb )

http://git-wip-us.apache.org/repos/asf/hadoop/blob/c284ac0a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ECBlockGroup.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ECBlockGroup.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ECBlockGroup.java
index 2c851a5..0a86907 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ECBlockGroup.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ECBlockGroup.java
@@ -79,4 +79,22 @@ public class ECBlockGroup {
     return false;
   }
 
+  /**
+   * Get erased blocks count
+   * @return
+   */
+  public int getErasedCount() {
+    int erasedCount = 0;
+
+    for (ECBlock dataBlock : dataBlocks) {
+      if (dataBlock.isErased()) erasedCount++;
+    }
+
+    for (ECBlock parityBlock : parityBlocks) {
+      if (parityBlock.isErased()) erasedCount++;
+    }
+
+    return erasedCount;
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/c284ac0a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/AbstractErasureCodec.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/AbstractErasureCodec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/AbstractErasureCodec.java
new file mode 100644
index 0000000..9993786
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/AbstractErasureCodec.java
@@ -0,0 +1,88 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.erasurecode.codec;
+
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.io.erasurecode.ECSchema;
+import org.apache.hadoop.io.erasurecode.coder.*;
+import org.apache.hadoop.io.erasurecode.grouper.BlockGrouper;
+
+/**
+ * Abstract Erasure Codec that implements {@link ErasureCodec}.
+ */
+public abstract class AbstractErasureCodec extends Configured
+    implements ErasureCodec {
+
+  private ECSchema schema;
+
+  @Override
+  public void setSchema(ECSchema schema) {
+    this.schema = schema;
+  }
+
+  public String getName() {
+    return schema.getCodecName();
+  }
+
+  protected ECSchema getSchema() {
+    return schema;
+  }
+
+  @Override
+  public BlockGrouper createBlockGrouper() {
+    BlockGrouper blockGrouper = new BlockGrouper();
+    blockGrouper.setSchema(getSchema());
+
+    return blockGrouper;
+  }
+
+  @Override
+  public ErasureCoder createEncoder() {
+    ErasureCoder encoder = doCreateEncoder();
+    prepareErasureCoder(encoder);
+    return encoder;
+  }
+
+  /**
+   * Create a new encoder instance to be initialized afterwards.
+   * @return encoder
+   */
+  protected abstract ErasureCoder doCreateEncoder();
+
+  @Override
+  public ErasureCoder createDecoder() {
+    ErasureCoder decoder = doCreateDecoder();
+    prepareErasureCoder(decoder);
+    return decoder;
+  }
+
+  /**
+   * Create a new decoder instance to be initialized afterwards.
+   * @return decoder
+   */
+  protected abstract ErasureCoder doCreateDecoder();
+
+  private void prepareErasureCoder(ErasureCoder erasureCoder) {
+    if (getSchema() == null) {
+      throw new RuntimeException("No schema been set yet");
+    }
+
+    erasureCoder.setConf(getConf());
+    erasureCoder.initialize(getSchema());
+  }
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/c284ac0a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/ErasureCodec.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/ErasureCodec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/ErasureCodec.java
new file mode 100644
index 0000000..e639484
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/ErasureCodec.java
@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.erasurecode.codec;
+
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.io.erasurecode.ECSchema;
+import org.apache.hadoop.io.erasurecode.coder.ErasureCoder;
+import org.apache.hadoop.io.erasurecode.grouper.BlockGrouper;
+
+/**
+ * Erasure Codec API that's to cover the essential specific aspects of a code.
+ * Currently it cares only block grouper and erasure coder. In future we may
+ * add more aspects here to make the behaviors customizable.
+ */
+public interface ErasureCodec extends Configurable {
+
+  /**
+   * Set EC schema to be used by this codec.
+   * @param schema
+   */
+  public void setSchema(ECSchema schema);
+
+  /**
+   * Create block grouper
+   * @return block grouper
+   */
+  public BlockGrouper createBlockGrouper();
+
+  /**
+   * Create Erasure Encoder
+   * @return erasure encoder
+   */
+  public ErasureCoder createEncoder();
+
+  /**
+   * Create Erasure Decoder
+   * @return erasure decoder
+   */
+  public ErasureCoder createDecoder();
+
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/c284ac0a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/RSErasureCodec.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/RSErasureCodec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/RSErasureCodec.java
new file mode 100644
index 0000000..9e91b60
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/RSErasureCodec.java
@@ -0,0 +1,38 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.erasurecode.codec;
+
+import org.apache.hadoop.io.erasurecode.coder.ErasureCoder;
+import org.apache.hadoop.io.erasurecode.coder.RSErasureDecoder;
+import org.apache.hadoop.io.erasurecode.coder.RSErasureEncoder;
+
+/**
+ * A Reed-Solomon erasure codec.
+ */
+public class RSErasureCodec extends AbstractErasureCodec {
+
+  @Override
+  protected ErasureCoder doCreateEncoder() {
+    return new RSErasureEncoder();
+  }
+
+  @Override
+  protected ErasureCoder doCreateDecoder() {
+    return new RSErasureDecoder();
+  }
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/c284ac0a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/XORErasureCodec.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/XORErasureCodec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/XORErasureCodec.java
new file mode 100644
index 0000000..0f726d7
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/XORErasureCodec.java
@@ -0,0 +1,45 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.erasurecode.codec;
+
+import org.apache.hadoop.io.erasurecode.ECSchema;
+import org.apache.hadoop.io.erasurecode.coder.ErasureCoder;
+import org.apache.hadoop.io.erasurecode.coder.XORErasureDecoder;
+import org.apache.hadoop.io.erasurecode.coder.XORErasureEncoder;
+
+/**
+ * A XOR erasure codec.
+ */
+public class XORErasureCodec extends AbstractErasureCodec {
+
+  @Override
+  public void setSchema(ECSchema schema) {
+    super.setSchema(schema);
+    assert(schema.getNumParityUnits() == 1);
+  }
+
+  @Override
+  protected ErasureCoder doCreateEncoder() {
+    return new XORErasureEncoder();
+  }
+
+  @Override
+  protected ErasureCoder doCreateDecoder() {
+    return new XORErasureDecoder();
+  }
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/c284ac0a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/AbstractErasureCoder.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/AbstractErasureCoder.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/AbstractErasureCoder.java
index 0e4de89..e5bf11a 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/AbstractErasureCoder.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/AbstractErasureCoder.java
@@ -19,6 +19,7 @@ package org.apache.hadoop.io.erasurecode.coder;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.io.erasurecode.ECSchema;
 import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureCoder;
 import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureCoderFactory;
 import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureDecoder;
@@ -105,6 +106,12 @@ public abstract class AbstractErasureCoder
   }
 
   @Override
+  public void initialize(ECSchema schema) {
+      initialize(schema.getNumDataUnits(), schema.getNumParityUnits(),
+          schema.getChunkSize());
+  }
+
+  @Override
   public int getNumDataUnits() {
     return numDataUnits;
   }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/c284ac0a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureCoder.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureCoder.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureCoder.java
index fb90156..64a82ea 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureCoder.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureCoder.java
@@ -19,6 +19,7 @@ package org.apache.hadoop.io.erasurecode.coder;
 
 import org.apache.hadoop.conf.Configurable;
 import org.apache.hadoop.io.erasurecode.ECBlockGroup;
+import org.apache.hadoop.io.erasurecode.ECSchema;
 
 /**
  * An erasure coder to perform encoding or decoding given a group. Generally it
@@ -45,6 +46,12 @@ public interface ErasureCoder extends Configurable {
   public void initialize(int numDataUnits, int numParityUnits, int chunkSize);
 
   /**
+   * Initialize with an EC schema.
+   * @param schema
+   */
+  public void initialize(ECSchema schema);
+
+  /**
    * The number of data input units for the coding. A unit can be a byte,
    * chunk or buffer or even a block.
    * @return count of data input units

http://git-wip-us.apache.org/repos/asf/hadoop/blob/c284ac0a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/grouper/BlockGrouper.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/grouper/BlockGrouper.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/grouper/BlockGrouper.java
new file mode 100644
index 0000000..bdc1624
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/grouper/BlockGrouper.java
@@ -0,0 +1,90 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.erasurecode.grouper;
+
+import org.apache.hadoop.io.erasurecode.ECBlock;
+import org.apache.hadoop.io.erasurecode.ECBlockGroup;
+import org.apache.hadoop.io.erasurecode.ECSchema;
+
+/**
+ * As part of a codec, to handle how to form a block group for encoding
+ * and provide instructions on how to recover erased blocks from a block group
+ */
+public class BlockGrouper {
+
+  private ECSchema schema;
+
+  /**
+   * Set EC schema.
+   * @param schema
+   */
+  public void setSchema(ECSchema schema) {
+    this.schema = schema;
+  }
+
+  /**
+   * Get EC schema.
+   * @return
+   */
+  protected ECSchema getSchema() {
+    return schema;
+  }
+
+  /**
+   * Get required data blocks count in a BlockGroup.
+   * @return count of required data blocks
+   */
+  public int getRequiredNumDataBlocks() {
+    return schema.getNumDataUnits();
+  }
+
+  /**
+   * Get required parity blocks count in a BlockGroup.
+   * @return count of required parity blocks
+   */
+  public int getRequiredNumParityBlocks() {
+    return schema.getNumParityUnits();
+  }
+
+  /**
+   * Calculating and organizing BlockGroup, to be called by ECManager
+   * @param dataBlocks Data blocks to compute parity blocks against
+   * @param parityBlocks To be computed parity blocks
+   * @return
+   */
+  public ECBlockGroup makeBlockGroup(ECBlock[] dataBlocks,
+                                     ECBlock[] parityBlocks) {
+
+    ECBlockGroup blockGroup = new ECBlockGroup(dataBlocks, parityBlocks);
+    return blockGroup;
+  }
+
+  /**
+   * Given a BlockGroup, tell if any of the missing blocks can be recovered,
+   * to be called by ECManager
+   * @param blockGroup a blockGroup that may contain erased blocks but not sure
+   *                   recoverable or not
+   * @return true if any erased block recoverable, false otherwise
+   */
+  public boolean anyRecoverable(ECBlockGroup blockGroup) {
+    int erasedCount = blockGroup.getErasedCount();
+
+    return erasedCount > 0 && erasedCount <= getRequiredNumParityBlocks();
+  }
+
+}