You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by zh...@apache.org on 2015/03/30 19:24:55 UTC
[49/50] [abbrv] hadoop git commit: HADOOP-11664. Loading predefined
EC schemas from configuration. Contributed by Kai Zheng.
HADOOP-11664. Loading predefined EC schemas from configuration. Contributed by Kai Zheng.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/d50bbd71
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/d50bbd71
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/d50bbd71
Branch: refs/heads/HDFS-7285
Commit: d50bbd71a08b56bccb4b47d11f131c0deb34bf2f
Parents: 8d49fc3
Author: Zhe Zhang <zh...@apache.org>
Authored: Fri Mar 27 14:52:50 2015 -0700
Committer: Zhe Zhang <zh...@apache.org>
Committed: Mon Mar 30 10:13:09 2015 -0700
----------------------------------------------------------------------
.../src/main/conf/ecschema-def.xml | 40 +++++
.../hadoop/fs/CommonConfigurationKeys.java | 5 +
.../hadoop/io/erasurecode/SchemaLoader.java | 147 +++++++++++++++++++
.../hadoop/io/erasurecode/TestSchemaLoader.java | 80 ++++++++++
4 files changed, 272 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/d50bbd71/hadoop-common-project/hadoop-common/src/main/conf/ecschema-def.xml
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/conf/ecschema-def.xml b/hadoop-common-project/hadoop-common/src/main/conf/ecschema-def.xml
new file mode 100644
index 0000000..e619485
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/conf/ecschema-def.xml
@@ -0,0 +1,40 @@
+<?xml version="1.0"?>
+
+<!--
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+-->
+
+<!--
+Please define your EC schemas here. Note, once these schemas are loaded
+and referenced by EC storage policies, any change to them will be ignored.
+You can modify and remove those not used yet, or add new ones.
+-->
+
+<schemas>
+ <schema name="RS-6-3">
+ <k>6</k>
+ <m>3</m>
+ <codec>RS</codec>
+ </schema>
+ <schema name="RS-10-4">
+ <k>10</k>
+ <m>4</m>
+ <codec>RS</codec>
+ </schema>
+</schemas>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hadoop/blob/d50bbd71/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java
index 70fea01..af32674 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java
@@ -141,6 +141,11 @@ public class CommonConfigurationKeys extends CommonConfigurationKeysPublic {
/** Supported erasure codec classes */
public static final String IO_ERASURECODE_CODECS_KEY = "io.erasurecode.codecs";
+ /** Path of the XML file holding predefined EC schemas (see SchemaLoader) */
+ public static final String IO_ERASURECODE_SCHEMA_FILE_KEY =
+ "io.erasurecode.schema.file";
+ /** Schema file name used when the key above is not set */
+ public static final String IO_ERASURECODE_SCHEMA_FILE_DEFAULT =
+ "ecschema-def.xml";
+
/** Use XOR raw coder when possible for the RS codec */
public static final String IO_ERASURECODE_CODEC_RS_USEXOR_KEY =
"io.erasurecode.codec.rs.usexor";
http://git-wip-us.apache.org/repos/asf/hadoop/blob/d50bbd71/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/SchemaLoader.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/SchemaLoader.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/SchemaLoader.java
new file mode 100644
index 0000000..c51ed37
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/SchemaLoader.java
@@ -0,0 +1,147 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.erasurecode;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.CommonConfigurationKeys;
+import org.w3c.dom.*;
+import org.xml.sax.SAXException;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+import java.io.File;
+import java.io.IOException;
+import java.net.URL;
+import java.util.*;
+
+/**
+ * A EC schema loading utility that loads predefined EC schemas from XML file
+ */
+public class SchemaLoader {
+ private static final Log LOG = LogFactory.getLog(SchemaLoader.class.getName());
+
+ /**
+ * Load predefined ec schemas from configuration file. This file is
+ * expected to be in the XML format.
+ */
+ public List<ECSchema> loadSchema(Configuration conf) {
+ File confFile = getSchemaFile(conf);
+ if (confFile == null) {
+ LOG.warn("Not found any predefined EC schema file");
+ return Collections.emptyList();
+ }
+
+ try {
+ return loadSchema(confFile);
+ } catch (ParserConfigurationException e) {
+ throw new RuntimeException("Failed to load schema file: " + confFile);
+ } catch (IOException e) {
+ throw new RuntimeException("Failed to load schema file: " + confFile);
+ } catch (SAXException e) {
+ throw new RuntimeException("Failed to load schema file: " + confFile);
+ }
+ }
+
+ private List<ECSchema> loadSchema(File schemaFile)
+ throws ParserConfigurationException, IOException, SAXException {
+
+ LOG.info("Loading predefined EC schema file " + schemaFile);
+
+ // Read and parse the schema file.
+ DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
+ dbf.setIgnoringComments(true);
+ DocumentBuilder builder = dbf.newDocumentBuilder();
+ Document doc = builder.parse(schemaFile);
+ Element root = doc.getDocumentElement();
+
+ if (!"schemas".equals(root.getTagName())) {
+ throw new RuntimeException("Bad EC schema config file: " +
+ "top-level element not <schemas>");
+ }
+
+ NodeList elements = root.getChildNodes();
+ List<ECSchema> schemas = new ArrayList<ECSchema>();
+ for (int i = 0; i < elements.getLength(); i++) {
+ Node node = elements.item(i);
+ if (node instanceof Element) {
+ Element element = (Element) node;
+ if ("schema".equals(element.getTagName())) {
+ ECSchema schema = loadSchema(element);
+ schemas.add(schema);
+ } else {
+ LOG.warn("Bad element in EC schema configuration file: " +
+ element.getTagName());
+ }
+ }
+ }
+
+ return schemas;
+ }
+
+ /**
+ * Path to the XML file containing predefined ec schemas. If the path is
+ * relative, it is searched for in the classpath.
+ */
+ private File getSchemaFile(Configuration conf) {
+ String schemaFilePath = conf.get(
+ CommonConfigurationKeys.IO_ERASURECODE_SCHEMA_FILE_KEY,
+ CommonConfigurationKeys.IO_ERASURECODE_SCHEMA_FILE_DEFAULT);
+ File schemaFile = new File(schemaFilePath);
+ if (! schemaFile.isAbsolute()) {
+ URL url = Thread.currentThread().getContextClassLoader()
+ .getResource(schemaFilePath);
+ if (url == null) {
+ LOG.warn(schemaFilePath + " not found on the classpath.");
+ schemaFile = null;
+ } else if (! url.getProtocol().equalsIgnoreCase("file")) {
+ throw new RuntimeException(
+ "EC predefined schema file " + url +
+ " found on the classpath is not on the local filesystem.");
+ } else {
+ schemaFile = new File(url.getPath());
+ }
+ }
+
+ return schemaFile;
+ }
+
+ /**
+ * Loads a schema from a schema element in the configuration file
+ */
+ private ECSchema loadSchema(Element element) {
+ String schemaName = element.getAttribute("name");
+ Map<String, String> ecOptions = new HashMap<String, String>();
+ NodeList fields = element.getChildNodes();
+
+ for (int i = 0; i < fields.getLength(); i++) {
+ Node fieldNode = fields.item(i);
+ if (fieldNode instanceof Element) {
+ Element field = (Element) fieldNode;
+ String tagName = field.getTagName();
+ String value = ((Text) field.getFirstChild()).getData().trim();
+ ecOptions.put(tagName, value);
+ }
+ }
+
+ ECSchema schema = new ECSchema(schemaName, ecOptions);
+ return schema;
+ }
+}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/d50bbd71/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/TestSchemaLoader.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/TestSchemaLoader.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/TestSchemaLoader.java
new file mode 100644
index 0000000..7bb0a9a
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/TestSchemaLoader.java
@@ -0,0 +1,80 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.erasurecode;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.CommonConfigurationKeys;
+import org.junit.Test;
+
+import java.io.File;
+import java.io.FileWriter;
+import java.io.PrintWriter;
+import java.util.List;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Verifies that SchemaLoader reads schemas from an XML file referenced by an
+ * absolute path in the configuration.
+ */
+public class TestSchemaLoader {
+
+  final static String TEST_DIR = new File(System.getProperty(
+      "test.build.data", "/tmp")).getAbsolutePath();
+
+  final static String SCHEMA_FILE = new File(TEST_DIR, "test-ecschema")
+      .getAbsolutePath();
+
+  /**
+   * Writes a file with two schemas, loads it and checks the name, option
+   * count, data/parity unit counts and codec of each loaded schema.
+   */
+  @Test
+  public void testLoadSchema() throws Exception {
+    PrintWriter out = new PrintWriter(new FileWriter(SCHEMA_FILE));
+    try {
+      out.println("<?xml version=\"1.0\"?>");
+      out.println("<schemas>");
+      out.println(" <schema name=\"RSk6m3\">");
+      out.println(" <k>6</k>");
+      out.println(" <m>3</m>");
+      out.println(" <codec>RS</codec>");
+      out.println(" </schema>");
+      out.println(" <schema name=\"RSk10m4\">");
+      out.println(" <k>10</k>");
+      out.println(" <m>4</m>");
+      out.println(" <codec>RS</codec>");
+      out.println(" </schema>");
+      out.println("</schemas>");
+    } finally {
+      // Release the file handle even if writing is interrupted.
+      out.close();
+    }
+
+    try {
+      Configuration conf = new Configuration();
+      conf.set(CommonConfigurationKeys.IO_ERASURECODE_SCHEMA_FILE_KEY,
+          SCHEMA_FILE);
+
+      SchemaLoader schemaLoader = new SchemaLoader();
+      List<ECSchema> schemas = schemaLoader.loadSchema(conf);
+
+      assertEquals(2, schemas.size());
+
+      ECSchema schema1 = schemas.get(0);
+      assertEquals("RSk6m3", schema1.getSchemaName());
+      assertEquals(3, schema1.getOptions().size());
+      assertEquals(6, schema1.getNumDataUnits());
+      assertEquals(3, schema1.getNumParityUnits());
+      assertEquals("RS", schema1.getCodecName());
+
+      ECSchema schema2 = schemas.get(1);
+      assertEquals("RSk10m4", schema2.getSchemaName());
+      assertEquals(3, schema2.getOptions().size());
+      assertEquals(10, schema2.getNumDataUnits());
+      assertEquals(4, schema2.getNumParityUnits());
+      assertEquals("RS", schema2.getCodecName());
+    } finally {
+      // Do not leave the generated schema file behind in the test directory.
+      new File(SCHEMA_FILE).delete();
+    }
+  }
+}
\ No newline at end of file