Posted to commits@metamodel.apache.org by ka...@apache.org on 2015/12/14 11:00:07 UTC

metamodel git commit: METAMODEL-220: Fixed

Repository: metamodel
Updated Branches:
  refs/heads/master 2a4b85410 -> 16f3b0a00


METAMODEL-220: Fixed

Fixes #79

Project: http://git-wip-us.apache.org/repos/asf/metamodel/repo
Commit: http://git-wip-us.apache.org/repos/asf/metamodel/commit/16f3b0a0
Tree: http://git-wip-us.apache.org/repos/asf/metamodel/tree/16f3b0a0
Diff: http://git-wip-us.apache.org/repos/asf/metamodel/diff/16f3b0a0

Branch: refs/heads/master
Commit: 16f3b0a005c115b5f70d89425cd615e79865b908
Parents: 2a4b854
Author: Kasper Sørensen <i....@gmail.com>
Authored: Mon Dec 14 11:00:02 2015 +0100
Committer: Kasper Sørensen <i....@gmail.com>
Committed: Mon Dec 14 11:00:02 2015 +0100

----------------------------------------------------------------------
 CHANGES.md                                      |  1 +
 .../org/apache/metamodel/util/HdfsResource.java | 73 ++++++++++++++------
 .../apache/metamodel/util/HdfsResourceTest.java | 15 ++++
 3 files changed, 68 insertions(+), 21 deletions(-)
----------------------------------------------------------------------
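
In short: HdfsResource used to reject any URL not matching a hard-coded
"hdfs://hostname:port/path/to/file" regex. The commit below switches the
parsing to java.net.URI, adds SCHEME_* constants and a scheme-aware
constructor, and makes getQualifiedPath(), hashCode() and equals()
scheme-aware. A minimal usage sketch of the resulting API (the class name,
host names, ports and paths here are invented for illustration):

import org.apache.metamodel.util.HdfsResource;

public class SchemeExample {
    public static void main(String[] args) {
        // Plain HDFS - behaves as before this change.
        final HdfsResource onHdfs = new HdfsResource("hdfs://namenode:9000/data/orders.csv");
        System.out.println(onHdfs.getScheme());        // "hdfs"
        System.out.println(onHdfs.getQualifiedPath()); // "hdfs://namenode:9000/data/orders.csv"

        // Other URI schemes are now parsed and preserved.
        final HdfsResource onS3 = new HdfsResource("s3://my-bucket/data/orders.csv");
        System.out.println(onS3.getScheme());          // "s3"

        // The new five-argument constructor makes the scheme explicit.
        final HdfsResource onMapR = new HdfsResource(
                HdfsResource.SCHEME_MAPRFS, "cluster-node", 7222, "/data/orders.csv", null);
        System.out.println(onMapR.getQualifiedPath()); // "maprfs://cluster-node:7222/data/orders.csv"
    }
}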


http://git-wip-us.apache.org/repos/asf/metamodel/blob/16f3b0a0/CHANGES.md
----------------------------------------------------------------------
diff --git a/CHANGES.md b/CHANGES.md
index 2474450..b1999ff 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -11,6 +11,7 @@
  * [METAMODEL-218] - Fixed conversion of STRING and NUMBER types to database-specific types in JDBC module.
  * [METAMODEL-205] - Added validation of Excel sheet name before attempting to create table (sheet).
  * [METAMODEL-219] - Made HdfsResource capable of incorporating the Hadoop configuration files core-site.xml and hdfs-site.xml.
+ * [METAMODEL-220] - Made HdfsResource capable of working with URI schemes other than 'hdfs'.
 
 ### Apache MetaModel 4.4.1
 

http://git-wip-us.apache.org/repos/asf/metamodel/blob/16f3b0a0/hadoop/src/main/java/org/apache/metamodel/util/HdfsResource.java
----------------------------------------------------------------------
diff --git a/hadoop/src/main/java/org/apache/metamodel/util/HdfsResource.java b/hadoop/src/main/java/org/apache/metamodel/util/HdfsResource.java
index 9523b23..b66280c 100644
--- a/hadoop/src/main/java/org/apache/metamodel/util/HdfsResource.java
+++ b/hadoop/src/main/java/org/apache/metamodel/util/HdfsResource.java
@@ -23,9 +23,8 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
 import java.io.Serializable;
+import java.net.URI;
 import java.util.Objects;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
@@ -45,8 +44,14 @@ public class HdfsResource extends AbstractResource implements Serializable {
 
     public static final String SYSTEM_PROPERTY_HADOOP_CONF_DIR_ENABLED = "metamodel.hadoop.use_hadoop_conf_dir";
 
-    private static final Pattern URL_PATTERN = Pattern.compile("hdfs://(.+):([0-9]+)/(.*)");
+    public static final String SCHEME_HDFS = "hdfs";
+    public static final String SCHEME_SWIFT = "swift";
+    public static final String SCHEME_EMRFS = "emrfs";
+    public static final String SCHEME_MAPRFS = "maprfs";
+    public static final String SCHEME_S3 = "s3";
+    public static final String SCHEME_FTP = "ftp";
 
+    private final String _scheme;
     private final String _hadoopConfDir;
     private final String _hostname;
     private final int _port;
@@ -57,7 +62,7 @@ public class HdfsResource extends AbstractResource implements Serializable {
      * Creates a {@link HdfsResource}
      *
      * @param url
-     *            a URL of the form: hdfs://hostname:port/path/to/file
+     *            a URL of the form: scheme://hostname:port/path/to/file
      */
     public HdfsResource(String url) {
         this(url, null);
@@ -67,7 +72,7 @@ public class HdfsResource extends AbstractResource implements Serializable {
      * Creates a {@link HdfsResource}
      *
      * @param url
-     *            a URL of the form: hdfs://hostname:port/path/to/file
+     *            a URL of the form: scheme://hostname:port/path/to/file
      * @param hadoopConfDir
      *            the path of a directory containing the Hadoop and HDFS
      *            configuration file(s).
@@ -76,19 +81,18 @@ public class HdfsResource extends AbstractResource implements Serializable {
         if (url == null) {
             throw new IllegalArgumentException("Url cannot be null");
         }
-        final Matcher matcher = URL_PATTERN.matcher(url);
-        if (!matcher.find()) {
-            throw new IllegalArgumentException(
-                    "Cannot parse url '" + url + "'. Must follow pattern: hdfs://hostname:port/path/to/file");
-        }
-        _hostname = matcher.group(1);
-        _port = Integer.parseInt(matcher.group(2));
-        _filepath = '/' + matcher.group(3);
+
+        final URI uri = URI.create(url);
+
+        _scheme = uri.getScheme();
+        _hostname = uri.getHost();
+        _port = uri.getPort();
+        _filepath = uri.getPath();
         _hadoopConfDir = hadoopConfDir;
     }
 
     /**
-     * Creates a {@link HdfsResource}
+     * Creates a {@link HdfsResource} using the "hdfs" scheme
      *
      * @param hostname
      *            the HDFS (namenode) hostname
@@ -98,12 +102,15 @@ public class HdfsResource extends AbstractResource implements Serializable {
      *            the path on HDFS to the file, starting with slash ('/')
      */
     public HdfsResource(String hostname, int port, String filepath) {
-        this(hostname, port, filepath, null);
+        this(SCHEME_HDFS, hostname, port, filepath, null);
     }
 
     /**
      * Creates a {@link HdfsResource}
      *
+     * @param scheme
+     *            the scheme to use (consider using {@link #SCHEME_HDFS} or any
+     *            of the other "SCHEME_" constants).
      * @param hostname
      *            the HDFS (namenode) hostname
      * @param port
@@ -114,13 +121,23 @@ public class HdfsResource extends AbstractResource implements Serializable {
      *            the path of a directory containing the Hadoop and HDFS
      *            configuration file(s).
      */
-    public HdfsResource(String hostname, int port, String filepath, String hadoopConfDir) {
+    public HdfsResource(String scheme, String hostname, int port, String filepath, String hadoopConfDir) {
+        _scheme = scheme;
         _hostname = hostname;
         _port = port;
         _filepath = filepath;
         _hadoopConfDir = hadoopConfDir;
     }
 
+    public String getScheme() {
+        if (_scheme == null) {
+            // should only happen for objects serialized before
+            // METAMODEL-220 introduced dynamic schemes
+            return SCHEME_HDFS;
+        }
+        return _scheme;
+    }
+
     public String getFilepath() {
         return _filepath;
     }
@@ -148,7 +165,18 @@ public class HdfsResource extends AbstractResource implements Serializable {
 
     @Override
     public String getQualifiedPath() {
-        return "hdfs://" + _hostname + ":" + _port + _filepath;
+        final StringBuilder sb = new StringBuilder();
+        sb.append(getScheme());
+        sb.append("://");
+        if (_hostname != null) {
+            sb.append(_hostname);
+        }
+        if (_port > 0) {
+            sb.append(':');
+            sb.append(_port);
+        }
+        sb.append(_filepath);
+        return sb.toString();
     }
 
     @Override
@@ -252,7 +280,9 @@ public class HdfsResource extends AbstractResource implements Serializable {
 
     public Configuration getHadoopConfiguration() {
         final Configuration conf = new Configuration();
-        conf.set("fs.defaultFS", "hdfs://" + _hostname + ":" + _port);
+        if (_hostname != null && _port > 0) {
+            conf.set("fs.defaultFS", getScheme() + "://" + _hostname + ":" + _port);
+        }
 
         final File hadoopConfigurationDirectory = getHadoopConfigurationDirectoryToUse();
         if (hadoopConfigurationDirectory != null) {
@@ -323,7 +353,7 @@ public class HdfsResource extends AbstractResource implements Serializable {
 
     @Override
     public int hashCode() {
-        return Objects.hash(_filepath, _hostname, _port, _hadoopConfDir);
+        return Objects.hash(getScheme(), _filepath, _hostname, _port, _hadoopConfDir);
     }
 
     @Override
@@ -333,8 +363,9 @@ public class HdfsResource extends AbstractResource implements Serializable {
         }
         if (obj instanceof HdfsResource) {
             final HdfsResource other = (HdfsResource) obj;
-            return Objects.equals(_filepath, other._filepath) && Objects.equals(_hostname, other._hostname)
-                    && Objects.equals(_port, other._port) && Objects.equals(_hadoopConfDir, other._hadoopConfDir);
+            return Objects.equals(getScheme(), other.getScheme()) && Objects.equals(_filepath, other._filepath)
+                    && Objects.equals(_hostname, other._hostname) && Objects.equals(_port, other._port)
+                    && Objects.equals(_hadoopConfDir, other._hadoopConfDir);
         }
         return false;
     }

http://git-wip-us.apache.org/repos/asf/metamodel/blob/16f3b0a0/hadoop/src/test/java/org/apache/metamodel/util/HdfsResourceTest.java
----------------------------------------------------------------------
diff --git a/hadoop/src/test/java/org/apache/metamodel/util/HdfsResourceTest.java b/hadoop/src/test/java/org/apache/metamodel/util/HdfsResourceTest.java
index c6004d3..ad3393f 100644
--- a/hadoop/src/test/java/org/apache/metamodel/util/HdfsResourceTest.java
+++ b/hadoop/src/test/java/org/apache/metamodel/util/HdfsResourceTest.java
@@ -28,18 +28,33 @@ public class HdfsResourceTest extends TestCase {
         final HdfsResource res1 = new HdfsResource("hdfs://localhost:9000/home/metamodel.txt");
         assertEquals("hdfs://localhost:9000/home/metamodel.txt", res1.getQualifiedPath());
         assertEquals("metamodel.txt", res1.getName());
+        assertEquals("hdfs", res1.getScheme());
 
         final HdfsResource res2 = new HdfsResource("localhost", 9000, "/home/metamodel.txt");
         assertEquals("hdfs://localhost:9000/home/metamodel.txt", res2.getQualifiedPath());
         assertEquals("metamodel.txt", res2.getName());
+        assertEquals("hdfs", res2.getScheme());
 
         assertEquals(res1, res2);
 
         final HdfsResource res3 = new HdfsResource("localhost", 9000, "/home/apache.txt");
         assertEquals("hdfs://localhost:9000/home/apache.txt", res3.getQualifiedPath());
         assertEquals("apache.txt", res3.getName());
+        assertEquals("hdfs", res3.getScheme());
 
         assertFalse(res3.equals(res1));
+        
+        final HdfsResource res4 = new HdfsResource("emrfs://localhost:9000/home/metamodel.txt");
+        assertEquals("emrfs://localhost:9000/home/metamodel.txt", res4.getQualifiedPath());
+        assertEquals("metamodel.txt", res4.getName());
+        assertEquals("emrfs", res4.getScheme());
+        
+        assertFalse(res4.equals(res1));
+    }
+    
+    public void testNoHostOrPortInUrl() throws Exception {
+        final HdfsResource resource = new HdfsResource("emrfs:///home/metamodel.txt");
+        assertEquals("emrfs:///home/metamodel.txt", resource.getQualifiedPath());
     }
     
     public void testSerialization() throws Exception {