You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by rw...@apache.org on 2015/12/16 19:59:37 UTC

svn commit: r1720416 - in /commons/proper/vfs/trunk/core/src/main/java/org/apache/commons/vfs2/provider/hdfs: HdfsFileSystem.java HdfsFileSystemConfigBuilder.java

Author: rwhitcomb
Date: Wed Dec 16 18:59:37 2015
New Revision: 1720416

URL: http://svn.apache.org/viewvc?rev=1720416&view=rev
Log:
VFS-586:  Add several additional ways to specify an alternate HDFS configuration.

Motivation:  the previous changes in this area allowed specifying the name of
one resource (in the CLASSPATH) for an alternate configuration.  Further use
of this feature showed the need to specify more than one resource, and to
load configurations from local files as well as from any other source
through which HDFS configuration parameters can be specified (according to
the HDFS APIs).

Changes:  therefore, the API in HdfsFileSystemConfigBuilder was greatly
extended to allow:
a) Multiple resource names to be given;
b) File system paths to be specified (one or many);
c) URLs to be listed (multiple if desired) so configuration can be loaded
   from a web application;
d) Another HDFS Configuration object (only one of these);
e) Any miscellaneous InputStream (only one of these).

And the configuration step in HdfsFileSystem was enhanced to check for
all these possible sources of configuration information and add the
available ones to the HDFS "Configuration" object during initialization.

One of the existing APIs was changed:  getConfigName -> getConfigNames
(with a corresponding change to the return type:  String -> String[]).

Note: I have not been able to figure out a good unit test for this yet,
but it is working quite well in our application.

Modified:
    commons/proper/vfs/trunk/core/src/main/java/org/apache/commons/vfs2/provider/hdfs/HdfsFileSystem.java
    commons/proper/vfs/trunk/core/src/main/java/org/apache/commons/vfs2/provider/hdfs/HdfsFileSystemConfigBuilder.java

Modified: commons/proper/vfs/trunk/core/src/main/java/org/apache/commons/vfs2/provider/hdfs/HdfsFileSystem.java
URL: http://svn.apache.org/viewvc/commons/proper/vfs/trunk/core/src/main/java/org/apache/commons/vfs2/provider/hdfs/HdfsFileSystem.java?rev=1720416&r1=1720415&r2=1720416&view=diff
==============================================================================
--- commons/proper/vfs/trunk/core/src/main/java/org/apache/commons/vfs2/provider/hdfs/HdfsFileSystem.java (original)
+++ commons/proper/vfs/trunk/core/src/main/java/org/apache/commons/vfs2/provider/hdfs/HdfsFileSystem.java Wed Dec 16 18:59:37 2015
@@ -16,8 +16,10 @@
  */
 package org.apache.commons.vfs2.provider.hdfs;
 
+import java.io.InputStream;
 import java.io.IOException;
 import java.io.UnsupportedEncodingException;
+import java.net.URL;
 import java.net.URLDecoder;
 import java.util.Collection;
 
@@ -110,14 +112,51 @@ public class HdfsFileSystem extends Abst
             {
                 final String hdfsUri = name.getRootURI();
                 final HdfsFileSystemConfigBuilder builder = HdfsFileSystemConfigBuilder.getInstance();
-                final String configName = builder.getConfigName(getFileSystemOptions());
+                final FileSystemOptions options = getFileSystemOptions();
+                final String[] configNames = builder.getConfigNames(options);
+                final Path[] configPaths = builder.getConfigPaths(options);
+                final URL[] configURLs = builder.getConfigURLs(options);
+                final InputStream configStream = builder.getConfigInputStream(options);
+                final Configuration configConfiguration = builder.getConfigConfiguration(options);
 
                 final Configuration conf = new Configuration(true);
                 conf.set(FileSystem.FS_DEFAULT_NAME_KEY, hdfsUri);
-                if (configName != null)
+
+                // Load any alternate configuration parameters that may have been specified
+                // no matter where they might come from
+                if (configNames != null)
+                {
+                    for (String configName : configNames)
+                    {
+                        log.debug("Adding HDFS configuration resource: " + configName);
+                        conf.addResource(configName);
+                    }
+                }
+                if (configPaths != null)
+                {
+                    for (Path path : configPaths)
+                    {
+                        log.debug("Adding HDFS configuration path: " + path);
+                        conf.addResource(path);
+                    }
+                }
+                if (configURLs != null)
+                {
+                    for (URL url : configURLs)
+                    {
+                        log.debug("Adding HDFS configuration URL: " + url);
+                        conf.addResource(url);
+                    }
+                }
+                if (configStream != null)
+                {
+                    log.debug("Adding HDFS configuration stream");
+                    conf.addResource(configStream);
+                }
+                if (configConfiguration != null)
                 {
-                    log.debug("Adding HDFS configuration file: " + configName);
-                    conf.addResource(configName);
+                    log.debug("Adding HDFS configuration object");
+                    conf.addResource(configConfiguration);
                 }
 
                 try

Modified: commons/proper/vfs/trunk/core/src/main/java/org/apache/commons/vfs2/provider/hdfs/HdfsFileSystemConfigBuilder.java
URL: http://svn.apache.org/viewvc/commons/proper/vfs/trunk/core/src/main/java/org/apache/commons/vfs2/provider/hdfs/HdfsFileSystemConfigBuilder.java?rev=1720416&r1=1720415&r2=1720416&view=diff
==============================================================================
--- commons/proper/vfs/trunk/core/src/main/java/org/apache/commons/vfs2/provider/hdfs/HdfsFileSystemConfigBuilder.java (original)
+++ commons/proper/vfs/trunk/core/src/main/java/org/apache/commons/vfs2/provider/hdfs/HdfsFileSystemConfigBuilder.java Wed Dec 16 18:59:37 2015
@@ -16,9 +16,14 @@
  */
 package org.apache.commons.vfs2.provider.hdfs;
 
+import java.io.InputStream;
+import java.net.MalformedURLException;
+import java.net.URL;
 import org.apache.commons.vfs2.FileSystem;
 import org.apache.commons.vfs2.FileSystemConfigBuilder;
 import org.apache.commons.vfs2.FileSystemOptions;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
 
 /**
  * Configuration settings for the HdfsFileSystem.
@@ -28,7 +33,11 @@ import org.apache.commons.vfs2.FileSyste
 public final class HdfsFileSystemConfigBuilder extends FileSystemConfigBuilder
 {
     private static final HdfsFileSystemConfigBuilder BUILDER = new HdfsFileSystemConfigBuilder();
-    private static final String KEY_CONFIG_NAME = "configName";
+    private static final String KEY_CONFIG_NAMES  = "configNames";
+    private static final String KEY_CONFIG_PATHS  = "configPaths";
+    private static final String KEY_CONFIG_URLS   = "configURLs";
+    private static final String KEY_CONFIG_STREAM = "configStream";
+    private static final String KEY_CONFIG_CONF   = "configConf";
 
     private HdfsFileSystemConfigBuilder()
     {
@@ -53,34 +62,252 @@ public final class HdfsFileSystemConfigB
     }
 
     /**
-     * Get name of alternate configuration file.
+     * Get names of alternate configuration resources.
      *
-     * @return resource name of alternate configuration file or null.
+     * @return resource name list of alternate configurations or {@code null}.
      * @param opts The FileSystemOptions.
      * @see #setConfigName(FileSystemOptions, String)
      */
-    public String getConfigName(final FileSystemOptions opts)
+    public String[] getConfigNames(final FileSystemOptions opts)
     {
-        return this.getString(opts, KEY_CONFIG_NAME);
+        String names = this.getString(opts, KEY_CONFIG_NAMES);
+        return names == null || names.isEmpty() ? null : names.split(",");
     }
 
     /**
-     * Sets the name of configuration file to be loaded after the defaults.
+     * Sets the name of configuration resource to be loaded after the defaults.
      * <p>
-     * Specifies the name of a config file to override any specific HDFS settings.
+     * Specifies the name of a config resource to override any specific HDFS settings.
      * The property will be passed on to {@code org.apache.hadoop.conf.Configuration#addResource(String)}
      * after the URL was set as the default name with: {@code Configuration#set(FileSystem.FS_DEFAULT_NAME_KEY, url)}.
      * <p>
      * One use for this is to set a different value for the {@code dfs.client.use.datanode.hostname}
      * property in order to access HDFS files stored in an AWS installation (from outside their
      * firewall). There are other possible uses too.
+     * <p>
+     * This method may be called multiple times and all the specified resources will be loaded
+     * in the order they were specified.
+     * <p>
+     * Note also that if a list of names is provided, separated by commas ({@code ","}),
+     * this will work the same as calling this method once for each name.
      *
      * @param opts The FileSystemOptions to modify.
-     * @param name resource name of additional configuration file or null.
+     * @param name resource name of additional configuration or {@code null} to unset all the
+     * values set so far.
+     * @see #getConfigNames
      */
     public void setConfigName(final FileSystemOptions opts, final String name)
     {
-        this.setParam(opts, KEY_CONFIG_NAME, name);
+        if (name == null || name.isEmpty())
+        {
+            this.setParam(opts, KEY_CONFIG_NAMES, null);
+        }
+        else
+        {
+            String previousNames = this.getString(opts, KEY_CONFIG_NAMES);
+            if (previousNames == null || previousNames.isEmpty())
+            {
+                this.setParam(opts, KEY_CONFIG_NAMES, name);
+            }
+            else
+            {
+                this.setParam(opts, KEY_CONFIG_NAMES, previousNames + "," + name);
+            }
+        }
+    }
+
+    /**
+     * Get paths of alternate configuration file system files.
+     *
+     * @return list of full paths of alternate configuration files or {@code null}.
+     * @param opts The FileSystemOptions.
+     * @see #setConfigPath(FileSystemOptions, Path)
+     */
+    public Path[] getConfigPaths(final FileSystemOptions opts)
+    {
+        String pathNames = this.getString(opts, KEY_CONFIG_PATHS);
+        if (pathNames == null || pathNames.isEmpty())
+        {
+            return null;
+        }
+        String[] paths = pathNames.split(",");
+        Path[] realPaths = new Path[paths.length];
+        for (int i = 0; i < paths.length; i++)
+        {
+            realPaths[i] = new Path(paths[i]);
+        }
+        return realPaths;
+    }
+
+    /**
+     * Sets the full path of configuration file to be loaded after the defaults.
+     * <p>
+     * Specifies the path of a local file system config file to override any specific HDFS settings.
+     * The property will be passed on to {@code org.apache.hadoop.conf.Configuration#addResource(Path)}
+     * after the URL was set as the default name with: {@code Configuration#set(FileSystem.FS_DEFAULT_NAME_KEY, url)}.
+     * <p>
+     * One use for this is to set a different value for the {@code dfs.client.use.datanode.hostname}
+     * property in order to access HDFS files stored in an AWS installation (from outside their
+     * firewall). There are other possible uses too.
+     * <p>
+     * This method may be called multiple times and all the specified resources will be loaded
+     * in the order they were specified.
+     *
+     * @param opts The FileSystemOptions to modify.
+     * @param path full path of additional configuration file (local file system) or {@code null}
+     * to unset all the path values set so far.
+     */
+    public void setConfigPath(final FileSystemOptions opts, final Path path)
+    {
+        if (path == null)
+        {
+            this.setParam(opts, KEY_CONFIG_PATHS, null);
+        }
+        else
+        {
+            String previousPathNames = this.getString(opts, KEY_CONFIG_PATHS);
+            if (previousPathNames == null || previousPathNames.isEmpty())
+            {
+                this.setParam(opts, KEY_CONFIG_PATHS, path.toString());
+            }
+            else
+            {
+                this.setParam(opts, KEY_CONFIG_PATHS, previousPathNames + "," + path.toString());
+            }
+        }
+    }
+
+    /**
+     * Get URLs of alternate configurations.
+     *
+     * @return list of alternate configuration URLs or {@code null}.
+     * @param opts The FileSystemOptions.
+     * @see #setConfigURL(FileSystemOptions, URL)
+     */
+    public URL[] getConfigURLs(final FileSystemOptions opts)
+    {
+        try
+        {
+            String urlNames = this.getString(opts, KEY_CONFIG_URLS);
+            if (urlNames == null || urlNames.isEmpty())
+            {
+                return null;
+            }
+            String[] urls = urlNames.split(",");
+            URL[] realURLs = new URL[urls.length];
+            for (int i = 0; i < urls.length; i++)
+            {
+                realURLs[i] = new URL(urls[i]);
+            }
+            return realURLs;
+        }
+        catch (MalformedURLException mue)
+        {
+            // This should never happen because we save it in the proper form
+        }
+        return null;
+    }
+
+    /**
+     * Sets the URL of configuration file to be loaded after the defaults.
+     * <p>
+     * Specifies the URL of a config file to override any specific HDFS settings.
+     * The property will be passed on to {@code org.apache.hadoop.conf.Configuration#addResource(URL)}
+     * after the URL was set as the default name with: {@code Configuration#set(FileSystem.FS_DEFAULT_NAME_KEY, url)}.
+     * <p>
+     * One use for this is to set a different value for the {@code dfs.client.use.datanode.hostname}
+     * property in order to access HDFS files stored in an AWS installation (from outside their
+     * firewall). There are other possible uses too.
+     * <p>
+     * This method may be called multiple times and all the specified resources will be loaded
+     * in the order they were specified.
+     *
+     * @param opts The FileSystemOptions to modify.
+     * @param url URL of additional configuration file or {@code null} to unset all the URL
+     * values set so far.
+     */
+    public void setConfigURL(final FileSystemOptions opts, final URL url)
+    {
+        if (url == null)
+        {
+            this.setParam(opts, KEY_CONFIG_URLS, null);
+        }
+        else
+        {
+            String previousURLNames = this.getString(opts, KEY_CONFIG_URLS);
+            if (previousURLNames == null || previousURLNames.isEmpty())
+            {
+                this.setParam(opts, KEY_CONFIG_URLS, url.toString());
+            }
+            else
+            {
+                this.setParam(opts, KEY_CONFIG_URLS, previousURLNames + "," + url.toString());
+            }
+        }
+    }
+
+    /**
+     * Get alternate configuration input stream.
+     *
+     * @return alternate configuration input stream or {@code null}.
+     * @param opts The FileSystemOptions.
+     * @see #setConfigInputStream(FileSystemOptions, InputStream)
+     */
+    public InputStream getConfigInputStream(final FileSystemOptions opts)
+    {
+        return (InputStream)this.getParam(opts, KEY_CONFIG_STREAM);
+    }
+
+    /**
+     * Sets the input stream of configuration file to be loaded after the defaults.
+     * <p>
+     * Specifies an input stream connected to a config file to override any specific HDFS settings.
+     * The property will be passed on to {@code org.apache.hadoop.conf.Configuration#addResource(InputStream)}
+     * after the URL was set as the default name with: {@code Configuration#set(FileSystem.FS_DEFAULT_NAME_KEY, url)}.
+     * <p>
+     * One use for this is to set a different value for the {@code dfs.client.use.datanode.hostname}
+     * property in order to access HDFS files stored in an AWS installation (from outside their
+     * firewall). There are other possible uses too.
+     *
+     * @param opts The FileSystemOptions to modify.
+     * @param inputStream input stream of additional configuration file or {@code null} to unset
+     * the configuration input stream previously set up.
+     */
+    public void setConfigInputStream(final FileSystemOptions opts, final InputStream inputStream)
+    {
+        this.setParam(opts, KEY_CONFIG_STREAM, inputStream);
+    }
+
+    /**
+     * Get alternate configuration object.
+     *
+     * @return alternate configuration object or {@code null}.
+     * @param opts The FileSystemOptions.
+     * @see #setConfigConfiguration(FileSystemOptions, Configuration)
+     */
+    public Configuration getConfigConfiguration(final FileSystemOptions opts)
+    {
+        return (Configuration)this.getParam(opts, KEY_CONFIG_CONF);
+    }
+
+    /**
+     * Sets the configuration object to be loaded after the defaults.
+     * <p>
+     * Specifies an already initialized configuration object to override any specific HDFS settings.
+     * The property will be passed on to {@code org.apache.hadoop.conf.Configuration#addResource(Configuration)}
+     * after the URL was set as the default name with: {@code Configuration#set(FileSystem.FS_DEFAULT_NAME_KEY, url)}.
+     * <p>
+     * One use for this is to set a different value for the {@code dfs.client.use.datanode.hostname}
+     * property in order to access HDFS files stored in an AWS installation (from outside their
+     * firewall). There are other possible uses too.
+     *
+     * @param opts The FileSystemOptions to modify.
+     * @param configuration additional configuration object or {@code null} to unset any configuration
+     * object previously set.
+     */
+    public void setConfigConfiguration(final FileSystemOptions opts, final Configuration configuration)
+    {
+        this.setParam(opts, KEY_CONFIG_CONF, configuration);
     }
 
 }