You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ki...@apache.org on 2020/02/04 18:25:14 UTC

[hadoop] branch branch-3.1 updated: HDFS-12491. Support wildcard in CLASSPATH for libhdfs. Contributed by Muhammad Samir Khan.

This is an automated email from the ASF dual-hosted git repository.

kihwal pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new a55a0a1  HDFS-12491. Support wildcard in CLASSPATH for libhdfs. Contributed by Muhammad Samir Khan.
a55a0a1 is described below

commit a55a0a1f6d4361ab28b919410eddc0a2daa8d34a
Author: Kihwal Lee <ki...@apache.org>
AuthorDate: Tue Feb 4 12:24:58 2020 -0600

    HDFS-12491. Support wildcard in CLASSPATH for libhdfs. Contributed by Muhammad Samir Khan.
    
    (cherry picked from commit 10a60fbe20bb08cdd71076ea9bf2ebb3a2f6226e)
---
 .../src/main/native/libhdfs/jni_helper.c           | 277 ++++++++++++++++++++-
 .../src/main/native/libhdfs/jni_helper.h           |  10 +-
 .../hadoop-hdfs/src/site/markdown/LibHdfs.md       |   3 +-
 3 files changed, 287 insertions(+), 3 deletions(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/jni_helper.c b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/jni_helper.c
index c45d598..91a3c1c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/jni_helper.c
+++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/jni_helper.c
@@ -24,6 +24,8 @@
 #include "os/mutexes.h"
 #include "os/thread_local_storage.h"
 
+#include <errno.h>
+#include <dirent.h>
 #include <stdio.h> 
 #include <string.h> 
 
@@ -358,6 +360,277 @@ done:
 
 
 /**
+ * Expands one wildcard classpath entry into the .jar and .JAR files found
+ * directly inside the directory it names.
+ *
+ * Intended to be called twice: first with expanded == NULL to learn how many
+ * bytes the expansion needs, then with a buffer of at least that size to
+ * actually write it. No bounds checking is done on expanded.
+ *
+ * @param path      Directory to scan. Must end with "/." (the caller swaps
+ *                  the trailing wildcard for '.'), so pathLength - 1 is the
+ *                  length of the "dir/" prefix placed before each file name.
+ * @param expanded  Destination buffer, or NULL to only compute the length.
+ *                  Every entry written is followed by PATH_SEPARATOR; nothing
+ *                  is null terminated here.
+ *
+ * @return          Number of bytes the expansion occupies, counting one
+ *                  trailing byte per entry for either a PATH_SEPARATOR or the
+ *                  null terminator. If the directory can not be opened due to
+ *                  EACCES, ENOENT or ENOTDIR, or holds no matching file, the
+ *                  original wildcard entry is emitted unchanged. Returns -1
+ *                  on any other opendir() failure or on a readdir() failure.
+ */
+static ssize_t wildcard_expandPath(const char* path, char* expanded)
+{
+    struct dirent* file;
+    char* dest = expanded;
+    ssize_t length = 0;
+    size_t pathLength = strlen(path);
+    int readdirErrno = 0;
+    DIR* dir;
+
+    dir = opendir(path);
+    if (dir != NULL) {
+        // can open dir so try to match with all *.jar and *.JAR entries
+
+#ifdef _LIBHDFS_JNI_HELPER_DEBUGGING_ON_
+        printf("wildcard_expandPath: %s\n", path);
+#endif
+
+        for (;;) {
+            const char* filename;
+            size_t filenameLength;
+            const char* jarExtension;
+
+            // readdir() returns NULL for both end-of-directory and failure;
+            // errno is the only way to tell them apart. Clear it before
+            // every call and capture it right away, so nothing in the loop
+            // body (eg the debug printf below) can fake or hide an error.
+            errno = 0;
+            file = readdir(dir);
+            if (file == NULL) {
+                readdirErrno = errno;
+                break;
+            }
+
+            filename = file->d_name;
+            filenameLength = strlen(filename);
+
+            // If filename is smaller than 4 characters then it can not possibly
+            // have extension ".jar" or ".JAR"
+            if (filenameLength < 4) {
+                continue;
+            }
+
+            jarExtension = &filename[filenameLength-4];
+            if ((strcmp(jarExtension, ".jar") != 0) &&
+                (strcmp(jarExtension, ".JAR") != 0)) {
+                continue;
+            }
+
+            // pathLength includes an extra '.' which we'll use for either
+            // separator or null termination
+            length += pathLength + filenameLength;
+
+#ifdef _LIBHDFS_JNI_HELPER_DEBUGGING_ON_
+            printf("wildcard_scanPath:\t%s\t:\t%zd\n", filename, length);
+#endif
+
+            if (expanded != NULL) {
+                // Exact-length copies without a terminator, hence memcpy.
+                // pathLength includes an extra '.', which is dropped here.
+                memcpy(dest, path, pathLength-1);
+                dest += pathLength - 1;
+                memcpy(dest, filename, filenameLength);
+                dest += filenameLength;
+                *dest = PATH_SEPARATOR;
+                dest++;
+
+#ifdef _LIBHDFS_JNI_HELPER_DEBUGGING_ON_
+                printf("wildcard_expandPath:\t%s\t:\t%s\n",
+                  filename, expanded);
+#endif
+            }
+        }
+
+        if (readdirErrno != 0) {
+            fprintf(stderr, "wildcard_expandPath: on readdir %s: %s\n",
+              path, strerror(readdirErrno));
+            length = -1;
+        }
+
+        // a closedir failure is reported but does not change the result
+        if (closedir(dir) != 0) {
+            fprintf(stderr, "wildcard_expandPath: on closedir %s: %s\n",
+                    path, strerror(errno));
+        }
+    } else if ((errno != EACCES) && (errno != ENOENT) && (errno != ENOTDIR)) {
+        // can not opendir due to an error we can not handle
+        fprintf(stderr, "wildcard_expandPath: on opendir %s: %s\n", path,
+                strerror(errno));
+        length = -1;
+    }
+
+    if (length == 0) {
+        // either we failed to open dir due to EACCES, ENOENT, or ENOTDIR, or
+        // we did not find any file that matches *.jar or *.JAR
+
+#ifdef _LIBHDFS_JNI_HELPER_DEBUGGING_ON_
+        fprintf(stderr, "wildcard_expandPath: can not expand %.*s*: %s\n",
+                (int)(pathLength-1), path, strerror(errno));
+#endif
+
+        // in this case, the wildcard expansion is the same as the original
+        // +1 for PATH_SEPARATOR or null termination
+        length = pathLength + 1;
+        if (expanded != NULL) {
+            // pathLength includes an extra '.'
+            memcpy(dest, path, pathLength-1);
+            dest += pathLength-1;
+            *dest = '*'; // restore wildcard
+            dest++;
+            *dest = PATH_SEPARATOR;
+            dest++;
+        }
+    }
+
+    return length;
+}
+
+/**
+ * Helper to expand classpaths. Splits classpath on PATH_SEPARATOR_STR and
+ * processes each entry: entries ending in a slash followed by the wildcard
+ * are expanded through wildcard_expandPath(), every other entry is copied
+ * through as is. Empty entries are dropped by the tokenizer.
+ *
+ * @param classpath          The classpath to expand. It is not modified; a
+ *                           private copy is tokenized instead.
+ * @param expandedClasspath  Destination buffer, or NULL to only compute the
+ *                           required size. It is assumed to be big enough,
+ *                           eg allocated after calling this function with
+ *                           expandedClasspath=NULL to get the right size.
+ *
+ * @return                   Total length of the expanded classpath including
+ *                           the null terminator, 0 if classpath holds no
+ *                           entry at all, or -1 on failure.
+ */
+static ssize_t getClassPath_helper(const char *classpath, char* expandedClasspath)
+{
+    ssize_t length;
+    ssize_t retval;
+    char* expandedCP_curr;
+    char* cp_token;
+    char* classpath_dup;
+    char* saveptr = NULL;
+
+    // tokenizing writes into the string, so work on a copy
+    classpath_dup = strdup(classpath);
+    if (classpath_dup == NULL) {
+        fprintf(stderr, "getClassPath_helper: failed strdup: %s\n",
+          strerror(errno));
+        return -1;
+    }
+
+    length = 0;
+
+    // expandedCP_curr is the current pointer
+    expandedCP_curr = expandedClasspath;
+
+    // strtok_r keeps its position in saveptr rather than in hidden global
+    // state, so tokenizing here can not disturb, or be disturbed by, any
+    // other strtok user in the process
+    cp_token = strtok_r(classpath_dup, PATH_SEPARATOR_STR, &saveptr);
+    while (cp_token != NULL) {
+        size_t tokenlen;
+
+#ifdef _LIBHDFS_JNI_HELPER_DEBUGGING_ON_
+        printf("%s\n", cp_token);
+#endif
+
+        tokenlen = strlen(cp_token);
+        // We only expand if token ends with a slash followed by '*'
+        if ((tokenlen > 1) &&
+          (cp_token[tokenlen-1] == '*') && (cp_token[tokenlen-2] == '/')) {
+            // replace the '*' with '.' so that we don't have to allocate another
+            // string for passing to opendir() in wildcard_expandPath()
+            cp_token[tokenlen-1] = '.';
+            retval = wildcard_expandPath(cp_token, expandedCP_curr);
+            if (retval < 0) {
+                free(classpath_dup);
+                return -1;
+            }
+
+            length += retval;
+            if (expandedCP_curr != NULL) {
+                expandedCP_curr += retval;
+            }
+        } else {
+            // +1 for path separator or null terminator
+            length += tokenlen + 1;
+            if (expandedCP_curr != NULL) {
+                // exact-length copy without a terminator, hence memcpy
+                memcpy(expandedCP_curr, cp_token, tokenlen);
+                expandedCP_curr += tokenlen;
+                *expandedCP_curr = PATH_SEPARATOR;
+                expandedCP_curr++;
+            }
+        }
+
+        cp_token = strtok_r(NULL, PATH_SEPARATOR_STR, &saveptr);
+    }
+
+    // Turn the trailing separator into the null terminator. If nothing was
+    // written (classpath had no entries) there is no separator to replace,
+    // and stepping back would write before the start of the buffer.
+    if ((expandedCP_curr != NULL) && (expandedCP_curr > expandedClasspath)) {
+        expandedCP_curr--;
+        *expandedCP_curr = '\0';
+    }
+
+    free(classpath_dup);
+    return length;
+}
+
+/**
+ * Gets the classpath. Wild card entries are resolved only if the entry ends
+ * with "/\*" (backslash to escape commenting) to match against .jar and .JAR.
+ * All other wild card entries (eg /path/to/dir/\*foo*) are not resolved,
+ * following JAVA default behavior, see:
+ * https://docs.oracle.com/javase/8/docs/technotes/tools/unix/classpath.html
+ *
+ * @return  A newly allocated string that the caller must free(), holding
+ *          either a copy of CLASSPATH or its wildcard expansion. NULL if
+ *          CLASSPATH is not set, if memory can not be allocated, or if the
+ *          expansion fails.
+ */
+static char* getClassPath(void)
+{
+    char* classpath;
+    char* expandedClasspath;
+    ssize_t length;
+    ssize_t retval;
+
+    classpath = getenv("CLASSPATH");
+    if (classpath == NULL) {
+      return NULL;
+    }
+
+    // First, get the total size of the string we will need for the expanded
+    // classpath
+    length = getClassPath_helper(classpath, NULL);
+    if (length < 0) {
+      return NULL;
+    }
+
+#ifdef _LIBHDFS_JNI_HELPER_DEBUGGING_ON_
+    printf("+++++++++++++++++\n");
+#endif
+
+    // we don't have to do anything if classpath has no valid wildcards
+    // we get length = 0 when CLASSPATH is set but empty
+    // if CLASSPATH is not empty, then length includes null terminator
+    // an expansion exactly as long as the original is taken to mean that
+    // nothing was expanded, so a duplicate of the original is returned
+    // (length is known to be > 0 before it is converted to size_t)
+    if ((length == 0) || ((size_t)(length - 1) == strlen(classpath))) {
+
+#ifdef _LIBHDFS_JNI_HELPER_DEBUGGING_ON_
+        if ((length == 0) && (strlen(classpath) != 0)) {
+            fprintf(stderr, "Something went wrong with getting the wildcard \
+              expansion length\n" );
+        }
+#endif
+
+        expandedClasspath = strdup(classpath);
+        if (expandedClasspath == NULL) {
+            // report it here: the caller can not tell this apart from an
+            // unset CLASSPATH, and the debug printf below must not see NULL
+            fprintf(stderr, "getClassPath: failed strdup: %s\n",
+              strerror(errno));
+            return NULL;
+        }
+
+#ifdef _LIBHDFS_JNI_HELPER_DEBUGGING_ON_
+        printf("Expanded classpath=%s\n", expandedClasspath);
+#endif
+
+        return expandedClasspath;
+    }
+
+    // Allocate memory for expanded classpath string
+    expandedClasspath = calloc(length, sizeof(char));
+    if (expandedClasspath == NULL) {
+        fprintf(stderr, "getClassPath: failed calloc: %s\n", strerror(errno));
+        return NULL;
+    }
+
+    // Actual expansion
+    retval = getClassPath_helper(classpath, expandedClasspath);
+    if (retval < 0) {
+        free(expandedClasspath);
+        return NULL;
+    }
+
+    // This should not happen, but dotting i's and crossing t's
+    if (retval != length) {
+        // both values are ssize_t, so the matching conversion is %zd
+        fprintf(stderr,
+          "Expected classpath expansion length to be %zd but instead got %zd\n",
+          length, retval);
+        free(expandedClasspath);
+        return NULL;
+    }
+
+#ifdef _LIBHDFS_JNI_HELPER_DEBUGGING_ON_
+    printf("===============\n");
+    printf("Allocated %zd for expanding classpath\n", length);
+    printf("Used %zu for expanding classpath\n", strlen(expandedClasspath) + 1);
+    printf("Expanded classpath=%s\n", expandedClasspath);
+#endif
+
+    return expandedClasspath;
+}
+
+
+/**
  * Get the global JNI environemnt.
  *
  * We only have to create the JVM once.  After that, we can use it in
@@ -393,7 +666,7 @@ static JNIEnv* getGlobalJNIEnv(void)
 
     if (noVMs == 0) {
         //Get the environment variables for initializing the JVM
-        hadoopClassPath = getenv("CLASSPATH");
+        hadoopClassPath = getClassPath();
         if (hadoopClassPath == NULL) {
             fprintf(stderr, "Environment variable CLASSPATH not set!\n");
             return NULL;
@@ -404,6 +677,8 @@ static JNIEnv* getGlobalJNIEnv(void)
         snprintf(optHadoopClassPath, optHadoopClassPathLen,
                 "%s%s", hadoopClassPathVMArg, hadoopClassPath);
 
+        free(hadoopClassPath);
+
         // Determine the # of LIBHDFS_OPTS args
         hadoopJvmArgs = getenv("LIBHDFS_OPTS");
         if (hadoopJvmArgs != NULL)  {
diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/jni_helper.h b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/jni_helper.h
index e63ce53..f0d06d7 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/jni_helper.h
+++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/jni_helper.h
@@ -26,7 +26,15 @@
 #include <stdarg.h>
 #include <errno.h>
 
-#define PATH_SEPARATOR ':'
+#ifdef WIN32
+    #define PATH_SEPARATOR ';'
+    #define PATH_SEPARATOR_STR ";"
+#else
+    #define PATH_SEPARATOR ':'
+    #define PATH_SEPARATOR_STR ":"
+#endif
+
+// #define _LIBHDFS_JNI_HELPER_DEBUGGING_ON_
 
 
 /** Denote the method we want to invoke as STATIC or INSTANCE */
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/LibHdfs.md b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/LibHdfs.md
index 7049dcb..ab0376e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/LibHdfs.md
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/LibHdfs.md
@@ -61,7 +61,8 @@ See the CMake file for `test_libhdfs_ops.c` in the libhdfs source directory (`ha
 Common Problems
 ---------------
 
-The most common problem is the `CLASSPATH` is not set properly when calling a program that uses libhdfs. Make sure you set it to all the Hadoop jars needed to run Hadoop itself as well as the right configuration directory containing `hdfs-site.xml`. It is not valid to use wildcard syntax for specifying multiple jars. It may be useful to run `hadoop classpath --glob` or `hadoop classpath --jar <path`\> to generate the correct classpath for your deployment. See [Hadoop Commands Reference]( [...]
+The most common problem is that the `CLASSPATH` is not set properly when calling a program that uses libhdfs. Make sure you set it to all the Hadoop jars needed to run Hadoop itself as well as the right configuration directory containing `hdfs-site.xml`.
+Wildcard entries in the `CLASSPATH` are now supported by libhdfs: an entry ending in `/*` is expanded to all `.jar` and `.JAR` files in that directory. Other wildcard forms are not expanded.
 
 Thread Safe
 -----------


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org