You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ki...@apache.org on 2020/02/04 18:25:14 UTC
[hadoop] branch branch-3.1 updated: HDFS-12491. Support wildcard in
CLASSPATH for libhdfs. Contributed by Muhammad Samir Khan.
This is an automated email from the ASF dual-hosted git repository.
kihwal pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new a55a0a1 HDFS-12491. Support wildcard in CLASSPATH for libhdfs. Contributed by Muhammad Samir Khan.
a55a0a1 is described below
commit a55a0a1f6d4361ab28b919410eddc0a2daa8d34a
Author: Kihwal Lee <ki...@apache.org>
AuthorDate: Tue Feb 4 12:24:58 2020 -0600
HDFS-12491. Support wildcard in CLASSPATH for libhdfs. Contributed by Muhammad Samir Khan.
(cherry picked from commit 10a60fbe20bb08cdd71076ea9bf2ebb3a2f6226e)
---
.../src/main/native/libhdfs/jni_helper.c | 277 ++++++++++++++++++++-
.../src/main/native/libhdfs/jni_helper.h | 10 +-
.../hadoop-hdfs/src/site/markdown/LibHdfs.md | 3 +-
3 files changed, 287 insertions(+), 3 deletions(-)
diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/jni_helper.c b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/jni_helper.c
index c45d598..91a3c1c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/jni_helper.c
+++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/jni_helper.c
@@ -24,6 +24,8 @@
#include "os/mutexes.h"
#include "os/thread_local_storage.h"
+#include <errno.h>
+#include <dirent.h>
#include <stdio.h>
#include <string.h>
@@ -358,6 +360,277 @@ done:
/**
+ * For the given path, expand it by filling in with all *.jar or *.JAR files,
+ * separated by PATH_SEPARATOR. Assumes that expanded is big enough to hold the
+ * string, eg allocated after using this function with expanded=NULL to get the
+ * right size. Also assumes that the path ends with a "/.". The length of the
+ * expanded path is returned, which includes space at the end for either a
+ * PATH_SEPARATOR or null terminator.
+ *
+ * Two-pass contract: call once with expanded=NULL to size the result, then
+ * again with a buffer of that size to fill it. Returns -1 on readdir/opendir
+ * errors that can not be treated as "no expansion".
+ */
+static ssize_t wildcard_expandPath(const char* path, char* expanded)
+{
+ struct dirent* file;
+ char* dest = expanded;
+ ssize_t length = 0;
+ size_t pathLength = strlen(path);
+ DIR* dir;
+
+ dir = opendir(path);
+ if (dir != NULL) {
+ // can open dir so try to match with all *.jar and *.JAR entries
+
+#ifdef _LIBHDFS_JNI_HELPER_DEBUGGING_ON_
+ printf("wildcard_expandPath: %s\n", path);
+#endif
+
+ // readdir() returns NULL both at end-of-stream and on error; clearing
+ // errno first lets us distinguish the two cases after the loop.
+ errno = 0;
+ while ((file = readdir(dir)) != NULL) {
+ const char* filename = file->d_name;
+ const size_t filenameLength = strlen(filename);
+ const char* jarExtension;
+
+ // If filename is smaller than 4 characters then it can not possibly
+ // have extension ".jar" or ".JAR"
+ if (filenameLength < 4) {
+ continue;
+ }
+
+ jarExtension = &filename[filenameLength-4];
+ if ((strcmp(jarExtension, ".jar") == 0) ||
+ (strcmp(jarExtension, ".JAR") == 0)) {
+
+ // pathLength includes an extra '.' which we'll use for either
+ // separator or null termination
+ length += pathLength + filenameLength;
+
+#ifdef _LIBHDFS_JNI_HELPER_DEBUGGING_ON_
+ printf("wildcard_scanPath:\t%s\t:\t%zd\n", filename, length);
+#endif
+
+ if (expanded != NULL) {
+ // pathLength includes an extra '.'
+ // Non-terminating strncpy is intentional here: the copy lands
+ // in a pre-sized buffer and a separator/terminator is appended
+ // explicitly below.
+ strncpy(dest, path, pathLength-1);
+ dest += pathLength - 1;
+ strncpy(dest, filename, filenameLength);
+ dest += filenameLength;
+ *dest = PATH_SEPARATOR;
+ dest++;
+
+#ifdef _LIBHDFS_JNI_HELPER_DEBUGGING_ON_
+ printf("wildcard_expandPath:\t%s\t:\t%s\n",
+ filename, expanded);
+#endif
+ }
+ }
+ }
+
+ if (errno != 0) {
+ fprintf(stderr, "wildcard_expandPath: on readdir %s: %s\n",
+ path, strerror(errno));
+ length = -1;
+ }
+
+ // closedir failure is reported but not treated as fatal.
+ if (closedir(dir) != 0) {
+ fprintf(stderr, "wildcard_expandPath: on closedir %s: %s\n",
+ path, strerror(errno));
+ }
+ } else if ((errno != EACCES) && (errno != ENOENT) && (errno != ENOTDIR)) {
+ // can not opendir due to an error we can not handle
+ fprintf(stderr, "wildcard_expandPath: on opendir %s: %s\n", path,
+ strerror(errno));
+ length = -1;
+ }
+
+ if (length == 0) {
+ // either we failed to open dir due to EACCES, ENOENT, or ENOTDIR, or
+ // we did not find any file that matches *.jar or *.JAR
+
+#ifdef _LIBHDFS_JNI_HELPER_DEBUGGING_ON_
+ fprintf(stderr, "wildcard_expandPath: can not expand %.*s*: %s\n",
+ (int)(pathLength-1), path, strerror(errno));
+#endif
+
+ // in this case, the wildcard expansion is the same as the original
+ // +1 for PATH_SEPARATOR or null termination
+ length = pathLength + 1;
+ if (expanded != NULL) {
+ // pathLength includes an extra '.'
+ strncpy(dest, path, pathLength-1);
+ dest += pathLength-1;
+ *dest = '*'; // restore wildcard
+ dest++;
+ *dest = PATH_SEPARATOR;
+ dest++;
+ }
+ }
+
+ return length;
+}
+
+/**
+ * Helper to expand classpaths. Returns the total length of the expanded
+ * classpath. If expandedClasspath is not NULL, then fills that with the
+ * expanded classpath. It assumes that expandedClasspath is of correct size, eg
+ * allocated after using this function with expandedClasspath=NULL to get the
+ * right size.
+ *
+ * Returns -1 on allocation or expansion failure.
+ *
+ * NOTE(review): strtok() keeps hidden global state, so this helper is not
+ * thread-safe; strtok_r() would be — confirm whether callers can race here.
+ */
+static ssize_t getClassPath_helper(const char *classpath, char* expandedClasspath)
+{
+ ssize_t length;
+ ssize_t retval;
+ char* expandedCP_curr;
+ char* cp_token;
+ char* classpath_dup;
+
+ // Duplicate because strtok() mutates its input in place.
+ classpath_dup = strdup(classpath);
+ if (classpath_dup == NULL) {
+ fprintf(stderr, "getClassPath_helper: failed strdup: %s\n",
+ strerror(errno));
+ return -1;
+ }
+
+ length = 0;
+
+ // expandedCP_curr is the current pointer
+ expandedCP_curr = expandedClasspath;
+
+ cp_token = strtok(classpath_dup, PATH_SEPARATOR_STR);
+ while (cp_token != NULL) {
+ size_t tokenlen;
+
+#ifdef _LIBHDFS_JNI_HELPER_DEBUGGING_ON_
+ printf("%s\n", cp_token);
+#endif
+
+ tokenlen = strlen(cp_token);
+ // We only expand if token ends with "/*"
+ if ((tokenlen > 1) &&
+ (cp_token[tokenlen-1] == '*') && (cp_token[tokenlen-2] == '/')) {
+ // replace the '*' with '.' so that we don't have to allocate another
+ // string for passing to opendir() in wildcard_expandPath()
+ cp_token[tokenlen-1] = '.';
+ retval = wildcard_expandPath(cp_token, expandedCP_curr);
+ if (retval < 0) {
+ free(classpath_dup);
+ return -1;
+ }
+
+ length += retval;
+ if (expandedCP_curr != NULL) {
+ expandedCP_curr += retval;
+ }
+ } else {
+ // +1 for path separator or null terminator
+ length += tokenlen + 1;
+ if (expandedCP_curr != NULL) {
+ // Non-terminating strncpy is fine: a separator (or, at the very
+ // end, the null terminator) is written right after the token.
+ strncpy(expandedCP_curr, cp_token, tokenlen);
+ expandedCP_curr += tokenlen;
+ *expandedCP_curr = PATH_SEPARATOR;
+ expandedCP_curr++;
+ }
+ }
+
+ cp_token = strtok(NULL, PATH_SEPARATOR_STR);
+ }
+
+ // Overwrite the trailing PATH_SEPARATOR with the null terminator.
+ // NOTE(review): this decrement assumes at least one token was written,
+ // i.e. the caller only passes a buffer after the sizing pass reported a
+ // non-zero length — verify no all-separator input reaches here with a
+ // non-NULL buffer.
+ if (expandedCP_curr != NULL) {
+ expandedCP_curr--;
+ *expandedCP_curr = '\0';
+ }
+
+ free(classpath_dup);
+ return length;
+}
+
+/**
+ * Gets the classpath. Wild card entries are resolved only if the entry ends
+ * with "/\*" (backslash to escape commenting) to match against .jar and .JAR.
+ * All other wild card entries (eg /path/to/dir/\*foo*) are not resolved,
+ * following JAVA default behavior, see:
+ * https://docs.oracle.com/javase/8/docs/technotes/tools/unix/classpath.html
+ *
+ * Returns a heap-allocated string which the caller must free(), or NULL when
+ * the CLASSPATH environment variable is not set or when expansion fails.
+ */
+static char* getClassPath()
+{
+ char* classpath;
+ char* expandedClasspath;
+ ssize_t length;
+ ssize_t retval;
+
+ classpath = getenv("CLASSPATH");
+ if (classpath == NULL) {
+ return NULL;
+ }
+
+ // First, get the total size of the string we will need for the expanded
+ // classpath
+ length = getClassPath_helper(classpath, NULL);
+ if (length < 0) {
+ return NULL;
+ }
+
+#ifdef _LIBHDFS_JNI_HELPER_DEBUGGING_ON_
+ printf("+++++++++++++++++\n");
+#endif
+
+ // we don't have to do anything if classpath has no valid wildcards
+ // we get length = 0 when CLASSPATH is set but empty
+ // if CLASSPATH is not empty, then length includes null terminator
+ // if length of expansion is same as original, then return a duplicate of
+ // original since expansion can only be longer
+ // (the cast is safe: length >= 1 whenever the second clause is evaluated)
+ if ((length == 0) || ((size_t)(length - 1) == strlen(classpath))) {
+
+#ifdef _LIBHDFS_JNI_HELPER_DEBUGGING_ON_
+ if ((length == 0) && (strlen(classpath) != 0)) {
+ fprintf(stderr, "Something went wrong with getting the wildcard \
+ expansion length\n" );
+ }
+#endif
+
+ expandedClasspath = strdup(classpath);
+ if (expandedClasspath == NULL) {
+ fprintf(stderr, "getClassPath: failed strdup: %s\n",
+ strerror(errno));
+ }
+
+#ifdef _LIBHDFS_JNI_HELPER_DEBUGGING_ON_
+ printf("Expanded classpath=%s\n", expandedClasspath);
+#endif
+
+ return expandedClasspath;
+ }
+
+ // Allocate memory for expanded classpath string
+ expandedClasspath = calloc(length, sizeof(char));
+ if (expandedClasspath == NULL) {
+ fprintf(stderr, "getClassPath: failed calloc: %s\n", strerror(errno));
+ return NULL;
+ }
+
+ // Actual expansion
+ retval = getClassPath_helper(classpath, expandedClasspath);
+ if (retval < 0) {
+ free(expandedClasspath);
+ return NULL;
+ }
+
+ // This should not happen, but dotting i's and crossing t's
+ if (retval != length) {
+ // %zd matches the ssize_t arguments (%zu would be a specifier/type
+ // mismatch, which is undefined behavior)
+ fprintf(stderr,
+ "Expected classpath expansion length to be %zd but instead got %zd\n",
+ length, retval);
+ free(expandedClasspath);
+ return NULL;
+ }
+
+#ifdef _LIBHDFS_JNI_HELPER_DEBUGGING_ON_
+ printf("===============\n");
+ printf("Allocated %zd for expanding classpath\n", length);
+ printf("Used %zu for expanding classpath\n", strlen(expandedClasspath) + 1);
+ printf("Expanded classpath=%s\n", expandedClasspath);
+#endif
+
+ return expandedClasspath;
+}
+
+
+/**
* Get the global JNI environment.
*
* We only have to create the JVM once. After that, we can use it in
@@ -393,7 +666,7 @@ static JNIEnv* getGlobalJNIEnv(void)
if (noVMs == 0) {
//Get the environment variables for initializing the JVM
- hadoopClassPath = getenv("CLASSPATH");
+ hadoopClassPath = getClassPath();
if (hadoopClassPath == NULL) {
fprintf(stderr, "Environment variable CLASSPATH not set!\n");
return NULL;
@@ -404,6 +677,8 @@ static JNIEnv* getGlobalJNIEnv(void)
snprintf(optHadoopClassPath, optHadoopClassPathLen,
"%s%s", hadoopClassPathVMArg, hadoopClassPath);
+ free(hadoopClassPath);
+
// Determine the # of LIBHDFS_OPTS args
hadoopJvmArgs = getenv("LIBHDFS_OPTS");
if (hadoopJvmArgs != NULL) {
diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/jni_helper.h b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/jni_helper.h
index e63ce53..f0d06d7 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/jni_helper.h
+++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/jni_helper.h
@@ -26,7 +26,15 @@
#include <stdarg.h>
#include <errno.h>
-#define PATH_SEPARATOR ':'
+#ifdef WIN32
+ #define PATH_SEPARATOR ';'
+ #define PATH_SEPARATOR_STR ";"
+#else
+ #define PATH_SEPARATOR ':'
+ #define PATH_SEPARATOR_STR ":"
+#endif
+
+// #define _LIBHDFS_JNI_HELPER_DEBUGGING_ON_
/** Denote the method we want to invoke as STATIC or INSTANCE */
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/LibHdfs.md b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/LibHdfs.md
index 7049dcb..ab0376e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/LibHdfs.md
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/LibHdfs.md
@@ -61,7 +61,8 @@ See the CMake file for `test_libhdfs_ops.c` in the libhdfs source directory (`ha
Common Problems
---------------
-The most common problem is the `CLASSPATH` is not set properly when calling a program that uses libhdfs. Make sure you set it to all the Hadoop jars needed to run Hadoop itself as well as the right configuration directory containing `hdfs-site.xml`. It is not valid to use wildcard syntax for specifying multiple jars. It may be useful to run `hadoop classpath --glob` or `hadoop classpath --jar <path`\> to generate the correct classpath for your deployment. See [Hadoop Commands Reference]( [...]
+The most common problem is the `CLASSPATH` is not set properly when calling a program that uses libhdfs. Make sure you set it to all the Hadoop jars needed to run Hadoop itself as well as the right configuration directory containing `hdfs-site.xml`.
+Wildcard entries in the `CLASSPATH` are now supported by libhdfs.
Thread Safe
-----------
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org