You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/09/05 23:17:23 UTC
arrow git commit: ARROW-1317: [Python] Attempt to set Hadoop
CLASSPATH when using JNI
Repository: arrow
Updated Branches:
refs/heads/master 2660dda40 -> f355354c2
ARROW-1317: [Python] Attempt to set Hadoop CLASSPATH when using JNI
Author: Wes McKinney <we...@twosigma.com>
Closes #1040 from wesm/ARROW-1317 and squashes the following commits:
274f839c [Wes McKinney] Add note to documentation
1d664b29 [Wes McKinney] If HADOOP_HOME is not set, see if 'hadoop' is in PATH
c6e52d12 [Wes McKinney] Try to set CLASSPATH if HADOOP_HOME is set but classpath is not
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/f355354c
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/f355354c
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/f355354c
Branch: refs/heads/master
Commit: f355354c24d58ea86558e8fc46f33b47d402ac04
Parents: 2660dda
Author: Wes McKinney <we...@twosigma.com>
Authored: Tue Sep 5 19:17:19 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Tue Sep 5 19:17:19 2017 -0400
----------------------------------------------------------------------
python/doc/source/filesystems.rst | 3 +++
python/pyarrow/hdfs.py | 19 +++++++++++++++++++
2 files changed, 22 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/f355354c/python/doc/source/filesystems.rst
----------------------------------------------------------------------
diff --git a/python/doc/source/filesystems.rst b/python/doc/source/filesystems.rst
index c0530f9..5c3297b 100644
--- a/python/doc/source/filesystems.rst
+++ b/python/doc/source/filesystems.rst
@@ -54,6 +54,9 @@ LD_LIBRARY_PATH), and relies on some environment variables.
export CLASSPATH=`$HADOOP_HOME/bin/hdfs classpath --glob`
+If ``CLASSPATH`` is not set, then it will be set automatically if the
+``hadoop`` executable is in your system path, or if ``HADOOP_HOME`` is set.
+
You can also use libhdfs3, a third-party C++ library for HDFS from Pivotal Labs:
.. code-block:: python
http://git-wip-us.apache.org/repos/asf/arrow/blob/f355354c/python/pyarrow/hdfs.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/hdfs.py b/python/pyarrow/hdfs.py
index 2f20be2..3c9d041 100644
--- a/python/pyarrow/hdfs.py
+++ b/python/pyarrow/hdfs.py
@@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.
+import os
import posixpath
from pyarrow.util import implements
@@ -30,6 +31,9 @@ class HadoopFileSystem(lib.HadoopFileSystem, FileSystem):
def __init__(self, host="default", port=0, user=None, kerb_ticket=None,
driver='libhdfs'):
+ if driver == 'libhdfs':
+ _maybe_set_hadoop_classpath()
+
self._connect(host, port, user, kerb_ticket, driver)
@implements(FileSystem.isdir)
@@ -105,6 +109,21 @@ class HadoopFileSystem(lib.HadoopFileSystem, FileSystem):
yield tup
+def _maybe_set_hadoop_classpath():
+ import subprocess
+
+ if 'hadoop' in os.environ.get('CLASSPATH', ''):
+ return
+
+ if 'HADOOP_HOME' in os.environ:
+ hadoop_bin = '{0}/bin/hadoop'.format(os.environ['HADOOP_HOME'])
+ else:
+ hadoop_bin = 'hadoop'
+
+ classpath = subprocess.check_output([hadoop_bin, 'classpath', '--glob'])
+ os.environ['CLASSPATH'] = classpath.decode('utf-8')
+
+
def _libhdfs_walk_files_dirs(top_path, contents):
files = []
directories = []