You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2017/10/19 20:04:59 UTC

orc git commit: ORC-17. Support HDFS as a C++ plugin module.

Repository: orc
Updated Branches:
  refs/heads/master 10c0a858b -> 5831033eb


ORC-17. Support HDFS as a C++ plugin module.

Fixes #134

Signed-off-by: Owen O'Malley <om...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/5831033e
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/5831033e
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/5831033e

Branch: refs/heads/master
Commit: 5831033eb4547e55f632ff12b81081da8274397c
Parents: 10c0a85
Author: Anatoli Shein <an...@hpe.com>
Authored: Wed Aug 30 11:14:18 2017 -0400
Committer: Owen O'Malley <om...@apache.org>
Committed: Thu Oct 19 13:03:54 2017 -0700

----------------------------------------------------------------------
 .travis.yml                             |  10 +-
 CMakeLists.txt                          |   5 +
 c++/include/CMakeLists.txt              |  55 ---------
 c++/include/orc/OrcFile.hh              |  12 ++
 c++/libs/libhdfspp/imported_timestamp   |  10 ++
 c++/libs/libhdfspp/libhdfspp.tar.gz     | Bin 0 -> 950688 bytes
 c++/libs/libhdfspp/pull_hdfs.sh         |  32 +++++
 c++/src/CMakeLists.txt                  |  14 ++-
 c++/src/OrcFile.cc                      |  15 ++-
 c++/src/OrcHdfsFile.cc                  | 173 +++++++++++++++++++++++++++
 cmake_modules/CheckSourceCompiles.cmake |  75 ++++++++++++
 cmake_modules/FindCyrusSASL.cmake       |  49 ++++++++
 cmake_modules/ThirdpartyToolchain.cmake |  60 ++++++++++
 docker/centos7/Dockerfile               |   1 +
 docker/debian8/Dockerfile               |   2 +
 docker/ubuntu14/Dockerfile              |   2 +
 docker/ubuntu16/Dockerfile              |   2 +
 tools/src/FileContents.cc               |   2 +-
 tools/src/FileMemory.cc                 |   2 +-
 tools/src/FileMetadata.cc               |   4 +-
 tools/src/FileScan.cc                   |   2 +-
 tools/src/FileStatistics.cc             |   2 +-
 22 files changed, 465 insertions(+), 64 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/orc/blob/5831033e/.travis.yml
----------------------------------------------------------------------
diff --git a/.travis.yml b/.travis.yml
index 36570b7..86bd67f 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -12,6 +12,14 @@ matrix:
   - compiler: clang
     os: osx
     osx_image: xcode6.4
+  - compiler: clang
+    os: osx
+    osx_image: xcode8.3
+    script:
+    - mkdir build
+    - cd build
+    - cmake -DOPENSSL_ROOT_DIR=`brew --prefix openssl` ..
+    - make package test-out
 
 jdk:
   - openjdk7
@@ -22,4 +30,4 @@ script:
   - mkdir build
   - cd build
   - cmake ..
-  - make package test-out
+  - make package test-out
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/orc/blob/5831033e/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9776b13..1dc91e4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -30,6 +30,10 @@ option (BUILD_JAVA
     "Include ORC Java library in the build process"
      ON)
 
+option (BUILD_LIBHDFSPP
+    "Include LIBHDFSPP library in the build process"
+     ON)
+
 # Make sure that a build type is selected
 if (NOT CMAKE_BUILD_TYPE)
   message(STATUS "No build type selected, default to ReleaseWithDebugInfo")
@@ -84,6 +88,7 @@ endif ()
 
 enable_testing()
 
+INCLUDE(CheckSourceCompiles)
 INCLUDE(ThirdpartyToolchain)
 
 set (EXAMPLE_DIRECTORY ${CMAKE_SOURCE_DIR}/examples)

http://git-wip-us.apache.org/repos/asf/orc/blob/5831033e/c++/include/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/c++/include/CMakeLists.txt b/c++/include/CMakeLists.txt
index bd32f7b..33c5495 100644
--- a/c++/include/CMakeLists.txt
+++ b/c++/include/CMakeLists.txt
@@ -10,61 +10,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX11_FLAGS} ${WARN_FLAGS}")
-
-INCLUDE(CheckCXXSourceCompiles)
-
-CHECK_CXX_SOURCE_COMPILES("
-    #include <initializer_list>
-    struct A {
-      A(std::initializer_list<int> list);
-    };
-    int main(int,char*[]){
-    }"
-  ORC_CXX_HAS_INITIALIZER_LIST
-)
-
-CHECK_CXX_SOURCE_COMPILES("
-    int main(int,char*[]) noexcept {
-      return 0;
-    }"
-  ORC_CXX_HAS_NOEXCEPT
-)
-
-CHECK_CXX_SOURCE_COMPILES("
-    int main(int,char* argv[]){
-      return argv[0] != nullptr;
-    }"
-  ORC_CXX_HAS_NULLPTR
-)
-
-CHECK_CXX_SOURCE_COMPILES("
-    struct A {
-      virtual ~A();
-      virtual void foo();
-    };
-    struct B: public A {
-      virtual void foo() override;
-    };
-    int main(int,char*[]){
-    }"
-  ORC_CXX_HAS_OVERRIDE
-)
-
-CHECK_CXX_SOURCE_COMPILES("
-    #include<memory>
-    int main(int,char* []){
-      std::unique_ptr<int> ptr(new int);
-    }"
-  ORC_CXX_HAS_UNIQUE_PTR
-)
-
-CHECK_CXX_SOURCE_COMPILES("
-    #include <cstdint>
-    int main(int, char*[]) { }"
-  ORC_CXX_HAS_CSTDINT
-)
-
 configure_file (
   "orc/orc-config.hh.in"
   "${CMAKE_CURRENT_BINARY_DIR}/orc/orc-config.hh"

http://git-wip-us.apache.org/repos/asf/orc/blob/5831033e/c++/include/orc/OrcFile.hh
----------------------------------------------------------------------
diff --git a/c++/include/orc/OrcFile.hh b/c++/include/orc/OrcFile.hh
index cb2f8e5..bd866c2 100644
--- a/c++/include/orc/OrcFile.hh
+++ b/c++/include/orc/OrcFile.hh
@@ -103,12 +103,24 @@ namespace orc {
   };
 
   /**
+   * Create a stream to a local file or HDFS file if path begins with "hdfs://"
+   * @param path the name of the file in the local file system or HDFS
+   */
+  ORC_UNIQUE_PTR<InputStream> readFile(const std::string& path);
+
+  /**
    * Create a stream to a local file.
    * @param path the name of the file in the local file system
    */
   ORC_UNIQUE_PTR<InputStream> readLocalFile(const std::string& path);
 
   /**
+   * Create a stream to an HDFS file.
+   * @param path the uri of the file in HDFS
+   */
+  ORC_UNIQUE_PTR<InputStream> readHdfsFile(const std::string& path);
+
+  /**
    * Create a reader to the for the ORC file.
    * @param stream the stream to read
    * @param options the options for reading the file

http://git-wip-us.apache.org/repos/asf/orc/blob/5831033e/c++/libs/libhdfspp/imported_timestamp
----------------------------------------------------------------------
diff --git a/c++/libs/libhdfspp/imported_timestamp b/c++/libs/libhdfspp/imported_timestamp
new file mode 100644
index 0000000..84965ce
--- /dev/null
+++ b/c++/libs/libhdfspp/imported_timestamp
@@ -0,0 +1,10 @@
+Wed Aug 30 10:56:51 EDT 2017
+HDFS-10787
+commit 9587bb04a818a2661e264f619b09c15ce10ff38e
+Author: Anatoli Shein <an...@hpe.com>
+Date:   Wed Aug 30 10:49:42 2017 -0400
+
+    fixed warnings3
+diffs: --------------
+       --------------
+Wed Aug 30 10:56:51 EDT 2017

http://git-wip-us.apache.org/repos/asf/orc/blob/5831033e/c++/libs/libhdfspp/libhdfspp.tar.gz
----------------------------------------------------------------------
diff --git a/c++/libs/libhdfspp/libhdfspp.tar.gz b/c++/libs/libhdfspp/libhdfspp.tar.gz
new file mode 100644
index 0000000..510c304
Binary files /dev/null and b/c++/libs/libhdfspp/libhdfspp.tar.gz differ

http://git-wip-us.apache.org/repos/asf/orc/blob/5831033e/c++/libs/libhdfspp/pull_hdfs.sh
----------------------------------------------------------------------
diff --git a/c++/libs/libhdfspp/pull_hdfs.sh b/c++/libs/libhdfspp/pull_hdfs.sh
new file mode 100755
index 0000000..a207a93
--- /dev/null
+++ b/c++/libs/libhdfspp/pull_hdfs.sh
@@ -0,0 +1,32 @@
+# Rebuilds libhdfspp.tar.gz from a Hadoop checkout and logs the revision in imported_timestamp
+if [ -z "$1" ]; then
+    echo "Usage: pull_hdfs [path_to_hdfs_git_root]"; exit 1
+fi
+if [ ! -d "$1" ]; then
+    echo "$1 is not a directory"; exit 1
+fi
+if [ ! -d "$1/hadoop-hdfs-project" ]; then
+    echo "$1 is not the root of a hadoop git checkout"; exit 1
+fi
+# Absolute paths: a relative argument must stay valid after the cd calls below
+HADOOP_ROOT=$(CDPATH= cd "$1" && pwd)
+echo "HADOOP_ROOT=$HADOOP_ROOT"
+OUT=$(CDPATH= cd "$(dirname "$0")" && pwd -P)
+echo "OUT=$OUT"
+TS="$OUT/imported_timestamp"
+# One && chain: a failed step stops the run before the old tarball is deleted
+    cd "$HADOOP_ROOT" &&
+    mvn -pl :hadoop-hdfs-native-client -Pnative compile -Dnative_make_args="copy_hadoop_files" &&
+    (date > "$TS"; git rev-parse --abbrev-ref HEAD >> "$TS"; git log -n 1 >> "$TS"; \
+        echo "diffs: --------------" >> "$TS"; git diff HEAD >> "$TS"; \
+        echo "       --------------" >> "$TS") &&
+    cd "$OUT" &&
+    #Delete everything except for pull_hdfs.sh and imported_timestamp
+    find . ! -name 'pull_hdfs.sh' ! -name 'imported_timestamp' ! -name '.' ! -name '..' -exec rm -rf {} + &&
+    cp -R "$HADOOP_ROOT/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp" . &&
+    cp -R "$HADOOP_ROOT/hadoop-hdfs-project/hadoop-hdfs-native-client/target/main/native/libhdfspp/extern" libhdfspp/ &&
+    cd libhdfspp &&
+    tar -czf ../libhdfspp.tar.gz * &&
+    cd .. &&
+    rm -rf libhdfspp &&
+    date >> "$TS"
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/orc/blob/5831033e/c++/src/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/c++/src/CMakeLists.txt b/c++/src/CMakeLists.txt
index 243efa6..b6fe1bf 100644
--- a/c++/src/CMakeLists.txt
+++ b/c++/src/CMakeLists.txt
@@ -123,6 +123,7 @@ include_directories (
   ${ZLIB_INCLUDE_DIRS}
   ${SNAPPY_INCLUDE_DIRS}
   ${LZ4_INCLUDE_DIRS}
+  ${LIBHDFSPP_INCLUDE_DIRS}
   )
 
 add_custom_command(OUTPUT orc_proto.pb.h orc_proto.pb.cc
@@ -132,7 +133,7 @@ add_custom_command(OUTPUT orc_proto.pb.h orc_proto.pb.cc
         "${CMAKE_SOURCE_DIR}/proto/orc_proto.proto"
 )
 
-add_library (orc STATIC
+set(SOURCE_FILES
   "${CMAKE_CURRENT_BINARY_DIR}/Adaptor.hh"
   orc_proto.pb.h
   io/InputStream.cc
@@ -161,6 +162,12 @@ add_library (orc STATIC
   Writer.cc
   )
 
+if(ORC_CXX_HAS_THREAD_LOCAL AND BUILD_LIBHDFSPP)
+  set(SOURCE_FILES ${SOURCE_FILES} OrcHdfsFile.cc)
+endif(ORC_CXX_HAS_THREAD_LOCAL AND BUILD_LIBHDFSPP)
+
+add_library (orc STATIC ${SOURCE_FILES})
+
 install(TARGETS orc DESTINATION lib)
 
 target_link_libraries (orc
@@ -168,6 +175,11 @@ target_link_libraries (orc
   ${ZLIB_LIBRARIES}
   ${SNAPPY_LIBRARIES}
   ${LZ4_LIBRARIES}
+  ${LIBHDFSPP_LIBRARIES}
   )
 
 add_dependencies(orc protobuf)
+
+if(ORC_CXX_HAS_THREAD_LOCAL AND BUILD_LIBHDFSPP)
+  add_definitions(-DBUILD_LIBHDFSPP)
+endif(ORC_CXX_HAS_THREAD_LOCAL AND BUILD_LIBHDFSPP)

http://git-wip-us.apache.org/repos/asf/orc/blob/5831033e/c++/src/OrcFile.cc
----------------------------------------------------------------------
diff --git a/c++/src/OrcFile.cc b/c++/src/OrcFile.cc
index 5d9bc80..2331c79 100644
--- a/c++/src/OrcFile.cc
+++ b/c++/src/OrcFile.cc
@@ -28,6 +28,7 @@
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <unistd.h>
+#include <string.h>
 
 namespace orc {
 
@@ -86,8 +87,20 @@ namespace orc {
     close(file);
   }
 
+  // Opens path through HDFS when it begins with "hdfs://" and HDFS support
+  // is compiled in (BUILD_LIBHDFSPP); every other path is a local file.
+  std::unique_ptr<InputStream> readFile(const std::string& path) {
+#ifdef BUILD_LIBHDFSPP
+    if (path.compare(0, 7, "hdfs://") == 0) {
+      return orc::readHdfsFile(path);
+    }
+#endif
+    // path is already a std::string, so it is passed on without a copy
+    return orc::readLocalFile(path);
+  }
+
   std::unique_ptr<InputStream> readLocalFile(const std::string& path) {
-    return std::unique_ptr<InputStream>(new FileInputStream(path));
+      return std::unique_ptr<InputStream>(new FileInputStream(path));
   }
 
   OutputStream::~OutputStream() {

http://git-wip-us.apache.org/repos/asf/orc/blob/5831033e/c++/src/OrcHdfsFile.cc
----------------------------------------------------------------------
diff --git a/c++/src/OrcHdfsFile.cc b/c++/src/OrcHdfsFile.cc
new file mode 100644
index 0000000..fcfd531
--- /dev/null
+++ b/c++/src/OrcHdfsFile.cc
@@ -0,0 +1,173 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "orc/OrcFile.hh"
+
+#include "Adaptor.hh"
+#include "Exceptions.hh"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "hdfspp/hdfspp.h"
+
+namespace orc {
+
+  /**
+   * InputStream over a file in HDFS, read through libhdfs++. Every failure
+   * (bad URI, configs, connect, open, stat, read) throws ParseError.
+   */
+  class HdfsFileInputStream : public InputStream {
+  private:
+    std::string filename;
+    std::unique_ptr<hdfs::FileHandle> file;
+    std::unique_ptr<hdfs::FileSystem> file_system;
+    uint64_t totalLength;
+    const uint64_t READ_SIZE = 1024 * 1024; //1 MB
+
+  public:
+    //_filename is the HDFS URI of the file; if it names no host, the
+    //connection goes to the default file system from the Hadoop configs
+    explicit HdfsFileInputStream(std::string _filename) {
+      filename = _filename;
+      //Building a URI object from the given uri_path
+      hdfs::URI uri;
+      try {
+        uri = hdfs::URI::parse_from_string(filename);
+      } catch (const hdfs::uri_parse_error&) {
+        throw ParseError("Malformed URI: " + filename);
+      }
+      //This sets conf path to default "$HADOOP_CONF_DIR" or "/etc/hadoop/conf"
+      //and loads configs core-site.xml and hdfs-site.xml from the conf path
+      hdfs::ConfigParser parser;
+      if(!parser.LoadDefaultResources()){
+        throw ParseError("Could not load default resources. ");
+      }
+      auto stats = parser.ValidateResources();
+      //validating core-site.xml
+      if(!stats[0].second.ok()){
+        throw ParseError(stats[0].first + " is invalid: " + stats[0].second.ToString());
+      }
+      //validating hdfs-site.xml
+      if(!stats[1].second.ok()){
+        throw ParseError(stats[1].first + " is invalid: " + stats[1].second.ToString());
+      }
+      hdfs::Options options;
+      if(!parser.get_options(options)){
+        throw ParseError("Could not load Options object. ");
+      }
+      hdfs::IoService * io_service = hdfs::IoService::New();
+      //Wrapping file_system into a unique pointer to guarantee deletion
+      file_system = std::unique_ptr<hdfs::FileSystem>(
+          hdfs::FileSystem::New(io_service, "", options));
+      if (file_system.get() == nullptr) {
+        throw ParseError("Can't create FileSystem object. ");
+      }
+      hdfs::Status status;
+      //Checking if the user supplied the host
+      if(!uri.get_host().empty()){
+        //Using port if supplied, otherwise using "" to look up port in configs
+        std::string port = uri.has_port() ?
+            std::to_string(uri.get_port()) : "";
+        status = file_system->Connect(uri.get_host(), port);
+        if (!status.ok()) {
+          throw ParseError("Can't connect to " + uri.get_host()
+              + ":" + port + ". " + status.ToString());
+        }
+      } else {
+        status = file_system->ConnectToDefaultFs();
+        if (!status.ok()) {
+          if(!options.defaultFS.get_host().empty()){
+            throw ParseError("Error connecting to " +
+                options.defaultFS.str() + ". " + status.ToString());
+          } else {
+            throw ParseError(
+                "Error connecting to the cluster: defaultFS is empty. "
+                + status.ToString());
+          }
+        }
+      }
+
+      hdfs::FileHandle *file_raw = nullptr;
+      status = file_system->Open(uri.get_path(), &file_raw);
+      if (!status.ok()) {
+        throw ParseError("Can't open "
+            + uri.get_path() + ". " + status.ToString());
+      }
+      //Wrapping file_raw into a unique pointer to guarantee deletion
+      file.reset(file_raw);
+
+      hdfs::StatInfo stat_info;
+      status = file_system->GetFileInfo(uri.get_path(), stat_info);
+      if (!status.ok()) {
+        throw ParseError("Can't stat "
+            + uri.get_path() + ". " + status.ToString());
+      }
+      totalLength = stat_info.length;
+    }
+
+    uint64_t getLength() const override {
+      return totalLength;
+    }
+
+    uint64_t getNaturalReadSize() const override {
+      return READ_SIZE;
+    }
+
+    //Fills buf with length bytes starting at file position offset; the
+    //PositionRead call is repeated until the whole request has been read.
+    //Throws ParseError if buf is null or a read reports an error.
+    void read(void* buf, uint64_t length, uint64_t offset) override {
+      if (!buf) {
+        throw ParseError("Buffer is null");
+      }
+      char* dest = static_cast<char*>(buf);
+      hdfs::Status status;
+      size_t total_bytes_read = 0;
+      size_t last_bytes_read = 0;
+      do {
+        //Each chunk is stored after the bytes already read, not over them
+        status = file->PositionRead(dest + total_bytes_read,
+            static_cast<size_t>(length) - total_bytes_read,
+            static_cast<off_t>(offset + total_bytes_read), &last_bytes_read);
+        if(!status.ok()) {
+          throw ParseError("Error reading the file: " + status.ToString());
+        }
+        total_bytes_read += last_bytes_read;
+      } while (total_bytes_read < length);
+    }
+
+    const std::string& getName() const override {
+      return filename;
+    }
+
+    ~HdfsFileInputStream();
+  };
+
+  HdfsFileInputStream::~HdfsFileInputStream() {}
+  //Builds an HdfsFileInputStream for path and hands it back as an InputStream
+  std::unique_ptr<InputStream> readHdfsFile(const std::string& path) {
+    std::unique_ptr<InputStream> stream(new HdfsFileInputStream(path));
+    return stream;
+  }
+}

http://git-wip-us.apache.org/repos/asf/orc/blob/5831033e/cmake_modules/CheckSourceCompiles.cmake
----------------------------------------------------------------------
diff --git a/cmake_modules/CheckSourceCompiles.cmake b/cmake_modules/CheckSourceCompiles.cmake
new file mode 100644
index 0000000..7f337fc
--- /dev/null
+++ b/cmake_modules/CheckSourceCompiles.cmake
@@ -0,0 +1,75 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX11_FLAGS} ${WARN_FLAGS}")
+
+INCLUDE(CheckCXXSourceCompiles)
+
+CHECK_CXX_SOURCE_COMPILES("
+    #include <initializer_list>
+    struct A {
+      A(std::initializer_list<int> list);
+    };
+    int main(int,char*[]){
+    }"
+  ORC_CXX_HAS_INITIALIZER_LIST
+)
+
+CHECK_CXX_SOURCE_COMPILES("
+    int main(int,char*[]) noexcept {
+      return 0;
+    }"
+  ORC_CXX_HAS_NOEXCEPT
+)
+
+CHECK_CXX_SOURCE_COMPILES("
+    int main(int,char* argv[]){
+      return argv[0] != nullptr;
+    }"
+  ORC_CXX_HAS_NULLPTR
+)
+
+CHECK_CXX_SOURCE_COMPILES("
+    struct A {
+      virtual ~A();
+      virtual void foo();
+    };
+    struct B: public A {
+      virtual void foo() override;
+    };
+    int main(int,char*[]){
+    }"
+  ORC_CXX_HAS_OVERRIDE
+)
+
+CHECK_CXX_SOURCE_COMPILES("
+    #include<memory>
+    int main(int,char* []){
+      std::unique_ptr<int> ptr(new int);
+    }"
+  ORC_CXX_HAS_UNIQUE_PTR
+)
+
+CHECK_CXX_SOURCE_COMPILES("
+    #include <cstdint>
+    int main(int, char*[]) { }"
+  ORC_CXX_HAS_CSTDINT
+)
+# thread_local support gates the libhdfs++ build (checked with BUILD_LIBHDFSPP)
+CHECK_CXX_SOURCE_COMPILES("
+    #include <thread>
+    int main(void) {
+      thread_local int s;
+      return s;
+    }"
+  ORC_CXX_HAS_THREAD_LOCAL
+)

http://git-wip-us.apache.org/repos/asf/orc/blob/5831033e/cmake_modules/FindCyrusSASL.cmake
----------------------------------------------------------------------
diff --git a/cmake_modules/FindCyrusSASL.cmake b/cmake_modules/FindCyrusSASL.cmake
new file mode 100644
index 0000000..8ce027f
--- /dev/null
+++ b/cmake_modules/FindCyrusSASL.cmake
@@ -0,0 +1,49 @@
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# - Find Cyrus SASL (sasl.h, libsasl2.so)
+#
+# This module defines
+#  CYRUS_SASL_INCLUDE_DIR, directory containing headers
+#  CYRUS_SASL_SHARED_LIB, path to Cyrus SASL's shared library
+#  CYRUS_SASL_FOUND, whether Cyrus SASL and its plugins have been found
+#
+# N.B: we do _not_ include sasl in thirdparty, for a fairly subtle reason. The
+# TLDR version is that newer versions of cyrus-sasl (>=2.1.26) have a bug fix
+# for https://bugzilla.cyrusimap.org/show_bug.cgi?id=3590, but that bug fix
+# relied on a change both on the plugin side and on the library side. If you
+# then try to run the new version of sasl (e.g from our thirdparty tree) with
+# an older version of a plugin (eg from RHEL6 install), you'll get a SASL_NOMECH
+# error due to this bug.
+#
+# In practice, Cyrus-SASL is so commonly used and generally non-ABI-breaking that
+# we should be OK to depend on the host installation.
+
+# Note that this is modified from the version that was copied from our
+# friends at the Kudu project.  The original version implicitly required
+# the Cyrus SASL.  This version will only complain if REQUIRED is added.
+
+
+find_path(CYRUS_SASL_INCLUDE_DIR sasl/sasl.h)
+find_library(CYRUS_SASL_SHARED_LIB sasl2)
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(CYRUS_SASL DEFAULT_MSG
+  CYRUS_SASL_SHARED_LIB CYRUS_SASL_INCLUDE_DIR)
+
+MARK_AS_ADVANCED(CYRUS_SASL_INCLUDE_DIR CYRUS_SASL_SHARED_LIB)

http://git-wip-us.apache.org/repos/asf/orc/blob/5831033e/cmake_modules/ThirdpartyToolchain.cmake
----------------------------------------------------------------------
diff --git a/cmake_modules/ThirdpartyToolchain.cmake b/cmake_modules/ThirdpartyToolchain.cmake
index eef2827..c2ef765 100644
--- a/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cmake_modules/ThirdpartyToolchain.cmake
@@ -148,6 +148,7 @@ set (PROTOBUF_PREFIX "${THIRDPARTY_DIR}/protobuf_ep-install")
 set (PROTOBUF_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/protobuf_ep-prefix/src/protobuf_ep")
 set (PROTOBUF_INCLUDE_DIRS "${PROTOBUF_PREFIX}/include")
 set (PROTOBUF_STATIC_LIB "${PROTOBUF_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}protobuf${CMAKE_STATIC_LIBRARY_SUFFIX}")
+set (PROTOC_STATIC_LIB "${PROTOBUF_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}protoc${CMAKE_STATIC_LIBRARY_SUFFIX}")
 set (PROTOBUF_EXECUTABLE "${PROTOBUF_PREFIX}/bin/protoc")
 set (PROTOBUF_SRC_URL "https://github.com/google/protobuf/releases/download/v${PROTOBUF_VERSION}/protobuf-${PROTOBUF_VERSION}.tar.gz")
 
@@ -168,7 +169,66 @@ add_library (protobuf STATIC IMPORTED)
 set_target_properties (protobuf PROPERTIES IMPORTED_LOCATION ${PROTOBUF_STATIC_LIB})
 add_dependencies (protobuf protobuf_ep)
 set (PROTOBUF_LIBRARIES protobuf)
+
+add_library (protoc STATIC IMPORTED)
+set_target_properties (protoc PROPERTIES IMPORTED_LOCATION ${PROTOC_STATIC_LIB})
+add_dependencies (protoc protobuf_ep)
+
 install(DIRECTORY ${PROTOBUF_PREFIX}/lib DESTINATION .
                                          PATTERN "pkgconfig" EXCLUDE
                                          PATTERN "*.so*" EXCLUDE
                                          PATTERN "*.dylib" EXCLUDE)
+
+# ----------------------------------------------------------------------
+# LIBHDFSPP
+
+if(BUILD_LIBHDFSPP)
+  if(ORC_CXX_HAS_THREAD_LOCAL)
+    find_package(CyrusSASL)
+    find_package(OpenSSL)
+    find_package(Threads)
+    # libhdfs++ is built from the tarball checked in under c++/libs/libhdfspp
+    set (LIBHDFSPP_PREFIX "${THIRDPARTY_DIR}/libhdfspp_ep-install")
+    set (LIBHDFSPP_INCLUDE_DIRS "${LIBHDFSPP_PREFIX}/include")
+    set (LIBHDFSPP_STATIC_LIB_NAME hdfspp_static)
+    set (LIBHDFSPP_STATIC_LIB "${LIBHDFSPP_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}${LIBHDFSPP_STATIC_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}")
+    set (LIBHDFSPP_SRC_URL "${CMAKE_SOURCE_DIR}/c++/libs/libhdfspp/libhdfspp.tar.gz")
+    set (LIBHDFSPP_CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
+                          -DCMAKE_INSTALL_PREFIX=${LIBHDFSPP_PREFIX}
+                          -DPROTOBUF_INCLUDE_DIR=${PROTOBUF_INCLUDE_DIRS}
+                          -DPROTOBUF_LIBRARY=${PROTOBUF_STATIC_LIB}
+                          -DPROTOBUF_PROTOC_LIBRARY=${PROTOC_STATIC_LIB}
+                          -DPROTOBUF_PROTOC_EXECUTABLE=${PROTOBUF_EXECUTABLE}
+                          -DOPENSSL_ROOT_DIR=${OPENSSL_ROOT_DIR}
+                          -DCMAKE_C_FLAGS=${EP_C_FLAGS}
+                          -DBUILD_SHARED_LIBS=OFF
+                          -DHDFSPP_LIBRARY_ONLY=TRUE
+                          -DBUILD_SHARED_HDFSPP=FALSE)
+    # External project for libhdfs++; it depends on protobuf_ep
+    ExternalProject_Add (libhdfspp_ep
+      DEPENDS protobuf_ep
+      URL ${LIBHDFSPP_SRC_URL}
+      LOG_DOWNLOAD 0
+      LOG_CONFIGURE 0
+      LOG_BUILD 0
+      LOG_INSTALL 0
+      BUILD_BYPRODUCTS "${LIBHDFSPP_STATIC_LIB}"
+      CMAKE_ARGS ${LIBHDFSPP_CMAKE_ARGS})
+    # Imported static target for the library file listed in BUILD_BYPRODUCTS
+    include_directories (SYSTEM ${LIBHDFSPP_INCLUDE_DIRS})
+    add_library (libhdfspp STATIC IMPORTED)
+    set_target_properties (libhdfspp PROPERTIES IMPORTED_LOCATION ${LIBHDFSPP_STATIC_LIB})
+    set (LIBHDFSPP_LIBRARIES libhdfspp ${CYRUS_SASL_SHARED_LIB} ${OPENSSL_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
+    add_dependencies (libhdfspp libhdfspp_ep)
+    install(DIRECTORY ${LIBHDFSPP_PREFIX}/lib DESTINATION .
+                                         PATTERN "pkgconfig" EXCLUDE
+                                         PATTERN "*.so*" EXCLUDE
+                                         PATTERN "*.dylib" EXCLUDE)
+  else(ORC_CXX_HAS_THREAD_LOCAL)
+    message(WARNING
+    "WARNING: Libhdfs++ library was not built because the required feature \
+    thread_local storage is not supported by your compiler. Known compilers that \
+    support this feature: GCC, Visual Studio, Clang (community version), \
+    Clang (version for iOS 9 and later), Clang (version for Xcode 8 and later)")
+  endif(ORC_CXX_HAS_THREAD_LOCAL)
+endif(BUILD_LIBHDFSPP)

http://git-wip-us.apache.org/repos/asf/orc/blob/5831033e/docker/centos7/Dockerfile
----------------------------------------------------------------------
diff --git a/docker/centos7/Dockerfile b/docker/centos7/Dockerfile
index 22b3dd0..64adb14 100644
--- a/docker/centos7/Dockerfile
+++ b/docker/centos7/Dockerfile
@@ -24,6 +24,7 @@ RUN yum check-update || true
 RUN yum install -y \
   cmake \
   curl-devel \
+  cyrus-sasl-devel \
   expat-devel \
   gcc \
   gcc-c++ \

http://git-wip-us.apache.org/repos/asf/orc/blob/5831033e/docker/debian8/Dockerfile
----------------------------------------------------------------------
diff --git a/docker/debian8/Dockerfile b/docker/debian8/Dockerfile
index 81fe8e8..504d3a2 100644
--- a/docker/debian8/Dockerfile
+++ b/docker/debian8/Dockerfile
@@ -26,6 +26,8 @@ RUN apt-get install -y \
   gcc \
   g++ \
   git \
+  libsasl2-dev \
+  libssl-dev \
   make \
   maven \
   openjdk-7-jdk

http://git-wip-us.apache.org/repos/asf/orc/blob/5831033e/docker/ubuntu14/Dockerfile
----------------------------------------------------------------------
diff --git a/docker/ubuntu14/Dockerfile b/docker/ubuntu14/Dockerfile
index 67dc289..4f25b26 100644
--- a/docker/ubuntu14/Dockerfile
+++ b/docker/ubuntu14/Dockerfile
@@ -26,6 +26,8 @@ RUN apt-get install -y \
   gcc \
   g++ \
   git \
+  libsasl2-dev \
+  libssl-dev \
   make \
   maven \
   openjdk-7-jdk

http://git-wip-us.apache.org/repos/asf/orc/blob/5831033e/docker/ubuntu16/Dockerfile
----------------------------------------------------------------------
diff --git a/docker/ubuntu16/Dockerfile b/docker/ubuntu16/Dockerfile
index b5761fa..41d65e7 100644
--- a/docker/ubuntu16/Dockerfile
+++ b/docker/ubuntu16/Dockerfile
@@ -27,6 +27,8 @@ RUN apt-get install -y \
   gcc \
   g++ \
   git \
+  libsasl2-dev \
+  libssl-dev \
   make \
   maven \
   tzdata

http://git-wip-us.apache.org/repos/asf/orc/blob/5831033e/tools/src/FileContents.cc
----------------------------------------------------------------------
diff --git a/tools/src/FileContents.cc b/tools/src/FileContents.cc
index 9ff86a0..5a44525 100644
--- a/tools/src/FileContents.cc
+++ b/tools/src/FileContents.cc
@@ -30,7 +30,7 @@ void printContents(const char* filename, const orc::RowReaderOptions& rowReaderO
   orc::ReaderOptions readerOpts;
   std::unique_ptr<orc::Reader> reader;
   std::unique_ptr<orc::RowReader> rowReader;
-  reader = orc::createReader(orc::readLocalFile(std::string(filename)), readerOpts);
+  reader = orc::createReader(orc::readFile(std::string(filename)), readerOpts);
   rowReader = reader->createRowReader(rowReaderOpts);
 
   std::unique_ptr<orc::ColumnVectorBatch> batch = rowReader->createRowBatch(1000);

http://git-wip-us.apache.org/repos/asf/orc/blob/5831033e/tools/src/FileMemory.cc
----------------------------------------------------------------------
diff --git a/tools/src/FileMemory.cc b/tools/src/FileMemory.cc
index c133501..de07e24 100644
--- a/tools/src/FileMemory.cc
+++ b/tools/src/FileMemory.cc
@@ -71,7 +71,7 @@ void processFile(const char* filename,
   readerOpts.setMemoryPool(*(pool.get()));
 
   std::unique_ptr<orc::Reader> reader =
-                  orc::createReader(orc::readLocalFile(std::string(filename)), readerOpts);
+                  orc::createReader(orc::readFile(std::string(filename)), readerOpts);
   std::unique_ptr<orc::RowReader> rowReader = reader->createRowReader(rowReaderOpts);
 
   std::unique_ptr<orc::ColumnVectorBatch> batch =

http://git-wip-us.apache.org/repos/asf/orc/blob/5831033e/tools/src/FileMetadata.cc
----------------------------------------------------------------------
diff --git a/tools/src/FileMetadata.cc b/tools/src/FileMetadata.cc
index 58fe8a2..5731662 100644
--- a/tools/src/FileMetadata.cc
+++ b/tools/src/FileMetadata.cc
@@ -83,7 +83,7 @@ void printRawTail(std::ostream& out,
                   const char*filename) {
   out << "Raw file tail: " << filename << "\n";
   std::unique_ptr<orc::Reader> reader =
-    orc::createReader(orc::readLocalFile(filename), orc::ReaderOptions());
+    orc::createReader(orc::readFile(filename), orc::ReaderOptions());
   // Parse the file tail from the serialized one.
   orc::proto::FileTail tail;
   if (!tail.ParseFromString(reader->getSerializedFileTail())) {
@@ -94,7 +94,7 @@ void printRawTail(std::ostream& out,
 
 void printMetadata(std::ostream & out, const char*filename, bool verbose) {
   std::unique_ptr<orc::Reader> reader =
-    orc::createReader(orc::readLocalFile(filename), orc::ReaderOptions());
+    orc::createReader(orc::readFile(filename), orc::ReaderOptions());
   out << "{ \"name\": \"" << filename << "\",\n";
   uint64_t numberColumns = reader->getType().getMaximumColumnId() + 1;
   out << "  \"type\": \""

http://git-wip-us.apache.org/repos/asf/orc/blob/5831033e/tools/src/FileScan.cc
----------------------------------------------------------------------
diff --git a/tools/src/FileScan.cc b/tools/src/FileScan.cc
index fd5a32b..ed8f323 100644
--- a/tools/src/FileScan.cc
+++ b/tools/src/FileScan.cc
@@ -29,7 +29,7 @@
 void scanFile(std::ostream & out, const char* filename, uint64_t batchSize) {
   orc::ReaderOptions readerOpts;
   std::unique_ptr<orc::Reader> reader =
-    orc::createReader(orc::readLocalFile(filename), readerOpts);
+    orc::createReader(orc::readFile(filename), readerOpts);
   std::unique_ptr<orc::RowReader> rowReader = reader->createRowReader();
   std::unique_ptr<orc::ColumnVectorBatch> batch =
     rowReader->createRowBatch(batchSize);

http://git-wip-us.apache.org/repos/asf/orc/blob/5831033e/tools/src/FileStatistics.cc
----------------------------------------------------------------------
diff --git a/tools/src/FileStatistics.cc b/tools/src/FileStatistics.cc
index 72edffd..98e2f62 100644
--- a/tools/src/FileStatistics.cc
+++ b/tools/src/FileStatistics.cc
@@ -29,7 +29,7 @@ void printStatistics(const char *filename, bool withIndex) {
 
   orc::ReaderOptions opts;
   std::unique_ptr<orc::Reader> reader;
-  reader = orc::createReader(orc::readLocalFile(std::string(filename)), opts);
+  reader = orc::createReader(orc::readFile(std::string(filename)), opts);
 
   // print out all selected columns statistics.
   std::unique_ptr<orc::Statistics> colStats = reader->getStatistics();