You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2017/10/19 20:04:59 UTC
orc git commit: ORC-17. Support HDFS as a C++ plugin module.
Repository: orc
Updated Branches:
refs/heads/master 10c0a858b -> 5831033eb
ORC-17. Support HDFS as a C++ plugin module.
Fixes #134
Signed-off-by: Owen O'Malley <om...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/5831033e
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/5831033e
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/5831033e
Branch: refs/heads/master
Commit: 5831033eb4547e55f632ff12b81081da8274397c
Parents: 10c0a85
Author: Anatoli Shein <an...@hpe.com>
Authored: Wed Aug 30 11:14:18 2017 -0400
Committer: Owen O'Malley <om...@apache.org>
Committed: Thu Oct 19 13:03:54 2017 -0700
----------------------------------------------------------------------
.travis.yml | 10 +-
CMakeLists.txt | 5 +
c++/include/CMakeLists.txt | 55 ---------
c++/include/orc/OrcFile.hh | 12 ++
c++/libs/libhdfspp/imported_timestamp | 10 ++
c++/libs/libhdfspp/libhdfspp.tar.gz | Bin 0 -> 950688 bytes
c++/libs/libhdfspp/pull_hdfs.sh | 32 +++++
c++/src/CMakeLists.txt | 14 ++-
c++/src/OrcFile.cc | 15 ++-
c++/src/OrcHdfsFile.cc | 173 +++++++++++++++++++++++++++
cmake_modules/CheckSourceCompiles.cmake | 75 ++++++++++++
cmake_modules/FindCyrusSASL.cmake | 49 ++++++++
cmake_modules/ThirdpartyToolchain.cmake | 60 ++++++++++
docker/centos7/Dockerfile | 1 +
docker/debian8/Dockerfile | 2 +
docker/ubuntu14/Dockerfile | 2 +
docker/ubuntu16/Dockerfile | 2 +
tools/src/FileContents.cc | 2 +-
tools/src/FileMemory.cc | 2 +-
tools/src/FileMetadata.cc | 4 +-
tools/src/FileScan.cc | 2 +-
tools/src/FileStatistics.cc | 2 +-
22 files changed, 465 insertions(+), 64 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/orc/blob/5831033e/.travis.yml
----------------------------------------------------------------------
diff --git a/.travis.yml b/.travis.yml
index 36570b7..86bd67f 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -12,6 +12,14 @@ matrix:
- compiler: clang
os: osx
osx_image: xcode6.4
+ - compiler: clang
+ os: osx
+ osx_image: xcode8.3
+ script:
+ - mkdir build
+ - cd build
+ - cmake -DOPENSSL_ROOT_DIR=`brew --prefix openssl` ..
+ - make package test-out
jdk:
- openjdk7
@@ -22,4 +30,4 @@ script:
- mkdir build
- cd build
- cmake ..
- - make package test-out
+ - make package test-out
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/orc/blob/5831033e/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9776b13..1dc91e4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -30,6 +30,10 @@ option (BUILD_JAVA
"Include ORC Java library in the build process"
ON)
+option (BUILD_LIBHDFSPP
+ "Include LIBHDFSPP library in the build process"
+ ON)
+
# Make sure that a build type is selected
if (NOT CMAKE_BUILD_TYPE)
message(STATUS "No build type selected, default to ReleaseWithDebugInfo")
@@ -84,6 +88,7 @@ endif ()
enable_testing()
+INCLUDE(CheckSourceCompiles)
INCLUDE(ThirdpartyToolchain)
set (EXAMPLE_DIRECTORY ${CMAKE_SOURCE_DIR}/examples)
http://git-wip-us.apache.org/repos/asf/orc/blob/5831033e/c++/include/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/c++/include/CMakeLists.txt b/c++/include/CMakeLists.txt
index bd32f7b..33c5495 100644
--- a/c++/include/CMakeLists.txt
+++ b/c++/include/CMakeLists.txt
@@ -10,61 +10,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX11_FLAGS} ${WARN_FLAGS}")
-
-INCLUDE(CheckCXXSourceCompiles)
-
-CHECK_CXX_SOURCE_COMPILES("
- #include <initializer_list>
- struct A {
- A(std::initializer_list<int> list);
- };
- int main(int,char*[]){
- }"
- ORC_CXX_HAS_INITIALIZER_LIST
-)
-
-CHECK_CXX_SOURCE_COMPILES("
- int main(int,char*[]) noexcept {
- return 0;
- }"
- ORC_CXX_HAS_NOEXCEPT
-)
-
-CHECK_CXX_SOURCE_COMPILES("
- int main(int,char* argv[]){
- return argv[0] != nullptr;
- }"
- ORC_CXX_HAS_NULLPTR
-)
-
-CHECK_CXX_SOURCE_COMPILES("
- struct A {
- virtual ~A();
- virtual void foo();
- };
- struct B: public A {
- virtual void foo() override;
- };
- int main(int,char*[]){
- }"
- ORC_CXX_HAS_OVERRIDE
-)
-
-CHECK_CXX_SOURCE_COMPILES("
- #include<memory>
- int main(int,char* []){
- std::unique_ptr<int> ptr(new int);
- }"
- ORC_CXX_HAS_UNIQUE_PTR
-)
-
-CHECK_CXX_SOURCE_COMPILES("
- #include <cstdint>
- int main(int, char*[]) { }"
- ORC_CXX_HAS_CSTDINT
-)
-
configure_file (
"orc/orc-config.hh.in"
"${CMAKE_CURRENT_BINARY_DIR}/orc/orc-config.hh"
http://git-wip-us.apache.org/repos/asf/orc/blob/5831033e/c++/include/orc/OrcFile.hh
----------------------------------------------------------------------
diff --git a/c++/include/orc/OrcFile.hh b/c++/include/orc/OrcFile.hh
index cb2f8e5..bd866c2 100644
--- a/c++/include/orc/OrcFile.hh
+++ b/c++/include/orc/OrcFile.hh
@@ -103,12 +103,24 @@ namespace orc {
};
/**
+ * Create a stream to an HDFS file if path begins with "hdfs://" (HDFS-enabled builds only), otherwise to a local file.
+ * @param path the name of the file in the local file system or HDFS
+ */
+ ORC_UNIQUE_PTR<InputStream> readFile(const std::string& path);
+
+ /**
* Create a stream to a local file.
* @param path the name of the file in the local file system
*/
ORC_UNIQUE_PTR<InputStream> readLocalFile(const std::string& path);
/**
+ * Create a stream to an HDFS file.
+ * @param path the uri of the file in HDFS
+ */
+ ORC_UNIQUE_PTR<InputStream> readHdfsFile(const std::string& path);
+
+ /**
* Create a reader to the for the ORC file.
* @param stream the stream to read
* @param options the options for reading the file
http://git-wip-us.apache.org/repos/asf/orc/blob/5831033e/c++/libs/libhdfspp/imported_timestamp
----------------------------------------------------------------------
diff --git a/c++/libs/libhdfspp/imported_timestamp b/c++/libs/libhdfspp/imported_timestamp
new file mode 100644
index 0000000..84965ce
--- /dev/null
+++ b/c++/libs/libhdfspp/imported_timestamp
@@ -0,0 +1,10 @@
+Wed Aug 30 10:56:51 EDT 2017
+HDFS-10787
+commit 9587bb04a818a2661e264f619b09c15ce10ff38e
+Author: Anatoli Shein <an...@hpe.com>
+Date: Wed Aug 30 10:49:42 2017 -0400
+
+ fixed warnings3
+diffs: --------------
+ --------------
+Wed Aug 30 10:56:51 EDT 2017
http://git-wip-us.apache.org/repos/asf/orc/blob/5831033e/c++/libs/libhdfspp/libhdfspp.tar.gz
----------------------------------------------------------------------
diff --git a/c++/libs/libhdfspp/libhdfspp.tar.gz b/c++/libs/libhdfspp/libhdfspp.tar.gz
new file mode 100644
index 0000000..510c304
Binary files /dev/null and b/c++/libs/libhdfspp/libhdfspp.tar.gz differ
http://git-wip-us.apache.org/repos/asf/orc/blob/5831033e/c++/libs/libhdfspp/pull_hdfs.sh
----------------------------------------------------------------------
diff --git a/c++/libs/libhdfspp/pull_hdfs.sh b/c++/libs/libhdfspp/pull_hdfs.sh
new file mode 100755
index 0000000..a207a93
--- /dev/null
+++ b/c++/libs/libhdfspp/pull_hdfs.sh
@@ -0,0 +1,32 @@
+#!/bin/sh
+# Refreshes the vendored libhdfs++ sources from a Hadoop git checkout:
+# compiles hadoop-hdfs-native-client, records the branch, last commit and
+# local diffs in imported_timestamp, then repacks libhdfspp.tar.gz in this
+# script's directory.
+#
+# Usage: pull_hdfs [path_to_hdfs_git_root]
+if [ -z "$1" ]; then
+ echo "Usage: pull_hdfs [path_to_hdfs_git_root]"
+ exit 1
+fi
+# Both checks must abort: continuing would run the destructive cleanup below
+# against a source tree that cannot provide replacement files.
+if [ ! -d "$1" ]; then
+ echo "$1 is not a directory"
+ exit 1
+fi
+if [ ! -d "$1/hadoop-hdfs-project" ]; then
+ echo "$1 is not the root of a hadoop git checkout"
+ exit 1
+fi
+
+# Resolve to an absolute path; the script changes directory before it copies
+# from HADOOP_ROOT, so a relative argument would no longer resolve.
+HADOOP_ROOT=$(cd "$1" && pwd) || exit 1
+echo "HADOOP_ROOT=$HADOOP_ROOT"
+# Directory that holds this script (symlinks resolved); output is written here.
+OUT=$(cd "$(dirname "$0")" && pwd -P) || exit 1
+echo "OUT=$OUT"
+TS="$OUT/imported_timestamp"
+
+# Every step is chained with && so that a failed build never rewrites the
+# timestamp file or deletes the existing tarball.
+ cd "$HADOOP_ROOT" &&
+ mvn -pl :hadoop-hdfs-native-client -Pnative compile -Dnative_make_args="copy_hadoop_files" &&
+ (date > "$TS"; git rev-parse --abbrev-ref HEAD >> "$TS"; git log -n 1 >> "$TS"; \
+ echo "diffs: --------------" >> "$TS"; git diff HEAD >> "$TS"; \
+ echo " --------------" >> "$TS") &&
+ cd "$OUT" &&
+ #Delete everything except for pull_hdfs.sh and imported_timestamp
+ find . ! -name 'pull_hdfs.sh' ! -name 'imported_timestamp' ! -name '.' ! -name '..' -exec rm -rf {} + &&
+ cp -R "$HADOOP_ROOT/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp" . &&
+ cp -R "$HADOOP_ROOT/hadoop-hdfs-project/hadoop-hdfs-native-client/target/main/native/libhdfspp/extern" libhdfspp/ &&
+ cd libhdfspp &&
+ tar -czf ../libhdfspp.tar.gz * &&
+ cd .. &&
+ rm -rf libhdfspp &&
+ date >> "$TS"
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/orc/blob/5831033e/c++/src/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/c++/src/CMakeLists.txt b/c++/src/CMakeLists.txt
index 243efa6..b6fe1bf 100644
--- a/c++/src/CMakeLists.txt
+++ b/c++/src/CMakeLists.txt
@@ -123,6 +123,7 @@ include_directories (
${ZLIB_INCLUDE_DIRS}
${SNAPPY_INCLUDE_DIRS}
${LZ4_INCLUDE_DIRS}
+ ${LIBHDFSPP_INCLUDE_DIRS}
)
add_custom_command(OUTPUT orc_proto.pb.h orc_proto.pb.cc
@@ -132,7 +133,7 @@ add_custom_command(OUTPUT orc_proto.pb.h orc_proto.pb.cc
"${CMAKE_SOURCE_DIR}/proto/orc_proto.proto"
)
-add_library (orc STATIC
+set(SOURCE_FILES
"${CMAKE_CURRENT_BINARY_DIR}/Adaptor.hh"
orc_proto.pb.h
io/InputStream.cc
@@ -161,6 +162,12 @@ add_library (orc STATIC
Writer.cc
)
+if(ORC_CXX_HAS_THREAD_LOCAL AND BUILD_LIBHDFSPP)
+ set(SOURCE_FILES ${SOURCE_FILES} OrcHdfsFile.cc)
+endif(ORC_CXX_HAS_THREAD_LOCAL AND BUILD_LIBHDFSPP)
+
+add_library (orc STATIC ${SOURCE_FILES})
+
install(TARGETS orc DESTINATION lib)
target_link_libraries (orc
@@ -168,6 +175,11 @@ target_link_libraries (orc
${ZLIB_LIBRARIES}
${SNAPPY_LIBRARIES}
${LZ4_LIBRARIES}
+ ${LIBHDFSPP_LIBRARIES}
)
add_dependencies(orc protobuf)
+
+if(ORC_CXX_HAS_THREAD_LOCAL AND BUILD_LIBHDFSPP)
+ add_definitions(-DBUILD_LIBHDFSPP)
+endif(ORC_CXX_HAS_THREAD_LOCAL AND BUILD_LIBHDFSPP)
http://git-wip-us.apache.org/repos/asf/orc/blob/5831033e/c++/src/OrcFile.cc
----------------------------------------------------------------------
diff --git a/c++/src/OrcFile.cc b/c++/src/OrcFile.cc
index 5d9bc80..2331c79 100644
--- a/c++/src/OrcFile.cc
+++ b/c++/src/OrcFile.cc
@@ -28,6 +28,7 @@
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
+#include <string.h>
namespace orc {
@@ -86,8 +87,20 @@ namespace orc {
close(file);
}
+ /**
+  * Open path as an input stream. In builds that define BUILD_LIBHDFSPP a
+  * path starting with "hdfs://" is handed to readHdfsFile; every other path
+  * (and every path in builds without BUILD_LIBHDFSPP) goes to readLocalFile.
+  */
+ std::unique_ptr<InputStream> readFile(const std::string& path) {
+#ifdef BUILD_LIBHDFSPP
+   const bool isHdfsUri = strncmp(path.c_str(), "hdfs://", 7) == 0;
+   if (isHdfsUri) {
+     return orc::readHdfsFile(path);
+   }
+#endif
+   return orc::readLocalFile(path);
+ }
+
std::unique_ptr<InputStream> readLocalFile(const std::string& path) {
- return std::unique_ptr<InputStream>(new FileInputStream(path));
+ return std::unique_ptr<InputStream>(new FileInputStream(path));
}
OutputStream::~OutputStream() {
http://git-wip-us.apache.org/repos/asf/orc/blob/5831033e/c++/src/OrcHdfsFile.cc
----------------------------------------------------------------------
diff --git a/c++/src/OrcHdfsFile.cc b/c++/src/OrcHdfsFile.cc
new file mode 100644
index 0000000..fcfd531
--- /dev/null
+++ b/c++/src/OrcHdfsFile.cc
@@ -0,0 +1,173 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "orc/OrcFile.hh"
+
+#include "Adaptor.hh"
+#include "Exceptions.hh"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "hdfspp/hdfspp.h"
+
+namespace orc {
+
+  /**
+   * InputStream implementation that reads a file stored in HDFS through
+   * libhdfs++. The constructor parses the URI, loads the default Hadoop
+   * configuration, connects to the cluster, opens the file and records its
+   * length; every failure is reported as a ParseError.
+   */
+  class HdfsFileInputStream : public InputStream {
+  private:
+    std::string filename;
+    std::unique_ptr<hdfs::FileHandle> file;
+    std::unique_ptr<hdfs::FileSystem> file_system;
+    uint64_t totalLength;
+    const uint64_t READ_SIZE = 1024 * 1024; //1 MB
+
+  public:
+    /**
+     * Connect to HDFS and open the given file.
+     * @param _filename URI of the file; when it names no host, the default
+     *        file system from the loaded configuration is used
+     * @throws ParseError if the URI is malformed, the configuration cannot
+     *         be loaded or validated, the connection fails, or the file
+     *         cannot be opened or stat'ed
+     */
+    explicit HdfsFileInputStream(std::string _filename) {
+      filename = _filename;
+
+      //Building a URI object from the given uri_path
+      hdfs::URI uri;
+      try {
+        uri = hdfs::URI::parse_from_string(filename);
+      } catch (const hdfs::uri_parse_error&) {
+        throw ParseError("Malformed URI: " + filename);
+      }
+
+      //This sets conf path to default "$HADOOP_CONF_DIR" or "/etc/hadoop/conf"
+      //and loads configs core-site.xml and hdfs-site.xml from the conf path
+      hdfs::ConfigParser parser;
+      if (!parser.LoadDefaultResources()) {
+        throw ParseError("Could not load default resources. ");
+      }
+      //Validating every loaded resource (core-site.xml, hdfs-site.xml).
+      //Iterating instead of indexing [0]/[1] avoids reading past the end
+      //if fewer entries than expected are returned.
+      auto stats = parser.ValidateResources();
+      for (const auto& resource : stats) {
+        if (!resource.second.ok()) {
+          throw ParseError(resource.first + " is invalid: "
+                           + resource.second.ToString());
+        }
+      }
+      hdfs::Options options;
+      if (!parser.get_options(options)) {
+        throw ParseError("Could not load Options object. ");
+      }
+      hdfs::IoService * io_service = hdfs::IoService::New();
+      //Wrapping file_system into a unique pointer to guarantee deletion
+      file_system = std::unique_ptr<hdfs::FileSystem>(
+          hdfs::FileSystem::New(io_service, "", options));
+      if (file_system.get() == nullptr) {
+        throw ParseError("Can't create FileSystem object. ");
+      }
+      hdfs::Status status;
+      //Checking if the user supplied the host
+      if (!uri.get_host().empty()) {
+        //Using port if supplied, otherwise using "" to look up port in configs
+        std::string port = uri.has_port() ?
+            std::to_string(uri.get_port()) : "";
+        status = file_system->Connect(uri.get_host(), port);
+        if (!status.ok()) {
+          throw ParseError("Can't connect to " + uri.get_host()
+              + ":" + port + ". " + status.ToString());
+        }
+      } else {
+        status = file_system->ConnectToDefaultFs();
+        if (!status.ok()) {
+          if (!options.defaultFS.get_host().empty()) {
+            throw ParseError("Error connecting to " +
+                options.defaultFS.str() + ". " + status.ToString());
+          } else {
+            throw ParseError(
+                "Error connecting to the cluster: defaultFS is empty. "
+                + status.ToString());
+          }
+        }
+      }
+
+      hdfs::FileHandle *file_raw = nullptr;
+      status = file_system->Open(uri.get_path(), &file_raw);
+      if (!status.ok()) {
+        throw ParseError("Can't open "
+            + uri.get_path() + ". " + status.ToString());
+      }
+      //Wrapping file_raw into a unique pointer to guarantee deletion
+      file.reset(file_raw);
+
+      hdfs::StatInfo stat_info;
+      status = file_system->GetFileInfo(uri.get_path(), stat_info);
+      if (!status.ok()) {
+        throw ParseError("Can't stat "
+            + uri.get_path() + ". " + status.ToString());
+      }
+      totalLength = stat_info.length;
+    }
+
+    /** @return the file length reported by GetFileInfo at open time */
+    uint64_t getLength() const override {
+      return totalLength;
+    }
+
+    /** @return the preferred read size (READ_SIZE, 1 MB) */
+    uint64_t getNaturalReadSize() const override {
+      return READ_SIZE;
+    }
+
+    /**
+     * Read exactly length bytes starting at offset into buf, issuing as many
+     * positional reads as needed.
+     * @param buf destination buffer of at least length bytes
+     * @param length number of bytes to read
+     * @param offset byte position in the file to start from
+     * @throws ParseError if buf is null, a read fails, or a read returns no
+     *         data before length bytes have been delivered
+     */
+    void read(void* buf,
+              uint64_t length,
+              uint64_t offset) override {
+
+      if (!buf) {
+        throw ParseError("Buffer is null");
+      }
+
+      char* dest = static_cast<char*>(buf);
+      const size_t wanted = static_cast<size_t>(length);
+      size_t total_bytes_read = 0;
+
+      while (total_bytes_read < wanted) {
+        size_t last_bytes_read = 0;
+        //Each partial read must land after the bytes already delivered;
+        //writing to buf itself would overwrite the earlier data.
+        hdfs::Status status = file->PositionRead(
+            dest + total_bytes_read,
+            wanted - total_bytes_read,
+            static_cast<off_t>(offset + total_bytes_read),
+            &last_bytes_read);
+        if (!status.ok()) {
+          throw ParseError("Error reading the file: " + status.ToString());
+        }
+        //A successful read that makes no progress would loop forever.
+        if (last_bytes_read == 0) {
+          throw ParseError("Error reading the file: no data returned from "
+              + filename);
+        }
+        total_bytes_read += last_bytes_read;
+      }
+    }
+
+    /** @return the URI this stream was opened with */
+    const std::string& getName() const override {
+      return filename;
+    }
+
+    ~HdfsFileInputStream();
+  };
+
+  //Defined out of line; the unique_ptr members release the file handle and
+  //the file system.
+  HdfsFileInputStream::~HdfsFileInputStream() {
+  }
+
+  /**
+   * Create a stream to an HDFS file.
+   * @param path the uri of the file in HDFS
+   */
+  std::unique_ptr<InputStream> readHdfsFile(const std::string& path) {
+    return std::unique_ptr<InputStream>(new HdfsFileInputStream(path));
+  }
+}
http://git-wip-us.apache.org/repos/asf/orc/blob/5831033e/cmake_modules/CheckSourceCompiles.cmake
----------------------------------------------------------------------
diff --git a/cmake_modules/CheckSourceCompiles.cmake b/cmake_modules/CheckSourceCompiles.cmake
new file mode 100644
index 0000000..7f337fc
--- /dev/null
+++ b/cmake_modules/CheckSourceCompiles.cmake
@@ -0,0 +1,75 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX11_FLAGS} ${WARN_FLAGS}")
+
+INCLUDE(CheckCXXSourceCompiles)
+
+CHECK_CXX_SOURCE_COMPILES("
+ #include <initializer_list>
+ struct A {
+ A(std::initializer_list<int> list);
+ };
+ int main(int,char*[]){
+ }"
+ ORC_CXX_HAS_INITIALIZER_LIST
+)
+
+CHECK_CXX_SOURCE_COMPILES("
+ int main(int,char*[]) noexcept {
+ return 0;
+ }"
+ ORC_CXX_HAS_NOEXCEPT
+)
+
+CHECK_CXX_SOURCE_COMPILES("
+ int main(int,char* argv[]){
+ return argv[0] != nullptr;
+ }"
+ ORC_CXX_HAS_NULLPTR
+)
+
+CHECK_CXX_SOURCE_COMPILES("
+ struct A {
+ virtual ~A();
+ virtual void foo();
+ };
+ struct B: public A {
+ virtual void foo() override;
+ };
+ int main(int,char*[]){
+ }"
+ ORC_CXX_HAS_OVERRIDE
+)
+
+CHECK_CXX_SOURCE_COMPILES("
+ #include<memory>
+ int main(int,char* []){
+ std::unique_ptr<int> ptr(new int);
+ }"
+ ORC_CXX_HAS_UNIQUE_PTR
+)
+
+CHECK_CXX_SOURCE_COMPILES("
+ #include <cstdint>
+ int main(int, char*[]) { }"
+ ORC_CXX_HAS_CSTDINT
+)
+
+# Probe whether the compiler accepts the thread_local keyword. The result
+# gates the HDFS support: OrcHdfsFile.cc and the libhdfspp external project
+# are only enabled when ORC_CXX_HAS_THREAD_LOCAL is set.
+CHECK_CXX_SOURCE_COMPILES("
+ #include <thread>
+ int main(void) {
+ thread_local int s;
+ return s;
+ }"
+ ORC_CXX_HAS_THREAD_LOCAL
+)
http://git-wip-us.apache.org/repos/asf/orc/blob/5831033e/cmake_modules/FindCyrusSASL.cmake
----------------------------------------------------------------------
diff --git a/cmake_modules/FindCyrusSASL.cmake b/cmake_modules/FindCyrusSASL.cmake
new file mode 100644
index 0000000..8ce027f
--- /dev/null
+++ b/cmake_modules/FindCyrusSASL.cmake
@@ -0,0 +1,49 @@
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# - Find Cyrus SASL (sasl.h, libsasl2.so)
+#
+# This module defines
+# CYRUS_SASL_INCLUDE_DIR, directory containing headers
+# CYRUS_SASL_SHARED_LIB, path to Cyrus SASL's shared library
+# CYRUS_SASL_FOUND, whether Cyrus SASL and its plugins have been found
+#
+# N.B: we do _not_ include sasl in thirdparty, for a fairly subtle reason. The
+# TLDR version is that newer versions of cyrus-sasl (>=2.1.26) have a bug fix
+# for https://bugzilla.cyrusimap.org/show_bug.cgi?id=3590, but that bug fix
+# relied on a change both on the plugin side and on the library side. If you
+# then try to run the new version of sasl (e.g from our thirdparty tree) with
+# an older version of a plugin (eg from RHEL6 install), you'll get a SASL_NOMECH
+# error due to this bug.
+#
+# In practice, Cyrus-SASL is so commonly used and generally non-ABI-breaking that
+# we should be OK to depend on the host installation.
+
+# Note that this is modified from the version that was copied from our
+# friends at the Kudu project. The original version implicitly required
+# the Cyrus SASL. This version will only complain if REQUIRED is added.
+
+
+find_path(CYRUS_SASL_INCLUDE_DIR sasl/sasl.h)
+find_library(CYRUS_SASL_SHARED_LIB sasl2)
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(CYRUS_SASL DEFAULT_MSG
+ CYRUS_SASL_SHARED_LIB CYRUS_SASL_INCLUDE_DIR)
+
+MARK_AS_ADVANCED(CYRUS_SASL_INCLUDE_DIR CYRUS_SASL_SHARED_LIB)
http://git-wip-us.apache.org/repos/asf/orc/blob/5831033e/cmake_modules/ThirdpartyToolchain.cmake
----------------------------------------------------------------------
diff --git a/cmake_modules/ThirdpartyToolchain.cmake b/cmake_modules/ThirdpartyToolchain.cmake
index eef2827..c2ef765 100644
--- a/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cmake_modules/ThirdpartyToolchain.cmake
@@ -148,6 +148,7 @@ set (PROTOBUF_PREFIX "${THIRDPARTY_DIR}/protobuf_ep-install")
set (PROTOBUF_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/protobuf_ep-prefix/src/protobuf_ep")
set (PROTOBUF_INCLUDE_DIRS "${PROTOBUF_PREFIX}/include")
set (PROTOBUF_STATIC_LIB "${PROTOBUF_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}protobuf${CMAKE_STATIC_LIBRARY_SUFFIX}")
+set (PROTOC_STATIC_LIB "${PROTOBUF_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}protoc${CMAKE_STATIC_LIBRARY_SUFFIX}")
set (PROTOBUF_EXECUTABLE "${PROTOBUF_PREFIX}/bin/protoc")
set (PROTOBUF_SRC_URL "https://github.com/google/protobuf/releases/download/v${PROTOBUF_VERSION}/protobuf-${PROTOBUF_VERSION}.tar.gz")
@@ -168,7 +169,66 @@ add_library (protobuf STATIC IMPORTED)
set_target_properties (protobuf PROPERTIES IMPORTED_LOCATION ${PROTOBUF_STATIC_LIB})
add_dependencies (protobuf protobuf_ep)
set (PROTOBUF_LIBRARIES protobuf)
+
+add_library (protoc STATIC IMPORTED)
+set_target_properties (protoc PROPERTIES IMPORTED_LOCATION ${PROTOC_STATIC_LIB})
+add_dependencies (protoc protobuf_ep)
+
install(DIRECTORY ${PROTOBUF_PREFIX}/lib DESTINATION .
PATTERN "pkgconfig" EXCLUDE
PATTERN "*.so*" EXCLUDE
PATTERN "*.dylib" EXCLUDE)
+
+# ----------------------------------------------------------------------
+# LIBHDFSPP
+
+# Build the bundled libhdfs++ tarball as an external project and expose it as
+# the imported static target "libhdfspp". Only done when BUILD_LIBHDFSPP is on
+# and the compiler supports thread_local; otherwise a warning is printed.
+if(BUILD_LIBHDFSPP)
+ if(ORC_CXX_HAS_THREAD_LOCAL)
+ # None of these lookups is REQUIRED, so a missing package is not reported here.
+ # NOTE(review): a missing SASL/OpenSSL presumably surfaces later, in the
+ # libhdfs++ build or at link time - confirm.
+ find_package(CyrusSASL)
+ find_package(OpenSSL)
+ find_package(Threads)
+
+ set (LIBHDFSPP_PREFIX "${THIRDPARTY_DIR}/libhdfspp_ep-install")
+ set (LIBHDFSPP_INCLUDE_DIRS "${LIBHDFSPP_PREFIX}/include")
+ set (LIBHDFSPP_STATIC_LIB_NAME hdfspp_static)
+ set (LIBHDFSPP_STATIC_LIB "${LIBHDFSPP_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}${LIBHDFSPP_STATIC_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}")
+ # The source "URL" is the tarball checked into this repository.
+ set (LIBHDFSPP_SRC_URL "${CMAKE_SOURCE_DIR}/c++/libs/libhdfspp/libhdfspp.tar.gz")
+ # Point libhdfs++ at the protobuf/protoc produced by protobuf_ep.
+ set (LIBHDFSPP_CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
+ -DCMAKE_INSTALL_PREFIX=${LIBHDFSPP_PREFIX}
+ -DPROTOBUF_INCLUDE_DIR=${PROTOBUF_INCLUDE_DIRS}
+ -DPROTOBUF_LIBRARY=${PROTOBUF_STATIC_LIB}
+ -DPROTOBUF_PROTOC_LIBRARY=${PROTOC_STATIC_LIB}
+ -DPROTOBUF_PROTOC_EXECUTABLE=${PROTOBUF_EXECUTABLE}
+ -DOPENSSL_ROOT_DIR=${OPENSSL_ROOT_DIR}
+ -DCMAKE_C_FLAGS=${EP_C_FLAGS}
+ -DBUILD_SHARED_LIBS=OFF
+ -DHDFSPP_LIBRARY_ONLY=TRUE
+ -DBUILD_SHARED_HDFSPP=FALSE)
+
+ # protobuf_ep must be built first because the arguments above reference its outputs.
+ ExternalProject_Add (libhdfspp_ep
+ DEPENDS protobuf_ep
+ URL ${LIBHDFSPP_SRC_URL}
+ LOG_DOWNLOAD 0
+ LOG_CONFIGURE 0
+ LOG_BUILD 0
+ LOG_INSTALL 0
+ BUILD_BYPRODUCTS "${LIBHDFSPP_STATIC_LIB}"
+ CMAKE_ARGS ${LIBHDFSPP_CMAKE_ARGS})
+
+ include_directories (SYSTEM ${LIBHDFSPP_INCLUDE_DIRS})
+ add_library (libhdfspp STATIC IMPORTED)
+ set_target_properties (libhdfspp PROPERTIES IMPORTED_LOCATION ${LIBHDFSPP_STATIC_LIB})
+ # Link list for consumers: the imported target plus the SASL, OpenSSL and thread libraries.
+ set (LIBHDFSPP_LIBRARIES libhdfspp ${CYRUS_SASL_SHARED_LIB} ${OPENSSL_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
+ add_dependencies (libhdfspp libhdfspp_ep)
+ # Install the static archives only; shared objects and pkgconfig files are excluded.
+ install(DIRECTORY ${LIBHDFSPP_PREFIX}/lib DESTINATION .
+ PATTERN "pkgconfig" EXCLUDE
+ PATTERN "*.so*" EXCLUDE
+ PATTERN "*.dylib" EXCLUDE)
+ else(ORC_CXX_HAS_THREAD_LOCAL)
+ message(WARNING
+ "WARNING: Libhdfs++ library was not built because the required feature \
+ thread_local storage is not supported by your compiler. Known compilers that \
+ support this feature: GCC, Visual Studio, Clang (community version), \
+ Clang (version for iOS 9 and later), Clang (version for Xcode 8 and later)")
+ endif(ORC_CXX_HAS_THREAD_LOCAL)
+endif(BUILD_LIBHDFSPP)
http://git-wip-us.apache.org/repos/asf/orc/blob/5831033e/docker/centos7/Dockerfile
----------------------------------------------------------------------
diff --git a/docker/centos7/Dockerfile b/docker/centos7/Dockerfile
index 22b3dd0..64adb14 100644
--- a/docker/centos7/Dockerfile
+++ b/docker/centos7/Dockerfile
@@ -24,6 +24,7 @@ RUN yum check-update || true
RUN yum install -y \
cmake \
curl-devel \
+ cyrus-sasl-devel \
expat-devel \
gcc \
gcc-c++ \
http://git-wip-us.apache.org/repos/asf/orc/blob/5831033e/docker/debian8/Dockerfile
----------------------------------------------------------------------
diff --git a/docker/debian8/Dockerfile b/docker/debian8/Dockerfile
index 81fe8e8..504d3a2 100644
--- a/docker/debian8/Dockerfile
+++ b/docker/debian8/Dockerfile
@@ -26,6 +26,8 @@ RUN apt-get install -y \
gcc \
g++ \
git \
+ libsasl2-dev \
+ libssl-dev \
make \
maven \
openjdk-7-jdk
http://git-wip-us.apache.org/repos/asf/orc/blob/5831033e/docker/ubuntu14/Dockerfile
----------------------------------------------------------------------
diff --git a/docker/ubuntu14/Dockerfile b/docker/ubuntu14/Dockerfile
index 67dc289..4f25b26 100644
--- a/docker/ubuntu14/Dockerfile
+++ b/docker/ubuntu14/Dockerfile
@@ -26,6 +26,8 @@ RUN apt-get install -y \
gcc \
g++ \
git \
+ libsasl2-dev \
+ libssl-dev \
make \
maven \
openjdk-7-jdk
http://git-wip-us.apache.org/repos/asf/orc/blob/5831033e/docker/ubuntu16/Dockerfile
----------------------------------------------------------------------
diff --git a/docker/ubuntu16/Dockerfile b/docker/ubuntu16/Dockerfile
index b5761fa..41d65e7 100644
--- a/docker/ubuntu16/Dockerfile
+++ b/docker/ubuntu16/Dockerfile
@@ -27,6 +27,8 @@ RUN apt-get install -y \
gcc \
g++ \
git \
+ libsasl2-dev \
+ libssl-dev \
make \
maven \
tzdata
http://git-wip-us.apache.org/repos/asf/orc/blob/5831033e/tools/src/FileContents.cc
----------------------------------------------------------------------
diff --git a/tools/src/FileContents.cc b/tools/src/FileContents.cc
index 9ff86a0..5a44525 100644
--- a/tools/src/FileContents.cc
+++ b/tools/src/FileContents.cc
@@ -30,7 +30,7 @@ void printContents(const char* filename, const orc::RowReaderOptions& rowReaderO
orc::ReaderOptions readerOpts;
std::unique_ptr<orc::Reader> reader;
std::unique_ptr<orc::RowReader> rowReader;
- reader = orc::createReader(orc::readLocalFile(std::string(filename)), readerOpts);
+ reader = orc::createReader(orc::readFile(std::string(filename)), readerOpts);
rowReader = reader->createRowReader(rowReaderOpts);
std::unique_ptr<orc::ColumnVectorBatch> batch = rowReader->createRowBatch(1000);
http://git-wip-us.apache.org/repos/asf/orc/blob/5831033e/tools/src/FileMemory.cc
----------------------------------------------------------------------
diff --git a/tools/src/FileMemory.cc b/tools/src/FileMemory.cc
index c133501..de07e24 100644
--- a/tools/src/FileMemory.cc
+++ b/tools/src/FileMemory.cc
@@ -71,7 +71,7 @@ void processFile(const char* filename,
readerOpts.setMemoryPool(*(pool.get()));
std::unique_ptr<orc::Reader> reader =
- orc::createReader(orc::readLocalFile(std::string(filename)), readerOpts);
+ orc::createReader(orc::readFile(std::string(filename)), readerOpts);
std::unique_ptr<orc::RowReader> rowReader = reader->createRowReader(rowReaderOpts);
std::unique_ptr<orc::ColumnVectorBatch> batch =
http://git-wip-us.apache.org/repos/asf/orc/blob/5831033e/tools/src/FileMetadata.cc
----------------------------------------------------------------------
diff --git a/tools/src/FileMetadata.cc b/tools/src/FileMetadata.cc
index 58fe8a2..5731662 100644
--- a/tools/src/FileMetadata.cc
+++ b/tools/src/FileMetadata.cc
@@ -83,7 +83,7 @@ void printRawTail(std::ostream& out,
const char*filename) {
out << "Raw file tail: " << filename << "\n";
std::unique_ptr<orc::Reader> reader =
- orc::createReader(orc::readLocalFile(filename), orc::ReaderOptions());
+ orc::createReader(orc::readFile(filename), orc::ReaderOptions());
// Parse the file tail from the serialized one.
orc::proto::FileTail tail;
if (!tail.ParseFromString(reader->getSerializedFileTail())) {
@@ -94,7 +94,7 @@ void printRawTail(std::ostream& out,
void printMetadata(std::ostream & out, const char*filename, bool verbose) {
std::unique_ptr<orc::Reader> reader =
- orc::createReader(orc::readLocalFile(filename), orc::ReaderOptions());
+ orc::createReader(orc::readFile(filename), orc::ReaderOptions());
out << "{ \"name\": \"" << filename << "\",\n";
uint64_t numberColumns = reader->getType().getMaximumColumnId() + 1;
out << " \"type\": \""
http://git-wip-us.apache.org/repos/asf/orc/blob/5831033e/tools/src/FileScan.cc
----------------------------------------------------------------------
diff --git a/tools/src/FileScan.cc b/tools/src/FileScan.cc
index fd5a32b..ed8f323 100644
--- a/tools/src/FileScan.cc
+++ b/tools/src/FileScan.cc
@@ -29,7 +29,7 @@
void scanFile(std::ostream & out, const char* filename, uint64_t batchSize) {
orc::ReaderOptions readerOpts;
std::unique_ptr<orc::Reader> reader =
- orc::createReader(orc::readLocalFile(filename), readerOpts);
+ orc::createReader(orc::readFile(filename), readerOpts);
std::unique_ptr<orc::RowReader> rowReader = reader->createRowReader();
std::unique_ptr<orc::ColumnVectorBatch> batch =
rowReader->createRowBatch(batchSize);
http://git-wip-us.apache.org/repos/asf/orc/blob/5831033e/tools/src/FileStatistics.cc
----------------------------------------------------------------------
diff --git a/tools/src/FileStatistics.cc b/tools/src/FileStatistics.cc
index 72edffd..98e2f62 100644
--- a/tools/src/FileStatistics.cc
+++ b/tools/src/FileStatistics.cc
@@ -29,7 +29,7 @@ void printStatistics(const char *filename, bool withIndex) {
orc::ReaderOptions opts;
std::unique_ptr<orc::Reader> reader;
- reader = orc::createReader(orc::readLocalFile(std::string(filename)), opts);
+ reader = orc::createReader(orc::readFile(std::string(filename)), opts);
// print out all selected columns statistics.
std::unique_ptr<orc::Statistics> colStats = reader->getStatistics();