You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/06/23 23:07:01 UTC

[1/2] arrow git commit: ARROW-1142: [C++] Port over compression toolchain and interfaces from parquet-cpp, use Arrow-style error handling

Repository: arrow
Updated Branches:
  refs/heads/master 1514016a7 -> 98f7cac6e


http://git-wip-us.apache.org/repos/asf/arrow/blob/98f7cac6/cpp/src/arrow/util/compression.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/compression.h b/cpp/src/arrow/util/compression.h
new file mode 100644
index 0000000..6886d04
--- /dev/null
+++ b/cpp/src/arrow/util/compression.h
@@ -0,0 +1,109 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_UTIL_COMPRESSION_H
+#define ARROW_UTIL_COMPRESSION_H
+
+#include <cstdint>
+#include <memory>
+
+#include "arrow/status.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+// Scoping struct for the codec identifiers: enumerators are spelled
+// Compression::SNAPPY etc. and the enum type itself is Compression::type.
+// Codec::Create takes one of these values. Only Snappy, Brotli and GZip have
+// codec classes declared in this header.
+struct Compression {
+  enum type {
+    UNCOMPRESSED,
+    SNAPPY,
+    GZIP,
+    LZO,
+    BROTLI
+  };
+};
+
+// Abstract interface shared by the compression codecs declared in this header.
+//
+// NOTE(review): the exact meaning of the length arguments cannot be read off
+// these declarations; the hedged notes below should be confirmed against
+// compression.cc.
+class ARROW_EXPORT Codec {
+ public:
+  virtual ~Codec();
+
+  // Factory: asks for a codec matching the given Compression::type and hands
+  // it back through *out. The outcome is reported via the returned Status.
+  static Status Create(
+      Compression::type codec,
+      std::unique_ptr<Codec>* out);
+
+  // Presumably reads input_len bytes from input and writes the decompressed
+  // data to output_buffer, with output_len describing that buffer -- TODO
+  // confirm against the implementations.
+  virtual Status Decompress(
+      int64_t input_len, const uint8_t* input,
+      int64_t output_len, uint8_t* output_buffer) = 0;
+
+  // Presumably compresses input_len bytes from input into output_buffer (of
+  // size output_buffer_len) and stores the compressed size in *output_length
+  // -- TODO confirm against the implementations.
+  virtual Status Compress(
+      int64_t input_len, const uint8_t* input,
+      int64_t output_buffer_len, uint8_t* output_buffer,
+      int64_t* output_length) = 0;
+
+  // Judging by the name, an upper bound on the compressed size for the given
+  // input -- confirm against the implementations.
+  virtual int64_t MaxCompressedLen(
+      int64_t input_len, const uint8_t* input) = 0;
+
+  // Identifier string for the codec; the inline overrides in this header
+  // return "snappy" and "brotli".
+  virtual const char* name() const = 0;
+};
+
+// Snappy codec: a Codec implementation whose name() is "snappy". It declares
+// no data members and no constructor of its own.
+class ARROW_EXPORT SnappyCodec : public Codec {
+ public:
+  Status Decompress(
+      int64_t input_len, const uint8_t* input,
+      int64_t output_len, uint8_t* output_buffer) override;
+
+  Status Compress(
+      int64_t input_len, const uint8_t* input,
+      int64_t output_buffer_len, uint8_t* output_buffer,
+      int64_t* output_length) override;
+
+  int64_t MaxCompressedLen(
+      int64_t input_len, const uint8_t* input) override;
+
+  // Defined inline; always the literal "snappy".
+  const char* name() const override { return "snappy"; }
+};
+
+// Brotli codec: a Codec implementation whose name() is "brotli". It declares
+// no data members and no constructor of its own.
+class ARROW_EXPORT BrotliCodec : public Codec {
+ public:
+  Status Decompress(
+      int64_t input_len, const uint8_t* input,
+      int64_t output_len, uint8_t* output_buffer) override;
+
+  Status Compress(
+      int64_t input_len, const uint8_t* input,
+      int64_t output_buffer_len, uint8_t* output_buffer,
+      int64_t* output_length) override;
+
+  int64_t MaxCompressedLen(
+      int64_t input_len, const uint8_t* input) override;
+
+  // Defined inline; always the literal "brotli".
+  const char* name() const override { return "brotli"; }
+};
+
+// GZip codec. Unlike the Snappy and Brotli codecs in this header it carries
+// state, held behind a private implementation object (impl_).
+class ARROW_EXPORT GZipCodec : public Codec {
+ public:
+  /// Compression formats supported by the zlib library
+  enum Format {
+    ZLIB,
+    DEFLATE,
+    GZIP,
+  };
+
+  // explicit: a bare Format value must not convert silently into a codec.
+  // The format defaults to GZIP.
+  explicit GZipCodec(Format format = GZIP);
+
+  // Declared without a body so it can be defined where GZipCodecImpl is a
+  // complete type, which is required to destroy impl_. Spelled with
+  // `override` like every other overriding member of this class.
+  ~GZipCodec() override;
+
+  Status Decompress(int64_t input_len, const uint8_t* input, int64_t output_len,
+      uint8_t* output_buffer) override;
+
+  Status Compress(int64_t input_len, const uint8_t* input, int64_t output_buffer_len,
+      uint8_t* output_buffer, int64_t* output_length) override;
+
+  int64_t MaxCompressedLen(int64_t input_len, const uint8_t* input) override;
+
+  // Not defined inline here, unlike the Snappy and Brotli codecs.
+  const char* name() const override;
+
+ private:
+  // The gzip compressor is stateful
+  class GZipCodecImpl;
+  std::unique_ptr<GZipCodecImpl> impl_;
+};
+
+}  // namespace arrow
+
+#endif

http://git-wip-us.apache.org/repos/asf/arrow/blob/98f7cac6/cpp/src/arrow/util/logging.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/logging.h b/cpp/src/arrow/util/logging.h
index 49f1699..8a929da 100644
--- a/cpp/src/arrow/util/logging.h
+++ b/cpp/src/arrow/util/logging.h
@@ -39,9 +39,10 @@ namespace arrow {
 #define ARROW_LOG_INTERNAL(level) ::arrow::internal::CerrLog(level)
 #define ARROW_LOG(level) ARROW_LOG_INTERNAL(ARROW_##level)
 
-#define ARROW_CHECK(condition)                               \
-  (condition) ? 0 : ::arrow::internal::FatalLog(ARROW_FATAL) \
-                        << __FILE__ << __LINE__ << " Check failed: " #condition " "
+#define ARROW_CHECK(condition)                           \
+  (condition) ? 0                                        \
+              : ::arrow::internal::FatalLog(ARROW_FATAL) \
+                    << __FILE__ << __LINE__ << " Check failed: " #condition " "
 
 #ifdef NDEBUG
 #define ARROW_DFATAL ARROW_WARNING

http://git-wip-us.apache.org/repos/asf/arrow/blob/98f7cac6/cpp/src/plasma/malloc.cc
----------------------------------------------------------------------
diff --git a/cpp/src/plasma/malloc.cc b/cpp/src/plasma/malloc.cc
index e7ffd1a..97c9a16 100644
--- a/cpp/src/plasma/malloc.cc
+++ b/cpp/src/plasma/malloc.cc
@@ -42,7 +42,7 @@ int fake_munmap(void*, int64_t);
 #define DEFAULT_MMAP_THRESHOLD MAX_SIZE_T
 #define DEFAULT_GRANULARITY ((size_t)128U * 1024U)
 
-#include "thirdparty/dlmalloc.c"
+#include "thirdparty/dlmalloc.c"  // NOLINT
 
 #undef MMAP
 #undef MUNMAP

http://git-wip-us.apache.org/repos/asf/arrow/blob/98f7cac6/cpp/src/plasma/test/client_tests.cc
----------------------------------------------------------------------
diff --git a/cpp/src/plasma/test/client_tests.cc b/cpp/src/plasma/test/client_tests.cc
index dc45773..29b5b13 100644
--- a/cpp/src/plasma/test/client_tests.cc
+++ b/cpp/src/plasma/test/client_tests.cc
@@ -29,7 +29,7 @@
 #include "plasma/plasma.h"
 #include "plasma/protocol.h"
 
-std::string g_test_executable;
+std::string g_test_executable;  // NOLINT
 
 class TestPlasmaStore : public ::testing::Test {
  public:

http://git-wip-us.apache.org/repos/asf/arrow/blob/98f7cac6/dev/release/check-rat-report.py
----------------------------------------------------------------------
diff --git a/dev/release/check-rat-report.py b/dev/release/check-rat-report.py
new file mode 100644
index 0000000..e30d72b
--- /dev/null
+++ b/dev/release/check-rat-report.py
@@ -0,0 +1,59 @@
+#!/usr/bin/python
+##############################################################################
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+##############################################################################
+import fnmatch
+import re
+import sys
+import xml.etree.ElementTree as ET
+
+if len(sys.argv) != 3:
+    sys.stderr.write("Usage: %s exclude_globs.lst rat_report.xml\n" %
+                     sys.argv[0])
+    sys.exit(1)
+
+exclude_globs_filename = sys.argv[1]
+xml_filename = sys.argv[2]
+
+globs = [line.strip() for line in open(exclude_globs_filename, "r")]
+
+tree = ET.parse(xml_filename)
+root = tree.getroot()
+resources = root.findall('resource')
+
+all_ok = True
+for r in resources:
+    approvals = r.findall('license-approval')
+    if not approvals or approvals[0].attrib['name'] == 'true':
+        continue
+    clean_name = re.sub('^[^/]+/', '', r.attrib['name'])
+    excluded = False
+    for g in globs:
+        if fnmatch.fnmatch(clean_name, g):
+            excluded = True
+            break
+    if not excluded:
+        sys.stdout.write("NOT APPROVED: %s (%s): %s\n" % (
+            clean_name, r.attrib['name'], approvals[0].attrib['name']))
+        all_ok = False
+
+if not all_ok:
+    sys.exit(1)
+
+print('OK')
+sys.exit(0)

http://git-wip-us.apache.org/repos/asf/arrow/blob/98f7cac6/dev/release/rat_exclude_files.txt
----------------------------------------------------------------------
diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt
new file mode 100644
index 0000000..286793e
--- /dev/null
+++ b/dev/release/rat_exclude_files.txt
@@ -0,0 +1,66 @@
+*.gitignore
+*_generated.h
+*.json
+cpp/src/arrow/io/mman.h
+cpp/src/arrow/util/random.h
+cpp/src/arrow/status.cc
+cpp/src/arrow/status.h
+cpp/build-support/asan_symbolize.py
+cpp/build-support/cpplint.py
+cpp/cmake_modules/BuildUtils.cmake
+cpp/cmake_modules/FindPythonLibsNew.cmake
+cpp/cmake_modules/FindNumPy.cmake
+cpp/cmake_modules/SetupCxxFlags.cmake
+cpp/cmake_modules/SnappyCMakeLists.txt
+cpp/cmake_modules/SnappyConfig.h
+cpp/cmake_modules/CompilerInfo.cmake
+cpp/src/plasma/thirdparty/ae/ae.c
+cpp/src/plasma/thirdparty/ae/ae.h
+cpp/src/plasma/thirdparty/ae/ae_epoll.c
+cpp/src/plasma/thirdparty/ae/ae_evport.c
+cpp/src/plasma/thirdparty/ae/ae_kqueue.c
+cpp/src/plasma/thirdparty/ae/ae_select.c
+cpp/src/plasma/thirdparty/ae/config.h
+cpp/src/plasma/thirdparty/ae/zmalloc.h
+cpp/src/plasma/thirdparty/dlmalloc.c
+cpp/src/plasma/thirdparty/xxhash.cc
+cpp/src/plasma/thirdparty/xxhash.h
+dev/release/rat_exclude_files.txt
+js/.npmignore
+python/cmake_modules/BuildUtils.cmake
+python/cmake_modules/FindPythonLibsNew.cmake
+python/cmake_modules/FindNumPy.cmake
+python/cmake_modules/SetupCxxFlags.cmake
+python/cmake_modules/CompilerInfo.cmake
+python/doc/requirements.txt
+python/MANIFEST.in
+python/pyarrow/includes/__init__.pxd
+python/pyarrow/tests/__init__.py
+python/requirements.txt
+pax_global_header
+MANIFEST.in
+__init__.pxd
+__init__.py
+requirements.txt
+version
+*.m4
+configure
+config.sub
+config.h.in
+compile
+missing
+install-sh
+config.guess
+depcomp
+ltmain.sh
+arrow-glib.types
+arrow-glib-sections.txt
+arrow-glib-overrides.txt
+gtk-doc.make
+*.html
+*.sgml
+*.css
+*.png
+*.svg
+*.devhelp2
+*.scss

http://git-wip-us.apache.org/repos/asf/arrow/blob/98f7cac6/dev/release/run-rat.sh
----------------------------------------------------------------------
diff --git a/dev/release/run-rat.sh b/dev/release/run-rat.sh
index 757604f..53a322a 100755
--- a/dev/release/run-rat.sh
+++ b/dev/release/run-rat.sh
@@ -21,65 +21,15 @@
 # download apache rat
 curl -s https://repo1.maven.org/maven2/org/apache/rat/apache-rat/0.12/apache-rat-0.12.jar > apache-rat-0.12.jar
 
-RAT="java -jar apache-rat-0.12.jar -d "
+RAT="java -jar apache-rat-0.12.jar -x "
+
+RELEASE_DIR=$(cd "$(dirname "$BASH_SOURCE")"; pwd)
 
 # generate the rat report
-$RAT $1 \
-  -e ".*" \
-  -e mman.h \
-  -e "*_generated.h" \
-  -e "*.json" \
-  -e random.h \
-  -e status.cc \
-  -e status.h \
-  -e asan_symbolize.py \
-  -e cpplint.py \
-  -e BuildUtils.cmake \
-  -e FindPythonLibsNew.cmake \
-  -e FindNumPy.cmake \
-  -e SetupCxxFlags.cmake \
-  -e CompilerInfo.cmake \
-  -e pax_global_header \
-  -e MANIFEST.in \
-  -e __init__.pxd \
-  -e __init__.py \
-  -e requirements.txt \
-  -e version \
-  -e "*.m4" \
-  -e configure \
-  -e config.sub \
-  -e config.h.in \
-  -e compile \
-  -e missing \
-  -e install-sh \
-  -e config.guess \
-  -e depcomp \
-  -e ltmain.sh \
-  -e arrow-glib.types \
-  -e arrow-glib-sections.txt \
-  -e arrow-glib-overrides.txt \
-  -e gtk-doc.make \
-  -e ae.c \
-  -e ae.h \
-  -e ae_epoll.c \
-  -e ae_evport.c \
-  -e ae_kqueue.c \
-  -e ae_select.c \
-  -e config.h \
-  -e zmalloc.h \
-  -e dlmalloc.c \
-  -e xxhash.cc \
-  -e xxhash.h \
-  -e "*.html" \
-  -e "*.sgml" \
-  -e "*.css" \
-  -e "*.png" \
-  -e "*.svg" \
-  -e "*.devhelp2" \
-  -e "*.scss" \
-  > rat.txt
-cat rat.txt
-UNAPPROVED=`cat rat.txt  | grep "Unknown Licenses" | head -n 1 | cut -d " " -f 1`
+$RAT $1 > rat.txt
+python $RELEASE_DIR/check-rat-report.py $RELEASE_DIR/rat_exclude_files.txt rat.txt > filtered_rat.txt
+cat filtered_rat.txt
+UNAPPROVED=`cat filtered_rat.txt  | grep "NOT APPROVED" | wc -l`
 
 if [ "0" -eq "${UNAPPROVED}" ]; then
   echo "No unapproved licenses"


[2/2] arrow git commit: ARROW-1142: [C++] Port over compression toolchain and interfaces from parquet-cpp, use Arrow-style error handling

Posted by we...@apache.org.
ARROW-1142: [C++] Port over compression toolchain and interfaces from parquet-cpp, use Arrow-style error handling

Author: Wes McKinney <we...@twosigma.com>

Closes #771 from wesm/import-parquet-compression and squashes the following commits:

b7609f8e [Wes McKinney] Boost toolchain tweaks
44e77a64 [Wes McKinney] Install compression.h
732e426e [Wes McKinney] Revert bash equality test
31b2705a [Wes McKinney] cpplint
2ef43de2 [Wes McKinney] Clean up RAT exclusions, use absolute paths
cbbaecf0 [Wes McKinney] Add some license headers, use Apache Kudu approach for managing a lot of RAT exclusions
198dee16 [Wes McKinney] Clean up build dependencies
da31c2cb [Wes McKinney] Port over compression toolchain and interfaces from parquet-cpp, adapt to Arrow-style error handling


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/98f7cac6
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/98f7cac6
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/98f7cac6

Branch: refs/heads/master
Commit: 98f7cac6e162d9775d615d07b9867c1ec0030f82
Parents: 1514016
Author: Wes McKinney <we...@twosigma.com>
Authored: Fri Jun 23 19:06:55 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Fri Jun 23 19:06:55 2017 -0400

----------------------------------------------------------------------
 .readthedocs.yml                            |  17 +
 .travis.yml                                 |  17 +
 ci/travis_script_cpp.sh                     |   2 +-
 cpp/.clang-format                           |  28 +-
 cpp/.clang-tidy                             |  19 +-
 cpp/.clang-tidy-ignore                      |  16 +
 cpp/CMakeLists.txt                          | 505 ++----------------
 cpp/cmake_modules/FindBrotli.cmake          | 116 ++++
 cpp/cmake_modules/FindSnappy.cmake          |  94 ++++
 cpp/cmake_modules/FindZLIB.cmake            | 105 ++++
 cpp/cmake_modules/SnappyCMakeLists.txt      |  85 +++
 cpp/cmake_modules/SnappyConfig.h            |  36 ++
 cpp/cmake_modules/ThirdpartyToolchain.cmake | 641 +++++++++++++++++++++++
 cpp/src/arrow/python/CMakeLists.txt         |   4 +-
 cpp/src/arrow/util/CMakeLists.txt           |   7 +-
 cpp/src/arrow/util/compression-test.cc      |  89 ++++
 cpp/src/arrow/util/compression.cc           | 327 ++++++++++++
 cpp/src/arrow/util/compression.h            | 109 ++++
 cpp/src/arrow/util/logging.h                |   7 +-
 cpp/src/plasma/malloc.cc                    |   2 +-
 cpp/src/plasma/test/client_tests.cc         |   2 +-
 dev/release/check-rat-report.py             |  59 +++
 dev/release/rat_exclude_files.txt           |  66 +++
 dev/release/run-rat.sh                      |  64 +--
 24 files changed, 1890 insertions(+), 527 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/98f7cac6/.readthedocs.yml
----------------------------------------------------------------------
diff --git a/.readthedocs.yml b/.readthedocs.yml
index 2e1fe3f..11a7d70 100644
--- a/.readthedocs.yml
+++ b/.readthedocs.yml
@@ -1,2 +1,19 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 conda:
     file: python/doc/environment.yml

http://git-wip-us.apache.org/repos/asf/arrow/blob/98f7cac6/.travis.yml
----------------------------------------------------------------------
diff --git a/.travis.yml b/.travis.yml
index a32562f..315cbd2 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 sudo: required
 dist: trusty
 addons:

http://git-wip-us.apache.org/repos/asf/arrow/blob/98f7cac6/ci/travis_script_cpp.sh
----------------------------------------------------------------------
diff --git a/ci/travis_script_cpp.sh b/ci/travis_script_cpp.sh
index d555cab..c368a1d 100755
--- a/ci/travis_script_cpp.sh
+++ b/ci/travis_script_cpp.sh
@@ -17,7 +17,7 @@ set -e
 : ${CPP_BUILD_DIR=$TRAVIS_BUILD_DIR/cpp-build}
 
 # Check licenses according to Apache policy
-git archive HEAD -o arrow-src.tar.gz
+git archive HEAD --prefix=apache-arrow/ --output=arrow-src.tar.gz
 ./dev/release/run-rat.sh arrow-src.tar.gz
 
 pushd $CPP_BUILD_DIR

http://git-wip-us.apache.org/repos/asf/arrow/blob/98f7cac6/cpp/.clang-format
----------------------------------------------------------------------
diff --git a/cpp/.clang-format b/cpp/.clang-format
index 7d5b3cf..33f282a 100644
--- a/cpp/.clang-format
+++ b/cpp/.clang-format
@@ -1,34 +1,50 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
 ---
 Language:        Cpp
 # BasedOnStyle:  Google
 AccessModifierOffset: -1
-AlignAfterOpenBracket: false 
+AlignAfterOpenBracket: false
 AlignConsecutiveAssignments: false
 AlignEscapedNewlinesLeft: true
 AlignOperands:   true
 AlignTrailingComments: true
 AllowAllParametersOfDeclarationOnNextLine: true
-AllowShortBlocksOnASingleLine: true 
+AllowShortBlocksOnASingleLine: true
 AllowShortCaseLabelsOnASingleLine: false
 AllowShortFunctionsOnASingleLine: Inline
 AllowShortIfStatementsOnASingleLine: true
-AllowShortLoopsOnASingleLine: false 
+AllowShortLoopsOnASingleLine: false
 AlwaysBreakAfterDefinitionReturnType: None
 AlwaysBreakBeforeMultilineStrings: true
 AlwaysBreakTemplateDeclarations: true
 BinPackArguments: true
-BinPackParameters: true 
+BinPackParameters: true
 BreakBeforeBinaryOperators: None
 BreakBeforeBraces: Attach
 BreakBeforeTernaryOperators: true
 BreakConstructorInitializersBeforeComma: false
-ColumnLimit: 90 
+ColumnLimit: 90
 CommentPragmas:  '^ IWYU pragma:'
 ConstructorInitializerAllOnOneLineOrOnePerLine: true
 ConstructorInitializerIndentWidth: 4
 ContinuationIndentWidth: 4
 Cpp11BracedListStyle: true
-DerivePointerAlignment: false 
+DerivePointerAlignment: false
 DisableFormat:   false
 ExperimentalAutoDetectBinPacking: false
 ForEachMacros:   [ foreach, Q_FOREACH, BOOST_FOREACH ]

http://git-wip-us.apache.org/repos/asf/arrow/blob/98f7cac6/cpp/.clang-tidy
----------------------------------------------------------------------
diff --git a/cpp/.clang-tidy b/cpp/.clang-tidy
index deaa9bd..b6b5a81 100644
--- a/cpp/.clang-tidy
+++ b/cpp/.clang-tidy
@@ -1,8 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
 ---
 Checks:          'clang-diagnostic-*,clang-analyzer-*,-clang-analyzer-alpha*,google-.*,modernize-.*,readablity-.*'
 HeaderFilterRegex: 'arrow/.*'
 AnalyzeTemporaryDtors: true
-CheckOptions:    
+CheckOptions:
   - key:             google-readability-braces-around-statements.ShortStatementLines
     value:           '1'
   - key:             google-readability-function-size.StatementThreshold
@@ -11,4 +27,3 @@ CheckOptions:
     value:           '10'
   - key:             google-readability-namespace-comments.SpacesBeforeComments
     value:           '2'
-

http://git-wip-us.apache.org/repos/asf/arrow/blob/98f7cac6/cpp/.clang-tidy-ignore
----------------------------------------------------------------------
diff --git a/cpp/.clang-tidy-ignore b/cpp/.clang-tidy-ignore
index 5ab4d20..3270b97 100644
--- a/cpp/.clang-tidy-ignore
+++ b/cpp/.clang-tidy-ignore
@@ -1,2 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
 ipc-adapter-test.cc
 memory-pool-test.cc

http://git-wip-us.apache.org/repos/asf/arrow/blob/98f7cac6/cpp/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 5ba56e5..49e1d97 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -136,6 +136,18 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
   option(ARROW_PLASMA
     "Build the plasma object store along with Arrow"
     OFF)
+
+  option(ARROW_ZLIB_VENDORED
+    "Build our own zlib (some libz.a aren't configured for static linking)"
+    ON)
+  if (MSVC)
+    set(BROTLI_MSVC_STATIC_LIB_SUFFIX "_static" CACHE STRING
+      "Brotli static lib suffix used on Windows with MSVC (default _static)")
+    set(SNAPPY_MSVC_STATIC_LIB_SUFFIX "" CACHE STRING
+      "Snappy static lib suffix used on Windows with MSVC (default is empty string)")
+    set(ZLIB_MSVC_STATIC_LIB_SUFFIX "libstatic" CACHE STRING
+      "Zlib static lib suffix used on Windows with MSVC (default libstatic)")
+  endif()
 endif()
 
 if(ARROW_BUILD_TESTS)
@@ -166,9 +178,14 @@ if (ARROW_NO_DEPRECATED_API)
   add_definitions(-DARROW_NO_DEPRECATED_API)
 endif()
 
+############################################################
+# Dependencies
+############################################################
+
+include(ThirdpartyToolchain)
+
 # Add common flags
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX_COMMON_FLAGS}")
-set(EP_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARROW_CXXFLAGS}")
 
 message(STATUS "CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")
@@ -401,444 +418,6 @@ endfunction()
 enable_testing()
 
 ############################################################
-# Dependencies
-############################################################
-
-# ----------------------------------------------------------------------
-# Thirdparty toolchain
-
-set(THIRDPARTY_DIR "${CMAKE_SOURCE_DIR}/thirdparty")
-set(GFLAGS_VERSION "2.1.2")
-set(GTEST_VERSION "1.8.0")
-set(GBENCHMARK_VERSION "1.1.0")
-set(FLATBUFFERS_VERSION "1.6.0")
-set(JEMALLOC_VERSION "4.4.0")
-
-if (NOT "$ENV{ARROW_BUILD_TOOLCHAIN}" STREQUAL "")
-  set(FLATBUFFERS_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
-  set(RAPIDJSON_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
-  set(JEMALLOC_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
-  set(GFLAGS_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
-
-  if (NOT DEFINED ENV{BOOST_ROOT})
-    # Since we have to set this in the environment, we check whether
-    # $BOOST_ROOT is defined inside here
-    set(ENV{BOOST_ROOT} "$ENV{ARROW_BUILD_TOOLCHAIN}")
-  endif()
-endif()
-
-if (DEFINED ENV{FLATBUFFERS_HOME})
-  set(FLATBUFFERS_HOME "$ENV{FLATBUFFERS_HOME}")
-endif()
-
-if (DEFINED ENV{RAPIDJSON_HOME})
-  set(RAPIDJSON_HOME "$ENV{RAPIDJSON_HOME}")
-endif()
-
-if (DEFINED ENV{JEMALLOC_HOME})
-  set(JEMALLOC_HOME "$ENV{JEMALLOC_HOME}")
-endif()
-
-if (DEFINED ENV{GFLAGS_HOME})
-  set(GFLAGS_HOME "$ENV{GFLAGS_HOME}")
-endif()
-
-# ----------------------------------------------------------------------
-# Find pthreads
-
-if (NOT MSVC)
-  find_library(PTHREAD_LIBRARY pthread)
-  message(STATUS "Found pthread: ${PTHREAD_LIBRARY}")
-endif()
-
-# ----------------------------------------------------------------------
-# Add Boost dependencies (code adapted from Apache Kudu (incubating))
-
-set(Boost_DEBUG TRUE)
-set(Boost_USE_MULTITHREADED ON)
-set(Boost_ADDITIONAL_VERSIONS
-  "1.63.0" "1.63"
-  "1.62.0" "1.61"
-  "1.61.0" "1.62"
-  "1.60.0" "1.60")
-
-if (ARROW_BOOST_USE_SHARED)
-  # Find shared Boost libraries.
-  set(Boost_USE_STATIC_LIBS OFF)
-
-  if(MSVC)
-    # disable autolinking in boost
-    add_definitions(-DBOOST_ALL_NO_LIB)
-
-    # force all boost libraries to dynamic link
-    add_definitions(-DBOOST_ALL_DYN_LINK)
-  endif()
-
-  if (ARROW_BOOST_HEADER_ONLY)
-    find_package(Boost)
-  else()
-    find_package(Boost COMPONENTS system filesystem REQUIRED)
-    if ("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG")
-      set(BOOST_SHARED_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_DEBUG})
-      set(BOOST_SHARED_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_DEBUG})
-    else()
-      set(BOOST_SHARED_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_RELEASE})
-      set(BOOST_SHARED_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_RELEASE})
-    endif()
-    set(BOOST_SYSTEM_LIBRARY boost_system_shared)
-    set(BOOST_FILESYSTEM_LIBRARY boost_filesystem_shared)
-  endif()
-else()
-  # Find static boost headers and libs
-  # TODO Differentiate here between release and debug builds
-  set(Boost_USE_STATIC_LIBS ON)
-  if (ARROW_BOOST_HEADER_ONLY)
-    find_package(Boost)
-  else()
-    find_package(Boost COMPONENTS system filesystem regex REQUIRED)
-    if ("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG")
-      set(BOOST_STATIC_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_DEBUG})
-      set(BOOST_STATIC_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_DEBUG})
-    else()
-      set(BOOST_STATIC_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_RELEASE})
-      set(BOOST_STATIC_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_RELEASE})
-    endif()
-    set(BOOST_SYSTEM_LIBRARY boost_system_static)
-    set(BOOST_FILESYSTEM_LIBRARY boost_filesystem_static)
-  endif()
-endif()
-
-message(STATUS "Boost include dir: " ${Boost_INCLUDE_DIRS})
-message(STATUS "Boost libraries: " ${Boost_LIBRARIES})
-
-if (NOT ARROW_BOOST_HEADER_ONLY)
-  ADD_THIRDPARTY_LIB(boost_system
-      STATIC_LIB "${BOOST_STATIC_SYSTEM_LIBRARY}"
-      SHARED_LIB "${BOOST_SHARED_SYSTEM_LIBRARY}")
-
-  ADD_THIRDPARTY_LIB(boost_filesystem
-      STATIC_LIB "${BOOST_STATIC_FILESYSTEM_LIBRARY}"
-      SHARED_LIB "${BOOST_SHARED_FILESYSTEM_LIBRARY}")
-
-  SET(ARROW_BOOST_LIBS boost_system boost_filesystem)
-endif()
-
-include_directories(SYSTEM ${Boost_INCLUDE_DIR})
-
-if(ARROW_BUILD_TESTS OR ARROW_BUILD_BENCHMARKS)
-  add_custom_target(unittest ctest -L unittest)
-
-  if("$ENV{GTEST_HOME}" STREQUAL "")
-    if(APPLE)
-      set(GTEST_CMAKE_CXX_FLAGS "-fPIC -DGTEST_USE_OWN_TR1_TUPLE=1 -Wno-unused-value -Wno-ignored-attributes")
-    elseif(NOT MSVC)
-      set(GTEST_CMAKE_CXX_FLAGS "-fPIC")
-    endif()
-    string(TOUPPER ${CMAKE_BUILD_TYPE} UPPERCASE_BUILD_TYPE)
-    set(GTEST_CMAKE_CXX_FLAGS "${EP_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}} ${GTEST_CMAKE_CXX_FLAGS}")
-
-    set(GTEST_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/googletest_ep-prefix/src/googletest_ep")
-    set(GTEST_INCLUDE_DIR "${GTEST_PREFIX}/include")
-    set(GTEST_STATIC_LIB
-      "${GTEST_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}gtest${CMAKE_STATIC_LIBRARY_SUFFIX}")
-    set(GTEST_MAIN_STATIC_LIB
-      "${GTEST_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}gtest_main${CMAKE_STATIC_LIBRARY_SUFFIX}")
-    set(GTEST_VENDORED 1)
-    set(GTEST_CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
-                         -DCMAKE_INSTALL_PREFIX=${GTEST_PREFIX}
-                         -Dgtest_force_shared_crt=ON
-                         -DCMAKE_CXX_FLAGS=${GTEST_CMAKE_CXX_FLAGS})
-
-    if (CMAKE_VERSION VERSION_GREATER "3.2")
-      # BUILD_BYPRODUCTS is a 3.2+ feature
-      ExternalProject_Add(googletest_ep
-        URL "https://github.com/google/googletest/archive/release-${GTEST_VERSION}.tar.gz"
-        BUILD_BYPRODUCTS ${GTEST_STATIC_LIB} ${GTEST_MAIN_STATIC_LIB}
-        CMAKE_ARGS ${GTEST_CMAKE_ARGS})
-    else()
-      ExternalProject_Add(googletest_ep
-        URL "https://github.com/google/googletest/archive/release-${GTEST_VERSION}.tar.gz"
-        CMAKE_ARGS ${GTEST_CMAKE_ARGS})
-    endif()
-  else()
-    find_package(GTest REQUIRED)
-    set(GTEST_VENDORED 0)
-  endif()
-
-  message(STATUS "GTest include dir: ${GTEST_INCLUDE_DIR}")
-  message(STATUS "GTest static library: ${GTEST_STATIC_LIB}")
-  include_directories(SYSTEM ${GTEST_INCLUDE_DIR})
-  ADD_THIRDPARTY_LIB(gtest
-    STATIC_LIB ${GTEST_STATIC_LIB})
-  ADD_THIRDPARTY_LIB(gtest_main
-    STATIC_LIB ${GTEST_MAIN_STATIC_LIB})
-
-  if(GTEST_VENDORED)
-    add_dependencies(gtest googletest_ep)
-    add_dependencies(gtest_main googletest_ep)
-  endif()
-
-  # gflags (formerly Googleflags) command line parsing
-  if("${GFLAGS_HOME}" STREQUAL "")
-    set(GFLAGS_CMAKE_CXX_FLAGS ${EP_CXX_FLAGS})
-
-    set(GFLAGS_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/gflags_ep-prefix/src/gflags_ep")
-    set(GFLAGS_HOME "${GFLAGS_PREFIX}")
-    set(GFLAGS_INCLUDE_DIR "${GFLAGS_PREFIX}/include")
-    if(MSVC)
-      set(GFLAGS_STATIC_LIB "${GFLAGS_PREFIX}/lib/gflags_static.lib")
-    else()
-      set(GFLAGS_STATIC_LIB "${GFLAGS_PREFIX}/lib/libgflags.a")
-    endif()
-    set(GFLAGS_VENDORED 1)
-    set(GFLAGS_CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
-                          -DCMAKE_INSTALL_PREFIX=${GFLAGS_PREFIX}
-                          -DBUILD_SHARED_LIBS=OFF
-                          -DBUILD_STATIC_LIBS=ON
-                          -DBUILD_PACKAGING=OFF
-                          -DBUILD_TESTING=OFF
-                          -BUILD_CONFIG_TESTS=OFF
-                          -DINSTALL_HEADERS=ON
-                          -DCMAKE_CXX_FLAGS=${GFLAGS_CMAKE_CXX_FLAGS})
-    if (CMAKE_VERSION VERSION_GREATER "3.2")
-      # BUILD_BYPRODUCTS is a 3.2+ feature
-      ExternalProject_Add(gflags_ep
-        GIT_REPOSITORY https://github.com/gflags/gflags.git
-        GIT_TAG cce68f0c9c5d054017425e6e6fd54f696d36e8ee
-        BUILD_IN_SOURCE 1
-        BUILD_BYPRODUCTS "${GFLAGS_STATIC_LIB}"
-        CMAKE_ARGS ${GFLAGS_CMAKE_ARGS})
-    else()
-      ExternalProject_Add(gflags_ep
-        GIT_REPOSITORY https://github.com/gflags/gflags.git
-        GIT_TAG cce68f0c9c5d054017425e6e6fd54f696d36e8ee
-        BUILD_IN_SOURCE 1
-        CMAKE_ARGS ${GFLAGS_CMAKE_ARGS})
-    endif()
-  else()
-    set(GFLAGS_VENDORED 0)
-    find_package(GFlags REQUIRED)
-  endif()
-
-  message(STATUS "GFlags include dir: ${GFLAGS_INCLUDE_DIR}")
-  message(STATUS "GFlags static library: ${GFLAGS_STATIC_LIB}")
-  include_directories(SYSTEM ${GFLAGS_INCLUDE_DIR})
-  ADD_THIRDPARTY_LIB(gflags
-    STATIC_LIB ${GFLAGS_STATIC_LIB})
-  if(MSVC)
-    set_target_properties(gflags
-      PROPERTIES
-      IMPORTED_LINK_INTERFACE_LIBRARIES "shlwapi.lib")
-  endif()
-
-  if(GFLAGS_VENDORED)
-    add_dependencies(gflags gflags_ep)
-  endif()
-endif()
-
-if(ARROW_BUILD_BENCHMARKS)
-  add_custom_target(runbenchmark ctest -L benchmark)
-
-  if("$ENV{GBENCHMARK_HOME}" STREQUAL "")
-    if(APPLE)
-      set(GBENCHMARK_CMAKE_CXX_FLAGS "-fPIC -std=c++11 -stdlib=libc++")
-    elseif(NOT MSVC)
-      set(GBENCHMARK_CMAKE_CXX_FLAGS "-fPIC --std=c++11")
-    endif()
-
-    set(GBENCHMARK_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/gbenchmark_ep/src/gbenchmark_ep-install")
-    set(GBENCHMARK_INCLUDE_DIR "${GBENCHMARK_PREFIX}/include")
-    set(GBENCHMARK_STATIC_LIB "${GBENCHMARK_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}benchmark${CMAKE_STATIC_LIBRARY_SUFFIX}")
-    set(GBENCHMARK_VENDORED 1)
-    set(GBENCHMARK_CMAKE_ARGS
-          "-DCMAKE_BUILD_TYPE=Release"
-          "-DCMAKE_INSTALL_PREFIX:PATH=${GBENCHMARK_PREFIX}"
-          "-DBENCHMARK_ENABLE_TESTING=OFF"
-          "-DCMAKE_CXX_FLAGS=${GBENCHMARK_CMAKE_CXX_FLAGS}")
-    if (APPLE)
-      set(GBENCHMARK_CMAKE_ARGS ${GBENCHMARK_CMAKE_ARGS} "-DBENCHMARK_USE_LIBCXX=ON")
-    endif()
-    if (CMAKE_VERSION VERSION_GREATER "3.2")
-      # BUILD_BYPRODUCTS is a 3.2+ feature
-      ExternalProject_Add(gbenchmark_ep
-        URL "https://github.com/google/benchmark/archive/v${GBENCHMARK_VERSION}.tar.gz"
-        BUILD_BYPRODUCTS "${GBENCHMARK_STATIC_LIB}"
-        CMAKE_ARGS ${GBENCHMARK_CMAKE_ARGS})
-    else()
-      ExternalProject_Add(gbenchmark_ep
-        URL "https://github.com/google/benchmark/archive/v${GBENCHMARK_VERSION}.tar.gz"
-        CMAKE_ARGS ${GBENCHMARK_CMAKE_ARGS})
-    endif()
-  else()
-    find_package(GBenchmark REQUIRED)
-    set(GBENCHMARK_VENDORED 0)
-  endif()
-
-  message(STATUS "GBenchmark include dir: ${GBENCHMARK_INCLUDE_DIR}")
-  message(STATUS "GBenchmark static library: ${GBENCHMARK_STATIC_LIB}")
-  include_directories(SYSTEM ${GBENCHMARK_INCLUDE_DIR})
-  ADD_THIRDPARTY_LIB(benchmark
-    STATIC_LIB ${GBENCHMARK_STATIC_LIB})
-
-  if(GBENCHMARK_VENDORED)
-    add_dependencies(benchmark gbenchmark_ep)
-  endif()
-endif()
-
-if (ARROW_IPC)
-  # RapidJSON, header only dependency
-  if("${RAPIDJSON_HOME}" STREQUAL "")
-    ExternalProject_Add(rapidjson_ep
-      PREFIX "${CMAKE_BINARY_DIR}"
-      URL "https://github.com/miloyip/rapidjson/archive/v1.1.0.tar.gz"
-      URL_MD5 "badd12c511e081fec6c89c43a7027bce"
-      CONFIGURE_COMMAND ""
-      BUILD_COMMAND ""
-      BUILD_IN_SOURCE 1
-      INSTALL_COMMAND "")
-
-    ExternalProject_Get_Property(rapidjson_ep SOURCE_DIR)
-    set(RAPIDJSON_INCLUDE_DIR "${SOURCE_DIR}/include")
-    set(RAPIDJSON_VENDORED 1)
-  else()
-    set(RAPIDJSON_INCLUDE_DIR "${RAPIDJSON_HOME}/include")
-    set(RAPIDJSON_VENDORED 0)
-  endif()
-  message(STATUS "RapidJSON include dir: ${RAPIDJSON_INCLUDE_DIR}")
-  include_directories(SYSTEM ${RAPIDJSON_INCLUDE_DIR})
-
-  ## Flatbuffers
-  if("${FLATBUFFERS_HOME}" STREQUAL "")
-    set(FLATBUFFERS_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/flatbuffers_ep-prefix/src/flatbuffers_ep-install")
-    ExternalProject_Add(flatbuffers_ep
-      URL "https://github.com/google/flatbuffers/archive/v${FLATBUFFERS_VERSION}.tar.gz"
-      CMAKE_ARGS
-      "-DCMAKE_CXX_FLAGS=-fPIC"
-      "-DCMAKE_INSTALL_PREFIX:PATH=${FLATBUFFERS_PREFIX}"
-      "-DFLATBUFFERS_BUILD_TESTS=OFF")
-
-    set(FLATBUFFERS_INCLUDE_DIR "${FLATBUFFERS_PREFIX}/include")
-    set(FLATBUFFERS_COMPILER "${FLATBUFFERS_PREFIX}/bin/flatc")
-    set(FLATBUFFERS_VENDORED 1)
-  else()
-    find_package(Flatbuffers REQUIRED)
-    set(FLATBUFFERS_VENDORED 0)
-  endif()
-
-  message(STATUS "Flatbuffers include dir: ${FLATBUFFERS_INCLUDE_DIR}")
-  message(STATUS "Flatbuffers compiler: ${FLATBUFFERS_COMPILER}")
-  include_directories(SYSTEM ${FLATBUFFERS_INCLUDE_DIR})
-endif()
-#----------------------------------------------------------------------
-
-if (MSVC)
-  # jemalloc is not supported on Windows
-  set(ARROW_JEMALLOC off)
-endif()
-
-if (ARROW_JEMALLOC)
-  find_package(jemalloc)
-
-  if(NOT JEMALLOC_FOUND)
-    set(ARROW_JEMALLOC_USE_SHARED OFF)
-    set(JEMALLOC_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/jemalloc_ep-prefix/src/jemalloc_ep/dist/")
-    set(JEMALLOC_HOME "${JEMALLOC_PREFIX}")
-    set(JEMALLOC_INCLUDE_DIR "${JEMALLOC_PREFIX}/include")
-    set(JEMALLOC_SHARED_LIB "${JEMALLOC_PREFIX}/lib/libjemalloc${CMAKE_SHARED_LIBRARY_SUFFIX}")
-    set(JEMALLOC_STATIC_LIB "${JEMALLOC_PREFIX}/lib/libjemalloc_pic${CMAKE_STATIC_LIBRARY_SUFFIX}")
-    set(JEMALLOC_VENDORED 1)
-    if (CMAKE_VERSION VERSION_GREATER "3.2")
-      # BUILD_BYPRODUCTS is a 3.2+ feature
-      ExternalProject_Add(jemalloc_ep
-        URL https://github.com/jemalloc/jemalloc/releases/download/${JEMALLOC_VERSION}/jemalloc-${JEMALLOC_VERSION}.tar.bz2
-        CONFIGURE_COMMAND ./configure "--prefix=${JEMALLOC_PREFIX}" "--with-jemalloc-prefix="
-        BUILD_IN_SOURCE 1
-        BUILD_COMMAND ${CMAKE_MAKE_PROGRAM}
-        BUILD_BYPRODUCTS "${JEMALLOC_STATIC_LIB}" "${JEMALLOC_SHARED_LIB}"
-        INSTALL_COMMAND ${CMAKE_MAKE_PROGRAM} -j1 install)
-    else()
-      ExternalProject_Add(jemalloc_ep
-        URL https://github.com/jemalloc/jemalloc/releases/download/${JEMALLOC_VERSION}/jemalloc-${JEMALLOC_VERSION}.tar.bz2
-        CONFIGURE_COMMAND ./configure "--prefix=${JEMALLOC_PREFIX}" "--with-jemalloc-prefix="
-        BUILD_IN_SOURCE 1
-        BUILD_COMMAND ${CMAKE_MAKE_PROGRAM}
-        INSTALL_COMMAND ${CMAKE_MAKE_PROGRAM} -j1 install)
-    endif()
-  else()
-    set(JEMALLOC_VENDORED 0)
-  endif()
-
-  include_directories(SYSTEM ${JEMALLOC_INCLUDE_DIR})
-  ADD_THIRDPARTY_LIB(jemalloc
-    STATIC_LIB ${JEMALLOC_STATIC_LIB}
-    SHARED_LIB ${JEMALLOC_SHARED_LIB}
-    DEPS ${PTHREAD_LIBRARY})
-endif()
-
-## Google PerfTools
-##
-## Disabled with TSAN/ASAN as well as with gold+dynamic linking (see comment
-## near definition of ARROW_USING_GOLD).
-# find_package(GPerf REQUIRED)
-# if (NOT "${ARROW_USE_ASAN}" AND
-#     NOT "${ARROW_USE_TSAN}" AND
-#     NOT ("${ARROW_USING_GOLD}" AND "${ARROW_LINK}" STREQUAL "d"))
-#   ADD_THIRDPARTY_LIB(tcmalloc
-#     STATIC_LIB "${TCMALLOC_STATIC_LIB}"
-#     SHARED_LIB "${TCMALLOC_SHARED_LIB}")
-#   ADD_THIRDPARTY_LIB(profiler
-#     STATIC_LIB "${PROFILER_STATIC_LIB}"
-#     SHARED_LIB "${PROFILER_SHARED_LIB}")
-#   list(APPEND ARROW_BASE_LIBS tcmalloc profiler)
-#   add_definitions("-DTCMALLOC_ENABLED")
-#   set(ARROW_TCMALLOC_AVAILABLE 1)
-# endif()
-
-########################################################################
-# HDFS thirdparty setup
-
-if (DEFINED ENV{HADOOP_HOME})
-  set(HADOOP_HOME $ENV{HADOOP_HOME})
-  if (NOT EXISTS "${HADOOP_HOME}/include/hdfs.h")
-    message(STATUS "Did not find hdfs.h in expected location, using vendored one")
-    set(HADOOP_HOME "${THIRDPARTY_DIR}/hadoop")
-  endif()
-else()
-  set(HADOOP_HOME "${THIRDPARTY_DIR}/hadoop")
-endif()
-
-set(HDFS_H_PATH "${HADOOP_HOME}/include/hdfs.h")
-if (NOT EXISTS ${HDFS_H_PATH})
-  message(FATAL_ERROR "Did not find hdfs.h at ${HDFS_H_PATH}")
-endif()
-message(STATUS "Found hdfs.h at: " ${HDFS_H_PATH})
-
-include_directories(SYSTEM "${HADOOP_HOME}/include")
-
-############################################################
-# Linker setup
-############################################################
-set(ARROW_MIN_TEST_LIBS
-  ${ARROW_STATIC_LINK_LIBS}
-  arrow_static
-  gtest
-  gtest_main
-  ${ARROW_BASE_LIBS})
-
-if(NOT MSVC)
-  set(ARROW_MIN_TEST_LIBS
-    ${ARROW_MIN_TEST_LIBS}
-    ${CMAKE_DL_LIBS})
-endif()
-
-set(ARROW_TEST_LINK_LIBS ${ARROW_MIN_TEST_LIBS})
-
-set(ARROW_BENCHMARK_LINK_LIBS
-  arrow_static
-  arrow_benchmark_main
-  ${ARROW_BASE_LIBS})
-
-############################################################
 # "make ctags" target
 ############################################################
 if (UNIX)
@@ -936,16 +515,41 @@ if (${CLANG_TIDY_FOUND})
 
 endif()
 
-
-
 ############################################################
-# Subdirectories
+# Linker and Dependencies
 ############################################################
 
-set(ARROW_LINK_LIBS
-    )
+set(ARROW_STATIC_LINK_LIBS
+  brotli_dec
+  brotli_enc
+  brotli_common
+  snappy
+  zlib)
+
+set(ARROW_DEPENDENCIES
+  ${ARROW_STATIC_LINK_LIBS})
 
-set(ARROW_STATIC_LINK_LIBS)
+set(ARROW_MIN_TEST_LIBS
+  arrow_static
+  ${ARROW_STATIC_LINK_LIBS}
+  gtest
+  gtest_main)
+
+if(NOT MSVC)
+  set(ARROW_MIN_TEST_LIBS
+    ${ARROW_MIN_TEST_LIBS}
+    ${CMAKE_DL_LIBS})
+endif()
+
+set(ARROW_TEST_LINK_LIBS ${ARROW_MIN_TEST_LIBS})
+
+set(ARROW_BENCHMARK_LINK_LIBS
+  arrow_static
+  arrow_benchmark_main
+  ${ARROW_STATIC_LINK_LIBS})
+
+set(ARROW_LINK_LIBS
+  ${ARROW_STATIC_LINK_LIBS})
 
 set(ARROW_SHARED_PRIVATE_LINK_LIBS
   ${BOOST_SYSTEM_LIBRARY}
@@ -1009,13 +613,9 @@ elseif (NOT MSVC)
     ${PTHREAD_LIBRARY})
 endif()
 
-if(RAPIDJSON_VENDORED)
-  set(ARROW_DEPENDENCIES ${ARROW_DEPENDENCIES} rapidjson_ep)
-endif()
-
-if(FLATBUFFERS_VENDORED)
-  set(ARROW_DEPENDENCIES ${ARROW_DEPENDENCIES} flatbuffers_ep)
-endif()
+############################################################
+# Subdirectories
+############################################################
 
 if(NOT WIN32 AND ARROW_PLASMA)
   add_subdirectory(src/plasma)
@@ -1048,6 +648,7 @@ set(ARROW_SRCS
   src/arrow/io/memory.cc
 
   src/arrow/util/bit-util.cc
+  src/arrow/util/compression.cc
   src/arrow/util/decimal.cc
   src/arrow/util/key_value_metadata.cc
 )

http://git-wip-us.apache.org/repos/asf/arrow/blob/98f7cac6/cpp/cmake_modules/FindBrotli.cmake
----------------------------------------------------------------------
diff --git a/cpp/cmake_modules/FindBrotli.cmake b/cpp/cmake_modules/FindBrotli.cmake
new file mode 100644
index 0000000..f2e714c
--- /dev/null
+++ b/cpp/cmake_modules/FindBrotli.cmake
@@ -0,0 +1,116 @@
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Tries to find Brotli headers and libraries.
+#
+# Usage of this module as follows:
+#
+#  find_package(Brotli)
+#
+# Variables used by this module, they can change the default behaviour and need
+# to be set before calling find_package:
+#
+#  Brotli_HOME - When set, this path is inspected instead of standard library
+#                locations as the root of the Brotli installation.
+#                The environment variable BROTLI_HOME overrides this variable.
+#
+# This module defines
+#  BROTLI_INCLUDE_DIR, directory containing headers
+#  BROTLI_LIBS, directory containing brotli libraries
+#  BROTLI_STATIC_LIB, paths to the static brotlienc, brotlidec and brotlicommon libraries
+#  BROTLI_SHARED_LIB, paths to the shared brotlienc, brotlidec and brotlicommon libraries
+#  BROTLI_FOUND, whether brotli has been found
+
+if( NOT "${BROTLI_HOME}" STREQUAL "")
+    file( TO_CMAKE_PATH "${BROTLI_HOME}" _native_path )
+    list( APPEND _brotli_roots ${_native_path} )
+elseif ( Brotli_HOME )
+    list( APPEND _brotli_roots ${Brotli_HOME} )
+endif()
+
+find_path( BROTLI_INCLUDE_DIR NAMES brotli/decode.h
+  PATHS ${_brotli_roots}
+  NO_DEFAULT_PATH
+  PATH_SUFFIXES "include" )
+
+find_library( BROTLI_LIBRARY_ENC NAMES libbrotlienc.a brotlienc
+  PATHS ${_brotli_roots}
+  NO_DEFAULT_PATH
+  PATH_SUFFIXES "lib/${CMAKE_LIBRARY_ARCHITECTURE}" "lib" )
+
+find_library( BROTLI_LIBRARY_DEC NAMES libbrotlidec.a brotlidec
+  PATHS ${_brotli_roots}
+  NO_DEFAULT_PATH
+  PATH_SUFFIXES "lib/${CMAKE_LIBRARY_ARCHITECTURE}" "lib" )
+
+find_library( BROTLI_LIBRARY_COMMON NAMES libbrotlicommon.a brotlicommon
+  PATHS ${_brotli_roots}
+  NO_DEFAULT_PATH
+  PATH_SUFFIXES "lib/${CMAKE_LIBRARY_ARCHITECTURE}" "lib" )
+
+set(BROTLI_LIBRARIES ${BROTLI_LIBRARY_ENC} ${BROTLI_LIBRARY_DEC}
+    ${BROTLI_LIBRARY_COMMON})
+
+if (BROTLI_INCLUDE_DIR AND (PARQUET_MINIMAL_DEPENDENCY OR BROTLI_LIBRARIES))
+  set(BROTLI_FOUND TRUE)
+  get_filename_component( BROTLI_LIBS ${BROTLI_LIBRARY_ENC} PATH )
+  set(BROTLI_LIB_NAME brotli)
+  if (MSVC AND NOT BROTLI_MSVC_STATIC_LIB_SUFFIX)
+    set(BROTLI_MSVC_STATIC_LIB_SUFFIX _static)
+  endif()
+  set(BROTLI_STATIC_LIB
+      ${BROTLI_LIBS}/${CMAKE_STATIC_LIBRARY_PREFIX}${BROTLI_LIB_NAME}enc${BROTLI_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}
+      ${BROTLI_LIBS}/${CMAKE_STATIC_LIBRARY_PREFIX}${BROTLI_LIB_NAME}dec${BROTLI_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}
+      ${BROTLI_LIBS}/${CMAKE_STATIC_LIBRARY_PREFIX}${BROTLI_LIB_NAME}common${BROTLI_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX})
+  set(BROTLI_STATIC_LIBRARY_ENC ${BROTLI_LIBS}/${CMAKE_STATIC_LIBRARY_PREFIX}${BROTLI_LIB_NAME}enc${BROTLI_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX})
+  set(BROTLI_STATIC_LIBRARY_DEC ${BROTLI_LIBS}/${CMAKE_STATIC_LIBRARY_PREFIX}${BROTLI_LIB_NAME}dec${BROTLI_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX})
+  set(BROTLI_STATIC_LIBRARY_COMMON ${BROTLI_LIBS}/${CMAKE_STATIC_LIBRARY_PREFIX}${BROTLI_LIB_NAME}common${BROTLI_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX})
+  set(BROTLI_SHARED_LIB
+      ${BROTLI_LIBS}/${CMAKE_SHARED_LIBRARY_PREFIX}${BROTLI_LIB_NAME}enc${CMAKE_SHARED_LIBRARY_SUFFIX}
+      ${BROTLI_LIBS}/${CMAKE_SHARED_LIBRARY_PREFIX}${BROTLI_LIB_NAME}dec${CMAKE_SHARED_LIBRARY_SUFFIX}
+      ${BROTLI_LIBS}/${CMAKE_SHARED_LIBRARY_PREFIX}${BROTLI_LIB_NAME}common${CMAKE_SHARED_LIBRARY_SUFFIX})
+else ()
+  set(BROTLI_FOUND FALSE)
+endif ()
+
+if (BROTLI_FOUND)  # success: report what was located, unless QUIET was requested
+  if (NOT Brotli_FIND_QUIETLY)
+    if (PARQUET_MINIMAL_DEPENDENCY)
+      message(STATUS "Found the Brotli headers: ${BROTLI_INCLUDE_DIR}")
+    else ()
+      message(STATUS "Found the Brotli library: ${BROTLI_LIBRARIES}")
+    endif ()
+  endif ()
+else ()  # failure: unless QUIET, say where we searched (fatal when REQUIRED)
+  if (NOT Brotli_FIND_QUIETLY)
+    set(BROTLI_ERR_MSG "Could not find the Brotli library. Looked in")  # no trailing space; suffixes add it
+    if ( _brotli_roots )
+      set(BROTLI_ERR_MSG "${BROTLI_ERR_MSG} ${_brotli_roots}.")
+    else ()
+      set(BROTLI_ERR_MSG "${BROTLI_ERR_MSG} system search paths.")
+    endif ()
+    if (Brotli_FIND_REQUIRED)
+      message(FATAL_ERROR "${BROTLI_ERR_MSG}")
+    else (Brotli_FIND_REQUIRED)
+      message(STATUS "${BROTLI_ERR_MSG}")
+    endif (Brotli_FIND_REQUIRED)
+  endif ()
+endif ()
+
+mark_as_advanced(
+  BROTLI_INCLUDE_DIR
+  BROTLI_LIBS
+  BROTLI_LIBRARIES
+  BROTLI_STATIC_LIB
+  BROTLI_SHARED_LIB
+)

http://git-wip-us.apache.org/repos/asf/arrow/blob/98f7cac6/cpp/cmake_modules/FindSnappy.cmake
----------------------------------------------------------------------
diff --git a/cpp/cmake_modules/FindSnappy.cmake b/cpp/cmake_modules/FindSnappy.cmake
new file mode 100644
index 0000000..867963c
--- /dev/null
+++ b/cpp/cmake_modules/FindSnappy.cmake
@@ -0,0 +1,94 @@
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Tries to find Snappy headers and libraries.
+#
+# Usage of this module as follows:
+#
+#  find_package(Snappy)
+#
+# Variables used by this module, they can change the default behaviour and need
+# to be set before calling find_package:
+#
+#  Snappy_HOME - When set, this path is inspected instead of standard library
+#                locations as the root of the Snappy installation.
+#                The environment variable SNAPPY_HOME overrides this variable.
+#
+# This module defines
+#  SNAPPY_INCLUDE_DIR, directory containing headers
+#  SNAPPY_LIBS, directory containing snappy libraries
+#  SNAPPY_STATIC_LIB, path to libsnappy.a
+#  SNAPPY_SHARED_LIB, path to libsnappy's shared library
+#  SNAPPY_FOUND, whether snappy has been found
+
+if( NOT "${SNAPPY_HOME}" STREQUAL "")
+    file( TO_CMAKE_PATH "${SNAPPY_HOME}" _native_path )
+    list( APPEND _snappy_roots ${_native_path} )
+elseif ( Snappy_HOME )
+    list( APPEND _snappy_roots ${Snappy_HOME} )
+endif()
+
+message(STATUS "SNAPPY_HOME: ${SNAPPY_HOME}")
+find_path(SNAPPY_INCLUDE_DIR snappy.h HINTS
+  ${_snappy_roots}
+  NO_DEFAULT_PATH
+  PATH_SUFFIXES "include")
+
+find_library( SNAPPY_LIBRARIES NAMES snappy PATHS
+  ${_snappy_roots}
+  NO_DEFAULT_PATH
+  PATH_SUFFIXES "lib")
+
+if (SNAPPY_INCLUDE_DIR AND (PARQUET_MINIMAL_DEPENDENCY OR SNAPPY_LIBRARIES))
+  set(SNAPPY_FOUND TRUE)
+  get_filename_component( SNAPPY_LIBS ${SNAPPY_LIBRARIES} PATH )
+  set(SNAPPY_HEADER_NAME snappy.h)
+  set(SNAPPY_HEADER ${SNAPPY_INCLUDE_DIR}/${SNAPPY_HEADER_NAME})
+  set(SNAPPY_LIB_NAME snappy)
+  set(SNAPPY_STATIC_LIB ${SNAPPY_LIBS}/${CMAKE_STATIC_LIBRARY_PREFIX}${SNAPPY_LIB_NAME}${SNAPPY_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX})
+  set(SNAPPY_SHARED_LIB ${SNAPPY_LIBS}/${CMAKE_SHARED_LIBRARY_PREFIX}${SNAPPY_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX})
+else ()
+  set(SNAPPY_FOUND FALSE)
+endif ()
+
+if (SNAPPY_FOUND)  # success: report what was located, unless QUIET was requested
+  if (NOT Snappy_FIND_QUIETLY)
+    if (PARQUET_MINIMAL_DEPENDENCY)
+      message(STATUS "Found the Snappy header: ${SNAPPY_HEADER}")
+    else ()
+      message(STATUS "Found the Snappy library: ${SNAPPY_LIBRARIES}")
+    endif ()
+  endif ()
+else ()  # failure: unless QUIET, say where we searched (fatal when REQUIRED)
+  if (NOT Snappy_FIND_QUIETLY)
+    set(SNAPPY_ERR_MSG "Could not find the Snappy library. Looked in")  # no trailing space; suffixes add it
+    if ( _snappy_roots )
+      set(SNAPPY_ERR_MSG "${SNAPPY_ERR_MSG} ${_snappy_roots}.")
+    else ()
+      set(SNAPPY_ERR_MSG "${SNAPPY_ERR_MSG} system search paths.")
+    endif ()
+    if (Snappy_FIND_REQUIRED)
+      message(FATAL_ERROR "${SNAPPY_ERR_MSG}")
+    else (Snappy_FIND_REQUIRED)
+      message(STATUS "${SNAPPY_ERR_MSG}")
+    endif (Snappy_FIND_REQUIRED)
+  endif ()
+endif ()
+
+mark_as_advanced(
+  SNAPPY_INCLUDE_DIR
+  SNAPPY_LIBS
+  SNAPPY_LIBRARIES
+  SNAPPY_STATIC_LIB
+  SNAPPY_SHARED_LIB
+)

http://git-wip-us.apache.org/repos/asf/arrow/blob/98f7cac6/cpp/cmake_modules/FindZLIB.cmake
----------------------------------------------------------------------
diff --git a/cpp/cmake_modules/FindZLIB.cmake b/cpp/cmake_modules/FindZLIB.cmake
new file mode 100644
index 0000000..78b84f2
--- /dev/null
+++ b/cpp/cmake_modules/FindZLIB.cmake
@@ -0,0 +1,105 @@
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Tries to find ZLIB headers and libraries.
+#
+# Usage of this module as follows:
+#
+#  find_package(ZLIB)
+#
+# Variables used by this module, they can change the default behaviour and need
+# to be set before calling find_package:
+#
+#  ZLIB_HOME - When set, this path is inspected instead of standard library
+#             locations as the root of the ZLIB installation.
+#             The environment variable ZLIB_HOME overrides this variable.
+#
+# - Find ZLIB (zlib.h, libz.a, libz.so, and libz.so.1)
+# This module defines
+#  ZLIB_INCLUDE_DIR, directory containing headers
+#  ZLIB_LIBS, directory containing zlib libraries
+#  ZLIB_STATIC_LIB, path to libz.a
+#  ZLIB_SHARED_LIB, path to libz's shared library
+#  ZLIB_FOUND, whether zlib has been found
+
+if( NOT "${ZLIB_HOME}" STREQUAL "")
+    file( TO_CMAKE_PATH "${ZLIB_HOME}" _native_path )
+    list( APPEND _zlib_roots ${_native_path} )
+elseif ( ZLIB_HOME )
+    list( APPEND _zlib_roots ${ZLIB_HOME} )
+endif()
+
+# Try the parameterized roots, if they exist
+if ( _zlib_roots )
+    find_path( ZLIB_INCLUDE_DIR NAMES zlib.h
+        PATHS ${_zlib_roots} NO_DEFAULT_PATH
+        PATH_SUFFIXES "include" )
+    find_library( ZLIB_LIBRARIES NAMES libz.a zlib
+        PATHS ${_zlib_roots} NO_DEFAULT_PATH
+        PATH_SUFFIXES "lib" )
+else ()
+    find_path( ZLIB_INCLUDE_DIR NAMES zlib.h )
+    # Only look for the static library
+    find_library( ZLIB_LIBRARIES NAMES libz.a zlib )
+endif ()
+
+
+if (ZLIB_INCLUDE_DIR AND (PARQUET_MINIMAL_DEPENDENCY OR ZLIB_LIBRARIES))
+  set(ZLIB_FOUND TRUE)
+  get_filename_component( ZLIB_LIBS ${ZLIB_LIBRARIES} PATH )
+  set(ZLIB_HEADER_NAME zlib.h)
+  set(ZLIB_HEADER ${ZLIB_INCLUDE_DIR}/${ZLIB_HEADER_NAME})
+  set(ZLIB_LIB_NAME z)
+  if (MSVC)
+    if (NOT ZLIB_MSVC_STATIC_LIB_SUFFIX)
+      set(ZLIB_MSVC_STATIC_LIB_SUFFIX libstatic)
+    endif()
+    set(ZLIB_MSVC_SHARED_LIB_SUFFIX lib)
+  endif()
+  set(ZLIB_STATIC_LIB ${ZLIB_LIBS}/${CMAKE_STATIC_LIBRARY_PREFIX}${ZLIB_LIB_NAME}${ZLIB_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX})
+  set(ZLIB_SHARED_LIB ${ZLIB_LIBS}/${CMAKE_SHARED_LIBRARY_PREFIX}${ZLIB_LIB_NAME}${ZLIB_MSVC_SHARED_LIB_SUFFIX}${CMAKE_SHARED_LIBRARY_SUFFIX})
+else ()
+  set(ZLIB_FOUND FALSE)
+endif ()
+
+if (ZLIB_FOUND)  # success: report what was located, unless QUIET was requested
+  if (NOT ZLIB_FIND_QUIETLY)
+    if (PARQUET_MINIMAL_DEPENDENCY)
+      message(STATUS "Found the ZLIB header: ${ZLIB_HEADER}")
+    else()
+      message(STATUS "Found the ZLIB library: ${ZLIB_LIBRARIES}")
+    endif ()
+  endif ()
+else ()  # failure: unless QUIET, say where we searched (fatal when REQUIRED)
+  if (NOT ZLIB_FIND_QUIETLY)
+    set(ZLIB_ERR_MSG "Could not find the ZLIB library. Looked in")  # no trailing space; suffixes add it
+    if ( _zlib_roots )
+      set(ZLIB_ERR_MSG "${ZLIB_ERR_MSG} ${_zlib_roots}.")
+    else ()
+      set(ZLIB_ERR_MSG "${ZLIB_ERR_MSG} system search paths.")
+    endif ()
+    if (ZLIB_FIND_REQUIRED)
+      message(FATAL_ERROR "${ZLIB_ERR_MSG}")
+    else (ZLIB_FIND_REQUIRED)
+      message(STATUS "${ZLIB_ERR_MSG}")
+    endif (ZLIB_FIND_REQUIRED)
+  endif ()
+endif ()
+
+mark_as_advanced(
+  ZLIB_INCLUDE_DIR
+  ZLIB_LIBS
+  ZLIB_LIBRARIES
+  ZLIB_STATIC_LIB
+  ZLIB_SHARED_LIB
+)

http://git-wip-us.apache.org/repos/asf/arrow/blob/98f7cac6/cpp/cmake_modules/SnappyCMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/cmake_modules/SnappyCMakeLists.txt b/cpp/cmake_modules/SnappyCMakeLists.txt
new file mode 100644
index 0000000..9d0a166
--- /dev/null
+++ b/cpp/cmake_modules/SnappyCMakeLists.txt
@@ -0,0 +1,85 @@
+# Copyright 2008 Google Inc. All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+#     * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.6)
+PROJECT(snappy)
+
+INCLUDE(CheckIncludeFiles)
+INCLUDE(CMakePackageConfigHelpers)
+
+CHECK_INCLUDE_FILES("stdint.h" HAVE_STDINT_H)
+CHECK_INCLUDE_FILES("stddef.h" HAVE_STDDEF_H)
+CHECK_INCLUDE_FILES("sys/uio.h" HAVE_SYS_UIO_H)
+
+if (NOT HAVE_SYS_UIO_H)
+  set(HAVE_SYS_UIO_H 0)
+endif()
+
+if (NOT HAVE_STDINT_H)
+  set(HAVE_STDINT_H 0)
+endif()
+
+if (NOT HAVE_STDDEF_H)
+  set(HAVE_STDDEF_H 0)
+endif()
+
+set(ac_cv_have_stdint_h ${HAVE_STDINT_H})
+set(ac_cv_have_stddef_h ${HAVE_STDDEF_H})
+set(ac_cv_have_sys_uio_h ${HAVE_SYS_UIO_H})
+CONFIGURE_FILE(${snappy_SOURCE_DIR}/snappy-stubs-public.h.in
+               snappy-stubs-public.h)
+
+if (WIN32)
+  ADD_DEFINITIONS(-D_CRT_SECURE_NO_WARNINGS)
+endif()
+
+set(SNAPPY_SRCS snappy.cc
+  snappy-c.cc
+  snappy-stubs-internal.cc
+  snappy-sinksource.cc
+  snappy.h
+  snappy-c.h
+  snappy-sinksource.h
+  snappy-stubs-public.h)
+
+add_library(snappy SHARED ${SNAPPY_SRCS})
+add_library(snappystatic STATIC ${SNAPPY_SRCS})
+
+TARGET_COMPILE_DEFINITIONS(snappy PRIVATE -DHAVE_CONFIG_H)
+TARGET_COMPILE_DEFINITIONS(snappystatic PRIVATE -DHAVE_CONFIG_H)
+
+install(FILES snappy.h
+  snappy-c.h
+  snappy-sinksource.h
+  ${snappy_BINARY_DIR}/snappy-stubs-public.h
+  DESTINATION include)
+
+install(TARGETS snappy snappystatic
+  RUNTIME DESTINATION bin
+  LIBRARY DESTINATION lib
+  ARCHIVE DESTINATION lib)

http://git-wip-us.apache.org/repos/asf/arrow/blob/98f7cac6/cpp/cmake_modules/SnappyConfig.h
----------------------------------------------------------------------
diff --git a/cpp/cmake_modules/SnappyConfig.h b/cpp/cmake_modules/SnappyConfig.h
new file mode 100644
index 0000000..74eb776
--- /dev/null
+++ b/cpp/cmake_modules/SnappyConfig.h
@@ -0,0 +1,36 @@
+// Copyright 2008 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef SNAPPY_CONFIG_H
+#define SNAPPY_CONFIG_H 1
+
+#if defined(_MSC_VER) && (_MSC_VER <= 1900)
+typedef __int64 ssize_t;
+#endif
+
+#endif

http://git-wip-us.apache.org/repos/asf/arrow/blob/98f7cac6/cpp/cmake_modules/ThirdpartyToolchain.cmake
----------------------------------------------------------------------
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake
new file mode 100644
index 0000000..f6a9bb4
--- /dev/null
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -0,0 +1,641 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+# ----------------------------------------------------------------------
+# Thirdparty toolchain
+
+set(THIRDPARTY_DIR "${CMAKE_SOURCE_DIR}/thirdparty")
+set(GFLAGS_VERSION "2.1.2")
+set(GTEST_VERSION "1.8.0")
+set(GBENCHMARK_VERSION "1.1.0")
+set(FLATBUFFERS_VERSION "1.6.0")
+set(JEMALLOC_VERSION "4.4.0")
+set(SNAPPY_VERSION "1.1.3")
+set(BROTLI_VERSION "v0.6.0")
+
+string(TOUPPER ${CMAKE_BUILD_TYPE} UPPERCASE_BUILD_TYPE)
+
+set(EP_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}}")
+set(EP_C_FLAGS "${CMAKE_C_FLAGS} ${CMAKE_C_FLAGS_${UPPERCASE_BUILD_TYPE}}")
+
+if (NOT MSVC)
+  # Set -fPIC on all external projects
+  set(EP_CXX_FLAGS "${EP_CXX_FLAGS} -fPIC")
+  set(EP_C_FLAGS "${EP_C_FLAGS} -fPIC")
+endif()
+
+if (NOT "$ENV{ARROW_BUILD_TOOLCHAIN}" STREQUAL "")
+  set(FLATBUFFERS_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
+  set(RAPIDJSON_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
+  set(JEMALLOC_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
+  set(GFLAGS_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
+  set(SNAPPY_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
+  set(ZLIB_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
+  set(BROTLI_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
+
+  if (NOT DEFINED ENV{BOOST_ROOT})
+    # Since we have to set this in the environment, we check whether
+    # $BOOST_ROOT is defined inside here
+    set(ENV{BOOST_ROOT} "$ENV{ARROW_BUILD_TOOLCHAIN}")
+  endif()
+endif()
+
+if (DEFINED ENV{FLATBUFFERS_HOME})
+  set(FLATBUFFERS_HOME "$ENV{FLATBUFFERS_HOME}")
+endif()
+
+if (DEFINED ENV{RAPIDJSON_HOME})
+  set(RAPIDJSON_HOME "$ENV{RAPIDJSON_HOME}")
+endif()
+
+if (DEFINED ENV{JEMALLOC_HOME})
+  set(JEMALLOC_HOME "$ENV{JEMALLOC_HOME}")
+endif()
+
+if (DEFINED ENV{GFLAGS_HOME})
+  set(GFLAGS_HOME "$ENV{GFLAGS_HOME}")
+endif()
+
+if (DEFINED ENV{SNAPPY_HOME})
+  set(SNAPPY_HOME "$ENV{SNAPPY_HOME}")
+endif()
+
+if (DEFINED ENV{ZLIB_HOME})
+  set(ZLIB_HOME "$ENV{ZLIB_HOME}")
+endif()
+
+if (DEFINED ENV{BROTLI_HOME})
+  set(BROTLI_HOME "$ENV{BROTLI_HOME}")
+endif()
+
+# ----------------------------------------------------------------------
+# Find pthreads
+
+if (NOT MSVC)
+  find_library(PTHREAD_LIBRARY pthread)
+  message(STATUS "Found pthread: ${PTHREAD_LIBRARY}")
+endif()
+
+# ----------------------------------------------------------------------
+# Add Boost dependencies (code adapted from Apache Kudu (incubating))
+
+set(Boost_DEBUG TRUE)
+set(Boost_USE_MULTITHREADED ON)
+set(Boost_ADDITIONAL_VERSIONS  # newest first; each release given as "x.y.0" and "x.y"
+  "1.64.0" "1.64"
+  "1.63.0" "1.63"
+  "1.62.0" "1.62"
+  "1.61.0" "1.61"
+  "1.60.0" "1.60")
+
+if (ARROW_BOOST_USE_SHARED)
+  # Find shared Boost libraries.
+  set(Boost_USE_STATIC_LIBS OFF)
+
+  if(MSVC)
+    # disable autolinking in boost
+    add_definitions(-DBOOST_ALL_NO_LIB)
+
+    # force all boost libraries to dynamic link
+    add_definitions(-DBOOST_ALL_DYN_LINK)
+  endif()
+
+  if (ARROW_BOOST_HEADER_ONLY)
+    find_package(Boost)
+  else()
+    find_package(Boost COMPONENTS system filesystem REQUIRED)
+    if ("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG")
+      set(BOOST_SHARED_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_DEBUG})
+      set(BOOST_SHARED_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_DEBUG})
+    else()
+      set(BOOST_SHARED_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_RELEASE})
+      set(BOOST_SHARED_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_RELEASE})
+    endif()
+    set(BOOST_SYSTEM_LIBRARY boost_system_shared)
+    set(BOOST_FILESYSTEM_LIBRARY boost_filesystem_shared)
+  endif()
+else()
+  # Find static boost headers and libs
+  # TODO Differentiate here between release and debug builds
+  set(Boost_USE_STATIC_LIBS ON)
+  if (ARROW_BOOST_HEADER_ONLY)
+    find_package(Boost)
+  else()
+    find_package(Boost COMPONENTS system filesystem REQUIRED)
+    if ("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG")
+      set(BOOST_STATIC_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_DEBUG})
+      set(BOOST_STATIC_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_DEBUG})
+    else()
+      set(BOOST_STATIC_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_RELEASE})
+      set(BOOST_STATIC_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_RELEASE})
+    endif()
+    set(BOOST_SYSTEM_LIBRARY boost_system_static)
+    set(BOOST_FILESYSTEM_LIBRARY boost_filesystem_static)
+  endif()
+endif()
+
+message(STATUS "Boost include dir: " ${Boost_INCLUDE_DIRS})
+message(STATUS "Boost libraries: " ${Boost_LIBRARIES})
+
+if (NOT ARROW_BOOST_HEADER_ONLY)
+  ADD_THIRDPARTY_LIB(boost_system
+      STATIC_LIB "${BOOST_STATIC_SYSTEM_LIBRARY}"
+      SHARED_LIB "${BOOST_SHARED_SYSTEM_LIBRARY}")
+
+  ADD_THIRDPARTY_LIB(boost_filesystem
+      STATIC_LIB "${BOOST_STATIC_FILESYSTEM_LIBRARY}"
+      SHARED_LIB "${BOOST_SHARED_FILESYSTEM_LIBRARY}")
+
+  SET(ARROW_BOOST_LIBS boost_system boost_filesystem)
+endif()
+
+include_directories(SYSTEM ${Boost_INCLUDE_DIR})
+
+if(ARROW_BUILD_TESTS OR ARROW_BUILD_BENCHMARKS)
+  add_custom_target(unittest ctest -L unittest)
+
+  if("$ENV{GTEST_HOME}" STREQUAL "")
+    if(APPLE)
+      set(GTEST_CMAKE_CXX_FLAGS "-fPIC -DGTEST_USE_OWN_TR1_TUPLE=1 -Wno-unused-value -Wno-ignored-attributes")
+    elseif(NOT MSVC)
+      set(GTEST_CMAKE_CXX_FLAGS "-fPIC")
+    endif()
+    string(TOUPPER ${CMAKE_BUILD_TYPE} UPPERCASE_BUILD_TYPE)
+    set(GTEST_CMAKE_CXX_FLAGS "${EP_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}} ${GTEST_CMAKE_CXX_FLAGS}")
+
+    set(GTEST_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/googletest_ep-prefix/src/googletest_ep")
+    set(GTEST_INCLUDE_DIR "${GTEST_PREFIX}/include")
+    set(GTEST_STATIC_LIB
+      "${GTEST_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}gtest${CMAKE_STATIC_LIBRARY_SUFFIX}")
+    set(GTEST_MAIN_STATIC_LIB
+      "${GTEST_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}gtest_main${CMAKE_STATIC_LIBRARY_SUFFIX}")
+    set(GTEST_VENDORED 1)
+    set(GTEST_CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
+                         -DCMAKE_INSTALL_PREFIX=${GTEST_PREFIX}
+                         -Dgtest_force_shared_crt=ON
+                         -DCMAKE_CXX_FLAGS=${GTEST_CMAKE_CXX_FLAGS})
+
+    if (CMAKE_VERSION VERSION_GREATER "3.2")
+      # BUILD_BYPRODUCTS is a 3.2+ feature
+      ExternalProject_Add(googletest_ep
+        URL "https://github.com/google/googletest/archive/release-${GTEST_VERSION}.tar.gz"
+        BUILD_BYPRODUCTS ${GTEST_STATIC_LIB} ${GTEST_MAIN_STATIC_LIB}
+        CMAKE_ARGS ${GTEST_CMAKE_ARGS})
+    else()
+      ExternalProject_Add(googletest_ep
+        URL "https://github.com/google/googletest/archive/release-${GTEST_VERSION}.tar.gz"
+        CMAKE_ARGS ${GTEST_CMAKE_ARGS})
+    endif()
+  else()
+    find_package(GTest REQUIRED)
+    set(GTEST_VENDORED 0)
+  endif()
+
+  message(STATUS "GTest include dir: ${GTEST_INCLUDE_DIR}")
+  message(STATUS "GTest static library: ${GTEST_STATIC_LIB}")
+  include_directories(SYSTEM ${GTEST_INCLUDE_DIR})
+  ADD_THIRDPARTY_LIB(gtest
+    STATIC_LIB ${GTEST_STATIC_LIB})
+  ADD_THIRDPARTY_LIB(gtest_main
+    STATIC_LIB ${GTEST_MAIN_STATIC_LIB})
+
+  if(GTEST_VENDORED)
+    add_dependencies(gtest googletest_ep)
+    add_dependencies(gtest_main googletest_ep)
+  endif()
+
+  # gflags (formerly Googleflags) command line parsing
+  if("${GFLAGS_HOME}" STREQUAL "")
+    set(GFLAGS_CMAKE_CXX_FLAGS ${EP_CXX_FLAGS})
+
+    set(GFLAGS_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/gflags_ep-prefix/src/gflags_ep")
+    set(GFLAGS_HOME "${GFLAGS_PREFIX}")
+    set(GFLAGS_INCLUDE_DIR "${GFLAGS_PREFIX}/include")
+    if(MSVC)
+      set(GFLAGS_STATIC_LIB "${GFLAGS_PREFIX}/lib/gflags_static.lib")
+    else()
+      set(GFLAGS_STATIC_LIB "${GFLAGS_PREFIX}/lib/libgflags.a")
+    endif()
+    set(GFLAGS_VENDORED 1)
+    set(GFLAGS_CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
+                          -DCMAKE_INSTALL_PREFIX=${GFLAGS_PREFIX}
+                          -DBUILD_SHARED_LIBS=OFF
+                          -DBUILD_STATIC_LIBS=ON
+                          -DBUILD_PACKAGING=OFF
+                          -DBUILD_TESTING=OFF
+                          -DBUILD_CONFIG_TESTS=OFF  # -D prefix required for a cache definition
+                          -DINSTALL_HEADERS=ON
+                          -DCMAKE_CXX_FLAGS=${GFLAGS_CMAKE_CXX_FLAGS})
+    if (CMAKE_VERSION VERSION_GREATER "3.2")
+      # BUILD_BYPRODUCTS is a 3.2+ feature
+      ExternalProject_Add(gflags_ep
+        GIT_REPOSITORY https://github.com/gflags/gflags.git
+        GIT_TAG cce68f0c9c5d054017425e6e6fd54f696d36e8ee
+        BUILD_IN_SOURCE 1
+        BUILD_BYPRODUCTS "${GFLAGS_STATIC_LIB}"
+        CMAKE_ARGS ${GFLAGS_CMAKE_ARGS})
+    else()
+      ExternalProject_Add(gflags_ep
+        GIT_REPOSITORY https://github.com/gflags/gflags.git
+        GIT_TAG cce68f0c9c5d054017425e6e6fd54f696d36e8ee
+        BUILD_IN_SOURCE 1
+        CMAKE_ARGS ${GFLAGS_CMAKE_ARGS})
+    endif()
+  else()
+    set(GFLAGS_VENDORED 0)
+    find_package(GFlags REQUIRED)
+  endif()
+
+  message(STATUS "GFlags include dir: ${GFLAGS_INCLUDE_DIR}")
+  message(STATUS "GFlags static library: ${GFLAGS_STATIC_LIB}")
+  include_directories(SYSTEM ${GFLAGS_INCLUDE_DIR})
+  ADD_THIRDPARTY_LIB(gflags
+    STATIC_LIB ${GFLAGS_STATIC_LIB})
+  if(MSVC)
+    set_target_properties(gflags
+      PROPERTIES
+      IMPORTED_LINK_INTERFACE_LIBRARIES "shlwapi.lib")
+  endif()
+
+  if(GFLAGS_VENDORED)
+    add_dependencies(gflags gflags_ep)
+  endif()
+endif()
+
+if(ARROW_BUILD_BENCHMARKS)
+  add_custom_target(runbenchmark ctest -L benchmark)
+
+  if("$ENV{GBENCHMARK_HOME}" STREQUAL "")
+    if(APPLE)
+      set(GBENCHMARK_CMAKE_CXX_FLAGS "-fPIC -std=c++11 -stdlib=libc++")
+    elseif(NOT MSVC)
+      set(GBENCHMARK_CMAKE_CXX_FLAGS "-fPIC --std=c++11")
+    endif()
+
+    set(GBENCHMARK_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/gbenchmark_ep/src/gbenchmark_ep-install")
+    set(GBENCHMARK_INCLUDE_DIR "${GBENCHMARK_PREFIX}/include")
+    set(GBENCHMARK_STATIC_LIB "${GBENCHMARK_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}benchmark${CMAKE_STATIC_LIBRARY_SUFFIX}")
+    set(GBENCHMARK_VENDORED 1)
+    set(GBENCHMARK_CMAKE_ARGS
+          "-DCMAKE_BUILD_TYPE=Release"
+          "-DCMAKE_INSTALL_PREFIX:PATH=${GBENCHMARK_PREFIX}"
+          "-DBENCHMARK_ENABLE_TESTING=OFF"
+          "-DCMAKE_CXX_FLAGS=${GBENCHMARK_CMAKE_CXX_FLAGS}")
+    if (APPLE)
+      set(GBENCHMARK_CMAKE_ARGS ${GBENCHMARK_CMAKE_ARGS} "-DBENCHMARK_USE_LIBCXX=ON")
+    endif()
+    if (CMAKE_VERSION VERSION_GREATER "3.2")
+      # BUILD_BYPRODUCTS is a 3.2+ feature
+      ExternalProject_Add(gbenchmark_ep
+        URL "https://github.com/google/benchmark/archive/v${GBENCHMARK_VERSION}.tar.gz"
+        BUILD_BYPRODUCTS "${GBENCHMARK_STATIC_LIB}"
+        CMAKE_ARGS ${GBENCHMARK_CMAKE_ARGS})
+    else()
+      ExternalProject_Add(gbenchmark_ep
+        URL "https://github.com/google/benchmark/archive/v${GBENCHMARK_VERSION}.tar.gz"
+        CMAKE_ARGS ${GBENCHMARK_CMAKE_ARGS})
+    endif()
+  else()
+    find_package(GBenchmark REQUIRED)
+    set(GBENCHMARK_VENDORED 0)
+  endif()
+
+  message(STATUS "GBenchmark include dir: ${GBENCHMARK_INCLUDE_DIR}")
+  message(STATUS "GBenchmark static library: ${GBENCHMARK_STATIC_LIB}")
+  include_directories(SYSTEM ${GBENCHMARK_INCLUDE_DIR})
+  ADD_THIRDPARTY_LIB(benchmark
+    STATIC_LIB ${GBENCHMARK_STATIC_LIB})
+
+  if(GBENCHMARK_VENDORED)
+    add_dependencies(benchmark gbenchmark_ep)
+  endif()
+endif()
+
+
+if (ARROW_IPC)
+  # RapidJSON, header only dependency
+  if("${RAPIDJSON_HOME}" STREQUAL "")
+    ExternalProject_Add(rapidjson_ep
+      PREFIX "${CMAKE_BINARY_DIR}"
+      URL "https://github.com/miloyip/rapidjson/archive/v1.1.0.tar.gz"
+      URL_MD5 "badd12c511e081fec6c89c43a7027bce"
+      CONFIGURE_COMMAND ""
+      BUILD_COMMAND ""
+      BUILD_IN_SOURCE 1
+      INSTALL_COMMAND "")
+
+    ExternalProject_Get_Property(rapidjson_ep SOURCE_DIR)
+    set(RAPIDJSON_INCLUDE_DIR "${SOURCE_DIR}/include")
+    set(RAPIDJSON_VENDORED 1)
+  else()
+    set(RAPIDJSON_INCLUDE_DIR "${RAPIDJSON_HOME}/include")
+    set(RAPIDJSON_VENDORED 0)
+  endif()
+  message(STATUS "RapidJSON include dir: ${RAPIDJSON_INCLUDE_DIR}")
+  include_directories(SYSTEM ${RAPIDJSON_INCLUDE_DIR})
+
+  ## Flatbuffers
+  if("${FLATBUFFERS_HOME}" STREQUAL "")
+    set(FLATBUFFERS_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/flatbuffers_ep-prefix/src/flatbuffers_ep-install")
+    ExternalProject_Add(flatbuffers_ep
+      URL "https://github.com/google/flatbuffers/archive/v${FLATBUFFERS_VERSION}.tar.gz"
+      CMAKE_ARGS
+      "-DCMAKE_CXX_FLAGS=-fPIC"
+      "-DCMAKE_INSTALL_PREFIX:PATH=${FLATBUFFERS_PREFIX}"
+      "-DFLATBUFFERS_BUILD_TESTS=OFF")
+
+    set(FLATBUFFERS_INCLUDE_DIR "${FLATBUFFERS_PREFIX}/include")
+    set(FLATBUFFERS_COMPILER "${FLATBUFFERS_PREFIX}/bin/flatc")
+    set(FLATBUFFERS_VENDORED 1)
+  else()
+    find_package(Flatbuffers REQUIRED)
+    set(FLATBUFFERS_VENDORED 0)
+  endif()
+
+  if(RAPIDJSON_VENDORED)
+    set(ARROW_DEPENDENCIES ${ARROW_DEPENDENCIES} rapidjson_ep)
+  endif()
+
+  if(FLATBUFFERS_VENDORED)
+    set(ARROW_DEPENDENCIES ${ARROW_DEPENDENCIES} flatbuffers_ep)
+  endif()
+
+  message(STATUS "Flatbuffers include dir: ${FLATBUFFERS_INCLUDE_DIR}")
+  message(STATUS "Flatbuffers compiler: ${FLATBUFFERS_COMPILER}")
+  include_directories(SYSTEM ${FLATBUFFERS_INCLUDE_DIR})
+endif()
+#----------------------------------------------------------------------
+
+if (MSVC)
+  # jemalloc is not supported on Windows
+  set(ARROW_JEMALLOC off)
+endif()
+
+if (ARROW_JEMALLOC)
+  find_package(jemalloc)
+
+  if(NOT JEMALLOC_FOUND)
+    set(ARROW_JEMALLOC_USE_SHARED OFF)
+    set(JEMALLOC_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/jemalloc_ep-prefix/src/jemalloc_ep/dist/")
+    set(JEMALLOC_HOME "${JEMALLOC_PREFIX}")
+    set(JEMALLOC_INCLUDE_DIR "${JEMALLOC_PREFIX}/include")
+    set(JEMALLOC_SHARED_LIB "${JEMALLOC_PREFIX}/lib/libjemalloc${CMAKE_SHARED_LIBRARY_SUFFIX}")
+    set(JEMALLOC_STATIC_LIB "${JEMALLOC_PREFIX}/lib/libjemalloc_pic${CMAKE_STATIC_LIBRARY_SUFFIX}")
+    set(JEMALLOC_VENDORED 1)
+    if (CMAKE_VERSION VERSION_GREATER "3.2")
+      # BUILD_BYPRODUCTS is a 3.2+ feature
+      ExternalProject_Add(jemalloc_ep
+        URL https://github.com/jemalloc/jemalloc/releases/download/${JEMALLOC_VERSION}/jemalloc-${JEMALLOC_VERSION}.tar.bz2
+        CONFIGURE_COMMAND ./configure "--prefix=${JEMALLOC_PREFIX}" "--with-jemalloc-prefix="
+        BUILD_IN_SOURCE 1
+        BUILD_COMMAND ${CMAKE_MAKE_PROGRAM}
+        BUILD_BYPRODUCTS "${JEMALLOC_STATIC_LIB}" "${JEMALLOC_SHARED_LIB}"
+        INSTALL_COMMAND ${CMAKE_MAKE_PROGRAM} -j1 install)
+    else()
+      ExternalProject_Add(jemalloc_ep
+        URL https://github.com/jemalloc/jemalloc/releases/download/${JEMALLOC_VERSION}/jemalloc-${JEMALLOC_VERSION}.tar.bz2
+        CONFIGURE_COMMAND ./configure "--prefix=${JEMALLOC_PREFIX}" "--with-jemalloc-prefix="
+        BUILD_IN_SOURCE 1
+        BUILD_COMMAND ${CMAKE_MAKE_PROGRAM}
+        INSTALL_COMMAND ${CMAKE_MAKE_PROGRAM} -j1 install)
+    endif()
+  else()
+    set(JEMALLOC_VENDORED 0)
+  endif()
+
+  include_directories(SYSTEM ${JEMALLOC_INCLUDE_DIR})
+  ADD_THIRDPARTY_LIB(jemalloc
+    STATIC_LIB ${JEMALLOC_STATIC_LIB}
+    SHARED_LIB ${JEMALLOC_SHARED_LIB}
+    DEPS ${PTHREAD_LIBRARY})
+endif()
+
+## Google PerfTools
+##
+## Disabled with TSAN/ASAN as well as with gold+dynamic linking (see comment
+## near definition of ARROW_USING_GOLD).
+# find_package(GPerf REQUIRED)
+# if (NOT "${ARROW_USE_ASAN}" AND
+#     NOT "${ARROW_USE_TSAN}" AND
+#     NOT ("${ARROW_USING_GOLD}" AND "${ARROW_LINK}" STREQUAL "d"))
+#   ADD_THIRDPARTY_LIB(tcmalloc
+#     STATIC_LIB "${TCMALLOC_STATIC_LIB}"
+#     SHARED_LIB "${TCMALLOC_SHARED_LIB}")
+#   ADD_THIRDPARTY_LIB(profiler
+#     STATIC_LIB "${PROFILER_STATIC_LIB}"
+#     SHARED_LIB "${PROFILER_SHARED_LIB}")
+#   list(APPEND ARROW_BASE_LIBS tcmalloc profiler)
+#   add_definitions("-DTCMALLOC_ENABLED")
+#   set(ARROW_TCMALLOC_AVAILABLE 1)
+# endif()
+
+########################################################################
+# HDFS thirdparty setup
+
+if (DEFINED ENV{HADOOP_HOME})
+  set(HADOOP_HOME $ENV{HADOOP_HOME})
+  if (NOT EXISTS "${HADOOP_HOME}/include/hdfs.h")
+    message(STATUS "Did not find hdfs.h in expected location, using vendored one")
+    set(HADOOP_HOME "${THIRDPARTY_DIR}/hadoop")
+  endif()
+else()
+  set(HADOOP_HOME "${THIRDPARTY_DIR}/hadoop")
+endif()
+
+set(HDFS_H_PATH "${HADOOP_HOME}/include/hdfs.h")
+if (NOT EXISTS ${HDFS_H_PATH})
+  message(FATAL_ERROR "Did not find hdfs.h at ${HDFS_H_PATH}")
+endif()
+message(STATUS "Found hdfs.h at: " ${HDFS_H_PATH})
+
+include_directories(SYSTEM "${HADOOP_HOME}/include")
+
+# ----------------------------------------------------------------------
+# ZLIB
+
+if (NOT ARROW_ZLIB_VENDORED)
+  find_package(ZLIB)
+endif()
+
+if (NOT ZLIB_FOUND)
+  set(ZLIB_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/zlib_ep/src/zlib_ep-install")
+  set(ZLIB_HOME "${ZLIB_PREFIX}")
+  set(ZLIB_INCLUDE_DIR "${ZLIB_PREFIX}/include")
+  if (MSVC)
+    if (${UPPERCASE_BUILD_TYPE} STREQUAL "DEBUG")
+      set(ZLIB_STATIC_LIB_NAME zlibstaticd.lib)
+    else()
+      set(ZLIB_STATIC_LIB_NAME zlibstatic.lib)
+    endif()
+  else()
+    set(ZLIB_STATIC_LIB_NAME libz.a)
+  endif()
+  set(ZLIB_STATIC_LIB "${ZLIB_PREFIX}/lib/${ZLIB_STATIC_LIB_NAME}")
+  set(ZLIB_CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
+                      -DCMAKE_INSTALL_PREFIX=${ZLIB_PREFIX}
+                      -DCMAKE_C_FLAGS=${EP_C_FLAGS}
+                      -DBUILD_SHARED_LIBS=OFF)
+
+  if (CMAKE_VERSION VERSION_GREATER "3.2")
+    set(ZLIB_BUILD_BYPRODUCTS BUILD_BYPRODUCTS "${ZLIB_STATIC_LIB}")
+  endif()
+  ExternalProject_Add(zlib_ep
+    URL "http://zlib.net/fossils/zlib-1.2.8.tar.gz"
+    ${ZLIB_BUILD_BYPRODUCTS}
+    CMAKE_ARGS ${ZLIB_CMAKE_ARGS})
+  set(ZLIB_VENDORED 1)
+else()
+  set(ZLIB_VENDORED 0)
+endif()
+
+include_directories(SYSTEM ${ZLIB_INCLUDE_DIR})
+ADD_THIRDPARTY_LIB(zlib
+  STATIC_LIB ${ZLIB_STATIC_LIB})
+
+if (ZLIB_VENDORED)
+  add_dependencies(zlib zlib_ep)
+endif()
+
+# ----------------------------------------------------------------------
+# Snappy
+
+## Snappy
+find_package(Snappy)
+if (NOT SNAPPY_FOUND)
+  set(SNAPPY_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/snappy_ep/src/snappy_ep-install")
+  set(SNAPPY_HOME "${SNAPPY_PREFIX}")
+  set(SNAPPY_INCLUDE_DIR "${SNAPPY_PREFIX}/include")
+  if (MSVC)
+    set(SNAPPY_STATIC_LIB_NAME snappystatic)
+  else()
+    set(SNAPPY_STATIC_LIB_NAME snappy)
+  endif()
+  set(SNAPPY_STATIC_LIB "${SNAPPY_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}${SNAPPY_STATIC_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}")
+  set(SNAPPY_SRC_URL "https://github.com/google/snappy/releases/download/${SNAPPY_VERSION}/snappy-${SNAPPY_VERSION}.tar.gz")
+
+  if ("${UPPERCASE_BUILD_TYPE}" STREQUAL "RELEASE")  # STREQUAL: string compare (EQUAL is numeric)
+    if (APPLE)
+      set(SNAPPY_CXXFLAGS "CXXFLAGS=-DNDEBUG -O1")  # one list element = one configure argument
+    else()
+      set(SNAPPY_CXXFLAGS "CXXFLAGS=-DNDEBUG -O2")  # no inner quotes: they would be passed literally
+    endif()
+  endif()
+
+  if (CMAKE_VERSION VERSION_GREATER "3.2")
+    set(SNAPPY_BUILD_BYPRODUCTS BUILD_BYPRODUCTS "${SNAPPY_STATIC_LIB}")
+  endif()
+
+  if (MSVC)
+    set(SNAPPY_CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
+                          "-DCMAKE_CXX_FLAGS=${EP_CXX_FLAGS}"
+                          "-DCMAKE_C_FLAGS=${EP_C_FLAGS}"  # EP_C_FLAGS: C flags for external projects
+                          "-DCMAKE_INSTALL_PREFIX=${SNAPPY_PREFIX}")
+    set(SNAPPY_UPDATE_COMMAND ${CMAKE_COMMAND} -E copy
+                      ${CMAKE_SOURCE_DIR}/cmake_modules/SnappyCMakeLists.txt
+                      ./CMakeLists.txt &&
+                      ${CMAKE_COMMAND} -E copy
+                      ${CMAKE_SOURCE_DIR}/cmake_modules/SnappyConfig.h
+                      ./config.h)
+    ExternalProject_Add(snappy_ep
+      UPDATE_COMMAND ${SNAPPY_UPDATE_COMMAND}
+      BUILD_IN_SOURCE 1
+      BUILD_COMMAND ${MAKE}
+      INSTALL_DIR ${SNAPPY_PREFIX}
+      URL ${SNAPPY_SRC_URL}
+      CMAKE_ARGS ${SNAPPY_CMAKE_ARGS}
+      ${SNAPPY_BUILD_BYPRODUCTS})
+  else()
+    ExternalProject_Add(snappy_ep
+      CONFIGURE_COMMAND ./configure --with-pic "--prefix=${SNAPPY_PREFIX}" ${SNAPPY_CXXFLAGS}
+      BUILD_IN_SOURCE 1
+      BUILD_COMMAND ${MAKE}
+      INSTALL_DIR ${SNAPPY_PREFIX}
+      URL ${SNAPPY_SRC_URL}
+      ${SNAPPY_BUILD_BYPRODUCTS})
+  endif()
+  set(SNAPPY_VENDORED 1)
+else()
+  set(SNAPPY_VENDORED 0)
+endif()
+
+include_directories(SYSTEM ${SNAPPY_INCLUDE_DIR})
+ADD_THIRDPARTY_LIB(snappy
+  STATIC_LIB ${SNAPPY_STATIC_LIB})
+
+if (SNAPPY_VENDORED)
+  add_dependencies(snappy snappy_ep)
+endif()
+
+# ----------------------------------------------------------------------
+# Brotli
+
+find_package(Brotli)
+if (NOT BROTLI_FOUND)
+  set(BROTLI_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/brotli_ep/src/brotli_ep-install")
+  set(BROTLI_HOME "${BROTLI_PREFIX}")
+  set(BROTLI_INCLUDE_DIR "${BROTLI_PREFIX}/include")
+  if (MSVC)
+    set(BROTLI_LIB_DIR bin)
+  else()
+    set(BROTLI_LIB_DIR lib)
+  endif()
+  set(BROTLI_STATIC_LIBRARY_ENC "${BROTLI_PREFIX}/${BROTLI_LIB_DIR}/${CMAKE_LIBRARY_ARCHITECTURE}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlienc${CMAKE_STATIC_LIBRARY_SUFFIX}")
+  set(BROTLI_STATIC_LIBRARY_DEC "${BROTLI_PREFIX}/${BROTLI_LIB_DIR}/${CMAKE_LIBRARY_ARCHITECTURE}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlidec${CMAKE_STATIC_LIBRARY_SUFFIX}")
+  set(BROTLI_STATIC_LIBRARY_COMMON "${BROTLI_PREFIX}/${BROTLI_LIB_DIR}/${CMAKE_LIBRARY_ARCHITECTURE}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlicommon${CMAKE_STATIC_LIBRARY_SUFFIX}")
+  set(BROTLI_CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
+                        "-DCMAKE_CXX_FLAGS=${EP_CXX_FLAGS}"
+                        "-DCMAKE_C_FLAGS=${EP_C_FLAGS}"  # EP_C_FLAGS: C flags for external projects
+                        -DCMAKE_INSTALL_PREFIX=${BROTLI_PREFIX}
+                        -DCMAKE_INSTALL_LIBDIR=lib/${CMAKE_LIBRARY_ARCHITECTURE}
+                        -DBUILD_SHARED_LIBS=OFF)
+
+  if (CMAKE_VERSION VERSION_GREATER "3.2")
+    set(BROTLI_BUILD_BYPRODUCTS BUILD_BYPRODUCTS "${BROTLI_STATIC_LIBRARY_ENC}" "${BROTLI_STATIC_LIBRARY_DEC}" "${BROTLI_STATIC_LIBRARY_COMMON}")
+  endif()
+
+  ExternalProject_Add(brotli_ep
+    URL "https://github.com/google/brotli/archive/${BROTLI_VERSION}.tar.gz"
+    ${BROTLI_BUILD_BYPRODUCTS}
+    CMAKE_ARGS ${BROTLI_CMAKE_ARGS}
+    STEP_TARGETS headers_copy)
+  if (MSVC)
+    ExternalProject_Get_Property(brotli_ep SOURCE_DIR)
+
+    ExternalProject_Add_Step(brotli_ep headers_copy
+      COMMAND xcopy /E /I include ..\\..\\..\\brotli_ep\\src\\brotli_ep-install\\include /Y
+      DEPENDEES build
+      WORKING_DIRECTORY ${SOURCE_DIR})
+  endif()
+  set(BROTLI_VENDORED 1)
+else()
+  set(BROTLI_VENDORED 0)
+endif()
+
+include_directories(SYSTEM ${BROTLI_INCLUDE_DIR})
+ADD_THIRDPARTY_LIB(brotli_enc
+  STATIC_LIB ${BROTLI_STATIC_LIBRARY_ENC})
+ADD_THIRDPARTY_LIB(brotli_dec
+  STATIC_LIB ${BROTLI_STATIC_LIBRARY_DEC})
+ADD_THIRDPARTY_LIB(brotli_common
+  STATIC_LIB ${BROTLI_STATIC_LIBRARY_COMMON})
+
+if (BROTLI_VENDORED)
+  add_dependencies(brotli_enc brotli_ep)
+  add_dependencies(brotli_dec brotli_ep)
+  add_dependencies(brotli_common brotli_ep)
+endif()

http://git-wip-us.apache.org/repos/asf/arrow/blob/98f7cac6/cpp/src/arrow/python/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/CMakeLists.txt b/cpp/src/arrow/python/CMakeLists.txt
index 3085229..bc2a815 100644
--- a/cpp/src/arrow/python/CMakeLists.txt
+++ b/cpp/src/arrow/python/CMakeLists.txt
@@ -34,8 +34,8 @@ endif()
 
 set(ARROW_PYTHON_MIN_TEST_LIBS
   arrow_python_test_main
-  arrow_python_static
-  arrow_static)
+  arrow_python_shared
+  arrow_shared)
 
 set(ARROW_PYTHON_TEST_LINK_LIBS ${ARROW_PYTHON_MIN_TEST_LIBS})
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/98f7cac6/cpp/src/arrow/util/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt
index ac7e866..1abcce4 100644
--- a/cpp/src/arrow/util/CMakeLists.txt
+++ b/cpp/src/arrow/util/CMakeLists.txt
@@ -22,11 +22,13 @@
 # Headers: top level
 install(FILES
   bit-util.h
+  compression.h
+  key_value_metadata.h
   logging.h
   macros.h
   random.h
+  stl.h
   visibility.h
-  key_value_metadata.h
   DESTINATION include/arrow/util)
 
 #######################################
@@ -51,6 +53,7 @@ if (ARROW_BUILD_BENCHMARKS)
 endif()
 
 ADD_ARROW_TEST(bit-util-test)
-ADD_ARROW_TEST(stl-util-test)
+ADD_ARROW_TEST(compression-test)
 ADD_ARROW_TEST(decimal-test)
 ADD_ARROW_TEST(key-value-metadata-test)
+ADD_ARROW_TEST(stl-util-test)

http://git-wip-us.apache.org/repos/asf/arrow/blob/98f7cac6/cpp/src/arrow/util/compression-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/compression-test.cc b/cpp/src/arrow/util/compression-test.cc
new file mode 100644
index 0000000..1a0e5d7
--- /dev/null
+++ b/cpp/src/arrow/util/compression-test.cc
@@ -0,0 +1,89 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstdint>
+#include <gtest/gtest.h>
+#include <string>
+#include <vector>
+
+#include "arrow/status.h"
+#include "arrow/test-common.h"
+#include "arrow/util/compression.h"
+
+using std::string;
+using std::vector;
+
+namespace arrow {
+
+template <typename T>
+void CheckCodecRoundtrip(const vector<uint8_t>& data) {
+  // two independent codec instances: each must decode what the other encoded
+  T c1;
+  T c2;
+
+  int max_compressed_len = static_cast<int>(c1.MaxCompressedLen(data.size(), &data[0]));
+  std::vector<uint8_t> compressed(max_compressed_len);
+  std::vector<uint8_t> decompressed(data.size());
+
+  // compress with c1; `compressed` stays max_compressed_len bytes long so the
+  // capacity passed to the second Compress() call below matches the real buffer
+  int64_t actual_size;
+  ASSERT_OK(c1.Compress(
+      data.size(), &data[0], max_compressed_len, &compressed[0], &actual_size));
+
+  // decompress with c2
+  ASSERT_OK(c2.Decompress(
+      actual_size, &compressed[0], decompressed.size(), &decompressed[0]));
+
+  ASSERT_EQ(data, decompressed);
+
+  // compress with c2
+  int64_t actual_size2;
+  ASSERT_OK(c2.Compress(
+      data.size(), &data[0], max_compressed_len, &compressed[0], &actual_size2));
+  ASSERT_EQ(actual_size2, actual_size);
+
+  // decompress with c1
+  ASSERT_OK(c1.Decompress(
+      actual_size2, &compressed[0], decompressed.size(), &decompressed[0]));
+
+  ASSERT_EQ(data, decompressed);
+}
+
+template <typename T>
+void CheckCodec() {
+  // round-trip two payload sizes, each filled by test::random_bytes with seed 1234
+  for (const int num_bytes : {10000, 100000}) {
+    vector<uint8_t> payload(num_bytes);
+    test::random_bytes(num_bytes, 1234, payload.data());
+    CheckCodecRoundtrip<T>(payload);
+  }
+}
+
+TEST(TestCompressors, Snappy) {  // runs the CheckCodec round-trip with SnappyCodec
+  CheckCodec<SnappyCodec>();
+}
+
+TEST(TestCompressors, Brotli) {  // runs the CheckCodec round-trip with BrotliCodec
+  CheckCodec<BrotliCodec>();
+}
+
+TEST(TestCompressors, GZip) {  // runs the CheckCodec round-trip with GZipCodec
+  CheckCodec<GZipCodec>();
+}
+
+}  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/98f7cac6/cpp/src/arrow/util/compression.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/compression.cc b/cpp/src/arrow/util/compression.cc
new file mode 100644
index 0000000..f82ae5c
--- /dev/null
+++ b/cpp/src/arrow/util/compression.cc
@@ -0,0 +1,327 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/util/compression.h"
+
+#include <cstdint>
+#include <memory>
+#include <sstream>
+#include <string>
+
+#include <brotli/decode.h>
+#include <brotli/encode.h>
+#include <snappy.h>
+#include <zlib.h>
+
+#include "arrow/status.h"
+#include "arrow/util/logging.h"
+
+namespace arrow {
+
+// Out-of-line definition of the virtual destructor; nothing to release here.
+Codec::~Codec() = default;
+
+// Instantiates the codec implementation matching `codec_type` into `*result`.
+//
+// UNCOMPRESSED has no codec object: `*result` is reset to null and OK is
+// returned. LZO yields NotImplemented and an unrecognized enum value yields
+// Invalid; in both error cases `*result` is left untouched.
+Status Codec::Create(Compression::type codec_type, std::unique_ptr<Codec>* result) {
+  switch (codec_type) {
+    case Compression::UNCOMPRESSED:
+      // Drop any codec the caller's pointer already holds so that a stale
+      // instance is never used for data that is supposed to be uncompressed.
+      result->reset();
+      break;
+    case Compression::SNAPPY:
+      result->reset(new SnappyCodec());
+      break;
+    case Compression::GZIP:
+      result->reset(new GZipCodec());
+      break;
+    case Compression::LZO:
+      return Status::NotImplemented("LZO codec not implemented");
+    case Compression::BROTLI:
+      result->reset(new BrotliCodec());
+      break;
+    default:
+      return Status::Invalid("Unrecognized codec");
+  }
+  return Status::OK();
+}
+
+// ----------------------------------------------------------------------
+// gzip implementation
+
+// Magic numbers from zlib.h for the windowBits argument of deflateInit2() and
+// inflateInit2(); zlib.h does not define named constants for them.
+
+// Maximum window size; the base windowBits value before format adjustments
+static constexpr int WINDOW_BITS = 15;
+
+// Added to windowBits when compressing to produce gzip output
+static constexpr int GZIP_CODEC = 16;
+
+// OR-ed into windowBits when decompressing: detect zlib vs. gzip from header
+static constexpr int DETECT_CODEC = 32;
+
+// Stateful zlib-backed implementation behind GZipCodec.
+//
+// A single z_stream is shared between compression and decompression, so the
+// object is in at most one of the two modes at a time. Entering one mode tears
+// down the zlib state of the other and re-initializes the stream.
+class GZipCodec::GZipCodecImpl {
+ public:
+  explicit GZipCodecImpl(GZipCodec::Format format)
+      : format_(format),
+        compressor_initialized_(false),
+        decompressor_initialized_(false) {}
+
+  ~GZipCodecImpl() {
+    EndCompressor();
+    EndDecompressor();
+  }
+
+  // Puts the stream into compression mode for the configured format.
+  // Returns IOError if zlib refuses to initialize.
+  Status InitCompressor() {
+    EndDecompressor();
+    memset(&stream_, 0, sizeof(stream_));
+
+    // Initialize to run specified format
+    int window_bits = WINDOW_BITS;
+    if (format_ == DEFLATE) {
+      window_bits = -window_bits;
+    } else if (format_ == GZIP) {
+      window_bits += GZIP_CODEC;
+    }
+    const int ret = deflateInit2(&stream_, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
+        window_bits, 9, Z_DEFAULT_STRATEGY);
+    if (ret != Z_OK) { return ZlibError("zlib deflateInit failed: "); }
+    compressor_initialized_ = true;
+    return Status::OK();
+  }
+
+  // Releases zlib's compression state, if any.
+  void EndCompressor() {
+    if (compressor_initialized_) { (void)deflateEnd(&stream_); }
+    compressor_initialized_ = false;
+  }
+
+  // Puts the stream into decompression mode for the configured format.
+  // Returns IOError if zlib refuses to initialize.
+  Status InitDecompressor() {
+    EndCompressor();
+    memset(&stream_, 0, sizeof(stream_));
+
+    // Initialize to run either deflate or zlib/gzip format
+    const int window_bits =
+        format_ == DEFLATE ? -WINDOW_BITS : WINDOW_BITS | DETECT_CODEC;
+    if (inflateInit2(&stream_, window_bits) != Z_OK) {
+      return ZlibError("zlib inflateInit failed: ");
+    }
+    decompressor_initialized_ = true;
+    return Status::OK();
+  }
+
+  // Releases zlib's decompression state, if any.
+  void EndDecompressor() {
+    if (decompressor_initialized_) { (void)inflateEnd(&stream_); }
+    decompressor_initialized_ = false;
+  }
+
+  // Decompresses the whole of `input` into `output` in a single call.
+  //
+  // Returns OK without touching `output` when `output_length` is 0, Invalid
+  // when a length cannot be represented in zlib's uInt counters, and IOError
+  // when zlib reports a failure or the output buffer is too small.
+  Status Decompress(int64_t input_length, const uint8_t* input, int64_t output_length,
+      uint8_t* output) {
+    if (!decompressor_initialized_) { RETURN_NOT_OK(InitDecompressor()); }
+    if (output_length == 0) {
+      // The zlib library does not allow *output to be NULL, even when output_length
+      // is 0 (inflate() will return Z_STREAM_ERROR). We don't consider this an
+      // error, so bail early if no output is expected. Note that we don't signal
+      // an error if the input actually contains compressed data.
+      return Status::OK();
+    }
+    if (!FitsInUInt(input_length) || !FitsInUInt(output_length)) {
+      // Casting such a length to uInt would silently truncate it.
+      std::stringstream ss;
+      ss << "GZipCodec buffer length out of range for zlib. InputLength="
+         << input_length << " OutputLength=" << output_length;
+      return Status::Invalid(ss.str());
+    }
+
+    // Reset the stream for this block
+    if (inflateReset(&stream_) != Z_OK) {
+      return ZlibError("zlib inflateReset failed: ");
+    }
+
+    // gzip can run in streaming mode or non-streaming mode.  We only
+    // support the non-streaming use case where we present it the entire
+    // compressed input and a buffer big enough to contain the entire
+    // decompressed output.
+    stream_.next_in = const_cast<Bytef*>(reinterpret_cast<const Bytef*>(input));
+    stream_.avail_in = static_cast<uInt>(input_length);
+    stream_.next_out = reinterpret_cast<Bytef*>(output);
+    stream_.avail_out = static_cast<uInt>(output_length);
+
+    // We know the output size.  In this case, we can use Z_FINISH
+    // which is more efficient.
+    const int ret = inflate(&stream_, Z_FINISH);
+    if (ret == Z_STREAM_END) { return Status::OK(); }
+
+    if (ret == Z_OK) {
+      // Failure, buffer was too small
+      std::stringstream ss;
+      ss << "Too small a buffer passed to GZipCodec. InputLength=" << input_length
+         << " OutputLength=" << output_length;
+      return Status::IOError(ss.str());
+    }
+
+    // Failure for some other reason
+    return ZlibError("GZipCodec failed: ");
+  }
+
+  // Returns zlib's upper bound on the compressed size of `input_length` bytes.
+  // `input` is not inspected.
+  int64_t MaxCompressedLen(int64_t input_length, const uint8_t* input) {
+    // Must be in compression mode
+    if (!compressor_initialized_) {
+      Status s = InitCompressor();
+      DCHECK(s.ok());
+    }
+    // TODO(wesm): deal with zlib < 1.2.3 (see Impala codebase)
+    return deflateBound(&stream_, static_cast<uLong>(input_length));
+  }
+
+  // Compresses the whole of `input` into `output` in a single call and stores
+  // the number of bytes produced in `*output_length`.
+  //
+  // Returns Invalid when a length cannot be represented in zlib's uInt
+  // counters and IOError when zlib fails or the output buffer is too small.
+  // `*output_length` is only written on success.
+  Status Compress(int64_t input_length, const uint8_t* input, int64_t output_buffer_len,
+      uint8_t* output, int64_t* output_length) {
+    if (!compressor_initialized_) { RETURN_NOT_OK(InitCompressor()); }
+    if (!FitsInUInt(input_length) || !FitsInUInt(output_buffer_len)) {
+      // Casting such a length to uInt would silently truncate it.
+      std::stringstream ss;
+      ss << "GZipCodec buffer length out of range for zlib. InputLength="
+         << input_length << " OutputLength=" << output_buffer_len;
+      return Status::Invalid(ss.str());
+    }
+
+    stream_.next_in = const_cast<Bytef*>(reinterpret_cast<const Bytef*>(input));
+    stream_.avail_in = static_cast<uInt>(input_length);
+    stream_.next_out = reinterpret_cast<Bytef*>(output);
+    stream_.avail_out = static_cast<uInt>(output_buffer_len);
+
+    const int ret = deflate(&stream_, Z_FINISH);
+    if (ret != Z_STREAM_END) {
+      // will return Z_OK (and stream.msg NOT set) if stream.avail_out is too
+      // small
+      Status failure = (ret == Z_OK)
+          ? Status::IOError("zlib deflate failed, output buffer too small")
+          : ZlibError("zlib deflate failed: ");
+      // Build the Status before resetting (the reset clears stream_.msg), then
+      // discard the half-written block so the next call starts a fresh stream.
+      (void)deflateReset(&stream_);
+      return failure;
+    }
+
+    // Actual output length, captured before the stream is reset
+    const int64_t bytes_written = output_buffer_len - stream_.avail_out;
+
+    if (deflateReset(&stream_) != Z_OK) {
+      return ZlibError("zlib deflateReset failed: ");
+    }
+
+    *output_length = bytes_written;
+    return Status::OK();
+  }
+
+ private:
+  // Builds an IOError whose text is `prefix` followed by zlib's message for
+  // the last operation, if there is one. zlib does not always set
+  // stream_.msg, so it must be null-checked before being streamed.
+  Status ZlibError(const char* prefix) const {
+    std::stringstream ss;
+    ss << prefix;
+    if (stream_.msg != nullptr) { ss << stream_.msg; }
+    return Status::IOError(ss.str());
+  }
+
+  // Returns true if `length` is unchanged by a round trip through zlib's uInt,
+  // i.e. it is non-negative and small enough not to be truncated.
+  static bool FitsInUInt(int64_t length) {
+    return static_cast<int64_t>(static_cast<uInt>(length)) == length;
+  }
+
+  // zlib is stateful and the z_stream state variable must be initialized
+  // before use
+  z_stream stream_;
+
+  // Realistically, this will always be GZIP, but we leave the option open to
+  // configure
+  GZipCodec::Format format_;
+
+  // These variables are mutually exclusive. When the codec is in "compressor"
+  // state, compressor_initialized_ is true while decompressor_initialized_ is
+  // false. When it's decompressing, the opposite is true.
+  //
+  // Indeed, this is slightly hacky, but the alternative is having separate
+  // Compressor and Decompressor classes. If this ever becomes an issue, we can
+  // perform the refactoring then
+  bool compressor_initialized_;
+  bool decompressor_initialized_;
+};
+
+// Creates the codec together with its private implementation object, which is
+// configured for `format`.
+GZipCodec::GZipCodec(Format format) : impl_(new GZipCodecImpl(format)) {}
+
+// Defined in this file, after GZipCodecImpl's definition, so that impl_ is
+// destroyed where the implementation class is a complete type.
+GZipCodec::~GZipCodec() = default;
+
+// Decompresses `input` into `output`; all work is delegated to the
+// implementation object.
+Status GZipCodec::Decompress(int64_t input_length, const uint8_t* input,
+    int64_t output_buffer_len, uint8_t* output) {
+  GZipCodecImpl& impl = *impl_;
+  return impl.Decompress(input_length, input, output_buffer_len, output);
+}
+
+// Returns the implementation object's upper bound on the compressed size of
+// `input_length` bytes.
+int64_t GZipCodec::MaxCompressedLen(int64_t input_length, const uint8_t* input) {
+  GZipCodecImpl& impl = *impl_;
+  return impl.MaxCompressedLen(input_length, input);
+}
+
+// Compresses `input` into `output` and reports the compressed size through
+// `output_length`; all work is delegated to the implementation object.
+Status GZipCodec::Compress(int64_t input_length, const uint8_t* input,
+    int64_t output_buffer_len, uint8_t* output, int64_t* output_length) {
+  GZipCodecImpl& impl = *impl_;
+  return impl.Compress(input_length, input, output_buffer_len, output, output_length);
+}
+
+// Returns the codec's name as a static, NUL-terminated string.
+const char* GZipCodec::name() const {
+  static const char kCodecName[] = "gzip";
+  return kCodecName;
+}
+
+// ----------------------------------------------------------------------
+// Snappy implementation
+
+// Decompresses a complete snappy block from `input` into `output_buffer`.
+//
+// The uncompressed length stored in the block header is checked against
+// `output_len` before anything is written, because RawUncompress() itself
+// takes no output size. Returns IOError if the input is corrupt and Invalid
+// if the output buffer is too small for the decoded data.
+Status SnappyCodec::Decompress(
+    int64_t input_len, const uint8_t* input, int64_t output_len, uint8_t* output_buffer) {
+  const char* src = reinterpret_cast<const char*>(input);
+  const size_t src_len = static_cast<size_t>(input_len);
+
+  size_t decompressed_size = 0;
+  if (!snappy::GetUncompressedLength(src, src_len, &decompressed_size)) {
+    return Status::IOError("Corrupt snappy compressed data.");
+  }
+  if (output_len < 0 || static_cast<uint64_t>(output_len) < decompressed_size) {
+    std::stringstream ss;
+    ss << "Output buffer size (" << output_len << ") must be " << decompressed_size
+       << " or larger.";
+    return Status::Invalid(ss.str());
+  }
+
+  if (!snappy::RawUncompress(src, src_len, reinterpret_cast<char*>(output_buffer))) {
+    return Status::IOError("Corrupt snappy compressed data.");
+  }
+  return Status::OK();
+}
+
+// Returns snappy's upper bound on the compressed size of `input_len` bytes.
+// `input` is not inspected.
+int64_t SnappyCodec::MaxCompressedLen(int64_t input_len, const uint8_t* input) {
+  const auto upper_bound = snappy::MaxCompressedLength(input_len);
+  return upper_bound;
+}
+
+// Compresses `input` into `output_buffer` with snappy and stores the number of
+// bytes produced in `*output_length`. Always returns OK.
+//
+// Note: `output_buffer_len` is not consulted here.
+Status SnappyCodec::Compress(int64_t input_len, const uint8_t* input,
+    int64_t output_buffer_len, uint8_t* output_buffer, int64_t* output_length) {
+  const char* src = reinterpret_cast<const char*>(input);
+  char* dst = reinterpret_cast<char*>(output_buffer);
+
+  size_t compressed_size;
+  snappy::RawCompress(src, static_cast<size_t>(input_len), dst, &compressed_size);
+
+  *output_length = static_cast<int64_t>(compressed_size);
+  return Status::OK();
+}
+
+// ----------------------------------------------------------------------
+// Brotli implementation
+
+// Decompresses a complete brotli stream from `input` into `output_buffer`,
+// whose capacity is `output_len`. Returns IOError unless the decoder reports
+// success.
+Status BrotliCodec::Decompress(
+    int64_t input_len, const uint8_t* input, int64_t output_len, uint8_t* output_buffer) {
+  size_t decoded_size = output_len;
+  const auto result =
+      BrotliDecoderDecompress(input_len, input, &decoded_size, output_buffer);
+  if (result == BROTLI_DECODER_RESULT_SUCCESS) { return Status::OK(); }
+  return Status::IOError("Corrupt brotli compressed data.");
+}
+
+// Returns brotli's upper bound on the compressed size of `input_len` bytes.
+// `input` is not inspected.
+int64_t BrotliCodec::MaxCompressedLen(int64_t input_len, const uint8_t* input) {
+  const auto upper_bound = BrotliEncoderMaxCompressedSize(input_len);
+  return upper_bound;
+}
+
+// Compresses `input` into `output_buffer` (capacity `output_buffer_len`) with
+// brotli and stores the number of bytes produced in `*output_length`.
+// Returns IOError if the encoder reports failure; `*output_length` is then
+// left unmodified.
+Status BrotliCodec::Compress(int64_t input_len, const uint8_t* input,
+    int64_t output_buffer_len, uint8_t* output_buffer, int64_t* output_length) {
+  // TODO: Make quality configurable. We use 8 as a default as it is the best
+  //       trade-off for Parquet workload
+  const int quality = 8;
+
+  // In: capacity of output_buffer. Out: number of bytes actually written.
+  size_t encoded_size = output_buffer_len;
+  const auto encoded = BrotliEncoderCompress(quality, BROTLI_DEFAULT_WINDOW,
+      BROTLI_DEFAULT_MODE, input_len, input, &encoded_size, output_buffer);
+  if (encoded == BROTLI_FALSE) {
+    return Status::IOError("Brotli compression failure.");
+  }
+
+  *output_length = encoded_size;
+  return Status::OK();
+}
+
+}  // namespace arrow