You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by no...@apache.org on 2016/01/26 19:05:28 UTC

parquet-cpp git commit: PARQUET-437: Add googletest setup and ADD_PARQUET_TEST helper

Repository: parquet-cpp
Updated Branches:
  refs/heads/master 95277f3a4 -> b31baa0b0


PARQUET-437: Add googletest setup and ADD_PARQUET_TEST helper

I adapted this functionality from Apache Kudu (incubating). There are no real unit tests, yet, but you can now run `ctest` after building to run all tests that have been created with `ADD_PARQUET_TEST`.

Author: Wes McKinney <we...@cloudera.com>

Closes #19 from wesm/googletest-infra and squashes the following commits:

758328f [Wes McKinney] BLD: disable fixed OSX deployment target. Compile gtest with -fPIC
61cc5bb [Wes McKinney] Remove 'set -e' from setup_build_env.sh
6435970 [Wes McKinney] Fix setup_build_env.sh script
a54a219 [Wes McKinney] Add googletest to thirdparty and add ADD_PARQUET_TEST cmake helper and support scripts for using ctest after make


Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/b31baa0b
Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/b31baa0b
Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/b31baa0b

Branch: refs/heads/master
Commit: b31baa0b0b1f634c97b172e44e13d41fa246f038
Parents: 95277f3
Author: Wes McKinney <we...@cloudera.com>
Authored: Tue Jan 26 10:05:23 2016 -0800
Committer: Nong Li <no...@gmail.com>
Committed: Tue Jan 26 10:05:23 2016 -0800

----------------------------------------------------------------------
 .travis.yml                           |  15 +--
 CMakeLists.txt                        | 107 +++++++++++++++++---
 README.md                             |  63 +++++++++---
 build-support/run-test.sh             | 153 +++++++++++++++++++++++++++++
 build-support/stacktrace_addr2line.pl |  92 +++++++++++++++++
 ci/before_script_travis.sh            |  27 +++++
 cmake_modules/FindGTest.cmake         |  91 +++++++++++++++++
 setup_build_env.sh                    |   6 +-
 src/parquet/CMakeLists.txt            |  27 ++---
 src/parquet/reader-test.cc            |  26 +++++
 src/parquet/util/CMakeLists.txt       |  17 ++++
 src/parquet/util/test_main.cc         |  26 +++++
 thirdparty/build_thirdparty.sh        |  14 +++
 thirdparty/download_thirdparty.sh     |   6 +-
 thirdparty/versions.sh                |   4 +
 15 files changed, 617 insertions(+), 57 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b31baa0b/.travis.yml
----------------------------------------------------------------------
diff --git a/.travis.yml b/.travis.yml
index 4ca229f..8e11840 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -32,22 +32,9 @@ addons:
 before_install:
     - mkdir $HOME/build_dir
     - cd $HOME/build_dir
-    - >
-      if [ $TRAVIS_OS_NAME == osx ]; then
-        brew update &&
-        brew install thrift lz4 snappy;
-      fi
-    - >
-      if [ $TRAVIS_OS_NAME == linux ]; then
-        cp -r $TRAVIS_BUILD_DIR/thirdparty . &&
-        ./thirdparty/download_thirdparty.sh &&
-        ./thirdparty/build_thirdparty.sh &&
-        export THRIFT_HOME=$HOME/build_dir/thirdparty/installed SNAPPY_HOME=$HOME/build_dir/thirdparty/installed LZ4_HOME=$HOME/build_dir/thirdparty/installed;
-      fi
 
 before_script:
-    - export CC="gcc-4.9"
-    - export CXX="g++-4.9"
+    - source $TRAVIS_BUILD_DIR/ci/before_script_travis.sh
     - cmake $TRAVIS_BUILD_DIR
 
 script: make

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b31baa0b/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 03b7da5..4ac13de 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,20 +1,29 @@
-# Copyright 2012 Cloudera Inc.
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+#   http://www.apache.org/licenses/LICENSE-2.0
 #
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
 
 cmake_minimum_required(VERSION 2.6)
+
+# Optional compiler override: when the PARQUET_GCC_ROOT environment variable
+# is non-empty, build with the gcc/g++ found under that prefix. This must run
+# before project(), because project() is what detects and configures the
+# compilers; changing CMAKE_<LANG>_COMPILER afterwards is too late.
+if (NOT "$ENV{PARQUET_GCC_ROOT}" STREQUAL "")
+  set(GCC_ROOT "$ENV{PARQUET_GCC_ROOT}")
+  set(CMAKE_C_COMPILER "${GCC_ROOT}/bin/gcc")
+  set(CMAKE_CXX_COMPILER "${GCC_ROOT}/bin/g++")
+endif()
+
 project(parquet-cpp)
 
 # generate CTest input files
 enable_testing()
 
@@ -24,7 +33,6 @@ set(BUILD_SUPPORT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/build-support)
 
 if(APPLE)
   set(CMAKE_MACOSX_RPATH 1)
-  set(CMAKE_OSX_DEPLOYMENT_TARGET 10.9)
 endif()
 
 if (NOT PARQUET_LINK)
@@ -49,7 +57,7 @@ string (TOLOWER ${CMAKE_BUILD_TYPE} BUILD_SUBDIR_NAME)
 # If build in-source, create the latest symlink. If build out-of-source, which is
 # preferred, simply output the binaries in the build folder
 if (${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_CURRENT_BINARY_DIR})
-  set(BUILD_OUTPUT_ROOT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/build/${BUILD_SUBDIR_NAME}/")
+  set(BUILD_OUTPUT_ROOT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/build/${BUILD_SUBDIR_NAME}")
   # Link build/latest to the current build directory, to avoid developers
   # accidentally running the latest debug build when in fact they're building
   # release builds.
@@ -60,10 +68,57 @@ if (${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_CURRENT_BINARY_DIR})
 EXECUTE_PROCESS(COMMAND ln ${MORE_ARGS} -sf ${BUILD_OUTPUT_ROOT_DIRECTORY}
   ${CMAKE_CURRENT_BINARY_DIR}/build/latest)
 else()
-  set(BUILD_OUTPUT_ROOT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${BUILD_SUBDIR_NAME}/")
+  set(BUILD_OUTPUT_ROOT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${BUILD_SUBDIR_NAME}")
 endif()
 
 ############################################################
+# Testing
+############################################################
+
+# Add a new test case, with or without an executable that should be built.
+#
+# REL_TEST_NAME is the name of the test. It may be a single component
+# (e.g. monotime-test) or contain additional components (e.g.
+# net/net_util-test). Either way, the last component must be a globally
+# unique name.
+#
+# Arguments after the test name will be passed to set_tests_properties().
+function(ADD_PARQUET_TEST REL_TEST_NAME)
+  # Registers a single CTest case; does nothing at all when NO_TESTS is set.
+  if(NOT NO_TESTS)
+    # The test (and target) name is the file name of REL_TEST_NAME with any
+    # directory part and extension removed.
+    get_filename_component(TEST_NAME ${REL_TEST_NAME} NAME_WE)
+    set(TEST_SOURCE "${REL_TEST_NAME}.cc")
+
+    if(NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TEST_SOURCE})
+      # No matching .cc file: run REL_TEST_NAME itself (probably a script).
+      set(TEST_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${REL_TEST_NAME})
+    else()
+      # A matching .cc file exists: compile it into an executable that is
+      # linked against the common test libraries.
+      add_executable(${TEST_NAME} "${TEST_SOURCE}")
+      target_link_libraries(${TEST_NAME} ${PARQUET_TEST_LINK_LIBS})
+      set(TEST_PATH "${EXECUTABLE_OUTPUT_PATH}/${TEST_NAME}")
+    endif()
+
+    # Tests are always launched through the run-test.sh wrapper script.
+    add_test(${TEST_NAME}
+      ${BUILD_SUPPORT_DIR}/run-test.sh ${TEST_PATH})
+
+    # Any extra arguments are forwarded as test properties.
+    if(ARGN)
+      set_tests_properties(${TEST_NAME} PROPERTIES ${ARGN})
+    endif()
+  endif()
+endfunction()
+
+# A wrapper for add_dependencies() that is compatible with NO_TESTS.
+function(ADD_PARQUET_TEST_DEPENDENCIES REL_TEST_NAME)
+  # When NO_TESTS is set ADD_PARQUET_TEST creates no target, so there is
+  # nothing to attach dependencies to.
+  if(NOT NO_TESTS)
+    # Derive the target name the same way ADD_PARQUET_TEST does: file name
+    # without directory or extension.
+    get_filename_component(TEST_TARGET ${REL_TEST_NAME} NAME_WE)
+    # Remaining arguments are the targets this test must be built after.
+    add_dependencies(${TEST_TARGET} ${ARGN})
+  endif()
+endfunction()
+
+enable_testing()
+
+############################################################
 # Dependencies
 ############################################################
 
@@ -101,6 +156,12 @@ include_directories(SYSTEM ${LZ4_INCLUDE_DIR})
 add_library(lz4static STATIC IMPORTED)
 set_target_properties(lz4static PROPERTIES IMPORTED_LOCATION ${LZ4_STATIC_LIB})
 
+## GTest
+find_package(GTest REQUIRED)
+include_directories(SYSTEM ${GTEST_INCLUDE_DIR})
+add_library(gtest STATIC IMPORTED)
+set_target_properties(gtest PROPERTIES IMPORTED_LOCATION ${GTEST_STATIC_LIB})
+
 # Thrift requires these definitions for some types that we use
 add_definitions(-DHAVE_INTTYPES_H -DHAVE_NETINET_IN_H -DHAVE_NETDB_H)
 add_definitions(-fPIC)
@@ -119,7 +180,15 @@ set(EXECUTABLE_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}")
 SET(CMAKE_CXX_FLAGS "-std=c++11 -msse4.2 -Wall -Wno-unused-value -Wno-unused-variable -Wno-sign-compare -Wno-unknown-pragmas")
 SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -ggdb")
 
-include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src)
+if (APPLE)
+  # Use libc++ to avoid linker errors on some platforms
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++")
+endif()
+
+
+include_directories(
+  ${CMAKE_CURRENT_SOURCE_DIR}/src
+)
 
 ############################################################
 # "make lint" target
@@ -132,6 +201,14 @@ if (UNIX)
     `find ${CMAKE_CURRENT_SOURCE_DIR}/src -name \\*.cc -or -name \\*.h | sed -e '/parquet\\/thrift/g'`)
 endif (UNIX)
 
+#############################################################
+# Test linking
+
+set(PARQUET_MIN_TEST_LIBS
+  parquet_test_main
+  parquet)
+set(PARQUET_TEST_LINK_LIBS ${PARQUET_MIN_TEST_LIBS})
+
 ############################################################
 # Library config
 

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b31baa0b/README.md
----------------------------------------------------------------------
diff --git a/README.md b/README.md
index cb81272..811d41e 100644
--- a/README.md
+++ b/README.md
@@ -33,33 +33,70 @@ well as a few example executables.
 
 Incremental builds can be done afterwords with just `make`.
 
+## Testing
+
+This library uses Google's `googletest` unit test framework. After building
+with `make`, you can run the test suite by running
+
+```
+ctest
+```
+
+See `ctest --help` for configuration details about ctest. On GNU/Linux systems,
+you can use valgrind with ctest to look for memory leaks:
+
+```
+valgrind --tool=memcheck --leak-check=yes ctest
+```
+
+## Out-of-source builds
+
+parquet-cpp supports out of source builds. For example:
+
+```
+mkdir test-build
+cd test-build
+cmake ..
+make
+ctest
+```
+
+By using out-of-source builds you can preserve your current build state in case
+you need to switch to another git branch.
+
 Design
 ========
-The library consists of 3 layers that map to the 3 units in the parquet format. 
+The library consists of 3 layers that map to the 3 units in the parquet format.
 
-The first is the encodings which correspond to data pages. The APIs at this level 
+The first is the encodings which correspond to data pages. The APIs at this level
 return single values.
 
-The second layer is the column reader which corresponds to column chunks. The APIs at 
-this level return a triple: definition level, repetition level and value. It also handles 
-reading pages, compression and managing encodings. 
+The second layer is the column reader which corresponds to column chunks. The APIs at
+this level return a triple: definition level, repetition level and value. It also handles
+reading pages, compression and managing encodings.
 
 The 3rd layer would handle reading/writing records.
 
 Developer Notes
 ========
-The project adheres to the google coding convention: 
-http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml 
-with two notable exceptions. We do not encourage anonymous namespaces and the line 
+The project adheres to the google coding convention:
+http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml
+with two notable exceptions. We do not encourage anonymous namespaces and the line
 length is 90 characters.
 
-The project prefers the use of C++ style memory management. new/delete should be used 
-over malloc/free. new/delete should be avoided whenever possible by using stl/boost 
-where possible. For example, scoped_ptr instead of explicit new/delete and using 
+You can run `cpplint` through the build system with
+
+```
+make lint
+```
+
+The project prefers the use of C++ style memory management. new/delete should be used
+over malloc/free. new/delete should be avoided whenever possible by using stl/boost
+where possible. For example, scoped_ptr instead of explicit new/delete and using
 std::vector instead of allocated buffers. Currently, c++11 features are not used.
 
 For error handling, this project uses exceptions.
 
-In general, many of the APIs at the layers are interface based for extensibility. To 
-minimize the cost of virtual calls, the APIs should be batch-centric. For example, 
+In general, many of the APIs at the layers are interface based for extensibility. To
+minimize the cost of virtual calls, the APIs should be batch-centric. For example,
 encoding should operate on batches of values rather than a single value.

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b31baa0b/build-support/run-test.sh
----------------------------------------------------------------------
diff --git a/build-support/run-test.sh b/build-support/run-test.sh
new file mode 100755
index 0000000..889e2a2
--- /dev/null
+++ b/build-support/run-test.sh
@@ -0,0 +1,153 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Script which wraps running a test and redirects its output to a
+# test log directory.
+#
+# If PARQUET_COMPRESS_TEST_OUTPUT is non-empty, then the logs will be
+# gzip-compressed while they are written.
+
+ROOT=$(cd $(dirname $BASH_SOURCE)/..; pwd)
+
+TEST_LOGDIR=$ROOT/build/test-logs
+mkdir -p $TEST_LOGDIR
+
+TEST_DEBUGDIR=$ROOT/build/test-debug
+mkdir -p $TEST_DEBUGDIR
+
+TEST_DIRNAME=$(cd $(dirname $1); pwd)
+TEST_FILENAME=$(basename $1)
+shift
+TEST_EXECUTABLE="$TEST_DIRNAME/$TEST_FILENAME"
+TEST_NAME=$(echo $TEST_FILENAME | perl -pe 's/\..+?$//') # Remove path and extension (if any).
+
+TEST_EXECUTION_ATTEMPTS=1
+
+
+# We run each test in its own subdir to avoid core file related races.
+TEST_WORKDIR=$ROOT/build/test-work/$TEST_NAME
+mkdir -p $TEST_WORKDIR
+pushd $TEST_WORKDIR >/dev/null || exit 1
+rm -f *
+
+set -o pipefail
+
+LOGFILE=$TEST_LOGDIR/$TEST_NAME.txt
+XMLFILE=$TEST_LOGDIR/$TEST_NAME.xml
+
+# Remove both the compressed and uncompressed output, so the developer
+# doesn't accidentally get confused and read output from a prior test
+# run.
+rm -f $LOGFILE $LOGFILE.gz
+
+# Choose the final stage of the logging pipeline: gzip when
+# PARQUET_COMPRESS_TEST_OUTPUT is set to anything other than "" or "0",
+# plain cat otherwise. A string comparison is used because `-ne` only accepts
+# integers; with it, values such as "yes" made the test error out and the log
+# was silently left uncompressed.
+if [ -n "$PARQUET_COMPRESS_TEST_OUTPUT" ] && [ "$PARQUET_COMPRESS_TEST_OUTPUT" != "0" ] ; then
+  pipe_cmd=gzip
+  LOGFILE=${LOGFILE}.gz
+else
+  pipe_cmd=cat
+fi
+
+# Allow for collecting core dumps.
+PARQUET_TEST_ULIMIT_CORE=${PARQUET_TEST_ULIMIT_CORE:-0}
+ulimit -c $PARQUET_TEST_ULIMIT_CORE
+
+# Run the actual test.
+for ATTEMPT_NUMBER in $(seq 1 $TEST_EXECUTION_ATTEMPTS) ; do
+  if [ $ATTEMPT_NUMBER -lt $TEST_EXECUTION_ATTEMPTS ]; then
+    # If the test fails, the test output may or may not be left behind,
+    # depending on whether the test cleaned up or exited immediately. Either
+    # way we need to clean it up. We do this by comparing the data directory
+    # contents before and after the test runs, and deleting anything new.
+    #
+    # The comm program requires that its two inputs be sorted.
+    TEST_TMPDIR_BEFORE=$(find $TEST_TMPDIR -maxdepth 1 -type d | sort)
+  fi
+
+  # gtest won't overwrite old junit test files, resulting in a build failure
+  # even when retries are successful.
+  rm -f $XMLFILE
+
+  echo "Running $TEST_NAME, redirecting output into $LOGFILE" \
+    "(attempt ${ATTEMPT_NUMBER}/$TEST_EXECUTION_ATTEMPTS)"
+  $TEST_EXECUTABLE "$@" 2>&1 \
+    | $ROOT/build-support/stacktrace_addr2line.pl $TEST_EXECUTABLE \
+    | $pipe_cmd > $LOGFILE
+  STATUS=$?
+
+  # TSAN doesn't always exit with a non-zero exit code due to a bug:
+  # mutex errors don't get reported through the normal error reporting infrastructure.
+  # So we make sure to detect this and exit 1.
+  #
+  # Additionally, certain types of failures won't show up in the standard JUnit
+  # XML output from gtest. We assume that gtest knows better than us and our
+  # regexes in most cases, but for certain errors we delete the resulting xml
+  # file and let our own post-processing step regenerate it.
+  export GREP=$(which egrep)
+  if zgrep --silent "ThreadSanitizer|Leak check.*detected leaks" $LOGFILE ; then
+    echo ThreadSanitizer or leak check failures in $LOGFILE
+    STATUS=1
+    rm -f $XMLFILE
+  fi
+
+  if [ $ATTEMPT_NUMBER -lt $TEST_EXECUTION_ATTEMPTS ]; then
+    # Now delete any new test output.
+    TEST_TMPDIR_AFTER=$(find $TEST_TMPDIR -maxdepth 1 -type d | sort)
+    DIFF=$(comm -13 <(echo "$TEST_TMPDIR_BEFORE") \
+                    <(echo "$TEST_TMPDIR_AFTER"))
+    for DIR in $DIFF; do
+      # Multiple tests may be running concurrently. To avoid deleting the
+      # wrong directories, constrain to only directories beginning with the
+      # test name.
+      #
+      # This may delete old test directories belonging to this test, but
+      # that's not typically a concern when rerunning flaky tests.
+      if [[ $DIR =~ ^$TEST_TMPDIR/$TEST_NAME ]]; then
+        echo Deleting leftover flaky test directory "$DIR"
+        rm -Rf "$DIR"
+      fi
+    done
+  fi
+
+  if [ "$STATUS" -eq "0" ]; then
+    break
+  elif [ "$ATTEMPT_NUMBER" -lt "$TEST_EXECUTION_ATTEMPTS" ]; then
+    echo Test failed attempt number $ATTEMPT_NUMBER
+    echo Will retry...
+  fi
+done
+
+# Capture and compress core file and binary.
+COREFILES=$(ls | grep ^core)
+if [ -n "$COREFILES" ]; then
+  echo Found core dump. Saving executable and core files.
+  gzip < $TEST_EXECUTABLE > "$TEST_DEBUGDIR/$TEST_NAME.gz" || exit $?
+  for COREFILE in $COREFILES; do
+    gzip < $COREFILE > "$TEST_DEBUGDIR/$TEST_NAME.$COREFILE.gz" || exit $?
+  done
+  # Pull in any .so files as well.
+  for LIB in $(ldd $TEST_EXECUTABLE | grep $ROOT | awk '{print $3}'); do
+    LIB_NAME=$(basename $LIB)
+    gzip < $LIB > "$TEST_DEBUGDIR/$LIB_NAME.gz" || exit $?
+  done
+fi
+
+popd
+rm -Rf $TEST_WORKDIR
+
+exit $STATUS

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b31baa0b/build-support/stacktrace_addr2line.pl
----------------------------------------------------------------------
diff --git a/build-support/stacktrace_addr2line.pl b/build-support/stacktrace_addr2line.pl
new file mode 100755
index 0000000..7664bab
--- /dev/null
+++ b/build-support/stacktrace_addr2line.pl
@@ -0,0 +1,92 @@
+#!/usr/bin/perl
+# Copyright 2014 Cloudera, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#######################################################################
+# This script will convert a stack trace with addresses:
+#     @           0x5fb015 kudu::master::Master::Init()
+#     @           0x5c2d38 kudu::master::MiniMaster::StartOnPorts()
+#     @           0x5c31fa kudu::master::MiniMaster::Start()
+#     @           0x58270a kudu::MiniCluster::Start()
+#     @           0x57dc71 kudu::CreateTableStressTest::SetUp()
+# To one with line numbers:
+#     @           0x5fb015 kudu::master::Master::Init() at /home/mpercy/src/kudu/src/master/master.cc:54
+#     @           0x5c2d38 kudu::master::MiniMaster::StartOnPorts() at /home/mpercy/src/kudu/src/master/mini_master.cc:52
+#     @           0x5c31fa kudu::master::MiniMaster::Start() at /home/mpercy/src/kudu/src/master/mini_master.cc:33
+#     @           0x58270a kudu::MiniCluster::Start() at /home/mpercy/src/kudu/src/integration-tests/mini_cluster.cc:48
+#     @           0x57dc71 kudu::CreateTableStressTest::SetUp() at /home/mpercy/src/kudu/src/integration-tests/create-table-stress-test.cc:61
+#
+# If the script detects that the output is not symbolized, it will also attempt
+# to determine the function names, i.e. it will convert:
+#     @           0x5fb015
+#     @           0x5c2d38
+#     @           0x5c31fa
+# To:
+#     @           0x5fb015 kudu::master::Master::Init() at /home/mpercy/src/kudu/src/master/master.cc:54
+#     @           0x5c2d38 kudu::master::MiniMaster::StartOnPorts() at /home/mpercy/src/kudu/src/master/mini_master.cc:52
+#     @           0x5c31fa kudu::master::MiniMaster::Start() at /home/mpercy/src/kudu/src/master/mini_master.cc:33
+#######################################################################
+use strict;
+use warnings;
+
+if (!@ARGV) {
+  die <<EOF
+Usage: $0 executable [stack-trace-file]
+
+This script will read addresses from a file containing stack traces and
+will convert the addresses that conform to the pattern " @ 0x123456" to line
+numbers by calling addr2line on the provided executable.
+If no stack-trace-file is specified, it will take input from stdin.
+EOF
+}
+
+# el6 and other older systems don't support the -p flag,
+# so we do our own "pretty" parsing.
+sub parse_addr2line_output($$) {
+  # Turns the raw addr2line output for one address into the suffix that is
+  # appended to a stack-trace line.
+  #   $output           - text printed by addr2line; the first non-empty line
+  #                       is used as the function name, the second as the
+  #                       source location
+  #   $lookup_func_name - true when the function name should be included too
+  # Returns " <function> at <location>", " at <location>", or '' when the
+  # output has fewer than two non-empty lines.
+  defined(my $output = shift) or die;
+  defined(my $lookup_func_name = shift) or die;
+  my @lines = grep { $_ ne '' } split("\n", $output);
+  # With fewer than two lines $lines[1] is undef, which would raise an
+  # "uninitialized value" warning and emit a dangling " at "; append nothing.
+  return '' if @lines < 2;
+  my $pretty_str = '';
+  if ($lookup_func_name) {
+    $pretty_str .= ' ' . $lines[0];
+  }
+  $pretty_str .= ' at ' . $lines[1];
+  return $pretty_str;
+}
+
+my $binary = shift @ARGV;
+if (! -x $binary || ! -r $binary) {
+  die "Error: Cannot access executable ($binary)";
+}
+
+# Cache lookups to speed processing of files with repeated trace addresses.
+my %addr2line_map = ();
+
+# Disable stdout buffering
+$| = 1;
+
+# Reading from <ARGV> is magical in Perl.
+while (defined(my $input = <ARGV>)) {
+  if ($input =~ /^\s+\@\s+(0x[[:xdigit:]]{6,})(?:\s+(\S+))?/) {
+    my $addr = $1;
+    my $lookup_func_name = (!defined $2);
+    if (!exists($addr2line_map{$addr})) {
+      $addr2line_map{$addr} = `addr2line -ifC -e $binary $addr`;
+    }
+    chomp $input;
+    $input .= parse_addr2line_output($addr2line_map{$addr}, $lookup_func_name) . "\n";
+  }
+  print $input;
+}
+
+exit 0;

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b31baa0b/ci/before_script_travis.sh
----------------------------------------------------------------------
diff --git a/ci/before_script_travis.sh b/ci/before_script_travis.sh
new file mode 100755
index 0000000..b1f31c5
--- /dev/null
+++ b/ci/before_script_travis.sh
@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+
+# Build an isolated thirdparty
+cp -r $TRAVIS_BUILD_DIR/thirdparty .
+./thirdparty/download_thirdparty.sh
+source thirdparty/versions.sh
+
+if [ $TRAVIS_OS_NAME == "osx" ]; then
+  brew update
+  brew install thrift lz4 snappy
+
+  # Only build gtest
+  ./thirdparty/build_thirdparty.sh gtest
+fi
+
+if [ $TRAVIS_OS_NAME == "linux" ]; then
+  ./thirdparty/build_thirdparty.sh
+  export THRIFT_HOME=$HOME/build_dir/thirdparty/installed
+  export SNAPPY_HOME=$HOME/build_dir/thirdparty/installed
+  export LZ4_HOME=$HOME/build_dir/thirdparty/installed
+
+  # Use a C++11 compiler on Linux
+  export CC="gcc-4.9"
+  export CXX="g++-4.9"
+fi
+
+export GTEST_HOME=$HOME/build_dir/thirdparty/$GTEST_BASEDIR

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b31baa0b/cmake_modules/FindGTest.cmake
----------------------------------------------------------------------
diff --git a/cmake_modules/FindGTest.cmake b/cmake_modules/FindGTest.cmake
new file mode 100644
index 0000000..e47faf0
--- /dev/null
+++ b/cmake_modules/FindGTest.cmake
@@ -0,0 +1,91 @@
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Tries to find GTest headers and libraries.
+#
+# Usage of this module as follows:
+#
+#  find_package(GTest)
+#
+# Variables used by this module, they can change the default behaviour and need
+# to be set before calling find_package:
+#
+#  GTest_HOME - When set, this path is inspected instead of standard library
+#                locations as the root of the GTest installation.
+#                The environment variable GTEST_HOME overrides this variable.
+#
+# This module defines
+#  GTEST_INCLUDE_DIR, directory containing headers
+#  GTEST_LIBS, directory containing gtest libraries
+#  GTEST_STATIC_LIB, path to libgtest.a
+#  GTEST_SHARED_LIB, path to libgtest's shared library
+#  GTEST_FOUND, whether gtest has been found
+
+if( NOT "$ENV{GTEST_HOME}" STREQUAL "")
+    file( TO_CMAKE_PATH "$ENV{GTEST_HOME}" _native_path )
+    list( APPEND _gtest_roots ${_native_path} )
+elseif ( GTest_HOME )
+    list( APPEND _gtest_roots ${GTest_HOME} )
+endif()
+
+# Try the parameterized roots, if they exist
+if ( _gtest_roots )
+    find_path( GTEST_INCLUDE_DIR NAMES gtest/gtest.h
+        PATHS ${_gtest_roots} NO_DEFAULT_PATH
+        PATH_SUFFIXES "include" )
+    find_library( GTEST_LIBRARIES NAMES gtest
+        PATHS ${_gtest_roots} NO_DEFAULT_PATH
+        PATH_SUFFIXES "lib" )
+else ()
+    find_path( GTEST_INCLUDE_DIR NAMES gtest/gtest.h )
+    find_library( GTEST_LIBRARIES NAMES gtest )
+endif ()
+
+
+# gtest counts as found only when both the headers and the library were
+# located; the library's directory and the static/shared paths derive from it.
+if (GTEST_INCLUDE_DIR AND GTEST_LIBRARIES)
+  set(GTEST_FOUND TRUE)
+  # PATH is used instead of DIRECTORY: the DIRECTORY keyword only exists in
+  # CMake 2.8.12 and later, which is newer than the minimum version this
+  # project declares, whereas PATH yields the same result on every release.
+  get_filename_component( GTEST_LIBS ${GTEST_LIBRARIES} PATH )
+  set(GTEST_LIB_NAME libgtest)
+  set(GTEST_STATIC_LIB ${GTEST_LIBS}/${GTEST_LIB_NAME}.a)
+  set(GTEST_SHARED_LIB ${GTEST_LIBS}/${GTEST_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX})
+else ()
+  set(GTEST_FOUND FALSE)
+endif ()
+
+# Report the outcome of the search, honouring the QUIET and REQUIRED options
+# passed to find_package(GTest ...).
+if (GTEST_FOUND)
+  if (NOT GTest_FIND_QUIETLY)
+    message(STATUS "Found the GTest library: ${GTEST_LIBRARIES}")
+  endif ()
+else ()
+  # Name the places that were searched: the explicit roots when any were
+  # supplied, otherwise the system search paths.
+  set(GTEST_ERR_MSG "Could not find the GTest library. Looked in")
+  if ( _gtest_roots )
+    set(GTEST_ERR_MSG "${GTEST_ERR_MSG} ${_gtest_roots}.")
+  else ()
+    set(GTEST_ERR_MSG "${GTEST_ERR_MSG} system search paths.")
+  endif ()
+  # A REQUIRED package that is missing always aborts configuration, even if
+  # QUIET was requested too; QUIET only suppresses the informational message.
+  if (GTest_FIND_REQUIRED)
+    message(FATAL_ERROR "${GTEST_ERR_MSG}")
+  elseif (NOT GTest_FIND_QUIETLY)
+    message(STATUS "${GTEST_ERR_MSG}")
+  endif ()
+endif ()
+
+mark_as_advanced(
+  GTEST_INCLUDE_DIR
+  GTEST_LIBS
+  GTEST_LIBRARIES
+  GTEST_STATIC_LIB
+  GTEST_SHARED_LIB
+)

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b31baa0b/setup_build_env.sh
----------------------------------------------------------------------
diff --git a/setup_build_env.sh b/setup_build_env.sh
index e99f5d4..4b496d9 100755
--- a/setup_build_env.sh
+++ b/setup_build_env.sh
@@ -1,7 +1,5 @@
 #!/bin/bash
 
-set -e
-
 SOURCE_DIR=$(cd "$(dirname "$BASH_SOURCE")"; pwd)
 : ${BUILD_DIR:=$SOURCE_DIR/build}
 
@@ -10,6 +8,7 @@ cp -r $SOURCE_DIR/thirdparty $BUILD_DIR
 cd $BUILD_DIR
 ./thirdparty/download_thirdparty.sh
 ./thirdparty/build_thirdparty.sh
+source thirdparty/versions.sh
 
 export SNAPPY_HOME=$BUILD_DIR/thirdparty/installed
 export LZ4_HOME=$BUILD_DIR/thirdparty/installed
@@ -18,10 +17,11 @@ if [ "$(uname)" != "Darwin" ]; then
   export THRIFT_HOME=$BUILD_DIR/thirdparty/installed
 fi
 
+export GTEST_HOME=$BUILD_DIR/thirdparty/$GTEST_BASEDIR
+
 cmake $SOURCE_DIR
 
 cd $SOURCE_DIR
 
 echo
 echo "Build env initialized in $BUILD_DIR."
-

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b31baa0b/src/parquet/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/parquet/CMakeLists.txt b/src/parquet/CMakeLists.txt
index 11eaeb6..f35af70 100644
--- a/src/parquet/CMakeLists.txt
+++ b/src/parquet/CMakeLists.txt
@@ -1,18 +1,23 @@
-# Copyright 2015 Cloudera Inc.
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+#   http://www.apache.org/licenses/LICENSE-2.0
 #
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
 
 # Headers: top level
 install(FILES
   parquet.h
   DESTINATION include/parquet)
+
+ADD_PARQUET_TEST(reader-test)

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b31baa0b/src/parquet/reader-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/reader-test.cc b/src/parquet/reader-test.cc
new file mode 100644
index 0000000..f6bf8b1
--- /dev/null
+++ b/src/parquet/reader-test.cc
@@ -0,0 +1,26 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+namespace parquet {
+
+// Placeholder test: asserts a constant, so it always passes. It only gives
+// the new gtest/ctest setup one test binary to build and run.
+TEST(TestReader, ItWorks) { ASSERT_TRUE(true); }
+
+} // namespace parquet

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b31baa0b/src/parquet/util/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/parquet/util/CMakeLists.txt b/src/parquet/util/CMakeLists.txt
index 1a5de97..1b712f7 100644
--- a/src/parquet/util/CMakeLists.txt
+++ b/src/parquet/util/CMakeLists.txt
@@ -22,3 +22,20 @@ install(FILES
   rle-encoding.h
   stopwatch.h
   DESTINATION include/parquet/util)
+
+# Library built from test_main.cc (the gtest main() shared by test binaries).
+add_library(parquet_test_main test_main.cc)
+
+if (NOT APPLE)
+  target_link_libraries(parquet_test_main
+    dl
+    gtest
+    pthread)
+else()
+  # "-undefined dynamic_lookup": macOS ld defers undefined symbols to runtime.
+  target_link_libraries(parquet_test_main
+    gtest
+    dl)
+  set_target_properties(parquet_test_main
+    PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
+endif()

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b31baa0b/src/parquet/util/test_main.cc
----------------------------------------------------------------------
diff --git a/src/parquet/util/test_main.cc b/src/parquet/util/test_main.cc
new file mode 100644
index 0000000..00139f3
--- /dev/null
+++ b/src/parquet/util/test_main.cc
@@ -0,0 +1,26 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+// Entry point for the test binaries: hands the command line to googletest,
+// then runs all registered tests.
+int main(int argc, char **argv) {
+  ::testing::InitGoogleTest(&argc, argv);
+  // The value of RUN_ALL_TESTS() becomes the process exit status.
+  return RUN_ALL_TESTS();
+}

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b31baa0b/thirdparty/build_thirdparty.sh
----------------------------------------------------------------------
diff --git a/thirdparty/build_thirdparty.sh b/thirdparty/build_thirdparty.sh
index dc0bedb..5205fe8 100755
--- a/thirdparty/build_thirdparty.sh
+++ b/thirdparty/build_thirdparty.sh
@@ -16,6 +16,7 @@ else
   for arg in "$*"; do
     case $arg in
       "lz4")        F_LZ4=1 ;;
+      "gtest")      F_GTEST=1 ;;
       "snappy")     F_SNAPPY=1 ;;
       "thrift")     F_THRIFT=1 ;;
       *)            echo "Unknown module: $arg"; exit 1 ;;
@@ -53,6 +54,19 @@ if [ -n "$F_ALL" -o -n "$F_SNAPPY" ]; then
   make -j$PARALLEL install
 fi
 
+# build googletest in-source ("." = source dir); it is only built, not installed
+if [ -n "$F_ALL" -o -n "$F_GTEST" ]; then
+  cd $TP_DIR/$GTEST_BASEDIR
+
+  if [[ "$OSTYPE" == "darwin"* ]]; then
+    cmake -DCMAKE_CXX_FLAGS="-fPIC -std=c++11 -stdlib=libc++ -DGTEST_USE_OWN_TR1_TUPLE=1 -Wno-unused-value -Wno-ignored-attributes" .
+  else
+    CXXFLAGS=-fPIC cmake -DCMAKE_INSTALL_PREFIX:PATH=$PREFIX .
+  fi
+
+  make
+fi
+
 # build lz4
 if [ -n "$F_ALL" -o -n "$F_LZ4" ]; then
   cd $TP_DIR/$LZ4_BASEDIR/cmake_unofficial

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b31baa0b/thirdparty/download_thirdparty.sh
----------------------------------------------------------------------
diff --git a/thirdparty/download_thirdparty.sh b/thirdparty/download_thirdparty.sh
index 7cde792..9111cd4 100755
--- a/thirdparty/download_thirdparty.sh
+++ b/thirdparty/download_thirdparty.sh
@@ -24,8 +24,12 @@ if [ ! -d ${SNAPPY_BASEDIR} ]; then
   download_extract_and_cleanup $SNAPPY_URL
 fi
 
+if [ ! -d ${GTEST_BASEDIR} ]; then  # fetch only when that directory is missing
+  echo "Fetching gtest"
+  download_extract_and_cleanup $GTEST_URL
+fi
+
 if [ ! -d ${THRIFT_BASEDIR} ]; then
   echo "Fetching thrift"
   download_extract_and_cleanup $THRIFT_URL
 fi
-

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b31baa0b/thirdparty/versions.sh
----------------------------------------------------------------------
diff --git a/thirdparty/versions.sh b/thirdparty/versions.sh
old mode 100644
new mode 100755
index c7cbeed..112352d
--- a/thirdparty/versions.sh
+++ b/thirdparty/versions.sh
@@ -9,3 +9,7 @@ SNAPPY_BASEDIR=snappy-$SNAPPY_VERSION
 THRIFT_VERSION=0.9.3
 THRIFT_URL="http://archive.apache.org/dist/thrift/${THRIFT_VERSION}/thrift-${THRIFT_VERSION}.tar.gz"
 THRIFT_BASEDIR=thrift-$THRIFT_VERSION
+
+GTEST_VERSION=1.7.0
+GTEST_URL="https://github.com/google/googletest/archive/release-${GTEST_VERSION}.tar.gz"
+GTEST_BASEDIR=googletest-release-$GTEST_VERSION  # dir tested by download_thirdparty.sh and entered by build_thirdparty.sh