You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2015/07/06 23:52:50 UTC
[21/23] orc git commit: ORC-23. Simplify directory structure.

http://git-wip-us.apache.org/repos/asf/orc/blob/7f55b453/c++/src/Exceptions.cc
----------------------------------------------------------------------
diff --git a/c++/src/Exceptions.cc b/c++/src/Exceptions.cc
new file mode 100644
index 0000000..ae0e3d1
--- /dev/null
+++ b/c++/src/Exceptions.cc
@@ -0,0 +1,59 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Exceptions.hh"
+
+namespace orc {
+
+  NotImplementedYet::NotImplementedYet(const std::string& what_arg
+                                       ) : logic_error(what_arg) {
+    // PASS
+  }
+
+  NotImplementedYet::NotImplementedYet(const char* what_arg
+                                       ) :logic_error(what_arg) {
+    // PASS
+  }
+
+  NotImplementedYet::NotImplementedYet(const NotImplementedYet& error
+                                       ): logic_error(error) {
+    // PASS
+  }
+
+  NotImplementedYet::~NotImplementedYet() noexcept {
+    // PASS
+  }
+
+  ParseError::ParseError(const std::string& what_arg
+                         ): runtime_error(what_arg) {
+    // PASS
+  }
+
+  ParseError::ParseError(const char* what_arg
+                         ): runtime_error(what_arg) {
+    // PASS
+  }
+
+  ParseError::ParseError(const ParseError& error): runtime_error(error) {
+    // PASS
+  }
+
+  ParseError::~ParseError() noexcept {
+    // PASS
+  }
+}

http://git-wip-us.apache.org/repos/asf/orc/blob/7f55b453/c++/src/Exceptions.hh
----------------------------------------------------------------------
diff --git a/c++/src/Exceptions.hh b/c++/src/Exceptions.hh
new file mode 100644
index 0000000..4706085
--- /dev/null
+++ b/c++/src/Exceptions.hh
@@ -0,0 +1,50 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ORC_EXCEPTIONS_HH
+#define ORC_EXCEPTIONS_HH
+
+#include "Adaptor.hh"
+
+#include <stdexcept>
+#include <string>
+
+namespace orc {
+
+  class NotImplementedYet: public std::logic_error {
+  public:
+    explicit NotImplementedYet(const std::string& what_arg);
+    explicit NotImplementedYet(const char* what_arg);
+    virtual ~NotImplementedYet() noexcept;
+    NotImplementedYet(const NotImplementedYet&);
+  private:
+    NotImplementedYet& operator=(const NotImplementedYet&);
+  };
+
+  class ParseError: public std::runtime_error {
+  public:
+    explicit ParseError(const std::string& what_arg);
+    explicit ParseError(const char* what_arg);
+    virtual ~ParseError() noexcept;
+    ParseError(const ParseError&);
+  private:
+    ParseError& operator=(const ParseError&);
+  };
+}
+
+#endif

http://git-wip-us.apache.org/repos/asf/orc/blob/7f55b453/c++/src/Int128.cc
----------------------------------------------------------------------
diff --git a/c++/src/Int128.cc b/c++/src/Int128.cc
new file mode 100644
index 0000000..ece7850
--- /dev/null
+++ b/c++/src/Int128.cc
@@ -0,0 +1,438 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "orc/Int128.hh"
+#include "Adaptor.hh"
+
+#include <iomanip>
+#include <iostream>
+#include <sstream>
+
+namespace orc {
+
+  Int128 Int128::maximumValue() {
+    return Int128(0x7fffffffffffffff, 0xfffffffffffffff);
+  }
+
+  Int128 Int128::minimumValue() {
+    return Int128(static_cast<int64_t>(0x8000000000000000), 0x0);
+  }
+
+  Int128::Int128(const std::string& str) {
+    lowbits = 0;
+    highbits = 0;
+    size_t length = str.length();
+    if (length > 0) {
+      bool isNegative = str[0] == '-';
+      size_t posn = isNegative ? 1 : 0;
+      while (posn < length) {
+        size_t group = std::min(18ul, length - posn);
+        int64_t chunk = std::stoll(str.substr(posn, group));
+        int64_t multiple = 1;
+        for(size_t i=0; i < group; ++i) {
+          multiple *= 10;
+        }
+        *this *= multiple;
+        *this += chunk;
+        posn += group;
+      }
+      if (isNegative) {
+        negate();
+      }
+    }
+  }
+
+  Int128& Int128::operator*=(const Int128 &right) {
+    const uint64_t INT_MASK = 0xffffffff;
+    const uint64_t CARRY_BIT = 1l << 32;
+
+    // Break the left and right numbers into 32 bit chunks
+    // so that we can multiply them without overflow.
+    uint64_t L0 = static_cast<uint64_t>(highbits) >> 32;
+    uint64_t L1 = static_cast<uint64_t>(highbits) & INT_MASK;
+    uint64_t L2 = lowbits >> 32;
+    uint64_t L3 = lowbits & INT_MASK;
+    uint64_t R0 = static_cast<uint64_t>(right.highbits) >> 32;
+    uint64_t R1 = static_cast<uint64_t>(right.highbits) & INT_MASK;
+    uint64_t R2 = right.lowbits >> 32;
+    uint64_t R3 = right.lowbits & INT_MASK;
+
+    uint64_t product = L3 * R3;
+    lowbits = product & INT_MASK;
+    uint64_t sum = product >> 32;
+    product = L2 * R3;
+    sum += product;
+    highbits = sum < product ? CARRY_BIT : 0;
+    product = L3 * R2;
+    sum += product;
+    if (sum < product) {
+      highbits += CARRY_BIT;
+    }
+    lowbits += sum << 32;
+    highbits += static_cast<int64_t>(sum >> 32);
+    highbits += L1 * R3 + L2 * R2 + L3 * R1;
+    highbits += (L0 * R3 + L1 * R2 + L2 * R1 + L3 * R0) << 32;
+    return *this;
+  }
+
+  /**
+   * Expands the given value into an array of ints so that we can work on
+   * it. The array will be converted to an absolute value and the wasNegative
+   * flag will be set appropriately. The array will remove leading zeros from
+   * the value.
+   * @param array an array of length 4 to set with the value
+   * @param wasNegative a flag for whether the value was original negative
+   * @result the output length of the array
+   */
+  int64_t Int128::fillInArray(uint32_t* array, bool &wasNegative) const {
+    uint64_t high;
+    uint64_t low;
+    if (highbits < 0) {
+      low = ~lowbits + 1;
+      high = static_cast<uint64_t>(~highbits);
+      if (low == 0) {
+        high += 1;
+      }
+      wasNegative = true;
+    } else {
+      low = lowbits;
+      high = static_cast<uint64_t>(highbits);
+      wasNegative = false;
+    }
+    if (high != 0) {
+      if (high > UINT32_MAX) {
+        array[0] = static_cast<uint32_t>(high >> 32);
+        array[1] = static_cast<uint32_t>(high);
+        array[2] = static_cast<uint32_t>(low >> 32);
+        array[3] = static_cast<uint32_t>(low);
+        return 4;
+      } else {
+        array[0] = static_cast<uint32_t>(high);
+        array[1] = static_cast<uint32_t>(low >> 32);
+        array[2] = static_cast<uint32_t>(low);
+        return 3;
+      }
+    } else if (low >= UINT32_MAX) {
+      array[0] = static_cast<uint32_t>(low >> 32);
+      array[1] = static_cast<uint32_t>(low);
+      return 2;
+    } else if (low == 0) {
+      return 0;
+    } else {
+      array[0] = static_cast<uint32_t>(low);
+      return 1;
+    }
+  }
+
+
+  /**
+   * Find last set bit in a 32 bit integer. Bit 1 is the LSB and bit 32 is
+   * the MSB. We can replace this with bsrq asm instruction on x64.
+   */
+  int64_t fls(uint32_t x) {
+    int64_t bitpos = 0;
+    while (x) {
+      x >>= 1;
+      bitpos += 1;
+    }
+    return bitpos;
+  }
+
+  /**
+   * Shift the number in the array left by bits positions.
+   * @param array the number to shift, must have length elements
+   * @param length the number of entries in the array
+   * @param bits the number of bits to shift (0 <= bits < 32)
+   */
+  void shiftArrayLeft(uint32_t* array, int64_t length, int64_t bits) {
+    if (length > 0 && bits != 0) {
+      for(int64_t i=0; i < length-1; ++i) {
+        array[i] = (array[i] << bits) | (array[i+1] >> (32 - bits));
+      }
+      array[length-1] <<= bits;
+    }
+  }
+
+  /**
+   * Shift the number in the array right by bits positions.
+   * @param array the number to shift, must have length elements
+   * @param length the number of entries in the array
+   * @param bits the number of bits to shift (0 <= bits < 32)
+   */
+  void shiftArrayRight(uint32_t* array, int64_t length, int64_t bits) {
+    if (length > 0 && bits != 0) {
+      for(int64_t i=length-1; i > 0; --i) {
+        array[i] = (array[i] >> bits) | (array[i-1] << (32 - bits));
+      }
+      array[0] >>= bits;
+    }
+  }
+
+  /**
+   * Fix the signs of the result and remainder at the end of the division
+   * based on the signs of the dividend and divisor.
+   */
+  void fixDivisionSigns(Int128 &result, Int128 &remainder,
+                        bool dividendWasNegative, bool divisorWasNegative) {
+    if (dividendWasNegative != divisorWasNegative) {
+      result.negate();
+    }
+    if (dividendWasNegative) {
+      remainder.negate();
+    }
+  }
+
+  /**
+   * Build a Int128 from a list of ints.
+   */
+  void buildFromArray(Int128& value, uint32_t* array, int64_t length) {
+    switch (length) {
+    case 0:
+      value = 0;
+      break;
+    case 1:
+      value = array[0];
+      break;
+    case 2:
+      value = Int128(0, (static_cast<uint64_t>(array[0]) << 32) + array[1]);
+      break;
+    case 3:
+      value = Int128(array[0],
+                     (static_cast<uint64_t>(array[1]) << 32) + array[2]);
+      break;
+    case 4:
+      value = Int128((static_cast<int64_t>(array[0]) << 32) + array[1],
+                     (static_cast<uint64_t>(array[2]) << 32) + array[3]);
+      break;
+    case 5:
+      if (array[0] != 0) {
+        throw std::logic_error("Can't build Int128 with 5 ints.");
+      }
+      value = Int128((static_cast<int64_t>(array[1]) << 32) + array[2],
+                     (static_cast<uint64_t>(array[3]) << 32) + array[4]);
+      break;
+    default:
+      throw std::logic_error("Unsupported length for building Int128");
+    }
+  }
+
+  /**
+   * Do a division where the divisor fits into a single 32 bit value.
+   */
+  Int128 singleDivide(uint32_t* dividend, int64_t dividendLength,
+                      uint32_t divisor, Int128& remainder,
+                      bool dividendWasNegative, bool divisorWasNegative) {
+    uint64_t r = 0;
+    uint32_t resultArray[5];
+    for(int64_t j=0; j < dividendLength; j++) {
+      r <<= 32;
+      r += dividend[j];
+      resultArray[j] = static_cast<uint32_t>(r / divisor);
+      r %= divisor;
+    }
+    Int128 result;
+    buildFromArray(result, resultArray, dividendLength);
+    remainder = static_cast<int64_t>(r);
+    fixDivisionSigns(result, remainder, dividendWasNegative,
+                     divisorWasNegative);
+    return result;
+  }
+
+  Int128 Int128::divide(const Int128 &divisor, Int128 &remainder) const {
+    // Split the dividend and divisor into integer pieces so that we can
+    // work on them.
+    uint32_t dividendArray[5];
+    uint32_t divisorArray[4];
+    bool dividendWasNegative;
+    bool divisorWasNegative;
+    // leave an extra zero before the dividend
+    dividendArray[0] = 0;
+    int64_t dividendLength = fillInArray(dividendArray + 1, dividendWasNegative)+1;
+    int64_t divisorLength = divisor.fillInArray(divisorArray, divisorWasNegative);
+
+    // Handle some of the easy cases.
+    if (dividendLength <= divisorLength) {
+      remainder = *this;
+      return 0;
+    } else if (divisorLength == 0) {
+      throw std::range_error("Division by 0 in Int128");
+    } else if (divisorLength == 1) {
+      return singleDivide(dividendArray, dividendLength, divisorArray[0],
+                          remainder, dividendWasNegative, divisorWasNegative);
+    }
+
+    int64_t resultLength = dividendLength - divisorLength;
+    uint32_t resultArray[4];
+
+    // Normalize by shifting both by a multiple of 2 so that
+    // the digit guessing is better. The requirement is that
+    // divisorArray[0] is greater than 2**31.
+    int64_t normalizeBits = 32 - fls(divisorArray[0]);
+    shiftArrayLeft(divisorArray, divisorLength, normalizeBits);
+    shiftArrayLeft(dividendArray, dividendLength, normalizeBits);
+
+    // compute each digit in the result
+    for(int64_t j=0; j < resultLength; ++j) {
+      // Guess the next digit. At worst it is two too large
+      uint32_t guess = UINT32_MAX;
+      uint64_t highDividend = static_cast<uint64_t>(dividendArray[j]) << 32 |
+        dividendArray[j+1];
+      if (dividendArray[j] != divisorArray[0]) {
+        guess = static_cast<uint32_t>(highDividend / divisorArray[0]);
+      }
+
+      // catch all of the cases where guess is two too large and most of the
+      // cases where it is one too large
+      uint32_t rhat =
+        static_cast<uint32_t>(highDividend - guess *
+                              static_cast<uint64_t>(divisorArray[0]));
+      while (static_cast<uint64_t>(divisorArray[1]) * guess >
+             (static_cast<uint64_t>(rhat) << 32) + dividendArray[j+2]) {
+        guess -= 1;
+        rhat += divisorArray[0];
+        if (static_cast<uint64_t>(rhat) < divisorArray[0]) {
+          break;
+        }
+      }
+
+      // subtract off the guess * divisor from the dividend
+      uint64_t mult = 0;
+      for(int64_t i=divisorLength-1; i >= 0; --i) {
+        mult += static_cast<uint64_t>(guess) * divisorArray[i];
+        uint32_t prev = dividendArray[j+i+1];
+        dividendArray[j+i+1] -= static_cast<uint32_t>(mult);
+        mult >>= 32;
+        if (dividendArray[j+i+1] > prev) {
+          mult += 1;
+        }
+      }
+      uint32_t prev = dividendArray[j];
+      dividendArray[j] -= static_cast<uint32_t>(mult);
+
+      // if guess was too big, we add back divisor
+      if (dividendArray[j] > prev) {
+        guess -= 1;
+        uint32_t carry = 0;
+        for(int64_t i=divisorLength-1; i >= 0; --i) {
+          uint64_t sum = static_cast<uint64_t>(divisorArray[i]) +
+            dividendArray[j+i+1] + carry;
+          dividendArray[j+i+1] = static_cast<uint32_t>(sum);
+          carry = static_cast<uint32_t>(sum >> 32);
+        }
+        dividendArray[j] += carry;
+      }
+
+      resultArray[j] = guess;
+    }
+
+    // denormalize the remainder
+    shiftArrayRight(dividendArray, dividendLength, normalizeBits);
+
+    // return result and remainder
+    Int128 result;
+    buildFromArray(result, resultArray, resultLength);
+    buildFromArray(remainder, dividendArray, dividendLength);
+    fixDivisionSigns(result, remainder,
+                     dividendWasNegative, divisorWasNegative);
+    return result;
+  }
+
+  std::string Int128::toString() const {
+    // 10**18 - the largest power of 10 less than 63 bits
+    const Int128 tenTo18(0xde0b6b3a7640000);
+    // 10**36
+    const Int128 tenTo36(0xc097ce7bc90715, 0xb34b9f1000000000);
+    Int128 remainder;
+    std::stringstream buf;
+    bool needFill = false;
+
+    // get anything above 10**36 and print it
+    Int128 top = divide(tenTo36, remainder);
+    if (top != 0) {
+      buf << top.toLong();
+      remainder.abs();
+      needFill = true;
+    }
+
+    // now get anything above 10**18 and print it
+    Int128 tail;
+    top = remainder.divide(tenTo18, tail);
+    if (needFill || top != 0) {
+      if (needFill) {
+        buf << std::setw(18) << std::setfill('0');
+      } else {
+        needFill = true;
+        tail.abs();
+      }
+      buf << top.toLong();
+    }
+
+    // finally print the tail, which is less than 10**18
+    if (needFill) {
+      buf << std::setw(18) << std::setfill('0');
+    }
+    buf << tail.toLong();
+    return buf.str();
+  }
+
+  std::string Int128::toDecimalString(int32_t scale) const {
+    std::string str = toString();
+    if (scale == 0) {
+      return str;
+    } else if (*this < 0) {
+      int32_t len = static_cast<int32_t>(str.length());
+      if (len - 1 > scale) {
+        return str.substr(0, static_cast<size_t>(len - scale)) + "." +
+          str.substr(static_cast<size_t>(len - scale),
+                     static_cast<size_t>(scale));
+      } else if (len - 1 == scale) {
+        return "-0." + str.substr(1, std::string::npos);
+      } else {
+        std::string result = "-0.";
+        for(int32_t i=0; i < scale - len + 1; ++i) {
+          result += "0";
+        }
+        return result + str.substr(1, std::string::npos);
+      }
+    } else {
+      int32_t len = static_cast<int32_t>(str.length());
+      if (len > scale) {
+        return str.substr(0, static_cast<size_t>(len - scale)) + "." +
+          str.substr(static_cast<size_t>(len - scale),
+                     static_cast<size_t>(scale));
+      } else if (len == scale) {
+        return "0." + str;
+      } else {
+        std::string result = "0.";
+        for(int32_t i=0; i < scale - len; ++i) {
+          result += "0";
+        }
+        return result + str;
+      }
+    }
+  }
+
+  std::string Int128::toHexString() const {
+    std::stringstream buf;
+    buf << std::hex << "0x"
+        << std::setw(16) << std::setfill('0') << highbits
+        << std::setw(16) << std::setfill('0') << lowbits;
+    return buf.str();
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/orc/blob/7f55b453/c++/src/MemoryPool.cc
----------------------------------------------------------------------
diff --git a/c++/src/MemoryPool.cc b/c++/src/MemoryPool.cc
new file mode 100644
index 0000000..28cc9e1
--- /dev/null
+++ b/c++/src/MemoryPool.cc
@@ -0,0 +1,232 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "orc/Int128.hh"
+#include "orc/MemoryPool.hh"
+
+#include "Adaptor.hh"
+
+#include <cstdlib>
+#include <iostream>
+#include <string.h>
+
+namespace orc {
+
+  MemoryPool::~MemoryPool() {
+    // PASS
+  }
+
+  class MemoryPoolImpl: public MemoryPool {
+  public:
+    virtual ~MemoryPoolImpl();
+
+    char* malloc(uint64_t size) override;
+    void free(char* p) override;
+  };
+
+  char* MemoryPoolImpl::malloc(uint64_t size) {
+    return static_cast<char*>(std::malloc(size));
+  }
+
+  void MemoryPoolImpl::free(char* p) {
+    std::free(p);
+  }
+
+  MemoryPoolImpl::~MemoryPoolImpl() {
+    // PASS
+  }
+
+  template <class T>
+  DataBuffer<T>::DataBuffer(MemoryPool& pool,
+                            uint64_t newSize
+                            ): memoryPool(pool),
+                               buf(nullptr),
+                               currentSize(0),
+                               currentCapacity(0) {
+    resize(newSize);
+  }
+
+  template <class T>
+  DataBuffer<T>::~DataBuffer(){
+    for(uint64_t i=currentSize; i > 0; --i) {
+      (buf + i - 1)->~T();
+    }
+    if (buf) {
+      memoryPool.free(reinterpret_cast<char*>(buf));
+    }
+  }
+
+  template <class T>
+  void DataBuffer<T>::resize(uint64_t newSize) {
+    reserve(newSize);
+    if (currentSize > newSize) {
+      for(uint64_t i=currentSize; i > newSize; --i) {
+        (buf + i - 1)->~T();
+      }
+    } else if (newSize > currentSize) {
+      for(uint64_t i=currentSize; i < newSize; ++i) {
+        new (buf + i) T();
+      }
+    }
+    currentSize = newSize;
+  }
+
+  template <class T>
+  void DataBuffer<T>::reserve(uint64_t newCapacity){
+    if (newCapacity > currentCapacity) {
+      if (buf) {
+        T* buf_old = buf;
+        buf = reinterpret_cast<T*>(memoryPool.malloc(sizeof(T) * newCapacity));
+        memcpy(buf, buf_old, sizeof(T) * currentSize);
+        memoryPool.free(reinterpret_cast<char*>(buf_old));
+      } else {
+        buf = reinterpret_cast<T*>(memoryPool.malloc(sizeof(T) * newCapacity));
+      }
+      currentCapacity = newCapacity;
+    }
+  }
+
+  // Specializations for char
+
+  template <>
+  DataBuffer<char>::~DataBuffer(){
+    if (buf) {
+      memoryPool.free(reinterpret_cast<char*>(buf));
+    }
+  }
+
+  template <>
+  void DataBuffer<char>::resize(uint64_t newSize) {
+    reserve(newSize);
+    if (newSize > currentSize) {
+      memset(buf + currentSize, 0, newSize - currentSize);
+    }
+    currentSize = newSize;
+  }
+
+  // Specializations for char*
+
+  template <>
+  DataBuffer<char*>::~DataBuffer(){
+    if (buf) {
+      memoryPool.free(reinterpret_cast<char*>(buf));
+    }
+  }
+
+  template <>
+  void DataBuffer<char*>::resize(uint64_t newSize) {
+    reserve(newSize);
+    if (newSize > currentSize) {
+      memset(buf + currentSize, 0, (newSize - currentSize) * sizeof(char*));
+    }
+    currentSize = newSize;
+  }
+
+  // Specializations for double
+
+  template <>
+  DataBuffer<double>::~DataBuffer(){
+    if (buf) {
+      memoryPool.free(reinterpret_cast<char*>(buf));
+    }
+  }
+
+  template <>
+  void DataBuffer<double>::resize(uint64_t newSize) {
+    reserve(newSize);
+    if (newSize > currentSize) {
+      memset(buf + currentSize, 0, (newSize - currentSize) * sizeof(double));
+    }
+    currentSize = newSize;
+  }
+
+  // Specializations for int64_t
+
+  template <>
+  DataBuffer<int64_t>::~DataBuffer(){
+    if (buf) {
+      memoryPool.free(reinterpret_cast<char*>(buf));
+    }
+  }
+
+  template <>
+  void DataBuffer<int64_t>::resize(uint64_t newSize) {
+    reserve(newSize);
+    if (newSize > currentSize) {
+      memset(buf + currentSize, 0, (newSize - currentSize) * sizeof(int64_t));
+    }
+    currentSize = newSize;
+  }
+
+  // Specializations for uint64_t
+
+  template <>
+  DataBuffer<uint64_t>::~DataBuffer(){
+    if (buf) {
+      memoryPool.free(reinterpret_cast<char*>(buf));
+    }
+  }
+
+  template <>
+  void DataBuffer<uint64_t>::resize(uint64_t newSize) {
+    reserve(newSize);
+    if (newSize > currentSize) {
+      memset(buf + currentSize, 0, (newSize - currentSize) * sizeof(uint64_t));
+    }
+    currentSize = newSize;
+  }
+
+  // Specializations for unsigned char
+
+  template <>
+  DataBuffer<unsigned char>::~DataBuffer(){
+    if (buf) {
+      memoryPool.free(reinterpret_cast<char*>(buf));
+    }
+  }
+
+  template <>
+  void DataBuffer<unsigned char>::resize(uint64_t newSize) {
+    reserve(newSize);
+    if (newSize > currentSize) {
+      memset(buf + currentSize, 0, newSize - currentSize);
+    }
+    currentSize = newSize;
+  }
+
+  #ifdef __clang__
+    #pragma clang diagnostic ignored "-Wweak-template-vtables"
+  #endif
+
+  template class DataBuffer<char>;
+  template class DataBuffer<char*>;
+  template class DataBuffer<double>;
+  template class DataBuffer<Int128>;
+  template class DataBuffer<int64_t>;
+  template class DataBuffer<uint64_t>;
+  template class DataBuffer<unsigned char>;
+
+  #ifdef __clang__
+    #pragma clang diagnostic ignored "-Wexit-time-destructors"
+  #endif
+
+  MemoryPool* getDefaultPool() {
+    static MemoryPoolImpl internal;
+    return &internal;
+  }
+} // namespace orc

http://git-wip-us.apache.org/repos/asf/orc/blob/7f55b453/c++/src/OrcFile.cc
----------------------------------------------------------------------
diff --git a/c++/src/OrcFile.cc b/c++/src/OrcFile.cc
new file mode 100644
index 0000000..f8c22c4
--- /dev/null
+++ b/c++/src/OrcFile.cc
@@ -0,0 +1,102 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "orc/OrcFile.hh"
+
+#include "Adaptor.hh"
+#include "Exceptions.hh"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+namespace orc {
+
+  class FileInputStream : public InputStream {
+  private:
+    std::string filename ;
+    int file;
+    uint64_t totalLength;
+
+  public:
+    FileInputStream(std::string _filename) {
+      filename = _filename ;
+      file = open(filename.c_str(), O_RDONLY);
+      if (file == -1) {
+        throw ParseError("Can't open " + filename);
+      }
+      struct stat fileStat;
+      if (fstat(file, &fileStat) == -1) {
+        throw ParseError("Can't stat " + filename);
+      }
+      totalLength = static_cast<uint64_t>(fileStat.st_size);
+    }
+
+    ~FileInputStream();
+
+    uint64_t getLength() const override {
+      return totalLength;
+    }
+
+    void read(void* buf,
+              uint64_t length,
+              uint64_t offset) override {
+      if (!buf) {
+        throw ParseError("Buffer is null");
+      }
+      ssize_t bytesRead = pread(file, buf, length, static_cast<off_t>(offset));
+
+      if (bytesRead == -1) {
+        throw ParseError("Bad read of " + filename);
+      }
+      if (static_cast<uint64_t>(bytesRead) != length) {
+        throw ParseError("Short read of " + filename);
+      }
+    }
+
+    const std::string& getName() const override {
+      return filename;
+    }
+  };
+
+  FileInputStream::~FileInputStream() {
+    close(file);
+  }
+
+  std::unique_ptr<InputStream> readLocalFile(const std::string& path) {
+    return std::unique_ptr<InputStream>(new FileInputStream(path));
+  }
+}
+
+#ifndef HAS_STOLL
+
+  #include <sstream>
+
+  int64_t std::stoll(std::string str) {
+    int64_t val = 0;
+    stringstream ss ;
+    ss << str ;
+    ss >> val ;
+    return val;
+  }
+
+#endif

http://git-wip-us.apache.org/repos/asf/orc/blob/7f55b453/c++/src/RLE.cc
----------------------------------------------------------------------
diff --git a/c++/src/RLE.cc b/c++/src/RLE.cc
new file mode 100644
index 0000000..51bd628
--- /dev/null
+++ b/c++/src/RLE.cc
@@ -0,0 +1,47 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+#include "RLEv1.hh"
+#include "RLEv2.hh"
+#include "Exceptions.hh"
+
+namespace orc {
+
+  RleDecoder::~RleDecoder() {
+    // PASS
+  }
+
+  std::unique_ptr<RleDecoder> createRleDecoder
+                         (std::unique_ptr<SeekableInputStream> input,
+                          bool isSigned,
+                          RleVersion version,
+                          MemoryPool& pool) {
+    switch (static_cast<int64_t>(version)) {
+    case RleVersion_1:
+      // We don't have std::make_unique() yet.
+      return std::unique_ptr<RleDecoder>(new RleDecoderV1(std::move(input),
+                                                          isSigned));
+    case RleVersion_2:
+      return std::unique_ptr<RleDecoder>(new RleDecoderV2(std::move(input),
+                                                          isSigned, pool));
+    default:
+      throw NotImplementedYet("Not implemented yet");
+    }
+  }
+
+}  // namespace orc

http://git-wip-us.apache.org/repos/asf/orc/blob/7f55b453/c++/src/RLE.hh
----------------------------------------------------------------------
diff --git a/c++/src/RLE.hh b/c++/src/RLE.hh
new file mode 100644
index 0000000..0a44c95
--- /dev/null
+++ b/c++/src/RLE.hh
@@ -0,0 +1,78 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ORC_RLE_HH
+#define ORC_RLE_HH
+
+#include "Compression.hh"
+
+#include <memory>
+
+namespace orc {
+
+  inline int64_t unZigZag(uint64_t value) {
+    return value >> 1 ^ -(value & 1);
+  }
+
+  class RleDecoder {
+  public:
+    // must be non-inline!
+    virtual ~RleDecoder();
+
+    /**
+     * Seek to a particular spot.
+     */
+    virtual void seek(PositionProvider&) = 0;
+
+    /**
+     * Seek over a given number of values.
+     */
+    virtual void skip(uint64_t numValues) = 0;
+
+    /**
+     * Read a number of values into the batch.
+     * @param data the array to read into
+     * @param numValues the number of values to read
+     * @param notNull If the pointer is null, all values are read. If the
+     *    pointer is not null, positions that are false are skipped.
+     */
+    virtual void next(int64_t* data, uint64_t numValues,
+                      const char* notNull) = 0;
+  };
+
+  enum RleVersion {
+    RleVersion_1,
+    RleVersion_2
+  };
+
+  /**
+   * Create an RLE decoder.
+   * @param input the input stream to read from
+   * @param isSigned true if the number sequence is signed
+   * @param version version of RLE decoding to do
+   * @param pool memory pool to use for allocation
+   */
+  std::unique_ptr<RleDecoder> createRleDecoder
+                      (std::unique_ptr<SeekableInputStream> input,
+                       bool isSigned,
+                       RleVersion version,
+                       MemoryPool& pool);
+
+}  // namespace orc
+
+#endif  // ORC_RLE_HH

http://git-wip-us.apache.org/repos/asf/orc/blob/7f55b453/c++/src/RLEv1.cc
----------------------------------------------------------------------
diff --git a/c++/src/RLEv1.cc b/c++/src/RLEv1.cc
new file mode 100644
index 0000000..91bb79d
--- /dev/null
+++ b/c++/src/RLEv1.cc
@@ -0,0 +1,190 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Adaptor.hh"
+#include "Compression.hh"
+#include "Exceptions.hh"
+#include "RLEv1.hh"
+
+#include <algorithm>
+
+namespace orc {
+
+const uint64_t MINIMUM_REPEAT = 3;
+const uint64_t BASE_128_MASK = 0x7f;
+
+signed char RleDecoderV1::readByte() {
+  if (bufferStart == bufferEnd) {
+    int bufferLength;
+    const void* bufferPointer;
+    if (!inputStream->Next(&bufferPointer, &bufferLength)) {
+      throw ParseError("bad read in readByte");
+    }
+    bufferStart = static_cast<const char*>(bufferPointer);
+    bufferEnd = bufferStart + bufferLength;
+  }
+  return *(bufferStart++);
+}
+
+uint64_t RleDecoderV1::readLong() {
+  uint64_t result = 0;
+  int64_t offset = 0;
+  signed char ch = readByte();
+  if (ch >= 0) {
+    result = static_cast<uint64_t>(ch);
+  } else {
+    result = static_cast<uint64_t>(ch) & BASE_128_MASK;
+    while ((ch = readByte()) < 0) {
+      offset += 7;
+      result |= (static_cast<uint64_t>(ch) & BASE_128_MASK) << offset;
+    }
+    result |= static_cast<uint64_t>(ch) << (offset + 7);
+  }
+  return result;
+}
+
+void RleDecoderV1::skipLongs(uint64_t numValues) {
+  while (numValues > 0) {
+    if (readByte() >= 0) {
+      --numValues;
+    }
+  }
+}
+
+void RleDecoderV1::readHeader() {
+  signed char ch = readByte();
+  if (ch < 0) {
+    remainingValues = static_cast<uint64_t>(-ch);
+    repeating = false;
+  } else {
+    remainingValues = static_cast<uint64_t>(ch) + MINIMUM_REPEAT;
+    repeating = true;
+    delta = readByte();
+    value = isSigned
+        ? unZigZag(readLong())
+        : static_cast<int64_t>(readLong());
+  }
+}
+
+RleDecoderV1::RleDecoderV1(std::unique_ptr<SeekableInputStream> input,
+                           bool hasSigned)
+    : inputStream(std::move(input)),
+      isSigned(hasSigned),
+      remainingValues(0),
+      bufferStart(nullptr),
+      bufferEnd(bufferStart) {
+}
+
+void RleDecoderV1::seek(PositionProvider& location) {
+  // move the input stream
+  inputStream->seek(location);
+  // force a re-read from the stream
+  bufferEnd = bufferStart;
+  // read a new header
+  readHeader();
+  // skip ahead the given number of records
+  skip(location.next());
+}
+
+void RleDecoderV1::skip(uint64_t numValues) {
+  while (numValues > 0) {
+    if (remainingValues == 0) {
+      readHeader();
+    }
+    uint64_t count = std::min(numValues, remainingValues);
+    remainingValues -= count;
+    numValues -= count;
+    if (repeating) {
+      value += delta * static_cast<int64_t>(count);
+    } else {
+      skipLongs(count);
+    }
+  }
+}
+
+void RleDecoderV1::next(int64_t* const data,
+                        const uint64_t numValues,
+                        const char* const notNull) {
+  uint64_t position = 0;
+  // skipNulls()
+  if (notNull) {
+    // Skip over null values.
+    while (position < numValues && !notNull[position]) {
+      ++position;
+    }
+  }
+  while (position < numValues) {
+    // If we are out of values, read more.
+    if (remainingValues == 0) {
+      readHeader();
+    }
+    // How many do we read out of this block?
+    uint64_t count = std::min(numValues - position, remainingValues);
+    uint64_t consumed = 0;
+    if (repeating) {
+      if (notNull) {
+        for (uint64_t i = 0; i < count; ++i) {
+          if (notNull[position + i]) {
+            data[position + i] = value + static_cast<int64_t>(consumed) * delta;
+            consumed += 1;
+          }
+        }
+      } else {
+        for (uint64_t i = 0; i < count; ++i) {
+          data[position + i] = value + static_cast<int64_t>(i) * delta;
+        }
+        consumed = count;
+      }
+      value += static_cast<int64_t>(consumed) * delta;
+    } else {
+      if (notNull) {
+        for (uint64_t i = 0 ; i < count; ++i) {
+          if (notNull[i]) {
+            data[position + i] = isSigned
+                ? unZigZag(readLong())
+                : static_cast<int64_t>(readLong());
+            ++consumed;
+          }
+        }
+      } else {
+        if (isSigned) {
+          for (uint64_t i = 0; i < count; ++i) {
+            data[position + i] = unZigZag(readLong());
+          }
+        } else {
+          for (uint64_t i = 0; i < count; ++i) {
+            data[position + i] = static_cast<int64_t>(readLong());
+          }
+        }
+        consumed = count;
+      }
+    }
+    remainingValues -= consumed;
+    position += count;
+
+    // skipNulls()
+    if (notNull) {
+      // Skip over null values.
+      while (position < numValues && !notNull[position]) {
+        ++position;
+      }
+    }
+  }
+}
+
+}  // namespace orc

http://git-wip-us.apache.org/repos/asf/orc/blob/7f55b453/c++/src/RLEv1.hh
----------------------------------------------------------------------
diff --git a/c++/src/RLEv1.hh b/c++/src/RLEv1.hh
new file mode 100644
index 0000000..95e50a3
--- /dev/null
+++ b/c++/src/RLEv1.hh
@@ -0,0 +1,70 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+#ifndef ORC_RLEV1_HH
+#define ORC_RLEV1_HH
+
+#include "Adaptor.hh"
+#include "RLE.hh"
+
+#include <memory>
+
+namespace orc {
+
+class RleDecoderV1 : public RleDecoder {
+public:
+    RleDecoderV1(std::unique_ptr<SeekableInputStream> input,
+                 bool isSigned);
+
+    /**
+    * Seek to a particular spot.
+    */
+    void seek(PositionProvider&) override;
+
+    /**
+    * Seek over a given number of values.
+    */
+    void skip(uint64_t numValues) override;
+
+    /**
+    * Read a number of values into the batch.
+    */
+    void next(int64_t* data, uint64_t numValues,
+              const char* notNull) override;
+
+private:
+    inline signed char readByte();
+
+    inline void readHeader();
+
+    inline uint64_t readLong();
+
+    inline void skipLongs(uint64_t numValues);
+
+    const std::unique_ptr<SeekableInputStream> inputStream;
+    const bool isSigned;
+    uint64_t remainingValues;
+    int64_t value;
+    const char *bufferStart;
+    const char *bufferEnd;
+    int64_t delta;
+    bool repeating;
+};
+}  // namespace orc
+
+#endif  // ORC_RLEV1_HH

http://git-wip-us.apache.org/repos/asf/orc/blob/7f55b453/c++/src/RLEv2.cc
----------------------------------------------------------------------
diff --git a/c++/src/RLEv2.cc b/c++/src/RLEv2.cc
new file mode 100644
index 0000000..43428b4
--- /dev/null
+++ b/c++/src/RLEv2.cc
@@ -0,0 +1,482 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Adaptor.hh"
+#include "Compression.hh"
+#include "RLEv2.hh"
+
+#define MIN_REPEAT 3
+
+namespace orc {
+
+struct FixedBitSizes {
+  enum FBS {
+    ONE = 0, TWO, THREE, FOUR, FIVE, SIX, SEVEN, EIGHT, NINE, TEN, ELEVEN, TWELVE,
+    THIRTEEN, FOURTEEN, FIFTEEN, SIXTEEN, SEVENTEEN, EIGHTEEN, NINETEEN,
+    TWENTY, TWENTYONE, TWENTYTWO, TWENTYTHREE, TWENTYFOUR, TWENTYSIX,
+    TWENTYEIGHT, THIRTY, THIRTYTWO, FORTY, FORTYEIGHT, FIFTYSIX, SIXTYFOUR
+  };
+};
+
+inline uint32_t decodeBitWidth(uint32_t n) {
+  if (n >= FixedBitSizes::ONE &&
+      n <= FixedBitSizes::TWENTYFOUR) {
+    return n + 1;
+  } else if (n == FixedBitSizes::TWENTYSIX) {
+    return 26;
+  } else if (n == FixedBitSizes::TWENTYEIGHT) {
+    return 28;
+  } else if (n == FixedBitSizes::THIRTY) {
+    return 30;
+  } else if (n == FixedBitSizes::THIRTYTWO) {
+    return 32;
+  } else if (n == FixedBitSizes::FORTY) {
+    return 40;
+  } else if (n == FixedBitSizes::FORTYEIGHT) {
+    return 48;
+  } else if (n == FixedBitSizes::FIFTYSIX) {
+    return 56;
+  } else {
+    return 64;
+  }
+}
+
+inline uint32_t getClosestFixedBits(uint32_t n) {
+  if (n == 0) {
+    return 1;
+  }
+
+  if (n >= 1 && n <= 24) {
+    return n;
+  } else if (n > 24 && n <= 26) {
+    return 26;
+  } else if (n > 26 && n <= 28) {
+    return 28;
+  } else if (n > 28 && n <= 30) {
+    return 30;
+  } else if (n > 30 && n <= 32) {
+    return 32;
+  } else if (n > 32 && n <= 40) {
+    return 40;
+  } else if (n > 40 && n <= 48) {
+    return 48;
+  } else if (n > 48 && n <= 56) {
+    return 56;
+  } else {
+    return 64;
+  }
+}
+
+int64_t RleDecoderV2::readLongBE(uint64_t bsz) {
+  int64_t ret = 0, val;
+  uint64_t n = bsz;
+  while (n > 0) {
+    n--;
+    val = readByte();
+    ret |= (val << (n * 8));
+  }
+  return ret;
+}
+
+inline int64_t RleDecoderV2::readVslong() {
+  return unZigZag(readVulong());
+}
+
+uint64_t RleDecoderV2::readVulong() {
+  uint64_t ret = 0, b;
+  uint64_t offset = 0;
+  do {
+    b = readByte();
+    ret |= (0x7f & b) << offset;
+    offset += 7;
+  } while (b >= 0x80);
+  return ret;
+}
+
+RleDecoderV2::RleDecoderV2(std::unique_ptr<SeekableInputStream> input,
+                           bool _isSigned, MemoryPool& pool
+                           ): inputStream(std::move(input)),
+                              isSigned(_isSigned),
+                              firstByte(0),
+                              runLength(0),
+                              runRead(0),
+                              bufferStart(nullptr),
+                              bufferEnd(bufferStart),
+                              deltaBase(0),
+                              byteSize(0),
+                              firstValue(0),
+                              prevValue(0),
+                              bitSize(0),
+                              bitsLeft(0),
+                              curByte(0),
+                              patchBitSize(0),
+                              base(0),
+                              curGap(0),
+                              patchMask(0),
+                              actualGap(0),
+                              unpacked(pool, 0),
+                              unpackedPatch(pool, 0) {
+  // PASS
+}
+
+void RleDecoderV2::seek(PositionProvider& location) {
+  // move the input stream
+  inputStream->seek(location);
+  // clear state
+  bufferEnd = bufferStart = 0;
+  runRead = runLength = 0;
+  // skip ahead the given number of records
+  skip(location.next());
+}
+
+void RleDecoderV2::skip(uint64_t numValues) {
+  // simple for now, until perf tests indicate something encoding specific is
+  // needed
+  const uint64_t N = 64;
+  int64_t dummy[N];
+
+  while (numValues) {
+    uint64_t nRead = std::min(N, numValues);
+    next(dummy, nRead, nullptr);
+    numValues -= nRead;
+  }
+}
+
+void RleDecoderV2::next(int64_t* const data,
+                        const uint64_t numValues,
+                        const char* const notNull) {
+  uint64_t nRead = 0;
+
+  while (nRead < numValues) {
+    // Skip any nulls before attempting to read first byte.
+    while (notNull && !notNull[nRead]) {
+      if (++nRead == numValues) {
+        return; // ended with null values
+      }
+    }
+
+    if (runRead == runLength) {
+      resetRun();
+      firstByte = readByte();
+    }
+
+    uint64_t offset = nRead, length = numValues - nRead;
+
+    EncodingType enc = static_cast<EncodingType>
+        ((firstByte >> 6) & 0x03);
+    switch(static_cast<int64_t>(enc)) {
+    case SHORT_REPEAT:
+      nRead += nextShortRepeats(data, offset, length, notNull);
+      break;
+    case DIRECT:
+      nRead += nextDirect(data, offset, length, notNull);
+      break;
+    case PATCHED_BASE:
+      nRead += nextPatched(data, offset, length, notNull);
+      break;
+    case DELTA:
+      nRead += nextDelta(data, offset, length, notNull);
+      break;
+    default:
+      throw ParseError("unknown encoding");
+    }
+  }
+}
+
+uint64_t RleDecoderV2::nextShortRepeats(int64_t* const data,
+                                        uint64_t offset,
+                                        uint64_t numValues,
+                                        const char* const notNull) {
+  if (runRead == runLength) {
+    // extract the number of fixed bytes
+    byteSize = (firstByte >> 3) & 0x07;
+    byteSize += 1;
+
+    runLength = firstByte & 0x07;
+    // run lengths values are stored only after MIN_REPEAT value is met
+    runLength += MIN_REPEAT;
+    runRead = 0;
+
+    // read the repeated value which is store using fixed bytes
+    firstValue = readLongBE(byteSize);
+
+    if (isSigned) {
+      firstValue = unZigZag(static_cast<uint64_t>(firstValue));
+    }
+  }
+
+  uint64_t nRead = std::min(runLength - runRead, numValues);
+
+  if (notNull) {
+    for(uint64_t pos = offset; pos < offset + nRead; ++pos) {
+      if (notNull[pos]) {
+        data[pos] = firstValue;
+        ++runRead;
+      }
+    }
+  } else {
+    for(uint64_t pos = offset; pos < offset + nRead; ++pos) {
+      data[pos] = firstValue;
+      ++runRead;
+    }
+  }
+
+  return nRead;
+}
+
+uint64_t RleDecoderV2::nextDirect(int64_t* const data,
+                                  uint64_t offset,
+                                  uint64_t numValues,
+                                  const char* const notNull) {
+  if (runRead == runLength) {
+    // extract the number of fixed bits
+    unsigned char fbo = (firstByte >> 1) & 0x1f;
+    bitSize = decodeBitWidth(fbo);
+
+    // extract the run length
+    runLength = static_cast<uint64_t>(firstByte & 0x01) << 8;
+    runLength |= readByte();
+    // runs are one off
+    runLength += 1;
+    runRead = 0;
+  }
+
+  uint64_t nRead = std::min(runLength - runRead, numValues);
+
+  runRead += readLongs(data, offset, nRead, bitSize, notNull);
+
+  if (isSigned) {
+    if (notNull) {
+      for (uint64_t pos = offset; pos < offset + nRead; ++pos) {
+        if (notNull[pos]) {
+          data[pos] = unZigZag(static_cast<uint64_t>(data[pos]));
+        }
+      }
+    } else {
+      for (uint64_t pos = offset; pos < offset + nRead; ++pos) {
+        data[pos] = unZigZag(static_cast<uint64_t>(data[pos]));
+      }
+    }
+  }
+
+  return nRead;
+}
+
+uint64_t RleDecoderV2::nextPatched(int64_t* const data,
+                                   uint64_t offset,
+                                   uint64_t numValues,
+                                   const char* const notNull) {
+  if (runRead == runLength) {
+    // extract the number of fixed bits
+    unsigned char fbo = (firstByte >> 1) & 0x1f;
+    bitSize = decodeBitWidth(fbo);
+
+    // extract the run length
+    runLength = static_cast<uint64_t>(firstByte & 0x01) << 8;
+    runLength |= readByte();
+    // runs are one off
+    runLength += 1;
+    runRead = 0;
+
+    // extract the number of bytes occupied by base
+    uint64_t thirdByte = readByte();
+    byteSize = (thirdByte >> 5) & 0x07;
+    // base width is one off
+    byteSize += 1;
+
+    // extract patch width
+    uint32_t pwo = thirdByte & 0x1f;
+    patchBitSize = decodeBitWidth(pwo);
+
+    // read fourth byte and extract patch gap width
+    uint64_t fourthByte = readByte();
+    uint32_t pgw = (fourthByte >> 5) & 0x07;
+    // patch gap width is one off
+    pgw += 1;
+
+    // extract the length of the patch list
+    size_t pl = fourthByte & 0x1f;
+    if (pl == 0) {
+      throw ParseError("Corrupt PATCHED_BASE encoded data (pl==0)!");
+    }
+
+    // read the next base width number of bytes to extract base value
+    base = readLongBE(byteSize);
+    int64_t mask = (static_cast<int64_t>(1) << ((byteSize * 8) - 1));
+    // if mask of base value is 1 then base is negative value else positive
+    if ((base & mask) != 0) {
+      base = base & ~mask;
+      base = -base;
+    }
+
+    // TODO: something more efficient than resize
+    unpacked.resize(runLength);
+    unpackedIdx = 0;
+    readLongs(unpacked.data(), 0, runLength, bitSize);
+    // any remaining bits are thrown out
+    resetReadLongs();
+
+    // TODO: something more efficient than resize
+    unpackedPatch.resize(pl);
+    patchIdx = 0;
+    // TODO: Skip corrupt?
+    //    if ((patchBitSize + pgw) > 64 && !skipCorrupt) {
+    if ((patchBitSize + pgw) > 64) {
+      throw ParseError("Corrupt PATCHED_BASE encoded data "
+                       "(patchBitSize + pgw > 64)!");
+    }
+    uint32_t cfb = getClosestFixedBits(patchBitSize + pgw);
+    readLongs(unpackedPatch.data(), 0, pl, cfb);
+    // any remaining bits are thrown out
+    resetReadLongs();
+
+    // apply the patch directly when decoding the packed data
+    patchMask = ((static_cast<int64_t>(1) << patchBitSize) - 1);
+
+    adjustGapAndPatch();
+  }
+
+  uint64_t nRead = std::min(runLength - runRead, numValues);
+
+  for(uint64_t pos = offset; pos < offset + nRead; ++pos) {
+    // skip null positions
+    if (notNull && !notNull[pos]) {
+      continue;
+    }
+    if (static_cast<int64_t>(unpackedIdx) != actualGap) {
+      // no patching required. add base to unpacked value to get final value
+      data[pos] = base + unpacked[unpackedIdx];
+    } else {
+      // extract the patch value
+      int64_t patchedVal = unpacked[unpackedIdx] | (curPatch << bitSize);
+
+      // add base to patched value
+      data[pos] = base + patchedVal;
+
+      // increment the patch to point to next entry in patch list
+      ++patchIdx;
+
+      if (patchIdx < unpackedPatch.size()) {
+        adjustGapAndPatch();
+
+        // next gap is relative to the current gap
+        actualGap += unpackedIdx;
+      }
+    }
+
+    ++runRead;
+    ++unpackedIdx;
+  }
+
+  return nRead;
+}
+
+uint64_t RleDecoderV2::nextDelta(int64_t* const data,
+                                 uint64_t offset,
+                                 uint64_t numValues,
+                                 const char* const notNull) {
+  if (runRead == runLength) {
+    // extract the number of fixed bits
+    unsigned char fbo = (firstByte >> 1) & 0x1f;
+    if (fbo != 0) {
+      bitSize = decodeBitWidth(fbo);
+    } else {
+      bitSize = 0;
+    }
+
+    // extract the run length
+    runLength = static_cast<uint64_t>(firstByte & 0x01) << 8;
+    runLength |= readByte();
+    ++runLength; // account for first value
+    runRead = deltaBase = 0;
+
+    // read the first value stored as vint
+    if (isSigned) {
+      firstValue = static_cast<int64_t>(readVslong());
+    } else {
+      firstValue = static_cast<int64_t>(readVulong());
+    }
+
+    prevValue = firstValue;
+
+    // read the fixed delta value stored as vint (deltas can be negative even
+    // if all number are positive)
+    deltaBase = static_cast<int64_t>(readVslong());
+  }
+
+  uint64_t nRead = std::min(runLength - runRead, numValues);
+
+  uint64_t pos = offset;
+  for ( ; pos < offset + nRead; ++pos) {
+    // skip null positions
+    if (!notNull || notNull[pos]) break;
+  }
+  if (runRead == 0 && pos < offset + nRead) {
+    data[pos++] = firstValue;
+    ++runRead;
+  }
+
+  if (bitSize == 0) {
+    // add fixed deltas to adjacent values
+    for ( ; pos < offset + nRead; ++pos) {
+      // skip null positions
+      if (notNull && !notNull[pos]) {
+        continue;
+      }
+      prevValue = data[pos] = prevValue + deltaBase;
+      ++runRead;
+    }
+  } else {
+    for ( ; pos < offset + nRead; ++pos) {
+      // skip null positions
+      if (!notNull || notNull[pos]) break;
+    }
+    if (runRead < 2 && pos < offset + nRead) {
+      // add delta base and first value
+      prevValue = data[pos++] = firstValue + deltaBase;
+      ++runRead;
+    }
+
+    // write the unpacked values, add it to previous value and store final
+    // value to result buffer. if the delta base value is negative then it
+    // is a decreasing sequence else an increasing sequence
+    uint64_t remaining = (offset + nRead) - pos;
+    runRead += readLongs(data, pos, remaining, bitSize, notNull);
+
+    if (deltaBase < 0) {
+      for ( ; pos < offset + nRead; ++pos) {
+        // skip null positions
+        if (notNull && !notNull[pos]) {
+          continue;
+        }
+        prevValue = data[pos] = prevValue - data[pos];
+      }
+    } else {
+      for ( ; pos < offset + nRead; ++pos) {
+        // skip null positions
+        if (notNull && !notNull[pos]) {
+          continue;
+        }
+        prevValue = data[pos] = prevValue + data[pos];
+      }
+    }
+  }
+  return nRead;
+}
+
+}  // namespace orc

http://git-wip-us.apache.org/repos/asf/orc/blob/7f55b453/c++/src/RLEv2.hh
----------------------------------------------------------------------
diff --git a/c++/src/RLEv2.hh b/c++/src/RLEv2.hh
new file mode 100644
index 0000000..2923009
--- /dev/null
+++ b/c++/src/RLEv2.hh
@@ -0,0 +1,175 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+#ifndef ORC_RLEV2_HH
+#define ORC_RLEV2_HH
+
+#include "Adaptor.hh"
+#include "Exceptions.hh"
+#include "RLE.hh"
+
+#include <vector>
+
+namespace orc {
+
+class RleDecoderV2 : public RleDecoder {
+public:
+
+  enum EncodingType { SHORT_REPEAT=0, DIRECT=1, PATCHED_BASE=2, DELTA=3 };
+
+  RleDecoderV2(std::unique_ptr<SeekableInputStream> input,
+               bool isSigned, MemoryPool& pool);
+
+  /**
+  * Seek to a particular spot.
+  */
+  void seek(PositionProvider&) override;
+
+  /**
+  * Seek over a given number of values.
+  */
+  void skip(uint64_t numValues) override;
+
+  /**
+  * Read a number of values into the batch.
+  */
+  void next(int64_t* data, uint64_t numValues,
+            const char* notNull) override;
+
+private:
+
+  // Used by PATCHED_BASE
+  void adjustGapAndPatch() {
+    curGap = static_cast<uint64_t>(unpackedPatch[patchIdx]) >>
+      patchBitSize;
+    curPatch = unpackedPatch[patchIdx] & patchMask;
+    actualGap = 0;
+
+    // special case: gap is >255 then patch value will be 0.
+    // if gap is <=255 then patch value cannot be 0
+    while (curGap == 255 && curPatch == 0) {
+      actualGap += 255;
+      ++patchIdx;
+      curGap = static_cast<uint64_t>(unpackedPatch[patchIdx]) >>
+        patchBitSize;
+      curPatch = unpackedPatch[patchIdx] & patchMask;
+    }
+    // add the left over gap
+    actualGap += curGap;
+  }
+
+  void resetReadLongs() {
+    bitsLeft = 0;
+    curByte = 0;
+  }
+
+  void resetRun() {
+    resetReadLongs();
+    bitSize = 0;
+  }
+
+  unsigned char readByte() {
+  if (bufferStart == bufferEnd) {
+    int bufferLength;
+    const void* bufferPointer;
+    if (!inputStream->Next(&bufferPointer, &bufferLength)) {
+      throw ParseError("bad read in RleDecoderV2::readByte");
+    }
+    bufferStart = static_cast<const char*>(bufferPointer);
+    bufferEnd = bufferStart + bufferLength;
+  }
+
+  unsigned char result = static_cast<unsigned char>(*bufferStart++);
+  return result;
+}
+
+  int64_t readLongBE(uint64_t bsz);
+  int64_t readVslong();
+  uint64_t readVulong();
+  uint64_t readLongs(int64_t *data, uint64_t offset, uint64_t len,
+                     uint64_t fb, const char* notNull = nullptr) {
+  uint64_t ret = 0;
+
+  // TODO: unroll to improve performance
+  for(uint64_t i = offset; i < (offset + len); i++) {
+    // skip null positions
+    if (notNull && !notNull[i]) {
+      continue;
+    }
+    uint64_t result = 0;
+    uint64_t bitsLeftToRead = fb;
+    while (bitsLeftToRead > bitsLeft) {
+      result <<= bitsLeft;
+      result |= curByte & ((1 << bitsLeft) - 1);
+      bitsLeftToRead -= bitsLeft;
+      curByte = readByte();
+      bitsLeft = 8;
+    }
+
+    // handle the left over bits
+    if (bitsLeftToRead > 0) {
+      result <<= bitsLeftToRead;
+      bitsLeft -= static_cast<uint32_t>(bitsLeftToRead);
+      result |= (curByte >> bitsLeft) & ((1 << bitsLeftToRead) - 1);
+    }
+    data[i] = static_cast<int64_t>(result);
+    ++ret;
+  }
+
+  return ret;
+}
+
+
+  uint64_t nextShortRepeats(int64_t* data, uint64_t offset, uint64_t numValues,
+                            const char* notNull);
+  uint64_t nextDirect(int64_t* data, uint64_t offset, uint64_t numValues,
+                      const char* notNull);
+  uint64_t nextPatched(int64_t* data, uint64_t offset, uint64_t numValues,
+                       const char* notNull);
+  uint64_t nextDelta(int64_t* data, uint64_t offset, uint64_t numValues,
+                     const char* notNull);
+
+  const std::unique_ptr<SeekableInputStream> inputStream;
+  const bool isSigned;
+
+  unsigned char firstByte;
+  uint64_t runLength;
+  uint64_t runRead;
+  const char *bufferStart;
+  const char *bufferEnd;
+  int64_t deltaBase; // Used by DELTA
+  uint64_t byteSize; // Used by SHORT_REPEAT and PATCHED_BASE
+  int64_t firstValue; // Used by SHORT_REPEAT and DELTA
+  int64_t prevValue; // Used by DELTA
+  uint32_t bitSize; // Used by DIRECT, PATCHED_BASE and DELTA
+  uint32_t bitsLeft; // Used by anything that uses readLongs
+  uint32_t curByte; // Used by anything that uses readLongs
+  uint32_t patchBitSize; // Used by PATCHED_BASE
+  uint64_t unpackedIdx; // Used by PATCHED_BASE
+  uint64_t patchIdx; // Used by PATCHED_BASE
+  int64_t base; // Used by PATCHED_BASE
+  uint64_t curGap; // Used by PATCHED_BASE
+  int64_t curPatch; // Used by PATCHED_BASE
+  int64_t patchMask; // Used by PATCHED_BASE
+  int64_t actualGap; // Used by PATCHED_BASE
+  DataBuffer<int64_t> unpacked; // Used by PATCHED_BASE
+  DataBuffer<int64_t> unpackedPatch; // Used by PATCHED_BASE
+};
+}  // namespace orc
+
+#endif  // ORC_RLEV2_HH