You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2015/07/06 23:52:52 UTC
[23/23] orc git commit: ORC-23. Simplify directory structure.
ORC-23. Simplify directory structure.
This closes #1
Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/7f55b453
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/7f55b453
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/7f55b453
Branch: refs/heads/master
Commit: 7f55b45370529b6a68a1b62068352336901609b8
Parents: 486433f
Author: Owen O'Malley <om...@apache.org>
Authored: Thu Jul 2 14:19:40 2015 -0700
Committer: Owen O'Malley <om...@apache.org>
Committed: Mon Jul 6 14:51:55 2015 -0700
----------------------------------------------------------------------
CMakeLists.txt | 2 +-
c++/src/Adaptor.hh.in | 92 +
c++/src/ByteRLE.cc | 352 +++
c++/src/ByteRLE.hh | 71 +
c++/src/C09Adapter.cc | 28 +
c++/src/CMakeLists.txt | 34 +-
c++/src/ColumnPrinter.cc | 725 +++++
c++/src/ColumnReader.cc | 1558 ++++++++++
c++/src/ColumnReader.hh | 109 +
c++/src/Compression.cc | 750 +++++
c++/src/Compression.hh | 130 +
c++/src/Exceptions.cc | 59 +
c++/src/Exceptions.hh | 50 +
c++/src/Int128.cc | 438 +++
c++/src/MemoryPool.cc | 232 ++
c++/src/OrcFile.cc | 102 +
c++/src/RLE.cc | 47 +
c++/src/RLE.hh | 78 +
c++/src/RLEv1.cc | 190 ++
c++/src/RLEv1.hh | 70 +
c++/src/RLEv2.cc | 482 +++
c++/src/RLEv2.hh | 175 ++
c++/src/Reader.cc | 1903 ++++++++++++
c++/src/TypeImpl.cc | 369 +++
c++/src/TypeImpl.hh | 101 +
c++/src/Vector.cc | 306 ++
c++/src/orc/Adaptor.hh.in | 92 -
c++/src/orc/ByteRLE.cc | 352 ---
c++/src/orc/ByteRLE.hh | 71 -
c++/src/orc/C09Adapter.cc | 28 -
c++/src/orc/ColumnPrinter.cc | 724 -----
c++/src/orc/ColumnReader.cc | 1557 ----------
c++/src/orc/ColumnReader.hh | 109 -
c++/src/orc/Compression.cc | 751 -----
c++/src/orc/Compression.hh | 129 -
c++/src/orc/Exceptions.cc | 59 -
c++/src/orc/Exceptions.hh | 50 -
c++/src/orc/Int128.cc | 438 ---
c++/src/orc/MemoryPool.cc | 231 --
c++/src/orc/OrcFile.cc | 101 -
c++/src/orc/RLE.cc | 47 -
c++/src/orc/RLE.hh | 78 -
c++/src/orc/RLEv1.cc | 190 --
c++/src/orc/RLEv1.hh | 70 -
c++/src/orc/RLEv2.cc | 482 ---
c++/src/orc/RLEv2.hh | 175 --
c++/src/orc/Reader.cc | 1902 ------------
c++/src/orc/TypeImpl.cc | 369 ---
c++/src/orc/TypeImpl.hh | 101 -
c++/src/orc/Vector.cc | 305 --
c++/src/wrap/coded-stream-wrapper.h | 2 +-
c++/src/wrap/gmock.h | 2 +-
c++/src/wrap/gtest-wrapper.h | 2 +-
c++/src/wrap/orc-proto-wrapper.cc | 2 +-
c++/src/wrap/orc-proto-wrapper.hh | 2 +-
c++/src/wrap/snappy-wrapper.h | 2 +-
c++/src/wrap/zero-copy-stream-wrapper.h | 2 +-
c++/test/CMakeLists.txt | 14 +-
c++/test/OrcTest.hh | 25 +
c++/test/TestByteRle.cc | 1385 +++++++++
c++/test/TestColumnPrinter.cc | 588 ++++
c++/test/TestColumnReader.cc | 4308 +++++++++++++++++++++++++
c++/test/TestCompression.cc | 647 ++++
c++/test/TestDriver.cc | 33 +
c++/test/TestInt128.cc | 587 ++++
c++/test/TestRle.cc | 2639 ++++++++++++++++
c++/test/orc/OrcTest.hh | 25 -
c++/test/orc/TestByteRle.cc | 1385 ---------
c++/test/orc/TestColumnPrinter.cc | 588 ----
c++/test/orc/TestColumnReader.cc | 4309 --------------------------
c++/test/orc/TestCompression.cc | 647 ----
c++/test/orc/TestDriver.cc | 33 -
c++/test/orc/TestInt128.cc | 587 ----
c++/test/orc/TestRle.cc | 2639 ----------------
tools-c++/CMakeLists.txt | 14 -
tools-c++/src/CMakeLists.txt | 64 -
tools-c++/src/FileContents.cc | 62 -
tools-c++/src/FileMetadata.cc | 181 --
tools-c++/src/FileScan.cc | 56 -
tools-c++/src/FileStatistics.cc | 75 -
tools-c++/test/CMakeLists.txt | 40 -
tools-c++/test/TestReader.cc | 2950 ------------------
tools-c++/test/ToolTest.cc | 43 -
tools-c++/test/ToolTest.hh | 21 -
tools-c++/test/gzip.cc | 115 -
tools-c++/test/gzip.hh | 52 -
tools/CMakeLists.txt | 14 +
tools/src/CMakeLists.txt | 64 +
tools/src/FileContents.cc | 63 +
tools/src/FileMetadata.cc | 181 ++
tools/src/FileScan.cc | 57 +
tools/src/FileStatistics.cc | 75 +
tools/test/CMakeLists.txt | 40 +
tools/test/TestReader.cc | 2950 ++++++++++++++++++
tools/test/ToolTest.cc | 43 +
tools/test/ToolTest.hh | 21 +
tools/test/gzip.cc | 115 +
tools/test/gzip.hh | 52 +
98 files changed, 22336 insertions(+), 22329 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/orc/blob/7f55b453/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
index a25551e..9981c65 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -95,7 +95,7 @@ enable_testing()
set (EXAMPLE_DIRECTORY ${CMAKE_SOURCE_DIR}/examples)
add_subdirectory(c++)
-add_subdirectory(tools-c++)
+add_subdirectory(tools)
# Add another target called test-out that prints the results on failure
if (CMAKE_CONFIGURATION_TYPES)
http://git-wip-us.apache.org/repos/asf/orc/blob/7f55b453/c++/src/Adaptor.hh.in
----------------------------------------------------------------------
diff --git a/c++/src/Adaptor.hh.in b/c++/src/Adaptor.hh.in
new file mode 100644
index 0000000..5b3a677
--- /dev/null
+++ b/c++/src/Adaptor.hh.in
@@ -0,0 +1,92 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Portability shims for the ORC C++ sources. This template is turned into
+// Adaptor.hh by CMake's configure_file().
+#ifndef ADAPTER_HH
+#define ADAPTER_HH
+
+// Platform feature switches. CMake rewrites each cmakedefine directive below
+// according to the result of the corresponding configure-time check.
+#cmakedefine INT64_IS_LL
+#cmakedefine HAS_PREAD
+#cmakedefine HAS_STOLL
+#cmakedefine HAS_DIAGNOSTIC_PUSH
+#cmakedefine HAS_PRE_1970
+#cmakedefine HAS_POST_2038
+
+#include "orc/orc-config.hh"
+#include <string>
+
+#ifndef HAS_STOLL
+ // A poor man's stoll that converts str to a long long int base 10.
+ // Only declared here; used when the standard library lacks std::stoll.
+ namespace std {
+ int64_t stoll(std::string str);
+ }
+#endif
+
+// Declaration of pread for platforms where the configure check did not find it.
+#ifndef HAS_PREAD
+ ssize_t pread(int fd, void *buf, size_t count, off_t offset);
+#endif
+
+// printf length modifier for int64_t; callers build the format as
+// "%" INT64_FORMAT_STRING "d".
+#ifdef INT64_IS_LL
+ #define INT64_FORMAT_STRING "ll"
+#else
+ #define INT64_FORMAT_STRING "l"
+#endif
+
+// Map the C++11 keywords onto ORC_NOEXCEPT / ORC_OVERRIDE when the compiler
+// lacks them (presumably defined in orc/orc-config.hh -- confirm there).
+#ifndef ORC_CXX_HAS_NOEXCEPT
+ #define noexcept ORC_NOEXCEPT
+#endif
+
+#ifndef ORC_CXX_HAS_OVERRIDE
+ #define override ORC_OVERRIDE
+#endif
+
+// DIAGNOSTIC_PUSH / DIAGNOSTIC_POP save and restore the compiler's warning
+// state; they expand to nothing when diagnostic push is unsupported.
+#ifdef HAS_DIAGNOSTIC_PUSH
+ #ifdef __clang__
+ #define DIAGNOSTIC_PUSH _Pragma("clang diagnostic push")
+ #define DIAGNOSTIC_POP _Pragma("clang diagnostic pop")
+ #elif defined(__GNUC__)
+ #define DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push")
+ #define DIAGNOSTIC_POP _Pragma("GCC diagnostic pop")
+ #else
+ #error("Unknown compiler")
+ #endif
+#else
+ #define DIAGNOSTIC_PUSH
+ #define DIAGNOSTIC_POP
+#endif
+
+// Stringizes its argument so a pragma can be assembled from macro arguments.
+#define PRAGMA(TXT) _Pragma(#TXT)
+
+// DIAGNOSTIC_IGNORE(XXX) silences the warning XXX on clang and GCC and is a
+// no-op on any other compiler.
+#ifdef __clang__
+ #define DIAGNOSTIC_IGNORE(XXX) PRAGMA(clang diagnostic ignored XXX)
+#elif defined(__GNUC__)
+ #define DIAGNOSTIC_IGNORE(XXX) PRAGMA(GCC diagnostic ignored XXX)
+#else
+ #define DIAGNOSTIC_IGNORE(XXX)
+#endif
+
+// Fall back to auto_ptr when unique_ptr is unavailable.
+#ifndef ORC_CXX_HAS_UNIQUE_PTR
+ #define unique_ptr auto_ptr
+#endif
+
+#ifndef UINT32_MAX
+ #define UINT32_MAX 0xffffffff
+#endif
+
+// NOTE(review): presumably tells gtest not to rely on C++11 -- confirm.
+#define GTEST_LANG_CXX11 0
+
+#endif /* ADAPTER_HH */
http://git-wip-us.apache.org/repos/asf/orc/blob/7f55b453/c++/src/ByteRLE.cc
----------------------------------------------------------------------
diff --git a/c++/src/ByteRLE.cc b/c++/src/ByteRLE.cc
new file mode 100644
index 0000000..2c0032c
--- /dev/null
+++ b/c++/src/ByteRLE.cc
@@ -0,0 +1,352 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <algorithm>
+#include <iostream>
+#include <string.h>
+#include <utility>
+
+#include "ByteRLE.hh"
+#include "Exceptions.hh"
+
+namespace orc {
+
+ // Bias added to a non-negative header byte to obtain the length of a
+ // repeated run (see ByteRleDecoderImpl::readHeader).
+ const size_t MINIMUM_REPEAT = 3;
+
+ // Out-of-line definition of the interface's virtual destructor.
+ ByteRleDecoder::~ByteRleDecoder() {
+ // PASS
+ }
+
+ /**
+ * Byte run-length decoder that pulls its input from a SeekableInputStream.
+ * A run is either a repeated value or a sequence of literal bytes, as
+ * selected by the sign of the run's header byte.
+ */
+ class ByteRleDecoderImpl: public ByteRleDecoder {
+ public:
+ ByteRleDecoderImpl(std::unique_ptr<SeekableInputStream> input);
+
+ virtual ~ByteRleDecoderImpl();
+
+ /**
+ * Seek to a particular spot.
+ */
+ virtual void seek(PositionProvider&);
+
+ /**
+ * Seek over a given number of values.
+ */
+ virtual void skip(uint64_t numValues);
+
+ /**
+ * Read a number of values into the batch.
+ */
+ virtual void next(char* data, uint64_t numValues, char* notNull);
+
+ protected:
+ // Fetch the next chunk from inputStream; throws ParseError if that fails.
+ inline void nextBuffer();
+ // Return the next raw byte, refilling the buffer when it is exhausted.
+ inline signed char readByte();
+ // Decode a run header into remainingValues / repeating / value.
+ inline void readHeader();
+
+ // the stream being decoded (owned)
+ std::unique_ptr<SeekableInputStream> inputStream;
+ // values still to be delivered from the current run
+ size_t remainingValues;
+ // the repeated value when the current run is a repeat
+ char value;
+ // unread portion of the current chunk: [bufferStart, bufferEnd)
+ const char* bufferStart;
+ const char* bufferEnd;
+ // true if the current run repeats `value`, false if it is literal bytes
+ bool repeating;
+ };
+
+ /**
+  * Replace the current read window with the next chunk of the input stream.
+  * Throws ParseError when the stream cannot supply another chunk.
+  */
+ void ByteRleDecoderImpl::nextBuffer() {
+   const void* chunk;
+   int chunkSize;
+   if (!inputStream->Next(&chunk, &chunkSize)) {
+     throw ParseError("bad read in nextBuffer");
+   }
+   bufferStart = static_cast<const char*>(chunk);
+   bufferEnd = bufferStart + chunkSize;
+ }
+
+ /**
+  * Return the next byte of the encoded stream.
+  *
+  * Refills the read window until it is non-empty: a single refill is not
+  * enough if the stream hands back a zero-length chunk, in which case the
+  * dereference below would read past the end of the window.
+  * NOTE(review): assumes SeekableInputStream::Next follows the protobuf
+  * ZeroCopyInputStream contract (empty chunks are legal) -- confirm.
+  * Throws ParseError (from nextBuffer) when the stream is exhausted.
+  */
+ signed char ByteRleDecoderImpl::readByte() {
+   while (bufferStart == bufferEnd) {
+     nextBuffer();
+   }
+   return *(bufferStart++);
+ }
+
+ /**
+  * Decode the header of the next run.
+  * A negative header byte -n introduces n literal bytes; a non-negative
+  * header h introduces h + MINIMUM_REPEAT copies of the byte that follows.
+  */
+ void ByteRleDecoderImpl::readHeader() {
+   const signed char header = readByte();
+   repeating = (header >= 0);
+   if (repeating) {
+     remainingValues = static_cast<size_t>(header) + MINIMUM_REPEAT;
+     value = readByte();
+   } else {
+     remainingValues = static_cast<size_t>(-header);
+   }
+ }
+
+ /**
+  * Take ownership of the input stream and start with no pending run and an
+  * empty read window, so the first read fetches a header and a chunk.
+  */
+ ByteRleDecoderImpl::ByteRleDecoderImpl(std::unique_ptr<SeekableInputStream>
+                                        input)
+     : inputStream(std::move(input)),
+       remainingValues(0),
+       value(0),
+       bufferStart(0),
+       bufferEnd(0),
+       repeating(false) {
+   // all state is set in the initializer list
+ }
+
+ // Nothing to do explicitly: inputStream is released by its unique_ptr.
+ ByteRleDecoderImpl::~ByteRleDecoderImpl() {
+ // PASS
+ }
+
+ /**
+  * Reposition the decoder.
+  * @param location supplies the stream position (consumed by the input
+  *        stream) followed by the number of values to skip inside the run
+  *        that starts there.
+  */
+ void ByteRleDecoderImpl::seek(PositionProvider& location) {
+   // move the input stream
+   inputStream->seek(location);
+   // force a re-read from the stream
+   bufferEnd = bufferStart;
+   // read a new header
+   readHeader();
+   // Skip ahead the given number of records. The call is qualified on
+   // purpose: the offset counts bytes, so a subclass override of skip()
+   // (the boolean decoder counts bits) must not be dispatched to here.
+   ByteRleDecoderImpl::skip(location.next());
+ }
+
+ /**
+  * Discard the next numValues decoded values.
+  * Repeat runs only need their counter adjusted; literal runs also need
+  * the corresponding bytes stepped over in the input, possibly across
+  * several chunks.
+  */
+ void ByteRleDecoderImpl::skip(uint64_t numValues) {
+   while (numValues > 0) {
+     if (remainingValues == 0) {
+       readHeader();
+     }
+     const size_t runPortion =
+         std::min(static_cast<size_t>(numValues), remainingValues);
+     remainingValues -= runPortion;
+     numValues -= runPortion;
+     if (repeating) {
+       // a repeat run has no further bytes in the stream
+       continue;
+     }
+     // literal run: advance the read window by runPortion bytes
+     for (size_t pending = runPortion; pending > 0; ) {
+       if (bufferStart == bufferEnd) {
+         nextBuffer();
+       }
+       const size_t available = static_cast<size_t>(bufferEnd - bufferStart);
+       const size_t step = std::min(pending, available);
+       bufferStart += step;
+       pending -= step;
+     }
+   }
+ }
+
+ /**
+  * Decode values into data[0, numValues).
+  * @param data output array
+  * @param numValues number of slots to cover
+  * @param notNull optional mask; when non-null, slots whose mask entry is
+  *        zero are left untouched and consume no encoded value
+  */
+ void ByteRleDecoderImpl::next(char* data, uint64_t numValues,
+                               char* notNull) {
+   uint64_t cursor = 0;
+   // leading nulls consume nothing
+   while (notNull && cursor < numValues && !notNull[cursor]) {
+     ++cursor;
+   }
+   while (cursor < numValues) {
+     // start a new run when the current one is used up
+     if (remainingValues == 0) {
+       readHeader();
+     }
+     // slots handled in this pass
+     const size_t span = std::min(static_cast<size_t>(numValues - cursor),
+                                  remainingValues);
+     uint64_t used = 0;
+     if (notNull) {
+       // masked: only non-null slots take a value from the run
+       for (uint64_t k = 0; k < span; ++k) {
+         if (notNull[cursor + k]) {
+           if (repeating) {
+             data[cursor + k] = value;
+           } else {
+             data[cursor + k] = readByte();
+           }
+           ++used;
+         }
+       }
+     } else if (repeating) {
+       memset(data + cursor, value, span);
+       used = span;
+     } else {
+       // unmasked literals: bulk copy, refilling the window as needed
+       for (uint64_t copied = 0; copied < span; ) {
+         if (bufferStart == bufferEnd) {
+           nextBuffer();
+         }
+         const uint64_t chunk =
+             std::min(static_cast<uint64_t>(span - copied),
+                      static_cast<uint64_t>(bufferEnd - bufferStart));
+         memcpy(data + cursor + copied, bufferStart, chunk);
+         bufferStart += chunk;
+         copied += chunk;
+       }
+       used = span;
+     }
+     remainingValues -= used;
+     cursor += span;
+     // nulls that follow consume nothing either
+     while (notNull && cursor < numValues && !notNull[cursor]) {
+       ++cursor;
+     }
+   }
+ }
+
+ // Build a byte RLE decoder that takes ownership of the given stream.
+ std::unique_ptr<ByteRleDecoder> createByteRleDecoder
+                              (std::unique_ptr<SeekableInputStream> input) {
+   ByteRleDecoder* decoder = new ByteRleDecoderImpl(std::move(input));
+   return std::unique_ptr<ByteRleDecoder>(decoder);
+ }
+
+ /**
+ * Boolean decoder layered on the byte RLE decoder: each decoded byte
+ * carries eight booleans, consumed most-significant bit first.
+ */
+ class BooleanRleDecoderImpl: public ByteRleDecoderImpl {
+ public:
+ BooleanRleDecoderImpl(std::unique_ptr<SeekableInputStream> input);
+
+ virtual ~BooleanRleDecoderImpl();
+
+ /**
+ * Seek to a particular spot.
+ */
+ virtual void seek(PositionProvider&);
+
+ /**
+ * Seek over a given number of values.
+ */
+ virtual void skip(uint64_t numValues);
+
+ /**
+ * Read a number of values into the batch.
+ */
+ virtual void next(char* data, uint64_t numValues, char* notNull);
+
+ protected:
+ // number of not-yet-delivered bits left in lastByte
+ size_t remainingBits;
+ // the most recently decoded byte, holding the pending bits
+ char lastByte;
+ };
+
+ // Start with no pending bits; the stream is handed to the byte decoder.
+ BooleanRleDecoderImpl::BooleanRleDecoderImpl
+                              (std::unique_ptr<SeekableInputStream> input)
+     : ByteRleDecoderImpl(std::move(input)),
+       remainingBits(0),
+       lastByte(0) {
+   // all state is set in the initializer list
+ }
+
+ // No resources beyond those of the base class.
+ BooleanRleDecoderImpl::~BooleanRleDecoderImpl() {
+ // PASS
+ }
+
+ /**
+  * Reposition the decoder.
+  * @param location supplies the byte-level position (consumed by the base
+  *        class) followed by the number of bits already consumed from the
+  *        byte at that position.
+  * Throws ParseError("bad position") if that bit count exceeds 8.
+  */
+ void BooleanRleDecoderImpl::seek(PositionProvider& location) {
+   ByteRleDecoderImpl::seek(location);
+   uint64_t consumed = location.next();
+   // Bits left over from before the seek belong to the old position and
+   // must never be served afterwards, even when the new bit offset is 0.
+   remainingBits = 0;
+   if (consumed > 8) {
+     throw ParseError("bad position");
+   }
+   if (consumed != 0) {
+     remainingBits = 8 - consumed;
+     ByteRleDecoderImpl::next(&lastByte, 1, 0);
+   }
+ }
+
+ /**
+  * Discard the next numValues booleans.
+  * Pending bits of lastByte are used first, then whole bytes are skipped
+  * in the byte decoder, and a trailing partial byte is loaded only when
+  * the skip ends inside it.
+  */
+ void BooleanRleDecoderImpl::skip(uint64_t numValues) {
+   if (numValues <= remainingBits) {
+     remainingBits -= numValues;
+     return;
+   }
+   numValues -= remainingBits;
+   ByteRleDecoderImpl::skip(numValues / 8);
+   const uint64_t leftoverBits = numValues % 8;
+   if (leftoverBits != 0) {
+     ByteRleDecoderImpl::next(&lastByte, 1, 0);
+     remainingBits = 8 - leftoverBits;
+   } else {
+     // Ended exactly on a byte boundary: do not read ahead, otherwise
+     // skipping to the very end of the stream fails with a ParseError.
+     remainingBits = 0;
+   }
+ }
+
+ /**
+ * Decode numValues booleans into data, one 0/1 value per slot.
+ * Slots masked out by notNull are set to 0 and consume no bit.
+ * Packed bytes are first read into the tail of data itself and then
+ * expanded in place, from the last slot backwards.
+ */
+ void BooleanRleDecoderImpl::next(char* data, uint64_t numValues,
+ char* notNull) {
+ // next spot to fill in
+ uint64_t position = 0;
+
+ // use up any remaining bits
+ if (notNull) {
+ while(remainingBits > 0 && position < numValues) {
+ if (notNull[position]) {
+ remainingBits -= 1;
+ data[position] = (static_cast<unsigned char>(lastByte) >>
+ remainingBits) & 0x1;
+ } else {
+ data[position] = 0;
+ }
+ position += 1;
+ }
+ } else {
+ while(remainingBits > 0 && position < numValues) {
+ remainingBits -= 1;
+ data[position++] = (static_cast<unsigned char>(lastByte) >>
+ remainingBits) & 0x1;
+ }
+ }
+
+ // count the number of nonNulls remaining
+ uint64_t nonNulls = numValues - position;
+ if (notNull) {
+ for(uint64_t i=position; i < numValues; ++i) {
+ if (!notNull[i]) {
+ nonNulls -= 1;
+ }
+ }
+ }
+
+ // fill in the remaining values
+ if (nonNulls == 0) {
+ while (position < numValues) {
+ data[position++] = 0;
+ }
+ } else if (position < numValues) {
+ // read the new bytes into the array
+ uint64_t bytesRead = (nonNulls + 7) / 8;
+ ByteRleDecoderImpl::next(data + position, bytesRead, 0);
+ // keep the final byte: its unused low bits serve the next call
+ lastByte = data[position + bytesRead - 1];
+ remainingBits = bytesRead * 8 - nonNulls;
+ // expand the array backwards so that we don't clobber the data
+ // (bitsLeft equals nonNulls here: the count of bits still to place)
+ uint64_t bitsLeft = bytesRead * 8 - remainingBits;
+ if (notNull) {
+ for(int64_t i=static_cast<int64_t>(numValues) - 1;
+ i >= static_cast<int64_t>(position); --i) {
+ if (notNull[i]) {
+ // bit number bitsLeft (1-based, MSB first) sits in byte
+ // (bitsLeft - 1) / 8 and is shifted down by (-bitsLeft) mod 8
+ uint64_t shiftPosn = (-bitsLeft) % 8;
+ data[i] = (data[position + (bitsLeft - 1) / 8] >> shiftPosn) & 0x1;
+ bitsLeft -= 1;
+ } else {
+ data[i] = 0;
+ }
+ }
+ } else {
+ for(int64_t i=static_cast<int64_t>(numValues) - 1;
+ i >= static_cast<int64_t>(position); --i, --bitsLeft) {
+ uint64_t shiftPosn = (-bitsLeft) % 8;
+ data[i] = (data[position + (bitsLeft - 1) / 8] >> shiftPosn) & 0x1;
+ }
+ }
+ }
+ }
+
+ /**
+  * Build a boolean RLE decoder that takes ownership of the given stream.
+  * The derived pointer converts to ByteRleDecoder* implicitly, so no
+  * reinterpret_cast is used; reinterpret_cast does not perform a proper
+  * derived-to-base conversion.
+  */
+ std::unique_ptr<ByteRleDecoder> createBooleanRleDecoder
+                              (std::unique_ptr<SeekableInputStream> input) {
+   return std::unique_ptr<ByteRleDecoder>(new BooleanRleDecoderImpl
+                                          (std::move(input)));
+ }
+}
http://git-wip-us.apache.org/repos/asf/orc/blob/7f55b453/c++/src/ByteRLE.hh
----------------------------------------------------------------------
diff --git a/c++/src/ByteRLE.hh b/c++/src/ByteRLE.hh
new file mode 100644
index 0000000..6762cb5
--- /dev/null
+++ b/c++/src/ByteRLE.hh
@@ -0,0 +1,71 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ORC_BYTE_RLE_HH
+#define ORC_BYTE_RLE_HH
+
+#include <memory>
+
+#include "Compression.hh"
+
+namespace orc {
+
+ /**
+ * Abstract interface of the byte-oriented run-length decoders; concrete
+ * decoders are obtained from the create*RleDecoder factory functions.
+ */
+ class ByteRleDecoder {
+ public:
+ virtual ~ByteRleDecoder();
+
+ /**
+ * Seek to a particular spot.
+ * The position is read from the given PositionProvider.
+ */
+ virtual void seek(PositionProvider&) = 0;
+
+ /**
+ * Seek over a given number of values.
+ * @param numValues the number of values to discard
+ */
+ virtual void skip(uint64_t numValues) = 0;
+
+ /**
+ * Read a number of values into the batch.
+ * @param data the array to read into
+ * @param numValues the number of values to read
+ * @param notNull If the pointer is null, all values are read. If the
+ * pointer is not null, positions that are false are skipped.
+ */
+ virtual void next(char* data, uint64_t numValues, char* notNull) = 0;
+ };
+
+ /**
+ * Create a byte RLE decoder.
+ * @param input the input stream to read from; ownership is transferred
+ * @return the new decoder
+ */
+ std::unique_ptr<ByteRleDecoder> createByteRleDecoder
+ (std::unique_ptr<SeekableInputStream> input);
+
+ /**
+ * Create a boolean RLE decoder.
+ *
+ * Unlike the other RLE decoders, the boolean decoder sets the data to 0
+ * if the value is masked by notNull. This is required for the notNull stream
+ * processing to properly apply multiple masks from nested types.
+ * @param input the input stream to read from; ownership is transferred
+ * @return the new decoder
+ */
+ std::unique_ptr<ByteRleDecoder> createBooleanRleDecoder
+ (std::unique_ptr<SeekableInputStream> input);
+}
+
+#endif
http://git-wip-us.apache.org/repos/asf/orc/blob/7f55b453/c++/src/C09Adapter.cc
----------------------------------------------------------------------
diff --git a/c++/src/C09Adapter.cc b/c++/src/C09Adapter.cc
new file mode 100644
index 0000000..8afc752
--- /dev/null
+++ b/c++/src/C09Adapter.cc
@@ -0,0 +1,28 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+#include "orc/C09Adapter.hh"
+#include <sstream>
+
+// Fallback std::stoll: parses str as a base-10 integer through a string
+// stream. Yields 0 when nothing can be parsed; unlike the standard
+// function it does not throw.
+int64_t std::stoll(std::string str) {
+  std::istringstream parser(str);
+  int64_t parsed = 0;
+  parser >> parsed;
+  return parsed;
+}
http://git-wip-us.apache.org/repos/asf/orc/blob/7f55b453/c++/src/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/c++/src/CMakeLists.txt b/c++/src/CMakeLists.txt
index b1ed10a..b7b5260 100644
--- a/c++/src/CMakeLists.txt
+++ b/c++/src/CMakeLists.txt
@@ -82,8 +82,8 @@ CHECK_CXX_SOURCE_RUNS("
)
configure_file (
- "orc/Adaptor.hh.in"
- "${CMAKE_CURRENT_BINARY_DIR}/orc/Adaptor.hh"
+ "Adaptor.hh.in"
+ "${CMAKE_CURRENT_BINARY_DIR}/Adaptor.hh"
)
include_directories (
@@ -104,23 +104,23 @@ add_custom_command(OUTPUT orc_proto.pb.h orc_proto.pb.cc
)
add_library (orc STATIC
- "${CMAKE_CURRENT_BINARY_DIR}/orc/Adaptor.hh"
+ "${CMAKE_CURRENT_BINARY_DIR}/Adaptor.hh"
orc_proto.pb.h
wrap/orc-proto-wrapper.cc
- orc/ByteRLE.cc
- orc/ColumnPrinter.cc
- orc/ColumnReader.cc
- orc/Compression.cc
- orc/Exceptions.cc
- orc/Int128.cc
- orc/MemoryPool.cc
- orc/OrcFile.cc
- orc/Reader.cc
- orc/RLEv1.cc
- orc/RLEv2.cc
- orc/RLE.cc
- orc/TypeImpl.cc
- orc/Vector.cc
+ ByteRLE.cc
+ ColumnPrinter.cc
+ ColumnReader.cc
+ Compression.cc
+ Exceptions.cc
+ Int128.cc
+ MemoryPool.cc
+ OrcFile.cc
+ Reader.cc
+ RLEv1.cc
+ RLEv2.cc
+ RLE.cc
+ TypeImpl.cc
+ Vector.cc
)
install(TARGETS orc DESTINATION lib)
http://git-wip-us.apache.org/repos/asf/orc/blob/7f55b453/c++/src/ColumnPrinter.cc
----------------------------------------------------------------------
diff --git a/c++/src/ColumnPrinter.cc b/c++/src/ColumnPrinter.cc
new file mode 100644
index 0000000..190441c
--- /dev/null
+++ b/c++/src/ColumnPrinter.cc
@@ -0,0 +1,725 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "orc/ColumnPrinter.hh"
+#include "orc/orc-config.hh"
+
+#include "Adaptor.hh"
+
+#include <limits>
+#include <sstream>
+#include <stdexcept>
+#include <time.h>
+#include <typeinfo>
+
+#ifdef __clang__
+ #pragma clang diagnostic ignored "-Wformat-security"
+#endif
+
+namespace orc {
+
+ // Printer that createColumnPrinter selects for BOOLEAN columns.
+ class BooleanColumnPrinter: public ColumnPrinter {
+ private:
+ // values of the batch being printed
+ const int64_t* data;
+ public:
+ BooleanColumnPrinter(std::string&, const Type&);
+ ~BooleanColumnPrinter() {}
+ void printRow(uint64_t rowId) override;
+ void reset(const ColumnVectorBatch& batch) override;
+ };
+
+ // Prints BYTE, SHORT, INT and LONG columns as decimal integers.
+ class LongColumnPrinter: public ColumnPrinter {
+ private:
+ // values of the current batch; set by reset()
+ const int64_t* data;
+ public:
+ LongColumnPrinter(std::string&, const Type&);
+ ~LongColumnPrinter() {}
+ void printRow(uint64_t rowId) override;
+ void reset(const ColumnVectorBatch& batch) override;
+ };
+
+ // Prints FLOAT and DOUBLE columns; FLOAT uses fewer significant digits.
+ class DoubleColumnPrinter: public ColumnPrinter {
+ private:
+ // values of the current batch; set by reset()
+ const double* data;
+ // true when the column kind is FLOAT
+ const bool isFloat;
+
+ public:
+ DoubleColumnPrinter(std::string&, const Type&);
+ virtual ~DoubleColumnPrinter() {}
+ void printRow(uint64_t rowId) override;
+ void reset(const ColumnVectorBatch& batch) override;
+ };
+
+ // Printer that createColumnPrinter selects for TIMESTAMP columns.
+ class TimestampColumnPrinter: public ColumnPrinter {
+ private:
+ // values of the batch being printed
+ const int64_t* data;
+ // NOTE(review): presumably the reference time the values are relative
+ // to -- confirm in the member definitions.
+ time_t epoch;
+
+ public:
+ TimestampColumnPrinter(std::string&, const Type&);
+ ~TimestampColumnPrinter() {}
+ void printRow(uint64_t rowId) override;
+ void reset(const ColumnVectorBatch& batch) override;
+ };
+
+ // Printer that createColumnPrinter selects for DATE columns.
+ class DateColumnPrinter: public ColumnPrinter {
+ private:
+ // values of the batch being printed
+ const int64_t* data;
+
+ public:
+ DateColumnPrinter(std::string&, const Type& type);
+ ~DateColumnPrinter() {}
+ void printRow(uint64_t rowId) override;
+ void reset(const ColumnVectorBatch& batch) override;
+ };
+
+ // Prints DECIMAL columns whose precision is between 1 and 18.
+ class Decimal64ColumnPrinter: public ColumnPrinter {
+ private:
+ // unscaled values of the current batch; set by reset()
+ const int64_t* data;
+ // number of fractional digits, taken from the batch in reset()
+ int32_t scale;
+ public:
+ Decimal64ColumnPrinter(std::string&, const Type& type);
+ ~Decimal64ColumnPrinter() {}
+ void printRow(uint64_t rowId) override;
+ void reset(const ColumnVectorBatch& batch) override;
+ };
+
+ // Prints DECIMAL columns whose precision is 0 or greater than 18.
+ class Decimal128ColumnPrinter: public ColumnPrinter {
+ private:
+ // unscaled values of the current batch; set by reset()
+ const Int128* data;
+ // number of fractional digits, taken from the batch in reset()
+ int32_t scale;
+ public:
+ Decimal128ColumnPrinter(std::string&, const Type& type);
+ ~Decimal128ColumnPrinter() {}
+ void printRow(uint64_t rowId) override;
+ void reset(const ColumnVectorBatch& batch) override;
+ };
+
+ // Prints STRING, VARCHAR and CHAR columns as quoted, escaped text.
+ class StringColumnPrinter: public ColumnPrinter {
+ private:
+ // per-row pointer to the first character; set by reset()
+ const char* const * start;
+ // per-row length in bytes; set by reset()
+ const int64_t* length;
+ public:
+ StringColumnPrinter(std::string&, const Type& type);
+ virtual ~StringColumnPrinter() {}
+ void printRow(uint64_t rowId) override;
+ void reset(const ColumnVectorBatch& batch) override;
+ };
+
+ // Printer that createColumnPrinter selects for BINARY columns.
+ class BinaryColumnPrinter: public ColumnPrinter {
+ private:
+ // per-row pointer to the first byte
+ const char* const * start;
+ // per-row length
+ const int64_t* length;
+ public:
+ BinaryColumnPrinter(std::string&, const Type& type);
+ virtual ~BinaryColumnPrinter() {}
+ void printRow(uint64_t rowId) override;
+ void reset(const ColumnVectorBatch& batch) override;
+ };
+
+ // Printer for LIST columns; elements are delegated to a nested printer.
+ class ListColumnPrinter: public ColumnPrinter {
+ private:
+ // NOTE(review): presumably per-row offsets into the element batch.
+ const int64_t* offsets;
+ // printer for subtype 0, created in the constructor
+ std::unique_ptr<ColumnPrinter> elementPrinter;
+
+ public:
+ ListColumnPrinter(std::string&, const Type& type);
+ virtual ~ListColumnPrinter() {}
+ void printRow(uint64_t rowId) override;
+ void reset(const ColumnVectorBatch& batch) override;
+ };
+
+ // Printer that createColumnPrinter selects for MAP columns; keys and
+ // values each have their own nested printer.
+ class MapColumnPrinter: public ColumnPrinter {
+ private:
+ // NOTE(review): presumably per-row offsets into the key/value batches.
+ const int64_t* offsets;
+ std::unique_ptr<ColumnPrinter> keyPrinter;
+ std::unique_ptr<ColumnPrinter> elementPrinter;
+
+ public:
+ MapColumnPrinter(std::string&, const Type& type);
+ virtual ~MapColumnPrinter() {}
+ void printRow(uint64_t rowId) override;
+ void reset(const ColumnVectorBatch& batch) override;
+ };
+
+ // Printer that createColumnPrinter selects for UNION columns.
+ class UnionColumnPrinter: public ColumnPrinter {
+ private:
+ // NOTE(review): presumably the per-row variant tag -- confirm.
+ const unsigned char *tags;
+ const uint64_t* offsets;
+ // One printer per field, held as raw pointers.
+ // NOTE(review): presumably owned and freed by the out-of-line
+ // destructor -- confirm in its definition.
+ std::vector<ColumnPrinter*> fieldPrinter;
+
+ public:
+ UnionColumnPrinter(std::string&, const Type& type);
+ virtual ~UnionColumnPrinter();
+ void printRow(uint64_t rowId) override;
+ void reset(const ColumnVectorBatch& batch) override;
+ };
+
+ // Printer that createColumnPrinter selects for STRUCT columns.
+ class StructColumnPrinter: public ColumnPrinter {
+ private:
+ // One printer per field, held as raw pointers.
+ // NOTE(review): presumably owned and freed by the out-of-line
+ // destructor -- confirm in its definition.
+ std::vector<ColumnPrinter*> fieldPrinter;
+ public:
+ StructColumnPrinter(std::string&, const Type& type);
+ virtual ~StructColumnPrinter();
+ void printRow(uint64_t rowId) override;
+ void reset(const ColumnVectorBatch& batch) override;
+ };
+
+ // Append a single character to the output buffer.
+ void writeChar(std::string& file, char ch) {
+   file.push_back(ch);
+ }
+
+ // Append a NUL-terminated C string to the output buffer.
+ void writeString(std::string& file, const char *ptr) {
+   file.append(ptr);
+ }
+
+ // Bind the printer to its output buffer and column type. The null mask
+ // stays unset until reset() is called with a batch.
+ ColumnPrinter::ColumnPrinter(std::string& _buffer, const Type& _type
+ ): buffer(_buffer),
+ type(_type) {
+ notNull = nullptr;
+ hasNulls = false;
+ }
+
+ // Out-of-line definition of the base-class destructor; nothing to free here.
+ ColumnPrinter::~ColumnPrinter() {
+ // PASS
+ }
+
+ // Point the printer at a new batch: remember whether it has nulls and,
+ // only if so, where its not-null mask lives.
+ void ColumnPrinter::reset(const ColumnVectorBatch& batch) {
+   hasNulls = batch.hasNulls;
+   notNull = nullptr;
+   if (hasNulls) {
+     notNull = batch.notNull.data();
+   }
+ }
+
+ /**
+  * Create the printer that matches a column type.
+  * @param buffer the string the printer appends its output to
+  * @param type the column type; its kind selects the concrete printer
+  * @return the newly created printer
+  * Throws std::logic_error("unknown batch type") for an unhandled kind.
+  */
+ std::unique_ptr<ColumnPrinter> createColumnPrinter(std::string& buffer,
+                                                    const Type& type) {
+   typedef std::unique_ptr<ColumnPrinter> PrinterPtr;
+   switch(static_cast<int64_t>(type.getKind())) {
+   case BOOLEAN:
+     return PrinterPtr(new BooleanColumnPrinter(buffer, type));
+
+   case BYTE:
+   case SHORT:
+   case INT:
+   case LONG:
+     return PrinterPtr(new LongColumnPrinter(buffer, type));
+
+   case FLOAT:
+   case DOUBLE:
+     return PrinterPtr(new DoubleColumnPrinter(buffer, type));
+
+   case STRING:
+   case VARCHAR :
+   case CHAR:
+     return PrinterPtr(new StringColumnPrinter(buffer, type));
+
+   case BINARY:
+     return PrinterPtr(new BinaryColumnPrinter(buffer, type));
+
+   case TIMESTAMP:
+     return PrinterPtr(new TimestampColumnPrinter(buffer, type));
+
+   case LIST:
+     return PrinterPtr(new ListColumnPrinter(buffer, type));
+
+   case MAP:
+     return PrinterPtr(new MapColumnPrinter(buffer, type));
+
+   case STRUCT:
+     return PrinterPtr(new StructColumnPrinter(buffer, type));
+
+   case DECIMAL:
+     // precision 0 or above 18 selects the 128-bit printer
+     if (type.getPrecision() == 0 || type.getPrecision() > 18) {
+       return PrinterPtr(new Decimal128ColumnPrinter(buffer, type));
+     }
+     return PrinterPtr(new Decimal64ColumnPrinter(buffer, type));
+
+   case DATE:
+     return PrinterPtr(new DateColumnPrinter(buffer, type));
+
+   case UNION:
+     return PrinterPtr(new UnionColumnPrinter(buffer, type));
+
+   default:
+     throw std::logic_error("unknown batch type");
+   }
+ }
+
+ // Only forwards to the base class; data stays unset until reset().
+ LongColumnPrinter::LongColumnPrinter(std::string& buffer,
+ const Type& type
+ ): ColumnPrinter(buffer, type) {
+ // pass
+ }
+
+ // Attach to a new batch, which must be a LongVectorBatch (the reference
+ // dynamic_cast throws std::bad_cast otherwise).
+ void LongColumnPrinter::reset(const ColumnVectorBatch& batch) {
+   ColumnPrinter::reset(batch);
+   const LongVectorBatch& longBatch =
+       dynamic_cast<const LongVectorBatch&>(batch);
+   data = longBatch.data.data();
+ }
+
+ // Append row rowId as a decimal integer, or "null" when it is masked out.
+ void LongColumnPrinter::printRow(uint64_t rowId) {
+   if (hasNulls && !notNull[rowId]) {
+     writeString(buffer, "null");
+     return;
+   }
+   char text[64];
+   const int64_t number = data[rowId];
+   snprintf(text, sizeof(text), "%" INT64_FORMAT_STRING "d", number);
+   writeString(buffer, text);
+ }
+
+ // Records whether the column is FLOAT, which selects the print precision;
+ // data stays unset until reset().
+ DoubleColumnPrinter::DoubleColumnPrinter(std::string& buffer,
+ const Type& type
+ ): ColumnPrinter(buffer, type),
+ isFloat(type.getKind() == FLOAT){
+ // PASS
+ }
+
+ // Attach to a new batch, which must be a DoubleVectorBatch (the reference
+ // dynamic_cast throws std::bad_cast otherwise).
+ void DoubleColumnPrinter::reset(const ColumnVectorBatch& batch) {
+   ColumnPrinter::reset(batch);
+   const DoubleVectorBatch& doubleBatch =
+       dynamic_cast<const DoubleVectorBatch&>(batch);
+   data = doubleBatch.data.data();
+ }
+
+ // Append row rowId with 7 significant digits for FLOAT columns and 14
+ // otherwise, or "null" when the row is masked out.
+ void DoubleColumnPrinter::printRow(uint64_t rowId) {
+   if (hasNulls && !notNull[rowId]) {
+     writeString(buffer, "null");
+     return;
+   }
+   char text[64];
+   if (isFloat) {
+     snprintf(text, sizeof(text), "%.7g", data[rowId]);
+   } else {
+     snprintf(text, sizeof(text), "%.14g", data[rowId]);
+   }
+   writeString(buffer, text);
+ }
+
+ // Only forwards to the base class; data and scale stay unset until reset().
+ Decimal64ColumnPrinter::Decimal64ColumnPrinter(std::string& buffer,
+ const Type& type
+ ): ColumnPrinter(buffer,
+ type) {
+ // PASS
+ }
+
+ // Attach to a new batch, which must be a Decimal64VectorBatch; picks up
+ // both the value array and the batch's scale.
+ void Decimal64ColumnPrinter::reset(const ColumnVectorBatch& batch) {
+   ColumnPrinter::reset(batch);
+   const Decimal64VectorBatch& decimalBatch =
+       dynamic_cast<const Decimal64VectorBatch&>(batch);
+   data = decimalBatch.values.data();
+   scale = decimalBatch.scale;
+ }
+
+ /**
+  * Render an unscaled 64-bit integer as a decimal number.
+  * @param value the unscaled value
+  * @param scale the number of digits after the decimal point; 0 prints
+  *        the value as a plain integer
+  * @return the text, e.g. (12345, 2) gives "123.45" and (-5, 3) gives
+  *         "-0.005"
+  */
+ std::string toDecimalString(int64_t value, int32_t scale) {
+   std::stringstream buffer;
+   if (scale == 0) {
+     buffer << value;
+     return buffer.str();
+   }
+   std::string sign = "";
+   // Format the unsigned magnitude: negating INT64_MIN as a signed value
+   // overflows, whereas unsigned negation is well defined.
+   uint64_t magnitude = static_cast<uint64_t>(value);
+   if (value < 0) {
+     sign = "-";
+     magnitude = 0 - magnitude;
+   }
+   buffer << magnitude;
+   std::string str = buffer.str();
+   int32_t len = static_cast<int32_t>(str.length());
+   if (len > scale) {
+     // enough digits for a non-empty integer part
+     return sign + str.substr(0, static_cast<size_t>(len - scale)) + "." +
+       str.substr(static_cast<size_t>(len - scale),
+                  static_cast<size_t>(scale));
+   } else if (len == scale) {
+     return sign + "0." + str;
+   } else {
+     // pad the fraction with leading zeros up to `scale` digits
+     std::string result = sign + "0.";
+     for(int32_t i=0; i < scale - len; ++i) {
+       result += "0";
+     }
+     return result + str;
+   }
+ }
+
+ // Append row rowId as a scaled decimal, or "null" when it is masked out.
+ void Decimal64ColumnPrinter::printRow(uint64_t rowId) {
+   if (hasNulls && !notNull[rowId]) {
+     writeString(buffer, "null");
+     return;
+   }
+   const std::string text = toDecimalString(data[rowId], scale);
+   writeString(buffer, text.c_str());
+ }
+
+ // Creates a printer for 128-bit decimal columns. Only the base class is
+ // initialized here; the value array and scale are bound by reset().
+ Decimal128ColumnPrinter::Decimal128ColumnPrinter(std::string& buffer,
+                                                  const Type& type)
+     : ColumnPrinter(buffer, type) {
+   // no further setup required
+ }
+
+ // Rebinds the printer to a new batch: runs the base-class reset, then
+ // caches the value array and the scale of the batch. The batch is cast to
+ // Decimal128VectorBatch by reference, so any other batch type makes
+ // dynamic_cast throw.
+ void Decimal128ColumnPrinter::reset(const ColumnVectorBatch& batch) {
+   ColumnPrinter::reset(batch);
+   const Decimal128VectorBatch& decimalBatch =
+       dynamic_cast<const Decimal128VectorBatch&>(batch);
+   data = decimalBatch.values.data();
+   scale = decimalBatch.scale;
+ }
+
+ // Appends row rowId to the output buffer: the literal null for a null row,
+ // otherwise the value's own toDecimalString(scale) rendering.
+ void Decimal128ColumnPrinter::printRow(uint64_t rowId) {
+   if (hasNulls && !notNull[rowId]) {
+     writeString(buffer, "null");
+     return;
+   }
+   writeString(buffer, data[rowId].toDecimalString(scale).c_str());
+ }
+
+ // Creates a printer for string columns. Only the base class is
+ // initialized here; the start/length arrays are bound by reset().
+ StringColumnPrinter::StringColumnPrinter(std::string& buffer,
+                                          const Type& type)
+     : ColumnPrinter(buffer, type) {
+   // no further setup required
+ }
+
+ // Rebinds the printer to a new batch: runs the base-class reset, then
+ // caches the per-row start pointers and lengths. The batch is cast to
+ // StringVectorBatch by reference, so any other batch type makes
+ // dynamic_cast throw.
+ void StringColumnPrinter::reset(const ColumnVectorBatch& batch) {
+   ColumnPrinter::reset(batch);
+   const StringVectorBatch& stringBatch =
+       dynamic_cast<const StringVectorBatch&>(batch);
+   start = stringBatch.data.data();
+   length = stringBatch.length.data();
+ }
+
+ // Appends row rowId to the output buffer: the literal null for a null row,
+ // otherwise the row's bytes wrapped in double quotes. Backslash, double
+ // quote, backspace, form feed, newline, carriage return and tab are
+ // written as two-character backslash escapes; every other byte is copied
+ // through unchanged.
+ void StringColumnPrinter::printRow(uint64_t rowId) {
+   if (hasNulls && !notNull[rowId]) {
+     writeString(buffer, "null");
+     return;
+   }
+   const int64_t byteCount = length[rowId];
+   writeChar(buffer, '"');
+   for (int64_t pos = 0; pos < byteCount; ++pos) {
+     const char current = static_cast<char>(start[rowId][pos]);
+     switch (current) {
+     case '"':
+       writeString(buffer, "\\\"");
+       break;
+     case '\\':
+       writeString(buffer, "\\\\");
+       break;
+     case '\b':
+       writeString(buffer, "\\b");
+       break;
+     case '\f':
+       writeString(buffer, "\\f");
+       break;
+     case '\n':
+       writeString(buffer, "\\n");
+       break;
+     case '\r':
+       writeString(buffer, "\\r");
+       break;
+     case '\t':
+       writeString(buffer, "\\t");
+       break;
+     default:
+       // not one of the escaped characters: emit the byte as-is
+       writeChar(buffer, current);
+       break;
+     }
+   }
+   writeChar(buffer, '"');
+ }
+
+ // Creates a printer for list columns and, through createColumnPrinter, a
+ // nested printer for the element type (subtype 0) that writes into the
+ // same output buffer.
+ ListColumnPrinter::ListColumnPrinter(std::string& buffer,
+                                      const Type& type)
+     : ColumnPrinter(buffer, type) {
+   elementPrinter = createColumnPrinter(buffer, type.getSubtype(0));
+ }
+
+ // Rebinds the printer to a new batch: runs the base-class reset, caches
+ // the offsets array and resets the element printer onto the batch's
+ // elements. The batch is cast to ListVectorBatch by reference, so any
+ // other batch type makes dynamic_cast throw.
+ void ListColumnPrinter::reset(const ColumnVectorBatch& batch) {
+   ColumnPrinter::reset(batch);
+   const ListVectorBatch& listBatch =
+       dynamic_cast<const ListVectorBatch&>(batch);
+   offsets = listBatch.offsets.data();
+   elementPrinter->reset(*listBatch.elements);
+ }
+
+ // Appends row rowId to the output buffer: the literal null for a null row,
+ // otherwise a bracketed, comma-separated list. The row's elements are the
+ // element rows in the half-open range [offsets[rowId], offsets[rowId+1]).
+ void ListColumnPrinter::printRow(uint64_t rowId) {
+   if (hasNulls && !notNull[rowId]) {
+     writeString(buffer, "null");
+     return;
+   }
+   const int64_t first = offsets[rowId];
+   const int64_t limit = offsets[rowId+1];
+   writeChar(buffer, '[');
+   for (int64_t element = first; element < limit; ++element) {
+     if (element != first) {
+       writeString(buffer, ", ");
+     }
+     elementPrinter->printRow(static_cast<uint64_t>(element));
+   }
+   writeChar(buffer, ']');
+ }
+
+ // Creates a printer for map columns together with nested printers for the
+ // key type (subtype 0) and the value type (subtype 1); both write into
+ // the same output buffer.
+ MapColumnPrinter::MapColumnPrinter(std::string& buffer,
+                                    const Type& type)
+     : ColumnPrinter(buffer, type) {
+   keyPrinter = createColumnPrinter(buffer, type.getSubtype(0));
+   elementPrinter = createColumnPrinter(buffer, type.getSubtype(1));
+ }
+
+ // Rebinds the printer to a new batch: runs the base-class reset, caches
+ // the offsets array and resets the key and value printers onto the
+ // batch's keys and elements. The batch is cast to MapVectorBatch by
+ // reference, so any other batch type makes dynamic_cast throw.
+ void MapColumnPrinter::reset(const ColumnVectorBatch& batch) {
+   ColumnPrinter::reset(batch);
+   const MapVectorBatch& mapBatch =
+       dynamic_cast<const MapVectorBatch&>(batch);
+   offsets = mapBatch.offsets.data();
+   keyPrinter->reset(*mapBatch.keys);
+   elementPrinter->reset(*mapBatch.elements);
+ }
+
+ // Appends row rowId to the output buffer: the literal null for a null row,
+ // otherwise a bracketed, comma-separated list with one {key, value}
+ // object per entry. The row's entries are the child rows in the half-open
+ // range [offsets[rowId], offsets[rowId+1]).
+ void MapColumnPrinter::printRow(uint64_t rowId) {
+   if (hasNulls && !notNull[rowId]) {
+     writeString(buffer, "null");
+     return;
+   }
+   const int64_t first = offsets[rowId];
+   const int64_t limit = offsets[rowId+1];
+   writeChar(buffer, '[');
+   for (int64_t entry = first; entry < limit; ++entry) {
+     if (entry != first) {
+       writeString(buffer, ", ");
+     }
+     const uint64_t childRow = static_cast<uint64_t>(entry);
+     writeString(buffer, "{\"key\": ");
+     keyPrinter->printRow(childRow);
+     writeString(buffer, ", \"value\": ");
+     elementPrinter->printRow(childRow);
+     writeChar(buffer, '}');
+   }
+   writeChar(buffer, ']');
+ }
+
+ // Creates a printer for union columns with one nested printer per
+ // subtype. Each printer returned by createColumnPrinter is released into
+ // fieldPrinter as a raw pointer; the destructor deletes them.
+ UnionColumnPrinter::UnionColumnPrinter(std::string& buffer,
+                                        const Type& type)
+     : ColumnPrinter(buffer, type) {
+   for (unsigned int child = 0; child < type.getSubtypeCount(); ++child) {
+     fieldPrinter.push_back(
+         createColumnPrinter(buffer, type.getSubtype(child)).release());
+   }
+ }
+
+ // Deletes every child printer held as a raw pointer in fieldPrinter.
+ UnionColumnPrinter::~UnionColumnPrinter() {
+   for (size_t child = 0; child < fieldPrinter.size(); ++child) {
+     delete fieldPrinter[child];
+   }
+ }
+
+ // Rebinds the printer to a new batch: runs the base-class reset, caches
+ // the tags and offsets arrays, and resets each child printer onto the
+ // child batch at the same index. The batch is cast to UnionVectorBatch by
+ // reference, so any other batch type makes dynamic_cast throw.
+ void UnionColumnPrinter::reset(const ColumnVectorBatch& batch) {
+   ColumnPrinter::reset(batch);
+   const UnionVectorBatch& source =
+       dynamic_cast<const UnionVectorBatch&>(batch);
+   tags = source.tags.data();
+   offsets = source.offsets.data();
+   for (size_t child = 0; child < fieldPrinter.size(); ++child) {
+     fieldPrinter[child]->reset(*(source.children[child]));
+   }
+ }
+
+ // Appends row rowId to the output buffer: the literal null for a null row,
+ // otherwise an object holding the row's numeric tag and its value. The
+ // value is printed by the child printer selected by the tag, at the child
+ // row given by offsets[rowId].
+ void UnionColumnPrinter::printRow(uint64_t rowId) {
+   if (hasNulls && !notNull[rowId]) {
+     writeString(buffer, "null");
+     return;
+   }
+   // format the tag first; it does not depend on anything written below
+   char tagText[64];
+   snprintf(tagText, sizeof(tagText), "%" INT64_FORMAT_STRING "d",
+            static_cast<int64_t>(tags[rowId]));
+   writeString(buffer, "{\"tag\": ");
+   writeString(buffer, tagText);
+   writeString(buffer, ", \"value\": ");
+   fieldPrinter[tags[rowId]]->printRow(offsets[rowId]);
+   writeChar(buffer, '}');
+ }
+
+ // Creates a printer for struct columns with one nested printer per field.
+ // Each printer returned by createColumnPrinter is released into
+ // fieldPrinter as a raw pointer; the destructor deletes them.
+ StructColumnPrinter::StructColumnPrinter(std::string& buffer,
+                                          const Type& type)
+     : ColumnPrinter(buffer, type) {
+   for (unsigned int field = 0; field < type.getSubtypeCount(); ++field) {
+     fieldPrinter.push_back(
+         createColumnPrinter(buffer, type.getSubtype(field)).release());
+   }
+ }
+
+ // Deletes every field printer held as a raw pointer in fieldPrinter.
+ StructColumnPrinter::~StructColumnPrinter() {
+   for (size_t field = 0; field < fieldPrinter.size(); ++field) {
+     delete fieldPrinter[field];
+   }
+ }
+
+ // Rebinds the printer to a new batch: runs the base-class reset and then
+ // resets each field printer onto the field batch at the same index. The
+ // batch is cast to StructVectorBatch by reference, so any other batch
+ // type makes dynamic_cast throw.
+ void StructColumnPrinter::reset(const ColumnVectorBatch& batch) {
+   ColumnPrinter::reset(batch);
+   const StructVectorBatch& source =
+       dynamic_cast<const StructVectorBatch&>(batch);
+   for (size_t field = 0; field < fieldPrinter.size(); ++field) {
+     fieldPrinter[field]->reset(*(source.fields[field]));
+   }
+ }
+
+ // Appends row rowId to the output buffer: the literal null for a null row,
+ // otherwise an object with one quoted-name/value pair per field,
+ // separated by commas. Every field printer is asked for the same rowId.
+ // Field names are written verbatim, without escaping.
+ void StructColumnPrinter::printRow(uint64_t rowId) {
+   if (hasNulls && !notNull[rowId]) {
+     writeString(buffer, "null");
+     return;
+   }
+   writeChar(buffer, '{');
+   for (unsigned int field = 0; field < fieldPrinter.size(); ++field) {
+     if (field > 0) {
+       writeString(buffer, ", ");
+     }
+     writeChar(buffer, '"');
+     writeString(buffer, type.getFieldName(field).c_str());
+     writeString(buffer, "\": ");
+     fieldPrinter[field]->printRow(rowId);
+   }
+   writeChar(buffer, '}');
+ }
+
+ // Creates a printer for date columns. Only the base class is initialized
+ // here; the value array is bound by reset().
+ DateColumnPrinter::DateColumnPrinter(std::string& buffer,
+                                      const Type& type)
+     : ColumnPrinter(buffer, type) {
+   // no further setup required
+ }
+
+ // Appends row rowId to the output buffer: the literal null for a null row,
+ // otherwise the date as a double-quoted YYYY-MM-DD string. The stored
+ // value is scaled by 24*60*60 into seconds and broken down with gmtime_r,
+ // i.e. it is treated as a day count relative to the time_t epoch in UTC.
+ void DateColumnPrinter::printRow(uint64_t rowId) {
+   if (hasNulls && !notNull[rowId]) {
+     writeString(buffer, "null");
+     return;
+   }
+   const time_t timeValue = data[rowId] * 24 * 60 * 60;
+   struct tm utcFields;
+   gmtime_r(&timeValue, &utcFields);
+   // room for the ten characters of YYYY-MM-DD plus the terminating NUL
+   char dateText[11];
+   strftime(dateText, sizeof(dateText), "%Y-%m-%d", &utcFields);
+   writeChar(buffer, '"');
+   writeString(buffer, dateText);
+   writeChar(buffer, '"');
+ }
+
+ // Rebinds the printer to a new batch: runs the base-class reset, then
+ // caches the raw value array. The batch is cast to LongVectorBatch by
+ // reference, so any other batch type makes dynamic_cast throw.
+ void DateColumnPrinter::reset(const ColumnVectorBatch& batch) {
+   ColumnPrinter::reset(batch);
+   const LongVectorBatch& longBatch =
+       dynamic_cast<const LongVectorBatch&>(batch);
+   data = longBatch.data.data();
+ }
+
+ // Creates a printer for boolean columns. Only the base class is
+ // initialized here; the value array is bound by reset().
+ BooleanColumnPrinter::BooleanColumnPrinter(std::string& buffer,
+                                            const Type& type)
+     : ColumnPrinter(buffer, type) {
+   // no further setup required
+ }
+
+ // Appends row rowId to the output buffer: the literal null for a null row,
+ // true for any non-zero stored value and false for zero.
+ void BooleanColumnPrinter::printRow(uint64_t rowId) {
+   if (hasNulls && !notNull[rowId]) {
+     writeString(buffer, "null");
+     return;
+   }
+   if (data[rowId]) {
+     writeString(buffer, "true");
+   } else {
+     writeString(buffer, "false");
+   }
+ }
+
+ // Rebinds the printer to a new batch: runs the base-class reset, then
+ // caches the raw value array. The batch is cast to LongVectorBatch by
+ // reference, so any other batch type makes dynamic_cast throw.
+ void BooleanColumnPrinter::reset(const ColumnVectorBatch& batch) {
+   ColumnPrinter::reset(batch);
+   const LongVectorBatch& longBatch =
+       dynamic_cast<const LongVectorBatch&>(batch);
+   data = longBatch.data.data();
+ }
+
+ // Creates a printer for binary columns. Only the base class is
+ // initialized here; the start/length arrays are bound by reset().
+ BinaryColumnPrinter::BinaryColumnPrinter(std::string& buffer,
+                                          const Type& type)
+     : ColumnPrinter(buffer, type) {
+   // no further setup required
+ }
+
+ // Appends row rowId to the output buffer: the literal null for a null row,
+ // otherwise a bracketed, comma-separated list with one decimal number per
+ // byte. Each byte is masked with 0xff so it prints in the range 0..255
+ // even when char is signed.
+ void BinaryColumnPrinter::printRow(uint64_t rowId) {
+   if (hasNulls && !notNull[rowId]) {
+     writeString(buffer, "null");
+     return;
+   }
+   const int64_t byteCount = length[rowId];
+   writeChar(buffer, '[');
+   for (int64_t pos = 0; pos < byteCount; ++pos) {
+     if (pos > 0) {
+       writeString(buffer, ", ");
+     }
+     char byteText[64];
+     snprintf(byteText, sizeof(byteText), "%d",
+              (static_cast<int>(start[rowId][pos]) & 0xff));
+     writeString(buffer, byteText);
+   }
+   writeChar(buffer, ']');
+ }
+
+ // Rebinds the printer to a new batch: runs the base-class reset, then
+ // caches the per-row start pointers and lengths. The batch is cast to
+ // StringVectorBatch by reference, so any other batch type makes
+ // dynamic_cast throw.
+ void BinaryColumnPrinter::reset(const ColumnVectorBatch& batch) {
+   ColumnPrinter::reset(batch);
+   const StringVectorBatch& bytesBatch =
+       dynamic_cast<const StringVectorBatch&>(batch);
+   start = bytesBatch.data.data();
+   length = bytesBatch.length.data();
+ }
+
+ // Creates a printer for timestamp columns and precomputes `epoch`: the
+ // time_t that mktime returns for 1970-01-01 00:00:00 with DST off. mktime
+ // interprets its argument as local time, so the result depends on the
+ // process time zone. printRow adds this value to each timestamp.
+ TimestampColumnPrinter::TimestampColumnPrinter(std::string& buffer,
+                                                const Type& type)
+     : ColumnPrinter(buffer, type) {
+   struct tm epochTm;
+   // calendar date: 1 January 1970 (tm_year counts from 1900, tm_mon from 0)
+   epochTm.tm_year = 70;
+   epochTm.tm_mon = 0;
+   epochTm.tm_mday = 1;
+   // time of day: midnight, daylight saving not in effect
+   epochTm.tm_hour = 0;
+   epochTm.tm_min = 0;
+   epochTm.tm_sec = 0;
+   epochTm.tm_isdst = 0;
+   epoch = mktime(&epochTm);
+ }
+
+ // Appends row rowId to the output buffer: the literal null for a null row,
+ // otherwise a double-quoted timestamp of the form
+ // YYYY-MM-DD HH:MM:SS.fraction. The stored value is split into seconds
+ // and nanoseconds; the seconds are offset by `epoch` and broken down with
+ // localtime_r. Trailing zeros of the fraction are dropped, and a zero
+ // fraction prints as a single 0 digit.
+ void TimestampColumnPrinter::printRow(uint64_t rowId) {
+   const int64_t NANOS_PER_SECOND = 1000000000;
+   const int64_t NANO_DIGITS = 9;
+   if (hasNulls && !notNull[rowId]) {
+     writeString(buffer, "null");
+     return;
+   }
+   const int64_t rawValue = data[rowId];
+   int64_t nanos = rawValue % NANOS_PER_SECOND;
+   time_t seconds =
+       static_cast<time_t>(rawValue / NANOS_PER_SECOND) + epoch;
+   // A negative remainder is flipped to positive and one second is
+   // borrowed.
+   // NOTE(review): the sign flip (rather than adding NANOS_PER_SECOND)
+   // presumably mirrors how the reader encodes pre-epoch values - confirm
+   // against the timestamp column reader.
+   if (nanos < 0) {
+     seconds -= 1;
+     nanos = -nanos;
+   }
+   struct tm localFields;
+   localtime_r(&seconds, &localFields);
+   // room for the 19 characters of the date-time plus the terminating NUL
+   char timeText[20];
+   strftime(timeText, sizeof(timeText), "%Y-%m-%d %H:%M:%S", &localFields);
+   writeChar(buffer, '"');
+   writeString(buffer, timeText);
+   writeChar(buffer, '.');
+   // Count the trailing zeros of the fraction and strip them; the field
+   // width below shrinks by the same amount so leading zeros survive.
+   int64_t zeroDigits = 0;
+   if (nanos == 0) {
+     // leave a width of one so that a lone 0 is printed
+     zeroDigits = 8;
+   } else {
+     for (; nanos % 10 == 0; nanos /= 10) {
+       ++zeroDigits;
+     }
+   }
+   char fractionText[64];
+   snprintf(fractionText, sizeof(fractionText),
+            "%0*" INT64_FORMAT_STRING "d\"",
+            static_cast<int>(NANO_DIGITS - zeroDigits),
+            static_cast<int64_t >(nanos));
+   writeString(buffer, fractionText);
+ }
+
+ // Rebinds the printer to a new batch: runs the base-class reset, then
+ // caches the raw value array. The batch is cast to LongVectorBatch by
+ // reference, so any other batch type makes dynamic_cast throw.
+ void TimestampColumnPrinter::reset(const ColumnVectorBatch& batch) {
+   ColumnPrinter::reset(batch);
+   const LongVectorBatch& longBatch =
+       dynamic_cast<const LongVectorBatch&>(batch);
+   data = longBatch.data.data();
+ }
+}