You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2015/07/06 23:52:48 UTC
[19/23] orc git commit: ORC-23. Simplify directory structure.
http://git-wip-us.apache.org/repos/asf/orc/blob/7f55b453/c++/src/orc/ByteRLE.cc
----------------------------------------------------------------------
diff --git a/c++/src/orc/ByteRLE.cc b/c++/src/orc/ByteRLE.cc
deleted file mode 100644
index 2c0032c..0000000
--- a/c++/src/orc/ByteRLE.cc
+++ /dev/null
@@ -1,352 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <algorithm>
-#include <iostream>
-#include <string.h>
-#include <utility>
-
-#include "ByteRLE.hh"
-#include "Exceptions.hh"
-
-namespace orc {
-
- const size_t MINIMUM_REPEAT = 3;
-
- ByteRleDecoder::~ByteRleDecoder() {
- // Nothing to release: the interface declares no data members.
- }
-
- class ByteRleDecoderImpl: public ByteRleDecoder { // byte run-length decoder; also the base of BooleanRleDecoderImpl
- public:
- ByteRleDecoderImpl(std::unique_ptr<SeekableInputStream> input); // takes ownership of the stream
-
- virtual ~ByteRleDecoderImpl();
-
- /**
- * Seek to a particular spot.
- * Repositions the input stream from the provider, reads the run header
- * found there, then skips the number of values given by the provider's
- * next entry.
- */
- virtual void seek(PositionProvider&);
-
- /**
- * Skip over a given number of values without returning them.
- */
- virtual void skip(uint64_t numValues);
-
- /**
- * Read a number of values into the batch.
- * When notNull is non-null, slots whose notNull entry is 0 consume no
- * input and their data entry is not written.
- */
- virtual void next(char* data, uint64_t numValues, char* notNull);
-
- protected:
- inline void nextBuffer(); // fetch the next chunk from inputStream; throws ParseError on failure
- inline signed char readByte(); // return one byte, refilling the buffer when it is exhausted
- inline void readHeader(); // decode the control byte that starts a run
-
- std::unique_ptr<SeekableInputStream> inputStream; // source of encoded bytes
- size_t remainingValues; // values left in the current run
- char value; // the repeated byte while repeating is true
- const char* bufferStart; // next unread byte of the current chunk
- const char* bufferEnd; // one past the last byte of the current chunk
- bool repeating; // true for a repeat run, false for a literal run
- };
-
- void ByteRleDecoderImpl::nextBuffer() {
-   // Ask the stream for its next chunk; a false return from Next() is
-   // surfaced to the caller as a ParseError.
-   const void* chunk;
-   int chunkSize;
-   if (!inputStream->Next(&chunk, &chunkSize)) {
-     throw ParseError("bad read in nextBuffer");
-   }
-   // Point the [bufferStart, bufferEnd) window at the new chunk.
-   bufferStart = static_cast<const char*>(chunk);
-   bufferEnd = bufferStart + chunkSize;
- }
-
- /**
-  * Return the next encoded byte, refilling the buffer from the stream when
-  * the current chunk is exhausted.
-  * @throws ParseError (from nextBuffer) when the stream cannot supply data
-  */
- signed char ByteRleDecoderImpl::readByte() {
-   // Loop instead of refilling once: nextBuffer() does not reject a
-   // zero-length chunk, and dereferencing bufferStart in that case would
-   // read past the end of the buffer.
-   // NOTE(review): relies on the stream eventually returning data or
-   // failing, as the Next() contract is understood to require - confirm.
-   while (bufferStart == bufferEnd) {
-     nextBuffer();
-   }
-   return static_cast<signed char>(*(bufferStart++));
- }
-
- void ByteRleDecoderImpl::readHeader() {
-   // The control byte's sign selects the run type.
-   const signed char control = readByte();
-   repeating = control >= 0;
-   if (repeating) {
-     // Repeat run: the control byte is the length minus MINIMUM_REPEAT and
-     // the byte that follows is the value to repeat.
-     remainingValues = static_cast<size_t>(control) + MINIMUM_REPEAT;
-     value = readByte();
-   } else {
-     // Literal run: the negated control byte is the number of literal bytes.
-     remainingValues = static_cast<size_t>(-control);
-   }
- }
-
- ByteRleDecoderImpl::ByteRleDecoderImpl(std::unique_ptr<SeekableInputStream>
-                                        input)
-     : inputStream(std::move(input)),
-       remainingValues(0),
-       value(0),
-       bufferStart(nullptr),
-       bufferEnd(nullptr),
-       repeating(false) {
-   // Starts with no buffered bytes and no active run; the first read
-   // fetches a header.
- }
-
- ByteRleDecoderImpl::~ByteRleDecoderImpl() {
- // Nothing explicit to do: inputStream is a unique_ptr and releases itself.
- }
-
- /**
-  * Reposition the decoder.
-  * The provider first positions the underlying stream; its next entry is
-  * the number of byte values to skip from the start of the run found there.
-  */
- void ByteRleDecoderImpl::seek(PositionProvider& location) {
-   // move the input stream
-   inputStream->seek(location);
-   // force a re-read from the stream
-   bufferEnd = bufferStart;
-   // read a new header
-   readHeader();
-   // Skip ahead the given number of byte values. The call is qualified on
-   // purpose: skip() is virtual, and BooleanRleDecoderImpl::seek() calls
-   // this function, so an unqualified call would dispatch to the boolean
-   // skip() and interpret the byte count as a bit count.
-   ByteRleDecoderImpl::skip(location.next());
- }
-
- void ByteRleDecoderImpl::skip(uint64_t numValues) {
-   // Consume runs until the requested number of values has been discarded.
-   for (uint64_t pending = numValues; pending > 0; ) {
-     if (remainingValues == 0) {
-       readHeader();
-     }
-     const size_t taken = std::min(static_cast<size_t>(pending),
-                                   remainingValues);
-     remainingValues -= taken;
-     pending -= taken;
-     if (repeating) {
-       // a repeat run holds no per-value bytes, so nothing else to drop
-       continue;
-     }
-     // Literal run: step over `taken` bytes, fetching further chunks from
-     // the stream whenever the current one runs out.
-     for (size_t bytesLeft = taken; bytesLeft > 0; ) {
-       if (bufferStart == bufferEnd) {
-         nextBuffer();
-       }
-       const size_t available = static_cast<size_t>(bufferEnd - bufferStart);
-       const size_t step = std::min(static_cast<size_t>(bytesLeft),
-                                    available);
-       bufferStart += step;
-       bytesLeft -= step;
-     }
-   }
- }
-
- void ByteRleDecoderImpl::next(char* data, uint64_t numValues, // decode into data[0..numValues)
- char* notNull) { // optional mask; entries equal to 0 mark null slots
- uint64_t position = 0; // index of the next output slot to handle
- // skip over leading null slots; their data entries are left unwritten
- while (notNull && position < numValues && !notNull[position]) {
- position += 1;
- }
- while (position < numValues) {
- // if we are out of values, read more
- if (remainingValues == 0) {
- readHeader();
- }
- // how many output slots do we cover with this run? (bounded by the values left in it)
- size_t count = std::min(static_cast<size_t>(numValues - position),
- remainingValues);
- uint64_t consumed = 0; // run values actually used; null slots use none
- if (repeating) {
- if (notNull) {
- for(uint64_t i=0; i < count; ++i) {
- if (notNull[position + i]) {
- data[position + i] = value;
- consumed += 1;
- }
- }
- } else {
- memset(data + position, value, count); // no mask: fill the whole span at once
- consumed = count;
- }
- } else {
- if (notNull) {
- for(uint64_t i=0; i < count; ++i) {
- if (notNull[position + i]) {
- data[position + i] = readByte(); // literal bytes are read only for non-null slots
- consumed += 1;
- }
- }
- } else {
- uint64_t i = 0; // literal bytes copied so far
- while (i < count) {
- if (bufferStart == bufferEnd) {
- nextBuffer();
- }
- uint64_t copyBytes = // limited by what is left in the current chunk
- std::min(static_cast<uint64_t>(count - i),
- static_cast<uint64_t>(bufferEnd - bufferStart));
- memcpy(data + position + i, bufferStart, copyBytes);
- bufferStart += copyBytes;
- i += copyBytes;
- }
- consumed = count;
- }
- }
- remainingValues -= consumed; // values not used stay in the run for the next pass
- position += count;
- // skip over any null values
- while (notNull && position < numValues && !notNull[position]) {
- position += 1;
- }
- }
- }
-
- std::unique_ptr<ByteRleDecoder> createByteRleDecoder
-     (std::unique_ptr<SeekableInputStream> input) {
-   // Build the concrete decoder and hand it back through the interface type.
-   std::unique_ptr<ByteRleDecoder> decoder(
-       new ByteRleDecoderImpl(std::move(input)));
-   return decoder;
- }
-
- class BooleanRleDecoderImpl: public ByteRleDecoderImpl { // unpacks booleans, eight per byte, from the byte RLE stream
- public:
- BooleanRleDecoderImpl(std::unique_ptr<SeekableInputStream> input); // takes ownership of the stream
-
- virtual ~BooleanRleDecoderImpl();
-
- /**
- * Seek to a particular spot.
- * After the byte-level seek, one more provider entry gives the number of
- * bits already consumed from the byte at that position.
- */
- virtual void seek(PositionProvider&);
-
- /**
- * Skip over a given number of boolean values (bits, not bytes).
- */
- virtual void skip(uint64_t numValues);
-
- /**
- * Read a number of values into the batch.
- * Each output entry is set to 0 or 1; entries masked out by notNull are
- * set to 0.
- */
- virtual void next(char* data, uint64_t numValues, char* notNull);
-
- protected:
- size_t remainingBits; // unread bits left in lastByte
- char lastByte; // most recently read byte that may still hold unread bits
- };
-
- BooleanRleDecoderImpl::BooleanRleDecoderImpl
-     (std::unique_ptr<SeekableInputStream> input)
-     : ByteRleDecoderImpl(std::move(input)),
-       remainingBits(0),
-       lastByte(0) {
-   // No partially consumed byte exists before the first read.
- }
-
- BooleanRleDecoderImpl::~BooleanRleDecoderImpl() {
- // Nothing to release beyond what the base class already handles.
- }
-
- /**
-  * Reposition the decoder.
-  * The byte-level position is handled by the base class; the provider's
-  * following entry is the number of bits already consumed from the byte at
-  * that position.
-  * @throws ParseError when the bit offset is greater than 8
-  */
- void BooleanRleDecoderImpl::seek(PositionProvider& location) {
-   ByteRleDecoderImpl::seek(location);
-   uint64_t consumed = location.next();
-   // Drop whatever was left of the byte buffered before the seek; otherwise
-   // a bit offset of 0 would leave stale bits to be returned by next().
-   remainingBits = 0;
-   if (consumed > 8) {
-     throw ParseError("bad position");
-   }
-   if (consumed != 0) {
-     // Load the partially consumed byte and note how many bits remain.
-     remainingBits = 8 - consumed;
-     ByteRleDecoderImpl::next(&lastByte, 1, 0);
-   }
- }
-
- /**
-  * Skip over a number of boolean values.
-  * Bits still buffered in lastByte are used up first, then whole bytes are
-  * skipped in the byte stream, and a final byte is loaded only when the
-  * skip ends part-way through it.
-  */
- void BooleanRleDecoderImpl::skip(uint64_t numValues) {
-   if (numValues <= remainingBits) {
-     remainingBits -= numValues;
-   } else {
-     numValues -= remainingBits;
-     // the buffered byte is fully used up from here on
-     remainingBits = 0;
-     uint64_t bytesSkipped = numValues / 8;
-     ByteRleDecoderImpl::skip(bytesSkipped);
-     // Read another byte only when some of its bits are actually skipped.
-     // Reading unconditionally would throw when the skip ends exactly at
-     // the end of the stream.
-     if (numValues % 8 != 0) {
-       ByteRleDecoderImpl::next(&lastByte, 1, 0);
-       remainingBits = 8 - (numValues % 8);
-     }
-   }
- }
-
- void BooleanRleDecoderImpl::next(char* data, uint64_t numValues, // each data entry becomes 0 or 1
- char* notNull) { // optional mask; masked-out entries are set to 0
- // next spot to fill in
- uint64_t position = 0;
-
- // use up any remaining bits left over in lastByte from an earlier call
- if (notNull) {
- while(remainingBits > 0 && position < numValues) {
- if (notNull[position]) {
- remainingBits -= 1;
- data[position] = (static_cast<unsigned char>(lastByte) >> // bits are taken most-significant first
- remainingBits) & 0x1;
- } else {
- data[position] = 0;
- }
- position += 1;
- }
- } else {
- while(remainingBits > 0 && position < numValues) {
- remainingBits -= 1;
- data[position++] = (static_cast<unsigned char>(lastByte) >>
- remainingBits) & 0x1;
- }
- }
-
- // count the number of nonNulls remaining, i.e. how many bits are still needed
- uint64_t nonNulls = numValues - position;
- if (notNull) {
- for(uint64_t i=position; i < numValues; ++i) {
- if (!notNull[i]) {
- nonNulls -= 1;
- }
- }
- }
-
- // fill in the remaining values
- if (nonNulls == 0) {
- while (position < numValues) {
- data[position++] = 0;
- }
- } else if (position < numValues) {
- // read the new bytes into the array, packed, starting at data[position]
- uint64_t bytesRead = (nonNulls + 7) / 8;
- ByteRleDecoderImpl::next(data + position, bytesRead, 0);
- lastByte = data[position + bytesRead - 1]; // keep the final byte: it may hold bits for the next call
- remainingBits = bytesRead * 8 - nonNulls;
- // expand the array backwards so that we don't clobber the data
- uint64_t bitsLeft = bytesRead * 8 - remainingBits; // equals nonNulls: bits still to be placed
- if (notNull) {
- for(int64_t i=static_cast<int64_t>(numValues) - 1;
- i >= static_cast<int64_t>(position); --i) {
- if (notNull[i]) {
- uint64_t shiftPosn = (-bitsLeft) % 8; // shift that brings bit number bitsLeft (counted from the top of its byte) to bit 0
- data[i] = (data[position + (bitsLeft - 1) / 8] >> shiftPosn) & 0x1;
- bitsLeft -= 1;
- } else {
- data[i] = 0;
- }
- }
- } else {
- for(int64_t i=static_cast<int64_t>(numValues) - 1;
- i >= static_cast<int64_t>(position); --i, --bitsLeft) {
- uint64_t shiftPosn = (-bitsLeft) % 8; // same bit selection as in the masked branch
- data[i] = (data[position + (bitsLeft - 1) / 8] >> shiftPosn) & 0x1;
- }
- }
- }
- }
-
- /**
-  * Create a boolean RLE decoder that reads from the given stream.
-  * @param input the input stream to read from; ownership is transferred
-  */
- std::unique_ptr<ByteRleDecoder> createBooleanRleDecoder
-     (std::unique_ptr<SeekableInputStream> input) {
-   // BooleanRleDecoderImpl derives (via ByteRleDecoderImpl) from
-   // ByteRleDecoder, so the pointer converts implicitly; no
-   // reinterpret_cast is needed, and one would bypass the compiler's
-   // derived-to-base pointer adjustment.
-   return std::unique_ptr<ByteRleDecoder>(
-       new BooleanRleDecoderImpl(std::move(input)));
- }
-}
http://git-wip-us.apache.org/repos/asf/orc/blob/7f55b453/c++/src/orc/ByteRLE.hh
----------------------------------------------------------------------
diff --git a/c++/src/orc/ByteRLE.hh b/c++/src/orc/ByteRLE.hh
deleted file mode 100644
index 6762cb5..0000000
--- a/c++/src/orc/ByteRLE.hh
+++ /dev/null
@@ -1,71 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ORC_BYTE_RLE_HH
-#define ORC_BYTE_RLE_HH
-
-#include <memory>
-
-#include "Compression.hh"
-
-namespace orc {
-
- class ByteRleDecoder { // abstract interface shared by the byte and boolean RLE decoders
- public:
- virtual ~ByteRleDecoder();
-
- /**
- * Seek to a particular spot.
- * The position is taken from the supplied provider.
- */
- virtual void seek(PositionProvider&) = 0;
-
- /**
- * Skip over a given number of values.
- */
- virtual void skip(uint64_t numValues) = 0;
-
- /**
- * Read a number of values into the batch.
- * @param data the array to read into
- * @param numValues the number of values to read
- * @param notNull If the pointer is null, all values are read. If the
- * pointer is not null, positions that are false are skipped.
- */
- virtual void next(char* data, uint64_t numValues, char* notNull) = 0;
- };
-
- /**
- * Create a byte RLE decoder.
- * @param input the input stream to read from
- */
- std::unique_ptr<ByteRleDecoder> createByteRleDecoder
- (std::unique_ptr<SeekableInputStream> input);
-
- /**
- * Create a boolean RLE decoder.
- *
- * Unlike the other RLE decoders, the boolean decoder sets the data to 0
- * if the value is masked by notNull. This is required for the notNull stream
- * processing to properly apply multiple masks from nested types.
- * @param input the input stream to read from
- */
- std::unique_ptr<ByteRleDecoder> createBooleanRleDecoder
- (std::unique_ptr<SeekableInputStream> input);
-}
-
-#endif
http://git-wip-us.apache.org/repos/asf/orc/blob/7f55b453/c++/src/orc/C09Adapter.cc
----------------------------------------------------------------------
diff --git a/c++/src/orc/C09Adapter.cc b/c++/src/orc/C09Adapter.cc
deleted file mode 100644
index 8afc752..0000000
--- a/c++/src/orc/C09Adapter.cc
+++ /dev/null
@@ -1,28 +0,0 @@
-/**
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements. See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership. The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-
-#include "orc/C09Adapter.hh"
-#include <sstream>
-
-int64_t std::stoll(std::string str) {
-  // Stand-in that parses through a string stream. No failure is reported
-  // to the caller: whatever the extraction leaves in `parsed` is returned.
-  stringstream parser;
-  parser << str;
-  int64_t parsed = 0;
-  parser >> parsed;
-  return parsed;
-}
http://git-wip-us.apache.org/repos/asf/orc/blob/7f55b453/c++/src/orc/ColumnPrinter.cc
----------------------------------------------------------------------
diff --git a/c++/src/orc/ColumnPrinter.cc b/c++/src/orc/ColumnPrinter.cc
deleted file mode 100644
index 3fd95e5..0000000
--- a/c++/src/orc/ColumnPrinter.cc
+++ /dev/null
@@ -1,724 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "orc/Adaptor.hh"
-#include "orc/ColumnPrinter.hh"
-#include "orc/orc-config.hh"
-
-#include <limits>
-#include <sstream>
-#include <stdexcept>
-#include <time.h>
-#include <typeinfo>
-
-#ifdef __clang__
- #pragma clang diagnostic ignored "-Wformat-security"
-#endif
-
-namespace orc {
-
- class BooleanColumnPrinter: public ColumnPrinter { // writes each row as true, false or null
- private:
- const int64_t* data; // values of the current batch; assigned in reset()
- public:
- BooleanColumnPrinter(std::string&, const Type&);
- ~BooleanColumnPrinter() {}
- void printRow(uint64_t rowId) override; // append the text for one row to the output buffer
- void reset(const ColumnVectorBatch& batch) override; // bind to a new batch (read as a LongVectorBatch)
- };
-
- class LongColumnPrinter: public ColumnPrinter { // writes each row as a decimal integer or null
- private:
- const int64_t* data; // values of the current batch; assigned in reset()
- public:
- LongColumnPrinter(std::string&, const Type&);
- ~LongColumnPrinter() {}
- void printRow(uint64_t rowId) override; // append the text for one row to the output buffer
- void reset(const ColumnVectorBatch& batch) override; // bind to a new batch (read as a LongVectorBatch)
- };
-
- class DoubleColumnPrinter: public ColumnPrinter { // writes floating point rows with %.7g (FLOAT) or %.14g
- private:
- const double* data; // values of the current batch; assigned in reset()
- const bool isFloat; // true when the column kind is FLOAT; selects the shorter format
-
- public:
- DoubleColumnPrinter(std::string&, const Type&);
- virtual ~DoubleColumnPrinter() {}
- void printRow(uint64_t rowId) override; // append the text for one row to the output buffer
- void reset(const ColumnVectorBatch& batch) override; // bind to a new batch (read as a DoubleVectorBatch)
- };
-
- class TimestampColumnPrinter: public ColumnPrinter { // writes rows as a quoted "YYYY-MM-DD HH:MM:SS.fraction" string
- private:
- const int64_t* data; // values of the current batch; divided by 1e9 to obtain seconds in printRow()
- time_t epoch; // mktime() result for 1970-01-01 00:00:00; added to each row's seconds
-
- public:
- TimestampColumnPrinter(std::string&, const Type&);
- ~TimestampColumnPrinter() {}
- void printRow(uint64_t rowId) override; // append the text for one row to the output buffer
- void reset(const ColumnVectorBatch& batch) override; // bind to a new batch (read as a LongVectorBatch)
- };
-
- class DateColumnPrinter: public ColumnPrinter { // writes rows as a quoted "YYYY-MM-DD" string
- private:
- const int64_t* data; // values of the current batch; multiplied by 86400 to obtain seconds in printRow()
-
- public:
- DateColumnPrinter(std::string&, const Type& type);
- ~DateColumnPrinter() {}
- void printRow(uint64_t rowId) override; // append the text for one row to the output buffer
- void reset(const ColumnVectorBatch& batch) override; // bind to a new batch (read as a LongVectorBatch)
- };
-
- class Decimal64ColumnPrinter: public ColumnPrinter { // writes 64-bit decimals via toDecimalString()
- private:
- const int64_t* data; // unscaled values of the current batch; assigned in reset()
- int32_t scale; // scale copied from the batch in reset()
- public:
- Decimal64ColumnPrinter(std::string&, const Type& type);
- ~Decimal64ColumnPrinter() {}
- void printRow(uint64_t rowId) override; // append the text for one row to the output buffer
- void reset(const ColumnVectorBatch& batch) override; // bind to a new batch (read as a Decimal64VectorBatch)
- };
-
- class Decimal128ColumnPrinter: public ColumnPrinter { // writes 128-bit decimals via Int128::toDecimalString()
- private:
- const Int128* data; // unscaled values of the current batch; assigned in reset()
- int32_t scale; // scale copied from the batch in reset()
- public:
- Decimal128ColumnPrinter(std::string&, const Type& type);
- ~Decimal128ColumnPrinter() {}
- void printRow(uint64_t rowId) override; // append the text for one row to the output buffer
- void reset(const ColumnVectorBatch& batch) override; // bind to a new batch (read as a Decimal128VectorBatch)
- };
-
- class StringColumnPrinter: public ColumnPrinter { // writes rows as double-quoted strings with backslash escapes
- private:
- const char* const * start; // per-row pointer to the first character; assigned in reset()
- const int64_t* length; // per-row character count; assigned in reset()
- public:
- StringColumnPrinter(std::string&, const Type& type);
- virtual ~StringColumnPrinter() {}
- void printRow(uint64_t rowId) override; // append the text for one row to the output buffer
- void reset(const ColumnVectorBatch& batch) override; // bind to a new batch (read as a StringVectorBatch)
- };
-
- class BinaryColumnPrinter: public ColumnPrinter { // writes rows as a bracketed list of byte values 0..255
- private:
- const char* const * start; // per-row pointer to the first byte; assigned in reset()
- const int64_t* length; // per-row byte count; assigned in reset()
- public:
- BinaryColumnPrinter(std::string&, const Type& type);
- virtual ~BinaryColumnPrinter() {}
- void printRow(uint64_t rowId) override; // append the text for one row to the output buffer
- void reset(const ColumnVectorBatch& batch) override; // bind to a new batch (read as a StringVectorBatch)
- };
-
- class ListColumnPrinter: public ColumnPrinter { // writes rows as [elem, elem, ...]
- private:
- const int64_t* offsets; // row r covers elements offsets[r] up to, not including, offsets[r+1]
- std::unique_ptr<ColumnPrinter> elementPrinter; // printer for the element subtype (subtype 0)
-
- public:
- ListColumnPrinter(std::string&, const Type& type);
- virtual ~ListColumnPrinter() {}
- void printRow(uint64_t rowId) override; // append the text for one row to the output buffer
- void reset(const ColumnVectorBatch& batch) override; // bind this printer and the element printer to a new batch
- };
-
- class MapColumnPrinter: public ColumnPrinter { // writes rows as [{"key": k, "value": v}, ...]
- private:
- const int64_t* offsets; // row r covers entries offsets[r] up to, not including, offsets[r+1]
- std::unique_ptr<ColumnPrinter> keyPrinter; // printer for the key subtype (subtype 0)
- std::unique_ptr<ColumnPrinter> elementPrinter; // printer for the value subtype (subtype 1)
-
- public:
- MapColumnPrinter(std::string&, const Type& type);
- virtual ~MapColumnPrinter() {}
- void printRow(uint64_t rowId) override; // append the text for one row to the output buffer
- void reset(const ColumnVectorBatch& batch) override; // bind this printer and both child printers to a new batch
- };
-
- class UnionColumnPrinter: public ColumnPrinter { // writes rows as {"tag": n, "value": v}
- private:
- const unsigned char *tags; // per-row index of the active child; assigned in reset()
- const uint64_t* offsets; // per-row row index inside the active child; assigned in reset()
- std::vector<ColumnPrinter*> fieldPrinter; // owned child printers, deleted in the destructor
-
- public:
- UnionColumnPrinter(std::string&, const Type& type);
- virtual ~UnionColumnPrinter();
- void printRow(uint64_t rowId) override; // append the text for one row to the output buffer
- void reset(const ColumnVectorBatch& batch) override; // bind this printer and every child printer to a new batch
- };
-
- class StructColumnPrinter: public ColumnPrinter { // writes rows as {"name": value, ...}
- private:
- std::vector<ColumnPrinter*> fieldPrinter; // owned child printers, one per field, deleted in the destructor
- public:
- StructColumnPrinter(std::string&, const Type& type);
- virtual ~StructColumnPrinter();
- void printRow(uint64_t rowId) override; // append the text for one row to the output buffer
- void reset(const ColumnVectorBatch& batch) override; // bind this printer and every child printer to a new batch
- };
-
- void writeChar(std::string& file, char ch) {
-   // append a single character to the output buffer
-   file.push_back(ch);
- }
-
- void writeString(std::string& file, const char *ptr) {
-   // append a NUL-terminated C string to the output buffer
-   file.append(ptr);
- }
-
- ColumnPrinter::ColumnPrinter(std::string& _buffer, const Type& _type // output buffer and column type, both kept by reference
- ): buffer(_buffer),
- type(_type) {
- notNull = nullptr; // no batch bound yet; reset() supplies the null mask
- hasNulls = false;
- }
-
- ColumnPrinter::~ColumnPrinter() {
- // Nothing is deleted here.
- }
-
- void ColumnPrinter::reset(const ColumnVectorBatch& batch) {
-   // Remember whether the batch has nulls and, only in that case, where its
-   // null mask lives.
-   hasNulls = batch.hasNulls;
-   notNull = hasNulls ? batch.notNull.data() : nullptr;
- }
-
- std::unique_ptr<ColumnPrinter> createColumnPrinter(std::string& buffer,
-                                                    const Type& type) {
-   // Pick the printer class that matches the column kind; every printer
-   // appends its output to `buffer`.
-   typedef std::unique_ptr<ColumnPrinter> PrinterPtr;
-   switch(static_cast<int64_t>(type.getKind())) {
-   case BOOLEAN:
-     return PrinterPtr(new BooleanColumnPrinter(buffer, type));
-
-   // all integer widths share the 64-bit printer
-   case BYTE:
-   case SHORT:
-   case INT:
-   case LONG:
-     return PrinterPtr(new LongColumnPrinter(buffer, type));
-
-   case FLOAT:
-   case DOUBLE:
-     return PrinterPtr(new DoubleColumnPrinter(buffer, type));
-
-   case STRING:
-   case VARCHAR :
-   case CHAR:
-     return PrinterPtr(new StringColumnPrinter(buffer, type));
-
-   case BINARY:
-     return PrinterPtr(new BinaryColumnPrinter(buffer, type));
-
-   case TIMESTAMP:
-     return PrinterPtr(new TimestampColumnPrinter(buffer, type));
-
-   case LIST:
-     return PrinterPtr(new ListColumnPrinter(buffer, type));
-
-   case MAP:
-     return PrinterPtr(new MapColumnPrinter(buffer, type));
-
-   case STRUCT:
-     return PrinterPtr(new StructColumnPrinter(buffer, type));
-
-   case DECIMAL:
-     // precision 0 or above 18 selects the 128-bit printer
-     if (type.getPrecision() == 0 || type.getPrecision() > 18) {
-       return PrinterPtr(new Decimal128ColumnPrinter(buffer, type));
-     }
-     return PrinterPtr(new Decimal64ColumnPrinter(buffer, type));
-
-   case DATE:
-     return PrinterPtr(new DateColumnPrinter(buffer, type));
-
-   case UNION:
-     return PrinterPtr(new UnionColumnPrinter(buffer, type));
-
-   default:
-     throw std::logic_error("unknown batch type");
-   }
- }
-
- LongColumnPrinter::LongColumnPrinter(std::string& buffer,
-                                      const Type& type
-                                      ): ColumnPrinter(buffer, type),
-                                         data(nullptr) {
-   // data starts out null instead of indeterminate; reset() binds it to a
-   // batch before rows are printed.
- }
-
- void LongColumnPrinter::reset(const ColumnVectorBatch& batch) {
-   ColumnPrinter::reset(batch);
-   // the batch must be a LongVectorBatch; the reference cast throws otherwise
-   const LongVectorBatch& longBatch =
-     dynamic_cast<const LongVectorBatch&>(batch);
-   data = longBatch.data.data();
- }
-
- void LongColumnPrinter::printRow(uint64_t rowId) {
-   if (hasNulls && !notNull[rowId]) {
-     writeString(buffer, "null");
-     return;
-   }
-   // format the value as a decimal integer
-   char text[64];
-   snprintf(text, sizeof(text), "%" INT64_FORMAT_STRING "d",
-            static_cast<int64_t >(data[rowId]));
-   writeString(buffer, text);
- }
-
- DoubleColumnPrinter::DoubleColumnPrinter(std::string& buffer,
-                                          const Type& type
-                                          ): ColumnPrinter(buffer, type),
-                                             data(nullptr),
-                                             isFloat(type.getKind() == FLOAT){
-   // data starts out null instead of indeterminate; reset() binds it to a
-   // batch before rows are printed.
- }
-
- void DoubleColumnPrinter::reset(const ColumnVectorBatch& batch) {
-   ColumnPrinter::reset(batch);
-   // the batch must be a DoubleVectorBatch; the reference cast throws otherwise
-   const DoubleVectorBatch& doubleBatch =
-     dynamic_cast<const DoubleVectorBatch&>(batch);
-   data = doubleBatch.data.data();
- }
-
- void DoubleColumnPrinter::printRow(uint64_t rowId) {
-   if (hasNulls && !notNull[rowId]) {
-     writeString(buffer, "null");
-     return;
-   }
-   // FLOAT columns use 7 significant digits, everything else 14
-   char text[64];
-   snprintf(text, sizeof(text), isFloat ? "%.7g" : "%.14g",
-            data[rowId]);
-   writeString(buffer, text);
- }
-
- Decimal64ColumnPrinter::Decimal64ColumnPrinter(std::string& buffer,
-                                                const Type& type
-                                                ): ColumnPrinter(buffer,
-                                                                 type),
-                                                   data(nullptr),
-                                                   scale(0) {
-   // data and scale get defined values instead of being left
-   // indeterminate; reset() replaces both from the batch.
- }
-
- void Decimal64ColumnPrinter::reset(const ColumnVectorBatch& batch) {
-   ColumnPrinter::reset(batch);
-   // the batch must be a Decimal64VectorBatch; the reference cast throws
-   // otherwise
-   const Decimal64VectorBatch& decimalBatch =
-     dynamic_cast<const Decimal64VectorBatch&>(batch);
-   data = decimalBatch.values.data();
-   scale = decimalBatch.scale;
- }
-
- /**
-  * Render an unscaled 64-bit decimal as text.
-  * @param value the unscaled integer value
-  * @param scale the number of digits that belong after the decimal point
-  * @return the digits of value with a '.' inserted scale digits from the
-  *         right, padded with leading zeros ("0.00x") when value has too
-  *         few digits; when scale is 0 the plain integer is returned
-  */
- std::string toDecimalString(int64_t value, int32_t scale) {
-   std::stringstream buffer;
-   if (scale == 0) {
-     buffer << value;
-     return buffer.str();
-   }
-   std::string sign = "";
-   // Take the magnitude in unsigned arithmetic: negating the most negative
-   // int64_t as a signed value overflows, which is undefined behaviour.
-   uint64_t magnitude = static_cast<uint64_t>(value);
-   if (value < 0) {
-     sign = "-";
-     magnitude = 0 - magnitude;
-   }
-   buffer << magnitude;
-   std::string str = buffer.str();
-   int32_t len = static_cast<int32_t>(str.length());
-   if (len > scale) {
-     // enough digits for an integer part: split the digit string
-     return sign + str.substr(0, static_cast<size_t>(len - scale)) + "." +
-       str.substr(static_cast<size_t>(len - scale),
-                  static_cast<size_t>(scale));
-   } else if (len == scale) {
-     return sign + "0." + str;
-   } else {
-     // fewer digits than the scale: pad with zeros after the point
-     std::string result = sign + "0.";
-     for(int32_t i=0; i < scale - len; ++i) {
-       result += "0";
-     }
-     return result + str;
-   }
- }
-
- void Decimal64ColumnPrinter::printRow(uint64_t rowId) {
-   if (hasNulls && !notNull[rowId]) {
-     writeString(buffer, "null");
-     return;
-   }
-   // render the unscaled value with the batch's scale
-   const std::string text = toDecimalString(data[rowId], scale);
-   writeString(buffer, text.c_str());
- }
-
- Decimal128ColumnPrinter::Decimal128ColumnPrinter(std::string& buffer,
-                                                  const Type& type
-                                                  ): ColumnPrinter(buffer,
-                                                                   type),
-                                                     data(nullptr),
-                                                     scale(0) {
-   // data and scale get defined values instead of being left
-   // indeterminate; reset() replaces both from the batch.
- }
-
- void Decimal128ColumnPrinter::reset(const ColumnVectorBatch& batch) {
-   ColumnPrinter::reset(batch);
-   // the batch must be a Decimal128VectorBatch; the reference cast throws
-   // otherwise
-   const Decimal128VectorBatch& decimalBatch =
-     dynamic_cast<const Decimal128VectorBatch&>(batch);
-   data = decimalBatch.values.data();
-   scale = decimalBatch.scale;
- }
-
- void Decimal128ColumnPrinter::printRow(uint64_t rowId) {
-   if (hasNulls && !notNull[rowId]) {
-     writeString(buffer, "null");
-     return;
-   }
-   // the Int128 value formats itself, given the batch's scale
-   const std::string text = data[rowId].toDecimalString(scale);
-   writeString(buffer, text.c_str());
- }
-
- StringColumnPrinter::StringColumnPrinter(std::string& buffer,
-                                          const Type& type
-                                          ): ColumnPrinter(buffer, type),
-                                             start(nullptr),
-                                             length(nullptr) {
-   // start and length begin as null instead of indeterminate; reset() binds
-   // them to a batch before rows are printed.
- }
-
- void StringColumnPrinter::reset(const ColumnVectorBatch& batch) {
-   ColumnPrinter::reset(batch);
-   // the batch must be a StringVectorBatch; the reference cast throws
-   // otherwise
-   const StringVectorBatch& stringBatch =
-     dynamic_cast<const StringVectorBatch&>(batch);
-   start = stringBatch.data.data();
-   length = stringBatch.length.data();
- }
-
- void StringColumnPrinter::printRow(uint64_t rowId) {
-   if (hasNulls && !notNull[rowId]) {
-     writeString(buffer, "null");
-     return;
-   }
-   const char* const text = start[rowId];
-   const int64_t textLength = length[rowId];
-   writeChar(buffer, '"');
-   for(int64_t pos=0; pos < textLength; ++pos) {
-     const char ch = static_cast<char>(text[pos]);
-     // look up the two-character escape, if this character has one
-     const char* escape = nullptr;
-     switch (ch) {
-     case '\\': escape = "\\\\"; break;
-     case '\b': escape = "\\b"; break;
-     case '\f': escape = "\\f"; break;
-     case '\n': escape = "\\n"; break;
-     case '\r': escape = "\\r"; break;
-     case '\t': escape = "\\t"; break;
-     case '"': escape = "\\\""; break;
-     default: break;
-     }
-     if (escape != nullptr) {
-       writeString(buffer, escape);
-     } else {
-       // every other character is copied through unchanged
-       writeChar(buffer, ch);
-     }
-   }
-   writeChar(buffer, '"');
- }
-
- ListColumnPrinter::ListColumnPrinter(std::string& buffer,
-                                      const Type& type
-                                      ): ColumnPrinter(buffer, type),
-                                         offsets(nullptr) {
-   // offsets starts out null instead of indeterminate; reset() binds it.
-   // The element printer handles subtype 0 and writes to the same buffer.
-   elementPrinter = createColumnPrinter(buffer, type.getSubtype(0));
- }
-
- void ListColumnPrinter::reset(const ColumnVectorBatch& batch) {
-   ColumnPrinter::reset(batch);
-   // the batch must be a ListVectorBatch; the reference cast throws otherwise
-   const ListVectorBatch& listBatch =
-     dynamic_cast<const ListVectorBatch&>(batch);
-   offsets = listBatch.offsets.data();
-   // the element printer follows the child batch
-   elementPrinter->reset(*listBatch.elements);
- }
-
- void ListColumnPrinter::printRow(uint64_t rowId) {
-   if (hasNulls && !notNull[rowId]) {
-     writeString(buffer, "null");
-     return;
-   }
-   // elements of this row occupy [first, last) in the child batch
-   const int64_t first = offsets[rowId];
-   const int64_t last = offsets[rowId+1];
-   writeChar(buffer, '[');
-   for(int64_t element = first; element < last; ++element) {
-     if (element != first) {
-       writeString(buffer, ", ");
-     }
-     elementPrinter->printRow(static_cast<uint64_t>(element));
-   }
-   writeChar(buffer, ']');
- }
-
- MapColumnPrinter::MapColumnPrinter(std::string& buffer,
-                                    const Type& type
-                                    ): ColumnPrinter(buffer, type),
-                                       offsets(nullptr) {
-   // offsets starts out null instead of indeterminate; reset() binds it.
-   // Subtype 0 is the key type, subtype 1 the value type.
-   keyPrinter = createColumnPrinter(buffer, type.getSubtype(0));
-   elementPrinter = createColumnPrinter(buffer, type.getSubtype(1));
- }
-
- void MapColumnPrinter::reset(const ColumnVectorBatch& batch) {
-   ColumnPrinter::reset(batch);
-   // the batch must be a MapVectorBatch; the reference cast throws otherwise
-   const MapVectorBatch& mapBatch =
-     dynamic_cast<const MapVectorBatch&>(batch);
-   offsets = mapBatch.offsets.data();
-   // both child printers follow their child batches
-   keyPrinter->reset(*mapBatch.keys);
-   elementPrinter->reset(*mapBatch.elements);
- }
-
- void MapColumnPrinter::printRow(uint64_t rowId) {
-   if (hasNulls && !notNull[rowId]) {
-     writeString(buffer, "null");
-     return;
-   }
-   // entries of this row occupy [first, last) in the key and value batches
-   const int64_t first = offsets[rowId];
-   const int64_t last = offsets[rowId+1];
-   writeChar(buffer, '[');
-   for(int64_t entry = first; entry < last; ++entry) {
-     if (entry != first) {
-       writeString(buffer, ", ");
-     }
-     const uint64_t childRow = static_cast<uint64_t>(entry);
-     writeString(buffer, "{\"key\": ");
-     keyPrinter->printRow(childRow);
-     writeString(buffer, ", \"value\": ");
-     elementPrinter->printRow(childRow);
-     writeChar(buffer, '}');
-   }
-   writeChar(buffer, ']');
- }
-
- UnionColumnPrinter::UnionColumnPrinter(std::string& buffer,
-                                        const Type& type
-                                        ): ColumnPrinter(buffer, type),
-                                           tags(nullptr),
-                                           offsets(nullptr) {
-   // Create one child printer per subtype. If a later child cannot be
-   // created, the destructor will not run for this half-built object, so
-   // the children created so far are deleted here before rethrowing.
-   try {
-     for(unsigned int i=0; i < type.getSubtypeCount(); ++i) {
-       // keep ownership in the unique_ptr until the vector holds the pointer
-       std::unique_ptr<ColumnPrinter> child =
-         createColumnPrinter(buffer, type.getSubtype(i));
-       fieldPrinter.push_back(child.get());
-       child.release();
-     }
-   } catch (...) {
-     for (size_t i = 0; i < fieldPrinter.size(); i++) {
-       delete fieldPrinter[i];
-     }
-     throw;
-   }
- }
-
- UnionColumnPrinter::~UnionColumnPrinter() {
-   // the vector holds owning raw pointers, so delete each child printer
-   for (std::vector<ColumnPrinter*>::iterator child = fieldPrinter.begin();
-        child != fieldPrinter.end(); ++child) {
-     delete *child;
-   }
- }
-
- void UnionColumnPrinter::reset(const ColumnVectorBatch& batch) {
-   ColumnPrinter::reset(batch);
-   // the batch must be a UnionVectorBatch; the reference cast throws otherwise
-   const UnionVectorBatch& source =
-     dynamic_cast<const UnionVectorBatch&>(batch);
-   tags = source.tags.data();
-   offsets = source.offsets.data();
-   // child i follows child batch i
-   const size_t childCount = fieldPrinter.size();
-   for(size_t child = 0; child < childCount; ++child) {
-     fieldPrinter[child]->reset(*(source.children[child]));
-   }
- }
-
- void UnionColumnPrinter::printRow(uint64_t rowId) {
-   if (hasNulls && !notNull[rowId]) {
-     writeString(buffer, "null");
-     return;
-   }
-   // the tag selects which child holds this row's value
-   const unsigned char tag = tags[rowId];
-   char tagText[64];
-   writeString(buffer, "{\"tag\": ");
-   snprintf(tagText, sizeof(tagText), "%" INT64_FORMAT_STRING "d",
-            static_cast<int64_t>(tag));
-   writeString(buffer, tagText);
-   writeString(buffer, ", \"value\": ");
-   // offsets gives the row inside the selected child
-   fieldPrinter[tag]->printRow(offsets[rowId]);
-   writeChar(buffer, '}');
- }
-
- StructColumnPrinter::StructColumnPrinter(std::string& buffer,
-                                          const Type& type
-                                          ): ColumnPrinter(buffer, type) {
-   // Create one child printer per field. If a later child cannot be
-   // created, the destructor will not run for this half-built object, so
-   // the children created so far are deleted here before rethrowing.
-   try {
-     for(unsigned int i=0; i < type.getSubtypeCount(); ++i) {
-       // keep ownership in the unique_ptr until the vector holds the pointer
-       std::unique_ptr<ColumnPrinter> child =
-         createColumnPrinter(buffer, type.getSubtype(i));
-       fieldPrinter.push_back(child.get());
-       child.release();
-     }
-   } catch (...) {
-     for (size_t i = 0; i < fieldPrinter.size(); i++) {
-       delete fieldPrinter[i];
-     }
-     throw;
-   }
- }
-
- StructColumnPrinter::~StructColumnPrinter() {
-   // the vector holds owning raw pointers, so delete each field printer
-   for (std::vector<ColumnPrinter*>::iterator field = fieldPrinter.begin();
-        field != fieldPrinter.end(); ++field) {
-     delete *field;
-   }
- }
-
- void StructColumnPrinter::reset(const ColumnVectorBatch& batch) {
-   ColumnPrinter::reset(batch);
-   // the batch must be a StructVectorBatch; the reference cast throws
-   // otherwise
-   const StructVectorBatch& source =
-     dynamic_cast<const StructVectorBatch&>(batch);
-   // field printer i follows field batch i
-   const size_t fieldCount = fieldPrinter.size();
-   for(size_t field = 0; field < fieldCount; ++field) {
-     fieldPrinter[field]->reset(*(source.fields[field]));
-   }
- }
-
- void StructColumnPrinter::printRow(uint64_t rowId) {
-   if (hasNulls && !notNull[rowId]) {
-     writeString(buffer, "null");
-     return;
-   }
-   writeChar(buffer, '{');
-   for(unsigned int field = 0; field < fieldPrinter.size(); ++field) {
-     // a separator goes before every field except the first
-     if (field > 0) {
-       writeString(buffer, ", ");
-     }
-     // emit "name": followed by the field's value for the same row
-     writeChar(buffer, '"');
-     writeString(buffer, type.getFieldName(field).c_str());
-     writeString(buffer, "\": ");
-     fieldPrinter[field]->printRow(rowId);
-   }
-   writeChar(buffer, '}');
- }
-
- DateColumnPrinter::DateColumnPrinter(std::string& buffer,
-                                      const Type& type
-                                      ): ColumnPrinter(buffer, type),
-                                         data(nullptr) {
-   // data starts out null instead of indeterminate; reset() binds it to a
-   // batch before rows are printed.
- }
-
- void DateColumnPrinter::printRow(uint64_t rowId) {
-   if (hasNulls && !notNull[rowId]) {
-     writeString(buffer, "null");
-     return;
-   }
-   // scale the stored value by 24*60*60 to get seconds and break that down
-   // as UTC
-   const time_t secondsValue = data[rowId] * 24 * 60 * 60;
-   struct tm brokenDown;
-   gmtime_r(&secondsValue, &brokenDown);
-   // "YYYY-MM-DD" plus the terminating NUL
-   char dateText[11];
-   strftime(dateText, sizeof(dateText), "%Y-%m-%d", &brokenDown);
-   writeChar(buffer, '"');
-   writeString(buffer, dateText);
-   writeChar(buffer, '"');
- }
-
- void DateColumnPrinter::reset(const ColumnVectorBatch& batch) {
-   ColumnPrinter::reset(batch);
-   // the batch must be a LongVectorBatch; the reference cast throws otherwise
-   const LongVectorBatch& longBatch =
-     dynamic_cast<const LongVectorBatch&>(batch);
-   data = longBatch.data.data();
- }
-
- BooleanColumnPrinter::BooleanColumnPrinter(std::string& buffer,
-                                            const Type& type
-                                            ): ColumnPrinter(buffer, type),
-                                               data(nullptr) {
-   // data starts out null instead of indeterminate; reset() binds it to a
-   // batch before rows are printed.
- }
-
- void BooleanColumnPrinter::printRow(uint64_t rowId) {
-   if (hasNulls && !notNull[rowId]) {
-     writeString(buffer, "null");
-     return;
-   }
-   // any non-zero value prints as true
-   if (data[rowId]) {
-     writeString(buffer, "true");
-   } else {
-     writeString(buffer, "false");
-   }
- }
-
- void BooleanColumnPrinter::reset(const ColumnVectorBatch& batch) {
-   ColumnPrinter::reset(batch);
-   // the batch must be a LongVectorBatch; the reference cast throws otherwise
-   const LongVectorBatch& longBatch =
-     dynamic_cast<const LongVectorBatch&>(batch);
-   data = longBatch.data.data();
- }
-
- BinaryColumnPrinter::BinaryColumnPrinter(std::string& buffer,
-                                          const Type& type
-                                          ): ColumnPrinter(buffer, type),
-                                             start(nullptr),
-                                             length(nullptr) {
-   // start and length begin as null instead of indeterminate; reset() binds
-   // them to a batch before rows are printed.
- }
-
- void BinaryColumnPrinter::printRow(uint64_t rowId) {
-   if (hasNulls && !notNull[rowId]) {
-     writeString(buffer, "null");
-     return;
-   }
-   const char* const bytes = start[rowId];
-   const int64_t byteCount = length[rowId];
-   writeChar(buffer, '[');
-   for(int64_t pos = 0; pos < byteCount; ++pos) {
-     if (pos > 0) {
-       writeString(buffer, ", ");
-     }
-     // mask to the low 8 bits so every byte prints as 0..255
-     char byteText[64];
-     snprintf(byteText, sizeof(byteText), "%d",
-              (static_cast<const int>(bytes[pos]) & 0xff));
-     writeString(buffer, byteText);
-   }
-   writeChar(buffer, ']');
- }
-
- void BinaryColumnPrinter::reset(const ColumnVectorBatch& batch) {
-   ColumnPrinter::reset(batch);
-   // the batch must be a StringVectorBatch; the reference cast throws
-   // otherwise
-   const StringVectorBatch& binaryBatch =
-     dynamic_cast<const StringVectorBatch&>(batch);
-   start = binaryBatch.data.data();
-   length = binaryBatch.length.data();
- }
-
- TimestampColumnPrinter::TimestampColumnPrinter(std::string& buffer,
-                                                const Type& type
-                                                ): ColumnPrinter(buffer,
-                                                                 type),
-                                                   data(nullptr) {
-   // data starts out null instead of indeterminate; reset() binds it.
-   // Start from a zeroed struct so no field handed to mktime() is
-   // indeterminate, then describe 1970-01-01 00:00:00 without DST.
-   struct tm epochTm = {};
-   epochTm.tm_sec = 0;
-   epochTm.tm_min = 0;
-   epochTm.tm_hour = 0;
-   epochTm.tm_mday = 1;
-   epochTm.tm_mon = 0;
-   epochTm.tm_year = 70;
-   epochTm.tm_isdst = 0;
-   // printRow() adds this to each row's seconds before calling localtime_r()
-   epoch = mktime(&epochTm);
- }
-
- /**
-  * Append one timestamp row as "YYYY-MM-DD HH:MM:SS.fraction" in double
-  * quotes, or the word null when the row is masked out.
-  * The stored value is split into whole seconds and a nanosecond
-  * remainder; trailing zeros of the fraction are dropped, and a zero
-  * fraction prints as a single 0.
-  */
- void TimestampColumnPrinter::printRow(uint64_t rowId) {
-   const int64_t NANOS_PER_SECOND = 1000000000;
-   const int64_t NANO_DIGITS = 9;
-   if (hasNulls && !notNull[rowId]) {
-     writeString(buffer, "null");
-   } else {
-     int64_t nanos = data[rowId] % NANOS_PER_SECOND;
-     time_t seconds =
-       static_cast<time_t>(data[rowId] / NANOS_PER_SECOND) + epoch;
-     // Division truncates toward zero, so a negative value leaves a
-     // negative remainder. Borrow one second and add it back as
-     // nanoseconds; flipping the remainder's sign instead would print the
-     // wrong fraction (for example .25 where .75 is meant).
-     // NOTE(review): assumes data holds a signed nanosecond count - confirm
-     // against the reader that fills the batch.
-     if (nanos < 0) {
-       seconds -= 1;
-       nanos += NANOS_PER_SECOND;
-     }
-     struct tm tmValue;
-     localtime_r(&seconds, &tmValue);
-     // "YYYY-MM-DD HH:MM:SS" plus the terminating NUL
-     char timeBuffer[20];
-     strftime(timeBuffer, sizeof(timeBuffer), "%Y-%m-%d %H:%M:%S", &tmValue);
-     writeChar(buffer, '"');
-     writeString(buffer, timeBuffer);
-     writeChar(buffer, '.');
-     // remove trailing zeros off the back of the nanos value.
-     int64_t zeroDigits = 0;
-     if (nanos == 0) {
-       // leaves a field width of one, so a zero fraction prints as "0"
-       zeroDigits = 8;
-     } else {
-       while (nanos % 10 == 0) {
-         nanos /= 10;
-         zeroDigits += 1;
-       }
-     }
-     // zero-pad on the left to the digits that remain, then close the quote
-     char numBuffer[64];
-     snprintf(numBuffer, sizeof(numBuffer),
-              "%0*" INT64_FORMAT_STRING "d\"",
-              static_cast<int>(NANO_DIGITS - zeroDigits),
-              static_cast<int64_t >(nanos));
-     writeString(buffer, numBuffer);
-   }
- }
-
- void TimestampColumnPrinter::reset(const ColumnVectorBatch& batch) {
-   ColumnPrinter::reset(batch);
-   // the batch must be a LongVectorBatch; the reference cast throws otherwise
-   const LongVectorBatch& longBatch =
-     dynamic_cast<const LongVectorBatch&>(batch);
-   data = longBatch.data.data();
- }
-}