You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by om...@apache.org on 2015/12/12 00:28:00 UTC
[03/16] hive git commit: HIVE-11890. Create ORC submodule. (omalley
reviewed by prasanthj)
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SerializationUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SerializationUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SerializationUtils.java
deleted file mode 100644
index 8cc836e..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SerializationUtils.java
+++ /dev/null
@@ -1,1291 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.io.EOFException;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.math.BigInteger;
-
-public final class SerializationUtils {
-
- // Size in bytes of each scratch buffer allocated in the constructor.
- private final static int BUFFER_SIZE = 64;
- // Per-instance scratch buffers reused by the bulk read/write helpers.
- // NOTE(review): because these are shared mutable state, an instance is
- // presumably not safe for concurrent use - confirm against callers.
- private final byte[] readBuffer;
- private final byte[] writeBuffer;
-
- /** Creates an instance with freshly allocated read and write scratch buffers. */
- public SerializationUtils() {
- this.readBuffer = new byte[BUFFER_SIZE];
- this.writeBuffer = new byte[BUFFER_SIZE];
- }
-
-  /**
-   * Writes a long, treated as unsigned, as a variable-length integer: seven
-   * data bits per byte, least significant group first, with the high bit set
-   * on every byte except the last.
-   *
-   * @param output the stream to write to
-   * @param value the value to encode
-   * @throws IOException if writing to output fails
-   */
-  void writeVulong(OutputStream output, long value) throws IOException {
-    long remaining = value;
-    // emit continuation bytes while more than seven significant bits remain
-    while ((remaining & ~0x7f) != 0) {
-      output.write((byte) (0x80 | (remaining & 0x7f)));
-      remaining >>>= 7;
-    }
-    // final byte: high bit clear
-    output.write((byte) remaining);
-  }
-
- void writeVslong(OutputStream output, long value) throws IOException {
- writeVulong(output, (value << 1) ^ (value >> 63));
- }
-
-
- long readVulong(InputStream in) throws IOException {
- long result = 0;
- long b;
- int offset = 0;
- do {
- b = in.read();
- if (b == -1) {
- throw new EOFException("Reading Vulong past EOF");
- }
- result |= (0x7f & b) << offset;
- offset += 7;
- } while (b >= 0x80);
- return result;
- }
-
- long readVslong(InputStream in) throws IOException {
- long result = readVulong(in);
- return (result >>> 1) ^ -(result & 1);
- }
-
-  /**
-   * Reads a float stored as four bytes, least significant byte first.
-   *
-   * @param in the stream to read from
-   * @return the decoded float
-   * @throws EOFException if the stream ends before four bytes were read
-   * @throws IOException if reading fails
-   */
-  public float readFloat(InputStream in) throws IOException {
-    int bits = 0;
-    for (int shift = 0; shift < 32; shift += 8) {
-      final int next = in.read();
-      // read() signals end of stream with -1; OR-ing that in would silently
-      // corrupt the value, so fail loudly instead
-      if (next == -1) {
-        throw new EOFException("Reading float past EOF");
-      }
-      bits |= next << shift;
-    }
-    return Float.intBitsToFloat(bits);
-  }
-
- void writeFloat(OutputStream output, float value) throws IOException {
- int ser = Float.floatToIntBits(value);
- output.write(ser & 0xff);
- output.write((ser >> 8) & 0xff);
- output.write((ser >> 16) & 0xff);
- output.write((ser >> 24) & 0xff);
- }
-
-  /**
-   * Reads a double stored as the eight little-endian bytes of its long bit
-   * pattern.
-   *
-   * @param in the stream to read from
-   * @return the decoded double
-   * @throws IOException if reading fails
-   */
-  public double readDouble(InputStream in) throws IOException {
-    final long bits = readLongLE(in);
-    return Double.longBitsToDouble(bits);
-  }
-
- long readLongLE(InputStream in) throws IOException {
- in.read(readBuffer, 0, 8);
- return (((readBuffer[0] & 0xff) << 0)
- + ((readBuffer[1] & 0xff) << 8)
- + ((readBuffer[2] & 0xff) << 16)
- + ((long) (readBuffer[3] & 0xff) << 24)
- + ((long) (readBuffer[4] & 0xff) << 32)
- + ((long) (readBuffer[5] & 0xff) << 40)
- + ((long) (readBuffer[6] & 0xff) << 48)
- + ((long) (readBuffer[7] & 0xff) << 56));
- }
-
- void writeDouble(OutputStream output, double value) throws IOException {
- writeLongLE(output, Double.doubleToLongBits(value));
- }
-
-  /**
-   * Stages the eight bytes of value in the shared write buffer, least
-   * significant byte first, and writes them to output in one call.
-   *
-   * @param output the stream to write to
-   * @param value the long to write
-   * @throws IOException if writing to output fails
-   */
-  private void writeLongLE(OutputStream output, long value) throws IOException {
-    for (int i = 0; i < 8; i++) {
-      writeBuffer[i] = (byte) ((value >> (8 * i)) & 0xff);
-    }
-    output.write(writeBuffer, 0, 8);
-  }
-
- /**
- * Write the arbitrarily sized signed BigInteger in vint format.
- *
- * Signed integers are encoded using the low bit as the sign bit using zigzag
- * encoding.
- *
- * Each byte uses the low 7 bits for data and the high bit for stop/continue.
- *
- * Bytes are stored LSB first.
- * @param output the stream to write to
- * @param value the value to output
- * @throws IOException if writing to output fails
- */
- static void writeBigInteger(OutputStream output,
- BigInteger value) throws IOException {
- // encode the signed number as a positive integer
- value = value.shiftLeft(1);
- int sign = value.signum();
- if (sign < 0) {
- // a negative v becomes 2*|v| - 1, so the low bit carries the sign
- value = value.negate();
- value = value.subtract(BigInteger.ONE);
- }
- // number of significant bits that still have to be written
- int length = value.bitLength();
- while (true) {
- // take the low 63 bits of what is left: nine groups of seven bits
- long lowBits = value.longValue() & 0x7fffffffffffffffL;
- length -= 63;
- // write out the next 63 bits worth of data
- for(int i=0; i < 9; ++i) {
- // if this is the last byte, leave the high bit off
- // (length <= 0 means no further 63-bit chunk follows this one)
- if (length <= 0 && (lowBits & ~0x7f) == 0) {
- output.write((byte) lowBits);
- return;
- } else {
- output.write((byte) (0x80 | (lowBits & 0x7f)));
- lowBits >>>= 7;
- }
- }
- // drop the chunk just written and continue with the higher bits
- value = value.shiftRight(63);
- }
- }
-
- /**
- * Read the signed, arbitrarily sized BigInteger in vint format: seven data
- * bits per byte, least significant group first, zigzag encoded.
- * @param input the stream to read from
- * @return the read BigInteger
- * @throws IOException if reading fails; an EOFException is thrown when the
- * stream ends before the terminating byte
- */
- static BigInteger readBigInteger(InputStream input) throws IOException {
- BigInteger result = BigInteger.ZERO;
- // accumulates up to 63 bits (nine 7-bit groups) before merging into result
- long work = 0;
- // total number of data bits consumed so far
- int offset = 0;
- long b;
- do {
- b = input.read();
- if (b == -1) {
- throw new EOFException("Reading BigInteger past EOF from " + input);
- }
- work |= (0x7f & b) << (offset % 63);
- offset += 7;
- // if we've read 63 bits, roll them into the result
- if (offset == 63) {
- result = BigInteger.valueOf(work);
- work = 0;
- } else if (offset % 63 == 0) {
- // later full chunks are shifted to the position of their first bit
- result = result.or(BigInteger.valueOf(work).shiftLeft(offset-63));
- work = 0;
- }
- } while (b >= 0x80);
- // merge a trailing partial chunk, placed after the last full 63-bit chunk
- if (work != 0) {
- result = result.or(BigInteger.valueOf(work).shiftLeft((offset/63)*63));
- }
- // convert back to a signed number
- boolean isNegative = result.testBit(0);
- if (isNegative) {
- result = result.add(BigInteger.ONE);
- result = result.negate();
- }
- result = result.shiftRight(1);
- return result;
- }
-
- /**
- * The fixed bit widths that can be encoded. The declaration order matters:
- * encodeBitWidth and decodeBitWidth use each constant's ordinal as the
- * encoded form of the width, so constants must not be reordered.
- */
- enum FixedBitSizes {
- ONE, TWO, THREE, FOUR, FIVE, SIX, SEVEN, EIGHT, NINE, TEN, ELEVEN, TWELVE,
- THIRTEEN, FOURTEEN, FIFTEEN, SIXTEEN, SEVENTEEN, EIGHTEEN, NINETEEN,
- TWENTY, TWENTYONE, TWENTYTWO, TWENTYTHREE, TWENTYFOUR, TWENTYSIX,
- TWENTYEIGHT, THIRTY, THIRTYTWO, FORTY, FORTYEIGHT, FIFTYSIX, SIXTYFOUR;
- }
-
- /**
- * Count the number of bits required to encode the given value
- * @param value
- * @return bits required to store value
- */
- int findClosestNumBits(long value) {
- int count = 0;
- while (value != 0) {
- count++;
- value = value >>> 1;
- }
- return getClosestFixedBits(count);
- }
-
- /**
- * zigzag encode the given value
- * @param val
- * @return zigzag encoded value
- */
- long zigzagEncode(long val) {
- return (val << 1) ^ (val >> 63);
- }
-
- /**
- * zigzag decode the given value
- * @param val
- * @return zizag decoded value
- */
- long zigzagDecode(long val) {
- return (val >>> 1) ^ -(val & 1);
- }
-
- /**
- * Compute the bits required to represent pth percentile value
- * @param data - array
- * @param p - percentile value (>=0.0 to <=1.0)
- * @return pth percentile bits
- */
- int percentileBits(long[] data, int offset, int length, double p) {
- if ((p > 1.0) || (p <= 0.0)) {
- return -1;
- }
-
- // histogram that store the encoded bit requirement for each values.
- // maximum number of bits that can encoded is 32 (refer FixedBitSizes)
- int[] hist = new int[32];
-
- // compute the histogram
- for(int i = offset; i < (offset + length); i++) {
- int idx = encodeBitWidth(findClosestNumBits(data[i]));
- hist[idx] += 1;
- }
-
- int perLen = (int) (length * (1.0 - p));
-
- // return the bits required by pth percentile length
- for(int i = hist.length - 1; i >= 0; i--) {
- perLen -= hist[i];
- if (perLen < 0) {
- return decodeBitWidth(i);
- }
- }
-
- return 0;
- }
-
- /**
- * Read n bytes in big endian order and convert to long
- * @param b - byte array
- * @return long value
- */
- long bytesToLongBE(InStream input, int n) throws IOException {
- long out = 0;
- long val = 0;
- while (n > 0) {
- n--;
- // store it in a long and then shift else integer overflow will occur
- val = input.read();
- out |= (val << (n * 8));
- }
- return out;
- }
-
- /**
- * Calculate the number of bytes required
- * @param n - number of values
- * @param numBits - bit width
- * @return number of bytes required
- */
- int getTotalBytesRequired(int n, int numBits) {
- return (n * numBits + 7) / 8;
- }
-
-  /**
-   * Rounds a bit count up to the closest supported fixed bit width: 1 through
-   * 24, then 26, 28, 30, 32, 40, 48, 56 and 64. Zero maps to 1; anything
-   * outside 0..56 (including negative input) maps to 64.
-   *
-   * @param n the bit count
-   * @return the closest supported fixed bit width
-   */
-  int getClosestFixedBits(int n) {
-    if (n == 0) {
-      return 1;
-    }
-    if (n < 0 || n > 56) {
-      return 64;
-    }
-    if (n <= 24) {
-      return n;
-    }
-    if (n <= 26) {
-      return 26;
-    }
-    if (n <= 28) {
-      return 28;
-    }
-    if (n <= 30) {
-      return 30;
-    }
-    if (n <= 32) {
-      return 32;
-    }
-    if (n <= 40) {
-      return 40;
-    }
-    if (n <= 48) {
-      return 48;
-    }
-    return 56;
-  }
-
-  /**
-   * Rounds a bit count up to the closest width from 1, 2, 4, 8, 16, 24, 32,
-   * 40, 48, 56 and 64. Zero maps to 1; anything outside 0..56 (including
-   * negative input) maps to 64.
-   *
-   * @param n the bit count
-   * @return the closest aligned fixed bit width
-   */
-  public int getClosestAlignedFixedBits(int n) {
-    if (n < 0 || n > 56) {
-      return 64;
-    }
-    if (n <= 1) {
-      return 1;
-    }
-    if (n <= 2) {
-      return 2;
-    }
-    if (n <= 4) {
-      return 4;
-    }
-    if (n <= 8) {
-      return 8;
-    }
-    if (n <= 16) {
-      return 16;
-    }
-    if (n <= 24) {
-      return 24;
-    }
-    if (n <= 32) {
-      return 32;
-    }
-    if (n <= 40) {
-      return 40;
-    }
-    if (n <= 48) {
-      return 48;
-    }
-    return 56;
-  }
-
- /**
- * Finds the closest available fixed bit width match and returns its encoded
- * value (ordinal)
- * @param n - fixed bit width to encode
- * @return encoded fixed bit width
- */
- int encodeBitWidth(int n) {
- n = getClosestFixedBits(n);
-
- if (n >= 1 && n <= 24) {
- return n - 1;
- } else if (n > 24 && n <= 26) {
- return FixedBitSizes.TWENTYSIX.ordinal();
- } else if (n > 26 && n <= 28) {
- return FixedBitSizes.TWENTYEIGHT.ordinal();
- } else if (n > 28 && n <= 30) {
- return FixedBitSizes.THIRTY.ordinal();
- } else if (n > 30 && n <= 32) {
- return FixedBitSizes.THIRTYTWO.ordinal();
- } else if (n > 32 && n <= 40) {
- return FixedBitSizes.FORTY.ordinal();
- } else if (n > 40 && n <= 48) {
- return FixedBitSizes.FORTYEIGHT.ordinal();
- } else if (n > 48 && n <= 56) {
- return FixedBitSizes.FIFTYSIX.ordinal();
- } else {
- return FixedBitSizes.SIXTYFOUR.ordinal();
- }
- }
-
- /**
- * Decodes the ordinal fixed bit value to actual fixed bit width value
- * @param n - encoded fixed bit width
- * @return decoded fixed bit width
- */
- int decodeBitWidth(int n) {
- if (n >= FixedBitSizes.ONE.ordinal()
- && n <= FixedBitSizes.TWENTYFOUR.ordinal()) {
- return n + 1;
- } else if (n == FixedBitSizes.TWENTYSIX.ordinal()) {
- return 26;
- } else if (n == FixedBitSizes.TWENTYEIGHT.ordinal()) {
- return 28;
- } else if (n == FixedBitSizes.THIRTY.ordinal()) {
- return 30;
- } else if (n == FixedBitSizes.THIRTYTWO.ordinal()) {
- return 32;
- } else if (n == FixedBitSizes.FORTY.ordinal()) {
- return 40;
- } else if (n == FixedBitSizes.FORTYEIGHT.ordinal()) {
- return 48;
- } else if (n == FixedBitSizes.FIFTYSIX.ordinal()) {
- return 56;
- } else {
- return 64;
- }
- }
-
- /**
- * Bitpack and write the input values to underlying output stream.
- *
- * Nothing is written when input is null or empty, offset is negative, or
- * len or bitSize is less than 1. Bit sizes 1, 2, 4 and the multiples of 8 up
- * to 64 are handed to the unrolled packers; every other size goes through
- * the generic loop below, which packs the most significant bits first.
- * @param input - values to write
- * @param offset - offset
- * @param len - length
- * @param bitSize - bit width
- * @param output - output stream
- * @throws IOException if writing to output fails
- */
- void writeInts(long[] input, int offset, int len, int bitSize,
- OutputStream output) throws IOException {
- if (input == null || input.length < 1 || offset < 0 || len < 1
- || bitSize < 1) {
- return;
- }
-
- switch (bitSize) {
- case 1:
- unrolledBitPack1(input, offset, len, output);
- return;
- case 2:
- unrolledBitPack2(input, offset, len, output);
- return;
- case 4:
- unrolledBitPack4(input, offset, len, output);
- return;
- case 8:
- unrolledBitPack8(input, offset, len, output);
- return;
- case 16:
- unrolledBitPack16(input, offset, len, output);
- return;
- case 24:
- unrolledBitPack24(input, offset, len, output);
- return;
- case 32:
- unrolledBitPack32(input, offset, len, output);
- return;
- case 40:
- unrolledBitPack40(input, offset, len, output);
- return;
- case 48:
- unrolledBitPack48(input, offset, len, output);
- return;
- case 56:
- unrolledBitPack56(input, offset, len, output);
- return;
- case 64:
- unrolledBitPack64(input, offset, len, output);
- return;
- default:
- break;
- }
-
- // generic path: bitsLeft is the free space in the byte being assembled
- int bitsLeft = 8;
- byte current = 0;
- for(int i = offset; i < (offset + len); i++) {
- long value = input[i];
- int bitsToWrite = bitSize;
- // the value does not fit in the current byte: fill it and start a new one
- while (bitsToWrite > bitsLeft) {
- // add the bits to the bottom of the current word
- current |= value >>> (bitsToWrite - bitsLeft);
- // subtract out the bits we just added
- bitsToWrite -= bitsLeft;
- // zero out the bits above bitsToWrite
- value &= (1L << bitsToWrite) - 1;
- output.write(current);
- current = 0;
- bitsLeft = 8;
- }
- // the rest of the value fits: place it just below the bits already used
- bitsLeft -= bitsToWrite;
- current |= value << bitsLeft;
- if (bitsLeft == 0) {
- output.write(current);
- current = 0;
- bitsLeft = 8;
- }
- }
-
- // flush
- if (bitsLeft != 8) {
- output.write(current);
- // these two resets have no effect; the method returns right after
- current = 0;
- bitsLeft = 8;
- }
- }
-
- private void unrolledBitPack1(long[] input, int offset, int len,
- OutputStream output) throws IOException {
- final int numHops = 8;
- final int remainder = len % numHops;
- final int endOffset = offset + len;
- final int endUnroll = endOffset - remainder;
- int val = 0;
- for (int i = offset; i < endUnroll; i = i + numHops) {
- val = (int) (val | ((input[i] & 1) << 7)
- | ((input[i + 1] & 1) << 6)
- | ((input[i + 2] & 1) << 5)
- | ((input[i + 3] & 1) << 4)
- | ((input[i + 4] & 1) << 3)
- | ((input[i + 5] & 1) << 2)
- | ((input[i + 6] & 1) << 1)
- | (input[i + 7]) & 1);
- output.write(val);
- val = 0;
- }
-
- if (remainder > 0) {
- int startShift = 7;
- for (int i = endUnroll; i < endOffset; i++) {
- val = (int) (val | (input[i] & 1) << startShift);
- startShift -= 1;
- }
- output.write(val);
- }
- }
-
- private void unrolledBitPack2(long[] input, int offset, int len,
- OutputStream output) throws IOException {
- final int numHops = 4;
- final int remainder = len % numHops;
- final int endOffset = offset + len;
- final int endUnroll = endOffset - remainder;
- int val = 0;
- for (int i = offset; i < endUnroll; i = i + numHops) {
- val = (int) (val | ((input[i] & 3) << 6)
- | ((input[i + 1] & 3) << 4)
- | ((input[i + 2] & 3) << 2)
- | (input[i + 3]) & 3);
- output.write(val);
- val = 0;
- }
-
- if (remainder > 0) {
- int startShift = 6;
- for (int i = endUnroll; i < endOffset; i++) {
- val = (int) (val | (input[i] & 3) << startShift);
- startShift -= 2;
- }
- output.write(val);
- }
- }
-
-  /**
-   * Packs the lowest four bits of each value, two values per byte, first
-   * value in the high nibble. A single trailing value is written in the high
-   * nibble of a byte whose low nibble is zero.
-   *
-   * @param input values to pack
-   * @param offset index of the first value
-   * @param len number of values
-   * @param output stream to write to
-   * @throws IOException if writing to output fails
-   */
-  private void unrolledBitPack4(long[] input, int offset, int len,
-      OutputStream output) throws IOException {
-    final int valuesPerByte = 2;
-    final int tail = len % valuesPerByte;
-    final int stop = offset + len;
-    final int fullStop = stop - tail;
-    int pos = offset;
-    while (pos < fullStop) {
-      final long packed = ((input[pos] & 15) << 4) | (input[pos + 1] & 15);
-      output.write((int) packed);
-      pos += valuesPerByte;
-    }
-
-    if (tail > 0) {
-      int partial = 0;
-      int shift = 4;
-      for (int k = fullStop; k < stop; k++) {
-        partial = (int) (partial | ((input[k] & 15) << shift));
-        shift -= 4;
-      }
-      output.write(partial);
-    }
-  }
-
- private void unrolledBitPack8(long[] input, int offset, int len,
- OutputStream output) throws IOException {
- unrolledBitPackBytes(input, offset, len, output, 1);
- }
-
- private void unrolledBitPack16(long[] input, int offset, int len,
- OutputStream output) throws IOException {
- unrolledBitPackBytes(input, offset, len, output, 2);
- }
-
- private void unrolledBitPack24(long[] input, int offset, int len,
- OutputStream output) throws IOException {
- unrolledBitPackBytes(input, offset, len, output, 3);
- }
-
- private void unrolledBitPack32(long[] input, int offset, int len,
- OutputStream output) throws IOException {
- unrolledBitPackBytes(input, offset, len, output, 4);
- }
-
- private void unrolledBitPack40(long[] input, int offset, int len,
- OutputStream output) throws IOException {
- unrolledBitPackBytes(input, offset, len, output, 5);
- }
-
- private void unrolledBitPack48(long[] input, int offset, int len,
- OutputStream output) throws IOException {
- unrolledBitPackBytes(input, offset, len, output, 6);
- }
-
- private void unrolledBitPack56(long[] input, int offset, int len,
- OutputStream output) throws IOException {
- unrolledBitPackBytes(input, offset, len, output, 7);
- }
-
- private void unrolledBitPack64(long[] input, int offset, int len,
- OutputStream output) throws IOException {
- unrolledBitPackBytes(input, offset, len, output, 8);
- }
-
-  /**
-   * Writes len values starting at offset, numBytes bytes per value. Values
-   * are processed in groups of eight through {@code writeLongBE}; a trailing
-   * group of fewer than eight goes through {@code writeRemainingLongs}.
-   *
-   * @param input values to write
-   * @param offset index of the first value
-   * @param len number of values
-   * @param output stream to write to
-   * @param numBytes bytes written per value
-   * @throws IOException if writing to output fails
-   */
-  private void unrolledBitPackBytes(long[] input, int offset, int len, OutputStream output,
-      int numBytes) throws IOException {
-    final int groupSize = 8;
-    final int tail = len % groupSize;
-    final int fullStop = offset + len - tail;
-    int pos = offset;
-    while (pos < fullStop) {
-      writeLongBE(output, input, pos, groupSize, numBytes);
-      pos += groupSize;
-    }
-
-    if (tail > 0) {
-      writeRemainingLongs(output, pos, input, tail, numBytes);
-    }
-  }
-
-  /**
-   * Stages {@code remainder} values from input (starting at offset) in the
-   * shared write buffer, numBytes bytes each with the most significant byte
-   * first, then writes {@code remainder * numBytes} bytes to output. For a
-   * numBytes outside 1..8 nothing is staged before that write.
-   *
-   * @param output stream to write to
-   * @param offset index of the first value in input
-   * @param input values to write
-   * @param remainder number of values to write
-   * @param numBytes bytes written per value
-   * @throws IOException if writing to output fails
-   */
-  private void writeRemainingLongs(OutputStream output, int offset, long[] input, int remainder,
-      int numBytes) throws IOException {
-    for (int idx = 0; idx < remainder; idx++) {
-      // position of this value inside the write buffer
-      final int at = idx * numBytes;
-      switch (numBytes) {
-        case 1:
-          writeBuffer[at] = (byte) (input[offset + idx] & 255);
-          break;
-        case 2:
-          writeLongBE2(output, input[offset + idx], at);
-          break;
-        case 3:
-          writeLongBE3(output, input[offset + idx], at);
-          break;
-        case 4:
-          writeLongBE4(output, input[offset + idx], at);
-          break;
-        case 5:
-          writeLongBE5(output, input[offset + idx], at);
-          break;
-        case 6:
-          writeLongBE6(output, input[offset + idx], at);
-          break;
-        case 7:
-          writeLongBE7(output, input[offset + idx], at);
-          break;
-        case 8:
-          writeLongBE8(output, input[offset + idx], at);
-          break;
-        default:
-          break;
-      }
-    }
-
-    output.write(writeBuffer, 0, remainder * numBytes);
-  }
-
- /**
- * Stages eight consecutive values from input, starting at offset, in the
- * shared write buffer using numBytes bytes per value (most significant byte
- * first), then writes numHops * numBytes bytes of that buffer to output.
- *
- * Every case is written out for exactly eight values, so the staged data
- * only matches the written length when numHops is 8, which is what
- * unrolledBitPackBytes passes. For a numBytes outside 1..8 nothing is staged
- * before the write.
- * @param output - output stream
- * @param input - values to write
- * @param offset - index of the first of the eight values
- * @param numHops - number of values covered by the final write
- * @param numBytes - bytes written per value
- * @throws IOException if writing to output fails
- */
- private void writeLongBE(OutputStream output, long[] input, int offset, int numHops, int numBytes) throws IOException {
-
- switch (numBytes) {
- case 1:
- writeBuffer[0] = (byte) (input[offset + 0] & 255);
- writeBuffer[1] = (byte) (input[offset + 1] & 255);
- writeBuffer[2] = (byte) (input[offset + 2] & 255);
- writeBuffer[3] = (byte) (input[offset + 3] & 255);
- writeBuffer[4] = (byte) (input[offset + 4] & 255);
- writeBuffer[5] = (byte) (input[offset + 5] & 255);
- writeBuffer[6] = (byte) (input[offset + 6] & 255);
- writeBuffer[7] = (byte) (input[offset + 7] & 255);
- break;
- case 2:
- // the last argument is the value's byte position in the write buffer
- writeLongBE2(output, input[offset + 0], 0);
- writeLongBE2(output, input[offset + 1], 2);
- writeLongBE2(output, input[offset + 2], 4);
- writeLongBE2(output, input[offset + 3], 6);
- writeLongBE2(output, input[offset + 4], 8);
- writeLongBE2(output, input[offset + 5], 10);
- writeLongBE2(output, input[offset + 6], 12);
- writeLongBE2(output, input[offset + 7], 14);
- break;
- case 3:
- writeLongBE3(output, input[offset + 0], 0);
- writeLongBE3(output, input[offset + 1], 3);
- writeLongBE3(output, input[offset + 2], 6);
- writeLongBE3(output, input[offset + 3], 9);
- writeLongBE3(output, input[offset + 4], 12);
- writeLongBE3(output, input[offset + 5], 15);
- writeLongBE3(output, input[offset + 6], 18);
- writeLongBE3(output, input[offset + 7], 21);
- break;
- case 4:
- writeLongBE4(output, input[offset + 0], 0);
- writeLongBE4(output, input[offset + 1], 4);
- writeLongBE4(output, input[offset + 2], 8);
- writeLongBE4(output, input[offset + 3], 12);
- writeLongBE4(output, input[offset + 4], 16);
- writeLongBE4(output, input[offset + 5], 20);
- writeLongBE4(output, input[offset + 6], 24);
- writeLongBE4(output, input[offset + 7], 28);
- break;
- case 5:
- writeLongBE5(output, input[offset + 0], 0);
- writeLongBE5(output, input[offset + 1], 5);
- writeLongBE5(output, input[offset + 2], 10);
- writeLongBE5(output, input[offset + 3], 15);
- writeLongBE5(output, input[offset + 4], 20);
- writeLongBE5(output, input[offset + 5], 25);
- writeLongBE5(output, input[offset + 6], 30);
- writeLongBE5(output, input[offset + 7], 35);
- break;
- case 6:
- writeLongBE6(output, input[offset + 0], 0);
- writeLongBE6(output, input[offset + 1], 6);
- writeLongBE6(output, input[offset + 2], 12);
- writeLongBE6(output, input[offset + 3], 18);
- writeLongBE6(output, input[offset + 4], 24);
- writeLongBE6(output, input[offset + 5], 30);
- writeLongBE6(output, input[offset + 6], 36);
- writeLongBE6(output, input[offset + 7], 42);
- break;
- case 7:
- writeLongBE7(output, input[offset + 0], 0);
- writeLongBE7(output, input[offset + 1], 7);
- writeLongBE7(output, input[offset + 2], 14);
- writeLongBE7(output, input[offset + 3], 21);
- writeLongBE7(output, input[offset + 4], 28);
- writeLongBE7(output, input[offset + 5], 35);
- writeLongBE7(output, input[offset + 6], 42);
- writeLongBE7(output, input[offset + 7], 49);
- break;
- case 8:
- // eight values of eight bytes fill the 64-byte write buffer exactly
- writeLongBE8(output, input[offset + 0], 0);
- writeLongBE8(output, input[offset + 1], 8);
- writeLongBE8(output, input[offset + 2], 16);
- writeLongBE8(output, input[offset + 3], 24);
- writeLongBE8(output, input[offset + 4], 32);
- writeLongBE8(output, input[offset + 5], 40);
- writeLongBE8(output, input[offset + 6], 48);
- writeLongBE8(output, input[offset + 7], 56);
- break;
- default:
- break;
- }
-
- final int toWrite = numHops * numBytes;
- output.write(writeBuffer, 0, toWrite);
- }
-
-  /**
-   * Stages the low two bytes of val in the shared write buffer at wbOffset,
-   * most significant byte first. The output parameter is not used here.
-   */
-  private void writeLongBE2(OutputStream output, long val, int wbOffset) {
-    int pos = wbOffset;
-    writeBuffer[pos++] = (byte) (val >>> 8);
-    writeBuffer[pos] = (byte) val;
-  }
-
-  /**
-   * Stages the low three bytes of val in the shared write buffer at wbOffset,
-   * most significant byte first. The output parameter is not used here.
-   */
-  private void writeLongBE3(OutputStream output, long val, int wbOffset) {
-    int pos = wbOffset;
-    writeBuffer[pos++] = (byte) (val >>> 16);
-    writeBuffer[pos++] = (byte) (val >>> 8);
-    writeBuffer[pos] = (byte) val;
-  }
-
-  /**
-   * Stages the low four bytes of val in the shared write buffer at wbOffset,
-   * most significant byte first. The output parameter is not used here.
-   */
-  private void writeLongBE4(OutputStream output, long val, int wbOffset) {
-    int pos = wbOffset;
-    writeBuffer[pos++] = (byte) (val >>> 24);
-    writeBuffer[pos++] = (byte) (val >>> 16);
-    writeBuffer[pos++] = (byte) (val >>> 8);
-    writeBuffer[pos] = (byte) val;
-  }
-
-  /**
-   * Stages the low five bytes of val in the shared write buffer at wbOffset,
-   * most significant byte first. The output parameter is not used here.
-   */
-  private void writeLongBE5(OutputStream output, long val, int wbOffset) {
-    int pos = wbOffset;
-    writeBuffer[pos++] = (byte) (val >>> 32);
-    writeBuffer[pos++] = (byte) (val >>> 24);
-    writeBuffer[pos++] = (byte) (val >>> 16);
-    writeBuffer[pos++] = (byte) (val >>> 8);
-    writeBuffer[pos] = (byte) val;
-  }
-
-  /**
-   * Stages the low six bytes of val in the shared write buffer at wbOffset,
-   * most significant byte first. The output parameter is not used here.
-   */
-  private void writeLongBE6(OutputStream output, long val, int wbOffset) {
-    int pos = wbOffset;
-    writeBuffer[pos++] = (byte) (val >>> 40);
-    writeBuffer[pos++] = (byte) (val >>> 32);
-    writeBuffer[pos++] = (byte) (val >>> 24);
-    writeBuffer[pos++] = (byte) (val >>> 16);
-    writeBuffer[pos++] = (byte) (val >>> 8);
-    writeBuffer[pos] = (byte) val;
-  }
-
-  /**
-   * Stages the low seven bytes of val in the shared write buffer at wbOffset,
-   * most significant byte first. The output parameter is not used here.
-   */
-  private void writeLongBE7(OutputStream output, long val, int wbOffset) {
-    int pos = wbOffset;
-    writeBuffer[pos++] = (byte) (val >>> 48);
-    writeBuffer[pos++] = (byte) (val >>> 40);
-    writeBuffer[pos++] = (byte) (val >>> 32);
-    writeBuffer[pos++] = (byte) (val >>> 24);
-    writeBuffer[pos++] = (byte) (val >>> 16);
-    writeBuffer[pos++] = (byte) (val >>> 8);
-    writeBuffer[pos] = (byte) val;
-  }
-
-  /**
-   * Stages all eight bytes of val in the shared write buffer at wbOffset,
-   * most significant byte first. The output parameter is not used here.
-   */
-  private void writeLongBE8(OutputStream output, long val, int wbOffset) {
-    int pos = wbOffset;
-    writeBuffer[pos++] = (byte) (val >>> 56);
-    writeBuffer[pos++] = (byte) (val >>> 48);
-    writeBuffer[pos++] = (byte) (val >>> 40);
-    writeBuffer[pos++] = (byte) (val >>> 32);
-    writeBuffer[pos++] = (byte) (val >>> 24);
-    writeBuffer[pos++] = (byte) (val >>> 16);
-    writeBuffer[pos++] = (byte) (val >>> 8);
-    writeBuffer[pos] = (byte) val;
-  }
-
- /**
- * Read bitpacked integers from input stream.
- *
- * Bit sizes 1, 2, 4 and the multiples of 8 up to 64 are handed to the
- * unrolled unpackers; every other size goes through the generic loop below,
- * which consumes the most significant bits of each byte first.
- * NOTE(review): the generic loop does not check input.read() for an
- * end-of-stream marker - confirm callers guarantee enough data.
- * @param buffer - array that receives the decoded values
- * @param offset - index in buffer of the first value to store
- * @param len - number of values to read
- * @param bitSize - bit width
- * @param input - input stream
- * @throws IOException if reading from input fails
- */
- void readInts(long[] buffer, int offset, int len, int bitSize,
- InStream input) throws IOException {
- // bitsLeft counts the bits of 'current' that have not been consumed yet
- int bitsLeft = 0;
- int current = 0;
-
- switch (bitSize) {
- case 1:
- unrolledUnPack1(buffer, offset, len, input);
- return;
- case 2:
- unrolledUnPack2(buffer, offset, len, input);
- return;
- case 4:
- unrolledUnPack4(buffer, offset, len, input);
- return;
- case 8:
- unrolledUnPack8(buffer, offset, len, input);
- return;
- case 16:
- unrolledUnPack16(buffer, offset, len, input);
- return;
- case 24:
- unrolledUnPack24(buffer, offset, len, input);
- return;
- case 32:
- unrolledUnPack32(buffer, offset, len, input);
- return;
- case 40:
- unrolledUnPack40(buffer, offset, len, input);
- return;
- case 48:
- unrolledUnPack48(buffer, offset, len, input);
- return;
- case 56:
- unrolledUnPack56(buffer, offset, len, input);
- return;
- case 64:
- unrolledUnPack64(buffer, offset, len, input);
- return;
- default:
- break;
- }
-
- for(int i = offset; i < (offset + len); i++) {
- long result = 0;
- int bitsLeftToRead = bitSize;
- // use up whatever remains of the current byte, then fetch the next one
- while (bitsLeftToRead > bitsLeft) {
- result <<= bitsLeft;
- result |= current & ((1 << bitsLeft) - 1);
- bitsLeftToRead -= bitsLeft;
- current = input.read();
- bitsLeft = 8;
- }
-
- // handle the left over bits
- if (bitsLeftToRead > 0) {
- result <<= bitsLeftToRead;
- bitsLeft -= bitsLeftToRead;
- result |= (current >> bitsLeft) & ((1 << bitsLeftToRead) - 1);
- }
- buffer[i] = result;
- }
- }
-
-
-  /**
-   * Unpacks len one-bit values into buffer starting at offset, eight values
-   * per input byte with the first value in the most significant bit. A
-   * trailing group of fewer than eight values consumes one more byte.
-   *
-   * @param buffer array that receives the values
-   * @param offset index of the first value to store
-   * @param len number of values
-   * @param input stream to read from
-   * @throws IOException if reading from input fails
-   */
-  private void unrolledUnPack1(long[] buffer, int offset, int len,
-      InStream input) throws IOException {
-    final int valuesPerByte = 8;
-    final int tail = len % valuesPerByte;
-    final int stop = offset + len;
-    final int fullStop = stop - tail;
-    int pos = offset;
-    while (pos < fullStop) {
-      final int packed = input.read();
-      buffer[pos] = (packed >>> 7) & 1;
-      buffer[pos + 1] = (packed >>> 6) & 1;
-      buffer[pos + 2] = (packed >>> 5) & 1;
-      buffer[pos + 3] = (packed >>> 4) & 1;
-      buffer[pos + 4] = (packed >>> 3) & 1;
-      buffer[pos + 5] = (packed >>> 2) & 1;
-      buffer[pos + 6] = (packed >>> 1) & 1;
-      buffer[pos + 7] = packed & 1;
-      pos += valuesPerByte;
-    }
-
-    if (tail > 0) {
-      final int packed = input.read();
-      int shift = 7;
-      for (int k = fullStop; k < stop; k++) {
-        buffer[k] = (packed >>> shift) & 1;
-        shift -= 1;
-      }
-    }
-  }
-
-  /**
-   * Unpacks len two-bit values into buffer starting at offset, four values
-   * per input byte with the first value in the most significant bits. A
-   * trailing group of fewer than four values consumes one more byte.
-   *
-   * @param buffer array that receives the values
-   * @param offset index of the first value to store
-   * @param len number of values
-   * @param input stream to read from
-   * @throws IOException if reading from input fails
-   */
-  private void unrolledUnPack2(long[] buffer, int offset, int len,
-      InStream input) throws IOException {
-    final int valuesPerByte = 4;
-    final int tail = len % valuesPerByte;
-    final int stop = offset + len;
-    final int fullStop = stop - tail;
-    int pos = offset;
-    while (pos < fullStop) {
-      final int packed = input.read();
-      buffer[pos] = (packed >>> 6) & 3;
-      buffer[pos + 1] = (packed >>> 4) & 3;
-      buffer[pos + 2] = (packed >>> 2) & 3;
-      buffer[pos + 3] = packed & 3;
-      pos += valuesPerByte;
-    }
-
-    if (tail > 0) {
-      final int packed = input.read();
-      int shift = 6;
-      for (int k = fullStop; k < stop; k++) {
-        buffer[k] = (packed >>> shift) & 3;
-        shift -= 2;
-      }
-    }
-  }
-
-  /**
-   * Unpacks len four-bit values into buffer starting at offset, two values
-   * per input byte with the first value in the high nibble. A single trailing
-   * value consumes one more byte and is taken from its high nibble.
-   *
-   * @param buffer array that receives the values
-   * @param offset index of the first value to store
-   * @param len number of values
-   * @param input stream to read from
-   * @throws IOException if reading from input fails
-   */
-  private void unrolledUnPack4(long[] buffer, int offset, int len,
-      InStream input) throws IOException {
-    final int valuesPerByte = 2;
-    final int tail = len % valuesPerByte;
-    final int stop = offset + len;
-    final int fullStop = stop - tail;
-    int pos = offset;
-    while (pos < fullStop) {
-      final int packed = input.read();
-      buffer[pos] = (packed >>> 4) & 15;
-      buffer[pos + 1] = packed & 15;
-      pos += valuesPerByte;
-    }
-
-    if (tail > 0) {
-      final int packed = input.read();
-      int shift = 4;
-      for (int k = fullStop; k < stop; k++) {
-        buffer[k] = (packed >>> shift) & 15;
-        shift -= 4;
-      }
-    }
-  }
-
- private void unrolledUnPack8(long[] buffer, int offset, int len,
- InStream input) throws IOException {
- unrolledUnPackBytes(buffer, offset, len, input, 1);
- }
-
- private void unrolledUnPack16(long[] buffer, int offset, int len,
- InStream input) throws IOException {
- unrolledUnPackBytes(buffer, offset, len, input, 2);
- }
-
- private void unrolledUnPack24(long[] buffer, int offset, int len,
- InStream input) throws IOException {
- unrolledUnPackBytes(buffer, offset, len, input, 3);
- }
-
- private void unrolledUnPack32(long[] buffer, int offset, int len,
- InStream input) throws IOException {
- unrolledUnPackBytes(buffer, offset, len, input, 4);
- }
-
- private void unrolledUnPack40(long[] buffer, int offset, int len,
- InStream input) throws IOException {
- unrolledUnPackBytes(buffer, offset, len, input, 5);
- }
-
- private void unrolledUnPack48(long[] buffer, int offset, int len,
- InStream input) throws IOException {
- unrolledUnPackBytes(buffer, offset, len, input, 6);
- }
-
- private void unrolledUnPack56(long[] buffer, int offset, int len,
- InStream input) throws IOException {
- unrolledUnPackBytes(buffer, offset, len, input, 7);
- }
-
- private void unrolledUnPack64(long[] buffer, int offset, int len,
- InStream input) throws IOException {
- unrolledUnPackBytes(buffer, offset, len, input, 8);
- }
-
-  /**
-   * Reads len values into buffer starting at offset, numBytes bytes per
-   * value. Values are processed in groups of eight through
-   * {@code readLongBE}; a trailing group of fewer than eight goes through
-   * {@code readRemainingLongs}.
-   *
-   * @param buffer array that receives the values
-   * @param offset index of the first value to store
-   * @param len number of values
-   * @param input stream to read from
-   * @param numBytes bytes read per value
-   * @throws IOException if reading from input fails
-   */
-  private void unrolledUnPackBytes(long[] buffer, int offset, int len, InStream input, int numBytes)
-      throws IOException {
-    final int groupSize = 8;
-    final int tail = len % groupSize;
-    final int fullStop = offset + len - tail;
-    int pos = offset;
-    while (pos < fullStop) {
-      readLongBE(input, buffer, pos, groupSize, numBytes);
-      pos += groupSize;
-    }
-
-    if (tail > 0) {
-      readRemainingLongs(buffer, pos, input, tail, numBytes);
-    }
-  }
-
-  /**
-   * Reads {@code remainder} values of numBytes bytes each into buffer,
-   * starting at offset. The bytes are first pulled into the shared read
-   * buffer in bulk and then decoded one value at a time. For a numBytes
-   * outside 1..8 the bytes are consumed but nothing is stored.
-   *
-   * @param buffer array that receives the values
-   * @param offset index of the first value to store
-   * @param input stream to read from
-   * @param remainder number of values to read
-   * @param numBytes bytes read per value
-   * @throws EOFException if the stream ends before all bytes were read
-   * @throws IOException if reading from input fails
-   */
-  private void readRemainingLongs(long[] buffer, int offset, InStream input, int remainder,
-      int numBytes) throws IOException {
-    final int toRead = remainder * numBytes;
-    // bulk read to buffer; one call may deliver fewer bytes than requested
-    int bytesRead = 0;
-    while (bytesRead < toRead) {
-      final int count = input.read(readBuffer, bytesRead, toRead - bytesRead);
-      // assumes InStream.read follows the java.io.InputStream contract and
-      // returns a negative count at end of stream; adding that to bytesRead
-      // would move the offset backwards instead of terminating
-      if (count < 0) {
-        throw new EOFException("Read past EOF: needed " + toRead + " bytes, got "
-            + bytesRead + " from " + input);
-      }
-      bytesRead += count;
-    }
-
-    for (int idx = 0; idx < remainder; idx++) {
-      final long value;
-      switch (numBytes) {
-        case 1:
-          value = readBuffer[idx] & 255;
-          break;
-        case 2:
-          value = readLongBE2(input, idx * 2);
-          break;
-        case 3:
-          value = readLongBE3(input, idx * 3);
-          break;
-        case 4:
-          value = readLongBE4(input, idx * 4);
-          break;
-        case 5:
-          value = readLongBE5(input, idx * 5);
-          break;
-        case 6:
-          value = readLongBE6(input, idx * 6);
-          break;
-        case 7:
-          value = readLongBE7(input, idx * 7);
-          break;
-        case 8:
-          value = readLongBE8(input, idx * 8);
-          break;
-        default:
-          // unsupported width: nothing to decode
-          return;
-      }
-      buffer[offset + idx] = value;
-    }
-  }
-
- private void readLongBE(InStream in, long[] buffer, int start, int numHops, int numBytes)
- throws IOException {
- final int toRead = numHops * numBytes;
- // bulk read to buffer
- int bytesRead = in.read(readBuffer, 0, toRead);
- while (bytesRead != toRead) {
- bytesRead += in.read(readBuffer, bytesRead, toRead - bytesRead);
- }
-
- switch (numBytes) {
- case 1:
- buffer[start + 0] = readBuffer[0] & 255;
- buffer[start + 1] = readBuffer[1] & 255;
- buffer[start + 2] = readBuffer[2] & 255;
- buffer[start + 3] = readBuffer[3] & 255;
- buffer[start + 4] = readBuffer[4] & 255;
- buffer[start + 5] = readBuffer[5] & 255;
- buffer[start + 6] = readBuffer[6] & 255;
- buffer[start + 7] = readBuffer[7] & 255;
- break;
- case 2:
- buffer[start + 0] = readLongBE2(in, 0);
- buffer[start + 1] = readLongBE2(in, 2);
- buffer[start + 2] = readLongBE2(in, 4);
- buffer[start + 3] = readLongBE2(in, 6);
- buffer[start + 4] = readLongBE2(in, 8);
- buffer[start + 5] = readLongBE2(in, 10);
- buffer[start + 6] = readLongBE2(in, 12);
- buffer[start + 7] = readLongBE2(in, 14);
- break;
- case 3:
- buffer[start + 0] = readLongBE3(in, 0);
- buffer[start + 1] = readLongBE3(in, 3);
- buffer[start + 2] = readLongBE3(in, 6);
- buffer[start + 3] = readLongBE3(in, 9);
- buffer[start + 4] = readLongBE3(in, 12);
- buffer[start + 5] = readLongBE3(in, 15);
- buffer[start + 6] = readLongBE3(in, 18);
- buffer[start + 7] = readLongBE3(in, 21);
- break;
- case 4:
- buffer[start + 0] = readLongBE4(in, 0);
- buffer[start + 1] = readLongBE4(in, 4);
- buffer[start + 2] = readLongBE4(in, 8);
- buffer[start + 3] = readLongBE4(in, 12);
- buffer[start + 4] = readLongBE4(in, 16);
- buffer[start + 5] = readLongBE4(in, 20);
- buffer[start + 6] = readLongBE4(in, 24);
- buffer[start + 7] = readLongBE4(in, 28);
- break;
- case 5:
- buffer[start + 0] = readLongBE5(in, 0);
- buffer[start + 1] = readLongBE5(in, 5);
- buffer[start + 2] = readLongBE5(in, 10);
- buffer[start + 3] = readLongBE5(in, 15);
- buffer[start + 4] = readLongBE5(in, 20);
- buffer[start + 5] = readLongBE5(in, 25);
- buffer[start + 6] = readLongBE5(in, 30);
- buffer[start + 7] = readLongBE5(in, 35);
- break;
- case 6:
- buffer[start + 0] = readLongBE6(in, 0);
- buffer[start + 1] = readLongBE6(in, 6);
- buffer[start + 2] = readLongBE6(in, 12);
- buffer[start + 3] = readLongBE6(in, 18);
- buffer[start + 4] = readLongBE6(in, 24);
- buffer[start + 5] = readLongBE6(in, 30);
- buffer[start + 6] = readLongBE6(in, 36);
- buffer[start + 7] = readLongBE6(in, 42);
- break;
- case 7:
- buffer[start + 0] = readLongBE7(in, 0);
- buffer[start + 1] = readLongBE7(in, 7);
- buffer[start + 2] = readLongBE7(in, 14);
- buffer[start + 3] = readLongBE7(in, 21);
- buffer[start + 4] = readLongBE7(in, 28);
- buffer[start + 5] = readLongBE7(in, 35);
- buffer[start + 6] = readLongBE7(in, 42);
- buffer[start + 7] = readLongBE7(in, 49);
- break;
- case 8:
- buffer[start + 0] = readLongBE8(in, 0);
- buffer[start + 1] = readLongBE8(in, 8);
- buffer[start + 2] = readLongBE8(in, 16);
- buffer[start + 3] = readLongBE8(in, 24);
- buffer[start + 4] = readLongBE8(in, 32);
- buffer[start + 5] = readLongBE8(in, 40);
- buffer[start + 6] = readLongBE8(in, 48);
- buffer[start + 7] = readLongBE8(in, 56);
- break;
- default:
- break;
- }
- }
-
- private long readLongBE2(InStream in, int rbOffset) {
- return (((readBuffer[rbOffset] & 255) << 8)
- + ((readBuffer[rbOffset + 1] & 255) << 0));
- }
-
- private long readLongBE3(InStream in, int rbOffset) {
- return (((readBuffer[rbOffset] & 255) << 16)
- + ((readBuffer[rbOffset + 1] & 255) << 8)
- + ((readBuffer[rbOffset + 2] & 255) << 0));
- }
-
- /**
-  * Decodes the 4 bytes of {@code readBuffer} starting at {@code rbOffset}
-  * as a big-endian value. The most significant byte is widened to
-  * {@code long} before shifting so the result is never negative. The
-  * {@code in} argument is not used by the body.
-  */
- private long readLongBE4(InStream in, int rbOffset) {
-   return ((readBuffer[rbOffset] & 0xFFL) << 24)
-       | ((readBuffer[rbOffset + 1] & 0xFFL) << 16)
-       | ((readBuffer[rbOffset + 2] & 0xFFL) << 8)
-       | (readBuffer[rbOffset + 3] & 0xFFL);
- }
-
- /**
-  * Decodes the 5 bytes of {@code readBuffer} starting at {@code rbOffset}
-  * as a big-endian value. The {@code in} argument is not used by the body.
-  */
- private long readLongBE5(InStream in, int rbOffset) {
-   return ((readBuffer[rbOffset] & 0xFFL) << 32)
-       | ((readBuffer[rbOffset + 1] & 0xFFL) << 24)
-       | ((readBuffer[rbOffset + 2] & 0xFFL) << 16)
-       | ((readBuffer[rbOffset + 3] & 0xFFL) << 8)
-       | (readBuffer[rbOffset + 4] & 0xFFL);
- }
-
- /**
-  * Decodes the 6 bytes of {@code readBuffer} starting at {@code rbOffset}
-  * as a big-endian value. The {@code in} argument is not used by the body.
-  */
- private long readLongBE6(InStream in, int rbOffset) {
-   return ((readBuffer[rbOffset] & 0xFFL) << 40)
-       | ((readBuffer[rbOffset + 1] & 0xFFL) << 32)
-       | ((readBuffer[rbOffset + 2] & 0xFFL) << 24)
-       | ((readBuffer[rbOffset + 3] & 0xFFL) << 16)
-       | ((readBuffer[rbOffset + 4] & 0xFFL) << 8)
-       | (readBuffer[rbOffset + 5] & 0xFFL);
- }
-
- /**
-  * Decodes the 7 bytes of {@code readBuffer} starting at {@code rbOffset}
-  * as a big-endian value. The {@code in} argument is not used by the body.
-  */
- private long readLongBE7(InStream in, int rbOffset) {
-   return ((readBuffer[rbOffset] & 0xFFL) << 48)
-       | ((readBuffer[rbOffset + 1] & 0xFFL) << 40)
-       | ((readBuffer[rbOffset + 2] & 0xFFL) << 32)
-       | ((readBuffer[rbOffset + 3] & 0xFFL) << 24)
-       | ((readBuffer[rbOffset + 4] & 0xFFL) << 16)
-       | ((readBuffer[rbOffset + 5] & 0xFFL) << 8)
-       | (readBuffer[rbOffset + 6] & 0xFFL);
- }
-
- /**
-  * Decodes the 8 bytes of {@code readBuffer} starting at {@code rbOffset}
-  * as a big-endian value. The first byte lands in the top eight bits, so
-  * the result is negative when that byte is 0x80 or above. The {@code in}
-  * argument is not used by the body.
-  */
- private long readLongBE8(InStream in, int rbOffset) {
-   return ((readBuffer[rbOffset] & 0xFFL) << 56)
-       | ((readBuffer[rbOffset + 1] & 0xFFL) << 48)
-       | ((readBuffer[rbOffset + 2] & 0xFFL) << 40)
-       | ((readBuffer[rbOffset + 3] & 0xFFL) << 32)
-       | ((readBuffer[rbOffset + 4] & 0xFFL) << 24)
-       | ((readBuffer[rbOffset + 5] & 0xFFL) << 16)
-       | ((readBuffer[rbOffset + 6] & 0xFFL) << 8)
-       | (readBuffer[rbOffset + 7] & 0xFFL);
- }
-
- /**
-  * Reports whether {@code left - right} fits in a {@code long}.
-  *
-  * <p>Guava's LongMath.checkedSubtract() is deliberately not used here
-  * because it throws ArithmeticException in case of overflow.
-  *
-  * @return true when the subtraction does not overflow
-  */
- public boolean isSafeSubtract(long left, long right) {
-   // operands with equal signs can never overflow when subtracted
-   final boolean sameSign = (left ^ right) >= 0;
-   // otherwise the difference must still carry the sign of the left operand
-   final boolean differenceKeepsSign = (left ^ (left - right)) >= 0;
-   return sameSign | differenceKeepsSign;
- }
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SettableUncompressedStream.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SettableUncompressedStream.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SettableUncompressedStream.java
deleted file mode 100644
index 3496de9..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SettableUncompressedStream.java
+++ /dev/null
@@ -1,43 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.util.List;
-
-import org.apache.hadoop.hive.common.DiskRangeInfo;
-import org.apache.hadoop.hive.common.io.DiskRange;
-
-/**
- * An uncompressed stream whose underlying disk ranges can be replaced after
- * construction.
- */
-public class SettableUncompressedStream extends InStream.UncompressedStream {
-
-  public SettableUncompressedStream(String name, List<DiskRange> input, long length) {
-    super(name, input, length);
-    setOffset(input);
-  }
-
-  /**
-   * Resets the stream onto the ranges held by {@code diskRangeInfo} and
-   * moves the current offset to the start of the first range.
-   */
-  public void setBuffers(DiskRangeInfo diskRangeInfo) {
-    reset(diskRangeInfo.getDiskRanges(), diskRangeInfo.getTotalLength());
-    setOffset(diskRangeInfo.getDiskRanges());
-  }
-
-  /** Points currentOffset at the first range's offset, or 0 for an empty list. */
-  private void setOffset(List<DiskRange> list) {
-    if (list.isEmpty()) {
-      currentOffset = 0;
-    } else {
-      currentOffset = list.get(0).getOffset();
-    }
-  }
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SnappyCodec.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SnappyCodec.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SnappyCodec.java
deleted file mode 100644
index 820f215..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SnappyCodec.java
+++ /dev/null
@@ -1,109 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.io.orc;
-
-import org.apache.hadoop.hive.shims.HadoopShims.DirectDecompressorShim;
-import org.apache.hadoop.hive.shims.ShimLoader;
-import org.apache.hadoop.hive.shims.HadoopShims.DirectCompressionType;
-import org.iq80.snappy.Snappy;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.EnumSet;
-
-class SnappyCodec implements CompressionCodec, DirectDecompressionCodec {
-
-  // Cached result of isAvailable(); null until the first call.
-  Boolean direct = null;
-
-  /**
-   * Compresses the remaining bytes of {@code in}. The compressed bytes go to
-   * {@code out} and, when they do not fit there, the rest spills into
-   * {@code overflow}.
-   *
-   * @return false, with nothing written, when the compressed form is not
-   *         smaller than the input; true otherwise
-   */
-  @Override
-  public boolean compress(ByteBuffer in, ByteBuffer out,
-                          ByteBuffer overflow) throws IOException {
-    final int inBytes = in.remaining();
-    // Snappy cannot write into an overflow buffer, so compress into a
-    // scratch array first and copy from there (costs one extra copy).
-    final byte[] compressed = new byte[Snappy.maxCompressedLength(inBytes)];
-    final int outBytes =
-        Snappy.compress(in.array(), in.arrayOffset() + in.position(), inBytes,
-            compressed, 0);
-    if (outBytes >= inBytes) {
-      return false;
-    }
-
-    final int room = out.remaining();
-    if (room >= outBytes) {
-      System.arraycopy(compressed, 0, out.array(),
-          out.arrayOffset() + out.position(), outBytes);
-      out.position(out.position() + outBytes);
-    } else {
-      // fill what is left of out, then put the rest into overflow
-      final int spilled = outBytes - room;
-      System.arraycopy(compressed, 0, out.array(),
-          out.arrayOffset() + out.position(), room);
-      out.position(out.limit());
-      System.arraycopy(compressed, room, overflow.array(),
-          overflow.arrayOffset(), spilled);
-      overflow.position(spilled);
-    }
-    return true;
-  }
-
-  /**
-   * Decompresses {@code in} into {@code out} and flips {@code out} for
-   * reading. Uses the direct decompressor when both buffers are direct.
-   */
-  @Override
-  public void decompress(ByteBuffer in, ByteBuffer out) throws IOException {
-    if (in.isDirect() && out.isDirect()) {
-      directDecompress(in, out);
-      return;
-    }
-    final int inStart = in.position();
-    final int produced =
-        Snappy.uncompress(in.array(), in.arrayOffset() + inStart,
-            in.limit() - inStart, out.array(), out.arrayOffset() + out.position());
-    out.position(out.position() + produced);
-    out.flip();
-  }
-
-  /**
-   * Tells whether the Hadoop shims provide a direct Snappy decompressor.
-   * The answer is computed on the first call and cached in {@code direct}.
-   */
-  @Override
-  public boolean isAvailable() {
-    if (direct == null) {
-      boolean found;
-      try {
-        found = ShimLoader.getHadoopShims().getDirectDecompressor(
-            DirectCompressionType.SNAPPY) != null;
-      } catch (UnsatisfiedLinkError ule) {
-        // native library missing: treat as unavailable
-        found = false;
-      }
-      direct = Boolean.valueOf(found);
-    }
-    return direct.booleanValue();
-  }
-
-  /** Decompresses through the shim's direct decompressor and flips {@code out}. */
-  @Override
-  public void directDecompress(ByteBuffer in, ByteBuffer out)
-      throws IOException {
-    final DirectDecompressorShim shim = ShimLoader.getHadoopShims()
-        .getDirectDecompressor(DirectCompressionType.SNAPPY);
-    shim.decompress(in, out);
-    out.flip(); // flip for read
-  }
-
-  @Override
-  public CompressionCodec modify(EnumSet<Modifier> modifiers) {
-    // snappy allows no modifications
-    return this;
-  }
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StreamName.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StreamName.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StreamName.java
deleted file mode 100644
index 9ff0b1f..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StreamName.java
+++ /dev/null
@@ -1,95 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.io.orc;
-
-/**
- * The name of a stream within a stripe: a column id plus a stream kind.
- */
-public class StreamName implements Comparable<StreamName> {
-  private final int column;
-  private final OrcProto.Stream.Kind kind;
-
-  public enum Area {
-    DATA, INDEX
-  }
-
-  public StreamName(int column, OrcProto.Stream.Kind kind) {
-    this.column = column;
-    this.kind = kind;
-  }
-
-  @Override
-  public boolean equals(Object obj) {
-    if (!(obj instanceof StreamName)) {
-      return false;
-    }
-    final StreamName other = (StreamName) obj;
-    return other.column == column && other.kind == kind;
-  }
-
-  /**
-   * Orders by area first, then by column, then by kind. The area comparison
-   * is negated, so INDEX streams sort ahead of DATA streams. A null argument
-   * yields -1.
-   */
-  @Override
-  public int compareTo(StreamName streamName) {
-    if (streamName == null) {
-      return -1;
-    }
-    final Area mine = getArea(kind);
-    final Area theirs = getArea(streamName.kind);
-    if (mine != theirs) {
-      return -mine.compareTo(theirs);
-    }
-    if (column != streamName.column) {
-      return column < streamName.column ? -1 : 1;
-    }
-    return kind.compareTo(streamName.kind);
-  }
-
-  public int getColumn() {
-    return column;
-  }
-
-  public OrcProto.Stream.Kind getKind() {
-    return kind;
-  }
-
-  public Area getArea() {
-    return getArea(kind);
-  }
-
-  /**
-   * Maps a stream kind to its area: ROW_INDEX, DICTIONARY_COUNT and
-   * BLOOM_FILTER are INDEX, every other kind is DATA.
-   */
-  public static Area getArea(OrcProto.Stream.Kind kind) {
-    switch (kind) {
-      case ROW_INDEX:
-      case DICTIONARY_COUNT:
-      case BLOOM_FILTER:
-        return Area.INDEX;
-      default:
-        return Area.DATA;
-    }
-  }
-
-  @Override
-  public String toString() {
-    return "Stream for column " + column + " kind " + kind;
-  }
-
-  @Override
-  public int hashCode() {
-    return 101 * column + kind.getNumber();
-  }
-}
-
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StringColumnStatistics.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StringColumnStatistics.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StringColumnStatistics.java
deleted file mode 100644
index 3a49269..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StringColumnStatistics.java
+++ /dev/null
@@ -1,41 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-/**
- * Statistics for string columns.
- */
-public interface StringColumnStatistics extends ColumnStatistics {
- /**
- * Get the minimum string.
- * @return the minimum string
- */
- String getMinimum();
-
- /**
- * Get the maximum string.
- * @return the maximum string
- */
- String getMaximum();
-
- /**
- * Get the total length of all strings.
- * @return the sum of the string lengths (total length)
- */
- long getSum();
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StringRedBlackTree.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StringRedBlackTree.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StringRedBlackTree.java
deleted file mode 100644
index e0c52e7..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StringRedBlackTree.java
+++ /dev/null
@@ -1,207 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.io.IOException;
-import java.io.OutputStream;
-
-import org.apache.hadoop.io.Text;
-
-/**
- * A red-black tree that stores strings. The bytes of every key are appended
- * to one shared byte array, and the start offset of each key is recorded
- * per entry.
- */
-class StringRedBlackTree extends RedBlackTree {
-  private final DynamicByteArray byteArray = new DynamicByteArray();
-  private final DynamicIntArray keyOffsets;
-  private final Text newKey = new Text();
-
-  public StringRedBlackTree(int initialCapacity) {
-    super(initialCapacity);
-    keyOffsets = new DynamicIntArray(initialCapacity);
-  }
-
-  public int add(String value) {
-    newKey.set(value);
-    return addNewKey();
-  }
-
-  public int add(Text value) {
-    newKey.set(value);
-    return addNewKey();
-  }
-
-  public int add(byte[] bytes, int offset, int length) {
-    newKey.set(bytes, offset, length);
-    return addNewKey();
-  }
-
-  /**
-   * Inserts the pending key held in {@code newKey}. Only when the key was
-   * not present before are its bytes appended and its offset recorded.
-   */
-  private int addNewKey() {
-    if (add()) {
-      final int keyLength = newKey.getLength();
-      final int keyStart = byteArray.add(newKey.getBytes(), 0, keyLength);
-      keyOffsets.add(keyStart);
-    }
-    return lastAdd;
-  }
-
-  /**
-   * Returns the offset just past the bytes of the key at {@code position}:
-   * the start of the following key, or the end of the byte array for the
-   * key that was added last.
-   */
-  private int endOffset(int position) {
-    if (position + 1 == keyOffsets.size()) {
-      return byteArray.size();
-    }
-    return keyOffsets.get(position + 1);
-  }
-
-  @Override
-  protected int compareValue(int position) {
-    final int start = keyOffsets.get(position);
-    final int end = endOffset(position);
-    return byteArray.compare(newKey.getBytes(), 0, newKey.getLength(),
-        start, end - start);
-  }
-
-  /**
-   * The information about each node.
-   */
-  public interface VisitorContext {
-    /**
-     * Get the position where the key was originally added.
-     * @return the number returned by add.
-     */
-    int getOriginalPosition();
-
-    /**
-     * Write the bytes for the string to the given output stream.
-     * @param out the stream to write to.
-     * @throws IOException
-     */
-    void writeBytes(OutputStream out) throws IOException;
-
-    /**
-     * Get the original string.
-     * @return the string
-     */
-    Text getText();
-
-    /**
-     * Get the number of bytes.
-     * @return the string's length in bytes
-     */
-    int getLength();
-  }
-
-  /**
-   * The interface for visitors.
-   */
-  public interface Visitor {
-    /**
-     * Called once for each node of the tree in sort order.
-     * @param context the information about each node
-     * @throws IOException
-     */
-    void visit(VisitorContext context) throws IOException;
-  }
-
-  /** A reusable context that is re-pointed at each node during a visit. */
-  private class VisitorContextImpl implements VisitorContext {
-    private int originalPosition;
-    private int start;
-    private int end;
-    private final Text text = new Text();
-
-    @Override
-    public int getOriginalPosition() {
-      return originalPosition;
-    }
-
-    @Override
-    public Text getText() {
-      byteArray.setText(text, start, end - start);
-      return text;
-    }
-
-    @Override
-    public void writeBytes(OutputStream out) throws IOException {
-      byteArray.write(out, start, end - start);
-    }
-
-    @Override
-    public int getLength() {
-      return end - start;
-    }
-
-    void setPosition(int position) {
-      originalPosition = position;
-      start = keyOffsets.get(position);
-      end = endOffset(position);
-    }
-  }
-
-  /** In-order walk: left subtree, the node itself, then the right subtree. */
-  private void recurse(int node, Visitor visitor, VisitorContextImpl context
-                       ) throws IOException {
-    if (node == NULL) {
-      return;
-    }
-    recurse(getLeft(node), visitor, context);
-    context.setPosition(node);
-    visitor.visit(context);
-    recurse(getRight(node), visitor, context);
-  }
-
-  /**
-   * Visit all of the nodes in the tree in sorted order.
-   * @param visitor the action to be applied to each node
-   * @throws IOException
-   */
-  public void visit(Visitor visitor) throws IOException {
-    final VisitorContextImpl context = new VisitorContextImpl();
-    recurse(root, visitor, context);
-  }
-
-  /**
-   * Reset the table to empty.
-   */
-  @Override
-  public void clear() {
-    super.clear();
-    byteArray.clear();
-    keyOffsets.clear();
-  }
-
-  /**
-   * Copies the key that was added at {@code originalPosition} into
-   * {@code result}.
-   */
-  public void getText(Text result, int originalPosition) {
-    final int offset = keyOffsets.get(originalPosition);
-    final int length = endOffset(originalPosition) - offset;
-    byteArray.setText(result, offset, length);
-  }
-
-  /**
-   * Get the size of the character data in the table.
-   * @return the bytes used by the table
-   */
-  public int getCharacterSize() {
-    return byteArray.size();
-  }
-
-  /**
-   * Calculate the approximate size in memory.
-   * @return the number of bytes used in storing the tree.
-   */
-  @Override
-  public long getSizeInBytes() {
-    return byteArray.getSizeInBytes() + keyOffsets.getSizeInBytes() +
-        super.getSizeInBytes();
-  }
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StripeInformation.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StripeInformation.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StripeInformation.java
deleted file mode 100644
index 2ac4601..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StripeInformation.java
+++ /dev/null
@@ -1,59 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-/**
- * Information about the stripes in an ORC file that is provided by the Reader.
- */
-public interface StripeInformation {
- /**
- * Get the byte offset of the start of the stripe.
- * @return the bytes from the start of the file
- */
- long getOffset();
-
- /**
- * Get the total length of the stripe in bytes.
- * @return the number of bytes in the stripe
- */
- long getLength();
-
- /**
- * Get the length of the stripe's indexes.
- * @return the number of bytes in the index
- */
- long getIndexLength();
-
- /**
- * Get the length of the stripe's data.
- * @return the number of bytes in the stripe's data
- */
- long getDataLength();
-
- /**
- * Get the length of the stripe's tail section, which contains its index.
- * @return the number of bytes in the tail
- */
- long getFooterLength();
-
- /**
- * Get the number of rows in the stripe.
- * @return a count of the number of rows
- */
- long getNumberOfRows();
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StripeStatistics.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StripeStatistics.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StripeStatistics.java
deleted file mode 100644
index 6fc1ab6..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StripeStatistics.java
+++ /dev/null
@@ -1,42 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.util.List;
-
-public class StripeStatistics {
-  private final List<OrcProto.ColumnStatistics> cs;
-
-  StripeStatistics(List<OrcProto.ColumnStatistics> list) {
-    this.cs = list;
-  }
-
-  /**
-   * Return the column statistics, deserialized from their protobuf form on
-   * every call.
-   *
-   * @return column stats, one entry per stored element and in the same order
-   */
-  public ColumnStatistics[] getColumnStatistics() {
-    final ColumnStatistics[] result = new ColumnStatistics[cs.size()];
-    int index = 0;
-    while (index < result.length) {
-      result[index] = ColumnStatisticsImpl.deserialize(cs.get(index));
-      index++;
-    }
-    return result;
-  }
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TimestampColumnStatistics.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TimestampColumnStatistics.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TimestampColumnStatistics.java
deleted file mode 100644
index ef42f50..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TimestampColumnStatistics.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.sql.Timestamp;
-
-/**
- * Statistics for Timestamp columns.
- */
-public interface TimestampColumnStatistics extends ColumnStatistics {
- /**
- * Get the minimum value for the column.
- * @return the minimum value
- */
- Timestamp getMinimum();
-
- /**
- * Get the maximum value for the column.
- * @return the maximum value
- */
- Timestamp getMaximum();
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java
index 8c13571..3fe28d8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java
@@ -57,13 +57,25 @@ import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.apache.orc.impl.BitFieldReader;
+import org.apache.orc.impl.DynamicByteArray;
+import org.apache.orc.impl.InStream;
+import org.apache.orc.impl.IntegerReader;
+import org.apache.orc.OrcProto;
+import org.apache.orc.impl.PositionProvider;
+import org.apache.orc.impl.RunLengthByteReader;
+import org.apache.orc.impl.RunLengthIntegerReader;
+import org.apache.orc.impl.RunLengthIntegerReaderV2;
+import org.apache.orc.impl.SerializationUtils;
+import org.apache.orc.impl.StreamName;
/**
* Factory for creating ORC tree readers.
*/
public class TreeReaderFactory {
- public static final Logger LOG = LoggerFactory.getLogger(TreeReaderFactory.class);
+ private static final Logger LOG =
+ LoggerFactory.getLogger(TreeReaderFactory.class);
public static class TreeReaderSchema {
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TypeDescription.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TypeDescription.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TypeDescription.java
deleted file mode 100644
index b365408..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TypeDescription.java
+++ /dev/null
@@ -1,540 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.io.orc;
-
-import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.UnionColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-
-/**
- * This is the description of the types in an ORC file.
- */
-public class TypeDescription {
- private static final int MAX_PRECISION = 38;
- private static final int MAX_SCALE = 38;
- private static final int DEFAULT_PRECISION = 38;
- private static final int DEFAULT_SCALE = 10;
- private static final int DEFAULT_LENGTH = 256;
- public enum Category {
- BOOLEAN("boolean", true),
- BYTE("tinyint", true),
- SHORT("smallint", true),
- INT("int", true),
- LONG("bigint", true),
- FLOAT("float", true),
- DOUBLE("double", true),
- STRING("string", true),
- DATE("date", true),
- TIMESTAMP("timestamp", true),
- BINARY("binary", true),
- DECIMAL("decimal", true),
- VARCHAR("varchar", true),
- CHAR("char", true),
- LIST("array", false),
- MAP("map", false),
- STRUCT("struct", false),
- UNION("union", false);
-
- Category(String name, boolean isPrimitive) {
- this.name = name;
- this.isPrimitive = isPrimitive;
- }
-
- final boolean isPrimitive;
- final String name;
-
- public boolean isPrimitive() {
- return isPrimitive;
- }
-
- public String getName() {
- return name;
- }
- }
-
- public static TypeDescription createBoolean() {
- return new TypeDescription(Category.BOOLEAN);
- }
-
- public static TypeDescription createByte() {
- return new TypeDescription(Category.BYTE);
- }
-
- public static TypeDescription createShort() {
- return new TypeDescription(Category.SHORT);
- }
-
- public static TypeDescription createInt() {
- return new TypeDescription(Category.INT);
- }
-
- public static TypeDescription createLong() {
- return new TypeDescription(Category.LONG);
- }
-
- public static TypeDescription createFloat() {
- return new TypeDescription(Category.FLOAT);
- }
-
- public static TypeDescription createDouble() {
- return new TypeDescription(Category.DOUBLE);
- }
-
- public static TypeDescription createString() {
- return new TypeDescription(Category.STRING);
- }
-
- public static TypeDescription createDate() {
- return new TypeDescription(Category.DATE);
- }
-
- public static TypeDescription createTimestamp() {
- return new TypeDescription(Category.TIMESTAMP);
- }
-
- public static TypeDescription createBinary() {
- return new TypeDescription(Category.BINARY);
- }
-
- public static TypeDescription createDecimal() {
- return new TypeDescription(Category.DECIMAL);
- }
-
- /**
- * For decimal types, set the precision.
- * @param precision the new precision
- * @return this
- */
- public TypeDescription withPrecision(int precision) {
- if (category != Category.DECIMAL) {
- throw new IllegalArgumentException("precision is only allowed on decimal"+
- " and not " + category.name);
- } else if (precision < 1 || precision > MAX_PRECISION || scale > precision){
- throw new IllegalArgumentException("precision " + precision +
- " is out of range 1 .. " + scale);
- }
- this.precision = precision;
- return this;
- }
-
- /**
- * For decimal types, set the scale.
- * @param scale the new scale
- * @return this
- */
- public TypeDescription withScale(int scale) {
- if (category != Category.DECIMAL) {
- throw new IllegalArgumentException("scale is only allowed on decimal"+
- " and not " + category.name);
- } else if (scale < 0 || scale > MAX_SCALE || scale > precision) {
- throw new IllegalArgumentException("scale is out of range at " + scale);
- }
- this.scale = scale;
- return this;
- }
-
- public static TypeDescription createVarchar() {
- return new TypeDescription(Category.VARCHAR);
- }
-
- public static TypeDescription createChar() {
- return new TypeDescription(Category.CHAR);
- }
-
- /**
- * Set the maximum length for char and varchar types.
- * @param maxLength the maximum value
- * @return this
- */
- public TypeDescription withMaxLength(int maxLength) {
- if (category != Category.VARCHAR && category != Category.CHAR) {
- throw new IllegalArgumentException("maxLength is only allowed on char" +
- " and varchar and not " + category.name);
- }
- this.maxLength = maxLength;
- return this;
- }
-
- public static TypeDescription createList(TypeDescription childType) {
- TypeDescription result = new TypeDescription(Category.LIST);
- result.children.add(childType);
- childType.parent = result;
- return result;
- }
-
- public static TypeDescription createMap(TypeDescription keyType,
- TypeDescription valueType) {
- TypeDescription result = new TypeDescription(Category.MAP);
- result.children.add(keyType);
- result.children.add(valueType);
- keyType.parent = result;
- valueType.parent = result;
- return result;
- }
-
- public static TypeDescription createUnion() {
- return new TypeDescription(Category.UNION);
- }
-
- public static TypeDescription createStruct() {
- return new TypeDescription(Category.STRUCT);
- }
-
- /**
- * Add a child to a union type.
- * @param child a new child type to add
- * @return the union type.
- */
- public TypeDescription addUnionChild(TypeDescription child) {
- if (category != Category.UNION) {
- throw new IllegalArgumentException("Can only add types to union type" +
- " and not " + category);
- }
- children.add(child);
- child.parent = this;
- return this;
- }
-
- /**
- * Add a field to a struct type as it is built.
- * @param field the field name
- * @param fieldType the type of the field
- * @return the struct type
- */
- public TypeDescription addField(String field, TypeDescription fieldType) {
- if (category != Category.STRUCT) {
- throw new IllegalArgumentException("Can only add fields to struct type" +
- " and not " + category);
- }
- fieldNames.add(field);
- children.add(fieldType);
- fieldType.parent = this;
- return this;
- }
-
- /**
- * Get the id for this type.
- * The first call will cause all of the ids in the tree to be assigned, so
- * it should not be called before the type is completely built.
- * @return the sequential id
- */
- public int getId() {
- // if the id hasn't been assigned, assign all of the ids from the root
- if (id == -1) {
- TypeDescription root = this;
- while (root.parent != null) {
- root = root.parent;
- }
- root.assignIds(0);
- }
- return id;
- }
-
- /**
- * Get the maximum id assigned to this type or its children.
- * The first call will cause all of the ids in the tree to be assigned, so
- * it should not be called before the type is completely built.
- * @return the maximum id assigned under this type
- */
- public int getMaximumId() {
- // if the id hasn't been assigned, assign all of the ids from the root
- if (maxId == -1) {
- TypeDescription root = this;
- while (root.parent != null) {
- root = root.parent;
- }
- root.assignIds(0);
- }
- return maxId;
- }
-
- private ColumnVector createColumn() {
- switch (category) {
- case BOOLEAN:
- case BYTE:
- case SHORT:
- case INT:
- case LONG:
- case TIMESTAMP:
- case DATE:
- return new LongColumnVector();
- case FLOAT:
- case DOUBLE:
- return new DoubleColumnVector();
- case DECIMAL:
- return new DecimalColumnVector(precision, scale);
- case STRING:
- case BINARY:
- case CHAR:
- case VARCHAR:
- return new BytesColumnVector();
- case STRUCT: {
- ColumnVector[] fieldVector = new ColumnVector[children.size()];
- for(int i=0; i < fieldVector.length; ++i) {
- fieldVector[i] = children.get(i).createColumn();
- }
- return new StructColumnVector(VectorizedRowBatch.DEFAULT_SIZE,
- fieldVector);
- }
- case UNION: {
- ColumnVector[] fieldVector = new ColumnVector[children.size()];
- for(int i=0; i < fieldVector.length; ++i) {
- fieldVector[i] = children.get(i).createColumn();
- }
- return new UnionColumnVector(VectorizedRowBatch.DEFAULT_SIZE,
- fieldVector);
- }
- case LIST:
- return new ListColumnVector(VectorizedRowBatch.DEFAULT_SIZE,
- children.get(0).createColumn());
- case MAP:
- return new MapColumnVector(VectorizedRowBatch.DEFAULT_SIZE,
- children.get(0).createColumn(), children.get(1).createColumn());
- default:
- throw new IllegalArgumentException("Unknown type " + category);
- }
- }
-
- public VectorizedRowBatch createRowBatch() {
- VectorizedRowBatch result;
- if (category == Category.STRUCT) {
- result = new VectorizedRowBatch(children.size(),
- VectorizedRowBatch.DEFAULT_SIZE);
- for(int i=0; i < result.cols.length; ++i) {
- result.cols[i] = children.get(i).createColumn();
- }
- } else {
- result = new VectorizedRowBatch(1, VectorizedRowBatch.DEFAULT_SIZE);
- result.cols[0] = createColumn();
- }
- result.reset();
- return result;
- }
-
- /**
- * Get the kind of this type.
- * @return get the category for this type.
- */
- public Category getCategory() {
- return category;
- }
-
- /**
- * Get the maximum length of the type. Only used for char and varchar types.
- * @return the maximum length of the string type
- */
- public int getMaxLength() {
- return maxLength;
- }
-
- /**
- * Get the precision of the decimal type.
- * @return the number of digits for the precision.
- */
- public int getPrecision() {
- return precision;
- }
-
- /**
- * Get the scale of the decimal type.
- * @return the number of digits for the scale.
- */
- public int getScale() {
- return scale;
- }
-
- /**
- * For struct types, get the list of field names.
- * @return the list of field names.
- */
- public List<String> getFieldNames() {
- return Collections.unmodifiableList(fieldNames);
- }
-
- /**
- * Get the subtypes of this type.
- * @return the list of children types
- */
- public List<TypeDescription> getChildren() {
- return children == null ? null : Collections.unmodifiableList(children);
- }
-
- /**
- * Assign ids to all of the nodes under this one.
- * @param startId the lowest id to assign
- * @return the next available id
- */
- private int assignIds(int startId) {
- id = startId++;
- if (children != null) {
- for (TypeDescription child : children) {
- startId = child.assignIds(startId);
- }
- }
- maxId = startId - 1;
- return startId;
- }
-
- private TypeDescription(Category category) {
- this.category = category;
- if (category.isPrimitive) {
- children = null;
- } else {
- children = new ArrayList<>();
- }
- if (category == Category.STRUCT) {
- fieldNames = new ArrayList<>();
- } else {
- fieldNames = null;
- }
- }
-
- private int id = -1;
- private int maxId = -1;
- private TypeDescription parent;
- private final Category category;
- private final List<TypeDescription> children;
- private final List<String> fieldNames;
- private int maxLength = DEFAULT_LENGTH;
- private int precision = DEFAULT_PRECISION;
- private int scale = DEFAULT_SCALE;
-
- public void printToBuffer(StringBuilder buffer) {
- buffer.append(category.name);
- switch (category) {
- case DECIMAL:
- buffer.append('(');
- buffer.append(precision);
- buffer.append(',');
- buffer.append(scale);
- buffer.append(')');
- break;
- case CHAR:
- case VARCHAR:
- buffer.append('(');
- buffer.append(maxLength);
- buffer.append(')');
- break;
- case LIST:
- case MAP:
- case UNION:
- buffer.append('<');
- for(int i=0; i < children.size(); ++i) {
- if (i != 0) {
- buffer.append(',');
- }
- children.get(i).printToBuffer(buffer);
- }
- buffer.append('>');
- break;
- case STRUCT:
- buffer.append('<');
- for(int i=0; i < children.size(); ++i) {
- if (i != 0) {
- buffer.append(',');
- }
- buffer.append(fieldNames.get(i));
- buffer.append(':');
- children.get(i).printToBuffer(buffer);
- }
- buffer.append('>');
- break;
- default:
- break;
- }
- }
-
- public String toString() {
- StringBuilder buffer = new StringBuilder();
- printToBuffer(buffer);
- return buffer.toString();
- }
-
- private void printJsonToBuffer(String prefix, StringBuilder buffer,
- int indent) {
- for(int i=0; i < indent; ++i) {
- buffer.append(' ');
- }
- buffer.append(prefix);
- buffer.append("{\"category\": \"");
- buffer.append(category.name);
- buffer.append("\", \"id\": ");
- buffer.append(getId());
- buffer.append(", \"max\": ");
- buffer.append(maxId);
- switch (category) {
- case DECIMAL:
- buffer.append(", \"precision\": ");
- buffer.append(precision);
- buffer.append(", \"scale\": ");
- buffer.append(scale);
- break;
- case CHAR:
- case VARCHAR:
- buffer.append(", \"length\": ");
- buffer.append(maxLength);
- break;
- case LIST:
- case MAP:
- case UNION:
- buffer.append(", \"children\": [");
- for(int i=0; i < children.size(); ++i) {
- buffer.append('\n');
- children.get(i).printJsonToBuffer("", buffer, indent + 2);
- if (i != children.size() - 1) {
- buffer.append(',');
- }
- }
- buffer.append("]");
- break;
- case STRUCT:
- buffer.append(", \"fields\": [");
- for(int i=0; i < children.size(); ++i) {
- buffer.append('\n');
- children.get(i).printJsonToBuffer("\"" + fieldNames.get(i) + "\": ",
- buffer, indent + 2);
- if (i != children.size() - 1) {
- buffer.append(',');
- }
- }
- buffer.append(']');
- break;
- default:
- break;
- }
- buffer.append('}');
- }
-
- public String toJson() {
- StringBuilder buffer = new StringBuilder();
- printJsonToBuffer("", buffer, 0);
- return buffer.toString();
- }
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java
index 6d280c8..e08aaf3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java
@@ -39,6 +39,8 @@ import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
+import org.apache.orc.OrcProto;
+import org.apache.orc.TypeDescription;
/**
* A MapReduce/Hive input format for ORC files.
@@ -64,7 +66,7 @@ public class VectorizedOrcInputFormat extends FileInputFormat<NullWritable, Vect
/**
* Do we have schema on read in the configuration variables?
*/
- TypeDescription schema = OrcUtils.getDesiredRowTypeDescr(conf);
+ TypeDescription schema = OrcInputFormat.getDesiredRowTypeDescr(conf);
List<OrcProto.Type> types = file.getTypes();
Reader.Options options = new Reader.Options();
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Writer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Writer.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Writer.java
index 1873ed1..1f5927a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Writer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Writer.java
@@ -18,32 +18,12 @@
package org.apache.hadoop.hive.ql.io.orc;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-
import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.List;
/**
- * The interface for writing ORC files.
+ * The HIVE interface for writing ORC files.
*/
-public interface Writer {
-
- /**
- * Get the schema for this writer
- * @return the file schema
- */
- TypeDescription getSchema();
-
- /**
- * Add arbitrary meta-data to the ORC file. This may be called at any point
- * until the Writer is closed. If the same key is passed a second time, the
- * second value will replace the first.
- * @param key a key to label the data with.
- * @param value the contents of the metadata.
- */
- void addUserMetadata(String key, ByteBuffer value);
+public interface Writer extends org.apache.orc.Writer {
/**
* Add a row to the ORC file.
@@ -51,68 +31,4 @@ public interface Writer {
* @throws IOException
*/
void addRow(Object row) throws IOException;
-
- /**
- * Add a row batch to the ORC file.
- * @param batch the rows to add
- */
- void addRowBatch(VectorizedRowBatch batch) throws IOException;
-
- /**
- * Flush all of the buffers and close the file. No methods on this writer
- * should be called afterwards.
- * @throws IOException
- */
- void close() throws IOException;
-
- /**
- * Return the deserialized data size. Raw data size will be computed when
- * writing the file footer. Hence raw data size value will be available only
- * after closing the writer.
- *
- * @return raw data size
- */
- long getRawDataSize();
-
- /**
- * Return the number of rows in file. Row count gets updated when flushing
- * the stripes. To get accurate row count this method should be called after
- * closing the writer.
- *
- * @return row count
- */
- long getNumberOfRows();
-
- /**
- * Write an intermediate footer on the file such that if the file is
- * truncated to the returned offset, it would be a valid ORC file.
- * @return the offset that would be a valid end location for an ORC file
- */
- long writeIntermediateFooter() throws IOException;
-
- /**
- * Fast stripe append to ORC file. This interface is used for fast ORC file
- * merge with other ORC files. When merging, the file to be merged should pass
- * stripe in binary form along with stripe information and stripe statistics.
- * After appending last stripe of a file, use appendUserMetadata() to append
- * any user metadata.
- * @param stripe - stripe as byte array
- * @param offset - offset within byte array
- * @param length - length of stripe within byte array
- * @param stripeInfo - stripe information
- * @param stripeStatistics - stripe statistics (Protobuf objects can be
- * merged directly)
- * @throws IOException
- */
- public void appendStripe(byte[] stripe, int offset, int length,
- StripeInformation stripeInfo,
- OrcProto.StripeStatistics stripeStatistics) throws IOException;
-
- /**
- * When fast stripe append is used for merging ORC stripes, after appending
- * the last stripe from a file, this interface must be used to merge any
- * user metadata.
- * @param userMetadata - user metadata
- */
- public void appendUserMetadata(List<OrcProto.UserMetadataItem> userMetadata);
}