Posted to commits@hive.apache.org by om...@apache.org on 2015/12/12 00:28:04 UTC
[07/16] hive git commit: HIVE-11890. Create ORC submodule. (omalley reviewed by prasanthj)
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java
index 99a3e8d..2c9deac 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java
@@ -20,16 +20,16 @@ package org.apache.hadoop.hive.ql.exec;
import java.io.IOException;
import org.apache.commons.lang.exception.ExceptionUtils;
+import org.apache.hadoop.hive.ql.io.orc.Writer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.ql.io.orc.CompressionKind;
+import org.apache.orc.CompressionKind;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.ql.io.orc.OrcFileKeyWrapper;
import org.apache.hadoop.hive.ql.io.orc.OrcFileValueWrapper;
import org.apache.hadoop.hive.ql.io.orc.Reader;
-import org.apache.hadoop.hive.ql.io.orc.Writer;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.OrcFileMergeDesc;
import org.apache.hadoop.hive.ql.plan.api.OperatorType;
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/filters/BloomFilterIO.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/filters/BloomFilterIO.java b/ql/src/java/org/apache/hadoop/hive/ql/io/filters/BloomFilterIO.java
index 56aec9f..878efbe 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/filters/BloomFilterIO.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/filters/BloomFilterIO.java
@@ -18,7 +18,7 @@
package org.apache.hadoop.hive.ql.io.filters;
-import org.apache.hadoop.hive.ql.io.orc.OrcProto;
+import org.apache.orc.OrcProto;
import org.apache.hive.common.util.BloomFilter;
import com.google.common.primitives.Longs;
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BinaryColumnStatistics.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BinaryColumnStatistics.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BinaryColumnStatistics.java
deleted file mode 100644
index 23030a3..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BinaryColumnStatistics.java
+++ /dev/null
@@ -1,25 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-/**
- * Statistics for binary columns.
- */
-public interface BinaryColumnStatistics extends ColumnStatistics {
- long getSum();
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldReader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldReader.java
deleted file mode 100644
index ec1f0a9..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldReader.java
+++ /dev/null
@@ -1,212 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.io.EOFException;
-import java.io.IOException;
-
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-
-public class BitFieldReader {
- private final RunLengthByteReader input;
- /** The number of bits in one item. Non-test code always uses 1. */
- private final int bitSize;
- private int current;
- private int bitsLeft;
- private final int mask;
-
- public BitFieldReader(InStream input,
- int bitSize) throws IOException {
- this.input = new RunLengthByteReader(input);
- this.bitSize = bitSize;
- mask = (1 << bitSize) - 1;
- }
-
- public void setInStream(InStream inStream) {
- this.input.setInStream(inStream);
- }
-
- private void readByte() throws IOException {
- if (input.hasNext()) {
- current = 0xff & input.next();
- bitsLeft = 8;
- } else {
- throw new EOFException("Read past end of bit field from " + this);
- }
- }
-
- public int next() throws IOException {
- int result = 0;
- int bitsLeftToRead = bitSize;
- while (bitsLeftToRead > bitsLeft) {
- result <<= bitsLeft;
- result |= current & ((1 << bitsLeft) - 1);
- bitsLeftToRead -= bitsLeft;
- readByte();
- }
- if (bitsLeftToRead > 0) {
- result <<= bitsLeftToRead;
- bitsLeft -= bitsLeftToRead;
- result |= (current >>> bitsLeft) & ((1 << bitsLeftToRead) - 1);
- }
- return result & mask;
- }
-
- /**
- * Unlike integer readers, where runs are encoded explicitly, in this one we have to read ahead
- * to figure out whether we have a run. Given that runs in booleans are likely, it's worth it.
- * However, it means we'd need to keep track of how many bytes we read, and next/nextVector won't
- * work anymore once this is called. This is trivial to fix, but these are never interspersed.
- */
- private boolean lastRunValue;
- private int lastRunLength = -1;
- private void readNextRun(int maxRunLength) throws IOException {
- assert bitSize == 1;
- if (lastRunLength > 0) return; // last run is not exhausted yet
- if (bitsLeft == 0) {
- readByte();
- }
- // First take care of the partial bits.
- boolean hasVal = false;
- int runLength = 0;
- if (bitsLeft != 8) {
- int partialBitsMask = (1 << bitsLeft) - 1;
- int partialBits = current & partialBitsMask;
- if (partialBits == partialBitsMask || partialBits == 0) {
- lastRunValue = (partialBits == partialBitsMask);
- if (maxRunLength <= bitsLeft) {
- lastRunLength = maxRunLength;
- return;
- }
- maxRunLength -= bitsLeft;
- hasVal = true;
- runLength = bitsLeft;
- bitsLeft = 0;
- } else {
- // There's no run in partial bits. Return whatever we have.
- int prefixBitsCount = 32 - bitsLeft;
- runLength = Integer.numberOfLeadingZeros(partialBits) - prefixBitsCount;
- lastRunValue = (runLength > 0);
- lastRunLength = Math.min(maxRunLength, lastRunValue ? runLength :
- (Integer.numberOfLeadingZeros(~(partialBits | ~partialBitsMask)) - prefixBitsCount));
- return;
- }
- assert bitsLeft == 0;
- readByte();
- }
- if (!hasVal) {
- lastRunValue = ((current >> 7) == 1);
- hasVal = true;
- }
- // Read full bytes until the run ends.
- assert bitsLeft == 8;
- while (maxRunLength >= 8
- && ((lastRunValue && (current == 0xff)) || (!lastRunValue && (current == 0)))) {
- runLength += 8;
- maxRunLength -= 8;
- readByte();
- }
- if (maxRunLength > 0) {
- int extraBits = Integer.numberOfLeadingZeros(
- lastRunValue ? (~(current | ~255)) : current) - 24;
- bitsLeft -= extraBits;
- runLength += extraBits;
- }
- lastRunLength = runLength;
- }
-
- void nextVector(LongColumnVector previous, long previousLen) throws IOException {
- previous.isRepeating = true;
- for (int i = 0; i < previousLen; i++) {
- if (!previous.isNull[i]) {
- previous.vector[i] = next();
- } else {
- // The default value of null for int types in vectorized
- // processing is 1, so set that if the value is null
- previous.vector[i] = 1;
- }
-
- // The default value for nulls in Vectorization for int types is 1
- // and given that non null value can also be 1, we need to check for isNull also
- // when determining the isRepeating flag.
- if (previous.isRepeating
- && i > 0
- && ((previous.vector[i - 1] != previous.vector[i]) || (previous.isNull[i - 1] != previous.isNull[i]))) {
- previous.isRepeating = false;
- }
- }
- }
-
- public void seek(PositionProvider index) throws IOException {
- input.seek(index);
- int consumed = (int) index.getNext();
- if (consumed > 8) {
- throw new IllegalArgumentException("Seek past end of byte at " +
- consumed + " in " + input);
- } else if (consumed != 0) {
- readByte();
- bitsLeft = 8 - consumed;
- } else {
- bitsLeft = 0;
- }
- }
-
- void skip(long items) throws IOException {
- long totalBits = bitSize * items;
- if (bitsLeft >= totalBits) {
- bitsLeft -= totalBits;
- } else {
- totalBits -= bitsLeft;
- input.skip(totalBits / 8);
- current = input.next();
- bitsLeft = (int) (8 - (totalBits % 8));
- }
- }
-
- @Override
- public String toString() {
- return "bit reader current: " + current + " bits left: " + bitsLeft +
- " bit size: " + bitSize + " from " + input;
- }
-
- boolean hasFullByte() {
- return bitsLeft == 8 || bitsLeft == 0;
- }
-
- int peekOneBit() throws IOException {
- assert bitSize == 1;
- if (bitsLeft == 0) {
- readByte();
- }
- return (current >>> (bitsLeft - 1)) & 1;
- }
-
- int peekFullByte() throws IOException {
- assert bitSize == 1;
- assert bitsLeft == 8 || bitsLeft == 0;
- if (bitsLeft == 0) {
- readByte();
- }
- return current;
- }
-
- void skipInCurrentByte(int bits) throws IOException {
- assert bitsLeft >= bits;
- bitsLeft -= bits;
- }
-}
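
The deleted next() above assembles each value from buffered whole bytes plus a final partial byte, high-order bits first. A minimal standalone sketch of the same bit-extraction idea, reading from a plain byte[] instead of a RunLengthByteReader (the class and names are illustrative, not the Hive API):

    import java.io.EOFException;
    import java.io.IOException;

    public class TinyBitReader {
      private final byte[] bytes;   // stand-in for the RunLengthByteReader
      private int pos;              // next byte to consume
      private int current;          // bits buffered from the last byte read
      private int bitsLeft;         // unconsumed bits in 'current'

      public TinyBitReader(byte[] bytes) {
        this.bytes = bytes;
      }

      private void readByte() throws IOException {
        if (pos >= bytes.length) {
          throw new EOFException("Read past end of bit field");
        }
        current = 0xff & bytes[pos++];
        bitsLeft = 8;
      }

      /** Return the next single bit, high-order bits first. */
      public int nextBit() throws IOException {
        if (bitsLeft == 0) {
          readByte();
        }
        bitsLeft -= 1;
        return (current >>> bitsLeft) & 1;
      }

      public static void main(String[] args) throws IOException {
        TinyBitReader r = new TinyBitReader(new byte[]{(byte) 0b10100000});
        System.out.println(r.nextBit()); // 1
        System.out.println(r.nextBit()); // 0
        System.out.println(r.nextBit()); // 1
      }
    }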
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldWriter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldWriter.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldWriter.java
deleted file mode 100644
index 0608da6..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldWriter.java
+++ /dev/null
@@ -1,69 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.io.IOException;
-
-class BitFieldWriter {
- private RunLengthByteWriter output;
- private final int bitSize;
- private byte current = 0;
- private int bitsLeft = 8;
-
- BitFieldWriter(PositionedOutputStream output,
- int bitSize) throws IOException {
- this.output = new RunLengthByteWriter(output);
- this.bitSize = bitSize;
- }
-
- private void writeByte() throws IOException {
- output.write(current);
- current = 0;
- bitsLeft = 8;
- }
-
- void flush() throws IOException {
- if (bitsLeft != 8) {
- writeByte();
- }
- output.flush();
- }
-
- void write(int value) throws IOException {
- int bitsToWrite = bitSize;
- while (bitsToWrite > bitsLeft) {
- // add the bits to the bottom of the current word
- current |= value >>> (bitsToWrite - bitsLeft);
- // subtract out the bits we just added
- bitsToWrite -= bitsLeft;
- // zero out the bits above bitsToWrite
- value &= (1 << bitsToWrite) - 1;
- writeByte();
- }
- bitsLeft -= bitsToWrite;
- current |= value << bitsLeft;
- if (bitsLeft == 0) {
- writeByte();
- }
- }
-
- void getPosition(PositionRecorder recorder) throws IOException {
- output.getPosition(recorder);
- recorder.addPosition(8 - bitsLeft);
- }
-}
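
For the mirror-image write path, a hedged sketch of the packing loop in write() above, flushing completed bytes into a plain ByteArrayOutputStream rather than a RunLengthByteWriter (all names illustrative):

    import java.io.ByteArrayOutputStream;

    public class TinyBitWriter {
      private final ByteArrayOutputStream out = new ByteArrayOutputStream();
      private byte current = 0;
      private int bitsLeft = 8;

      /** Append the low 'bitSize' bits of value, high-order bits first. */
      public void write(int value, int bitSize) {
        int bitsToWrite = bitSize;
        while (bitsToWrite > bitsLeft) {
          current |= value >>> (bitsToWrite - bitsLeft); // top bits fill this byte
          bitsToWrite -= bitsLeft;
          value &= (1 << bitsToWrite) - 1;               // keep only the remainder
          flushByte();
        }
        bitsLeft -= bitsToWrite;
        current |= value << bitsLeft;
        if (bitsLeft == 0) {
          flushByte();
        }
      }

      private void flushByte() {
        out.write(current);
        current = 0;
        bitsLeft = 8;
      }

      public byte[] finish() {
        if (bitsLeft != 8) {
          flushByte();   // pad the final partial byte with zero bits
        }
        return out.toByteArray();
      }

      public static void main(String[] args) {
        TinyBitWriter w = new TinyBitWriter();
        for (int bit : new int[]{1, 0, 1}) {
          w.write(bit, 1);
        }
        // 0b101 packed high-first into one byte: a0
        System.out.printf("%02x%n", w.finish()[0] & 0xff);
      }
    }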
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BooleanColumnStatistics.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BooleanColumnStatistics.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BooleanColumnStatistics.java
deleted file mode 100644
index 6d03998..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BooleanColumnStatistics.java
+++ /dev/null
@@ -1,27 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-/**
- * Statistics for boolean columns.
- */
-public interface BooleanColumnStatistics extends ColumnStatistics {
- long getFalseCount();
-
- long getTrueCount();
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatistics.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatistics.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatistics.java
deleted file mode 100644
index 7c973c2..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatistics.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-/**
- * Statistics that are available for all types of columns.
- */
-public interface ColumnStatistics {
- /**
- * Get the number of values in this column. It will differ from the number
- * of rows because of NULL values and repeated values.
- * @return the number of values
- */
- long getNumberOfValues();
-
- /**
- * Returns true if there are nulls in the scope of column statistics.
- * @return true if null present else false
- */
- boolean hasNull();
-}
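
These footer statistics are exposed per column through the reader. A hedged usage sketch, assuming the pre-move Hive Reader.getStatistics() API (this interface moves to org.apache.orc in this change; the path argument is illustrative):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hive.ql.io.orc.ColumnStatistics;
    import org.apache.hadoop.hive.ql.io.orc.OrcFile;
    import org.apache.hadoop.hive.ql.io.orc.Reader;

    public class StatsDump {
      public static void main(String[] args) throws Exception {
        Reader reader = OrcFile.createReader(new Path(args[0]),
            OrcFile.readerOptions(new Configuration()));
        int col = 0;
        for (ColumnStatistics stats : reader.getStatistics()) {
          // the value count differs from the row count because of
          // NULL values and repeated values, per the javadoc above
          System.out.println("column " + col++ + ": values="
              + stats.getNumberOfValues() + " hasNull=" + stats.hasNull());
        }
      }
    }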
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
deleted file mode 100644
index bcca9de..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
+++ /dev/null
@@ -1,1082 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.sql.Date;
-import java.sql.Timestamp;
-
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.serde2.io.DateWritable;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.WritableComparator;
-
-class ColumnStatisticsImpl implements ColumnStatistics {
-
- private static final class BooleanStatisticsImpl extends ColumnStatisticsImpl
- implements BooleanColumnStatistics {
- private long trueCount = 0;
-
- BooleanStatisticsImpl(OrcProto.ColumnStatistics stats) {
- super(stats);
- OrcProto.BucketStatistics bkt = stats.getBucketStatistics();
- trueCount = bkt.getCount(0);
- }
-
- BooleanStatisticsImpl() {
- }
-
- @Override
- void reset() {
- super.reset();
- trueCount = 0;
- }
-
- @Override
- void updateBoolean(boolean value, int repetitions) {
- if (value) {
- trueCount += repetitions;
- }
- }
-
- @Override
- void merge(ColumnStatisticsImpl other) {
- if (other instanceof BooleanStatisticsImpl) {
- BooleanStatisticsImpl bkt = (BooleanStatisticsImpl) other;
- trueCount += bkt.trueCount;
- } else {
- if (isStatsExists() && trueCount != 0) {
- throw new IllegalArgumentException("Incompatible merging of boolean column statistics");
- }
- }
- super.merge(other);
- }
-
- @Override
- OrcProto.ColumnStatistics.Builder serialize() {
- OrcProto.ColumnStatistics.Builder builder = super.serialize();
- OrcProto.BucketStatistics.Builder bucket =
- OrcProto.BucketStatistics.newBuilder();
- bucket.addCount(trueCount);
- builder.setBucketStatistics(bucket);
- return builder;
- }
-
- @Override
- public long getFalseCount() {
- return getNumberOfValues() - trueCount;
- }
-
- @Override
- public long getTrueCount() {
- return trueCount;
- }
-
- @Override
- public String toString() {
- return super.toString() + " true: " + trueCount;
- }
- }
-
- private static final class IntegerStatisticsImpl extends ColumnStatisticsImpl
- implements IntegerColumnStatistics {
-
- private long minimum = Long.MAX_VALUE;
- private long maximum = Long.MIN_VALUE;
- private long sum = 0;
- private boolean hasMinimum = false;
- private boolean overflow = false;
-
- IntegerStatisticsImpl() {
- }
-
- IntegerStatisticsImpl(OrcProto.ColumnStatistics stats) {
- super(stats);
- OrcProto.IntegerStatistics intStat = stats.getIntStatistics();
- if (intStat.hasMinimum()) {
- hasMinimum = true;
- minimum = intStat.getMinimum();
- }
- if (intStat.hasMaximum()) {
- maximum = intStat.getMaximum();
- }
- if (intStat.hasSum()) {
- sum = intStat.getSum();
- } else {
- overflow = true;
- }
- }
-
- @Override
- void reset() {
- super.reset();
- hasMinimum = false;
- minimum = Long.MAX_VALUE;
- maximum = Long.MIN_VALUE;
- sum = 0;
- overflow = false;
- }
-
- @Override
- void updateInteger(long value, int repetitions) {
- if (!hasMinimum) {
- hasMinimum = true;
- minimum = value;
- maximum = value;
- } else if (value < minimum) {
- minimum = value;
- } else if (value > maximum) {
- maximum = value;
- }
- if (!overflow) {
- boolean wasPositive = sum >= 0;
- sum += value * repetitions;
- if ((value >= 0) == wasPositive) {
- overflow = (sum >= 0) != wasPositive;
- }
- }
- }
-
- @Override
- void merge(ColumnStatisticsImpl other) {
- if (other instanceof IntegerStatisticsImpl) {
- IntegerStatisticsImpl otherInt = (IntegerStatisticsImpl) other;
- if (!hasMinimum) {
- hasMinimum = otherInt.hasMinimum;
- minimum = otherInt.minimum;
- maximum = otherInt.maximum;
- } else if (otherInt.hasMinimum) {
- if (otherInt.minimum < minimum) {
- minimum = otherInt.minimum;
- }
- if (otherInt.maximum > maximum) {
- maximum = otherInt.maximum;
- }
- }
-
- overflow |= otherInt.overflow;
- if (!overflow) {
- boolean wasPositive = sum >= 0;
- sum += otherInt.sum;
- if ((otherInt.sum >= 0) == wasPositive) {
- overflow = (sum >= 0) != wasPositive;
- }
- }
- } else {
- if (isStatsExists() && hasMinimum) {
- throw new IllegalArgumentException("Incompatible merging of integer column statistics");
- }
- }
- super.merge(other);
- }
-
- @Override
- OrcProto.ColumnStatistics.Builder serialize() {
- OrcProto.ColumnStatistics.Builder builder = super.serialize();
- OrcProto.IntegerStatistics.Builder intb =
- OrcProto.IntegerStatistics.newBuilder();
- if (hasMinimum) {
- intb.setMinimum(minimum);
- intb.setMaximum(maximum);
- }
- if (!overflow) {
- intb.setSum(sum);
- }
- builder.setIntStatistics(intb);
- return builder;
- }
-
- @Override
- public long getMinimum() {
- return minimum;
- }
-
- @Override
- public long getMaximum() {
- return maximum;
- }
-
- @Override
- public boolean isSumDefined() {
- return !overflow;
- }
-
- @Override
- public long getSum() {
- return sum;
- }
-
- @Override
- public String toString() {
- StringBuilder buf = new StringBuilder(super.toString());
- if (hasMinimum) {
- buf.append(" min: ");
- buf.append(minimum);
- buf.append(" max: ");
- buf.append(maximum);
- }
- if (!overflow) {
- buf.append(" sum: ");
- buf.append(sum);
- }
- return buf.toString();
- }
- }
-
- private static final class DoubleStatisticsImpl extends ColumnStatisticsImpl
- implements DoubleColumnStatistics {
- private boolean hasMinimum = false;
- private double minimum = Double.MAX_VALUE;
- private double maximum = Double.MIN_VALUE;
- private double sum = 0;
-
- DoubleStatisticsImpl() {
- }
-
- DoubleStatisticsImpl(OrcProto.ColumnStatistics stats) {
- super(stats);
- OrcProto.DoubleStatistics dbl = stats.getDoubleStatistics();
- if (dbl.hasMinimum()) {
- hasMinimum = true;
- minimum = dbl.getMinimum();
- }
- if (dbl.hasMaximum()) {
- maximum = dbl.getMaximum();
- }
- if (dbl.hasSum()) {
- sum = dbl.getSum();
- }
- }
-
- @Override
- void reset() {
- super.reset();
- hasMinimum = false;
- minimum = Double.MAX_VALUE;
- maximum = Double.MIN_VALUE;
- sum = 0;
- }
-
- @Override
- void updateDouble(double value) {
- if (!hasMinimum) {
- hasMinimum = true;
- minimum = value;
- maximum = value;
- } else if (value < minimum) {
- minimum = value;
- } else if (value > maximum) {
- maximum = value;
- }
- sum += value;
- }
-
- @Override
- void merge(ColumnStatisticsImpl other) {
- if (other instanceof DoubleStatisticsImpl) {
- DoubleStatisticsImpl dbl = (DoubleStatisticsImpl) other;
- if (!hasMinimum) {
- hasMinimum = dbl.hasMinimum;
- minimum = dbl.minimum;
- maximum = dbl.maximum;
- } else if (dbl.hasMinimum) {
- if (dbl.minimum < minimum) {
- minimum = dbl.minimum;
- }
- if (dbl.maximum > maximum) {
- maximum = dbl.maximum;
- }
- }
- sum += dbl.sum;
- } else {
- if (isStatsExists() && hasMinimum) {
- throw new IllegalArgumentException("Incompatible merging of double column statistics");
- }
- }
- super.merge(other);
- }
-
- @Override
- OrcProto.ColumnStatistics.Builder serialize() {
- OrcProto.ColumnStatistics.Builder builder = super.serialize();
- OrcProto.DoubleStatistics.Builder dbl =
- OrcProto.DoubleStatistics.newBuilder();
- if (hasMinimum) {
- dbl.setMinimum(minimum);
- dbl.setMaximum(maximum);
- }
- dbl.setSum(sum);
- builder.setDoubleStatistics(dbl);
- return builder;
- }
-
- @Override
- public double getMinimum() {
- return minimum;
- }
-
- @Override
- public double getMaximum() {
- return maximum;
- }
-
- @Override
- public double getSum() {
- return sum;
- }
-
- @Override
- public String toString() {
- StringBuilder buf = new StringBuilder(super.toString());
- if (hasMinimum) {
- buf.append(" min: ");
- buf.append(minimum);
- buf.append(" max: ");
- buf.append(maximum);
- }
- buf.append(" sum: ");
- buf.append(sum);
- return buf.toString();
- }
- }
-
- protected static final class StringStatisticsImpl extends ColumnStatisticsImpl
- implements StringColumnStatistics {
- private Text minimum = null;
- private Text maximum = null;
- private long sum = 0;
-
- StringStatisticsImpl() {
- }
-
- StringStatisticsImpl(OrcProto.ColumnStatistics stats) {
- super(stats);
- OrcProto.StringStatistics str = stats.getStringStatistics();
- if (str.hasMaximum()) {
- maximum = new Text(str.getMaximum());
- }
- if (str.hasMinimum()) {
- minimum = new Text(str.getMinimum());
- }
- if(str.hasSum()) {
- sum = str.getSum();
- }
- }
-
- @Override
- void reset() {
- super.reset();
- minimum = null;
- maximum = null;
- sum = 0;
- }
-
- @Override
- void updateString(Text value) {
- if (minimum == null) {
- maximum = minimum = new Text(value);
- } else if (minimum.compareTo(value) > 0) {
- minimum = new Text(value);
- } else if (maximum.compareTo(value) < 0) {
- maximum = new Text(value);
- }
- sum += value.getLength();
- }
-
- @Override
- void updateString(byte[] bytes, int offset, int length, int repetitions) {
- if (minimum == null) {
- maximum = minimum = new Text();
- maximum.set(bytes, offset, length);
- } else if (WritableComparator.compareBytes(minimum.getBytes(), 0,
- minimum.getLength(), bytes, offset, length) > 0) {
- minimum = new Text();
- minimum.set(bytes, offset, length);
- } else if (WritableComparator.compareBytes(maximum.getBytes(), 0,
- maximum.getLength(), bytes, offset, length) < 0) {
- maximum = new Text();
- maximum.set(bytes, offset, length);
- }
- sum += length * repetitions;
- }
-
- @Override
- void merge(ColumnStatisticsImpl other) {
- if (other instanceof StringStatisticsImpl) {
- StringStatisticsImpl str = (StringStatisticsImpl) other;
- if (minimum == null) {
- if (str.minimum != null) {
- maximum = new Text(str.getMaximum());
- minimum = new Text(str.getMinimum());
- } else {
- /* both are empty */
- maximum = minimum = null;
- }
- } else if (str.minimum != null) {
- if (minimum.compareTo(str.minimum) > 0) {
- minimum = new Text(str.getMinimum());
- }
- if (maximum.compareTo(str.maximum) < 0) {
- maximum = new Text(str.getMaximum());
- }
- }
- sum += str.sum;
- } else {
- if (isStatsExists() && minimum != null) {
- throw new IllegalArgumentException("Incompatible merging of string column statistics");
- }
- }
- super.merge(other);
- }
-
- @Override
- OrcProto.ColumnStatistics.Builder serialize() {
- OrcProto.ColumnStatistics.Builder result = super.serialize();
- OrcProto.StringStatistics.Builder str =
- OrcProto.StringStatistics.newBuilder();
- if (getNumberOfValues() != 0) {
- str.setMinimum(getMinimum());
- str.setMaximum(getMaximum());
- str.setSum(sum);
- }
- result.setStringStatistics(str);
- return result;
- }
-
- @Override
- public String getMinimum() {
- return minimum == null ? null : minimum.toString();
- }
-
- @Override
- public String getMaximum() {
- return maximum == null ? null : maximum.toString();
- }
-
- @Override
- public long getSum() {
- return sum;
- }
-
- @Override
- public String toString() {
- StringBuilder buf = new StringBuilder(super.toString());
- if (getNumberOfValues() != 0) {
- buf.append(" min: ");
- buf.append(getMinimum());
- buf.append(" max: ");
- buf.append(getMaximum());
- buf.append(" sum: ");
- buf.append(sum);
- }
- return buf.toString();
- }
- }
-
- protected static final class BinaryStatisticsImpl extends ColumnStatisticsImpl implements
- BinaryColumnStatistics {
-
- private long sum = 0;
-
- BinaryStatisticsImpl() {
- }
-
- BinaryStatisticsImpl(OrcProto.ColumnStatistics stats) {
- super(stats);
- OrcProto.BinaryStatistics binStats = stats.getBinaryStatistics();
- if (binStats.hasSum()) {
- sum = binStats.getSum();
- }
- }
-
- @Override
- void reset() {
- super.reset();
- sum = 0;
- }
-
- @Override
- void updateBinary(BytesWritable value) {
- sum += value.getLength();
- }
-
- @Override
- void updateBinary(byte[] bytes, int offset, int length, int repetitions) {
- sum += length * repetitions;
- }
-
- @Override
- void merge(ColumnStatisticsImpl other) {
- if (other instanceof BinaryColumnStatistics) {
- BinaryStatisticsImpl bin = (BinaryStatisticsImpl) other;
- sum += bin.sum;
- } else {
- if (isStatsExists() && sum != 0) {
- throw new IllegalArgumentException("Incompatible merging of binary column statistics");
- }
- }
- super.merge(other);
- }
-
- @Override
- public long getSum() {
- return sum;
- }
-
- @Override
- OrcProto.ColumnStatistics.Builder serialize() {
- OrcProto.ColumnStatistics.Builder result = super.serialize();
- OrcProto.BinaryStatistics.Builder bin = OrcProto.BinaryStatistics.newBuilder();
- bin.setSum(sum);
- result.setBinaryStatistics(bin);
- return result;
- }
-
- @Override
- public String toString() {
- StringBuilder buf = new StringBuilder(super.toString());
- if (getNumberOfValues() != 0) {
- buf.append(" sum: ");
- buf.append(sum);
- }
- return buf.toString();
- }
- }
-
- private static final class DecimalStatisticsImpl extends ColumnStatisticsImpl
- implements DecimalColumnStatistics {
- private HiveDecimal minimum = null;
- private HiveDecimal maximum = null;
- private HiveDecimal sum = HiveDecimal.ZERO;
-
- DecimalStatisticsImpl() {
- }
-
- DecimalStatisticsImpl(OrcProto.ColumnStatistics stats) {
- super(stats);
- OrcProto.DecimalStatistics dec = stats.getDecimalStatistics();
- if (dec.hasMaximum()) {
- maximum = HiveDecimal.create(dec.getMaximum());
- }
- if (dec.hasMinimum()) {
- minimum = HiveDecimal.create(dec.getMinimum());
- }
- if (dec.hasSum()) {
- sum = HiveDecimal.create(dec.getSum());
- } else {
- sum = null;
- }
- }
-
- @Override
- void reset() {
- super.reset();
- minimum = null;
- maximum = null;
- sum = HiveDecimal.ZERO;
- }
-
- @Override
- void updateDecimal(HiveDecimal value) {
- if (minimum == null) {
- minimum = value;
- maximum = value;
- } else if (minimum.compareTo(value) > 0) {
- minimum = value;
- } else if (maximum.compareTo(value) < 0) {
- maximum = value;
- }
- if (sum != null) {
- sum = sum.add(value);
- }
- }
-
- @Override
- void merge(ColumnStatisticsImpl other) {
- if (other instanceof DecimalStatisticsImpl) {
- DecimalStatisticsImpl dec = (DecimalStatisticsImpl) other;
- if (minimum == null) {
- minimum = dec.minimum;
- maximum = dec.maximum;
- sum = dec.sum;
- } else if (dec.minimum != null) {
- if (minimum.compareTo(dec.minimum) > 0) {
- minimum = dec.minimum;
- }
- if (maximum.compareTo(dec.maximum) < 0) {
- maximum = dec.maximum;
- }
- if (sum == null || dec.sum == null) {
- sum = null;
- } else {
- sum = sum.add(dec.sum);
- }
- }
- } else {
- if (isStatsExists() && minimum != null) {
- throw new IllegalArgumentException("Incompatible merging of decimal column statistics");
- }
- }
- super.merge(other);
- }
-
- @Override
- OrcProto.ColumnStatistics.Builder serialize() {
- OrcProto.ColumnStatistics.Builder result = super.serialize();
- OrcProto.DecimalStatistics.Builder dec =
- OrcProto.DecimalStatistics.newBuilder();
- if (getNumberOfValues() != 0 && minimum != null) {
- dec.setMinimum(minimum.toString());
- dec.setMaximum(maximum.toString());
- }
- if (sum != null) {
- dec.setSum(sum.toString());
- }
- result.setDecimalStatistics(dec);
- return result;
- }
-
- @Override
- public HiveDecimal getMinimum() {
- return minimum;
- }
-
- @Override
- public HiveDecimal getMaximum() {
- return maximum;
- }
-
- @Override
- public HiveDecimal getSum() {
- return sum;
- }
-
- @Override
- public String toString() {
- StringBuilder buf = new StringBuilder(super.toString());
- if (getNumberOfValues() != 0) {
- buf.append(" min: ");
- buf.append(minimum);
- buf.append(" max: ");
- buf.append(maximum);
- if (sum != null) {
- buf.append(" sum: ");
- buf.append(sum);
- }
- }
- return buf.toString();
- }
- }
-
- private static final class DateStatisticsImpl extends ColumnStatisticsImpl
- implements DateColumnStatistics {
- private Integer minimum = null;
- private Integer maximum = null;
-
- DateStatisticsImpl() {
- }
-
- DateStatisticsImpl(OrcProto.ColumnStatistics stats) {
- super(stats);
- OrcProto.DateStatistics dateStats = stats.getDateStatistics();
- // min,max values serialized/deserialized as int (days since epoch)
- if (dateStats.hasMaximum()) {
- maximum = dateStats.getMaximum();
- }
- if (dateStats.hasMinimum()) {
- minimum = dateStats.getMinimum();
- }
- }
-
- @Override
- void reset() {
- super.reset();
- minimum = null;
- maximum = null;
- }
-
- @Override
- void updateDate(DateWritable value) {
- if (minimum == null) {
- minimum = value.getDays();
- maximum = value.getDays();
- } else if (minimum > value.getDays()) {
- minimum = value.getDays();
- } else if (maximum < value.getDays()) {
- maximum = value.getDays();
- }
- }
-
- @Override
- void updateDate(int value) {
- if (minimum == null) {
- minimum = value;
- maximum = value;
- } else if (minimum > value) {
- minimum = value;
- } else if (maximum < value) {
- maximum = value;
- }
- }
-
- @Override
- void merge(ColumnStatisticsImpl other) {
- if (other instanceof DateStatisticsImpl) {
- DateStatisticsImpl dateStats = (DateStatisticsImpl) other;
- if (minimum == null) {
- minimum = dateStats.minimum;
- maximum = dateStats.maximum;
- } else if (dateStats.minimum != null) {
- if (minimum > dateStats.minimum) {
- minimum = dateStats.minimum;
- }
- if (maximum < dateStats.maximum) {
- maximum = dateStats.maximum;
- }
- }
- } else {
- if (isStatsExists() && minimum != null) {
- throw new IllegalArgumentException("Incompatible merging of date column statistics");
- }
- }
- super.merge(other);
- }
-
- @Override
- OrcProto.ColumnStatistics.Builder serialize() {
- OrcProto.ColumnStatistics.Builder result = super.serialize();
- OrcProto.DateStatistics.Builder dateStats =
- OrcProto.DateStatistics.newBuilder();
- if (getNumberOfValues() != 0 && minimum != null) {
- dateStats.setMinimum(minimum);
- dateStats.setMaximum(maximum);
- }
- result.setDateStatistics(dateStats);
- return result;
- }
-
- private transient final DateWritable minDate = new DateWritable();
- private transient final DateWritable maxDate = new DateWritable();
-
- @Override
- public Date getMinimum() {
- if (minimum == null) {
- return null;
- }
- minDate.set(minimum);
- return minDate.get();
- }
-
- @Override
- public Date getMaximum() {
- if (maximum == null) {
- return null;
- }
- maxDate.set(maximum);
- return maxDate.get();
- }
-
- @Override
- public String toString() {
- StringBuilder buf = new StringBuilder(super.toString());
- if (getNumberOfValues() != 0) {
- buf.append(" min: ");
- buf.append(getMinimum());
- buf.append(" max: ");
- buf.append(getMaximum());
- }
- return buf.toString();
- }
- }
-
- private static final class TimestampStatisticsImpl extends ColumnStatisticsImpl
- implements TimestampColumnStatistics {
- private Long minimum = null;
- private Long maximum = null;
-
- TimestampStatisticsImpl() {
- }
-
- TimestampStatisticsImpl(OrcProto.ColumnStatistics stats) {
- super(stats);
- OrcProto.TimestampStatistics timestampStats = stats.getTimestampStatistics();
- // min,max values serialized/deserialized as long (milliseconds since epoch)
- if (timestampStats.hasMaximum()) {
- maximum = timestampStats.getMaximum();
- }
- if (timestampStats.hasMinimum()) {
- minimum = timestampStats.getMinimum();
- }
- }
-
- @Override
- void reset() {
- super.reset();
- minimum = null;
- maximum = null;
- }
-
- @Override
- void updateTimestamp(Timestamp value) {
- if (minimum == null) {
- minimum = value.getTime();
- maximum = value.getTime();
- } else if (minimum > value.getTime()) {
- minimum = value.getTime();
- } else if (maximum < value.getTime()) {
- maximum = value.getTime();
- }
- }
-
- @Override
- void updateTimestamp(long value) {
- if (minimum == null) {
- minimum = value;
- maximum = value;
- } else if (minimum > value) {
- minimum = value;
- } else if (maximum < value) {
- maximum = value;
- }
- }
-
- @Override
- void merge(ColumnStatisticsImpl other) {
- if (other instanceof TimestampStatisticsImpl) {
- TimestampStatisticsImpl timestampStats = (TimestampStatisticsImpl) other;
- if (minimum == null) {
- minimum = timestampStats.minimum;
- maximum = timestampStats.maximum;
- } else if (timestampStats.minimum != null) {
- if (minimum > timestampStats.minimum) {
- minimum = timestampStats.minimum;
- }
- if (maximum < timestampStats.maximum) {
- maximum = timestampStats.maximum;
- }
- }
- } else {
- if (isStatsExists() && minimum != null) {
- throw new IllegalArgumentException("Incompatible merging of timestamp column statistics");
- }
- }
- super.merge(other);
- }
-
- @Override
- OrcProto.ColumnStatistics.Builder serialize() {
- OrcProto.ColumnStatistics.Builder result = super.serialize();
- OrcProto.TimestampStatistics.Builder timestampStats = OrcProto.TimestampStatistics
- .newBuilder();
- if (getNumberOfValues() != 0 && minimum != null) {
- timestampStats.setMinimum(minimum);
- timestampStats.setMaximum(maximum);
- }
- result.setTimestampStatistics(timestampStats);
- return result;
- }
-
- @Override
- public Timestamp getMinimum() {
- return minimum == null ? null : new Timestamp(minimum);
- }
-
- @Override
- public Timestamp getMaximum() {
- return maximum == null ? null : new Timestamp(maximum);
- }
-
- @Override
- public String toString() {
- StringBuilder buf = new StringBuilder(super.toString());
- if (getNumberOfValues() != 0) {
- buf.append(" min: ");
- buf.append(getMinimum());
- buf.append(" max: ");
- buf.append(getMaximum());
- }
- return buf.toString();
- }
- }
-
- private long count = 0;
- private boolean hasNull = false;
-
- ColumnStatisticsImpl(OrcProto.ColumnStatistics stats) {
- if (stats.hasNumberOfValues()) {
- count = stats.getNumberOfValues();
- }
-
- if (stats.hasHasNull()) {
- hasNull = stats.getHasNull();
- } else {
- hasNull = true;
- }
- }
-
- ColumnStatisticsImpl() {
- }
-
- void increment() {
- count += 1;
- }
-
- void increment(int count) {
- this.count += count;
- }
-
- void setNull() {
- hasNull = true;
- }
-
- void updateBoolean(boolean value, int repetitions) {
- throw new UnsupportedOperationException("Can't update boolean");
- }
-
- void updateInteger(long value, int repetitions) {
- throw new UnsupportedOperationException("Can't update integer");
- }
-
- void updateDouble(double value) {
- throw new UnsupportedOperationException("Can't update double");
- }
-
- void updateString(Text value) {
- throw new UnsupportedOperationException("Can't update string");
- }
-
- void updateString(byte[] bytes, int offset, int length, int repetitions) {
- throw new UnsupportedOperationException("Can't update string");
- }
-
- void updateBinary(BytesWritable value) {
- throw new UnsupportedOperationException("Can't update binary");
- }
-
- void updateBinary(byte[] bytes, int offset, int length, int repetitions) {
- throw new UnsupportedOperationException("Can't update string");
- }
-
- void updateDecimal(HiveDecimal value) {
- throw new UnsupportedOperationException("Can't update decimal");
- }
-
- void updateDate(DateWritable value) {
- throw new UnsupportedOperationException("Can't update date");
- }
-
- void updateDate(int value) {
- throw new UnsupportedOperationException("Can't update date");
- }
-
- void updateTimestamp(Timestamp value) {
- throw new UnsupportedOperationException("Can't update timestamp");
- }
-
- void updateTimestamp(long value) {
- throw new UnsupportedOperationException("Can't update timestamp");
- }
-
- boolean isStatsExists() {
- return count > 0 || hasNull;
- }
-
- void merge(ColumnStatisticsImpl stats) {
- count += stats.count;
- hasNull |= stats.hasNull;
- }
-
- void reset() {
- count = 0;
- hasNull = false;
- }
-
- @Override
- public long getNumberOfValues() {
- return count;
- }
-
- @Override
- public boolean hasNull() {
- return hasNull;
- }
-
- @Override
- public String toString() {
- return "count: " + count + " hasNull: " + hasNull;
- }
-
- OrcProto.ColumnStatistics.Builder serialize() {
- OrcProto.ColumnStatistics.Builder builder =
- OrcProto.ColumnStatistics.newBuilder();
- builder.setNumberOfValues(count);
- builder.setHasNull(hasNull);
- return builder;
- }
-
- static ColumnStatisticsImpl create(TypeDescription schema) {
- switch (schema.getCategory()) {
- case BOOLEAN:
- return new BooleanStatisticsImpl();
- case BYTE:
- case SHORT:
- case INT:
- case LONG:
- return new IntegerStatisticsImpl();
- case FLOAT:
- case DOUBLE:
- return new DoubleStatisticsImpl();
- case STRING:
- case CHAR:
- case VARCHAR:
- return new StringStatisticsImpl();
- case DECIMAL:
- return new DecimalStatisticsImpl();
- case DATE:
- return new DateStatisticsImpl();
- case TIMESTAMP:
- return new TimestampStatisticsImpl();
- case BINARY:
- return new BinaryStatisticsImpl();
- default:
- return new ColumnStatisticsImpl();
- }
- }
-
- static ColumnStatisticsImpl deserialize(OrcProto.ColumnStatistics stats) {
- if (stats.hasBucketStatistics()) {
- return new BooleanStatisticsImpl(stats);
- } else if (stats.hasIntStatistics()) {
- return new IntegerStatisticsImpl(stats);
- } else if (stats.hasDoubleStatistics()) {
- return new DoubleStatisticsImpl(stats);
- } else if (stats.hasStringStatistics()) {
- return new StringStatisticsImpl(stats);
- } else if (stats.hasDecimalStatistics()) {
- return new DecimalStatisticsImpl(stats);
- } else if (stats.hasDateStatistics()) {
- return new DateStatisticsImpl(stats);
- } else if (stats.hasTimestampStatistics()) {
- return new TimestampStatisticsImpl(stats);
- } else if(stats.hasBinaryStatistics()) {
- return new BinaryStatisticsImpl(stats);
- } else {
- return new ColumnStatisticsImpl(stats);
- }
- }
-}
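
One subtle piece above is IntegerStatisticsImpl's overflow detection: adding two values of the same sign can only overflow, and overflow always flips the sign of the running sum. A standalone illustration of that check (single-value form, without the repetitions factor used above):

    public class OverflowCheck {
      static long sum = 0;
      static boolean overflow = false;

      static void add(long value) {
        if (!overflow) {
          boolean wasPositive = sum >= 0;
          sum += value;
          // overflow is only possible when both operands share a sign,
          // and it shows up as the sum's sign flipping
          if ((value >= 0) == wasPositive) {
            overflow = (sum >= 0) != wasPositive;
          }
        }
      }

      public static void main(String[] args) {
        add(Long.MAX_VALUE);
        add(1);                       // wraps to Long.MIN_VALUE: sign flips
        System.out.println(overflow); // true, so serialize() would omit the sum
      }
    }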
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java
deleted file mode 100644
index ed9d7ac..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java
+++ /dev/null
@@ -1,69 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.EnumSet;
-
-import javax.annotation.Nullable;
-
-public interface CompressionCodec {
-
- public enum Modifier {
- /* speed/compression tradeoffs */
- FASTEST,
- FAST,
- DEFAULT,
- /* data sensitivity modifiers */
- TEXT,
- BINARY
- };
-
- /**
- * Compress the in buffer to the out buffer.
- * @param in the bytes to compress
- * @param out the compressed bytes
- * @param overflow put any additional bytes here
- * @return true if the output is smaller than input
- * @throws IOException
- */
- boolean compress(ByteBuffer in, ByteBuffer out, ByteBuffer overflow
- ) throws IOException;
-
- /**
- * Decompress the in buffer to the out buffer.
- * @param in the bytes to decompress
- * @param out the decompressed bytes
- * @throws IOException
- */
- void decompress(ByteBuffer in, ByteBuffer out) throws IOException;
-
- /**
- * Produce a modified compression codec if the underlying algorithm allows
- * modification.
- *
- * This does not modify the current object, but returns a new object if
- * modifications are possible. Returns the same object if no modifications
- * are possible.
- * @param modifiers compression modifiers
- * @return codec for use after optional modification
- */
- CompressionCodec modify(@Nullable EnumSet<Modifier> modifiers);
-
-}
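
The compress() contract above is worth spelling out: the codec returns false when compression would not shrink the data, and the caller then stores the original bytes. A hedged, purely illustrative identity codec satisfying the interface (not a real Hive codec):

    import java.io.IOException;
    import java.nio.ByteBuffer;
    import java.util.EnumSet;

    public class IdentityCodec implements CompressionCodec {
      @Override
      public boolean compress(ByteBuffer in, ByteBuffer out, ByteBuffer overflow)
          throws IOException {
        return false; // output is never smaller, caller keeps 'in' uncompressed
      }

      @Override
      public void decompress(ByteBuffer in, ByteBuffer out) throws IOException {
        out.put(in);  // bytes were stored as-is
        out.flip();
      }

      @Override
      public CompressionCodec modify(EnumSet<Modifier> modifiers) {
        return this;  // identity has nothing to tune
      }
    }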
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionKind.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionKind.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionKind.java
index 07c6116..22627df 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionKind.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionKind.java
@@ -20,8 +20,22 @@ package org.apache.hadoop.hive.ql.io.orc;
/**
* An enumeration that lists the generic compression algorithms that
- * can be applied to ORC files.
+ * can be applied to ORC files. This is a shim to help users while we
+ * migrate to the org.apache.orc package.
*/
public enum CompressionKind {
- NONE, ZLIB, SNAPPY, LZO
+ NONE(org.apache.orc.CompressionKind.NONE),
+ ZLIB(org.apache.orc.CompressionKind.ZLIB),
+ SNAPPY(org.apache.orc.CompressionKind.SNAPPY),
+ LZO(org.apache.orc.CompressionKind.LZO);
+
+ CompressionKind(org.apache.orc.CompressionKind underlying) {
+ this.underlying = underlying;
+ }
+
+ public org.apache.orc.CompressionKind getUnderlying() {
+ return underlying;
+ }
+
+ private final org.apache.orc.CompressionKind underlying;
}
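
A quick usage sketch of the shim: existing call sites keep the Hive enum, and code targeting the new module unwraps it via getUnderlying().

    import org.apache.hadoop.hive.ql.io.orc.CompressionKind;

    public class ShimExample {
      public static void main(String[] args) {
        CompressionKind hive = CompressionKind.ZLIB;
        org.apache.orc.CompressionKind orc = hive.getUnderlying();
        System.out.println(hive + " -> " + orc); // ZLIB -> ZLIB
      }
    }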
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DataReader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DataReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DataReader.java
deleted file mode 100644
index e0d9943..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DataReader.java
+++ /dev/null
@@ -1,58 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-
-import org.apache.hadoop.hive.common.io.DiskRangeList;
-
-/** An abstract data reader that IO formats can use to read bytes from underlying storage. */
-public interface DataReader {
-
- /** Opens the DataReader, making it ready to use. */
- void open() throws IOException;
-
- /** Closes the DataReader. */
- void close() throws IOException;
-
- /** Reads the data.
- *
- * Note that for the cases such as zero-copy read, caller must release the disk ranges
- * produced after being done with them. Call isTrackingDiskRanges to find out if this is needed.
- * @param range List of disk ranges to read. Ranges with data will be ignored.
- * @param baseOffset Base offset from the start of the file of the ranges in disk range list.
- * @param doForceDirect Whether the data should be read into direct buffers.
- * @return New or modified list of DiskRange-s, where all the ranges are filled with data.
- */
- DiskRangeList readFileData(
- DiskRangeList range, long baseOffset, boolean doForceDirect) throws IOException;
-
-
- /**
- * Whether the user should release buffers created by readFileData. See readFileData javadoc.
- */
- boolean isTrackingDiskRanges();
-
- /**
- * Releases buffers created by readFileData. See readFileData javadoc.
- * @param toRelease The buffer to release.
- */
- void releaseBuffer(ByteBuffer toRelease);
-}
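
The javadoc above implies a lifecycle: open, read ranges, release buffers only when the reader tracks them, close. A hedged sketch of a caller following that contract (the helper method is illustrative; only the call sequence comes from the interface):

    import java.io.IOException;
    import org.apache.hadoop.hive.common.io.DiskRangeList;

    public class DataReaderUsage {
      static DiskRangeList readOnce(DataReader reader, DiskRangeList ranges,
                                    long baseOffset) throws IOException {
        reader.open();
        try {
          DiskRangeList filled = reader.readFileData(ranges, baseOffset, false);
          // ... consume 'filled'; when reader.isTrackingDiskRanges() is true,
          // each produced buffer must later go back through releaseBuffer() ...
          return filled;
        } finally {
          reader.close();
        }
      }
    }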
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DateColumnStatistics.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DateColumnStatistics.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DateColumnStatistics.java
deleted file mode 100644
index ae3fe31..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DateColumnStatistics.java
+++ /dev/null
@@ -1,37 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.util.Date;
-
-/**
- * Statistics for DATE columns.
- */
-public interface DateColumnStatistics extends ColumnStatistics {
- /**
- * Get the minimum value for the column.
- * @return minimum value
- */
- Date getMinimum();
-
- /**
- * Get the maximum value for the column.
- * @return maximum value
- */
- Date getMaximum();
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DecimalColumnStatistics.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DecimalColumnStatistics.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DecimalColumnStatistics.java
deleted file mode 100644
index ec6aa43..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DecimalColumnStatistics.java
+++ /dev/null
@@ -1,45 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-
-/**
- * Statistics for decimal columns.
- */
-public interface DecimalColumnStatistics extends ColumnStatistics {
-
- /**
- * Get the minimum value for the column.
- * @return the minimum value
- */
- HiveDecimal getMinimum();
-
- /**
- * Get the maximum value for the column.
- * @return the maximum value
- */
- HiveDecimal getMaximum();
-
- /**
- * Get the sum of the values of the column.
- * @return the sum
- */
- HiveDecimal getSum();
-
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DirectDecompressionCodec.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DirectDecompressionCodec.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DirectDecompressionCodec.java
deleted file mode 100644
index 41a77b0..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DirectDecompressionCodec.java
+++ /dev/null
@@ -1,26 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-
-public interface DirectDecompressionCodec extends CompressionCodec {
- public boolean isAvailable();
- public void directDecompress(ByteBuffer in, ByteBuffer out) throws IOException;
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DoubleColumnStatistics.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DoubleColumnStatistics.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DoubleColumnStatistics.java
deleted file mode 100644
index 6af7535..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DoubleColumnStatistics.java
+++ /dev/null
@@ -1,44 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-/**
- * Statistics for float and double columns.
- */
-public interface DoubleColumnStatistics extends ColumnStatistics {
-
- /**
- * Get the smallest value in the column. Only defined if getNumberOfValues
- * is non-zero.
- * @return the minimum
- */
- double getMinimum();
-
- /**
- * Get the largest value in the column. Only defined if getNumberOfValues
- * is non-zero.
- * @return the maximum
- */
- double getMaximum();
-
- /**
- * Get the sum of the values in the column.
- * @return the sum
- */
- double getSum();
-}
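
The statistics interfaces are relocated rather than dropped; after this series the equivalent interface is assumed to live under org.apache.orc, matching the ColumnStatistics import visible in the FileDump hunk further down. A hedged sketch of reading double statistics from a file's column statistics array (the array and column index are illustrative):

import org.apache.orc.ColumnStatistics;
import org.apache.orc.DoubleColumnStatistics; // assumed post-patch location

class DoubleStatsExample {
  static void printDoubleStats(ColumnStatistics[] fileStats, int col) {
    ColumnStatistics cs = fileStats[col];
    // Min and max are only defined when the column has at least one value.
    if (cs instanceof DoubleColumnStatistics && cs.getNumberOfValues() > 0) {
      DoubleColumnStatistics ds = (DoubleColumnStatistics) cs;
      System.out.println("min=" + ds.getMinimum()
          + " max=" + ds.getMaximum()
          + " sum=" + ds.getSum());
    }
  }
}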
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicByteArray.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicByteArray.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicByteArray.java
deleted file mode 100644
index 063c53c..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicByteArray.java
+++ /dev/null
@@ -1,303 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.nio.ByteBuffer;
-
-import org.apache.hadoop.io.Text;
-
-/**
- * A class that is a growable array of bytes. Growth is managed in terms of
- * chunks that are allocated when needed.
- */
-public final class DynamicByteArray {
- static final int DEFAULT_CHUNKSIZE = 32 * 1024;
- static final int DEFAULT_NUM_CHUNKS = 128;
-
- private final int chunkSize; // our allocation sizes
- private byte[][] data; // the real data
- private int length; // max set element index +1
- private int initializedChunks = 0; // the number of chunks created
-
- public DynamicByteArray() {
- this(DEFAULT_NUM_CHUNKS, DEFAULT_CHUNKSIZE);
- }
-
- public DynamicByteArray(int numChunks, int chunkSize) {
- if (chunkSize == 0) {
- throw new IllegalArgumentException("bad chunksize");
- }
- this.chunkSize = chunkSize;
- data = new byte[numChunks][];
- }
-
- /**
- * Ensure that the given index is valid.
- */
- private void grow(int chunkIndex) {
- if (chunkIndex >= initializedChunks) {
- if (chunkIndex >= data.length) {
- int newSize = Math.max(chunkIndex + 1, 2 * data.length);
- byte[][] newChunk = new byte[newSize][];
- System.arraycopy(data, 0, newChunk, 0, data.length);
- data = newChunk;
- }
- for(int i=initializedChunks; i <= chunkIndex; ++i) {
- data[i] = new byte[chunkSize];
- }
- initializedChunks = chunkIndex + 1;
- }
- }
-
- public byte get(int index) {
- if (index >= length) {
- throw new IndexOutOfBoundsException("Index " + index +
- " is outside of 0.." +
- (length - 1));
- }
- int i = index / chunkSize;
- int j = index % chunkSize;
- return data[i][j];
- }
-
- public void set(int index, byte value) {
- int i = index / chunkSize;
- int j = index % chunkSize;
- grow(i);
- if (index >= length) {
- length = index + 1;
- }
- data[i][j] = value;
- }
-
- public int add(byte value) {
- int i = length / chunkSize;
- int j = length % chunkSize;
- grow(i);
- data[i][j] = value;
- int result = length;
- length += 1;
- return result;
- }
-
- /**
- * Copy a slice of a byte array into our buffer.
- * @param value the array to copy from
- * @param valueOffset the first location to copy from value
- * @param valueLength the number of bytes to copy from value
- * @return the offset of the start of the value
- */
- public int add(byte[] value, int valueOffset, int valueLength) {
- int i = length / chunkSize;
- int j = length % chunkSize;
- grow((length + valueLength) / chunkSize);
- int remaining = valueLength;
- while (remaining > 0) {
- int size = Math.min(remaining, chunkSize - j);
- System.arraycopy(value, valueOffset, data[i], j, size);
- remaining -= size;
- valueOffset += size;
- i += 1;
- j = 0;
- }
- int result = length;
- length += valueLength;
- return result;
- }
-
- /**
- * Read the entire stream into this array.
- * @param in the stream to read from
- * @throws IOException
- */
- public void readAll(InputStream in) throws IOException {
- int currentChunk = length / chunkSize;
- int currentOffset = length % chunkSize;
- grow(currentChunk);
- int currentLength = in.read(data[currentChunk], currentOffset,
- chunkSize - currentOffset);
- while (currentLength > 0) {
- length += currentLength;
- currentOffset = length % chunkSize;
- if (currentOffset == 0) {
- currentChunk = length / chunkSize;
- grow(currentChunk);
- }
- currentLength = in.read(data[currentChunk], currentOffset,
- chunkSize - currentOffset);
- }
- }
-
- /**
- * Byte compare a set of bytes against the bytes in this dynamic array.
- * @param other source of the other bytes
- * @param otherOffset start offset in the other array
- * @param otherLength number of bytes in the other array
- * @param ourOffset the offset in our array
- * @param ourLength the number of bytes in our array
- * @return negative for less, 0 for equal, positive for greater
- */
- public int compare(byte[] other, int otherOffset, int otherLength,
- int ourOffset, int ourLength) {
- int currentChunk = ourOffset / chunkSize;
- int currentOffset = ourOffset % chunkSize;
- int maxLength = Math.min(otherLength, ourLength);
- while (maxLength > 0 &&
- other[otherOffset] == data[currentChunk][currentOffset]) {
- otherOffset += 1;
- currentOffset += 1;
- if (currentOffset == chunkSize) {
- currentChunk += 1;
- currentOffset = 0;
- }
- maxLength -= 1;
- }
- if (maxLength == 0) {
- return otherLength - ourLength;
- }
- int otherByte = 0xff & other[otherOffset];
- int ourByte = 0xff & data[currentChunk][currentOffset];
- return otherByte > ourByte ? 1 : -1;
- }
-
- /**
- * Get the size of the array.
- * @return the number of bytes in the array
- */
- public int size() {
- return length;
- }
-
- /**
- * Clear the array to its original pristine state.
- */
- public void clear() {
- length = 0;
- for(int i=0; i < data.length; ++i) {
- data[i] = null;
- }
- initializedChunks = 0;
- }
-
- /**
- * Set a text value from the bytes in this dynamic array.
- * @param result the value to set
- * @param offset the start of the bytes to copy
- * @param length the number of bytes to copy
- */
- public void setText(Text result, int offset, int length) {
- result.clear();
- int currentChunk = offset / chunkSize;
- int currentOffset = offset % chunkSize;
- int currentLength = Math.min(length, chunkSize - currentOffset);
- while (length > 0) {
- result.append(data[currentChunk], currentOffset, currentLength);
- length -= currentLength;
- currentChunk += 1;
- currentOffset = 0;
- currentLength = Math.min(length, chunkSize - currentOffset);
- }
- }
-
- /**
- * Write out a range of this dynamic array to an output stream.
- * @param out the stream to write to
- * @param offset the first offset to write
- * @param length the number of bytes to write
- * @throws IOException
- */
- public void write(OutputStream out, int offset,
- int length) throws IOException {
- int currentChunk = offset / chunkSize;
- int currentOffset = offset % chunkSize;
- while (length > 0) {
- int currentLength = Math.min(length, chunkSize - currentOffset);
- out.write(data[currentChunk], currentOffset, currentLength);
- length -= currentLength;
- currentChunk += 1;
- currentOffset = 0;
- }
- }
-
- @Override
- public String toString() {
- int i;
- StringBuilder sb = new StringBuilder(length * 3);
-
- sb.append('{');
- int l = length - 1;
- for (i=0; i<l; i++) {
- sb.append(Integer.toHexString(get(i)));
- sb.append(',');
- }
- sb.append(get(i));
- sb.append('}');
-
- return sb.toString();
- }
-
- public void setByteBuffer(ByteBuffer result, int offset, int length) {
- result.clear();
- int currentChunk = offset / chunkSize;
- int currentOffset = offset % chunkSize;
- int currentLength = Math.min(length, chunkSize - currentOffset);
- while (length > 0) {
- result.put(data[currentChunk], currentOffset, currentLength);
- length -= currentLength;
- currentChunk += 1;
- currentOffset = 0;
- currentLength = Math.min(length, chunkSize - currentOffset);
- }
- }
-
- /**
- * Gets all the bytes of the array.
- *
- * @return Bytes of the array
- */
- public byte[] get() {
- byte[] result = null;
- if (length > 0) {
- int currentChunk = 0;
- int currentOffset = 0;
- int currentLength = Math.min(length, chunkSize);
- int destOffset = 0;
- result = new byte[length];
- int totalLength = length;
- while (totalLength > 0) {
- System.arraycopy(data[currentChunk], currentOffset, result, destOffset, currentLength);
- destOffset += currentLength;
- totalLength -= currentLength;
- currentChunk += 1;
- currentOffset = 0;
- currentLength = Math.min(totalLength, chunkSize - currentOffset);
- }
- }
- return result;
- }
-
- /**
- * Get the size of the buffers.
- */
- public long getSizeInBytes() {
- return initializedChunks * chunkSize;
- }
-}
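
A usage sketch for the chunked byte array deleted above, with illustrative values. Because growth allocates fresh chunks instead of reallocating one large array, appends never copy the bytes already stored; the import uses the pre-patch package:

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.hive.ql.io.orc.DynamicByteArray; // pre-patch location

class DynamicByteArrayExample {
  static void demo() {
    DynamicByteArray buf = new DynamicByteArray();
    byte[] word = "orc".getBytes(StandardCharsets.UTF_8);
    int start = buf.add(word, 0, word.length); // returns the slice's start offset
    buf.add((byte) '!');                       // single-byte append
    Text text = new Text();
    buf.setText(text, start, word.length);     // copy the slice back out
    System.out.println(text + " total=" + buf.size()); // prints: orc total=4
  }
}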
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicIntArray.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicIntArray.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicIntArray.java
deleted file mode 100644
index 2e884c0..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicIntArray.java
+++ /dev/null
@@ -1,142 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-/**
- * Dynamic int array that uses primitive types and chunks to avoid copying
- * large number of integers when it resizes.
- *
- * The motivation for this class is memory optimization, i.e. space efficient
- * storage of potentially huge arrays without good a-priori size guesses.
- *
- * The API of this class is between a primitive array and a AbstractList. It's
- * not a Collection implementation because it handles primitive types, but the
- * API could be extended to support iterators and the like.
- *
- * NOTE: Like standard Collection implementations/arrays, this class is not
- * synchronized.
- */
-final class DynamicIntArray {
- static final int DEFAULT_CHUNKSIZE = 8 * 1024;
- static final int INIT_CHUNKS = 128;
-
- private final int chunkSize; // our allocation size
- private int[][] data; // the real data
- private int length; // max set element index +1
- private int initializedChunks = 0; // the number of created chunks
-
- public DynamicIntArray() {
- this(DEFAULT_CHUNKSIZE);
- }
-
- public DynamicIntArray(int chunkSize) {
- this.chunkSize = chunkSize;
-
- data = new int[INIT_CHUNKS][];
- }
-
- /**
- * Ensure that the given index is valid.
- */
- private void grow(int chunkIndex) {
- if (chunkIndex >= initializedChunks) {
- if (chunkIndex >= data.length) {
- int newSize = Math.max(chunkIndex + 1, 2 * data.length);
- int[][] newChunk = new int[newSize][];
- System.arraycopy(data, 0, newChunk, 0, data.length);
- data = newChunk;
- }
- for (int i=initializedChunks; i <= chunkIndex; ++i) {
- data[i] = new int[chunkSize];
- }
- initializedChunks = chunkIndex + 1;
- }
- }
-
- public int get(int index) {
- if (index >= length) {
- throw new IndexOutOfBoundsException("Index " + index +
- " is outside of 0.." +
- (length - 1));
- }
- int i = index / chunkSize;
- int j = index % chunkSize;
- return data[i][j];
- }
-
- public void set(int index, int value) {
- int i = index / chunkSize;
- int j = index % chunkSize;
- grow(i);
- if (index >= length) {
- length = index + 1;
- }
- data[i][j] = value;
- }
-
- public void increment(int index, int value) {
- int i = index / chunkSize;
- int j = index % chunkSize;
- grow(i);
- if (index >= length) {
- length = index + 1;
- }
- data[i][j] += value;
- }
-
- public void add(int value) {
- int i = length / chunkSize;
- int j = length % chunkSize;
- grow(i);
- data[i][j] = value;
- length += 1;
- }
-
- public int size() {
- return length;
- }
-
- public void clear() {
- length = 0;
- for(int i=0; i < data.length; ++i) {
- data[i] = null;
- }
- initializedChunks = 0;
- }
-
- public String toString() {
- int i;
- StringBuilder sb = new StringBuilder(length * 4);
-
- sb.append('{');
- int l = length - 1;
- for (i=0; i<l; i++) {
- sb.append(get(i));
- sb.append(',');
- }
- sb.append(get(i));
- sb.append('}');
-
- return sb.toString();
- }
-
- public int getSizeInBytes() {
- return 4 * initializedChunks * chunkSize;
- }
-}
-
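
The int variant follows the same chunked design. A sketch exercising add(), increment(), and the sparse set(); since the class is package-private, the sketch sits in the pre-patch package, and all values are illustrative:

// DynamicIntArray is package-private, so this lives alongside it.
package org.apache.hadoop.hive.ql.io.orc;

class DynamicIntArrayExample {
  static void demo() {
    DynamicIntArray counts = new DynamicIntArray();
    counts.add(1);              // index 0
    counts.add(5);              // index 1
    counts.increment(0, 2);     // counts[0] is now 3
    counts.set(10, 7);          // sparse write grows length to 11
    System.out.println(counts.get(0) + " size=" + counts.size()); // 3 size=11
  }
}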
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
index f17b154..a242a37 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
@@ -41,8 +41,6 @@ import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.io.filters.BloomFilterIO;
-import org.apache.hadoop.hive.ql.io.orc.OrcProto.RowIndex;
-import org.apache.hadoop.hive.ql.io.orc.OrcProto.RowIndexEntry;
import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
@@ -50,6 +48,13 @@ import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
+import org.apache.orc.ColumnStatistics;
+import org.apache.orc.TypeDescription;
+import org.apache.orc.impl.ColumnStatisticsImpl;
+import org.apache.orc.impl.OrcIndex;
+import org.apache.orc.OrcProto;
+import org.apache.orc.StripeInformation;
+import org.apache.orc.StripeStatistics;
import org.codehaus.jettison.json.JSONException;
import org.codehaus.jettison.json.JSONWriter;
@@ -381,7 +386,7 @@ public final class FileDump {
for (int colIdx : rowIndexCols) {
sargColumns[colIdx] = true;
}
- RecordReaderImpl.Index indices = rows
+ OrcIndex indices = rows
.readRowIndex(stripeIx, null, null, null, sargColumns);
for (int col : rowIndexCols) {
StringBuilder buf = new StringBuilder();
@@ -649,9 +654,10 @@ public final class FileDump {
return sb.toString();
}
- private static String getFormattedRowIndices(int col, RowIndex[] rowGroupIndex) {
+ private static String getFormattedRowIndices(int col,
+ OrcProto.RowIndex[] rowGroupIndex) {
StringBuilder buf = new StringBuilder();
- RowIndex index;
+ OrcProto.RowIndex index;
buf.append(" Row group indices for column ").append(col).append(":");
if (rowGroupIndex == null || (col >= rowGroupIndex.length) ||
((index = rowGroupIndex[col]) == null)) {
@@ -661,7 +667,7 @@ public final class FileDump {
for (int entryIx = 0; entryIx < index.getEntryCount(); ++entryIx) {
buf.append("\n Entry ").append(entryIx).append(": ");
- RowIndexEntry entry = index.getEntry(entryIx);
+ OrcProto.RowIndexEntry entry = index.getEntry(entryIx);
if (entry == null) {
buf.append("unknown\n");
continue;
@@ -686,7 +692,7 @@ public final class FileDump {
public static long getTotalPaddingSize(Reader reader) throws IOException {
long paddedBytes = 0;
- List<org.apache.hadoop.hive.ql.io.orc.StripeInformation> stripes = reader.getStripes();
+ List<StripeInformation> stripes = reader.getStripes();
for (int i = 1; i < stripes.size(); i++) {
long prevStripeOffset = stripes.get(i - 1).getOffset();
long prevStripeLen = stripes.get(i - 1).getLength();
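
FileDump is modified in place rather than deleted: the row-index wrapper becomes org.apache.orc.impl.OrcIndex and the protobuf row-index types are now referenced through org.apache.orc.OrcProto. A sketch of the resulting pattern, assuming OrcIndex exposes the per-column row-group array through a getRowGroupIndex() accessor (that accessor is an assumption; it is not shown in the hunks above):

import org.apache.orc.OrcProto;
import org.apache.orc.impl.OrcIndex;

class RowIndexExample {
  // Assumed accessor: getRowGroupIndex() returning OrcProto.RowIndex[];
  // getEntryCount() on RowIndex is the call exercised by the hunk above.
  static void printEntryCounts(OrcIndex indices) {
    OrcProto.RowIndex[] groups = indices.getRowGroupIndex();
    for (int col = 0; col < groups.length; ++col) {
      OrcProto.RowIndex index = groups[col];
      if (index == null) {
        continue; // no row index loaded for this column
      }
      System.out.println("column " + col + ": "
          + index.getEntryCount() + " row group entries");
    }
  }
}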
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileMetaInfo.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileMetaInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileMetaInfo.java
deleted file mode 100644
index 95c674e..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileMetaInfo.java
+++ /dev/null
@@ -1,64 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.nio.ByteBuffer;
-import java.util.List;
-
-import org.apache.hadoop.hive.ql.io.orc.OrcFile.WriterVersion;
-
-/**
- * FileMetaInfo - represents file metadata stored in footer and postscript sections of the file
- * that is useful for Reader implementation
- *
- */
-public class FileMetaInfo {
- ByteBuffer footerMetaAndPsBuffer;
- final String compressionType;
- final int bufferSize;
- final int metadataSize;
- final ByteBuffer footerBuffer;
- final List<Integer> versionList;
- final OrcFile.WriterVersion writerVersion;
-
-
- /** Ctor used when reading splits - no version list or full footer buffer. */
- FileMetaInfo(String compressionType, int bufferSize, int metadataSize,
- ByteBuffer footerBuffer, OrcFile.WriterVersion writerVersion) {
- this(compressionType, bufferSize, metadataSize, footerBuffer, null,
- writerVersion, null);
- }
-
- /** Ctor used when creating file info during init and when getting a new one. */
- public FileMetaInfo(String compressionType, int bufferSize, int metadataSize,
- ByteBuffer footerBuffer, List<Integer> versionList, WriterVersion writerVersion,
- ByteBuffer fullFooterBuffer) {
- this.compressionType = compressionType;
- this.bufferSize = bufferSize;
- this.metadataSize = metadataSize;
- this.footerBuffer = footerBuffer;
- this.versionList = versionList;
- this.writerVersion = writerVersion;
- this.footerMetaAndPsBuffer = fullFooterBuffer;
- }
-
- public OrcFile.WriterVersion getWriterVersion() {
- return writerVersion;
- }
-}
\ No newline at end of file
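
FileMetaInfo is a plain holder; a hedged construction sketch using the seven-argument constructor deleted above. Every value below is a placeholder (a real caller extracts them from the file's postscript while opening the reader), and WriterVersion.ORIGINAL is used only because some constant is required:

import java.nio.ByteBuffer;
import java.util.Arrays;
import org.apache.hadoop.hive.ql.io.orc.FileMetaInfo; // pre-patch location
import org.apache.hadoop.hive.ql.io.orc.OrcFile;

class FileMetaInfoExample {
  static FileMetaInfo build(ByteBuffer footerBuffer, ByteBuffer fullBuffer) {
    return new FileMetaInfo(
        "ZLIB",                          // compression type name (placeholder)
        256 * 1024,                      // compression buffer size (placeholder)
        footerBuffer.remaining(),        // metadata section size (placeholder)
        footerBuffer,                    // footer bytes
        Arrays.asList(0, 12),            // file version list (placeholder)
        OrcFile.WriterVersion.ORIGINAL,  // any WriterVersion constant
        fullBuffer);                     // full footer + postscript bytes
  }
}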
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileMetadata.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileMetadata.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileMetadata.java
deleted file mode 100644
index 26b27a3..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileMetadata.java
+++ /dev/null
@@ -1,63 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.util.List;
-
-import org.apache.hadoop.hive.ql.io.orc.OrcProto;
-import org.apache.hadoop.hive.ql.io.orc.OrcProto.Type;
-
-/**
- * Cached file metadata. Right now, it caches everything; we don't have to store all the
- * protobuf structs actually, we could just store what we need, but that would require that
- * ORC stop depending on them too. Luckily, they shouldn't be very big.
- */
-public interface FileMetadata {
- boolean isOriginalFormat();
-
- List<StripeInformation> getStripes();
-
- CompressionKind getCompressionKind();
-
- int getCompressionBufferSize();
-
- int getRowIndexStride();
-
- int getColumnCount();
-
- int getFlattenedColumnCount();
-
- long getFileId();
-
- List<Integer> getVersionList();
-
- int getMetadataSize();
-
- int getWriterVersionNum();
-
- List<Type> getTypes();
-
- List<OrcProto.StripeStatistics> getStripeStats();
-
- long getContentLength();
-
- long getNumberOfRows();
-
- List<OrcProto.ColumnStatistics> getFileStats();
-}
\ No newline at end of file
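
Finally, a consumer sketch for the caching interface deleted above. The metadata instance is assumed to come from an external metadata cache, which this interface fronts; only methods declared in the interface are used:

import org.apache.hadoop.hive.ql.io.orc.FileMetadata; // pre-patch location

class FileMetadataExample {
  // Summarize a cached file's shape without touching the file itself.
  static String summarize(FileMetadata meta) {
    return "file " + meta.getFileId()
        + ": " + meta.getNumberOfRows() + " rows, "
        + meta.getStripes().size() + " stripes, "
        + meta.getCompressionKind() + " compression, "
        + meta.getColumnCount() + " columns";
  }
}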