Posted to commits@hive.apache.org by om...@apache.org on 2015/12/12 00:27:59 UTC
[02/16] hive git commit: HIVE-11890. Create ORC submodule. (omalley
reviewed by prasanthj)
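
Most hunks in this patch are mechanical package moves: the ORC core classes leave org.apache.hadoop.hive.ql.io.orc for the new orc submodule, with user-facing types landing under org.apache.orc and reader/writer internals under org.apache.orc.impl. A minimal sketch of the relocation pattern, using class names that appear in the hunks below (the api-vs-impl split is decided per class in the patch, not something you can infer from the name alone; before and after are shown together for contrast, a real file keeps only one set):

// Before HIVE-11890: everything lived in the ql module's orc package.
import org.apache.hadoop.hive.ql.io.orc.CompressionCodec;
import org.apache.hadoop.hive.ql.io.orc.StripeInformation;
import org.apache.hadoop.hive.ql.io.orc.OutStream;

// After HIVE-11890: public API types in org.apache.orc,
// implementation details in org.apache.orc.impl.
import org.apache.orc.CompressionCodec;
import org.apache.orc.StripeInformation;
import org.apache.orc.impl.OutStream;
import org.apache.orc.impl.StreamName;

Nested types that used to be imported directly (EncodingStrategy, RowIndexEntry, UserMetadataItem, ...) are instead qualified at the use site as OrcFile.EncodingStrategy, OrcProto.RowIndexEntry, and so on, which accounts for the bulk of the WriterImpl and EncodedTreeReaderFactory churn below.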
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
index c3916d3..993be71 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
@@ -22,7 +22,6 @@ import static com.google.common.base.Preconditions.checkArgument;
import java.io.IOException;
import java.io.OutputStream;
-import java.lang.management.ManagementFactory;
import java.nio.ByteBuffer;
import java.sql.Timestamp;
import java.util.ArrayList;
@@ -33,6 +32,30 @@ import java.util.Map;
import java.util.TimeZone;
import java.util.TreeMap;
+import org.apache.orc.BinaryColumnStatistics;
+import org.apache.orc.impl.BitFieldWriter;
+import org.apache.orc.impl.ColumnStatisticsImpl;
+import org.apache.orc.CompressionCodec;
+import org.apache.orc.CompressionKind;
+import org.apache.orc.impl.DynamicIntArray;
+import org.apache.orc.impl.IntegerWriter;
+import org.apache.orc.impl.MemoryManager;
+import org.apache.orc.OrcConf;
+import org.apache.orc.OrcUtils;
+import org.apache.orc.impl.OutStream;
+import org.apache.orc.impl.PositionRecorder;
+import org.apache.orc.impl.PositionedOutputStream;
+import org.apache.orc.impl.RunLengthByteWriter;
+import org.apache.orc.impl.RunLengthIntegerWriter;
+import org.apache.orc.impl.RunLengthIntegerWriterV2;
+import org.apache.orc.impl.SerializationUtils;
+import org.apache.orc.impl.SnappyCodec;
+import org.apache.orc.impl.StreamName;
+import org.apache.orc.StringColumnStatistics;
+import org.apache.orc.impl.StringRedBlackTree;
+import org.apache.orc.StripeInformation;
+import org.apache.orc.TypeDescription;
+import org.apache.orc.impl.ZlibCodec;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
@@ -52,13 +75,7 @@ import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.UnionColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.io.filters.BloomFilterIO;
-import org.apache.hadoop.hive.ql.io.orc.CompressionCodec.Modifier;
-import org.apache.hadoop.hive.ql.io.orc.OrcFile.CompressionStrategy;
-import org.apache.hadoop.hive.ql.io.orc.OrcFile.EncodingStrategy;
-import org.apache.hadoop.hive.ql.io.orc.OrcProto.RowIndexEntry;
-import org.apache.hadoop.hive.ql.io.orc.OrcProto.StripeStatistics;
-import org.apache.hadoop.hive.ql.io.orc.OrcProto.Type;
-import org.apache.hadoop.hive.ql.io.orc.OrcProto.UserMetadataItem;
+import org.apache.orc.CompressionCodec.Modifier;
import org.apache.hadoop.hive.ql.util.JavaDataModel;
import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
@@ -85,9 +102,9 @@ import org.apache.hadoop.hive.shims.HadoopShims;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
+import org.apache.orc.OrcProto;
import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Joiner;
import com.google.common.collect.Lists;
import com.google.common.primitives.Longs;
import com.google.protobuf.ByteString;
@@ -182,8 +199,8 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
boolean addBlockPadding,
OrcFile.Version version,
OrcFile.WriterCallback callback,
- EncodingStrategy encodingStrategy,
- CompressionStrategy compressionStrategy,
+ OrcFile.EncodingStrategy encodingStrategy,
+ OrcFile.CompressionStrategy compressionStrategy,
double paddingTolerance,
long blockSizeValue,
String bloomFilterColumnNames,
@@ -468,7 +485,7 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
case BLOOM_FILTER:
case DATA:
case DICTIONARY_DATA:
- if (getCompressionStrategy() == CompressionStrategy.SPEED) {
+ if (getCompressionStrategy() == OrcFile.CompressionStrategy.SPEED) {
modifiers = EnumSet.of(Modifier.FAST, Modifier.TEXT);
} else {
modifiers = EnumSet.of(Modifier.DEFAULT, Modifier.TEXT);
@@ -532,7 +549,7 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
* Get the encoding strategy to use.
* @return encoding strategy
*/
- public EncodingStrategy getEncodingStrategy() {
+ public OrcFile.EncodingStrategy getEncodingStrategy() {
return encodingStrategy;
}
@@ -540,7 +557,7 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
* Get the compression strategy to use.
* @return compression strategy
*/
- public CompressionStrategy getCompressionStrategy() {
+ public OrcFile.CompressionStrategy getCompressionStrategy() {
return compressionStrategy;
}
@@ -610,7 +627,7 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
private final OrcProto.BloomFilter.Builder bloomFilterEntry;
private boolean foundNulls;
private OutStream isPresentOutStream;
- private final List<StripeStatistics.Builder> stripeStatsBuilders;
+ private final List<OrcProto.StripeStatistics.Builder> stripeStatsBuilders;
private final StreamFactory streamFactory;
/**
@@ -687,7 +704,7 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
StreamFactory writer) {
if (isDirectV2) {
boolean alignedBitpacking = false;
- if (writer.getEncodingStrategy().equals(EncodingStrategy.SPEED)) {
+ if (writer.getEncodingStrategy().equals(OrcFile.EncodingStrategy.SPEED)) {
alignedBitpacking = true;
}
return new RunLengthIntegerWriterV2(output, signed, alignedBitpacking);
@@ -788,7 +805,7 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
private void removeIsPresentPositions() {
for(int i=0; i < rowIndex.getEntryCount(); ++i) {
- RowIndexEntry.Builder entry = rowIndex.getEntryBuilder(i);
+ OrcProto.RowIndexEntry.Builder entry = rowIndex.getEntryBuilder(i);
List<Long> positions = entry.getPositionsList();
// bit streams use 3 positions if uncompressed, 4 if compressed
positions = positions.subList(isCompressed ? 4 : 3, positions.size());
@@ -2772,7 +2789,7 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
type.setMaximumLength(schema.getMaxLength());
break;
case VARCHAR:
- type.setKind(Type.Kind.VARCHAR);
+ type.setKind(OrcProto.Type.Kind.VARCHAR);
type.setMaximumLength(schema.getMaxLength());
break;
case BINARY:
@@ -3284,9 +3301,9 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
}
@Override
- public void appendUserMetadata(List<UserMetadataItem> userMetadata) {
+ public void appendUserMetadata(List<OrcProto.UserMetadataItem> userMetadata) {
if (userMetadata != null) {
- for (UserMetadataItem item : userMetadata) {
+ for (OrcProto.UserMetadataItem item : userMetadata) {
this.userMetadata.put(item.getName(), item.getValue());
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ZlibCodec.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ZlibCodec.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ZlibCodec.java
deleted file mode 100644
index 03cc3c5..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ZlibCodec.java
+++ /dev/null
@@ -1,171 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.EnumSet;
-import java.util.zip.DataFormatException;
-import java.util.zip.Deflater;
-import java.util.zip.Inflater;
-
-import javax.annotation.Nullable;
-
-import org.apache.hadoop.hive.shims.HadoopShims;
-import org.apache.hadoop.hive.shims.HadoopShims.DirectCompressionType;
-import org.apache.hadoop.hive.shims.HadoopShims.DirectDecompressorShim;
-import org.apache.hadoop.hive.shims.ShimLoader;
-
-class ZlibCodec implements CompressionCodec, DirectDecompressionCodec {
-
- private Boolean direct = null;
-
- private final int level;
- private final int strategy;
-
- public ZlibCodec() {
- level = Deflater.DEFAULT_COMPRESSION;
- strategy = Deflater.DEFAULT_STRATEGY;
- }
-
- private ZlibCodec(int level, int strategy) {
- this.level = level;
- this.strategy = strategy;
- }
-
- @Override
- public boolean compress(ByteBuffer in, ByteBuffer out,
- ByteBuffer overflow) throws IOException {
- Deflater deflater = new Deflater(level, true);
- deflater.setStrategy(strategy);
- int length = in.remaining();
- deflater.setInput(in.array(), in.arrayOffset() + in.position(), length);
- deflater.finish();
- int outSize = 0;
- int offset = out.arrayOffset() + out.position();
- while (!deflater.finished() && (length > outSize)) {
- int size = deflater.deflate(out.array(), offset, out.remaining());
- out.position(size + out.position());
- outSize += size;
- offset += size;
- // if we run out of space in the out buffer, use the overflow
- if (out.remaining() == 0) {
- if (overflow == null) {
- deflater.end();
- return false;
- }
- out = overflow;
- offset = out.arrayOffset() + out.position();
- }
- }
- deflater.end();
- return length > outSize;
- }
-
- @Override
- public void decompress(ByteBuffer in, ByteBuffer out) throws IOException {
-
- if(in.isDirect() && out.isDirect()) {
- directDecompress(in, out);
- return;
- }
-
- Inflater inflater = new Inflater(true);
- inflater.setInput(in.array(), in.arrayOffset() + in.position(),
- in.remaining());
- while (!(inflater.finished() || inflater.needsDictionary() ||
- inflater.needsInput())) {
- try {
- int count = inflater.inflate(out.array(),
- out.arrayOffset() + out.position(),
- out.remaining());
- out.position(count + out.position());
- } catch (DataFormatException dfe) {
- throw new IOException("Bad compression data", dfe);
- }
- }
- out.flip();
- inflater.end();
- in.position(in.limit());
- }
-
- @Override
- public boolean isAvailable() {
- if (direct == null) {
- // see nowrap option in new Inflater(boolean) which disables zlib headers
- try {
- if (ShimLoader.getHadoopShims().getDirectDecompressor(
- DirectCompressionType.ZLIB_NOHEADER) != null) {
- direct = Boolean.valueOf(true);
- } else {
- direct = Boolean.valueOf(false);
- }
- } catch (UnsatisfiedLinkError ule) {
- direct = Boolean.valueOf(false);
- }
- }
- return direct.booleanValue();
- }
-
- @Override
- public void directDecompress(ByteBuffer in, ByteBuffer out)
- throws IOException {
- DirectDecompressorShim decompressShim = ShimLoader.getHadoopShims()
- .getDirectDecompressor(DirectCompressionType.ZLIB_NOHEADER);
- decompressShim.decompress(in, out);
- out.flip(); // flip for read
- }
-
- @Override
- public CompressionCodec modify(@Nullable EnumSet<Modifier> modifiers) {
-
- if (modifiers == null) {
- return this;
- }
-
- int l = this.level;
- int s = this.strategy;
-
- for (Modifier m : modifiers) {
- switch (m) {
- case BINARY:
- /* filtered == less LZ77, more huffman */
- s = Deflater.FILTERED;
- break;
- case TEXT:
- s = Deflater.DEFAULT_STRATEGY;
- break;
- case FASTEST:
- // deflate_fast looking for 8 byte patterns
- l = Deflater.BEST_SPEED;
- break;
- case FAST:
- // deflate_fast looking for 16 byte patterns
- l = Deflater.BEST_SPEED + 1;
- break;
- case DEFAULT:
- // deflate_slow looking for 128 byte patterns
- l = Deflater.DEFAULT_COMPRESSION;
- break;
- default:
- break;
- }
- }
- return new ZlibCodec(l, s);
- }
-}
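
The deleted codec drives java.util.zip in raw-deflate ("nowrap") mode, i.e. no zlib header or trailer, which is why isAvailable() probes the ZLIB_NOHEADER direct decompressor shim. For context, a minimal self-contained round-trip sketch of that mode using only the JDK (no Hadoop shims; buffer sizes are illustrative, not what ORC uses):

import java.util.Arrays;
import java.util.zip.DataFormatException;
import java.util.zip.Deflater;
import java.util.zip.Inflater;

public class NowrapZlibDemo {
  public static void main(String[] args) throws DataFormatException {
    byte[] input = "hello hello hello hello".getBytes(java.nio.charset.StandardCharsets.UTF_8);

    // 'true' = nowrap: a raw deflate stream without the zlib header/checksum,
    // matching the new Deflater(level, true) call in the deleted ZlibCodec.
    Deflater deflater = new Deflater(Deflater.DEFAULT_COMPRESSION, true);
    deflater.setInput(input);
    deflater.finish();
    byte[] compressed = new byte[input.length * 2 + 64];
    int clen = deflater.deflate(compressed);
    deflater.end();

    // The Inflater must use the same nowrap flag to parse the raw stream.
    // Per the Inflater javadoc, raw streams may need one extra dummy input
    // byte for the native zlib to report completion; copyOf pads with 0x00.
    Inflater inflater = new Inflater(true);
    inflater.setInput(Arrays.copyOf(compressed, clen + 1));
    byte[] output = new byte[input.length];
    int dlen = inflater.inflate(output);
    inflater.end();

    System.out.println(new String(output, 0, dlen,
        java.nio.charset.StandardCharsets.UTF_8)); // prints the original text
  }
}

The Modifier switch above then only tweaks the Deflater level/strategy pair (FILTERED for binary data, BEST_SPEED for the fast paths) and returns a fresh immutable codec instance rather than mutating the shared one.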
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReader.java
index 0691050..96af96a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReader.java
@@ -20,11 +20,9 @@ package org.apache.hadoop.hive.ql.io.orc.encoded;
import java.io.IOException;
import java.util.List;
-import org.apache.hadoop.hive.ql.io.orc.StripeInformation;
-import org.apache.hadoop.hive.ql.io.orc.OrcProto.ColumnEncoding;
-import org.apache.hadoop.hive.ql.io.orc.OrcProto.RowIndex;
-import org.apache.hadoop.hive.ql.io.orc.OrcProto.Stream;
+import org.apache.orc.StripeInformation;
import org.apache.hadoop.hive.ql.io.orc.encoded.Reader.OrcEncodedColumnBatch;
+import org.apache.orc.OrcProto;
public interface EncodedReader {
@@ -41,7 +39,8 @@ public interface EncodedReader {
* @param consumer The sink for data that has been read.
*/
void readEncodedColumns(int stripeIx, StripeInformation stripe,
- RowIndex[] index, List<ColumnEncoding> encodings, List<Stream> streams,
+ OrcProto.RowIndex[] index, List<OrcProto.ColumnEncoding> encodings,
+ List<OrcProto.Stream> streams,
boolean[] included, boolean[][] colRgs,
Consumer<OrcEncodedColumnBatch> consumer) throws IOException;
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReaderImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReaderImpl.java
index ea7e0fa..fed6de0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReaderImpl.java
@@ -34,24 +34,18 @@ import org.apache.hadoop.hive.common.io.DataCache.DiskRangeListFactory;
import org.apache.hadoop.hive.common.io.DiskRangeList.CreateHelper;
import org.apache.hadoop.hive.common.io.encoded.EncodedColumnBatch.ColumnStreamData;
import org.apache.hadoop.hive.common.io.encoded.MemoryBuffer;
-import org.apache.hadoop.hive.ql.io.orc.CompressionCodec;
-import org.apache.hadoop.hive.ql.io.orc.DataReader;
-import org.apache.hadoop.hive.ql.io.orc.OrcConf;
-import org.apache.hadoop.hive.ql.io.orc.OrcProto;
-import org.apache.hadoop.hive.ql.io.orc.OutStream;
+import org.apache.orc.CompressionCodec;
+import org.apache.orc.DataReader;
+import org.apache.orc.OrcConf;
+import org.apache.orc.impl.OutStream;
import org.apache.hadoop.hive.ql.io.orc.RecordReaderUtils;
-import org.apache.hadoop.hive.ql.io.orc.StreamName;
-import org.apache.hadoop.hive.ql.io.orc.StripeInformation;
-import org.apache.hadoop.hive.ql.io.orc.OrcProto.ColumnEncoding;
-import org.apache.hadoop.hive.ql.io.orc.OrcProto.RowIndex;
-import org.apache.hadoop.hive.ql.io.orc.OrcProto.RowIndexEntry;
-import org.apache.hadoop.hive.ql.io.orc.OrcProto.Stream;
-import org.apache.hadoop.hive.ql.io.orc.RecordReaderImpl.BufferChunk;
+import org.apache.orc.impl.StreamName;
+import org.apache.orc.StripeInformation;
+import org.apache.orc.impl.BufferChunk;
import org.apache.hadoop.hive.ql.io.orc.RecordReaderUtils.ByteBufferAllocatorPool;
import org.apache.hadoop.hive.ql.io.orc.encoded.Reader.OrcEncodedColumnBatch;
import org.apache.hadoop.hive.ql.io.orc.encoded.Reader.PoolFactory;
-
-
+import org.apache.orc.OrcProto;
/**
* Encoded reader implementation.
@@ -135,7 +129,8 @@ class EncodedReaderImpl implements EncodedReader {
/** Helper context for each column being read */
private static final class ColumnReadContext {
- public ColumnReadContext(int colIx, ColumnEncoding encoding, RowIndex rowIndex) {
+ public ColumnReadContext(int colIx, OrcProto.ColumnEncoding encoding,
+ OrcProto.RowIndex rowIndex) {
this.encoding = encoding;
this.rowIndex = rowIndex;
this.colIx = colIx;
@@ -147,7 +142,7 @@ class EncodedReaderImpl implements EncodedReader {
int streamCount = 0;
final StreamContext[] streams = new StreamContext[MAX_STREAMS];
/** Column encoding. */
- ColumnEncoding encoding;
+ OrcProto.ColumnEncoding encoding;
/** Column rowindex. */
OrcProto.RowIndex rowIndex;
/** Column index in the file. */
@@ -204,7 +199,7 @@ class EncodedReaderImpl implements EncodedReader {
@Override
public void readEncodedColumns(int stripeIx, StripeInformation stripe,
- RowIndex[] indexes, List<ColumnEncoding> encodings, List<Stream> streamList,
+ OrcProto.RowIndex[] indexes, List<OrcProto.ColumnEncoding> encodings, List<OrcProto.Stream> streamList,
boolean[] included, boolean[][] colRgs,
Consumer<OrcEncodedColumnBatch> consumer) throws IOException {
// Note: for now we don't have to setError here, caller will setError if we throw.
@@ -351,7 +346,7 @@ class EncodedReaderImpl implements EncodedReader {
continue; // TODO: this would be invalid with HL cache, where RG x col can be excluded.
}
ColumnReadContext ctx = colCtxs[colIxMod];
- RowIndexEntry index = ctx.rowIndex.getEntry(rgIx),
+ OrcProto.RowIndexEntry index = ctx.rowIndex.getEntry(rgIx),
nextIndex = isLastRg ? null : ctx.rowIndex.getEntry(rgIx + 1);
ecb.initColumn(colIxMod, ctx.colIx, OrcEncodedColumnBatch.MAX_DATA_STREAMS);
for (int streamIx = 0; streamIx < ctx.streamCount; ++streamIx) {
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedTreeReaderFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedTreeReaderFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedTreeReaderFactory.java
index dd6f64c..fe46446 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedTreeReaderFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedTreeReaderFactory.java
@@ -22,12 +22,11 @@ import java.util.List;
import org.apache.hadoop.hive.common.io.encoded.EncodedColumnBatch;
import org.apache.hadoop.hive.common.io.encoded.EncodedColumnBatch.ColumnStreamData;
-import org.apache.hadoop.hive.ql.io.orc.CompressionCodec;
-import org.apache.hadoop.hive.ql.io.orc.OrcProto;
-import org.apache.hadoop.hive.ql.io.orc.OrcProto.Stream.Kind;
-import org.apache.hadoop.hive.ql.io.orc.PositionProvider;
-import org.apache.hadoop.hive.ql.io.orc.SettableUncompressedStream;
+import org.apache.orc.CompressionCodec;
+import org.apache.orc.impl.PositionProvider;
+import org.apache.orc.impl.SettableUncompressedStream;
import org.apache.hadoop.hive.ql.io.orc.TreeReaderFactory;
+import org.apache.orc.OrcProto;
public class EncodedTreeReaderFactory extends TreeReaderFactory {
/**
@@ -87,13 +86,13 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory {
public void setBuffers(ColumnStreamData[] streamsData, boolean sameStripe)
throws IOException {
if (_presentStream != null) {
- _presentStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[Kind.PRESENT_VALUE]));
+ _presentStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[OrcProto.Stream.Kind.PRESENT_VALUE]));
}
if (_secondsStream != null) {
- _secondsStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[Kind.DATA_VALUE]));
+ _secondsStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[OrcProto.Stream.Kind.DATA_VALUE]));
}
if (_nanosStream != null) {
- _nanosStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[Kind.SECONDARY_VALUE]));
+ _nanosStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[OrcProto.Stream.Kind.SECONDARY_VALUE]));
}
}
@@ -232,24 +231,24 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory {
public void setBuffers(ColumnStreamData[] streamsData, boolean sameStripe)
throws IOException {
if (_presentStream != null) {
- _presentStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[Kind.PRESENT_VALUE]));
+ _presentStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[OrcProto.Stream.Kind.PRESENT_VALUE]));
}
if (_dataStream != null) {
- _dataStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[Kind.DATA_VALUE]));
+ _dataStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[OrcProto.Stream.Kind.DATA_VALUE]));
}
if (!_isDictionaryEncoding) {
if (_lengthStream != null) {
- _lengthStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[Kind.LENGTH_VALUE]));
+ _lengthStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[OrcProto.Stream.Kind.LENGTH_VALUE]));
}
}
// set these streams only if the stripe is different
if (!sameStripe && _isDictionaryEncoding) {
if (_lengthStream != null) {
- _lengthStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[Kind.LENGTH_VALUE]));
+ _lengthStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[OrcProto.Stream.Kind.LENGTH_VALUE]));
}
if (_dictionaryStream != null) {
- _dictionaryStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[Kind.DICTIONARY_DATA_VALUE]));
+ _dictionaryStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[OrcProto.Stream.Kind.DICTIONARY_DATA_VALUE]));
}
}
}
@@ -364,10 +363,10 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory {
public void setBuffers(ColumnStreamData[] streamsData, boolean sameStripe)
throws IOException {
if (_presentStream != null) {
- _presentStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[Kind.PRESENT_VALUE]));
+ _presentStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[OrcProto.Stream.Kind.PRESENT_VALUE]));
}
if (_dataStream != null) {
- _dataStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[Kind.DATA_VALUE]));
+ _dataStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[OrcProto.Stream.Kind.DATA_VALUE]));
}
}
@@ -461,10 +460,10 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory {
public void setBuffers(ColumnStreamData[] streamsData, boolean sameStripe)
throws IOException {
if (_presentStream != null) {
- _presentStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[Kind.PRESENT_VALUE]));
+ _presentStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[OrcProto.Stream.Kind.PRESENT_VALUE]));
}
if (_dataStream != null) {
- _dataStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[Kind.DATA_VALUE]));
+ _dataStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[OrcProto.Stream.Kind.DATA_VALUE]));
}
}
@@ -564,10 +563,10 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory {
public void setBuffers(ColumnStreamData[] streamsData, boolean sameStripe)
throws IOException {
if (_presentStream != null) {
- _presentStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[Kind.PRESENT_VALUE]));
+ _presentStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[OrcProto.Stream.Kind.PRESENT_VALUE]));
}
if (_dataStream != null) {
- _dataStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[Kind.DATA_VALUE]));
+ _dataStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[OrcProto.Stream.Kind.DATA_VALUE]));
}
}
@@ -661,10 +660,10 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory {
public void setBuffers(ColumnStreamData[] streamsData, boolean sameStripe)
throws IOException {
if (_presentStream != null) {
- _presentStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[Kind.PRESENT_VALUE]));
+ _presentStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[OrcProto.Stream.Kind.PRESENT_VALUE]));
}
if (_dataStream != null) {
- _dataStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[Kind.DATA_VALUE]));
+ _dataStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[OrcProto.Stream.Kind.DATA_VALUE]));
}
}
@@ -751,10 +750,10 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory {
public void setBuffers(ColumnStreamData[] streamsData, boolean sameStripe)
throws IOException {
if (_presentStream != null) {
- _presentStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[Kind.PRESENT_VALUE]));
+ _presentStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[OrcProto.Stream.Kind.PRESENT_VALUE]));
}
if (_dataStream != null) {
- _dataStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[Kind.DATA_VALUE]));
+ _dataStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[OrcProto.Stream.Kind.DATA_VALUE]));
}
}
@@ -852,13 +851,13 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory {
public void setBuffers(ColumnStreamData[] streamsData, boolean sameStripe)
throws IOException {
if (_presentStream != null) {
- _presentStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[Kind.PRESENT_VALUE]));
+ _presentStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[OrcProto.Stream.Kind.PRESENT_VALUE]));
}
if (_valueStream != null) {
- _valueStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[Kind.DATA_VALUE]));
+ _valueStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[OrcProto.Stream.Kind.DATA_VALUE]));
}
if (_scaleStream != null) {
- _scaleStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[Kind.SECONDARY_VALUE]));
+ _scaleStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[OrcProto.Stream.Kind.SECONDARY_VALUE]));
}
}
@@ -972,10 +971,10 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory {
public void setBuffers(ColumnStreamData[] streamsData, boolean sameStripe)
throws IOException {
if (_presentStream != null) {
- _presentStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[Kind.PRESENT_VALUE]));
+ _presentStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[OrcProto.Stream.Kind.PRESENT_VALUE]));
}
if (_dataStream != null) {
- _dataStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[Kind.DATA_VALUE]));
+ _dataStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[OrcProto.Stream.Kind.DATA_VALUE]));
}
}
@@ -1099,24 +1098,24 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory {
public void setBuffers(ColumnStreamData[] streamsData, boolean sameStripe)
throws IOException {
if (_presentStream != null) {
- _presentStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[Kind.PRESENT_VALUE]));
+ _presentStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[OrcProto.Stream.Kind.PRESENT_VALUE]));
}
if (_dataStream != null) {
- _dataStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[Kind.DATA_VALUE]));
+ _dataStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[OrcProto.Stream.Kind.DATA_VALUE]));
}
if (!_isDictionaryEncoding) {
if (_lengthStream != null) {
- _lengthStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[Kind.LENGTH_VALUE]));
+ _lengthStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[OrcProto.Stream.Kind.LENGTH_VALUE]));
}
}
// set these streams only if the stripe is different
if (!sameStripe && _isDictionaryEncoding) {
if (_lengthStream != null) {
- _lengthStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[Kind.LENGTH_VALUE]));
+ _lengthStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[OrcProto.Stream.Kind.LENGTH_VALUE]));
}
if (_dictionaryStream != null) {
- _dictionaryStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[Kind.DICTIONARY_DATA_VALUE]));
+ _dictionaryStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[OrcProto.Stream.Kind.DICTIONARY_DATA_VALUE]));
}
}
}
@@ -1267,24 +1266,24 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory {
public void setBuffers(ColumnStreamData[] streamsData, boolean sameStripe)
throws IOException {
if (_presentStream != null) {
- _presentStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[Kind.PRESENT_VALUE]));
+ _presentStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[OrcProto.Stream.Kind.PRESENT_VALUE]));
}
if (_dataStream != null) {
- _dataStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[Kind.DATA_VALUE]));
+ _dataStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[OrcProto.Stream.Kind.DATA_VALUE]));
}
if (!_isDictionaryEncoding) {
if (_lengthStream != null) {
- _lengthStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[Kind.LENGTH_VALUE]));
+ _lengthStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[OrcProto.Stream.Kind.LENGTH_VALUE]));
}
}
// set these streams only if the stripe is different
if (!sameStripe && _isDictionaryEncoding) {
if (_lengthStream != null) {
- _lengthStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[Kind.LENGTH_VALUE]));
+ _lengthStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[OrcProto.Stream.Kind.LENGTH_VALUE]));
}
if (_dictionaryStream != null) {
- _dictionaryStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[Kind.DICTIONARY_DATA_VALUE]));
+ _dictionaryStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[OrcProto.Stream.Kind.DICTIONARY_DATA_VALUE]));
}
}
}
@@ -1404,10 +1403,10 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory {
public void setBuffers(ColumnStreamData[] streamsData, boolean sameStripe)
throws IOException {
if (_presentStream != null) {
- _presentStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[Kind.PRESENT_VALUE]));
+ _presentStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[OrcProto.Stream.Kind.PRESENT_VALUE]));
}
if (_dataStream != null) {
- _dataStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[Kind.DATA_VALUE]));
+ _dataStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[OrcProto.Stream.Kind.DATA_VALUE]));
}
}
@@ -1504,13 +1503,13 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory {
public void setBuffers(ColumnStreamData[] streamsData, boolean sameStripe)
throws IOException {
if (_presentStream != null) {
- _presentStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[Kind.PRESENT_VALUE]));
+ _presentStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[OrcProto.Stream.Kind.PRESENT_VALUE]));
}
if (_dataStream != null) {
- _dataStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[Kind.DATA_VALUE]));
+ _dataStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[OrcProto.Stream.Kind.DATA_VALUE]));
}
if (_lengthsStream != null) {
- _lengthsStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[Kind.LENGTH_VALUE]));
+ _lengthsStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[OrcProto.Stream.Kind.LENGTH_VALUE]));
}
}
@@ -1610,10 +1609,10 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory {
public void setBuffers(ColumnStreamData[] streamsData, boolean sameStripe)
throws IOException {
if (_presentStream != null) {
- _presentStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[Kind.PRESENT_VALUE]));
+ _presentStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[OrcProto.Stream.Kind.PRESENT_VALUE]));
}
if (_dataStream != null) {
- _dataStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[Kind.DATA_VALUE]));
+ _dataStream.setBuffers(StreamUtils.createDiskRangeInfo(streamsData[OrcProto.Stream.Kind.DATA_VALUE]));
}
}
@@ -1683,11 +1682,11 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory {
OrcProto.ColumnEncoding columnEncoding = encodings.get(columnIndex);
// stream buffers are arranged in enum order of stream kind
- ColumnStreamData present = streamBuffers[Kind.PRESENT_VALUE],
- data = streamBuffers[Kind.DATA_VALUE],
- dictionary = streamBuffers[Kind.DICTIONARY_DATA_VALUE],
- lengths = streamBuffers[Kind.LENGTH_VALUE],
- secondary = streamBuffers[Kind.SECONDARY_VALUE];
+ ColumnStreamData present = streamBuffers[OrcProto.Stream.Kind.PRESENT_VALUE],
+ data = streamBuffers[OrcProto.Stream.Kind.DATA_VALUE],
+ dictionary = streamBuffers[OrcProto.Stream.Kind.DICTIONARY_DATA_VALUE],
+ lengths = streamBuffers[OrcProto.Stream.Kind.LENGTH_VALUE],
+ secondary = streamBuffers[OrcProto.Stream.Kind.SECONDARY_VALUE];
switch (columnType.getKind()) {
case BINARY:
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/Reader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/Reader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/Reader.java
index a27b35e..246ead6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/Reader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/Reader.java
@@ -25,8 +25,8 @@ import org.apache.hadoop.hive.common.Pool.PoolObjectHelper;
import org.apache.hadoop.hive.common.io.DataCache;
import org.apache.hadoop.hive.common.io.encoded.EncodedColumnBatch;
import org.apache.hadoop.hive.common.io.encoded.EncodedColumnBatch.ColumnStreamData;
-import org.apache.hadoop.hive.ql.io.orc.DataReader;
-import org.apache.hadoop.hive.ql.io.orc.OrcProto.Stream;
+import org.apache.orc.DataReader;
+import org.apache.orc.OrcProto;
/**
* The interface for reading encoded data from ORC files.
@@ -48,7 +48,7 @@ public interface Reader extends org.apache.hadoop.hive.ql.io.orc.Reader {
* All the previous streams are data streams, this and the next ones are index streams.
* We assume the sort will stay the same for backward compat.
*/
- public static final int MAX_DATA_STREAMS = Stream.Kind.ROW_INDEX.getNumber();
+ public static final int MAX_DATA_STREAMS = OrcProto.Stream.Kind.ROW_INDEX.getNumber();
public void init(long fileId, int stripeIx, int rgIx, int columnCount) {
if (batchKey == null) {
batchKey = new OrcBatchKey(fileId, stripeIx, rgIx);
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/ReaderImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/ReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/ReaderImpl.java
index a4fa03b..b0ac503 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/ReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/ReaderImpl.java
@@ -22,9 +22,8 @@ import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.io.DataCache;
-import org.apache.hadoop.hive.ql.io.orc.DataReader;
+import org.apache.orc.DataReader;
import org.apache.hadoop.hive.ql.io.orc.OrcFile.ReaderOptions;
-import org.apache.hadoop.hive.ql.io.orc.encoded.EncodedReaderImpl;
class ReaderImpl extends org.apache.hadoop.hive.ql.io.orc.ReaderImpl implements Reader {
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/StreamUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/StreamUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/StreamUtils.java
index 2c9acfb..9ac53af 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/StreamUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/StreamUtils.java
@@ -23,9 +23,8 @@ import java.nio.ByteBuffer;
import org.apache.hadoop.hive.common.DiskRangeInfo;
import org.apache.hadoop.hive.common.io.encoded.EncodedColumnBatch.ColumnStreamData;
import org.apache.hadoop.hive.common.io.encoded.MemoryBuffer;
-import org.apache.hadoop.hive.ql.io.orc.RecordReaderImpl;
-import org.apache.hadoop.hive.ql.io.orc.SettableUncompressedStream;
-import org.apache.hadoop.hive.ql.io.orc.RecordReaderImpl.BufferChunk;
+import org.apache.orc.impl.SettableUncompressedStream;
+import org.apache.orc.impl.BufferChunk;
/**
* Stream utility.
@@ -63,7 +62,7 @@ public class StreamUtils {
// TODO: we should get rid of this
for (MemoryBuffer memoryBuffer : streamBuffer.getCacheBuffers()) {
ByteBuffer buffer = memoryBuffer.getByteBufferDup();
- diskRangeInfo.addDiskRange(new RecordReaderImpl.BufferChunk(buffer, offset));
+ diskRangeInfo.addDiskRange(new BufferChunk(buffer, offset));
offset += buffer.remaining();
}
return diskRangeInfo;
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionExpressionForMetastore.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionExpressionForMetastore.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionExpressionForMetastore.java
index 42ad04b..7cddcc9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionExpressionForMetastore.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionExpressionForMetastore.java
@@ -28,9 +28,8 @@ import org.apache.hadoop.hive.metastore.PartitionExpressionProxy;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.ql.exec.SerializationUtilities;
import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
-import org.apache.hadoop.hive.ql.io.orc.OrcProto;
import org.apache.hadoop.hive.ql.io.orc.ReaderImpl;
-import org.apache.hadoop.hive.ql.io.orc.StripeInformation;
+import org.apache.orc.StripeInformation;
import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -38,6 +37,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.apache.orc.OrcProto;
/**
* The basic implementation of PartitionExpressionProxy that uses ql package classes.
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
----------------------------------------------------------------------
diff --git a/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto b/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
deleted file mode 100644
index acadef9..0000000
--- a/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
+++ /dev/null
@@ -1,220 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package orc.proto;
-
-option java_package = "org.apache.hadoop.hive.ql.io.orc";
-
-message IntegerStatistics {
- optional sint64 minimum = 1;
- optional sint64 maximum = 2;
- optional sint64 sum = 3;
-}
-
-message DoubleStatistics {
- optional double minimum = 1;
- optional double maximum = 2;
- optional double sum = 3;
-}
-
-message StringStatistics {
- optional string minimum = 1;
- optional string maximum = 2;
- // sum will store the total length of all strings in a stripe
- optional sint64 sum = 3;
-}
-
-message BucketStatistics {
- repeated uint64 count = 1 [packed=true];
-}
-
-message DecimalStatistics {
- optional string minimum = 1;
- optional string maximum = 2;
- optional string sum = 3;
-}
-
-message DateStatistics {
- // min,max values saved as days since epoch
- optional sint32 minimum = 1;
- optional sint32 maximum = 2;
-}
-
-message TimestampStatistics {
- // min,max values saved as milliseconds since epoch
- optional sint64 minimum = 1;
- optional sint64 maximum = 2;
-}
-
-message BinaryStatistics {
- // sum will store the total binary blob length in a stripe
- optional sint64 sum = 1;
-}
-
-message ColumnStatistics {
- optional uint64 numberOfValues = 1;
- optional IntegerStatistics intStatistics = 2;
- optional DoubleStatistics doubleStatistics = 3;
- optional StringStatistics stringStatistics = 4;
- optional BucketStatistics bucketStatistics = 5;
- optional DecimalStatistics decimalStatistics = 6;
- optional DateStatistics dateStatistics = 7;
- optional BinaryStatistics binaryStatistics = 8;
- optional TimestampStatistics timestampStatistics = 9;
- optional bool hasNull = 10;
-}
-
-message RowIndexEntry {
- repeated uint64 positions = 1 [packed=true];
- optional ColumnStatistics statistics = 2;
-}
-
-message RowIndex {
- repeated RowIndexEntry entry = 1;
-}
-
-message BloomFilter {
- optional uint32 numHashFunctions = 1;
- repeated fixed64 bitset = 2;
-}
-
-message BloomFilterIndex {
- repeated BloomFilter bloomFilter = 1;
-}
-
-message Stream {
- // if you add new index stream kinds, you need to make sure to update
- // StreamName to ensure it is added to the stripe in the right area
- enum Kind {
- PRESENT = 0;
- DATA = 1;
- LENGTH = 2;
- DICTIONARY_DATA = 3;
- DICTIONARY_COUNT = 4;
- SECONDARY = 5;
- ROW_INDEX = 6;
- BLOOM_FILTER = 7;
- }
- optional Kind kind = 1;
- optional uint32 column = 2;
- optional uint64 length = 3;
-}
-
-message ColumnEncoding {
- enum Kind {
- DIRECT = 0;
- DICTIONARY = 1;
- DIRECT_V2 = 2;
- DICTIONARY_V2 = 3;
- }
- optional Kind kind = 1;
- optional uint32 dictionarySize = 2;
-}
-
-message StripeFooter {
- repeated Stream streams = 1;
- repeated ColumnEncoding columns = 2;
- optional string writerTimezone = 3;
-}
-
-message Type {
- enum Kind {
- BOOLEAN = 0;
- BYTE = 1;
- SHORT = 2;
- INT = 3;
- LONG = 4;
- FLOAT = 5;
- DOUBLE = 6;
- STRING = 7;
- BINARY = 8;
- TIMESTAMP = 9;
- LIST = 10;
- MAP = 11;
- STRUCT = 12;
- UNION = 13;
- DECIMAL = 14;
- DATE = 15;
- VARCHAR = 16;
- CHAR = 17;
- }
- optional Kind kind = 1;
- repeated uint32 subtypes = 2 [packed=true];
- repeated string fieldNames = 3;
- optional uint32 maximumLength = 4;
- optional uint32 precision = 5;
- optional uint32 scale = 6;
-}
-
-message StripeInformation {
- optional uint64 offset = 1;
- optional uint64 indexLength = 2;
- optional uint64 dataLength = 3;
- optional uint64 footerLength = 4;
- optional uint64 numberOfRows = 5;
-}
-
-message UserMetadataItem {
- optional string name = 1;
- optional bytes value = 2;
-}
-
-message StripeStatistics {
- repeated ColumnStatistics colStats = 1;
-}
-
-message Metadata {
- repeated StripeStatistics stripeStats = 1;
-}
-
-message Footer {
- optional uint64 headerLength = 1;
- optional uint64 contentLength = 2;
- repeated StripeInformation stripes = 3;
- repeated Type types = 4;
- repeated UserMetadataItem metadata = 5;
- optional uint64 numberOfRows = 6;
- repeated ColumnStatistics statistics = 7;
- optional uint32 rowIndexStride = 8;
-}
-
-enum CompressionKind {
- NONE = 0;
- ZLIB = 1;
- SNAPPY = 2;
- LZO = 3;
-}
-
-// Serialized length must be less than 255 bytes
-message PostScript {
- optional uint64 footerLength = 1;
- optional CompressionKind compression = 2;
- optional uint64 compressionBlockSize = 3;
- // the version of the file format
- // [0, 11] = Hive 0.11
- // [0, 12] = Hive 0.12
- repeated uint32 version = 4 [packed = true];
- optional uint64 metadataLength = 5;
- // Version of the writer:
- // 0 (or missing) = original
- // 1 = HIVE-8732 fixed
- // 2 = HIVE-4243 fixed
- optional uint32 writerVersion = 6;
- // Leave this last in the record
- optional string magic = 8000;
-}
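
One detail worth noting before this file moves into the orc module: the Stream.Kind field numbers above (PRESENT = 0, DATA = 1, ...) are exactly the Kind.*_VALUE constants that the EncodedTreeReaderFactory hunks earlier in this message use as array indices, because protobuf's Java codegen mirrors every enum value as a static int equal to its field number. A small illustration (assumes the generated org.apache.orc.OrcProto class from this patch is on the classpath):

import org.apache.orc.OrcProto;

public class StreamKindDemo {
  public static void main(String[] args) {
    // protobuf emits both the enum constant and an int mirror per value.
    System.out.println(OrcProto.Stream.Kind.PRESENT.getNumber()
        == OrcProto.Stream.Kind.PRESENT_VALUE); // true (both are 0)
    // This is what lets the readers address per-stream buffers directly:
    //   streamsData[OrcProto.Stream.Kind.PRESENT_VALUE]
    //   streamsData[OrcProto.Stream.Kind.DATA_VALUE]
    // with no lookup table between stream kind and slot.
  }
}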
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/OrcFileGenerator.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/OrcFileGenerator.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/OrcFileGenerator.java
index a95fee9..e29ad7a 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/OrcFileGenerator.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/OrcFileGenerator.java
@@ -31,9 +31,9 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.io.orc.CompressionKind;
+import org.apache.hadoop.hive.ql.io.orc.Writer;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.ql.io.orc.TestVectorizedORCReader;
-import org.apache.hadoop.hive.ql.io.orc.Writer;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestBitFieldReader.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestBitFieldReader.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestBitFieldReader.java
deleted file mode 100644
index b537765..0000000
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestBitFieldReader.java
+++ /dev/null
@@ -1,144 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import static junit.framework.Assert.assertEquals;
-
-import java.nio.ByteBuffer;
-
-import org.junit.Test;
-
-public class TestBitFieldReader {
-
- public void runSeekTest(CompressionCodec codec) throws Exception {
- TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
- final int COUNT = 16384;
- BitFieldWriter out = new BitFieldWriter(
- new OutStream("test", 500, codec, collect), 1);
- TestInStream.PositionCollector[] positions =
- new TestInStream.PositionCollector[COUNT];
- for(int i=0; i < COUNT; ++i) {
- positions[i] = new TestInStream.PositionCollector();
- out.getPosition(positions[i]);
- // test runs, non-runs
- if (i < COUNT / 2) {
- out.write(i & 1);
- } else {
- out.write((i/3) & 1);
- }
- }
- out.flush();
- ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
- collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
- inBuf.flip();
- BitFieldReader in = new BitFieldReader(InStream.create("test",
- new ByteBuffer[]{inBuf}, new long[]{0}, inBuf.remaining(),
- codec, 500), 1);
- for(int i=0; i < COUNT; ++i) {
- int x = in.next();
- if (i < COUNT / 2) {
- assertEquals(i & 1, x);
- } else {
- assertEquals((i/3) & 1, x);
- }
- }
- for(int i=COUNT-1; i >= 0; --i) {
- in.seek(positions[i]);
- int x = in.next();
- if (i < COUNT / 2) {
- assertEquals(i & 1, x);
- } else {
- assertEquals((i/3) & 1, x);
- }
- }
- }
-
- @Test
- public void testUncompressedSeek() throws Exception {
- runSeekTest(null);
- }
-
- @Test
- public void testCompressedSeek() throws Exception {
- runSeekTest(new ZlibCodec());
- }
-
- @Test
- public void testBiggerItems() throws Exception {
- TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
- final int COUNT = 16384;
- BitFieldWriter out = new BitFieldWriter(
- new OutStream("test", 500, null, collect), 3);
- for(int i=0; i < COUNT; ++i) {
- // test runs, non-runs
- if (i < COUNT / 2) {
- out.write(i & 7);
- } else {
- out.write((i/3) & 7);
- }
- }
- out.flush();
- ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
- collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
- inBuf.flip();
- BitFieldReader in = new BitFieldReader(InStream.create("test",
- new ByteBuffer[]{inBuf}, new long[]{0}, inBuf.remaining(),
- null, 500), 3);
- for(int i=0; i < COUNT; ++i) {
- int x = in.next();
- if (i < COUNT / 2) {
- assertEquals(i & 7, x);
- } else {
- assertEquals((i/3) & 7, x);
- }
- }
- }
-
- @Test
- public void testSkips() throws Exception {
- TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
- BitFieldWriter out = new BitFieldWriter(
- new OutStream("test", 100, null, collect), 1);
- final int COUNT = 16384;
- for(int i=0; i < COUNT; ++i) {
- if (i < COUNT/2) {
- out.write(i & 1);
- } else {
- out.write((i/3) & 1);
- }
- }
- out.flush();
- ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
- collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
- inBuf.flip();
- BitFieldReader in = new BitFieldReader(InStream.create("test", new ByteBuffer[]{inBuf},
- new long[]{0}, inBuf.remaining(), null, 100), 1);
- for(int i=0; i < COUNT; i += 5) {
- int x = (int) in.next();
- if (i < COUNT/2) {
- assertEquals(i & 1, x);
- } else {
- assertEquals((i/3) & 1, x);
- }
- if (i < COUNT - 5) {
- in.skip(4);
- }
- in.skip(0);
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestBitPack.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestBitPack.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestBitPack.java
deleted file mode 100644
index 41a807b..0000000
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestBitPack.java
+++ /dev/null
@@ -1,316 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import static org.junit.Assert.assertArrayEquals;
-import static org.junit.Assert.assertEquals;
-
-import java.io.File;
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.Collections;
-import java.util.List;
-import java.util.Random;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hadoop.io.LongWritable;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TestName;
-
-import com.google.common.collect.Lists;
-import com.google.common.primitives.Longs;
-
-public class TestBitPack {
-
- private static final int SIZE = 100;
- private static Random rand = new Random(100);
- Path workDir = new Path(System.getProperty("test.tmp.dir", "target" + File.separator + "test"
- + File.separator + "tmp"));
-
- Configuration conf;
- FileSystem fs;
- Path testFilePath;
-
- @Rule
- public TestName testCaseName = new TestName();
-
- @Before
- public void openFileSystem() throws Exception {
- conf = new Configuration();
- fs = FileSystem.getLocal(conf);
- testFilePath = new Path(workDir, "TestOrcFile." + testCaseName.getMethodName() + ".orc");
- fs.delete(testFilePath, false);
- }
-
- private long[] deltaEncode(long[] inp) {
- long[] output = new long[inp.length];
- SerializationUtils utils = new SerializationUtils();
- for (int i = 0; i < inp.length; i++) {
- output[i] = utils.zigzagEncode(inp[i]);
- }
- return output;
- }
-
- private long nextLong(Random rng, long n) {
- long bits, val;
- do {
- bits = (rng.nextLong() << 1) >>> 1;
- val = bits % n;
- } while (bits - val + (n - 1) < 0L);
- return val;
- }
-
- private void runTest(int numBits) throws IOException {
- long[] inp = new long[SIZE];
- for (int i = 0; i < SIZE; i++) {
- long val = 0;
- if (numBits <= 32) {
- if (numBits == 1) {
- val = -1 * rand.nextInt(2);
- } else {
- val = rand.nextInt((int) Math.pow(2, numBits - 1));
- }
- } else {
- val = nextLong(rand, (long) Math.pow(2, numBits - 2));
- }
- if (val % 2 == 0) {
- val = -val;
- }
- inp[i] = val;
- }
- long[] deltaEncoded = deltaEncode(inp);
- long minInput = Collections.min(Longs.asList(deltaEncoded));
- long maxInput = Collections.max(Longs.asList(deltaEncoded));
- long rangeInput = maxInput - minInput;
- SerializationUtils utils = new SerializationUtils();
- int fixedWidth = utils.findClosestNumBits(rangeInput);
- TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
- OutStream output = new OutStream("test", SIZE, null, collect);
- utils.writeInts(deltaEncoded, 0, deltaEncoded.length, fixedWidth, output);
- output.flush();
- ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
- collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
- inBuf.flip();
- long[] buff = new long[SIZE];
- utils.readInts(buff, 0, SIZE, fixedWidth, InStream.create("test", new ByteBuffer[] { inBuf },
- new long[] { 0 }, inBuf.remaining(), null, SIZE));
- for (int i = 0; i < SIZE; i++) {
- buff[i] = utils.zigzagDecode(buff[i]);
- }
- assertEquals(numBits, fixedWidth);
- assertArrayEquals(inp, buff);
- }
-
- @Test
- public void test01BitPacking1Bit() throws IOException {
- runTest(1);
- }
-
- @Test
- public void test02BitPacking2Bit() throws IOException {
- runTest(2);
- }
-
- @Test
- public void test03BitPacking3Bit() throws IOException {
- runTest(3);
- }
-
- @Test
- public void test04BitPacking4Bit() throws IOException {
- runTest(4);
- }
-
- @Test
- public void test05BitPacking5Bit() throws IOException {
- runTest(5);
- }
-
- @Test
- public void test06BitPacking6Bit() throws IOException {
- runTest(6);
- }
-
- @Test
- public void test07BitPacking7Bit() throws IOException {
- runTest(7);
- }
-
- @Test
- public void test08BitPacking8Bit() throws IOException {
- runTest(8);
- }
-
- @Test
- public void test09BitPacking9Bit() throws IOException {
- runTest(9);
- }
-
- @Test
- public void test10BitPacking10Bit() throws IOException {
- runTest(10);
- }
-
- @Test
- public void test11BitPacking11Bit() throws IOException {
- runTest(11);
- }
-
- @Test
- public void test12BitPacking12Bit() throws IOException {
- runTest(12);
- }
-
- @Test
- public void test13BitPacking13Bit() throws IOException {
- runTest(13);
- }
-
- @Test
- public void test14BitPacking14Bit() throws IOException {
- runTest(14);
- }
-
- @Test
- public void test15BitPacking15Bit() throws IOException {
- runTest(15);
- }
-
- @Test
- public void test16BitPacking16Bit() throws IOException {
- runTest(16);
- }
-
- @Test
- public void test17BitPacking17Bit() throws IOException {
- runTest(17);
- }
-
- @Test
- public void test18BitPacking18Bit() throws IOException {
- runTest(18);
- }
-
- @Test
- public void test19BitPacking19Bit() throws IOException {
- runTest(19);
- }
-
- @Test
- public void test20BitPacking20Bit() throws IOException {
- runTest(20);
- }
-
- @Test
- public void test21BitPacking21Bit() throws IOException {
- runTest(21);
- }
-
- @Test
- public void test22BitPacking22Bit() throws IOException {
- runTest(22);
- }
-
- @Test
- public void test23BitPacking23Bit() throws IOException {
- runTest(23);
- }
-
- @Test
- public void test24BitPacking24Bit() throws IOException {
- runTest(24);
- }
-
- @Test
- public void test26BitPacking26Bit() throws IOException {
- runTest(26);
- }
-
- @Test
- public void test28BitPacking28Bit() throws IOException {
- runTest(28);
- }
-
- @Test
- public void test30BitPacking30Bit() throws IOException {
- runTest(30);
- }
-
- @Test
- public void test32BitPacking32Bit() throws IOException {
- runTest(32);
- }
-
- @Test
- public void test40BitPacking40Bit() throws IOException {
- runTest(40);
- }
-
- @Test
- public void test48BitPacking48Bit() throws IOException {
- runTest(48);
- }
-
- @Test
- public void test56BitPacking56Bit() throws IOException {
- runTest(56);
- }
-
- @Test
- public void test64BitPacking64Bit() throws IOException {
- runTest(64);
- }
-
- @Test
- public void testBitPack64Large() throws Exception {
- ObjectInspector inspector;
- synchronized (TestOrcFile.class) {
- inspector = ObjectInspectorFactory.getReflectionObjectInspector(Long.class,
- ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
- }
-
- int size = 1080832;
- long[] inp = new long[size];
- Random rand = new Random(1234);
- for (int i = 0; i < size; i++) {
- inp[i] = rand.nextLong();
- }
- List<Long> input = Lists.newArrayList(Longs.asList(inp));
-
- Writer writer = OrcFile.createWriter(testFilePath,
- OrcFile.writerOptions(conf).inspector(inspector).compress(CompressionKind.ZLIB));
- for (Long l : input) {
- writer.addRow(l);
- }
- writer.close();
-
- Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
- RecordReader rows = reader.rows();
- int idx = 0;
- while (rows.hasNext()) {
- Object row = rows.next(null);
- assertEquals(input.get(idx++).longValue(), ((LongWritable) row).get());
- }
- }
-}
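
The test removed above round-trips zigzag-encoded deltas through SerializationUtils.writeInts/readInts at every bit width from 1 to 64, asserting that findClosestNumBits picks exactly the requested width. For readers without the ORC sources handy, here is a minimal, self-contained sketch of the two primitives the test leans on; ZigzagRoundTrip and bitsNeeded are made-up names for illustration, while the real implementations now live in org.apache.orc.impl.SerializationUtils:

    // Sketch of zigzag encoding plus bit-width estimation; not ORC code.
    public class ZigzagRoundTrip {
      // Zigzag maps signed longs onto unsigned ones so small magnitudes of
      // either sign need few bits: 0,-1,1,-2,2 -> 0,1,2,3,4.
      static long zigzagEncode(long val) {
        return (val << 1) ^ (val >> 63);
      }

      // Inverse mapping; this is what the deleted test calls zigzagDecode.
      static long zigzagDecode(long val) {
        return (val >>> 1) ^ -(val & 1);
      }

      // Rough analogue of findClosestNumBits: the smallest width that can
      // hold the given non-negative value.
      static int bitsNeeded(long value) {
        int count = 0;
        while (value != 0) {
          ++count;
          value >>>= 1;
        }
        return Math.max(count, 1);
      }

      public static void main(String[] args) {
        long[] samples = {0, -1, 1, -64, 63, 1024};
        for (long v : samples) {
          long enc = zigzagEncode(v);
          System.out.println(v + " -> " + enc + " (" + bitsNeeded(enc)
              + " bits), decodes to " + zigzagDecode(enc));
        }
      }
    }

Note the gaps in the test list above (no 25-, 27-, 29- or 31-bit cases, then jumps of 8 past 32): the fixed-width encoder only supports an aligned set of widths and rounds others up, so only the supported widths could satisfy the assertEquals(numBits, fixedWidth) check.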
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestColumnStatistics.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestColumnStatistics.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestColumnStatistics.java
index a51177e..9433283 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestColumnStatistics.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestColumnStatistics.java
@@ -33,9 +33,18 @@ import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
+import org.apache.orc.ColumnStatistics;
+import org.apache.orc.impl.ColumnStatisticsImpl;
+import org.apache.orc.DateColumnStatistics;
+import org.apache.orc.DecimalColumnStatistics;
+import org.apache.orc.DoubleColumnStatistics;
+import org.apache.orc.IntegerColumnStatistics;
+import org.apache.orc.StringColumnStatistics;
+import org.apache.orc.StripeStatistics;
+import org.apache.orc.TimestampColumnStatistics;
+import org.apache.orc.TypeDescription;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestDynamicArray.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestDynamicArray.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestDynamicArray.java
deleted file mode 100644
index c6d3c01..0000000
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestDynamicArray.java
+++ /dev/null
@@ -1,87 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.util.Random;
-import org.junit.Test;
-
-import static org.junit.Assert.assertEquals;
-
-public class TestDynamicArray {
-
- @Test
- public void testByteArray() throws Exception {
- DynamicByteArray dba = new DynamicByteArray(3, 10);
- dba.add((byte) 0);
- dba.add((byte) 1);
- dba.set(3, (byte) 3);
- dba.set(2, (byte) 2);
- dba.add((byte) 4);
- assertEquals("{0,1,2,3,4}", dba.toString());
- assertEquals(5, dba.size());
- byte[] val;
- val = new byte[0];
- assertEquals(0, dba.compare(val, 0, 0, 2, 0));
- assertEquals(-1, dba.compare(val, 0, 0, 2, 1));
- val = new byte[]{3,42};
- assertEquals(1, dba.compare(val, 0, 1, 2, 0));
- assertEquals(1, dba.compare(val, 0, 1, 2, 1));
- assertEquals(0, dba.compare(val, 0, 1, 3, 1));
- assertEquals(-1, dba.compare(val, 0, 1, 3, 2));
- assertEquals(1, dba.compare(val, 0, 2, 3, 1));
- val = new byte[256];
- for(int b=-128; b < 128; ++b) {
- dba.add((byte) b);
- val[b+128] = (byte) b;
- }
- assertEquals(0, dba.compare(val, 0, 256, 5, 256));
- assertEquals(1, dba.compare(val, 0, 1, 0, 1));
- assertEquals(1, dba.compare(val, 254, 1, 0, 1));
- assertEquals(1, dba.compare(val, 120, 1, 64, 1));
- val = new byte[1024];
- Random rand = new Random(1701);
- for(int i = 0; i < val.length; ++i) {
- rand.nextBytes(val);
- }
- dba.add(val, 0, 1024);
- assertEquals(1285, dba.size());
- assertEquals(0, dba.compare(val, 0, 1024, 261, 1024));
- }
-
- @Test
- public void testIntArray() throws Exception {
- DynamicIntArray dia = new DynamicIntArray(10);
- for(int i=0; i < 10000; ++i) {
- dia.add(2*i);
- }
- assertEquals(10000, dia.size());
- for(int i=0; i < 10000; ++i) {
- assertEquals(2*i, dia.get(i));
- }
- dia.clear();
- assertEquals(0, dia.size());
- dia.add(3);
- dia.add(12);
- dia.add(65);
- assertEquals("{3,12,65}", dia.toString());
- for(int i=0; i < 5; ++i) {
- dia.increment(i, 3);
- }
- assertEquals("{6,15,68,3,3}", dia.toString());
- }
-}
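
TestDynamicArray covered the chunked growable arrays, DynamicByteArray and DynamicIntArray, that back ORC's dictionary structures; the classes themselves move to org.apache.orc.impl. The key design point is growth by whole fixed-size chunks, so an append never copies existing elements the way an ArrayList resize does. A rough sketch of that idea (GrowableIntArray is a made-up name, not the ORC class, and the real DynamicIntArray also supports the set/increment/clear operations exercised above):

    import java.util.ArrayList;
    import java.util.List;

    // Chunked growable int array: appends allocate new chunks instead of
    // copying old data. Illustrative only.
    public class GrowableIntArray {
      private static final int CHUNK = 8 * 1024;
      private final List<int[]> chunks = new ArrayList<>();
      private int size = 0;

      public void add(int value) {
        if (size / CHUNK >= chunks.size()) {
          chunks.add(new int[CHUNK]);        // grow; never copy existing data
        }
        chunks.get(size / CHUNK)[size % CHUNK] = value;
        ++size;
      }

      public int get(int index) {
        return chunks.get(index / CHUNK)[index % CHUNK];
      }

      public int size() {
        return size;
      }

      public static void main(String[] args) {
        GrowableIntArray a = new GrowableIntArray();
        for (int i = 0; i < 10000; ++i) {
          a.add(2 * i);                      // mirrors the deleted testIntArray
        }
        System.out.println(a.size() + " values, last = " + a.get(9999));
      }
    }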
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInStream.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInStream.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInStream.java
deleted file mode 100644
index 4c3ddfc..0000000
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInStream.java
+++ /dev/null
@@ -1,313 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.io.orc;
-
-import static junit.framework.Assert.assertEquals;
-import static junit.framework.Assert.fail;
-
-import java.io.DataInputStream;
-import java.io.DataOutput;
-import java.io.DataOutputStream;
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.junit.Test;
-
-public class TestInStream {
-
- static class OutputCollector implements OutStream.OutputReceiver {
- DynamicByteArray buffer = new DynamicByteArray();
-
- @Override
- public void output(ByteBuffer buffer) throws IOException {
- this.buffer.add(buffer.array(), buffer.arrayOffset() + buffer.position(),
- buffer.remaining());
- }
- }
-
- static class PositionCollector
- implements PositionProvider, PositionRecorder {
- private List<Long> positions = new ArrayList<Long>();
- private int index = 0;
-
- @Override
- public long getNext() {
- return positions.get(index++);
- }
-
- @Override
- public void addPosition(long offset) {
- positions.add(offset);
- }
-
- public void reset() {
- index = 0;
- }
-
- @Override
- public String toString() {
- StringBuilder builder = new StringBuilder("position: ");
- for(int i=0; i < positions.size(); ++i) {
- if (i != 0) {
- builder.append(", ");
- }
- builder.append(positions.get(i));
- }
- return builder.toString();
- }
- }
-
- @Test
- public void testUncompressed() throws Exception {
- OutputCollector collect = new OutputCollector();
- OutStream out = new OutStream("test", 100, null, collect);
- PositionCollector[] positions = new PositionCollector[1024];
- for(int i=0; i < 1024; ++i) {
- positions[i] = new PositionCollector();
- out.getPosition(positions[i]);
- out.write(i);
- }
- out.flush();
- assertEquals(1024, collect.buffer.size());
- for(int i=0; i < 1024; ++i) {
- assertEquals((byte) i, collect.buffer.get(i));
- }
- ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
- collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
- inBuf.flip();
- InStream in = InStream.create("test", new ByteBuffer[]{inBuf},
- new long[]{0}, inBuf.remaining(), null, 100);
- assertEquals("uncompressed stream test position: 0 length: 1024" +
- " range: 0 offset: 0 limit: 0",
- in.toString());
- for(int i=0; i < 1024; ++i) {
- int x = in.read();
- assertEquals(i & 0xff, x);
- }
- for(int i=1023; i >= 0; --i) {
- in.seek(positions[i]);
- assertEquals(i & 0xff, in.read());
- }
- }
-
- @Test
- public void testCompressed() throws Exception {
- OutputCollector collect = new OutputCollector();
- CompressionCodec codec = new ZlibCodec();
- OutStream out = new OutStream("test", 300, codec, collect);
- PositionCollector[] positions = new PositionCollector[1024];
- for(int i=0; i < 1024; ++i) {
- positions[i] = new PositionCollector();
- out.getPosition(positions[i]);
- out.write(i);
- }
- out.flush();
- assertEquals("test", out.toString());
- assertEquals(961, collect.buffer.size());
- ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
- collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
- inBuf.flip();
- InStream in = InStream.create("test", new ByteBuffer[]{inBuf},
- new long[]{0}, inBuf.remaining(), codec, 300);
- assertEquals("compressed stream test position: 0 length: 961 range: 0" +
- " offset: 0 limit: 0 range 0 = 0 to 961",
- in.toString());
- for(int i=0; i < 1024; ++i) {
- int x = in.read();
- assertEquals(i & 0xff, x);
- }
- assertEquals(0, in.available());
- for(int i=1023; i >= 0; --i) {
- in.seek(positions[i]);
- assertEquals(i & 0xff, in.read());
- }
- }
-
- @Test
- public void testCorruptStream() throws Exception {
- OutputCollector collect = new OutputCollector();
- CompressionCodec codec = new ZlibCodec();
- OutStream out = new OutStream("test", 500, codec, collect);
- PositionCollector[] positions = new PositionCollector[1024];
- for(int i=0; i < 1024; ++i) {
- positions[i] = new PositionCollector();
- out.getPosition(positions[i]);
- out.write(i);
- }
- out.flush();
-
- // now try to read the stream with a buffer that is too small
- ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
- collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
- inBuf.flip();
- InStream in = InStream.create("test", new ByteBuffer[]{inBuf},
- new long[]{0}, inBuf.remaining(), codec, 100);
- byte[] contents = new byte[1024];
- try {
- in.read(contents);
- fail();
- } catch(IllegalArgumentException iae) {
- // EXPECTED
- }
-
- // make a corrupted header
- inBuf.clear();
- inBuf.put((byte) 32);
- inBuf.put((byte) 0);
- inBuf.flip();
- in = InStream.create("test2", new ByteBuffer[]{inBuf}, new long[]{0},
- inBuf.remaining(), codec, 300);
- try {
- in.read();
- fail();
- } catch (IllegalStateException ise) {
- // EXPECTED
- }
- }
-
- @Test
- public void testDisjointBuffers() throws Exception {
- OutputCollector collect = new OutputCollector();
- CompressionCodec codec = new ZlibCodec();
- OutStream out = new OutStream("test", 400, codec, collect);
- PositionCollector[] positions = new PositionCollector[1024];
- DataOutput stream = new DataOutputStream(out);
- for(int i=0; i < 1024; ++i) {
- positions[i] = new PositionCollector();
- out.getPosition(positions[i]);
- stream.writeInt(i);
- }
- out.flush();
- assertEquals("test", out.toString());
- assertEquals(1674, collect.buffer.size());
- ByteBuffer[] inBuf = new ByteBuffer[3];
- inBuf[0] = ByteBuffer.allocate(500);
- inBuf[1] = ByteBuffer.allocate(1200);
- inBuf[2] = ByteBuffer.allocate(500);
- collect.buffer.setByteBuffer(inBuf[0], 0, 483);
- collect.buffer.setByteBuffer(inBuf[1], 483, 1625 - 483);
- collect.buffer.setByteBuffer(inBuf[2], 1625, 1674 - 1625);
-
- for(int i=0; i < inBuf.length; ++i) {
- inBuf[i].flip();
- }
- InStream in = InStream.create("test", inBuf,
- new long[]{0,483, 1625}, 1674, codec, 400);
- assertEquals("compressed stream test position: 0 length: 1674 range: 0" +
- " offset: 0 limit: 0 range 0 = 0 to 483;" +
- " range 1 = 483 to 1142; range 2 = 1625 to 49",
- in.toString());
- DataInputStream inStream = new DataInputStream(in);
- for(int i=0; i < 1024; ++i) {
- int x = inStream.readInt();
- assertEquals(i, x);
- }
- assertEquals(0, in.available());
- for(int i=1023; i >= 0; --i) {
- in.seek(positions[i]);
- assertEquals(i, inStream.readInt());
- }
-
- in = InStream.create("test", new ByteBuffer[]{inBuf[1], inBuf[2]},
- new long[]{483, 1625}, 1674, codec, 400);
- inStream = new DataInputStream(in);
- positions[303].reset();
- in.seek(positions[303]);
- for(int i=303; i < 1024; ++i) {
- assertEquals(i, inStream.readInt());
- }
-
- in = InStream.create("test", new ByteBuffer[]{inBuf[0], inBuf[2]},
- new long[]{0, 1625}, 1674, codec, 400);
- inStream = new DataInputStream(in);
- positions[1001].reset();
- for(int i=0; i < 300; ++i) {
- assertEquals(i, inStream.readInt());
- }
- in.seek(positions[1001]);
- for(int i=1001; i < 1024; ++i) {
- assertEquals(i, inStream.readInt());
- }
- }
-
- @Test
- public void testUncompressedDisjointBuffers() throws Exception {
- OutputCollector collect = new OutputCollector();
- OutStream out = new OutStream("test", 400, null, collect);
- PositionCollector[] positions = new PositionCollector[1024];
- DataOutput stream = new DataOutputStream(out);
- for(int i=0; i < 1024; ++i) {
- positions[i] = new PositionCollector();
- out.getPosition(positions[i]);
- stream.writeInt(i);
- }
- out.flush();
- assertEquals("test", out.toString());
- assertEquals(4096, collect.buffer.size());
- ByteBuffer[] inBuf = new ByteBuffer[3];
- inBuf[0] = ByteBuffer.allocate(1100);
- inBuf[1] = ByteBuffer.allocate(2200);
- inBuf[2] = ByteBuffer.allocate(1100);
- collect.buffer.setByteBuffer(inBuf[0], 0, 1024);
- collect.buffer.setByteBuffer(inBuf[1], 1024, 2048);
- collect.buffer.setByteBuffer(inBuf[2], 3072, 1024);
-
- for(int i=0; i < inBuf.length; ++i) {
- inBuf[i].flip();
- }
- InStream in = InStream.create("test", inBuf,
- new long[]{0, 1024, 3072}, 4096, null, 400);
- assertEquals("uncompressed stream test position: 0 length: 4096" +
- " range: 0 offset: 0 limit: 0",
- in.toString());
- DataInputStream inStream = new DataInputStream(in);
- for(int i=0; i < 1024; ++i) {
- int x = inStream.readInt();
- assertEquals(i, x);
- }
- assertEquals(0, in.available());
- for(int i=1023; i >= 0; --i) {
- in.seek(positions[i]);
- assertEquals(i, inStream.readInt());
- }
-
- in = InStream.create("test", new ByteBuffer[]{inBuf[1], inBuf[2]},
- new long[]{1024, 3072}, 4096, null, 400);
- inStream = new DataInputStream(in);
- positions[256].reset();
- in.seek(positions[256]);
- for(int i=256; i < 1024; ++i) {
- assertEquals(i, inStream.readInt());
- }
-
- in = InStream.create("test", new ByteBuffer[]{inBuf[0], inBuf[2]},
- new long[]{0, 3072}, 4096, null, 400);
- inStream = new DataInputStream(in);
- positions[768].reset();
- for(int i=0; i < 256; ++i) {
- assertEquals(i, inStream.readInt());
- }
- in.seek(positions[768]);
- for(int i=768; i < 1024; ++i) {
- assertEquals(i, inStream.readInt());
- }
- }
-}
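
TestInStream verified the position contract behind ORC seeks: the writer records a position for every value through PositionRecorder, and the reader later jumps back through PositionProvider, in both the compressed and uncompressed cases. A stripped-down sketch of the same record-then-seek pattern with a bare ByteBuffer and no compression (SeekSketch is hypothetical):

    import java.nio.ByteBuffer;

    // Record a byte offset per value while writing, then use the recorded
    // offsets for random-access reads. Illustrative only.
    public class SeekSketch {
      public static void main(String[] args) {
        ByteBuffer buf = ByteBuffer.allocate(1024 * 4);
        long[] positions = new long[1024];
        for (int i = 0; i < 1024; ++i) {
          positions[i] = buf.position();     // position before each value
          buf.putInt(i);
        }
        buf.flip();
        for (int i = 1023; i >= 0; --i) {    // read back in reverse order
          buf.position((int) positions[i]);
          int got = buf.getInt();
          if (got != i) {
            throw new IllegalStateException("seek failed at " + i);
          }
        }
        System.out.println("all 1024 seeks verified");
      }
    }

For a compressed stream a single offset is not enough: the recorded position is the offset of the compressed chunk plus the uncompressed offset inside it, which is why the deleted PositionCollector keeps a list of longs per entry rather than one number.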
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
index a4b948a..f81f5bb8 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
@@ -108,6 +108,8 @@ import org.apache.hadoop.mapred.OutputFormat;
import org.apache.hadoop.mapred.RecordWriter;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.Progressable;
+import org.apache.orc.OrcProto;
+
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestIntegerCompressionReader.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestIntegerCompressionReader.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestIntegerCompressionReader.java
deleted file mode 100644
index 29dce30..0000000
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestIntegerCompressionReader.java
+++ /dev/null
@@ -1,129 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import static junit.framework.Assert.assertEquals;
-
-import java.nio.ByteBuffer;
-import java.util.Random;
-
-import org.junit.Test;
-
-public class TestIntegerCompressionReader {
-
- public void runSeekTest(CompressionCodec codec) throws Exception {
- TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
- RunLengthIntegerWriterV2 out = new RunLengthIntegerWriterV2(
- new OutStream("test", 1000, codec, collect), true);
- TestInStream.PositionCollector[] positions =
- new TestInStream.PositionCollector[4096];
- Random random = new Random(99);
- int[] junk = new int[2048];
- for(int i=0; i < junk.length; ++i) {
- junk[i] = random.nextInt();
- }
- for(int i=0; i < 4096; ++i) {
- positions[i] = new TestInStream.PositionCollector();
- out.getPosition(positions[i]);
- // test runs, incrementing runs, non-runs
- if (i < 1024) {
- out.write(i/4);
- } else if (i < 2048) {
- out.write(2*i);
- } else {
- out.write(junk[i-2048]);
- }
- }
- out.flush();
- ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
- collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
- inBuf.flip();
- RunLengthIntegerReaderV2 in =
- new RunLengthIntegerReaderV2(InStream.create
- ("test", new ByteBuffer[]{inBuf},
- new long[]{0}, inBuf.remaining(),
- codec, 1000), true, false);
- for(int i=0; i < 2048; ++i) {
- int x = (int) in.next();
- if (i < 1024) {
- assertEquals(i/4, x);
- } else if (i < 2048) {
- assertEquals(2*i, x);
- } else {
- assertEquals(junk[i-2048], x);
- }
- }
- for(int i=2047; i >= 0; --i) {
- in.seek(positions[i]);
- int x = (int) in.next();
- if (i < 1024) {
- assertEquals(i/4, x);
- } else if (i < 2048) {
- assertEquals(2*i, x);
- } else {
- assertEquals(junk[i-2048], x);
- }
- }
- }
-
- @Test
- public void testUncompressedSeek() throws Exception {
- runSeekTest(null);
- }
-
- @Test
- public void testCompressedSeek() throws Exception {
- runSeekTest(new ZlibCodec());
- }
-
- @Test
- public void testSkips() throws Exception {
- TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
- RunLengthIntegerWriterV2 out = new RunLengthIntegerWriterV2(
- new OutStream("test", 100, null, collect), true);
- for(int i=0; i < 2048; ++i) {
- if (i < 1024) {
- out.write(i);
- } else {
- out.write(256 * i);
- }
- }
- out.flush();
- ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
- collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
- inBuf.flip();
- RunLengthIntegerReaderV2 in =
- new RunLengthIntegerReaderV2(InStream.create("test",
- new ByteBuffer[]{inBuf},
- new long[]{0},
- inBuf.remaining(),
- null, 100), true, false);
- for(int i=0; i < 2048; i += 10) {
- int x = (int) in.next();
- if (i < 1024) {
- assertEquals(i, x);
- } else {
- assertEquals(256 * i, x);
- }
- if (i < 2038) {
- in.skip(9);
- }
- in.skip(0);
- }
- }
-}
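
TestIntegerCompressionReader drove RunLengthIntegerWriterV2/ReaderV2 through constant runs, incrementing runs, and random values, with seek and skip on top. The real RLEv2 format picks among four sub-encodings per run (short repeat, direct, patched base, delta); the sketch below shows only the simplest repeat idea, to make the round-trip shape concrete (RleSketch is a made-up class, not the ORC encoder):

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    // Toy run-length coding as (count, value) pairs. Illustrative only.
    public class RleSketch {
      static List<long[]> encode(long[] input) {
        List<long[]> runs = new ArrayList<>();
        int i = 0;
        while (i < input.length) {
          int j = i;
          while (j + 1 < input.length && input[j + 1] == input[i]) {
            ++j;                             // extend the current run
          }
          runs.add(new long[]{j - i + 1, input[i]});
          i = j + 1;
        }
        return runs;
      }

      static long[] decode(List<long[]> runs, int size) {
        long[] out = new long[size];
        int pos = 0;
        for (long[] run : runs) {
          for (long k = 0; k < run[0]; ++k) {
            out[pos++] = run[1];
          }
        }
        return out;
      }

      public static void main(String[] args) {
        long[] input = {7, 7, 7, 7, 1, 2, 2, 9};
        long[] back = decode(encode(input), input.length);
        System.out.println(Arrays.equals(input, back));   // prints true
      }
    }

Skipping, as in the deleted testSkips, falls out naturally in such a scheme: the reader can walk run headers and subtract run lengths without materializing the skipped values.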
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestJsonFileDump.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestJsonFileDump.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestJsonFileDump.java
index d17c528..acf232d 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestJsonFileDump.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestJsonFileDump.java
@@ -35,6 +35,7 @@ import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hive.common.util.HiveTestUtils;
+import org.apache.orc.CompressionKind;
import org.junit.Before;
import org.junit.Test;