You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by pr...@apache.org on 2015/01/10 01:09:15 UTC
svn commit: r1650699 [1/4] - in /hive/trunk/ql/src:
gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/
java/org/apache/hadoop/hive/ql/io/orc/
protobuf/org/apache/hadoop/hive/ql/io/orc/
test/org/apache/hadoop/hive/ql/io/orc/ test/resources/ test/re...
Author: prasanthj
Date: Sat Jan 10 00:09:14 2015
New Revision: 1650699
URL: http://svn.apache.org/r1650699
Log:
HIVE-4639: Add has null flag to ORC internal index (Prasanth Jayachandran reviewed by Gopal V)
Added:
hive/trunk/ql/src/test/resources/orc-file-has-null.out
Modified:
hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatistics.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestColumnStatistics.java
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcNullOptimization.java
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcSerDeStats.java
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java
hive/trunk/ql/src/test/resources/orc-file-dump-dictionary-threshold.out
hive/trunk/ql/src/test/resources/orc-file-dump.out
hive/trunk/ql/src/test/results/clientpositive/alter_merge_orc.q.out
hive/trunk/ql/src/test/results/clientpositive/alter_merge_stats_orc.q.out
hive/trunk/ql/src/test/results/clientpositive/annotate_stats_part.q.out
hive/trunk/ql/src/test/results/clientpositive/annotate_stats_table.q.out
hive/trunk/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out
hive/trunk/ql/src/test/results/clientpositive/dynpart_sort_optimization2.q.out
hive/trunk/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out
hive/trunk/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out
hive/trunk/ql/src/test/results/clientpositive/orc_analyze.q.out
hive/trunk/ql/src/test/results/clientpositive/orc_predicate_pushdown.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/alter_merge_orc.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/alter_merge_stats_orc.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/dynpart_sort_optimization2.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/orc_analyze.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out
hive/trunk/ql/src/test/results/clientpositive/vectorized_ptf.q.out
Modified: hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java?rev=1650699&r1=1650698&r2=1650699&view=diff
==============================================================================
--- hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java (original)
+++ hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java Sat Jan 10 00:09:14 2015
@@ -4796,6 +4796,16 @@ public final class OrcProto {
* <code>optional .org.apache.hadoop.hive.ql.io.orc.TimestampStatistics timestampStatistics = 9;</code>
*/
org.apache.hadoop.hive.ql.io.orc.OrcProto.TimestampStatisticsOrBuilder getTimestampStatisticsOrBuilder();
+
+ // optional bool hasNull = 10;
+ /**
+ * <code>optional bool hasNull = 10;</code>
+ */
+ boolean hasHasNull();
+ /**
+ * <code>optional bool hasNull = 10;</code>
+ */
+ boolean getHasNull();
}
/**
* Protobuf type {@code org.apache.hadoop.hive.ql.io.orc.ColumnStatistics}
@@ -4957,6 +4967,11 @@ public final class OrcProto {
bitField0_ |= 0x00000100;
break;
}
+ case 80: {
+ bitField0_ |= 0x00000200;
+ hasNull_ = input.readBool();
+ break;
+ }
}
}
} catch (com.google.protobuf.InvalidProtocolBufferException e) {
@@ -5189,6 +5204,22 @@ public final class OrcProto {
return timestampStatistics_;
}
+ // optional bool hasNull = 10;
+ public static final int HASNULL_FIELD_NUMBER = 10;
+ private boolean hasNull_;
+ /**
+ * <code>optional bool hasNull = 10;</code>
+ */
+ public boolean hasHasNull() {
+ return ((bitField0_ & 0x00000200) == 0x00000200);
+ }
+ /**
+ * <code>optional bool hasNull = 10;</code>
+ */
+ public boolean getHasNull() {
+ return hasNull_;
+ }
+
private void initFields() {
numberOfValues_ = 0L;
intStatistics_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.IntegerStatistics.getDefaultInstance();
@@ -5199,6 +5230,7 @@ public final class OrcProto {
dateStatistics_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics.getDefaultInstance();
binaryStatistics_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics.getDefaultInstance();
timestampStatistics_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.TimestampStatistics.getDefaultInstance();
+ hasNull_ = false;
}
private byte memoizedIsInitialized = -1;
public final boolean isInitialized() {
@@ -5239,6 +5271,9 @@ public final class OrcProto {
if (((bitField0_ & 0x00000100) == 0x00000100)) {
output.writeMessage(9, timestampStatistics_);
}
+ if (((bitField0_ & 0x00000200) == 0x00000200)) {
+ output.writeBool(10, hasNull_);
+ }
getUnknownFields().writeTo(output);
}
@@ -5284,6 +5319,10 @@ public final class OrcProto {
size += com.google.protobuf.CodedOutputStream
.computeMessageSize(9, timestampStatistics_);
}
+ if (((bitField0_ & 0x00000200) == 0x00000200)) {
+ size += com.google.protobuf.CodedOutputStream
+ .computeBoolSize(10, hasNull_);
+ }
size += getUnknownFields().getSerializedSize();
memoizedSerializedSize = size;
return size;
@@ -5458,6 +5497,8 @@ public final class OrcProto {
timestampStatisticsBuilder_.clear();
}
bitField0_ = (bitField0_ & ~0x00000100);
+ hasNull_ = false;
+ bitField0_ = (bitField0_ & ~0x00000200);
return this;
}
@@ -5554,6 +5595,10 @@ public final class OrcProto {
} else {
result.timestampStatistics_ = timestampStatisticsBuilder_.build();
}
+ if (((from_bitField0_ & 0x00000200) == 0x00000200)) {
+ to_bitField0_ |= 0x00000200;
+ }
+ result.hasNull_ = hasNull_;
result.bitField0_ = to_bitField0_;
onBuilt();
return result;
@@ -5597,6 +5642,9 @@ public final class OrcProto {
if (other.hasTimestampStatistics()) {
mergeTimestampStatistics(other.getTimestampStatistics());
}
+ if (other.hasHasNull()) {
+ setHasNull(other.getHasNull());
+ }
this.mergeUnknownFields(other.getUnknownFields());
return this;
}
@@ -6593,6 +6641,39 @@ public final class OrcProto {
return timestampStatisticsBuilder_;
}
+ // optional bool hasNull = 10;
+ private boolean hasNull_ ;
+ /**
+ * <code>optional bool hasNull = 10;</code>
+ */
+ public boolean hasHasNull() {
+ return ((bitField0_ & 0x00000200) == 0x00000200);
+ }
+ /**
+ * <code>optional bool hasNull = 10;</code>
+ */
+ public boolean getHasNull() {
+ return hasNull_;
+ }
+ /**
+ * <code>optional bool hasNull = 10;</code>
+ */
+ public Builder setHasNull(boolean value) {
+ bitField0_ |= 0x00000200;
+ hasNull_ = value;
+ onChanged();
+ return this;
+ }
+ /**
+ * <code>optional bool hasNull = 10;</code>
+ */
+ public Builder clearHasNull() {
+ bitField0_ = (bitField0_ & ~0x00000200);
+ hasNull_ = false;
+ onChanged();
+ return this;
+ }
+
// @@protoc_insertion_point(builder_scope:org.apache.hadoop.hive.ql.io.orc.ColumnStatistics)
}
@@ -17657,7 +17738,7 @@ public final class OrcProto {
"\"2\n\016DateStatistics\022\017\n\007minimum\030\001 \001(\021\022\017\n\007m",
"aximum\030\002 \001(\021\"7\n\023TimestampStatistics\022\017\n\007m" +
"inimum\030\001 \001(\022\022\017\n\007maximum\030\002 \001(\022\"\037\n\020BinaryS" +
- "tatistics\022\013\n\003sum\030\001 \001(\022\"\234\005\n\020ColumnStatist" +
+ "tatistics\022\013\n\003sum\030\001 \001(\022\"\255\005\n\020ColumnStatist" +
"ics\022\026\n\016numberOfValues\030\001 \001(\004\022J\n\rintStatis" +
"tics\030\002 \001(\01323.org.apache.hadoop.hive.ql.i" +
"o.orc.IntegerStatistics\022L\n\020doubleStatist" +
@@ -17674,60 +17755,60 @@ public final class OrcProto {
"org.apache.hadoop.hive.ql.io.orc.BinaryS" +
"tatistics\022R\n\023timestampStatistics\030\t \001(\01325" +
".org.apache.hadoop.hive.ql.io.orc.Timest" +
- "ampStatistics\"n\n\rRowIndexEntry\022\025\n\tpositi",
- "ons\030\001 \003(\004B\002\020\001\022F\n\nstatistics\030\002 \001(\01322.org." +
- "apache.hadoop.hive.ql.io.orc.ColumnStati" +
- "stics\"J\n\010RowIndex\022>\n\005entry\030\001 \003(\0132/.org.a" +
- "pache.hadoop.hive.ql.io.orc.RowIndexEntr" +
- "y\"\331\001\n\006Stream\022;\n\004kind\030\001 \002(\0162-.org.apache." +
- "hadoop.hive.ql.io.orc.Stream.Kind\022\016\n\006col" +
- "umn\030\002 \001(\r\022\016\n\006length\030\003 \001(\004\"r\n\004Kind\022\013\n\007PRE" +
- "SENT\020\000\022\010\n\004DATA\020\001\022\n\n\006LENGTH\020\002\022\023\n\017DICTIONA" +
- "RY_DATA\020\003\022\024\n\020DICTIONARY_COUNT\020\004\022\r\n\tSECON" +
- "DARY\020\005\022\r\n\tROW_INDEX\020\006\"\263\001\n\016ColumnEncoding",
- "\022C\n\004kind\030\001 \002(\01625.org.apache.hadoop.hive." +
- "ql.io.orc.ColumnEncoding.Kind\022\026\n\016diction" +
- "arySize\030\002 \001(\r\"D\n\004Kind\022\n\n\006DIRECT\020\000\022\016\n\nDIC" +
- "TIONARY\020\001\022\r\n\tDIRECT_V2\020\002\022\021\n\rDICTIONARY_V" +
- "2\020\003\"\214\001\n\014StripeFooter\0229\n\007streams\030\001 \003(\0132(." +
- "org.apache.hadoop.hive.ql.io.orc.Stream\022" +
- "A\n\007columns\030\002 \003(\01320.org.apache.hadoop.hiv" +
- "e.ql.io.orc.ColumnEncoding\"\370\002\n\004Type\0229\n\004k" +
- "ind\030\001 \002(\0162+.org.apache.hadoop.hive.ql.io" +
- ".orc.Type.Kind\022\024\n\010subtypes\030\002 \003(\rB\002\020\001\022\022\n\n",
- "fieldNames\030\003 \003(\t\022\025\n\rmaximumLength\030\004 \001(\r\022" +
- "\021\n\tprecision\030\005 \001(\r\022\r\n\005scale\030\006 \001(\r\"\321\001\n\004Ki" +
- "nd\022\013\n\007BOOLEAN\020\000\022\010\n\004BYTE\020\001\022\t\n\005SHORT\020\002\022\007\n\003" +
- "INT\020\003\022\010\n\004LONG\020\004\022\t\n\005FLOAT\020\005\022\n\n\006DOUBLE\020\006\022\n" +
- "\n\006STRING\020\007\022\n\n\006BINARY\020\010\022\r\n\tTIMESTAMP\020\t\022\010\n" +
- "\004LIST\020\n\022\007\n\003MAP\020\013\022\n\n\006STRUCT\020\014\022\t\n\005UNION\020\r\022" +
- "\013\n\007DECIMAL\020\016\022\010\n\004DATE\020\017\022\013\n\007VARCHAR\020\020\022\010\n\004C" +
- "HAR\020\021\"x\n\021StripeInformation\022\016\n\006offset\030\001 \001" +
- "(\004\022\023\n\013indexLength\030\002 \001(\004\022\022\n\ndataLength\030\003 " +
- "\001(\004\022\024\n\014footerLength\030\004 \001(\004\022\024\n\014numberOfRow",
- "s\030\005 \001(\004\"/\n\020UserMetadataItem\022\014\n\004name\030\001 \002(" +
- "\t\022\r\n\005value\030\002 \002(\014\"X\n\020StripeStatistics\022D\n\010" +
- "colStats\030\001 \003(\01322.org.apache.hadoop.hive." +
- "ql.io.orc.ColumnStatistics\"S\n\010Metadata\022G" +
- "\n\013stripeStats\030\001 \003(\01322.org.apache.hadoop." +
- "hive.ql.io.orc.StripeStatistics\"\356\002\n\006Foot" +
- "er\022\024\n\014headerLength\030\001 \001(\004\022\025\n\rcontentLengt" +
- "h\030\002 \001(\004\022D\n\007stripes\030\003 \003(\01323.org.apache.ha" +
- "doop.hive.ql.io.orc.StripeInformation\0225\n" +
- "\005types\030\004 \003(\0132&.org.apache.hadoop.hive.ql",
- ".io.orc.Type\022D\n\010metadata\030\005 \003(\01322.org.apa" +
- "che.hadoop.hive.ql.io.orc.UserMetadataIt" +
- "em\022\024\n\014numberOfRows\030\006 \001(\004\022F\n\nstatistics\030\007" +
- " \003(\01322.org.apache.hadoop.hive.ql.io.orc." +
- "ColumnStatistics\022\026\n\016rowIndexStride\030\010 \001(\r" +
- "\"\334\001\n\nPostScript\022\024\n\014footerLength\030\001 \001(\004\022F\n" +
- "\013compression\030\002 \001(\01621.org.apache.hadoop.h" +
- "ive.ql.io.orc.CompressionKind\022\034\n\024compres" +
- "sionBlockSize\030\003 \001(\004\022\023\n\007version\030\004 \003(\rB\002\020\001" +
- "\022\026\n\016metadataLength\030\005 \001(\004\022\025\n\rwriterVersio",
- "n\030\006 \001(\r\022\016\n\005magic\030\300> \001(\t*:\n\017CompressionKi" +
- "nd\022\010\n\004NONE\020\000\022\010\n\004ZLIB\020\001\022\n\n\006SNAPPY\020\002\022\007\n\003LZ" +
- "O\020\003"
+ "ampStatistics\022\017\n\007hasNull\030\n \001(\010\"n\n\rRowInd",
+ "exEntry\022\025\n\tpositions\030\001 \003(\004B\002\020\001\022F\n\nstatis" +
+ "tics\030\002 \001(\01322.org.apache.hadoop.hive.ql.i" +
+ "o.orc.ColumnStatistics\"J\n\010RowIndex\022>\n\005en" +
+ "try\030\001 \003(\0132/.org.apache.hadoop.hive.ql.io" +
+ ".orc.RowIndexEntry\"\331\001\n\006Stream\022;\n\004kind\030\001 " +
+ "\002(\0162-.org.apache.hadoop.hive.ql.io.orc.S" +
+ "tream.Kind\022\016\n\006column\030\002 \001(\r\022\016\n\006length\030\003 \001" +
+ "(\004\"r\n\004Kind\022\013\n\007PRESENT\020\000\022\010\n\004DATA\020\001\022\n\n\006LEN" +
+ "GTH\020\002\022\023\n\017DICTIONARY_DATA\020\003\022\024\n\020DICTIONARY" +
+ "_COUNT\020\004\022\r\n\tSECONDARY\020\005\022\r\n\tROW_INDEX\020\006\"\263",
+ "\001\n\016ColumnEncoding\022C\n\004kind\030\001 \002(\01625.org.ap" +
+ "ache.hadoop.hive.ql.io.orc.ColumnEncodin" +
+ "g.Kind\022\026\n\016dictionarySize\030\002 \001(\r\"D\n\004Kind\022\n" +
+ "\n\006DIRECT\020\000\022\016\n\nDICTIONARY\020\001\022\r\n\tDIRECT_V2\020" +
+ "\002\022\021\n\rDICTIONARY_V2\020\003\"\214\001\n\014StripeFooter\0229\n" +
+ "\007streams\030\001 \003(\0132(.org.apache.hadoop.hive." +
+ "ql.io.orc.Stream\022A\n\007columns\030\002 \003(\01320.org." +
+ "apache.hadoop.hive.ql.io.orc.ColumnEncod" +
+ "ing\"\370\002\n\004Type\0229\n\004kind\030\001 \002(\0162+.org.apache." +
+ "hadoop.hive.ql.io.orc.Type.Kind\022\024\n\010subty",
+ "pes\030\002 \003(\rB\002\020\001\022\022\n\nfieldNames\030\003 \003(\t\022\025\n\rmax" +
+ "imumLength\030\004 \001(\r\022\021\n\tprecision\030\005 \001(\r\022\r\n\005s" +
+ "cale\030\006 \001(\r\"\321\001\n\004Kind\022\013\n\007BOOLEAN\020\000\022\010\n\004BYTE" +
+ "\020\001\022\t\n\005SHORT\020\002\022\007\n\003INT\020\003\022\010\n\004LONG\020\004\022\t\n\005FLOA" +
+ "T\020\005\022\n\n\006DOUBLE\020\006\022\n\n\006STRING\020\007\022\n\n\006BINARY\020\010\022" +
+ "\r\n\tTIMESTAMP\020\t\022\010\n\004LIST\020\n\022\007\n\003MAP\020\013\022\n\n\006STR" +
+ "UCT\020\014\022\t\n\005UNION\020\r\022\013\n\007DECIMAL\020\016\022\010\n\004DATE\020\017\022" +
+ "\013\n\007VARCHAR\020\020\022\010\n\004CHAR\020\021\"x\n\021StripeInformat" +
+ "ion\022\016\n\006offset\030\001 \001(\004\022\023\n\013indexLength\030\002 \001(\004" +
+ "\022\022\n\ndataLength\030\003 \001(\004\022\024\n\014footerLength\030\004 \001",
+ "(\004\022\024\n\014numberOfRows\030\005 \001(\004\"/\n\020UserMetadata" +
+ "Item\022\014\n\004name\030\001 \002(\t\022\r\n\005value\030\002 \002(\014\"X\n\020Str" +
+ "ipeStatistics\022D\n\010colStats\030\001 \003(\01322.org.ap" +
+ "ache.hadoop.hive.ql.io.orc.ColumnStatist" +
+ "ics\"S\n\010Metadata\022G\n\013stripeStats\030\001 \003(\01322.o" +
+ "rg.apache.hadoop.hive.ql.io.orc.StripeSt" +
+ "atistics\"\356\002\n\006Footer\022\024\n\014headerLength\030\001 \001(" +
+ "\004\022\025\n\rcontentLength\030\002 \001(\004\022D\n\007stripes\030\003 \003(" +
+ "\01323.org.apache.hadoop.hive.ql.io.orc.Str" +
+ "ipeInformation\0225\n\005types\030\004 \003(\0132&.org.apac",
+ "he.hadoop.hive.ql.io.orc.Type\022D\n\010metadat" +
+ "a\030\005 \003(\01322.org.apache.hadoop.hive.ql.io.o" +
+ "rc.UserMetadataItem\022\024\n\014numberOfRows\030\006 \001(" +
+ "\004\022F\n\nstatistics\030\007 \003(\01322.org.apache.hadoo" +
+ "p.hive.ql.io.orc.ColumnStatistics\022\026\n\016row" +
+ "IndexStride\030\010 \001(\r\"\334\001\n\nPostScript\022\024\n\014foot" +
+ "erLength\030\001 \001(\004\022F\n\013compression\030\002 \001(\01621.or" +
+ "g.apache.hadoop.hive.ql.io.orc.Compressi" +
+ "onKind\022\034\n\024compressionBlockSize\030\003 \001(\004\022\023\n\007" +
+ "version\030\004 \003(\rB\002\020\001\022\026\n\016metadataLength\030\005 \001(",
+ "\004\022\025\n\rwriterVersion\030\006 \001(\r\022\016\n\005magic\030\300> \001(\t" +
+ "*:\n\017CompressionKind\022\010\n\004NONE\020\000\022\010\n\004ZLIB\020\001\022" +
+ "\n\n\006SNAPPY\020\002\022\007\n\003LZO\020\003"
};
com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner =
new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() {
@@ -17787,7 +17868,7 @@ public final class OrcProto {
internal_static_org_apache_hadoop_hive_ql_io_orc_ColumnStatistics_fieldAccessorTable = new
com.google.protobuf.GeneratedMessage.FieldAccessorTable(
internal_static_org_apache_hadoop_hive_ql_io_orc_ColumnStatistics_descriptor,
- new java.lang.String[] { "NumberOfValues", "IntStatistics", "DoubleStatistics", "StringStatistics", "BucketStatistics", "DecimalStatistics", "DateStatistics", "BinaryStatistics", "TimestampStatistics", });
+ new java.lang.String[] { "NumberOfValues", "IntStatistics", "DoubleStatistics", "StringStatistics", "BucketStatistics", "DecimalStatistics", "DateStatistics", "BinaryStatistics", "TimestampStatistics", "HasNull", });
internal_static_org_apache_hadoop_hive_ql_io_orc_RowIndexEntry_descriptor =
getDescriptor().getMessageTypes().get(9);
internal_static_org_apache_hadoop_hive_ql_io_orc_RowIndexEntry_fieldAccessorTable = new
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatistics.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatistics.java?rev=1650699&r1=1650698&r2=1650699&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatistics.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatistics.java Sat Jan 10 00:09:14 2015
@@ -27,4 +27,10 @@ public interface ColumnStatistics {
* @return the number of values
*/
long getNumberOfValues();
+
+ /**
+ * Returns true if there are nulls in the scope of column statistics.
+ * @return true if null present else false
+ */
+ boolean hasNull();
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java?rev=1650699&r1=1650698&r2=1650699&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java Sat Jan 10 00:09:14 2015
@@ -17,8 +17,6 @@
*/
package org.apache.hadoop.hive.ql.io.orc;
-import java.sql.Timestamp;
-
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -26,6 +24,8 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
+import java.sql.Timestamp;
+
class ColumnStatisticsImpl implements ColumnStatistics {
private static final class BooleanStatisticsImpl extends ColumnStatisticsImpl
@@ -816,11 +816,16 @@ class ColumnStatisticsImpl implements Co
}
private long count = 0;
+ private boolean hasNull = false;
ColumnStatisticsImpl(OrcProto.ColumnStatistics stats) {
if (stats.hasNumberOfValues()) {
count = stats.getNumberOfValues();
}
+
+ if (stats.hasHasNull()) {
+ hasNull = stats.getHasNull();
+ }
}
ColumnStatisticsImpl() {
@@ -830,6 +835,10 @@ class ColumnStatisticsImpl implements Co
count += 1;
}
+ void setNull() {
+ hasNull = true;
+ }
+
void updateBoolean(boolean value) {
throw new UnsupportedOperationException("Can't update boolean");
}
@@ -864,10 +873,12 @@ class ColumnStatisticsImpl implements Co
void merge(ColumnStatisticsImpl stats) {
count += stats.count;
+ hasNull |= stats.hasNull;
}
void reset() {
count = 0;
+ hasNull = false;
}
@Override
@@ -876,14 +887,20 @@ class ColumnStatisticsImpl implements Co
}
@Override
+ public boolean hasNull() {
+ return hasNull;
+ }
+
+ @Override
public String toString() {
- return "count: " + count;
+ return "count: " + count + " hasNull: " + hasNull;
}
OrcProto.ColumnStatistics.Builder serialize() {
OrcProto.ColumnStatistics.Builder builder =
OrcProto.ColumnStatistics.newBuilder();
builder.setNumberOfValues(count);
+ builder.setHasNull(hasNull);
return builder;
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java?rev=1650699&r1=1650698&r2=1650699&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java Sat Jan 10 00:09:14 2015
@@ -17,15 +17,6 @@
*/
package org.apache.hadoop.hive.ql.io.orc;
-import java.io.OutputStreamWriter;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-
-import java.io.IOException;
-import java.text.DecimalFormat;
-import java.util.Map;
-
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
@@ -46,6 +37,14 @@ import org.apache.hadoop.io.LongWritable
import org.codehaus.jettison.json.JSONException;
import org.codehaus.jettison.json.JSONWriter;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.text.DecimalFormat;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+
/**
* A tool for printing out the file structure of ORC files.
*/
@@ -170,10 +169,7 @@ public final class FileDump {
buf.append("no stats at ");
} else {
ColumnStatistics cs = ColumnStatisticsImpl.deserialize(colStats);
- Object min = RecordReaderImpl.getMin(cs), max = RecordReaderImpl.getMax(cs);
- buf.append(" count: ").append(cs.getNumberOfValues());
- buf.append(" min: ").append(min);
- buf.append(" max: ").append(max);
+ buf.append(cs.toString());
}
buf.append(" positions: ");
for (int posIx = 0; posIx < entry.getPositionsCount(); ++posIx) {
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java?rev=1650699&r1=1650698&r2=1650699&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java Sat Jan 10 00:09:14 2015
@@ -18,18 +18,9 @@
package org.apache.hadoop.hive.ql.io.orc;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.TreeMap;
-import java.util.NavigableMap;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicInteger;
+import com.google.common.cache.Cache;
+import com.google.common.cache.CacheBuilder;
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -54,9 +45,9 @@ import org.apache.hadoop.hive.ql.io.Reco
import org.apache.hadoop.hive.ql.io.StatsProvidingRecordReader;
import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
import org.apache.hadoop.hive.ql.log.PerfLogger;
-import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.hive.serde2.SerDeStats;
@@ -74,9 +65,18 @@ import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.StringUtils;
-import com.google.common.cache.Cache;
-import com.google.common.cache.CacheBuilder;
-import com.google.common.util.concurrent.ThreadFactoryBuilder;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.NavigableMap;
+import java.util.TreeMap;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
/**
* A MapReduce/Hive input format for ORC files.
* <p>
@@ -919,13 +919,8 @@ public class OrcInputFormat implements
if (filterColumns[pred] != -1) {
// column statistics at index 0 contains only the number of rows
- ColumnStatistics stats =
- stripeStatistics.getColumnStatistics()[filterColumns[pred]];
- Object minValue = RecordReaderImpl.getMin(stats);
- Object maxValue = RecordReaderImpl.getMax(stats);
- truthValues[pred] =
- RecordReaderImpl.evaluatePredicateRange(predLeaves.get(pred),
- minValue, maxValue);
+ ColumnStatistics stats = stripeStatistics.getColumnStatistics()[filterColumns[pred]];
+ truthValues[pred] = RecordReaderImpl.evaluatePredicate(stats, predLeaves.get(pred));
} else {
// partition column case.
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java?rev=1650699&r1=1650698&r2=1650699&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java Sat Jan 10 00:09:14 2015
@@ -2356,21 +2356,36 @@ class RecordReaderImpl implements Record
/**
* Evaluate a predicate with respect to the statistics from the column
* that is referenced in the predicate.
- * @param index the statistics for the column mentioned in the predicate
+ * @param statsProto the statistics for the column mentioned in the predicate
* @param predicate the leaf predicate we need to evaluate
* @return the set of truth values that may be returned for the given
* predicate.
*/
- static TruthValue evaluatePredicate(OrcProto.ColumnStatistics index,
+ static TruthValue evaluatePredicate(OrcProto.ColumnStatistics statsProto,
PredicateLeaf predicate) {
- ColumnStatistics cs = ColumnStatisticsImpl.deserialize(index);
+ ColumnStatistics cs = ColumnStatisticsImpl.deserialize(statsProto);
Object minValue = getMin(cs);
Object maxValue = getMax(cs);
- return evaluatePredicateRange(predicate, minValue, maxValue);
+ return evaluatePredicateRange(predicate, minValue, maxValue, cs.hasNull());
+ }
+
+ /**
+ * Evaluate a predicate with respect to the statistics from the column
+ * that is referenced in the predicate.
+ * @param stats the statistics for the column mentioned in the predicate
+ * @param predicate the leaf predicate we need to evaluate
+ * @return the set of truth values that may be returned for the given
+ * predicate.
+ */
+ static TruthValue evaluatePredicate(ColumnStatistics stats,
+ PredicateLeaf predicate) {
+ Object minValue = getMin(stats);
+ Object maxValue = getMax(stats);
+ return evaluatePredicateRange(predicate, minValue, maxValue, stats.hasNull());
}
static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min,
- Object max) {
+ Object max, boolean hasNull) {
// if we didn't have any values, everything must have been null
if (min == null) {
if (predicate.getOperator() == PredicateLeaf.Operator.IS_NULL) {
@@ -2405,29 +2420,29 @@ class RecordReaderImpl implements Record
case EQUALS:
loc = compareToRange((Comparable) predObj, minValue, maxValue);
if (minValue.equals(maxValue) && loc == Location.MIN) {
- return TruthValue.YES_NULL;
+ return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
} else if (loc == Location.BEFORE || loc == Location.AFTER) {
- return TruthValue.NO_NULL;
+ return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
} else {
- return TruthValue.YES_NO_NULL;
+ return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
}
case LESS_THAN:
loc = compareToRange((Comparable) predObj, minValue, maxValue);
if (loc == Location.AFTER) {
- return TruthValue.YES_NULL;
+ return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
} else if (loc == Location.BEFORE || loc == Location.MIN) {
- return TruthValue.NO_NULL;
+ return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
} else {
- return TruthValue.YES_NO_NULL;
+ return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
}
case LESS_THAN_EQUALS:
loc = compareToRange((Comparable) predObj, minValue, maxValue);
if (loc == Location.AFTER || loc == Location.MAX) {
- return TruthValue.YES_NULL;
+ return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
} else if (loc == Location.BEFORE) {
- return TruthValue.NO_NULL;
+ return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
} else {
- return TruthValue.YES_NO_NULL;
+ return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
}
case IN:
if (minValue.equals(maxValue)) {
@@ -2437,10 +2452,10 @@ class RecordReaderImpl implements Record
predObj = getBaseObjectForComparison(arg, minValue);
loc = compareToRange((Comparable) predObj, minValue, maxValue);
if (loc == Location.MIN) {
- return TruthValue.YES_NULL;
+ return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
}
}
- return TruthValue.NO_NULL;
+ return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
} else {
// are all of the values outside of the range?
for (Object arg : predicate.getLiteralList(PredicateLeaf.FileFormat.ORC)) {
@@ -2448,10 +2463,10 @@ class RecordReaderImpl implements Record
loc = compareToRange((Comparable) predObj, minValue, maxValue);
if (loc == Location.MIN || loc == Location.MIDDLE ||
loc == Location.MAX) {
- return TruthValue.YES_NO_NULL;
+ return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
}
}
- return TruthValue.NO_NULL;
+ return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
}
case BETWEEN:
List<Object> args = predicate.getLiteralList(PredicateLeaf.FileFormat.ORC);
@@ -2463,26 +2478,26 @@ class RecordReaderImpl implements Record
Location loc2 = compareToRange((Comparable) predObj2, minValue, maxValue);
if (loc2 == Location.AFTER || loc2 == Location.MAX) {
- return TruthValue.YES_NULL;
+ return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
} else if (loc2 == Location.BEFORE) {
- return TruthValue.NO_NULL;
+ return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
} else {
- return TruthValue.YES_NO_NULL;
+ return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
}
} else if (loc == Location.AFTER) {
- return TruthValue.NO_NULL;
+ return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
} else {
- return TruthValue.YES_NO_NULL;
+ return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
}
case IS_NULL:
- return TruthValue.YES_NO;
+ return hasNull ? TruthValue.YES : TruthValue.NO;
default:
- return TruthValue.YES_NO_NULL;
+ return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
}
// in case failed conversion, return the default YES_NO_NULL truth value
} catch (NumberFormatException nfe) {
- return TruthValue.YES_NO_NULL;
+ return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
}
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java?rev=1650699&r1=1650698&r2=1650699&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java Sat Jan 10 00:09:14 2015
@@ -656,6 +656,8 @@ class WriterImpl implements Writer, Memo
void write(Object obj) throws IOException {
if (obj != null) {
indexStatistics.increment();
+ } else {
+ indexStatistics.setNull();
}
if (isPresent != null) {
isPresent.write(obj == null ? 0 : 1);
Modified: hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto?rev=1650699&r1=1650698&r2=1650699&view=diff
==============================================================================
--- hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto (original)
+++ hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto Sat Jan 10 00:09:14 2015
@@ -74,6 +74,7 @@ message ColumnStatistics {
optional DateStatistics dateStatistics = 7;
optional BinaryStatistics binaryStatistics = 8;
optional TimestampStatistics timestampStatistics = 9;
+ optional bool hasNull = 10;
}
message RowIndexEntry {
Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestColumnStatistics.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestColumnStatistics.java?rev=1650699&r1=1650698&r2=1650699&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestColumnStatistics.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestColumnStatistics.java Sat Jan 10 00:09:14 2015
@@ -18,16 +18,28 @@
package org.apache.hadoop.hive.ql.io.orc;
+import static junit.framework.Assert.assertEquals;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
+import org.junit.Before;
+import org.junit.Rule;
import org.junit.Test;
+import org.junit.rules.TestName;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.PrintStream;
import java.sql.Timestamp;
-
-import static junit.framework.Assert.assertEquals;
+import java.util.List;
/**
* Test ColumnStatisticsImpl for ORC.
@@ -173,4 +185,159 @@ public class TestColumnStatistics {
assertEquals(-10, typed.getMinimum().longValue());
assertEquals(10000, typed.getMaximum().longValue());
}
+
+
+ /**
+ * Row type written by {@code testHasNull}: one binary column followed by one
+ * string column that may be null.
+ */
+ public static class SimpleStruct {
+ BytesWritable bytes1;
+ Text string1;
+
+ /**
+ * @param b1 value stored unchanged in {@code bytes1}
+ * @param s1 text for {@code string1}; a {@code null} argument leaves the field null
+ */
+ SimpleStruct(BytesWritable b1, String s1) {
+ bytes1 = b1;
+ string1 = (s1 == null) ? null : new Text(s1);
+ }
+ }
+
+ // Scratch directory for the test: the "test.tmp.dir" system property when set,
+ // otherwise the relative path target/test/tmp.
+ Path workDir = new Path(System.getProperty("test.tmp.dir",
+ "target" + File.separator + "test" + File.separator + "tmp"));
+
+ // Per-test fixtures; all three are (re)assigned by openFileSystem().
+ Configuration conf;
+ FileSystem fs;
+ Path testFilePath;
+
+ // Supplies the running test method's name, which openFileSystem() embeds in the
+ // ORC file name.
+ @Rule
+ public TestName testCaseName = new TestName();
+
+ /**
+ * Prepares the fixtures before each test: a fresh {@link Configuration}, the local
+ * file system with {@code workDir} as its working directory, and a test-specific
+ * ORC file path.
+ */
+ @Before
+ public void openFileSystem() throws Exception {
+ conf = new Configuration();
+ fs = FileSystem.getLocal(conf);
+ fs.setWorkingDirectory(workDir);
+ // relative path, named after the currently running test method
+ testFilePath = new Path("TestOrcFile." + testCaseName.getMethodName() + ".orc");
+ // delete whatever currently exists at that path (second argument passed as false)
+ fs.delete(testFilePath, false);
+ }
+
+ /**
+ * Builds a {@link BytesWritable} whose size equals the number of arguments and
+ * whose i-th byte is the i-th argument narrowed to {@code byte}.
+ *
+ * @param items values to store, one per byte
+ * @return the populated writable
+ */
+ private static BytesWritable bytes(int... items) {
+ BytesWritable writable = new BytesWritable();
+ writable.setSize(items.length);
+ // fetch the backing array once, after sizing, and fill it in argument order
+ byte[] backing = writable.getBytes();
+ int pos = 0;
+ for (int item : items) {
+ backing[pos++] = (byte) item;
+ }
+ return writable;
+ }
+
+ /**
+ * Writes 20000 {@link SimpleStruct} rows whose string column alternates between
+ * all-null and all-non-null runs, then checks the {@code hasNull} flag and value
+ * counts reported at file level and for each of the four stripes, and finally
+ * compares the {@link FileDump} output against the expected
+ * {@code orc-file-has-null.out} resource.
+ */
+ @Test
+ public void testHasNull() throws Exception {
+
+ ObjectInspector inspector;
+ // NOTE(review): same lock object as TestOrcFile; presumably serializes access to
+ // the reflection inspector factory across ORC tests -- confirm.
+ synchronized (TestOrcFile.class) {
+ inspector = ObjectInspectorFactory.getReflectionObjectInspector
+ (SimpleStruct.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+ }
+ Writer writer = OrcFile.createWriter(testFilePath,
+ OrcFile.writerOptions(conf)
+ .inspector(inspector)
+ .rowIndexStride(1000)
+ .stripeSize(10000)
+ .bufferSize(10000));
+ // STRIPE 1
+ // RG1
+ for(int i=0; i<1000; i++) {
+ writer.addRow(new SimpleStruct(bytes(1,2,3), "RG1"));
+ }
+ // RG2
+ for(int i=0; i<1000; i++) {
+ writer.addRow(new SimpleStruct(bytes(1,2,3), null));
+ }
+ // RG3
+ for(int i=0; i<1000; i++) {
+ writer.addRow(new SimpleStruct(bytes(1,2,3), "RG3"));
+ }
+ // RG4
+ for(int i=0; i<1000; i++) {
+ writer.addRow(new SimpleStruct(bytes(1,2,3), null));
+ }
+ // RG5
+ for(int i=0; i<1000; i++) {
+ writer.addRow(new SimpleStruct(bytes(1,2,3), null));
+ }
+ // STRIPE 2
+ for(int i=0; i<5000; i++) {
+ writer.addRow(new SimpleStruct(bytes(1,2,3), null));
+ }
+ // STRIPE 3
+ for(int i=0; i<5000; i++) {
+ writer.addRow(new SimpleStruct(bytes(1,2,3), "STRIPE-3"));
+ }
+ // STRIPE 4
+ for(int i=0; i<5000; i++) {
+ writer.addRow(new SimpleStruct(bytes(1,2,3), null));
+ }
+ writer.close();
+ Reader reader = OrcFile.createReader(testFilePath,
+ OrcFile.readerOptions(conf).filesystem(fs));
+
+ // check the file level stats
+ // (index 0 is the struct, 1 is bytes1, 2 is string1; 7000 rows carry a string)
+ ColumnStatistics[] stats = reader.getStatistics();
+ assertEquals(20000, stats[0].getNumberOfValues());
+ assertEquals(20000, stats[1].getNumberOfValues());
+ assertEquals(7000, stats[2].getNumberOfValues());
+ assertEquals(false, stats[0].hasNull());
+ assertEquals(false, stats[1].hasNull());
+ assertEquals(true, stats[2].hasNull());
+
+ // check the stripe level stats
+ List<StripeStatistics> stripeStats = reader.getMetadata().getStripeStatistics();
+ // stripe 1 stats
+ StripeStatistics ss1 = stripeStats.get(0);
+ ColumnStatistics ss1_cs1 = ss1.getColumnStatistics()[0];
+ ColumnStatistics ss1_cs2 = ss1.getColumnStatistics()[1];
+ ColumnStatistics ss1_cs3 = ss1.getColumnStatistics()[2];
+ assertEquals(false, ss1_cs1.hasNull());
+ assertEquals(false, ss1_cs2.hasNull());
+ assertEquals(true, ss1_cs3.hasNull());
+
+ // stripe 2 stats
+ StripeStatistics ss2 = stripeStats.get(1);
+ ColumnStatistics ss2_cs1 = ss2.getColumnStatistics()[0];
+ ColumnStatistics ss2_cs2 = ss2.getColumnStatistics()[1];
+ ColumnStatistics ss2_cs3 = ss2.getColumnStatistics()[2];
+ assertEquals(false, ss2_cs1.hasNull());
+ assertEquals(false, ss2_cs2.hasNull());
+ assertEquals(true, ss2_cs3.hasNull());
+
+ // stripe 3 stats
+ StripeStatistics ss3 = stripeStats.get(2);
+ ColumnStatistics ss3_cs1 = ss3.getColumnStatistics()[0];
+ ColumnStatistics ss3_cs2 = ss3.getColumnStatistics()[1];
+ ColumnStatistics ss3_cs3 = ss3.getColumnStatistics()[2];
+ assertEquals(false, ss3_cs1.hasNull());
+ assertEquals(false, ss3_cs2.hasNull());
+ assertEquals(false, ss3_cs3.hasNull());
+
+ // stripe 4 stats
+ StripeStatistics ss4 = stripeStats.get(3);
+ ColumnStatistics ss4_cs1 = ss4.getColumnStatistics()[0];
+ ColumnStatistics ss4_cs2 = ss4.getColumnStatistics()[1];
+ ColumnStatistics ss4_cs3 = ss4.getColumnStatistics()[2];
+ assertEquals(false, ss4_cs1.hasNull());
+ assertEquals(false, ss4_cs2.hasNull());
+ assertEquals(true, ss4_cs3.hasNull());
+
+ // Test file dump
+ PrintStream origOut = System.out;
+ String outputFilename = "orc-file-has-null.out";
+ String outputPath = workDir + File.separator + outputFilename;
+ PrintStream myOut = new PrintStream(new FileOutputStream(outputPath));
+ try {
+ // replace stdout and run command
+ System.setOut(myOut);
+ FileDump.main(new String[]{testFilePath.toString(), "--rowindex=2"});
+ System.out.flush();
+ } finally {
+ // restore stdout and release the file handle even when the dump throws, so a
+ // failure here cannot swallow the output of later tests or leak the stream
+ System.setOut(origOut);
+ myOut.close();
+ }
+
+ TestFileDump.checkOutput(outputFilename, outputPath);
+ }
}
Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java?rev=1650699&r1=1650698&r2=1650699&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java Sat Jan 10 00:09:14 2015
@@ -21,6 +21,19 @@ package org.apache.hadoop.hive.ql.io.orc
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.type.HiveChar;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hive.common.util.HiveTestUtils;
+import org.junit.Before;
+import org.junit.Test;
+
import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.File;
@@ -36,21 +49,6 @@ import java.util.List;
import java.util.Map;
import java.util.Random;
-import junit.framework.Assert;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.common.type.HiveChar;
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.common.type.HiveVarchar;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hive.common.util.HiveTestUtils;
-import org.fusesource.leveldbjni.All;
-import org.junit.Before;
-import org.junit.Test;
-
public class TestFileDump {
Path workDir = new Path(System.getProperty("test.tmp.dir"));
@@ -127,7 +125,7 @@ public class TestFileDump {
}
}
- private static void checkOutput(String expected,
+ static void checkOutput(String expected,
String actual) throws Exception {
BufferedReader eStream =
new BufferedReader(new FileReader(HiveTestUtils.getFileFromClasspath(expected)));
Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java?rev=1650699&r1=1650698&r2=1650699&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java Sat Jan 10 00:09:14 2015
@@ -21,6 +21,27 @@ import static org.junit.Assert.assertArr
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.sql.Date;
+import java.sql.Timestamp;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+import java.util.TimeZone;
+import java.util.TreeSet;
+
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataInputStream;
@@ -83,27 +104,6 @@ import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TestName;
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.net.URI;
-import java.net.URISyntaxException;
-import java.sql.Date;
-import java.sql.Timestamp;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Properties;
-import java.util.Set;
-import java.util.TimeZone;
-import java.util.TreeSet;
-
public class TestInputOutputFormat {
Path workDir = new Path(System.getProperty("test.tmp.dir","target/tmp"));
@@ -1638,14 +1638,14 @@ public class TestInputOutputFormat {
assertEquals("mock:/combinationAcid/p=0/base_0000010/bucket_00000",
split.getPath().toString());
assertEquals(0, split.getStart());
- assertEquals(582, split.getLength());
+ assertEquals(607, split.getLength());
split = (HiveInputFormat.HiveInputSplit) splits[1];
assertEquals("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat",
split.inputFormatClassName());
assertEquals("mock:/combinationAcid/p=0/base_0000010/bucket_00001",
split.getPath().toString());
assertEquals(0, split.getStart());
- assertEquals(603, split.getLength());
+ assertEquals(629, split.getLength());
CombineHiveInputFormat.CombineHiveInputSplit combineSplit =
(CombineHiveInputFormat.CombineHiveInputSplit) splits[2];
assertEquals(BUCKETS, combineSplit.getNumPaths());
@@ -1653,7 +1653,7 @@ public class TestInputOutputFormat {
assertEquals("mock:/combinationAcid/p=1/00000" + bucket + "_0",
combineSplit.getPath(bucket).toString());
assertEquals(0, combineSplit.getOffset(bucket));
- assertEquals(227, combineSplit.getLength(bucket));
+ assertEquals(241, combineSplit.getLength(bucket));
}
String[] hosts = combineSplit.getLocations();
assertEquals(2, hosts.length);
Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java?rev=1650699&r1=1650698&r2=1650699&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java Sat Jan 10 00:09:14 2015
@@ -256,13 +256,13 @@ public class TestOrcFile {
assertEquals(7500, stats[1].getNumberOfValues());
assertEquals(3750, ((BooleanColumnStatistics) stats[1]).getFalseCount());
assertEquals(3750, ((BooleanColumnStatistics) stats[1]).getTrueCount());
- assertEquals("count: 7500 true: 3750", stats[1].toString());
+ assertEquals("count: 7500 hasNull: false true: 3750", stats[1].toString());
assertEquals(2048, ((IntegerColumnStatistics) stats[3]).getMaximum());
assertEquals(1024, ((IntegerColumnStatistics) stats[3]).getMinimum());
assertEquals(true, ((IntegerColumnStatistics) stats[3]).isSumDefined());
assertEquals(11520000, ((IntegerColumnStatistics) stats[3]).getSum());
- assertEquals("count: 7500 min: 1024 max: 2048 sum: 11520000",
+ assertEquals("count: 7500 hasNull: false min: 1024 max: 2048 sum: 11520000",
stats[3].toString());
assertEquals(Long.MAX_VALUE,
@@ -271,17 +271,17 @@ public class TestOrcFile {
((IntegerColumnStatistics) stats[5]).getMinimum());
assertEquals(false, ((IntegerColumnStatistics) stats[5]).isSumDefined());
assertEquals(
- "count: 7500 min: 9223372036854775807 max: 9223372036854775807",
+ "count: 7500 hasNull: false min: 9223372036854775807 max: 9223372036854775807",
stats[5].toString());
assertEquals(-15.0, ((DoubleColumnStatistics) stats[7]).getMinimum());
assertEquals(-5.0, ((DoubleColumnStatistics) stats[7]).getMaximum());
assertEquals(-75000.0, ((DoubleColumnStatistics) stats[7]).getSum(),
0.00001);
- assertEquals("count: 7500 min: -15.0 max: -5.0 sum: -75000.0",
+ assertEquals("count: 7500 hasNull: false min: -15.0 max: -5.0 sum: -75000.0",
stats[7].toString());
- assertEquals("count: 7500 min: bye max: hi sum: 0", stats[9].toString());
+ assertEquals("count: 7500 hasNull: false min: bye max: hi sum: 0", stats[9].toString());
// check the inspectors
StructObjectInspector readerInspector = (StructObjectInspector) reader
@@ -541,17 +541,17 @@ public class TestOrcFile {
// check the stats
ColumnStatistics[] stats = reader.getStatistics();
assertEquals(4, stats[0].getNumberOfValues());
- assertEquals("count: 4", stats[0].toString());
+ assertEquals("count: 4 hasNull: false", stats[0].toString());
assertEquals(3, stats[1].getNumberOfValues());
assertEquals(15, ((BinaryColumnStatistics) stats[1]).getSum());
- assertEquals("count: 3 sum: 15", stats[1].toString());
+ assertEquals("count: 3 hasNull: true sum: 15", stats[1].toString());
assertEquals(3, stats[2].getNumberOfValues());
assertEquals("bar", ((StringColumnStatistics) stats[2]).getMinimum());
assertEquals("hi", ((StringColumnStatistics) stats[2]).getMaximum());
assertEquals(8, ((StringColumnStatistics) stats[2]).getSum());
- assertEquals("count: 3 min: bar max: hi sum: 8",
+ assertEquals("count: 3 hasNull: true min: bar max: hi sum: 8",
stats[2].toString());
// check the inspectors
@@ -722,13 +722,13 @@ public class TestOrcFile {
assertEquals(2, stats[1].getNumberOfValues());
assertEquals(1, ((BooleanColumnStatistics) stats[1]).getFalseCount());
assertEquals(1, ((BooleanColumnStatistics) stats[1]).getTrueCount());
- assertEquals("count: 2 true: 1", stats[1].toString());
+ assertEquals("count: 2 hasNull: false true: 1", stats[1].toString());
assertEquals(2048, ((IntegerColumnStatistics) stats[3]).getMaximum());
assertEquals(1024, ((IntegerColumnStatistics) stats[3]).getMinimum());
assertEquals(true, ((IntegerColumnStatistics) stats[3]).isSumDefined());
assertEquals(3072, ((IntegerColumnStatistics) stats[3]).getSum());
- assertEquals("count: 2 min: 1024 max: 2048 sum: 3072",
+ assertEquals("count: 2 hasNull: false min: 1024 max: 2048 sum: 3072",
stats[3].toString());
StripeStatistics ss = metadata.getStripeStatistics().get(0);
@@ -740,10 +740,10 @@ public class TestOrcFile {
assertEquals(-15.0, ((DoubleColumnStatistics) stats[7]).getMinimum());
assertEquals(-5.0, ((DoubleColumnStatistics) stats[7]).getMaximum());
assertEquals(-20.0, ((DoubleColumnStatistics) stats[7]).getSum(), 0.00001);
- assertEquals("count: 2 min: -15.0 max: -5.0 sum: -20.0",
+ assertEquals("count: 2 hasNull: false min: -15.0 max: -5.0 sum: -20.0",
stats[7].toString());
- assertEquals("count: 2 min: bye max: hi sum: 5", stats[9].toString());
+ assertEquals("count: 2 hasNull: false min: bye max: hi sum: 5", stats[9].toString());
// check the inspectors
StructObjectInspector readerInspector =
Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcNullOptimization.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcNullOptimization.java?rev=1650699&r1=1650698&r2=1650699&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcNullOptimization.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcNullOptimization.java Sat Jan 10 00:09:14 2015
@@ -119,13 +119,13 @@ public class TestOrcNullOptimization {
assertEquals(0, ((IntegerColumnStatistics) stats[1]).getMinimum());
assertEquals(true, ((IntegerColumnStatistics) stats[1]).isSumDefined());
assertEquals(0, ((IntegerColumnStatistics) stats[1]).getSum());
- assertEquals("count: 19998 min: 0 max: 0 sum: 0",
+ assertEquals("count: 19998 hasNull: true min: 0 max: 0 sum: 0",
stats[1].toString());
assertEquals("a", ((StringColumnStatistics) stats[2]).getMaximum());
assertEquals("a", ((StringColumnStatistics) stats[2]).getMinimum());
assertEquals(19998, stats[2].getNumberOfValues());
- assertEquals("count: 19998 min: a max: a sum: 19998",
+ assertEquals("count: 19998 hasNull: true min: a max: a sum: 19998",
stats[2].toString());
// check the inspectors
@@ -223,13 +223,13 @@ public class TestOrcNullOptimization {
assertEquals(0, ((IntegerColumnStatistics) stats[1]).getMinimum());
assertEquals(true, ((IntegerColumnStatistics) stats[1]).isSumDefined());
assertEquals(0, ((IntegerColumnStatistics) stats[1]).getSum());
- assertEquals("count: 20000 min: 0 max: 0 sum: 0",
+ assertEquals("count: 20000 hasNull: false min: 0 max: 0 sum: 0",
stats[1].toString());
assertEquals("b", ((StringColumnStatistics) stats[2]).getMaximum());
assertEquals("a", ((StringColumnStatistics) stats[2]).getMinimum());
assertEquals(20000, stats[2].getNumberOfValues());
- assertEquals("count: 20000 min: a max: b sum: 20000",
+ assertEquals("count: 20000 hasNull: false min: a max: b sum: 20000",
stats[2].toString());
// check the inspectors
@@ -324,13 +324,13 @@ public class TestOrcNullOptimization {
assertEquals(2, ((IntegerColumnStatistics) stats[1]).getMinimum());
assertEquals(true, ((IntegerColumnStatistics) stats[1]).isSumDefined());
assertEquals(17, ((IntegerColumnStatistics) stats[1]).getSum());
- assertEquals("count: 7 min: 2 max: 3 sum: 17",
+ assertEquals("count: 7 hasNull: true min: 2 max: 3 sum: 17",
stats[1].toString());
assertEquals("h", ((StringColumnStatistics) stats[2]).getMaximum());
assertEquals("a", ((StringColumnStatistics) stats[2]).getMinimum());
assertEquals(7, stats[2].getNumberOfValues());
- assertEquals("count: 7 min: a max: h sum: 7",
+ assertEquals("count: 7 hasNull: true min: a max: h sum: 7",
stats[2].toString());
// check the inspectors
Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcSerDeStats.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcSerDeStats.java?rev=1650699&r1=1650698&r2=1650699&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcSerDeStats.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcSerDeStats.java Sat Jan 10 00:09:14 2015
@@ -220,17 +220,17 @@ public class TestOrcSerDeStats {
// check the stats
ColumnStatistics[] stats = reader.getStatistics();
assertEquals(4, stats[0].getNumberOfValues());
- assertEquals("count: 4", stats[0].toString());
+ assertEquals("count: 4 hasNull: false", stats[0].toString());
assertEquals(3, stats[1].getNumberOfValues());
assertEquals(15, ((BinaryColumnStatistics) stats[1]).getSum());
- assertEquals("count: 3 sum: 15", stats[1].toString());
+ assertEquals("count: 3 hasNull: true sum: 15", stats[1].toString());
assertEquals(3, stats[2].getNumberOfValues());
assertEquals("bar", ((StringColumnStatistics) stats[2]).getMinimum());
assertEquals("hi", ((StringColumnStatistics) stats[2]).getMaximum());
assertEquals(8, ((StringColumnStatistics) stats[2]).getSum());
- assertEquals("count: 3 min: bar max: hi sum: 8",
+ assertEquals("count: 3 hasNull: true min: bar max: hi sum: 8",
stats[2].toString());
// check the inspectors
@@ -448,13 +448,13 @@ public class TestOrcSerDeStats {
assertEquals(2, stats[1].getNumberOfValues());
assertEquals(1, ((BooleanColumnStatistics) stats[1]).getFalseCount());
assertEquals(1, ((BooleanColumnStatistics) stats[1]).getTrueCount());
- assertEquals("count: 2 true: 1", stats[1].toString());
+ assertEquals("count: 2 hasNull: false true: 1", stats[1].toString());
assertEquals(2048, ((IntegerColumnStatistics) stats[3]).getMaximum());
assertEquals(1024, ((IntegerColumnStatistics) stats[3]).getMinimum());
assertEquals(true, ((IntegerColumnStatistics) stats[3]).isSumDefined());
assertEquals(3072, ((IntegerColumnStatistics) stats[3]).getSum());
- assertEquals("count: 2 min: 1024 max: 2048 sum: 3072",
+ assertEquals("count: 2 hasNull: false min: 1024 max: 2048 sum: 3072",
stats[3].toString());
assertEquals(Long.MAX_VALUE,
@@ -462,16 +462,16 @@ public class TestOrcSerDeStats {
assertEquals(Long.MAX_VALUE,
((IntegerColumnStatistics) stats[5]).getMinimum());
assertEquals(false, ((IntegerColumnStatistics) stats[5]).isSumDefined());
- assertEquals("count: 2 min: 9223372036854775807 max: 9223372036854775807",
+ assertEquals("count: 2 hasNull: false min: 9223372036854775807 max: 9223372036854775807",
stats[5].toString());
assertEquals(-15.0, ((DoubleColumnStatistics) stats[7]).getMinimum());
assertEquals(-5.0, ((DoubleColumnStatistics) stats[7]).getMaximum());
assertEquals(-20.0, ((DoubleColumnStatistics) stats[7]).getSum(), 0.00001);
- assertEquals("count: 2 min: -15.0 max: -5.0 sum: -20.0",
+ assertEquals("count: 2 hasNull: false min: -15.0 max: -5.0 sum: -20.0",
stats[7].toString());
- assertEquals("count: 2 min: bye max: hi sum: 5", stats[9].toString());
+ assertEquals("count: 2 hasNull: false min: bye max: hi sum: 5", stats[9].toString());
}
@Test
@@ -541,13 +541,13 @@ public class TestOrcSerDeStats {
assertEquals(2, stats[1].getNumberOfValues());
assertEquals(1, ((BooleanColumnStatistics) stats[1]).getFalseCount());
assertEquals(1, ((BooleanColumnStatistics) stats[1]).getTrueCount());
- assertEquals("count: 2 true: 1", stats[1].toString());
+ assertEquals("count: 2 hasNull: false true: 1", stats[1].toString());
assertEquals(2048, ((IntegerColumnStatistics) stats[3]).getMaximum());
assertEquals(1024, ((IntegerColumnStatistics) stats[3]).getMinimum());
assertEquals(true, ((IntegerColumnStatistics) stats[3]).isSumDefined());
assertEquals(3072, ((IntegerColumnStatistics) stats[3]).getSum());
- assertEquals("count: 2 min: 1024 max: 2048 sum: 3072",
+ assertEquals("count: 2 hasNull: false min: 1024 max: 2048 sum: 3072",
stats[3].toString());
assertEquals(Long.MAX_VALUE,
@@ -555,22 +555,22 @@ public class TestOrcSerDeStats {
assertEquals(Long.MAX_VALUE,
((IntegerColumnStatistics) stats[5]).getMinimum());
assertEquals(false, ((IntegerColumnStatistics) stats[5]).isSumDefined());
- assertEquals("count: 2 min: 9223372036854775807 max: 9223372036854775807",
+ assertEquals("count: 2 hasNull: false min: 9223372036854775807 max: 9223372036854775807",
stats[5].toString());
assertEquals(-15.0, ((DoubleColumnStatistics) stats[7]).getMinimum());
assertEquals(-5.0, ((DoubleColumnStatistics) stats[7]).getMaximum());
assertEquals(-20.0, ((DoubleColumnStatistics) stats[7]).getSum(), 0.00001);
- assertEquals("count: 2 min: -15.0 max: -5.0 sum: -20.0",
+ assertEquals("count: 2 hasNull: false min: -15.0 max: -5.0 sum: -20.0",
stats[7].toString());
assertEquals(5, ((BinaryColumnStatistics) stats[8]).getSum());
- assertEquals("count: 2 sum: 5", stats[8].toString());
+ assertEquals("count: 2 hasNull: false sum: 5", stats[8].toString());
assertEquals("bye", ((StringColumnStatistics) stats[9]).getMinimum());
assertEquals("hi", ((StringColumnStatistics) stats[9]).getMaximum());
assertEquals(5, ((StringColumnStatistics) stats[9]).getSum());
- assertEquals("count: 2 min: bye max: hi sum: 5", stats[9].toString());
+ assertEquals("count: 2 hasNull: false min: bye max: hi sum: 5", stats[9].toString());
}
@Test(expected = ClassCastException.class)
@@ -603,13 +603,13 @@ public class TestOrcSerDeStats {
assertEquals(7500, stats[1].getNumberOfValues());
assertEquals(3750, ((BooleanColumnStatistics) stats[1]).getFalseCount());
assertEquals(3750, ((BooleanColumnStatistics) stats[1]).getTrueCount());
- assertEquals("count: 7500 true: 3750", stats[1].toString());
+ assertEquals("count: 7500 hasNull: false true: 3750", stats[1].toString());
assertEquals(2048, ((IntegerColumnStatistics) stats[3]).getMaximum());
assertEquals(1024, ((IntegerColumnStatistics) stats[3]).getMinimum());
assertEquals(true, ((IntegerColumnStatistics) stats[3]).isSumDefined());
assertEquals(11520000, ((IntegerColumnStatistics) stats[3]).getSum());
- assertEquals("count: 7500 min: 1024 max: 2048 sum: 11520000",
+ assertEquals("count: 7500 hasNull: false min: 1024 max: 2048 sum: 11520000",
stats[3].toString());
assertEquals(Long.MAX_VALUE,
@@ -618,24 +618,24 @@ public class TestOrcSerDeStats {
((IntegerColumnStatistics) stats[5]).getMinimum());
assertEquals(false, ((IntegerColumnStatistics) stats[5]).isSumDefined());
assertEquals(
- "count: 7500 min: 9223372036854775807 max: 9223372036854775807",
+ "count: 7500 hasNull: false min: 9223372036854775807 max: 9223372036854775807",
stats[5].toString());
assertEquals(-15.0, ((DoubleColumnStatistics) stats[7]).getMinimum());
assertEquals(-5.0, ((DoubleColumnStatistics) stats[7]).getMaximum());
assertEquals(-75000.0, ((DoubleColumnStatistics) stats[7]).getSum(),
0.00001);
- assertEquals("count: 7500 min: -15.0 max: -5.0 sum: -75000.0",
+ assertEquals("count: 7500 hasNull: false min: -15.0 max: -5.0 sum: -75000.0",
stats[7].toString());
assertEquals("bye", ((StringColumnStatistics) stats[9]).getMinimum());
assertEquals("hi", ((StringColumnStatistics) stats[9]).getMaximum());
assertEquals(0, ((StringColumnStatistics) stats[9]).getSum());
- assertEquals("count: 7500 min: bye max: hi sum: 0", stats[9].toString());
+ assertEquals("count: 7500 hasNull: false min: bye max: hi sum: 0", stats[9].toString());
// old orc format will not have binary statistics. toString() will show only
// the general column statistics
- assertEquals("count: 7500", stats[8].toString());
+ assertEquals("count: 7500 hasNull: false", stats[8].toString());
// since old orc format doesn't support binary statistics,
// this should throw ClassCastException
assertEquals(5, ((BinaryColumnStatistics) stats[8]).getSum());
Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java?rev=1650699&r1=1650698&r2=1650699&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java Sat Jan 10 00:09:14 2015
@@ -23,11 +23,6 @@ import static org.hamcrest.core.Is.is;
import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.ArrayList;
-import java.util.List;
-
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
@@ -46,6 +41,11 @@ import org.junit.Test;
import org.mockito.MockSettings;
import org.mockito.Mockito;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+
public class TestRecordReaderImpl {
// can add .verboseLogging() to cause Mockito to log invocations
@@ -264,6 +264,15 @@ public class TestRecordReaderImpl {
return OrcProto.ColumnStatistics.newBuilder().setDoubleStatistics(dblStats.build()).build();
}
+ /**
+ * Builds column statistics carrying a string min/max range together with an
+ * explicit {@code hasNull} flag.
+ *
+ * @param min minimum string value recorded in the statistics
+ * @param max maximum string value recorded in the statistics
+ * @param hasNull value to store in the statistics' {@code hasNull} field
+ * @return the assembled {@code OrcProto.ColumnStatistics} message
+ */
+ private static OrcProto.ColumnStatistics createStringStats(String min, String max,
+ boolean hasNull) {
+ OrcProto.StringStatistics range =
+ OrcProto.StringStatistics.newBuilder().setMinimum(min).setMaximum(max).build();
+ OrcProto.ColumnStatistics.Builder column = OrcProto.ColumnStatistics.newBuilder();
+ column.setStringStatistics(range);
+ column.setHasNull(hasNull);
+ return column.build();
+ }
+
private static OrcProto.ColumnStatistics createStringStats(String min, String max) {
OrcProto.StringStatistics.Builder strStats = OrcProto.StringStatistics.newBuilder();
strStats.setMinimum(min);
@@ -483,17 +492,17 @@ public class TestRecordReaderImpl {
PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf
(PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.INTEGER,
"x", 15L, null);
- assertEquals(TruthValue.NO_NULL,
+ assertEquals(TruthValue.NO,
RecordReaderImpl.evaluatePredicate(createIntStats(20L, 30L), pred));
- assertEquals(TruthValue.YES_NO_NULL,
+ assertEquals(TruthValue.YES_NO,
RecordReaderImpl.evaluatePredicate(createIntStats(15L, 30L), pred));
- assertEquals(TruthValue.YES_NO_NULL,
+ assertEquals(TruthValue.YES_NO,
RecordReaderImpl.evaluatePredicate(createIntStats(10L, 30L), pred));
- assertEquals(TruthValue.YES_NO_NULL,
+ assertEquals(TruthValue.YES_NO,
RecordReaderImpl.evaluatePredicate(createIntStats(10L, 15L), pred));
- assertEquals(TruthValue.NO_NULL,
+ assertEquals(TruthValue.NO,
RecordReaderImpl.evaluatePredicate(createIntStats(0L, 10L), pred));
- assertEquals(TruthValue.YES_NULL,
+ assertEquals(TruthValue.YES,
RecordReaderImpl.evaluatePredicate(createIntStats(15L, 15L), pred));
}
@@ -521,15 +530,15 @@ public class TestRecordReaderImpl {
PredicateLeaf lessThan = TestSearchArgumentImpl.createPredicateLeaf
(PredicateLeaf.Operator.LESS_THAN, PredicateLeaf.Type.INTEGER,
"x", 15L, null);
- assertEquals(TruthValue.NO_NULL,
+ assertEquals(TruthValue.NO,
RecordReaderImpl.evaluatePredicate(createIntStats(20L, 30L), lessThan));
- assertEquals(TruthValue.NO_NULL,
+ assertEquals(TruthValue.NO,
RecordReaderImpl.evaluatePredicate(createIntStats(15L, 30L), lessThan));
- assertEquals(TruthValue.YES_NO_NULL,
+ assertEquals(TruthValue.YES_NO,
RecordReaderImpl.evaluatePredicate(createIntStats(10L, 30L), lessThan));
- assertEquals(TruthValue.YES_NO_NULL,
+ assertEquals(TruthValue.YES_NO,
RecordReaderImpl.evaluatePredicate(createIntStats(10L, 15L), lessThan));
- assertEquals(TruthValue.YES_NULL,
+ assertEquals(TruthValue.YES,
RecordReaderImpl.evaluatePredicate(createIntStats(0L, 10L), lessThan));
}
@@ -538,15 +547,15 @@ public class TestRecordReaderImpl {
PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf
(PredicateLeaf.Operator.LESS_THAN_EQUALS, PredicateLeaf.Type.INTEGER,
"x", 15L, null);
- assertEquals(TruthValue.NO_NULL,
+ assertEquals(TruthValue.NO,
RecordReaderImpl.evaluatePredicate(createIntStats(20L, 30L), pred));
- assertEquals(TruthValue.YES_NO_NULL,
+ assertEquals(TruthValue.YES_NO,
RecordReaderImpl.evaluatePredicate(createIntStats(15L, 30L), pred));
- assertEquals(TruthValue.YES_NO_NULL,
+ assertEquals(TruthValue.YES_NO,
RecordReaderImpl.evaluatePredicate(createIntStats(10L, 30L), pred));
- assertEquals(TruthValue.YES_NULL,
+ assertEquals(TruthValue.YES,
RecordReaderImpl.evaluatePredicate(createIntStats(10L, 15L), pred));
- assertEquals(TruthValue.YES_NULL,
+ assertEquals(TruthValue.YES,
RecordReaderImpl.evaluatePredicate(createIntStats(0L, 10L), pred));
}
@@ -558,13 +567,13 @@ public class TestRecordReaderImpl {
PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf
(PredicateLeaf.Operator.IN, PredicateLeaf.Type.INTEGER,
"x", null, args);
- assertEquals(TruthValue.YES_NULL,
+ assertEquals(TruthValue.YES,
RecordReaderImpl.evaluatePredicate(createIntStats(20L, 20L), pred));
- assertEquals(TruthValue.NO_NULL,
+ assertEquals(TruthValue.NO,
RecordReaderImpl.evaluatePredicate(createIntStats(30L, 30L), pred));
- assertEquals(TruthValue.YES_NO_NULL,
+ assertEquals(TruthValue.YES_NO,
RecordReaderImpl.evaluatePredicate(createIntStats(10L, 30L), pred));
- assertEquals(TruthValue.NO_NULL,
+ assertEquals(TruthValue.NO,
RecordReaderImpl.evaluatePredicate(createIntStats(12L, 18L), pred));
}
@@ -576,19 +585,19 @@ public class TestRecordReaderImpl {
PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf
(PredicateLeaf.Operator.BETWEEN, PredicateLeaf.Type.INTEGER,
"x", null, args);
- assertEquals(TruthValue.NO_NULL,
+ assertEquals(TruthValue.NO,
RecordReaderImpl.evaluatePredicate(createIntStats(0L, 5L), pred));
- assertEquals(TruthValue.NO_NULL,
+ assertEquals(TruthValue.NO,
RecordReaderImpl.evaluatePredicate(createIntStats(30L, 40L), pred));
- assertEquals(TruthValue.YES_NO_NULL,
+ assertEquals(TruthValue.YES_NO,
RecordReaderImpl.evaluatePredicate(createIntStats(5L, 15L), pred));
- assertEquals(TruthValue.YES_NO_NULL,
+ assertEquals(TruthValue.YES_NO,
RecordReaderImpl.evaluatePredicate(createIntStats(15L, 25L), pred));
- assertEquals(TruthValue.YES_NO_NULL,
+ assertEquals(TruthValue.YES_NO,
RecordReaderImpl.evaluatePredicate(createIntStats(5L, 25L), pred));
- assertEquals(TruthValue.YES_NULL,
+ assertEquals(TruthValue.YES,
RecordReaderImpl.evaluatePredicate(createIntStats(10L, 20L), pred));
- assertEquals(TruthValue.YES_NULL,
+ assertEquals(TruthValue.YES,
RecordReaderImpl.evaluatePredicate(createIntStats(12L, 18L), pred));
}
@@ -597,10 +606,156 @@ public class TestRecordReaderImpl {
PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf
(PredicateLeaf.Operator.IS_NULL, PredicateLeaf.Type.INTEGER,
"x", null, null);
- assertEquals(TruthValue.YES_NO,
+ assertEquals(TruthValue.NO,
RecordReaderImpl.evaluatePredicate(createIntStats(20L, 30L), pred));
}
+
+ @Test
+ public void testEqualsWithNullInStats() throws Exception {
+ PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf
+ (PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.STRING,
+ "x", "c", null);
+ assertEquals(TruthValue.NO_NULL,
+ RecordReaderImpl.evaluatePredicate(createStringStats("d", "e", true), pred)); // before
+ assertEquals(TruthValue.NO_NULL,
+ RecordReaderImpl.evaluatePredicate(createStringStats("a", "b", true), pred)); // after
+ assertEquals(TruthValue.YES_NO_NULL,
+ RecordReaderImpl.evaluatePredicate(createStringStats("b", "c", true), pred)); // max
+ assertEquals(TruthValue.YES_NO_NULL,
+ RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", true), pred)); // min
+ assertEquals(TruthValue.YES_NO_NULL,
+ RecordReaderImpl.evaluatePredicate(createStringStats("b", "d", true), pred)); // middle
+ assertEquals(TruthValue.YES_NULL,
+ RecordReaderImpl.evaluatePredicate(createStringStats("c", "c", true), pred)); // same
+ }
+
+ @Test
+ public void testNullSafeEqualsWithNullInStats() throws Exception {
+ PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf
+ (PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.STRING,
+ "x", "c", null);
+ assertEquals(TruthValue.NO,
+ RecordReaderImpl.evaluatePredicate(createStringStats("d", "e", true), pred)); // before
+ assertEquals(TruthValue.NO,
+ RecordReaderImpl.evaluatePredicate(createStringStats("a", "b", true), pred)); // after
+ assertEquals(TruthValue.YES_NO,
+ RecordReaderImpl.evaluatePredicate(createStringStats("b", "c", true), pred)); // max
+ assertEquals(TruthValue.YES_NO,
+ RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", true), pred)); // min
+ assertEquals(TruthValue.YES_NO,
+ RecordReaderImpl.evaluatePredicate(createStringStats("b", "d", true), pred)); // middle
+ assertEquals(TruthValue.YES_NO,
+ RecordReaderImpl.evaluatePredicate(createStringStats("c", "c", true), pred)); // same
+ }
+
+ @Test
+ public void testLessThanWithNullInStats() throws Exception {
+ PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf
+ (PredicateLeaf.Operator.LESS_THAN, PredicateLeaf.Type.STRING,
+ "x", "c", null);
+ assertEquals(TruthValue.NO_NULL,
+ RecordReaderImpl.evaluatePredicate(createStringStats("d", "e", true), pred)); // before
+ assertEquals(TruthValue.YES_NULL,
+ RecordReaderImpl.evaluatePredicate(createStringStats("a", "b", true), pred)); // after
+ assertEquals(TruthValue.YES_NO_NULL,
+ RecordReaderImpl.evaluatePredicate(createStringStats("b", "c", true), pred)); // max
+ assertEquals(TruthValue.NO_NULL,
+ RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", true), pred)); // min
+ assertEquals(TruthValue.YES_NO_NULL,
+ RecordReaderImpl.evaluatePredicate(createStringStats("b", "d", true), pred)); // middle
+ assertEquals(TruthValue.NO_NULL, // min, same stats
+ RecordReaderImpl.evaluatePredicate(createStringStats("c", "c", true), pred));
+ }
+
+ @Test
+ public void testLessThanEqualsWithNullInStats() throws Exception {
+ PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf
+ (PredicateLeaf.Operator.LESS_THAN_EQUALS, PredicateLeaf.Type.STRING,
+ "x", "c", null);
+ assertEquals(TruthValue.NO_NULL,
+ RecordReaderImpl.evaluatePredicate(createStringStats("d", "e", true), pred)); // before
+ assertEquals(TruthValue.YES_NULL,
+ RecordReaderImpl.evaluatePredicate(createStringStats("a", "b", true), pred)); // after
+ assertEquals(TruthValue.YES_NULL,
+ RecordReaderImpl.evaluatePredicate(createStringStats("b", "c", true), pred)); // max
+ assertEquals(TruthValue.YES_NO_NULL,
+ RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", true), pred)); // min
+ assertEquals(TruthValue.YES_NO_NULL,
+ RecordReaderImpl.evaluatePredicate(createStringStats("b", "d", true), pred)); // middle
+ assertEquals(TruthValue.YES_NO_NULL,
+ RecordReaderImpl.evaluatePredicate(createStringStats("c", "c", true), pred)); // same
+ }
+
+ @Test
+ public void testInWithNullInStats() throws Exception {
+ List<Object> args = new ArrayList<Object>();
+ args.add("c");
+ args.add("f");
+ PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf
+ (PredicateLeaf.Operator.IN, PredicateLeaf.Type.STRING,
+ "x", null, args);
+ assertEquals(TruthValue.NO_NULL, // before & after
+ RecordReaderImpl.evaluatePredicate(createStringStats("d", "e", true), pred));
+ assertEquals(TruthValue.NO_NULL,
+ RecordReaderImpl.evaluatePredicate(createStringStats("a", "b", true), pred)); // after
+ assertEquals(TruthValue.YES_NO_NULL,
+ RecordReaderImpl.evaluatePredicate(createStringStats("e", "f", true), pred)); // max
+ assertEquals(TruthValue.YES_NO_NULL,
+ RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", true), pred)); // min
+ assertEquals(TruthValue.YES_NO_NULL,
+ RecordReaderImpl.evaluatePredicate(createStringStats("b", "d", true), pred)); // middle
+ assertEquals(TruthValue.YES_NULL,
+ RecordReaderImpl.evaluatePredicate(createStringStats("c", "c", true), pred)); // same
+ }
+
+ @Test
+ public void testBetweenWithNullInStats() throws Exception {
+ List<Object> args = new ArrayList<Object>();
+ args.add("c");
+ args.add("f");
+ PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf
+ (PredicateLeaf.Operator.BETWEEN, PredicateLeaf.Type.STRING,
+ "x", null, args);
+ assertEquals(TruthValue.YES_NULL, // before & after
+ RecordReaderImpl.evaluatePredicate(createStringStats("d", "e", true), pred));
+ assertEquals(TruthValue.YES_NULL, // before & max
+ RecordReaderImpl.evaluatePredicate(createStringStats("e", "f", true), pred));
+ assertEquals(TruthValue.NO_NULL, // before & before
+ RecordReaderImpl.evaluatePredicate(createStringStats("h", "g", true), pred));
+ assertEquals(TruthValue.YES_NO_NULL, // before & min
+ RecordReaderImpl.evaluatePredicate(createStringStats("f", "g", true), pred));
+ assertEquals(TruthValue.YES_NO_NULL, // before & middle
+ RecordReaderImpl.evaluatePredicate(createStringStats("e", "g", true), pred));
+
+ assertEquals(TruthValue.YES_NULL, // min & after
+ RecordReaderImpl.evaluatePredicate(createStringStats("c", "e", true), pred));
+ assertEquals(TruthValue.YES_NULL, // min & max
+ RecordReaderImpl.evaluatePredicate(createStringStats("c", "f", true), pred));
+ assertEquals(TruthValue.YES_NO_NULL, // min & middle
+ RecordReaderImpl.evaluatePredicate(createStringStats("c", "g", true), pred));
+
+ assertEquals(TruthValue.NO_NULL,
+ RecordReaderImpl.evaluatePredicate(createStringStats("a", "b", true), pred)); // after
+ assertEquals(TruthValue.YES_NO_NULL,
+ RecordReaderImpl.evaluatePredicate(createStringStats("a", "c", true), pred)); // max
+ assertEquals(TruthValue.YES_NO_NULL,
+ RecordReaderImpl.evaluatePredicate(createStringStats("b", "d", true), pred)); // middle
+ assertEquals(TruthValue.YES_NULL, // min & after, same stats
+ RecordReaderImpl.evaluatePredicate(createStringStats("c", "c", true), pred));
+ }
+
+ @Test
+ public void testIsNullWithNullInStats() throws Exception {
+ PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf
+ (PredicateLeaf.Operator.IS_NULL, PredicateLeaf.Type.STRING,
+ "x", null, null);
+ assertEquals(TruthValue.YES,
+ RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", true), pred));
+ assertEquals(TruthValue.NO,
+ RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", false), pred));
+ }
+
@Test
public void testOverlap() throws Exception {
assertTrue(!RecordReaderImpl.overlap(0, 10, -10, -1));