Posted to commits@hive.apache.org by br...@apache.org on 2013/11/04 22:34:44 UTC
svn commit: r1538780 - in /hive/trunk/ql/src: gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/ java/org/apache/hadoop/hive/ql/io/orc/ protobuf/org/apache/hadoop/hive/ql/io/orc/
Author: brock
Date: Mon Nov 4 21:34:43 2013
New Revision: 1538780
URL: http://svn.apache.org/r1538780
Log:
HIVE-5354 - Decimal precision/scale support in ORC file (Xuefu Zhang via Brock Noland)
Modified:
hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
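
[Editorial note on the change as a whole: the patch adds two optional fields, precision (5) and scale (6), to the ORC Type message and threads them through the reader and writer. A minimal read-side sketch, assuming a deserialized OrcProto.Type for a DECIMAL column; the footerTypes lookup is hypothetical:]

    // imports: org.apache.hadoop.hive.ql.io.orc.OrcProto,
    //          org.apache.hadoop.hive.common.type.HiveDecimal
    OrcProto.Type type = footerTypes.get(columnId);  // hypothetical lookup
    // Files written before this change omit the optional fields, so fall
    // back to the Hive maximums, as OrcStruct and RecordReaderImpl do below.
    int precision = type.hasPrecision() ? type.getPrecision()
                                        : HiveDecimal.MAX_PRECISION;
    int scale = type.hasScale() ? type.getScale() : HiveDecimal.MAX_SCALE;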
Modified: hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java?rev=1538780&r1=1538779&r2=1538780&view=diff
==============================================================================
--- hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java (original)
+++ hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java Mon Nov 4 21:34:43 2013
@@ -9668,6 +9668,26 @@ public final class OrcProto {
* <code>optional uint32 maximumLength = 4;</code>
*/
int getMaximumLength();
+
+ // optional uint32 precision = 5;
+ /**
+ * <code>optional uint32 precision = 5;</code>
+ */
+ boolean hasPrecision();
+ /**
+ * <code>optional uint32 precision = 5;</code>
+ */
+ int getPrecision();
+
+ // optional uint32 scale = 6;
+ /**
+ * <code>optional uint32 scale = 6;</code>
+ */
+ boolean hasScale();
+ /**
+ * <code>optional uint32 scale = 6;</code>
+ */
+ int getScale();
}
/**
* Protobuf type {@code org.apache.hadoop.hive.ql.io.orc.Type}
@@ -9765,6 +9785,16 @@ public final class OrcProto {
maximumLength_ = input.readUInt32();
break;
}
+ case 40: {
+ bitField0_ |= 0x00000004;
+ precision_ = input.readUInt32();
+ break;
+ }
+ case 48: {
+ bitField0_ |= 0x00000008;
+ scale_ = input.readUInt32();
+ break;
+ }
}
}
} catch (com.google.protobuf.InvalidProtocolBufferException e) {
@@ -10114,11 +10144,45 @@ public final class OrcProto {
return maximumLength_;
}
+ // optional uint32 precision = 5;
+ public static final int PRECISION_FIELD_NUMBER = 5;
+ private int precision_;
+ /**
+ * <code>optional uint32 precision = 5;</code>
+ */
+ public boolean hasPrecision() {
+ return ((bitField0_ & 0x00000004) == 0x00000004);
+ }
+ /**
+ * <code>optional uint32 precision = 5;</code>
+ */
+ public int getPrecision() {
+ return precision_;
+ }
+
+ // optional uint32 scale = 6;
+ public static final int SCALE_FIELD_NUMBER = 6;
+ private int scale_;
+ /**
+ * <code>optional uint32 scale = 6;</code>
+ */
+ public boolean hasScale() {
+ return ((bitField0_ & 0x00000008) == 0x00000008);
+ }
+ /**
+ * <code>optional uint32 scale = 6;</code>
+ */
+ public int getScale() {
+ return scale_;
+ }
+
private void initFields() {
kind_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.Type.Kind.BOOLEAN;
subtypes_ = java.util.Collections.emptyList();
fieldNames_ = com.google.protobuf.LazyStringArrayList.EMPTY;
maximumLength_ = 0;
+ precision_ = 0;
+ scale_ = 0;
}
private byte memoizedIsInitialized = -1;
public final boolean isInitialized() {
@@ -10152,6 +10216,12 @@ public final class OrcProto {
if (((bitField0_ & 0x00000002) == 0x00000002)) {
output.writeUInt32(4, maximumLength_);
}
+ if (((bitField0_ & 0x00000004) == 0x00000004)) {
+ output.writeUInt32(5, precision_);
+ }
+ if (((bitField0_ & 0x00000008) == 0x00000008)) {
+ output.writeUInt32(6, scale_);
+ }
getUnknownFields().writeTo(output);
}
@@ -10192,6 +10262,14 @@ public final class OrcProto {
size += com.google.protobuf.CodedOutputStream
.computeUInt32Size(4, maximumLength_);
}
+ if (((bitField0_ & 0x00000004) == 0x00000004)) {
+ size += com.google.protobuf.CodedOutputStream
+ .computeUInt32Size(5, precision_);
+ }
+ if (((bitField0_ & 0x00000008) == 0x00000008)) {
+ size += com.google.protobuf.CodedOutputStream
+ .computeUInt32Size(6, scale_);
+ }
size += getUnknownFields().getSerializedSize();
memoizedSerializedSize = size;
return size;
@@ -10316,6 +10394,10 @@ public final class OrcProto {
bitField0_ = (bitField0_ & ~0x00000004);
maximumLength_ = 0;
bitField0_ = (bitField0_ & ~0x00000008);
+ precision_ = 0;
+ bitField0_ = (bitField0_ & ~0x00000010);
+ scale_ = 0;
+ bitField0_ = (bitField0_ & ~0x00000020);
return this;
}
@@ -10363,6 +10445,14 @@ public final class OrcProto {
to_bitField0_ |= 0x00000002;
}
result.maximumLength_ = maximumLength_;
+ if (((from_bitField0_ & 0x00000010) == 0x00000010)) {
+ to_bitField0_ |= 0x00000004;
+ }
+ result.precision_ = precision_;
+ if (((from_bitField0_ & 0x00000020) == 0x00000020)) {
+ to_bitField0_ |= 0x00000008;
+ }
+ result.scale_ = scale_;
result.bitField0_ = to_bitField0_;
onBuilt();
return result;
@@ -10405,6 +10495,12 @@ public final class OrcProto {
if (other.hasMaximumLength()) {
setMaximumLength(other.getMaximumLength());
}
+ if (other.hasPrecision()) {
+ setPrecision(other.getPrecision());
+ }
+ if (other.hasScale()) {
+ setScale(other.getScale());
+ }
this.mergeUnknownFields(other.getUnknownFields());
return this;
}
@@ -10664,6 +10760,72 @@ public final class OrcProto {
return this;
}
+ // optional uint32 precision = 5;
+ private int precision_ ;
+ /**
+ * <code>optional uint32 precision = 5;</code>
+ */
+ public boolean hasPrecision() {
+ return ((bitField0_ & 0x00000010) == 0x00000010);
+ }
+ /**
+ * <code>optional uint32 precision = 5;</code>
+ */
+ public int getPrecision() {
+ return precision_;
+ }
+ /**
+ * <code>optional uint32 precision = 5;</code>
+ */
+ public Builder setPrecision(int value) {
+ bitField0_ |= 0x00000010;
+ precision_ = value;
+ onChanged();
+ return this;
+ }
+ /**
+ * <code>optional uint32 precision = 5;</code>
+ */
+ public Builder clearPrecision() {
+ bitField0_ = (bitField0_ & ~0x00000010);
+ precision_ = 0;
+ onChanged();
+ return this;
+ }
+
+ // optional uint32 scale = 6;
+ private int scale_ ;
+ /**
+ * <code>optional uint32 scale = 6;</code>
+ */
+ public boolean hasScale() {
+ return ((bitField0_ & 0x00000020) == 0x00000020);
+ }
+ /**
+ * <code>optional uint32 scale = 6;</code>
+ */
+ public int getScale() {
+ return scale_;
+ }
+ /**
+ * <code>optional uint32 scale = 6;</code>
+ */
+ public Builder setScale(int value) {
+ bitField0_ |= 0x00000020;
+ scale_ = value;
+ onChanged();
+ return this;
+ }
+ /**
+ * <code>optional uint32 scale = 6;</code>
+ */
+ public Builder clearScale() {
+ bitField0_ = (bitField0_ & ~0x00000020);
+ scale_ = 0;
+ onChanged();
+ return this;
+ }
+
// @@protoc_insertion_point(builder_scope:org.apache.hadoop.hive.ql.io.orc.Type)
}
@@ -15166,34 +15328,35 @@ public final class OrcProto {
"9\n\007streams\030\001 \003(\0132(.org.apache.hadoop.hiv" +
"e.ql.io.orc.Stream\022A\n\007columns\030\002 \003(\01320.or" +
"g.apache.hadoop.hive.ql.io.orc.ColumnEnc" +
- "oding\"\314\002\n\004Type\0229\n\004kind\030\001 \002(\0162+.org.apach" +
+ "oding\"\356\002\n\004Type\0229\n\004kind\030\001 \002(\0162+.org.apach" +
"e.hadoop.hive.ql.io.orc.Type.Kind\022\024\n\010sub" +
"types\030\002 \003(\rB\002\020\001\022\022\n\nfieldNames\030\003 \003(\t\022\025\n\rm" +
- "aximumLength\030\004 \001(\r\"\307\001\n\004Kind\022\013\n\007BOOLEAN\020\000" +
- "\022\010\n\004BYTE\020\001\022\t\n\005SHORT\020\002\022\007\n\003INT\020\003\022\010\n\004LONG\020\004" +
- "\022\t\n\005FLOAT\020\005\022\n\n\006DOUBLE\020\006\022\n\n\006STRING\020\007\022\n\n\006B",
- "INARY\020\010\022\r\n\tTIMESTAMP\020\t\022\010\n\004LIST\020\n\022\007\n\003MAP\020" +
- "\013\022\n\n\006STRUCT\020\014\022\t\n\005UNION\020\r\022\013\n\007DECIMAL\020\016\022\010\n" +
- "\004DATE\020\017\022\013\n\007VARCHAR\020\020\"x\n\021StripeInformatio" +
- "n\022\016\n\006offset\030\001 \001(\004\022\023\n\013indexLength\030\002 \001(\004\022\022" +
- "\n\ndataLength\030\003 \001(\004\022\024\n\014footerLength\030\004 \001(\004" +
- "\022\024\n\014numberOfRows\030\005 \001(\004\"/\n\020UserMetadataIt" +
- "em\022\014\n\004name\030\001 \002(\t\022\r\n\005value\030\002 \002(\014\"\356\002\n\006Foot" +
- "er\022\024\n\014headerLength\030\001 \001(\004\022\025\n\rcontentLengt" +
- "h\030\002 \001(\004\022D\n\007stripes\030\003 \003(\01323.org.apache.ha" +
- "doop.hive.ql.io.orc.StripeInformation\0225\n",
- "\005types\030\004 \003(\0132&.org.apache.hadoop.hive.ql" +
- ".io.orc.Type\022D\n\010metadata\030\005 \003(\01322.org.apa" +
- "che.hadoop.hive.ql.io.orc.UserMetadataIt" +
- "em\022\024\n\014numberOfRows\030\006 \001(\004\022F\n\nstatistics\030\007" +
- " \003(\01322.org.apache.hadoop.hive.ql.io.orc." +
- "ColumnStatistics\022\026\n\016rowIndexStride\030\010 \001(\r" +
- "\"\255\001\n\nPostScript\022\024\n\014footerLength\030\001 \001(\004\022F\n" +
- "\013compression\030\002 \001(\01621.org.apache.hadoop.h" +
- "ive.ql.io.orc.CompressionKind\022\034\n\024compres" +
- "sionBlockSize\030\003 \001(\004\022\023\n\007version\030\004 \003(\rB\002\020\001",
- "\022\016\n\005magic\030\300> \001(\t*:\n\017CompressionKind\022\010\n\004N" +
- "ONE\020\000\022\010\n\004ZLIB\020\001\022\n\n\006SNAPPY\020\002\022\007\n\003LZO\020\003"
+ "aximumLength\030\004 \001(\r\022\021\n\tprecision\030\005 \001(\r\022\r\n" +
+ "\005scale\030\006 \001(\r\"\307\001\n\004Kind\022\013\n\007BOOLEAN\020\000\022\010\n\004BY" +
+ "TE\020\001\022\t\n\005SHORT\020\002\022\007\n\003INT\020\003\022\010\n\004LONG\020\004\022\t\n\005FL",
+ "OAT\020\005\022\n\n\006DOUBLE\020\006\022\n\n\006STRING\020\007\022\n\n\006BINARY\020" +
+ "\010\022\r\n\tTIMESTAMP\020\t\022\010\n\004LIST\020\n\022\007\n\003MAP\020\013\022\n\n\006S" +
+ "TRUCT\020\014\022\t\n\005UNION\020\r\022\013\n\007DECIMAL\020\016\022\010\n\004DATE\020" +
+ "\017\022\013\n\007VARCHAR\020\020\"x\n\021StripeInformation\022\016\n\006o" +
+ "ffset\030\001 \001(\004\022\023\n\013indexLength\030\002 \001(\004\022\022\n\ndata" +
+ "Length\030\003 \001(\004\022\024\n\014footerLength\030\004 \001(\004\022\024\n\014nu" +
+ "mberOfRows\030\005 \001(\004\"/\n\020UserMetadataItem\022\014\n\004" +
+ "name\030\001 \002(\t\022\r\n\005value\030\002 \002(\014\"\356\002\n\006Footer\022\024\n\014" +
+ "headerLength\030\001 \001(\004\022\025\n\rcontentLength\030\002 \001(" +
+ "\004\022D\n\007stripes\030\003 \003(\01323.org.apache.hadoop.h",
+ "ive.ql.io.orc.StripeInformation\0225\n\005types" +
+ "\030\004 \003(\0132&.org.apache.hadoop.hive.ql.io.or" +
+ "c.Type\022D\n\010metadata\030\005 \003(\01322.org.apache.ha" +
+ "doop.hive.ql.io.orc.UserMetadataItem\022\024\n\014" +
+ "numberOfRows\030\006 \001(\004\022F\n\nstatistics\030\007 \003(\01322" +
+ ".org.apache.hadoop.hive.ql.io.orc.Column" +
+ "Statistics\022\026\n\016rowIndexStride\030\010 \001(\r\"\255\001\n\nP" +
+ "ostScript\022\024\n\014footerLength\030\001 \001(\004\022F\n\013compr" +
+ "ession\030\002 \001(\01621.org.apache.hadoop.hive.ql" +
+ ".io.orc.CompressionKind\022\034\n\024compressionBl",
+ "ockSize\030\003 \001(\004\022\023\n\007version\030\004 \003(\rB\002\020\001\022\016\n\005ma" +
+ "gic\030\300> \001(\t*:\n\017CompressionKind\022\010\n\004NONE\020\000\022" +
+ "\010\n\004ZLIB\020\001\022\n\n\006SNAPPY\020\002\022\007\n\003LZO\020\003"
};
com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner =
new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() {
@@ -15283,7 +15446,7 @@ public final class OrcProto {
internal_static_org_apache_hadoop_hive_ql_io_orc_Type_fieldAccessorTable = new
com.google.protobuf.GeneratedMessage.FieldAccessorTable(
internal_static_org_apache_hadoop_hive_ql_io_orc_Type_descriptor,
- new java.lang.String[] { "Kind", "Subtypes", "FieldNames", "MaximumLength", });
+ new java.lang.String[] { "Kind", "Subtypes", "FieldNames", "MaximumLength", "Precision", "Scale", });
internal_static_org_apache_hadoop_hive_ql_io_orc_StripeInformation_descriptor =
getDescriptor().getMessageTypes().get(14);
internal_static_org_apache_hadoop_hive_ql_io_orc_StripeInformation_fieldAccessorTable = new
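
[The regenerated code above exposes builder setters for the new fields. A short sketch of constructing a DECIMAL type entry with them; 10 and 2 are illustrative values, not defaults:]

    // imports: org.apache.hadoop.hive.ql.io.orc.OrcProto
    OrcProto.Type decimalType = OrcProto.Type.newBuilder()
        .setKind(OrcProto.Type.Kind.DECIMAL)
        .setPrecision(10)  // illustrative
        .setScale(2)       // illustrative
        .build();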
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java?rev=1538780&r1=1538779&r2=1538780&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java Mon Nov 4 21:34:43 2013
@@ -25,6 +25,7 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -544,9 +545,10 @@ final class OrcStruct implements Writabl
case DATE:
return PrimitiveObjectInspectorFactory.javaDateObjectInspector;
case DECIMAL:
- // TODO: get precision/scale from TYPE
+ int precision = type.hasPrecision() ? type.getPrecision() : HiveDecimal.MAX_PRECISION;
+ int scale = type.hasScale()? type.getScale() : HiveDecimal.MAX_SCALE;
return PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
- TypeInfoFactory.decimalTypeInfo);
+ TypeInfoFactory.getDecimalTypeInfo(precision, scale));
case STRUCT:
return new OrcStructInspector(columnId, types);
case UNION:
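
[With the parameters recorded in the file footer, the object inspector now reflects the declared type rather than the generic decimal. A sketch of what the factory call yields, assuming a column declared as decimal(10,2):]

    // imports: org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory,
    //          org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo
    DecimalTypeInfo info =
        (DecimalTypeInfo) TypeInfoFactory.getDecimalTypeInfo(10, 2);
    // The declared parameters are preserved end to end:
    assert info.precision() == 10 && info.scale() == 2;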
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java?rev=1538780&r1=1538779&r2=1538780&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java Mon Nov 4 21:34:43 2013
@@ -46,6 +46,7 @@ import org.apache.hadoop.hive.serde2.io.
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
+import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.FloatWritable;
@@ -1022,8 +1023,13 @@ class RecordReaderImpl implements Record
private InStream valueStream;
private IntegerReader scaleStream = null;
- DecimalTreeReader(Path path, int columnId) {
+ private final int precision;
+ private final int scale;
+
+ DecimalTreeReader(Path path, int columnId, int precision, int scale) {
super(path, columnId);
+ this.precision = precision;
+ this.scale = scale;
}
@Override
@@ -1057,8 +1063,9 @@ class RecordReaderImpl implements Record
Object next(Object previous) throws IOException {
super.next(previous);
if (valuePresent) {
- return HiveDecimal.create(SerializationUtils.readBigInteger(valueStream),
+ HiveDecimal dec = HiveDecimal.create(SerializationUtils.readBigInteger(valueStream),
(int) scaleStream.next());
+ return HiveDecimalUtils.enforcePrecisionScale(dec, precision, scale);
}
return null;
}
@@ -1892,7 +1899,9 @@ class RecordReaderImpl implements Record
case DATE:
return new DateTreeReader(path, columnId);
case DECIMAL:
- return new DecimalTreeReader(path, columnId);
+ int precision = type.hasPrecision() ? type.getPrecision() : HiveDecimal.MAX_PRECISION;
+ int scale = type.hasScale()? type.getScale() : HiveDecimal.MAX_SCALE;
+ return new DecimalTreeReader(path, columnId, precision, scale);
case STRUCT:
return new StructTreeReader(path, columnId, types, included);
case LIST:
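
[The reader-side change constrains each decoded value to the declared precision/scale. A sketch of the call shape; the exact rounding and overflow behavior is whatever HiveDecimalUtils.enforcePrecisionScale implements (assumption: it adjusts the value, or returns null when it cannot fit):]

    // imports: java.math.BigInteger,
    //          org.apache.hadoop.hive.common.type.HiveDecimal,
    //          org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils
    HiveDecimal raw = HiveDecimal.create(new BigInteger("1234567"), 4); // 123.4567
    HiveDecimal fitted = HiveDecimalUtils.enforcePrecisionScale(raw, 10, 2);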
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java?rev=1538780&r1=1538779&r2=1538780&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java Mon Nov 4 21:34:43 2013
@@ -59,6 +59,7 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
@@ -1625,8 +1626,10 @@ class WriterImpl implements Writer, Memo
type.setKind(OrcProto.Type.Kind.DATE);
break;
case DECIMAL:
- // TODO: save precision/scale
+ DecimalTypeInfo decTypeInfo = (DecimalTypeInfo)((PrimitiveObjectInspector)treeWriter.inspector).getTypeInfo();
type.setKind(OrcProto.Type.Kind.DECIMAL);
+ type.setPrecision(decTypeInfo.precision());
+ type.setScale(decTypeInfo.scale());
break;
default:
throw new IllegalArgumentException("Unknown primitive category: " +
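
[On the write path the declared parameters come from the column's TypeInfo, mirroring the hunk above. A condensed sketch under the assumption that the table column was declared decimal(10,2):]

    // imports: org.apache.hadoop.hive.ql.io.orc.OrcProto,
    //          org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo,
    //          org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory
    DecimalTypeInfo declared =
        (DecimalTypeInfo) TypeInfoFactory.getDecimalTypeInfo(10, 2);
    OrcProto.Type.Builder type = OrcProto.Type.newBuilder();
    type.setKind(OrcProto.Type.Kind.DECIMAL);
    type.setPrecision(declared.precision());   // recorded in the footer
    type.setScale(declared.scale());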
Modified: hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto?rev=1538780&r1=1538779&r2=1538780&view=diff
==============================================================================
--- hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto (original)
+++ hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto Mon Nov 4 21:34:43 2013
@@ -135,6 +135,8 @@ message Type {
repeated uint32 subtypes = 2 [packed=true];
repeated string fieldNames = 3;
optional uint32 maximumLength = 4;
+ optional uint32 precision = 5;
+ optional uint32 scale = 6;
}
message StripeInformation {
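
[Because precision and scale are optional fields with new tag numbers, the change is wire compatible in both directions: old readers skip tags 5 and 6 as unknown fields, and new readers fall back to the Hive maximums when the fields are absent. A round-trip sketch with illustrative values:]

    // imports: org.apache.hadoop.hive.ql.io.orc.OrcProto
    OrcProto.Type t = OrcProto.Type.newBuilder()
        .setKind(OrcProto.Type.Kind.DECIMAL)
        .setPrecision(15).setScale(3)  // illustrative
        .build();
    // parseFrom throws InvalidProtocolBufferException on malformed input.
    OrcProto.Type parsed = OrcProto.Type.parseFrom(t.toByteArray());
    assert parsed.hasPrecision() && parsed.getPrecision() == 15;
    assert parsed.hasScale() && parsed.getScale() == 3;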