Posted to commits@hive.apache.org by br...@apache.org on 2013/11/04 22:34:44 UTC

svn commit: r1538780 - in /hive/trunk/ql/src: gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/ java/org/apache/hadoop/hive/ql/io/orc/ protobuf/org/apache/hadoop/hive/ql/io/orc/

Author: brock
Date: Mon Nov  4 21:34:43 2013
New Revision: 1538780

URL: http://svn.apache.org/r1538780
Log:
HIVE-5354 - Decimal precision/scale support in ORC file (Xuefu Zhang via Brock Noland)

Modified:
    hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
    hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto

Modified: hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java?rev=1538780&r1=1538779&r2=1538780&view=diff
==============================================================================
--- hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java (original)
+++ hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java Mon Nov  4 21:34:43 2013
@@ -9668,6 +9668,26 @@ public final class OrcProto {
      * <code>optional uint32 maximumLength = 4;</code>
      */
     int getMaximumLength();
+
+    // optional uint32 precision = 5;
+    /**
+     * <code>optional uint32 precision = 5;</code>
+     */
+    boolean hasPrecision();
+    /**
+     * <code>optional uint32 precision = 5;</code>
+     */
+    int getPrecision();
+
+    // optional uint32 scale = 6;
+    /**
+     * <code>optional uint32 scale = 6;</code>
+     */
+    boolean hasScale();
+    /**
+     * <code>optional uint32 scale = 6;</code>
+     */
+    int getScale();
   }
   /**
    * Protobuf type {@code org.apache.hadoop.hive.ql.io.orc.Type}
@@ -9765,6 +9785,16 @@ public final class OrcProto {
               maximumLength_ = input.readUInt32();
               break;
             }
+            case 40: {
+              bitField0_ |= 0x00000004;
+              precision_ = input.readUInt32();
+              break;
+            }
+            case 48: {
+              bitField0_ |= 0x00000008;
+              scale_ = input.readUInt32();
+              break;
+            }
           }
         }
       } catch (com.google.protobuf.InvalidProtocolBufferException e) {
@@ -10114,11 +10144,45 @@ public final class OrcProto {
       return maximumLength_;
     }
 
+    // optional uint32 precision = 5;
+    public static final int PRECISION_FIELD_NUMBER = 5;
+    private int precision_;
+    /**
+     * <code>optional uint32 precision = 5;</code>
+     */
+    public boolean hasPrecision() {
+      return ((bitField0_ & 0x00000004) == 0x00000004);
+    }
+    /**
+     * <code>optional uint32 precision = 5;</code>
+     */
+    public int getPrecision() {
+      return precision_;
+    }
+
+    // optional uint32 scale = 6;
+    public static final int SCALE_FIELD_NUMBER = 6;
+    private int scale_;
+    /**
+     * <code>optional uint32 scale = 6;</code>
+     */
+    public boolean hasScale() {
+      return ((bitField0_ & 0x00000008) == 0x00000008);
+    }
+    /**
+     * <code>optional uint32 scale = 6;</code>
+     */
+    public int getScale() {
+      return scale_;
+    }
+
     private void initFields() {
       kind_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.Type.Kind.BOOLEAN;
       subtypes_ = java.util.Collections.emptyList();
       fieldNames_ = com.google.protobuf.LazyStringArrayList.EMPTY;
       maximumLength_ = 0;
+      precision_ = 0;
+      scale_ = 0;
     }
     private byte memoizedIsInitialized = -1;
     public final boolean isInitialized() {
@@ -10152,6 +10216,12 @@ public final class OrcProto {
       if (((bitField0_ & 0x00000002) == 0x00000002)) {
         output.writeUInt32(4, maximumLength_);
       }
+      if (((bitField0_ & 0x00000004) == 0x00000004)) {
+        output.writeUInt32(5, precision_);
+      }
+      if (((bitField0_ & 0x00000008) == 0x00000008)) {
+        output.writeUInt32(6, scale_);
+      }
       getUnknownFields().writeTo(output);
     }
 
@@ -10192,6 +10262,14 @@ public final class OrcProto {
         size += com.google.protobuf.CodedOutputStream
           .computeUInt32Size(4, maximumLength_);
       }
+      if (((bitField0_ & 0x00000004) == 0x00000004)) {
+        size += com.google.protobuf.CodedOutputStream
+          .computeUInt32Size(5, precision_);
+      }
+      if (((bitField0_ & 0x00000008) == 0x00000008)) {
+        size += com.google.protobuf.CodedOutputStream
+          .computeUInt32Size(6, scale_);
+      }
       size += getUnknownFields().getSerializedSize();
       memoizedSerializedSize = size;
       return size;
@@ -10316,6 +10394,10 @@ public final class OrcProto {
         bitField0_ = (bitField0_ & ~0x00000004);
         maximumLength_ = 0;
         bitField0_ = (bitField0_ & ~0x00000008);
+        precision_ = 0;
+        bitField0_ = (bitField0_ & ~0x00000010);
+        scale_ = 0;
+        bitField0_ = (bitField0_ & ~0x00000020);
         return this;
       }
 
@@ -10363,6 +10445,14 @@ public final class OrcProto {
           to_bitField0_ |= 0x00000002;
         }
         result.maximumLength_ = maximumLength_;
+        if (((from_bitField0_ & 0x00000010) == 0x00000010)) {
+          to_bitField0_ |= 0x00000004;
+        }
+        result.precision_ = precision_;
+        if (((from_bitField0_ & 0x00000020) == 0x00000020)) {
+          to_bitField0_ |= 0x00000008;
+        }
+        result.scale_ = scale_;
         result.bitField0_ = to_bitField0_;
         onBuilt();
         return result;
@@ -10405,6 +10495,12 @@ public final class OrcProto {
         if (other.hasMaximumLength()) {
           setMaximumLength(other.getMaximumLength());
         }
+        if (other.hasPrecision()) {
+          setPrecision(other.getPrecision());
+        }
+        if (other.hasScale()) {
+          setScale(other.getScale());
+        }
         this.mergeUnknownFields(other.getUnknownFields());
         return this;
       }
@@ -10664,6 +10760,72 @@ public final class OrcProto {
         return this;
       }
 
+      // optional uint32 precision = 5;
+      private int precision_ ;
+      /**
+       * <code>optional uint32 precision = 5;</code>
+       */
+      public boolean hasPrecision() {
+        return ((bitField0_ & 0x00000010) == 0x00000010);
+      }
+      /**
+       * <code>optional uint32 precision = 5;</code>
+       */
+      public int getPrecision() {
+        return precision_;
+      }
+      /**
+       * <code>optional uint32 precision = 5;</code>
+       */
+      public Builder setPrecision(int value) {
+        bitField0_ |= 0x00000010;
+        precision_ = value;
+        onChanged();
+        return this;
+      }
+      /**
+       * <code>optional uint32 precision = 5;</code>
+       */
+      public Builder clearPrecision() {
+        bitField0_ = (bitField0_ & ~0x00000010);
+        precision_ = 0;
+        onChanged();
+        return this;
+      }
+
+      // optional uint32 scale = 6;
+      private int scale_ ;
+      /**
+       * <code>optional uint32 scale = 6;</code>
+       */
+      public boolean hasScale() {
+        return ((bitField0_ & 0x00000020) == 0x00000020);
+      }
+      /**
+       * <code>optional uint32 scale = 6;</code>
+       */
+      public int getScale() {
+        return scale_;
+      }
+      /**
+       * <code>optional uint32 scale = 6;</code>
+       */
+      public Builder setScale(int value) {
+        bitField0_ |= 0x00000020;
+        scale_ = value;
+        onChanged();
+        return this;
+      }
+      /**
+       * <code>optional uint32 scale = 6;</code>
+       */
+      public Builder clearScale() {
+        bitField0_ = (bitField0_ & ~0x00000020);
+        scale_ = 0;
+        onChanged();
+        return this;
+      }
+
       // @@protoc_insertion_point(builder_scope:org.apache.hadoop.hive.ql.io.orc.Type)
     }
 
@@ -15166,34 +15328,35 @@ public final class OrcProto {
       "9\n\007streams\030\001 \003(\0132(.org.apache.hadoop.hiv" +
       "e.ql.io.orc.Stream\022A\n\007columns\030\002 \003(\01320.or" +
       "g.apache.hadoop.hive.ql.io.orc.ColumnEnc" +
-      "oding\"\314\002\n\004Type\0229\n\004kind\030\001 \002(\0162+.org.apach" +
+      "oding\"\356\002\n\004Type\0229\n\004kind\030\001 \002(\0162+.org.apach" +
       "e.hadoop.hive.ql.io.orc.Type.Kind\022\024\n\010sub" +
       "types\030\002 \003(\rB\002\020\001\022\022\n\nfieldNames\030\003 \003(\t\022\025\n\rm" +
-      "aximumLength\030\004 \001(\r\"\307\001\n\004Kind\022\013\n\007BOOLEAN\020\000" +
-      "\022\010\n\004BYTE\020\001\022\t\n\005SHORT\020\002\022\007\n\003INT\020\003\022\010\n\004LONG\020\004" +
-      "\022\t\n\005FLOAT\020\005\022\n\n\006DOUBLE\020\006\022\n\n\006STRING\020\007\022\n\n\006B",
-      "INARY\020\010\022\r\n\tTIMESTAMP\020\t\022\010\n\004LIST\020\n\022\007\n\003MAP\020" +
-      "\013\022\n\n\006STRUCT\020\014\022\t\n\005UNION\020\r\022\013\n\007DECIMAL\020\016\022\010\n" +
-      "\004DATE\020\017\022\013\n\007VARCHAR\020\020\"x\n\021StripeInformatio" +
-      "n\022\016\n\006offset\030\001 \001(\004\022\023\n\013indexLength\030\002 \001(\004\022\022" +
-      "\n\ndataLength\030\003 \001(\004\022\024\n\014footerLength\030\004 \001(\004" +
-      "\022\024\n\014numberOfRows\030\005 \001(\004\"/\n\020UserMetadataIt" +
-      "em\022\014\n\004name\030\001 \002(\t\022\r\n\005value\030\002 \002(\014\"\356\002\n\006Foot" +
-      "er\022\024\n\014headerLength\030\001 \001(\004\022\025\n\rcontentLengt" +
-      "h\030\002 \001(\004\022D\n\007stripes\030\003 \003(\01323.org.apache.ha" +
-      "doop.hive.ql.io.orc.StripeInformation\0225\n",
-      "\005types\030\004 \003(\0132&.org.apache.hadoop.hive.ql" +
-      ".io.orc.Type\022D\n\010metadata\030\005 \003(\01322.org.apa" +
-      "che.hadoop.hive.ql.io.orc.UserMetadataIt" +
-      "em\022\024\n\014numberOfRows\030\006 \001(\004\022F\n\nstatistics\030\007" +
-      " \003(\01322.org.apache.hadoop.hive.ql.io.orc." +
-      "ColumnStatistics\022\026\n\016rowIndexStride\030\010 \001(\r" +
-      "\"\255\001\n\nPostScript\022\024\n\014footerLength\030\001 \001(\004\022F\n" +
-      "\013compression\030\002 \001(\01621.org.apache.hadoop.h" +
-      "ive.ql.io.orc.CompressionKind\022\034\n\024compres" +
-      "sionBlockSize\030\003 \001(\004\022\023\n\007version\030\004 \003(\rB\002\020\001",
-      "\022\016\n\005magic\030\300> \001(\t*:\n\017CompressionKind\022\010\n\004N" +
-      "ONE\020\000\022\010\n\004ZLIB\020\001\022\n\n\006SNAPPY\020\002\022\007\n\003LZO\020\003"
+      "aximumLength\030\004 \001(\r\022\021\n\tprecision\030\005 \001(\r\022\r\n" +
+      "\005scale\030\006 \001(\r\"\307\001\n\004Kind\022\013\n\007BOOLEAN\020\000\022\010\n\004BY" +
+      "TE\020\001\022\t\n\005SHORT\020\002\022\007\n\003INT\020\003\022\010\n\004LONG\020\004\022\t\n\005FL",
+      "OAT\020\005\022\n\n\006DOUBLE\020\006\022\n\n\006STRING\020\007\022\n\n\006BINARY\020" +
+      "\010\022\r\n\tTIMESTAMP\020\t\022\010\n\004LIST\020\n\022\007\n\003MAP\020\013\022\n\n\006S" +
+      "TRUCT\020\014\022\t\n\005UNION\020\r\022\013\n\007DECIMAL\020\016\022\010\n\004DATE\020" +
+      "\017\022\013\n\007VARCHAR\020\020\"x\n\021StripeInformation\022\016\n\006o" +
+      "ffset\030\001 \001(\004\022\023\n\013indexLength\030\002 \001(\004\022\022\n\ndata" +
+      "Length\030\003 \001(\004\022\024\n\014footerLength\030\004 \001(\004\022\024\n\014nu" +
+      "mberOfRows\030\005 \001(\004\"/\n\020UserMetadataItem\022\014\n\004" +
+      "name\030\001 \002(\t\022\r\n\005value\030\002 \002(\014\"\356\002\n\006Footer\022\024\n\014" +
+      "headerLength\030\001 \001(\004\022\025\n\rcontentLength\030\002 \001(" +
+      "\004\022D\n\007stripes\030\003 \003(\01323.org.apache.hadoop.h",
+      "ive.ql.io.orc.StripeInformation\0225\n\005types" +
+      "\030\004 \003(\0132&.org.apache.hadoop.hive.ql.io.or" +
+      "c.Type\022D\n\010metadata\030\005 \003(\01322.org.apache.ha" +
+      "doop.hive.ql.io.orc.UserMetadataItem\022\024\n\014" +
+      "numberOfRows\030\006 \001(\004\022F\n\nstatistics\030\007 \003(\01322" +
+      ".org.apache.hadoop.hive.ql.io.orc.Column" +
+      "Statistics\022\026\n\016rowIndexStride\030\010 \001(\r\"\255\001\n\nP" +
+      "ostScript\022\024\n\014footerLength\030\001 \001(\004\022F\n\013compr" +
+      "ession\030\002 \001(\01621.org.apache.hadoop.hive.ql" +
+      ".io.orc.CompressionKind\022\034\n\024compressionBl",
+      "ockSize\030\003 \001(\004\022\023\n\007version\030\004 \003(\rB\002\020\001\022\016\n\005ma" +
+      "gic\030\300> \001(\t*:\n\017CompressionKind\022\010\n\004NONE\020\000\022" +
+      "\010\n\004ZLIB\020\001\022\n\n\006SNAPPY\020\002\022\007\n\003LZO\020\003"
     };
     com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner =
       new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() {
@@ -15283,7 +15446,7 @@ public final class OrcProto {
           internal_static_org_apache_hadoop_hive_ql_io_orc_Type_fieldAccessorTable = new
             com.google.protobuf.GeneratedMessage.FieldAccessorTable(
               internal_static_org_apache_hadoop_hive_ql_io_orc_Type_descriptor,
-              new java.lang.String[] { "Kind", "Subtypes", "FieldNames", "MaximumLength", });
+              new java.lang.String[] { "Kind", "Subtypes", "FieldNames", "MaximumLength", "Precision", "Scale", });
           internal_static_org_apache_hadoop_hive_ql_io_orc_StripeInformation_descriptor =
             getDescriptor().getMessageTypes().get(14);
           internal_static_org_apache_hadoop_hive_ql_io_orc_StripeInformation_fieldAccessorTable = new

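The hunks above are protoc output: each new optional field gets a
presence bit in bitField0_, accessors, builder setters, and wire
handling for varint tags 40 (field 5) and 48 (field 6). A minimal
sketch of exercising the new accessors, not part of the commit and
assuming only the standard API protobuf generates for OrcProto.Type:

    OrcProto.Type decType = OrcProto.Type.newBuilder()
        .setKind(OrcProto.Type.Kind.DECIMAL)
        .setPrecision(10)
        .setScale(2)
        .build();

    assert decType.hasPrecision() && decType.getPrecision() == 10;
    assert decType.hasScale() && decType.getScale() == 2;

On types written without the new fields, hasPrecision() and hasScale()
return false, which is the case the reader-side defaults below handle.
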
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java?rev=1538780&r1=1538779&r2=1538780&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java Mon Nov  4 21:34:43 2013
@@ -25,6 +25,7 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
+import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -544,9 +545,10 @@ final class OrcStruct implements Writabl
       case DATE:
         return PrimitiveObjectInspectorFactory.javaDateObjectInspector;
       case DECIMAL:
-        // TODO: get precision/scale from TYPE
+        int precision = type.hasPrecision() ? type.getPrecision() : HiveDecimal.MAX_PRECISION;
+        int scale = type.hasScale() ? type.getScale() : HiveDecimal.MAX_SCALE;
         return PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
-            TypeInfoFactory.decimalTypeInfo);
+            TypeInfoFactory.getDecimalTypeInfo(precision, scale));
       case STRUCT:
         return new OrcStructInspector(columnId, types);
       case UNION:

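For files written before this change, the DECIMAL type carries no
precision or scale, so the inspector falls back to the system maxima,
preserving the old unconstrained behaviour. The fallback in isolation,
as a sketch (the Type value here is illustrative):

    OrcProto.Type legacy = OrcProto.Type.newBuilder()
        .setKind(OrcProto.Type.Kind.DECIMAL)
        .build();  // no precision/scale recorded

    int precision = legacy.hasPrecision()
        ? legacy.getPrecision() : HiveDecimal.MAX_PRECISION;  // 38
    int scale = legacy.hasScale()
        ? legacy.getScale() : HiveDecimal.MAX_SCALE;          // 38
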
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java?rev=1538780&r1=1538779&r2=1538780&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java Mon Nov  4 21:34:43 2013
@@ -46,6 +46,7 @@ import org.apache.hadoop.hive.serde2.io.
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
 import org.apache.hadoop.hive.serde2.io.ShortWritable;
+import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils;
 import org.apache.hadoop.io.BooleanWritable;
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.FloatWritable;
@@ -1022,8 +1023,13 @@ class RecordReaderImpl implements Record
     private InStream valueStream;
     private IntegerReader scaleStream = null;
 
-    DecimalTreeReader(Path path, int columnId) {
+    private final int precision;
+    private final int scale;
+
+    DecimalTreeReader(Path path, int columnId, int precision, int scale) {
       super(path, columnId);
+      this.precision = precision;
+      this.scale = scale;
     }
 
     @Override
@@ -1057,8 +1063,9 @@ class RecordReaderImpl implements Record
     Object next(Object previous) throws IOException {
       super.next(previous);
       if (valuePresent) {
-        return HiveDecimal.create(SerializationUtils.readBigInteger(valueStream),
+        HiveDecimal dec = HiveDecimal.create(SerializationUtils.readBigInteger(valueStream),
             (int) scaleStream.next());
+        return HiveDecimalUtils.enforcePrecisionScale(dec, precision, scale);
       }
       return null;
     }
@@ -1892,7 +1899,9 @@ class RecordReaderImpl implements Record
       case DATE:
         return new DateTreeReader(path, columnId);
       case DECIMAL:
-        return new DecimalTreeReader(path, columnId);
+        int precision = type.hasPrecision() ? type.getPrecision() : HiveDecimal.MAX_PRECISION;
+        int scale = type.hasScale() ? type.getScale() : HiveDecimal.MAX_SCALE;
+        return new DecimalTreeReader(path, columnId, precision, scale);
       case STRUCT:
         return new StructTreeReader(path, columnId, types, included);
       case LIST:

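On the read path each decimal now passes through
HiveDecimalUtils.enforcePrecisionScale, which bounds the value to the
declared type. A sketch of that step in isolation, assuming the serde2
semantics of rounding excess fractional digits and returning null for
values that cannot fit at all:

    HiveDecimal dec = HiveDecimal.create("123.456");
    // For a decimal(5,2) column, at most 2 fractional digits survive:
    HiveDecimal bounded = HiveDecimalUtils.enforcePrecisionScale(dec, 5, 2);
    // bounded == 123.46; a value with more than 5 - 2 = 3 integer
    // digits would come back as null instead.
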
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java?rev=1538780&r1=1538779&r2=1538780&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java Mon Nov  4 21:34:43 2013
@@ -59,6 +59,7 @@ import org.apache.hadoop.hive.serde2.obj
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.Text;
@@ -1625,8 +1626,10 @@ class WriterImpl implements Writer, Memo
             type.setKind(OrcProto.Type.Kind.DATE);
             break;
           case DECIMAL:
-            // TODO: save precision/scale
+            DecimalTypeInfo decTypeInfo = (DecimalTypeInfo)((PrimitiveObjectInspector)treeWriter.inspector).getTypeInfo();
             type.setKind(OrcProto.Type.Kind.DECIMAL);
+            type.setPrecision(decTypeInfo.precision());
+            type.setScale(decTypeInfo.scale());
             break;
           default:
             throw new IllegalArgumentException("Unknown primitive category: " +

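On the write path the declared bounds travel from the column's type
info into the file footer. A sketch of the same flow outside the
writer, using TypeInfoFactory.getDecimalTypeInfo from serde2 (the
factory the reader side of this commit also uses):

    DecimalTypeInfo info = TypeInfoFactory.getDecimalTypeInfo(10, 2);
    OrcProto.Type.Builder type = OrcProto.Type.newBuilder();
    type.setKind(OrcProto.Type.Kind.DECIMAL);
    type.setPrecision(info.precision());   // 10
    type.setScale(info.scale());           // 2
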
Modified: hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto?rev=1538780&r1=1538779&r2=1538780&view=diff
==============================================================================
--- hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto (original)
+++ hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto Mon Nov  4 21:34:43 2013
@@ -135,6 +135,8 @@ message Type {
   repeated uint32 subtypes = 2 [packed=true];
   repeated string fieldNames = 3;
   optional uint32 maximumLength = 4;
+  optional uint32 precision = 5;
+  optional uint32 scale = 6;
 }
 
 message StripeInformation {
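
With the two new optional fields, the footer entry for a decimal(10,2)
column would read as follows in protobuf text format (illustrative):

    kind: DECIMAL
    precision: 10
    scale: 2

Files written before HIVE-5354 simply omit both fields, which is the
case the MAX_PRECISION/MAX_SCALE defaults above are there to cover.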