You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by xu...@apache.org on 2014/10/01 00:23:17 UTC

svn commit: r1628569 - in /hive/trunk/serde/src: java/org/apache/hadoop/hive/serde2/avro/ test/org/apache/hadoop/hive/serde2/avro/ test/resources/

Author: xuefu
Date: Tue Sep 30 22:23:17 2014
New Revision: 1628569

URL: http://svn.apache.org/r1628569
Log:
HIVE-8130: Support Date in Avro (Mohit via Xuefu)

Modified:
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerializer.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/TypeInfoToSchema.java
    hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestTypeInfoToSchema.java
    hive/trunk/serde/src/test/resources/avro-struct.avsc

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java?rev=1628569&r1=1628568&r2=1628569&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java Tue Sep 30 22:23:17 2014
@@ -22,6 +22,7 @@ import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.rmi.server.UID;
+import java.sql.Date;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -45,6 +46,7 @@ import org.apache.commons.logging.LogFac
 import org.apache.hadoop.hive.common.type.HiveChar;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.StandardUnionObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaHiveDecimalObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
@@ -280,6 +282,12 @@ class AvroDeserializer {
       str = datum.toString();
       HiveVarchar hvc = new HiveVarchar(str, maxLength);
       return hvc;
+    case DATE:
+      if (recordSchema.getType() != Type.INT) {
+        throw new AvroSerdeException("Unexpected Avro schema for Date TypeInfo: " + recordSchema.getType());
+      }
+
+      return new Date(DateWritable.daysToMillis((Integer)datum));
     default:
       return datum;
     }

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java?rev=1628569&r1=1628568&r2=1628569&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java Tue Sep 30 22:23:17 2014
@@ -43,11 +43,13 @@ public class AvroSerDe extends AbstractS
   public static final String DECIMAL_TYPE_NAME = "decimal";
   public static final String CHAR_TYPE_NAME = "char";
   public static final String VARCHAR_TYPE_NAME = "varchar";
+  public static final String DATE_TYPE_NAME = "date";
   public static final String AVRO_PROP_LOGICAL_TYPE = "logicalType";
   public static final String AVRO_PROP_PRECISION = "precision";
   public static final String AVRO_PROP_SCALE = "scale";
   public static final String AVRO_PROP_MAX_LENGTH = "maxLength";
   public static final String AVRO_STRING_TYPE_NAME = "string";
+  public static final String AVRO_INT_TYPE_NAME = "int";
 
   private ObjectInspector oi;
   private List<String> columnNames;

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerializer.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerializer.java?rev=1628569&r1=1628568&r2=1628569&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerializer.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerializer.java Tue Sep 30 22:23:17 2014
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.hive.serde2.avro;
 
+import java.sql.Date;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
@@ -33,6 +34,7 @@ import org.apache.commons.logging.LogFac
 import org.apache.hadoop.hive.common.type.HiveChar;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -40,6 +42,7 @@ import org.apache.hadoop.hive.serde2.obj
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
@@ -201,6 +204,9 @@ class AvroSerializer {
     case VARCHAR:
       HiveVarchar vc = (HiveVarchar)fieldOI.getPrimitiveJavaObject(structFieldData);
       return vc.getValue();
+    case DATE:
+      Date date = ((DateObjectInspector)fieldOI).getPrimitiveJavaObject(structFieldData);
+      return DateWritable.dateToDays(date);
     case UNKNOWN:
       throw new AvroSerdeException("Received UNKNOWN primitive category.");
     case VOID:

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java?rev=1628569&r1=1628568&r2=1628569&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java Tue Sep 30 22:23:17 2014
@@ -150,6 +150,11 @@ class SchemaToTypeInfo {
       return TypeInfoFactory.getVarcharTypeInfo(maxLength);
     }
 
+    if (type == Schema.Type.INT &&
+        AvroSerDe.DATE_TYPE_NAME.equals(schema.getProp(AvroSerDe.AVRO_PROP_LOGICAL_TYPE))) {
+      return TypeInfoFactory.dateTypeInfo;
+    }
+
     return typeInfoCache.retrieve(schema);
   }
 

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/TypeInfoToSchema.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/TypeInfoToSchema.java?rev=1628569&r1=1628568&r2=1628569&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/TypeInfoToSchema.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/TypeInfoToSchema.java Tue Sep 30 22:23:17 2014
@@ -152,6 +152,11 @@ public class TypeInfoToSchema {
             "\"precision\":" + precision + "," +
             "\"scale\":" + scale + "}");
         break;
+      case DATE:
+        schema = AvroSerdeUtils.getSchemaFor("{" +
+            "\"type\":\"" + AvroSerDe.AVRO_INT_TYPE_NAME + "\"," +
+            "\"logicalType\":\"" + AvroSerDe.DATE_TYPE_NAME + "\"}");
+        break;
       case VOID:
         schema = Schema.create(Schema.Type.NULL);
         break;

Modified: hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestTypeInfoToSchema.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestTypeInfoToSchema.java?rev=1628569&r1=1628568&r2=1628569&view=diff
==============================================================================
--- hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestTypeInfoToSchema.java (original)
+++ hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestTypeInfoToSchema.java Tue Sep 30 22:23:17 2014
@@ -19,9 +19,11 @@
 package org.apache.hadoop.hive.serde2.avro;
 
 import com.google.common.io.Resources;
+
 import org.junit.Assert;
 import org.apache.avro.Schema;
 import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
@@ -44,16 +46,28 @@ public class TestTypeInfoToSchema {
 
   private static Logger LOGGER = Logger.getLogger(TestTypeInfoToSchema.class);
   private static final List<String> COLUMN_NAMES = Arrays.asList("testCol");
-  private static final TypeInfo STRING = TypeInfoFactory.getPrimitiveTypeInfo("string");
-  private static final TypeInfo INT = TypeInfoFactory.getPrimitiveTypeInfo("int");
-  private static final TypeInfo BOOLEAN = TypeInfoFactory.getPrimitiveTypeInfo("boolean");
-  private static final TypeInfo LONG = TypeInfoFactory.getPrimitiveTypeInfo("bigint");
-  private static final TypeInfo FLOAT = TypeInfoFactory.getPrimitiveTypeInfo("float");
-  private static final TypeInfo DOUBLE = TypeInfoFactory.getPrimitiveTypeInfo("double");
-  private static final TypeInfo BINARY = TypeInfoFactory.getPrimitiveTypeInfo("binary");
-  private static final TypeInfo BYTE = TypeInfoFactory.getPrimitiveTypeInfo("tinyint");
-  private static final TypeInfo SHORT = TypeInfoFactory.getPrimitiveTypeInfo("smallint");
-  private static final TypeInfo VOID = TypeInfoFactory.getPrimitiveTypeInfo("void");
+  private static final TypeInfo STRING = TypeInfoFactory.getPrimitiveTypeInfo(
+      serdeConstants.STRING_TYPE_NAME);
+  private static final TypeInfo INT = TypeInfoFactory.getPrimitiveTypeInfo(
+      serdeConstants.INT_TYPE_NAME);
+  private static final TypeInfo BOOLEAN = TypeInfoFactory.getPrimitiveTypeInfo(
+      serdeConstants.BOOLEAN_TYPE_NAME);
+  private static final TypeInfo LONG = TypeInfoFactory.getPrimitiveTypeInfo(
+      serdeConstants.BIGINT_TYPE_NAME);
+  private static final TypeInfo FLOAT = TypeInfoFactory.getPrimitiveTypeInfo(
+      serdeConstants.FLOAT_TYPE_NAME);
+  private static final TypeInfo DOUBLE = TypeInfoFactory.getPrimitiveTypeInfo(
+      serdeConstants.DOUBLE_TYPE_NAME);
+  private static final TypeInfo BINARY = TypeInfoFactory.getPrimitiveTypeInfo(
+      serdeConstants.BINARY_TYPE_NAME);
+  private static final TypeInfo BYTE = TypeInfoFactory.getPrimitiveTypeInfo(
+      serdeConstants.TINYINT_TYPE_NAME);
+  private static final TypeInfo SHORT = TypeInfoFactory.getPrimitiveTypeInfo(
+      serdeConstants.SMALLINT_TYPE_NAME);
+  private static final TypeInfo VOID = TypeInfoFactory.getPrimitiveTypeInfo(
+      serdeConstants.VOID_TYPE_NAME);
+  private static final TypeInfo DATE = TypeInfoFactory.getPrimitiveTypeInfo(
+      serdeConstants.DATE_TYPE_NAME);
   private static final int PRECISION = 4;
   private static final int SCALE = 2;
   private static final TypeInfo DECIMAL = TypeInfoFactory.getPrimitiveTypeInfo(
@@ -229,6 +243,17 @@ public class TestTypeInfoToSchema {
   }
 
   @Test
+  public void createAvroDateSchema() {
+    final String specificSchema = "{" +
+        "\"type\":\"int\"," +
+        "\"logicalType\":\"date\"}";
+    String expectedSchema = genSchema(specificSchema);
+
+    Assert.assertEquals("Test for date in avro schema failed",
+        expectedSchema, getAvroSchemaString(DATE));
+  }
+
+  @Test
   public void createAvroListSchema() {
     ListTypeInfo listTypeInfo = new ListTypeInfo();
     listTypeInfo.setListElementTypeInfo(STRING);
@@ -337,6 +362,7 @@ public class TestTypeInfoToSchema {
     names.add("field11");
     names.add("field12");
     names.add("field13");
+    names.add("field14");
     structTypeInfo.setAllStructFieldNames(names);
     ArrayList<TypeInfo> typeInfos = new ArrayList<TypeInfo>();
     typeInfos.add(STRING);
@@ -351,6 +377,7 @@ public class TestTypeInfoToSchema {
     typeInfos.add(DOUBLE);
     typeInfos.add(BOOLEAN);
     typeInfos.add(DECIMAL);
+    typeInfos.add(DATE);
     typeInfos.add(VOID);
     structTypeInfo.setAllStructFieldTypeInfos(typeInfos);
     LOGGER.info("structTypeInfo is " + structTypeInfo);

Modified: hive/trunk/serde/src/test/resources/avro-struct.avsc
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/test/resources/avro-struct.avsc?rev=1628569&r1=1628568&r2=1628569&view=diff
==============================================================================
--- hive/trunk/serde/src/test/resources/avro-struct.avsc (original)
+++ hive/trunk/serde/src/test/resources/avro-struct.avsc Tue Sep 30 22:23:17 2014
@@ -4,7 +4,7 @@
 "namespace":"",
 "doc":"struct<field1:string,field2:char(5),field3:varchar(5),field4:binary,field5:tinyint,
 field6:smallint,field7:int,field8:bigint,field9:float,field10:double,field11:boolean,
-field12:decimal(4,2),field13:void>",
+field12:decimal(4,2),field13:date,field14:void>",
 "fields":[
 {"name":"field1","type":["null","string"],"doc":"string","default":null},
 {"name":"field2","type":["null",{"type":"string","logicalType":"char","maxLength":5}],"doc":"char(5)","default":null},
@@ -17,8 +17,8 @@ field12:decimal(4,2),field13:void>",
 {"name":"field9","type":["null","float"],"doc":"float","default":null},
 {"name":"field10","type":["null","double"],"doc":"double","default":null},
 {"name":"field11","type":["null","boolean"],"doc":"boolean","default":null},
-{"name":"field12","type":["null",{"type":"bytes","logicalType":"decimal","precision":4,
-"scale":2}],"doc":"decimal(4,2)","default":null},
-{"name":"field13","type":"null","doc":"void","default":null}
+{"name":"field12","type":["null",{"type":"bytes","logicalType":"decimal","precision":4,"scale":2}],"doc":"decimal(4,2)","default":null},
+{"name":"field13","type":["null",{"type":"int","logicalType":"date"}],"doc":"date","default":null},
+{"name":"field14","type":"null","doc":"void","default":null}
 ]
 }