You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by xu...@apache.org on 2014/10/01 00:23:17 UTC
svn commit: r1628569 - in /hive/trunk/serde/src:
java/org/apache/hadoop/hive/serde2/avro/
test/org/apache/hadoop/hive/serde2/avro/ test/resources/
Author: xuefu
Date: Tue Sep 30 22:23:17 2014
New Revision: 1628569
URL: http://svn.apache.org/r1628569
Log:
HIVE-8130: Support Date in Avro (Mohit via Xuefu)
Modified:
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerializer.java
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/TypeInfoToSchema.java
hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestTypeInfoToSchema.java
hive/trunk/serde/src/test/resources/avro-struct.avsc
Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java?rev=1628569&r1=1628568&r2=1628569&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java Tue Sep 30 22:23:17 2014
@@ -22,6 +22,7 @@ import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.rmi.server.UID;
+import java.sql.Date;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
@@ -45,6 +46,7 @@ import org.apache.commons.logging.LogFac
import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.objectinspector.StandardUnionObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaHiveDecimalObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
@@ -280,6 +282,12 @@ class AvroDeserializer {
str = datum.toString();
HiveVarchar hvc = new HiveVarchar(str, maxLength);
return hvc;
+ case DATE:
+ if (recordSchema.getType() != Type.INT) {
+ throw new AvroSerdeException("Unexpected Avro schema for Date TypeInfo: " + recordSchema.getType());
+ }
+
+ return new Date(DateWritable.daysToMillis((Integer)datum));
default:
return datum;
}
Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java?rev=1628569&r1=1628568&r2=1628569&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java Tue Sep 30 22:23:17 2014
@@ -43,11 +43,13 @@ public class AvroSerDe extends AbstractS
public static final String DECIMAL_TYPE_NAME = "decimal";
public static final String CHAR_TYPE_NAME = "char";
public static final String VARCHAR_TYPE_NAME = "varchar";
+ public static final String DATE_TYPE_NAME = "date";
public static final String AVRO_PROP_LOGICAL_TYPE = "logicalType";
public static final String AVRO_PROP_PRECISION = "precision";
public static final String AVRO_PROP_SCALE = "scale";
public static final String AVRO_PROP_MAX_LENGTH = "maxLength";
public static final String AVRO_STRING_TYPE_NAME = "string";
+ public static final String AVRO_INT_TYPE_NAME = "int";
private ObjectInspector oi;
private List<String> columnNames;
Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerializer.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerializer.java?rev=1628569&r1=1628568&r2=1628569&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerializer.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerializer.java Tue Sep 30 22:23:17 2014
@@ -17,6 +17,7 @@
*/
package org.apache.hadoop.hive.serde2.avro;
+import java.sql.Date;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
@@ -33,6 +34,7 @@ import org.apache.commons.logging.LogFac
import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -40,6 +42,7 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
@@ -201,6 +204,9 @@ class AvroSerializer {
case VARCHAR:
HiveVarchar vc = (HiveVarchar)fieldOI.getPrimitiveJavaObject(structFieldData);
return vc.getValue();
+ case DATE:
+ Date date = ((DateObjectInspector)fieldOI).getPrimitiveJavaObject(structFieldData);
+ return DateWritable.dateToDays(date);
case UNKNOWN:
throw new AvroSerdeException("Received UNKNOWN primitive category.");
case VOID:
Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java?rev=1628569&r1=1628568&r2=1628569&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java Tue Sep 30 22:23:17 2014
@@ -150,6 +150,11 @@ class SchemaToTypeInfo {
return TypeInfoFactory.getVarcharTypeInfo(maxLength);
}
+ if (type == Schema.Type.INT &&
+ AvroSerDe.DATE_TYPE_NAME.equals(schema.getProp(AvroSerDe.AVRO_PROP_LOGICAL_TYPE))) {
+ return TypeInfoFactory.dateTypeInfo;
+ }
+
return typeInfoCache.retrieve(schema);
}
Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/TypeInfoToSchema.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/TypeInfoToSchema.java?rev=1628569&r1=1628568&r2=1628569&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/TypeInfoToSchema.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/TypeInfoToSchema.java Tue Sep 30 22:23:17 2014
@@ -152,6 +152,11 @@ public class TypeInfoToSchema {
"\"precision\":" + precision + "," +
"\"scale\":" + scale + "}");
break;
+ case DATE:
+ schema = AvroSerdeUtils.getSchemaFor("{" +
+ "\"type\":\"" + AvroSerDe.AVRO_INT_TYPE_NAME + "\"," +
+ "\"logicalType\":\"" + AvroSerDe.DATE_TYPE_NAME + "\"}");
+ break;
case VOID:
schema = Schema.create(Schema.Type.NULL);
break;
Modified: hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestTypeInfoToSchema.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestTypeInfoToSchema.java?rev=1628569&r1=1628568&r2=1628569&view=diff
==============================================================================
--- hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestTypeInfoToSchema.java (original)
+++ hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestTypeInfoToSchema.java Tue Sep 30 22:23:17 2014
@@ -19,9 +19,11 @@
package org.apache.hadoop.hive.serde2.avro;
import com.google.common.io.Resources;
+
import org.junit.Assert;
import org.apache.avro.Schema;
import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
@@ -44,16 +46,28 @@ public class TestTypeInfoToSchema {
private static Logger LOGGER = Logger.getLogger(TestTypeInfoToSchema.class);
private static final List<String> COLUMN_NAMES = Arrays.asList("testCol");
- private static final TypeInfo STRING = TypeInfoFactory.getPrimitiveTypeInfo("string");
- private static final TypeInfo INT = TypeInfoFactory.getPrimitiveTypeInfo("int");
- private static final TypeInfo BOOLEAN = TypeInfoFactory.getPrimitiveTypeInfo("boolean");
- private static final TypeInfo LONG = TypeInfoFactory.getPrimitiveTypeInfo("bigint");
- private static final TypeInfo FLOAT = TypeInfoFactory.getPrimitiveTypeInfo("float");
- private static final TypeInfo DOUBLE = TypeInfoFactory.getPrimitiveTypeInfo("double");
- private static final TypeInfo BINARY = TypeInfoFactory.getPrimitiveTypeInfo("binary");
- private static final TypeInfo BYTE = TypeInfoFactory.getPrimitiveTypeInfo("tinyint");
- private static final TypeInfo SHORT = TypeInfoFactory.getPrimitiveTypeInfo("smallint");
- private static final TypeInfo VOID = TypeInfoFactory.getPrimitiveTypeInfo("void");
+ private static final TypeInfo STRING = TypeInfoFactory.getPrimitiveTypeInfo(
+ serdeConstants.STRING_TYPE_NAME);
+ private static final TypeInfo INT = TypeInfoFactory.getPrimitiveTypeInfo(
+ serdeConstants.INT_TYPE_NAME);
+ private static final TypeInfo BOOLEAN = TypeInfoFactory.getPrimitiveTypeInfo(
+ serdeConstants.BOOLEAN_TYPE_NAME);
+ private static final TypeInfo LONG = TypeInfoFactory.getPrimitiveTypeInfo(
+ serdeConstants.BIGINT_TYPE_NAME);
+ private static final TypeInfo FLOAT = TypeInfoFactory.getPrimitiveTypeInfo(
+ serdeConstants.FLOAT_TYPE_NAME);
+ private static final TypeInfo DOUBLE = TypeInfoFactory.getPrimitiveTypeInfo(
+ serdeConstants.DOUBLE_TYPE_NAME);
+ private static final TypeInfo BINARY = TypeInfoFactory.getPrimitiveTypeInfo(
+ serdeConstants.BINARY_TYPE_NAME);
+ private static final TypeInfo BYTE = TypeInfoFactory.getPrimitiveTypeInfo(
+ serdeConstants.TINYINT_TYPE_NAME);
+ private static final TypeInfo SHORT = TypeInfoFactory.getPrimitiveTypeInfo(
+ serdeConstants.SMALLINT_TYPE_NAME);
+ private static final TypeInfo VOID = TypeInfoFactory.getPrimitiveTypeInfo(
+ serdeConstants.VOID_TYPE_NAME);
+ private static final TypeInfo DATE = TypeInfoFactory.getPrimitiveTypeInfo(
+ serdeConstants.DATE_TYPE_NAME);
private static final int PRECISION = 4;
private static final int SCALE = 2;
private static final TypeInfo DECIMAL = TypeInfoFactory.getPrimitiveTypeInfo(
@@ -229,6 +243,17 @@ public class TestTypeInfoToSchema {
}
@Test
+ public void createAvroDateSchema() {
+ final String specificSchema = "{" +
+ "\"type\":\"int\"," +
+ "\"logicalType\":\"date\"}";
+ String expectedSchema = genSchema(specificSchema);
+
+ Assert.assertEquals("Test for date in avro schema failed",
+ expectedSchema, getAvroSchemaString(DATE));
+ }
+
+ @Test
public void createAvroListSchema() {
ListTypeInfo listTypeInfo = new ListTypeInfo();
listTypeInfo.setListElementTypeInfo(STRING);
@@ -337,6 +362,7 @@ public class TestTypeInfoToSchema {
names.add("field11");
names.add("field12");
names.add("field13");
+ names.add("field14");
structTypeInfo.setAllStructFieldNames(names);
ArrayList<TypeInfo> typeInfos = new ArrayList<TypeInfo>();
typeInfos.add(STRING);
@@ -351,6 +377,7 @@ public class TestTypeInfoToSchema {
typeInfos.add(DOUBLE);
typeInfos.add(BOOLEAN);
typeInfos.add(DECIMAL);
+ typeInfos.add(DATE);
typeInfos.add(VOID);
structTypeInfo.setAllStructFieldTypeInfos(typeInfos);
LOGGER.info("structTypeInfo is " + structTypeInfo);
Modified: hive/trunk/serde/src/test/resources/avro-struct.avsc
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/test/resources/avro-struct.avsc?rev=1628569&r1=1628568&r2=1628569&view=diff
==============================================================================
--- hive/trunk/serde/src/test/resources/avro-struct.avsc (original)
+++ hive/trunk/serde/src/test/resources/avro-struct.avsc Tue Sep 30 22:23:17 2014
@@ -4,7 +4,7 @@
"namespace":"",
"doc":"struct<field1:string,field2:char(5),field3:varchar(5),field4:binary,field5:tinyint,
field6:smallint,field7:int,field8:bigint,field9:float,field10:double,field11:boolean,
-field12:decimal(4,2),field13:void>",
+field12:decimal(4,2),field13:date,field14:void>",
"fields":[
{"name":"field1","type":["null","string"],"doc":"string","default":null},
{"name":"field2","type":["null",{"type":"string","logicalType":"char","maxLength":5}],"doc":"char(5)","default":null},
@@ -17,8 +17,8 @@ field12:decimal(4,2),field13:void>",
{"name":"field9","type":["null","float"],"doc":"float","default":null},
{"name":"field10","type":["null","double"],"doc":"double","default":null},
{"name":"field11","type":["null","boolean"],"doc":"boolean","default":null},
-{"name":"field12","type":["null",{"type":"bytes","logicalType":"decimal","precision":4,
-"scale":2}],"doc":"decimal(4,2)","default":null},
-{"name":"field13","type":"null","doc":"void","default":null}
+{"name":"field12","type":["null",{"type":"bytes","logicalType":"decimal","precision":4,"scale":2}],"doc":"decimal(4,2)","default":null},
+{"name":"field13","type":["null",{"type":"int","logicalType":"date"}],"doc":"date","default":null},
+{"name":"field14","type":"null","doc":"void","default":null}
]
}