Posted to commits@spark.apache.org by rx...@apache.org on 2014/04/28 08:59:44 UTC

git commit: [SQL]Append some missing types for HiveUDF

Repository: spark
Updated Branches:
  refs/heads/master ea01affc3 -> f73588441


[SQL]Append some missing types for HiveUDF

Add the missing type mappings (timestamp, decimal, binary, and float) to javaClassToDataType, wrap/unwrap, and the ObjectInspector conversions.
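
For illustration (this example is not part of the patch), a Hive UDF whose evaluate() signature uses one of the newly mapped classes, such as java.sql.Timestamp, can now be resolved; before this change javaClassToDataType had no case for it, so resolution failed with a MatchError. A minimal, hypothetical UDF:

    import org.apache.hadoop.hive.ql.exec.UDF

    // Hypothetical UDF, shown only to illustrate the new mappings:
    // evaluate() accepts and returns java.sql.Timestamp, which
    // javaClassToDataType now maps to TimestampType.
    class UDFIdentityTimestamp extends UDF {
      def evaluate(t: java.sql.Timestamp): java.sql.Timestamp = t
    }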

Author: Cheng Hao <ha...@intel.com>

Closes #459 from chenghao-intel/missing_types and squashes the following commits:

21cba2e [Cheng Hao] Append some missing types for HiveUDF


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f7358844
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f7358844
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f7358844

Branch: refs/heads/master
Commit: f735884414a15c0c07df60068ee11f9da47eff77
Parents: ea01aff
Author: Cheng Hao <ha...@intel.com>
Authored: Sun Apr 27 23:59:42 2014 -0700
Committer: Reynold Xin <rx...@apache.org>
Committed: Sun Apr 27 23:59:42 2014 -0700

----------------------------------------------------------------------
 .../org/apache/spark/sql/hive/hiveUdfs.scala    | 58 ++++++++++++++++----
 1 file changed, 48 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/f7358844/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
index a09270e..c7de4ab 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
@@ -70,24 +70,26 @@ private[hive] object HiveFunctionRegistry
   }
 
   def javaClassToDataType(clz: Class[_]): DataType = clz match {
+    // writable
     case c: Class[_] if c == classOf[hadoopIo.DoubleWritable] => DoubleType
     case c: Class[_] if c == classOf[hiveIo.DoubleWritable] => DoubleType
     case c: Class[_] if c == classOf[hiveIo.HiveDecimalWritable] => DecimalType
     case c: Class[_] if c == classOf[hiveIo.ByteWritable] => ByteType
     case c: Class[_] if c == classOf[hiveIo.ShortWritable] => ShortType
+    case c: Class[_] if c == classOf[hiveIo.TimestampWritable] => TimestampType
     case c: Class[_] if c == classOf[hadoopIo.Text] => StringType
     case c: Class[_] if c == classOf[hadoopIo.IntWritable] => IntegerType
     case c: Class[_] if c == classOf[hadoopIo.LongWritable] => LongType
     case c: Class[_] if c == classOf[hadoopIo.FloatWritable] => FloatType
     case c: Class[_] if c == classOf[hadoopIo.BooleanWritable] => BooleanType
+    case c: Class[_] if c == classOf[hadoopIo.BytesWritable] => BinaryType
+    
+    // java class
     case c: Class[_] if c == classOf[java.lang.String] => StringType
-    case c: Class[_] if c == java.lang.Short.TYPE => ShortType
-    case c: Class[_] if c == java.lang.Integer.TYPE => IntegerType
-    case c: Class[_] if c == java.lang.Long.TYPE => LongType
-    case c: Class[_] if c == java.lang.Double.TYPE => DoubleType
-    case c: Class[_] if c == java.lang.Byte.TYPE => ByteType
-    case c: Class[_] if c == java.lang.Float.TYPE => FloatType
-    case c: Class[_] if c == java.lang.Boolean.TYPE => BooleanType
+    case c: Class[_] if c == classOf[java.sql.Timestamp] => TimestampType
+    case c: Class[_] if c == classOf[HiveDecimal] => DecimalType
+    case c: Class[_] if c == classOf[java.math.BigDecimal] => DecimalType
+    case c: Class[_] if c == classOf[Array[Byte]] => BinaryType
     case c: Class[_] if c == classOf[java.lang.Short] => ShortType
     case c: Class[_] if c == classOf[java.lang.Integer] => IntegerType
     case c: Class[_] if c == classOf[java.lang.Long] => LongType
@@ -95,6 +97,16 @@ private[hive] object HiveFunctionRegistry
     case c: Class[_] if c == classOf[java.lang.Byte] => ByteType
     case c: Class[_] if c == classOf[java.lang.Float] => FloatType
     case c: Class[_] if c == classOf[java.lang.Boolean] => BooleanType
+    
+    // primitive type
+    case c: Class[_] if c == java.lang.Short.TYPE => ShortType
+    case c: Class[_] if c == java.lang.Integer.TYPE => IntegerType
+    case c: Class[_] if c == java.lang.Long.TYPE => LongType
+    case c: Class[_] if c == java.lang.Double.TYPE => DoubleType
+    case c: Class[_] if c == java.lang.Byte.TYPE => ByteType
+    case c: Class[_] if c == java.lang.Float.TYPE => FloatType
+    case c: Class[_] if c == java.lang.Boolean.TYPE => BooleanType
+    
     case c: Class[_] if c.isArray => ArrayType(javaClassToDataType(c.getComponentType))
   }
 }
@@ -111,11 +123,19 @@ private[hive] trait HiveFunctionFactory {
     case i: hadoopIo.IntWritable => i.get
     case t: hadoopIo.Text => t.toString
     case l: hadoopIo.LongWritable => l.get
-    case d: hadoopIo.DoubleWritable => d.get()
+    case d: hadoopIo.DoubleWritable => d.get
     case d: hiveIo.DoubleWritable => d.get
     case s: hiveIo.ShortWritable => s.get
-    case b: hadoopIo.BooleanWritable => b.get()
+    case b: hadoopIo.BooleanWritable => b.get
     case b: hiveIo.ByteWritable => b.get
+    case b: hadoopIo.FloatWritable => b.get
+    case b: hadoopIo.BytesWritable => {
+      val bytes = new Array[Byte](b.getLength)
+      System.arraycopy(b.getBytes(), 0, bytes, 0, b.getLength)
+      bytes
+    }
+    case t: hiveIo.TimestampWritable => t.getTimestamp
+    case b: hiveIo.HiveDecimalWritable => BigDecimal(b.getHiveDecimal().bigDecimalValue())
     case list: java.util.List[_] => list.map(unwrap)
     case map: java.util.Map[_,_] => map.map { case (k, v) => (unwrap(k), unwrap(v)) }.toMap
     case array: Array[_] => array.map(unwrap).toSeq
@@ -127,6 +147,9 @@ private[hive] trait HiveFunctionFactory {
     case p: java.lang.Byte => p
     case p: java.lang.Boolean => p
     case str: String => str
+    case p: BigDecimal => p
+    case p: Array[Byte] => p
+    case p: java.sql.Timestamp => p
   }
 }
 
@@ -252,13 +275,17 @@ private[hive] trait HiveInspectors {
 
   /** Converts native catalyst types to the types expected by Hive */
   def wrap(a: Any): AnyRef = a match {
-    case s: String => new hadoopIo.Text(s)
+    case s: String => new hadoopIo.Text(s) // TODO: why should this be Text?
     case i: Int => i: java.lang.Integer
     case b: Boolean => b: java.lang.Boolean
+    case f: Float => f: java.lang.Float
     case d: Double => d: java.lang.Double
     case l: Long => l: java.lang.Long
     case l: Short => l: java.lang.Short
     case l: Byte => l: java.lang.Byte
+    case b: BigDecimal => b.bigDecimal
+    case b: Array[Byte] => b
+    case t: java.sql.Timestamp => t
     case s: Seq[_] => seqAsJavaList(s.map(wrap))
     case m: Map[_,_] =>
       mapAsJavaMap(m.map { case (k, v) => wrap(k) -> wrap(v) })
@@ -280,6 +307,8 @@ private[hive] trait HiveInspectors {
     case ByteType => PrimitiveObjectInspectorFactory.javaByteObjectInspector
     case NullType => PrimitiveObjectInspectorFactory.javaVoidObjectInspector
     case BinaryType => PrimitiveObjectInspectorFactory.javaByteArrayObjectInspector
+    case TimestampType => PrimitiveObjectInspectorFactory.javaTimestampObjectInspector
+    case DecimalType => PrimitiveObjectInspectorFactory.javaHiveDecimalObjectInspector
   }
 
   def inspectorToDataType(inspector: ObjectInspector): DataType = inspector match {
@@ -307,6 +336,14 @@ private[hive] trait HiveInspectors {
     case _: JavaShortObjectInspector => ShortType
     case _: WritableByteObjectInspector => ByteType
     case _: JavaByteObjectInspector => ByteType
+    case _: WritableFloatObjectInspector => FloatType
+    case _: JavaFloatObjectInspector => FloatType
+    case _: WritableBinaryObjectInspector => BinaryType
+    case _: JavaBinaryObjectInspector => BinaryType
+    case _: WritableHiveDecimalObjectInspector => DecimalType
+    case _: JavaHiveDecimalObjectInspector => DecimalType
+    case _: WritableTimestampObjectInspector => TimestampType
+    case _: JavaTimestampObjectInspector => TimestampType
   }
 
   implicit class typeInfoConversions(dt: DataType) {
@@ -324,6 +361,7 @@ private[hive] trait HiveInspectors {
       case ShortType => shortTypeInfo
       case StringType => stringTypeInfo
       case DecimalType => decimalTypeInfo
+      case TimestampType => timestampTypeInfo
       case NullType => voidTypeInfo
     }
   }
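
A rough sketch of what the new inspector cases buy (a hypothetical harness, assuming it compiles inside the org.apache.spark.sql.hive package, since HiveInspectors is private[hive]): each newly covered Catalyst type now round-trips through toInspector and inspectorToDataType.

    package org.apache.spark.sql.hive

    import org.apache.spark.sql.catalyst.types._

    // Hypothetical check, not part of the patch: with the cases added in
    // this commit, DataType -> ObjectInspector -> DataType is the identity
    // for float, binary, decimal, and timestamp.
    object InspectorRoundTrip extends HiveInspectors {
      def main(args: Array[String]): Unit = {
        Seq(FloatType, BinaryType, DecimalType, TimestampType).foreach { dt =>
          assert(inspectorToDataType(toInspector(dt)) == dt)
        }
      }
    }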