You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by rx...@apache.org on 2014/04/28 08:59:44 UTC
git commit: [SQL]Append some missing types for HiveUDF
Repository: spark
Updated Branches:
refs/heads/master ea01affc3 -> f73588441
[SQL]Append some missing types for HiveUDF
Add the missing types (Timestamp, Binary, Decimal and Float) to the Hive UDF type conversions.
Author: Cheng Hao <ha...@intel.com>
Closes #459 from chenghao-intel/missing_types and squashes the following commits:
21cba2e [Cheng Hao] Append some missing types for HiveUDF
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f7358844
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f7358844
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f7358844
Branch: refs/heads/master
Commit: f735884414a15c0c07df60068ee11f9da47eff77
Parents: ea01aff
Author: Cheng Hao <ha...@intel.com>
Authored: Sun Apr 27 23:59:42 2014 -0700
Committer: Reynold Xin <rx...@apache.org>
Committed: Sun Apr 27 23:59:42 2014 -0700
----------------------------------------------------------------------
.../org/apache/spark/sql/hive/hiveUdfs.scala | 58 ++++++++++++++++----
1 file changed, 48 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/f7358844/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
index a09270e..c7de4ab 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
@@ -70,24 +70,26 @@ private[hive] object HiveFunctionRegistry
}
def javaClassToDataType(clz: Class[_]): DataType = clz match {
+ // writable
case c: Class[_] if c == classOf[hadoopIo.DoubleWritable] => DoubleType
case c: Class[_] if c == classOf[hiveIo.DoubleWritable] => DoubleType
case c: Class[_] if c == classOf[hiveIo.HiveDecimalWritable] => DecimalType
case c: Class[_] if c == classOf[hiveIo.ByteWritable] => ByteType
case c: Class[_] if c == classOf[hiveIo.ShortWritable] => ShortType
+ case c: Class[_] if c == classOf[hiveIo.TimestampWritable] => TimestampType
case c: Class[_] if c == classOf[hadoopIo.Text] => StringType
case c: Class[_] if c == classOf[hadoopIo.IntWritable] => IntegerType
case c: Class[_] if c == classOf[hadoopIo.LongWritable] => LongType
case c: Class[_] if c == classOf[hadoopIo.FloatWritable] => FloatType
case c: Class[_] if c == classOf[hadoopIo.BooleanWritable] => BooleanType
+ case c: Class[_] if c == classOf[hadoopIo.BytesWritable] => BinaryType
+
+ // java class
case c: Class[_] if c == classOf[java.lang.String] => StringType
- case c: Class[_] if c == java.lang.Short.TYPE => ShortType
- case c: Class[_] if c == java.lang.Integer.TYPE => IntegerType
- case c: Class[_] if c == java.lang.Long.TYPE => LongType
- case c: Class[_] if c == java.lang.Double.TYPE => DoubleType
- case c: Class[_] if c == java.lang.Byte.TYPE => ByteType
- case c: Class[_] if c == java.lang.Float.TYPE => FloatType
- case c: Class[_] if c == java.lang.Boolean.TYPE => BooleanType
+ case c: Class[_] if c == classOf[java.sql.Timestamp] => TimestampType
+ case c: Class[_] if c == classOf[HiveDecimal] => DecimalType
+ case c: Class[_] if c == classOf[java.math.BigDecimal] => DecimalType
+ case c: Class[_] if c == classOf[Array[Byte]] => BinaryType
case c: Class[_] if c == classOf[java.lang.Short] => ShortType
case c: Class[_] if c == classOf[java.lang.Integer] => IntegerType
case c: Class[_] if c == classOf[java.lang.Long] => LongType
@@ -95,6 +97,16 @@ private[hive] object HiveFunctionRegistry
case c: Class[_] if c == classOf[java.lang.Byte] => ByteType
case c: Class[_] if c == classOf[java.lang.Float] => FloatType
case c: Class[_] if c == classOf[java.lang.Boolean] => BooleanType
+
+ // primitive type
+ case c: Class[_] if c == java.lang.Short.TYPE => ShortType
+ case c: Class[_] if c == java.lang.Integer.TYPE => IntegerType
+ case c: Class[_] if c == java.lang.Long.TYPE => LongType
+ case c: Class[_] if c == java.lang.Double.TYPE => DoubleType
+ case c: Class[_] if c == java.lang.Byte.TYPE => ByteType
+ case c: Class[_] if c == java.lang.Float.TYPE => FloatType
+ case c: Class[_] if c == java.lang.Boolean.TYPE => BooleanType
+
case c: Class[_] if c.isArray => ArrayType(javaClassToDataType(c.getComponentType))
}
}
@@ -111,11 +123,19 @@ private[hive] trait HiveFunctionFactory {
case i: hadoopIo.IntWritable => i.get
case t: hadoopIo.Text => t.toString
case l: hadoopIo.LongWritable => l.get
- case d: hadoopIo.DoubleWritable => d.get()
+ case d: hadoopIo.DoubleWritable => d.get
case d: hiveIo.DoubleWritable => d.get
case s: hiveIo.ShortWritable => s.get
- case b: hadoopIo.BooleanWritable => b.get()
+ case b: hadoopIo.BooleanWritable => b.get
case b: hiveIo.ByteWritable => b.get
+ case b: hadoopIo.FloatWritable => b.get
+ case b: hadoopIo.BytesWritable => {
+ val bytes = new Array[Byte](b.getLength)
+ System.arraycopy(b.getBytes(), 0, bytes, 0, b.getLength)
+ bytes
+ }
+ case t: hiveIo.TimestampWritable => t.getTimestamp
+ case b: hiveIo.HiveDecimalWritable => BigDecimal(b.getHiveDecimal().bigDecimalValue())
case list: java.util.List[_] => list.map(unwrap)
case map: java.util.Map[_,_] => map.map { case (k, v) => (unwrap(k), unwrap(v)) }.toMap
case array: Array[_] => array.map(unwrap).toSeq
@@ -127,6 +147,9 @@ private[hive] trait HiveFunctionFactory {
case p: java.lang.Byte => p
case p: java.lang.Boolean => p
case str: String => str
+ case p: BigDecimal => p
+ case p: Array[Byte] => p
+ case p: java.sql.Timestamp => p
}
}
@@ -252,13 +275,17 @@ private[hive] trait HiveInspectors {
/** Converts native catalyst types to the types expected by Hive */
def wrap(a: Any): AnyRef = a match {
- case s: String => new hadoopIo.Text(s)
+ case s: String => new hadoopIo.Text(s) // TODO why should be Text?
case i: Int => i: java.lang.Integer
case b: Boolean => b: java.lang.Boolean
+ case f: Float => f: java.lang.Float
case d: Double => d: java.lang.Double
case l: Long => l: java.lang.Long
case l: Short => l: java.lang.Short
case l: Byte => l: java.lang.Byte
+ case b: BigDecimal => b.bigDecimal
+ case b: Array[Byte] => b
+ case t: java.sql.Timestamp => t
case s: Seq[_] => seqAsJavaList(s.map(wrap))
case m: Map[_,_] =>
mapAsJavaMap(m.map { case (k, v) => wrap(k) -> wrap(v) })
@@ -280,6 +307,8 @@ private[hive] trait HiveInspectors {
case ByteType => PrimitiveObjectInspectorFactory.javaByteObjectInspector
case NullType => PrimitiveObjectInspectorFactory.javaVoidObjectInspector
case BinaryType => PrimitiveObjectInspectorFactory.javaByteArrayObjectInspector
+ case TimestampType => PrimitiveObjectInspectorFactory.javaTimestampObjectInspector
+ case DecimalType => PrimitiveObjectInspectorFactory.javaHiveDecimalObjectInspector
}
def inspectorToDataType(inspector: ObjectInspector): DataType = inspector match {
@@ -307,6 +336,14 @@ private[hive] trait HiveInspectors {
case _: JavaShortObjectInspector => ShortType
case _: WritableByteObjectInspector => ByteType
case _: JavaByteObjectInspector => ByteType
+ case _: WritableFloatObjectInspector => FloatType
+ case _: JavaFloatObjectInspector => FloatType
+ case _: WritableBinaryObjectInspector => BinaryType
+ case _: JavaBinaryObjectInspector => BinaryType
+ case _: WritableHiveDecimalObjectInspector => DecimalType
+ case _: JavaHiveDecimalObjectInspector => DecimalType
+ case _: WritableTimestampObjectInspector => TimestampType
+ case _: JavaTimestampObjectInspector => TimestampType
}
implicit class typeInfoConversions(dt: DataType) {
@@ -324,6 +361,7 @@ private[hive] trait HiveInspectors {
case ShortType => shortTypeInfo
case StringType => stringTypeInfo
case DecimalType => decimalTypeInfo
+ case TimestampType => timestampTypeInfo
case NullType => voidTypeInfo
}
}