You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@hive.apache.org by Ophir Etzion <op...@foursquare.com> on 2016/01/12 16:55:03 UTC

sporadic `Unable to find class` with anonymous functions in udf

using cdh5.4.3 (hive1.1) via HiveServer.

Does anyone have a suggestion about what to do / look for?

the error:

org.apache.hadoop.hive.ql.parse.SemanticException: Generate Map Join Task
Error: Unable to find class:
com.foursquare.hadoop.hive.udf.IsDefinedUDF$$anonfun$initialize$6
Serialization trace:
isDefinedFunc (com.foursquare.hadoop.hive.udf.IsDefinedUDF)
genericUDF (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)
chidren (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)
chidren (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)
chidren (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)
chidren (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)
chidren (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)
predicate (org.apache.hadoop.hive.ql.plan.FilterDesc)
conf (org.apache.hadoop.hive.ql.exec.FilterOperator)
opParseCtxMap (org.apache.hadoop.hive.ql.plan.MapWork)
mapWork (org.apache.hadoop.hive.ql.plan.MapredWork)
at
org.apache.hadoop.hive.ql.optimizer.physical.CommonJoinTaskDispatcher.processCurrentTask(CommonJoinTaskDispatcher.java:517)
at
org.apache.hadoop.hive.ql.optimizer.physical.AbstractJoinTaskDispatcher.dispatch(AbstractJoinTaskDispatcher.java:180)
at
org.apache.hadoop.hive.ql.lib.TaskGraphWalker.dispatch(TaskGraphWalker.java:111)
at
org.apache.hadoop.hive.ql.lib.TaskGraphWalker.walk(TaskGraphWalker.java:180)
....

the udf:

@Description(name = "isDefined",
value = "returns true if the object is not null and not empty and not \"\"",
extended = "Example:\n" +
  "SELECT isDefined(col)\n")
class IsDefinedUDF extends GenericUDF with Serializable {
  var isDefinedFunc: Option[Object] => Boolean = null

  override def initialize(arguments: Array[ObjectInspector]):
ObjectInspector = {
    val arg = arguments.toVector

    if (arg.length !=? 1) {
      throw new UDFArgumentLengthException("isDefined only takes one
argument.")
    }

    Option(arg.head) match {
      case Some(a: ListObjectInspector) => {
        isDefinedFunc = {obj => obj.map(o =>
!(a.getList(o).asScala.toList.isEmpty)).getOrElse(false)}
      }
      case Some(a: MapObjectInspector) => {
        isDefinedFunc = {obj => obj.map(o =>
!(a.getMap(o).asScala.toMap.isEmpty)).getOrElse(false)}
      }
      case Some(a: LazyStringObjectInspector) => {
        isDefinedFunc = {obj => a.getPrimitiveJavaObject(obj.getOrElse(new
LazyString(a))) != ""}
      }
      case Some(a: StringObjectInspector) => {
        isDefinedFunc = {obj => a.getPrimitiveJavaObject(obj.getOrElse(new
Text(""))) != ""}
      }
      case None => {
        isDefinedFunc = {x => false}
      }
      case _ => {
        isDefinedFunc = {obj => obj.isDefined}
      }
    }

    PrimitiveObjectInspectorFactory.javaBooleanObjectInspector
  }

  override def evaluate(arguments: Array[DeferredObject]): Object = {
    val arg = arguments.toVector.head

    isDefinedFunc(Option(arg.get())): java.lang.Boolean
  }

  override def getDisplayString(children: Array[String]) = {
    "isDefined(" + children(0) + ")"
  }
}