You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by cloud-fan <gi...@git.apache.org> on 2016/05/31 16:59:49 UTC

[GitHub] spark pull request: [SPARK-15657][SQL] RowEncoder should validate the data t...

Github user cloud-fan commented on a diff in the pull request:

    https://github.com/apache/spark/pull/13401#discussion_r65221948
  
    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala ---
    @@ -721,8 +716,55 @@ case class GetExternalRowField(
               "cannot be null.");
           }
     
    -      final ${ctx.javaType(dataType)} ${ev.value} = $getField;
    +      final Object ${ev.value} = ${row.value}.get($index);
          """
         ev.copy(code = code, isNull = "false")
       }
     }
    +
    +/**
    + * Validates the actual data type of input expression at runtime.  If it doesn't match the
    + * expectation, throw an exception.
    + */
    +case class ValidateExternalType(child: Expression, expected: DataType)
    +  extends UnaryExpression with NonSQLExpression with ExpectsInputTypes {
    +
    +  override def inputTypes: Seq[AbstractDataType] = Seq(ObjectType(classOf[Object]))
    +
    +  override def nullable: Boolean = child.nullable
    +
    +  override def dataType: DataType = RowEncoder.externalDataTypeForInput(expected)
    +
    +  override def eval(input: InternalRow): Any =
    +    throw new UnsupportedOperationException("Only code-generated evaluation is supported")
    +
    +  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    +    val input = child.genCode(ctx)
    +    val obj = input.value
    +
    +    val typeCheck = expected match {
    +      case _: DecimalType =>
    +        Seq(classOf[java.math.BigDecimal], classOf[scala.math.BigDecimal], classOf[Decimal])
    +          .map(cls => s"$obj instanceof ${cls.getName}").mkString(" || ")
    +      case _: ArrayType =>
    +        s"$obj instanceof ${classOf[Seq[_]].getName} || $obj.getClass().isArray()"
    +      case _ =>
    +        s"$obj instanceof ${ctx.boxedType(dataType)}"
    +    }
    +
    +    val code = s"""
    +      ${input.code}
    +      ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)};
    --- End diff --
    
    This is by design: we can't cast an Object to int directly; we have to cast it to the boxed int type first.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes to have it, or if the feature is enabled but not working,
please contact infrastructure at infrastructure@apache.org or file a JIRA
ticket with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org