You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by yhuai <gi...@git.apache.org> on 2015/09/30 06:25:29 UTC

[GitHub] spark pull request: [SPARK-9617] [SQL] Implement json_tuple

Github user yhuai commented on a diff in the pull request:

    https://github.com/apache/spark/pull/7946#discussion_r40757438
  
    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonFunctions.scala ---
    @@ -307,3 +308,140 @@ case class GetJsonObject(json: Expression, path: Expression)
         }
       }
     }
    +
    +case class JsonTuple(children: Seq[Expression])
    +  extends Expression with CodegenFallback {
    +
    +  import SharedFactory._
    +
    +  override def nullable: Boolean = {
    +    // a row is always returned
    +    false
    +  }
    +
    +  // if processing fails this shared value will be returned
    +  @transient private lazy val nullRow: InternalRow =
    +    new GenericInternalRow(Array.ofDim[Any](fieldExpressions.length))
    +
    +  // the json body is the first child
    +  @transient private lazy val jsonExpr: Expression = children.head
    +
    +  // the fields to query are the remaining children
    +  @transient private lazy val fieldExpressions: Seq[Expression] = children.tail
    +
    +  // eagerly evaluate any foldable field names
    +  @transient private lazy val foldableFieldNames: IndexedSeq[String] = {
    +    fieldExpressions.map {
    +      case expr if expr.foldable => expr.eval().asInstanceOf[UTF8String].toString
    +      case _ => null
    +    }.toIndexedSeq
    +  }
    +
    +  // and count the number of foldable fields, we'll use this later to optimize evaluation
    +  @transient private lazy val constantFields: Int = foldableFieldNames.count(_ != null)
    +
    +  override lazy val dataType: StructType = {
    +    val fields = fieldExpressions.zipWithIndex.map {
    +      case (_, idx) => StructField(
    +        name = s"c$idx", // mirroring GenericUDTFJSONTuple.initialize
    +        dataType = StringType,
    +        nullable = true)
    +    }
    +
    +    StructType(fields)
    +  }
    +
    +  override def prettyName: String = "json_tuple"
    +
    +  override def checkInputDataTypes(): TypeCheckResult = {
    +    if (children.length < 2) {
    +      TypeCheckResult.TypeCheckFailure(s"$prettyName requires at least two arguments")
    +    } else if (children.forall(child => StringType.acceptsType(child.dataType))) {
    +      TypeCheckResult.TypeCheckSuccess
    +    } else {
    +      TypeCheckResult.TypeCheckFailure(s"$prettyName requires that all arguments are strings")
    +    }
    +  }
    +
    +  override def eval(input: InternalRow): InternalRow = {
    +    try {
    +      val json = jsonExpr.eval(input).asInstanceOf[UTF8String]
    +      if (json == null) {
    +        return nullRow
    +      }
    +
    +      val parser = jsonFactory.createParser(json.getBytes)
    --- End diff --
    
    Do we need to close this parser later?


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org