Posted to reviews@spark.apache.org by ueshin <gi...@git.apache.org> on 2018/10/02 09:11:59 UTC

[GitHub] spark pull request #22527: [SPARK-17952][SQL] Nested Java beans support in c...

Github user ueshin commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22527#discussion_r221857463
  
    --- Diff: sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala ---
    @@ -1100,13 +1101,23 @@ object SQLContext {
           attrs: Seq[AttributeReference]): Iterator[InternalRow] = {
         val extractors =
           JavaTypeInference.getJavaBeanReadableProperties(beanClass).map(_.getReadMethod)
    -    val methodsToConverts = extractors.zip(attrs).map { case (e, attr) =>
    -      (e, CatalystTypeConverters.createToCatalystConverter(attr.dataType))
    +    val methodsToTypes = extractors.zip(attrs).map { case (e, attr) =>
    +      (e, attr.dataType)
    +    }
    +    def invoke(element: Any)(tuple: (Method, DataType)): Any = tuple match {
    --- End diff ---
    
    Can we create converters before `data.map { ... }` instead of calculating converters for each row?
    
    I mean something like:
    
    ```scala
    def converter(e: Method, dt: DataType): Any => Any = dt match {
      case StructType(fields) =>
        val nestedExtractors =
          JavaTypeInference.getJavaBeanReadableProperties(e.getReturnType).map(_.getReadMethod)
        val nestedConverters =
          nestedExtractors.zip(fields).map { case (extractor, field) =>
            converter(extractor, field.dataType)
          }
    
        element => {
          val value = e.invoke(element)
          new GenericInternalRow(nestedConverters.map(_(value)))
        }
      case _ =>
        val convert = CatalystTypeConverters.createToCatalystConverter(dt)
        element => convert(e.invoke(element))
    }
    ```
    and then
    ```scala
    val converters = extractors.zip(attrs).map { case (e, attr) =>
      converter(e, attr.dataType)
    }
    data.map { element =>
      new GenericInternalRow(converters.map(_(element))): InternalRow
    }
    ```
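    As a minimal, self-contained illustration of the same idea (build the converters once, then reuse them for every element), here is a sketch that uses only JDK reflection; the `Person`/`Address` beans and the upper-casing leaf conversion are hypothetical and not part of this patch:
    ```scala
    import java.lang.reflect.Method

    // Hypothetical beans used only for this illustration.
    class Address(city: String) { def getCity: String = city }
    class Person(name: String, address: Address) {
      def getName: String = name
      def getAddress: Address = address
    }

    object ConverterSketch {
      // Build one closure per getter up front; the closures are reused for
      // every element, so reflection lookups happen only once.
      def converter(getter: Method): AnyRef => AnyRef =
        if (getter.getReturnType == classOf[Address]) {
          val nested = classOf[Address].getMethod("getCity")
          element => nested.invoke(getter.invoke(element))                // one level of nesting
        } else {
          element => String.valueOf(getter.invoke(element)).toUpperCase   // leaf conversion
        }

      def main(args: Array[String]): Unit = {
        val getters = Array(classOf[Person].getMethod("getName"),
                            classOf[Person].getMethod("getAddress"))
        val converters = getters.map(converter)                           // computed once, not per row
        val data = Seq(new Person("alice", new Address("tokyo")))
        data.foreach { p => println(converters.map(_(p)).mkString(", ")) }
      }
    }
    ```
    The structure mirrors the suggestion above: `converter(e, attr.dataType)` closes over any nested converters it builds, so the `data.map` loop only pays the cost of the `invoke` calls per row.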



---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org