You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2015/02/02 02:52:22 UTC

spark git commit: [Minor][SQL] Little refactor DataFrame related codes

Repository: spark
Updated Branches:
  refs/heads/master 883bc88d5 -> ef89b82d8


[Minor][SQL] Little refactor DataFrame related codes

Simplify some codes related to DataFrame.

*  Calling `toAttributes` instead of a `map`.
*  Original `createDataFrame` creates the `StructType` and its attributes in a redundant way. Refactored it to create `StructType` and call `toAttributes` on it directly.

Author: Liang-Chi Hsieh <vi...@gmail.com>

Closes #4298 from viirya/refactor_df and squashes the following commits:

1d61c64 [Liang-Chi Hsieh] Revert it.
f36efb5 [Liang-Chi Hsieh] Relax the constraint of toDataFrame.
2c9f370 [Liang-Chi Hsieh] Just refactor DataFrame codes.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ef89b82d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ef89b82d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ef89b82d

Branch: refs/heads/master
Commit: ef89b82d831d1d35dfaa6387ff2077ea2f2073cc
Parents: 883bc88
Author: Liang-Chi Hsieh <vi...@gmail.com>
Authored: Sun Feb 1 17:52:18 2015 -0800
Committer: Michael Armbrust <mi...@databricks.com>
Committed: Sun Feb 1 17:52:18 2015 -0800

----------------------------------------------------------------------
 .../scala/org/apache/spark/sql/catalyst/ScalaReflection.scala    | 2 +-
 sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala    | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/ef89b82d/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala
index 90646fd..e0db587 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala
@@ -98,7 +98,7 @@ trait ScalaReflection {
   /** Returns a Sequence of attributes for the given case class type. */
   def attributesFor[T: TypeTag]: Seq[Attribute] = schemaFor[T] match {
     case Schema(s: StructType, _) =>
-      s.fields.map(f => AttributeReference(f.name, f.dataType, f.nullable, f.metadata)())
+      s.toAttributes
   }
 
   /** Returns a catalyst DataType and its nullability for the given Scala Type using reflection. */

http://git-wip-us.apache.org/repos/asf/spark/blob/ef89b82d/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
index f87fde4..84933dd 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -168,8 +168,8 @@ class SQLContext(@transient val sparkContext: SparkContext)
    */
   implicit def createDataFrame[A <: Product: TypeTag](rdd: RDD[A]): DataFrame = {
     SparkPlan.currentContext.set(self)
-    val attributeSeq = ScalaReflection.attributesFor[A]
-    val schema = StructType.fromAttributes(attributeSeq)
+    val schema = ScalaReflection.schemaFor[A].dataType.asInstanceOf[StructType]
+    val attributeSeq = schema.toAttributes
     val rowRDD = RDDConversions.productToRowRdd(rdd, schema)
     new DataFrame(this, LogicalRDD(attributeSeq, rowRDD)(self))
   }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org