You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2015/04/21 03:54:09 UTC

spark git commit: [SPARK-6635][SQL] DataFrame.withColumn should replace columns with identical column names

Repository: spark
Updated Branches:
  refs/heads/master ce7ddabbc -> c736220da


[SPARK-6635][SQL] DataFrame.withColumn should replace columns with identical column names

JIRA https://issues.apache.org/jira/browse/SPARK-6635

Author: Liang-Chi Hsieh <vi...@gmail.com>

Closes #5541 from viirya/replace_with_column and squashes the following commits:

b539c7b [Liang-Chi Hsieh] For comment.
72f35b1 [Liang-Chi Hsieh] DataFrame.withColumn can replace original column with identical column name.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c736220d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c736220d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c736220d

Branch: refs/heads/master
Commit: c736220dac51cf73181fdd7f621c960c4e7bf0c2
Parents: ce7ddab
Author: Liang-Chi Hsieh <vi...@gmail.com>
Authored: Mon Apr 20 18:54:01 2015 -0700
Committer: Michael Armbrust <mi...@databricks.com>
Committed: Mon Apr 20 18:54:01 2015 -0700

----------------------------------------------------------------------
 .../main/scala/org/apache/spark/sql/DataFrame.scala   | 14 +++++++++++++-
 .../scala/org/apache/spark/sql/DataFrameSuite.scala   |  8 ++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/c736220d/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
index 17c21f6..45f5da3 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
@@ -747,7 +747,19 @@ class DataFrame private[sql](
    * Returns a new [[DataFrame]] by adding a column.
    * @group dfops
    */
-  def withColumn(colName: String, col: Column): DataFrame = select(Column("*"), col.as(colName))
+  def withColumn(colName: String, col: Column): DataFrame = {
+    val resolver = sqlContext.analyzer.resolver
+    val replaced = schema.exists(f => resolver(f.name, colName))
+    if (replaced) {
+      val colNames = schema.map { field =>
+        val name = field.name
+        if (resolver(name, colName)) col.as(colName) else Column(name)
+      }
+      select(colNames :_*)
+    } else {
+      select(Column("*"), col.as(colName))
+    }
+  }
 
   /**
    * Returns a new [[DataFrame]] with a column renamed.

http://git-wip-us.apache.org/repos/asf/spark/blob/c736220d/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index 3250ab4..b9b6a40 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -473,6 +473,14 @@ class DataFrameSuite extends QueryTest {
     assert(df.schema.map(_.name).toSeq === Seq("key", "value", "newCol"))
   }
 
+  test("replace column using withColumn") {
+    val df2 = TestSQLContext.sparkContext.parallelize(Array(1, 2, 3)).toDF("x")
+    val df3 = df2.withColumn("x", df2("x") + 1)
+    checkAnswer(
+      df3.select("x"),
+      Row(2) :: Row(3) :: Row(4) :: Nil)
+  }
+
   test("withColumnRenamed") {
     val df = testData.toDF().withColumn("newCol", col("key") + 1)
       .withColumnRenamed("value", "valueRenamed")


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org