You are viewing a plain text version of this content. The canonical link for it is here.

Posted to issues@spark.apache.org by "Ulrich zink (JIRA)" <ji...@apache.org> on 2016/10/28 10:36:58 UTC

[jira] [Created] (SPARK-18163) Union unexpected behaviour when generating data frames programmatically

Ulrich zink created SPARK-18163:
-----------------------------------

             Summary: Union unexpected behaviour when generating data frames programmatically
                 Key: SPARK-18163
                 URL: https://issues.apache.org/jira/browse/SPARK-18163
             Project: Spark
          Issue Type: Bug
          Components: SQL
    Affects Versions: 2.0.1
            Reporter: Ulrich zink


// Expected behaviour: two DataFrames built from literal Seqs, both with
// columns "a" and "b", are unioned and the result is displayed.
val df1 = Seq((1,2),(3,4)).toDF("a","b")
val df2 = Seq((5,6)).toDF("a","b")
// The table printed below lists df1's rows followed by df2's row.
df1.union(df2).show()

+---+---+
|  a|  b|
+---+---+
|  1|  2|
|  3|  4|
|  5|  6|
+---+---+

// When generating the data frames programmatically

// Passed to dataset(...) as nst: the end argument of sqlContext.range(0, nst).
val nInst = 2
// Passed to dataset(...) as fltrVal: the threshold in the 'value > fltrVal filter.
val fltr = 1

// Row type used by dataset(...) via .as[Instrument]: a Long id and a Double value.
case class Instrument(id: Long,  value: Double)
// Builds a Dataset[Instrument] from sqlContext.range(0, nst): selects "id" plus a
// "value" column computed as round(abs(randn)), converts the rows with
// .as[Instrument], and then filters on value > fltrVal.
// NOTE(review): randn is presumably Spark's unseeded random-normal column function,
// so "value" may differ every time the plan is evaluated; confirm, as this looks
// relevant to the differing outputs reported below.
def dataset (nst:Int,fltrVal:Int) = sqlContext.range(0, nst).select(($"id"),
                round(abs(randn)).alias("value")).as[Instrument].filter('value > fltrVal)
// Two datasets produced by identical calls to dataset(...).
val df3 = dataset(nInst,fltr)
val df4 = dataset(nInst,fltr)

// Display each dataset and then their union; the three printed tables follow.
// In the output below, the second half of the union (1.0, 2.0) does not match
// what df4.show() printed on its own (1.0, 0.0), and the individually shown
// values are not all greater than fltr.
df3.show()
df4.show()
df3.union(df4).show()

df3: org.apache.spark.sql.Dataset[Instrument] = [id: bigint, value: double]
+---+-----+
| id|value|
+---+-----+
|  0|  1.0|
|  1|  1.0|
+---+-----+
df4: org.apache.spark.sql.Dataset[Instrument] = [id: bigint, value: double]
+---+-----+
| id|value|
+---+-----+
|  0|  1.0|
|  1|  0.0|
+---+-----+
+---+-----+
| id|value|
+---+-----+
|  0|  1.0|
|  1|  1.0|
|  0|  1.0|
|  1|  2.0|
+---+-----+




--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org