You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "Cheng Lian (JIRA)" <ji...@apache.org> on 2015/05/12 03:16:59 UTC

[jira] [Updated] (SPARK-7269) Incorrect aggregation analysis

     [ https://issues.apache.org/jira/browse/SPARK-7269?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Cheng Lian updated SPARK-7269:
------------------------------
    Description: 
In a case insensitive analyzer (HiveContext), the attribute name captial differences will fail the analysis check for aggregation.
{code}
test("check analysis failed in case in-sensitive") {
    Seq(1,2,3).map(i => (i, i.toString)).toDF("key", "value").registerTempTable("df_analysis")
    sql("SELECT kEy from df_analysis group by key")
}
{code}

{noformat}
expression 'kEy' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() if you don't care which value you get.;
org.apache.spark.sql.AnalysisException: expression 'kEy' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() if you don't care which value you get.;
	at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.failAnalysis(CheckAnalysis.scala:38)
	at org.apache.spark.sql.catalyst.analysis.Analyzer.failAnalysis(Analyzer.scala:39)
	at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.org$apache$spark$sql$catalyst$analysis$CheckAnalysis$class$$anonfun$$checkValidAggregateExpression$1(CheckAnalysis.scala:85)
	at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$4.apply(CheckAnalysis.scala:101)
	at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$4.apply(CheckAnalysis.scala:101)
	at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
	at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
	at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:101)
	at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:50)
	at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:89)
	at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.checkAnalysis(CheckAnalysis.scala:50)
	at org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:39)
	at org.apache.spark.sql.SQLContext$QueryExecution.assertAnalyzed(SQLContext.scala:1121)
	at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:133)
	at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:51)
	at org.apache.spark.sql.hive.HiveContext.sql(HiveContext.scala:97)
	at org.apache.spark.sql.hive.execution.SQLQuerySuite$$anonfun$15.apply$mcV$sp(SQLQuerySuite.scala:408)
	at org.apache.spark.sql.hive.execution.SQLQuerySuite$$anonfun$15.apply(SQLQuerySuite.scala:406)
	at org.apache.spark.sql.hive.execution.SQLQuerySuite$$anonfun$15.apply(SQLQuerySuite.scala:406)
	at org.scalatest.Transformer$$anonfun$apply$1.apply$mcV$sp(Transformer.scala:22)
	at org.scalatest.OutcomeOf$class.outcomeOf(OutcomeOf.scala:85)
	at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104)
	at org.scalatest.Transformer.apply(Transformer.scala:22)
	at org.scalatest.Transformer.apply(Transformer.scala:20)
	at org.scalatest.FunSuiteLike$$anon$1.apply(FunSuiteLike.scala:166)
	at org.scalatest.Suite$class.withFixture(Suite.scala:1122)
	at org.scalatest.FunSuite.withFixture(FunSuite.scala:1555)
	at org.scalatest.FunSuiteLike$class.invokeWithFixture$1(FunSuiteLike.scala:163)
{noformat}

  was:
In a case insensitive analyzer (HiveContext), the attribute name captial differences will fail the analysis check for aggregation.
{code}
test("check analysis failed in case in-sensitive") {
    Seq(1,2,3).map(i => (i, i.toString)).toDF("key", "value").registerTempTable("df_analysis")
    sql("SELECT kEy from df_analysis group by key")
}
{code}

{panel}
expression 'kEy' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() if you don't care which value you get.;
org.apache.spark.sql.AnalysisException: expression 'kEy' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() if you don't care which value you get.;
	at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.failAnalysis(CheckAnalysis.scala:38)
	at org.apache.spark.sql.catalyst.analysis.Analyzer.failAnalysis(Analyzer.scala:39)
	at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.org$apache$spark$sql$catalyst$analysis$CheckAnalysis$class$$anonfun$$checkValidAggregateExpression$1(CheckAnalysis.scala:85)
	at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$4.apply(CheckAnalysis.scala:101)
	at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$4.apply(CheckAnalysis.scala:101)
	at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
	at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
	at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:101)
	at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:50)
	at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:89)
	at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.checkAnalysis(CheckAnalysis.scala:50)
	at org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:39)
	at org.apache.spark.sql.SQLContext$QueryExecution.assertAnalyzed(SQLContext.scala:1121)
	at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:133)
	at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:51)
	at org.apache.spark.sql.hive.HiveContext.sql(HiveContext.scala:97)
	at org.apache.spark.sql.hive.execution.SQLQuerySuite$$anonfun$15.apply$mcV$sp(SQLQuerySuite.scala:408)
	at org.apache.spark.sql.hive.execution.SQLQuerySuite$$anonfun$15.apply(SQLQuerySuite.scala:406)
	at org.apache.spark.sql.hive.execution.SQLQuerySuite$$anonfun$15.apply(SQLQuerySuite.scala:406)
	at org.scalatest.Transformer$$anonfun$apply$1.apply$mcV$sp(Transformer.scala:22)
	at org.scalatest.OutcomeOf$class.outcomeOf(OutcomeOf.scala:85)
	at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104)
	at org.scalatest.Transformer.apply(Transformer.scala:22)
	at org.scalatest.Transformer.apply(Transformer.scala:20)
	at org.scalatest.FunSuiteLike$$anon$1.apply(FunSuiteLike.scala:166)
	at org.scalatest.Suite$class.withFixture(Suite.scala:1122)
	at org.scalatest.FunSuite.withFixture(FunSuite.scala:1555)
	at org.scalatest.FunSuiteLike$class.invokeWithFixture$1(FunSuiteLike.scala:163)
{panel}


> Incorrect aggregation analysis
> ------------------------------
>
>                 Key: SPARK-7269
>                 URL: https://issues.apache.org/jira/browse/SPARK-7269
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>            Reporter: Cheng Hao
>            Priority: Blocker
>
> In a case insensitive analyzer (HiveContext), the attribute name captial differences will fail the analysis check for aggregation.
> {code}
> test("check analysis failed in case in-sensitive") {
>     Seq(1,2,3).map(i => (i, i.toString)).toDF("key", "value").registerTempTable("df_analysis")
>     sql("SELECT kEy from df_analysis group by key")
> }
> {code}
> {noformat}
> expression 'kEy' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() if you don't care which value you get.;
> org.apache.spark.sql.AnalysisException: expression 'kEy' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() if you don't care which value you get.;
> 	at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.failAnalysis(CheckAnalysis.scala:38)
> 	at org.apache.spark.sql.catalyst.analysis.Analyzer.failAnalysis(Analyzer.scala:39)
> 	at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.org$apache$spark$sql$catalyst$analysis$CheckAnalysis$class$$anonfun$$checkValidAggregateExpression$1(CheckAnalysis.scala:85)
> 	at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$4.apply(CheckAnalysis.scala:101)
> 	at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$4.apply(CheckAnalysis.scala:101)
> 	at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
> 	at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
> 	at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:101)
> 	at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:50)
> 	at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:89)
> 	at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.checkAnalysis(CheckAnalysis.scala:50)
> 	at org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:39)
> 	at org.apache.spark.sql.SQLContext$QueryExecution.assertAnalyzed(SQLContext.scala:1121)
> 	at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:133)
> 	at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:51)
> 	at org.apache.spark.sql.hive.HiveContext.sql(HiveContext.scala:97)
> 	at org.apache.spark.sql.hive.execution.SQLQuerySuite$$anonfun$15.apply$mcV$sp(SQLQuerySuite.scala:408)
> 	at org.apache.spark.sql.hive.execution.SQLQuerySuite$$anonfun$15.apply(SQLQuerySuite.scala:406)
> 	at org.apache.spark.sql.hive.execution.SQLQuerySuite$$anonfun$15.apply(SQLQuerySuite.scala:406)
> 	at org.scalatest.Transformer$$anonfun$apply$1.apply$mcV$sp(Transformer.scala:22)
> 	at org.scalatest.OutcomeOf$class.outcomeOf(OutcomeOf.scala:85)
> 	at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104)
> 	at org.scalatest.Transformer.apply(Transformer.scala:22)
> 	at org.scalatest.Transformer.apply(Transformer.scala:20)
> 	at org.scalatest.FunSuiteLike$$anon$1.apply(FunSuiteLike.scala:166)
> 	at org.scalatest.Suite$class.withFixture(Suite.scala:1122)
> 	at org.scalatest.FunSuite.withFixture(FunSuite.scala:1555)
> 	at org.scalatest.FunSuiteLike$class.invokeWithFixture$1(FunSuiteLike.scala:163)
> {noformat}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org