You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@drill.apache.org by "Chun Chang (JIRA)" <ji...@apache.org> on 2014/12/17 02:23:13 UTC
[jira] [Created] (DRILL-1880) Query with three where conditions
returned wrong results
Chun Chang created DRILL-1880:
---------------------------------
Summary: Query with three where conditions returned wrong results
Key: DRILL-1880
URL: https://issues.apache.org/jira/browse/DRILL-1880
Project: Apache Drill
Issue Type: Bug
Components: Execution - Flow
Affects Versions: 0.7.0
Reporter: Chun Chang
#Mon Dec 15 11:37:23 EST 2014
git.commit.id.abbrev=3b0ff5d
The following query containing three where conditions returned wrong results. (data is too big so not included here.)
{code}
SELECT t.gbyi,
Count(t.id)
FROM `complex.json` t
WHERE t.gbyi <= 5
OR t.gbyi >= 11
AND t.gbyt <> 'ooof'
GROUP BY t.gbyi
ORDER BY t.gbyi;
{code}
Wrong result, the count column is mostly wrong:
{code}
0: jdbc:drill:schema=dfs.drillTestDirComplexJ> select t.gbyi, count(t.id) from `complex.json` t where t.gbyi <= 5 or t.gbyi >= 11 and t.gbyt <> 'ooof' group by t.gbyi order by t.gbyi;
+------------+------------+
| gbyi | EXPR$1 |
+------------+------------+
| 0 | 66943 |
| 1 | 66318 |
| 2 | 66994 |
| 3 | 66683 |
| 4 | 66638 |
| 5 | 66439 |
| 11 | 63172 |
| 12 | 63008 |
| 13 | 62685 |
| 14 | 62970 |
+------------+------------+
{code}
Reduce the where condition to just two gives the correct result:
{code}
SELECT t.gbyi,
Count(t.id)
FROM `complex.json` t
WHERE t.gbyi <= 5
AND t.gbyt <> 'ooof'
GROUP BY t.gbyi
ORDER BY t.gbyi;
{code}
{code}
0: jdbc:drill:schema=dfs.drillTestDirComplexJ> select t.gbyi, count(t.id) from `complex.json` t where t.gbyi <= 5 and t.gbyt <> 'ooof' group by t.gbyi order by t.gbyi;
+------------+------------+
| gbyi | EXPR$1 |
+------------+------------+
| 0 | 63305 |
| 1 | 62671 |
| 2 | 63249 |
| 3 | 63070 |
| 4 | 62967 |
| 5 | 62737 |
+------------+------------+
{code}
physical plan for the query returned wrong result:
{code}
0: jdbc:drill:schema=dfs.drillTestDirComplexJ> explain plan for select t.gbyi, count(t.id) from `complex.json` t where t.gbyi <= 5 or t.gbyi >= 11 and t.gbyt <> 'ooof' group by t.gbyi order by t.gbyi;
+------------+------------+
| text | json |
+------------+------------+
| 00-00 Screen
00-01 Project(gbyi=[$0], EXPR$1=[$1])
00-02 SelectionVectorRemover
00-03 Sort(sort0=[$0], dir0=[ASC])
00-04 HashAgg(group=[{0}], EXPR$1=[$SUM0($1)])
00-05 HashAgg(group=[{0}], EXPR$1=[COUNT($1)])
00-06 Project(gbyi=[$0], id=[$2])
00-07 SelectionVectorRemover
00-08 Filter(condition=[OR(<=($0, 5), AND(>=($0, 11), <>($1, 'ooof')))])
00-09 Project(gbyi=[$1], gbyt=[$2], id=[$0])
00-10 Scan(groupscan=[EasyGroupScan [selectionRoot=/drill/testdata/complex_type/json/complex.json, numFiles=1, columns=[`gbyi`, `gbyt`, `id`], files=[maprfs:/drill/testdata/complex_type/json/complex.json]]])
| {
"head" : {
"version" : 1,
"generator" : {
"type" : "ExplainHandler",
"info" : ""
},
"type" : "APACHE_DRILL_PHYSICAL",
"options" : [ ],
"queue" : 0,
"resultMode" : "EXEC"
},
"graph" : [ {
"pop" : "fs-scan",
"@id" : 10,
"files" : [ "maprfs:/drill/testdata/complex_type/json/complex.json" ],
"storage" : {
"type" : "file",
"enabled" : true,
"connection" : "maprfs:///",
"workspaces" : {
"root" : {
"location" : "/",
"writable" : false,
"defaultInputFormat" : null
},
"tmp" : {
"location" : "/tmp",
"writable" : true,
"defaultInputFormat" : "csv"
},
"drillTestDir" : {
"location" : "/drill/testdata/",
"writable" : true,
"defaultInputFormat" : "parquet"
},
"drillTestDirComplexJson" : {
"location" : "/drill/testdata/complex_type/json",
"writable" : true,
"defaultInputFormat" : "json"
},
"drillTestDirAmplab" : {
"location" : "/drill/testdata/amplab",
"writable" : true,
"defaultInputFormat" : "parquet"
},
"drillTestDirInformationSchema" : {
"location" : "/drill/testdata/information-schema",
"writable" : true,
"defaultInputFormat" : "parquet"
},
"drillTestDirUdfs" : {
"location" : "/drill/testdata/udfs/",
"writable" : true,
"defaultInputFormat" : "parquet"
},
"drillTestDirP1" : {
"location" : "/drill/testdata/p1tests",
"writable" : true,
"defaultInputFormat" : "parquet"
},
"drillTestDirTpch10Parquet" : {
"location" : "/drill/testdata/tpch10",
"writable" : true,
"defaultInputFormat" : "parquet"
},
"Join" : {
"location" : "/drill/testdata/join",
"writable" : true,
"defaultInputFormat" : "parquet"
},
"NoExtJson" : {
"location" : "/drill/testdata/no-extension/json",
"writable" : true,
"defaultInputFormat" : "json"
},
"NoExtParquet" : {
"location" : "/drill/testdata/no-extension/parquet",
"writable" : true,
"defaultInputFormat" : "parquet"
},
"NoExtParquetNull" : {
"location" : "/drill/testdata/no-extension/parquet",
"writable" : true,
"defaultInputFormat" : null
},
"NoExtText" : {
"location" : "/drill/testdata/no-extension/text",
"writable" : true,
"defaultInputFormat" : "psv"
},
"drillTestDirExchanges" : {
"location" : "/drill/testdata/exchanges_test",
"writable" : true,
"defaultInputFormat" : "parquet"
},
"TpcHMulti" : {
"location" : "/drill/testdata/tpch-multi",
"writable" : true,
"defaultInputFormat" : "parquet"
},
"TpcHMulti100" : {
"location" : "/drill/testdata/SF100",
"writable" : true,
"defaultInputFormat" : "parquet"
},
"TpcHMulti1" : {
"location" : "/drill/testdata/tpch_SF1",
"writable" : true,
"defaultInputFormat" : "parquet"
},
"drillTestDirExplicit" : {
"location" : "/drill/testdata/explicit_cast",
"writable" : true,
"defaultInputFormat" : "parquet"
},
"drillTestDirImplicit" : {
"location" : "/drill/testdata/implicit_cast",
"writable" : true,
"defaultInputFormat" : "parquet"
},
"drillTestDirImplicit1" : {
"location" : "/drill/testdata/implicit_cast",
"writable" : true,
"defaultInputFormat" : "parquet"
},
"drillTestDirTPCDS" : {
"location" : "/user/root/tpcds/parquet",
"writable" : true,
"defaultInputFormat" : "parquet"
},
"TPCDS" : {
"location" : "/drill/testdata/tpcds",
"writable" : true,
"defaultInputFormat" : "parquet"
},
"drillMondrian" : {
"location" : "/user/root/mondrian",
"writable" : true,
"defaultInputFormat" : "parquet"
},
"drillTestDirDatetime" : {
"location" : "/drill/testdata/datetime/datasources",
"writable" : true,
"defaultInputFormat" : "parquet"
},
"drillTestDirViews" : {
"location" : "/drill/testdata/views/",
"writable" : true,
"defaultInputFormat" : "parquet"
},
"drillTestDirNumerical" : {
"location" : "/drill/testdata/numerical/",
"writable" : true,
"defaultInputFormat" : "parquet"
},
"drillTestDirJson" : {
"location" : "/drill/testdata/json_storage/",
"writable" : true,
"defaultInputFormat" : null
},
"drillTestDirTestNewWS" : {
"location" : "/drill/testdata/newWS/",
"writable" : true,
"defaultInputFormat" : null
},
"drillTestDirTpch01Text" : {
"location" : "/drill/testdata/Tpch0.01/text/",
"writable" : true,
"defaultInputFormat" : null
},
"drillTestDirTpch01Json" : {
"location" : "/drill/testdata/Tpch0.01/json/",
"writable" : true,
"defaultInputFormat" : null
},
"drillTestDirTpch01Parquet" : {
"location" : "/drill/testdata/Tpch0.01/parquet/",
"writable" : true,
"defaultInputFormat" : null
},
"drillTestDirConvert" : {
"location" : "/drill/testdata/convert",
"writable" : true,
"defaultInputFormat" : null
},
"drillTestDirTpch100Text" : {
"location" : "/drill/testdata/tpch100/text/",
"writable" : true,
"defaultInputFormat" : null
},
"drillTestDirTpch100Parquet" : {
"location" : "/drill/testdata/tpch100/parquet",
"writable" : true,
"defaultInputFormat" : null
},
"drillTestDirAggregate1parquet" : {
"location" : "/drill/testdata/tpcds/parquet/s1",
"writable" : true,
"defaultInputFormat" : null
},
"drillTestDirAggregate1csv" : {
"location" : "/drill/testdata/tpcds/csv/s1",
"writable" : true,
"defaultInputFormat" : null
},
"drillTestDirAggregate1json" : {
"location" : "/drill/testdata/tpcds/json/s1",
"writable" : true,
"defaultInputFormat" : null
},
"drillTestDirMondrian" : {
"location" : "/drill/testdata/mondrian",
"writable" : true,
"defaultInputFormat" : null
},
"drillTestDirTpcdsImpalaSF1" : {
"location" : "/drill/testdata/tpcds-impala-sf1",
"writable" : true,
"defaultInputFormat" : null
},
"sandbox" : {
"location" : "/sandbox",
"writable" : true,
"defaultInputFormat" : "parquet"
},
"sandbox-logs" : {
"location" : "/sandbox/flat",
"writable" : true,
"defaultInputFormat" : "parquet"
},
"sandbox-json" : {
"location" : "/sandbox/json",
"writable" : true,
"defaultInputFormat" : "parquet"
}
},
"formats" : {
"psv" : {
"type" : "text",
"extensions" : [ "tbl" ],
"delimiter" : "|"
},
"dsv" : {
"type" : "text",
"extensions" : [ "dat" ],
"delimiter" : "|"
},
"csv" : {
"type" : "text",
"extensions" : [ "csv" ],
"delimiter" : ","
},
"tsv" : {
"type" : "text",
"extensions" : [ "tsv" ],
"delimiter" : "\t"
},
"parquet" : {
"type" : "parquet"
},
"json" : {
"type" : "json"
}
}
},
"format" : {
"type" : "json"
},
"columns" : [ "`gbyi`", "`gbyt`", "`id`" ],
"selectionRoot" : "/drill/testdata/complex_type/json/complex.json",
"cost" : 1186767.0
}, {
"pop" : "project",
"@id" : 9,
"exprs" : [ {
"ref" : "`gbyi`",
"expr" : "`gbyi`"
}, {
"ref" : "`gbyt`",
"expr" : "`gbyt`"
}, {
"ref" : "`id`",
"expr" : "`id`"
} ],
"child" : 10,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 1186767.0
}, {
"pop" : "filter",
"@id" : 8,
"child" : 9,
"expr" : "booleanOr(less_than_or_equal_to(`gbyi`, 5) , booleanAnd(greater_than_or_equal_to(`gbyi`, 11) , not_equal(`gbyt`, 'ooof') ) ) ",
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 296691.75
}, {
"pop" : "selection-vector-remover",
"@id" : 7,
"child" : 8,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 296691.75
}, {
"pop" : "project",
"@id" : 6,
|
+------------+------------+
{code}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)