You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@drill.apache.org by "Chun Chang (JIRA)" <ji...@apache.org> on 2015/04/28 03:14:06 UTC
[jira] [Closed] (DRILL-1860) When a 'key' is missing in a map, it
should not contribute to the count() function
[ https://issues.apache.org/jira/browse/DRILL-1860?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Chun Chang closed DRILL-1860.
-----------------------------
Assignee: Chun Chang (was: Mehant Baid)
already verified. test case complex23.q
> When a 'key' is missing in a map, it should not contribute to the count() function
> ----------------------------------------------------------------------------------
>
> Key: DRILL-1860
> URL: https://issues.apache.org/jira/browse/DRILL-1860
> Project: Apache Drill
> Issue Type: Bug
> Components: Functions - Drill
> Affects Versions: 0.7.0
> Reporter: Chun Chang
> Assignee: Chun Chang
> Fix For: 0.8.0
>
> Attachments: CALCITE.patch, DRILL-1860.patch
>
>
> #Fri Dec 12 11:47:55 EST 2014
> git.commit.id.abbrev=d925eab
> Have the following json data:
> {code}
> 0: jdbc:drill:schema=dfs.drillTestDir> select t.soa from `complex.json` t limit 10;
> +------------+
> | soa |
> +------------+
> | [{"in":1},{"in":1,"fl":1.12345},{"in":1,"fl":10.12345,"nul":"not null"},{"in":1,"fl":10.6789,"nul":"not null","bool":true,"str":"here is a string at row 1"}] |
> | [{"in":2},{"in":2,"fl":2.12345},{"in":2,"fl":20.12345,"nul":"not null"},{"in":2,"fl":20.6789,"bool":false,"str":"here is a string at row 2"}] |
> | [{"in":3},{"in":3,"fl":3.12345},{"in":3,"fl":30.12345,"nul":"not null"},{"in":3,"fl":30.6789,"nul":"not null","bool":true,"str":"here is a string at row 3"}] |
> | [{"in":4},{"in":4,"fl":4.12345},{"in":4,"fl":40.12345,"nul":"not null"},{"in":4,"fl":40.6789,"bool":true,"str":"here is a string at row 4"}] |
> | [{"in":5},{"in":5,"fl":5.12345},{"in":5,"fl":50.12345,"nul":"not null"},{"in":5,"fl":50.6789,"nul":"not null","bool":true,"str":"here is a string at row 5"}] |
> | [{"in":6},{"in":6,"fl":6.12345},{"in":6,"fl":60.12345,"nul":"not null"},{"in":6,"fl":60.6789,"nul":"not null","bool":true,"str":"here is a string at row 6"}] |
> | [{"in":7},{"in":7,"fl":7.12345},{"in":7,"fl":70.12345},{"in":7,"fl":70.6789,"nul":"not null","bool":false,"str":"here is a string at row 7"}] |
> | [{"in":8},{"in":8,"fl":8.12345},{"in":8,"fl":80.12345},{"in":8,"fl":80.6789,"bool":true,"str":"here is a string at row 8"}] |
> | [{"in":9},{"in":9,"fl":9.12345},{"in":9,"fl":90.12345,"nul":"not null"},{"in":9,"fl":90.6789,"nul":"not null","bool":true,"str":"here is a string at row 9"}] |
> | [{"in":10},{"in":10,"fl":10.12345},{"in":10,"fl":100.12345},{"in":10,"fl":100.6789,"bool":false,"str":"here is a string at row 10"}] |
> +------------+
> {code}
> For some of the rows, for the key named 'nul', it is missing, hence returning null:
> {code}
> 0: jdbc:drill:schema=dfs.drillTestDir> select t.soa[2].nul, t.soa[3].bool from `complex.json` t limit 10;
> +------------+------------+
> | EXPR$0 | EXPR$1 |
> +------------+------------+
> | not null | true |
> | not null | false |
> | not null | true |
> | not null | true |
> | not null | true |
> | not null | true |
> | null | false |
> | null | true |
> | not null | true |
> | null | false |
> +------------+------------+
> {code}
> But when I do a count on that, the null value still counted:
> {code}
> 0: jdbc:drill:schema=dfs.drillTestDir> select count(t.soa[2].nul), count(t.soa[3].bool) from `complex.json` t limit 10;
> +------------+------------+
> | EXPR$0 | EXPR$1 |
> +------------+------------+
> | 1000000 | 1000000 |
> +------------+------------+
> {code}
> Here is the physical plan:
> {code}
> 0: jdbc:drill:schema=dfs.drillTestDir> explain plan for select count(t.soa[2].nul), count(t.soa[3].bool) from `complex.json` t;
> +------------+------------+
> | text | json |
> +------------+------------+
> | 00-00 Screen
> 00-01 Project(EXPR$0=[$0], EXPR$1=[$0])
> 00-02 StreamAgg(group=[{}], EXPR$0=[$SUM0($0)])
> 00-03 StreamAgg(group=[{}], EXPR$0=[COUNT()])
> 00-04 Project($f0=[ITEM(ITEM($0, 2), 'nul')], $f1=[ITEM(ITEM($0, 3), 'bool')])
> 00-05 Scan(groupscan=[EasyGroupScan [selectionRoot=/drill/testdata/complex_type/json/complex.json, numFiles=1, columns=[`soa`[2].`nul`, `soa`[3].`bool`], files=[maprfs:/drill/testdata/complex_type/json/complex.json]]])
> | {
> "head" : {
> "version" : 1,
> "generator" : {
> "type" : "ExplainHandler",
> "info" : ""
> },
> "type" : "APACHE_DRILL_PHYSICAL",
> "options" : [ ],
> "queue" : 0,
> "resultMode" : "EXEC"
> },
> "graph" : [ {
> "pop" : "fs-scan",
> "@id" : 5,
> "files" : [ "maprfs:/drill/testdata/complex_type/json/complex.json" ],
> "storage" : {
> "type" : "file",
> "enabled" : true,
> "connection" : "maprfs:///",
> "workspaces" : {
> "root" : {
> "location" : "/",
> "writable" : false,
> "defaultInputFormat" : null
> },
> "tmp" : {
> "location" : "/tmp",
> "writable" : true,
> "defaultInputFormat" : "csv"
> },
> "drillTestDir" : {
> "location" : "/drill/testdata/",
> "writable" : true,
> "defaultInputFormat" : "parquet"
> },
> "drillTestDirComplexJson" : {
> "location" : "/drill/testdata/complex_type/json",
> "writable" : true,
> "defaultInputFormat" : "json"
> },
> "drillTestDirAmplab" : {
> "location" : "/drill/testdata/amplab",
> "writable" : true,
> "defaultInputFormat" : "parquet"
> },
> "drillTestDirInformationSchema" : {
> "location" : "/drill/testdata/information-schema",
> "writable" : true,
> "defaultInputFormat" : "parquet"
> },
> "drillTestDirUdfs" : {
> "location" : "/drill/testdata/udfs/",
> "writable" : true,
> "defaultInputFormat" : "parquet"
> },
> "drillTestDirP1" : {
> "location" : "/drill/testdata/p1tests",
> "writable" : true,
> "defaultInputFormat" : "parquet"
> },
> "drillTestDirTpch10Parquet" : {
> "location" : "/drill/testdata/tpch10",
> "writable" : true,
> "defaultInputFormat" : "parquet"
> },
> "Join" : {
> "location" : "/drill/testdata/join",
> "writable" : true,
> "defaultInputFormat" : "parquet"
> },
> "NoExtJson" : {
> "location" : "/drill/testdata/no-extension/json",
> "writable" : true,
> "defaultInputFormat" : "json"
> },
> "NoExtParquet" : {
> "location" : "/drill/testdata/no-extension/parquet",
> "writable" : true,
> "defaultInputFormat" : "parquet"
> },
> "NoExtParquetNull" : {
> "location" : "/drill/testdata/no-extension/parquet",
> "writable" : true,
> "defaultInputFormat" : null
> },
> "NoExtText" : {
> "location" : "/drill/testdata/no-extension/text",
> "writable" : true,
> "defaultInputFormat" : "psv"
> },
> "drillTestDirExchanges" : {
> "location" : "/drill/testdata/exchanges_test",
> "writable" : true,
> "defaultInputFormat" : "parquet"
> },
> "TpcHMulti" : {
> "location" : "/drill/testdata/tpch-multi",
> "writable" : true,
> "defaultInputFormat" : "parquet"
> },
> "TpcHMulti100" : {
> "location" : "/drill/testdata/SF100",
> "writable" : true,
> "defaultInputFormat" : "parquet"
> },
> "TpcHMulti1" : {
> "location" : "/drill/testdata/tpch_SF1",
> "writable" : true,
> "defaultInputFormat" : "parquet"
> },
> "drillTestDirExplicit" : {
> "location" : "/drill/testdata/explicit_cast",
> "writable" : true,
> "defaultInputFormat" : "parquet"
> },
> "drillTestDirImplicit" : {
> "location" : "/drill/testdata/implicit_cast",
> "writable" : true,
> "defaultInputFormat" : "parquet"
> },
> "drillTestDirImplicit1" : {
> "location" : "/drill/testdata/implicit_cast",
> "writable" : true,
> "defaultInputFormat" : "parquet"
> },
> "drillTestDirTPCDS" : {
> "location" : "/user/root/tpcds/parquet",
> "writable" : true,
> "defaultInputFormat" : "parquet"
> },
> "TPCDS" : {
> "location" : "/drill/testdata/tpcds",
> "writable" : true,
> "defaultInputFormat" : "parquet"
> },
> "drillMondrian" : {
> "location" : "/user/root/mondrian",
> "writable" : true,
> "defaultInputFormat" : "parquet"
> },
> "drillTestDirDatetime" : {
> "location" : "/drill/testdata/datetime/datasources",
> "writable" : true,
> "defaultInputFormat" : "parquet"
> },
> "drillTestDirViews" : {
> "location" : "/drill/testdata/views/",
> "writable" : true,
> "defaultInputFormat" : "parquet"
> },
> "drillTestDirNumerical" : {
> "location" : "/drill/testdata/numerical/",
> "writable" : true,
> "defaultInputFormat" : "parquet"
> },
> "drillTestDirJson" : {
> "location" : "/drill/testdata/json_storage/",
> "writable" : true,
> "defaultInputFormat" : null
> },
> "drillTestDirTestNewWS" : {
> "location" : "/drill/testdata/newWS/",
> "writable" : true,
> "defaultInputFormat" : null
> },
> "drillTestDirTpch01Text" : {
> "location" : "/drill/testdata/Tpch0.01/text/",
> "writable" : true,
> "defaultInputFormat" : null
> },
> "drillTestDirTpch01Json" : {
> "location" : "/drill/testdata/Tpch0.01/json/",
> "writable" : true,
> "defaultInputFormat" : null
> },
> "drillTestDirTpch01Parquet" : {
> "location" : "/drill/testdata/Tpch0.01/parquet/",
> "writable" : true,
> "defaultInputFormat" : null
> },
> "drillTestDirConvert" : {
> "location" : "/drill/testdata/convert",
> "writable" : true,
> "defaultInputFormat" : null
> },
> "drillTestDirTpch100Text" : {
> "location" : "/drill/testdata/tpch100/text/",
> "writable" : true,
> "defaultInputFormat" : null
> },
> "drillTestDirTpch100Parquet" : {
> "location" : "/drill/testdata/tpch100/parquet",
> "writable" : true,
> "defaultInputFormat" : null
> },
> "drillTestDirAggregate1parquet" : {
> "location" : "/drill/testdata/tpcds/parquet/s1",
> "writable" : true,
> "defaultInputFormat" : null
> },
> "drillTestDirAggregate1csv" : {
> "location" : "/drill/testdata/tpcds/csv/s1",
> "writable" : true,
> "defaultInputFormat" : null
> },
> "drillTestDirAggregate1json" : {
> "location" : "/drill/testdata/tpcds/json/s1",
> "writable" : true,
> "defaultInputFormat" : null
> },
> "drillTestDirMondrian" : {
> "location" : "/drill/testdata/mondrian",
> "writable" : true,
> "defaultInputFormat" : null
> },
> "drillTestDirTpcdsImpalaSF1" : {
> "location" : "/drill/testdata/tpcds-impala-sf1",
> "writable" : true,
> "defaultInputFormat" : null
> },
> "sandbox" : {
> "location" : "/sandbox",
> "writable" : true,
> "defaultInputFormat" : "parquet"
> },
> "sandbox-logs" : {
> "location" : "/sandbox/flat",
> "writable" : true,
> "defaultInputFormat" : "parquet"
> },
> "sandbox-json" : {
> "location" : "/sandbox/json",
> "writable" : true,
> "defaultInputFormat" : "parquet"
> }
> },
> "formats" : {
> "psv" : {
> "type" : "text",
> "extensions" : [ "tbl" ],
> "delimiter" : "|"
> },
> "dsv" : {
> "type" : "text",
> "extensions" : [ "dat" ],
> "delimiter" : "|"
> },
> "csv" : {
> "type" : "text",
> "extensions" : [ "csv" ],
> "delimiter" : ","
> },
> "tsv" : {
> "type" : "text",
> "extensions" : [ "tsv" ],
> "delimiter" : "\t"
> },
> "parquet" : {
> "type" : "parquet"
> },
> "json" : {
> "type" : "json"
> }
> }
> },
> "format" : {
> "type" : "json"
> },
> "columns" : [ "`soa`[2].`nul`", "`soa`[3].`bool`" ],
> "selectionRoot" : "/drill/testdata/complex_type/json/complex.json",
> "cost" : 1186767.0
> }, {
> "pop" : "project",
> "@id" : 4,
> "exprs" : [ {
> "ref" : "`$f0`",
> "expr" : "`soa`[2].`nul`"
> }, {
> "ref" : "`$f1`",
> "expr" : "`soa`[3].`bool`"
> } ],
> "child" : 5,
> "initialAllocation" : 1000000,
> "maxAllocation" : 10000000000,
> "cost" : 1186767.0
> }, {
> "pop" : "streaming-aggregate",
> "@id" : 3,
> "child" : 4,
> "keys" : [ ],
> "exprs" : [ {
> "ref" : "`EXPR$0`",
> "expr" : "count(1) "
> } ],
> "initialAllocation" : 1000000,
> "maxAllocation" : 10000000000,
> "cost" : 1.0
> }, {
> "pop" : "streaming-aggregate",
> "@id" : 2,
> "child" : 3,
> "keys" : [ ],
> "exprs" : [ {
> "ref" : "`EXPR$0`",
> "expr" : "$sum0(`EXPR$0`) "
> } ],
> "initialAllocation" : 1000000,
> "maxAllocation" : 10000000000,
> "cost" : 1.0
> }, {
> "pop" : "project",
> "@id" : 1,
> "exprs" : [ {
> "ref" : "`EXPR$0`",
> "expr" : "`EXPR$0`"
> }, {
> "ref" : "`EXPR$1`",
> "expr" : "`EXPR$0`"
> } ],
> "child" : 2,
> "initialAllocation" : 1000000,
> "maxAllocation" : 1000000000 |
> +------------+------------+
> {code}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)