You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@drill.apache.org by "Chun Chang (JIRA)" <ji...@apache.org> on 2015/04/28 03:13:06 UTC
[jira] [Closed] (DRILL-1872) empty map returned with order by on large dataset
[ https://issues.apache.org/jira/browse/DRILL-1872?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Chun Chang closed DRILL-1872.
-----------------------------
Assignee: Chun Chang (was: Parth Chandra)
Already verified. Test case: complex42.q
> empty map returned with order by on large dataset
> -------------------------------------------------
>
> Key: DRILL-1872
> URL: https://issues.apache.org/jira/browse/DRILL-1872
> Project: Apache Drill
> Issue Type: Bug
> Components: Execution - Data Types
> Affects Versions: 0.7.0
> Reporter: Chun Chang
> Assignee: Chun Chang
> Priority: Critical
> Fix For: 0.8.0
>
> Attachments: DRILL-1872.2.diff, DRILL-1872.diff
>
>
> #Mon Dec 15 11:37:23 EST 2014
> git.commit.id.abbrev=3b0ff5d
> I have a JSON file that contains 1 million records. The following query without order by gives me the correct result:
> {code}
> 0: jdbc:drill:schema=dfs.drillTestDirComplexJ> select t.id, t.oooi from `complex.json` t limit 5;
> +------------+------------+
> | id | oooi |
> +------------+------------+
> | 1 | {"oa":{"oab":{"oabc":1}}} |
> | 2 | {"oa":{"oab":{"oabc":2}}} |
> | 3 | {"oa":{"oab":{"oabc":3}}} |
> | 4 | {"oa":{"oab":{"oabc":4}}} |
> | 5 | {"oa":{"oab":{"oabc":5}}} |
> {code}
> Adding order by gives me an empty map:
> {code}
> 0: jdbc:drill:schema=dfs.drillTestDirComplexJ> select t.id, t.oooi from `complex.json` t order by t.id limit 5;
> +------------+------------+
> | id | oooi |
> +------------+------------+
> | 1 | {} |
> | 2 | {} |
> | 3 | {} |
> | 4 | {} |
> | 5 | {} |
> +------------+------------+
> {code}
> The same query with order by against a smaller dataset works. Here is a sample record from the dataset:
> {code}
> {
> "id": 1,
> "gbyi": 0,
> "gbyt": "soa",
> "fl": 1.6789,
> "nul": "not null",
> "bool": false,
> "str": "This is row 1",
> "sia": [
> 1,
> 11,
> 101,
> 1001
> ],
> "sfa": [
> 0,
> 1.01,
> 10.222,
> 10.0006789
> ],
> "sba": [
> -1,
> -9.8766,
> null,
> true,
> "text row 1"
> ],
> "soa": [
> {
> "in": 1
> },
> {
> "in": 1,
> "fl": 1.12345
> },
> {
> "in": 1,
> "fl": 10.12345,
> "nul": "not null"
> },
> {
> "in": 1,
> "fl": 10.6789,
> "nul": "not null",
> "bool": true,
> "str": "here is a string at row 1"
> }
> ],
> "ooa": [
> {
> "in": 1
> },
> {
> "fl": {
> "f1": 1.6789,
> "f2": 54331
> },
> "in": 1
> },
> {
> "a": {
> "aa": {
> "aaa": "aaa 1"
> }
> },
> "b": {
> "bb": {
> "bbb": "bbb 1"
> },
> "c": {
> "cc": "ccc 1"
> }
> }
> }
> ],
> "aaa": [
> [
> [
> "aa0 1"
> ],
> [
> "ab0 1"
> ]
> ],
> [
> [
> "ba0 1"
> ],
> [
> "bb0 1"
> ]
> ],
> [
> [
> "ca0 1",
> "ca1 1"
> ],
> [
> "cb0 1",
> "cb1 1",
> "cb2 1"
> ]
> ]
> ],
> "saa": [
> -1,
> [
> -10,
> -9.3211
> ],
> [
> 1,
> [
> 10.12345,
> "not null"
> ],
> [
> 1,
> 1.6789,
> "not null",
> true
> ],
> [
> -1,
> 6779,
> "not null",
> false,
> "this is a short string 1"
> ]
> ]
> ],
> "oooi": {
> "oa": {
> "oab": {
> "oabc": 1
> }
> }
> },
> "ooof": {
> "oa": {
> "oab": {
> "oabc": 1.5678
> }
> }
> },
> "ooos": {
> "oa": {
> "oab": {
> "oabc": "ooos string 1"
> }
> }
> },
> "oooa": {
> "oa": {
> "oab": {
> "oabc": [
> {
> "rowId": 1
> },
> {
> "rowValue1": 1,
> "rowValue2": 1
> }
> ]
> }
> }
> }
> }
> {code}
> Here is the physical plan:
> {code}
> 0: jdbc:drill:schema=dfs.drillTestDirComplexJ> explain plan for select t.id, t.oooi from `complex.json` t order by t.id limit 5;
> +------------+------------+
> | text | json |
> +------------+------------+
> | 00-00 Screen
> 00-01 Project(id=[$0], oooi=[$1])
> 00-02 SelectionVectorRemover
> 00-03 Limit(fetch=[5])
> 00-04 SingleMergeExchange(sort0=[0 ASC])
> 01-01 SelectionVectorRemover
> 01-02 TopN(limit=[5])
> 01-03 HashToRandomExchange(dist0=[[$0]])
> 02-01 Project(id=[$1], oooi=[$0])
> 02-02 Scan(groupscan=[EasyGroupScan [selectionRoot=/drill/testdata/complex_type/json/complex.json, numFiles=1, columns=[`id`, `oooi`], files=[maprfs:/drill/testdata/complex_type/json/complex.json]]])
> | {
> "head" : {
> "version" : 1,
> "generator" : {
> "type" : "ExplainHandler",
> "info" : ""
> },
> "type" : "APACHE_DRILL_PHYSICAL",
> "options" : [ ],
> "queue" : 0,
> "resultMode" : "EXEC"
> },
> "graph" : [ {
> "pop" : "fs-scan",
> "@id" : 131074,
> "files" : [ "maprfs:/drill/testdata/complex_type/json/complex.json" ],
> "storage" : {
> "type" : "file",
> "enabled" : true,
> "connection" : "maprfs:///",
> "workspaces" : {
> "root" : {
> "location" : "/",
> "writable" : false,
> "defaultInputFormat" : null
> },
> "tmp" : {
> "location" : "/tmp",
> "writable" : true,
> "defaultInputFormat" : "csv"
> },
> "drillTestDir" : {
> "location" : "/drill/testdata/",
> "writable" : true,
> "defaultInputFormat" : "parquet"
> },
> "drillTestDirComplexJson" : {
> "location" : "/drill/testdata/complex_type/json",
> "writable" : true,
> "defaultInputFormat" : "json"
> },
> "drillTestDirAmplab" : {
> "location" : "/drill/testdata/amplab",
> "writable" : true,
> "defaultInputFormat" : "parquet"
> },
> "drillTestDirInformationSchema" : {
> "location" : "/drill/testdata/information-schema",
> "writable" : true,
> "defaultInputFormat" : "parquet"
> },
> "drillTestDirUdfs" : {
> "location" : "/drill/testdata/udfs/",
> "writable" : true,
> "defaultInputFormat" : "parquet"
> },
> "drillTestDirP1" : {
> "location" : "/drill/testdata/p1tests",
> "writable" : true,
> "defaultInputFormat" : "parquet"
> },
> "drillTestDirTpch10Parquet" : {
> "location" : "/drill/testdata/tpch10",
> "writable" : true,
> "defaultInputFormat" : "parquet"
> },
> "Join" : {
> "location" : "/drill/testdata/join",
> "writable" : true,
> "defaultInputFormat" : "parquet"
> },
> "NoExtJson" : {
> "location" : "/drill/testdata/no-extension/json",
> "writable" : true,
> "defaultInputFormat" : "json"
> },
> "NoExtParquet" : {
> "location" : "/drill/testdata/no-extension/parquet",
> "writable" : true,
> "defaultInputFormat" : "parquet"
> },
> "NoExtParquetNull" : {
> "location" : "/drill/testdata/no-extension/parquet",
> "writable" : true,
> "defaultInputFormat" : null
> },
> "NoExtText" : {
> "location" : "/drill/testdata/no-extension/text",
> "writable" : true,
> "defaultInputFormat" : "psv"
> },
> "drillTestDirExchanges" : {
> "location" : "/drill/testdata/exchanges_test",
> "writable" : true,
> "defaultInputFormat" : "parquet"
> },
> "TpcHMulti" : {
> "location" : "/drill/testdata/tpch-multi",
> "writable" : true,
> "defaultInputFormat" : "parquet"
> },
> "TpcHMulti100" : {
> "location" : "/drill/testdata/SF100",
> "writable" : true,
> "defaultInputFormat" : "parquet"
> },
> "TpcHMulti1" : {
> "location" : "/drill/testdata/tpch_SF1",
> "writable" : true,
> "defaultInputFormat" : "parquet"
> },
> "drillTestDirExplicit" : {
> "location" : "/drill/testdata/explicit_cast",
> "writable" : true,
> "defaultInputFormat" : "parquet"
> },
> "drillTestDirImplicit" : {
> "location" : "/drill/testdata/implicit_cast",
> "writable" : true,
> "defaultInputFormat" : "parquet"
> },
> "drillTestDirImplicit1" : {
> "location" : "/drill/testdata/implicit_cast",
> "writable" : true,
> "defaultInputFormat" : "parquet"
> },
> "drillTestDirTPCDS" : {
> "location" : "/user/root/tpcds/parquet",
> "writable" : true,
> "defaultInputFormat" : "parquet"
> },
> "TPCDS" : {
> "location" : "/drill/testdata/tpcds",
> "writable" : true,
> "defaultInputFormat" : "parquet"
> },
> "drillMondrian" : {
> "location" : "/user/root/mondrian",
> "writable" : true,
> "defaultInputFormat" : "parquet"
> },
> "drillTestDirDatetime" : {
> "location" : "/drill/testdata/datetime/datasources",
> "writable" : true,
> "defaultInputFormat" : "parquet"
> },
> "drillTestDirViews" : {
> "location" : "/drill/testdata/views/",
> "writable" : true,
> "defaultInputFormat" : "parquet"
> },
> "drillTestDirNumerical" : {
> "location" : "/drill/testdata/numerical/",
> "writable" : true,
> "defaultInputFormat" : "parquet"
> },
> "drillTestDirJson" : {
> "location" : "/drill/testdata/json_storage/",
> "writable" : true,
> "defaultInputFormat" : null
> },
> "drillTestDirTestNewWS" : {
> "location" : "/drill/testdata/newWS/",
> "writable" : true,
> "defaultInputFormat" : null
> },
> "drillTestDirTpch01Text" : {
> "location" : "/drill/testdata/Tpch0.01/text/",
> "writable" : true,
> "defaultInputFormat" : null
> },
> "drillTestDirTpch01Json" : {
> "location" : "/drill/testdata/Tpch0.01/json/",
> "writable" : true,
> "defaultInputFormat" : null
> },
> "drillTestDirTpch01Parquet" : {
> "location" : "/drill/testdata/Tpch0.01/parquet/",
> "writable" : true,
> "defaultInputFormat" : null
> },
> "drillTestDirConvert" : {
> "location" : "/drill/testdata/convert",
> "writable" : true,
> "defaultInputFormat" : null
> },
> "drillTestDirTpch100Text" : {
> "location" : "/drill/testdata/tpch100/text/",
> "writable" : true,
> "defaultInputFormat" : null
> },
> "drillTestDirTpch100Parquet" : {
> "location" : "/drill/testdata/tpch100/parquet",
> "writable" : true,
> "defaultInputFormat" : null
> },
> "drillTestDirAggregate1parquet" : {
> "location" : "/drill/testdata/tpcds/parquet/s1",
> "writable" : true,
> "defaultInputFormat" : null
> },
> "drillTestDirAggregate1csv" : {
> "location" : "/drill/testdata/tpcds/csv/s1",
> "writable" : true,
> "defaultInputFormat" : null
> },
> "drillTestDirAggregate1json" : {
> "location" : "/drill/testdata/tpcds/json/s1",
> "writable" : true,
> "defaultInputFormat" : null
> },
> "drillTestDirMondrian" : {
> "location" : "/drill/testdata/mondrian",
> "writable" : true,
> "defaultInputFormat" : null
> },
> "drillTestDirTpcdsImpalaSF1" : {
> "location" : "/drill/testdata/tpcds-impala-sf1",
> "writable" : true,
> "defaultInputFormat" : null
> },
> "sandbox" : {
> "location" : "/sandbox",
> "writable" : true,
> "defaultInputFormat" : "parquet"
> },
> "sandbox-logs" : {
> "location" : "/sandbox/flat",
> "writable" : true,
> "defaultInputFormat" : "parquet"
> },
> "sandbox-json" : {
> "location" : "/sandbox/json",
> "writable" : true,
> "defaultInputFormat" : "parquet"
> }
> },
> "formats" : {
> "psv" : {
> "type" : "text",
> "extensions" : [ "tbl" ],
> "delimiter" : "|"
> },
> "dsv" : {
> "type" : "text",
> "extensions" : [ "dat" ],
> "delimiter" : "|"
> },
> "csv" : {
> "type" : "text",
> "extensions" : [ "csv" ],
> "delimiter" : ","
> },
> "tsv" : {
> "type" : "text",
> "extensions" : [ "tsv" ],
> "delimiter" : "\t"
> },
> "parquet" : {
> "type" : "parquet"
> },
> "json" : {
> "type" : "json"
> }
> }
> },
> "format" : {
> "type" : "json"
> },
> "columns" : [ "`id`", "`oooi`" ],
> "selectionRoot" : "/drill/testdata/complex_type/json/complex.json",
> "cost" : 1186767.0
> }, {
> "pop" : "project",
> "@id" : 131073,
> "exprs" : [ {
> "ref" : "`id`",
> "expr" : "`id`"
> }, {
> "ref" : "`oooi`",
> "expr" : "`oooi`"
> } ],
> "child" : 131074,
> "initialAllocation" : 1000000,
> "maxAllocation" : 10000000000,
> "cost" : 1186767.0
> }, {
> "pop" : "hash-to-random-exchange",
> "@id" : 65539,
> "child" : 131073,
> "expr" : "hash(`id`) ",
> "initialAllocation" : 1000000,
> "maxAllocation" : 10000000000,
> "cost" : 1186767.0
> }, {
> "pop" : "top-n",
> "@id" : 65538,
> "child" : 65539,
> "orderings" : [ {
> "order" : "ASC",
> "expr" : "`id`",
> "nullDirection" : "UNSPECIFIED"
> } ],
> "reverse" : false,
> "limit" : 5,
> "initialAllocation" : 1000000,
> "maxAllocation" : 10000000000,
> "cost" : 1186767.0
> }, {
> "pop" : "selection-vector-remover",
> "@id" : 65537,
> "child" : 65538,
> "initialAllocation" : 1000000,
> "maxAllocation" : 10000000000,
> "cost" : 1186767.0
> }, {
> "pop" : " |
> +------------+------------+
> {code}
> Did not see any error messages in log files.
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)