You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@drill.apache.org by "Chun Chang (JIRA)" <ji...@apache.org> on 2014/12/16 02:23:13 UTC

[jira] [Created] (DRILL-1872) empty map returned with order by on large dataset

Chun Chang created DRILL-1872:
---------------------------------

             Summary: empty map returned with order by on large dataset
                 Key: DRILL-1872
                 URL: https://issues.apache.org/jira/browse/DRILL-1872
             Project: Apache Drill
          Issue Type: Bug
          Components: Execution - Flow
    Affects Versions: 0.7.0
            Reporter: Chun Chang


#Mon Dec 15 11:37:23 EST 2014
git.commit.id.abbrev=3b0ff5d

I have a JSON file that contains 1 million records. The following query, without order by, gives the correct result:

{code}
0: jdbc:drill:schema=dfs.drillTestDirComplexJ> select t.id, t.oooi from `complex.json` t limit 5;
+------------+------------+
|     id     |    oooi    |
+------------+------------+
| 1          | {"oa":{"oab":{"oabc":1}}} |
| 2          | {"oa":{"oab":{"oabc":2}}} |
| 3          | {"oa":{"oab":{"oabc":3}}} |
| 4          | {"oa":{"oab":{"oabc":4}}} |
| 5          | {"oa":{"oab":{"oabc":5}}} |
{code}

Adding order by to the same query returns an empty map for every row:

{code}
0: jdbc:drill:schema=dfs.drillTestDirComplexJ> select t.id, t.oooi from `complex.json` t order by t.id limit 5;
+------------+------------+
|     id     |    oooi    |
+------------+------------+
| 1          | {}         |
| 2          | {}         |
| 3          | {}         |
| 4          | {}         |
| 5          | {}         |
+------------+------------+
{code}

The same query with order by works against a smaller dataset. Here is one record from the dataset:

{code}
{
    "id": 1,
    "gbyi": 0,
    "gbyt": "soa",
    "fl": 1.6789,
    "nul": "not null",
    "bool": false,
    "str": "This is row 1",
    "sia": [
        1,
        11,
        101,
        1001
    ],
    "sfa": [
        0,
        1.01,
        10.222,
        10.0006789
    ],
    "sba": [
        -1,
        -9.8766,
        null,
        true,
        "text row 1"
    ],
    "soa": [
        {
            "in": 1
        },
        {
            "in": 1,
            "fl": 1.12345
        },
        {
            "in": 1,
            "fl": 10.12345,
            "nul": "not null"
        },
        {
            "in": 1,
            "fl": 10.6789,
            "nul": "not null",
            "bool": true,
            "str": "here is a string at row 1"
        }
    ],
    "ooa": [
        {
            "in": 1
        },
        {
            "fl": {
                "f1": 1.6789,
                "f2": 54331
            },
            "in": 1
        },
        {
            "a": {
                "aa": {
                    "aaa": "aaa 1"
                }
            },
            "b": {
                "bb": {
                    "bbb": "bbb 1"
                },
                "c": {
                    "cc": "ccc 1"
                }
            }
        }
    ],
    "aaa": [
        [
            [
                "aa0 1"
            ],
            [
                "ab0 1"
            ]
        ],
        [
            [
                "ba0 1"
            ],
            [
                "bb0 1"
            ]
        ],
        [
            [
                "ca0 1",
                "ca1 1"
            ],
            [
                "cb0 1",
                "cb1 1",
                "cb2 1"
            ]
        ]
    ],
    "saa": [
        -1,
        [
            -10,
            -9.3211
        ],
        [
            1,
            [
                10.12345,
                "not null"
            ],
            [
                1,
                1.6789,
                "not null",
                true
            ],
            [
                -1,
                6779,
                "not null",
                false,
                "this is a short string 1"
            ]
        ]
    ],
    "oooi": {
        "oa": {
            "oab": {
                "oabc": 1
            }
        }
    },
    "ooof": {
        "oa": {
            "oab": {
                "oabc": 1.5678
            }
        }
    },
    "ooos": {
        "oa": {
            "oab": {
                "oabc": "ooos string 1"
            }
        }
    },
    "oooa": {
        "oa": {
            "oab": {
                "oabc": [
                    {
                        "rowId": 1
                    },
                    {
                        "rowValue1": 1,
                        "rowValue2": 1
                    }
                ]
            }
        }
    }
}
{code}

Here is the physical plan:

{code}
0: jdbc:drill:schema=dfs.drillTestDirComplexJ> explain plan for select t.id, t.oooi from `complex.json` t order by t.id limit 5;
+------------+------------+
|    text    |    json    |
+------------+------------+
| 00-00    Screen
00-01      Project(id=[$0], oooi=[$1])
00-02        SelectionVectorRemover
00-03          Limit(fetch=[5])
00-04            SingleMergeExchange(sort0=[0 ASC])
01-01              SelectionVectorRemover
01-02                TopN(limit=[5])
01-03                  HashToRandomExchange(dist0=[[$0]])
02-01                    Project(id=[$1], oooi=[$0])
02-02                      Scan(groupscan=[EasyGroupScan [selectionRoot=/drill/testdata/complex_type/json/complex.json, numFiles=1, columns=[`id`, `oooi`], files=[maprfs:/drill/testdata/complex_type/json/complex.json]]])
 | {
  "head" : {
    "version" : 1,
    "generator" : {
      "type" : "ExplainHandler",
      "info" : ""
    },
    "type" : "APACHE_DRILL_PHYSICAL",
    "options" : [ ],
    "queue" : 0,
    "resultMode" : "EXEC"
  },
  "graph" : [ {
    "pop" : "fs-scan",
    "@id" : 131074,
    "files" : [ "maprfs:/drill/testdata/complex_type/json/complex.json" ],
    "storage" : {
      "type" : "file",
      "enabled" : true,
      "connection" : "maprfs:///",
      "workspaces" : {
        "root" : {
          "location" : "/",
          "writable" : false,
          "defaultInputFormat" : null
        },
        "tmp" : {
          "location" : "/tmp",
          "writable" : true,
          "defaultInputFormat" : "csv"
        },
        "drillTestDir" : {
          "location" : "/drill/testdata/",
          "writable" : true,
          "defaultInputFormat" : "parquet"
        },
        "drillTestDirComplexJson" : {
          "location" : "/drill/testdata/complex_type/json",
          "writable" : true,
          "defaultInputFormat" : "json"
        },
        "drillTestDirAmplab" : {
          "location" : "/drill/testdata/amplab",
          "writable" : true,
          "defaultInputFormat" : "parquet"
        },
        "drillTestDirInformationSchema" : {
          "location" : "/drill/testdata/information-schema",
          "writable" : true,
          "defaultInputFormat" : "parquet"
        },
        "drillTestDirUdfs" : {
          "location" : "/drill/testdata/udfs/",
          "writable" : true,
          "defaultInputFormat" : "parquet"
        },
        "drillTestDirP1" : {
          "location" : "/drill/testdata/p1tests",
          "writable" : true,
          "defaultInputFormat" : "parquet"
        },
        "drillTestDirTpch10Parquet" : {
          "location" : "/drill/testdata/tpch10",
          "writable" : true,
          "defaultInputFormat" : "parquet"
        },
        "Join" : {
          "location" : "/drill/testdata/join",
          "writable" : true,
          "defaultInputFormat" : "parquet"
        },
        "NoExtJson" : {
          "location" : "/drill/testdata/no-extension/json",
          "writable" : true,
          "defaultInputFormat" : "json"
        },
        "NoExtParquet" : {
          "location" : "/drill/testdata/no-extension/parquet",
          "writable" : true,
          "defaultInputFormat" : "parquet"
        },
        "NoExtParquetNull" : {
          "location" : "/drill/testdata/no-extension/parquet",
          "writable" : true,
          "defaultInputFormat" : null
        },
        "NoExtText" : {
          "location" : "/drill/testdata/no-extension/text",
          "writable" : true,
          "defaultInputFormat" : "psv"
        },
        "drillTestDirExchanges" : {
          "location" : "/drill/testdata/exchanges_test",
          "writable" : true,
          "defaultInputFormat" : "parquet"
        },
        "TpcHMulti" : {
          "location" : "/drill/testdata/tpch-multi",
          "writable" : true,
          "defaultInputFormat" : "parquet"
        },
        "TpcHMulti100" : {
          "location" : "/drill/testdata/SF100",
          "writable" : true,
          "defaultInputFormat" : "parquet"
        },
        "TpcHMulti1" : {
          "location" : "/drill/testdata/tpch_SF1",
          "writable" : true,
          "defaultInputFormat" : "parquet"
        },
        "drillTestDirExplicit" : {
          "location" : "/drill/testdata/explicit_cast",
          "writable" : true,
          "defaultInputFormat" : "parquet"
        },
        "drillTestDirImplicit" : {
          "location" : "/drill/testdata/implicit_cast",
          "writable" : true,
          "defaultInputFormat" : "parquet"
        },
        "drillTestDirImplicit1" : {
          "location" : "/drill/testdata/implicit_cast",
          "writable" : true,
          "defaultInputFormat" : "parquet"
        },
        "drillTestDirTPCDS" : {
          "location" : "/user/root/tpcds/parquet",
          "writable" : true,
          "defaultInputFormat" : "parquet"
        },
        "TPCDS" : {
          "location" : "/drill/testdata/tpcds",
          "writable" : true,
          "defaultInputFormat" : "parquet"
        },
        "drillMondrian" : {
          "location" : "/user/root/mondrian",
          "writable" : true,
          "defaultInputFormat" : "parquet"
        },
        "drillTestDirDatetime" : {
          "location" : "/drill/testdata/datetime/datasources",
          "writable" : true,
          "defaultInputFormat" : "parquet"
        },
        "drillTestDirViews" : {
          "location" : "/drill/testdata/views/",
          "writable" : true,
          "defaultInputFormat" : "parquet"
        },
        "drillTestDirNumerical" : {
          "location" : "/drill/testdata/numerical/",
          "writable" : true,
          "defaultInputFormat" : "parquet"
        },
        "drillTestDirJson" : {
          "location" : "/drill/testdata/json_storage/",
          "writable" : true,
          "defaultInputFormat" : null
        },
        "drillTestDirTestNewWS" : {
          "location" : "/drill/testdata/newWS/",
          "writable" : true,
          "defaultInputFormat" : null
        },
        "drillTestDirTpch01Text" : {
          "location" : "/drill/testdata/Tpch0.01/text/",
          "writable" : true,
          "defaultInputFormat" : null
        },
        "drillTestDirTpch01Json" : {
          "location" : "/drill/testdata/Tpch0.01/json/",
          "writable" : true,
          "defaultInputFormat" : null
        },
        "drillTestDirTpch01Parquet" : {
          "location" : "/drill/testdata/Tpch0.01/parquet/",
          "writable" : true,
          "defaultInputFormat" : null
        },
        "drillTestDirConvert" : {
          "location" : "/drill/testdata/convert",
          "writable" : true,
          "defaultInputFormat" : null
        },
        "drillTestDirTpch100Text" : {
          "location" : "/drill/testdata/tpch100/text/",
          "writable" : true,
          "defaultInputFormat" : null
        },
        "drillTestDirTpch100Parquet" : {
          "location" : "/drill/testdata/tpch100/parquet",
          "writable" : true,
          "defaultInputFormat" : null
        },
        "drillTestDirAggregate1parquet" : {
          "location" : "/drill/testdata/tpcds/parquet/s1",
          "writable" : true,
          "defaultInputFormat" : null
        },
        "drillTestDirAggregate1csv" : {
          "location" : "/drill/testdata/tpcds/csv/s1",
          "writable" : true,
          "defaultInputFormat" : null
        },
        "drillTestDirAggregate1json" : {
          "location" : "/drill/testdata/tpcds/json/s1",
          "writable" : true,
          "defaultInputFormat" : null
        },
        "drillTestDirMondrian" : {
          "location" : "/drill/testdata/mondrian",
          "writable" : true,
          "defaultInputFormat" : null
        },
        "drillTestDirTpcdsImpalaSF1" : {
          "location" : "/drill/testdata/tpcds-impala-sf1",
          "writable" : true,
          "defaultInputFormat" : null
        },
        "sandbox" : {
          "location" : "/sandbox",
          "writable" : true,
          "defaultInputFormat" : "parquet"
        },
        "sandbox-logs" : {
          "location" : "/sandbox/flat",
          "writable" : true,
          "defaultInputFormat" : "parquet"
        },
        "sandbox-json" : {
          "location" : "/sandbox/json",
          "writable" : true,
          "defaultInputFormat" : "parquet"
        }
      },
      "formats" : {
        "psv" : {
          "type" : "text",
          "extensions" : [ "tbl" ],
          "delimiter" : "|"
        },
        "dsv" : {
          "type" : "text",
          "extensions" : [ "dat" ],
          "delimiter" : "|"
        },
        "csv" : {
          "type" : "text",
          "extensions" : [ "csv" ],
          "delimiter" : ","
        },
        "tsv" : {
          "type" : "text",
          "extensions" : [ "tsv" ],
          "delimiter" : "\t"
        },
        "parquet" : {
          "type" : "parquet"
        },
        "json" : {
          "type" : "json"
        }
      }
    },
    "format" : {
      "type" : "json"
    },
    "columns" : [ "`id`", "`oooi`" ],
    "selectionRoot" : "/drill/testdata/complex_type/json/complex.json",
    "cost" : 1186767.0
  }, {
    "pop" : "project",
    "@id" : 131073,
    "exprs" : [ {
      "ref" : "`id`",
      "expr" : "`id`"
    }, {
      "ref" : "`oooi`",
      "expr" : "`oooi`"
    } ],
    "child" : 131074,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 1186767.0
  }, {
    "pop" : "hash-to-random-exchange",
    "@id" : 65539,
    "child" : 131073,
    "expr" : "hash(`id`) ",
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 1186767.0
  }, {
    "pop" : "top-n",
    "@id" : 65538,
    "child" : 65539,
    "orderings" : [ {
      "order" : "ASC",
      "expr" : "`id`",
      "nullDirection" : "UNSPECIFIED"
    } ],
    "reverse" : false,
    "limit" : 5,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 1186767.0
  }, {
    "pop" : "selection-vector-remover",
    "@id" : 65537,
    "child" : 65538,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 1186767.0
  }, {
    "pop" : " |
+------------+------------+
{code}

I did not see any error messages in the log files.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)