You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@drill.apache.org by "Chun Chang (JIRA)" <ji...@apache.org> on 2014/12/16 02:23:13 UTC
[jira] [Created] (DRILL-1872) empty map returned with order by on
large dataset
Chun Chang created DRILL-1872:
---------------------------------
Summary: empty map returned with order by on large dataset
Key: DRILL-1872
URL: https://issues.apache.org/jira/browse/DRILL-1872
Project: Apache Drill
Issue Type: Bug
Components: Execution - Flow
Affects Versions: 0.7.0
Reporter: Chun Chang
#Mon Dec 15 11:37:23 EST 2014
git.commit.id.abbrev=3b0ff5d
I have a JSON file that contains 1 million records. The following query without order by gives me the correct result:
{code}
0: jdbc:drill:schema=dfs.drillTestDirComplexJ> select t.id, t.oooi from `complex.json` t limit 5;
+------------+------------+
| id | oooi |
+------------+------------+
| 1 | {"oa":{"oab":{"oabc":1}}} |
| 2 | {"oa":{"oab":{"oabc":2}}} |
| 3 | {"oa":{"oab":{"oabc":3}}} |
| 4 | {"oa":{"oab":{"oabc":4}}} |
| 5 | {"oa":{"oab":{"oabc":5}}} |
{code}
Adding order by gives me an empty map:
{code}
0: jdbc:drill:schema=dfs.drillTestDirComplexJ> select t.id, t.oooi from `complex.json` t order by t.id limit 5;
+------------+------------+
| id | oooi |
+------------+------------+
| 1 | {} |
| 2 | {} |
| 3 | {} |
| 4 | {} |
| 5 | {} |
+------------+------------+
{code}
The same query with order by works against a smaller dataset. Here is a sample record:
{code}
{
"id": 1,
"gbyi": 0,
"gbyt": "soa",
"fl": 1.6789,
"nul": "not null",
"bool": false,
"str": "This is row 1",
"sia": [
1,
11,
101,
1001
],
"sfa": [
0,
1.01,
10.222,
10.0006789
],
"sba": [
-1,
-9.8766,
null,
true,
"text row 1"
],
"soa": [
{
"in": 1
},
{
"in": 1,
"fl": 1.12345
},
{
"in": 1,
"fl": 10.12345,
"nul": "not null"
},
{
"in": 1,
"fl": 10.6789,
"nul": "not null",
"bool": true,
"str": "here is a string at row 1"
}
],
"ooa": [
{
"in": 1
},
{
"fl": {
"f1": 1.6789,
"f2": 54331
},
"in": 1
},
{
"a": {
"aa": {
"aaa": "aaa 1"
}
},
"b": {
"bb": {
"bbb": "bbb 1"
},
"c": {
"cc": "ccc 1"
}
}
}
],
"aaa": [
[
[
"aa0 1"
],
[
"ab0 1"
]
],
[
[
"ba0 1"
],
[
"bb0 1"
]
],
[
[
"ca0 1",
"ca1 1"
],
[
"cb0 1",
"cb1 1",
"cb2 1"
]
]
],
"saa": [
-1,
[
-10,
-9.3211
],
[
1,
[
10.12345,
"not null"
],
[
1,
1.6789,
"not null",
true
],
[
-1,
6779,
"not null",
false,
"this is a short string 1"
]
]
],
"oooi": {
"oa": {
"oab": {
"oabc": 1
}
}
},
"ooof": {
"oa": {
"oab": {
"oabc": 1.5678
}
}
},
"ooos": {
"oa": {
"oab": {
"oabc": "ooos string 1"
}
}
},
"oooa": {
"oa": {
"oab": {
"oabc": [
{
"rowId": 1
},
{
"rowValue1": 1,
"rowValue2": 1
}
]
}
}
}
}
{code}
Here is the physical plan:
{code}
0: jdbc:drill:schema=dfs.drillTestDirComplexJ> explain plan for select t.id, t.oooi from `complex.json` t order by t.id limit 5;
+------------+------------+
| text | json |
+------------+------------+
| 00-00 Screen
00-01 Project(id=[$0], oooi=[$1])
00-02 SelectionVectorRemover
00-03 Limit(fetch=[5])
00-04 SingleMergeExchange(sort0=[0 ASC])
01-01 SelectionVectorRemover
01-02 TopN(limit=[5])
01-03 HashToRandomExchange(dist0=[[$0]])
02-01 Project(id=[$1], oooi=[$0])
02-02 Scan(groupscan=[EasyGroupScan [selectionRoot=/drill/testdata/complex_type/json/complex.json, numFiles=1, columns=[`id`, `oooi`], files=[maprfs:/drill/testdata/complex_type/json/complex.json]]])
| {
"head" : {
"version" : 1,
"generator" : {
"type" : "ExplainHandler",
"info" : ""
},
"type" : "APACHE_DRILL_PHYSICAL",
"options" : [ ],
"queue" : 0,
"resultMode" : "EXEC"
},
"graph" : [ {
"pop" : "fs-scan",
"@id" : 131074,
"files" : [ "maprfs:/drill/testdata/complex_type/json/complex.json" ],
"storage" : {
"type" : "file",
"enabled" : true,
"connection" : "maprfs:///",
"workspaces" : {
"root" : {
"location" : "/",
"writable" : false,
"defaultInputFormat" : null
},
"tmp" : {
"location" : "/tmp",
"writable" : true,
"defaultInputFormat" : "csv"
},
"drillTestDir" : {
"location" : "/drill/testdata/",
"writable" : true,
"defaultInputFormat" : "parquet"
},
"drillTestDirComplexJson" : {
"location" : "/drill/testdata/complex_type/json",
"writable" : true,
"defaultInputFormat" : "json"
},
"drillTestDirAmplab" : {
"location" : "/drill/testdata/amplab",
"writable" : true,
"defaultInputFormat" : "parquet"
},
"drillTestDirInformationSchema" : {
"location" : "/drill/testdata/information-schema",
"writable" : true,
"defaultInputFormat" : "parquet"
},
"drillTestDirUdfs" : {
"location" : "/drill/testdata/udfs/",
"writable" : true,
"defaultInputFormat" : "parquet"
},
"drillTestDirP1" : {
"location" : "/drill/testdata/p1tests",
"writable" : true,
"defaultInputFormat" : "parquet"
},
"drillTestDirTpch10Parquet" : {
"location" : "/drill/testdata/tpch10",
"writable" : true,
"defaultInputFormat" : "parquet"
},
"Join" : {
"location" : "/drill/testdata/join",
"writable" : true,
"defaultInputFormat" : "parquet"
},
"NoExtJson" : {
"location" : "/drill/testdata/no-extension/json",
"writable" : true,
"defaultInputFormat" : "json"
},
"NoExtParquet" : {
"location" : "/drill/testdata/no-extension/parquet",
"writable" : true,
"defaultInputFormat" : "parquet"
},
"NoExtParquetNull" : {
"location" : "/drill/testdata/no-extension/parquet",
"writable" : true,
"defaultInputFormat" : null
},
"NoExtText" : {
"location" : "/drill/testdata/no-extension/text",
"writable" : true,
"defaultInputFormat" : "psv"
},
"drillTestDirExchanges" : {
"location" : "/drill/testdata/exchanges_test",
"writable" : true,
"defaultInputFormat" : "parquet"
},
"TpcHMulti" : {
"location" : "/drill/testdata/tpch-multi",
"writable" : true,
"defaultInputFormat" : "parquet"
},
"TpcHMulti100" : {
"location" : "/drill/testdata/SF100",
"writable" : true,
"defaultInputFormat" : "parquet"
},
"TpcHMulti1" : {
"location" : "/drill/testdata/tpch_SF1",
"writable" : true,
"defaultInputFormat" : "parquet"
},
"drillTestDirExplicit" : {
"location" : "/drill/testdata/explicit_cast",
"writable" : true,
"defaultInputFormat" : "parquet"
},
"drillTestDirImplicit" : {
"location" : "/drill/testdata/implicit_cast",
"writable" : true,
"defaultInputFormat" : "parquet"
},
"drillTestDirImplicit1" : {
"location" : "/drill/testdata/implicit_cast",
"writable" : true,
"defaultInputFormat" : "parquet"
},
"drillTestDirTPCDS" : {
"location" : "/user/root/tpcds/parquet",
"writable" : true,
"defaultInputFormat" : "parquet"
},
"TPCDS" : {
"location" : "/drill/testdata/tpcds",
"writable" : true,
"defaultInputFormat" : "parquet"
},
"drillMondrian" : {
"location" : "/user/root/mondrian",
"writable" : true,
"defaultInputFormat" : "parquet"
},
"drillTestDirDatetime" : {
"location" : "/drill/testdata/datetime/datasources",
"writable" : true,
"defaultInputFormat" : "parquet"
},
"drillTestDirViews" : {
"location" : "/drill/testdata/views/",
"writable" : true,
"defaultInputFormat" : "parquet"
},
"drillTestDirNumerical" : {
"location" : "/drill/testdata/numerical/",
"writable" : true,
"defaultInputFormat" : "parquet"
},
"drillTestDirJson" : {
"location" : "/drill/testdata/json_storage/",
"writable" : true,
"defaultInputFormat" : null
},
"drillTestDirTestNewWS" : {
"location" : "/drill/testdata/newWS/",
"writable" : true,
"defaultInputFormat" : null
},
"drillTestDirTpch01Text" : {
"location" : "/drill/testdata/Tpch0.01/text/",
"writable" : true,
"defaultInputFormat" : null
},
"drillTestDirTpch01Json" : {
"location" : "/drill/testdata/Tpch0.01/json/",
"writable" : true,
"defaultInputFormat" : null
},
"drillTestDirTpch01Parquet" : {
"location" : "/drill/testdata/Tpch0.01/parquet/",
"writable" : true,
"defaultInputFormat" : null
},
"drillTestDirConvert" : {
"location" : "/drill/testdata/convert",
"writable" : true,
"defaultInputFormat" : null
},
"drillTestDirTpch100Text" : {
"location" : "/drill/testdata/tpch100/text/",
"writable" : true,
"defaultInputFormat" : null
},
"drillTestDirTpch100Parquet" : {
"location" : "/drill/testdata/tpch100/parquet",
"writable" : true,
"defaultInputFormat" : null
},
"drillTestDirAggregate1parquet" : {
"location" : "/drill/testdata/tpcds/parquet/s1",
"writable" : true,
"defaultInputFormat" : null
},
"drillTestDirAggregate1csv" : {
"location" : "/drill/testdata/tpcds/csv/s1",
"writable" : true,
"defaultInputFormat" : null
},
"drillTestDirAggregate1json" : {
"location" : "/drill/testdata/tpcds/json/s1",
"writable" : true,
"defaultInputFormat" : null
},
"drillTestDirMondrian" : {
"location" : "/drill/testdata/mondrian",
"writable" : true,
"defaultInputFormat" : null
},
"drillTestDirTpcdsImpalaSF1" : {
"location" : "/drill/testdata/tpcds-impala-sf1",
"writable" : true,
"defaultInputFormat" : null
},
"sandbox" : {
"location" : "/sandbox",
"writable" : true,
"defaultInputFormat" : "parquet"
},
"sandbox-logs" : {
"location" : "/sandbox/flat",
"writable" : true,
"defaultInputFormat" : "parquet"
},
"sandbox-json" : {
"location" : "/sandbox/json",
"writable" : true,
"defaultInputFormat" : "parquet"
}
},
"formats" : {
"psv" : {
"type" : "text",
"extensions" : [ "tbl" ],
"delimiter" : "|"
},
"dsv" : {
"type" : "text",
"extensions" : [ "dat" ],
"delimiter" : "|"
},
"csv" : {
"type" : "text",
"extensions" : [ "csv" ],
"delimiter" : ","
},
"tsv" : {
"type" : "text",
"extensions" : [ "tsv" ],
"delimiter" : "\t"
},
"parquet" : {
"type" : "parquet"
},
"json" : {
"type" : "json"
}
}
},
"format" : {
"type" : "json"
},
"columns" : [ "`id`", "`oooi`" ],
"selectionRoot" : "/drill/testdata/complex_type/json/complex.json",
"cost" : 1186767.0
}, {
"pop" : "project",
"@id" : 131073,
"exprs" : [ {
"ref" : "`id`",
"expr" : "`id`"
}, {
"ref" : "`oooi`",
"expr" : "`oooi`"
} ],
"child" : 131074,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 1186767.0
}, {
"pop" : "hash-to-random-exchange",
"@id" : 65539,
"child" : 131073,
"expr" : "hash(`id`) ",
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 1186767.0
}, {
"pop" : "top-n",
"@id" : 65538,
"child" : 65539,
"orderings" : [ {
"order" : "ASC",
"expr" : "`id`",
"nullDirection" : "UNSPECIFIED"
} ],
"reverse" : false,
"limit" : 5,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 1186767.0
}, {
"pop" : "selection-vector-remover",
"@id" : 65537,
"child" : 65538,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 1186767.0
}, {
"pop" : " |
+------------+------------+
{code}
I did not see any error messages in the log files.
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)