You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@drill.apache.org by "Ramana Inukonda Nagaraj (JIRA)" <ji...@apache.org> on 2014/06/02 22:37:02 UTC
[jira] [Comment Edited] (DRILL-881) Join between a JSON file and a
parquet file fails with unsupported exception
[ https://issues.apache.org/jira/browse/DRILL-881?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14014364#comment-14014364 ]
Ramana Inukonda Nagaraj edited comment on DRILL-881 at 6/2/14 8:35 PM:
-----------------------------------------------------------------------
Explain plan:
{code}
{
"head" : {
"version" : 1,
"generator" : {
"type" : "ExplainHandler",
"info" : ""
},
"type" : "APACHE_DRILL_PHYSICAL",
"options" : [ ],
"resultMode" : "EXEC"
},
"graph" : [ {
"pop" : "fs-scan",
"@id" : 131073,
"files" : [ "maprfs:/drill/testdata/json_storage/crossData.json" ],
"storage" : {
"type" : "file",
"connection" : "maprfs:///",
"workspaces" : {
"root" : {
"location" : "/",
"writable" : false,
"storageformat" : null
},
"tmp" : {
"location" : "/tmp",
"writable" : true,
"storageformat" : "csv"
},
"drillTestDir" : {
"location" : "/drill/testdata/",
"writable" : true,
"storageformat" : "parquet"
},
"drillTestDirAmplab" : {
"location" : "/drill/testdata/amplab",
"writable" : true,
"storageformat" : "parquet"
},
"drillTestDirP1" : {
"location" : "/drill/testdata/p1tests",
"writable" : true,
"storageformat" : "parquet"
},
"drillTestDirExchanges" : {
"location" : "/drill/testdata/exchanges_test",
"writable" : true,
"storageformat" : "parquet"
},
"TpcHMulti" : {
"location" : "/drill/testdata/tpch-multi",
"writable" : true,
"storageformat" : "parquet"
},
"drillTestDirExplicit" : {
"location" : "/drill/testdata/explicit_cast",
"writable" : true,
"storageformat" : "parquet"
}
},
"formats" : {
"psv" : {
"type" : "text",
"extensions" : [ "tbl" ],
"delimiter" : "|"
},
"csv" : {
"type" : "text",
"extensions" : [ "csv" ],
"delimiter" : ","
},
"tsv" : {
"type" : "text",
"extensions" : [ "tsv" ],
"delimiter" : "\t"
},
"parquet" : {
"type" : "parquet"
},
"json" : {
"type" : "json"
}
}
},
"format" : {
"type" : "json"
},
"columns" : [ "`id`", "`firstName`" ],
"selectionRoot" : "/drill/testdata/json_storage/crossData.json"
}, {
"pop" : "hash-to-random-exchange",
"@id" : 65540,
"child" : 131073,
"expr" : "hash(`id`) ",
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000
}, {
"pop" : "parquet-scan",
"@id" : 196609,
"entries" : [ {
"path" : "maprfs:/drill/testdata/tpch-multi/customer"
} ],
"storage" : {
"type" : "file",
"connection" : "maprfs:///",
"workspaces" : {
"root" : {
"location" : "/",
"writable" : false,
"storageformat" : null
},
"tmp" : {
"location" : "/tmp",
"writable" : true,
"storageformat" : "csv"
},
"drillTestDir" : {
"location" : "/drill/testdata/",
"writable" : true,
"storageformat" : "parquet"
},
"drillTestDirAmplab" : {
"location" : "/drill/testdata/amplab",
"writable" : true,
"storageformat" : "parquet"
},
"drillTestDirP1" : {
"location" : "/drill/testdata/p1tests",
"writable" : true,
"storageformat" : "parquet"
},
"drillTestDirExchanges" : {
"location" : "/drill/testdata/exchanges_test",
"writable" : true,
"storageformat" : "parquet"
},
"TpcHMulti" : {
"location" : "/drill/testdata/tpch-multi",
"writable" : true,
"storageformat" : "parquet"
},
"drillTestDirExplicit" : {
"location" : "/drill/testdata/explicit_cast",
"writable" : true,
"storageformat" : "parquet"
}
},
"formats" : {
"psv" : {
"type" : "text",
"extensions" : [ "tbl" ],
"delimiter" : "|"
},
"csv" : {
"type" : "text",
"extensions" : [ "csv" ],
"delimiter" : ","
},
"tsv" : {
"type" : "text",
"extensions" : [ "tsv" ],
"delimiter" : "\t"
},
"parquet" : {
"type" : "parquet"
},
"json" : {
"type" : "json"
}
}
},
"format" : {
"type" : "parquet"
},
"columns" : [ "`C_CUSTKEY`" ],
"selectionRoot" : "/drill/testdata/tpch-multi/customer"
}, {
"pop" : "hash-to-random-exchange",
"@id" : 65541,
"child" : 196609,
"expr" : "hash(`C_CUSTKEY`) ",
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000
}, {
"pop" : "project",
"@id" : 65539,
"exprs" : [ {
"ref" : "`*0`",
"expr" : "`*`"
}, {
"ref" : "`C_CUSTKEY`",
"expr" : "`C_CUSTKEY`"
} ],
"child" : 65541,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000
}, {
"pop" : "hash-join",
"@id" : 65538,
"left" : 65540,
"right" : 65539,
"conditions" : [ {
"relationship" : "==",
"left" : "`id`",
"right" : "`C_CUSTKEY`"
} ],
"joinType" : "INNER",
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000
}, {
"pop" : "project",
"@id" : 65537,
"exprs" : [ {
"ref" : "`firstName`",
"expr" : "`firstName`"
} ],
"child" : 65538,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000
}, {
"pop" : "union-exchange",
"@id" : 1,
"child" : 65537,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000
}, {
"pop" : "screen",
"@id" : 0,
"child" : 1,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000
} ]
}
{code}
was (Author: inramana):
Explain plan:
{
"head" : {
"version" : 1,
"generator" : {
"type" : "ExplainHandler",
"info" : ""
},
"type" : "APACHE_DRILL_PHYSICAL",
"options" : [ ],
"resultMode" : "EXEC"
},
"graph" : [ {
"pop" : "fs-scan",
"@id" : 131073,
"files" : [ "maprfs:/drill/testdata/json_storage/crossData.json" ],
"storage" : {
"type" : "file",
"connection" : "maprfs:///",
"workspaces" : {
"root" : {
"location" : "/",
"writable" : false,
"storageformat" : null
},
"tmp" : {
"location" : "/tmp",
"writable" : true,
"storageformat" : "csv"
},
"drillTestDir" : {
"location" : "/drill/testdata/",
"writable" : true,
"storageformat" : "parquet"
},
"drillTestDirAmplab" : {
"location" : "/drill/testdata/amplab",
"writable" : true,
"storageformat" : "parquet"
},
"drillTestDirP1" : {
"location" : "/drill/testdata/p1tests",
"writable" : true,
"storageformat" : "parquet"
},
"drillTestDirExchanges" : {
"location" : "/drill/testdata/exchanges_test",
"writable" : true,
"storageformat" : "parquet"
},
"TpcHMulti" : {
"location" : "/drill/testdata/tpch-multi",
"writable" : true,
"storageformat" : "parquet"
},
"drillTestDirExplicit" : {
"location" : "/drill/testdata/explicit_cast",
"writable" : true,
"storageformat" : "parquet"
}
},
"formats" : {
"psv" : {
"type" : "text",
"extensions" : [ "tbl" ],
"delimiter" : "|"
},
"csv" : {
"type" : "text",
"extensions" : [ "csv" ],
"delimiter" : ","
},
"tsv" : {
"type" : "text",
"extensions" : [ "tsv" ],
"delimiter" : "\t"
},
"parquet" : {
"type" : "parquet"
},
"json" : {
"type" : "json"
}
}
},
"format" : {
"type" : "json"
},
"columns" : [ "`id`", "`firstName`" ],
"selectionRoot" : "/drill/testdata/json_storage/crossData.json"
}, {
"pop" : "hash-to-random-exchange",
"@id" : 65540,
"child" : 131073,
"expr" : "hash(`id`) ",
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000
}, {
"pop" : "parquet-scan",
"@id" : 196609,
"entries" : [ {
"path" : "maprfs:/drill/testdata/tpch-multi/customer"
} ],
"storage" : {
"type" : "file",
"connection" : "maprfs:///",
"workspaces" : {
"root" : {
"location" : "/",
"writable" : false,
"storageformat" : null
},
"tmp" : {
"location" : "/tmp",
"writable" : true,
"storageformat" : "csv"
},
"drillTestDir" : {
"location" : "/drill/testdata/",
"writable" : true,
"storageformat" : "parquet"
},
"drillTestDirAmplab" : {
"location" : "/drill/testdata/amplab",
"writable" : true,
"storageformat" : "parquet"
},
"drillTestDirP1" : {
"location" : "/drill/testdata/p1tests",
"writable" : true,
"storageformat" : "parquet"
},
"drillTestDirExchanges" : {
"location" : "/drill/testdata/exchanges_test",
"writable" : true,
"storageformat" : "parquet"
},
"TpcHMulti" : {
"location" : "/drill/testdata/tpch-multi",
"writable" : true,
"storageformat" : "parquet"
},
"drillTestDirExplicit" : {
"location" : "/drill/testdata/explicit_cast",
"writable" : true,
"storageformat" : "parquet"
}
},
"formats" : {
"psv" : {
"type" : "text",
"extensions" : [ "tbl" ],
"delimiter" : "|"
},
"csv" : {
"type" : "text",
"extensions" : [ "csv" ],
"delimiter" : ","
},
"tsv" : {
"type" : "text",
"extensions" : [ "tsv" ],
"delimiter" : "\t"
},
"parquet" : {
"type" : "parquet"
},
"json" : {
"type" : "json"
}
}
},
"format" : {
"type" : "parquet"
},
"columns" : [ "`C_CUSTKEY`" ],
"selectionRoot" : "/drill/testdata/tpch-multi/customer"
}, {
"pop" : "hash-to-random-exchange",
"@id" : 65541,
"child" : 196609,
"expr" : "hash(`C_CUSTKEY`) ",
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000
}, {
"pop" : "project",
"@id" : 65539,
"exprs" : [ {
"ref" : "`*0`",
"expr" : "`*`"
}, {
"ref" : "`C_CUSTKEY`",
"expr" : "`C_CUSTKEY`"
} ],
"child" : 65541,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000
}, {
"pop" : "hash-join",
"@id" : 65538,
"left" : 65540,
"right" : 65539,
"conditions" : [ {
"relationship" : "==",
"left" : "`id`",
"right" : "`C_CUSTKEY`"
} ],
"joinType" : "INNER",
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000
}, {
"pop" : "project",
"@id" : 65537,
"exprs" : [ {
"ref" : "`firstName`",
"expr" : "`firstName`"
} ],
"child" : 65538,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000
}, {
"pop" : "union-exchange",
"@id" : 1,
"child" : 65537,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000
}, {
"pop" : "screen",
"@id" : 0,
"child" : 1,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000
} ]
}
> Join between a JSON file and a parquet file fails with unsupported exception
> ----------------------------------------------------------------------------
>
> Key: DRILL-881
> URL: https://issues.apache.org/jira/browse/DRILL-881
> Project: Apache Drill
> Issue Type: Bug
> Components: Query Planning & Optimization
> Reporter: Ramana Inukonda Nagaraj
> Assignee: Steven Phillips
>
> select c_json.firstName from `json_storage/crossData.json` c_json,`tpch-multi/customer` c where c.C_CUSTKEY=c_json.id;
> The following queries work:
> select c_json.firstName from `json_storage/crossData.json` c_json;
> select c.C_CUSTKEY from `tpch-multi/customer` c;
> So it's not a problem with the data sources.
--
This message was sent by Atlassian JIRA
(v6.2#6252)