You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@drill.apache.org by "Neeraja (JIRA)" <ji...@apache.org> on 2014/09/10 15:46:28 UTC
[jira] [Created] (DRILL-1396) Query with EXISTS clause and
correlation fails
Neeraja created DRILL-1396:
------------------------------
Summary: Query with EXISTS clause and correlation fails
Key: DRILL-1396
URL: https://issues.apache.org/jira/browse/DRILL-1396
Project: Apache Drill
Issue Type: Bug
Reporter: Neeraja
Priority: Critical
The following query fails.
//Get the clickstream activity for all the customers who have an order total > 100
select t.trans_info.purch_flag,
t.user_info.cust_id, t.trans_info.prod_id
from `Clickstream.clicks`.`/json/clicks.json` t
where exists (select * from hive.orders o where o.cust_id = t.user_info.cust_id and o.order_total > 100)
Query failed: Failure while running fragment. Failure finding function that runtime code generation expected. Signature: compare_to( MAP:REQUIREDMAP:REQUIRED, ) returns INT:REQUIRED [d6401ddd-f9bc-496d-ae0c-b5cde35bf289]
Below is the explain plan:
+------------+------------+
| text | json |
+------------+------------+
| 00-00 Screen
00-01 Project(EXPR$0=[$0], EXPR$1=[$1], EXPR$2=[$2])
00-02 Project(EXPR$0=[ITEM($2, 'purch_flag')], EXPR$1=[ITEM($1, 'cust_id')], EXPR$2=[ITEM($2, 'prod_id')])
00-03 SelectionVectorRemover
00-04 Filter(condition=[IS TRUE($4)])
00-05 HashJoin(condition=[=($1, $3)], joinType=[left])
00-07 Project(T24¦¦*=[$0], T24¦¦user_info=[$1], T24¦¦trans_info=[$2])
00-09 Scan(groupscan=[EasyGroupScan [selectionRoot=/mapr/my.cluster.com/demo/clicks/json/clicks.json, columns = null]])
00-06 HashAgg(group=[{0}], agg#0=[MIN($1)])
00-08 Project(T25¦¦user_info=[$1], $f0=[true])
00-10 HashJoin(condition=[=($0, $2)], joinType=[inner])
00-12 Project($f7=[CAST($0):ANY])
00-14 SelectionVectorRemover
00-16 Filter(condition=[>($1, 100)])
00-18 Project(cust_id=[$1], order_total=[$0])
00-20 Scan(groupscan=[HiveScan [table=Table(tableName:orders, dbName:default, owner:root, createTime:1409956843, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:order_id, type:bigint, comment:null), FieldSchema(name:month, type:string, comment:null), FieldSchema(name:purchdate, type:timestamp, comment:null), FieldSchema(name:cust_id, type:bigint, comment:null), FieldSchema(name:state, type:string, comment:null), FieldSchema(name:prod_id, type:bigint, comment:null), FieldSchema(name:order_total, type:int, comment:null)], location:maprfs:/mapr/my.cluster.com/drill-beta-demo/data/orders, inputFormat:org.apache.hadoop.mapred.TextInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat, compressed:false, numBuckets:-1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, parameters:{serialization.format=,, field.delim=,}), bucketCols:[], sortCols:[], parameters:{}, skewedInfo:SkewedInfo(skewedColNames:[], skewedColValues:[], skewedColValueLocationMaps:{}), storedAsSubDirectories:false), partitionKeys:[], parameters:{EXTERNAL=TRUE, transient_lastDdlTime=1409956843}, viewOriginalText:null, viewExpandedText:null, tableType:EXTERNAL_TABLE), inputSplits=[maprfs:/mapr/my.cluster.com/drill-beta-demo/data/orders/month2.agg.orders.csv:0+640155, maprfs:/mapr/my.cluster.com/drill-beta-demo/data/orders/month5.agg.orders.csv:0+775506, maprfs:/mapr/my.cluster.com/drill-beta-demo/data/orders/month6.agg.orders.csv:0+791685, maprfs:/mapr/my.cluster.com/drill-beta-demo/data/orders/month8.agg.orders.csv:0+805072, maprfs:/mapr/my.cluster.com/drill-beta-demo/data/orders/month4.agg.orders.csv:0+603886, maprfs:/mapr/my.cluster.com/drill-beta-demo/data/orders/month9.agg.orders.csv:0+846270, maprfs:/mapr/my.cluster.com/drill-beta-demo/data/orders/month1.agg.orders.csv:0+461090, maprfs:/mapr/my.cluster.com/drill-beta-demo/data/orders/month7.agg.orders.csv:0+771399, 
maprfs:/mapr/my.cluster.com/drill-beta-demo/data/orders/month3.agg.orders.csv:0+806738], columns=[SchemaPath [`cust_id`], SchemaPath [`order_total`]]]])
00-11 Project(T25¦¦user_info=[$0], $f1=[ITEM($0, 'cust_id')])
00-13 HashAgg(group=[{0}])
00-15 Project(T25¦¦user_info=[$1])
00-17 Project(T25¦¦*=[$0], T25¦¦user_info=[$1], T25¦¦trans_info=[$2])
00-19 Scan(groupscan=[EasyGroupScan [selectionRoot=/mapr/my.cluster.com/demo/clicks/json/clicks.json, columns = null]])
| {
"head" : {
"version" : 1,
"generator" : {
"type" : "ExplainHandler",
"info" : ""
},
"type" : "APACHE_DRILL_PHYSICAL",
"options" : [ ],
"queue" : 0,
"resultMode" : "EXEC"
},
"graph" : [ {
"pop" : "hive-scan",
"@id" : 20,
"hive-table" : {
"table" : {
"tableName" : "orders",
"dbName" : "default",
"owner" : "root",
"createTime" : 1409956843,
"lastAccessTime" : 0,
"retention" : 0,
"sd" : {
"cols" : [ {
"name" : "order_id",
"type" : "bigint",
"comment" : null
}, {
"name" : "month",
"type" : "string",
"comment" : null
}, {
"name" : "purchdate",
"type" : "timestamp",
"comment" : null
}, {
"name" : "cust_id",
"type" : "bigint",
"comment" : null
}, {
"name" : "state",
"type" : "string",
"comment" : null
}, {
"name" : "prod_id",
"type" : "bigint",
"comment" : null
}, {
"name" : "order_total",
"type" : "int",
"comment" : null
} ],
"location" : "maprfs:/mapr/my.cluster.com/drill-beta-demo/data/orders",
"inputFormat" : "org.apache.hadoop.mapred.TextInputFormat",
"outputFormat" : "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
"compressed" : false,
"numBuckets" : -1,
"serDeInfo" : {
"name" : null,
"serializationLib" : "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
"parameters" : {
"serialization.format" : ",",
"field.delim" : ","
}
},
"sortCols" : [ ],
"parameters" : { }
},
"partitionKeys" : [ ],
"parameters" : {
"EXTERNAL" : "TRUE",
"transient_lastDdlTime" : "1409956843"
},
"viewOriginalText" : null,
"viewExpandedText" : null,
"tableType" : "EXTERNAL_TABLE"
},
"partitions" : null,
"hiveConfigOverride" : {
"hive.metastore.uris" : "thrift://192.168.208.143:9083",
"hive.metastore.sasl.enabled" : "false"
}
},
"storage-plugin" : "hive",
"columns" : [ "`cust_id`", "`order_total`" ],
"cost" : 6349.0
}, {
"pop" : "project",
"@id" : 18,
"exprs" : [ {
"ref" : "`cust_id`",
"expr" : "`cust_id`"
}, {
"ref" : "`order_total`",
"expr" : "`order_total`"
} ],
"child" : 20,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 6349.0
}, {
"pop" : "filter",
"@id" : 16,
"child" : 18,
"expr" : "greater_than(`order_total`, 100) ",
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 3174.5
}, {
"pop" : "selection-vector-remover",
"@id" : 14,
"child" : 16,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 3174.5
}, {
"pop" : "project",
"@id" : 12,
"exprs" : [ {
"ref" : "`$f7`",
"expr" : "`cust_id`"
} ],
"child" : 14,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 3174.5
}, {
"pop" : "fs-scan",
"@id" : 9,
"files" : [ "maprfs:/mapr/my.cluster.com/demo/clicks/json/clicks.json" ],
"storage" : {
"type" : "file",
"enabled" : true,
"connection" : "maprfs:///",
"workspaces" : {
"root" : {
"location" : "/mapr/my.cluster.com/demo",
"writable" : false,
"storageformat" : null
},
"clicks" : {
"location" : "/mapr/my.cluster.com/demo/clicks",
"writable" : true,
"storageformat" : "parquet"
},
"views" : {
"location" : "/mapr/my.cluster.com/demo/views",
"writable" : true,
"storageformat" : "parquet"
}
},
"formats" : {
"psv" : {
"type" : "text",
"extensions" : [ "tbl" ],
"delimiter" : "|"
},
"csv" : {
"type" : "text",
"extensions" : [ "csv" ],
"delimiter" : ","
},
"tsv" : {
"type" : "text",
"extensions" : [ "tsv" ],
"delimiter" : "\t"
},
"parquet" : {
"type" : "parquet"
},
"json" : {
"type" : "json"
}
}
},
"format" : {
"type" : "json"
},
"selectionRoot" : "/mapr/my.cluster.com/demo/clicks/json/clicks.json",
"cost" : 5097.0
}, {
"pop" : "project",
"@id" : 7,
"exprs" : [ {
"ref" : "`T24¦¦*`",
"expr" : "`*`"
} ],
"child" : 9,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 5097.0
}, {
"pop" : "fs-scan",
"@id" : 19,
"files" : [ "maprfs:/mapr/my.cluster.com/demo/clicks/json/clicks.json" ],
"storage" : {
"type" : "file",
"enabled" : true,
"connection" : "maprfs:///",
"workspaces" : {
"root" : {
"location" : "/mapr/my.cluster.com/demo",
"writable" : false,
"storageformat" : null
},
"clicks" : {
"location" : "/mapr/my.cluster.com/demo/clicks",
"writable" : true,
"storageformat" : "parquet"
},
"views" : {
"location" : "/mapr/my.cluster.com/demo/views",
"writable" : true,
"storageformat" : "parquet"
}
},
"formats" : {
"psv" : {
"type" : "text",
"extensions" : [ "tbl" ],
"delimiter" : "|"
},
"csv" : {
"type" : "text",
"extensions" : [ "csv" ],
"delimiter" : ","
},
"tsv" : {
"type" : "text",
"extensions" : [ "tsv" ],
"delimiter" : "\t"
},
"parquet" : {
"type" : "parquet"
},
"json" : {
"type" : "json"
}
}
},
"format" : {
"type" : "json"
},
"selectionRoot" : "/mapr/my.cluster.com/demo/clicks/json/clicks.json",
"cost" : 5097.0
}, {
"pop" : "project",
"@id" : 17, |
+------------+------------+
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)