You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@asterixdb.apache.org by "Yingyi Bu (JIRA)" <ji...@apache.org> on 2016/01/06 19:02:39 UTC
[jira] [Updated] (ASTERIXDB-1249) Self index join chooses wrong probe/index branch
[ https://issues.apache.org/jira/browse/ASTERIXDB-1249?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Yingyi Bu updated ASTERIXDB-1249:
---------------------------------
Description:
DDLs:
{noformat}
drop dataverse test if exists;
create dataverse test;
use dataverse test;
create type TwitterUserType as closed {
screen-name: string,
lang: string,
friends-count: int64,
statuses-count: int64,
name: string,
followers-count: int64
}
create type TweetMessageType as closed {
tweetid: int64,
user: TwitterUserType,
sender-location: point,
send-time: datetime,
referred-topics: {{ string }},
message-text: string,
countA: int64,
countB: int64
}
create dataset TweetMessages(TweetMessageType)
primary key tweetid;
create index twmSndLocIx on TweetMessages(sender-location) type rtree;
create index msgCountAIx on TweetMessages(countA) type btree;
create index msgCountBIx on TweetMessages(countB) type btree;
create index msgTextIx on TweetMessages(message-text) type keyword;
{noformat}
Query:
{noformat}
for $t1 in dataset('TweetMessages')
for $t2 in dataset('TweetMessages')
let $n := create-circle($t1.sender-location, 0.5)
where spatial-intersect($t2.sender-location, $n)
order by $t2.tweetid
return {"tweetid2":$t2.tweetid, "loc2":$t2.sender-location};
{noformat}
Optimized plan:
{noformat}
distribute result [%0->$$10]
-- DISTRIBUTE_RESULT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$10])
-- STREAM_PROJECT |PARTITIONED|
assign [$$10] <- [function-call: asterix:closed-record-constructor, Args:[AString: {tweetid2}, %0->$$15, AString: {loc2}, %0->$$13]]
-- ASSIGN |PARTITIONED|
exchange
-- SORT_MERGE_EXCHANGE [$$15(ASC) ] |PARTITIONED|
order (ASC, %0->$$15)
-- STABLE_SORT [$$15(ASC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$13, $$15])
-- STREAM_PROJECT |PARTITIONED|
select (function-call: asterix:spatial-intersect, Args:[%0->$$13, function-call: asterix:create-circle, Args:[function-call: asterix:field-access-by-index, Args:[%0->$$0, AInt32: {2}], ADouble: {0.5}]])
-- STREAM_SELECT |PARTITIONED|
project ([$$0, $$13, $$15])
-- STREAM_PROJECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
unnest-map [$$14, $$0] <- function-call: asterix:index-search, Args:[AString: {TweetMessages}, AInt32: {0}, AString: {test}, AString: {TweetMessages}, ABoolean: {true}, ABoolean: {false}, ABoolean: {false}, AInt32: {1}, %0->$$27, AInt32: {1}, %0->$$27, TRUE, TRUE, TRUE]
-- BTREE_SEARCH |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
order (ASC, %0->$$27)
-- STABLE_SORT [$$27(ASC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$27, $$13, $$15])
-- STREAM_PROJECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
unnest-map [$$23, $$24, $$25, $$26, $$27] <- function-call: asterix:index-search, Args:[AString: {twmSndLocIx}, AInt32: {1}, AString: {test}, AString: {TweetMessages}, ABoolean: {true}, ABoolean: {false}, ABoolean: {true}, AInt32: {4}, %0->$$19, %0->$$20, %0->$$21, %0->$$22]
-- RTREE_SEARCH |PARTITIONED|
exchange
-- BROADCAST_EXCHANGE |PARTITIONED|
assign [$$19, $$20, $$21, $$22] <- [function-call: asterix:create-mbr, Args:[%0->$$13, AInt32: {2}, AInt32: {0}], function-call: asterix:create-mbr, Args:[%0->$$13, AInt32: {2}, AInt32: {1}], function-call: asterix:create-mbr, Args:[%0->$$13, AInt32: {2}, AInt32: {2}], function-call: asterix:create-mbr, Args:[%0->$$13, AInt32: {2}, AInt32: {3}]]
-- ASSIGN |PARTITIONED|
project ([$$13, $$15])
-- STREAM_PROJECT |PARTITIONED|
assign [$$13] <- [function-call: asterix:field-access-by-index, Args:[%0->$$1, AInt32: {2}]]
-- ASSIGN |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
data-scan []<-[$$15, $$1] <- test:TweetMessages
-- DATASOURCE_SCAN |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
-- EMPTY_TUPLE_SOURCE |PARTITIONED|
{noformat}
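The optimized plan is incorrect: the index search does not use the right join condition, so the result differs from the expected one. Specifically, the R-tree search on twmSndLocIx is probed with the MBR of the point $$13 itself (see the create-mbr assign that produces $$19..$$22), not with the MBR of the circle create-circle(..., 0.5) that appears in the spatial-intersect condition.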
was:
DDLs:
{noformat}
drop dataverse test if exists;
create dataverse test;
use dataverse test;
create type TwitterUserType as closed {
screen-name: string,
lang: string,
friends-count: int64,
statuses-count: int64,
name: string,
followers-count: int64
}
create type TweetMessageType as closed {
tweetid: int64,
user: TwitterUserType,
sender-location: point,
send-time: datetime,
referred-topics: {{ string }},
message-text: string,
countA: int64,
countB: int64
}
create dataset TweetMessages(TweetMessageType)
primary key tweetid;
create index twmSndLocIx on TweetMessages(sender-location) type rtree;
create index msgCountAIx on TweetMessages(countA) type btree;
create index msgCountBIx on TweetMessages(countB) type btree;
create index msgTextIx on TweetMessages(message-text) type keyword;
{noformat}
Query:
{noformat}
for $t1 in dataset('TweetMessages')
for $t2 in dataset('TweetMessages')
let $n := create-circle($t1.sender-location, 0.5)
where spatial-intersect($t2.sender-location, $n)
order by $t2.tweetid
return {"tweetid2":$t2.tweetid, "loc2":$t2.sender-location};
{noformat}
Optimized plan:
{noformat}
distribute result [%0->$$10]
-- DISTRIBUTE_RESULT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$10])
-- STREAM_PROJECT |PARTITIONED|
assign [$$10] <- [function-call: asterix:closed-record-constructor, Args:[AString: {tweetid2}, %0->$$15, AString: {loc2}, %0->$$13]]
-- ASSIGN |PARTITIONED|
exchange
-- SORT_MERGE_EXCHANGE [$$15(ASC) ] |PARTITIONED|
order (ASC, %0->$$15)
-- STABLE_SORT [$$15(ASC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$13, $$15])
-- STREAM_PROJECT |PARTITIONED|
select (function-call: asterix:spatial-intersect, Args:[%0->$$13, function-call: asterix:create-circle, Args:[function-call: asterix:field-access-by-index, Args:[%0->$$0, AInt32: {2}], ADouble: {0.5}]])
-- STREAM_SELECT |PARTITIONED|
project ([$$0, $$13, $$15])
-- STREAM_PROJECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
unnest-map [$$14, $$0] <- function-call: asterix:index-search, Args:[AString: {TweetMessages}, AInt32: {0}, AString: {test}, AString: {TweetMessages}, ABoolean: {true}, ABoolean: {false}, ABoolean: {false}, AInt32: {1}, %0->$$27, AInt32: {1}, %0->$$27, TRUE, TRUE, TRUE]
-- BTREE_SEARCH |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
order (ASC, %0->$$27)
-- STABLE_SORT [$$27(ASC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$27, $$13, $$15])
-- STREAM_PROJECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
unnest-map [$$23, $$24, $$25, $$26, $$27] <- function-call: asterix:index-search, Args:[AString: {twmSndLocIx}, AInt32: {1}, AString: {test}, AString: {TweetMessages}, ABoolean: {true}, ABoolean: {false}, ABoolean: {true}, AInt32: {4}, %0->$$19, %0->$$20, %0->$$21, %0->$$22]
-- RTREE_SEARCH |PARTITIONED|
exchange
-- BROADCAST_EXCHANGE |PARTITIONED|
assign [$$19, $$20, $$21, $$22] <- [function-call: asterix:create-mbr, Args:[%0->$$13, AInt32: {2}, AInt32: {0}], function-call: asterix:create-mbr, Args:[%0->$$13, AInt32: {2}, AInt32: {1}], function-call: asterix:create-mbr, Args:[%0->$$13, AInt32: {2}, AInt32: {2}], function-call: asterix:create-mbr, Args:[%0->$$13, AInt32: {2}, AInt32: {3}]]
-- ASSIGN |PARTITIONED|
project ([$$13, $$15])
-- STREAM_PROJECT |PARTITIONED|
assign [$$13] <- [function-call: asterix:field-access-by-index, Args:[%0->$$1, AInt32: {2}]]
-- ASSIGN |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
data-scan []<-[$$15, $$1] <- test:TweetMessages
-- DATASOURCE_SCAN |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
-- EMPTY_TUPLE_SOURCE |PARTITIONED|
{noformat}
The optimized plan is incorrect --- the index search doesn't use the right join condition and hence the result is different from expected.
> Self index join chooses wrong probe/index branch
> ------------------------------------------------
>
> Key: ASTERIXDB-1249
> URL: https://issues.apache.org/jira/browse/ASTERIXDB-1249
> Project: Apache AsterixDB
> Issue Type: Bug
> Components: Optimizer
> Reporter: Yingyi Bu
> Assignee: Taewoo Kim
>
> DDLs:
> {noformat}
> drop dataverse test if exists;
> create dataverse test;
> use dataverse test;
> create type TwitterUserType as closed {
> screen-name: string,
> lang: string,
> friends-count: int64,
> statuses-count: int64,
> name: string,
> followers-count: int64
> }
> create type TweetMessageType as closed {
> tweetid: int64,
> user: TwitterUserType,
> sender-location: point,
> send-time: datetime,
> referred-topics: {{ string }},
> message-text: string,
> countA: int64,
> countB: int64
> }
> create dataset TweetMessages(TweetMessageType)
> primary key tweetid;
> create index twmSndLocIx on TweetMessages(sender-location) type rtree;
> create index msgCountAIx on TweetMessages(countA) type btree;
> create index msgCountBIx on TweetMessages(countB) type btree;
> create index msgTextIx on TweetMessages(message-text) type keyword;
> {noformat}
> Query:
> {noformat}
> for $t1 in dataset('TweetMessages')
> for $t2 in dataset('TweetMessages')
> let $n := create-circle($t1.sender-location, 0.5)
> where spatial-intersect($t2.sender-location, $n)
> order by $t2.tweetid
> return {"tweetid2":$t2.tweetid, "loc2":$t2.sender-location};
> {noformat}
> Optimized plan:
> {noformat}
> distribute result [%0->$$10]
> -- DISTRIBUTE_RESULT |PARTITIONED|
> exchange
> -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
> project ([$$10])
> -- STREAM_PROJECT |PARTITIONED|
> assign [$$10] <- [function-call: asterix:closed-record-constructor, Args:[AString: {tweetid2}, %0->$$15, AString: {loc2}, %0->$$13]]
> -- ASSIGN |PARTITIONED|
> exchange
> -- SORT_MERGE_EXCHANGE [$$15(ASC) ] |PARTITIONED|
> order (ASC, %0->$$15)
> -- STABLE_SORT [$$15(ASC)] |PARTITIONED|
> exchange
> -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
> project ([$$13, $$15])
> -- STREAM_PROJECT |PARTITIONED|
> select (function-call: asterix:spatial-intersect, Args:[%0->$$13, function-call: asterix:create-circle, Args:[function-call: asterix:field-access-by-index, Args:[%0->$$0, AInt32: {2}], ADouble: {0.5}]])
> -- STREAM_SELECT |PARTITIONED|
> project ([$$0, $$13, $$15])
> -- STREAM_PROJECT |PARTITIONED|
> exchange
> -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
> unnest-map [$$14, $$0] <- function-call: asterix:index-search, Args:[AString: {TweetMessages}, AInt32: {0}, AString: {test}, AString: {TweetMessages}, ABoolean: {true}, ABoolean: {false}, ABoolean: {false}, AInt32: {1}, %0->$$27, AInt32: {1}, %0->$$27, TRUE, TRUE, TRUE]
> -- BTREE_SEARCH |PARTITIONED|
> exchange
> -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
> order (ASC, %0->$$27)
> -- STABLE_SORT [$$27(ASC)] |PARTITIONED|
> exchange
> -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
> project ([$$27, $$13, $$15])
> -- STREAM_PROJECT |PARTITIONED|
> exchange
> -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
> unnest-map [$$23, $$24, $$25, $$26, $$27] <- function-call: asterix:index-search, Args:[AString: {twmSndLocIx}, AInt32: {1}, AString: {test}, AString: {TweetMessages}, ABoolean: {true}, ABoolean: {false}, ABoolean: {true}, AInt32: {4}, %0->$$19, %0->$$20, %0->$$21, %0->$$22]
> -- RTREE_SEARCH |PARTITIONED|
> exchange
> -- BROADCAST_EXCHANGE |PARTITIONED|
> assign [$$19, $$20, $$21, $$22] <- [function-call: asterix:create-mbr, Args:[%0->$$13, AInt32: {2}, AInt32: {0}], function-call: asterix:create-mbr, Args:[%0->$$13, AInt32: {2}, AInt32: {1}], function-call: asterix:create-mbr, Args:[%0->$$13, AInt32: {2}, AInt32: {2}], function-call: asterix:create-mbr, Args:[%0->$$13, AInt32: {2}, AInt32: {3}]]
> -- ASSIGN |PARTITIONED|
> project ([$$13, $$15])
> -- STREAM_PROJECT |PARTITIONED|
> assign [$$13] <- [function-call: asterix:field-access-by-index, Args:[%0->$$1, AInt32: {2}]]
> -- ASSIGN |PARTITIONED|
> exchange
> -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
> data-scan []<-[$$15, $$1] <- test:TweetMessages
> -- DATASOURCE_SCAN |PARTITIONED|
> exchange
> -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
> empty-tuple-source
> -- EMPTY_TUPLE_SOURCE |PARTITIONED|
> {noformat}
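> The optimized plan is incorrect: the index search does not use the right join condition, so the result differs from the expected one. Specifically, the R-tree search on twmSndLocIx is probed with the MBR of the point $$13 itself (see the create-mbr assign that produces $$19..$$22), not with the MBR of the circle create-circle(..., 0.5) that appears in the spatial-intersect condition.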
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)