You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@asterixdb.apache.org by "Shiva Jahangiri (JIRA)" <ji...@apache.org> on 2017/12/14 20:02:00 UTC
[jira] [Updated] (ASTERIXDB-2199) Nested primary key and hash
repartitioning bug
[ https://issues.apache.org/jira/browse/ASTERIXDB-2199?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Shiva Jahangiri updated ASTERIXDB-2199:
---------------------------------------
Description:
If a join is performed on the primary keys of two tables, no hash partitioning should happen. Consider the following DDL (note that the primary key of Friendship2 is a string):
DROP DATAVERSE Facebook IF EXISTS;
CREATE DATAVERSE Facebook;
Use Facebook;
CREATE TYPE FriendshipType AS closed {
id:string,
friends :[string]
};
CREATE DATASET Friendship2(FriendshipType)
PRIMARY KEY id;
insert into Friendship2([ {"id":"1","friends" : [ "2","3","4"]}, {"id":"2","friends" : [ "4","5","6"]}
]);
By running the following query:
Use Facebook;
select * from Friendship2 first, Friendship2 second where first.id = second.id;
we can see that no hash partitioning happens in the optimized logical plan, which is correct because the join is on the primary key of both relations and the data is already partitioned on the primary key:
{
"operator":"distribute-result",
"expressions":"$$9",
"operatorId" : "1.1",
"physical-operator":"DISTRIBUTE_RESULT",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"exchange",
"operatorId" : "1.2",
"physical-operator":"ONE_TO_ONE_EXCHANGE",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"project",
"variables" :["$$9"],
"operatorId" : "1.3",
"physical-operator":"STREAM_PROJECT",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"assign",
"variables" :["$$9"],
"expressions":"{ first : $$first, second : $$second}",
"operatorId" : "1.4",
"physical-operator":"ASSIGN",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"project",
"variables" :["$$first","$$second"],
"operatorId" : "1.5",
"physical-operator":"STREAM_PROJECT",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"exchange",
"operatorId" : "1.6",
"physical-operator":"ONE_TO_ONE_EXCHANGE",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"join",
"condition":"eq($$10, $$11)",
"operatorId" : "1.7",
"physical-operator":"HYBRID_HASH_JOIN [$$10][$$11]",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"exchange",
"operatorId" : "1.8",
"physical-operator":"ONE_TO_ONE_EXCHANGE",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"data-scan",
"variables" :["$$10","$$first"],
"data-source":"Facebook.Friendship2",
"operatorId" : "1.9",
"physical-operator":"DATASOURCE_SCAN",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"exchange",
"operatorId" : "1.10",
"physical-operator":"ONE_TO_ONE_EXCHANGE",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"empty-tuple-source",
"operatorId" : "1.11",
"physical-operator":"EMPTY_TUPLE_SOURCE",
"execution-mode":"PARTITIONED"
}
]
}
]
}
]
}
, {
"operator":"exchange",
"operatorId" : "1.12",
"physical-operator":"ONE_TO_ONE_EXCHANGE",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"data-scan",
"variables" :["$$11","$$second"],
"data-source":"Facebook.Friendship2",
"operatorId" : "1.13",
"physical-operator":"DATASOURCE_SCAN",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"exchange",
"operatorId" : "1.14",
"physical-operator":"ONE_TO_ONE_EXCHANGE",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"empty-tuple-source",
"operatorId" : "1.15",
"physical-operator":"EMPTY_TUPLE_SOURCE",
"execution-mode":"PARTITIONED"
}
]
}
]
}
]
}
]
}
]
}
]
}
]
}
]
}
]
}
]
}
However, if the primary key of a relation is nested in the DDL (e.g. person.id in Friendship), as follows:
DROP DATAVERSE Facebook IF EXISTS;
CREATE DATAVERSE Facebook;
Use Facebook;
CREATE TYPE PersonType AS closed {
id:string
};
CREATE TYPE FriendshipType AS closed {
person : PersonType,
Friends :[PersonType]
};
/* Creating Datasets */
CREATE DATASET Person(PersonType)
PRIMARY KEY id;
CREATE DATASET Friendship(FriendshipType)
PRIMARY KEY person.id;
Use Facebook;
insert into Person ([{"id":"1"},{"id":"2"},{"id":"3"},{"id":"4"}]);
insert into Friendship ([ {"person":{"id":"1"},"Friends" : [ {"id":"2"},{"id":"3"},{"id":"4"}]}
]);
By running the following query, which is equivalent to the previous query:
Use Facebook;
select * from Friendship first, Friendship second where first.person.id = second.person.id;
We can see that a HASH_PARTITION_EXCHANGE happens, which shows that if the primary key is nested, AsterixDB does not recognize that the data is already partitioned on it and therefore should not be repartitioned. Following is the optimized logical plan of this query, with hash partitioning applied to both relations involved in the join, while there should be none in this case.
{
"operator":"distribute-result",
"expressions":"$$11",
"operatorId" : "1.1",
"physical-operator":"DISTRIBUTE_RESULT",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"exchange",
"operatorId" : "1.2",
"physical-operator":"ONE_TO_ONE_EXCHANGE",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"project",
"variables" :["$$11"],
"operatorId" : "1.3",
"physical-operator":"STREAM_PROJECT",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"assign",
"variables" :["$$11"],
"expressions":"{ first : $$first, second : $$second}",
"operatorId" : "1.4",
"physical-operator":"ASSIGN",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"project",
"variables" :["$$first","$$second"],
"operatorId" : "1.5",
"physical-operator":"STREAM_PROJECT",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"exchange",
"operatorId" : "1.6",
"physical-operator":"ONE_TO_ONE_EXCHANGE",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"join",
"condition":"eq($$14, $$15)",
"operatorId" : "1.7",
"physical-operator":"HYBRID_HASH_JOIN [$$14][$$15]",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"exchange",
"operatorId" : "1.8",
"physical-operator":"ONE_TO_ONE_EXCHANGE",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"project",
"variables" :["$$first","$$14"],
"operatorId" : "1.9",
"physical-operator":"STREAM_PROJECT",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"assign",
"variables" :["$$first","$$14"],
"expressions":"$$second, $$15",
"operatorId" : "1.10",
"physical-operator":"ASSIGN",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"exchange",
"operatorId" : "1.11",
"physical-operator":"ONE_TO_ONE_EXCHANGE",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"replicate",
"operatorId" : "1.12",
"physical-operator":"REPLICATE",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"exchange",
"operatorId" : "1.13",
"physical-operator":"HASH_PARTITION_EXCHANGE [$$15]",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"assign",
"variables" :["$$15"],
"expressions":"$$second.getField(0).getField(0)",
"operatorId" : "1.14",
"physical-operator":"ASSIGN",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"project",
"variables" :["$$second"],
"operatorId" : "1.15",
"physical-operator":"STREAM_PROJECT",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"exchange",
"operatorId" : "1.16",
"physical-operator":"ONE_TO_ONE_EXCHANGE",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"data-scan",
"variables" :["$$13","$$second"],
"data-source":"Facebook.Friendship",
"operatorId" : "1.17",
"physical-operator":"DATASOURCE_SCAN",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"exchange",
"operatorId" : "1.18",
"physical-operator":"ONE_TO_ONE_EXCHANGE",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"empty-tuple-source",
"operatorId" : "1.19",
"physical-operator":"EMPTY_TUPLE_SOURCE",
"execution-mode":"PARTITIONED"
}
]
}
]
}
]
}
]
}
]
}
]
}
]
}
]
}
]
}
]
}
]
}
, {
"operator":"exchange",
"operatorId" : "1.20",
"physical-operator":"ONE_TO_ONE_EXCHANGE",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"replicate",
"operatorId" : "1.12",
"physical-operator":"REPLICATE",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"exchange",
"operatorId" : "1.13",
"physical-operator":"HASH_PARTITION_EXCHANGE [$$15]",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"assign",
"variables" :["$$15"],
"expressions":"$$second.getField(0).getField(0)",
"operatorId" : "1.14",
"physical-operator":"ASSIGN",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"project",
"variables" :["$$second"],
"operatorId" : "1.15",
"physical-operator":"STREAM_PROJECT",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"exchange",
"operatorId" : "1.16",
"physical-operator":"ONE_TO_ONE_EXCHANGE",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"data-scan",
"variables" :["$$13","$$second"],
"data-source":"Facebook.Friendship",
"operatorId" : "1.17",
"physical-operator":"DATASOURCE_SCAN",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"exchange",
"operatorId" : "1.18",
"physical-operator":"ONE_TO_ONE_EXCHANGE",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"empty-tuple-source",
"operatorId" : "1.19",
"physical-operator":"EMPTY_TUPLE_SOURCE",
"execution-mode":"PARTITIONED"
}
]
}
]
}
]
}
]
}
]
}
]
}
]
}
]
}
]
}
]
}
]
}
]
}
]
}
]
}
]
}
was:
If a join is happening on primary keys of two tables, no hash partitioning should happen. Having the following DDL(Note that primary key of Friendship2 is string):
DROP DATAVERSE Facebook IF EXISTS;
CREATE DATAVERSE Facebook;
Use Facebook;
CREATE TYPE FriendshipType AS closed {
id:string,
friends :[string]
};
CREATE DATASET Friendship2(FriendshipType)
PRIMARY KEY id;
insert into Friendship2([ {"id":"1","friends" : [ "2","3","4"]}, {"id":"2","friends" : [ "4","5","6"]}
]);
By running the following query:
Use Facebook;
select * from Friendship2 first, Friendship2 second where first.id = second.id;
we can see that there is no hash partitioning happening in optimized logical plan which is correct as join is happening on the primary key of both relations and data is already partitioned on primary key:
{
"operator":"distribute-result",
"expressions":"$$9",
"operatorId" : "1.1",
"physical-operator":"DISTRIBUTE_RESULT",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"exchange",
"operatorId" : "1.2",
"physical-operator":"ONE_TO_ONE_EXCHANGE",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"project",
"variables" :["$$9"],
"operatorId" : "1.3",
"physical-operator":"STREAM_PROJECT",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"assign",
"variables" :["$$9"],
"expressions":"{ first : $$first, second : $$second}",
"operatorId" : "1.4",
"physical-operator":"ASSIGN",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"project",
"variables" :["$$first","$$second"],
"operatorId" : "1.5",
"physical-operator":"STREAM_PROJECT",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"exchange",
"operatorId" : "1.6",
"physical-operator":"ONE_TO_ONE_EXCHANGE",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"join",
"condition":"eq($$10, $$11)",
"operatorId" : "1.7",
"physical-operator":"HYBRID_HASH_JOIN [$$10][$$11]",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"exchange",
"operatorId" : "1.8",
"physical-operator":"ONE_TO_ONE_EXCHANGE",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"data-scan",
"variables" :["$$10","$$first"],
"data-source":"Facebook.Friendship2",
"operatorId" : "1.9",
"physical-operator":"DATASOURCE_SCAN",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"exchange",
"operatorId" : "1.10",
"physical-operator":"ONE_TO_ONE_EXCHANGE",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"empty-tuple-source",
"operatorId" : "1.11",
"physical-operator":"EMPTY_TUPLE_SOURCE",
"execution-mode":"PARTITIONED"
}
]
}
]
}
]
}
, {
"operator":"exchange",
"operatorId" : "1.12",
"physical-operator":"ONE_TO_ONE_EXCHANGE",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"data-scan",
"variables" :["$$11","$$second"],
"data-source":"Facebook.Friendship2",
"operatorId" : "1.13",
"physical-operator":"DATASOURCE_SCAN",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"exchange",
"operatorId" : "1.14",
"physical-operator":"ONE_TO_ONE_EXCHANGE",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"empty-tuple-source",
"operatorId" : "1.15",
"physical-operator":"EMPTY_TUPLE_SOURCE",
"execution-mode":"PARTITIONED"
}
]
}
]
}
]
}
]
}
]
}
]
}
]
}
]
}
]
}
]
}
However, if in DDL primary key of a relation is nested (e.g. person.id in Friendship)such as follow:
DROP DATAVERSE Facebook IF EXISTS;
CREATE DATAVERSE Facebook;
Use Facebook;
CREATE TYPE PersonType AS closed {
id:string
};
CREATE TYPE FriendshipType AS closed {
person : PersonType,
Friends :[PersonType]
};
/* Creating Datasets */
CREATE DATASET Person(PersonType)
PRIMARY KEY id;
CREATE DATASET Friendship(FriendshipType)
PRIMARY KEY person.id;
Use Facebook;
insert into Person ([{"id":"1"},{"id":"2"},{"id":"3"},{"id":"4"}]);
insert into Friendship ([ {"person":{"id":"1"},"Friends" : [ {"id":"2"},{"id":"3"},{"id":"4"}]}
]);
By running the following query which is equivalent with the previous query:
Use Facebook;
select * from Friendship first, Friendship second where first.person.id = second.person.id;
We can see that Hash_Partitioning_Exchange happens which shows that if primary key is nested, AsterixDB does not recognize that data is partitioned based on it and should not repartition it. Following is the optimized logical plan of this query with two hash partitioning happened on two relations involved in join while there should be none in this case.
{
"operator":"distribute-result",
"expressions":"$$11",
"operatorId" : "1.1",
"physical-operator":"DISTRIBUTE_RESULT",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"exchange",
"operatorId" : "1.2",
"physical-operator":"ONE_TO_ONE_EXCHANGE",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"project",
"variables" :["$$11"],
"operatorId" : "1.3",
"physical-operator":"STREAM_PROJECT",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"assign",
"variables" :["$$11"],
"expressions":"{ first : $$first, second : $$second}",
"operatorId" : "1.4",
"physical-operator":"ASSIGN",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"project",
"variables" :["$$first","$$second"],
"operatorId" : "1.5",
"physical-operator":"STREAM_PROJECT",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"exchange",
"operatorId" : "1.6",
"physical-operator":"ONE_TO_ONE_EXCHANGE",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"join",
"condition":"eq($$14, $$15)",
"operatorId" : "1.7",
"physical-operator":"HYBRID_HASH_JOIN [$$14][$$15]",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"exchange",
"operatorId" : "1.8",
"physical-operator":"ONE_TO_ONE_EXCHANGE",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"project",
"variables" :["$$first","$$14"],
"operatorId" : "1.9",
"physical-operator":"STREAM_PROJECT",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"assign",
"variables" :["$$first","$$14"],
"expressions":"$$second, $$15",
"operatorId" : "1.10",
"physical-operator":"ASSIGN",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"exchange",
"operatorId" : "1.11",
"physical-operator":"ONE_TO_ONE_EXCHANGE",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"replicate",
"operatorId" : "1.12",
"physical-operator":"REPLICATE",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"exchange",
"operatorId" : "1.13",
"physical-operator":"HASH_PARTITION_EXCHANGE [$$15]",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"assign",
"variables" :["$$15"],
"expressions":"$$second.getField(0).getField(0)",
"operatorId" : "1.14",
"physical-operator":"ASSIGN",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"project",
"variables" :["$$second"],
"operatorId" : "1.15",
"physical-operator":"STREAM_PROJECT",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"exchange",
"operatorId" : "1.16",
"physical-operator":"ONE_TO_ONE_EXCHANGE",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"data-scan",
"variables" :["$$13","$$second"],
"data-source":"Facebook.Friendship",
"operatorId" : "1.17",
"physical-operator":"DATASOURCE_SCAN",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"exchange",
"operatorId" : "1.18",
"physical-operator":"ONE_TO_ONE_EXCHANGE",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"empty-tuple-source",
"operatorId" : "1.19",
"physical-operator":"EMPTY_TUPLE_SOURCE",
"execution-mode":"PARTITIONED"
}
]
}
]
}
]
}
]
}
]
}
]
}
]
}
]
}
]
}
]
}
]
}
, {
"operator":"exchange",
"operatorId" : "1.20",
"physical-operator":"ONE_TO_ONE_EXCHANGE",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"replicate",
"operatorId" : "1.12",
"physical-operator":"REPLICATE",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"exchange",
"operatorId" : "1.13",
"physical-operator":"HASH_PARTITION_EXCHANGE [$$15]",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"assign",
"variables" :["$$15"],
"expressions":"$$second.getField(0).getField(0)",
"operatorId" : "1.14",
"physical-operator":"ASSIGN",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"project",
"variables" :["$$second"],
"operatorId" : "1.15",
"physical-operator":"STREAM_PROJECT",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"exchange",
"operatorId" : "1.16",
"physical-operator":"ONE_TO_ONE_EXCHANGE",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"data-scan",
"variables" :["$$13","$$second"],
"data-source":"Facebook.Friendship",
"operatorId" : "1.17",
"physical-operator":"DATASOURCE_SCAN",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"exchange",
"operatorId" : "1.18",
"physical-operator":"ONE_TO_ONE_EXCHANGE",
"execution-mode":"PARTITIONED",
"inputs":[
{
"operator":"empty-tuple-source",
"operatorId" : "1.19",
"physical-operator":"EMPTY_TUPLE_SOURCE",
"execution-mode":"PARTITIONED"
}
]
}
]
}
]
}
]
}
]
}
]
}
]
}
]
}
]
}
]
}
]
}
]
}
]
}
]
}
]
}
> Nested primary key and hash repartitioning bug
> -----------------------------------------------
>
> Key: ASTERIXDB-2199
> URL: https://issues.apache.org/jira/browse/ASTERIXDB-2199
> Project: Apache AsterixDB
> Issue Type: Bug
> Components: *DB - AsterixDB
> Reporter: Shiva Jahangiri
>
> If a join is performed on the primary keys of two tables, no hash partitioning should happen. Consider the following DDL (note that the primary key of Friendship2 is a string):
> DROP DATAVERSE Facebook IF EXISTS;
> CREATE DATAVERSE Facebook;
> Use Facebook;
> CREATE TYPE FriendshipType AS closed {
> id:string,
> friends :[string]
> };
> CREATE DATASET Friendship2(FriendshipType)
> PRIMARY KEY id;
> insert into Friendship2([ {"id":"1","friends" : [ "2","3","4"]}, {"id":"2","friends" : [ "4","5","6"]}
> ]);
> By running the following query:
> Use Facebook;
> select * from Friendship2 first, Friendship2 second where first.id = second.id;
> we can see that no hash partitioning happens in the optimized logical plan, which is correct because the join is on the primary key of both relations and the data is already partitioned on the primary key:
> {
> "operator":"distribute-result",
> "expressions":"$$9",
> "operatorId" : "1.1",
> "physical-operator":"DISTRIBUTE_RESULT",
> "execution-mode":"PARTITIONED",
> "inputs":[
> {
> "operator":"exchange",
> "operatorId" : "1.2",
> "physical-operator":"ONE_TO_ONE_EXCHANGE",
> "execution-mode":"PARTITIONED",
> "inputs":[
> {
> "operator":"project",
> "variables" :["$$9"],
> "operatorId" : "1.3",
> "physical-operator":"STREAM_PROJECT",
> "execution-mode":"PARTITIONED",
> "inputs":[
> {
> "operator":"assign",
> "variables" :["$$9"],
> "expressions":"{ first : $$first, second : $$second}",
> "operatorId" : "1.4",
> "physical-operator":"ASSIGN",
> "execution-mode":"PARTITIONED",
> "inputs":[
> {
> "operator":"project",
> "variables" :["$$first","$$second"],
> "operatorId" : "1.5",
> "physical-operator":"STREAM_PROJECT",
> "execution-mode":"PARTITIONED",
> "inputs":[
> {
> "operator":"exchange",
> "operatorId" : "1.6",
> "physical-operator":"ONE_TO_ONE_EXCHANGE",
> "execution-mode":"PARTITIONED",
> "inputs":[
> {
> "operator":"join",
> "condition":"eq($$10, $$11)",
> "operatorId" : "1.7",
> "physical-operator":"HYBRID_HASH_JOIN [$$10][$$11]",
> "execution-mode":"PARTITIONED",
> "inputs":[
> {
> "operator":"exchange",
> "operatorId" : "1.8",
> "physical-operator":"ONE_TO_ONE_EXCHANGE",
> "execution-mode":"PARTITIONED",
> "inputs":[
> {
> "operator":"data-scan",
> "variables" :["$$10","$$first"],
> "data-source":"Facebook.Friendship2",
> "operatorId" : "1.9",
> "physical-operator":"DATASOURCE_SCAN",
> "execution-mode":"PARTITIONED",
> "inputs":[
> {
> "operator":"exchange",
> "operatorId" : "1.10",
> "physical-operator":"ONE_TO_ONE_EXCHANGE",
> "execution-mode":"PARTITIONED",
> "inputs":[
> {
> "operator":"empty-tuple-source",
> "operatorId" : "1.11",
> "physical-operator":"EMPTY_TUPLE_SOURCE",
> "execution-mode":"PARTITIONED"
> }
> ]
> }
> ]
> }
> ]
> }
> , {
> "operator":"exchange",
> "operatorId" : "1.12",
> "physical-operator":"ONE_TO_ONE_EXCHANGE",
> "execution-mode":"PARTITIONED",
> "inputs":[
> {
> "operator":"data-scan",
> "variables" :["$$11","$$second"],
> "data-source":"Facebook.Friendship2",
> "operatorId" : "1.13",
> "physical-operator":"DATASOURCE_SCAN",
> "execution-mode":"PARTITIONED",
> "inputs":[
> {
> "operator":"exchange",
> "operatorId" : "1.14",
> "physical-operator":"ONE_TO_ONE_EXCHANGE",
> "execution-mode":"PARTITIONED",
> "inputs":[
> {
> "operator":"empty-tuple-source",
> "operatorId" : "1.15",
> "physical-operator":"EMPTY_TUPLE_SOURCE",
> "execution-mode":"PARTITIONED"
> }
> ]
> }
> ]
> }
> ]
> }
> ]
> }
> ]
> }
> ]
> }
> ]
> }
> ]
> }
> ]
> }
> ]
> }
> However, if the primary key of a relation is nested in the DDL (e.g. person.id in Friendship), as follows:
> DROP DATAVERSE Facebook IF EXISTS;
> CREATE DATAVERSE Facebook;
> Use Facebook;
> CREATE TYPE PersonType AS closed {
> id:string
> };
> CREATE TYPE FriendshipType AS closed {
> person : PersonType,
> Friends :[PersonType]
> };
> /* Creating Datasets */
> CREATE DATASET Person(PersonType)
> PRIMARY KEY id;
> CREATE DATASET Friendship(FriendshipType)
> PRIMARY KEY person.id;
> Use Facebook;
> insert into Person ([{"id":"1"},{"id":"2"},{"id":"3"},{"id":"4"}]);
> insert into Friendship ([ {"person":{"id":"1"},"Friends" : [ {"id":"2"},{"id":"3"},{"id":"4"}]}
> ]);
> By running the following query, which is equivalent to the previous query:
> Use Facebook;
> select * from Friendship first, Friendship second where first.person.id = second.person.id;
> We can see that a HASH_PARTITION_EXCHANGE happens, which shows that if the primary key is nested, AsterixDB does not recognize that the data is already partitioned on it and therefore should not be repartitioned. Following is the optimized logical plan of this query, with hash partitioning applied to both relations involved in the join, while there should be none in this case.
> {
> "operator":"distribute-result",
> "expressions":"$$11",
> "operatorId" : "1.1",
> "physical-operator":"DISTRIBUTE_RESULT",
> "execution-mode":"PARTITIONED",
> "inputs":[
> {
> "operator":"exchange",
> "operatorId" : "1.2",
> "physical-operator":"ONE_TO_ONE_EXCHANGE",
> "execution-mode":"PARTITIONED",
> "inputs":[
> {
> "operator":"project",
> "variables" :["$$11"],
> "operatorId" : "1.3",
> "physical-operator":"STREAM_PROJECT",
> "execution-mode":"PARTITIONED",
> "inputs":[
> {
> "operator":"assign",
> "variables" :["$$11"],
> "expressions":"{ first : $$first, second : $$second}",
> "operatorId" : "1.4",
> "physical-operator":"ASSIGN",
> "execution-mode":"PARTITIONED",
> "inputs":[
> {
> "operator":"project",
> "variables" :["$$first","$$second"],
> "operatorId" : "1.5",
> "physical-operator":"STREAM_PROJECT",
> "execution-mode":"PARTITIONED",
> "inputs":[
> {
> "operator":"exchange",
> "operatorId" : "1.6",
> "physical-operator":"ONE_TO_ONE_EXCHANGE",
> "execution-mode":"PARTITIONED",
> "inputs":[
> {
> "operator":"join",
> "condition":"eq($$14, $$15)",
> "operatorId" : "1.7",
> "physical-operator":"HYBRID_HASH_JOIN [$$14][$$15]",
> "execution-mode":"PARTITIONED",
> "inputs":[
> {
> "operator":"exchange",
> "operatorId" : "1.8",
> "physical-operator":"ONE_TO_ONE_EXCHANGE",
> "execution-mode":"PARTITIONED",
> "inputs":[
> {
> "operator":"project",
> "variables" :["$$first","$$14"],
> "operatorId" : "1.9",
> "physical-operator":"STREAM_PROJECT",
> "execution-mode":"PARTITIONED",
> "inputs":[
> {
> "operator":"assign",
> "variables" :["$$first","$$14"],
> "expressions":"$$second, $$15",
> "operatorId" : "1.10",
> "physical-operator":"ASSIGN",
> "execution-mode":"PARTITIONED",
> "inputs":[
> {
> "operator":"exchange",
> "operatorId" : "1.11",
> "physical-operator":"ONE_TO_ONE_EXCHANGE",
> "execution-mode":"PARTITIONED",
> "inputs":[
> {
> "operator":"replicate",
> "operatorId" : "1.12",
> "physical-operator":"REPLICATE",
> "execution-mode":"PARTITIONED",
> "inputs":[
> {
> "operator":"exchange",
> "operatorId" : "1.13",
> "physical-operator":"HASH_PARTITION_EXCHANGE [$$15]",
> "execution-mode":"PARTITIONED",
> "inputs":[
> {
> "operator":"assign",
> "variables" :["$$15"],
> "expressions":"$$second.getField(0).getField(0)",
> "operatorId" : "1.14",
> "physical-operator":"ASSIGN",
> "execution-mode":"PARTITIONED",
> "inputs":[
> {
> "operator":"project",
> "variables" :["$$second"],
> "operatorId" : "1.15",
> "physical-operator":"STREAM_PROJECT",
> "execution-mode":"PARTITIONED",
> "inputs":[
> {
> "operator":"exchange",
> "operatorId" : "1.16",
> "physical-operator":"ONE_TO_ONE_EXCHANGE",
> "execution-mode":"PARTITIONED",
> "inputs":[
> {
> "operator":"data-scan",
> "variables" :["$$13","$$second"],
> "data-source":"Facebook.Friendship",
> "operatorId" : "1.17",
> "physical-operator":"DATASOURCE_SCAN",
> "execution-mode":"PARTITIONED",
> "inputs":[
> {
> "operator":"exchange",
> "operatorId" : "1.18",
> "physical-operator":"ONE_TO_ONE_EXCHANGE",
> "execution-mode":"PARTITIONED",
> "inputs":[
> {
> "operator":"empty-tuple-source",
> "operatorId" : "1.19",
> "physical-operator":"EMPTY_TUPLE_SOURCE",
> "execution-mode":"PARTITIONED"
> }
> ]
> }
> ]
> }
> ]
> }
> ]
> }
> ]
> }
> ]
> }
> ]
> }
> ]
> }
> ]
> }
> ]
> }
> ]
> }
> , {
> "operator":"exchange",
> "operatorId" : "1.20",
> "physical-operator":"ONE_TO_ONE_EXCHANGE",
> "execution-mode":"PARTITIONED",
> "inputs":[
> {
> "operator":"replicate",
> "operatorId" : "1.12",
> "physical-operator":"REPLICATE",
> "execution-mode":"PARTITIONED",
> "inputs":[
> {
> "operator":"exchange",
> "operatorId" : "1.13",
> "physical-operator":"HASH_PARTITION_EXCHANGE [$$15]",
> "execution-mode":"PARTITIONED",
> "inputs":[
> {
> "operator":"assign",
> "variables" :["$$15"],
> "expressions":"$$second.getField(0).getField(0)",
> "operatorId" : "1.14",
> "physical-operator":"ASSIGN",
> "execution-mode":"PARTITIONED",
> "inputs":[
> {
> "operator":"project",
> "variables" :["$$second"],
> "operatorId" : "1.15",
> "physical-operator":"STREAM_PROJECT",
> "execution-mode":"PARTITIONED",
> "inputs":[
> {
> "operator":"exchange",
> "operatorId" : "1.16",
> "physical-operator":"ONE_TO_ONE_EXCHANGE",
> "execution-mode":"PARTITIONED",
> "inputs":[
> {
> "operator":"data-scan",
> "variables" :["$$13","$$second"],
> "data-source":"Facebook.Friendship",
> "operatorId" : "1.17",
> "physical-operator":"DATASOURCE_SCAN",
> "execution-mode":"PARTITIONED",
> "inputs":[
> {
> "operator":"exchange",
> "operatorId" : "1.18",
> "physical-operator":"ONE_TO_ONE_EXCHANGE",
> "execution-mode":"PARTITIONED",
> "inputs":[
> {
> "operator":"empty-tuple-source",
> "operatorId" : "1.19",
> "physical-operator":"EMPTY_TUPLE_SOURCE",
> "execution-mode":"PARTITIONED"
> }
> ]
> }
> ]
> }
> ]
> }
> ]
> }
> ]
> }
> ]
> }
> ]
> }
> ]
> }
> ]
> }
> ]
> }
> ]
> }
> ]
> }
> ]
> }
> ]
> }
> ]
> }
--
This message was sent by Atlassian JIRA
(v6.4.14#64029)