You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@asterixdb.apache.org by wa...@apache.org on 2018/10/10 00:59:16 UTC
[07/36] asterixdb git commit: [NO ISSUE][COMP][RT] Enable multiway
similarity joins
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/optimizerts/results/similarity/jaccard-similarity-join-dual-order.plan
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/results/similarity/jaccard-similarity-join-dual-order.plan b/asterixdb/asterix-app/src/test/resources/optimizerts/results/similarity/jaccard-similarity-join-dual-order.plan
new file mode 100644
index 0000000..e8b029a
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/optimizerts/results/similarity/jaccard-similarity-join-dual-order.plan
@@ -0,0 +1,191 @@
+-- DISTRIBUTE_RESULT |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ASSIGN |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- PRE_CLUSTERED_GROUP_BY[$$162, $$164] |PARTITIONED|
+ {
+ -- AGGREGATE |LOCAL|
+ -- NESTED_TUPLE_SOURCE |LOCAL|
+ }
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- STABLE_SORT [$$162(ASC), $$164(ASC)] |PARTITIONED|
+ -- HASH_PARTITION_EXCHANGE [$$162, $$164] |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ASSIGN |PARTITIONED|
+ -- STREAM_SELECT |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- HYBRID_HASH_JOIN [$$prefixTokenRight][$$prefixTokenLeft] |PARTITIONED|
+ -- HASH_PARTITION_EXCHANGE [$$prefixTokenRight] |PARTITIONED|
+ -- UNNEST |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- PRE_CLUSTERED_GROUP_BY[$$126] |PARTITIONED|
+ {
+ -- AGGREGATE |LOCAL|
+ -- IN_MEMORY_STABLE_SORT [$$i(ASC)] |LOCAL|
+ -- STREAM_SELECT |LOCAL|
+ -- NESTED_TUPLE_SOURCE |LOCAL|
+ }
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- IN_MEMORY_HASH_JOIN [$$tokenUnranked][$$tokenRightGrouped] |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- UNNEST |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ASSIGN |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- DATASOURCE_SCAN |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- BROADCAST_EXCHANGE |PARTITIONED|
+ -- ASSIGN |PARTITIONED|
+ -- RUNNING_AGGREGATE |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- SORT_MERGE_EXCHANGE [$$124(ASC) ] |PARTITIONED|
+ -- STABLE_SORT [$$124(ASC)] |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ASSIGN |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- HYBRID_HASH_JOIN [$$tokenRightGrouped][$$tokenLeftGrouped] |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ASSIGN |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- REPLICATE |PARTITIONED|
+ -- HASH_PARTITION_EXCHANGE [$$tokenRightGrouped] |PARTITIONED|
+ -- EXTERNAL_GROUP_BY[$$172] |PARTITIONED|
+ {
+ -- AGGREGATE |LOCAL|
+ -- NESTED_TUPLE_SOURCE |LOCAL|
+ }
+ -- HASH_PARTITION_EXCHANGE [$$172] |PARTITIONED|
+ -- EXTERNAL_GROUP_BY[$$orderTokenRight] |PARTITIONED|
+ {
+ -- AGGREGATE |LOCAL|
+ -- NESTED_TUPLE_SOURCE |LOCAL|
+ }
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- UNNEST |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ASSIGN |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- DATASOURCE_SCAN |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ASSIGN |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- REPLICATE |PARTITIONED|
+ -- HASH_PARTITION_EXCHANGE [$$tokenLeftGrouped] |PARTITIONED|
+ -- EXTERNAL_GROUP_BY[$$174] |PARTITIONED|
+ {
+ -- AGGREGATE |LOCAL|
+ -- NESTED_TUPLE_SOURCE |LOCAL|
+ }
+ -- HASH_PARTITION_EXCHANGE [$$174] |PARTITIONED|
+ -- EXTERNAL_GROUP_BY[$$orderTokenLeft] |PARTITIONED|
+ {
+ -- AGGREGATE |LOCAL|
+ -- NESTED_TUPLE_SOURCE |LOCAL|
+ }
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- UNNEST |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ASSIGN |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- DATASOURCE_SCAN |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- HASH_PARTITION_EXCHANGE [$$prefixTokenLeft] |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- UNNEST |PARTITIONED|
+ -- ASSIGN |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- PRE_CLUSTERED_GROUP_BY[$$129] |PARTITIONED|
+ {
+ -- AGGREGATE |LOCAL|
+ -- IN_MEMORY_STABLE_SORT [$$i(ASC)] |LOCAL|
+ -- STREAM_SELECT |LOCAL|
+ -- NESTED_TUPLE_SOURCE |LOCAL|
+ }
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- IN_MEMORY_HASH_JOIN [$$tokenUnranked][$$tokenRightGrouped] |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- UNNEST |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ASSIGN |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- DATASOURCE_SCAN |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- BROADCAST_EXCHANGE |PARTITIONED|
+ -- ASSIGN |PARTITIONED|
+ -- RUNNING_AGGREGATE |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- SORT_MERGE_EXCHANGE [$$125(ASC) ] |PARTITIONED|
+ -- STABLE_SORT [$$125(ASC)] |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ASSIGN |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- HYBRID_HASH_JOIN [$$tokenRightGrouped][$$tokenLeftGrouped] |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- REPLICATE |PARTITIONED|
+ -- HASH_PARTITION_EXCHANGE [$$tokenRightGrouped] |PARTITIONED|
+ -- EXTERNAL_GROUP_BY[$$172] |PARTITIONED|
+ {
+ -- AGGREGATE |LOCAL|
+ -- NESTED_TUPLE_SOURCE |LOCAL|
+ }
+ -- HASH_PARTITION_EXCHANGE [$$172] |PARTITIONED|
+ -- EXTERNAL_GROUP_BY[$$orderTokenRight] |PARTITIONED|
+ {
+ -- AGGREGATE |LOCAL|
+ -- NESTED_TUPLE_SOURCE |LOCAL|
+ }
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- UNNEST |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ASSIGN |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- DATASOURCE_SCAN |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- REPLICATE |PARTITIONED|
+ -- HASH_PARTITION_EXCHANGE [$$tokenLeftGrouped] |PARTITIONED|
+ -- EXTERNAL_GROUP_BY[$$174] |PARTITIONED|
+ {
+ -- AGGREGATE |LOCAL|
+ -- NESTED_TUPLE_SOURCE |LOCAL|
+ }
+ -- HASH_PARTITION_EXCHANGE [$$174] |PARTITIONED|
+ -- EXTERNAL_GROUP_BY[$$orderTokenLeft] |PARTITIONED|
+ {
+ -- AGGREGATE |LOCAL|
+ -- NESTED_TUPLE_SOURCE |LOCAL|
+ }
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- UNNEST |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ASSIGN |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- DATASOURCE_SCAN |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/optimizerts/results/similarity/jaccard-similarity-join-right-ahead.plan
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/results/similarity/jaccard-similarity-join-right-ahead.plan b/asterixdb/asterix-app/src/test/resources/optimizerts/results/similarity/jaccard-similarity-join-right-ahead.plan
new file mode 100644
index 0000000..ab700b6
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/optimizerts/results/similarity/jaccard-similarity-join-right-ahead.plan
@@ -0,0 +1,128 @@
+-- DISTRIBUTE_RESULT |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ASSIGN |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- PRE_CLUSTERED_GROUP_BY[$$98, $$100] |PARTITIONED|
+ {
+ -- AGGREGATE |LOCAL|
+ -- NESTED_TUPLE_SOURCE |LOCAL|
+ }
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- STABLE_SORT [$$98(ASC), $$100(ASC)] |PARTITIONED|
+ -- HASH_PARTITION_EXCHANGE [$$98, $$100] |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ASSIGN |PARTITIONED|
+ -- STREAM_SELECT |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- HYBRID_HASH_JOIN [$$prefixTokenRight][$$prefixTokenLeft] |PARTITIONED|
+ -- HASH_PARTITION_EXCHANGE [$$prefixTokenRight] |PARTITIONED|
+ -- UNNEST |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- PRE_CLUSTERED_GROUP_BY[$$78] |PARTITIONED|
+ {
+ -- AGGREGATE |LOCAL|
+ -- IN_MEMORY_STABLE_SORT [$$i(ASC)] |LOCAL|
+ -- STREAM_SELECT |LOCAL|
+ -- NESTED_TUPLE_SOURCE |LOCAL|
+ }
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- IN_MEMORY_HASH_JOIN [$$tokenUnranked][$$tokenRightGrouped] |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- UNNEST |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ASSIGN |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- DATASOURCE_SCAN |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ASSIGN |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- REPLICATE |PARTITIONED|
+ -- BROADCAST_EXCHANGE |PARTITIONED|
+ -- ASSIGN |PARTITIONED|
+ -- RUNNING_AGGREGATE |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- SORT_MERGE_EXCHANGE [$$83(ASC) ] |PARTITIONED|
+ -- STABLE_SORT [$$83(ASC)] |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- EXTERNAL_GROUP_BY[$$106] |PARTITIONED|
+ {
+ -- AGGREGATE |LOCAL|
+ -- NESTED_TUPLE_SOURCE |LOCAL|
+ }
+ -- HASH_PARTITION_EXCHANGE [$$106] |PARTITIONED|
+ -- EXTERNAL_GROUP_BY[$$orderTokenRight] |PARTITIONED|
+ {
+ -- AGGREGATE |LOCAL|
+ -- NESTED_TUPLE_SOURCE |LOCAL|
+ }
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- UNNEST |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ASSIGN |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- DATASOURCE_SCAN |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- HASH_PARTITION_EXCHANGE [$$prefixTokenLeft] |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- UNNEST |PARTITIONED|
+ -- ASSIGN |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- PRE_CLUSTERED_GROUP_BY[$$80] |PARTITIONED|
+ {
+ -- AGGREGATE |LOCAL|
+ -- IN_MEMORY_STABLE_SORT [$$i(ASC)] |LOCAL|
+ -- STREAM_SELECT |LOCAL|
+ -- NESTED_TUPLE_SOURCE |LOCAL|
+ }
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- IN_MEMORY_HASH_JOIN [$$tokenUnranked][$$tokenRightGrouped] |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- UNNEST |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ASSIGN |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- DATASOURCE_SCAN |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- REPLICATE |PARTITIONED|
+ -- BROADCAST_EXCHANGE |PARTITIONED|
+ -- ASSIGN |PARTITIONED|
+ -- RUNNING_AGGREGATE |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- SORT_MERGE_EXCHANGE [$$83(ASC) ] |PARTITIONED|
+ -- STABLE_SORT [$$83(ASC)] |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- EXTERNAL_GROUP_BY[$$106] |PARTITIONED|
+ {
+ -- AGGREGATE |LOCAL|
+ -- NESTED_TUPLE_SOURCE |LOCAL|
+ }
+ -- HASH_PARTITION_EXCHANGE [$$106] |PARTITIONED|
+ -- EXTERNAL_GROUP_BY[$$orderTokenRight] |PARTITIONED|
+ {
+ -- AGGREGATE |LOCAL|
+ -- NESTED_TUPLE_SOURCE |LOCAL|
+ }
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- UNNEST |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ASSIGN |PARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- DATASOURCE_SCAN |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1/basic-1_1.1.ddl.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1/basic-1_1.1.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1/basic-1_1.1.ddl.aql
new file mode 100644
index 0000000..a3b6ec2
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1/basic-1_1.1.ddl.aql
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+drop dataverse fuzzyjoin_basic if exists;
+
+create dataverse fuzzyjoin_basic;
+
+use dataverse fuzzyjoin_basic;
+
+create type BasicType as open {
+ id: int32,
+ summary: string?
+}
+
+create type BasicWithIndexType as open {
+ id: int32
+}
+
+create dataset Basic(BasicType) primary key id;
+
+create dataset BasicWithIndex(BasicWithIndexType) primary key id;
+
+create index BasicWithIndex_summary_bt_idx on BasicWithIndex(summary: string?) type btree enforced;
+
+create index BasicWithIndex_summary_kw_idx on BasicWithIndex(summary: string?) type keyword enforced;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1/basic-1_1.2.update.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1/basic-1_1.2.update.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1/basic-1_1.2.update.aql
new file mode 100644
index 0000000..95ef5f1
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1/basic-1_1.2.update.aql
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse fuzzyjoin_basic;
+
+insert into dataset Basic(
+{"id": 1, "summary": "Clear, Concise, and fun!"}
+);
+insert into dataset Basic(
+{"id": 2, "summary": "Clear, Concise, and Charitable"}
+);
+
+insert into dataset BasicWithIndex(
+{"id": 1, "summary": "Clear, Concise, and fun!"}
+);
+insert into dataset BasicWithIndex(
+{"id": 2, "summary": "Clear, Concise, and Charitable"}
+);
+
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1/basic-1_1.3.query.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1/basic-1_1.3.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1/basic-1_1.3.query.aql
new file mode 100644
index 0000000..0e2aea7
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1/basic-1_1.3.query.aql
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse fuzzyjoin_basic;
+
+set simthreshold '0.6f';
+
+let $s1 := "Clear, Concise, and fun!"
+let $s2 := "Clear, Concise, and Charitable"
+let $psj := (
+ for $s in [$s1]
+ for $t in [$s2]
+ where word-tokens($s) ~= word-tokens($t)
+ return {"s1": $s, "s2": $t}
+)
+let $nsj := (
+ for $s in [$s1]
+ for $t in [$s2]
+ where word-tokens($s) /*+ indexnl */ ~= word-tokens($t)
+ return {"s1": $s, "s2": $t}
+)
+let $nvj := (
+ for $s in [$s1]
+ for $t in [$s2]
+ where similarity-jaccard-check(word-tokens($s), word-tokens($t), .6f)[0] = true
+ return {"s1": $s, "s2": $t}
+)
+let $nvr := (
+ for $s in [$s1]
+ for $t in [$s2]
+ where similarity-jaccard(word-tokens($s), word-tokens($t)) >= .6f
+ return {"s1": $s, "s2": $t}
+)
+let $tpsj := (
+ for $s in dataset Basic
+ for $t in dataset Basic
+ where /*+ skip-index */ similarity-jaccard(word-tokens($s.summary), word-tokens($t.summary)) >= .6f
+ and $s.id < $t.id
+ return {"s1": $s.summary, "s2": $t.summary}
+)
+let $tnsj := (
+ for $s in dataset Basic
+ for $t in dataset Basic
+ where /*+ indexnl */ similarity-jaccard(word-tokens($s.summary), word-tokens($t.summary)) >= .6f
+ and $s.id < $t.id
+ return {"s1": $s.summary, "s2": $t.summary}
+)
+let $itpsj := (
+ for $s in dataset BasicWithIndex
+ for $t in dataset BasicWithIndex
+ where /*+ skip-index */ similarity-jaccard(word-tokens($s.summary), word-tokens($t.summary)) >= 0.6
+ and $s.id < $t.id
+ return {"s1": $s.summary, "s2": $t.summary}
+)
+let $itnsj := (
+ for $s in dataset BasicWithIndex
+ for $t in dataset BasicWithIndex
+ where /*+ indexnl */ similarity-jaccard(word-tokens($s.summary), word-tokens($t.summary)) >= 0.6
+ and $s.id < $t.id
+ return {"s1": $s.summary, "s2": $t.summary}
+)
+let $left := word-tokens($s1)
+let $right := word-tokens($s2)
+let $vj := similarity-jaccard-check($left, $right, 0.6f)
+let $sr := similarity-jaccard($left, $right) >= 0.6
+return {"psj": $psj, "nsj": $nsj, "nvj": $nvj, "nvr": $nvr, "tpsj": $tpsj, "tnsj": $tnsj, "itpsj": $itpsj, "itnsj": $itnsj, "vj": $vj, "sr": $sr}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_1/basic-1_1_1.1.ddl.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_1/basic-1_1_1.1.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_1/basic-1_1_1.1.ddl.aql
new file mode 100644
index 0000000..45cc975
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_1/basic-1_1_1.1.ddl.aql
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+drop dataverse fuzzyjoin_basic if exists;
+
+create dataverse fuzzyjoin_basic;
+
+use dataverse fuzzyjoin_basic;
+
+create type BasicType as closed {
+ id: uuid,
+ authors: string
+}
+
+create dataset left(BasicType) primary key id autogenerated;
+create dataset right(BasicType) primary key id autogenerated;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_1/basic-1_1_1.2.update.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_1/basic-1_1_1.2.update.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_1/basic-1_1_1.2.update.aql
new file mode 100644
index 0000000..c9aceb2
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_1/basic-1_1_1.2.update.aql
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse fuzzyjoin_basic;
+
+load dataset left
+using localfs
+(("path"="asterix_nc1://data/pub-small/dblpauthors.adm"),("format"="adm"));
+
+load dataset right
+using localfs
+(("path"="asterix_nc1://data/pub-small/csxauthors.adm"),("format"="adm"));
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_1/basic-1_1_1.3.query.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_1/basic-1_1_1.3.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_1/basic-1_1_1.3.query.aql
new file mode 100644
index 0000000..c00681b
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_1/basic-1_1_1.3.query.aql
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse fuzzyjoin_basic;
+
+set import-private-functions 'true'
+
+let $hash := (
+ for $r in dataset left
+ let $c := (
+ for $t in counthashed-word-tokens($r.authors)
+ order by $t
+ distinct by $t
+ return $t
+ )
+ order by $r.id
+ return {"id": $r.id, "authors": $r.authors, "nondup": $c}
+)
+
+let $word := (
+ for $r in dataset left
+ let $c := (
+ for $t in word-tokens($r.authors)
+ order by $t
+ distinct by $t
+ return $t
+ )
+ order by $r.id
+ return {"id": $r.id, "authors": $r.authors, "nondup": $c}
+)
+
+for $s in $hash
+for $t in $word
+where $s.id = $t.id and count($s.nondup) != count($t.nondup)
+order by $s.authors
+return {
+ "authors": $s.authors,
+ "hdistinct": $s.nondup,
+ "hcount": count($s.nondup),
+ "hash": counthashed-word-tokens($s.authors),
+ "vhcount": count(counthashed-word-tokens($s.authors)),
+ "wdistinct": $t.nondup,
+ "wcount": count($t.nondup),
+ "word": word-tokens($t.authors),
+ "vwcount": count(word-tokens($t.authors))
+ }
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.1.ddl.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.1.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.1.ddl.aql
new file mode 100644
index 0000000..65a52b1
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.1.ddl.aql
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+drop dataverse fuzzyjoin_basic if exists;
+
+create dataverse fuzzyjoin_basic;
+
+use dataverse fuzzyjoin_basic;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.10.query.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.10.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.10.query.aql
new file mode 100644
index 0000000..e60be60
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.10.query.aql
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Suppose we have two records [1, 2, 3, 4, 5, 6, 7, 8, 9] and [2, 3, 4, 5, 6, 7, 8, 9, 10]; we should return their
+ * similarity 0.8 by similarity-jaccard-check.
+ */
+use dataverse fuzzyjoin_basic;
+
+set import-private-functions 'true'
+
+let $left := [1, 2, 3, 4, 5, 6, 7, 8, 9]
+let $right := [2, 3, 4, 5, 6, 7, 8, 9, 10]
+return similarity-jaccard-check($left, $right, 0.8f)
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.2.update.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.2.update.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.2.update.aql
new file mode 100644
index 0000000..042f3ce
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.2.update.aql
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.3.query.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.3.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.3.query.aql
new file mode 100644
index 0000000..69dc59c
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.3.query.aql
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Suppose we have two records [1, 2, 3, 4, 5, 6, 7, 8, 9] and [2, 3, 4, 5, 6, 7, 8, 9, 10]. We should return their
+ * similarity 0.8 by similarity-jaccard-prefix even if the first token of the left side is removed so that it takes
+ * the form of [2, 3, 4, 5, 6, 7, 8, 9] with its actual length 9 as the first parameter of similarity-jaccard-prefix.
+ */
+use dataverse fuzzyjoin_basic;
+
+set import-private-functions 'true'
+
+let $left := [2, 3, 4, 5, 6, 7, 8, 9]
+let $right := [2, 3, 4, 5, 6, 7, 8, 9, 10]
+return similarity-jaccard-prefix(9, $left, 9, $right, $left[1], 0.8f)
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.4.query.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.4.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.4.query.aql
new file mode 100644
index 0000000..ecfed74
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.4.query.aql
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Suppose we have two records [1, 2, 3, 4, 5, 6, 7, 8, 9] and [2, 3, 4, 5, 6, 7, 8, 9, 10]. We should return their
+ * similarity 0.8 by similarity-jaccard-prefix even if the first token of the right side is removed so that it takes
+ * the form of [2, 3, 4, 5, 6, 7, 8, 9] with its actual length 9 as the third parameter of similarity-jaccard-prefix.
+ */
+use dataverse fuzzyjoin_basic;
+
+set import-private-functions 'true'
+
+let $left := [1, 2, 3, 4, 5, 6, 7, 8, 9]
+let $right := [2, 3, 4, 5, 6, 7, 8, 9]
+return similarity-jaccard-prefix(9, $left, 9, $right, $left[1], 0.8f)
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.5.query.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.5.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.5.query.aql
new file mode 100644
index 0000000..0b6482d
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.5.query.aql
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Suppose we have two records [2, 3, 4, 5, 6, 7, 8, 9, 10] and [1, 2, 3, 4, 5, 6, 7, 8, 9]. We should return their
+ * similarity 0.8 by similarity-jaccard-prefix even if the first token of the right side is removed so that it takes
+ * the form of [2, 3, 4, 5, 6, 7, 8, 9] with its actual length 9 as the third parameter of similarity-jaccard-prefix.
+ */
+use dataverse fuzzyjoin_basic;
+
+set import-private-functions 'true'
+
+let $left := [2, 3, 4, 5, 6, 7, 8, 9, 10]
+let $right := [2, 3, 4, 5, 6, 7, 8, 9]
+return similarity-jaccard-prefix(9, $left, 9, $right, $left[1], 0.8f)
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.6.query.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.6.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.6.query.aql
new file mode 100644
index 0000000..21f1805
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.6.query.aql
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Suppose we have two records [2, 3, 4, 5, 6, 7, 8, 9, 10] and [1, 2, 3, 4, 5, 6, 7, 8, 9]. We should return their
+ * similarity 0.8 by similarity-jaccard-prefix even if the first token of the left side is removed so that it takes
+ * the form of [2, 3, 4, 5, 6, 7, 8, 9] with its actual length 9 as the first parameter of similarity-jaccard-prefix.
+ */
+use dataverse fuzzyjoin_basic;
+
+set import-private-functions 'true'
+
+let $left := [2, 3, 4, 5, 6, 7, 8, 9]
+let $right := [1, 2, 3, 4, 5, 6, 7, 8, 9]
+return similarity-jaccard-prefix(9, $left, 9, $right, $left[1], 0.8f)
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.7.query.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.7.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.7.query.aql
new file mode 100644
index 0000000..5430f75
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.7.query.aql
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Suppose we have two records [-3, -2, -1, 2, 0, 3, 5, 7, 9] and [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]. We should return their
+ * similarity 5/15 by similarity-jaccard-prefix even if the tokens {-3, -2, -1} of the left side are removed so that it
+ * takes the form of [2, 0, 3, 5, 7, 9] with its actual length 9 as the first parameter of similarity-jaccard-prefix.
+ */
+use dataverse fuzzyjoin_basic;
+
+set import-private-functions 'true'
+
+let $left := [2, 0, 3, 5, 7, 9]
+let $right := [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+return similarity-jaccard-prefix(10, $left, 10, $right, $right[1], 0.33f)
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.8.query.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.8.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.8.query.aql
new file mode 100644
index 0000000..eb20a08
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.8.query.aql
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Suppose we have two records [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] and [-3, -2, -1, 0, 2, 3, 5, 7, 9]. We should return their
+ * similarity 5/14 by similarity-jaccard-prefix even if the tokens {-3, -2, -1, 0} of the right side are removed so that
+ * it takes the form of [2, 3, 5, 7, 9] with its actual length 9 as the third parameter of similarity-jaccard-prefix.
+ */
+use dataverse fuzzyjoin_basic;
+
+set import-private-functions 'true'
+
+let $left := [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+let $right := [2, 3, 5, 7, 9]
+return similarity-jaccard-prefix(10, $left, 9, $right, $left[0], 0.35f)
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.9.query.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.9.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.9.query.aql
new file mode 100644
index 0000000..586c162
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.9.query.aql
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Suppose we have two records [1, 2, 3, 4, 5, 6, 7, 8, 9] and [2, 3, 4, 5, 6, 7, 8, 9, 10]. We should return their
+ * similarity 0.8 by similarity-jaccard-sorted.
+ */
+use dataverse fuzzyjoin_basic;
+
+set import-private-functions 'true'
+
+let $left := [1, 2, 3, 4, 5, 6, 7, 8, 9]
+let $right := [2, 3, 4, 5, 6, 7, 8, 9, 10]
+return similarity-jaccard-sorted($left, $right)
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_3/basic-1_1_3.1.ddl.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_3/basic-1_1_3.1.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_3/basic-1_1_3.1.ddl.aql
new file mode 100644
index 0000000..65a52b1
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_3/basic-1_1_3.1.ddl.aql
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+drop dataverse fuzzyjoin_basic if exists;
+
+create dataverse fuzzyjoin_basic;
+
+use dataverse fuzzyjoin_basic;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_3/basic-1_1_3.2.update.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_3/basic-1_1_3.2.update.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_3/basic-1_1_3.2.update.aql
new file mode 100644
index 0000000..042f3ce
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_3/basic-1_1_3.2.update.aql
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_3/basic-1_1_3.3.query.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_3/basic-1_1_3.3.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_3/basic-1_1_3.3.query.aql
new file mode 100644
index 0000000..b7e2025
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_3/basic-1_1_3.3.query.aql
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Suppose we have two records [3, 4, 5, 6, 8, 9] and [0, 1, 2, 3, 5, 6, 7]. We should return their similarity 0.3 by
+ * similarity-jaccard-prefix even if the tokens {0, 1, 2} of the right side are removed so that it takes the form of
+ * [3, 5, 6, 7] with its actual length 7 as the third parameter of similarity-jaccard-prefix.
+ */
+use dataverse fuzzyjoin_basic;
+
+set import-private-functions 'true'
+
+let $left := [3, 4, 5, 6, 8, 9]
+let $right := [3, 5, 6, 7]
+return similarity-jaccard-prefix(6, $left, 7, $right, $right[0], 0.3f)
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_3/basic-1_1_3.4.query.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_3/basic-1_1_3.4.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_3/basic-1_1_3.4.query.aql
new file mode 100644
index 0000000..da8e765
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_3/basic-1_1_3.4.query.aql
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse fuzzyjoin_basic;
+
+set import-private-functions 'true'
+
+let $sims := [
+ similarity-jaccard-prefix(6, [3, 4, 5, 6, 8, 9], 7, [0, 1, 2, 3, 5, 6, 7], -1, 0.3f),
+ similarity-jaccard-prefix(6, [3, 4, 5, 6, 8, 9], 7, [3, 5, 6, 7], -1, 0.3f),
+ similarity-jaccard-prefix(7, [3, 5, 6, 7], 6, [3, 4, 5, 6, 8, 9], -1, 0.3f),
+ similarity-jaccard-prefix(6, [3, 4, 5, 6, 8, 9], 7, [3, 5, 6, 7], 3, 0.3f),
+ similarity-jaccard-prefix(7, [3, 5, 6, 7], 6, [3, 4, 5, 6, 8, 9], 3, 0.3f),
+ similarity-jaccard-prefix(6, [3, 4, 5, 6, 8, 9], 7, [3, 5, 6, 7], 6, 0.3f),
+ similarity-jaccard-prefix(7, [3, 5, 6, 7], 6, [3, 4, 5, 6, 8, 9], 6, 0.3f),
+ similarity-jaccard-prefix(7, [0, 1, 2, 3, 5, 6, 7], 6, [3, 5, 6, 8], -1, 0.3f),
+ similarity-jaccard-prefix(6, [3, 5, 6, 9], 7, [0, 1, 2, 3, 5, 6, 7], -1, 0.3f),
+ similarity-jaccard-prefix(7, [0, 1, 2, 3, 5, 6, 7], 6, [3, 5, 6, 8], 3, 0.3f),
+ similarity-jaccard-prefix(6, [3, 5, 6, 9], 7, [0, 1, 2, 3, 5, 6, 7], 3, 0.3f),
+ similarity-jaccard-prefix(7, [0, 1, 2, 3, 5, 6, 7], 6, [3, 5, 6, 8], 6, 0.3f),
+ similarity-jaccard-prefix(6, [3, 5, 6, 9], 7, [0, 1, 2, 3, 5, 6, 7], 6, 0.3f)
+]
+return $sims
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_1/basic-1_2_1.1.ddl.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_1/basic-1_2_1.1.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_1/basic-1_2_1.1.ddl.aql
new file mode 100644
index 0000000..45cc975
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_1/basic-1_2_1.1.ddl.aql
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+drop dataverse fuzzyjoin_basic if exists;
+
+create dataverse fuzzyjoin_basic;
+
+use dataverse fuzzyjoin_basic;
+
+create type BasicType as closed {
+ id: uuid,
+ authors: string
+}
+
+create dataset left(BasicType) primary key id autogenerated;
+create dataset right(BasicType) primary key id autogenerated;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_1/basic-1_2_1.2.update.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_1/basic-1_2_1.2.update.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_1/basic-1_2_1.2.update.aql
new file mode 100644
index 0000000..c9aceb2
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_1/basic-1_2_1.2.update.aql
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse fuzzyjoin_basic;
+
+load dataset left
+using localfs
+(("path"="asterix_nc1://data/pub-small/dblpauthors.adm"),("format"="adm"));
+
+load dataset right
+using localfs
+(("path"="asterix_nc1://data/pub-small/csxauthors.adm"),("format"="adm"));
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_1/basic-1_2_1.3.query.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_1/basic-1_2_1.3.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_1/basic-1_2_1.3.query.aql
new file mode 100644
index 0000000..5d9ab27
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_1/basic-1_2_1.3.query.aql
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse fuzzyjoin_basic;
+
+set import-private-functions 'true'
+
+let $left := [1, 5, 6, 7]
+let $right := [4, 5, 6]
+let $leftnull := [null, null, 5, 6, 7]
+let $rightnull := [4, 5, 6]
+let $nullstring := [null, null, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+
+let $prefix1 := subset-collection($nullstring, 0, prefix-len-jaccard(len($nullstring), 0.1f))
+let $prefix2 := subset-collection($nullstring, 0, prefix-len-jaccard(len($nullstring), 0.2f))
+let $prefix3 := subset-collection($nullstring, 0, prefix-len-jaccard(len($nullstring), 0.3f))
+let $prefix4 := subset-collection($nullstring, 0, prefix-len-jaccard(len($nullstring), 0.4f))
+let $prefix5 := subset-collection($nullstring, 0, prefix-len-jaccard(len($nullstring), 0.5f))
+let $prefix6 := subset-collection($nullstring, 0, prefix-len-jaccard(len($nullstring), 0.6f))
+let $prefix7 := subset-collection($nullstring, 0, prefix-len-jaccard(len($nullstring), 0.7f))
+let $prefix8 := subset-collection($nullstring, 0, prefix-len-jaccard(len($nullstring), 0.8f))
+let $prefix9 := subset-collection($nullstring, 0, prefix-len-jaccard(len($nullstring), 0.9f))
+
+let $bound :=
+for $l in [1]
+return [
+ [
+ similarity-jaccard-prefix(3, [1, 2, 3], 3, [1, 2, 3], 1, 1f),
+ similarity-jaccard-prefix(3, [1, 2, 3], 3, [1, 2, 4], 1, .5f),
+ similarity-jaccard-prefix(3, [1, 2, 3], 3, [1, 2, 4], 1, .6f),
+ similarity-jaccard-prefix(3, [1, 2, 3], 9, [1, 2, 3], 1, .5f),
+ similarity-jaccard-prefix(4, [1, 2, 3, 4], 2, [1, 2], 1, .5f),
+ similarity-jaccard-prefix(4, [1, 2, 3, 4], 4, [1, 2], 1, .33f)
+ ],[
+ similarity-jaccard-prefix(3, [1, 2, 3], 3, [1, 2, 3], 2, 1f),
+ similarity-jaccard-prefix(3, [1, 2, 3], 3, [1, 2, 4], 2, .5f),
+ similarity-jaccard-prefix(3, [1, 2, 3], 3, [1, 2, 4], 2, .6f),
+ similarity-jaccard-prefix(3, [1, 2, 3], 9, [1, 2, 3], 2, .5f),
+ similarity-jaccard-prefix(4, [1, 2, 3, 4], 2, [1, 2], 2, .5f),
+ similarity-jaccard-prefix(4, [1, 2, 3, 4], 4, [1, 2], 2, .33f)
+ ],[
+ similarity-jaccard-prefix(3, [1, 2, 3], 3, [1, 2, 3], 3, 1f),
+ similarity-jaccard-prefix(3, [1, 2, 3], 3, [1, 2, 4], 3, .5f),
+ similarity-jaccard-prefix(3, [1, 2, 3], 3, [1, 2, 4], 3, .6f),
+ similarity-jaccard-prefix(3, [1, 2, 3], 9, [1, 2, 3], 3, .5f),
+ similarity-jaccard-prefix(4, [1, 2, 3, 4], 2, [1, 2], 3, .5f),
+ similarity-jaccard-prefix(4, [1, 2, 3, 4], 4, [1, 2], 3, .33f)
+ ]
+]
+
+let $trybound :=
+for $l in [1]
+return [
+similarity-jaccard-prefix(8, [3, 4, 5, 6, 7, 8], 10, [0, 0, 3, 4, 5, 6, 7, 8, 9, 10], -1, 0.5f),
+similarity-jaccard-prefix(8, [1, 2, 3, 4, 5, 6, 7, 8], 10, [3, 4, 5, 6, 7, 8, 9, 10], 0, 0.5f),
+similarity-jaccard-prefix(8, [3, 4, 5, 6, 7, 8], 10, [0, 0, 3, 4, 5, 6, 7, 8, 9, 10], 1, 0.5f),
+similarity-jaccard-prefix(8, [1, 2, 3, 4, 5, 6, 7, 8], 10, [3, 4, 5, 6, 7, 8, 9, 10], 2, 0.5f),
+similarity-jaccard-prefix(8, [3, 4, 5, 6, 7, 8], 10, [0, 0, 3, 4, 5, 6, 7, 8, 9, 10], 3, 0.5f),
+similarity-jaccard-prefix(8, [1, 2, 3, 4, 5, 6, 7, 8], 10, [3, 4, 5, 6, 7, 8, 9, 10], 4, 0.5f),
+similarity-jaccard-prefix(8, [3, 4, 5, 6, 7, 8], 10, [0, 0, 3, 4, 5, 6, 7, 8, 9, 10], 5, 0.5f),
+similarity-jaccard-prefix(8, [1, 2, 3, 4, 5, 6, 7, 8], 10, [3, 4, 5, 6, 7, 8, 9, 10], 6, 0.5f),
+similarity-jaccard-prefix(8, [3, 4, 5, 6, 7, 8], 10, [0, 0, 3, 4, 5, 6, 7, 8, 9, 10], 7, 0.5f),
+similarity-jaccard-prefix(8, [1, 2, 3, 4, 5, 6, 7, 8], 10, [0, 0, 3, 4, 5, 6, 7, 8, 9, 10], 8, 0.5f),
+similarity-jaccard-prefix(8, [1, 2, 3, 4, 5, 6, 7, 8], 10, [0, 0, 3, 4, 5, 6, 7, 8, 9, 10], 9, 0.5f),
+similarity-jaccard-prefix(8, [1, 2, 3, 4, 5, 6, 7, 8], 10, [0, 0, 3, 4, 5, 6, 7, 8, 9, 10], 10, 0.5f),
+similarity-jaccard-prefix(8, [1, 2, 5, 6, 7, 3, 4, 8], 10, [4, 5, 0, 0, 3, 6, 7, 8, 9, 10], 11, 0.5f)
+]
+
+let $checkbound :=
+for $l in [1]
+return [
+similarity-jaccard-check([1, 2, 3, 4, 5, 6, 7, 8], [null, 0, 3, 4, 5, 6, 7, 8, 9, 10], 0.1f),
+similarity-jaccard-check([null, 2, 3, 4, 5, 6, 7, 8], [null, 0, 3, 4, 5, 6, 7, 8, 9, 10], 0.2f),
+similarity-jaccard-check([null, 2, 3, 4, 5, 6, 7, 8], [null, null, 3, 4, 5, 6, 7, 8, 9, 10], 0.3f),
+similarity-jaccard-check([null, null, 3, 4, 5, 6, 7, 8], [null, null, 3, 4, 5, 6, 7, 8, 9, 10], 0.4f),
+similarity-jaccard-check([1, 2, 3, 4, 5, 6, 7, 8], [null, null, 3, 4, 5, 6, 7, 8, 9, 10], 0.5f),
+similarity-jaccard-check([1, 2, 3, 4, 5, 6, 7, 8], [0, 2, 3, 4, 5, 6, 7, 8, 9, 10], 0.6f),
+similarity-jaccard-check([1, 2, 3, 4, 5, 6, 7, 8], [null, null, 3, 4, 5, 6, 7, 8, 2], 0.7f),
+similarity-jaccard-check([1, 2, 3, 4, 5, 6, 7, 8], [null, null, 3, 4, 5, 6, 7, 8, 1, 2], 0.8f),
+similarity-jaccard-check([1, 2, 3, 4, 5, 6, 7, 8, 9], [null, 2, 3, 4, 5, 6, 7, 8, 9, 1], 0.9f)
+]
+
+let $len := len($right)
+
+let $negativesub1 := subset-collection($right, 6, prefix-len-jaccard($len, .4f))
+let $negativesub2 := subset-collection($right, 0, -1)
+let $prefix := subset-collection($right, 0, prefix-len-jaccard($len, .4f))
+let $sim1 := similarity-jaccard($leftnull, $right)
+let $sim2 := similarity-jaccard-prefix(len($left), $left, len($right), $right, -1, .4f)
+let $sim3 := similarity-jaccard($leftnull, $rightnull)
+let $sim4 := similarity-jaccard-prefix(8, [1, 2, 3, 4, 5, 6, 7, 8], 10, [0, 0, 3, 4, 5, 6, 7, 8, 9, 10], 1, .4f)
+let $sim5 := similarity-jaccard([1, 2, 3, 4, 5, 6, 7, 8], [null, null, 3, 4, 5, 6, 7, 8, 9, 10])
+
+let $joinpair :=
+for $s in $prefix4
+for $r in $prefix1
+where $s = $r
+return $s
+
+let $clearnull :=
+for $index at $i in $nullstring
+where $nullstring[$i] <= null
+return int16(string($nullstring[$i])) + 64
+
+let $cleanup := [$clearnull, $nullstring]
+
+return [$prefix, $sim1, $sim2, $sim3, $sim4, $sim5, $bound, $trybound, $checkbound, $negativesub1, $negativesub2,
+[$prefix1, $prefix2, $prefix3, $prefix4, $prefix5, $prefix6, $prefix7, $prefix8, $prefix9], $joinpair, $cleanup]
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_1/basic-1_2_1.4.query.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_1/basic-1_2_1.4.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_1/basic-1_2_1.4.query.aql
new file mode 100644
index 0000000..6eff4f6
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_1/basic-1_2_1.4.query.aql
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse fuzzyjoin_basic;
+// First FLWOR: for every element of every record, look up its positional index ($i) in $kv via a nested subquery.
+let $records := [[5, 6, 7], [0, 3, 4]] // note: 7 and 0 have no equal entry in $kv
+let $kv := [1, 2, 3, 4, 5, 6]
+// $lorecords: per record $r, the list $c holding one $index list per element $t of $r.
+let $lorecords :=
+for $r in $records
+let $c :=
+ for $t in $r
+ let $index :=
+ for $k at $i in $kv
+ where $t = $k
+ return $i
+ return $index
+return $c
+// For each entry of $lorecords, return the first item ($d[0]) of each $index list, ordered by that item.
+for $record in $lorecords
+let $orecord :=
+for $d in $record
+order by $d[0]
+return $d[0]
+return $orecord
+// NOTE(review): a second FLWOR follows with no separator; same literals, but the $kv lookup is two for clauses ordered by $i.
+let $records := [[5, 6, 7], [0, 3, 4]]
+let $kv := [1, 2, 3, 4, 5, 6]
+for $r in $records
+let $c :=
+ for $t in $r
+ for $k at $i in $kv
+ where $t = $k
+ order by $i
+ return $i
+return $c
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_1/basic-1_2_1.5.query.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_1/basic-1_2_1.5.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_1/basic-1_2_1.5.query.aql
new file mode 100644
index 0000000..9696695
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_1/basic-1_2_1.5.query.aql
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse fuzzyjoin_basic;
+
+set import-private-functions 'true'
+
+// Stage 1: one entry per distinct word token of the authors field, ordered by count($id) and then by the token.
+let $rankedTokens := (
+ for $right in dataset left
+ let $id := $right.id
+ for $token in word-tokens($right.authors)
+ /*+ hash */ group by $tokenGroupped := $token with $id
+ /*+ inmem 34 198608 */ order by count($id), $tokenGroupped
+ return $tokenGroupped
+)
+// NOTE(review): the Stage 1 loop variable is named $right but iterates over dataset left.
+// Stage 2_1 of left: per left record, replace each token by its positional index in $rankedTokens (nested subquery form).
+let $tokenLeftVerify := (
+ for $left in dataset left
+ let $tokenUnrankedLeft := word-tokens($left.authors)
+ let $lenLeft := len($tokenUnrankedLeft) // NOTE(review): $lenLeft is not referenced again in this subquery
+ let $tokens := (
+ for $token in $tokenUnrankedLeft
+ let $index :=
+ for $tokenRanked at $i in $rankedTokens
+ where $token = /*+ bcast */ $tokenRanked
+ return $i
+ order by $index
+ return $index
+ )
+ where count($tokenUnrankedLeft) != count($tokens) // keep only records whose ranked list differs in size from the raw token list
+ order by $left.id
+ return {"tid": $left.id, "authors": $left.authors, "tokens": $tokenUnrankedLeft, "ranked": $tokens}
+)
+let $tokenLeft := ( // NOTE(review): $tokenLeft is not referenced by the final return below
+ for $left in dataset left
+ let $tokenUnrankedLeft := word-tokens($left.authors)
+ let $lenLeft := len($tokenUnrankedLeft) // NOTE(review): $lenLeft is not referenced again in this subquery
+ for $token in $tokenUnrankedLeft
+ for $tokenRanked at $i in $rankedTokens
+ where $token = /*+ bcast */ $tokenRanked
+ order by $i
+ return $i
+)
+// Result of the query: every record produced by $tokenLeftVerify.
+for $r in $tokenLeftVerify
+return $r
\ No newline at end of file