You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@asterixdb.apache.org by wa...@apache.org on 2018/10/10 00:59:16 UTC

[07/36] asterixdb git commit: [NO ISSUE][COMP][RT] Enable multiway similarity joins

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/optimizerts/results/similarity/jaccard-similarity-join-dual-order.plan
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/results/similarity/jaccard-similarity-join-dual-order.plan b/asterixdb/asterix-app/src/test/resources/optimizerts/results/similarity/jaccard-similarity-join-dual-order.plan
new file mode 100644
index 0000000..e8b029a
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/optimizerts/results/similarity/jaccard-similarity-join-dual-order.plan
@@ -0,0 +1,191 @@
+-- DISTRIBUTE_RESULT  |PARTITIONED|
+  -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+    -- STREAM_PROJECT  |PARTITIONED|
+      -- ASSIGN  |PARTITIONED|
+        -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+          -- PRE_CLUSTERED_GROUP_BY[$$162, $$164]  |PARTITIONED|
+                  {
+                    -- AGGREGATE  |LOCAL|
+                      -- NESTED_TUPLE_SOURCE  |LOCAL|
+                  }
+            -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+              -- STABLE_SORT [$$162(ASC), $$164(ASC)]  |PARTITIONED|
+                -- HASH_PARTITION_EXCHANGE [$$162, $$164]  |PARTITIONED|
+                  -- STREAM_PROJECT  |PARTITIONED|
+                    -- ASSIGN  |PARTITIONED|
+                      -- STREAM_SELECT  |PARTITIONED|
+                        -- STREAM_PROJECT  |PARTITIONED|
+                          -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                            -- HYBRID_HASH_JOIN [$$prefixTokenRight][$$prefixTokenLeft]  |PARTITIONED|
+                              -- HASH_PARTITION_EXCHANGE [$$prefixTokenRight]  |PARTITIONED|
+                                -- UNNEST  |PARTITIONED|
+                                  -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                    -- PRE_CLUSTERED_GROUP_BY[$$126]  |PARTITIONED|
+                                            {
+                                              -- AGGREGATE  |LOCAL|
+                                                -- IN_MEMORY_STABLE_SORT [$$i(ASC)]  |LOCAL|
+                                                  -- STREAM_SELECT  |LOCAL|
+                                                    -- NESTED_TUPLE_SOURCE  |LOCAL|
+                                            }
+                                      -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                        -- STREAM_PROJECT  |PARTITIONED|
+                                          -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                            -- IN_MEMORY_HASH_JOIN [$$tokenUnranked][$$tokenRightGrouped]  |PARTITIONED|
+                                              -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                -- UNNEST  |PARTITIONED|
+                                                  -- STREAM_PROJECT  |PARTITIONED|
+                                                    -- ASSIGN  |PARTITIONED|
+                                                      -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                        -- DATASOURCE_SCAN  |PARTITIONED|
+                                                          -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                            -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
+                                              -- BROADCAST_EXCHANGE  |PARTITIONED|
+                                                -- ASSIGN  |PARTITIONED|
+                                                  -- RUNNING_AGGREGATE  |PARTITIONED|
+                                                    -- STREAM_PROJECT  |PARTITIONED|
+                                                      -- SORT_MERGE_EXCHANGE [$$124(ASC) ]  |PARTITIONED|
+                                                        -- STABLE_SORT [$$124(ASC)]  |PARTITIONED|
+                                                          -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                            -- STREAM_PROJECT  |PARTITIONED|
+                                                              -- ASSIGN  |PARTITIONED|
+                                                                -- STREAM_PROJECT  |PARTITIONED|
+                                                                  -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                                    -- HYBRID_HASH_JOIN [$$tokenRightGrouped][$$tokenLeftGrouped]  |PARTITIONED|
+                                                                      -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                                        -- STREAM_PROJECT  |PARTITIONED|
+                                                                          -- ASSIGN  |PARTITIONED|
+                                                                            -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                                              -- REPLICATE  |PARTITIONED|
+                                                                                -- HASH_PARTITION_EXCHANGE [$$tokenRightGrouped]  |PARTITIONED|
+                                                                                  -- EXTERNAL_GROUP_BY[$$172]  |PARTITIONED|
+                                                                                          {
+                                                                                            -- AGGREGATE  |LOCAL|
+                                                                                              -- NESTED_TUPLE_SOURCE  |LOCAL|
+                                                                                          }
+                                                                                    -- HASH_PARTITION_EXCHANGE [$$172]  |PARTITIONED|
+                                                                                      -- EXTERNAL_GROUP_BY[$$orderTokenRight]  |PARTITIONED|
+                                                                                              {
+                                                                                                -- AGGREGATE  |LOCAL|
+                                                                                                  -- NESTED_TUPLE_SOURCE  |LOCAL|
+                                                                                              }
+                                                                                        -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                                                          -- STREAM_PROJECT  |PARTITIONED|
+                                                                                            -- UNNEST  |PARTITIONED|
+                                                                                              -- STREAM_PROJECT  |PARTITIONED|
+                                                                                                -- ASSIGN  |PARTITIONED|
+                                                                                                  -- STREAM_PROJECT  |PARTITIONED|
+                                                                                                    -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                                                                      -- DATASOURCE_SCAN  |PARTITIONED|
+                                                                                                        -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                                                                          -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
+                                                                      -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                                        -- STREAM_PROJECT  |PARTITIONED|
+                                                                          -- ASSIGN  |PARTITIONED|
+                                                                            -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                                              -- REPLICATE  |PARTITIONED|
+                                                                                -- HASH_PARTITION_EXCHANGE [$$tokenLeftGrouped]  |PARTITIONED|
+                                                                                  -- EXTERNAL_GROUP_BY[$$174]  |PARTITIONED|
+                                                                                          {
+                                                                                            -- AGGREGATE  |LOCAL|
+                                                                                              -- NESTED_TUPLE_SOURCE  |LOCAL|
+                                                                                          }
+                                                                                    -- HASH_PARTITION_EXCHANGE [$$174]  |PARTITIONED|
+                                                                                      -- EXTERNAL_GROUP_BY[$$orderTokenLeft]  |PARTITIONED|
+                                                                                              {
+                                                                                                -- AGGREGATE  |LOCAL|
+                                                                                                  -- NESTED_TUPLE_SOURCE  |LOCAL|
+                                                                                              }
+                                                                                        -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                                                          -- STREAM_PROJECT  |PARTITIONED|
+                                                                                            -- UNNEST  |PARTITIONED|
+                                                                                              -- STREAM_PROJECT  |PARTITIONED|
+                                                                                                -- ASSIGN  |PARTITIONED|
+                                                                                                  -- STREAM_PROJECT  |PARTITIONED|
+                                                                                                    -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                                                                      -- DATASOURCE_SCAN  |PARTITIONED|
+                                                                                                        -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                                                                          -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
+                              -- HASH_PARTITION_EXCHANGE [$$prefixTokenLeft]  |PARTITIONED|
+                                -- STREAM_PROJECT  |PARTITIONED|
+                                  -- UNNEST  |PARTITIONED|
+                                    -- ASSIGN  |PARTITIONED|
+                                      -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                        -- PRE_CLUSTERED_GROUP_BY[$$129]  |PARTITIONED|
+                                                {
+                                                  -- AGGREGATE  |LOCAL|
+                                                    -- IN_MEMORY_STABLE_SORT [$$i(ASC)]  |LOCAL|
+                                                      -- STREAM_SELECT  |LOCAL|
+                                                        -- NESTED_TUPLE_SOURCE  |LOCAL|
+                                                }
+                                          -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                            -- STREAM_PROJECT  |PARTITIONED|
+                                              -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                -- IN_MEMORY_HASH_JOIN [$$tokenUnranked][$$tokenRightGrouped]  |PARTITIONED|
+                                                  -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                    -- UNNEST  |PARTITIONED|
+                                                      -- STREAM_PROJECT  |PARTITIONED|
+                                                        -- ASSIGN  |PARTITIONED|
+                                                          -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                            -- DATASOURCE_SCAN  |PARTITIONED|
+                                                              -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                                -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
+                                                  -- BROADCAST_EXCHANGE  |PARTITIONED|
+                                                    -- ASSIGN  |PARTITIONED|
+                                                      -- RUNNING_AGGREGATE  |PARTITIONED|
+                                                        -- STREAM_PROJECT  |PARTITIONED|
+                                                          -- SORT_MERGE_EXCHANGE [$$125(ASC) ]  |PARTITIONED|
+                                                            -- STABLE_SORT [$$125(ASC)]  |PARTITIONED|
+                                                              -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                                -- STREAM_PROJECT  |PARTITIONED|
+                                                                  -- ASSIGN  |PARTITIONED|
+                                                                    -- STREAM_PROJECT  |PARTITIONED|
+                                                                      -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                                        -- HYBRID_HASH_JOIN [$$tokenRightGrouped][$$tokenLeftGrouped]  |PARTITIONED|
+                                                                          -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                                            -- REPLICATE  |PARTITIONED|
+                                                                              -- HASH_PARTITION_EXCHANGE [$$tokenRightGrouped]  |PARTITIONED|
+                                                                                -- EXTERNAL_GROUP_BY[$$172]  |PARTITIONED|
+                                                                                        {
+                                                                                          -- AGGREGATE  |LOCAL|
+                                                                                            -- NESTED_TUPLE_SOURCE  |LOCAL|
+                                                                                        }
+                                                                                  -- HASH_PARTITION_EXCHANGE [$$172]  |PARTITIONED|
+                                                                                    -- EXTERNAL_GROUP_BY[$$orderTokenRight]  |PARTITIONED|
+                                                                                            {
+                                                                                              -- AGGREGATE  |LOCAL|
+                                                                                                -- NESTED_TUPLE_SOURCE  |LOCAL|
+                                                                                            }
+                                                                                      -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                                                        -- STREAM_PROJECT  |PARTITIONED|
+                                                                                          -- UNNEST  |PARTITIONED|
+                                                                                            -- STREAM_PROJECT  |PARTITIONED|
+                                                                                              -- ASSIGN  |PARTITIONED|
+                                                                                                -- STREAM_PROJECT  |PARTITIONED|
+                                                                                                  -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                                                                    -- DATASOURCE_SCAN  |PARTITIONED|
+                                                                                                      -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                                                                        -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
+                                                                          -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                                            -- REPLICATE  |PARTITIONED|
+                                                                              -- HASH_PARTITION_EXCHANGE [$$tokenLeftGrouped]  |PARTITIONED|
+                                                                                -- EXTERNAL_GROUP_BY[$$174]  |PARTITIONED|
+                                                                                        {
+                                                                                          -- AGGREGATE  |LOCAL|
+                                                                                            -- NESTED_TUPLE_SOURCE  |LOCAL|
+                                                                                        }
+                                                                                  -- HASH_PARTITION_EXCHANGE [$$174]  |PARTITIONED|
+                                                                                    -- EXTERNAL_GROUP_BY[$$orderTokenLeft]  |PARTITIONED|
+                                                                                            {
+                                                                                              -- AGGREGATE  |LOCAL|
+                                                                                                -- NESTED_TUPLE_SOURCE  |LOCAL|
+                                                                                            }
+                                                                                      -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                                                        -- STREAM_PROJECT  |PARTITIONED|
+                                                                                          -- UNNEST  |PARTITIONED|
+                                                                                            -- STREAM_PROJECT  |PARTITIONED|
+                                                                                              -- ASSIGN  |PARTITIONED|
+                                                                                                -- STREAM_PROJECT  |PARTITIONED|
+                                                                                                  -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                                                                    -- DATASOURCE_SCAN  |PARTITIONED|
+                                                                                                      -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                                                                        -- EMPTY_TUPLE_SOURCE  |PARTITIONED|

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/optimizerts/results/similarity/jaccard-similarity-join-right-ahead.plan
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/results/similarity/jaccard-similarity-join-right-ahead.plan b/asterixdb/asterix-app/src/test/resources/optimizerts/results/similarity/jaccard-similarity-join-right-ahead.plan
new file mode 100644
index 0000000..ab700b6
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/optimizerts/results/similarity/jaccard-similarity-join-right-ahead.plan
@@ -0,0 +1,128 @@
+-- DISTRIBUTE_RESULT  |PARTITIONED|
+  -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+    -- STREAM_PROJECT  |PARTITIONED|
+      -- ASSIGN  |PARTITIONED|
+        -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+          -- PRE_CLUSTERED_GROUP_BY[$$98, $$100]  |PARTITIONED|
+                  {
+                    -- AGGREGATE  |LOCAL|
+                      -- NESTED_TUPLE_SOURCE  |LOCAL|
+                  }
+            -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+              -- STABLE_SORT [$$98(ASC), $$100(ASC)]  |PARTITIONED|
+                -- HASH_PARTITION_EXCHANGE [$$98, $$100]  |PARTITIONED|
+                  -- STREAM_PROJECT  |PARTITIONED|
+                    -- ASSIGN  |PARTITIONED|
+                      -- STREAM_SELECT  |PARTITIONED|
+                        -- STREAM_PROJECT  |PARTITIONED|
+                          -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                            -- HYBRID_HASH_JOIN [$$prefixTokenRight][$$prefixTokenLeft]  |PARTITIONED|
+                              -- HASH_PARTITION_EXCHANGE [$$prefixTokenRight]  |PARTITIONED|
+                                -- UNNEST  |PARTITIONED|
+                                  -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                    -- PRE_CLUSTERED_GROUP_BY[$$78]  |PARTITIONED|
+                                            {
+                                              -- AGGREGATE  |LOCAL|
+                                                -- IN_MEMORY_STABLE_SORT [$$i(ASC)]  |LOCAL|
+                                                  -- STREAM_SELECT  |LOCAL|
+                                                    -- NESTED_TUPLE_SOURCE  |LOCAL|
+                                            }
+                                      -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                        -- STREAM_PROJECT  |PARTITIONED|
+                                          -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                            -- IN_MEMORY_HASH_JOIN [$$tokenUnranked][$$tokenRightGrouped]  |PARTITIONED|
+                                              -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                -- UNNEST  |PARTITIONED|
+                                                  -- STREAM_PROJECT  |PARTITIONED|
+                                                    -- ASSIGN  |PARTITIONED|
+                                                      -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                        -- DATASOURCE_SCAN  |PARTITIONED|
+                                                          -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                            -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
+                                              -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                -- STREAM_PROJECT  |PARTITIONED|
+                                                  -- ASSIGN  |PARTITIONED|
+                                                    -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                      -- REPLICATE  |PARTITIONED|
+                                                        -- BROADCAST_EXCHANGE  |PARTITIONED|
+                                                          -- ASSIGN  |PARTITIONED|
+                                                            -- RUNNING_AGGREGATE  |PARTITIONED|
+                                                              -- STREAM_PROJECT  |PARTITIONED|
+                                                                -- SORT_MERGE_EXCHANGE [$$83(ASC) ]  |PARTITIONED|
+                                                                  -- STABLE_SORT [$$83(ASC)]  |PARTITIONED|
+                                                                    -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                                      -- EXTERNAL_GROUP_BY[$$106]  |PARTITIONED|
+                                                                              {
+                                                                                -- AGGREGATE  |LOCAL|
+                                                                                  -- NESTED_TUPLE_SOURCE  |LOCAL|
+                                                                              }
+                                                                        -- HASH_PARTITION_EXCHANGE [$$106]  |PARTITIONED|
+                                                                          -- EXTERNAL_GROUP_BY[$$orderTokenRight]  |PARTITIONED|
+                                                                                  {
+                                                                                    -- AGGREGATE  |LOCAL|
+                                                                                      -- NESTED_TUPLE_SOURCE  |LOCAL|
+                                                                                  }
+                                                                            -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                                              -- STREAM_PROJECT  |PARTITIONED|
+                                                                                -- UNNEST  |PARTITIONED|
+                                                                                  -- STREAM_PROJECT  |PARTITIONED|
+                                                                                    -- ASSIGN  |PARTITIONED|
+                                                                                      -- STREAM_PROJECT  |PARTITIONED|
+                                                                                        -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                                                          -- DATASOURCE_SCAN  |PARTITIONED|
+                                                                                            -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                                                              -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
+                              -- HASH_PARTITION_EXCHANGE [$$prefixTokenLeft]  |PARTITIONED|
+                                -- STREAM_PROJECT  |PARTITIONED|
+                                  -- UNNEST  |PARTITIONED|
+                                    -- ASSIGN  |PARTITIONED|
+                                      -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                        -- PRE_CLUSTERED_GROUP_BY[$$80]  |PARTITIONED|
+                                                {
+                                                  -- AGGREGATE  |LOCAL|
+                                                    -- IN_MEMORY_STABLE_SORT [$$i(ASC)]  |LOCAL|
+                                                      -- STREAM_SELECT  |LOCAL|
+                                                        -- NESTED_TUPLE_SOURCE  |LOCAL|
+                                                }
+                                          -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                            -- STREAM_PROJECT  |PARTITIONED|
+                                              -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                -- IN_MEMORY_HASH_JOIN [$$tokenUnranked][$$tokenRightGrouped]  |PARTITIONED|
+                                                  -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                    -- UNNEST  |PARTITIONED|
+                                                      -- STREAM_PROJECT  |PARTITIONED|
+                                                        -- ASSIGN  |PARTITIONED|
+                                                          -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                            -- DATASOURCE_SCAN  |PARTITIONED|
+                                                              -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                                -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
+                                                  -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                    -- REPLICATE  |PARTITIONED|
+                                                      -- BROADCAST_EXCHANGE  |PARTITIONED|
+                                                        -- ASSIGN  |PARTITIONED|
+                                                          -- RUNNING_AGGREGATE  |PARTITIONED|
+                                                            -- STREAM_PROJECT  |PARTITIONED|
+                                                              -- SORT_MERGE_EXCHANGE [$$83(ASC) ]  |PARTITIONED|
+                                                                -- STABLE_SORT [$$83(ASC)]  |PARTITIONED|
+                                                                  -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                                    -- EXTERNAL_GROUP_BY[$$106]  |PARTITIONED|
+                                                                            {
+                                                                              -- AGGREGATE  |LOCAL|
+                                                                                -- NESTED_TUPLE_SOURCE  |LOCAL|
+                                                                            }
+                                                                      -- HASH_PARTITION_EXCHANGE [$$106]  |PARTITIONED|
+                                                                        -- EXTERNAL_GROUP_BY[$$orderTokenRight]  |PARTITIONED|
+                                                                                {
+                                                                                  -- AGGREGATE  |LOCAL|
+                                                                                    -- NESTED_TUPLE_SOURCE  |LOCAL|
+                                                                                }
+                                                                          -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                                            -- STREAM_PROJECT  |PARTITIONED|
+                                                                              -- UNNEST  |PARTITIONED|
+                                                                                -- STREAM_PROJECT  |PARTITIONED|
+                                                                                  -- ASSIGN  |PARTITIONED|
+                                                                                    -- STREAM_PROJECT  |PARTITIONED|
+                                                                                      -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                                                        -- DATASOURCE_SCAN  |PARTITIONED|
+                                                                                          -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                                                            -- EMPTY_TUPLE_SOURCE  |PARTITIONED|

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1/basic-1_1.1.ddl.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1/basic-1_1.1.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1/basic-1_1.1.ddl.aql
new file mode 100644
index 0000000..a3b6ec2
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1/basic-1_1.1.ddl.aql
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+drop dataverse fuzzyjoin_basic if exists;
+
+create dataverse fuzzyjoin_basic;
+
+use dataverse fuzzyjoin_basic;
+
+create type BasicType as open {
+    id: int32,
+    summary: string?
+}
+
+create type BasicWithIndexType as open {
+    id: int32
+}
+
+create dataset Basic(BasicType) primary key id;
+
+create dataset BasicWithIndex(BasicWithIndexType) primary key id;
+
+create index BasicWithIndex_summary_bt_idx on BasicWithIndex(summary: string?) type btree enforced;
+
+create index BasicWithIndex_summary_kw_idx on BasicWithIndex(summary: string?) type keyword enforced;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1/basic-1_1.2.update.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1/basic-1_1.2.update.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1/basic-1_1.2.update.aql
new file mode 100644
index 0000000..95ef5f1
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1/basic-1_1.2.update.aql
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse fuzzyjoin_basic;
+
+insert into dataset Basic(
+{"id": 1, "summary": "Clear, Concise, and fun!"}
+);
+insert into dataset Basic(
+{"id": 2, "summary": "Clear, Concise, and Charitable"}
+);
+
+insert into dataset BasicWithIndex(
+{"id": 1, "summary": "Clear, Concise, and fun!"}
+);
+insert into dataset BasicWithIndex(
+{"id": 2, "summary": "Clear, Concise, and Charitable"}
+);
+

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1/basic-1_1.3.query.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1/basic-1_1.3.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1/basic-1_1.3.query.aql
new file mode 100644
index 0000000..0e2aea7
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1/basic-1_1.3.query.aql
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse fuzzyjoin_basic;
+
+set simthreshold '0.6f';
+
+let $s1 := "Clear, Concise, and fun!"
+let $s2 := "Clear, Concise, and Charitable"
+let $psj := (
+    for $s in [$s1]
+    for $t in [$s2]
+    where word-tokens($s) ~= word-tokens($t)
+    return {"s1": $s, "s2": $t}
+)
+let $nsj := (
+    for $s in [$s1]
+    for $t in [$s2]
+    where word-tokens($s) /*+ indexnl */ ~= word-tokens($t)
+    return {"s1": $s, "s2": $t}
+)
+let $nvj := (
+    for $s in [$s1]
+    for $t in [$s2]
+    where similarity-jaccard-check(word-tokens($s), word-tokens($t), .6f)[0] = true
+    return {"s1": $s, "s2": $t}
+)
+let $nvr := (
+    for $s in [$s1]
+    for $t in [$s2]
+    where similarity-jaccard(word-tokens($s), word-tokens($t)) >= .6f
+    return {"s1": $s, "s2": $t}
+)
+let $tpsj := (
+    for $s in dataset Basic
+    for $t in dataset Basic
+    where /*+ skip-index */ similarity-jaccard(word-tokens($s.summary), word-tokens($t.summary)) >= .6f
+    and $s.id < $t.id
+    return {"s1": $s.summary, "s2": $t.summary}
+)
+let $tnsj := (
+    for $s in dataset Basic
+    for $t in dataset Basic
+    where /*+ indexnl */ similarity-jaccard(word-tokens($s.summary), word-tokens($t.summary)) >= .6f
+    and $s.id < $t.id
+    return {"s1": $s.summary, "s2": $t.summary}
+)
+let $itpsj := (
+    for $s in dataset BasicWithIndex
+    for $t in dataset BasicWithIndex
+    where /*+ skip-index */ similarity-jaccard(word-tokens($s.summary), word-tokens($t.summary)) >= 0.6
+    and $s.id < $t.id
+    return {"s1": $s.summary, "s2": $t.summary}
+)
+let $itnsj := (
+    for $s in dataset BasicWithIndex
+    for $t in dataset BasicWithIndex
+    where /*+ indexnl */ similarity-jaccard(word-tokens($s.summary), word-tokens($t.summary)) >= 0.6
+    and $s.id < $t.id
+    return {"s1": $s.summary, "s2": $t.summary}
+)
+let $left := word-tokens($s1)
+let $right := word-tokens($s2)
+let $vj := similarity-jaccard-check($left, $right, 0.6f)
+let $sr := similarity-jaccard($left, $right) >= 0.6
+return {"psj": $psj, "nsj": $nsj, "nvj": $nvj, "nvr": $nvr, "tpsj": $tpsj, "tnsj": $tnsj, "itpsj": $itpsj, "itnsj": $itnsj, "vj": $vj, "sr": $sr}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_1/basic-1_1_1.1.ddl.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_1/basic-1_1_1.1.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_1/basic-1_1_1.1.ddl.aql
new file mode 100644
index 0000000..45cc975
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_1/basic-1_1_1.1.ddl.aql
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+drop dataverse fuzzyjoin_basic if exists;
+
+create dataverse fuzzyjoin_basic;
+
+use dataverse fuzzyjoin_basic;
+
+create type BasicType as closed {
+    id: uuid,
+    authors: string
+}
+
+create dataset left(BasicType) primary key id autogenerated;
+create dataset right(BasicType) primary key id autogenerated;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_1/basic-1_1_1.2.update.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_1/basic-1_1_1.2.update.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_1/basic-1_1_1.2.update.aql
new file mode 100644
index 0000000..c9aceb2
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_1/basic-1_1_1.2.update.aql
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse fuzzyjoin_basic;
+
+load dataset left
+using localfs
+(("path"="asterix_nc1://data/pub-small/dblpauthors.adm"),("format"="adm"));
+
+load dataset right
+using localfs
+(("path"="asterix_nc1://data/pub-small/csxauthors.adm"),("format"="adm"));
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_1/basic-1_1_1.3.query.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_1/basic-1_1_1.3.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_1/basic-1_1_1.3.query.aql
new file mode 100644
index 0000000..c00681b
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_1/basic-1_1_1.3.query.aql
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse fuzzyjoin_basic;
+
+set import-private-functions 'true'
+
+let $hash := (
+    for $r in dataset left
+    let $c := (
+        for $t in counthashed-word-tokens($r.authors)
+        order by $t
+        distinct by $t
+        return $t
+    )
+    order by $r.id
+    return {"id": $r.id, "authors": $r.authors, "nondup": $c}
+)
+
+let $word := (
+    for $r in dataset left
+    let $c := (
+        for $t in word-tokens($r.authors)
+        order by $t
+        distinct by $t
+        return $t
+    )
+    order by $r.id
+    return {"id": $r.id, "authors": $r.authors, "nondup": $c}
+)
+
+for $s in $hash
+for $t in $word
+where $s.id = $t.id and count($s.nondup) != count($t.nondup)
+order by $s.authors
+return {
+        "authors": $s.authors,
+        "hdistinct": $s.nondup,
+        "hcount": count($s.nondup),
+        "hash": counthashed-word-tokens($s.authors),
+        "vhcount": count(counthashed-word-tokens($s.authors)),
+        "wdistinct": $t.nondup,
+        "wcount": count($t.nondup),
+        "word": word-tokens($t.authors),
+        "vwcount": count(word-tokens($t.authors))
+        }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.1.ddl.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.1.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.1.ddl.aql
new file mode 100644
index 0000000..65a52b1
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.1.ddl.aql
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+drop dataverse fuzzyjoin_basic if exists;
+
+create dataverse fuzzyjoin_basic;
+
+use dataverse fuzzyjoin_basic;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.10.query.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.10.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.10.query.aql
new file mode 100644
index 0000000..e60be60
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.10.query.aql
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Suppose we have two records [1, 2, 3, 4, 5, 6, 7, 8, 9] and [2, 3, 4, 5, 6, 7, 8, 9, 10], we should return their
+ * similarity 0.8 by similarity-jaccard-check.
+ */
+use dataverse fuzzyjoin_basic;
+
+set import-private-functions 'true'
+
+let $left := [1, 2, 3, 4, 5, 6, 7, 8, 9]
+let $right := [2, 3, 4, 5, 6, 7, 8, 9, 10]
+return similarity-jaccard-check($left, $right, 0.8f)

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.2.update.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.2.update.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.2.update.aql
new file mode 100644
index 0000000..042f3ce
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.2.update.aql
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.3.query.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.3.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.3.query.aql
new file mode 100644
index 0000000..69dc59c
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.3.query.aql
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Suppose we have two records [1, 2, 3, 4, 5, 6, 7, 8, 9] and [2, 3, 4, 5, 6, 7, 8, 9, 10], we should return their
+ * similarity 0.8 by similarity-jaccard-prefix even if the first token of the left side is removed so that it takes
+ * the form of [2, 3, 4, 5, 6, 7, 8, 9] with its actual length 9 as the first parameter of similarity-jaccard-prefix.
+ */
+use dataverse fuzzyjoin_basic;
+
+set import-private-functions 'true'
+
+let $left := [2, 3, 4, 5, 6, 7, 8, 9]
+let $right := [2, 3, 4, 5, 6, 7, 8, 9, 10]
+return similarity-jaccard-prefix(9, $left, 9, $right, $left[1], 0.8f)

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.4.query.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.4.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.4.query.aql
new file mode 100644
index 0000000..ecfed74
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.4.query.aql
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Suppose we have two records [1, 2, 3, 4, 5, 6, 7, 8, 9] and [2, 3, 4, 5, 6, 7, 8, 9, 10], we should return their
+ * similarity 0.8 by similarity-jaccard-prefix even if the first token of the right side is removed so that it takes
+ * the form of [2, 3, 4, 5, 6, 7, 8, 9] with its actual length 9 as the first parameter of similarity-jaccard-prefix.
+ */
+use dataverse fuzzyjoin_basic;
+
+set import-private-functions 'true'
+
+let $left := [1, 2, 3, 4, 5, 6, 7, 8, 9]
+let $right := [2, 3, 4, 5, 6, 7, 8, 9]
+return similarity-jaccard-prefix(9, $left, 9, $right, $left[1], 0.8f)

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.5.query.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.5.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.5.query.aql
new file mode 100644
index 0000000..0b6482d
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.5.query.aql
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Suppose we have two records [2, 3, 4, 5, 6, 7, 8, 9, 10] and [1, 2, 3, 4, 5, 6, 7, 8, 9], we should return their
+ * similarity 0.8 by similarity-jaccard-prefix even if the first token of the right side is removed so that it takes
+ * the form of [2, 3, 4, 5, 6, 7, 8, 9] with its actual length 9 as the first parameter of similarity-jaccard-prefix.
+ */
+use dataverse fuzzyjoin_basic;
+
+set import-private-functions 'true'
+
+let $left := [2, 3, 4, 5, 6, 7, 8, 9, 10]
+let $right := [2, 3, 4, 5, 6, 7, 8, 9]
+return similarity-jaccard-prefix(9, $left, 9, $right, $left[1], 0.8f)

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.6.query.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.6.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.6.query.aql
new file mode 100644
index 0000000..21f1805
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.6.query.aql
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Suppose we have two records [2, 3, 4, 5, 6, 7, 8, 9, 10] and [1, 2, 3, 4, 5, 6, 7, 8, 9], we should return their
+ * similarity 0.8 by similarity-jaccard-prefix even if the first token of the left side is removed so that it takes
+ * the form of [2, 3, 4, 5, 6, 7, 8, 9] with its actual length 9 as the first parameter of similarity-jaccard-prefix.
+ */
+use dataverse fuzzyjoin_basic;
+
+set import-private-functions 'true'
+
+let $left := [2, 3, 4, 5, 6, 7, 8, 9]
+let $right := [1, 2, 3, 4, 5, 6, 7, 8, 9]
+return similarity-jaccard-prefix(9, $left, 9, $right, $left[1], 0.8f)

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.7.query.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.7.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.7.query.aql
new file mode 100644
index 0000000..5430f75
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.7.query.aql
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Suppose we have two records [-3, -2, -1, 2, 0, 3, 5, 7, 9] and [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], we should return their
+ * similarity 5/15 by similarity-jaccard-prefix even if the tokens {-3, -2, -1} of the left side are removed so that
+ * it takes the form of [2, 0, 3, 5, 7, 9] with its actual length 9 as the first parameter of similarity-jaccard-prefix.
+ */
+use dataverse fuzzyjoin_basic;
+
+set import-private-functions 'true'
+
+let $left := [2, 0, 3, 5, 7, 9]
+let $right := [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+return similarity-jaccard-prefix(10, $left, 10, $right, $right[1], 0.33f)

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.8.query.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.8.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.8.query.aql
new file mode 100644
index 0000000..eb20a08
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.8.query.aql
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Suppose we have two records [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] and [-3, -2, -1, 0, 2, 3, 5, 7, 9], we should return their
+ * similarity 5/14 by similarity-jaccard-prefix even if the tokens {-3, -2, -1, 0} of the right side are removed so that
+ * it takes the form of [2, 3, 5, 7, 9] with its actual length 9 as the first parameter of similarity-jaccard-prefix.
+ */
+use dataverse fuzzyjoin_basic;
+
+set import-private-functions 'true'
+
+let $left := [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+let $right := [2, 3, 5, 7, 9]
+return similarity-jaccard-prefix(10, $left, 9, $right, $left[0], 0.35f)

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.9.query.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.9.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.9.query.aql
new file mode 100644
index 0000000..586c162
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_2/basic-1_1_2.9.query.aql
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Suppose we have two records [1, 2, 3, 4, 5, 6, 7, 8, 9] and [2, 3, 4, 5, 6, 7, 8, 9, 10]. We should return their
+ * similarity 0.8 by similarity-jaccard-sorted.
+ */
+use dataverse fuzzyjoin_basic;
+
+set import-private-functions 'true'
+
+let $left := [1, 2, 3, 4, 5, 6, 7, 8, 9]
+let $right := [2, 3, 4, 5, 6, 7, 8, 9, 10]
+return similarity-jaccard-sorted($left, $right)

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_3/basic-1_1_3.1.ddl.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_3/basic-1_1_3.1.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_3/basic-1_1_3.1.ddl.aql
new file mode 100644
index 0000000..65a52b1
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_3/basic-1_1_3.1.ddl.aql
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+drop dataverse fuzzyjoin_basic if exists;
+
+create dataverse fuzzyjoin_basic;
+
+use dataverse fuzzyjoin_basic;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_3/basic-1_1_3.2.update.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_3/basic-1_1_3.2.update.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_3/basic-1_1_3.2.update.aql
new file mode 100644
index 0000000..042f3ce
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_3/basic-1_1_3.2.update.aql
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_3/basic-1_1_3.3.query.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_3/basic-1_1_3.3.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_3/basic-1_1_3.3.query.aql
new file mode 100644
index 0000000..b7e2025
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_3/basic-1_1_3.3.query.aql
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Suppose we have two records [3, 4, 5, 6, 8, 9] and [0, 1, 2, 3, 5, 6, 7]. We should return their similarity 0.3 by
+ * similarity-jaccard-prefix even though the tokens {0, 1, 2} of the right side are removed, so that it takes the form
+ * of [3, 5, 6, 7] with its actual length 7 passed as the third parameter of similarity-jaccard-prefix.
+ */
+use dataverse fuzzyjoin_basic;
+
+set import-private-functions 'true'
+
+let $left := [3, 4, 5, 6, 8, 9]
+let $right := [3, 5, 6, 7]
+return similarity-jaccard-prefix(6, $left, 7, $right, $right[0], 0.3f)

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_3/basic-1_1_3.4.query.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_3/basic-1_1_3.4.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_3/basic-1_1_3.4.query.aql
new file mode 100644
index 0000000..da8e765
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_1_3/basic-1_1_3.4.query.aql
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse fuzzyjoin_basic;
+
+set import-private-functions 'true'
+
+let $sims := [
+    similarity-jaccard-prefix(6, [3, 4, 5, 6, 8, 9], 7, [0, 1, 2, 3, 5, 6, 7], -1, 0.3f),
+    similarity-jaccard-prefix(6, [3, 4, 5, 6, 8, 9], 7, [3, 5, 6, 7], -1, 0.3f),
+    similarity-jaccard-prefix(7, [3, 5, 6, 7], 6, [3, 4, 5, 6, 8, 9], -1, 0.3f),
+    similarity-jaccard-prefix(6, [3, 4, 5, 6, 8, 9], 7, [3, 5, 6, 7], 3, 0.3f),
+    similarity-jaccard-prefix(7, [3, 5, 6, 7], 6, [3, 4, 5, 6, 8, 9], 3, 0.3f),
+    similarity-jaccard-prefix(6, [3, 4, 5, 6, 8, 9], 7, [3, 5, 6, 7], 6, 0.3f),
+    similarity-jaccard-prefix(7, [3, 5, 6, 7], 6, [3, 4, 5, 6, 8, 9], 6, 0.3f),
+    similarity-jaccard-prefix(7, [0, 1, 2, 3, 5, 6, 7], 6, [3, 5, 6, 8], -1, 0.3f),
+    similarity-jaccard-prefix(6, [3, 5, 6, 9], 7, [0, 1, 2, 3, 5, 6, 7], -1, 0.3f),
+    similarity-jaccard-prefix(7, [0, 1, 2, 3, 5, 6, 7], 6, [3, 5, 6, 8], 3, 0.3f),
+    similarity-jaccard-prefix(6, [3, 5, 6, 9], 7, [0, 1, 2, 3, 5, 6, 7], 3, 0.3f),
+    similarity-jaccard-prefix(7, [0, 1, 2, 3, 5, 6, 7], 6, [3, 5, 6, 8], 6, 0.3f),
+    similarity-jaccard-prefix(6, [3, 5, 6, 9], 7, [0, 1, 2, 3, 5, 6, 7], 6, 0.3f)
+]
+return $sims
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_1/basic-1_2_1.1.ddl.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_1/basic-1_2_1.1.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_1/basic-1_2_1.1.ddl.aql
new file mode 100644
index 0000000..45cc975
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_1/basic-1_2_1.1.ddl.aql
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+drop dataverse fuzzyjoin_basic if exists;
+
+create dataverse fuzzyjoin_basic;
+
+use dataverse fuzzyjoin_basic;
+
+create type BasicType as closed {
+    id: uuid,
+    authors: string
+}
+
+create dataset left(BasicType) primary key id autogenerated;
+create dataset right(BasicType) primary key id autogenerated;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_1/basic-1_2_1.2.update.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_1/basic-1_2_1.2.update.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_1/basic-1_2_1.2.update.aql
new file mode 100644
index 0000000..c9aceb2
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_1/basic-1_2_1.2.update.aql
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse fuzzyjoin_basic;
+
+load dataset left
+using localfs
+(("path"="asterix_nc1://data/pub-small/dblpauthors.adm"),("format"="adm"));
+
+load dataset right
+using localfs
+(("path"="asterix_nc1://data/pub-small/csxauthors.adm"),("format"="adm"));
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_1/basic-1_2_1.3.query.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_1/basic-1_2_1.3.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_1/basic-1_2_1.3.query.aql
new file mode 100644
index 0000000..5d9ab27
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_1/basic-1_2_1.3.query.aql
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse fuzzyjoin_basic;
+
+set import-private-functions 'true'
+
+let $left := [1, 5, 6, 7]
+let $right := [4, 5, 6]
+let $leftnull := [null, null, 5, 6, 7]
+let $rightnull := [4, 5, 6]
+let $nullstring := [null, null, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+
+let $prefix1 := subset-collection($nullstring, 0, prefix-len-jaccard(len($nullstring), 0.1f))
+let $prefix2 := subset-collection($nullstring, 0, prefix-len-jaccard(len($nullstring), 0.2f))
+let $prefix3 := subset-collection($nullstring, 0, prefix-len-jaccard(len($nullstring), 0.3f))
+let $prefix4 := subset-collection($nullstring, 0, prefix-len-jaccard(len($nullstring), 0.4f))
+let $prefix5 := subset-collection($nullstring, 0, prefix-len-jaccard(len($nullstring), 0.5f))
+let $prefix6 := subset-collection($nullstring, 0, prefix-len-jaccard(len($nullstring), 0.6f))
+let $prefix7 := subset-collection($nullstring, 0, prefix-len-jaccard(len($nullstring), 0.7f))
+let $prefix8 := subset-collection($nullstring, 0, prefix-len-jaccard(len($nullstring), 0.8f))
+let $prefix9 := subset-collection($nullstring, 0, prefix-len-jaccard(len($nullstring), 0.9f))
+
+let $bound :=
+for $l in [1]
+return [
+  [
+  similarity-jaccard-prefix(3, [1, 2, 3], 3, [1, 2, 3], 1, 1f),
+  similarity-jaccard-prefix(3, [1, 2, 3], 3, [1, 2, 4], 1, .5f),
+  similarity-jaccard-prefix(3, [1, 2, 3], 3, [1, 2, 4], 1, .6f),
+  similarity-jaccard-prefix(3, [1, 2, 3], 9, [1, 2, 3], 1, .5f),
+  similarity-jaccard-prefix(4, [1, 2, 3, 4], 2, [1, 2], 1, .5f),
+  similarity-jaccard-prefix(4, [1, 2, 3, 4], 4, [1, 2], 1, .33f)
+  ],[
+  similarity-jaccard-prefix(3, [1, 2, 3], 3, [1, 2, 3], 2, 1f),
+  similarity-jaccard-prefix(3, [1, 2, 3], 3, [1, 2, 4], 2, .5f),
+  similarity-jaccard-prefix(3, [1, 2, 3], 3, [1, 2, 4], 2, .6f),
+  similarity-jaccard-prefix(3, [1, 2, 3], 9, [1, 2, 3], 2, .5f),
+  similarity-jaccard-prefix(4, [1, 2, 3, 4], 2, [1, 2], 2, .5f),
+  similarity-jaccard-prefix(4, [1, 2, 3, 4], 4, [1, 2], 2, .33f)
+  ],[
+  similarity-jaccard-prefix(3, [1, 2, 3], 3, [1, 2, 3], 3, 1f),
+  similarity-jaccard-prefix(3, [1, 2, 3], 3, [1, 2, 4], 3, .5f),
+  similarity-jaccard-prefix(3, [1, 2, 3], 3, [1, 2, 4], 3, .6f),
+  similarity-jaccard-prefix(3, [1, 2, 3], 9, [1, 2, 3], 3, .5f),
+  similarity-jaccard-prefix(4, [1, 2, 3, 4], 2, [1, 2], 3, .5f),
+  similarity-jaccard-prefix(4, [1, 2, 3, 4], 4, [1, 2], 3, .33f)
+  ]
+]
+
+let $trybound :=
+for $l in [1]
+return [
+similarity-jaccard-prefix(8, [3, 4, 5, 6, 7, 8], 10, [0, 0, 3, 4, 5, 6, 7, 8, 9, 10], -1, 0.5f),
+similarity-jaccard-prefix(8, [1, 2, 3, 4, 5, 6, 7, 8], 10, [3, 4, 5, 6, 7, 8, 9, 10], 0, 0.5f),
+similarity-jaccard-prefix(8, [3, 4, 5, 6, 7, 8], 10, [0, 0, 3, 4, 5, 6, 7, 8, 9, 10], 1, 0.5f),
+similarity-jaccard-prefix(8, [1, 2, 3, 4, 5, 6, 7, 8], 10, [3, 4, 5, 6, 7, 8, 9, 10], 2, 0.5f),
+similarity-jaccard-prefix(8, [3, 4, 5, 6, 7, 8], 10, [0, 0, 3, 4, 5, 6, 7, 8, 9, 10], 3, 0.5f),
+similarity-jaccard-prefix(8, [1, 2, 3, 4, 5, 6, 7, 8], 10, [3, 4, 5, 6, 7, 8, 9, 10], 4, 0.5f),
+similarity-jaccard-prefix(8, [3, 4, 5, 6, 7, 8], 10, [0, 0, 3, 4, 5, 6, 7, 8, 9, 10], 5, 0.5f),
+similarity-jaccard-prefix(8, [1, 2, 3, 4, 5, 6, 7, 8], 10, [3, 4, 5, 6, 7, 8, 9, 10], 6, 0.5f),
+similarity-jaccard-prefix(8, [3, 4, 5, 6, 7, 8], 10, [0, 0, 3, 4, 5, 6, 7, 8, 9, 10], 7, 0.5f),
+similarity-jaccard-prefix(8, [1, 2, 3, 4, 5, 6, 7, 8], 10, [0, 0, 3, 4, 5, 6, 7, 8, 9, 10], 8, 0.5f),
+similarity-jaccard-prefix(8, [1, 2, 3, 4, 5, 6, 7, 8], 10, [0, 0, 3, 4, 5, 6, 7, 8, 9, 10], 9, 0.5f),
+similarity-jaccard-prefix(8, [1, 2, 3, 4, 5, 6, 7, 8], 10, [0, 0, 3, 4, 5, 6, 7, 8, 9, 10], 10, 0.5f),
+similarity-jaccard-prefix(8, [1, 2, 5, 6, 7, 3, 4, 8], 10, [4, 5, 0, 0, 3, 6, 7, 8, 9, 10], 11, 0.5f)
+]
+
+let $checkbound :=
+for $l in [1]
+return [
+similarity-jaccard-check([1, 2, 3, 4, 5, 6, 7, 8], [null, 0, 3, 4, 5, 6, 7, 8, 9, 10], 0.1f),
+similarity-jaccard-check([null, 2, 3, 4, 5, 6, 7, 8], [null, 0, 3, 4, 5, 6, 7, 8, 9, 10], 0.2f),
+similarity-jaccard-check([null, 2, 3, 4, 5, 6, 7, 8], [null, null, 3, 4, 5, 6, 7, 8, 9, 10], 0.3f),
+similarity-jaccard-check([null, null, 3, 4, 5, 6, 7, 8], [null, null, 3, 4, 5, 6, 7, 8, 9, 10], 0.4f),
+similarity-jaccard-check([1, 2, 3, 4, 5, 6, 7, 8], [null, null, 3, 4, 5, 6, 7, 8, 9, 10], 0.5f),
+similarity-jaccard-check([1, 2, 3, 4, 5, 6, 7, 8], [0, 2, 3, 4, 5, 6, 7, 8, 9, 10], 0.6f),
+similarity-jaccard-check([1, 2, 3, 4, 5, 6, 7, 8], [null, null, 3, 4, 5, 6, 7, 8, 2], 0.7f),
+similarity-jaccard-check([1, 2, 3, 4, 5, 6, 7, 8], [null, null, 3, 4, 5, 6, 7, 8, 1, 2], 0.8f),
+similarity-jaccard-check([1, 2, 3, 4, 5, 6, 7, 8, 9], [null, 2, 3, 4, 5, 6, 7, 8, 9, 1], 0.9f)
+]
+
+let $len := len($right)
+
+let $negativesub1 := subset-collection($right, 6, prefix-len-jaccard($len, .4f))
+let $negativesub2 := subset-collection($right, 0, -1)
+let $prefix := subset-collection($right, 0, prefix-len-jaccard($len, .4f))
+let $sim1 := similarity-jaccard($leftnull, $right)
+let $sim2 := similarity-jaccard-prefix(len($left), $left, len($right), $right, -1, .4f)
+let $sim3 := similarity-jaccard($leftnull, $rightnull)
+let $sim4 := similarity-jaccard-prefix(8, [1, 2, 3, 4, 5, 6, 7, 8], 10, [0, 0, 3, 4, 5, 6, 7, 8, 9, 10], 1, .4f)
+let $sim5 := similarity-jaccard([1, 2, 3, 4, 5, 6, 7, 8], [null, null, 3, 4, 5, 6, 7, 8, 9, 10])
+
+let $joinpair :=
+for $s in $prefix4
+for $r in $prefix1
+where $s = $r
+return $s
+
+let $clearnull :=
+for $index at $i in $nullstring
+where $nullstring[$i] <= null
+return int16(string($nullstring[$i])) + 64
+
+let $cleanup := [$clearnull, $nullstring]
+
+return [$prefix, $sim1, $sim2, $sim3, $sim4, $sim5, $bound, $trybound, $checkbound, $negativesub1, $negativesub2,
+[$prefix1, $prefix2, $prefix3, $prefix4, $prefix5, $prefix6, $prefix7, $prefix8, $prefix9], $joinpair, $cleanup]
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_1/basic-1_2_1.4.query.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_1/basic-1_2_1.4.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_1/basic-1_2_1.4.query.aql
new file mode 100644
index 0000000..6eff4f6
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_1/basic-1_2_1.4.query.aql
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse fuzzyjoin_basic;
+
+let $records := [[5, 6, 7], [0, 3, 4]]
+let $kv := [1, 2, 3, 4, 5, 6]
+
+let $lorecords :=
+for $r in $records
+let $c :=
+    for $t in $r
+    let $index :=
+        for $k at $i in $kv
+        where $t = $k
+        return $i
+    return $index
+return $c
+
+for $record in $lorecords
+let $orecord :=
+for $d in $record
+order by $d[0]
+return $d[0]
+return $orecord
+
+let $records := [[5, 6, 7], [0, 3, 4]]
+let $kv := [1, 2, 3, 4, 5, 6]
+for $r in $records
+let $c :=
+    for $t in $r
+        for $k at $i in $kv
+        where $t = $k
+        order by $i
+        return $i
+return $c
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_1/basic-1_2_1.5.query.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_1/basic-1_2_1.5.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_1/basic-1_2_1.5.query.aql
new file mode 100644
index 0000000..9696695
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_1/basic-1_2_1.5.query.aql
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse fuzzyjoin_basic;
+
+set import-private-functions 'true'
+
+// Stage 1
+let $rankedTokens := (
+    for $right in dataset left
+    let $id := $right.id
+    for $token in word-tokens($right.authors)
+        /*+ hash */ group by $tokenGroupped := $token with $id
+        /*+ inmem 34 198608 */ order by count($id), $tokenGroupped
+    return $tokenGroupped
+)
+
+// Stage 2_1 of left
+let $tokenLeftVerify := (
+    for $left in dataset left
+        let $tokenUnrankedLeft := word-tokens($left.authors)
+        let $lenLeft := len($tokenUnrankedLeft)
+        let $tokens := (
+            for $token in $tokenUnrankedLeft
+            let $index :=
+                for $tokenRanked at $i in $rankedTokens
+                    where $token = /*+ bcast */ $tokenRanked
+                return $i
+            order by $index
+            return $index
+        )
+    where count($tokenUnrankedLeft) != count($tokens)
+    order by $left.id
+    return {"tid": $left.id, "authors": $left.authors, "tokens": $tokenUnrankedLeft, "ranked": $tokens}
+)
+let $tokenLeft := (
+    for $left in dataset left
+        let $tokenUnrankedLeft := word-tokens($left.authors)
+        let $lenLeft := len($tokenUnrankedLeft)
+        for $token in $tokenUnrankedLeft
+        for $tokenRanked at $i in $rankedTokens
+            where $token = /*+ bcast */ $tokenRanked
+        order by $i
+    return $i
+)
+
+for $r in $tokenLeftVerify
+return $r
\ No newline at end of file