You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@asterixdb.apache.org by wa...@apache.org on 2018/10/10 00:59:15 UTC

[06/36] asterixdb git commit: [NO ISSUE][COMP][RT] Enable multiway similarity joins

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_1/basic-1_2_1.6.query.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_1/basic-1_2_1.6.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_1/basic-1_2_1.6.query.aql
new file mode 100644
index 0000000..310caa9
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_1/basic-1_2_1.6.query.aql
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse fuzzyjoin_basic;
+
+set import-private-functions 'true'
+
+// Stage 1: word tokens of left.authors, grouped by token and ordered by count($id), then by token ($right ranges over dataset left here).
+let $rankedTokens := (
+    for $right in dataset left
+    let $id := $right.id
+    for $token in word-tokens($right.authors)
+        /*+ hash */ group by $tokenGroupped := $token with $id
+        /*+ inmem 34 198608 */ order by count($id), $tokenGroupped
+    return $tokenGroupped
+)
+
+// Stage 2_2 of right: per right record, look up each author token's position(s) in $rankedTokens ($tokenUnrankedLeft holds the right record's tokens).
+let $tokenRightVerify := (
+    for $right in dataset right
+        let $tokenUnrankedLeft := word-tokens($right.authors)
+        let $tokens := (
+            for $token in $tokenUnrankedLeft
+            let $index :=
+                for $tokenRanked at $i in $rankedTokens
+                    where $token = /*+ bcast */ $tokenRanked
+                return $i
+            order by $index
+            return $index
+        )
+    order by $right.authors
+    return {"authors": $right.authors, "tokens": $tokenUnrankedLeft, "ranked": $tokens}
+)
+let $tokenRight := (
+    for $right in dataset right
+        let $tokenUnrankedRight := word-tokens($right.authors)
+        for $token in $tokenUnrankedRight
+        for $tokenRanked at $i in $rankedTokens
+            where $token = /*+ bcast */ $tokenRanked
+        order by $i
+    return $i
+)
+// Only $tokenRightVerify feeds the result; $tokenRight above is bound but not referenced below.
+for $r in $tokenRightVerify
+return $r
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_1/basic-1_2_1.7.query.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_1/basic-1_2_1.7.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_1/basic-1_2_1.7.query.aql
new file mode 100644
index 0000000..4e51613
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_1/basic-1_2_1.7.query.aql
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse fuzzyjoin_basic;
+
+set import-private-functions 'true'
+// For each record of dataset 'right': return its word tokens plus, per token, the first position ($index[0]) found in the Stage 1 ranking; $lenDBLP is bound but not part of the result.
+for $paperDBLP in dataset('right')
+    let $tokensUnrankedDBLP := word-tokens($paperDBLP.authors)
+    let $lenDBLP := len($tokensUnrankedDBLP)
+    let $tokensDBLP :=
+        for $tokenUnranked in $tokensUnrankedDBLP
+        let $index :=
+            for $tokenRanked at $i in
+                // -- - Stage 1 - --
+                // Rank tokens: word tokens of left.authors, grouped by token and
+                // ordered by count($id), then by the token itself.
+                for $paper in dataset('left')
+                let $id := $paper.id
+                for $token in word-tokens($paper.authors)
+                /*+ hash */
+                group by $tokenGrouped := $token with $id
+                /*+ inmem 1 302 */
+                order by count($id), $tokenGrouped
+                return $tokenGrouped
+            where $tokenUnranked = /*+ bcast */ $tokenRanked
+            return $i
+        order by $index[0]
+        return $index[0]
+order by $paperDBLP.authors
+return {"tokens": $tokensUnrankedDBLP, "ranks": $tokensDBLP}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_2/basic-1_2_2.1.ddl.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_2/basic-1_2_2.1.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_2/basic-1_2_2.1.ddl.aql
new file mode 100644
index 0000000..45cc975
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_2/basic-1_2_2.1.ddl.aql
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+drop dataverse fuzzyjoin_basic if exists;
+
+create dataverse fuzzyjoin_basic;
+
+use dataverse fuzzyjoin_basic;
+// Closed record type shared by both datasets: a uuid id and an authors string.
+create type BasicType as closed {
+    id: uuid,
+    authors: string
+}
+// Both join inputs use BasicType with an autogenerated primary key on id.
+create dataset left(BasicType) primary key id autogenerated;
+create dataset right(BasicType) primary key id autogenerated;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_2/basic-1_2_2.2.update.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_2/basic-1_2_2.2.update.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_2/basic-1_2_2.2.update.aql
new file mode 100644
index 0000000..c9aceb2
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_2/basic-1_2_2.2.update.aql
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse fuzzyjoin_basic;
+// Load left from dblpauthors.adm and right from csxauthors.adm through the localfs adapter, both in adm format.
+load dataset left
+using localfs
+(("path"="asterix_nc1://data/pub-small/dblpauthors.adm"),("format"="adm"));
+
+load dataset right
+using localfs
+(("path"="asterix_nc1://data/pub-small/csxauthors.adm"),("format"="adm"));
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_2/basic-1_2_2.3.query.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_2/basic-1_2_2.3.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_2/basic-1_2_2.3.query.aql
new file mode 100644
index 0000000..510c1f0
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_2/basic-1_2_2.3.query.aql
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse fuzzyjoin_basic;
+
+set import-private-functions 'true'
+// Returns the number of (idDBLP, idCSX) groups from left x right that share a prefix token and have similarity-jaccard-prefix >= .8f.
+let $r := count(
+    for $paperDBLP in dataset('left')
+    let $idDBLP := $paperDBLP.id
+    let $tokensUnrankedDBLP := word-tokens($paperDBLP.authors)
+    let $lenDBLP := len($tokensUnrankedDBLP)
+    let $tokensDBLP :=
+        for $tokenUnranked in $tokensUnrankedDBLP
+        for $tokenRanked at $i in
+            // -- - Stage 1 - --
+            // Rank tokens: word tokens of left.authors, grouped by token and
+            // ordered by count($id), then by the token itself.
+            for $paper in dataset('left')
+            let $id := $paper.id
+            for $token in word-tokens($paper.authors)
+            /*+ hash */
+            group by $tokenGrouped := $token with $id
+            /*+ inmem 1 302 */
+            order by count($id), $tokenGrouped
+            return $tokenGrouped
+        where $tokenUnranked = /*+ bcast */ $tokenRanked
+        order by $i
+        return $i
+    for $prefixTokenDBLP in subset-collection($tokensDBLP, 0, prefix-len-jaccard(len($tokensDBLP), .8f))
+
+    for $paperCSX in dataset('right')
+    let $idCSX := $paperCSX.id
+    let $tokensUnrankedCSX := word-tokens($paperCSX.authors)
+    let $lenCSX := len($tokensUnrankedCSX)
+    let $tokensCSX :=
+        for $tokenUnranked in $tokensUnrankedCSX
+        for $tokenRanked at $i in
+            // -- - Stage 1 - --
+            // Same ranking as on the DBLP side: it is computed from dataset
+            // 'left', so both token lists share one rank space.
+            for $paper in dataset('left')
+            let $id := $paper.id
+            for $token in word-tokens($paper.authors)
+            /*+ hash */
+            group by $tokenGrouped := $token with $id
+            /*+ inmem 1 302 */
+            order by count($id), $tokenGrouped
+            return $tokenGrouped
+        where $tokenUnranked = /*+ bcast */ $tokenRanked
+        order by $i
+        return $i
+    for $prefixTokenCSX in subset-collection($tokensCSX, 0, prefix-len-jaccard(len($tokensCSX), .8f))
+
+    where $prefixTokenDBLP = $prefixTokenCSX
+    let $sim := similarity-jaccard-prefix($lenDBLP, $tokensDBLP, $lenCSX, $tokensCSX, $prefixTokenCSX, .8f)
+    where $sim >= .8f
+    /*+ hash*/
+    group by $idDBLP := $idDBLP, $idCSX := $idCSX with $sim
+    return {'idDBLP': $idDBLP, 'idCSX': $idCSX, "sim": $sim[0]}
+)
+return $r
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_2/basic-1_2_2.4.query.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_2/basic-1_2_2.4.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_2/basic-1_2_2.4.query.aql
new file mode 100644
index 0000000..b903881
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_2/basic-1_2_2.4.query.aql
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse fuzzyjoin_basic;
+
+set import-private-functions 'true'
+// Binds to $s the count of (idDBLP, idCSX) groups with similarity-jaccard-prefix >= .8f. NOTE(review): the query then returns the literal 0, not $s - confirm this is intended.
+let $s := count(
+    for $paperDBLP in dataset('left')
+    let $idDBLP := $paperDBLP.id
+    let $tokensUnrankedDBLP := word-tokens($paperDBLP.authors)
+    let $lenDBLP := len($tokensUnrankedDBLP)
+    let $tokensDBLP :=
+        for $tokenUnranked in $tokensUnrankedDBLP
+        for $tokenRanked at $i in
+            // -- - Stage 1 - --
+            // Rank tokens: word tokens of left.authors, grouped by token and
+            // ordered by count($id), then by the token itself.
+            for $paper in dataset('left')
+            let $id := $paper.id
+            for $token in word-tokens($paper.authors)
+            /*+ hash */
+            group by $tokenGrouped := $token with $id
+            /*+ inmem 1 302 */
+            order by count($id), $tokenGrouped
+            return $tokenGrouped
+        where $tokenUnranked = /*+ bcast */ $tokenRanked
+        order by $i
+        return $i
+    for $prefixTokenDBLP in subset-collection($tokensDBLP, 0, prefix-len-jaccard(len($tokensDBLP), .8f))
+    // CSX side: the prefix length used below is prefix-len-jaccard(len($tokensUnrankedCSX), .8f) minus (len($tokensUnrankedCSX) - len($tokensCSX)).
+    for $paperCSX in dataset('right')
+    let $idCSX := $paperCSX.id
+    let $tokensUnrankedCSX := word-tokens($paperCSX.authors)
+    let $lenCSX := len($tokensUnrankedCSX)
+    let $tokensCSX :=
+        for $tokenUnranked in $tokensUnrankedCSX
+        for $tokenRanked at $i in
+            // -- - Stage 1 - --
+            // Same ranking as on the DBLP side: it is computed from dataset
+            // 'left', so both token lists share one rank space.
+            for $paper in dataset('left')
+            let $id := $paper.id
+            for $token in word-tokens($paper.authors)
+            /*+ hash */
+            group by $tokenGrouped := $token with $id
+            /*+ inmem 1 302 */
+            order by count($id), $tokenGrouped
+            return $tokenGrouped
+        where $tokenUnranked = /*+ bcast */ $tokenRanked
+        order by $i
+        return $i
+    let $actualPrefixLen := prefix-len-jaccard(len($tokensUnrankedCSX), .8f) - len($tokensUnrankedCSX) + len($tokensCSX)
+    for $prefixTokenCSX in subset-collection($tokensCSX, 0, $actualPrefixLen)
+
+    where $prefixTokenDBLP = $prefixTokenCSX
+    let $sim := similarity-jaccard-prefix($lenDBLP, $tokensDBLP, $lenCSX, $tokensCSX, $prefixTokenCSX, .8f)
+    where $sim >= .8f
+    /*+ hash*/
+    group by $idDBLP := $idDBLP, $idCSX := $idCSX with $sim
+    return {'idDBLP': $idDBLP, 'idCSX': $idCSX, "sim": $sim[0]}
+)
+return 0
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_2/basic-1_2_2.5.query.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_2/basic-1_2_2.5.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_2/basic-1_2_2.5.query.aql
new file mode 100644
index 0000000..119520a
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_2/basic-1_2_2.5.query.aql
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse fuzzyjoin_basic;
+
+set import-private-functions 'true'
+// Returns the number of (idDBLP, idCSX) groups that share a prefix token and whose similarity-jaccard-check result on the unranked token lists has element [1] >= .8f.
+let $t := count(
+    for $paperDBLP in dataset('left')
+    let $idDBLP := $paperDBLP.id
+    let $tokensUnrankedDBLP := word-tokens($paperDBLP.authors)
+    let $lenDBLP := len($tokensUnrankedDBLP)
+    let $tokensDBLP :=
+        for $tokenUnranked in $tokensUnrankedDBLP
+        for $tokenRanked at $i in
+            // -- - Stage 1 - --
+            // Rank tokens: word tokens of left.authors, grouped by token and
+            // ordered by count($id), then by the token itself.
+            for $paper in dataset('left')
+            let $id := $paper.id
+            for $token in word-tokens($paper.authors)
+            /*+ hash */
+            group by $tokenGrouped := $token with $id
+            /*+ inmem 1 302 */
+            order by count($id), $tokenGrouped
+            return $tokenGrouped
+        where $tokenUnranked = /*+ bcast */ $tokenRanked
+        order by $i
+        return $i
+    for $prefixTokenDBLP in subset-collection($tokensDBLP, 0, prefix-len-jaccard(len($tokensDBLP), .8f))
+
+    for $paperCSX in dataset('right')
+    let $idCSX := $paperCSX.id
+    let $tokensUnrankedCSX := word-tokens($paperCSX.authors)
+    let $lenCSX := len($tokensUnrankedCSX)
+    let $tokensCSX :=
+        for $tokenUnranked in $tokensUnrankedCSX
+        for $tokenRanked at $i in
+            // -- - Stage 1 - --
+            // Same ranking as on the DBLP side: it is computed from dataset
+            // 'left', so both token lists share one rank space.
+            for $paper in dataset('left')
+            let $id := $paper.id
+            for $token in word-tokens($paper.authors)
+            /*+ hash */
+            group by $tokenGrouped := $token with $id
+            /*+ inmem 1 302 */
+            order by count($id), $tokenGrouped
+            return $tokenGrouped
+        where $tokenUnranked = /*+ bcast */ $tokenRanked
+        order by $i
+        return $i
+    let $actualPrefixLen := prefix-len-jaccard(len($tokensUnrankedCSX), .8f) - len($tokensUnrankedCSX) + len($tokensCSX)
+    for $prefixTokenCSX in subset-collection($tokensCSX, 0, $actualPrefixLen)
+
+    where $prefixTokenDBLP = $prefixTokenCSX
+    // After grouping, the token lists are collections per group; element [0] of each is passed to the check.
+    /*+ hash*/
+    group by $idDBLP := $idDBLP, $idCSX := $idCSX with $tokensUnrankedDBLP, $tokensUnrankedCSX
+    let $sim := similarity-jaccard-check($tokensUnrankedDBLP[0], $tokensUnrankedCSX[0], .8f)
+    where $sim[1] >= .8f
+    return {'idDBLP': $idDBLP, 'idCSX': $idCSX, 'sim': $sim[1]}
+)
+return $t
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_2/basic-1_2_2.6.query.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_2/basic-1_2_2.6.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_2/basic-1_2_2.6.query.aql
new file mode 100644
index 0000000..465cda9
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_2/basic-1_2_2.6.query.aql
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse fuzzyjoin_basic;
+
+set import-private-functions 'true'
+// Returns, for distinct (idDBLP, idCSX) pairs that share a prefix token and pass similarity-jaccard-check at .8f, both token lists and $sim[1], ordered by the token lists.
+for $paperDBLP in dataset('left')
+let $idDBLP := $paperDBLP.id
+let $tokensUnrankedDBLP := word-tokens($paperDBLP.authors)
+let $lenDBLP := len($tokensUnrankedDBLP)
+let $tokensDBLP :=
+    for $tokenUnranked in $tokensUnrankedDBLP
+    for $tokenRanked at $i in
+        // -- - Stage 1 - --
+        // Rank tokens: word tokens of left.authors, grouped by token and
+        // ordered by count($id), then by the token itself.
+        for $paper in dataset('left')
+        let $id := $paper.id
+        for $token in word-tokens($paper.authors)
+        /*+ hash */
+        group by $tokenGrouped := $token with $id
+        /*+ inmem 1 302 */
+        order by count($id), $tokenGrouped
+        return $tokenGrouped
+    where $tokenUnranked = /*+ bcast */ $tokenRanked
+    order by $i
+    return $i
+for $prefixTokenDBLP in subset-collection($tokensDBLP, 0, prefix-len-jaccard(len($tokensDBLP), .8f))
+
+for $paperCSX in dataset('right')
+let $idCSX := $paperCSX.id
+let $tokensUnrankedCSX := word-tokens($paperCSX.authors)
+let $lenCSX := len($tokensUnrankedCSX)
+let $tokensCSX :=
+    for $tokenUnranked in $tokensUnrankedCSX
+    for $tokenRanked at $i in
+        // -- - Stage 1 - --
+        // Same ranking as on the DBLP side: it is computed from dataset
+        // 'left', so both token lists share one rank space.
+        for $paper in dataset('left')
+        let $id := $paper.id
+        for $token in word-tokens($paper.authors)
+        /*+ hash */
+        group by $tokenGrouped := $token with $id
+        /*+ inmem 1 302 */
+        order by count($id), $tokenGrouped
+        return $tokenGrouped
+    where $tokenUnranked = /*+ bcast */ $tokenRanked
+    order by $i
+    return $i
+let $actualPrefixLen := prefix-len-jaccard(len($tokensUnrankedCSX), .8f) - len($tokensUnrankedCSX) + len($tokensCSX)
+for $prefixTokenCSX in subset-collection($tokensCSX, 0, $actualPrefixLen)
+
+where $prefixTokenDBLP = $prefixTokenCSX
+// A pair can match on more than one prefix token, so pairs are de-duplicated before the check.
+/*+ hash*/
+distinct by $idDBLP, $idCSX
+let $sim := similarity-jaccard-check($tokensUnrankedDBLP, $tokensUnrankedCSX, .8f)
+where $sim[1] >= .8f
+order by $tokensUnrankedDBLP, $tokensUnrankedCSX
+return {'DBLP': $tokensUnrankedDBLP, 'CSX': $tokensUnrankedCSX, 'sim': $sim[1]}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_3/basic-1_2_3.1.ddl.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_3/basic-1_2_3.1.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_3/basic-1_2_3.1.ddl.aql
new file mode 100644
index 0000000..45cc975
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_3/basic-1_2_3.1.ddl.aql
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+drop dataverse fuzzyjoin_basic if exists;
+
+create dataverse fuzzyjoin_basic;
+
+use dataverse fuzzyjoin_basic;
+// Closed record type shared by both datasets: a uuid id and an authors string.
+create type BasicType as closed {
+    id: uuid,
+    authors: string
+}
+// Both join inputs use BasicType with an autogenerated primary key on id.
+create dataset left(BasicType) primary key id autogenerated;
+create dataset right(BasicType) primary key id autogenerated;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_3/basic-1_2_3.2.update.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_3/basic-1_2_3.2.update.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_3/basic-1_2_3.2.update.aql
new file mode 100644
index 0000000..c9aceb2
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_3/basic-1_2_3.2.update.aql
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse fuzzyjoin_basic;
+// Load left from dblpauthors.adm and right from csxauthors.adm through the localfs adapter, both in adm format.
+load dataset left
+using localfs
+(("path"="asterix_nc1://data/pub-small/dblpauthors.adm"),("format"="adm"));
+
+load dataset right
+using localfs
+(("path"="asterix_nc1://data/pub-small/csxauthors.adm"),("format"="adm"));
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_3/basic-1_2_3.3.query.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_3/basic-1_2_3.3.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_3/basic-1_2_3.3.query.aql
new file mode 100644
index 0000000..009c2b9
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_3/basic-1_2_3.3.query.aql
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse fuzzyjoin_basic;
+
+set import-private-functions 'true'
+
+set simthreshold '.2f';
+// $pj: left/right record pairs whose author word tokens satisfy ~= under the simthreshold set above (no hint on the predicate).
+let $pj := (
+    for $r in dataset left
+    for $s in dataset right
+        where word-tokens($r.authors) ~= word-tokens($s.authors)
+    return {"rid": $r.id, "sid": $s.id, "rstr": $r.authors, "sstr": $s.authors}
+)
+// $nl: the same join, but with the indexnl hint attached to the ~= predicate.
+let $nl := (
+    for $r in dataset left
+    for $s in dataset right
+        where word-tokens($r.authors) /*+ indexnl */ ~= word-tokens($s.authors)
+    return {"rid": $r.id, "sid": $s.id, "rstr": $r.authors, "sstr": $s.authors}
+)
+// $orderedTokens: word tokens of right.authors (dataset 'right'), grouped by token and ordered by count($id), then by token.
+let $orderedTokens := (
+for $paper in dataset('right')
+            let $id := $paper.id
+            for $token in word-tokens($paper.authors)
+            /*+ hash */
+            group by $tokenGrouped := $token with $id
+            /*+ inmem 1 302 */
+            order by count($id), $tokenGrouped
+            return $tokenGrouped
+)
+// $simpairs: every $nl pair together with the list of $pj pairs that have the same rid and sid.
+let $simpairs := (
+    for $r in $nl
+    return {
+    "rid": $r.rid,
+    "sid": $r.sid,
+    "rstr": $r.rstr,
+    "sstr": $r.sstr,
+    "simpairs":
+        for $s in $pj
+            where $r.rid = $s.rid and $r.sid = $s.sid
+            return {"rid": $s.rid, "sid": $s.sid}
+    }
+)
+// Report the $nl pairs with no counterpart in $pj, with sorted tokens, token positions in $orderedTokens, lengths, and a recomputed similarity-jaccard-prefix.
+for $d in $simpairs
+where count($d.simpairs) = 0
+    let $rid := $d.rid
+    let $sid := $d.sid
+    let $rstr := for $t in word-tokens($d.rstr) order by $t return $t
+    let $sstr := for $t in word-tokens($d.sstr) order by $t return $t
+    let $rlen := len(for $t in word-tokens($d.rstr) order by $t return $t)
+    let $slen := len(for $t in word-tokens($d.sstr) order by $t return $t)
+    let $orstr := for $t in word-tokens($d.rstr) for $token at $i in $orderedTokens where $t /*+ bcast */ = $token order by $i return $i
+    let $osstr := for $t in word-tokens($d.sstr) for $token at $i in $orderedTokens where $t /*+ bcast */ = $token order by $i return $i
+    let $lorstr := len(for $t in word-tokens($d.rstr) for $token at $i in $orderedTokens where $t /*+ bcast */ = $token order by $i return $i)
+    let $losstr := len(for $t in word-tokens($d.sstr) for $token at $i in $orderedTokens where $t /*+ bcast */ = $token order by $i return $i)
+return {
+        "rid": $rid,
+        "sid": $sid,
+        "rstr": $rstr,
+        "sstr": $sstr,
+        "rlen": $rlen,
+        "slen": $slen,
+        "orstr": $orstr,
+        "osstr": $osstr,
+        "lorstr": $lorstr,
+        "losstr": $losstr,
+        "simpairs": $d,
+        "sim": let $cmmon := for $r in $orstr for $s in $osstr where $r = $s return $r
+                             return similarity-jaccard-prefix($rlen, $orstr, $slen, $osstr, $cmmon[0], 0.2f)}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_4/basic-1_2_4.1.ddl.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_4/basic-1_2_4.1.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_4/basic-1_2_4.1.ddl.aql
new file mode 100644
index 0000000..45cc975
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_4/basic-1_2_4.1.ddl.aql
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+drop dataverse fuzzyjoin_basic if exists;
+// Recreate the dataverse from scratch so the test starts with no leftover state.
+create dataverse fuzzyjoin_basic;
+
+use dataverse fuzzyjoin_basic;
+// Closed record type with two fields: a uuid id and an authors string.
+create type BasicType as closed {
+    id: uuid,
+    authors: string
+}
+// The two join inputs share BasicType; each uses id as an autogenerated primary key.
+create dataset left(BasicType) primary key id autogenerated;
+create dataset right(BasicType) primary key id autogenerated;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_4/basic-1_2_4.2.update.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_4/basic-1_2_4.2.update.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_4/basic-1_2_4.2.update.aql
new file mode 100644
index 0000000..c9aceb2
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_4/basic-1_2_4.2.update.aql
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse fuzzyjoin_basic;
+// Load "left" from the ADM file dblpauthors.adm through the localfs adapter.
+load dataset left
+using localfs
+(("path"="asterix_nc1://data/pub-small/dblpauthors.adm"),("format"="adm"));
+// Load "right" from the ADM file csxauthors.adm through the localfs adapter.
+load dataset right
+using localfs
+(("path"="asterix_nc1://data/pub-small/csxauthors.adm"),("format"="adm"));
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_4/basic-1_2_4.3.query.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_4/basic-1_2_4.3.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_4/basic-1_2_4.3.query.aql
new file mode 100644
index 0000000..b6976bd
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_4/basic-1_2_4.3.query.aql
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse fuzzyjoin_basic;
+// Private functions are imported; presumably required for similarity-jaccard-prefix below (confirm).
+set import-private-functions 'true'
+// Hand-written token list for the left side; repeated tokens ("dirk", "van") are kept.
+let $tokensUnrankedLeft := [ "a", "baesens", "baestaens", "bart", "den", "dirk", "dirk", "emma", "gestel", "johan", "k",
+"marleen", "poel", "suykens", "tony", "van", "van", "willekens" ]
+// Hand-written token list for the right side: the left list plus "bedrijfskunde", "en" and "gent".
+let $tokensUnrankedRight := [ "a", "baesens", "baestaens", "bart", "bedrijfskunde", "den", "dirk", "dirk", "emma", "en",
+"gent", "gestel", "johan", "k", "marleen", "poel", "suykens", "tony", "van", "van", "willekens" ]
+// Left side: each token that also occurs in the ranked list is replaced by its position there, ascending.
+let $lenLeft := len($tokensUnrankedLeft)  // not referenced again in this query
+let $tokensLeft :=
+    for $tokenUnranked in $tokensUnrankedLeft
+    for $tokenRanked at $i in
+        // Ranked token list:
+        // -- - Stage 1 - --
+        // tokens of left.authors, ordered by ascending id count, then by token.
+        for $orders in dataset left
+        let $id := $orders.id
+        for $token in word-tokens($orders.authors)
+        /*+ hash */
+        group by $tokenGrouped := $token with $id
+        /*+ inmem 1 302 */
+        order by count($id), $tokenGrouped
+        return $tokenGrouped
+    where $tokenUnranked = /*+ bcast */ $tokenRanked
+    order by $i
+    return $i
+// Right side: same translation; note that the ranked list is again built from dataset left.
+let $lenRight := len($tokensUnrankedRight)  // not referenced again in this query
+let $tokensRight :=
+    for $tokenUnranked in $tokensUnrankedRight
+    for $tokenRanked at $i in
+        // Ranked token list:
+        // -- - Stage 1 - --
+        // tokens of left.authors, ordered by ascending id count, then by token.
+        for $orders in dataset left
+        let $id := $orders.id
+        for $token in word-tokens($orders.authors)
+        /*+ hash */
+        group by $tokenGrouped := $token with $id
+        /*+ inmem 1 302 */
+        order by count($id), $tokenGrouped
+        return $tokenGrouped
+    where $tokenUnranked = /*+ bcast */ $tokenRanked
+    order by $i
+    return $i
+// Report the position lists, their true and translated lengths, and jaccard check/prefix results at .8f.
+return {
+        "leftTokens": $tokensLeft, "rightTokens": $tokensRight, "lenLeftTrue": len($tokensUnrankedLeft),
+        "lenLeft": len($tokensLeft), "lenRightTrue": len($tokensUnrankedRight), "lenRight": len($tokensRight),
+        "full_sim": similarity-jaccard-check($tokensLeft, $tokensRight, .8f),
+        "true_sim": similarity-jaccard-check($tokensUnrankedLeft, $tokensUnrankedRight, .8f),
+        "pref_sim": similarity-jaccard-prefix(len($tokensLeft), $tokensLeft, len($tokensRight),
+                    $tokensRight, $tokensLeft[0], .8f),
+        "fast_sim": similarity-jaccard-prefix(len($tokensUnrankedLeft), $tokensLeft, len($tokensUnrankedRight),
+                    $tokensRight, $tokensLeft[0], .8f)
+        }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_5/basic-1_2_5.1.ddl.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_5/basic-1_2_5.1.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_5/basic-1_2_5.1.ddl.aql
new file mode 100644
index 0000000..45cc975
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_5/basic-1_2_5.1.ddl.aql
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+drop dataverse fuzzyjoin_basic if exists;
+// Recreate the dataverse from scratch so the test starts with no leftover state.
+create dataverse fuzzyjoin_basic;
+
+use dataverse fuzzyjoin_basic;
+// Closed record type with two fields: a uuid id and an authors string.
+create type BasicType as closed {
+    id: uuid,
+    authors: string
+}
+// The two join inputs share BasicType; each uses id as an autogenerated primary key.
+create dataset left(BasicType) primary key id autogenerated;
+create dataset right(BasicType) primary key id autogenerated;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_5/basic-1_2_5.2.update.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_5/basic-1_2_5.2.update.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_5/basic-1_2_5.2.update.aql
new file mode 100644
index 0000000..c9aceb2
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_5/basic-1_2_5.2.update.aql
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse fuzzyjoin_basic;
+// Load "left" from the ADM file dblpauthors.adm through the localfs adapter.
+load dataset left
+using localfs
+(("path"="asterix_nc1://data/pub-small/dblpauthors.adm"),("format"="adm"));
+// Load "right" from the ADM file csxauthors.adm through the localfs adapter.
+load dataset right
+using localfs
+(("path"="asterix_nc1://data/pub-small/csxauthors.adm"),("format"="adm"));
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_5/basic-1_2_5.3.query.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_5/basic-1_2_5.3.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_5/basic-1_2_5.3.query.aql
new file mode 100644
index 0000000..3b5d44b
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_5/basic-1_2_5.3.query.aql
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse fuzzyjoin_basic;
+// Threshold consulted by the ~= operator in both joins below.
+set simthreshold '.2f';
+// $cpj: number of left x right pairs whose author token lists satisfy ~=, with no join hint.
+let $cpj := count(
+    for $r in dataset left
+    for $s in dataset right
+    where word-tokens($r.authors) ~= word-tokens($s.authors)
+    return {"rid": $r.id, "sid": $s.id}
+)
+// $cnl: the same count, with the indexnl hint placed in front of ~=.
+let $cnl := count(
+    for $r in dataset left
+    for $s in dataset right
+    where word-tokens($r.authors) /*+ indexnl */ ~= word-tokens($s.authors)
+    return {"rid": $r.id, "sid": $s.id}
+)
+// Both counts are returned together so they can be compared.
+return [$cnl, $cpj]
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_6/basic-1_2_6.1.ddl.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_6/basic-1_2_6.1.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_6/basic-1_2_6.1.ddl.aql
new file mode 100644
index 0000000..a72efb5
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_6/basic-1_2_6.1.ddl.aql
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+drop dataverse fuzzyjoin if exists;
+// Recreate the dataverse from scratch so the test starts with no leftover state.
+create dataverse fuzzyjoin;
+
+use dataverse fuzzyjoin;
+// Closed record type with five fields: an int64 id plus four strings.
+create type DBLPType as closed {
+  id: int64,
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+// Single dataset of DBLPType records, keyed on id.
+create dataset DBLP(DBLPType) primary key id;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_6/basic-1_2_6.2.update.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_6/basic-1_2_6.2.update.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_6/basic-1_2_6.2.update.aql
new file mode 100644
index 0000000..d3d02d4
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_6/basic-1_2_6.2.update.aql
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse fuzzyjoin;
+// Load DBLP from dblp-small-id.txt, read as ":"-delimited text through the localfs adapter.
+load dataset DBLP
+using localfs
+(("path"="asterix_nc1://data/pub-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_6/basic-1_2_6.3.query.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_6/basic-1_2_6.3.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_6/basic-1_2_6.3.query.aql
new file mode 100644
index 0000000..c53475f
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_6/basic-1_2_6.3.query.aql
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse fuzzyjoin;
+// Threshold consulted by the ~= operator in both self-joins below.
+set simthreshold '.15f';
+// $cpj: number of DBLP x DBLP pairs whose title token lists satisfy ~=, with no join hint.
+let $cpj := count(
+    for $dblp in dataset('DBLP')
+    for $dblp2 in dataset('DBLP')
+    where word-tokens($dblp.title) ~= word-tokens($dblp2.title)
+    order by $dblp.id, $dblp2.id
+    return {'dblp': $dblp.id, 'dblp2': $dblp2.id}
+)
+// $cnl: the same count, with the indexnl hint placed in front of ~=.
+let $cnl := count(
+    for $dblp in dataset('DBLP')
+    for $dblp2 in dataset('DBLP')
+    where word-tokens($dblp.title) /*+indexnl*/ ~= word-tokens($dblp2.title)
+    order by $dblp.id, $dblp2.id
+    return {'dblp': $dblp.id, 'dblp2': $dblp2.id}
+)
+// Both counts are returned together so they can be compared.
+return [$cnl, $cpj]
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_7/basic-1_2_7.1.ddl.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_7/basic-1_2_7.1.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_7/basic-1_2_7.1.ddl.aql
new file mode 100644
index 0000000..bd84097
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_7/basic-1_2_7.1.ddl.aql
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+drop dataverse fuzzyjoin if exists;
+// Recreate the dataverse from scratch so the test starts with no leftover state.
+create dataverse fuzzyjoin;
+
+use dataverse fuzzyjoin;
+// Closed record type with five fields: an int64 id plus four strings.
+create type DBLPType as closed {
+  id: int64,
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+// DBLP and TO are both DBLPType datasets keyed on id.
+create dataset DBLP(DBLPType) primary key id;
+
+create dataset TO(DBLPType) primary key id;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_7/basic-1_2_7.2.update.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_7/basic-1_2_7.2.update.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_7/basic-1_2_7.2.update.aql
new file mode 100644
index 0000000..7674827
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_7/basic-1_2_7.2.update.aql
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse fuzzyjoin;
+// Load DBLP from dblp-small-id.txt, read as ":"-delimited text through the localfs adapter.
+load dataset DBLP
+using localfs
+(("path"="asterix_nc1://data/pub-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+// TO is loaded from the same file, so both datasets receive the same input.
+load dataset TO
+using localfs
+(("path"="asterix_nc1://data/pub-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_7/basic-1_2_7.3.query.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_7/basic-1_2_7.3.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_7/basic-1_2_7.3.query.aql
new file mode 100644
index 0000000..597e8a1
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_2_7/basic-1_2_7.3.query.aql
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse fuzzyjoin;
+// Private functions are imported; presumably required for similarity-jaccard-prefix at the end (confirm).
+set import-private-functions 'true'
+// Threshold consulted by the ~= operator in the joins below.
+set simthreshold '.61f';
+// $pj: DBLP x TO pairs whose title token lists satisfy ~=, with no join hint; ids and titles are kept.
+let $pj := (
+    for $dblp in dataset DBLP
+    for $dblp2 in dataset TO
+    where word-tokens($dblp.title) ~= word-tokens($dblp2.title)
+    return {"rid": $dblp.id, "sid": $dblp2.id, "rstr": $dblp.title, "sstr": $dblp2.title}
+)
+
+let $nl := (  // same DBLP x TO join as $pj, but requesting an index nested-loop join via the indexnl hint
+    for $dblp in dataset DBLP
+    for $dblp2 in dataset TO
+    where word-tokens($dblp.title) /*+ indexnl */ ~= word-tokens($dblp2.title)  // "+" must directly follow the comment opener to form a hint
+    return {"rid": $dblp.id, "sid": $dblp2.id, "rstr": $dblp.title, "sstr": $dblp2.title}  // same record shape as $pj
+)
+
+let $orderedTokens := (  // title tokens of TO, ordered by ascending id count, then by token
+    for $paper in dataset TO
+    let $id := $paper.id
+    for $token in word-tokens($paper.title)
+    /*+ hash */
+    group by $tokenGrouped := $token with $id
+    /*+ inmem 1 302 */
+    order by count($id), $tokenGrouped
+    return $tokenGrouped
+)
+// $simpairs: each $nl pair, extended with the list of $pj pairs that have the same rid and sid.
+let $simpairs := (
+    for $r in $nl
+    return {
+    "rid": $r.rid,
+    "sid": $r.sid,
+    "rstr": $r.rstr,
+    "sstr": $r.sstr,
+    "simpairs":
+        for $s in $pj
+            where $r.rid = $s.rid and $r.sid = $s.sid
+            return {"rid": $s.rid, "sid": $s.sid}
+    }
+)
+// Only $nl pairs with no counterpart in $pj are reported, together with diagnostic fields.
+for $d in $simpairs
+where count($d.simpairs) = 0
+    let $rid := $d.rid
+    let $sid := $d.sid
+    let $rstr := for $t in word-tokens($d.rstr) order by $t return $t
+    let $sstr := for $t in word-tokens($d.sstr) order by $t return $t
+    let $rlen := len(for $t in word-tokens($d.rstr) order by $t return $t)
+    let $slen := len(for $t in word-tokens($d.sstr) order by $t return $t)
+    let $orstr := for $t in word-tokens($d.rstr) for $token at $i in $orderedTokens where $t /*+ bcast */ = $token order by $i return $i
+    let $osstr := for $t in word-tokens($d.sstr) for $token at $i in $orderedTokens where $t /*+ bcast */ = $token order by $i return $i
+    let $lorstr := len(for $t in word-tokens($d.rstr) for $token at $i in $orderedTokens where $t /*+ bcast */ = $token order by $i return $i)
+    let $losstr := len(for $t in word-tokens($d.sstr) for $token at $i in $orderedTokens where $t /*+ bcast */ = $token order by $i return $i)
+return {
+        "rid": $rid,
+        "sid": $sid,
+        "rstr": $rstr,  // title tokens of the DBLP record, sorted
+        "sstr": $sstr,  // title tokens of the TO record, sorted
+        "rlen": $rlen,  // number of tokens in rstr
+        "slen": $slen,  // number of tokens in sstr
+        "orstr": $orstr,  // positions of rstr's tokens in $orderedTokens, ascending
+        "osstr": $osstr,  // positions of sstr's tokens in $orderedTokens, ascending
+        "lorstr": $lorstr,  // length of orstr
+        "losstr": $losstr,  // length of osstr
+        "simpairs": $d,  // the whole $simpairs entry for this pair
+        "sim": let $cmmon := for $r in $orstr for $s in $osstr where $r = $s return $r  // positions present in both lists; the first one is the fifth argument below
+                             return similarity-jaccard-prefix($rlen, $orstr, $slen, $osstr, $cmmon[0], 0.61f)}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_3_1/basic-1_3_1.1.ddl.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_3_1/basic-1_3_1.1.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_3_1/basic-1_3_1.1.ddl.aql
new file mode 100644
index 0000000..45cc975
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_3_1/basic-1_3_1.1.ddl.aql
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+drop dataverse fuzzyjoin_basic if exists;
+// Recreate the dataverse from scratch so the test starts with no leftover state.
+create dataverse fuzzyjoin_basic;
+
+use dataverse fuzzyjoin_basic;
+// Closed record type with two fields: a uuid id and an authors string.
+create type BasicType as closed {
+    id: uuid,
+    authors: string
+}
+// The two join inputs share BasicType; each uses id as an autogenerated primary key.
+create dataset left(BasicType) primary key id autogenerated;
+create dataset right(BasicType) primary key id autogenerated;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_3_1/basic-1_3_1.2.update.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_3_1/basic-1_3_1.2.update.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_3_1/basic-1_3_1.2.update.aql
new file mode 100644
index 0000000..c9aceb2
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_3_1/basic-1_3_1.2.update.aql
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse fuzzyjoin_basic;
+// Load "left" from the ADM file dblpauthors.adm through the localfs adapter.
+load dataset left
+using localfs
+(("path"="asterix_nc1://data/pub-small/dblpauthors.adm"),("format"="adm"));
+// Load "right" from the ADM file csxauthors.adm through the localfs adapter.
+load dataset right
+using localfs
+(("path"="asterix_nc1://data/pub-small/csxauthors.adm"),("format"="adm"));
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_3_1/basic-1_3_1.3.query.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_3_1/basic-1_3_1.3.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_3_1/basic-1_3_1.3.query.aql
new file mode 100644
index 0000000..f91e841
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_3_1/basic-1_3_1.3.query.aql
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse fuzzyjoin_basic;
+
+// -- - Stage 1 - --
+// Tokenize right.authors, group the record ids by token, and return [token, id count]
+// pairs ordered by ascending id count, then by token.
+for $orderRight in dataset('right')
+let $rightId := $orderRight.id
+for $orderTokenRight in word-tokens($orderRight.authors)
+    /*+ hash */ group by $tokenRightGrouped := $orderTokenRight with $rightId
+/*+ inmem 1 302 */ order by count($rightId), $tokenRightGrouped
+return [ $tokenRightGrouped, count($rightId) ]
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_3_1/basic-1_3_1.4.query.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_3_1/basic-1_3_1.4.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_3_1/basic-1_3_1.4.query.aql
new file mode 100644
index 0000000..66dbbbc
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_3_1/basic-1_3_1.4.query.aql
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse fuzzyjoin_basic;
+
+// -- - Stage 1 - --
+// Joins per-token occurrence counts of 'right' and 'left' authors on equal
+// token; returns [token, rc * lc] ordered by that product, then by token.
+for $r in
+for $orderRight in dataset('right')
+let $rightId := $orderRight.id
+for $orderTokenRight in word-tokens($orderRight.authors)
+    /*+ hash */ group by $tokenRightGrouped := $orderTokenRight with $rightId
+return {"rt": $tokenRightGrouped, "rc": count($rightId)}
+
+for $l in
+for $orderLeft in dataset('left')
+let $leftId := $orderLeft.id
+for $orderTokenLeft in word-tokens($orderLeft.authors)
+    /*+ hash */ group by $tokenLeftGrouped := $orderTokenLeft with $leftId
+return {"lt": $tokenLeftGrouped, "lc": count($leftId)}
+
+where $r.rt = $l.lt
+/*+ inmem 1 302 */ order by $r.rc * $l.lc, $r.rt
+return [ $r.rt, $r.rc * $l.lc ]
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_3_1/basic-1_3_1.5.query.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_3_1/basic-1_3_1.5.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_3_1/basic-1_3_1.5.query.aql
new file mode 100644
index 0000000..ad93db1
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_3_1/basic-1_3_1.5.query.aql
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse fuzzyjoin_basic;
+
+set import-private-functions 'true'
+
+let $r := count(
+    for $right in dataset('right')
+    let $idRight := $right.id
+    let $tokensUnrankedRight := word-tokens($right.authors)
+    let $lenRight := len($tokensUnrankedRight)
+    let $tokensRight :=
+        for $tokenUnranked in $tokensUnrankedRight
+        for $tokenRanked at $i in
+            // -- - Stage 1 - --
+            // Token order: author word tokens of dataset('right') sorted by
+            // ascending occurrence count; $i is a token's position in it.
+            for $orderRight in dataset('right')
+            let $rightId := $orderRight.id
+            for $orderTokenRight in word-tokens($orderRight.authors)
+                /*+ hash */ group by $tokenRightGrouped := $orderTokenRight with $rightId
+            /*+ inmem 1 302 */ order by count($rightId)
+            return $tokenRightGrouped
+        where $tokenUnranked = /*+ bcast */ $tokenRanked
+        order by $i
+        return $i
+    for $prefixTokenRight in subset-collection($tokensRight, 0, prefix-len-jaccard(len($tokensRight), .8f))
+
+    for $left in dataset('left')
+    let $idLeft := $left.id
+    let $tokensUnrankedLeft := word-tokens($left.authors)
+    let $lenLeft := len($tokensUnrankedLeft)
+    let $tokensLeft :=
+        for $tokenUnranked in $tokensUnrankedLeft
+        for $tokenRanked at $i in
+            // -- - Stage 1 - --
+            // Same token order, again built from dataset('right'); left tokens
+            // that do not occur in it get no position in $tokensLeft.
+            for $orderRight in dataset('right')
+            let $rightId := $orderRight.id
+            for $orderTokenRight in word-tokens($orderRight.authors)
+                /*+ hash */ group by $tokenRightGrouped := $orderTokenRight with $rightId
+            /*+ inmem 1 302 */ order by count($rightId)
+            return $tokenRightGrouped
+        where $tokenUnranked = /*+ bcast */ $tokenRanked
+        order by $i
+        return $i
+    let $actualPrefixLen := prefix-len-jaccard(len($tokensUnrankedLeft), .8f) - len($tokensUnrankedLeft) + len($tokensLeft)
+    for $prefixTokenLeft in subset-collection($tokensLeft, 0, $actualPrefixLen)
+
+    where $prefixTokenRight = $prefixTokenLeft
+    let $sim := similarity-jaccard-prefix($lenRight, $tokensRight, $lenLeft, $tokensLeft, $prefixTokenLeft, .8f)
+    where $sim >= .8f
+    /*+ hash*/ group by $idRight := $idRight, $idLeft := $idLeft with $sim
+    return {'idDBLP': $idRight, 'idCSX': $idLeft, "sim": $sim[0]}
+)
+return $r
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_3_1/basic-1_3_1.6.query.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_3_1/basic-1_3_1.6.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_3_1/basic-1_3_1.6.query.aql
new file mode 100644
index 0000000..5594de3
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/basic-1_3_1/basic-1_3_1.6.query.aql
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse fuzzyjoin_basic;
+
+set import-private-functions 'true'
+
+let $r := count(
+    for $right in dataset('right')
+    let $idRight := $right.id
+    let $tokensUnrankedRight := word-tokens($right.authors)
+    let $lenRight := len($tokensUnrankedRight)
+    let $tokensRight :=
+        for $tokenUnranked in $tokensUnrankedRight
+        for $tokenRanked at $i in
+            // -- - Stage 1 - --
+            // Token order: tokens occurring in both 'right' and 'left' authors,
+            // sorted by the product of their two occurrence counts (rc * lc).
+            for $r in
+                for $orderRight in dataset('right')
+                let $rightId := $orderRight.id
+                for $orderTokenRight in word-tokens($orderRight.authors)
+                    /*+ hash */ group by $tokenRightGrouped := $orderTokenRight with $rightId
+                return {"rt": $tokenRightGrouped, "rc": count($rightId)}
+            for $l in
+                for $orderLeft in dataset('left')
+                let $leftId := $orderLeft.id
+                for $orderTokenLeft in word-tokens($orderLeft.authors)
+                    /*+ hash */ group by $tokenLeftGrouped := $orderTokenLeft with $leftId
+                return {"lt": $tokenLeftGrouped, "lc": count($leftId)}
+            where $r.rt = $l.lt
+            /*+ inmem 1 302 */ order by $r.rc * $l.lc
+            return $r.rt
+
+        where $tokenUnranked = /*+ bcast */ $tokenRanked
+        order by $i
+        return $i
+    for $prefixTokenRight in subset-collection($tokensRight, 0, prefix-len-jaccard(len($tokensRight), .8f))
+
+    for $left in dataset('left')
+    let $idLeft := $left.id
+    let $tokensUnrankedLeft := word-tokens($left.authors)
+    let $lenLeft := len($tokensUnrankedLeft)
+    let $tokensLeft :=
+        for $tokenUnranked in $tokensUnrankedLeft
+        for $tokenRanked at $i in
+            // -- - Stage 1 - --
+            // Same joined token order as used for $tokensRight above; $i is
+            // the position of a token in that order.
+            for $r in
+                for $orderRight in dataset('right')
+                let $rightId := $orderRight.id
+                for $orderTokenRight in word-tokens($orderRight.authors)
+                    /*+ hash */ group by $tokenRightGrouped := $orderTokenRight with $rightId
+                return {"rt": $tokenRightGrouped, "rc": count($rightId)}
+            for $l in
+                for $orderLeft in dataset('left')
+                let $leftId := $orderLeft.id
+                for $orderTokenLeft in word-tokens($orderLeft.authors)
+                    /*+ hash */ group by $tokenLeftGrouped := $orderTokenLeft with $leftId
+                return {"lt": $tokenLeftGrouped, "lc": count($leftId)}
+            where $r.rt = $l.lt
+            /*+ inmem 1 302 */ order by $r.rc * $l.lc
+            return $r.rt
+
+        where $tokenUnranked = /*+ bcast */ $tokenRanked
+        order by $i
+        return $i
+    let $actualPrefixLen := prefix-len-jaccard(len($tokensUnrankedLeft), .8f) - len($tokensUnrankedLeft) + len($tokensLeft)
+    for $prefixTokenLeft in subset-collection($tokensLeft, 0, $actualPrefixLen)
+
+    where $prefixTokenRight = $prefixTokenLeft
+    let $sim := similarity-jaccard-prefix($lenRight, $tokensRight, $lenLeft, $tokensLeft, $prefixTokenLeft, .8f)
+    where $sim >= .8f
+    /*+ hash*/ group by $idRight := $idRight, $idLeft := $idLeft with $sim
+    return {'idDBLP': $idRight, 'idCSX': $idLeft, "sim": $sim[0]}
+)
+return $r
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-2_5.2/dblp-csx-2_5.2.3.query.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-2_5.2/dblp-csx-2_5.2.3.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-2_5.2/dblp-csx-2_5.2.3.query.aql
index 1cff8fc..7ecca70 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-2_5.2/dblp-csx-2_5.2.3.query.aql
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-2_5.2/dblp-csx-2_5.2.3.query.aql
@@ -71,9 +71,7 @@ set import-private-functions 'true';
                                 $tokensCSX,
                                 0,
                                 prefix-len-jaccard(len($tokensCSX), .5f))
-
     where $prefixTokenDBLP = $prefixTokenCSX
-
     let $sim := similarity-jaccard-prefix(
                     $lenDBLP,
                     $tokensDBLP,

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.1.1/word-jaccard.1.ddl.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.1.1/word-jaccard.1.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.1.1/word-jaccard.1.ddl.aql
new file mode 100644
index 0000000..3573f47
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.1.1/word-jaccard.1.ddl.aql
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Description    : Fuzzy joins two datasets, DBLP and CSX, based on the similarity-jaccard function of their titles' word tokens.
+ *                  We expect the join to be transformed into a prefix-based fuzzy join followed by a < select.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPNestedType as closed {
+  id: int64,
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type DBLPType as closed {
+  nested: DBLPNestedType
+}
+
+create type CSXNestedType as closed {
+  id: int64,
+  csxid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type CSXType as closed {
+  nested: CSXNestedType
+}
+
+create dataset DBLPtmp(DBLPNestedType) primary key id;
+create dataset CSXtmp(CSXNestedType) primary key id;
+
+create dataset DBLP(DBLPType) primary key nested.id;
+create dataset CSX(CSXType) primary key nested.id;
+

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.1.1/word-jaccard.2.update.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.1.1/word-jaccard.2.update.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.1.1/word-jaccard.2.update.aql
new file mode 100644
index 0000000..a2633b1
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.1.1/word-jaccard.2.update.aql
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use dataverse test;
+
+load dataset DBLPtmp
+using localfs
+(("path"="asterix_nc1://data/dblp-small/dblp-small-multi-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000")) pre-sorted;
+
+load dataset CSXtmp
+using localfs
+(("path"="asterix_nc1://data/pub-small/csx-small-multi-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"));
+
+insert into dataset DBLP(
+    for $x in dataset DBLPtmp
+    return {
+        "nested": $x
+    }
+);
+
+insert into dataset CSX(
+    for $x in dataset CSXtmp
+    return {
+        "nested": $x
+    }
+);

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.1.1/word-jaccard.3.ddl.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.1.1/word-jaccard.3.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.1.1/word-jaccard.3.ddl.aql
new file mode 100644
index 0000000..0359448
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.1.1/word-jaccard.3.ddl.aql
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use dataverse test;
+
+create index keyword_index on DBLP(nested.title) type keyword;
+

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.1.1/word-jaccard.4.query.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.1.1/word-jaccard.4.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.1.1/word-jaccard.4.query.aql
new file mode 100644
index 0000000..65e2576
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.1.1/word-jaccard.4.query.aql
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use dataverse test;
+
+for $a in dataset('DBLP')
+for $b in dataset('CSX')
+where word-tokens($a.nested.title) ~= word-tokens($b.nested.title)
+      and $a.nested.id < $b.nested.id
+order by $a.nested.id, $b.nested.id
+return { "arec": $a.nested, "brec": $b.nested }

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.1.2/ngram-jaccard-inline.1.ddl.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.1.2/ngram-jaccard-inline.1.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.1.2/ngram-jaccard-inline.1.ddl.aql
new file mode 100644
index 0000000..72458b9
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.1.2/ngram-jaccard-inline.1.ddl.aql
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Description    : Fuzzy joins two datasets, DBLP and CSX, based on the similarity-jaccard function of their titles' 3-gram tokens.
+ *                  We expect the join to be transformed into an indexed prefix-based fuzzy join.
+ *                  We treat the < condition as a select over the fuzzy join results.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPNestedType as closed {
+  id: int64,
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type DBLPType as closed {
+  nested: DBLPNestedType
+}
+
+create type CSXNestedType as closed {
+  id: int64,
+  csxid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type CSXType as closed {
+  nested: CSXNestedType
+}
+
+create dataset DBLPtmp(DBLPNestedType) primary key id;
+create dataset CSXtmp(CSXNestedType) primary key id;
+
+create dataset DBLP(DBLPType) primary key nested.id;
+create dataset CSX(CSXType) primary key nested.id;
+

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.1.2/ngram-jaccard-inline.2.update.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.1.2/ngram-jaccard-inline.2.update.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.1.2/ngram-jaccard-inline.2.update.aql
new file mode 100644
index 0000000..a2633b1
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.1.2/ngram-jaccard-inline.2.update.aql
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use dataverse test;
+
+load dataset DBLPtmp
+using localfs
+(("path"="asterix_nc1://data/dblp-small/dblp-small-multi-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000")) pre-sorted;
+
+load dataset CSXtmp
+using localfs
+(("path"="asterix_nc1://data/pub-small/csx-small-multi-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"));
+
+insert into dataset DBLP(
+    for $x in dataset DBLPtmp
+    return {
+        "nested": $x
+    }
+);
+
+insert into dataset CSX(
+    for $x in dataset CSXtmp
+    return {
+        "nested": $x
+    }
+);

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.1.2/ngram-jaccard-inline.3.ddl.aql
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.1.2/ngram-jaccard-inline.3.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.1.2/ngram-jaccard-inline.3.ddl.aql
new file mode 100644
index 0000000..9307af9
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.1.2/ngram-jaccard-inline.3.ddl.aql
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use dataverse test;
+
+create index ngram_index on DBLP(nested.title) type ngram(3);
+