You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@asterixdb.apache.org by wa...@apache.org on 2018/10/10 00:59:45 UTC

[36/36] asterixdb git commit: [NO ISSUE][COMP][RT] Enable multiway similarity joins

[NO ISSUE][COMP][RT] Enable multiway similarity joins

- Enable the FuzzyJoinRule that transforms
  a nested-loop-similarity-join plan to a three-stage-similarity join.
- Modify FuzzyJoinRuleCollections.
  - Add the ExtractCommonExpressionRule to extract common expressions
    in the star-like multiple similarity join substitutions.
  - Add the InlineSubplanInputForNestedTupleSourceRule to translate
    the generated subplan from the similarity function-derived
    substitution into join in case of nested schemas.
  - Use similarity-jaccard-prefix to enable the pp+ join strategy.
  - Use the right side to build the heavy hash join on
    the prefix tokens from both sides.
  - Add RemoveAssign/Variables/AggRules to iteratively remove unused
    assign/vars once FuzzyJoinRule is applied in each round.
- Add three new optimization cases for multiway similarity joins.
  - link-like multiway similarity joins
  - star-like multiway similarity joins
  - hybrid multiway similarity joins with the both styles of similarity joins.
- Add a check whether a similarity function is on
  a select over an existing similarity join.
- Change the inverted-index-based similarity join to the three-stage-similarity join
  due to efficiency considerations.

Change-Id: I8736f104905eeda763d39709e002c2b9629278cc
Reviewed-on: https://asterix-gerrit.ics.uci.edu/1076
Sonar-Qube: Jenkins <je...@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <je...@fulliautomatix.ics.uci.edu>
Contrib: Jenkins <je...@fulliautomatix.ics.uci.edu>
Integration-Tests: Jenkins <je...@fulliautomatix.ics.uci.edu>
Reviewed-by: Dmitry Lychagin <dm...@couchbase.com>
Reviewed-by: Taewoo Kim <wa...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/asterixdb/repo
Commit: http://git-wip-us.apache.org/repos/asf/asterixdb/commit/d906bd89
Tree: http://git-wip-us.apache.org/repos/asf/asterixdb/tree/d906bd89
Diff: http://git-wip-us.apache.org/repos/asf/asterixdb/diff/d906bd89

Branch: refs/heads/master
Commit: d906bd89e48351156262c1c22096d23269bf0f0a
Parents: e7fa4b3
Author: Taewoo Kim <wa...@yahoo.com>
Authored: Mon Oct 8 18:01:23 2018 -0700
Committer: Taewoo Kim <wa...@gmail.com>
Committed: Tue Oct 9 17:58:47 2018 -0700

----------------------------------------------------------------------
 .../provider/DefaultRuleSetFactory.java         |    8 +-
 .../asterix/optimizer/base/FuzzyUtils.java      |   12 +
 .../asterix/optimizer/base/RuleCollections.java |   12 +-
 .../asterix/optimizer/rules/FuzzyJoinRule.java  |  508 ++--
 .../data/dblp-small/csx-small-multi-id.txt      |  100 +
 .../data/dblp-small/dblp-small-multi-id.txt     |  100 +
 .../data/pub-small/csx-small-multi-id.txt       |  100 +
 .../asterix-app/data/pub-small/csxauthors.adm   |  196 ++
 .../data/pub-small/dblp-small-multi-id.txt      |  100 +
 .../asterix-app/data/pub-small/dblpauthors.adm  |  194 ++
 .../src/test/resources/optimizerts/ignore.txt   |    1 -
 .../optimizerts/queries/fj-dblp-csx-hybrid.aql  |   69 +
 .../queries/fj-dblp-csx-selflink.aql            |   66 +
 .../optimizerts/queries/fj-dblp-csx-simple.aql  |   54 +
 .../optimizerts/queries/fj-dblp-csx-star.aql    |   69 +
 .../optimizerts/queries/fj-dblp-csx.aql         |  101 +-
 .../jaccard-similarity-join-dual-order.aql      |  109 +
 .../jaccard-similarity-join-right-ahead.aql     |   89 +
 .../optimizerts/results/fj-dblp-csx-hybrid.plan |  613 +++++
 .../results/fj-dblp-csx-selflink.plan           | 2436 +++++++++++++++++
 .../optimizerts/results/fj-dblp-csx-simple.plan |  146 ++
 .../optimizerts/results/fj-dblp-csx-star.plan   | 2481 ++++++++++++++++++
 .../optimizerts/results/fj-dblp-csx.plan        |  134 +
 .../ngram-jaccard-inline.plan                   |  170 +-
 .../word-jaccard-inline.plan                    |  170 +-
 .../results/inverted-index-join/issue741.plan   |  155 +-
 ...obe-pidx-with-join-jaccard-check-idx_01.plan |  166 +-
 .../ngram-fuzzyeq-jaccard_01.plan               |  157 +-
 .../ngram-fuzzyeq-jaccard_02.plan               |  157 +-
 .../ngram-fuzzyeq-jaccard_03.plan               |  158 +-
 .../ngram-jaccard-check_01.plan                 |  157 +-
 .../ngram-jaccard-check_02.plan                 |  157 +-
 .../ngram-jaccard-check_03.plan                 |  158 +-
 .../ngram-jaccard-check_04.plan                 |  160 +-
 .../inverted-index-join/ngram-jaccard_01.plan   |  157 +-
 .../inverted-index-join/ngram-jaccard_02.plan   |  157 +-
 .../inverted-index-join/ngram-jaccard_03.plan   |  158 +-
 .../inverted-index-join/ngram-jaccard_04.plan   |  160 +-
 .../word-fuzzyeq-jaccard_01.plan                |  157 +-
 .../word-fuzzyeq-jaccard_02.plan                |  157 +-
 .../word-fuzzyeq-jaccard_03.plan                |  158 +-
 .../word-jaccard-check-after-btree-access.plan  |  178 +-
 .../word-jaccard-check_01.plan                  |  157 +-
 .../word-jaccard-check_02.plan                  |  157 +-
 .../word-jaccard-check_03.plan                  |  158 +-
 .../word-jaccard-check_04.plan                  |  160 +-
 .../inverted-index-join/word-jaccard_01.plan    |  157 +-
 .../inverted-index-join/word-jaccard_02.plan    |  157 +-
 .../inverted-index-join/word-jaccard_03.plan    |  158 +-
 .../inverted-index-join/word-jaccard_04.plan    |  160 +-
 .../ngram-jaccard-check-multi-let.plan          |    2 +-
 .../word-jaccard-check-multi-let.plan           |    2 +-
 ...obe-pidx-with-join-jaccard-check-idx_01.plan |  171 +-
 .../ngram-fuzzyeq-jaccard_01.plan               |  164 +-
 .../ngram-jaccard-check_01.plan                 |  164 +-
 .../ngram-jaccard-inline.plan                   |  199 +-
 .../inverted-index-join/ngram-jaccard_01.plan   |  164 +-
 .../olist-jaccard-inline.plan                   |    2 +-
 .../ulist-jaccard-inline.plan                   |    2 +-
 .../word-fuzzyeq-jaccard_01.plan                |  164 +-
 .../word-jaccard-check-after-btree-access.plan  |  202 +-
 .../word-jaccard-check_01.plan                  |  164 +-
 .../word-jaccard-inline.plan                    |  199 +-
 .../inverted-index-join/word-jaccard_01.plan    |  164 +-
 .../ngram-jaccard-check-multi-let.plan          |    2 +-
 .../word-jaccard-check-multi-let.plan           |    2 +-
 .../ngram-fuzzyeq-jaccard_01.plan               |  164 +-
 .../ngram-fuzzyeq-jaccard_02.plan               |  164 +-
 .../ngram-fuzzyeq-jaccard_03.plan               |  163 +-
 .../ngram-fuzzyeq-jaccard_04.plan               |  164 +-
 .../ngram-jaccard-check_01.plan                 |  164 +-
 .../ngram-jaccard-check_02.plan                 |  164 +-
 .../ngram-jaccard-check_03.plan                 |  163 +-
 .../ngram-jaccard-check_04.plan                 |  164 +-
 .../ngram-jaccard-inline.plan                   |  199 +-
 .../inverted-index-join/ngram-jaccard_01.plan   |  164 +-
 .../inverted-index-join/ngram-jaccard_02.plan   |  164 +-
 .../inverted-index-join/ngram-jaccard_03.plan   |  163 +-
 .../inverted-index-join/ngram-jaccard_04.plan   |  164 +-
 .../word-fuzzyeq-jaccard_01.plan                |  164 +-
 .../word-fuzzyeq-jaccard_02.plan                |  164 +-
 .../word-fuzzyeq-jaccard_03.plan                |  163 +-
 .../word-fuzzyeq-jaccard_04.plan                |  164 +-
 .../word-jaccard-check-after-btree-access.plan  |  202 +-
 .../word-jaccard-check_01.plan                  |  164 +-
 .../word-jaccard-check_02.plan                  |  164 +-
 .../word-jaccard-check_03.plan                  |  163 +-
 .../word-jaccard-check_04.plan                  |  164 +-
 .../word-jaccard-inline.plan                    |  199 +-
 .../inverted-index-join/word-jaccard_01.plan    |  164 +-
 .../inverted-index-join/word-jaccard_02.plan    |  164 +-
 .../inverted-index-join/word-jaccard_03.plan    |  163 +-
 .../inverted-index-join/word-jaccard_04.plan    |  164 +-
 .../ngram-fuzzyeq-jaccard_01.plan               |  157 +-
 .../ngram-fuzzyeq-jaccard_02.plan               |  157 +-
 .../ngram-fuzzyeq-jaccard_03.plan               |  158 +-
 .../ngram-fuzzyeq-jaccard_04.plan               |  157 +-
 .../ngram-jaccard-check_01.plan                 |  157 +-
 .../ngram-jaccard-check_02.plan                 |  157 +-
 .../ngram-jaccard-check_03.plan                 |  158 +-
 .../ngram-jaccard-check_04.plan                 |  157 +-
 .../ngram-jaccard-check_inline_03.plan          |  160 +-
 .../inverted-index-join/ngram-jaccard_01.plan   |  157 +-
 .../inverted-index-join/ngram-jaccard_02.plan   |  157 +-
 .../inverted-index-join/ngram-jaccard_03.plan   |  158 +-
 .../inverted-index-join/ngram-jaccard_04.plan   |  157 +-
 .../ngram-jaccard_inline_03.plan                |  160 +-
 .../word-fuzzyeq-jaccard_01.plan                |  157 +-
 .../word-fuzzyeq-jaccard_02.plan                |  157 +-
 .../word-fuzzyeq-jaccard_03.plan                |  158 +-
 .../word-fuzzyeq-jaccard_04.plan                |  157 +-
 .../word-jaccard-check-after-btree-access.plan  |  178 +-
 .../word-jaccard-check_01.plan                  |  157 +-
 .../word-jaccard-check_02.plan                  |  157 +-
 .../word-jaccard-check_03.plan                  |  158 +-
 .../word-jaccard-check_04.plan                  |  157 +-
 .../word-jaccard-check_inline_03.plan           |  160 +-
 .../inverted-index-join/word-jaccard_01.plan    |  157 +-
 .../inverted-index-join/word-jaccard_02.plan    |  157 +-
 .../inverted-index-join/word-jaccard_03.plan    |  158 +-
 .../inverted-index-join/word-jaccard_04.plan    |  157 +-
 .../word-jaccard_inline_03.plan                 |  160 +-
 .../jaccard-similarity-join-dual-order.plan     |  191 ++
 .../jaccard-similarity-join-right-ahead.plan    |  128 +
 .../fuzzyjoin/basic-1_1/basic-1_1.1.ddl.aql     |   40 +
 .../fuzzyjoin/basic-1_1/basic-1_1.2.update.aql  |   34 +
 .../fuzzyjoin/basic-1_1/basic-1_1.3.query.aql   |   81 +
 .../fuzzyjoin/basic-1_1_1/basic-1_1_1.1.ddl.aql |   31 +
 .../basic-1_1_1/basic-1_1_1.2.update.aql        |   27 +
 .../basic-1_1_1/basic-1_1_1.3.query.aql         |   61 +
 .../fuzzyjoin/basic-1_1_2/basic-1_1_2.1.ddl.aql |   23 +
 .../basic-1_1_2/basic-1_1_2.10.query.aql        |   29 +
 .../basic-1_1_2/basic-1_1_2.2.update.aql        |   18 +
 .../basic-1_1_2/basic-1_1_2.3.query.aql         |   30 +
 .../basic-1_1_2/basic-1_1_2.4.query.aql         |   30 +
 .../basic-1_1_2/basic-1_1_2.5.query.aql         |   30 +
 .../basic-1_1_2/basic-1_1_2.6.query.aql         |   30 +
 .../basic-1_1_2/basic-1_1_2.7.query.aql         |   30 +
 .../basic-1_1_2/basic-1_1_2.8.query.aql         |   30 +
 .../basic-1_1_2/basic-1_1_2.9.query.aql         |   29 +
 .../fuzzyjoin/basic-1_1_3/basic-1_1_3.1.ddl.aql |   23 +
 .../basic-1_1_3/basic-1_1_3.2.update.aql        |   18 +
 .../basic-1_1_3/basic-1_1_3.3.query.aql         |   30 +
 .../basic-1_1_3/basic-1_1_3.4.query.aql         |   38 +
 .../fuzzyjoin/basic-1_2_1/basic-1_2_1.1.ddl.aql |   31 +
 .../basic-1_2_1/basic-1_2_1.2.update.aql        |   27 +
 .../basic-1_2_1/basic-1_2_1.3.query.aql         |  123 +
 .../basic-1_2_1/basic-1_2_1.4.query.aql         |   51 +
 .../basic-1_2_1/basic-1_2_1.5.query.aql         |   63 +
 .../basic-1_2_1/basic-1_2_1.6.query.aql         |   60 +
 .../basic-1_2_1/basic-1_2_1.7.query.aql         |   46 +
 .../fuzzyjoin/basic-1_2_2/basic-1_2_2.1.ddl.aql |   31 +
 .../basic-1_2_2/basic-1_2_2.2.update.aql        |   27 +
 .../basic-1_2_2/basic-1_2_2.3.query.aql         |   77 +
 .../basic-1_2_2/basic-1_2_2.4.query.aql         |   78 +
 .../basic-1_2_2/basic-1_2_2.5.query.aql         |   79 +
 .../basic-1_2_2/basic-1_2_2.6.query.aql         |   77 +
 .../fuzzyjoin/basic-1_2_3/basic-1_2_3.1.ddl.aql |   31 +
 .../basic-1_2_3/basic-1_2_3.2.update.aql        |   27 +
 .../basic-1_2_3/basic-1_2_3.3.query.aql         |   89 +
 .../fuzzyjoin/basic-1_2_4/basic-1_2_4.1.ddl.aql |   31 +
 .../basic-1_2_4/basic-1_2_4.2.update.aql        |   27 +
 .../basic-1_2_4/basic-1_2_4.3.query.aql         |   76 +
 .../fuzzyjoin/basic-1_2_5/basic-1_2_5.1.ddl.aql |   31 +
 .../basic-1_2_5/basic-1_2_5.2.update.aql        |   27 +
 .../basic-1_2_5/basic-1_2_5.3.query.aql         |   37 +
 .../fuzzyjoin/basic-1_2_6/basic-1_2_6.1.ddl.aql |   33 +
 .../basic-1_2_6/basic-1_2_6.2.update.aql        |   23 +
 .../basic-1_2_6/basic-1_2_6.3.query.aql         |   39 +
 .../fuzzyjoin/basic-1_2_7/basic-1_2_7.1.ddl.aql |   35 +
 .../basic-1_2_7/basic-1_2_7.2.update.aql        |   27 +
 .../basic-1_2_7/basic-1_2_7.3.query.aql         |   89 +
 .../fuzzyjoin/basic-1_3_1/basic-1_3_1.1.ddl.aql |   31 +
 .../basic-1_3_1/basic-1_3_1.2.update.aql        |   27 +
 .../basic-1_3_1/basic-1_3_1.3.query.aql         |   29 +
 .../basic-1_3_1/basic-1_3_1.4.query.aql         |   40 +
 .../basic-1_3_1/basic-1_3_1.5.query.aql         |   73 +
 .../basic-1_3_1/basic-1_3_1.6.query.aql         |   93 +
 .../dblp-csx-2_5.2/dblp-csx-2_5.2.3.query.aql   |    2 -
 .../dblp-csx-4.1.1/word-jaccard.1.ddl.aql       |   58 +
 .../dblp-csx-4.1.1/word-jaccard.2.update.aql    |   42 +
 .../dblp-csx-4.1.1/word-jaccard.3.ddl.aql       |   23 +
 .../dblp-csx-4.1.1/word-jaccard.4.query.aql     |   27 +
 .../ngram-jaccard-inline.1.ddl.aql              |   59 +
 .../ngram-jaccard-inline.2.update.aql           |   42 +
 .../ngram-jaccard-inline.3.ddl.aql              |   23 +
 .../ngram-jaccard-inline.4.query.aql            |   27 +
 .../dblp-csx-4.2.1/word-jaccard.1.ddl.aql       |   58 +
 .../dblp-csx-4.2.1/word-jaccard.2.update.aql    |   42 +
 .../dblp-csx-4.2.1/word-jaccard.3.ddl.aql       |   23 +
 .../dblp-csx-4.2.1/word-jaccard.4.query.aql     |   28 +
 .../ngram-jaccard-inline.1.ddl.aql              |   58 +
 .../ngram-jaccard-inline.2.update.aql           |   42 +
 .../ngram-jaccard-inline.3.ddl.aql              |   23 +
 .../ngram-jaccard-inline.4.query.aql            |   29 +
 .../dblp-csx-4.3.1/word-jaccard.1.ddl.aql       |   58 +
 .../dblp-csx-4.3.1/word-jaccard.2.update.aql    |   43 +
 .../dblp-csx-4.3.1/word-jaccard.3.ddl.aql       |   23 +
 .../dblp-csx-4.3.1/word-jaccard.4.query.aql     |   29 +
 .../ngram-jaccard-inline.1.ddl.aql              |   58 +
 .../ngram-jaccard-inline.2.update.aql           |   43 +
 .../ngram-jaccard-inline.3.ddl.aql              |   23 +
 .../ngram-jaccard-inline.4.query.aql            |   30 +
 .../dblp-csx-4.4.1/word-jaccard.1.ddl.aql       |   57 +
 .../dblp-csx-4.4.1/word-jaccard.2.update.aql    |   42 +
 .../dblp-csx-4.4.1/word-jaccard.3.ddl.aql       |   23 +
 .../dblp-csx-4.4.1/word-jaccard.4.query.aql     |   32 +
 .../dblp-csx-4.4.2/word-jaccard.1.ddl.aql       |   58 +
 .../dblp-csx-4.4.2/word-jaccard.2.update.aql    |   42 +
 .../dblp-csx-4.4.2/word-jaccard.3.ddl.aql       |   18 +
 .../dblp-csx-4.4.2/word-jaccard.4.query.aql     |   32 +
 .../dblp-csx-aqlplus_6.1.ddl.aql                |   53 +
 .../dblp-csx-aqlplus_6.2.update.aql             |   41 +
 .../dblp-csx-aqlplus_6.3.query.aql              |   26 +
 .../dblp-csx-dblp-aqlplus_1.3.query.aql         |    2 +-
 .../results/fuzzyjoin/basic-1_1/basic-1_1.1.adm |    1 +
 .../fuzzyjoin/basic-1_1_1/basic-1_1_1.1.adm     |   13 +
 .../fuzzyjoin/basic-1_1_2/basic-1_1_2.10.adm    |    1 +
 .../fuzzyjoin/basic-1_1_2/basic-1_1_2.3.adm     |    1 +
 .../fuzzyjoin/basic-1_1_2/basic-1_1_2.4.adm     |    1 +
 .../fuzzyjoin/basic-1_1_2/basic-1_1_2.5.adm     |    1 +
 .../fuzzyjoin/basic-1_1_2/basic-1_1_2.6.adm     |    1 +
 .../fuzzyjoin/basic-1_1_2/basic-1_1_2.7.adm     |    1 +
 .../fuzzyjoin/basic-1_1_2/basic-1_1_2.8.adm     |    1 +
 .../fuzzyjoin/basic-1_1_2/basic-1_1_2.9.adm     |    1 +
 .../fuzzyjoin/basic-1_1_3/basic-1_1_2.3.adm     |    1 +
 .../fuzzyjoin/basic-1_1_3/basic-1_1_2.4.adm     |    1 +
 .../fuzzyjoin/basic-1_2_1/basic-1_2_1.3.adm     |    1 +
 .../fuzzyjoin/basic-1_2_1/basic-1_2_1.4.adm     |    4 +
 .../fuzzyjoin/basic-1_2_1/basic-1_2_1.5.adm     |    0
 .../fuzzyjoin/basic-1_2_1/basic-1_2_1.6.adm     |  196 ++
 .../fuzzyjoin/basic-1_2_1/basic-1_2_1.7.adm     |  196 ++
 .../fuzzyjoin/basic-1_2_2/basic-1_2_2.3.adm     |    1 +
 .../fuzzyjoin/basic-1_2_2/basic-1_2_2.4.adm     |    1 +
 .../fuzzyjoin/basic-1_2_2/basic-1_2_2.5.adm     |    1 +
 .../fuzzyjoin/basic-1_2_2/basic-1_2_2.6.adm     |  120 +
 .../fuzzyjoin/basic-1_2_3/basic-1_2_3.3.adm     |    0
 .../fuzzyjoin/basic-1_2_4/basic-1_2_4.3.adm     |    1 +
 .../fuzzyjoin/basic-1_2_5/basic-1_2_5.3.adm     |    1 +
 .../fuzzyjoin/basic-1_2_6/basic-1_2_6.3.adm     |    1 +
 .../fuzzyjoin/basic-1_2_7/basic-1_2_7.3.adm     |    0
 .../fuzzyjoin/basic-1_3_1/basic-1_3_1.3.adm     |  834 ++++++
 .../fuzzyjoin/basic-1_3_1/basic-1_3_1.4.adm     |  547 ++++
 .../fuzzyjoin/basic-1_3_1/basic-1_3_1.5.adm     |    1 +
 .../fuzzyjoin/basic-1_3_1/basic-1_3_1.6.adm     |    1 +
 .../fuzzyjoin/dblp-csx-4.1.1/dblp-csx-4.1.1.adm |    7 +
 .../fuzzyjoin/dblp-csx-4.1.2/dblp-csx-4.1.2.adm |    7 +
 .../fuzzyjoin/dblp-csx-4.2.1/dblp-csx-4.2.1.adm |    2 +
 .../fuzzyjoin/dblp-csx-4.2.2/dblp-csx-4.2.2.adm |    5 +
 .../fuzzyjoin/dblp-csx-4.3.1/dblp-csx-4.3.1.adm |    2 +
 .../fuzzyjoin/dblp-csx-4.3.2/dblp-csx-4.3.2.adm |    5 +
 .../fuzzyjoin/dblp-csx-4.4.1/dblp-csx-4.4.1.adm |    5 +
 .../fuzzyjoin/dblp-csx-4.4.2/dblp-csx-4.4.2.adm |    5 +
 .../dblp-csx-aqlplus_6/dblp-csx-aqlplus_6.1.adm |    9 +
 .../src/test/resources/runtimets/testsuite.xml  |  107 +-
 .../asterix/common/exceptions/ErrorCode.java    |    1 +
 .../main/resources/asx_errormsg/en.properties   |    1 +
 .../fuzzyjoin/similarity/SimilarityFilters.java |   21 +-
 .../similarity/SimilarityFiltersJaccard.java    |  141 +-
 .../SimilarityJaccardPrefixEvaluator.java       |   22 +-
 .../logical/visitors/IsomorphismUtilities.java  |   64 +
 261 files changed, 26850 insertions(+), 2387 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/compiler/provider/DefaultRuleSetFactory.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/compiler/provider/DefaultRuleSetFactory.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/compiler/provider/DefaultRuleSetFactory.java
index 2db5d9a..eb41204 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/compiler/provider/DefaultRuleSetFactory.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/compiler/provider/DefaultRuleSetFactory.java
@@ -25,6 +25,7 @@ import org.apache.asterix.common.dataflow.ICcApplicationContext;
 import org.apache.asterix.optimizer.base.RuleCollections;
 import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
 import org.apache.hyracks.algebricks.common.utils.Pair;
+import org.apache.hyracks.algebricks.compiler.rewriter.rulecontrollers.SequentialFirstRuleCheckFixpointRuleController;
 import org.apache.hyracks.algebricks.compiler.rewriter.rulecontrollers.SequentialFixpointRuleController;
 import org.apache.hyracks.algebricks.compiler.rewriter.rulecontrollers.SequentialOnceRuleController;
 import org.apache.hyracks.algebricks.core.rewriter.base.AbstractRuleController;
@@ -50,6 +51,8 @@ public class DefaultRuleSetFactory implements IRuleSetFactory {
         SequentialFixpointRuleController seqCtrlNoDfs = new SequentialFixpointRuleController(false);
         SequentialFixpointRuleController seqCtrlFullDfs = new SequentialFixpointRuleController(true);
         SequentialOnceRuleController seqOnceCtrl = new SequentialOnceRuleController(true);
+        SequentialFirstRuleCheckFixpointRuleController seqFirstRuleGateKeeperDfs =
+                new SequentialFirstRuleCheckFixpointRuleController(true);
         defaultLogicalRewrites.add(new Pair<>(seqOnceCtrl, RuleCollections.buildInitialTranslationRuleCollection()));
         defaultLogicalRewrites.add(new Pair<>(seqOnceCtrl, RuleCollections.buildTypeInferenceRuleCollection()));
         defaultLogicalRewrites.add(new Pair<>(seqOnceCtrl, RuleCollections.buildAutogenerateIDRuleCollection()));
@@ -58,9 +61,8 @@ public class DefaultRuleSetFactory implements IRuleSetFactory {
         defaultLogicalRewrites
                 .add(new Pair<>(seqCtrlNoDfs, RuleCollections.buildCondPushDownAndJoinInferenceRuleCollection()));
         defaultLogicalRewrites.add(new Pair<>(seqCtrlFullDfs, RuleCollections.buildLoadFieldsRuleCollection(appCtx)));
-        // fj
-        defaultLogicalRewrites.add(new Pair<>(seqCtrlFullDfs, RuleCollections.buildFuzzyJoinRuleCollection()));
-        //
+        defaultLogicalRewrites
+                .add(new Pair<>(seqFirstRuleGateKeeperDfs, RuleCollections.buildFuzzyJoinRuleCollection()));
         defaultLogicalRewrites
                 .add(new Pair<>(seqCtrlFullDfs, RuleCollections.buildNormalizationRuleCollection(appCtx)));
         defaultLogicalRewrites

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/base/FuzzyUtils.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/base/FuzzyUtils.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/base/FuzzyUtils.java
index d7e05b3..1e64c91 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/base/FuzzyUtils.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/base/FuzzyUtils.java
@@ -52,6 +52,7 @@ public class FuzzyUtils {
                 return BuiltinFunctions.COUNTHASHED_WORD_TOKENS;
             case MULTISET:
             case ARRAY:
+            case UNION:
             case ANY:
                 return null;
             default:
@@ -119,4 +120,15 @@ public class FuzzyUtils {
         simFunction = simFunction.toLowerCase();
         return simFunction;
     }
+
+    public static String getSimFunction(FunctionIdentifier simFuncId) {
+        if (simFuncId.equals(BuiltinFunctions.SIMILARITY_JACCARD)
+                || simFuncId.equals(BuiltinFunctions.SIMILARITY_JACCARD_CHECK)) {
+            return JACCARD_FUNCTION_NAME;
+        } else if (simFuncId.equals(BuiltinFunctions.EDIT_DISTANCE)
+                || simFuncId.equals(BuiltinFunctions.EDIT_DISTANCE_CHECK)) {
+            return EDIT_DISTANCE_FUNCTION_NAME;
+        }
+        return null;
+    }
 }

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/base/RuleCollections.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/base/RuleCollections.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/base/RuleCollections.java
index cf01573..3c981d4 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/base/RuleCollections.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/base/RuleCollections.java
@@ -41,6 +41,7 @@ import org.apache.asterix.optimizer.rules.FeedScanCollectionToUnnest;
 import org.apache.asterix.optimizer.rules.FixReplicateOperatorOutputsRule;
 import org.apache.asterix.optimizer.rules.FullTextContainsParameterCheckRule;
 import org.apache.asterix.optimizer.rules.FuzzyEqRule;
+import org.apache.asterix.optimizer.rules.FuzzyJoinRule;
 import org.apache.asterix.optimizer.rules.InjectTypeCastForFunctionArgumentsRule;
 import org.apache.asterix.optimizer.rules.InjectTypeCastForUnionRule;
 import org.apache.asterix.optimizer.rules.InlineUnnestFunctionRule;
@@ -259,8 +260,15 @@ public final class RuleCollections {
 
     public static final List<IAlgebraicRewriteRule> buildFuzzyJoinRuleCollection() {
         List<IAlgebraicRewriteRule> fuzzy = new LinkedList<>();
-        // fuzzy.add(new FuzzyJoinRule()); -- The non-indexed fuzzy join will be temporarily disabled. It should be enabled some time in the near future.
-        fuzzy.add(new InferTypesRule());
+        fuzzy.add(new FuzzyJoinRule());
+        fuzzy.add(new ExtractCommonExpressionsRule());
+        fuzzy.add(new NestedSubplanToJoinRule());
+        fuzzy.add(new PushSelectIntoJoinRule());
+        fuzzy.add(new RemoveUnusedAssignAndAggregateRule());
+        fuzzy.add(new InlineSubplanInputForNestedTupleSourceRule());
+        fuzzy.add(new RemoveRedundantVariablesRule());
+        fuzzy.add(new AsterixInlineVariablesRule());
+        fuzzy.add(new RemoveUnusedAssignAndAggregateRule());
         return fuzzy;
     }
 

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/FuzzyJoinRule.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/FuzzyJoinRule.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/FuzzyJoinRule.java
index d814ba0..dabfb82 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/FuzzyJoinRule.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/FuzzyJoinRule.java
@@ -19,6 +19,7 @@
 package org.apache.asterix.optimizer.rules;
 
 import java.io.StringReader;
+import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashSet;
 import java.util.List;
@@ -26,11 +27,17 @@ import java.util.Locale;
 
 import org.apache.asterix.aqlplus.parser.AQLPlusParser;
 import org.apache.asterix.aqlplus.parser.ParseException;
+import org.apache.asterix.common.exceptions.CompilationException;
+import org.apache.asterix.common.exceptions.ErrorCode;
 import org.apache.asterix.lang.common.base.Clause;
-import org.apache.asterix.lang.common.struct.Identifier;
+import org.apache.asterix.lang.common.struct.VarIdentifier;
+import org.apache.asterix.lang.common.util.FunctionUtil;
 import org.apache.asterix.metadata.declared.MetadataProvider;
+import org.apache.asterix.om.base.AFloat;
+import org.apache.asterix.om.constants.AsterixConstantValue;
 import org.apache.asterix.om.functions.BuiltinFunctions;
 import org.apache.asterix.om.typecomputer.impl.TypeComputeUtils;
+import org.apache.asterix.om.types.BuiltinType;
 import org.apache.asterix.om.types.IAType;
 import org.apache.asterix.optimizer.base.FuzzyUtils;
 import org.apache.asterix.translator.AqlPlusExpressionToPlanTranslator;
@@ -48,6 +55,7 @@ import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable;
 import org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
 import org.apache.hyracks.algebricks.core.algebra.expressions.ConstantExpression;
 import org.apache.hyracks.algebricks.core.algebra.expressions.IndexedNLJoinExpressionAnnotation;
+import org.apache.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression;
 import org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
 import org.apache.hyracks.algebricks.core.algebra.functions.AlgebricksBuiltinFunctions;
 import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
@@ -55,6 +63,7 @@ import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractBina
 import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
 import org.apache.hyracks.algebricks.core.algebra.operators.logical.LeftOuterJoinOperator;
 import org.apache.hyracks.algebricks.core.algebra.operators.logical.SelectOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.visitors.IsomorphismUtilities;
 import org.apache.hyracks.algebricks.core.algebra.operators.logical.visitors.LogicalOperatorDeepCopyWithNewVariablesVisitor;
 import org.apache.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
 import org.apache.hyracks.algebricks.core.algebra.util.OperatorPropertiesUtil;
@@ -62,118 +71,132 @@ import org.apache.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
 
 public class FuzzyJoinRule implements IAlgebraicRewriteRule {
 
-    private static HashSet<FunctionIdentifier> simFuncs = new HashSet<FunctionIdentifier>();
+    private static HashSet<FunctionIdentifier> simFuncs = new HashSet<>();
 
     static {
         simFuncs.add(BuiltinFunctions.SIMILARITY_JACCARD_CHECK);
     }
 
+    private final List<Collection<LogicalVariable>> previousPKs = new ArrayList<>();
+
+    // Please correspond the value to the anchors $$RIGHT_##_0 and ##RIGHT_3 in AQLPLUS.
+    private static final int SUBSET_RIGHT_INDEX = 1; // Corresponds to $$RIGHT_1_0 and ##RIGHT_1
+    // private static final int LENGTH_LEFT_INDEX = 2;  // Corresponds to $$RIGHT_2_0 and ##RIGHT_2
+    private static final int LENGTH_RIGHT_INDEX = 3; // Corresponds to $$RIGHT_3_0 and ##RIGHT_3
+
+    // Step1: Initialize the host embedding aql to substitute the fuzzy equal condition such as $r.a ~= $s.b
     private static final String AQLPLUS = ""
             //
             // -- - Stage 3 - --
             //
-            + "((#RIGHT), " + "  (join((#LEFT), "
+            + "((##LEFT_0), " + "  (join((##RIGHT_0), "
             //
             // -- -- - Stage 2 - --
             //
-            + "    (" + "    join( " + "      ( " + "      #LEFT_1 " + "      let $tokensUnrankedLeft := %s($$LEFT_1) "
-            + "      let $lenLeft := len($tokensUnrankedLeft) " + "      let $tokensLeft := "
-            + "        for $token in $tokensUnrankedLeft " + "        for $tokenRanked at $i in "
+            + "    (" + "join( " + "( " + "##RIGHT_1 " + "      let $tokensUnrankedRight := %s($$RIGHT_1) "
+            + "    let $lenRight := len($tokensUnrankedRight) " + "      let $tokensRight := "
+            + "    for $token in $tokensUnrankedRight " + "for $tokenRanked at $i in "
             //
             // -- -- -- - Stage 1 - --
-            //
-            // + "          #LEFT_2 "
-            // + "          let $id := $$LEFTPK_2 "
-            // + "          for $token in %s($$LEFT_2) "
-            + "          #RIGHT_2 " + "          let $id := $$RIGHTPK_2 " + "          for $token in %s($$RIGHT_2) "
-            + "          /*+ hash */ " + "          group by $tokenGroupped := $token with $id "
-            + "          /*+ inmem 34 198608 */ " + "          order by count($id), $tokenGroupped "
-            + "          return $tokenGroupped "
+            // Since we use the right join branch R to generate our token order, this can shorten the prefix length of S
+            // in case of R ~= S, where some tokens are in S but not in R.
+            + "          ##RIGHT_3 " + "let $id := $$RIGHTPK_3_0 " + "for $token in %s($$RIGHT_3) "
+            + "          /*+ hash */ " + "group by $tokenGroupped := $token with $id "
+            + "          order by count($id), $tokenGroupped return $tokenGroupped "
             //
             // -- -- -- -
             //
-            + "        where $token = /*+ bcast */ $tokenRanked " + "        order by $i " + "        return $i "
-            + "      for $prefixTokenLeft in subset-collection($tokensLeft, 0, prefix-len-%s(len($tokensLeft), %ff)) "
-            + "      ),( " + "      #RIGHT_1 " + "      let $tokensUnrankedRight := %s($$RIGHT_1) "
-            + "      let $lenRight := len($tokensUnrankedRight) " + "      let $tokensRight := "
-            + "        for $token in $tokensUnrankedRight " + "        for $tokenRanked at $i in "
+            + "        where $token = /*+ bcast */ $tokenRanked " + "order by $i " + "return $i "
+            + "      for $prefixTokenRight in subset-collection($tokensRight, 0, prefix-len-%s(len($tokensRight), %ff)) "
+            + "      ), " + "( " + "##LEFT_1 " + "let $tokensUnrankedLeft := %s($$LEFT_1) "
+            + "      let $lenLeft := len($tokensUnrankedLeft) " + "let $tokensLeft := "
+            + "        for $token in $tokensUnrankedLeft " + "for $tokenRanked at $i in "
             //
             // -- -- -- - Stage 1 - --
-            //
-            // + "          #LEFT_3 "
-            // + "          let $id := $$LEFTPK_3 "
-            // + "          for $token in %s($$LEFT_3) "
-            + "          #RIGHT_3 " + "          let $id := $$RIGHTPK_3 " + "          for $token in %s($$RIGHT_3) "
-            + "          /*+ hash */ " + "          group by $tokenGroupped := $token with $id "
-            + "          /*+ inmem 34 198608 */ " + "          order by count($id), $tokenGroupped "
-            + "          return $tokenGroupped "
+            + "          ##RIGHT_2 " + "let $id := $$RIGHTPK_2_0 " + "for $token in %s($$RIGHT_2) "
+            + "          /*+ hash */ " + "group by $tokenGroupped := $token with $id "
+            + "          order by count($id), $tokenGroupped return $tokenGroupped "
             //
             // -- -- -- -
             //
-            + "        where $token = /*+ bcast */ $tokenRanked " + "        order by $i " + "        return $i "
-            + "      for $prefixTokenRight in subset-collection($tokensRight, 0, prefix-len-%s(len($tokensRight), %ff)) "
-            + "      ), $prefixTokenLeft = $prefixTokenRight) "
-            + "    let $sim := similarity-%s-prefix($lenLeft, $tokensLeft, $lenRight, $tokensRight, $prefixTokenLeft, %ff) "
-            + "    where $sim >= %ff " + "    /*+ hash*/ "
-            + "    group by $idLeft := $$LEFTPK_1, $idRight := $$RIGHTPK_1 with $sim "
+            + "        where $token = /*+ bcast */ $tokenRanked " + "order by $i " + "return $i "
+            // We use the input string $tokensUnrankedLeft instead of $tokensLeft to ensure it will not miss similar
+            // pairs when the prefix of S has been reduced in case of R ~= S, where some tokens are in S but not in R.
+            + "      let $actualPreLen := prefix-len-%s(len($tokensUnrankedLeft), %ff) - $lenLeft + len($tokensLeft) "
+            + "      for $prefixTokenLeft in subset-collection($tokensLeft, 0, $actualPreLen)) "
+            + "      , $prefixTokenLeft = $prefixTokenRight) "
+            + "let $sim := similarity-%s-prefix($lenRight, $tokensRight, $lenLeft, $tokensLeft, $prefixTokenLeft, %ff) "
+            + "where $sim >= %ff " + "/*+ hash*/ " + "group by %s, %s with $sim "
             //
             // -- -- -
             //
-            + "    ), $$LEFTPK = $idLeft)),  $$RIGHTPK = $idRight)";
+            + "    ), %s)),  %s)";
 
-    private Collection<LogicalVariable> liveVars = new HashSet<LogicalVariable>();
+    private static final String GROUPBY_LEFT = "$idLeft_%d := $$LEFTPK_1_%d";
+    private static final String GROUPBY_RIGHT = "$idRight_%d := $$RIGHTPK_1_%d";
+    private static final String JOIN_COND_LEFT = "$$LEFTPK_0_%d = $idLeft_%d";
+    private static final String JOIN_COND_RIGHT = "$$RIGHTPK_0_%d = $idRight_%d";
+    private static final String AQLPLUS_INNER_JOIN = "join";
+    private static final String AQLPLUS_LEFTOUTER_JOIN = "loj";
 
     @Override
     public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context)
             throws AlgebricksException {
+
         AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
-        // current opperator is join
+        // current operator should be a join.
         if (op.getOperatorTag() != LogicalOperatorTag.INNERJOIN
                 && op.getOperatorTag() != LogicalOperatorTag.LEFTOUTERJOIN) {
             return false;
         }
 
-        // Find GET_ITEM function.
+        // Finds GET_ITEM function in the join condition.
         AbstractBinaryJoinOperator joinOp = (AbstractBinaryJoinOperator) op;
-        Mutable<ILogicalExpression> expRef = joinOp.getCondition();
-        Mutable<ILogicalExpression> getItemExprRef = getSimilarityExpression(expRef);
+        Mutable<ILogicalExpression> exprRef = joinOp.getCondition();
+        Mutable<ILogicalExpression> getItemExprRef = getSimilarityExpression(exprRef);
         if (getItemExprRef == null) {
             return false;
         }
-        // Check if the GET_ITEM function is on one of the supported similarity-check functions.
+        // Checks if the GET_ITEM function is on the one of the supported similarity-check functions.
         AbstractFunctionCallExpression getItemFuncExpr = (AbstractFunctionCallExpression) getItemExprRef.getValue();
         Mutable<ILogicalExpression> argRef = getItemFuncExpr.getArguments().get(0);
         AbstractFunctionCallExpression simFuncExpr = (AbstractFunctionCallExpression) argRef.getValue();
         if (!simFuncs.contains(simFuncExpr.getFunctionIdentifier())) {
             return false;
         }
-        // Skip this rule based on annotations.
+        // Skips this rule based on annotations.
         if (simFuncExpr.getAnnotations().containsKey(IndexedNLJoinExpressionAnnotation.INSTANCE)) {
             return false;
         }
 
+        // Gets both input branches of fuzzy join.
         List<Mutable<ILogicalOperator>> inputOps = joinOp.getInputs();
         ILogicalOperator leftInputOp = inputOps.get(0).getValue();
         ILogicalOperator rightInputOp = inputOps.get(1).getValue();
 
-        List<Mutable<ILogicalExpression>> inputExps = simFuncExpr.getArguments();
+        List<Mutable<ILogicalExpression>> inputExprs = simFuncExpr.getArguments();
+        if (inputExprs.size() != 3) {
+            return false;
+        }
 
-        ILogicalExpression inputExp0 = inputExps.get(0).getValue();
-        ILogicalExpression inputExp1 = inputExps.get(1).getValue();
+        // Extracts Fuzzy similarity function.
+        ILogicalExpression leftOperatingExpr = inputExprs.get(0).getValue();
+        ILogicalExpression rightOperatingExpr = inputExprs.get(1).getValue();
+        ILogicalExpression thresholdConstantExpr = inputExprs.get(2).getValue();
 
-        // left and right expressions are variables
-        if (inputExp0.getExpressionTag() != LogicalExpressionTag.VARIABLE
-                || inputExp1.getExpressionTag() != LogicalExpressionTag.VARIABLE) {
+        // left and right expressions should be variables.
+        if (leftOperatingExpr.getExpressionTag() != LogicalExpressionTag.VARIABLE
+                || rightOperatingExpr.getExpressionTag() != LogicalExpressionTag.VARIABLE
+                || thresholdConstantExpr.getExpressionTag() != LogicalExpressionTag.CONSTANT) {
             return false;
         }
 
-        LogicalVariable inputVar0 = ((VariableReferenceExpression) inputExp0).getVariableReference();
-        LogicalVariable inputVar1 = ((VariableReferenceExpression) inputExp1).getVariableReference();
+        LogicalVariable inputVar0 = ((VariableReferenceExpression) leftOperatingExpr).getVariableReference();
+        LogicalVariable inputVar1 = ((VariableReferenceExpression) rightOperatingExpr).getVariableReference();
 
         LogicalVariable leftInputVar;
         LogicalVariable rightInputVar;
-
-        liveVars.clear();
+        Collection<LogicalVariable> liveVars = new HashSet<>();
         VariableUtilities.getLiveVariables(leftInputOp, liveVars);
         if (liveVars.contains(inputVar0)) {
             leftInputVar = inputVar0;
@@ -182,72 +205,217 @@ public class FuzzyJoinRule implements IAlgebraicRewriteRule {
             leftInputVar = inputVar1;
             rightInputVar = inputVar0;
         }
-
-        List<LogicalVariable> leftInputPKs = context.findPrimaryKey(leftInputVar);
-        List<LogicalVariable> rightInputPKs = context.findPrimaryKey(rightInputVar);
-        // Bail if primary keys could not be inferred.
-        if (leftInputPKs == null || rightInputPKs == null) {
-            return false;
-        }
-        // primary key has only one variable
-        if (leftInputPKs.size() != 1 || rightInputPKs.size() != 1) {
-            return false;
-        }
+        // leftInputPKs in currrentPKs keeps all PKs derived from the left branch in the current similarity fuzzyjoin.
+        List<LogicalVariable> leftInputPKs = findPrimaryKeysInSubplan(liveVars, context);
+        liveVars.clear();
+        VariableUtilities.getLiveVariables(rightInputOp, liveVars);
+        // rightInputPKs in currentPKs keeps all PKs derived from the right branch in the current similarity fuzzyjoin.
+        List<LogicalVariable> rightInputPKs = findPrimaryKeysInSubplan(liveVars, context);
         IAType leftType = (IAType) context.getOutputTypeEnvironment(leftInputOp).getVarType(leftInputVar);
-        IAType rightType = (IAType) context.getOutputTypeEnvironment(rightInputOp).getVarType(rightInputVar);
-        // left-hand side and right-hand side of "~=" has the same type
-        IAType left2 = TypeComputeUtils.getActualType(leftType);
-        IAType right2 = TypeComputeUtils.getActualType(rightType);
-        if (!left2.deepEqual(right2)) {
+        if (!isPrefixFuzzyJoin(context, leftInputOp, rightInputOp, rightInputVar, leftInputPKs, rightInputPKs,
+                leftType)) {
             return false;
         }
+
         //
         // -- - FIRE - --
         //
         MetadataProvider metadataProvider = ((MetadataProvider) context.getMetadataProvider());
+        // Steps 1 and 2. Generate the prefix-based fuzzy jon template.
+        String aqlPlus = generateAqlTemplate(metadataProvider, joinOp, simFuncExpr, leftInputPKs, leftType,
+                rightInputPKs, thresholdConstantExpr);
+        // Steps 3 and 4. Generate the prefix-based fuzzy join subplan.
+        ILogicalOperator outputOp = generatePrefixFuzzyJoinSubplan(context, metadataProvider, aqlPlus, leftInputOp,
+                leftInputPKs, leftInputVar, rightInputOp, rightInputPKs, rightInputVar);
+
+        // Step 5. Bind the plan to the parent op referred by the following opRef.
+        SelectOperator extraSelect;
+        if (getItemExprRef != exprRef) {
+            // more than one join condition
+            getItemExprRef.setValue(ConstantExpression.TRUE);
+            switch (joinOp.getJoinKind()) {
+                case INNER: {
+                    extraSelect = new SelectOperator(exprRef, false, null);
+                    extraSelect.setSourceLocation(exprRef.getValue().getSourceLocation());
+                    extraSelect.getInputs().add(new MutableObject<>(outputOp));
+                    outputOp = extraSelect;
+                    break;
+                }
+                case LEFT_OUTER: {
+                    LeftOuterJoinOperator topJoin = (LeftOuterJoinOperator) outputOp;
+                    setConditionForLeftOuterJoin(topJoin, exprRef);
+                    break;
+                }
+                default: {
+                    throw new IllegalStateException();
+                }
+            }
+        }
+        opRef.setValue(outputOp);
+        OperatorPropertiesUtil.typeOpRec(opRef, context);
+        return true;
+    }
+
+    /**
+     * To handle multiple fuzzy-join conditions on a same pair of datasets, this rule checks the PKs in bottom-up way.
+     * The previousPKs list incrementally maintains the PKs from a previous fuzzy-join operator's input branches.
+     * In addition, the given fuzzy-join operator has been successfully translated into a prefix-based fuzzy join
+     * sub-plan of the current fuzzy-join operator. There are two cases:
+     * 1. If the previousPKs list contains the currentPKs list (the PKs from the input branches of the current
+     * fuzzy-join operator), this means that the current fuzzy-join condition has no new input branch. This case
+     * SHOULD BE regarded as a SELECT over one of the previous fuzzy-joins.
+     * 2. Otherwise, we can apply this rule to the current fuzzy-join operator to a new prefix-based fuzzy-join plan.
+     */
+    private boolean isPrefixFuzzyJoin(IOptimizationContext context, ILogicalOperator leftInputOp,
+            ILogicalOperator rightInputOp, LogicalVariable rightInputVar, List<LogicalVariable> leftInputPKs,
+            List<LogicalVariable> rightInputPKs, IAType leftType) throws AlgebricksException {
+        Collection<LogicalVariable> currentPKs = new HashSet<>();
+        currentPKs.addAll(leftInputPKs);
+        currentPKs.addAll(rightInputPKs);
+        // If PKs derived from the both branches are SAME as that of a previous fuzzyjoin, we treat this fuzzyjoin
+        // condition as a select over that fuzzyjoin.
+        for (int i = 0; i < previousPKs.size(); i++) {
+            if (previousPKs.get(i).containsAll(currentPKs) && currentPKs.containsAll(previousPKs.get(i))) {
+                return false;
+            }
+        }
+
+        //Suppose we want to query on the same dataset on the different fields, i.e. A.a1 ~= B.b1 AND A.a2 ~= B.b2
+        //We conduct this query as a select over a fuzzyjoin based on the PK inclusion relationship.
+        previousPKs.add(currentPKs);
+        // Avoids the duplicated PK generation in findPrimaryKeysInSubplan, especially for multiway fuzzy join.
+        // Refer to fj-dblp-csx-hybrid.aql in the optimized tests for example.
+        IsomorphismUtilities.mergeHomogeneousPK(leftInputOp, leftInputPKs);
+        // Fails if primary keys could not be inferred.
+        if (leftInputPKs.isEmpty() || rightInputPKs.isEmpty()) {
+            return false;
+        }
+
+        IAType rightType = (IAType) context.getOutputTypeEnvironment(rightInputOp).getVarType(rightInputVar);
+        // left-hand side and right-hand side of fuzzyjoin should be the same type
+        IAType actualLeftType = TypeComputeUtils.getActualType(leftType);
+        IAType actualRightType = TypeComputeUtils.getActualType(rightType);
+        if (!actualLeftType.deepEqual(actualRightType)) {
+            return false;
+        }
+        return true;
+    }
+
+    private String generateAqlTemplate(MetadataProvider metadataProvider, AbstractBinaryJoinOperator joinOp,
+            AbstractFunctionCallExpression simFuncExpr, List<LogicalVariable> leftInputPKs, IAType leftType,
+            List<LogicalVariable> rightInputPKs, ILogicalExpression thresholdConstantExpr) throws AlgebricksException {
         FunctionIdentifier funcId = FuzzyUtils.getTokenizer(leftType.getTypeTag());
-        String tokenizer;
-        if (funcId == null) {
-            tokenizer = "";
-        } else {
+        String tokenizer = "";
+        if (funcId != null) {
             tokenizer = funcId.getName();
         }
 
-        float simThreshold = FuzzyUtils.getSimThreshold(metadataProvider);
-        String simFunction = FuzzyUtils.getSimFunction(metadataProvider);
+        String simFunction = FuzzyUtils.getSimFunction(simFuncExpr.getFunctionIdentifier());
+        float simThreshold;
+        ConstantExpression constExpr = (ConstantExpression) thresholdConstantExpr;
+        AsterixConstantValue constVal = (AsterixConstantValue) constExpr.getValue();
+        if (constVal.getObject().getType().equals(BuiltinType.AFLOAT)) {
+            simThreshold = ((AFloat) constVal.getObject()).getFloatValue();
+        } else {
+            simThreshold = FuzzyUtils.getSimThreshold(metadataProvider);
+        }
 
         // finalize AQL+ query
         String prepareJoin;
         switch (joinOp.getJoinKind()) {
-            case INNER: {
-                prepareJoin = "join" + AQLPLUS;
+            case INNER:
+                prepareJoin = AQLPLUS_INNER_JOIN + AQLPLUS;
                 break;
+            case LEFT_OUTER:
+                prepareJoin = AQLPLUS_LEFTOUTER_JOIN + AQLPLUS;
+                break;
+            default:
+                throw new CompilationException(ErrorCode.COMPILATION_INVALID_EXPRESSION);
+        }
+        String groupByLeft = "";
+        String joinCondLeft = "";
+        // Step2. By default, we triggered the prefix fuzzy join strategy, which needs to initialize the mapping from
+        // the shared token to the actual tuples of the both sides. left(right)InputPKs is used to extract those
+        // mappings from prefix tokens to tuples.
+        for (int i = 0; i < leftInputPKs.size(); i++) {
+            if (i > 0) {
+                groupByLeft += ", ";
+                joinCondLeft += " and ";
             }
-            case LEFT_OUTER: {
-                // TODO To make it work for Left Outer Joins, we should permute
-                // the #LEFT and #RIGHT at the top of the AQL+ query. But, when
-                // doing this, the
-                // fuzzyjoin/user-vis-int-vis-user-lot-aqlplus_1.aql (the one
-                // doing 3-way fuzzy joins) gives a different result. But even
-                // if we don't change the FuzzyJoinRule, permuting the for
-                // clauses in fuzzyjoin/user-vis-int-vis-user-lot-aqlplus_1.aql
-                // leads to different results, which suggests there is some
-                // other sort of bug.
-                return false;
-                // prepareJoin = "loj" + AQLPLUS;
-                // break;
-            }
-            default: {
-                throw new IllegalStateException();
-            }
+            groupByLeft += String.format(Locale.US, GROUPBY_LEFT, i, i);
+            joinCondLeft += String.format(Locale.US, JOIN_COND_LEFT, i, i);
         }
-        String aqlPlus = String.format(Locale.US, prepareJoin, tokenizer, tokenizer, simFunction, simThreshold,
-                tokenizer, tokenizer, simFunction, simThreshold, simFunction, simThreshold, simThreshold);
 
-        LogicalVariable leftPKVar = leftInputPKs.get(0);
-        LogicalVariable rightPKVar = rightInputPKs.get(0);
+        String groupByRight = "";
+        String joinCondRight = "";
+        for (int i = 0; i < rightInputPKs.size(); i++) {
+            if (i > 0) {
+                groupByRight += ", ";
+                joinCondRight += " and ";
+            }
+            groupByRight += String.format(Locale.US, GROUPBY_RIGHT, i, i);
+            joinCondRight += String.format(Locale.US, JOIN_COND_RIGHT, i, i);
+        }
+        return String.format(Locale.US, prepareJoin, tokenizer, tokenizer, simFunction, simThreshold, tokenizer,
+                tokenizer, simFunction, simThreshold, simFunction, simThreshold, simThreshold, groupByLeft,
+                groupByRight, joinCondRight, joinCondLeft);
+    }
 
+    private ILogicalOperator generatePrefixFuzzyJoinSubplan(IOptimizationContext context,
+            MetadataProvider metadataProvider, String aqlPlus, ILogicalOperator leftInputOp,
+            List<LogicalVariable> leftInputPKs, LogicalVariable leftInputVar, ILogicalOperator rightInputOp,
+            List<LogicalVariable> rightInputPKs, LogicalVariable rightInputVar) throws AlgebricksException {
+        // Step3. Translate the tokenizer, join condition and group by (shared token) as shown
+        // in the above AQLPLUS template.
         Counter counter = new Counter(context.getVarCounter());
+        // The translator will compile metadata internally. Run this compilation
+        // under the same transaction id as the "outer" compilation.
+        AqlPlusExpressionToPlanTranslator translator = new AqlPlusExpressionToPlanTranslator(metadataProvider, counter);
+
+        LogicalOperatorDeepCopyWithNewVariablesVisitor copyVisitor =
+                new LogicalOperatorDeepCopyWithNewVariablesVisitor(context, context);
+
+        // Step3.1. Substitute the variable references of the above AQLPLUS template with
+        // the actually attached variables.
+        translator.addOperatorToMetaScope(new VarIdentifier("##LEFT_0"), leftInputOp);
+        translator.addVariableToMetaScope(new VarIdentifier("$$LEFT_0"), leftInputVar);
+        for (int i = 0; i < leftInputPKs.size(); i++) {
+            translator.addVariableToMetaScope(new VarIdentifier("$$LEFTPK_0_" + i), leftInputPKs.get(i));
+        }
+
+        // Step3.2. right side again.
+        translator.addOperatorToMetaScope(new VarIdentifier("##RIGHT_0"), rightInputOp);
+        translator.addVariableToMetaScope(new VarIdentifier("$$RIGHT_0"), rightInputVar);
+        for (int i = 0; i < rightInputPKs.size(); i++) {
+            translator.addVariableToMetaScope(new VarIdentifier("$$RIGHTPK_0_" + i), rightInputPKs.get(i));
+        }
+
+        // Step3.3. the suffix 0-3 is used for identifying the different level of variable references.
+        ILogicalOperator leftInputOpCopy = copyVisitor.deepCopy(leftInputOp);
+        translator.addOperatorToMetaScope(new VarIdentifier("##LEFT_1"), leftInputOpCopy);
+        LogicalVariable leftInputVarCopy = copyVisitor.varCopy(leftInputVar);
+        translator.addVariableToMetaScope(new VarIdentifier("$$LEFT_1"), leftInputVarCopy);
+        for (int i = 0; i < leftInputPKs.size(); i++) {
+            leftInputVarCopy = copyVisitor.varCopy(leftInputPKs.get(i));
+            translator.addVariableToMetaScope(new VarIdentifier("$$LEFTPK_1_" + i), leftInputVarCopy);
+        }
+        copyVisitor.updatePrimaryKeys(context);
+        copyVisitor.reset();
+
+        // Notice: pick side to run Stage 1, currently always picks RIGHT side. It means that the right side will
+        // produce the token order as well as its own token list.
+        for (int i = SUBSET_RIGHT_INDEX; i <= LENGTH_RIGHT_INDEX; i++) {
+            translator.addOperatorToMetaScope(new VarIdentifier("##RIGHT_" + i), copyVisitor.deepCopy(rightInputOp));
+            LogicalVariable rightInputVarCopy = copyVisitor.varCopy(rightInputVar);
+            translator.addVariableToMetaScope(new VarIdentifier("$$RIGHT_" + i), rightInputVarCopy);
+            for (int j = 0; j < rightInputPKs.size(); j++) {
+                rightInputVarCopy = copyVisitor.varCopy(rightInputPKs.get(j));
+                translator.addVariableToMetaScope(new VarIdentifier("$$RIGHTPK_" + i + "_" + j), rightInputVarCopy);
+            }
+            copyVisitor.updatePrimaryKeys(context);
+            copyVisitor.reset();
+        }
+        counter.set(context.getVarCounter());
 
         AQLPlusParser parser = new AQLPlusParser(new StringReader(aqlPlus));
         parser.initScope();
@@ -256,115 +424,59 @@ public class FuzzyJoinRule implements IAlgebraicRewriteRule {
         try {
             clauses = parser.Clauses();
         } catch (ParseException e) {
-            throw new AlgebricksException(e);
+            throw CompilationException.create(ErrorCode.COMPILATION_TRANSLATION_ERROR, e);
         }
-        // The translator will compile metadata internally. Run this compilation
-        // under the same transaction id as the "outer" compilation.
-        AqlPlusExpressionToPlanTranslator translator = new AqlPlusExpressionToPlanTranslator(metadataProvider, counter);
-        context.setVarCounter(counter.get());
-
-        LogicalOperatorDeepCopyWithNewVariablesVisitor deepCopyVisitor =
-                new LogicalOperatorDeepCopyWithNewVariablesVisitor(context, context);
 
-        translator.addOperatorToMetaScope(new Identifier("#LEFT"), leftInputOp);
-        translator.addVariableToMetaScope(new Identifier("$$LEFT"), leftInputVar);
-        translator.addVariableToMetaScope(new Identifier("$$LEFTPK"), leftPKVar);
-
-        translator.addOperatorToMetaScope(new Identifier("#RIGHT"), rightInputOp);
-        translator.addVariableToMetaScope(new Identifier("$$RIGHT"), rightInputVar);
-        translator.addVariableToMetaScope(new Identifier("$$RIGHTPK"), rightPKVar);
-
-        translator.addOperatorToMetaScope(new Identifier("#LEFT_1"), deepCopyVisitor.deepCopy(leftInputOp));
-        translator.addVariableToMetaScope(new Identifier("$$LEFT_1"), deepCopyVisitor.varCopy(leftInputVar));
-        translator.addVariableToMetaScope(new Identifier("$$LEFTPK_1"), deepCopyVisitor.varCopy(leftPKVar));
-        deepCopyVisitor.updatePrimaryKeys(context);
-        deepCopyVisitor.reset();
-
-        // translator.addOperatorToMetaScope(new Identifier("#LEFT_2"),
-        // deepCopyVisitor.deepCopy(leftInputOp, null));
-        // translator.addVariableToMetaScope(new Identifier("$$LEFT_2"),
-        // deepCopyVisitor.varCopy(leftInputVar));
-        // translator.addVariableToMetaScope(new Identifier("$$LEFTPK_2"),
-        // deepCopyVisitor.varCopy(leftPKVar));
-        // deepCopyVisitor.updatePrimaryKeys(context);
-        // deepCopyVisitor.reset();
-        //
-        // translator.addOperatorToMetaScope(new Identifier("#LEFT_3"),
-        // deepCopyVisitor.deepCopy(leftInputOp, null));
-        // translator.addVariableToMetaScope(new Identifier("$$LEFT_3"),
-        // deepCopyVisitor.varCopy(leftInputVar));
-        // translator.addVariableToMetaScope(new Identifier("$$LEFTPK_3"),
-        // deepCopyVisitor.varCopy(leftPKVar));
-        // deepCopyVisitor.updatePrimaryKeys(context);
-        // deepCopyVisitor.reset();
-
-        translator.addOperatorToMetaScope(new Identifier("#RIGHT_1"), deepCopyVisitor.deepCopy(rightInputOp));
-        translator.addVariableToMetaScope(new Identifier("$$RIGHT_1"), deepCopyVisitor.varCopy(rightInputVar));
-        translator.addVariableToMetaScope(new Identifier("$$RIGHTPK_1"), deepCopyVisitor.varCopy(rightPKVar));
-        deepCopyVisitor.updatePrimaryKeys(context);
-        deepCopyVisitor.reset();
-
-        // TODO pick side to run Stage 1, currently always picks RIGHT side
-        translator.addOperatorToMetaScope(new Identifier("#RIGHT_2"), deepCopyVisitor.deepCopy(rightInputOp));
-        translator.addVariableToMetaScope(new Identifier("$$RIGHT_2"), deepCopyVisitor.varCopy(rightInputVar));
-        translator.addVariableToMetaScope(new Identifier("$$RIGHTPK_2"), deepCopyVisitor.varCopy(rightPKVar));
-        deepCopyVisitor.updatePrimaryKeys(context);
-        deepCopyVisitor.reset();
-
-        translator.addOperatorToMetaScope(new Identifier("#RIGHT_3"), deepCopyVisitor.deepCopy(rightInputOp));
-        translator.addVariableToMetaScope(new Identifier("$$RIGHT_3"), deepCopyVisitor.varCopy(rightInputVar));
-        translator.addVariableToMetaScope(new Identifier("$$RIGHTPK_3"), deepCopyVisitor.varCopy(rightPKVar));
-        deepCopyVisitor.updatePrimaryKeys(context);
-        deepCopyVisitor.reset();
-
-        ILogicalPlan plan = translator.translate(clauses);
+        // Step 4. The essential substitution with translator.
+        ILogicalPlan plan;
+        try {
+            plan = translator.translate(clauses);
+        } catch (CompilationException e) {
+            throw CompilationException.create(ErrorCode.COMPILATION_TRANSLATION_ERROR, e);
+        }
         context.setVarCounter(counter.get());
 
-        ILogicalOperator outputOp = plan.getRoots().get(0).getValue();
+        return plan.getRoots().get(0).getValue();
+    }
 
-        SelectOperator extraSelect = null;
-        if (getItemExprRef != expRef) {
-            // more than one join condition
-            getItemExprRef.setValue(ConstantExpression.TRUE);
-            switch (joinOp.getJoinKind()) {
-                case INNER: {
-                    extraSelect = new SelectOperator(expRef, false, null);
-                    extraSelect.setSourceLocation(expRef.getValue().getSourceLocation());
-                    extraSelect.getInputs().add(new MutableObject<ILogicalOperator>(outputOp));
-                    outputOp = extraSelect;
-                    break;
-                }
-                case LEFT_OUTER: {
-                    if (outputOp.getOperatorTag() != LogicalOperatorTag.LEFTOUTERJOIN) {
-                        throw new IllegalStateException();
-                    }
-                    LeftOuterJoinOperator topJoin = (LeftOuterJoinOperator) outputOp;
-                    topJoin.getCondition().setValue(expRef.getValue());
-                    break;
-                }
-                default: {
-                    throw new IllegalStateException();
-                }
-            }
+    // Since the generatePrefixFuzzyJoinSubplan generates the prefix-based join operators for the partial simJoin
+    // of expRef, we need to add the full condition expRef\getItemExprRef into the top-level operator of the plan.
+    // Notice: Any composite select on leftOuterJoin with fuzzyjoin condition inlined can be regarded as its example.
+    // Example: leftouterjoin-probe-pidx-with-join-edit-distance-check-idx_01.aql or with more extra conditions inlined.
+    private void setConditionForLeftOuterJoin(LeftOuterJoinOperator topJoin, Mutable<ILogicalExpression> expRef) {
+        // Combine the conditions of top join of aqlplus plan and the original join
+        AbstractFunctionCallExpression andFunc =
+                new ScalarFunctionCallExpression(FunctionUtil.getFunctionInfo(AlgebricksBuiltinFunctions.AND));
+
+        List<Mutable<ILogicalExpression>> conjs = new ArrayList<>();
+        if (topJoin.getCondition().getValue().splitIntoConjuncts(conjs)) {
+            andFunc.getArguments().addAll(conjs);
+        } else {
+            andFunc.getArguments().add(new MutableObject<>(topJoin.getCondition().getValue()));
         }
-        opRef.setValue(outputOp);
-        OperatorPropertiesUtil.typeOpRec(opRef, context);
-        return true;
+
+        List<Mutable<ILogicalExpression>> conjs2 = new ArrayList<>();
+        if (expRef.getValue().splitIntoConjuncts(conjs2)) {
+            andFunc.getArguments().addAll(conjs2);
+        } else {
+            andFunc.getArguments().add(expRef);
+        }
+        topJoin.getCondition().setValue(andFunc);
     }
 
     /**
      * Look for GET_ITEM function call.
      */
-    private Mutable<ILogicalExpression> getSimilarityExpression(Mutable<ILogicalExpression> expRef) {
-        ILogicalExpression exp = expRef.getValue();
+    private Mutable<ILogicalExpression> getSimilarityExpression(Mutable<ILogicalExpression> exprRef) {
+        ILogicalExpression exp = exprRef.getValue();
         if (exp.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
             AbstractFunctionCallExpression funcExpr = (AbstractFunctionCallExpression) exp;
             if (funcExpr.getFunctionIdentifier().equals(BuiltinFunctions.GET_ITEM)) {
-                return expRef;
+                return exprRef;
             }
             if (funcExpr.getFunctionIdentifier().equals(AlgebricksBuiltinFunctions.AND)) {
-                for (int i = 0; i < 2; i++) {
-                    Mutable<ILogicalExpression> expRefRet = getSimilarityExpression(funcExpr.getArguments().get(i));
+                for (Mutable<ILogicalExpression> arg : funcExpr.getArguments()) {
+                    Mutable<ILogicalExpression> expRefRet = getSimilarityExpression(arg);
                     if (expRefRet != null) {
                         return expRefRet;
                     }
@@ -374,9 +486,19 @@ public class FuzzyJoinRule implements IAlgebraicRewriteRule {
         return null;
     }
 
-    @Override
-    public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context)
-            throws AlgebricksException {
-        return false;
+    // To extract all the PKs of the liveVars referenced by on of the fuzzyjoin branch branch.
+    private List<LogicalVariable> findPrimaryKeysInSubplan(Collection<LogicalVariable> liveVars,
+            IOptimizationContext context) {
+        Collection<LogicalVariable> primaryKeys = new HashSet<>();
+        for (LogicalVariable var : liveVars) {
+            List<LogicalVariable> pks = context.findPrimaryKey(var);
+            if (pks != null) {
+                primaryKeys.addAll(pks);
+            }
+        }
+        if (primaryKeys.isEmpty()) {
+            return new ArrayList<>();
+        }
+        return new ArrayList<>(primaryKeys);
     }
 }