You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@asterixdb.apache.org by im...@apache.org on 2020/04/17 07:32:46 UTC

[asterixdb] 03/03: [NO ISSUE] Remove out-of-date tokenizer

This is an automated email from the ASF dual-hosted git repository.

imaxon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git

commit f85cee9e60c4a68bc2a7de87284c6b812794f76b
Author: Rui Guo <ru...@uci.edu>
AuthorDate: Mon Apr 13 10:04:19 2020 -0700

    [NO ISSUE] Remove out-of-date tokenizer
    
    The string-based Tokenizer should be replaced with the array-based
    IBinaryTokenizer. The Tokenizer is not used in the codebase in a
    meaningful way, so let's remove it to make things clear.
    
    Change-Id: I483604bf2a5e20c18f6224ac2a153667828dabfb
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/5763
    Integration-Tests: Jenkins <je...@fulliautomatix.ics.uci.edu>
    Tested-by: Jenkins <je...@fulliautomatix.ics.uci.edu>
    Reviewed-by: Ian Maxon <im...@uci.edu>
---
 .../asterix/fuzzyjoin/FuzzyJoinAppendLength.java   |  58 ---------
 .../asterix/fuzzyjoin/FuzzyJoinTokenize.java       | 133 ---------------------
 .../fuzzyjoin/similarity/SimilarityMetric.java     |   4 -
 .../similarity/SimilarityMetricJaccard.java        |  21 ----
 .../fuzzyjoin/tokenizer/NGramTokenizer.java        |  90 --------------
 .../apache/asterix/fuzzyjoin/tokenizer/Token.java  | 118 ------------------
 .../asterix/fuzzyjoin/tokenizer/Tokenizer.java     |  27 -----
 .../fuzzyjoin/tokenizer/TokenizerBuffered.java     |  30 -----
 .../tokenizer/TokenizerBufferedFactory.java        |  34 ------
 .../fuzzyjoin/tokenizer/TokenizerFactory.java      |  31 -----
 .../asterix/fuzzyjoin/tokenizer/WordTokenizer.java |  68 -----------
 .../fuzzyjoin/tokenizer/WordTokenizerBuffered.java |  92 --------------
 .../fuzzyjoin/tokenorder/IntTokenCountRank.java    |  28 -----
 .../tokenorder/IntTokenCountRankFrequency.java     |  58 ---------
 .../asterix/fuzzyjoin/tokenorder/IntTokenRank.java |  28 -----
 .../tokenorder/IntTokenRankFrequency.java          |  54 ---------
 .../asterix/fuzzyjoin/tokenorder/TokenLoad.java    |  61 ----------
 .../asterix/fuzzyjoin/tokenorder/TokenRank.java    |  31 -----
 .../tokenorder/TokenRankBufferedFrequency.java     |  75 ------------
 .../fuzzyjoin/tokenorder/TokenRankFrequency.java   |  61 ----------
 20 files changed, 1102 deletions(-)

diff --git a/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/FuzzyJoinAppendLength.java b/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/FuzzyJoinAppendLength.java
deleted file mode 100644
index 8be6f0c..0000000
--- a/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/FuzzyJoinAppendLength.java
+++ /dev/null
@@ -1,58 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.asterix.fuzzyjoin;
-
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.util.Collection;
-
-import org.apache.asterix.fuzzyjoin.tokenizer.Tokenizer;
-import org.apache.asterix.fuzzyjoin.tokenizer.TokenizerFactory;
-
-public class FuzzyJoinAppendLength {
-    public static void main(String args[]) throws IOException {
-        final String inputFileName = args[0];
-        final String outputFileName = args[1];
-
-        BufferedReader input = new BufferedReader(new FileReader(inputFileName));
-        BufferedWriter output = new BufferedWriter(new FileWriter(outputFileName));
-
-        Tokenizer tokenizer = TokenizerFactory.getTokenizer(FuzzyJoinConfig.TOKENIZER_VALUE,
-                FuzzyJoinConfig.WORD_SEPARATOR_REGEX, FuzzyJoinConfig.TOKEN_SEPARATOR);
-
-        int[] dataColumns = FuzzyJoinUtil.getDataColumns("2,3");
-
-        String line;
-        while ((line = input.readLine()) != null) {
-            String[] splits = line.split(FuzzyJoinConfig.RECORD_SEPARATOR_REGEX);
-            Collection<String> tokens =
-                    tokenizer.tokenize(FuzzyJoinUtil.getData(splits, dataColumns, FuzzyJoinConfig.TOKEN_SEPARATOR));
-            output.write(splits[0] + FuzzyJoinConfig.RECORD_SEPARATOR + splits[1] + FuzzyJoinConfig.RECORD_SEPARATOR
-                    + splits[2] + FuzzyJoinConfig.RECORD_SEPARATOR + splits[3] + FuzzyJoinConfig.RECORD_SEPARATOR
-                    + tokens.size() + "\n");
-        }
-
-        input.close();
-        output.close();
-    }
-}
diff --git a/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/FuzzyJoinTokenize.java b/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/FuzzyJoinTokenize.java
deleted file mode 100644
index 4c85f25..0000000
--- a/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/FuzzyJoinTokenize.java
+++ /dev/null
@@ -1,133 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.asterix.fuzzyjoin;
-
-import java.io.BufferedOutputStream;
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.FileOutputStream;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.HashMap;
-
-import org.apache.asterix.fuzzyjoin.tokenizer.Tokenizer;
-import org.apache.asterix.fuzzyjoin.tokenizer.TokenizerFactory;
-import org.apache.asterix.fuzzyjoin.tokenorder.TokenLoad;
-import org.apache.asterix.fuzzyjoin.tokenorder.TokenRank;
-import org.apache.asterix.fuzzyjoin.tokenorder.TokenRankFrequency;
-
-public class FuzzyJoinTokenize {
-    public static class TokenCount implements Comparable<Object> {
-        public String token;
-        public MutableInteger count;
-
-        public TokenCount(String token, MutableInteger count) {
-            this.token = token;
-            this.count = count;
-        }
-
-        @Override
-        public int compareTo(Object o) {
-            TokenCount tc = (TokenCount) o;
-            return count.compareTo(tc.count);
-        }
-
-        public String getToken() {
-            return token;
-        }
-
-        @Override
-        public String toString() {
-            return token + " " + count;
-        }
-    }
-
-    public static void main(String args[]) throws IOException {
-        final String inputFileName = args[0];
-        final String tokensFileName = args[1];
-        final String tokenizedFileName = args[2];
-
-        BufferedReader input = new BufferedReader(new FileReader(inputFileName));
-
-        Tokenizer tokenizer = TokenizerFactory.getTokenizer(FuzzyJoinConfig.TOKENIZER_VALUE,
-                FuzzyJoinConfig.WORD_SEPARATOR_REGEX, FuzzyJoinConfig.TOKEN_SEPARATOR);
-
-        int[] dataColumns = FuzzyJoinUtil.getDataColumns("2,3");
-
-        String line;
-        HashMap<String, MutableInteger> tokenCount = new HashMap<String, MutableInteger>();
-        while ((line = input.readLine()) != null) {
-            Collection<String> tokens =
-                    tokenizer.tokenize(FuzzyJoinUtil.getData(line.split(FuzzyJoinConfig.RECORD_SEPARATOR_REGEX),
-                            dataColumns, FuzzyJoinConfig.TOKEN_SEPARATOR));
-
-            for (String token : tokens) {
-                MutableInteger count = tokenCount.get(token);
-                if (count == null) {
-                    tokenCount.put(token, new MutableInteger(1));
-                } else {
-                    count.inc();
-                }
-            }
-        }
-
-        input.close();
-
-        ArrayList<TokenCount> tokenCounts = new ArrayList<TokenCount>();
-        tokenCount.forEach((key, value) -> tokenCounts.add(new TokenCount(key, value)));
-        Collections.sort(tokenCounts);
-
-        BufferedWriter outputTokens = new BufferedWriter(new FileWriter(tokensFileName));
-        for (TokenCount tc : tokenCounts) {
-            outputTokens.write(tc.getToken() + "\n");
-        }
-        outputTokens.close();
-
-        TokenRank tokenRank = new TokenRankFrequency();
-        TokenLoad tokenLoad = new TokenLoad(tokensFileName, tokenRank);
-        tokenLoad.loadTokenRank();
-
-        input = new BufferedReader(new FileReader(inputFileName));
-        LittleEndianIntOutputStream outputTokenized =
-                new LittleEndianIntOutputStream(new BufferedOutputStream(new FileOutputStream(tokenizedFileName)));
-        while ((line = input.readLine()) != null) {
-            String splits[] = line.split(FuzzyJoinConfig.RECORD_SEPARATOR_REGEX);
-            int rid = Integer.parseInt(splits[FuzzyJoinConfig.RECORD_KEY]);
-            outputTokenized.writeInt(rid);
-            Collection<String> tokens =
-                    tokenizer.tokenize(FuzzyJoinUtil.getData(splits, dataColumns, FuzzyJoinConfig.TOKEN_SEPARATOR));
-            Collection<Integer> tokensRanked = tokenRank.getTokenRanks(tokens);
-            outputTokenized.writeInt(tokensRanked.size());
-            for (Integer token : tokensRanked) {
-                outputTokenized.writeInt(token);
-            }
-            // for (int i = 0; i < tokens.size() - tokensRanked.size(); i++) {
-            // outputTokenized.writeInt(Integer.MAX_VALUE);
-            // }
-        }
-
-        input.close();
-        outputTokenized.close();
-    }
-}
diff --git a/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/similarity/SimilarityMetric.java b/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/similarity/SimilarityMetric.java
index 3348d4c..1133246 100644
--- a/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/similarity/SimilarityMetric.java
+++ b/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/similarity/SimilarityMetric.java
@@ -19,7 +19,6 @@
 
 package org.apache.asterix.fuzzyjoin.similarity;
 
-import org.apache.asterix.fuzzyjoin.tokenizer.Tokenizer;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.data.std.util.ISequenceIterator;
 
@@ -118,7 +117,4 @@ public abstract class SimilarityMetric {
 
     public abstract float getSimilarity(int[] tokensX, int startX, int lengthX, int[] tokensY, int startY, int lengthY);
 
-    public abstract float getSimilarity(int[] tokensX, int[] tokensY);
-
-    public abstract float getSimilarity(String stringX, String stringY, Tokenizer tokenizer);
 }
diff --git a/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/similarity/SimilarityMetricJaccard.java b/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/similarity/SimilarityMetricJaccard.java
index 63d3077..f72400f 100644
--- a/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/similarity/SimilarityMetricJaccard.java
+++ b/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/similarity/SimilarityMetricJaccard.java
@@ -22,7 +22,6 @@ package org.apache.asterix.fuzzyjoin.similarity;
 import java.util.Set;
 import java.util.TreeSet;
 
-import org.apache.asterix.fuzzyjoin.tokenizer.Tokenizer;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.data.std.util.ISequenceIterator;
 
@@ -82,24 +81,4 @@ public class SimilarityMetricJaccard extends SimilarityMetric implements IGeneri
         return (float) intersectionSize / (totalSize - intersectionSize);
     }
 
-    @Override
-    public float getSimilarity(int[] tokensX, int[] tokensY) {
-        return getSimilarity(tokensX, 0, tokensX.length, tokensY, 0, tokensY.length);
-    }
-
-    @Override
-    public float getSimilarity(String stringX, String stringY, Tokenizer tokenizer) {
-        Set<String> setX = new TreeSet<String>();
-        for (String token : tokenizer.tokenize(stringX)) {
-            setX.add(token);
-        }
-        Set<String> setY = new TreeSet<String>();
-        for (String token : tokenizer.tokenize(stringY)) {
-            setY.add(token);
-        }
-        int lengthX = setX.size();
-        int lengthY = setY.size();
-        setX.retainAll(setY);
-        return ((float) setX.size()) / (lengthX + lengthY - setX.size());
-    }
 }
diff --git a/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/NGramTokenizer.java b/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/NGramTokenizer.java
deleted file mode 100644
index 5594e43..0000000
--- a/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/NGramTokenizer.java
+++ /dev/null
@@ -1,90 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.asterix.fuzzyjoin.tokenizer;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-
-public class NGramTokenizer implements Tokenizer {
-
-    /**
-     *
-     */
-    private static final long serialVersionUID = 1L;
-
-    public static void main(String args[]) {
-        Tokenizer tokenizer = new NGramTokenizer();
-        String a = "hadoopoop";
-        System.out.println(a + ":" + tokenizer.tokenize(a));
-    }
-
-    private final int gramLength;
-
-    /**
-     * padding used in q gram calculation.
-     */
-    private final char QGRAMENDPADDING = '$';
-
-    /**
-     * padding used in q gram calculation.
-     */
-    private final char QGRAMSTARTPADDING = '$';
-
-    public NGramTokenizer() {
-        gramLength = 3;
-    }
-
-    public NGramTokenizer(int gramLength) {
-        this.gramLength = gramLength;
-    }
-
-    private StringBuffer getAdjustedString(String input) {
-        final StringBuffer adjustedString = new StringBuffer();
-        for (int i = 0; i < gramLength - 1; i++) {
-            adjustedString.append(QGRAMSTARTPADDING);
-        }
-        adjustedString.append(input);
-        for (int i = 0; i < gramLength - 1; i++) {
-            adjustedString.append(QGRAMENDPADDING);
-        }
-        return adjustedString;
-    }
-
-    public List<String> tokenize(String input) {
-        final ArrayList<String> returnVect = new ArrayList<String>();
-        final StringBuffer adjustedString = getAdjustedString(input);
-        int curPos = 0;
-        final int length = adjustedString.length() - (gramLength - 1);
-        final HashMap<String, Integer> grams = new HashMap<String, Integer>();
-        while (curPos < length) {
-            final String term = adjustedString.substring(curPos, curPos + gramLength);
-            Integer count = grams.get(term);
-            if (count == null) {
-                count = new Integer(0);
-            }
-            count++;
-            grams.put(term, count);
-            returnVect.add(term + count);
-            curPos++;
-        }
-        return returnVect;
-    }
-}
diff --git a/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/Token.java b/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/Token.java
deleted file mode 100644
index 720d269..0000000
--- a/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/Token.java
+++ /dev/null
@@ -1,118 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.asterix.fuzzyjoin.tokenizer;
-
-import java.io.Serializable;
-
-public class Token implements Serializable {
-    /**
-     *
-     */
-    private static final long serialVersionUID = 1L;
-
-    private CharSequence data;
-    private int start;
-    private int length;
-    private int count;
-
-    /** Cache the hash code for the string */
-    private int hash; // Default to 0
-
-    public Token() {
-    }
-
-    public Token(CharSequence data, int start, int length, int count) {
-        set(data, start, length, count);
-    }
-
-    @Override
-    public boolean equals(Object o) {
-        if (o == null) {
-            return false;
-        }
-        if (!(o instanceof Token)) {
-            return false;
-        }
-        Token t = (Token) o;
-        if (t.length != length) {
-            return false;
-        }
-        for (int i = 0; i < length; i++) {
-            if (t.data.charAt(t.start + i) != data.charAt(start + i)) {
-                return false;
-            }
-        }
-        return true;
-    }
-
-    public CharSequence getCharSequence() {
-        return data;
-    }
-
-    public int getCount() {
-        return count;
-    }
-
-    public int getLength() {
-        return length;
-    }
-
-    public int getStart() {
-        return start;
-    }
-
-    @Override
-    public int hashCode() {
-        int h = hash;
-        if (h == 0 && length > 0) {
-            for (int i = 0; i < length; i++) {
-                h = 31 * h + data.charAt(start + i);
-            }
-            h = 31 * h + count;
-            hash = h;
-        }
-        return h;
-    }
-
-    public int length() {
-        return length;
-    }
-
-    public void set(CharSequence data, int start, int length, int count) {
-        this.data = data;
-        this.start = start;
-        this.length = length;
-        this.count = count;
-        hash = 0;
-    }
-
-    public void set(String data, int count) {
-        this.data = data;
-        start = 0;
-        length = data.length();
-        this.count = count;
-        hash = 0;
-    }
-
-    @Override
-    public String toString() {
-        return "(" + data.subSequence(start, start + length) + ", " + count + ")";
-    }
-}
diff --git a/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/Tokenizer.java b/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/Tokenizer.java
deleted file mode 100644
index 71078d5..0000000
--- a/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/Tokenizer.java
+++ /dev/null
@@ -1,27 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.asterix.fuzzyjoin.tokenizer;
-
-import java.io.Serializable;
-import java.util.List;
-
-public interface Tokenizer extends Serializable {
-    public List<String> tokenize(String text);
-}
diff --git a/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/TokenizerBuffered.java b/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/TokenizerBuffered.java
deleted file mode 100644
index 19fcf18..0000000
--- a/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/TokenizerBuffered.java
+++ /dev/null
@@ -1,30 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.asterix.fuzzyjoin.tokenizer;
-
-public interface TokenizerBuffered {
-    public void advance();
-
-    public boolean end();
-
-    public Token getToken();
-
-    public void reset();
-}
diff --git a/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/TokenizerBufferedFactory.java b/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/TokenizerBufferedFactory.java
deleted file mode 100644
index 2f4e8c6..0000000
--- a/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/TokenizerBufferedFactory.java
+++ /dev/null
@@ -1,34 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.asterix.fuzzyjoin.tokenizer;
-
-public class TokenizerBufferedFactory {
-    public static TokenizerBuffered getTokenizer(String tokenizer, StringBuilder buffer) {
-        if (tokenizer.equals("Word")) {
-            return new WordTokenizerBuffered(buffer);
-        }
-        throw new RuntimeException("Unknown tokenizer \"" + tokenizer + "\".");
-    }
-
-    public static boolean isSeparator(char c) {
-        return !(Character.isLetterOrDigit(c) || Character.getType(c) == Character.OTHER_LETTER
-                || Character.getType(c) == Character.OTHER_NUMBER);
-    }
-}
diff --git a/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/TokenizerFactory.java b/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/TokenizerFactory.java
deleted file mode 100644
index 9b1856a..0000000
--- a/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/TokenizerFactory.java
+++ /dev/null
@@ -1,31 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.asterix.fuzzyjoin.tokenizer;
-
-public class TokenizerFactory {
-    public static Tokenizer getTokenizer(String tokenizer, String wordSeparator, char tokenSeparator) {
-        if (tokenizer.equals("NGram")) {
-            return new NGramTokenizer();
-        } else if (tokenizer.equals("Word")) {
-            return new WordTokenizer(wordSeparator, tokenSeparator);
-        }
-        throw new RuntimeException("Unknown tokenizer \"" + tokenizer + "\".");
-    }
-}
diff --git a/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/WordTokenizer.java b/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/WordTokenizer.java
deleted file mode 100644
index fa0bfe7..0000000
--- a/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/WordTokenizer.java
+++ /dev/null
@@ -1,68 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.asterix.fuzzyjoin.tokenizer;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-
-public class WordTokenizer implements Tokenizer {
-
-    /**
-     *
-     */
-    private static final long serialVersionUID = 1L;
-
-    public static void main(String args[]) {
-        Tokenizer tokenizer = new WordTokenizer("_", '_');
-        String a = "hadoop_rocks_in_java";
-        System.out.println(a + ":" + tokenizer.tokenize(a));
-    }
-
-    private final String wordSeparator;
-    private final char tokenSeparator;
-
-    public WordTokenizer() {
-        this(" ", '_');
-    }
-
-    public WordTokenizer(String wordSeparator, char tokenSeparator) {
-        this.wordSeparator = wordSeparator;
-        this.tokenSeparator = tokenSeparator;
-    }
-
-    public List<String> tokenize(String input) {
-        final ArrayList<String> returnVect = new ArrayList<String>();
-        final HashMap<String, Integer> tokens = new HashMap<String, Integer>();
-        for (String term : input.split(wordSeparator)) {
-            if (term.length() == 0) {
-                continue;
-            }
-            Integer count = tokens.get(term);
-            if (count == null) {
-                count = 0;
-            }
-            count++;
-            tokens.put(term, count);
-            returnVect.add(term + tokenSeparator + count);
-        }
-        return returnVect;
-    }
-}
diff --git a/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/WordTokenizerBuffered.java b/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/WordTokenizerBuffered.java
deleted file mode 100644
index 29206f9..0000000
--- a/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/WordTokenizerBuffered.java
+++ /dev/null
@@ -1,92 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.asterix.fuzzyjoin.tokenizer;
-
-import org.apache.asterix.fuzzyjoin.IntArray;
-
-public class WordTokenizerBuffered implements TokenizerBuffered {
-
-    private final StringBuilder buffer;
-    private int index;
-    private final Token token;
-
-    private final IntArray tokensStart, tokensLength;
-
-    public WordTokenizerBuffered(StringBuilder buffer) {
-        this.buffer = buffer;
-        token = new Token();
-        tokensStart = new IntArray();
-        tokensLength = new IntArray();
-        reset();
-    }
-
-    @Override
-    public void advance() {
-        while (index < buffer.length() && TokenizerBufferedFactory.isSeparator(buffer.charAt(index))) {
-            index++;
-        }
-        int start = index;
-        while (index < buffer.length() && !TokenizerBufferedFactory.isSeparator(buffer.charAt(index))) {
-            buffer.setCharAt(index, Character.toLowerCase(buffer.charAt(index)));
-            index++;
-        }
-        int length = index - start;
-        int count = 1;
-        if (length > 0) {
-            // search if we got the same token before
-            for (int i = 0; i < tokensStart.length(); ++i) {
-                if (length == tokensLength.get(i)) {
-                    int tokenStart = tokensStart.get(i);
-                    count++; // assume we found it
-                    for (int j = 0; j < length; ++j) {
-                        if (buffer.charAt(start + j) != buffer.charAt(tokenStart + j)) {
-                            count--; // token not found
-                            break;
-                        }
-                    }
-                }
-            }
-            // add the new token to the list of seen tokens
-            tokensStart.add(start);
-            tokensLength.add(length);
-        }
-        // set token
-        token.set(buffer, start, length, count);
-    }
-
-    @Override
-    public boolean end() {
-        return token.length() <= 0;
-    }
-
-    @Override
-    public Token getToken() {
-        return token;
-    }
-
-    @Override
-    public void reset() {
-        index = 0;
-        tokensStart.reset();
-        tokensLength.reset();
-        advance();
-    }
-
-}
diff --git a/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenorder/IntTokenCountRank.java b/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenorder/IntTokenCountRank.java
deleted file mode 100644
index 90f8c6a..0000000
--- a/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenorder/IntTokenCountRank.java
+++ /dev/null
@@ -1,28 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.asterix.fuzzyjoin.tokenorder;
-
-import java.io.Serializable;
-
-public interface IntTokenCountRank extends Serializable {
-    public int add(int token, int count);
-
-    public int getRank(int token, int count);
-}
diff --git a/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenorder/IntTokenCountRankFrequency.java b/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenorder/IntTokenCountRankFrequency.java
deleted file mode 100644
index d54c7d6..0000000
--- a/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenorder/IntTokenCountRankFrequency.java
+++ /dev/null
@@ -1,58 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.asterix.fuzzyjoin.tokenorder;
-
-import java.util.HashMap;
-
-import org.apache.asterix.fuzzyjoin.IntPair;
-
-public class IntTokenCountRankFrequency implements IntTokenCountRank {
-    /**
-     *
-     */
-    private static final long serialVersionUID = 1L;
-
-    private final HashMap<IntPair, Integer> ranksMap = new HashMap<IntPair, Integer>();
-    private final IntPair tmpPair = new IntPair();
-    private int crtRank = 0;
-
-    @Override
-    public int add(int token, int count) {
-        int prevRank = crtRank;
-        ranksMap.put(new IntPair(token, count), prevRank);
-        crtRank++;
-        return prevRank;
-    }
-
-    @Override
-    public int getRank(int token, int count) {
-        tmpPair.set(token, count);
-        Integer rank = ranksMap.get(tmpPair);
-        if (rank == null) {
-            return -1;
-        }
-        return rank;
-    }
-
-    @Override
-    public String toString() {
-        return "[" + crtRank + ",\n " + ranksMap + "\n]";
-    }
-}
diff --git a/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenorder/IntTokenRank.java b/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenorder/IntTokenRank.java
deleted file mode 100644
index b8e2082..0000000
--- a/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenorder/IntTokenRank.java
+++ /dev/null
@@ -1,28 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.asterix.fuzzyjoin.tokenorder;
-
-import java.io.Serializable;
-
-public interface IntTokenRank extends Serializable {
-    public int add(int token);
-
-    public int getRank(int token);
-}
diff --git a/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenorder/IntTokenRankFrequency.java b/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenorder/IntTokenRankFrequency.java
deleted file mode 100644
index 08d1c93..0000000
--- a/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenorder/IntTokenRankFrequency.java
+++ /dev/null
@@ -1,54 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.asterix.fuzzyjoin.tokenorder;
-
-import java.util.HashMap;
-
-public class IntTokenRankFrequency implements IntTokenRank {
-    /**
-     *
-     */
-    private static final long serialVersionUID = 1L;
-
-    private final HashMap<Integer, Integer> ranksMap = new HashMap<Integer, Integer>();
-    private int crtRank = 0;
-
-    @Override
-    public int add(int token) {
-        int prevRank = crtRank;
-        ranksMap.put(token, prevRank);
-        crtRank++;
-        return prevRank;
-    }
-
-    @Override
-    public int getRank(int token) {
-        Integer rank = ranksMap.get(token);
-        if (rank == null) {
-            return -1;
-        }
-        return rank;
-    }
-
-    @Override
-    public String toString() {
-        return "[" + crtRank + ",\n " + ranksMap + "\n]";
-    }
-}
diff --git a/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenorder/TokenLoad.java b/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenorder/TokenLoad.java
deleted file mode 100644
index 3578d94..0000000
--- a/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenorder/TokenLoad.java
+++ /dev/null
@@ -1,61 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.asterix.fuzzyjoin.tokenorder;
-
-import java.io.BufferedReader;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.io.Serializable;
-
-import org.apache.asterix.fuzzyjoin.FuzzyJoinConfig;
-
-public class TokenLoad implements Serializable {
-    private final String path;
-    private final TokenRank rank;
-
-    public TokenLoad(String path, TokenRank rank) {
-        this.path = path;
-        this.rank = rank;
-    }
-
-    public void loadTokenRank() {
-        loadTokenRank(1);
-    }
-
-    public void loadTokenRank(int factor) {
-        try (BufferedReader fis = new BufferedReader(
-                // new FileReader(path.toString())
-                new InputStreamReader(new FileInputStream(path), "UTF-8"))) {
-            String token = null;
-            while ((token = fis.readLine()) != null) {
-                rank.add(token);
-                // only used when increasing the token dictionary
-                for (int i = 1; i < factor; i++) {
-                    // remove _COUNT at the end of the token (it is removed in
-                    // the new records anyway)
-                    rank.add(token.split(FuzzyJoinConfig.TOKEN_SEPARATOR_REGEX)[0] + i);
-                }
-            }
-        } catch (IOException ioe) {
-            throw new RuntimeException(ioe);
-        }
-    }
-}
diff --git a/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenorder/TokenRank.java b/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenorder/TokenRank.java
deleted file mode 100644
index 42cdfa7..0000000
--- a/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenorder/TokenRank.java
+++ /dev/null
@@ -1,31 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.asterix.fuzzyjoin.tokenorder;
-
-import java.io.Serializable;
-import java.util.Collection;
-
-public interface TokenRank extends Serializable {
-    public int add(String token);
-
-    public Integer getRank(String token);
-
-    public Collection<Integer> getTokenRanks(Iterable<String> tokens);
-}
diff --git a/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenorder/TokenRankBufferedFrequency.java b/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenorder/TokenRankBufferedFrequency.java
deleted file mode 100644
index 57fc325..0000000
--- a/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenorder/TokenRankBufferedFrequency.java
+++ /dev/null
@@ -1,75 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.asterix.fuzzyjoin.tokenorder;
-
-import java.util.Collection;
-import java.util.HashMap;
-
-import org.apache.asterix.fuzzyjoin.tokenizer.Token;
-
-public class TokenRankBufferedFrequency implements TokenRank {
-    /**
-     *
-     */
-    private static final long serialVersionUID = 1L;
-
-    private final HashMap<Token, Integer> ranksMap = new HashMap<Token, Integer>();
-    private int crtRank = 0;
-
-    public int add(String stringWithCount) {
-        int end = stringWithCount.lastIndexOf('_');
-        int count = 0;
-        for (int i = end + 1; i < stringWithCount.length(); ++i) {
-            count = count * 10 + (stringWithCount.charAt(i) - '0');
-        }
-        return add(stringWithCount.substring(0, end), count);
-    }
-
-    public int add(String string, int count) {
-        Token token = new Token(string, 0, string.length(), count);
-        return add(token);
-    }
-
-    public int add(Token token) {
-        int prevRank = crtRank;
-        ranksMap.put(token, prevRank);
-        crtRank++;
-        return prevRank;
-    }
-
-    @Override
-    public Integer getRank(String token) {
-        throw new UnsupportedOperationException();
-    }
-
-    public Integer getRank(Token token) {
-        return ranksMap.get(token);
-    }
-
-    @Override
-    public Collection<Integer> getTokenRanks(Iterable<String> tokens) {
-        throw new UnsupportedOperationException();
-    }
-
-    @Override
-    public String toString() {
-        return "[" + crtRank + ",\n " + ranksMap + "\n]";
-    }
-}
diff --git a/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenorder/TokenRankFrequency.java b/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenorder/TokenRankFrequency.java
deleted file mode 100644
index 97b9503..0000000
--- a/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenorder/TokenRankFrequency.java
+++ /dev/null
@@ -1,61 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.asterix.fuzzyjoin.tokenorder;
-
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.TreeSet;
-
-public class TokenRankFrequency implements TokenRank {
-    /**
-     *
-     */
-    private static final long serialVersionUID = 1L;
-
-    private final HashMap<String, Integer> ranksMap = new HashMap<String, Integer>();
-    private int crtRank = 0;
-
-    public int add(String token) {
-        int prevRank = crtRank;
-        ranksMap.put(token, prevRank);
-        crtRank++;
-        return prevRank;
-    }
-
-    public Integer getRank(String token) {
-        return ranksMap.get(token);
-    }
-
-    public Collection<Integer> getTokenRanks(Iterable<String> tokens) {
-        TreeSet<Integer> ranksCol = new TreeSet<Integer>();
-        for (String token : tokens) {
-            Integer rank = getRank(token);
-            if (rank != null) {
-                ranksCol.add(rank);
-            }
-        }
-        return ranksCol;
-    }
-
-    @Override
-    public String toString() {
-        return "[" + crtRank + ",\n " + ranksMap + "\n]";
-    }
-}