You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@asterixdb.apache.org by mb...@apache.org on 2018/03/14 04:08:17 UTC

[4/7] asterixdb git commit: [ASTERIXDB-2330][*DB][RT] Add IFunctionRegistrant for dynamic function registration

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/b8307794/asterixdb/asterix-fuzzyjoin/src/main/resources/META-INF/services/org.apache.asterix.om.functions.IFunctionRegistrant
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-fuzzyjoin/src/main/resources/META-INF/services/org.apache.asterix.om.functions.IFunctionRegistrant b/asterixdb/asterix-fuzzyjoin/src/main/resources/META-INF/services/org.apache.asterix.om.functions.IFunctionRegistrant
new file mode 100644
index 0000000..c981de2
--- /dev/null
+++ b/asterixdb/asterix-fuzzyjoin/src/main/resources/META-INF/services/org.apache.asterix.om.functions.IFunctionRegistrant
@@ -0,0 +1,20 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+org.apache.asterix.runtime.FuzzyJoinFunctionRegistrant

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/b8307794/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/IFunctionCollection.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/IFunctionCollection.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/IFunctionCollection.java
new file mode 100644
index 0000000..90c742c
--- /dev/null
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/IFunctionCollection.java
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.om.functions;
+
+import java.io.Serializable;
+
+public interface IFunctionCollection extends Serializable {
+    void add(IFunctionDescriptorFactory descriptorFactory);
+
+    void addGenerated(IFunctionDescriptorFactory descriptorFactory);
+}

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/b8307794/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/IFunctionRegistrant.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/IFunctionRegistrant.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/IFunctionRegistrant.java
new file mode 100644
index 0000000..2fa83fc
--- /dev/null
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/IFunctionRegistrant.java
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.om.functions;
+
+import java.io.Serializable;
+
+public interface IFunctionRegistrant extends Serializable {
+    void register(IFunctionCollection collection);
+}

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/b8307794/asterixdb/asterix-om/src/main/java/org/apache/asterix/runtime/evaluators/base/AbstractScalarFunctionDynamicDescriptor.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/runtime/evaluators/base/AbstractScalarFunctionDynamicDescriptor.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/runtime/evaluators/base/AbstractScalarFunctionDynamicDescriptor.java
new file mode 100644
index 0000000..c48fff1
--- /dev/null
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/runtime/evaluators/base/AbstractScalarFunctionDynamicDescriptor.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.runtime.evaluators.base;
+
+import org.apache.asterix.common.functions.FunctionDescriptorTag;
+import org.apache.asterix.om.functions.AbstractFunctionDescriptor;
+
+public abstract class AbstractScalarFunctionDynamicDescriptor extends AbstractFunctionDescriptor {
+
+    private static final long serialVersionUID = 1L;
+
+    @Override
+    public FunctionDescriptorTag getFunctionDescriptorTag() {
+        return FunctionDescriptorTag.SCALAR;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/b8307794/asterixdb/asterix-runtime/pom.xml
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-runtime/pom.xml b/asterixdb/asterix-runtime/pom.xml
index 239b01a..22d1bfc 100644
--- a/asterixdb/asterix-runtime/pom.xml
+++ b/asterixdb/asterix-runtime/pom.xml
@@ -92,12 +92,6 @@
       <scope>compile</scope>
     </dependency>
     <dependency>
-      <groupId>org.apache.asterix</groupId>
-      <artifactId>asterix-fuzzyjoin</artifactId>
-      <version>${project.version}</version>
-      <scope>compile</scope>
-    </dependency>
-    <dependency>
       <groupId>org.apache.hyracks</groupId>
       <artifactId>hyracks-storage-am-btree</artifactId>
     </dependency>

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/b8307794/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/base/AbstractScalarFunctionDynamicDescriptor.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/base/AbstractScalarFunctionDynamicDescriptor.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/base/AbstractScalarFunctionDynamicDescriptor.java
deleted file mode 100644
index c48fff1..0000000
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/base/AbstractScalarFunctionDynamicDescriptor.java
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.runtime.evaluators.base;
-
-import org.apache.asterix.common.functions.FunctionDescriptorTag;
-import org.apache.asterix.om.functions.AbstractFunctionDescriptor;
-
-public abstract class AbstractScalarFunctionDynamicDescriptor extends AbstractFunctionDescriptor {
-
-    private static final long serialVersionUID = 1L;
-
-    @Override
-    public FunctionDescriptorTag getFunctionDescriptorTag() {
-        return FunctionDescriptorTag.SCALAR;
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/b8307794/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/EditDistanceCheckEvaluator.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/EditDistanceCheckEvaluator.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/EditDistanceCheckEvaluator.java
deleted file mode 100644
index dece292..0000000
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/EditDistanceCheckEvaluator.java
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.runtime.evaluators.common;
-
-import java.io.IOException;
-
-import org.apache.asterix.builders.OrderedListBuilder;
-import org.apache.asterix.common.exceptions.ErrorCode;
-import org.apache.asterix.common.exceptions.RuntimeDataException;
-import org.apache.asterix.formats.nontagged.SerializerDeserializerProvider;
-import org.apache.asterix.om.base.ABoolean;
-import org.apache.asterix.om.functions.BuiltinFunctions;
-import org.apache.asterix.om.types.AOrderedListType;
-import org.apache.asterix.om.types.ATypeTag;
-import org.apache.asterix.om.types.BuiltinType;
-import org.apache.asterix.om.types.EnumDeserializer;
-import org.apache.asterix.om.types.hierachy.ATypeHierarchy;
-import org.apache.asterix.runtime.exceptions.TypeMismatchException;
-import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator;
-import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.data.std.api.IPointable;
-import org.apache.hyracks.data.std.primitive.VoidPointable;
-import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
-import org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
-
-public class EditDistanceCheckEvaluator extends EditDistanceEvaluator {
-
-    protected final IScalarEvaluator edThreshEval;
-    protected int edThresh;
-    private final IPointable argPtrThreshold = new VoidPointable();
-    protected final OrderedListBuilder listBuilder;
-    protected ArrayBackedValueStorage listItemVal;
-    @SuppressWarnings("unchecked")
-    protected final ISerializerDeserializer<ABoolean> booleanSerde =
-            SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(BuiltinType.ABOOLEAN);
-
-    public EditDistanceCheckEvaluator(IScalarEvaluatorFactory[] args, IHyracksTaskContext context)
-            throws HyracksDataException {
-        super(args, context);
-        edThreshEval = args[2].createScalarEvaluator(context);
-        listBuilder = new OrderedListBuilder();
-        listItemVal = new ArrayBackedValueStorage();
-    }
-
-    @Override
-    public void evaluate(IFrameTupleReference tuple, IPointable result) throws HyracksDataException {
-        resultStorage.reset();
-        firstStringEval.evaluate(tuple, argPtr1);
-        firstTypeTag =
-                EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(argPtr1.getByteArray()[argPtr1.getStartOffset()]);
-        secondStringEval.evaluate(tuple, argPtr2);
-        secondTypeTag =
-                EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(argPtr2.getByteArray()[argPtr2.getStartOffset()]);
-        edThreshEval.evaluate(tuple, argPtrThreshold);
-
-        if (!checkArgTypes(firstTypeTag, secondTypeTag)) {
-            result.set(resultStorage);
-            return;
-        }
-        try {
-            edThresh = ATypeHierarchy.getIntegerValue(BuiltinFunctions.EDIT_DISTANCE_CHECK.getName(), 2,
-                    argPtrThreshold.getByteArray(), argPtrThreshold.getStartOffset());
-            if (edThresh < 0) {
-                throw new RuntimeDataException(ErrorCode.NEGATIVE_VALUE, BuiltinFunctions.EDIT_DISTANCE_CHECK.getName(),
-                        3, edThresh);
-            }
-            editDistance = computeResult(argPtr1, argPtr2, firstTypeTag);
-            writeResult(editDistance);
-        } catch (IOException e) {
-            throw new HyracksDataException(e);
-        }
-        result.set(resultStorage);
-    }
-
-    @Override
-    protected int computeResult(IPointable left, IPointable right, ATypeTag argType) throws HyracksDataException {
-        byte[] leftBytes = left.getByteArray();
-        int leftStartOffset = left.getStartOffset();
-        byte[] rightBytes = right.getByteArray();
-        int rightStartOffset = right.getStartOffset();
-        switch (argType) {
-            case STRING: {
-                return ed.UTF8StringEditDistance(leftBytes, leftStartOffset + typeIndicatorSize, rightBytes,
-                        rightStartOffset + typeIndicatorSize, edThresh);
-            }
-
-            case ARRAY: {
-                firstOrdListIter.reset(leftBytes, leftStartOffset);
-                secondOrdListIter.reset(rightBytes, rightStartOffset);
-                return (int) ed.computeSimilarity(firstOrdListIter, secondOrdListIter, edThresh);
-            }
-
-            default: {
-                throw new TypeMismatchException(BuiltinFunctions.EDIT_DISTANCE_CHECK, 0, argType.serialize(),
-                        ATypeTag.SERIALIZED_STRING_TYPE_TAG, ATypeTag.SERIALIZED_ORDEREDLIST_TYPE_TAG);
-            }
-
-        }
-    }
-
-    @Override
-    protected void writeResult(int ed) throws IOException {
-        listBuilder.reset(new AOrderedListType(BuiltinType.ANY, "list"));
-        boolean matches = (ed < 0) ? false : true;
-        listItemVal.reset();
-        booleanSerde.serialize(matches ? ABoolean.TRUE : ABoolean.FALSE, listItemVal.getDataOutput());
-        listBuilder.addItem(listItemVal);
-
-        listItemVal.reset();
-        aInt64.setValue((matches) ? ed : Integer.MAX_VALUE);
-        int64Serde.serialize(aInt64, listItemVal.getDataOutput());
-        listBuilder.addItem(listItemVal);
-        listBuilder.write(out, true);
-    }
-}

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/b8307794/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/EditDistanceContainsEvaluator.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/EditDistanceContainsEvaluator.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/EditDistanceContainsEvaluator.java
deleted file mode 100644
index eaf3368..0000000
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/EditDistanceContainsEvaluator.java
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.runtime.evaluators.common;
-
-import org.apache.asterix.runtime.exceptions.TypeMismatchException;
-import org.apache.asterix.om.functions.BuiltinFunctions;
-import org.apache.asterix.om.types.ATypeTag;
-import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.data.std.api.IPointable;
-
-public class EditDistanceContainsEvaluator extends EditDistanceCheckEvaluator {
-
-    public EditDistanceContainsEvaluator(IScalarEvaluatorFactory[] args, IHyracksTaskContext context)
-            throws HyracksDataException {
-        super(args, context);
-    }
-
-    @Override
-    protected int computeResult(IPointable left, IPointable right, ATypeTag argType) throws HyracksDataException {
-        byte[] leftBytes = left.getByteArray();
-        int leftStartOffset = left.getStartOffset();
-        byte[] rightBytes = right.getByteArray();
-        int rightStartOffset = right.getStartOffset();
-
-        switch (argType) {
-            case STRING: {
-                return ed.UTF8StringEditDistanceContains(leftBytes, leftStartOffset + typeIndicatorSize, rightBytes,
-                        rightStartOffset + typeIndicatorSize, edThresh);
-            }
-            case ARRAY: {
-                firstOrdListIter.reset(leftBytes, leftStartOffset);
-                secondOrdListIter.reset(rightBytes, rightStartOffset);
-                return ed.getSimilarityContains(firstOrdListIter, secondOrdListIter, edThresh);
-            }
-            default: {
-                throw new TypeMismatchException(BuiltinFunctions.EDIT_DISTANCE_CONTAINS, 0, argType.serialize(),
-                        ATypeTag.SERIALIZED_STRING_TYPE_TAG, ATypeTag.SERIALIZED_ORDEREDLIST_TYPE_TAG);
-            }
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/b8307794/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/EditDistanceEvaluator.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/EditDistanceEvaluator.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/EditDistanceEvaluator.java
deleted file mode 100644
index 85fd334..0000000
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/EditDistanceEvaluator.java
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.runtime.evaluators.common;
-
-import java.io.DataOutput;
-import java.io.IOException;
-
-import org.apache.asterix.formats.nontagged.SerializerDeserializerProvider;
-import org.apache.asterix.fuzzyjoin.similarity.SimilarityMetricEditDistance;
-import org.apache.asterix.om.base.AInt64;
-import org.apache.asterix.om.base.AMutableInt64;
-import org.apache.asterix.om.functions.BuiltinFunctions;
-import org.apache.asterix.om.types.ATypeTag;
-import org.apache.asterix.om.types.BuiltinType;
-import org.apache.asterix.om.types.EnumDeserializer;
-import org.apache.asterix.runtime.exceptions.IncompatibleTypeException;
-import org.apache.asterix.runtime.exceptions.TypeMismatchException;
-import org.apache.asterix.runtime.exceptions.UnsupportedItemTypeException;
-import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator;
-import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.data.std.api.IPointable;
-import org.apache.hyracks.data.std.primitive.VoidPointable;
-import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
-import org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
-
-public class EditDistanceEvaluator implements IScalarEvaluator {
-
-    // assuming type indicator in serde format
-    protected final int typeIndicatorSize = 1;
-
-    protected final ArrayBackedValueStorage resultStorage = new ArrayBackedValueStorage();
-    protected final DataOutput out = resultStorage.getDataOutput();
-    protected final IPointable argPtr1 = new VoidPointable();
-    protected final IPointable argPtr2 = new VoidPointable();
-    protected final IScalarEvaluator firstStringEval;
-    protected final IScalarEvaluator secondStringEval;
-    protected final SimilarityMetricEditDistance ed = new SimilarityMetricEditDistance();
-    protected final OrderedListIterator firstOrdListIter = new OrderedListIterator();
-    protected final OrderedListIterator secondOrdListIter = new OrderedListIterator();
-    protected int editDistance = 0;
-    protected final AMutableInt64 aInt64 = new AMutableInt64(-1);
-    @SuppressWarnings("unchecked")
-    protected final ISerializerDeserializer<AInt64> int64Serde =
-            SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(BuiltinType.AINT64);
-    protected ATypeTag itemTypeTag;
-
-    protected ATypeTag firstTypeTag;
-    protected ATypeTag secondTypeTag;
-
-    public EditDistanceEvaluator(IScalarEvaluatorFactory[] args, IHyracksTaskContext context)
-            throws HyracksDataException {
-        firstStringEval = args[0].createScalarEvaluator(context);
-        secondStringEval = args[1].createScalarEvaluator(context);
-    }
-
-    @Override
-    public void evaluate(IFrameTupleReference tuple, IPointable result) throws HyracksDataException {
-        resultStorage.reset();
-        firstStringEval.evaluate(tuple, argPtr1);
-        firstTypeTag =
-                EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(argPtr1.getByteArray()[argPtr1.getStartOffset()]);
-        secondStringEval.evaluate(tuple, argPtr2);
-        secondTypeTag =
-                EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(argPtr2.getByteArray()[argPtr2.getStartOffset()]);
-
-        if (!checkArgTypes(firstTypeTag, secondTypeTag)) {
-            result.set(resultStorage);
-            return;
-        }
-
-        editDistance = computeResult(argPtr1, argPtr2, firstTypeTag);
-        try {
-            writeResult(editDistance);
-        } catch (IOException e) {
-            throw new HyracksDataException(e);
-        }
-        result.set(resultStorage);
-    }
-
-    protected int computeResult(IPointable left, IPointable right, ATypeTag argType) throws HyracksDataException {
-        byte[] leftBytes = left.getByteArray();
-        int leftStartOffset = left.getStartOffset();
-        byte[] rightBytes = right.getByteArray();
-        int rightStartOffset = right.getStartOffset();
-
-        switch (argType) {
-            case STRING: {
-                // Passes -1 as the simThresh to calculate the edit distance
-                // without applying any calculation optimizations.
-                return ed.getActualUTF8StringEditDistanceVal(leftBytes, leftStartOffset + typeIndicatorSize, rightBytes,
-                        rightStartOffset + typeIndicatorSize, -1);
-            }
-            case ARRAY: {
-                firstOrdListIter.reset(leftBytes, leftStartOffset);
-                secondOrdListIter.reset(rightBytes, rightStartOffset);
-                return (int) ed.computeSimilarity(firstOrdListIter, secondOrdListIter);
-            }
-            default: {
-                throw new TypeMismatchException(BuiltinFunctions.EDIT_DISTANCE, 0, argType.serialize(),
-                        ATypeTag.SERIALIZED_STRING_TYPE_TAG, ATypeTag.SERIALIZED_ORDEREDLIST_TYPE_TAG);
-            }
-
-        }
-    }
-
-    protected boolean checkArgTypes(ATypeTag typeTag1, ATypeTag typeTag2) throws HyracksDataException {
-        if (typeTag1 != typeTag2) {
-            throw new IncompatibleTypeException(BuiltinFunctions.EDIT_DISTANCE, typeTag1.serialize(),
-                    typeTag2.serialize());
-        }
-
-        // Since they are equal, check one tag is enough.
-        if (typeTag1 != ATypeTag.STRING && typeTag1 != ATypeTag.ARRAY) { // could be an list
-            throw new TypeMismatchException(BuiltinFunctions.EDIT_DISTANCE, 0, typeTag1.serialize(),
-                    ATypeTag.SERIALIZED_STRING_TYPE_TAG, ATypeTag.SERIALIZED_ORDEREDLIST_TYPE_TAG);
-        }
-
-        if (typeTag1 == ATypeTag.ARRAY) {
-            itemTypeTag = EnumDeserializer.ATYPETAGDESERIALIZER
-                    .deserialize(argPtr1.getByteArray()[argPtr1.getStartOffset() + 1]);
-            if (itemTypeTag == ATypeTag.ANY) {
-                throw new UnsupportedItemTypeException(BuiltinFunctions.EDIT_DISTANCE, itemTypeTag.serialize());
-            }
-            itemTypeTag = EnumDeserializer.ATYPETAGDESERIALIZER
-                    .deserialize(argPtr2.getByteArray()[argPtr2.getStartOffset() + 1]);
-            if (itemTypeTag == ATypeTag.ANY) {
-                throw new UnsupportedItemTypeException(BuiltinFunctions.EDIT_DISTANCE, itemTypeTag.serialize());
-            }
-        }
-        return true;
-    }
-
-    protected void writeResult(int ed) throws IOException {
-        aInt64.setValue(ed);
-        int64Serde.serialize(aInt64, out);
-    }
-}

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/b8307794/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/GramTokensEvaluator.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/GramTokensEvaluator.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/GramTokensEvaluator.java
deleted file mode 100644
index ef727c9..0000000
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/GramTokensEvaluator.java
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.runtime.evaluators.common;
-
-import java.io.DataOutput;
-import java.io.IOException;
-
-import org.apache.asterix.builders.OrderedListBuilder;
-import org.apache.asterix.dataflow.data.nontagged.serde.ABooleanSerializerDeserializer;
-import org.apache.asterix.om.functions.BuiltinFunctions;
-import org.apache.asterix.om.types.AOrderedListType;
-import org.apache.asterix.om.types.BuiltinType;
-import org.apache.asterix.om.types.hierachy.ATypeHierarchy;
-import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator;
-import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.data.std.api.IPointable;
-import org.apache.hyracks.data.std.primitive.VoidPointable;
-import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
-import org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
-import org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizer;
-import org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.NGramUTF8StringBinaryTokenizer;
-
-public class GramTokensEvaluator implements IScalarEvaluator {
-
-    // assuming type indicator in serde format
-    private final int typeIndicatorSize = 1;
-
-    private final ArrayBackedValueStorage resultStorage = new ArrayBackedValueStorage();
-    private final DataOutput out = resultStorage.getDataOutput();
-    private final IPointable stringArg = new VoidPointable();
-    private final IPointable gramLengthArg = new VoidPointable();
-    private final IPointable prePostArg = new VoidPointable();
-    private final IScalarEvaluator stringEval;
-    private final IScalarEvaluator gramLengthEval;
-    private final IScalarEvaluator prePostEval;
-
-    private final NGramUTF8StringBinaryTokenizer tokenizer;
-    private final OrderedListBuilder listBuilder = new OrderedListBuilder();
-    private final AOrderedListType listType;
-
-    public GramTokensEvaluator(IScalarEvaluatorFactory[] args, IHyracksTaskContext context, IBinaryTokenizer tokenizer,
-            BuiltinType itemType) throws HyracksDataException {
-        stringEval = args[0].createScalarEvaluator(context);
-        gramLengthEval = args[1].createScalarEvaluator(context);
-        prePostEval = args[2].createScalarEvaluator(context);
-        this.tokenizer = (NGramUTF8StringBinaryTokenizer) tokenizer;
-        this.listType = new AOrderedListType(itemType, null);
-    }
-
-    @Override
-    public void evaluate(IFrameTupleReference tuple, IPointable result) throws HyracksDataException {
-        resultStorage.reset();
-        stringEval.evaluate(tuple, stringArg);
-        gramLengthEval.evaluate(tuple, gramLengthArg);
-        prePostEval.evaluate(tuple, prePostArg);
-
-        int gramLength = ATypeHierarchy.getIntegerValue(BuiltinFunctions.GRAM_TOKENS.getName(), 1,
-                gramLengthArg.getByteArray(), gramLengthArg.getStartOffset());
-        tokenizer.setGramlength(gramLength);
-        boolean prePost = ABooleanSerializerDeserializer.getBoolean(prePostArg.getByteArray(),
-                prePostArg.getStartOffset() + typeIndicatorSize);
-        tokenizer.setPrePost(prePost);
-        tokenizer.reset(stringArg.getByteArray(), stringArg.getStartOffset(), stringArg.getLength());
-
-        try {
-            listBuilder.reset(listType);
-            while (tokenizer.hasNext()) {
-                tokenizer.next();
-                listBuilder.addItem(tokenizer.getToken());
-            }
-            listBuilder.write(out, true);
-        } catch (IOException e) {
-            throw new HyracksDataException(e);
-        }
-        result.set(resultStorage);
-    }
-}

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/b8307794/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityFiltersCache.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityFiltersCache.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityFiltersCache.java
deleted file mode 100644
index 4f5fb69..0000000
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityFiltersCache.java
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.runtime.evaluators.common;
-
-import java.io.DataInputStream;
-import java.nio.ByteBuffer;
-import java.util.Arrays;
-
-import org.apache.asterix.fuzzyjoin.similarity.SimilarityFilters;
-import org.apache.asterix.fuzzyjoin.similarity.SimilarityFiltersFactory;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.dataflow.common.comm.util.ByteBufferInputStream;
-import org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;
-
-/**
- * Single-entry cache for {@link SimilarityFilters}.
- * <p>
- * The most recently built filter object is kept together with the threshold and the serialized similarity
- * name it was built from, and is handed back as long as callers keep asking for that same pair.
- * <p>
- * Instances carry mutable state and reuse one input stream; nothing in this class synchronizes access.
- */
-public class SimilarityFiltersCache {
-    private final UTF8StringSerializerDeserializer utf8SerDer = new UTF8StringSerializerDeserializer();
-
-    // Reusable stream pair used to decode the similarity name from the caller's buffer.
-    private final ByteBufferInputStream bbis = new ByteBufferInputStream();
-    private final DataInputStream dis = new DataInputStream(bbis);
-
-    // Cache key (threshold + private copy of the serialized name) and the cached value.
-    private float similarityThresholdCached = 0;
-    private byte[] similarityNameBytesCached = null;
-    private SimilarityFilters similarityFiltersCached = null;
-
-    /**
-     * Returns the filters for the given threshold and serialized similarity name, building them only when
-     * the request differs from the cached one.
-     *
-     * @param similarityThreshold the similarity threshold to build the filters for
-     * @param similarityNameBytes buffer holding the serialized similarity name; the string is decoded
-     *                            starting one byte past {@code startOffset}
-     * @param startOffset         offset of the serialized name inside {@code similarityNameBytes}
-     * @param len                 number of bytes the serialized name occupies, counted from
-     *                            {@code startOffset} (NOTE(review): treated as a length, not an end index,
-     *                            based on the parameter name; confirm against callers)
-     * @return the cached or freshly created filters
-     * @throws HyracksDataException if the similarity name cannot be decoded
-     */
-    public SimilarityFilters get(float similarityThreshold, byte[] similarityNameBytes, int startOffset, int len)
-            throws HyracksDataException {
-        if (similarityThreshold != similarityThresholdCached || similarityNameBytesCached == null
-                || !matchesCachedName(similarityNameBytes, startOffset, len)) {
-            bbis.setByteBuffer(ByteBuffer.wrap(similarityNameBytes), startOffset + 1);
-            String similarityName = utf8SerDer.deserialize(dis);
-            similarityFiltersCached =
-                    SimilarityFiltersFactory.getSimilarityFilters(similarityName, similarityThreshold);
-            // Record the key only after the filters were built, so a failure above cannot leave a key that
-            // points at stale filters. copyOfRange takes an exclusive END index, hence startOffset + len.
-            similarityNameBytesCached = Arrays.copyOfRange(similarityNameBytes, startOffset, startOffset + len);
-            // Without remembering the threshold, every call with a non-zero threshold would miss the cache.
-            similarityThresholdCached = similarityThreshold;
-        }
-        return similarityFiltersCached;
-    }
-
-    /**
-     * Compares the {@code len} bytes at {@code startOffset} of {@code bytes} with the cached name copy.
-     * Only the addressed range is compared, because the buffer may hold unrelated data around it.
-     */
-    private boolean matchesCachedName(byte[] bytes, int startOffset, int len) {
-        if (similarityNameBytesCached == null || similarityNameBytesCached.length != len) {
-            return false;
-        }
-        for (int i = 0; i < len; i++) {
-            if (bytes[startOffset + i] != similarityNameBytesCached[i]) {
-                return false;
-            }
-        }
-        return true;
-    }
-}

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/b8307794/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardCheckEvaluator.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardCheckEvaluator.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardCheckEvaluator.java
deleted file mode 100644
index 60b5592..0000000
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardCheckEvaluator.java
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.runtime.evaluators.common;
-
-import java.io.IOException;
-
-import org.apache.asterix.builders.OrderedListBuilder;
-import org.apache.asterix.dataflow.data.nontagged.serde.AFloatSerializerDeserializer;
-import org.apache.asterix.formats.nontagged.SerializerDeserializerProvider;
-import org.apache.asterix.om.base.ABoolean;
-import org.apache.asterix.om.types.AOrderedListType;
-import org.apache.asterix.om.types.BuiltinType;
-import org.apache.asterix.om.types.EnumDeserializer;
-import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator;
-import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.data.std.api.IPointable;
-import org.apache.hyracks.data.std.primitive.IntegerPointable;
-import org.apache.hyracks.data.std.primitive.VoidPointable;
-import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
-import org.apache.hyracks.data.std.util.BinaryEntry;
-import org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
-
-/**
- * Thresholded variant of {@link SimilarityJaccardEvaluator}.
- * <p>
- * Takes a third argument, the Jaccard threshold, and produces an ordered list of two items: a boolean telling
- * whether the similarity reached the threshold, and the similarity itself (written as {@code 0.0} when the
- * threshold was not reached). The probe phase gives up early, returning {@code -1}, as soon as the threshold
- * can no longer be met.
- */
-public class SimilarityJaccardCheckEvaluator extends SimilarityJaccardEvaluator {
-
-    // Evaluator for the third argument (the threshold).
-    protected final IScalarEvaluator jaccThreshEval;
-    // Threshold read by the current evaluate() call; -1 until evaluate() has run once.
-    protected float jaccThresh = -1f;
-    protected IPointable jaccThreshPointable = new VoidPointable();
-
-    // Builder for the two-item result list.
-    protected OrderedListBuilder listBuilder;
-    // Scratch storage each result item is serialized into before it is added to the list.
-    protected ArrayBackedValueStorage inputVal;
-    @SuppressWarnings("unchecked")
-    protected final ISerializerDeserializer<ABoolean> booleanSerde =
-            SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(BuiltinType.ABOOLEAN);
-    protected final AOrderedListType listType = new AOrderedListType(BuiltinType.ANY, "list");
-
-    /**
-     * @param args    evaluator factories; {@code args[0]} and {@code args[1]} are consumed by the superclass,
-     *                {@code args[2]} produces the threshold
-     * @param context task context used to instantiate the evaluators
-     * @throws HyracksDataException if an evaluator cannot be created
-     */
-    public SimilarityJaccardCheckEvaluator(IScalarEvaluatorFactory[] args, IHyracksTaskContext context)
-            throws HyracksDataException {
-        super(args, context);
-        jaccThreshEval = args[2].createScalarEvaluator(context);
-        listBuilder = new OrderedListBuilder();
-        inputVal = new ArrayBackedValueStorage();
-    }
-
-    /**
-     * Evaluates both lists and the threshold, then writes the {@code [matches, similarity]} list.
-     *
-     * @param tuple  the input tuple handed to the argument evaluators
-     * @param result set to {@code resultStorage} before returning
-     * @throws HyracksDataException if evaluation fails, an argument is not a list, or writing the result
-     *                              raises an {@link IOException}
-     */
-    @Override
-    public void evaluate(IFrameTupleReference tuple, IPointable result) throws HyracksDataException {
-        resultStorage.reset();
-
-        firstOrdListEval.evaluate(tuple, argPtr1);
-        secondOrdListEval.evaluate(tuple, argPtr2);
-        jaccThreshEval.evaluate(tuple, jaccThreshPointable);
-
-        // Byte 0 of each argument is deserialized as its own type tag, byte 1 as its item type tag.
-        firstTypeTag =
-                EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(argPtr1.getByteArray()[argPtr1.getStartOffset()]);
-        secondTypeTag =
-                EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(argPtr2.getByteArray()[argPtr2.getStartOffset()]);
-
-        firstItemTypeTag =
-                EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(argPtr1.getByteArray()[argPtr1.getStartOffset() + 1]);
-        secondItemTypeTag =
-                EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(argPtr2.getByteArray()[argPtr2.getStartOffset() + 1]);
-
-        // The threshold is read as a float located right after the type indicator.
-        // NOTE(review): no check that the third argument really is a float; confirm callers guarantee it.
-        jaccThresh = AFloatSerializerDeserializer.getFloat(jaccThreshPointable.getByteArray(),
-                jaccThreshPointable.getStartOffset() + TYPE_INDICATOR_SIZE);
-
-        if (!checkArgTypes(firstTypeTag, secondTypeTag)) {
-            result.set(resultStorage);
-            return;
-        }
-        if (prepareLists(argPtr1, argPtr2)) {
-            jaccSim = computeResult();
-        } else {
-            // prepareLists() reports false when either list is empty.
-            jaccSim = 0.0f;
-        }
-        try {
-            writeResult(jaccSim);
-        } catch (IOException e) {
-            throw new HyracksDataException(e);
-        }
-        result.set(resultStorage);
-    }
-
-    /**
-     * Probes the hash map like the superclass does, but stops as soon as the threshold is out of reach.
-     *
-     * @param probeIter     iterator over the probe list
-     * @param buildListSize number of items in the build list
-     * @param probeListSize number of items in the probe list
-     * @return the intersection size, or {@code -1} when the length filter rejects the pair or the threshold
-     *         can no longer be satisfied
-     * @throws HyracksDataException if the hash map or the iterator fails
-     */
-    @Override
-    protected int probeHashMap(AbstractAsterixListIterator probeIter, int buildListSize, int probeListSize)
-            throws HyracksDataException {
-        // Apply length filter.
-        int lengthLowerBound = (int) Math.ceil(jaccThresh * probeListSize);
-        if ((lengthLowerBound > buildListSize)
-                || (buildListSize > (int) Math.floor(1.0f / jaccThresh * probeListSize))) {
-            return -1;
-        }
-        // Probe phase: Probe items from second list, and compute intersection size.
-        int intersectionSize = 0;
-        int probeListCount = 0;
-        int minUnionSize = buildListSize;
-        while (probeIter.hasNext()) {
-            probeListCount++;
-            byte[] buf = probeIter.getData();
-            int off = probeIter.getPos();
-            int len = probeIter.getItemLen();
-            keyEntry.set(buf, off, len);
-            BinaryEntry entry = hashMap.get(keyEntry);
-            if (entry == null) {
-                // Could not find element in other set. Increase min union size by 1.
-                minUnionSize++;
-                // Check whether jaccThresh can still be satisfied if there was a mismatch.
-                int maxIntersectionSize = Math.min(buildListSize, intersectionSize + (probeListSize - probeListCount));
-                int lowerBound = (int) Math.floor(jaccThresh * minUnionSize);
-                if (maxIntersectionSize < lowerBound) {
-                    // Cannot satisfy jaccThresh.
-                    return -1;
-                }
-            } else {
-                // The map value is two ints: the build-side count at +0 and the probe-side count at +4.
-                int firstValInt = IntegerPointable.getInteger(entry.getBuf(), entry.getOffset());
-                // A zero build-side count is irrelevant for the intersection size. It must still fall through
-                // to probeIter.next() below: skipping the advance would make this loop spin forever.
-                if (firstValInt != 0) {
-                    int secondValInt = IntegerPointable.getInteger(entry.getBuf(), entry.getOffset() + 4);
-                    // Subtract old min value.
-                    intersectionSize -= (firstValInt < secondValInt) ? firstValInt : secondValInt;
-                    secondValInt++;
-                    // Add new min value.
-                    intersectionSize += (firstValInt < secondValInt) ? firstValInt : secondValInt;
-                    IntegerPointable.setInteger(entry.getBuf(), entry.getOffset() + 4, secondValInt);
-                }
-            }
-            probeIter.next();
-        }
-        return intersectionSize;
-    }
-
-    /**
-     * Writes the ordered list {@code [matches, similarity]} to {@code out}.
-     *
-     * @param jacc the computed similarity; {@code -1} (early termination) compares below any non-negative
-     *             threshold and is therefore reported as a non-match
-     * @throws IOException if serializing an item or writing the list fails
-     */
-    @Override
-    protected void writeResult(float jacc) throws IOException {
-        listBuilder.reset(listType);
-        // Written as a negated '<' so that the outcome for NaN operands stays what it has always been.
-        boolean matches = !(jacc < jaccThresh);
-        inputVal.reset();
-        booleanSerde.serialize(matches ? ABoolean.TRUE : ABoolean.FALSE, inputVal.getDataOutput());
-        listBuilder.addItem(inputVal);
-
-        inputVal.reset();
-        aFloat.setValue(matches ? jacc : 0.0f);
-        floatSerde.serialize(aFloat, inputVal.getDataOutput());
-        listBuilder.addItem(inputVal);
-
-        listBuilder.write(out, true);
-    }
-}

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/b8307794/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardEvaluator.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardEvaluator.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardEvaluator.java
deleted file mode 100644
index 1e5ad3c..0000000
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardEvaluator.java
+++ /dev/null
@@ -1,277 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.runtime.evaluators.common;
-
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.Arrays;
-
-import org.apache.asterix.dataflow.data.nontagged.comparators.ListItemBinaryComparatorFactory;
-import org.apache.asterix.dataflow.data.nontagged.hash.ListItemBinaryHashFunctionFactory;
-import org.apache.asterix.formats.nontagged.SerializerDeserializerProvider;
-import org.apache.asterix.om.base.AFloat;
-import org.apache.asterix.om.base.AMutableFloat;
-import org.apache.asterix.om.functions.BuiltinFunctions;
-import org.apache.asterix.om.types.ATypeTag;
-import org.apache.asterix.om.types.BuiltinType;
-import org.apache.asterix.om.types.EnumDeserializer;
-import org.apache.asterix.runtime.evaluators.functions.BinaryHashMap;
-import org.apache.asterix.runtime.exceptions.TypeMismatchException;
-import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator;
-import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.dataflow.value.IBinaryComparator;
-import org.apache.hyracks.api.dataflow.value.IBinaryHashFunction;
-import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.data.std.api.IPointable;
-import org.apache.hyracks.data.std.primitive.IntegerPointable;
-import org.apache.hyracks.data.std.primitive.VoidPointable;
-import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
-import org.apache.hyracks.data.std.util.BinaryEntry;
-import org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
-
-/**
- * Scalar evaluator that computes the Jaccard similarity of two list arguments.
- * <p>
- * The smaller list is loaded into a hash map that counts occurrences per item (build phase); the larger
- * list is then probed against it to obtain the intersection size (probe phase). The result written is
- * {@code intersection / (size1 + size2 - intersection)} as a float, or {@code 0.0} when either list is empty.
- * <p>
- * Subclasses may override {@link #probeHashMap} and {@link #writeResult}; a negative intersection size
- * returned by {@code probeHashMap} is passed on by {@link #computeResult()} as {@code -1}.
- */
-public class SimilarityJaccardEvaluator implements IScalarEvaluator {
-
-    // Parameters for hash table.
-    protected static final int MIN_TABLE_SIZE = 100;
-    protected static final int TABLE_FRAME_SIZE = 32768;
-
-    // Assuming type indicator in serde format.
-    protected static final int TYPE_INDICATOR_SIZE = 1;
-
-    protected final ArrayBackedValueStorage resultStorage = new ArrayBackedValueStorage();
-    protected final DataOutput out = resultStorage.getDataOutput();
-    protected final IPointable argPtr1 = new VoidPointable();
-    protected final IPointable argPtr2 = new VoidPointable();
-    protected final IScalarEvaluator firstOrdListEval;
-    protected final IScalarEvaluator secondOrdListEval;
-
-    // One reusable iterator per (argument, list kind) pair; checkArgTypes() selects the ones in use.
-    protected final OrderedListIterator fstOrdListIter = new OrderedListIterator();
-    protected final OrderedListIterator sndOrdListIter = new OrderedListIterator();
-    protected final UnorderedListIterator fstUnordListIter = new UnorderedListIterator();
-    protected final UnorderedListIterator sndUnordListIter = new UnorderedListIterator();
-
-    protected AbstractAsterixListIterator firstListIter;
-    protected AbstractAsterixListIterator secondListIter;
-
-    protected final AMutableFloat aFloat = new AMutableFloat(0);
-    @SuppressWarnings("unchecked")
-    protected final ISerializerDeserializer<AFloat> floatSerde =
-            SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(BuiltinType.AFLOAT);
-
-    protected ATypeTag firstTypeTag;
-    protected ATypeTag secondTypeTag;
-    protected float jaccSim = 0.0f;
-    protected ATypeTag firstItemTypeTag;
-    protected ATypeTag secondItemTypeTag;
-
-    // Map from list item to an 8-byte value: build-side count (int at +0), probe-side count (int at +4).
-    protected BinaryHashMap hashMap;
-    protected BinaryEntry keyEntry = new BinaryEntry();
-    protected BinaryEntry valEntry = new BinaryEntry();
-
-    // Ignore case for strings. Defaults to true.
-    protected final boolean ignoreCase = true;
-
-    protected int hashTableSize = MIN_TABLE_SIZE;
-
-    /**
-     * @param args    evaluator factories; {@code args[0]} and {@code args[1]} produce the two lists
-     * @param context task context used to instantiate the evaluators
-     * @throws HyracksDataException if an evaluator cannot be created
-     */
-    public SimilarityJaccardEvaluator(IScalarEvaluatorFactory[] args, IHyracksTaskContext context)
-            throws HyracksDataException {
-        firstOrdListEval = args[0].createScalarEvaluator(context);
-        secondOrdListEval = args[1].createScalarEvaluator(context);
-        // Template value inserted for new keys: two ints, both initially zero.
-        byte[] emptyValBuf = new byte[8];
-        Arrays.fill(emptyValBuf, (byte) 0);
-        valEntry.set(emptyValBuf, 0, 8);
-    }
-
-    /**
-     * Evaluates both list arguments and writes their Jaccard similarity.
-     *
-     * @param tuple  the input tuple handed to the argument evaluators
-     * @param result set to {@code resultStorage} before returning
-     * @throws HyracksDataException if evaluation fails, an argument is not a list, or writing the result
-     *                              raises an {@link IOException}
-     */
-    @Override
-    public void evaluate(IFrameTupleReference tuple, IPointable result) throws HyracksDataException {
-        resultStorage.reset();
-
-        firstOrdListEval.evaluate(tuple, argPtr1);
-        secondOrdListEval.evaluate(tuple, argPtr2);
-
-        // Byte 0 of each argument is deserialized as its own type tag, byte 1 as its item type tag.
-        firstTypeTag =
-                EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(argPtr1.getByteArray()[argPtr1.getStartOffset()]);
-        secondTypeTag =
-                EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(argPtr2.getByteArray()[argPtr2.getStartOffset()]);
-
-        firstItemTypeTag =
-                EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(argPtr1.getByteArray()[argPtr1.getStartOffset() + 1]);
-        secondItemTypeTag =
-                EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(argPtr2.getByteArray()[argPtr2.getStartOffset() + 1]);
-
-        // checkArgTypes() in this class either returns true or throws; the branch is kept for overriders.
-        if (!checkArgTypes(firstTypeTag, secondTypeTag)) {
-            result.set(resultStorage);
-            return;
-        }
-        if (prepareLists(argPtr1, argPtr2)) {
-            jaccSim = computeResult();
-        } else {
-            jaccSim = 0.0f;
-        }
-        try {
-            writeResult(jaccSim);
-        } catch (IOException e) {
-            throw new HyracksDataException(e);
-        }
-        result.set(resultStorage);
-    }
-
-    /**
-     * Points the selected list iterators at the two arguments and sizes the hash table.
-     *
-     * @return {@code false} when either list is empty, {@code true} otherwise
-     */
-    protected boolean prepareLists(IPointable left, IPointable right) throws HyracksDataException {
-        firstListIter.reset(left.getByteArray(), left.getStartOffset());
-        secondListIter.reset(right.getByteArray(), right.getStartOffset());
-        // Check for special case where one of the lists is empty, since list
-        // types won't match.
-        if (firstListIter.size() == 0 || secondListIter.size() == 0) {
-            return false;
-        }
-
-        // Set the size of the table dynamically
-        hashTableSize = Math.max(Math.max(firstListIter.size(), secondListIter.size()), MIN_TABLE_SIZE);
-
-        // TODO: Check item types are compatible.
-        return true;
-    }
-
-    /**
-     * Runs the build and probe phases and derives the similarity from the intersection size.
-     *
-     * @return the Jaccard similarity, or {@code -1} when {@link #probeHashMap} reported a negative
-     *         intersection size
-     */
-    protected float computeResult() throws HyracksDataException {
-        // We will subtract the intersection size later to get the real union size.
-        int firstListSize = firstListIter.size();
-        int secondListSize = secondListIter.size();
-        int unionSize = firstListSize + secondListSize;
-        // Choose smaller list as build, and larger one as probe.
-        AbstractAsterixListIterator buildList = (firstListSize < secondListSize) ? firstListIter : secondListIter;
-        AbstractAsterixListIterator probeList = (buildList == firstListIter) ? secondListIter : firstListIter;
-        int buildListSize = (buildList == firstListIter) ? firstListSize : secondListSize;
-        int probeListSize = (probeList == firstListIter) ? firstListSize : secondListSize;
-        ATypeTag buildItemTypeTag = (buildList == firstListIter) ? firstItemTypeTag : secondItemTypeTag;
-        ATypeTag probeItemTypeTag = (probeList == firstListIter) ? firstItemTypeTag : secondItemTypeTag;
-
-        setHashMap(buildItemTypeTag, probeItemTypeTag);
-        buildHashMap(buildList);
-        int intersectionSize = probeHashMap(probeList, buildListSize, probeListSize);
-        // Special indicator for the "check" version of jaccard.
-        if (intersectionSize < 0) {
-            return -1;
-        }
-        unionSize -= intersectionSize;
-        return (float) intersectionSize / (float) unionSize;
-    }
-
-    /**
-     * Build phase: inserts every item of {@code buildIter} into the hash map, counting duplicates.
-     */
-    protected void buildHashMap(AbstractAsterixListIterator buildIter) throws HyracksDataException {
-        // Build phase: Add items into hash map, starting with first list.
-        // Value in map is a pair of integers. Set first integer to 1.
-        IntegerPointable.setInteger(valEntry.getBuf(), 0, 1);
-        while (buildIter.hasNext()) {
-            byte[] buf = buildIter.getData();
-            int off = buildIter.getPos();
-            int len = buildIter.getItemLen();
-            keyEntry.set(buf, off, len);
-            // A non-null return is treated as "key was already present", so its count is bumped.
-            BinaryEntry entry = hashMap.put(keyEntry, valEntry);
-            if (entry != null) {
-                // Increment value.
-                int firstValInt = IntegerPointable.getInteger(entry.getBuf(), entry.getOffset());
-                IntegerPointable.setInteger(entry.getBuf(), entry.getOffset(), firstValInt + 1);
-            }
-            buildIter.next();
-        }
-    }
-
-    /**
-     * Probe phase: looks up every item of {@code probeIter} and accumulates the multiset intersection size.
-     *
-     * @param probeIter     iterator over the probe list
-     * @param buildListSize number of items in the build list (unused here)
-     * @param probeListSize number of items in the probe list (unused here)
-     * @return the intersection size
-     */
-    protected int probeHashMap(AbstractAsterixListIterator probeIter, int buildListSize, int probeListSize)
-            throws HyracksDataException {
-        // Probe phase: Probe items from second list, and compute intersection size.
-        int intersectionSize = 0;
-        while (probeIter.hasNext()) {
-            byte[] buf = probeIter.getData();
-            int off = probeIter.getPos();
-            int len = probeIter.getItemLen();
-            keyEntry.set(buf, off, len);
-            BinaryEntry entry = hashMap.get(keyEntry);
-            if (entry != null) {
-                int firstValInt = IntegerPointable.getInteger(entry.getBuf(), entry.getOffset());
-                // A zero build-side count is irrelevant for the intersection size. It must still fall through
-                // to probeIter.next() below: skipping the advance would make this loop spin forever.
-                if (firstValInt != 0) {
-                    // Increment second value.
-                    int secondValInt = IntegerPointable.getInteger(entry.getBuf(), entry.getOffset() + 4);
-                    // Subtract old min value.
-                    intersectionSize -= (firstValInt < secondValInt) ? firstValInt : secondValInt;
-                    secondValInt++;
-                    // Add new min value.
-                    intersectionSize += (firstValInt < secondValInt) ? firstValInt : secondValInt;
-                    IntegerPointable.setInteger(entry.getBuf(), entry.getOffset() + 4, secondValInt);
-                }
-            }
-            probeIter.next();
-        }
-        return intersectionSize;
-    }
-
-    /**
-     * Creates the hash map on first use and clears it on every later call.
-     * <p>
-     * NOTE(review): after the first call the existing map is only cleared, so the hash functions, comparator
-     * and table size chosen for the first pair of item types are kept for all later tuples; confirm that
-     * item types cannot vary between tuples.
-     */
-    protected void setHashMap(ATypeTag buildItemTypeTag, ATypeTag probeItemTypeTag) {
-        if (hashMap != null) {
-            hashMap.clear();
-            return;
-        }
-
-        IBinaryHashFunction putHashFunc =
-                ListItemBinaryHashFunctionFactory.INSTANCE.createBinaryHashFunction(buildItemTypeTag, ignoreCase);
-        IBinaryHashFunction getHashFunc =
-                ListItemBinaryHashFunctionFactory.INSTANCE.createBinaryHashFunction(probeItemTypeTag, ignoreCase);
-        IBinaryComparator cmp = ListItemBinaryComparatorFactory.INSTANCE.createBinaryComparator(buildItemTypeTag,
-                probeItemTypeTag, ignoreCase);
-        hashMap = new BinaryHashMap(hashTableSize, TABLE_FRAME_SIZE, putHashFunc, getHashFunc, cmp);
-    }
-
-    /**
-     * Selects the list iterator matching each argument's type tag.
-     *
-     * @return always {@code true} when both arguments are {@code ARRAY} or {@code MULTISET}
-     * @throws HyracksDataException a {@link TypeMismatchException} naming the offending argument index when
-     *                              an argument has any other type tag
-     */
-    protected boolean checkArgTypes(ATypeTag typeTag1, ATypeTag typeTag2) throws HyracksDataException {
-        switch (typeTag1) {
-            case ARRAY: {
-                firstListIter = fstOrdListIter;
-                break;
-            }
-            case MULTISET: {
-                firstListIter = fstUnordListIter;
-                break;
-            }
-            default: {
-                throw new TypeMismatchException(BuiltinFunctions.SIMILARITY_JACCARD, 0, typeTag1.serialize(),
-                        ATypeTag.SERIALIZED_ORDEREDLIST_TYPE_TAG, ATypeTag.SERIALIZED_UNORDEREDLIST_TYPE_TAG);
-            }
-        }
-        switch (typeTag2) {
-            case ARRAY: {
-                secondListIter = sndOrdListIter;
-                break;
-            }
-            case MULTISET: {
-                secondListIter = sndUnordListIter;
-                break;
-            }
-            default: {
-                throw new TypeMismatchException(BuiltinFunctions.SIMILARITY_JACCARD, 1, typeTag2.serialize(),
-                        ATypeTag.SERIALIZED_ORDEREDLIST_TYPE_TAG, ATypeTag.SERIALIZED_UNORDEREDLIST_TYPE_TAG);
-            }
-        }
-        return true;
-    }
-
-    /**
-     * Serializes {@code jacc} as a float into {@code out}.
-     *
-     * @throws IOException if serialization fails
-     */
-    protected void writeResult(float jacc) throws IOException {
-        aFloat.setValue(jacc);
-        floatSerde.serialize(aFloat, out);
-    }
-}

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/b8307794/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardPrefixEvaluator.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardPrefixEvaluator.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardPrefixEvaluator.java
deleted file mode 100644
index b70c6ad..0000000
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardPrefixEvaluator.java
+++ /dev/null
@@ -1,230 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.runtime.evaluators.common;
-
-import java.io.DataOutput;
-import java.io.IOException;
-
-import org.apache.asterix.dataflow.data.nontagged.serde.AFloatSerializerDeserializer;
-import org.apache.asterix.dataflow.data.nontagged.serde.AOrderedListSerializerDeserializer;
-import org.apache.asterix.dataflow.data.nontagged.serde.AUnorderedListSerializerDeserializer;
-import org.apache.asterix.formats.nontagged.SerializerDeserializerProvider;
-import org.apache.asterix.fuzzyjoin.IntArray;
-import org.apache.asterix.fuzzyjoin.similarity.PartialIntersect;
-import org.apache.asterix.fuzzyjoin.similarity.SimilarityFiltersJaccard;
-import org.apache.asterix.fuzzyjoin.similarity.SimilarityMetric;
-import org.apache.asterix.om.base.AFloat;
-import org.apache.asterix.om.base.AMutableFloat;
-import org.apache.asterix.om.functions.BuiltinFunctions;
-import org.apache.asterix.om.types.ATypeTag;
-import org.apache.asterix.om.types.BuiltinType;
-import org.apache.asterix.om.types.hierachy.ATypeHierarchy;
-import org.apache.asterix.runtime.exceptions.TypeMismatchException;
-import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator;
-import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.data.std.api.IPointable;
-import org.apache.hyracks.data.std.primitive.VoidPointable;
-import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
-import org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
-
-public class SimilarityJaccardPrefixEvaluator implements IScalarEvaluator {
-    // assuming type indicator in serde format
-    protected final int typeIndicatorSize = 1;
-
-    protected final ArrayBackedValueStorage resultStorage = new ArrayBackedValueStorage();
-    protected final DataOutput out = resultStorage.getDataOutput();
-    protected final IPointable inputVal = new VoidPointable();
-    protected final IScalarEvaluator evalLen1;
-    protected final IScalarEvaluator evalTokens1;
-    protected final IScalarEvaluator evalLen2;
-    protected final IScalarEvaluator evalTokens2;
-    protected final IScalarEvaluator evalTokenPrefix;
-    protected final IScalarEvaluator evalThreshold;
-
-    protected float similarityThresholdCache;
-    protected SimilarityFiltersJaccard similarityFilters;
-    protected final IntArray tokens1 = new IntArray();
-    protected final IntArray tokens2 = new IntArray();
-    protected final PartialIntersect parInter = new PartialIntersect();
-
-    protected float sim = 0.0f;
-
-    // result
-    protected final AMutableFloat res = new AMutableFloat(0);
-    @SuppressWarnings("unchecked")
-    protected final ISerializerDeserializer<AFloat> reusSerde =
-            SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(BuiltinType.AFLOAT);
-
-    public SimilarityJaccardPrefixEvaluator(IScalarEvaluatorFactory[] args, IHyracksTaskContext context)
-            throws HyracksDataException {
-        evalLen1 = args[0].createScalarEvaluator(context);
-        evalTokens1 = args[1].createScalarEvaluator(context);
-        evalLen2 = args[2].createScalarEvaluator(context);
-        evalTokens2 = args[3].createScalarEvaluator(context);
-        evalTokenPrefix = args[4].createScalarEvaluator(context);
-        evalThreshold = args[5].createScalarEvaluator(context);
-    }
-
-    @Override
-    public void evaluate(IFrameTupleReference tuple, IPointable result) throws HyracksDataException {
-        resultStorage.reset();
-        // similarity threshold
-        sim = 0;
-        evalThreshold.evaluate(tuple, inputVal);
-        float similarityThreshold =
-                AFloatSerializerDeserializer.getFloat(inputVal.getByteArray(), inputVal.getStartOffset() + 1);
-
-        if (similarityThreshold != similarityThresholdCache || similarityFilters == null) {
-            similarityFilters = new SimilarityFiltersJaccard(similarityThreshold);
-            similarityThresholdCache = similarityThreshold;
-        }
-
-        evalLen1.evaluate(tuple, inputVal);
-        int length1 = ATypeHierarchy.getIntegerValue(BuiltinFunctions.SIMILARITY_JACCARD.getName(), 0,
-                inputVal.getByteArray(), inputVal.getStartOffset());
-        evalLen2.evaluate(tuple, inputVal);
-        int length2 = ATypeHierarchy.getIntegerValue(BuiltinFunctions.SIMILARITY_JACCARD.getName(), 2,
-                inputVal.getByteArray(), inputVal.getStartOffset());
-
-        //
-        // -- - length filter - --
-        //
-        if (similarityFilters.passLengthFilter(length1, length2)) {
-
-            // -- - tokens1 - --
-            int i;
-            tokens1.reset();
-            evalTokens1.evaluate(tuple, inputVal);
-
-            byte[] serList = inputVal.getByteArray();
-            int startOffset = inputVal.getStartOffset();
-            if (serList[startOffset] != ATypeTag.SERIALIZED_ORDEREDLIST_TYPE_TAG
-                    && serList[startOffset] != ATypeTag.SERIALIZED_UNORDEREDLIST_TYPE_TAG) {
-                throw new TypeMismatchException(BuiltinFunctions.SIMILARITY_JACCARD, 1, serList[startOffset],
-                        ATypeTag.SERIALIZED_ORDEREDLIST_TYPE_TAG, ATypeTag.SERIALIZED_UNORDEREDLIST_TYPE_TAG);
-            }
-
-            int lengthTokens1;
-            if (serList[startOffset] == ATypeTag.SERIALIZED_ORDEREDLIST_TYPE_TAG) {
-                lengthTokens1 =
-                        AOrderedListSerializerDeserializer.getNumberOfItems(inputVal.getByteArray(), startOffset);
-                // read tokens
-                for (i = 0; i < lengthTokens1; i++) {
-                    int itemOffset = AOrderedListSerializerDeserializer.getItemOffset(serList, startOffset, i);
-                    int token = ATypeHierarchy.getIntegerValueWithDifferentTypeTagPosition(
-                            BuiltinFunctions.SIMILARITY_JACCARD.getName(), 1, serList, itemOffset, startOffset + 1);
-                    tokens1.add(token);
-                }
-            } else {
-                lengthTokens1 =
-                        AUnorderedListSerializerDeserializer.getNumberOfItems(inputVal.getByteArray(), startOffset);
-                // read tokens
-                for (i = 0; i < lengthTokens1; i++) {
-                    int itemOffset = AUnorderedListSerializerDeserializer.getItemOffset(serList, startOffset, i);
-                    int token = ATypeHierarchy.getIntegerValueWithDifferentTypeTagPosition(
-                            BuiltinFunctions.SIMILARITY_JACCARD.getName(), 1, serList, itemOffset, startOffset + 1);
-                    tokens1.add(token);
-                }
-            }
-            // pad tokens
-            for (; i < length1; i++) {
-                tokens1.add(Integer.MAX_VALUE);
-            }
-
-            // -- - tokens2 - --
-            tokens2.reset();
-            evalTokens2.evaluate(tuple, inputVal);
-
-            serList = inputVal.getByteArray();
-            startOffset = inputVal.getStartOffset();
-            if (serList[startOffset] != ATypeTag.SERIALIZED_ORDEREDLIST_TYPE_TAG
-                    && serList[startOffset] != ATypeTag.SERIALIZED_UNORDEREDLIST_TYPE_TAG) {
-                throw new TypeMismatchException(BuiltinFunctions.SIMILARITY_JACCARD, 3, serList[startOffset],
-                        ATypeTag.SERIALIZED_ORDEREDLIST_TYPE_TAG, ATypeTag.SERIALIZED_UNORDEREDLIST_TYPE_TAG);
-            }
-
-            int lengthTokens2;
-            if (serList[startOffset] == ATypeTag.SERIALIZED_ORDEREDLIST_TYPE_TAG) {
-                lengthTokens2 =
-                        AOrderedListSerializerDeserializer.getNumberOfItems(inputVal.getByteArray(), startOffset);
-                // read tokens
-                for (i = 0; i < lengthTokens2; i++) {
-                    int itemOffset = AOrderedListSerializerDeserializer.getItemOffset(serList, startOffset, i);
-                    int token = ATypeHierarchy.getIntegerValueWithDifferentTypeTagPosition(
-                            BuiltinFunctions.SIMILARITY_JACCARD.getName(), 3, serList, itemOffset, startOffset + 1);
-                    tokens2.add(token);
-                }
-            } else {
-                lengthTokens2 =
-                        AUnorderedListSerializerDeserializer.getNumberOfItems(inputVal.getByteArray(), startOffset);
-                // read tokens
-                for (i = 0; i < lengthTokens2; i++) {
-                    int itemOffset = AUnorderedListSerializerDeserializer.getItemOffset(serList, startOffset, i);
-                    int token = ATypeHierarchy.getIntegerValueWithDifferentTypeTagPosition(
-                            BuiltinFunctions.SIMILARITY_JACCARD.getName(), 3, serList, itemOffset, startOffset + 1);
-                    tokens2.add(token);
-                }
-            }
-            // pad tokens
-            for (; i < length2; i++) {
-                tokens2.add(Integer.MAX_VALUE);
-            }
-
-            // -- - token prefix - --
-            evalTokenPrefix.evaluate(tuple, inputVal);
-            int tokenPrefix = ATypeHierarchy.getIntegerValue(BuiltinFunctions.SIMILARITY_JACCARD.getName(), 4,
-                    inputVal.getByteArray(), inputVal.getStartOffset());
-
-            //
-            // -- - position filter - --
-            //
-            SimilarityMetric.getPartialIntersectSize(tokens1.get(), 0, tokens1.length(), tokens2.get(), 0,
-                    tokens2.length(), tokenPrefix, parInter);
-            if (similarityFilters.passPositionFilter(parInter.intersectSize, parInter.posXStop, length1,
-                    parInter.posYStop, length2)) {
-
-                //
-                // -- - suffix filter - --
-                //
-                if (similarityFilters.passSuffixFilter(tokens1.get(), 0, tokens1.length(), parInter.posXStart,
-                        tokens2.get(), 0, tokens2.length(), parInter.posYStart)) {
-
-                    sim = similarityFilters.passSimilarityFilter(tokens1.get(), 0, tokens1.length(),
-                            parInter.posXStop + 1, tokens2.get(), 0, tokens2.length(), parInter.posYStop + 1,
-                            parInter.intersectSize);
-                }
-            }
-        }
-
-        try {
-            writeResult();
-        } catch (IOException e) {
-            throw new HyracksDataException(e);
-        }
-        result.set(resultStorage);
-    }
-
-    public void writeResult() throws IOException {
-        res.setValue(sim);
-        reusSerde.serialize(res, out);
-    }
-}

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/b8307794/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardSortedCheckEvaluator.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardSortedCheckEvaluator.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardSortedCheckEvaluator.java
deleted file mode 100644
index 0decd9e..0000000
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardSortedCheckEvaluator.java
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.runtime.evaluators.common;
-
-import org.apache.asterix.fuzzyjoin.similarity.SimilarityMetricJaccard;
-import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-
-public class SimilarityJaccardSortedCheckEvaluator extends SimilarityJaccardCheckEvaluator {
-
-    protected final SimilarityMetricJaccard jaccard = new SimilarityMetricJaccard();
-
-    public SimilarityJaccardSortedCheckEvaluator(IScalarEvaluatorFactory[] args, IHyracksTaskContext context)
-            throws HyracksDataException {
-        super(args, context);
-    }
-
-    @Override
-    protected float computeResult() throws HyracksDataException {
-        return jaccard.computeSimilarity(firstListIter, secondListIter, jaccThresh);
-    }
-}

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/b8307794/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardSortedEvaluator.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardSortedEvaluator.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardSortedEvaluator.java
deleted file mode 100644
index 1cd32c8..0000000
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardSortedEvaluator.java
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.runtime.evaluators.common;
-
-import org.apache.asterix.fuzzyjoin.similarity.SimilarityMetricJaccard;
-import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-
-// Assumes that both arguments are sorted by the same ordering.
-public class SimilarityJaccardSortedEvaluator extends SimilarityJaccardEvaluator {
-
-    protected final SimilarityMetricJaccard jaccard = new SimilarityMetricJaccard();
-
-    public SimilarityJaccardSortedEvaluator(IScalarEvaluatorFactory[] args, IHyracksTaskContext context)
-            throws HyracksDataException {
-        super(args, context);
-    }
-
-    @Override
-    protected float computeResult() throws HyracksDataException {
-        return jaccard.computeSimilarity(firstListIter, secondListIter);
-    }
-}

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/b8307794/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/WordTokensEvaluator.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/WordTokensEvaluator.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/WordTokensEvaluator.java
deleted file mode 100644
index e51d5cf..0000000
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/WordTokensEvaluator.java
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.runtime.evaluators.common;
-
-import java.io.DataOutput;
-import java.io.IOException;
-
-import org.apache.asterix.builders.OrderedListBuilder;
-import org.apache.asterix.om.types.AOrderedListType;
-import org.apache.asterix.om.types.BuiltinType;
-import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator;
-import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.data.std.api.IPointable;
-import org.apache.hyracks.data.std.primitive.VoidPointable;
-import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
-import org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
-import org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizer;
-
-public class WordTokensEvaluator implements IScalarEvaluator {
-    private final ArrayBackedValueStorage resultStorage = new ArrayBackedValueStorage();
-    private final DataOutput out = resultStorage.getDataOutput();
-    private final IPointable argPtr = new VoidPointable();
-    private final IScalarEvaluator stringEval;
-
-    private final IBinaryTokenizer tokenizer;
-    private final OrderedListBuilder listBuilder = new OrderedListBuilder();
-    private final AOrderedListType listType;
-
-    public WordTokensEvaluator(IScalarEvaluatorFactory[] args, IHyracksTaskContext context, IBinaryTokenizer tokenizer,
-            BuiltinType itemType) throws HyracksDataException {
-        stringEval = args[0].createScalarEvaluator(context);
-        this.tokenizer = tokenizer;
-        this.listType = new AOrderedListType(itemType, null);
-    }
-
-    @Override
-    public void evaluate(IFrameTupleReference tuple, IPointable result) throws HyracksDataException {
-        resultStorage.reset();
-        stringEval.evaluate(tuple, argPtr);
-        tokenizer.reset(argPtr.getByteArray(), argPtr.getStartOffset(), argPtr.getLength());
-        try {
-            listBuilder.reset(listType);
-            while (tokenizer.hasNext()) {
-                tokenizer.next();
-                listBuilder.addItem(tokenizer.getToken());
-            }
-            listBuilder.write(out, true);
-        } catch (IOException e) {
-            throw new HyracksDataException(e);
-        }
-        result.set(resultStorage);
-    }
-}

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/b8307794/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/CountHashedGramTokensDescriptor.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/CountHashedGramTokensDescriptor.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/CountHashedGramTokensDescriptor.java
deleted file mode 100644
index dd36671..0000000
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/CountHashedGramTokensDescriptor.java
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.runtime.evaluators.functions;
-
-import org.apache.asterix.om.functions.BuiltinFunctions;
-import org.apache.asterix.om.functions.IFunctionDescriptor;
-import org.apache.asterix.om.functions.IFunctionDescriptorFactory;
-import org.apache.asterix.om.types.BuiltinType;
-import org.apache.asterix.runtime.evaluators.base.AbstractScalarFunctionDynamicDescriptor;
-import org.apache.asterix.runtime.evaluators.common.GramTokensEvaluator;
-import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
-import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
-import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator;
-import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.HashedUTF8NGramTokenFactory;
-import org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.ITokenFactory;
-import org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.NGramUTF8StringBinaryTokenizer;
-
-public class CountHashedGramTokensDescriptor extends AbstractScalarFunctionDynamicDescriptor {
-
-    private static final long serialVersionUID = 1L;
-    public static final IFunctionDescriptorFactory FACTORY = new IFunctionDescriptorFactory() {
-        @Override
-        public IFunctionDescriptor createFunctionDescriptor() {
-            return new CountHashedGramTokensDescriptor();
-        }
-    };
-
-    @Override
-    public FunctionIdentifier getIdentifier() {
-        return BuiltinFunctions.COUNTHASHED_GRAM_TOKENS;
-    }
-
-    @Override
-    public IScalarEvaluatorFactory createEvaluatorFactory(final IScalarEvaluatorFactory[] args)
-            throws AlgebricksException {
-        return new IScalarEvaluatorFactory() {
-            private static final long serialVersionUID = 1L;
-
-            @Override
-            public IScalarEvaluator createScalarEvaluator(IHyracksTaskContext ctx) throws HyracksDataException {
-                ITokenFactory tokenFactory = new HashedUTF8NGramTokenFactory();
-                NGramUTF8StringBinaryTokenizer tokenizer =
-                        new NGramUTF8StringBinaryTokenizer(3, true, false, true, tokenFactory);
-                return new GramTokensEvaluator(args, ctx, tokenizer, BuiltinType.AINT32);
-            }
-        };
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/b8307794/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/CountHashedWordTokensDescriptor.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/CountHashedWordTokensDescriptor.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/CountHashedWordTokensDescriptor.java
deleted file mode 100644
index e12ba2e..0000000
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/CountHashedWordTokensDescriptor.java
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.runtime.evaluators.functions;
-
-import org.apache.asterix.om.functions.BuiltinFunctions;
-import org.apache.asterix.om.functions.IFunctionDescriptor;
-import org.apache.asterix.om.functions.IFunctionDescriptorFactory;
-import org.apache.asterix.om.types.BuiltinType;
-import org.apache.asterix.runtime.evaluators.base.AbstractScalarFunctionDynamicDescriptor;
-import org.apache.asterix.runtime.evaluators.common.WordTokensEvaluator;
-import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
-import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator;
-import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.DelimitedUTF8StringBinaryTokenizer;
-import org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.HashedUTF8WordTokenFactory;
-import org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizer;
-import org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.ITokenFactory;
-
-public class CountHashedWordTokensDescriptor extends AbstractScalarFunctionDynamicDescriptor {
-
-    private static final long serialVersionUID = 1L;
-    public static final IFunctionDescriptorFactory FACTORY = new IFunctionDescriptorFactory() {
-        @Override
-        public IFunctionDescriptor createFunctionDescriptor() {
-            return new CountHashedWordTokensDescriptor();
-        }
-    };
-
-    @Override
-    public FunctionIdentifier getIdentifier() {
-        return BuiltinFunctions.COUNTHASHED_WORD_TOKENS;
-    }
-
-    @Override
-    public IScalarEvaluatorFactory createEvaluatorFactory(final IScalarEvaluatorFactory[] args) {
-        return new IScalarEvaluatorFactory() {
-            private static final long serialVersionUID = 1L;
-
-            @Override
-            public IScalarEvaluator createScalarEvaluator(IHyracksTaskContext ctx) throws HyracksDataException {
-                ITokenFactory tokenFactory = new HashedUTF8WordTokenFactory();
-                IBinaryTokenizer tokenizer = new DelimitedUTF8StringBinaryTokenizer(false, true, tokenFactory);
-                return new WordTokensEvaluator(args, ctx, tokenizer, BuiltinType.AINT32);
-            }
-        };
-    }
-
-}