You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@asterixdb.apache.org by mb...@apache.org on 2018/03/14 04:08:17 UTC
[4/7] asterixdb git commit: [ASTERIXDB-2330][*DB][RT] Add IFunctionRegistrant for dynamic function registration
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/b8307794/asterixdb/asterix-fuzzyjoin/src/main/resources/META-INF/services/org.apache.asterix.om.functions.IFunctionRegistrant
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-fuzzyjoin/src/main/resources/META-INF/services/org.apache.asterix.om.functions.IFunctionRegistrant b/asterixdb/asterix-fuzzyjoin/src/main/resources/META-INF/services/org.apache.asterix.om.functions.IFunctionRegistrant
new file mode 100644
index 0000000..c981de2
--- /dev/null
+++ b/asterixdb/asterix-fuzzyjoin/src/main/resources/META-INF/services/org.apache.asterix.om.functions.IFunctionRegistrant
@@ -0,0 +1,20 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+org.apache.asterix.runtime.FuzzyJoinFunctionRegistrant
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/b8307794/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/IFunctionCollection.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/IFunctionCollection.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/IFunctionCollection.java
new file mode 100644
index 0000000..90c742c
--- /dev/null
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/IFunctionCollection.java
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.om.functions;
+
+import java.io.Serializable;
+
+public interface IFunctionCollection extends Serializable {
+ void add(IFunctionDescriptorFactory descriptorFactory);
+
+ void addGenerated(IFunctionDescriptorFactory descriptorFactory);
+}
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/b8307794/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/IFunctionRegistrant.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/IFunctionRegistrant.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/IFunctionRegistrant.java
new file mode 100644
index 0000000..2fa83fc
--- /dev/null
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/IFunctionRegistrant.java
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.om.functions;
+
+import java.io.Serializable;
+
+public interface IFunctionRegistrant extends Serializable {
+ void register(IFunctionCollection collection);
+}
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/b8307794/asterixdb/asterix-om/src/main/java/org/apache/asterix/runtime/evaluators/base/AbstractScalarFunctionDynamicDescriptor.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/runtime/evaluators/base/AbstractScalarFunctionDynamicDescriptor.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/runtime/evaluators/base/AbstractScalarFunctionDynamicDescriptor.java
new file mode 100644
index 0000000..c48fff1
--- /dev/null
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/runtime/evaluators/base/AbstractScalarFunctionDynamicDescriptor.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.runtime.evaluators.base;
+
+import org.apache.asterix.common.functions.FunctionDescriptorTag;
+import org.apache.asterix.om.functions.AbstractFunctionDescriptor;
+
+public abstract class AbstractScalarFunctionDynamicDescriptor extends AbstractFunctionDescriptor {
+
+ private static final long serialVersionUID = 1L;
+
+ @Override
+ public FunctionDescriptorTag getFunctionDescriptorTag() {
+ return FunctionDescriptorTag.SCALAR;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/b8307794/asterixdb/asterix-runtime/pom.xml
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-runtime/pom.xml b/asterixdb/asterix-runtime/pom.xml
index 239b01a..22d1bfc 100644
--- a/asterixdb/asterix-runtime/pom.xml
+++ b/asterixdb/asterix-runtime/pom.xml
@@ -92,12 +92,6 @@
<scope>compile</scope>
</dependency>
<dependency>
- <groupId>org.apache.asterix</groupId>
- <artifactId>asterix-fuzzyjoin</artifactId>
- <version>${project.version}</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
<groupId>org.apache.hyracks</groupId>
<artifactId>hyracks-storage-am-btree</artifactId>
</dependency>
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/b8307794/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/base/AbstractScalarFunctionDynamicDescriptor.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/base/AbstractScalarFunctionDynamicDescriptor.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/base/AbstractScalarFunctionDynamicDescriptor.java
deleted file mode 100644
index c48fff1..0000000
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/base/AbstractScalarFunctionDynamicDescriptor.java
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.runtime.evaluators.base;
-
-import org.apache.asterix.common.functions.FunctionDescriptorTag;
-import org.apache.asterix.om.functions.AbstractFunctionDescriptor;
-
-public abstract class AbstractScalarFunctionDynamicDescriptor extends AbstractFunctionDescriptor {
-
- private static final long serialVersionUID = 1L;
-
- @Override
- public FunctionDescriptorTag getFunctionDescriptorTag() {
- return FunctionDescriptorTag.SCALAR;
- }
-
-}
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/b8307794/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/EditDistanceCheckEvaluator.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/EditDistanceCheckEvaluator.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/EditDistanceCheckEvaluator.java
deleted file mode 100644
index dece292..0000000
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/EditDistanceCheckEvaluator.java
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.runtime.evaluators.common;
-
-import java.io.IOException;
-
-import org.apache.asterix.builders.OrderedListBuilder;
-import org.apache.asterix.common.exceptions.ErrorCode;
-import org.apache.asterix.common.exceptions.RuntimeDataException;
-import org.apache.asterix.formats.nontagged.SerializerDeserializerProvider;
-import org.apache.asterix.om.base.ABoolean;
-import org.apache.asterix.om.functions.BuiltinFunctions;
-import org.apache.asterix.om.types.AOrderedListType;
-import org.apache.asterix.om.types.ATypeTag;
-import org.apache.asterix.om.types.BuiltinType;
-import org.apache.asterix.om.types.EnumDeserializer;
-import org.apache.asterix.om.types.hierachy.ATypeHierarchy;
-import org.apache.asterix.runtime.exceptions.TypeMismatchException;
-import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator;
-import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.data.std.api.IPointable;
-import org.apache.hyracks.data.std.primitive.VoidPointable;
-import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
-import org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
-
-public class EditDistanceCheckEvaluator extends EditDistanceEvaluator {
-
- protected final IScalarEvaluator edThreshEval;
- protected int edThresh;
- private final IPointable argPtrThreshold = new VoidPointable();
- protected final OrderedListBuilder listBuilder;
- protected ArrayBackedValueStorage listItemVal;
- @SuppressWarnings("unchecked")
- protected final ISerializerDeserializer<ABoolean> booleanSerde =
- SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(BuiltinType.ABOOLEAN);
-
- public EditDistanceCheckEvaluator(IScalarEvaluatorFactory[] args, IHyracksTaskContext context)
- throws HyracksDataException {
- super(args, context);
- edThreshEval = args[2].createScalarEvaluator(context);
- listBuilder = new OrderedListBuilder();
- listItemVal = new ArrayBackedValueStorage();
- }
-
- @Override
- public void evaluate(IFrameTupleReference tuple, IPointable result) throws HyracksDataException {
- resultStorage.reset();
- firstStringEval.evaluate(tuple, argPtr1);
- firstTypeTag =
- EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(argPtr1.getByteArray()[argPtr1.getStartOffset()]);
- secondStringEval.evaluate(tuple, argPtr2);
- secondTypeTag =
- EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(argPtr2.getByteArray()[argPtr2.getStartOffset()]);
- edThreshEval.evaluate(tuple, argPtrThreshold);
-
- if (!checkArgTypes(firstTypeTag, secondTypeTag)) {
- result.set(resultStorage);
- return;
- }
- try {
- edThresh = ATypeHierarchy.getIntegerValue(BuiltinFunctions.EDIT_DISTANCE_CHECK.getName(), 2,
- argPtrThreshold.getByteArray(), argPtrThreshold.getStartOffset());
- if (edThresh < 0) {
- throw new RuntimeDataException(ErrorCode.NEGATIVE_VALUE, BuiltinFunctions.EDIT_DISTANCE_CHECK.getName(),
- 3, edThresh);
- }
- editDistance = computeResult(argPtr1, argPtr2, firstTypeTag);
- writeResult(editDistance);
- } catch (IOException e) {
- throw new HyracksDataException(e);
- }
- result.set(resultStorage);
- }
-
- @Override
- protected int computeResult(IPointable left, IPointable right, ATypeTag argType) throws HyracksDataException {
- byte[] leftBytes = left.getByteArray();
- int leftStartOffset = left.getStartOffset();
- byte[] rightBytes = right.getByteArray();
- int rightStartOffset = right.getStartOffset();
- switch (argType) {
- case STRING: {
- return ed.UTF8StringEditDistance(leftBytes, leftStartOffset + typeIndicatorSize, rightBytes,
- rightStartOffset + typeIndicatorSize, edThresh);
- }
-
- case ARRAY: {
- firstOrdListIter.reset(leftBytes, leftStartOffset);
- secondOrdListIter.reset(rightBytes, rightStartOffset);
- return (int) ed.computeSimilarity(firstOrdListIter, secondOrdListIter, edThresh);
- }
-
- default: {
- throw new TypeMismatchException(BuiltinFunctions.EDIT_DISTANCE_CHECK, 0, argType.serialize(),
- ATypeTag.SERIALIZED_STRING_TYPE_TAG, ATypeTag.SERIALIZED_ORDEREDLIST_TYPE_TAG);
- }
-
- }
- }
-
- @Override
- protected void writeResult(int ed) throws IOException {
- listBuilder.reset(new AOrderedListType(BuiltinType.ANY, "list"));
- boolean matches = (ed < 0) ? false : true;
- listItemVal.reset();
- booleanSerde.serialize(matches ? ABoolean.TRUE : ABoolean.FALSE, listItemVal.getDataOutput());
- listBuilder.addItem(listItemVal);
-
- listItemVal.reset();
- aInt64.setValue((matches) ? ed : Integer.MAX_VALUE);
- int64Serde.serialize(aInt64, listItemVal.getDataOutput());
- listBuilder.addItem(listItemVal);
- listBuilder.write(out, true);
- }
-}
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/b8307794/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/EditDistanceContainsEvaluator.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/EditDistanceContainsEvaluator.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/EditDistanceContainsEvaluator.java
deleted file mode 100644
index eaf3368..0000000
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/EditDistanceContainsEvaluator.java
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.runtime.evaluators.common;
-
-import org.apache.asterix.runtime.exceptions.TypeMismatchException;
-import org.apache.asterix.om.functions.BuiltinFunctions;
-import org.apache.asterix.om.types.ATypeTag;
-import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.data.std.api.IPointable;
-
-public class EditDistanceContainsEvaluator extends EditDistanceCheckEvaluator {
-
- public EditDistanceContainsEvaluator(IScalarEvaluatorFactory[] args, IHyracksTaskContext context)
- throws HyracksDataException {
- super(args, context);
- }
-
- @Override
- protected int computeResult(IPointable left, IPointable right, ATypeTag argType) throws HyracksDataException {
- byte[] leftBytes = left.getByteArray();
- int leftStartOffset = left.getStartOffset();
- byte[] rightBytes = right.getByteArray();
- int rightStartOffset = right.getStartOffset();
-
- switch (argType) {
- case STRING: {
- return ed.UTF8StringEditDistanceContains(leftBytes, leftStartOffset + typeIndicatorSize, rightBytes,
- rightStartOffset + typeIndicatorSize, edThresh);
- }
- case ARRAY: {
- firstOrdListIter.reset(leftBytes, leftStartOffset);
- secondOrdListIter.reset(rightBytes, rightStartOffset);
- return ed.getSimilarityContains(firstOrdListIter, secondOrdListIter, edThresh);
- }
- default: {
- throw new TypeMismatchException(BuiltinFunctions.EDIT_DISTANCE_CONTAINS, 0, argType.serialize(),
- ATypeTag.SERIALIZED_STRING_TYPE_TAG, ATypeTag.SERIALIZED_ORDEREDLIST_TYPE_TAG);
- }
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/b8307794/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/EditDistanceEvaluator.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/EditDistanceEvaluator.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/EditDistanceEvaluator.java
deleted file mode 100644
index 85fd334..0000000
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/EditDistanceEvaluator.java
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.runtime.evaluators.common;
-
-import java.io.DataOutput;
-import java.io.IOException;
-
-import org.apache.asterix.formats.nontagged.SerializerDeserializerProvider;
-import org.apache.asterix.fuzzyjoin.similarity.SimilarityMetricEditDistance;
-import org.apache.asterix.om.base.AInt64;
-import org.apache.asterix.om.base.AMutableInt64;
-import org.apache.asterix.om.functions.BuiltinFunctions;
-import org.apache.asterix.om.types.ATypeTag;
-import org.apache.asterix.om.types.BuiltinType;
-import org.apache.asterix.om.types.EnumDeserializer;
-import org.apache.asterix.runtime.exceptions.IncompatibleTypeException;
-import org.apache.asterix.runtime.exceptions.TypeMismatchException;
-import org.apache.asterix.runtime.exceptions.UnsupportedItemTypeException;
-import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator;
-import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.data.std.api.IPointable;
-import org.apache.hyracks.data.std.primitive.VoidPointable;
-import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
-import org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
-
-public class EditDistanceEvaluator implements IScalarEvaluator {
-
- // assuming type indicator in serde format
- protected final int typeIndicatorSize = 1;
-
- protected final ArrayBackedValueStorage resultStorage = new ArrayBackedValueStorage();
- protected final DataOutput out = resultStorage.getDataOutput();
- protected final IPointable argPtr1 = new VoidPointable();
- protected final IPointable argPtr2 = new VoidPointable();
- protected final IScalarEvaluator firstStringEval;
- protected final IScalarEvaluator secondStringEval;
- protected final SimilarityMetricEditDistance ed = new SimilarityMetricEditDistance();
- protected final OrderedListIterator firstOrdListIter = new OrderedListIterator();
- protected final OrderedListIterator secondOrdListIter = new OrderedListIterator();
- protected int editDistance = 0;
- protected final AMutableInt64 aInt64 = new AMutableInt64(-1);
- @SuppressWarnings("unchecked")
- protected final ISerializerDeserializer<AInt64> int64Serde =
- SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(BuiltinType.AINT64);
- protected ATypeTag itemTypeTag;
-
- protected ATypeTag firstTypeTag;
- protected ATypeTag secondTypeTag;
-
- public EditDistanceEvaluator(IScalarEvaluatorFactory[] args, IHyracksTaskContext context)
- throws HyracksDataException {
- firstStringEval = args[0].createScalarEvaluator(context);
- secondStringEval = args[1].createScalarEvaluator(context);
- }
-
- @Override
- public void evaluate(IFrameTupleReference tuple, IPointable result) throws HyracksDataException {
- resultStorage.reset();
- firstStringEval.evaluate(tuple, argPtr1);
- firstTypeTag =
- EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(argPtr1.getByteArray()[argPtr1.getStartOffset()]);
- secondStringEval.evaluate(tuple, argPtr2);
- secondTypeTag =
- EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(argPtr2.getByteArray()[argPtr2.getStartOffset()]);
-
- if (!checkArgTypes(firstTypeTag, secondTypeTag)) {
- result.set(resultStorage);
- return;
- }
-
- editDistance = computeResult(argPtr1, argPtr2, firstTypeTag);
- try {
- writeResult(editDistance);
- } catch (IOException e) {
- throw new HyracksDataException(e);
- }
- result.set(resultStorage);
- }
-
- protected int computeResult(IPointable left, IPointable right, ATypeTag argType) throws HyracksDataException {
- byte[] leftBytes = left.getByteArray();
- int leftStartOffset = left.getStartOffset();
- byte[] rightBytes = right.getByteArray();
- int rightStartOffset = right.getStartOffset();
-
- switch (argType) {
- case STRING: {
- // Passes -1 as the simThresh to calculate the edit distance
- // without applying any calculation optimizations.
- return ed.getActualUTF8StringEditDistanceVal(leftBytes, leftStartOffset + typeIndicatorSize, rightBytes,
- rightStartOffset + typeIndicatorSize, -1);
- }
- case ARRAY: {
- firstOrdListIter.reset(leftBytes, leftStartOffset);
- secondOrdListIter.reset(rightBytes, rightStartOffset);
- return (int) ed.computeSimilarity(firstOrdListIter, secondOrdListIter);
- }
- default: {
- throw new TypeMismatchException(BuiltinFunctions.EDIT_DISTANCE, 0, argType.serialize(),
- ATypeTag.SERIALIZED_STRING_TYPE_TAG, ATypeTag.SERIALIZED_ORDEREDLIST_TYPE_TAG);
- }
-
- }
- }
-
- protected boolean checkArgTypes(ATypeTag typeTag1, ATypeTag typeTag2) throws HyracksDataException {
- if (typeTag1 != typeTag2) {
- throw new IncompatibleTypeException(BuiltinFunctions.EDIT_DISTANCE, typeTag1.serialize(),
- typeTag2.serialize());
- }
-
- // Since they are equal, check one tag is enough.
- if (typeTag1 != ATypeTag.STRING && typeTag1 != ATypeTag.ARRAY) { // could be an list
- throw new TypeMismatchException(BuiltinFunctions.EDIT_DISTANCE, 0, typeTag1.serialize(),
- ATypeTag.SERIALIZED_STRING_TYPE_TAG, ATypeTag.SERIALIZED_ORDEREDLIST_TYPE_TAG);
- }
-
- if (typeTag1 == ATypeTag.ARRAY) {
- itemTypeTag = EnumDeserializer.ATYPETAGDESERIALIZER
- .deserialize(argPtr1.getByteArray()[argPtr1.getStartOffset() + 1]);
- if (itemTypeTag == ATypeTag.ANY) {
- throw new UnsupportedItemTypeException(BuiltinFunctions.EDIT_DISTANCE, itemTypeTag.serialize());
- }
- itemTypeTag = EnumDeserializer.ATYPETAGDESERIALIZER
- .deserialize(argPtr2.getByteArray()[argPtr2.getStartOffset() + 1]);
- if (itemTypeTag == ATypeTag.ANY) {
- throw new UnsupportedItemTypeException(BuiltinFunctions.EDIT_DISTANCE, itemTypeTag.serialize());
- }
- }
- return true;
- }
-
- protected void writeResult(int ed) throws IOException {
- aInt64.setValue(ed);
- int64Serde.serialize(aInt64, out);
- }
-}
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/b8307794/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/GramTokensEvaluator.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/GramTokensEvaluator.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/GramTokensEvaluator.java
deleted file mode 100644
index ef727c9..0000000
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/GramTokensEvaluator.java
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.runtime.evaluators.common;
-
-import java.io.DataOutput;
-import java.io.IOException;
-
-import org.apache.asterix.builders.OrderedListBuilder;
-import org.apache.asterix.dataflow.data.nontagged.serde.ABooleanSerializerDeserializer;
-import org.apache.asterix.om.functions.BuiltinFunctions;
-import org.apache.asterix.om.types.AOrderedListType;
-import org.apache.asterix.om.types.BuiltinType;
-import org.apache.asterix.om.types.hierachy.ATypeHierarchy;
-import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator;
-import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.data.std.api.IPointable;
-import org.apache.hyracks.data.std.primitive.VoidPointable;
-import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
-import org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
-import org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizer;
-import org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.NGramUTF8StringBinaryTokenizer;
-
-public class GramTokensEvaluator implements IScalarEvaluator {
-
- // assuming type indicator in serde format
- private final int typeIndicatorSize = 1;
-
- private final ArrayBackedValueStorage resultStorage = new ArrayBackedValueStorage();
- private final DataOutput out = resultStorage.getDataOutput();
- private final IPointable stringArg = new VoidPointable();
- private final IPointable gramLengthArg = new VoidPointable();
- private final IPointable prePostArg = new VoidPointable();
- private final IScalarEvaluator stringEval;
- private final IScalarEvaluator gramLengthEval;
- private final IScalarEvaluator prePostEval;
-
- private final NGramUTF8StringBinaryTokenizer tokenizer;
- private final OrderedListBuilder listBuilder = new OrderedListBuilder();
- private final AOrderedListType listType;
-
- public GramTokensEvaluator(IScalarEvaluatorFactory[] args, IHyracksTaskContext context, IBinaryTokenizer tokenizer,
- BuiltinType itemType) throws HyracksDataException {
- stringEval = args[0].createScalarEvaluator(context);
- gramLengthEval = args[1].createScalarEvaluator(context);
- prePostEval = args[2].createScalarEvaluator(context);
- this.tokenizer = (NGramUTF8StringBinaryTokenizer) tokenizer;
- this.listType = new AOrderedListType(itemType, null);
- }
-
- @Override
- public void evaluate(IFrameTupleReference tuple, IPointable result) throws HyracksDataException {
- resultStorage.reset();
- stringEval.evaluate(tuple, stringArg);
- gramLengthEval.evaluate(tuple, gramLengthArg);
- prePostEval.evaluate(tuple, prePostArg);
-
- int gramLength = ATypeHierarchy.getIntegerValue(BuiltinFunctions.GRAM_TOKENS.getName(), 1,
- gramLengthArg.getByteArray(), gramLengthArg.getStartOffset());
- tokenizer.setGramlength(gramLength);
- boolean prePost = ABooleanSerializerDeserializer.getBoolean(prePostArg.getByteArray(),
- prePostArg.getStartOffset() + typeIndicatorSize);
- tokenizer.setPrePost(prePost);
- tokenizer.reset(stringArg.getByteArray(), stringArg.getStartOffset(), stringArg.getLength());
-
- try {
- listBuilder.reset(listType);
- while (tokenizer.hasNext()) {
- tokenizer.next();
- listBuilder.addItem(tokenizer.getToken());
- }
- listBuilder.write(out, true);
- } catch (IOException e) {
- throw new HyracksDataException(e);
- }
- result.set(resultStorage);
- }
-}
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/b8307794/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityFiltersCache.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityFiltersCache.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityFiltersCache.java
deleted file mode 100644
index 4f5fb69..0000000
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityFiltersCache.java
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.runtime.evaluators.common;
-
-import java.io.DataInputStream;
-import java.nio.ByteBuffer;
-import java.util.Arrays;
-
-import org.apache.asterix.fuzzyjoin.similarity.SimilarityFilters;
-import org.apache.asterix.fuzzyjoin.similarity.SimilarityFiltersFactory;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.dataflow.common.comm.util.ByteBufferInputStream;
-import org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;
-
-public class SimilarityFiltersCache {
- private final UTF8StringSerializerDeserializer utf8SerDer = new UTF8StringSerializerDeserializer();
-
- private final ByteBufferInputStream bbis = new ByteBufferInputStream();
- private final DataInputStream dis = new DataInputStream(bbis);
-
- private float similarityThresholdCached = 0;
- private byte[] similarityNameBytesCached = null;
- private SimilarityFilters similarityFiltersCached = null;
-
- public SimilarityFilters get(float similarityThreshold, byte[] similarityNameBytes, int startOffset, int len)
- throws HyracksDataException {
- if (similarityThreshold != similarityThresholdCached || similarityNameBytesCached == null
- || !Arrays.equals(similarityNameBytes, similarityNameBytesCached)) {
- bbis.setByteBuffer(ByteBuffer.wrap(similarityNameBytes), startOffset + 1);
- String similarityName = utf8SerDer.deserialize(dis);
- similarityNameBytesCached = Arrays.copyOfRange(similarityNameBytes, startOffset, len);
- similarityFiltersCached =
- SimilarityFiltersFactory.getSimilarityFilters(similarityName, similarityThreshold);
- }
- return similarityFiltersCached;
- }
-}
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/b8307794/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardCheckEvaluator.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardCheckEvaluator.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardCheckEvaluator.java
deleted file mode 100644
index 60b5592..0000000
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardCheckEvaluator.java
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.runtime.evaluators.common;
-
-import java.io.IOException;
-
-import org.apache.asterix.builders.OrderedListBuilder;
-import org.apache.asterix.dataflow.data.nontagged.serde.AFloatSerializerDeserializer;
-import org.apache.asterix.formats.nontagged.SerializerDeserializerProvider;
-import org.apache.asterix.om.base.ABoolean;
-import org.apache.asterix.om.types.AOrderedListType;
-import org.apache.asterix.om.types.BuiltinType;
-import org.apache.asterix.om.types.EnumDeserializer;
-import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator;
-import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.data.std.api.IPointable;
-import org.apache.hyracks.data.std.primitive.IntegerPointable;
-import org.apache.hyracks.data.std.primitive.VoidPointable;
-import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
-import org.apache.hyracks.data.std.util.BinaryEntry;
-import org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
-
-public class SimilarityJaccardCheckEvaluator extends SimilarityJaccardEvaluator {
-
- protected final IScalarEvaluator jaccThreshEval;
- protected float jaccThresh = -1f;
- protected IPointable jaccThreshPointable = new VoidPointable();
-
- protected OrderedListBuilder listBuilder;
- protected ArrayBackedValueStorage inputVal;
- @SuppressWarnings("unchecked")
- protected final ISerializerDeserializer<ABoolean> booleanSerde =
- SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(BuiltinType.ABOOLEAN);
- protected final AOrderedListType listType = new AOrderedListType(BuiltinType.ANY, "list");
-
- public SimilarityJaccardCheckEvaluator(IScalarEvaluatorFactory[] args, IHyracksTaskContext context)
- throws HyracksDataException {
- super(args, context);
- jaccThreshEval = args[2].createScalarEvaluator(context);
- listBuilder = new OrderedListBuilder();
- inputVal = new ArrayBackedValueStorage();
- }
-
- @Override
- public void evaluate(IFrameTupleReference tuple, IPointable result) throws HyracksDataException {
- resultStorage.reset();
-
- firstOrdListEval.evaluate(tuple, argPtr1);
- secondOrdListEval.evaluate(tuple, argPtr2);
- jaccThreshEval.evaluate(tuple, jaccThreshPointable);
-
- firstTypeTag =
- EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(argPtr1.getByteArray()[argPtr1.getStartOffset()]);
- secondTypeTag =
- EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(argPtr2.getByteArray()[argPtr2.getStartOffset()]);
-
- firstItemTypeTag =
- EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(argPtr1.getByteArray()[argPtr1.getStartOffset() + 1]);
- secondItemTypeTag =
- EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(argPtr2.getByteArray()[argPtr2.getStartOffset() + 1]);
-
- jaccThresh = AFloatSerializerDeserializer.getFloat(jaccThreshPointable.getByteArray(),
- jaccThreshPointable.getStartOffset() + TYPE_INDICATOR_SIZE);
-
- if (!checkArgTypes(firstTypeTag, secondTypeTag)) {
- result.set(resultStorage);
- return;
- }
- if (prepareLists(argPtr1, argPtr2)) {
- jaccSim = computeResult();
- } else {
- jaccSim = 0.0f;
- }
- try {
- writeResult(jaccSim);
- } catch (IOException e) {
- throw new HyracksDataException(e);
- }
- result.set(resultStorage);
- }
-
- @Override
- protected int probeHashMap(AbstractAsterixListIterator probeIter, int buildListSize, int probeListSize)
- throws HyracksDataException {
- // Apply length filter.
- int lengthLowerBound = (int) Math.ceil(jaccThresh * probeListSize);
- if ((lengthLowerBound > buildListSize)
- || (buildListSize > (int) Math.floor(1.0f / jaccThresh * probeListSize))) {
- return -1;
- }
- // Probe phase: Probe items from second list, and compute intersection size.
- int intersectionSize = 0;
- int probeListCount = 0;
- int minUnionSize = buildListSize;
- while (probeIter.hasNext()) {
- probeListCount++;
- byte[] buf = probeIter.getData();
- int off = probeIter.getPos();
- int len = probeIter.getItemLen();
- keyEntry.set(buf, off, len);
- BinaryEntry entry = hashMap.get(keyEntry);
- if (entry != null) {
- // Increment second value.
- int firstValInt = IntegerPointable.getInteger(entry.getBuf(), entry.getOffset());
- // Irrelevant for the intersection size.
- if (firstValInt == 0) {
- continue;
- }
- int secondValInt = IntegerPointable.getInteger(entry.getBuf(), entry.getOffset() + 4);
- // Subtract old min value.
- intersectionSize -= (firstValInt < secondValInt) ? firstValInt : secondValInt;
- secondValInt++;
- // Add new min value.
- intersectionSize += (firstValInt < secondValInt) ? firstValInt : secondValInt;
- IntegerPointable.setInteger(entry.getBuf(), entry.getOffset() + 4, secondValInt);
- } else {
- // Could not find element in other set. Increase min union size by 1.
- minUnionSize++;
- // Check whether jaccThresh can still be satisfied if there was a mismatch.
- int maxIntersectionSize = Math.min(buildListSize, intersectionSize + (probeListSize - probeListCount));
- int lowerBound = (int) Math.floor(jaccThresh * minUnionSize);
- if (maxIntersectionSize < lowerBound) {
- // Cannot satisfy jaccThresh.
- return -1;
- }
- }
- probeIter.next();
- }
- return intersectionSize;
- }
-
- @Override
- protected void writeResult(float jacc) throws IOException {
- listBuilder.reset(listType);
- boolean matches = (jacc < jaccThresh) ? false : true;
- inputVal.reset();
- booleanSerde.serialize(matches ? ABoolean.TRUE : ABoolean.FALSE, inputVal.getDataOutput());
- listBuilder.addItem(inputVal);
-
- inputVal.reset();
- aFloat.setValue((matches) ? jacc : 0.0f);
- floatSerde.serialize(aFloat, inputVal.getDataOutput());
- listBuilder.addItem(inputVal);
-
- listBuilder.write(out, true);
- }
-}
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/b8307794/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardEvaluator.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardEvaluator.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardEvaluator.java
deleted file mode 100644
index 1e5ad3c..0000000
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardEvaluator.java
+++ /dev/null
@@ -1,277 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.runtime.evaluators.common;
-
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.Arrays;
-
-import org.apache.asterix.dataflow.data.nontagged.comparators.ListItemBinaryComparatorFactory;
-import org.apache.asterix.dataflow.data.nontagged.hash.ListItemBinaryHashFunctionFactory;
-import org.apache.asterix.formats.nontagged.SerializerDeserializerProvider;
-import org.apache.asterix.om.base.AFloat;
-import org.apache.asterix.om.base.AMutableFloat;
-import org.apache.asterix.om.functions.BuiltinFunctions;
-import org.apache.asterix.om.types.ATypeTag;
-import org.apache.asterix.om.types.BuiltinType;
-import org.apache.asterix.om.types.EnumDeserializer;
-import org.apache.asterix.runtime.evaluators.functions.BinaryHashMap;
-import org.apache.asterix.runtime.exceptions.TypeMismatchException;
-import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator;
-import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.dataflow.value.IBinaryComparator;
-import org.apache.hyracks.api.dataflow.value.IBinaryHashFunction;
-import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.data.std.api.IPointable;
-import org.apache.hyracks.data.std.primitive.IntegerPointable;
-import org.apache.hyracks.data.std.primitive.VoidPointable;
-import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
-import org.apache.hyracks.data.std.util.BinaryEntry;
-import org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
-
-public class SimilarityJaccardEvaluator implements IScalarEvaluator {
-
- // Parameters for hash table.
- protected static final int MIN_TABLE_SIZE = 100;
- protected static final int TABLE_FRAME_SIZE = 32768;
-
- // Assuming type indicator in serde format.
- protected static final int TYPE_INDICATOR_SIZE = 1;
-
- protected final ArrayBackedValueStorage resultStorage = new ArrayBackedValueStorage();
- protected final DataOutput out = resultStorage.getDataOutput();
- protected final IPointable argPtr1 = new VoidPointable();
- protected final IPointable argPtr2 = new VoidPointable();
- protected final IScalarEvaluator firstOrdListEval;
- protected final IScalarEvaluator secondOrdListEval;
-
- protected final OrderedListIterator fstOrdListIter = new OrderedListIterator();
- protected final OrderedListIterator sndOrdListIter = new OrderedListIterator();
- protected final UnorderedListIterator fstUnordListIter = new UnorderedListIterator();
- protected final UnorderedListIterator sndUnordListIter = new UnorderedListIterator();
-
- protected AbstractAsterixListIterator firstListIter;
- protected AbstractAsterixListIterator secondListIter;
-
- protected final AMutableFloat aFloat = new AMutableFloat(0);
- @SuppressWarnings("unchecked")
- protected final ISerializerDeserializer<AFloat> floatSerde =
- SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(BuiltinType.AFLOAT);
-
- protected ATypeTag firstTypeTag;
- protected ATypeTag secondTypeTag;
- protected float jaccSim = 0.0f;
- protected ATypeTag firstItemTypeTag;
- protected ATypeTag secondItemTypeTag;
-
- protected BinaryHashMap hashMap;
- protected BinaryEntry keyEntry = new BinaryEntry();
- protected BinaryEntry valEntry = new BinaryEntry();
-
- // Ignore case for strings. Defaults to true.
- protected final boolean ignoreCase = true;
-
- protected int hashTableSize = MIN_TABLE_SIZE;
-
- public SimilarityJaccardEvaluator(IScalarEvaluatorFactory[] args, IHyracksTaskContext context)
- throws HyracksDataException {
- firstOrdListEval = args[0].createScalarEvaluator(context);
- secondOrdListEval = args[1].createScalarEvaluator(context);
- byte[] emptyValBuf = new byte[8];
- Arrays.fill(emptyValBuf, (byte) 0);
- valEntry.set(emptyValBuf, 0, 8);
- }
-
- @Override
- public void evaluate(IFrameTupleReference tuple, IPointable result) throws HyracksDataException {
- resultStorage.reset();
-
- firstOrdListEval.evaluate(tuple, argPtr1);
- secondOrdListEval.evaluate(tuple, argPtr2);
-
- firstTypeTag =
- EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(argPtr1.getByteArray()[argPtr1.getStartOffset()]);
- secondTypeTag =
- EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(argPtr2.getByteArray()[argPtr2.getStartOffset()]);
-
- firstItemTypeTag =
- EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(argPtr1.getByteArray()[argPtr1.getStartOffset() + 1]);
- secondItemTypeTag =
- EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(argPtr2.getByteArray()[argPtr2.getStartOffset() + 1]);
-
- if (!checkArgTypes(firstTypeTag, secondTypeTag)) {
- result.set(resultStorage);
- return;
- }
- if (prepareLists(argPtr1, argPtr2)) {
- jaccSim = computeResult();
- } else {
- jaccSim = 0.0f;
- }
- try {
- writeResult(jaccSim);
- } catch (IOException e) {
- throw new HyracksDataException(e);
- }
- result.set(resultStorage);
- }
-
- protected boolean prepareLists(IPointable left, IPointable right) throws HyracksDataException {
- firstListIter.reset(left.getByteArray(), left.getStartOffset());
- secondListIter.reset(right.getByteArray(), right.getStartOffset());
- // Check for special case where one of the lists is empty, since list
- // types won't match.
- if (firstListIter.size() == 0 || secondListIter.size() == 0) {
- return false;
- }
-
- // Set the size of the table dynamically
- hashTableSize = Math.max(Math.max(firstListIter.size(), secondListIter.size()), MIN_TABLE_SIZE);
-
- // TODO: Check item types are compatible.
- return true;
- }
-
- protected float computeResult() throws HyracksDataException {
- // We will subtract the intersection size later to get the real union size.
- int firstListSize = firstListIter.size();
- int secondListSize = secondListIter.size();
- int unionSize = firstListSize + secondListSize;
- // Choose smaller list as build, and larger one as probe.
- AbstractAsterixListIterator buildList = (firstListSize < secondListSize) ? firstListIter : secondListIter;
- AbstractAsterixListIterator probeList = (buildList == firstListIter) ? secondListIter : firstListIter;
- int buildListSize = (buildList == firstListIter) ? firstListSize : secondListSize;
- int probeListSize = (probeList == firstListIter) ? firstListSize : secondListSize;
- ATypeTag buildItemTypeTag = (buildList == firstListIter) ? firstItemTypeTag : secondItemTypeTag;
- ATypeTag probeItemTypeTag = (probeList == firstListIter) ? firstItemTypeTag : secondItemTypeTag;
-
- setHashMap(buildItemTypeTag, probeItemTypeTag);
- buildHashMap(buildList);
- int intersectionSize = probeHashMap(probeList, buildListSize, probeListSize);
- // Special indicator for the "check" version of jaccard.
- if (intersectionSize < 0) {
- return -1;
- }
- unionSize -= intersectionSize;
- return (float) intersectionSize / (float) unionSize;
- }
-
- protected void buildHashMap(AbstractAsterixListIterator buildIter) throws HyracksDataException {
- // Build phase: Add items into hash map, starting with first list.
- // Value in map is a pair of integers. Set first integer to 1.
- IntegerPointable.setInteger(valEntry.getBuf(), 0, 1);
- while (buildIter.hasNext()) {
- byte[] buf = buildIter.getData();
- int off = buildIter.getPos();
- int len = buildIter.getItemLen();
- keyEntry.set(buf, off, len);
- BinaryEntry entry = hashMap.put(keyEntry, valEntry);
- if (entry != null) {
- // Increment value.
- int firstValInt = IntegerPointable.getInteger(entry.getBuf(), entry.getOffset());
- IntegerPointable.setInteger(entry.getBuf(), entry.getOffset(), firstValInt + 1);
- }
- buildIter.next();
- }
- }
-
- protected int probeHashMap(AbstractAsterixListIterator probeIter, int buildListSize, int probeListSize)
- throws HyracksDataException {
- // Probe phase: Probe items from second list, and compute intersection size.
- int intersectionSize = 0;
- while (probeIter.hasNext()) {
- byte[] buf = probeIter.getData();
- int off = probeIter.getPos();
- int len = probeIter.getItemLen();
- keyEntry.set(buf, off, len);
- BinaryEntry entry = hashMap.get(keyEntry);
- if (entry != null) {
- // Increment second value.
- int firstValInt = IntegerPointable.getInteger(entry.getBuf(), entry.getOffset());
- // Irrelevant for the intersection size.
- if (firstValInt == 0) {
- continue;
- }
- int secondValInt = IntegerPointable.getInteger(entry.getBuf(), entry.getOffset() + 4);
- // Subtract old min value.
- intersectionSize -= (firstValInt < secondValInt) ? firstValInt : secondValInt;
- secondValInt++;
- // Add new min value.
- intersectionSize += (firstValInt < secondValInt) ? firstValInt : secondValInt;
- IntegerPointable.setInteger(entry.getBuf(), entry.getOffset() + 4, secondValInt);
- }
- probeIter.next();
- }
- return intersectionSize;
- }
-
- protected void setHashMap(ATypeTag buildItemTypeTag, ATypeTag probeItemTypeTag) {
- if (hashMap != null) {
- hashMap.clear();
- return;
- }
-
- IBinaryHashFunction putHashFunc =
- ListItemBinaryHashFunctionFactory.INSTANCE.createBinaryHashFunction(buildItemTypeTag, ignoreCase);
- IBinaryHashFunction getHashFunc =
- ListItemBinaryHashFunctionFactory.INSTANCE.createBinaryHashFunction(probeItemTypeTag, ignoreCase);
- IBinaryComparator cmp = ListItemBinaryComparatorFactory.INSTANCE.createBinaryComparator(buildItemTypeTag,
- probeItemTypeTag, ignoreCase);
- hashMap = new BinaryHashMap(hashTableSize, TABLE_FRAME_SIZE, putHashFunc, getHashFunc, cmp);
- }
-
- protected boolean checkArgTypes(ATypeTag typeTag1, ATypeTag typeTag2) throws HyracksDataException {
- switch (typeTag1) {
- case ARRAY: {
- firstListIter = fstOrdListIter;
- break;
- }
- case MULTISET: {
- firstListIter = fstUnordListIter;
- break;
- }
- default: {
- throw new TypeMismatchException(BuiltinFunctions.SIMILARITY_JACCARD, 0, typeTag1.serialize(),
- ATypeTag.SERIALIZED_ORDEREDLIST_TYPE_TAG, ATypeTag.SERIALIZED_UNORDEREDLIST_TYPE_TAG);
- }
- }
- switch (typeTag2) {
- case ARRAY: {
- secondListIter = sndOrdListIter;
- break;
- }
- case MULTISET: {
- secondListIter = sndUnordListIter;
- break;
- }
- default: {
- throw new TypeMismatchException(BuiltinFunctions.SIMILARITY_JACCARD, 1, typeTag2.serialize(),
- ATypeTag.SERIALIZED_ORDEREDLIST_TYPE_TAG, ATypeTag.SERIALIZED_UNORDEREDLIST_TYPE_TAG);
- }
- }
- return true;
- }
-
- protected void writeResult(float jacc) throws IOException {
- aFloat.setValue(jacc);
- floatSerde.serialize(aFloat, out);
- }
-}
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/b8307794/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardPrefixEvaluator.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardPrefixEvaluator.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardPrefixEvaluator.java
deleted file mode 100644
index b70c6ad..0000000
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardPrefixEvaluator.java
+++ /dev/null
@@ -1,230 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.runtime.evaluators.common;
-
-import java.io.DataOutput;
-import java.io.IOException;
-
-import org.apache.asterix.dataflow.data.nontagged.serde.AFloatSerializerDeserializer;
-import org.apache.asterix.dataflow.data.nontagged.serde.AOrderedListSerializerDeserializer;
-import org.apache.asterix.dataflow.data.nontagged.serde.AUnorderedListSerializerDeserializer;
-import org.apache.asterix.formats.nontagged.SerializerDeserializerProvider;
-import org.apache.asterix.fuzzyjoin.IntArray;
-import org.apache.asterix.fuzzyjoin.similarity.PartialIntersect;
-import org.apache.asterix.fuzzyjoin.similarity.SimilarityFiltersJaccard;
-import org.apache.asterix.fuzzyjoin.similarity.SimilarityMetric;
-import org.apache.asterix.om.base.AFloat;
-import org.apache.asterix.om.base.AMutableFloat;
-import org.apache.asterix.om.functions.BuiltinFunctions;
-import org.apache.asterix.om.types.ATypeTag;
-import org.apache.asterix.om.types.BuiltinType;
-import org.apache.asterix.om.types.hierachy.ATypeHierarchy;
-import org.apache.asterix.runtime.exceptions.TypeMismatchException;
-import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator;
-import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.data.std.api.IPointable;
-import org.apache.hyracks.data.std.primitive.VoidPointable;
-import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
-import org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
-
-public class SimilarityJaccardPrefixEvaluator implements IScalarEvaluator {
- // assuming type indicator in serde format
- protected final int typeIndicatorSize = 1;
-
- protected final ArrayBackedValueStorage resultStorage = new ArrayBackedValueStorage();
- protected final DataOutput out = resultStorage.getDataOutput();
- protected final IPointable inputVal = new VoidPointable();
- protected final IScalarEvaluator evalLen1;
- protected final IScalarEvaluator evalTokens1;
- protected final IScalarEvaluator evalLen2;
- protected final IScalarEvaluator evalTokens2;
- protected final IScalarEvaluator evalTokenPrefix;
- protected final IScalarEvaluator evalThreshold;
-
- protected float similarityThresholdCache;
- protected SimilarityFiltersJaccard similarityFilters;
- protected final IntArray tokens1 = new IntArray();
- protected final IntArray tokens2 = new IntArray();
- protected final PartialIntersect parInter = new PartialIntersect();
-
- protected float sim = 0.0f;
-
- // result
- protected final AMutableFloat res = new AMutableFloat(0);
- @SuppressWarnings("unchecked")
- protected final ISerializerDeserializer<AFloat> reusSerde =
- SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(BuiltinType.AFLOAT);
-
- public SimilarityJaccardPrefixEvaluator(IScalarEvaluatorFactory[] args, IHyracksTaskContext context)
- throws HyracksDataException {
- evalLen1 = args[0].createScalarEvaluator(context);
- evalTokens1 = args[1].createScalarEvaluator(context);
- evalLen2 = args[2].createScalarEvaluator(context);
- evalTokens2 = args[3].createScalarEvaluator(context);
- evalTokenPrefix = args[4].createScalarEvaluator(context);
- evalThreshold = args[5].createScalarEvaluator(context);
- }
-
- @Override
- public void evaluate(IFrameTupleReference tuple, IPointable result) throws HyracksDataException {
- resultStorage.reset();
- // similarity threshold
- sim = 0;
- evalThreshold.evaluate(tuple, inputVal);
- float similarityThreshold =
- AFloatSerializerDeserializer.getFloat(inputVal.getByteArray(), inputVal.getStartOffset() + 1);
-
- if (similarityThreshold != similarityThresholdCache || similarityFilters == null) {
- similarityFilters = new SimilarityFiltersJaccard(similarityThreshold);
- similarityThresholdCache = similarityThreshold;
- }
-
- evalLen1.evaluate(tuple, inputVal);
- int length1 = ATypeHierarchy.getIntegerValue(BuiltinFunctions.SIMILARITY_JACCARD.getName(), 0,
- inputVal.getByteArray(), inputVal.getStartOffset());
- evalLen2.evaluate(tuple, inputVal);
- int length2 = ATypeHierarchy.getIntegerValue(BuiltinFunctions.SIMILARITY_JACCARD.getName(), 2,
- inputVal.getByteArray(), inputVal.getStartOffset());
-
- //
- // -- - length filter - --
- //
- if (similarityFilters.passLengthFilter(length1, length2)) {
-
- // -- - tokens1 - --
- int i;
- tokens1.reset();
- evalTokens1.evaluate(tuple, inputVal);
-
- byte[] serList = inputVal.getByteArray();
- int startOffset = inputVal.getStartOffset();
- if (serList[startOffset] != ATypeTag.SERIALIZED_ORDEREDLIST_TYPE_TAG
- && serList[startOffset] != ATypeTag.SERIALIZED_UNORDEREDLIST_TYPE_TAG) {
- throw new TypeMismatchException(BuiltinFunctions.SIMILARITY_JACCARD, 1, serList[startOffset],
- ATypeTag.SERIALIZED_ORDEREDLIST_TYPE_TAG, ATypeTag.SERIALIZED_UNORDEREDLIST_TYPE_TAG);
- }
-
- int lengthTokens1;
- if (serList[startOffset] == ATypeTag.SERIALIZED_ORDEREDLIST_TYPE_TAG) {
- lengthTokens1 =
- AOrderedListSerializerDeserializer.getNumberOfItems(inputVal.getByteArray(), startOffset);
- // read tokens
- for (i = 0; i < lengthTokens1; i++) {
- int itemOffset = AOrderedListSerializerDeserializer.getItemOffset(serList, startOffset, i);
- int token = ATypeHierarchy.getIntegerValueWithDifferentTypeTagPosition(
- BuiltinFunctions.SIMILARITY_JACCARD.getName(), 1, serList, itemOffset, startOffset + 1);
- tokens1.add(token);
- }
- } else {
- lengthTokens1 =
- AUnorderedListSerializerDeserializer.getNumberOfItems(inputVal.getByteArray(), startOffset);
- // read tokens
- for (i = 0; i < lengthTokens1; i++) {
- int itemOffset = AUnorderedListSerializerDeserializer.getItemOffset(serList, startOffset, i);
- int token = ATypeHierarchy.getIntegerValueWithDifferentTypeTagPosition(
- BuiltinFunctions.SIMILARITY_JACCARD.getName(), 1, serList, itemOffset, startOffset + 1);
- tokens1.add(token);
- }
- }
- // pad tokens
- for (; i < length1; i++) {
- tokens1.add(Integer.MAX_VALUE);
- }
-
- // -- - tokens2 - --
- tokens2.reset();
- evalTokens2.evaluate(tuple, inputVal);
-
- serList = inputVal.getByteArray();
- startOffset = inputVal.getStartOffset();
- if (serList[startOffset] != ATypeTag.SERIALIZED_ORDEREDLIST_TYPE_TAG
- && serList[startOffset] != ATypeTag.SERIALIZED_UNORDEREDLIST_TYPE_TAG) {
- throw new TypeMismatchException(BuiltinFunctions.SIMILARITY_JACCARD, 3, serList[startOffset],
- ATypeTag.SERIALIZED_ORDEREDLIST_TYPE_TAG, ATypeTag.SERIALIZED_UNORDEREDLIST_TYPE_TAG);
- }
-
- int lengthTokens2;
- if (serList[startOffset] == ATypeTag.SERIALIZED_ORDEREDLIST_TYPE_TAG) {
- lengthTokens2 =
- AOrderedListSerializerDeserializer.getNumberOfItems(inputVal.getByteArray(), startOffset);
- // read tokens
- for (i = 0; i < lengthTokens2; i++) {
- int itemOffset = AOrderedListSerializerDeserializer.getItemOffset(serList, startOffset, i);
- int token = ATypeHierarchy.getIntegerValueWithDifferentTypeTagPosition(
- BuiltinFunctions.SIMILARITY_JACCARD.getName(), 3, serList, itemOffset, startOffset + 1);
- tokens2.add(token);
- }
- } else {
- lengthTokens2 =
- AUnorderedListSerializerDeserializer.getNumberOfItems(inputVal.getByteArray(), startOffset);
- // read tokens
- for (i = 0; i < lengthTokens2; i++) {
- int itemOffset = AUnorderedListSerializerDeserializer.getItemOffset(serList, startOffset, i);
- int token = ATypeHierarchy.getIntegerValueWithDifferentTypeTagPosition(
- BuiltinFunctions.SIMILARITY_JACCARD.getName(), 3, serList, itemOffset, startOffset + 1);
- tokens2.add(token);
- }
- }
- // pad tokens
- for (; i < length2; i++) {
- tokens2.add(Integer.MAX_VALUE);
- }
-
- // -- - token prefix - --
- evalTokenPrefix.evaluate(tuple, inputVal);
- int tokenPrefix = ATypeHierarchy.getIntegerValue(BuiltinFunctions.SIMILARITY_JACCARD.getName(), 4,
- inputVal.getByteArray(), inputVal.getStartOffset());
-
- //
- // -- - position filter - --
- //
- SimilarityMetric.getPartialIntersectSize(tokens1.get(), 0, tokens1.length(), tokens2.get(), 0,
- tokens2.length(), tokenPrefix, parInter);
- if (similarityFilters.passPositionFilter(parInter.intersectSize, parInter.posXStop, length1,
- parInter.posYStop, length2)) {
-
- //
- // -- - suffix filter - --
- //
- if (similarityFilters.passSuffixFilter(tokens1.get(), 0, tokens1.length(), parInter.posXStart,
- tokens2.get(), 0, tokens2.length(), parInter.posYStart)) {
-
- sim = similarityFilters.passSimilarityFilter(tokens1.get(), 0, tokens1.length(),
- parInter.posXStop + 1, tokens2.get(), 0, tokens2.length(), parInter.posYStop + 1,
- parInter.intersectSize);
- }
- }
- }
-
- try {
- writeResult();
- } catch (IOException e) {
- throw new HyracksDataException(e);
- }
- result.set(resultStorage);
- }
-
- public void writeResult() throws IOException {
- res.setValue(sim);
- reusSerde.serialize(res, out);
- }
-}
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/b8307794/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardSortedCheckEvaluator.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardSortedCheckEvaluator.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardSortedCheckEvaluator.java
deleted file mode 100644
index 0decd9e..0000000
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardSortedCheckEvaluator.java
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.runtime.evaluators.common;
-
-import org.apache.asterix.fuzzyjoin.similarity.SimilarityMetricJaccard;
-import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-
-/**
- * Variant of {@link SimilarityJaccardCheckEvaluator} whose result is computed by
- * {@link SimilarityMetricJaccard#computeSimilarity} over the two inherited list iterators and the
- * inherited Jaccard threshold.
- *
- * NOTE(review): the "Sorted" in the name suggests both input lists must already be sorted by the
- * same ordering; nothing in this class checks that - confirm against the callers.
- */
-public class SimilarityJaccardSortedCheckEvaluator extends SimilarityJaccardCheckEvaluator {
-
-    /** Metric implementation that performs the actual similarity computation. */
-    protected final SimilarityMetricJaccard jaccard;
-
-    /**
-     * @param args
-     *            argument evaluator factories, forwarded unchanged to the superclass
-     * @param context
-     *            task context, forwarded unchanged to the superclass
-     * @throws HyracksDataException
-     *             if the superclass constructor fails
-     */
-    public SimilarityJaccardSortedCheckEvaluator(IScalarEvaluatorFactory[] args, IHyracksTaskContext context)
-            throws HyracksDataException {
-        super(args, context);
-        // Assigned right after super(...), which is exactly when a field initializer would run.
-        jaccard = new SimilarityMetricJaccard();
-    }
-
-    /**
-     * Delegates to the Jaccard metric, passing the threshold along with both list iterators.
-     *
-     * @return whatever the metric reports for the two lists and the threshold
-     */
-    @Override
-    protected float computeResult() throws HyracksDataException {
-        final float similarity = jaccard.computeSimilarity(firstListIter, secondListIter, jaccThresh);
-        return similarity;
-    }
-}
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/b8307794/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardSortedEvaluator.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardSortedEvaluator.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardSortedEvaluator.java
deleted file mode 100644
index 1cd32c8..0000000
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardSortedEvaluator.java
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.runtime.evaluators.common;
-
-import org.apache.asterix.fuzzyjoin.similarity.SimilarityMetricJaccard;
-import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-
-/**
- * Variant of {@link SimilarityJaccardEvaluator} whose result is computed by
- * {@link SimilarityMetricJaccard#computeSimilarity} over the two inherited list iterators.
- *
- * Assumes that both arguments are sorted by the same ordering.
- */
-public class SimilarityJaccardSortedEvaluator extends SimilarityJaccardEvaluator {
-
-    /** Metric implementation that performs the actual similarity computation. */
-    protected final SimilarityMetricJaccard jaccard;
-
-    /**
-     * @param args
-     *            argument evaluator factories, forwarded unchanged to the superclass
-     * @param context
-     *            task context, forwarded unchanged to the superclass
-     * @throws HyracksDataException
-     *             if the superclass constructor fails
-     */
-    public SimilarityJaccardSortedEvaluator(IScalarEvaluatorFactory[] args, IHyracksTaskContext context)
-            throws HyracksDataException {
-        super(args, context);
-        // Assigned right after super(...), which is exactly when a field initializer would run.
-        jaccard = new SimilarityMetricJaccard();
-    }
-
-    /**
-     * Delegates to the Jaccard metric using the two list iterators.
-     *
-     * @return whatever the metric reports for the two lists
-     */
-    @Override
-    protected float computeResult() throws HyracksDataException {
-        final float similarity = jaccard.computeSimilarity(firstListIter, secondListIter);
-        return similarity;
-    }
-}
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/b8307794/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/WordTokensEvaluator.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/WordTokensEvaluator.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/WordTokensEvaluator.java
deleted file mode 100644
index e51d5cf..0000000
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/WordTokensEvaluator.java
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.runtime.evaluators.common;
-
-import java.io.DataOutput;
-import java.io.IOException;
-
-import org.apache.asterix.builders.OrderedListBuilder;
-import org.apache.asterix.om.types.AOrderedListType;
-import org.apache.asterix.om.types.BuiltinType;
-import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator;
-import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.data.std.api.IPointable;
-import org.apache.hyracks.data.std.primitive.VoidPointable;
-import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
-import org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
-import org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizer;
-
-/**
- * Scalar evaluator that evaluates its first argument, runs the supplied tokenizer over the
- * resulting bytes, and returns every token as one item of a serialized ordered list.
- *
- * NOTE(review): the input is presumably a serialized string; this class does not check the type.
- */
-public class WordTokensEvaluator implements IScalarEvaluator {
-    // Buffer the token list is written into; evaluate() points its result at this storage.
-    private final ArrayBackedValueStorage resultStorage = new ArrayBackedValueStorage();
-    private final DataOutput out = resultStorage.getDataOutput();
-
-    // Receives the bytes of the evaluated first argument.
-    private final IPointable inputPtr = new VoidPointable();
-    private final IScalarEvaluator inputEval;
-
-    private final IBinaryTokenizer tokenizer;
-    private final OrderedListBuilder listBuilder = new OrderedListBuilder();
-    private final AOrderedListType listType;
-
-    /**
-     * @param args
-     *            argument evaluator factories; only {@code args[0]} is used
-     * @param context
-     *            task context used to create the argument evaluator
-     * @param tokenizer
-     *            tokenizer applied to the evaluated argument
-     * @param itemType
-     *            item type of the ordered list that is produced
-     * @throws HyracksDataException
-     *             if the argument evaluator cannot be created
-     */
-    public WordTokensEvaluator(IScalarEvaluatorFactory[] args, IHyracksTaskContext context, IBinaryTokenizer tokenizer,
-            BuiltinType itemType) throws HyracksDataException {
-        this.inputEval = args[0].createScalarEvaluator(context);
-        this.tokenizer = tokenizer;
-        this.listType = new AOrderedListType(itemType, null);
-    }
-
-    /**
-     * Tokenizes the value of the first argument for {@code tuple} and sets {@code result} to the
-     * serialized list of tokens.
-     *
-     * @throws HyracksDataException
-     *             if evaluating the argument fails, or wrapping any {@link IOException} raised while
-     *             the list is built or written
-     */
-    @Override
-    public void evaluate(IFrameTupleReference tuple, IPointable result) throws HyracksDataException {
-        resultStorage.reset();
-        inputEval.evaluate(tuple, inputPtr);
-        tokenizer.reset(inputPtr.getByteArray(), inputPtr.getStartOffset(), inputPtr.getLength());
-        try {
-            writeTokenList();
-        } catch (IOException e) {
-            throw new HyracksDataException(e);
-        }
-        result.set(resultStorage);
-    }
-
-    /** Drains the tokenizer into the list builder, then writes the finished list to {@code out}. */
-    private void writeTokenList() throws IOException {
-        listBuilder.reset(listType);
-        while (tokenizer.hasNext()) {
-            tokenizer.next();
-            listBuilder.addItem(tokenizer.getToken());
-        }
-        listBuilder.write(out, true);
-    }
-}
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/b8307794/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/CountHashedGramTokensDescriptor.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/CountHashedGramTokensDescriptor.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/CountHashedGramTokensDescriptor.java
deleted file mode 100644
index dd36671..0000000
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/CountHashedGramTokensDescriptor.java
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.runtime.evaluators.functions;
-
-import org.apache.asterix.om.functions.BuiltinFunctions;
-import org.apache.asterix.om.functions.IFunctionDescriptor;
-import org.apache.asterix.om.functions.IFunctionDescriptorFactory;
-import org.apache.asterix.om.types.BuiltinType;
-import org.apache.asterix.runtime.evaluators.base.AbstractScalarFunctionDynamicDescriptor;
-import org.apache.asterix.runtime.evaluators.common.GramTokensEvaluator;
-import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
-import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
-import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator;
-import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.HashedUTF8NGramTokenFactory;
-import org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.ITokenFactory;
-import org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.NGramUTF8StringBinaryTokenizer;
-
-/**
- * Function descriptor identified by {@link BuiltinFunctions#COUNTHASHED_GRAM_TOKENS}. The evaluators
- * it creates are {@link GramTokensEvaluator}s driven by an {@link NGramUTF8StringBinaryTokenizer}
- * whose tokens come from a {@link HashedUTF8NGramTokenFactory}.
- */
-public class CountHashedGramTokensDescriptor extends AbstractScalarFunctionDynamicDescriptor {
-
-    private static final long serialVersionUID = 1L;
-
-    /** Factory that hands out a new instance of this descriptor on every call. */
-    public static final IFunctionDescriptorFactory FACTORY = new IFunctionDescriptorFactory() {
-        @Override
-        public IFunctionDescriptor createFunctionDescriptor() {
-            return new CountHashedGramTokensDescriptor();
-        }
-    };
-
-    /** @return the identifier of the function this descriptor implements */
-    @Override
-    public FunctionIdentifier getIdentifier() {
-        return BuiltinFunctions.COUNTHASHED_GRAM_TOKENS;
-    }
-
-    /**
-     * Builds a serializable factory whose evaluators tokenize their argument into hashed n-grams.
-     *
-     * @param args
-     *            argument evaluator factories, passed through to the evaluator
-     * @return a factory that creates one {@link GramTokensEvaluator} per task context
-     */
-    @Override
-    public IScalarEvaluatorFactory createEvaluatorFactory(final IScalarEvaluatorFactory[] args)
-            throws AlgebricksException {
-        return new IScalarEvaluatorFactory() {
-            private static final long serialVersionUID = 1L;
-
-            @Override
-            public IScalarEvaluator createScalarEvaluator(IHyracksTaskContext ctx) throws HyracksDataException {
-                final ITokenFactory hashedGramFactory = new HashedUTF8NGramTokenFactory();
-                // NOTE(review): 3 is presumably the gram length and the booleans tokenizer flags -
-                // confirm against the NGramUTF8StringBinaryTokenizer constructor.
-                return new GramTokensEvaluator(args, ctx,
-                        new NGramUTF8StringBinaryTokenizer(3, true, false, true, hashedGramFactory),
-                        BuiltinType.AINT32);
-            }
-        };
-    }
-
-}
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/b8307794/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/CountHashedWordTokensDescriptor.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/CountHashedWordTokensDescriptor.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/CountHashedWordTokensDescriptor.java
deleted file mode 100644
index e12ba2e..0000000
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/CountHashedWordTokensDescriptor.java
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.runtime.evaluators.functions;
-
-import org.apache.asterix.om.functions.BuiltinFunctions;
-import org.apache.asterix.om.functions.IFunctionDescriptor;
-import org.apache.asterix.om.functions.IFunctionDescriptorFactory;
-import org.apache.asterix.om.types.BuiltinType;
-import org.apache.asterix.runtime.evaluators.base.AbstractScalarFunctionDynamicDescriptor;
-import org.apache.asterix.runtime.evaluators.common.WordTokensEvaluator;
-import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
-import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator;
-import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.DelimitedUTF8StringBinaryTokenizer;
-import org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.HashedUTF8WordTokenFactory;
-import org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizer;
-import org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.ITokenFactory;
-
-/**
- * Function descriptor identified by {@link BuiltinFunctions#COUNTHASHED_WORD_TOKENS}. The evaluators
- * it creates are {@link WordTokensEvaluator}s driven by a {@link DelimitedUTF8StringBinaryTokenizer}
- * whose tokens come from a {@link HashedUTF8WordTokenFactory}.
- */
-public class CountHashedWordTokensDescriptor extends AbstractScalarFunctionDynamicDescriptor {
-
-    private static final long serialVersionUID = 1L;
-
-    /** Factory that hands out a new instance of this descriptor on every call. */
-    public static final IFunctionDescriptorFactory FACTORY = new IFunctionDescriptorFactory() {
-        @Override
-        public IFunctionDescriptor createFunctionDescriptor() {
-            return new CountHashedWordTokensDescriptor();
-        }
-    };
-
-    /** @return the identifier of the function this descriptor implements */
-    @Override
-    public FunctionIdentifier getIdentifier() {
-        return BuiltinFunctions.COUNTHASHED_WORD_TOKENS;
-    }
-
-    /**
-     * Builds a serializable factory whose evaluators tokenize their argument into hashed words.
-     *
-     * @param args
-     *            argument evaluator factories, passed through to the evaluator
-     * @return a factory that creates one {@link WordTokensEvaluator} per task context
-     */
-    @Override
-    public IScalarEvaluatorFactory createEvaluatorFactory(final IScalarEvaluatorFactory[] args) {
-        return new IScalarEvaluatorFactory() {
-            private static final long serialVersionUID = 1L;
-
-            @Override
-            public IScalarEvaluator createScalarEvaluator(IHyracksTaskContext ctx) throws HyracksDataException {
-                final ITokenFactory hashedWordFactory = new HashedUTF8WordTokenFactory();
-                // NOTE(review): the two booleans are tokenizer flags whose meaning is not visible
-                // here - confirm against the DelimitedUTF8StringBinaryTokenizer constructor.
-                return new WordTokensEvaluator(args, ctx,
-                        new DelimitedUTF8StringBinaryTokenizer(false, true, hashedWordFactory),
-                        BuiltinType.AINT32);
-            }
-        };
-    }
-
-}