You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hivemall.apache.org by my...@apache.org on 2018/02/20 07:18:18 UTC
[1/4] incubator-hivemall git commit: Close #131: [v0.5.0-rc3] Merge
v0.5.0 branch
Repository: incubator-hivemall
Updated Branches:
refs/heads/master 448847fa2 -> 3a718713a
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-common/src/main/java/hivemall/dataset/LogisticRegressionDataGeneratorUDTFWrapper.java
----------------------------------------------------------------------
diff --git a/spark/spark-common/src/main/java/hivemall/dataset/LogisticRegressionDataGeneratorUDTFWrapper.java b/spark/spark-common/src/main/java/hivemall/dataset/LogisticRegressionDataGeneratorUDTFWrapper.java
deleted file mode 100644
index cf10ed7..0000000
--- a/spark/spark-common/src/main/java/hivemall/dataset/LogisticRegressionDataGeneratorUDTFWrapper.java
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package hivemall.dataset;
-
-import hivemall.UDTFWithOptions;
-
-import java.lang.reflect.Field;
-import java.lang.reflect.Method;
-import java.util.Random;
-
-import org.apache.commons.cli.CommandLine;
-import org.apache.commons.cli.Options;
-import org.apache.hadoop.hive.ql.exec.Description;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.udf.generic.Collector;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-
-/**
- * A wrapper of [[hivemall.dataset.LogisticRegressionDataGeneratorUDTF]]. This wrapper is needed
- * because Spark cannot handle HadoopUtils#getTaskId() correctly.
- */
-@Description(name = "lr_datagen",
- value = "_FUNC_(options string) - Generates a logistic regression dataset")
-public final class LogisticRegressionDataGeneratorUDTFWrapper extends UDTFWithOptions {
- private transient LogisticRegressionDataGeneratorUDTF udtf =
- new LogisticRegressionDataGeneratorUDTF();
-
- @Override
- protected Options getOptions() {
- Options options = null;
- try {
- Method m = udtf.getClass().getDeclaredMethod("getOptions");
- m.setAccessible(true);
- options = (Options) m.invoke(udtf);
- } catch (Exception e) {
- e.printStackTrace();
- }
- return options;
- }
-
- @SuppressWarnings("all")
- @Override
- protected CommandLine processOptions(ObjectInspector[] objectInspectors)
- throws UDFArgumentException {
- CommandLine commands = null;
- try {
- Method m = udtf.getClass().getDeclaredMethod("processOptions");
- m.setAccessible(true);
- commands = (CommandLine) m.invoke(udtf, objectInspectors);
- } catch (Exception e) {
- e.printStackTrace();
- }
- return commands;
- }
-
- @Override
- public StructObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
- try {
- // Extract a collector for LogisticRegressionDataGeneratorUDTF
- Field collector = GenericUDTF.class.getDeclaredField("collector");
- collector.setAccessible(true);
- udtf.setCollector((Collector) collector.get(this));
-
- // To avoid HadoopUtils#getTaskId()
- Class<?> clazz = udtf.getClass();
- Field rnd1 = clazz.getDeclaredField("rnd1");
- Field rnd2 = clazz.getDeclaredField("rnd2");
- Field r_seed = clazz.getDeclaredField("r_seed");
- r_seed.setAccessible(true);
- final long seed = r_seed.getLong(udtf) + (int) Thread.currentThread().getId();
- rnd1.setAccessible(true);
- rnd2.setAccessible(true);
- rnd1.set(udtf, new Random(seed));
- rnd2.set(udtf, new Random(seed + 1));
- } catch (Exception e) {
- e.printStackTrace();
- }
- return udtf.initialize(argOIs);
- }
-
- @Override
- public void process(Object[] objects) throws HiveException {
- udtf.process(objects);
- }
-
- @Override
- public void close() throws HiveException {
- udtf.close();
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-common/src/main/java/hivemall/ftvec/AddBiasUDFWrapper.java
----------------------------------------------------------------------
diff --git a/spark/spark-common/src/main/java/hivemall/ftvec/AddBiasUDFWrapper.java b/spark/spark-common/src/main/java/hivemall/ftvec/AddBiasUDFWrapper.java
deleted file mode 100644
index b454fd9..0000000
--- a/spark/spark-common/src/main/java/hivemall/ftvec/AddBiasUDFWrapper.java
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package hivemall.ftvec;
-
-import java.util.Arrays;
-import java.util.List;
-
-import org.apache.hadoop.hive.ql.exec.Description;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.udf.UDFType;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
-import org.apache.hadoop.hive.serde2.objectinspector.*;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-
-/**
- * A wrapper of [[hivemall.ftvec.AddBiasUDF]].
- *
- * NOTE: This is needed to avoid the issue of Spark reflection. That is, spark cannot handle List<>
- * as a return type in Hive UDF. Therefore, the type must be passed via ObjectInspector.
- */
-@Description(name = "add_bias",
- value = "_FUNC_(features in array<string>) - Returns features with a bias as array<string>")
-@UDFType(deterministic = true, stateful = false)
-public class AddBiasUDFWrapper extends GenericUDF {
- private AddBiasUDF udf = new AddBiasUDF();
- private ListObjectInspector argumentOI = null;
-
- @Override
- public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
- if (arguments.length != 1) {
- throw new UDFArgumentLengthException(
- "add_bias() has an single arguments: array<string> features");
- }
-
- switch (arguments[0].getCategory()) {
- case LIST:
- argumentOI = (ListObjectInspector) arguments[0];
- ObjectInspector elmOI = argumentOI.getListElementObjectInspector();
- if (elmOI.getCategory().equals(Category.PRIMITIVE)) {
- if (((PrimitiveObjectInspector) elmOI).getPrimitiveCategory() == PrimitiveCategory.STRING) {
- break;
- }
- }
- default:
- throw new UDFArgumentTypeException(0, "Type mismatch: features");
- }
-
- return ObjectInspectorFactory.getStandardListObjectInspector(argumentOI.getListElementObjectInspector());
- }
-
- @Override
- public Object evaluate(DeferredObject[] arguments) throws HiveException {
- assert (arguments.length == 1);
- @SuppressWarnings("unchecked")
- final List<String> input = (List<String>) argumentOI.getList(arguments[0].get());
- return udf.evaluate(input);
- }
-
- @Override
- public String getDisplayString(String[] children) {
- return "add_bias(" + Arrays.toString(children) + ")";
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-common/src/main/java/hivemall/ftvec/AddFeatureIndexUDFWrapper.java
----------------------------------------------------------------------
diff --git a/spark/spark-common/src/main/java/hivemall/ftvec/AddFeatureIndexUDFWrapper.java b/spark/spark-common/src/main/java/hivemall/ftvec/AddFeatureIndexUDFWrapper.java
deleted file mode 100644
index 0b687db..0000000
--- a/spark/spark-common/src/main/java/hivemall/ftvec/AddFeatureIndexUDFWrapper.java
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package hivemall.ftvec;
-
-import java.util.Arrays;
-import java.util.List;
-
-import org.apache.hadoop.hive.ql.exec.Description;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.udf.UDFType;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
-import org.apache.hadoop.hive.serde2.objectinspector.*;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-
-/**
- * A wrapper of [[hivemall.ftvec.AddFeatureIndexUDF]].
- *
- * NOTE: This is needed to avoid the issue of Spark reflection. That is, spark cannot handle List<>
- * as a return type in Hive UDF. Therefore, the type must be passed via ObjectInspector.
- */
-@Description(
- name = "add_feature_index",
- value = "_FUNC_(dense features in array<double>) - Returns a feature vector with feature indices")
-@UDFType(deterministic = true, stateful = false)
-public class AddFeatureIndexUDFWrapper extends GenericUDF {
- private AddFeatureIndexUDF udf = new AddFeatureIndexUDF();
- private ListObjectInspector argumentOI = null;
-
- @Override
- public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
- if (arguments.length != 1) {
- throw new UDFArgumentLengthException(
- "add_feature_index() has an single arguments: array<double> features");
- }
-
- switch (arguments[0].getCategory()) {
- case LIST:
- argumentOI = (ListObjectInspector) arguments[0];
- ObjectInspector elmOI = argumentOI.getListElementObjectInspector();
- if (elmOI.getCategory().equals(Category.PRIMITIVE)) {
- if (((PrimitiveObjectInspector) elmOI).getPrimitiveCategory() == PrimitiveCategory.DOUBLE) {
- break;
- }
- }
- default:
- throw new UDFArgumentTypeException(0, "Type mismatch: features");
- }
-
- return ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
- }
-
- @Override
- public Object evaluate(DeferredObject[] arguments) throws HiveException {
- assert (arguments.length == 1);
- @SuppressWarnings("unchecked")
- final List<Double> input = (List<Double>) argumentOI.getList(arguments[0].get());
- return udf.evaluate(input);
- }
-
- @Override
- public String getDisplayString(String[] children) {
- return "add_feature_index(" + Arrays.toString(children) + ")";
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-common/src/main/java/hivemall/ftvec/ExtractFeatureUDFWrapper.java
----------------------------------------------------------------------
diff --git a/spark/spark-common/src/main/java/hivemall/ftvec/ExtractFeatureUDFWrapper.java b/spark/spark-common/src/main/java/hivemall/ftvec/ExtractFeatureUDFWrapper.java
deleted file mode 100644
index 5924468..0000000
--- a/spark/spark-common/src/main/java/hivemall/ftvec/ExtractFeatureUDFWrapper.java
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package hivemall.ftvec;
-
-import java.util.Arrays;
-
-import org.apache.hadoop.hive.ql.exec.Description;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.udf.UDFType;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
-import org.apache.hadoop.hive.serde2.objectinspector.*;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-
-/**
- * A wrapper of [[hivemall.ftvec.ExtractFeatureUDF]].
- *
- * NOTE: This is needed to avoid the issue of Spark reflection. That is, spark cannot handle List<>
- * as a return type in Hive UDF. Therefore, the type must be passed via ObjectInspector.
- */
-@Description(name = "extract_feature",
- value = "_FUNC_(feature in string) - Returns a parsed feature as string")
-@UDFType(deterministic = true, stateful = false)
-public class ExtractFeatureUDFWrapper extends GenericUDF {
- private ExtractFeatureUDF udf = new ExtractFeatureUDF();
- private PrimitiveObjectInspector argumentOI = null;
-
- @Override
- public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
- if (arguments.length != 1) {
- throw new UDFArgumentLengthException(
- "extract_feature() has an single arguments: string feature");
- }
-
- argumentOI = (PrimitiveObjectInspector) arguments[0];
- if (argumentOI.getPrimitiveCategory() != PrimitiveCategory.STRING) {
- throw new UDFArgumentTypeException(0, "Type mismatch: feature");
- }
-
- return PrimitiveObjectInspectorFactory.javaStringObjectInspector;
- }
-
- @Override
- public Object evaluate(DeferredObject[] arguments) throws HiveException {
- assert (arguments.length == 1);
- final String input = (String) argumentOI.getPrimitiveJavaObject(arguments[0].get());
- return udf.evaluate(input);
- }
-
- @Override
- public String getDisplayString(String[] children) {
- return "extract_feature(" + Arrays.toString(children) + ")";
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-common/src/main/java/hivemall/ftvec/ExtractWeightUDFWrapper.java
----------------------------------------------------------------------
diff --git a/spark/spark-common/src/main/java/hivemall/ftvec/ExtractWeightUDFWrapper.java b/spark/spark-common/src/main/java/hivemall/ftvec/ExtractWeightUDFWrapper.java
deleted file mode 100644
index 8580247..0000000
--- a/spark/spark-common/src/main/java/hivemall/ftvec/ExtractWeightUDFWrapper.java
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package hivemall.ftvec;
-
-import java.util.Arrays;
-
-import org.apache.hadoop.hive.ql.exec.Description;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.udf.UDFType;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
-import org.apache.hadoop.hive.serde2.objectinspector.*;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-
-/**
- * A wrapper of [[hivemall.ftvec.ExtractWeightUDF]].
- *
- * NOTE: This is needed to avoid the issue of Spark reflection. That is, spark cannot handle List<>
- * as a return type in Hive UDF. Therefore, the type must be passed via ObjectInspector.
- */
-@Description(name = "extract_weight",
- value = "_FUNC_(feature in string) - Returns the weight of a feature as string")
-@UDFType(deterministic = true, stateful = false)
-public class ExtractWeightUDFWrapper extends GenericUDF {
- private ExtractWeightUDF udf = new ExtractWeightUDF();
- private PrimitiveObjectInspector argumentOI = null;
-
- @Override
- public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
- if (arguments.length != 1) {
- throw new UDFArgumentLengthException(
- "extract_weight() has an single arguments: string feature");
- }
-
- argumentOI = (PrimitiveObjectInspector) arguments[0];
- if (argumentOI.getPrimitiveCategory() != PrimitiveCategory.STRING) {
- throw new UDFArgumentTypeException(0, "Type mismatch: feature");
- }
-
- return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.DOUBLE);
- }
-
- @Override
- public Object evaluate(DeferredObject[] arguments) throws HiveException {
- assert (arguments.length == 1);
- final String input = (String) argumentOI.getPrimitiveJavaObject(arguments[0].get());
- return udf.evaluate(input);
- }
-
- @Override
- public String getDisplayString(String[] children) {
- return "extract_weight(" + Arrays.toString(children) + ")";
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-common/src/main/java/hivemall/ftvec/SortByFeatureUDFWrapper.java
----------------------------------------------------------------------
diff --git a/spark/spark-common/src/main/java/hivemall/ftvec/SortByFeatureUDFWrapper.java b/spark/spark-common/src/main/java/hivemall/ftvec/SortByFeatureUDFWrapper.java
deleted file mode 100644
index 584be6c..0000000
--- a/spark/spark-common/src/main/java/hivemall/ftvec/SortByFeatureUDFWrapper.java
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package hivemall.ftvec;
-
-import java.util.Arrays;
-import java.util.Map;
-
-import org.apache.hadoop.hive.ql.exec.Description;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.udf.UDFType;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
-import org.apache.hadoop.hive.serde2.objectinspector.*;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-import org.apache.hadoop.io.FloatWritable;
-import org.apache.hadoop.io.IntWritable;
-
-/**
- * A wrapper of [[hivemall.ftvec.SortByFeatureUDF]].
- *
- * NOTE: This is needed to avoid the issue of Spark reflection. That is, spark cannot handle Map<>
- * as a return type in Hive UDF. Therefore, the type must be passed via ObjectInspector.
- */
-@Description(name = "sort_by_feature",
- value = "_FUNC_(map in map<int,float>) - Returns a sorted map")
-@UDFType(deterministic = true, stateful = false)
-public class SortByFeatureUDFWrapper extends GenericUDF {
- private SortByFeatureUDF udf = new SortByFeatureUDF();
- private MapObjectInspector argumentOI = null;
-
- @Override
- public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
- if (arguments.length != 1) {
- throw new UDFArgumentLengthException(
- "sorted_by_feature() has an single arguments: map<int, float> map");
- }
-
- switch (arguments[0].getCategory()) {
- case MAP:
- argumentOI = (MapObjectInspector) arguments[0];
- ObjectInspector keyOI = argumentOI.getMapKeyObjectInspector();
- ObjectInspector valueOI = argumentOI.getMapValueObjectInspector();
- if (keyOI.getCategory().equals(Category.PRIMITIVE)
- && valueOI.getCategory().equals(Category.PRIMITIVE)) {
- final PrimitiveCategory keyCategory = ((PrimitiveObjectInspector) keyOI).getPrimitiveCategory();
- final PrimitiveCategory valueCategory = ((PrimitiveObjectInspector) valueOI).getPrimitiveCategory();
- if (keyCategory == PrimitiveCategory.INT
- && valueCategory == PrimitiveCategory.FLOAT) {
- break;
- }
- }
- default:
- throw new UDFArgumentTypeException(0, "Type mismatch: map");
- }
-
-
- return ObjectInspectorFactory.getStandardMapObjectInspector(
- argumentOI.getMapKeyObjectInspector(), argumentOI.getMapValueObjectInspector());
- }
-
- @Override
- public Object evaluate(DeferredObject[] arguments) throws HiveException {
- assert (arguments.length == 1);
- @SuppressWarnings("unchecked")
- final Map<IntWritable, FloatWritable> input = (Map<IntWritable, FloatWritable>) argumentOI.getMap(arguments[0].get());
- return udf.evaluate(input);
- }
-
- @Override
- public String getDisplayString(String[] children) {
- return "sort_by_feature(" + Arrays.toString(children) + ")";
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-common/src/main/java/hivemall/ftvec/scaling/L2NormalizationUDFWrapper.java
----------------------------------------------------------------------
diff --git a/spark/spark-common/src/main/java/hivemall/ftvec/scaling/L2NormalizationUDFWrapper.java b/spark/spark-common/src/main/java/hivemall/ftvec/scaling/L2NormalizationUDFWrapper.java
deleted file mode 100644
index db533be..0000000
--- a/spark/spark-common/src/main/java/hivemall/ftvec/scaling/L2NormalizationUDFWrapper.java
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package hivemall.ftvec.scaling;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-
-import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
-import org.apache.hadoop.hive.serde2.objectinspector.*;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-import org.apache.hadoop.io.Text;
-
-/**
- * A wrapper of [[hivemall.ftvec.scaling.L2NormalizationUDF]].
- *
- * NOTE: This is needed to avoid the issue of Spark reflection. That is, spark-1.3 cannot handle
- * List<> as a return type in Hive UDF. The type must be passed via ObjectInspector. This issues has
- * been reported in SPARK-6747, so a future release of Spark makes the wrapper obsolete.
- */
-public class L2NormalizationUDFWrapper extends GenericUDF {
- private L2NormalizationUDF udf = new L2NormalizationUDF();
-
- private transient List<Text> retValue = new ArrayList<Text>();
- private transient Converter toListText = null;
-
- @Override
- public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
- if (arguments.length != 1) {
- throw new UDFArgumentLengthException("normalize() has an only single argument.");
- }
-
- switch (arguments[0].getCategory()) {
- case LIST:
- ObjectInspector elmOI = ((ListObjectInspector) arguments[0]).getListElementObjectInspector();
- if (elmOI.getCategory().equals(Category.PRIMITIVE)) {
- if (((PrimitiveObjectInspector) elmOI).getPrimitiveCategory() == PrimitiveCategory.STRING) {
- break;
- }
- }
- default:
- throw new UDFArgumentTypeException(0,
- "normalize() must have List[String] as an argument, but "
- + arguments[0].getTypeName() + " was found.");
- }
-
- // Create a ObjectInspector converter for arguments
- ObjectInspector outputElemOI = ObjectInspectorFactory.getReflectionObjectInspector(
- Text.class, ObjectInspectorOptions.JAVA);
- ObjectInspector outputOI = ObjectInspectorFactory.getStandardListObjectInspector(outputElemOI);
- toListText = ObjectInspectorConverters.getConverter(arguments[0], outputOI);
-
- ObjectInspector listElemOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
- ObjectInspector returnElemOI = ObjectInspectorUtils.getStandardObjectInspector(listElemOI);
- return ObjectInspectorFactory.getStandardListObjectInspector(returnElemOI);
- }
-
- @Override
- public Object evaluate(DeferredObject[] arguments) throws HiveException {
- assert (arguments.length == 1);
- @SuppressWarnings("unchecked")
- final List<Text> input = (List<Text>) toListText.convert(arguments[0].get());
- retValue = udf.evaluate(input);
- return retValue;
- }
-
- @Override
- public String getDisplayString(String[] children) {
- return "normalize(" + Arrays.toString(children) + ")";
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-common/src/main/java/hivemall/knn/lsh/MinHashesUDFWrapper.java
----------------------------------------------------------------------
diff --git a/spark/spark-common/src/main/java/hivemall/knn/lsh/MinHashesUDFWrapper.java b/spark/spark-common/src/main/java/hivemall/knn/lsh/MinHashesUDFWrapper.java
deleted file mode 100644
index d3bcbe6..0000000
--- a/spark/spark-common/src/main/java/hivemall/knn/lsh/MinHashesUDFWrapper.java
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package hivemall.knn.lsh;
-
-import java.util.Arrays;
-import java.util.List;
-
-import org.apache.hadoop.hive.ql.exec.Description;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.udf.UDFType;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
-import org.apache.hadoop.hive.serde2.objectinspector.*;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
-
-/** A wrapper of [[hivemall.knn.lsh.MinHashesUDF]]. */
-@Description(
- name = "minhashes",
- value = "_FUNC_(features in array<string>, noWeight in boolean) - Returns hashed features as array<int>")
-@UDFType(deterministic = true, stateful = false)
-public class MinHashesUDFWrapper extends GenericUDF {
- private MinHashesUDF udf = new MinHashesUDF();
- private ListObjectInspector featuresOI = null;
- private PrimitiveObjectInspector noWeightOI = null;
-
- @Override
- public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
- if (arguments.length != 2) {
- throw new UDFArgumentLengthException(
- "minhashes() has 2 arguments: array<string> features, boolean noWeight");
- }
-
- // Check argument types
- switch (arguments[0].getCategory()) {
- case LIST:
- featuresOI = (ListObjectInspector) arguments[0];
- ObjectInspector elmOI = featuresOI.getListElementObjectInspector();
- if (elmOI.getCategory().equals(Category.PRIMITIVE)) {
- if (((PrimitiveObjectInspector) elmOI).getPrimitiveCategory() == PrimitiveCategory.STRING) {
- break;
- }
- }
- default:
- throw new UDFArgumentTypeException(0, "Type mismatch: features");
- }
-
- noWeightOI = (PrimitiveObjectInspector) arguments[1];
- if (noWeightOI.getPrimitiveCategory() != PrimitiveCategory.BOOLEAN) {
- throw new UDFArgumentException("Type mismatch: noWeight");
- }
-
- return ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.INT));
- }
-
- @Override
- public Object evaluate(DeferredObject[] arguments) throws HiveException {
- assert (arguments.length == 2);
- @SuppressWarnings("unchecked")
- final List<String> features = (List<String>) featuresOI.getList(arguments[0].get());
- final Boolean noWeight = PrimitiveObjectInspectorUtils.getBoolean(arguments[1].get(),
- noWeightOI);
- return udf.evaluate(features, noWeight);
- }
-
- @Override
- public String getDisplayString(String[] children) {
- /**
- * TODO: Need to return hive-specific type names.
- */
- return "minhashes(" + Arrays.toString(children) + ")";
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-common/src/main/java/hivemall/tools/mapred/RowIdUDFWrapper.java
----------------------------------------------------------------------
diff --git a/spark/spark-common/src/main/java/hivemall/tools/mapred/RowIdUDFWrapper.java b/spark/spark-common/src/main/java/hivemall/tools/mapred/RowIdUDFWrapper.java
deleted file mode 100644
index f386223..0000000
--- a/spark/spark-common/src/main/java/hivemall/tools/mapred/RowIdUDFWrapper.java
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package hivemall.tools.mapred;
-
-import java.util.UUID;
-
-import org.apache.hadoop.hive.ql.exec.Description;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.udf.UDFType;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-
-/** An alternative implementation of [[hivemall.tools.mapred.RowIdUDF]]. */
-@Description(
- name = "rowid",
- value = "_FUNC_() - Returns a generated row id of a form {TASK_ID}-{UUID}-{SEQUENCE_NUMBER}")
-@UDFType(deterministic = false, stateful = true)
-public class RowIdUDFWrapper extends GenericUDF {
- // RowIdUDF is directly used because spark cannot
- // handle HadoopUtils#getTaskId().
-
- private long sequence;
- private long taskId;
-
- public RowIdUDFWrapper() {
- this.sequence = 0L;
- this.taskId = Thread.currentThread().getId();
- }
-
- @Override
- public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
- if (arguments.length != 0) {
- throw new UDFArgumentLengthException("row_number() has no argument.");
- }
-
- return PrimitiveObjectInspectorFactory.javaStringObjectInspector;
- }
-
- @Override
- public Object evaluate(DeferredObject[] arguments) throws HiveException {
- assert (arguments.length == 0);
- sequence++;
- /**
- * TODO: Check if it is unique over all tasks in executors of Spark.
- */
- return taskId + "-" + UUID.randomUUID() + "-" + sequence;
- }
-
- @Override
- public String getDisplayString(String[] children) {
- return "row_number()";
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-common/src/main/scala/hivemall/HivemallException.scala
----------------------------------------------------------------------
diff --git a/spark/spark-common/src/main/scala/hivemall/HivemallException.scala b/spark/spark-common/src/main/scala/hivemall/HivemallException.scala
deleted file mode 100644
index 53f6756..0000000
--- a/spark/spark-common/src/main/scala/hivemall/HivemallException.scala
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package hivemall
-
-class HivemallException(message: String, cause: Throwable)
- extends Exception(message, cause) {
-
- def this(message: String) = this(message, null)
-}
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-common/src/main/scala/org/apache/spark/ml/feature/HivemallLabeledPoint.scala
----------------------------------------------------------------------
diff --git a/spark/spark-common/src/main/scala/org/apache/spark/ml/feature/HivemallLabeledPoint.scala b/spark/spark-common/src/main/scala/org/apache/spark/ml/feature/HivemallLabeledPoint.scala
deleted file mode 100644
index 3fb2d18..0000000
--- a/spark/spark-common/src/main/scala/org/apache/spark/ml/feature/HivemallLabeledPoint.scala
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.ml.feature
-
-import java.util.StringTokenizer
-
-import scala.collection.mutable.ListBuffer
-
-import hivemall.HivemallException
-
-// Used for DataFrame#explode
-case class HivemallFeature(feature: String)
-
-/**
- * Class that represents the features and labels of a data point for Hivemall.
- *
- * @param label Label for this data point.
- * @param features List of features for this data point.
- */
-case class HivemallLabeledPoint(label: Float = 0.0f, features: Seq[String]) {
- override def toString: String = {
- "%s,%s".format(label, features.mkString("[", ",", "]"))
- }
-}
-
-object HivemallLabeledPoint {
-
- // Simple parser for HivemallLabeledPoint
- def parse(s: String): HivemallLabeledPoint = {
- val (label, features) = s.indexOf(',') match {
- case d if d > 0 => (s.substring(0, d), s.substring(d + 1))
- case _ => ("0.0", "[]") // Dummy
- }
- HivemallLabeledPoint(label.toFloat, parseTuple(new StringTokenizer(features, "[],", true)))
- }
-
- // TODO: Support to parse rows without labels
- private[this] def parseTuple(tokenizer: StringTokenizer): Seq[String] = {
- val items = ListBuffer.empty[String]
- var parsing = true
- var allowDelim = false
- while (parsing && tokenizer.hasMoreTokens()) {
- val token = tokenizer.nextToken()
- if (token == "[") {
- items ++= parseTuple(tokenizer)
- parsing = false
- allowDelim = true
- } else if (token == ",") {
- if (allowDelim) {
- allowDelim = false
- } else {
- throw new HivemallException("Found ',' at a wrong position.")
- }
- } else if (token == "]") {
- parsing = false
- } else {
- items.append(token)
- allowDelim = true
- }
- }
- if (parsing) {
- throw new HivemallException(s"A tuple must end with ']'.")
- }
- items
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-common/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala
----------------------------------------------------------------------
diff --git a/spark/spark-common/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala b/spark/spark-common/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala
deleted file mode 100644
index a6bbb4b..0000000
--- a/spark/spark-common/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.streaming
-
-import scala.reflect.ClassTag
-
-import org.apache.spark.ml.feature.HivemallLabeledPoint
-import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.{DataFrame, Row, SQLContext}
-import org.apache.spark.streaming.dstream.DStream
-
-final class HivemallStreamingOps(ds: DStream[HivemallLabeledPoint]) {
-
- def predict[U: ClassTag](f: DataFrame => DataFrame)(implicit sqlContext: SQLContext)
- : DStream[Row] = {
- ds.transform[Row] { rdd: RDD[HivemallLabeledPoint] =>
- f(sqlContext.createDataFrame(rdd)).rdd
- }
- }
-}
-
-object HivemallStreamingOps {
-
- /**
- * Implicitly inject the [[HivemallStreamingOps]] into [[DStream]].
- */
- implicit def dataFrameToHivemallStreamingOps(ds: DStream[HivemallLabeledPoint])
- : HivemallStreamingOps = {
- new HivemallStreamingOps(ds)
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/src/site/resources/LICENSE-font_awesome.txt
----------------------------------------------------------------------
diff --git a/src/site/resources/LICENSE-font_awesome.txt b/src/site/resources/LICENSE-font_awesome.txt
new file mode 100644
index 0000000..ad1f9ac
--- /dev/null
+++ b/src/site/resources/LICENSE-font_awesome.txt
@@ -0,0 +1,86 @@
+-----------------------------------------------------------
+SIL OPEN FONT LICENSE Version 1.1 - 26 February 2007
+-----------------------------------------------------------
+
+PREAMBLE
+The goals of the Open Font License (OFL) are to stimulate worldwide
+development of collaborative font projects, to support the font creation
+efforts of academic and linguistic communities, and to provide a free and
+open framework in which fonts may be shared and improved in partnership
+with others.
+
+The OFL allows the licensed fonts to be used, studied, modified and
+redistributed freely as long as they are not sold by themselves. The
+fonts, including any derivative works, can be bundled, embedded,
+redistributed and/or sold with any software provided that any reserved
+names are not used by derivative works. The fonts and derivatives,
+however, cannot be released under any other type of license. The
+requirement for fonts to remain under this license does not apply
+to any document created using the fonts or their derivatives.
+
+DEFINITIONS
+"Font Software" refers to the set of files released by the Copyright
+Holder(s) under this license and clearly marked as such. This may
+include source files, build scripts and documentation.
+
+"Reserved Font Name" refers to any names specified as such after the
+copyright statement(s).
+
+"Original Version" refers to the collection of Font Software components as
+distributed by the Copyright Holder(s).
+
+"Modified Version" refers to any derivative made by adding to, deleting,
+or substituting -- in part or in whole -- any of the components of the
+Original Version, by changing formats or by porting the Font Software to a
+new environment.
+
+"Author" refers to any designer, engineer, programmer, technical
+writer or other person who contributed to the Font Software.
+
+PERMISSION & CONDITIONS
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of the Font Software, to use, study, copy, merge, embed, modify,
+redistribute, and sell modified and unmodified copies of the Font
+Software, subject to the following conditions:
+
+1) Neither the Font Software nor any of its individual components,
+in Original or Modified Versions, may be sold by itself.
+
+2) Original or Modified Versions of the Font Software may be bundled,
+redistributed and/or sold with any software, provided that each copy
+contains the above copyright notice and this license. These can be
+included either as stand-alone text files, human-readable headers or
+in the appropriate machine-readable metadata fields within text or
+binary files as long as those fields can be easily viewed by the user.
+
+3) No Modified Version of the Font Software may use the Reserved Font
+Name(s) unless explicit written permission is granted by the corresponding
+Copyright Holder. This restriction only applies to the primary font name as
+presented to the users.
+
+4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font
+Software shall not be used to promote, endorse or advertise any
+Modified Version, except to acknowledge the contribution(s) of the
+Copyright Holder(s) and the Author(s) or with their explicit written
+permission.
+
+5) The Font Software, modified or unmodified, in part or in whole,
+must be distributed entirely under this license, and must not be
+distributed under any other license. The requirement for fonts to
+remain under this license does not apply to any document created
+using the Font Software.
+
+TERMINATION
+This license becomes null and void if any of the above conditions are
+not met.
+
+DISCLAIMER
+THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
+OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE
+COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL
+DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM
+OTHER DEALINGS IN THE FONT SOFTWARE.
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/xgboost/lib/xgboost4j-0.60-0.10.jar
----------------------------------------------------------------------
diff --git a/xgboost/lib/xgboost4j-0.60-0.10.jar b/xgboost/lib/xgboost4j-0.60-0.10.jar
deleted file mode 100644
index cf1599b..0000000
Binary files a/xgboost/lib/xgboost4j-0.60-0.10.jar and /dev/null differ
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/xgboost/pom.xml
----------------------------------------------------------------------
diff --git a/xgboost/pom.xml b/xgboost/pom.xml
index b9f11b8..8dcb45e 100644
--- a/xgboost/pom.xml
+++ b/xgboost/pom.xml
@@ -16,14 +16,13 @@
specific language governing permissions and limitations
under the License.
-->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.hivemall</groupId>
<artifactId>hivemall</artifactId>
- <version>0.5.0-incubating-SNAPSHOT</version>
+ <version>0.5.1-incubating-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
@@ -32,8 +31,6 @@
<packaging>jar</packaging>
<properties>
- <xgboost.version>0.60</xgboost.version>
- <xgboost4j.version>0.10</xgboost4j.version>
<main.basedir>${project.parent.basedir}</main.basedir>
</properties>
@@ -42,69 +39,45 @@
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
- <version>${hadoop.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
- <version>${hadoop.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
- <version>${hive.version}</version>
<scope>provided</scope>
- <exclusions>
- <exclusion>
- <artifactId>jetty</artifactId>
- <groupId>org.mortbay.jetty</groupId>
- </exclusion>
- <exclusion>
- <groupId>javax.jdo</groupId>
- <artifactId>jdo2-api</artifactId>
- </exclusion>
- <exclusion>
- <groupId>asm-parent</groupId>
- <artifactId>asm-parent</artifactId>
- </exclusion>
- <exclusion>
- <groupId>asm</groupId>
- <artifactId>asm</artifactId>
- </exclusion>
- </exclusions>
</dependency>
<dependency>
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
- <version>1.2</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
- <version>1.0.4</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
- <version>1.2.17</version>
<scope>provided</scope>
</dependency>
+
+ <!-- compile scope -->
<dependency>
<groupId>org.apache.hivemall</groupId>
<artifactId>hivemall-core</artifactId>
<version>${project.version}</version>
- <scope>provided</scope>
+ <scope>compile</scope>
</dependency>
-
- <!-- compile scope -->
<dependency>
- <groupId>ml.dmlc</groupId>
+ <groupId>io.github.myui</groupId>
<artifactId>xgboost4j</artifactId>
- <version>${xgboost4j.version}</version>
+ <version>${xgboost.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
@@ -116,106 +89,4 @@
</dependency>
</dependencies>
- <build>
- <directory>target</directory>
- <outputDirectory>target/classes</outputDirectory>
- <finalName>${project.artifactId}-${project.version}</finalName>
- <testOutputDirectory>target/test-classes</testOutputDirectory>
- <plugins>
- <!-- TODO: This is hacky, so we'll replace this with another better way in a future -->
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-install-plugin</artifactId>
- <version>2.4</version>
- <executions>
- <execution>
- <id>install-xgboost</id>
- <phase>validate</phase>
- <configuration>
- <file>${basedir}/lib/xgboost4j-${xgboost.version}-${xgboost4j.version}.jar</file>
- <repositoryLayout>default</repositoryLayout>
- <groupId>ml.dmlc</groupId>
- <artifactId>xgboost4j</artifactId>
- <version>${xgboost4j.version}</version>
- <packaging>jar</packaging>
- <generatePom>true</generatePom>
- </configuration>
- <goals>
- <goal>install-file</goal>
- </goals>
- </execution>
- </executions>
- </plugin>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-shade-plugin</artifactId>
- <version>3.1.0</version>
- <executions>
- <!-- hivemall-xgboost_xx-xx.jar -->
- <execution>
- <id>jar-with-portal-binaries</id>
- <phase>package</phase>
- <goals>
- <goal>shade</goal>
- </goals>
- <configuration>
- <finalName>${project.artifactId}-${xgboost.version}-${project.version}</finalName>
- <outputDirectory>${project.parent.build.directory}</outputDirectory>
- <minimizeJar>false</minimizeJar>
- <createDependencyReducedPom>false</createDependencyReducedPom>
- <artifactSet>
- <includes>
- <include>ml.dmlc:xgboost4j</include>
- </includes>
- </artifactSet>
- <filters>
- <filter>
- <artifact>*:*</artifact>
- <excludes>
- <exclude>tracker.py</exclude>
- </excludes>
- </filter>
- </filters>
- </configuration>
- </execution>
- <!-- hivemall-xgboost_xx-xx-with-dependencies.jar including minimum dependencies -->
- <execution>
- <id>jar-with-dependencies</id>
- <phase>package</phase>
- <goals>
- <goal>shade</goal>
- </goals>
- <configuration>
- <finalName>${project.artifactId}-${xgboost.version}-${project.version}-with-dependencies</finalName>
- <outputDirectory>${project.parent.build.directory}</outputDirectory>
- <minimizeJar>true</minimizeJar>
- <createDependencyReducedPom>false</createDependencyReducedPom>
- <artifactSet>
- <includes>
- <include>org.apache.hivemall:hivemall-core</include>
- <include>io.netty:netty-all</include>
- <include>com.github.haifengl:smile-core</include>
- <include>com.github.haifengl:smile-math</include>
- <include>com.github.haifengl:smile-data</include>
- <include>org.tukaani:xz</include>
- <include>ml.dmlc:xgboost4j</include>
- <include>com.esotericsoftware.kryo:kryo</include>
- </includes>
- </artifactSet>
- <filters>
- <filter>
- <artifact>*:*</artifact>
- <excludes>
- <exclude>*.jar</exclude>
- <exclude>tracker.py</exclude>
- </excludes>
- </filter>
- </filters>
- </configuration>
- </execution>
- </executions>
- </plugin>
- </plugins>
- </build>
-
</project>
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/xgboost/src/main/java/hivemall/xgboost/XGBoostUtils.java
----------------------------------------------------------------------
diff --git a/xgboost/src/main/java/hivemall/xgboost/XGBoostUtils.java b/xgboost/src/main/java/hivemall/xgboost/XGBoostUtils.java
index 2e2bf25..0472229 100644
--- a/xgboost/src/main/java/hivemall/xgboost/XGBoostUtils.java
+++ b/xgboost/src/main/java/hivemall/xgboost/XGBoostUtils.java
@@ -48,7 +48,9 @@ public final class XGBoostUtils {
values[i] = Float.parseFloat(str.substring(pos + 1));
}
}
- return LabeledPoint.fromSparseVector((float) target, indices, values);
+
+
+ return new LabeledPoint((float) target, indices, values);
}
}
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/xgboost/src/main/java/hivemall/xgboost/tools/XGBoostMulticlassPredictUDTF.java
----------------------------------------------------------------------
diff --git a/xgboost/src/main/java/hivemall/xgboost/tools/XGBoostMulticlassPredictUDTF.java b/xgboost/src/main/java/hivemall/xgboost/tools/XGBoostMulticlassPredictUDTF.java
index fd67c09..b80f95a 100644
--- a/xgboost/src/main/java/hivemall/xgboost/tools/XGBoostMulticlassPredictUDTF.java
+++ b/xgboost/src/main/java/hivemall/xgboost/tools/XGBoostMulticlassPredictUDTF.java
@@ -18,8 +18,6 @@
*/
package hivemall.xgboost.tools;
-import hivemall.utils.lang.Preconditions;
-
import java.util.ArrayList;
import java.util.List;
@@ -32,10 +30,11 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-@Description(
- name = "xgboost_multiclass_predict",
+import hivemall.utils.lang.Preconditions;
+
+@Description(name = "xgboost_multiclass_predict",
value = "_FUNC_(string rowid, string[] features, string model_id, array<byte> pred_model [, string options]) "
- + "- Returns a prediction result as (string rowid, int label, float probability)")
+ + "- Returns a prediction result as (string rowid, string label, float probability)")
public final class XGBoostMulticlassPredictUDTF extends hivemall.xgboost.XGBoostPredictUDTF {
public XGBoostMulticlassPredictUDTF() {
@@ -65,14 +64,14 @@ public final class XGBoostMulticlassPredictUDTF extends hivemall.xgboost.XGBoost
final Object[] forwardObj = new Object[3];
for (int i = 0, size = testData.size(); i < size; i++) {
final float[] predicted_i = predicted[i];
- final String rowId = testData.get(i).getRowId();
+ String rowId = testData.get(i).getRowId();
forwardObj[0] = rowId;
assert (predicted_i.length > 1);
for (int j = 0; j < predicted_i.length; j++) {
- forwardObj[1] = j;
+ forwardObj[1] = String.valueOf(j);
float prob = predicted_i[j];
- forwardObj[2] = prob;
+ forwardObj[2] = Float.valueOf(prob);
forward(forwardObj);
}
}
[3/4] incubator-hivemall git commit: Close #131: [v0.5.0-rc3] Merge
v0.5.0 branch
Posted by my...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/docs/gitbook/binaryclass/titanic_rf.md
----------------------------------------------------------------------
diff --git a/docs/gitbook/binaryclass/titanic_rf.md b/docs/gitbook/binaryclass/titanic_rf.md
index 29784e0..2b54074 100644
--- a/docs/gitbook/binaryclass/titanic_rf.md
+++ b/docs/gitbook/binaryclass/titanic_rf.md
@@ -175,7 +175,7 @@ from
# Prediction
```sql
-SET hivevar:classification=true;
+-- SET hivevar:classification=true;
set hive.auto.convert.join=true;
SET hive.mapjoin.optimized.hashtable=false;
SET mapred.reduce.tasks=16;
@@ -202,7 +202,8 @@ FROM (
-- tree_predict(p.model_id, p.model_type, p.pred_model, t.features, ${classification}) as predicted
-- hivemall v0.5-rc.1 or later
p.model_weight,
- tree_predict(p.model_id, p.model, t.features, ${classification}) as predicted
+ tree_predict(p.model_id, p.model, t.features, "-classification") as predicted
+ -- tree_predict(p.model_id, p.model, t.features, ${classification}) as predicted
-- tree_predict_v1(p.model_id, p.model_type, p.pred_model, t.features, ${classification}) as predicted -- to use the old model in v0.5-rc.1 or later
FROM (
SELECT
@@ -319,7 +320,7 @@ from
> [116.12055542977338,960.8569891444097,291.08765260103837,469.74671636586226,163.721292772701,120.784769882858,847.9769298113661,554.4617571355476,346.3500941757221,97.42593940113392] 0.1838351822503962
```sql
-SET hivevar:classification=true;
+-- SET hivevar:classification=true;
SET hive.mapjoin.optimized.hashtable=false;
SET mapred.reduce.tasks=16;
@@ -345,7 +346,8 @@ FROM (
-- tree_predict(p.model_id, p.model_type, p.pred_model, t.features, ${classification}) as predicted
-- hivemall v0.5-rc.1 or later
p.model_weight,
- tree_predict(p.model_id, p.model, t.features, ${classification}) as predicted
+ tree_predict(p.model_id, p.model, t.features, "-classification") as predicted
+ -- tree_predict(p.model_id, p.model, t.features, ${classification}) as predicted
-- tree_predict_v1(p.model_id, p.model_type, p.pred_model, t.features, ${classification}) as predicted -- to use the old model in v0.5-rc.1 or later
FROM (
SELECT
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/docs/gitbook/multiclass/iris_randomforest.md
----------------------------------------------------------------------
diff --git a/docs/gitbook/multiclass/iris_randomforest.md b/docs/gitbook/multiclass/iris_randomforest.md
index b421297..bfc197f 100644
--- a/docs/gitbook/multiclass/iris_randomforest.md
+++ b/docs/gitbook/multiclass/iris_randomforest.md
@@ -206,7 +206,7 @@ from
# Prediction
```sql
-set hivevar:classification=true;
+-- set hivevar:classification=true;
set hive.auto.convert.join=true;
set hive.mapjoin.optimized.hashtable=false;
@@ -225,7 +225,8 @@ FROM (
-- tree_predict(p.model_id, p.model_type, p.pred_model, t.features, ${classification}) as predicted
-- hivemall v0.5-rc.1 or later
p.model_weight,
- tree_predict(p.model_id, p.model, t.features, ${classification}) as predicted
+ tree_predict(p.model_id, p.model, t.features, "-classification") as predicted
+ -- tree_predict(p.model_id, p.model, t.features, ${classification}) as predicted
-- tree_predict_v1(p.model_id, p.model_type, p.pred_model, t.features, ${classification}) as predicted -- to use the old model in v0.5-rc.1 or later
FROM
model p
@@ -265,7 +266,8 @@ FROM (
-- tree_predict(p.model_id, p.model_type, p.pred_model, t.features, ${classification}) as predicted
-- hivemall v0.5-rc.1 or later
p.model_weight,
- tree_predict(p.model_id, p.model, t.features, ${classification}) as predicted
+ tree_predict(p.model_id, p.model, t.features, "-classification") as predicted
+ -- tree_predict(p.model_id, p.model, t.features, ${classification}) as predicted
-- tree_predict_v1(p.model_id, p.model_type, p.pred_model, t.features, ${classification}) as predicted as predicted -- to use the old model in v0.5-rc.1 or later
FROM (
SELECT
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/mixserv/pom.xml
----------------------------------------------------------------------
diff --git a/mixserv/pom.xml b/mixserv/pom.xml
index 0a1b387..ff27b09 100644
--- a/mixserv/pom.xml
+++ b/mixserv/pom.xml
@@ -16,14 +16,13 @@
specific language governing permissions and limitations
under the License.
-->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.hivemall</groupId>
<artifactId>hivemall</artifactId>
- <version>0.5.0-incubating-SNAPSHOT</version>
+ <version>0.5.1-incubating-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
@@ -40,49 +39,26 @@
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
- <version>${hadoop.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
- <version>${hadoop.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
- <version>${hive.version}</version>
<scope>provided</scope>
- <exclusions>
- <exclusion>
- <artifactId>jetty</artifactId>
- <groupId>org.mortbay.jetty</groupId>
- </exclusion>
- <exclusion>
- <groupId>javax.jdo</groupId>
- <artifactId>jdo2-api</artifactId>
- </exclusion>
- <exclusion>
- <groupId>asm-parent</groupId>
- <artifactId>asm-parent</artifactId>
- </exclusion>
- <exclusion>
- <groupId>asm</groupId>
- <artifactId>asm</artifactId>
- </exclusion>
- </exclusions>
</dependency>
<dependency>
<groupId>javax.jdo</groupId>
<artifactId>jdo2-api</artifactId>
- <version>2.3-eb</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
- <version>${guava.version}</version>
<scope>provided</scope>
</dependency>
@@ -103,19 +79,16 @@
<dependency>
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
- <version>1.2</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
- <version>1.0.4</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
- <version>1.2.17</version>
<scope>compile</scope>
</dependency>
<dependency>
@@ -130,28 +103,21 @@
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
- <version>${junit.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-all</artifactId>
- <version>1.10.19</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
- <directory>target</directory>
- <outputDirectory>target/classes</outputDirectory>
- <finalName>${project.artifactId}-${project.version}</finalName>
- <testOutputDirectory>target/test-classes</testOutputDirectory>
<plugins>
<!-- hivemall-mixserv-xx-fat.jar including all dependencies -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
- <version>3.1.0</version>
<executions>
<execution>
<id>jar-with-dependencies</id>
@@ -170,7 +136,7 @@
<include>commons-cli:commons-cli</include>
<include>commons-logging:commons-logging</include>
<include>log4j:log4j</include>
- <include>io.netty:netty-all</include>
+ <include>io.netty.netty-all</include>
</includes>
</artifactSet>
<!-- maven-shade-plugin cannot handle the dependency of log4j because
@@ -198,8 +164,7 @@
</filter>
</filters>
<transformers>
- <transformer
- implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
+ <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<manifestEntries>
<Main-Class>hivemall.mix.server.MixServer</Main-Class>
<Implementation-Title>${project.name}</Implementation-Title>
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/nlp/pom.xml
----------------------------------------------------------------------
diff --git a/nlp/pom.xml b/nlp/pom.xml
index dc77c06..782e41d 100644
--- a/nlp/pom.xml
+++ b/nlp/pom.xml
@@ -16,14 +16,13 @@
specific language governing permissions and limitations
under the License.
-->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.hivemall</groupId>
<artifactId>hivemall</artifactId>
- <version>0.5.0-incubating-SNAPSHOT</version>
+ <version>0.5.1-incubating-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
@@ -40,77 +39,51 @@
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
- <version>${hadoop.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
- <version>${hadoop.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
- <version>${hive.version}</version>
<scope>provided</scope>
- <exclusions>
- <exclusion>
- <artifactId>jetty</artifactId>
- <groupId>org.mortbay.jetty</groupId>
- </exclusion>
- <exclusion>
- <groupId>javax.jdo</groupId>
- <artifactId>jdo2-api</artifactId>
- </exclusion>
- <exclusion>
- <groupId>asm-parent</groupId>
- <artifactId>asm-parent</artifactId>
- </exclusion>
- <exclusion>
- <groupId>asm</groupId>
- <artifactId>asm</artifactId>
- </exclusion>
- </exclusions>
</dependency>
<dependency>
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
- <version>1.2</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
- <version>1.0.4</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
- <version>1.2.17</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>javax.jdo</groupId>
<artifactId>jdo2-api</artifactId>
- <version>2.3-eb</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
- <version>${guava.version}</version>
<scope>provided</scope>
</dependency>
+
+ <!-- compile scope -->
<dependency>
<groupId>org.apache.hivemall</groupId>
<artifactId>hivemall-core</artifactId>
<version>${project.version}</version>
- <scope>provided</scope>
+ <scope>compile</scope>
</dependency>
-
- <!-- compile scope -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-kuromoji</artifactId>
@@ -128,7 +101,6 @@
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
- <version>${junit.version}</version>
<scope>test</scope>
</dependency>
<dependency>
@@ -140,98 +112,4 @@
</dependencies>
- <build>
- <directory>target</directory>
- <outputDirectory>target/classes</outputDirectory>
- <finalName>${project.artifactId}-${project.version}</finalName>
- <testOutputDirectory>target/test-classes</testOutputDirectory>
- <plugins>
- <!-- hivemall-nlp-xx.jar -->
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-jar-plugin</artifactId>
- <version>2.5</version>
- <configuration>
- <finalName>${project.artifactId}-${project.version}</finalName>
- <outputDirectory>${project.parent.build.directory}</outputDirectory>
- </configuration>
- </plugin>
- <!-- hivemall-nlp-xx-with-dependencies.jar including minimum dependencies -->
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-shade-plugin</artifactId>
- <version>3.1.0</version>
- <executions>
- <execution>
- <id>jar-with-dependencies</id>
- <phase>package</phase>
- <goals>
- <goal>shade</goal>
- </goals>
- <configuration>
- <finalName>${project.artifactId}-${project.version}-with-dependencies</finalName>
- <outputDirectory>${project.parent.build.directory}</outputDirectory>
- <minimizeJar>true</minimizeJar>
- <createDependencyReducedPom>false</createDependencyReducedPom>
- <artifactSet>
- <includes>
- <include>org.apache.hivemall:hivemall-core</include>
- <include>org.apache.lucene:lucene-analyzers-kuromoji</include>
- <include>org.apache.lucene:lucene-analyzers-smartcn</include>
- <include>org.apache.lucene:lucene-analyzers-common</include>
- <include>org.apache.lucene:lucene-core</include>
- </includes>
- </artifactSet>
- <filters>
- <filter>
- <artifact>*:*</artifact>
- <excludes>
- <exclude>META-INF/LICENSE.txt</exclude>
- </excludes>
- </filter>
- <filter>
- <artifact>org.apache.lucene:lucene-analyzers-kuromoji</artifact>
- <includes>
- <include>**</include>
- </includes>
- </filter>
- <filter>
- <artifact>org.apache.lucene:lucene-analyzers-smartcn</artifact>
- <includes>
- <include>**</include>
- </includes>
- </filter>
- <filter>
- <artifact>org.apache.lucene:lucene-analyzers-common</artifact>
- <includes>
- <include>**</include>
- </includes>
- </filter>
- <filter>
- <artifact>org.apache.lucene:lucene-core</artifact>
- <includes>
- <include>**</include>
- </includes>
- </filter>
- </filters>
- <transformers>
- <transformer
- implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
- <manifestEntries>
- <Implementation-Title>${project.name}</Implementation-Title>
- <Implementation-Version>${project.version}</Implementation-Version>
- <Implementation-Vendor>${project.organization.name}</Implementation-Vendor>
- </manifestEntries>
- </transformer>
- <transformer implementation="org.apache.maven.plugins.shade.resource.ApacheNoticeResourceTransformer">
- <addHeader>false</addHeader>
- </transformer>
- </transformers>
- </configuration>
- </execution>
- </executions>
- </plugin>
- </plugins>
- </build>
-
</project>
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/nlp/src/main/java/hivemall/nlp/tokenizer/KuromojiUDF.java
----------------------------------------------------------------------
diff --git a/nlp/src/main/java/hivemall/nlp/tokenizer/KuromojiUDF.java b/nlp/src/main/java/hivemall/nlp/tokenizer/KuromojiUDF.java
index 93fd18c..411c89e 100644
--- a/nlp/src/main/java/hivemall/nlp/tokenizer/KuromojiUDF.java
+++ b/nlp/src/main/java/hivemall/nlp/tokenizer/KuromojiUDF.java
@@ -19,15 +19,19 @@
package hivemall.nlp.tokenizer;
import hivemall.utils.hadoop.HiveUtils;
-import hivemall.utils.io.IOUtils;
import hivemall.utils.io.HttpUtils;
+import hivemall.utils.io.IOUtils;
+import hivemall.utils.lang.ExceptionUtils;
+import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
-import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.net.HttpURLConnection;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CodingErrorAction;
+import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
@@ -55,8 +59,7 @@ import org.apache.lucene.analysis.ja.dict.UserDictionary;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.CharArraySet;
-@Description(
- name = "tokenize_ja",
+@Description(name = "tokenize_ja",
value = "_FUNC_(String line [, const string mode = \"normal\", const array<string> stopWords, const array<string> stopTags, const array<string> userDict (or string userDictURL)])"
+ " - returns tokenized strings in array<string>")
@UDFType(deterministic = true, stateful = false)
@@ -77,20 +80,21 @@ public final class KuromojiUDF extends GenericUDF {
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
final int arglen = arguments.length;
if (arglen < 1 || arglen > 5) {
- throw new UDFArgumentException("Invalid number of arguments for `tokenize_ja`: "
- + arglen);
+ throw new UDFArgumentException(
+ "Invalid number of arguments for `tokenize_ja`: " + arglen);
}
this._mode = (arglen >= 2) ? tokenizationMode(arguments[1]) : Mode.NORMAL;
- this._stopWords = (arglen >= 3) ? stopWords(arguments[2])
- : JapaneseAnalyzer.getDefaultStopSet();
- this._stopTags = (arglen >= 4) ? stopTags(arguments[3])
- : JapaneseAnalyzer.getDefaultStopTags();
+ this._stopWords =
+ (arglen >= 3) ? stopWords(arguments[2]) : JapaneseAnalyzer.getDefaultStopSet();
+ this._stopTags =
+ (arglen >= 4) ? stopTags(arguments[3]) : JapaneseAnalyzer.getDefaultStopTags();
this._userDict = (arglen >= 5) ? userDictionary(arguments[4]) : null;
this._analyzer = null;
- return ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableStringObjectInspector);
+ return ObjectInspectorFactory.getStandardListObjectInspector(
+ PrimitiveObjectInspectorFactory.writableStringObjectInspector);
}
@Override
@@ -219,7 +223,8 @@ public final class KuromojiUDF extends GenericUDF {
return UserDictionary.open(reader); // return null if empty
} catch (Throwable e) {
throw new UDFArgumentException(
- "Failed to create user dictionary based on the given array<string>: " + e);
+ "Failed to create user dictionary based on the given array<string>: "
+ + builder.toString() + '\n' + ExceptionUtils.prettyPrintStackTrace(e));
}
}
@@ -234,7 +239,8 @@ public final class KuromojiUDF extends GenericUDF {
try {
conn = HttpUtils.getHttpURLConnection(userDictURL);
} catch (IllegalArgumentException | IOException e) {
- throw new UDFArgumentException("Failed to create HTTP connection to the URL: " + e);
+ throw new UDFArgumentException("Failed to create HTTP connection to the URL: "
+ + userDictURL + '\n' + ExceptionUtils.prettyPrintStackTrace(e));
}
// allow to read as a compressed GZIP file for efficiency
@@ -247,7 +253,8 @@ public final class KuromojiUDF extends GenericUDF {
try {
responseCode = conn.getResponseCode();
} catch (IOException e) {
- throw new UDFArgumentException("Failed to get response code: " + e);
+ throw new UDFArgumentException("Failed to get response code: " + userDictURL + '\n'
+ + ExceptionUtils.prettyPrintStackTrace(e));
}
if (responseCode != 200) {
throw new UDFArgumentException("Got invalid response code: " + responseCode);
@@ -255,17 +262,24 @@ public final class KuromojiUDF extends GenericUDF {
final InputStream is;
try {
- is = IOUtils.decodeInputStream(HttpUtils.getLimitedInputStream(conn,
- MAX_INPUT_STREAM_SIZE));
+ is = IOUtils.decodeInputStream(
+ HttpUtils.getLimitedInputStream(conn, MAX_INPUT_STREAM_SIZE));
} catch (NullPointerException | IOException e) {
- throw new UDFArgumentException("Failed to get input stream from the connection: " + e);
+ throw new UDFArgumentException("Failed to get input stream from the connection: "
+ + userDictURL + '\n' + ExceptionUtils.prettyPrintStackTrace(e));
}
- final Reader reader = new InputStreamReader(is);
+ CharsetDecoder decoder =
+ StandardCharsets.UTF_8.newDecoder()
+ .onMalformedInput(CodingErrorAction.REPORT)
+ .onUnmappableCharacter(CodingErrorAction.REPORT);
+ final Reader reader = new InputStreamReader(is, decoder);
try {
return UserDictionary.open(reader); // return null if empty
} catch (Throwable e) {
- throw new UDFArgumentException("Failed to parse the file in CSV format: " + e);
+ throw new UDFArgumentException(
+ "Failed to parse the file in CSV format (UTF-8 encoding is expected): "
+ + userDictURL + '\n' + ExceptionUtils.prettyPrintStackTrace(e));
}
}
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index e9c19dd..e594006 100644
--- a/pom.xml
+++ b/pom.xml
@@ -16,13 +16,12 @@
specific language governing permissions and limitations
under the License.
-->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.apache.hivemall</groupId>
<artifactId>hivemall</artifactId>
- <version>0.5.0-incubating-SNAPSHOT</version>
+ <version>0.5.1-incubating-SNAPSHOT</version>
<parent>
<groupId>org.apache</groupId>
@@ -51,7 +50,8 @@
<url>https://git-wip-us.apache.org/repos/asf/incubator-hivemall.git</url>
<connection>scm:git:https://git-wip-us.apache.org/repos/asf/incubator-hivemall.git</connection>
<developerConnection>scm:git:https://git-wip-us.apache.org/repos/asf/incubator-hivemall.git</developerConnection>
- </scm>
+ <tag>v0.5.0-rc1</tag>
+ </scm>
<mailingLists>
<mailingList>
@@ -152,8 +152,8 @@
<name>Tsuyoshi Ozawa</name>
<email>ozawa[at]apache.org</email>
<url>https://people.apache.org/~ozawa/</url>
- <organization></organization>
- <organizationUrl></organizationUrl>
+ <organization />
+ <organizationUrl />
<roles>
<role>PPMC Member</role>
</roles>
@@ -249,15 +249,14 @@
<module>nlp</module>
<module>xgboost</module>
<module>mixserv</module>
+ <module>spark</module>
+ <module>dist</module>
</modules>
<properties>
- <java.source.version>1.7</java.source.version>
- <java.target.version>1.7</java.target.version>
+ <main.basedir>${project.basedir}</main.basedir>
<maven.compiler.source>1.7</maven.compiler.source>
<maven.compiler.target>1.7</maven.compiler.target>
- <scala.version>2.11.8</scala.version>
- <scala.binary.version>2.11</scala.binary.version>
<maven.build.timestamp.format>yyyy</maven.build.timestamp.format>
<build.year>${maven.build.timestamp}</build.year>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
@@ -269,9 +268,9 @@
<guava.version>11.0.2</guava.version>
<junit.version>4.12</junit.version>
<dependency.locations.enabled>false</dependency.locations.enabled>
- <main.basedir>${project.basedir}</main.basedir>
- <maven-enforcer-plugin.version>3.0.0-M1</maven-enforcer-plugin.version>
+ <maven-enforcer.requireMavenVersion>[3.3.1,)</maven-enforcer.requireMavenVersion>
<surefire.version>2.19.1</surefire.version>
+ <xgboost.version>0.7-rc2</xgboost.version>
</properties>
<distributionManagement>
@@ -315,113 +314,6 @@
<profiles>
<profile>
- <id>spark-2.2</id>
- <modules>
- <module>spark/spark-2.2</module>
- <module>spark/spark-common</module>
- </modules>
- <properties>
- <spark.version>2.2.0</spark.version>
- <spark.binary.version>2.2</spark.binary.version>
- </properties>
- <build>
- <plugins>
- <!-- Spark-2.2 only supports Java 8 -->
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-enforcer-plugin</artifactId>
- <version>${maven-enforcer-plugin.version}</version>
- <executions>
- <execution>
- <id>enforce-versions</id>
- <phase>validate</phase>
- <goals>
- <goal>enforce</goal>
- </goals>
- <configuration>
- <rules>
- <requireProperty>
- <property>java.source.version</property>
- <regex>1.8</regex>
- <regexMessage>When -Pspark-2.2 set, java.source.version must be 1.8</regexMessage>
- </requireProperty>
- <requireProperty>
- <property>java.target.version</property>
- <regex>1.8</regex>
- <regexMessage>When -Pspark-2.2 set, java.target.version must be 1.8</regexMessage>
- </requireProperty>
- </rules>
- </configuration>
- </execution>
- </executions>
- </plugin>
- </plugins>
- </build>
- </profile>
- <profile>
- <id>spark-2.1</id>
- <modules>
- <module>spark/spark-2.1</module>
- <module>spark/spark-common</module>
- </modules>
- <properties>
- <spark.version>2.1.1</spark.version>
- <spark.binary.version>2.1</spark.binary.version>
- </properties>
- </profile>
- <profile>
- <id>spark-2.0</id>
- <modules>
- <module>spark/spark-2.0</module>
- <module>spark/spark-common</module>
- </modules>
- <properties>
- <spark.version>2.0.2</spark.version>
- <spark.binary.version>2.0</spark.binary.version>
- </properties>
- </profile>
- <profile>
- <id>java7</id>
- <properties>
- <spark.test.jvm.opts>-ea -Xms768m -Xmx1024m -XX:PermSize=128m -XX:MaxPermSize=512m -XX:ReservedCodeCacheSize=512m</spark.test.jvm.opts>
- </properties>
- <activation>
- <jdk>[,1.8)</jdk> <!-- version < 1.8 -->
- </activation>
- </profile>
- <profile>
- <id>java8</id>
- <properties>
- <spark.test.jvm.opts>-ea -Xms768m -Xmx1024m -XX:MetaspaceSize=128m -XX:MaxMetaspaceSize=512m -XX:ReservedCodeCacheSize=512m</spark.test.jvm.opts>
- </properties>
- <activation>
- <jdk>[1.8,)</jdk> <!-- version >= 1.8 -->
- </activation>
- </profile>
- <profile>
- <id>compile-xgboost</id>
- <build>
- <plugins>
- <plugin>
- <artifactId>exec-maven-plugin</artifactId>
- <groupId>org.codehaus.mojo</groupId>
- <executions>
- <execution>
- <id>native</id>
- <phase>generate-sources</phase>
- <goals>
- <goal>exec</goal>
- </goals>
- <configuration>
- <executable>./bin/build_xgboost.sh</executable>
- </configuration>
- </execution>
- </executions>
- </plugin>
- </plugins>
- </build>
- </profile>
- <profile>
<id>doclint-java8-disable</id>
<activation>
<jdk>[1.8,)</jdk>
@@ -432,6 +324,110 @@
</profile>
</profiles>
+ <dependencyManagement>
+ <dependencies>
+ <!-- provided scope -->
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-common</artifactId>
+ <version>${hadoop.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-mapreduce-client-core</artifactId>
+ <version>${hadoop.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hive</groupId>
+ <artifactId>hive-exec</artifactId>
+ <version>${hive.version}</version>
+ <scope>provided</scope>
+ <exclusions>
+ <exclusion>
+ <artifactId>jetty</artifactId>
+ <groupId>org.mortbay.jetty</groupId>
+ </exclusion>
+ <exclusion>
+ <groupId>javax.jdo</groupId>
+ <artifactId>jdo2-api</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>asm-parent</groupId>
+ <artifactId>asm-parent</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>asm</groupId>
+ <artifactId>asm</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>commons-cli</groupId>
+ <artifactId>commons-cli</artifactId>
+ <version>1.2</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>commons-logging</groupId>
+ <artifactId>commons-logging</artifactId>
+ <version>1.0.4</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ <version>1.2.17</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>javax.jdo</groupId>
+ <artifactId>jdo2-api</artifactId>
+ <version>2.3-eb</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ <version>${guava.version}</version>
+ <scope>provided</scope>
+ </dependency>
+
+ <!-- test scope -->
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <version>${junit.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.mockito</groupId>
+ <artifactId>mockito-all</artifactId>
+ <version>1.10.19</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.mockito</groupId>
+ <artifactId>mockito-core</artifactId>
+ <version>1.10.19</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.powermock</groupId>
+ <artifactId>powermock-module-junit4</artifactId>
+ <version>1.6.3</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.powermock</groupId>
+ <artifactId>powermock-api-mockito</artifactId>
+ <version>1.6.3</version>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+ </dependencyManagement>
+
<build>
<directory>target</directory>
<outputDirectory>target/classes</outputDirectory>
@@ -441,6 +437,25 @@
<pluginManagement>
<plugins>
<plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-jar-plugin</artifactId>
+ <version>3.0.2</version>
+ <configuration>
+ <finalName>${project.artifactId}-${project.version}</finalName>
+ <outputDirectory>${main.basedir}/target</outputDirectory>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-shade-plugin</artifactId>
+ <version>3.1.0</version>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-enforcer-plugin</artifactId>
+ <version>3.0.0-M1</version>
+ </plugin>
+ <plugin>
<!-- mvn formatter:format -->
<groupId>net.revelc.code</groupId>
<artifactId>formatter-maven-plugin</artifactId>
@@ -475,6 +490,11 @@
<useDefaultExcludes>false</useDefaultExcludes>
<excludes>
<exclude>docs/gitbook/node_modules/**</exclude>
+ <exclude>target/</exclude>
+ <exclude>src/main/java/hivemall/utils/codec/Base91.java</exclude>
+ <exclude>src/main/java/hivemall/utils/math/FastMath.java</exclude>
+ <exclude>src/main/java/hivemall/smile/classification/DecisionTree.java</exclude>
+ <exclude>src/main/java/hivemall/smile/regression/RegressionTree.java</exclude>
</excludes>
<encoding>UTF-8</encoding>
<headerDefinitions>
@@ -575,14 +595,42 @@
<artifactId>maven-enforcer-plugin</artifactId>
<executions>
<execution>
- <id>enforce-maven</id>
+ <id>enforce-JAVA_HOME-is-set</id>
+ <goals>
+ <goal>enforce</goal>
+ </goals>
+ <configuration>
+ <rules>
+ <requireEnvironmentVariable>
+ <variableName>JAVA_HOME</variableName>
+ </requireEnvironmentVariable>
+ </rules>
+ <fail>true</fail>
+ </configuration>
+ </execution>
+ <execution>
+ <id>enforce-JAVA8_HOME-is-set</id>
+ <goals>
+ <goal>enforce</goal>
+ </goals>
+ <configuration>
+ <rules>
+ <requireEnvironmentVariable>
+ <variableName>JAVA8_HOME</variableName>
+ </requireEnvironmentVariable>
+ </rules>
+ <fail>true</fail>
+ </configuration>
+ </execution>
+ <execution>
+ <id>required-maven-version</id>
<goals>
<goal>enforce</goal>
</goals>
<configuration>
<rules>
<requireMavenVersion>
- <version>[3.3.1,)</version>
+ <version>${maven-enforcer.requireMavenVersion}</version>
</requireMavenVersion>
</rules>
</configuration>
@@ -610,8 +658,8 @@
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
- <source>${java.source.version}</source>
- <target>${java.target.version}</target>
+ <source>${maven.compiler.source}</source>
+ <target>${maven.compiler.target}</target>
<debug>true</debug>
<debuglevel>lines,vars,source</debuglevel>
<encoding>UTF-8</encoding>
@@ -688,30 +736,6 @@
</dependencies>
</plugin>
<!-- end mvn site -->
- <plugin>
- <groupId>org.scalastyle</groupId>
- <artifactId>scalastyle-maven-plugin</artifactId>
- <version>0.8.0</version>
- <configuration>
- <verbose>false</verbose>
- <failOnViolation>true</failOnViolation>
- <includeTestSourceDirectory>true</includeTestSourceDirectory>
- <failOnWarning>false</failOnWarning>
- <sourceDirectory>${basedir}/src/main/scala</sourceDirectory>
- <testSourceDirectory>${basedir}/src/test/scala</testSourceDirectory>
- <configLocation>spark/spark-common/scalastyle-config.xml</configLocation>
- <outputFile>${basedir}/target/scalastyle-output.xml</outputFile>
- <inputEncoding>${project.build.sourceEncoding}</inputEncoding>
- <outputEncoding>${project.reporting.outputEncoding}</outputEncoding>
- </configuration>
- <executions>
- <execution>
- <goals>
- <goal>check</goal>
- </goals>
- </execution>
- </executions>
- </plugin>
<!-- mvn apache-rat:check -->
<plugin>
<groupId>org.apache.rat</groupId>
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/common/pom.xml
----------------------------------------------------------------------
diff --git a/spark/common/pom.xml b/spark/common/pom.xml
new file mode 100644
index 0000000..a6262e8
--- /dev/null
+++ b/spark/common/pom.xml
@@ -0,0 +1,64 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <groupId>org.apache.hivemall</groupId>
+ <artifactId>hivemall-spark</artifactId>
+ <version>0.5.1-incubating-SNAPSHOT</version>
+ <relativePath>../pom.xml</relativePath>
+ </parent>
+
+ <artifactId>hivemall-spark-common</artifactId>
+ <name>Hivemall on Spark Common</name>
+ <packaging>jar</packaging>
+
+ <properties>
+ <main.basedir>${project.parent.parent.basedir}</main.basedir>
+ </properties>
+
+ <dependencies>
+ <!-- provided scope -->
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-common</artifactId>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-mapreduce-client-core</artifactId>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hive</groupId>
+ <artifactId>hive-exec</artifactId>
+ <scope>provided</scope>
+ </dependency>
+
+ <!-- compile scope -->
+ <dependency>
+ <groupId>org.apache.hivemall</groupId>
+ <artifactId>hivemall-core</artifactId>
+ <scope>compile</scope>
+ </dependency>
+ </dependencies>
+
+</project>
+
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/common/src/main/java/hivemall/dataset/LogisticRegressionDataGeneratorUDTFWrapper.java
----------------------------------------------------------------------
diff --git a/spark/common/src/main/java/hivemall/dataset/LogisticRegressionDataGeneratorUDTFWrapper.java b/spark/common/src/main/java/hivemall/dataset/LogisticRegressionDataGeneratorUDTFWrapper.java
new file mode 100644
index 0000000..cf10ed7
--- /dev/null
+++ b/spark/common/src/main/java/hivemall/dataset/LogisticRegressionDataGeneratorUDTFWrapper.java
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall.dataset;
+
+import hivemall.UDTFWithOptions;
+
+import java.lang.reflect.Field;
+import java.lang.reflect.Method;
+import java.util.Random;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.Options;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.generic.Collector;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+
+/**
+ * A wrapper of [[hivemall.dataset.LogisticRegressionDataGeneratorUDTF]]. This wrapper is needed
+ * because Spark cannot handle HadoopUtils#getTaskId() correctly.
+ */
+@Description(name = "lr_datagen",
+ value = "_FUNC_(options string) - Generates a logistic regression dataset")
+public final class LogisticRegressionDataGeneratorUDTFWrapper extends UDTFWithOptions {
+ private transient LogisticRegressionDataGeneratorUDTF udtf =
+ new LogisticRegressionDataGeneratorUDTF();
+
+ @Override
+ protected Options getOptions() {
+ Options options = null;
+ try {
+ Method m = udtf.getClass().getDeclaredMethod("getOptions");
+ m.setAccessible(true);
+ options = (Options) m.invoke(udtf);
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ return options;
+ }
+
+ @SuppressWarnings("all")
+ @Override
+ protected CommandLine processOptions(ObjectInspector[] objectInspectors)
+ throws UDFArgumentException {
+ CommandLine commands = null;
+ try {
+ Method m = udtf.getClass().getDeclaredMethod("processOptions");
+ m.setAccessible(true);
+ commands = (CommandLine) m.invoke(udtf, objectInspectors);
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ return commands;
+ }
+
+ @Override
+ public StructObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
+ try {
+ // Extract a collector for LogisticRegressionDataGeneratorUDTF
+ Field collector = GenericUDTF.class.getDeclaredField("collector");
+ collector.setAccessible(true);
+ udtf.setCollector((Collector) collector.get(this));
+
+ // To avoid HadoopUtils#getTaskId()
+ Class<?> clazz = udtf.getClass();
+ Field rnd1 = clazz.getDeclaredField("rnd1");
+ Field rnd2 = clazz.getDeclaredField("rnd2");
+ Field r_seed = clazz.getDeclaredField("r_seed");
+ r_seed.setAccessible(true);
+ final long seed = r_seed.getLong(udtf) + (int) Thread.currentThread().getId();
+ rnd1.setAccessible(true);
+ rnd2.setAccessible(true);
+ rnd1.set(udtf, new Random(seed));
+ rnd2.set(udtf, new Random(seed + 1));
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ return udtf.initialize(argOIs);
+ }
+
+ @Override
+ public void process(Object[] objects) throws HiveException {
+ udtf.process(objects);
+ }
+
+ @Override
+ public void close() throws HiveException {
+ udtf.close();
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/common/src/main/java/hivemall/ftvec/AddBiasUDFWrapper.java
----------------------------------------------------------------------
diff --git a/spark/common/src/main/java/hivemall/ftvec/AddBiasUDFWrapper.java b/spark/common/src/main/java/hivemall/ftvec/AddBiasUDFWrapper.java
new file mode 100644
index 0000000..b454fd9
--- /dev/null
+++ b/spark/common/src/main/java/hivemall/ftvec/AddBiasUDFWrapper.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall.ftvec;
+
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.UDFType;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.serde2.objectinspector.*;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+
+/**
+ * A wrapper of [[hivemall.ftvec.AddBiasUDF]].
+ *
+ * NOTE: This is needed to avoid the issue of Spark reflection. That is, spark cannot handle List<>
+ * as a return type in Hive UDF. Therefore, the type must be passed via ObjectInspector.
+ */
+@Description(name = "add_bias",
+ value = "_FUNC_(features in array<string>) - Returns features with a bias as array<string>")
+@UDFType(deterministic = true, stateful = false)
+public class AddBiasUDFWrapper extends GenericUDF {
+ // Wrapped Hivemall UDF that performs the actual bias insertion.
+ private AddBiasUDF udf = new AddBiasUDF();
+ // Inspector of the single array<string> argument, captured in initialize().
+ private ListObjectInspector argumentOI = null;
+
+ /**
+ * Validates that the single argument is array<string> and returns a
+ * standard list OI that reuses the argument's element inspector.
+ */
+ @Override
+ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
+ if (arguments.length != 1) {
+ throw new UDFArgumentLengthException(
+ "add_bias() has an single arguments: array<string> features");
+ }
+
+ // Intentional fall-through: anything other than LIST-of-STRING reaches
+ // the default case and is rejected.
+ switch (arguments[0].getCategory()) {
+ case LIST:
+ argumentOI = (ListObjectInspector) arguments[0];
+ ObjectInspector elmOI = argumentOI.getListElementObjectInspector();
+ if (elmOI.getCategory().equals(Category.PRIMITIVE)) {
+ if (((PrimitiveObjectInspector) elmOI).getPrimitiveCategory() == PrimitiveCategory.STRING) {
+ break;
+ }
+ }
+ default:
+ throw new UDFArgumentTypeException(0, "Type mismatch: features");
+ }
+
+ return ObjectInspectorFactory.getStandardListObjectInspector(argumentOI.getListElementObjectInspector());
+ }
+
+ /** Extracts the feature list from the deferred argument and delegates to the wrapped UDF. */
+ @Override
+ public Object evaluate(DeferredObject[] arguments) throws HiveException {
+ assert (arguments.length == 1);
+ @SuppressWarnings("unchecked")
+ final List<String> input = (List<String>) argumentOI.getList(arguments[0].get());
+ return udf.evaluate(input);
+ }
+
+ /** Human-readable form used in EXPLAIN output. */
+ @Override
+ public String getDisplayString(String[] children) {
+ return "add_bias(" + Arrays.toString(children) + ")";
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/common/src/main/java/hivemall/ftvec/AddFeatureIndexUDFWrapper.java
----------------------------------------------------------------------
diff --git a/spark/common/src/main/java/hivemall/ftvec/AddFeatureIndexUDFWrapper.java b/spark/common/src/main/java/hivemall/ftvec/AddFeatureIndexUDFWrapper.java
new file mode 100644
index 0000000..0b687db
--- /dev/null
+++ b/spark/common/src/main/java/hivemall/ftvec/AddFeatureIndexUDFWrapper.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall.ftvec;
+
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.UDFType;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.serde2.objectinspector.*;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+
+/**
+ * A wrapper of [[hivemall.ftvec.AddFeatureIndexUDF]].
+ *
+ * NOTE: This is needed to avoid the issue of Spark reflection. That is, spark cannot handle List<>
+ * as a return type in Hive UDF. Therefore, the type must be passed via ObjectInspector.
+ */
+@Description(
+ name = "add_feature_index",
+ value = "_FUNC_(dense features in array<double>) - Returns a feature vector with feature indices")
+@UDFType(deterministic = true, stateful = false)
+public class AddFeatureIndexUDFWrapper extends GenericUDF {
+ // Wrapped Hivemall UDF that attaches index prefixes to dense features.
+ private AddFeatureIndexUDF udf = new AddFeatureIndexUDF();
+ // Inspector of the single array<double> argument, captured in initialize().
+ private ListObjectInspector argumentOI = null;
+
+ /**
+ * Validates that the single argument is array<double>; the output is
+ * array<string> (indexed features), hence the java-string element OI.
+ */
+ @Override
+ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
+ if (arguments.length != 1) {
+ throw new UDFArgumentLengthException(
+ "add_feature_index() has an single arguments: array<double> features");
+ }
+
+ // Intentional fall-through: anything other than LIST-of-DOUBLE reaches
+ // the default case and is rejected.
+ switch (arguments[0].getCategory()) {
+ case LIST:
+ argumentOI = (ListObjectInspector) arguments[0];
+ ObjectInspector elmOI = argumentOI.getListElementObjectInspector();
+ if (elmOI.getCategory().equals(Category.PRIMITIVE)) {
+ if (((PrimitiveObjectInspector) elmOI).getPrimitiveCategory() == PrimitiveCategory.DOUBLE) {
+ break;
+ }
+ }
+ default:
+ throw new UDFArgumentTypeException(0, "Type mismatch: features");
+ }
+
+ return ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
+ }
+
+ /** Extracts the dense feature list and delegates to the wrapped UDF. */
+ @Override
+ public Object evaluate(DeferredObject[] arguments) throws HiveException {
+ assert (arguments.length == 1);
+ @SuppressWarnings("unchecked")
+ final List<Double> input = (List<Double>) argumentOI.getList(arguments[0].get());
+ return udf.evaluate(input);
+ }
+
+ /** Human-readable form used in EXPLAIN output. */
+ @Override
+ public String getDisplayString(String[] children) {
+ return "add_feature_index(" + Arrays.toString(children) + ")";
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/common/src/main/java/hivemall/ftvec/ExtractFeatureUDFWrapper.java
----------------------------------------------------------------------
diff --git a/spark/common/src/main/java/hivemall/ftvec/ExtractFeatureUDFWrapper.java b/spark/common/src/main/java/hivemall/ftvec/ExtractFeatureUDFWrapper.java
new file mode 100644
index 0000000..5924468
--- /dev/null
+++ b/spark/common/src/main/java/hivemall/ftvec/ExtractFeatureUDFWrapper.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall.ftvec;
+
+import java.util.Arrays;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.UDFType;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.serde2.objectinspector.*;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+
+/**
+ * A wrapper of [[hivemall.ftvec.ExtractFeatureUDF]].
+ *
+ * NOTE: This is needed to avoid the issue of Spark reflection. That is, spark cannot handle List<>
+ * as a return type in Hive UDF. Therefore, the type must be passed via ObjectInspector.
+ */
+@Description(name = "extract_feature",
+ value = "_FUNC_(feature in string) - Returns a parsed feature as string")
+@UDFType(deterministic = true, stateful = false)
+public class ExtractFeatureUDFWrapper extends GenericUDF {
+ // Wrapped Hivemall UDF that parses the feature part out of "feature:weight".
+ private ExtractFeatureUDF udf = new ExtractFeatureUDF();
+ // Inspector of the single string argument, captured in initialize().
+ private PrimitiveObjectInspector argumentOI = null;
+
+ /** Validates that the single argument is a string; output is a java string. */
+ @Override
+ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
+ if (arguments.length != 1) {
+ throw new UDFArgumentLengthException(
+ "extract_feature() has an single arguments: string feature");
+ }
+
+ // NOTE(review): the cast assumes a primitive-category argument; a
+ // non-primitive input would raise ClassCastException rather than the
+ // type-mismatch error below — confirm callers guarantee this.
+ argumentOI = (PrimitiveObjectInspector) arguments[0];
+ if (argumentOI.getPrimitiveCategory() != PrimitiveCategory.STRING) {
+ throw new UDFArgumentTypeException(0, "Type mismatch: feature");
+ }
+
+ return PrimitiveObjectInspectorFactory.javaStringObjectInspector;
+ }
+
+ /** Extracts the raw feature string and delegates to the wrapped UDF. */
+ @Override
+ public Object evaluate(DeferredObject[] arguments) throws HiveException {
+ assert (arguments.length == 1);
+ final String input = (String) argumentOI.getPrimitiveJavaObject(arguments[0].get());
+ return udf.evaluate(input);
+ }
+
+ /** Human-readable form used in EXPLAIN output. */
+ @Override
+ public String getDisplayString(String[] children) {
+ return "extract_feature(" + Arrays.toString(children) + ")";
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/common/src/main/java/hivemall/ftvec/ExtractWeightUDFWrapper.java
----------------------------------------------------------------------
diff --git a/spark/common/src/main/java/hivemall/ftvec/ExtractWeightUDFWrapper.java b/spark/common/src/main/java/hivemall/ftvec/ExtractWeightUDFWrapper.java
new file mode 100644
index 0000000..8580247
--- /dev/null
+++ b/spark/common/src/main/java/hivemall/ftvec/ExtractWeightUDFWrapper.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall.ftvec;
+
+import java.util.Arrays;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.UDFType;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.serde2.objectinspector.*;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+
+/**
+ * A wrapper of [[hivemall.ftvec.ExtractWeightUDF]].
+ *
+ * NOTE: This is needed to avoid the issue of Spark reflection. That is, spark cannot handle List<>
+ * as a return type in Hive UDF. Therefore, the type must be passed via ObjectInspector.
+ */
+@Description(name = "extract_weight",
+ value = "_FUNC_(feature in string) - Returns the weight of a feature as string")
+@UDFType(deterministic = true, stateful = false)
+public class ExtractWeightUDFWrapper extends GenericUDF {
+ // Wrapped Hivemall UDF that parses the weight part out of "feature:weight".
+ private ExtractWeightUDF udf = new ExtractWeightUDF();
+ // Inspector of the single string argument, captured in initialize().
+ private PrimitiveObjectInspector argumentOI = null;
+
+ /**
+ * Validates that the single argument is a string. The declared output is
+ * a writable DOUBLE — assumes the wrapped UDF returns a DoubleWritable;
+ * TODO confirm against ExtractWeightUDF.
+ */
+ @Override
+ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
+ if (arguments.length != 1) {
+ throw new UDFArgumentLengthException(
+ "extract_weight() has an single arguments: string feature");
+ }
+
+ argumentOI = (PrimitiveObjectInspector) arguments[0];
+ if (argumentOI.getPrimitiveCategory() != PrimitiveCategory.STRING) {
+ throw new UDFArgumentTypeException(0, "Type mismatch: feature");
+ }
+
+ return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.DOUBLE);
+ }
+
+ /** Extracts the raw feature string and delegates to the wrapped UDF. */
+ @Override
+ public Object evaluate(DeferredObject[] arguments) throws HiveException {
+ assert (arguments.length == 1);
+ final String input = (String) argumentOI.getPrimitiveJavaObject(arguments[0].get());
+ return udf.evaluate(input);
+ }
+
+ /** Human-readable form used in EXPLAIN output. */
+ @Override
+ public String getDisplayString(String[] children) {
+ return "extract_weight(" + Arrays.toString(children) + ")";
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/common/src/main/java/hivemall/ftvec/SortByFeatureUDFWrapper.java
----------------------------------------------------------------------
diff --git a/spark/common/src/main/java/hivemall/ftvec/SortByFeatureUDFWrapper.java b/spark/common/src/main/java/hivemall/ftvec/SortByFeatureUDFWrapper.java
new file mode 100644
index 0000000..584be6c
--- /dev/null
+++ b/spark/common/src/main/java/hivemall/ftvec/SortByFeatureUDFWrapper.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall.ftvec;
+
+import java.util.Arrays;
+import java.util.Map;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.UDFType;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.serde2.objectinspector.*;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.IntWritable;
+
+/**
+ * A wrapper of [[hivemall.ftvec.SortByFeatureUDF]].
+ *
+ * NOTE: This is needed to avoid the issue of Spark reflection. That is, spark cannot handle Map<>
+ * as a return type in Hive UDF. Therefore, the type must be passed via ObjectInspector.
+ */
+@Description(name = "sort_by_feature",
+ value = "_FUNC_(map in map<int,float>) - Returns a sorted map")
+@UDFType(deterministic = true, stateful = false)
+public class SortByFeatureUDFWrapper extends GenericUDF {
+ // Wrapped Hivemall UDF that sorts the map entries by feature key.
+ private SortByFeatureUDF udf = new SortByFeatureUDF();
+ // Inspector of the single map<int, float> argument, captured in initialize().
+ private MapObjectInspector argumentOI = null;
+
+ /**
+ * Validates that the single argument is map<int, float> and returns a
+ * standard map OI reusing the argument's key/value inspectors.
+ */
+ @Override
+ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
+ if (arguments.length != 1) {
+ throw new UDFArgumentLengthException(
+ "sort_by_feature() has a single argument: map<int, float> map");
+ }
+
+ // Intentional fall-through: anything other than MAP<INT, FLOAT> reaches
+ // the default case and is rejected.
+ switch (arguments[0].getCategory()) {
+ case MAP:
+ argumentOI = (MapObjectInspector) arguments[0];
+ ObjectInspector keyOI = argumentOI.getMapKeyObjectInspector();
+ ObjectInspector valueOI = argumentOI.getMapValueObjectInspector();
+ if (keyOI.getCategory().equals(Category.PRIMITIVE)
+ && valueOI.getCategory().equals(Category.PRIMITIVE)) {
+ final PrimitiveCategory keyCategory = ((PrimitiveObjectInspector) keyOI).getPrimitiveCategory();
+ final PrimitiveCategory valueCategory = ((PrimitiveObjectInspector) valueOI).getPrimitiveCategory();
+ if (keyCategory == PrimitiveCategory.INT
+ && valueCategory == PrimitiveCategory.FLOAT) {
+ break;
+ }
+ }
+ default:
+ throw new UDFArgumentTypeException(0, "Type mismatch: map");
+ }
+
+ return ObjectInspectorFactory.getStandardMapObjectInspector(
+ argumentOI.getMapKeyObjectInspector(), argumentOI.getMapValueObjectInspector());
+ }
+
+ /** Extracts the writable map from the deferred argument and delegates to the wrapped UDF. */
+ @Override
+ public Object evaluate(DeferredObject[] arguments) throws HiveException {
+ assert (arguments.length == 1);
+ @SuppressWarnings("unchecked")
+ final Map<IntWritable, FloatWritable> input = (Map<IntWritable, FloatWritable>) argumentOI.getMap(arguments[0].get());
+ return udf.evaluate(input);
+ }
+
+ /** Human-readable form used in EXPLAIN output. */
+ @Override
+ public String getDisplayString(String[] children) {
+ return "sort_by_feature(" + Arrays.toString(children) + ")";
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/common/src/main/java/hivemall/ftvec/scaling/L2NormalizationUDFWrapper.java
----------------------------------------------------------------------
diff --git a/spark/common/src/main/java/hivemall/ftvec/scaling/L2NormalizationUDFWrapper.java b/spark/common/src/main/java/hivemall/ftvec/scaling/L2NormalizationUDFWrapper.java
new file mode 100644
index 0000000..db533be
--- /dev/null
+++ b/spark/common/src/main/java/hivemall/ftvec/scaling/L2NormalizationUDFWrapper.java
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall.ftvec.scaling;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.serde2.objectinspector.*;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.io.Text;
+
+/**
+ * A wrapper of [[hivemall.ftvec.scaling.L2NormalizationUDF]].
+ *
+ * NOTE: This is needed to avoid the issue of Spark reflection. That is, spark-1.3 cannot handle
+ * List<> as a return type in Hive UDF. The type must be passed via ObjectInspector. This issue has
+ * been reported in SPARK-6747, so a future release of Spark makes the wrapper obsolete.
+ */
+public class L2NormalizationUDFWrapper extends GenericUDF {
+ // Wrapped Hivemall UDF that performs the actual L2 normalization.
+ private L2NormalizationUDF udf = new L2NormalizationUDF();
+
+ // NOTE(review): this initial list is discarded — evaluate() reassigns the
+ // field to the wrapped UDF's result on every call.
+ private transient List<Text> retValue = new ArrayList<Text>();
+ // Converts the incoming list argument into List<Text>; built in initialize().
+ private transient Converter toListText = null;
+
+ /**
+ * Validates that the single argument is array<string>, prepares a
+ * converter to List<Text>, and declares array<string> as the output.
+ */
+ @Override
+ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
+ if (arguments.length != 1) {
+ throw new UDFArgumentLengthException("normalize() has an only single argument.");
+ }
+
+ // Intentional fall-through: anything other than LIST-of-STRING reaches
+ // the default case and is rejected.
+ switch (arguments[0].getCategory()) {
+ case LIST:
+ ObjectInspector elmOI = ((ListObjectInspector) arguments[0]).getListElementObjectInspector();
+ if (elmOI.getCategory().equals(Category.PRIMITIVE)) {
+ if (((PrimitiveObjectInspector) elmOI).getPrimitiveCategory() == PrimitiveCategory.STRING) {
+ break;
+ }
+ }
+ default:
+ throw new UDFArgumentTypeException(0,
+ "normalize() must have List[String] as an argument, but "
+ + arguments[0].getTypeName() + " was found.");
+ }
+
+ // Create a ObjectInspector converter for arguments
+ ObjectInspector outputElemOI = ObjectInspectorFactory.getReflectionObjectInspector(
+ Text.class, ObjectInspectorOptions.JAVA);
+ ObjectInspector outputOI = ObjectInspectorFactory.getStandardListObjectInspector(outputElemOI);
+ toListText = ObjectInspectorConverters.getConverter(arguments[0], outputOI);
+
+ ObjectInspector listElemOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
+ ObjectInspector returnElemOI = ObjectInspectorUtils.getStandardObjectInspector(listElemOI);
+ return ObjectInspectorFactory.getStandardListObjectInspector(returnElemOI);
+ }
+
+ /** Converts the argument to List<Text> and delegates to the wrapped UDF. */
+ @Override
+ public Object evaluate(DeferredObject[] arguments) throws HiveException {
+ assert (arguments.length == 1);
+ @SuppressWarnings("unchecked")
+ final List<Text> input = (List<Text>) toListText.convert(arguments[0].get());
+ retValue = udf.evaluate(input);
+ return retValue;
+ }
+
+ /** Human-readable form used in EXPLAIN output. */
+ @Override
+ public String getDisplayString(String[] children) {
+ return "normalize(" + Arrays.toString(children) + ")";
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/common/src/main/java/hivemall/knn/lsh/MinHashesUDFWrapper.java
----------------------------------------------------------------------
diff --git a/spark/common/src/main/java/hivemall/knn/lsh/MinHashesUDFWrapper.java b/spark/common/src/main/java/hivemall/knn/lsh/MinHashesUDFWrapper.java
new file mode 100644
index 0000000..d3bcbe6
--- /dev/null
+++ b/spark/common/src/main/java/hivemall/knn/lsh/MinHashesUDFWrapper.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall.knn.lsh;
+
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.UDFType;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.serde2.objectinspector.*;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+
+/** A wrapper of [[hivemall.knn.lsh.MinHashesUDF]]. */
+@Description(
+ name = "minhashes",
+ value = "_FUNC_(features in array<string>, noWeight in boolean) - Returns hashed features as array<int>")
+@UDFType(deterministic = true, stateful = false)
+public class MinHashesUDFWrapper extends GenericUDF {
+ // Wrapped Hivemall UDF that computes the min-hash values.
+ private MinHashesUDF udf = new MinHashesUDF();
+ // Inspector of the first argument (array<string> features).
+ private ListObjectInspector featuresOI = null;
+ // Inspector of the second argument (boolean noWeight flag).
+ private PrimitiveObjectInspector noWeightOI = null;
+
+ /**
+ * Validates the two arguments (array<string>, boolean) and declares
+ * array<int> (writable INT elements) as the output.
+ */
+ @Override
+ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
+ if (arguments.length != 2) {
+ throw new UDFArgumentLengthException(
+ "minhashes() has 2 arguments: array<string> features, boolean noWeight");
+ }
+
+ // Check argument types
+ // Intentional fall-through: anything other than LIST-of-STRING reaches
+ // the default case and is rejected.
+ switch (arguments[0].getCategory()) {
+ case LIST:
+ featuresOI = (ListObjectInspector) arguments[0];
+ ObjectInspector elmOI = featuresOI.getListElementObjectInspector();
+ if (elmOI.getCategory().equals(Category.PRIMITIVE)) {
+ if (((PrimitiveObjectInspector) elmOI).getPrimitiveCategory() == PrimitiveCategory.STRING) {
+ break;
+ }
+ }
+ default:
+ throw new UDFArgumentTypeException(0, "Type mismatch: features");
+ }
+
+ noWeightOI = (PrimitiveObjectInspector) arguments[1];
+ if (noWeightOI.getPrimitiveCategory() != PrimitiveCategory.BOOLEAN) {
+ throw new UDFArgumentException("Type mismatch: noWeight");
+ }
+
+ return ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.INT));
+ }
+
+ /** Extracts both arguments and delegates to the wrapped UDF. */
+ @Override
+ public Object evaluate(DeferredObject[] arguments) throws HiveException {
+ assert (arguments.length == 2);
+ @SuppressWarnings("unchecked")
+ final List<String> features = (List<String>) featuresOI.getList(arguments[0].get());
+ final Boolean noWeight = PrimitiveObjectInspectorUtils.getBoolean(arguments[1].get(),
+ noWeightOI);
+ return udf.evaluate(features, noWeight);
+ }
+
+ /** Human-readable form used in EXPLAIN output. */
+ @Override
+ public String getDisplayString(String[] children) {
+ /**
+ * TODO: Need to return hive-specific type names.
+ */
+ return "minhashes(" + Arrays.toString(children) + ")";
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/common/src/main/java/hivemall/tools/mapred/RowIdUDFWrapper.java
----------------------------------------------------------------------
diff --git a/spark/common/src/main/java/hivemall/tools/mapred/RowIdUDFWrapper.java b/spark/common/src/main/java/hivemall/tools/mapred/RowIdUDFWrapper.java
new file mode 100644
index 0000000..f386223
--- /dev/null
+++ b/spark/common/src/main/java/hivemall/tools/mapred/RowIdUDFWrapper.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall.tools.mapred;
+
+import java.util.UUID;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.UDFType;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+
+/** An alternative implementation of [[hivemall.tools.mapred.RowIdUDF]]. */
+@Description(
+ name = "rowid",
+ value = "_FUNC_() - Returns a generated row id of a form {TASK_ID}-{UUID}-{SEQUENCE_NUMBER}")
+@UDFType(deterministic = false, stateful = true)
+public class RowIdUDFWrapper extends GenericUDF {
+ // RowIdUDF is directly used because spark cannot
+ // handle HadoopUtils#getTaskId().
+
+ // Per-instance monotonically increasing sequence number.
+ private long sequence;
+ // Thread id stands in for the Hadoop task id (see note above).
+ private long taskId;
+
+ public RowIdUDFWrapper() {
+ this.sequence = 0L;
+ this.taskId = Thread.currentThread().getId();
+ }
+
+ /** Declares a zero-argument string-returning function. */
+ @Override
+ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
+ if (arguments.length != 0) {
+ throw new UDFArgumentLengthException("rowid() has no argument.");
+ }
+
+ return PrimitiveObjectInspectorFactory.javaStringObjectInspector;
+ }
+
+ /** Returns "{taskId}-{UUID}-{sequence}"; stateful=true, so never constant-folded. */
+ @Override
+ public Object evaluate(DeferredObject[] arguments) throws HiveException {
+ assert (arguments.length == 0);
+ sequence++;
+ /**
+ * TODO: Check if it is unique over all tasks in executors of Spark.
+ */
+ return taskId + "-" + UUID.randomUUID() + "-" + sequence;
+ }
+
+ /** Human-readable form used in EXPLAIN output. */
+ @Override
+ public String getDisplayString(String[] children) {
+ return "rowid()";
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/common/src/main/scala/hivemall/HivemallException.scala
----------------------------------------------------------------------
diff --git a/spark/common/src/main/scala/hivemall/HivemallException.scala b/spark/common/src/main/scala/hivemall/HivemallException.scala
new file mode 100644
index 0000000..53f6756
--- /dev/null
+++ b/spark/common/src/main/scala/hivemall/HivemallException.scala
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall
+
/**
 * Exception raised when Hivemall-specific processing fails.
 *
 * @param message the detail message
 * @param cause the underlying cause, or null when there is none
 */
class HivemallException(message: String, cause: Throwable)
  extends Exception(message, cause) {

  /** Convenience constructor for errors without an underlying cause. */
  def this(message: String) = this(message, null)
}
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/common/src/main/scala/org/apache/spark/ml/feature/HivemallLabeledPoint.scala
----------------------------------------------------------------------
diff --git a/spark/common/src/main/scala/org/apache/spark/ml/feature/HivemallLabeledPoint.scala b/spark/common/src/main/scala/org/apache/spark/ml/feature/HivemallLabeledPoint.scala
new file mode 100644
index 0000000..3fb2d18
--- /dev/null
+++ b/spark/common/src/main/scala/org/apache/spark/ml/feature/HivemallLabeledPoint.scala
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.spark.ml.feature
+
+import java.util.StringTokenizer
+
+import scala.collection.mutable.ListBuffer
+
+import hivemall.HivemallException
+
// Used for DataFrame#explode
case class HivemallFeature(feature: String)

/**
 * Class that represents the features and labels of a data point for Hivemall.
 *
 * @param label Label for this data point.
 * @param features List of features for this data point.
 */
case class HivemallLabeledPoint(label: Float = 0.0f, features: Seq[String]) {
  /** Renders the point as `label,[f1,f2,...]`, the inverse of [[HivemallLabeledPoint.parse]]. */
  override def toString: String = {
    s"$label,${features.mkString("[", ",", "]")}"
  }
}

object HivemallLabeledPoint {

  /**
   * Simple parser for HivemallLabeledPoint.
   *
   * Input is expected as `label,[f1,f2,...]`; when no ',' separates a label,
   * a dummy point (label 0.0, no features) is produced instead.
   */
  def parse(s: String): HivemallLabeledPoint = {
    val sep = s.indexOf(',')
    val (label, features) =
      if (sep > 0) {
        (s.substring(0, sep), s.substring(sep + 1))
      } else {
        ("0.0", "[]") // Dummy
      }
    HivemallLabeledPoint(label.toFloat, parseTuple(new StringTokenizer(features, "[],", true)))
  }

  // TODO: Support to parse rows without labels
  private[this] def parseTuple(tokenizer: StringTokenizer): Seq[String] = {
    val buf = ListBuffer.empty[String]
    var open = true       // still consuming tokens at this nesting level
    var delimOk = false   // a ',' is only legal right after a value or nested tuple
    while (open && tokenizer.hasMoreTokens()) {
      tokenizer.nextToken() match {
        case "[" =>
          // Nested tuple: the recursive call consumes the matching ']'.
          buf ++= parseTuple(tokenizer)
          open = false
          delimOk = true
        case "," =>
          if (!delimOk) {
            throw new HivemallException("Found ',' at a wrong position.")
          }
          delimOk = false
        case "]" =>
          open = false
        case token =>
          buf.append(token)
          delimOk = true
      }
    }
    if (open) {
      throw new HivemallException(s"A tuple must end with ']'.")
    }
    buf
  }
}
[4/4] incubator-hivemall git commit: Close #131: [v0.5.0-rc3] Merge
v0.5.0 branch
Posted by my...@apache.org.
Close #131: [v0.5.0-rc3] Merge v0.5.0 branch
Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/3a718713
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/3a718713
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/3a718713
Branch: refs/heads/master
Commit: 3a718713afb612848312afae74301ec2cbf1d8a2
Parents: 448847f
Author: Makoto Yui <my...@apache.org>
Authored: Tue Feb 20 16:17:51 2018 +0900
Committer: Makoto Yui <my...@apache.org>
Committed: Tue Feb 20 16:17:51 2018 +0900
----------------------------------------------------------------------
.gitignore | 1 +
.rat-excludes | 6 +-
LICENSE | 120 +++++++
NOTICE | 82 +----
VERSION | 2 +-
bin/build.sh | 1 -
bin/build_xgboost.sh | 87 -----
bin/maven_central_release.sh | 33 --
core/pom.xml | 112 +------
.../main/java/hivemall/HivemallConstants.java | 2 +-
.../hivemall/ftvec/ranking/BprSamplingUDTF.java | 38 ++-
.../smile/classification/DecisionTree.java | 24 +-
.../smile/regression/RegressionTree.java | 20 +-
.../hivemall/smile/tools/TreePredictUDF.java | 63 +++-
.../main/java/hivemall/utils/codec/Base91.java | 20 +-
.../hivemall/utils/lang/ExceptionUtils.java | 118 +++++++
.../main/java/hivemall/utils/math/FastMath.java | 67 ++--
.../main/resources/META-INF/LICENSE-jafama.txt | 202 +++++++++++
.../main/resources/META-INF/LICENSE-smile.txt | 203 +++++++++++
dist/pom.xml | 163 +++++++++
docs/gitbook/binaryclass/news20_rf.md | 5 +-
docs/gitbook/binaryclass/titanic_rf.md | 10 +-
docs/gitbook/multiclass/iris_randomforest.md | 8 +-
mixserv/pom.xml | 43 +--
nlp/pom.xml | 132 +-------
.../hivemall/nlp/tokenizer/KuromojiUDF.java | 52 +--
pom.xml | 318 ++++++++++--------
spark/common/pom.xml | 64 ++++
...isticRegressionDataGeneratorUDTFWrapper.java | 109 ++++++
.../java/hivemall/ftvec/AddBiasUDFWrapper.java | 83 +++++
.../ftvec/AddFeatureIndexUDFWrapper.java | 85 +++++
.../ftvec/ExtractFeatureUDFWrapper.java | 73 ++++
.../hivemall/ftvec/ExtractWeightUDFWrapper.java | 73 ++++
.../hivemall/ftvec/SortByFeatureUDFWrapper.java | 92 +++++
.../scaling/L2NormalizationUDFWrapper.java | 95 ++++++
.../hivemall/knn/lsh/MinHashesUDFWrapper.java | 93 ++++++
.../hivemall/tools/mapred/RowIdUDFWrapper.java | 72 ++++
.../main/scala/hivemall/HivemallException.scala | 25 ++
.../spark/ml/feature/HivemallLabeledPoint.scala | 82 +++++
spark/pom.xml | 295 ++++++++++++++++
spark/scalastyle-config.xml | 333 +++++++++++++++++++
spark/spark-2.0/pom.xml | 147 +-------
.../spark/streaming/HivemallStreamingOps.scala | 47 +++
.../apache/spark/sql/hive/HiveUdfSuite.scala | 2 +-
.../spark/sql/hive/HivemallOpsSuite.scala | 2 +-
spark/spark-2.1/pom.xml | 145 +-------
.../spark/streaming/HivemallStreamingOps.scala | 47 +++
.../apache/spark/sql/hive/HiveUdfSuite.scala | 2 +-
.../spark/sql/hive/HivemallOpsSuite.scala | 2 +-
spark/spark-2.2/pom.xml | 167 ++--------
.../spark/sql/hive/HivemallGroupedDataset.scala | 2 +-
.../spark/streaming/HivemallStreamingOps.scala | 47 +++
.../apache/spark/sql/hive/HiveUdfSuite.scala | 2 +-
.../spark/sql/hive/HivemallOpsSuite.scala | 2 +-
spark/spark-common/pom.xml | 146 --------
spark/spark-common/scalastyle-config.xml | 333 -------------------
...isticRegressionDataGeneratorUDTFWrapper.java | 109 ------
.../java/hivemall/ftvec/AddBiasUDFWrapper.java | 83 -----
.../ftvec/AddFeatureIndexUDFWrapper.java | 85 -----
.../ftvec/ExtractFeatureUDFWrapper.java | 73 ----
.../hivemall/ftvec/ExtractWeightUDFWrapper.java | 73 ----
.../hivemall/ftvec/SortByFeatureUDFWrapper.java | 92 -----
.../scaling/L2NormalizationUDFWrapper.java | 95 ------
.../hivemall/knn/lsh/MinHashesUDFWrapper.java | 93 ------
.../hivemall/tools/mapred/RowIdUDFWrapper.java | 72 ----
.../main/scala/hivemall/HivemallException.scala | 25 --
.../spark/ml/feature/HivemallLabeledPoint.scala | 82 -----
.../spark/streaming/HivemallStreamingOps.scala | 47 ---
src/site/resources/LICENSE-font_awesome.txt | 86 +++++
xgboost/lib/xgboost4j-0.60-0.10.jar | Bin 1424975 -> 0 bytes
xgboost/pom.xml | 143 +-------
.../java/hivemall/xgboost/XGBoostUtils.java | 4 +-
.../tools/XGBoostMulticlassPredictUDTF.java | 15 +-
73 files changed, 3023 insertions(+), 2753 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
index 84b63c8..3ba5593 100644
--- a/.gitignore
+++ b/.gitignore
@@ -30,3 +30,4 @@ release.properties
\#*#
pom.xml.next
pom.xml.tag
+.cache-main
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/.rat-excludes
----------------------------------------------------------------------
diff --git a/.rat-excludes b/.rat-excludes
index fb6ce1c..fcb4b31 100644
--- a/.rat-excludes
+++ b/.rat-excludes
@@ -26,4 +26,8 @@ resources/eclipse-style.xml
**/*.spark
**/*.hql
docs/gitbook/_book/**
-docs/gitbook/node_modules/**
\ No newline at end of file
+docs/gitbook/node_modules/**
+**/release.properties
+**/derby.log
+**/LICENSE-*.txt
+**/Base91.java
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/LICENSE
----------------------------------------------------------------------
diff --git a/LICENSE b/LICENSE
index f433b1a..26b11dd 100644
--- a/LICENSE
+++ b/LICENSE
@@ -175,3 +175,123 @@
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+
+APACHE HIVEMALL SUBCOMPONENTS:
+
+The Apache Hivemall project contains subcomponents with separate copyright
+notices and license terms. Your use of the source code for these
+subcomponents is subject to the terms and conditions of the following
+licenses.
+
+---------------------------------------------------------------------------
+The Apache License, Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+---------------------------------------------------------------------------
+
+This product bundles a modified version of 'Smile' which is licensed
+under the Apache License Version 2.0, specifically for Random Forest module.
+For details, see https://github.com/haifengl/smile/
+
+ You can find a copy of the License at
+
+ core/src/main/resources/META-INF/LICENSE-smile.txt
+
+ which is placed under META-INF/ in a jar.
+
+This product bundles a modified version of `Jafama` which is licensed
+under the Apache License Version 2.0, specifically for FastMath.java.
+For details, see https://github.com/jeffhain/jafama/
+
+ You can find a copy of the License at
+
+ core/src/main/resources/META-INF/LICENSE-jafama.txt
+
+ which is placed under META-INF/ in a jar.
+
+---------------------------------------------------------------------------
+ The 3-Clause BSD License (https://opensource.org/licenses/BSD-3-Clause)
+---------------------------------------------------------------------------
+
+This product bundles a modified version of Joachim Henke's `Base91
+Encoder/Decoder` which is licensed under the BSD 3-Clause License,
+specifically for Base91.java.
+For details, see https://github.com/bwaldvogel/base91
+
+ Copyright (c) 2000-2006 Joachim Henke
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ - Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+ - Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+ - Neither the name of Joachim Henke nor the names of his contributors may
+ be used to endorse or promote products derived from this software without
+ specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ POSSIBILITY OF SUCH DAMAGE.
+
+---------------------------------------------------------------------------
+ Public Domain License
+---------------------------------------------------------------------------
+
+This product bundles public domain software derived from `fdlibm`,
+specifically for FastMath.java.
+For details, see http://www.netlib.org/fdlibm/fdlibm.h
+
+ Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+
+ Developed at SunSoft, a Sun Microsystems, Inc. business.
+ Permission to use, copy, modify, and distribute this
+ software is freely granted, provided that this notice
+ is preserved.
+
+---------------------------------------------------------------------------
+ The SIL Open Font License (https://opensource.org/licenses/OFL-1.1)
+---------------------------------------------------------------------------
+
+This product bundles `Font-awesome` fonts which is licensed under the
+SIL Open Font License (OFL) 1.1, specifically for the project site.
+For details, see http://fontawesome.io/
+
+ You can find a copy of the License at
+
+ src/site/resources/LICENSE-font_awesome.txt
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/NOTICE
----------------------------------------------------------------------
diff --git a/NOTICE b/NOTICE
index bfc4af8..34b5f5d 100644
--- a/NOTICE
+++ b/NOTICE
@@ -1,81 +1,13 @@
Apache Hivemall
-Copyright 2016 and onwards The Apache Software Foundation
+Copyright 2016-2018 The Apache Software Foundation
This product includes software developed at
The Apache Software Foundation (http://www.apache.org/).
-Copyright (C) 2013-2015 National Institute of Advanced Industrial Science and Technology (AIST)
-Copyright (C) 2015-2016 Makoto Yui
-Copyright (C) 2015-2016 Treasure Data, Inc.
+This product is based on source code originally developed by AIST and Treasure Data, Inc.
+They have been licensed to the Apache Software Foundation under Software Grant Agreements from
+the following individuals and organizations:
-------------------------------------------------------------------------------------------------------
-Copyright notifications which have been relocated from source files
-
-o hivemall/core/src/main/java/hivemall/smile/classification/DecisionTree.java
- hivemall/core/src/main/java/hivemall/smile/regression/RegressionTree.java
-
- Copyright (c) 2010 Haifeng Li
-
- https://github.com/haifengl/smile
- Licensed under the Apache License, Version 2.0
-
-o hivemall/core/src/main/java/hivemall/utils/codec/Base91.java
-
- Copyright (c) 2000-2006 Joachim Henke
-
- https://github.com/bwaldvogel/base91
- Licensed under the BSD 3-Clause License
-
-o hivemall/core/src/main/java/hivemall/utils/collections/OpenHashMap.java
-
- Copyright (C) 2010 catchpole.net
-
- https://github.com/slipperyseal/atomicobjects/
- Licensed under the Apache License, Version 2.0
-
-o hivemall/core/src/main/java/hivemall/utils/math/FastMath.java
-
- Copyright 2012-2015 Jeff Hain
-
- https://github.com/jeffhain/jafama/
- Licensed under the Apache License, Version 2.0
-
- Copyright (C) 1993 by Sun Microsystems, Inc.
-
- Permission to use, copy, modify, and distribute this software is freely granted, provided that this notice is preserved.
-
-------------------------------------------------------------------------------------------------------
-Copyright notifications which have been relocated from ASF projects
-
-o hivemall/core/src/main/java/hivemall/utils/math/MathUtils.java#erfInv()
-
- Copyright (C) 2003-2016 The Apache Software Foundation.
-
- http://commons.apache.org/proper/commons-math/
- Licensed under the Apache License, Version 2.0
-
-o hivemall/core/src/main/java/hivemall/utils/buffer/DynamicByteArray.java
-
- Copyright 2013-2015 The Apache Software Foundation
-
- https://orc.apache.org/
- Licensed under the Apache License, Version 2.0
-
-o hivemall/spark/spark-2.0/extra-src/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala
- hivemall/spark/spark-2.0/src/test/scala/org/apache/spark/sql/QueryTest.scala
- hivemall/spark/spark-2.0/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala
- hivemall/spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/test/TestHiveSingleton.scala
- hivemall/spark/spark-2.1/extra-src/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala
- hivemall/spark/spark-2.1/src/test/scala/org/apache/spark/sql/QueryTest.scala
- hivemall/spark/spark-2.1/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala
- hivemall/spark/spark-2.1/src/test/scala/org/apache/spark/sql/hive/test/TestHiveSingleton.scala
- hivemall/spark/spark-2.1/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
- hivemall/spark/spark-2.1/src/test/scala/org/apache/spark/sql/test/SQLTestData.scala
- hivemall/spark/spark-2.1/src/test/scala/org/apache/spark/sql/execution/benchmark/BenchmarkBase.scala
-
- Copyright (C) 2014-2017 The Apache Software Foundation.
-
- http://spark.apache.org/
- Licensed under the Apache License, Version 2.0
-
-
\ No newline at end of file
+ - Copyright 2013-2015 National Institute of Advanced Industrial Science and Technology (AIST)
+ - Copyright 2015-2016 Makoto Yui
+ - Copyright 2015-2016 Treasure Data, Inc.
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/VERSION
----------------------------------------------------------------------
diff --git a/VERSION b/VERSION
index 17de5ad..89b6d66 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.5.0-incubating-SNAPSHOT
+0.5.1-incubating-SNAPSHOT
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/bin/build.sh
----------------------------------------------------------------------
diff --git a/bin/build.sh b/bin/build.sh
index 8487d70..05d1f8f 100755
--- a/bin/build.sh
+++ b/bin/build.sh
@@ -30,5 +30,4 @@ if [ "$HIVEMALL_HOME" = "" ]; then
fi
cd $HIVEMALL_HOME
-mvn validate -Pxgboost
mvn clean package -Dskiptests=true -Dmaven.test.skip=true
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/bin/build_xgboost.sh
----------------------------------------------------------------------
diff --git a/bin/build_xgboost.sh b/bin/build_xgboost.sh
deleted file mode 100755
index 0bebcf8..0000000
--- a/bin/build_xgboost.sh
+++ /dev/null
@@ -1,87 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-
-# xgboost requires g++-4.6 or higher (https://github.com/dmlc/xgboost/blob/master/doc/build.md),
-# so we need to first check if the requirement is satisfied.
-COMPILER_REQUIRED_VERSION="4.6"
-COMPILER_VERSION=`g++ --version 2> /dev/null`
-
-# Check if GNU g++ installed
-if [ $? = 127 ]; then
- echo "First, you need to install g++"
- exit 1
-elif [[ "$COMPILER_VERSION" = *LLVM* ]]; then
- echo "You must use GNU g++, but the detected compiler was clang++"
- exit 1
-fi
-
-COMPILER_VERSION_NUMBER=`echo $COMPILER_VERSION | grep ^g++ | \
- awk 'match($0, /[0-9]+\.[0-9]+\.[0-9]+/) {print substr($0, RSTART, RLENGTH)}'`
-
-# See simple version normalization: http://stackoverflow.com/questions/16989598/bash-comparing-version-numbers
-function version { echo "$@" | awk -F. '{ printf("%03d%03d%03d\n", $1,$2,$3); }'; }
-if [ $(version $COMPILER_VERSION_NUMBER) -lt $(version $COMPILER_REQUIRED_VERSION) ]; then
- echo "You must compile xgboost with GNU g++-$COMPILER_REQUIRED_VERSION or higher," \
- "but the detected compiler was g++-$COMPILER_VERSION_NUMBER"
- exit 1
-fi
-
-# Target commit hash value
-XGBOOST_HASHVAL='7ab15a0b31c870c7779691639f521df3ccd4a56e'
-
-# Move to a top directory
-if [ "$HIVEMALL_HOME" = "" ]; then
- if [ -e ../bin/${0##*/} ]; then
- HIVEMALL_HOME=`pwd`/..
- elif [ -e ./bin/${0##*/} ]; then
- HIVEMALL_HOME=`pwd`
- else
- echo "env HIVEMALL_HOME not defined"
- exit 1
- fi
-fi
-
-cd $HIVEMALL_HOME
-
-# Final output dir for a custom-compiled xgboost binary
-HIVEMALL_LIB_DIR="$HIVEMALL_HOME/xgboost/src/main/resources/lib/"
-rm -rf $HIVEMALL_LIB_DIR >> /dev/null
-mkdir -p $HIVEMALL_LIB_DIR
-
-# Move to an output directory
-XGBOOST_OUT="$HIVEMALL_HOME/target/xgboost-$XGBOOST_HASHVAL"
-rm -rf $XGBOOST_OUT >> /dev/null
-mkdir -p $XGBOOST_OUT
-cd $XGBOOST_OUT
-
-# Fetch xgboost sources
-git clone --progress https://github.com/maropu/xgboost.git
-cd xgboost
-git checkout $XGBOOST_HASHVAL
-
-# Resolve dependent sources
-git submodule init
-git submodule update
-
-# Copy a built binary to the output
-cd jvm-packages
-ENABLE_STATIC_LINKS=1 ./create_jni.sh
-cp ./lib/libxgboost4j.* "$HIVEMALL_LIB_DIR"
-
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/bin/maven_central_release.sh
----------------------------------------------------------------------
diff --git a/bin/maven_central_release.sh b/bin/maven_central_release.sh
deleted file mode 100755
index 8a7918f..0000000
--- a/bin/maven_central_release.sh
+++ /dev/null
@@ -1,33 +0,0 @@
-#!/bin/sh
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-
-if [ "$HIVEMALL_HOME" = "" ]; then
- if [ -e ../bin/${0##*/} ]; then
- HIVEMALL_HOME=".."
- elif [ -e ./bin/${0##*/} ]; then
- HIVEMALL_HOME="."
- else
- echo "env HIVEMALL_HOME not defined"
- exit 1
- fi
-fi
-
-cd $HIVEMALL_HOME
-mvn clean deploy -DperformRelease=true -Dskiptests=true -Dmaven.test.skip=true
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/core/pom.xml
----------------------------------------------------------------------
diff --git a/core/pom.xml b/core/pom.xml
index c79124a..82cb369 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -16,14 +16,13 @@
specific language governing permissions and limitations
under the License.
-->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.hivemall</groupId>
<artifactId>hivemall</artifactId>
- <version>0.5.0-incubating-SNAPSHOT</version>
+ <version>0.5.1-incubating-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
@@ -40,67 +39,41 @@
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
- <version>${hadoop.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
- <version>${hadoop.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
- <version>${hive.version}</version>
<scope>provided</scope>
- <exclusions>
- <exclusion>
- <artifactId>jetty</artifactId>
- <groupId>org.mortbay.jetty</groupId>
- </exclusion>
- <exclusion>
- <groupId>javax.jdo</groupId>
- <artifactId>jdo2-api</artifactId>
- </exclusion>
- <exclusion>
- <groupId>asm-parent</groupId>
- <artifactId>asm-parent</artifactId>
- </exclusion>
- <exclusion>
- <groupId>asm</groupId>
- <artifactId>asm</artifactId>
- </exclusion>
- </exclusions>
</dependency>
<dependency>
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
- <version>1.2</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
- <version>1.0.4</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
- <version>1.2.17</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>javax.jdo</groupId>
<artifactId>jdo2-api</artifactId>
- <version>2.3-eb</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
- <version>${guava.version}</version>
<scope>provided</scope>
</dependency>
@@ -160,104 +133,23 @@
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
- <version>${junit.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
- <version>1.10.19</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.powermock</groupId>
<artifactId>powermock-module-junit4</artifactId>
- <version>1.6.3</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.powermock</groupId>
<artifactId>powermock-api-mockito</artifactId>
- <version>1.6.3</version>
<scope>test</scope>
</dependency>
</dependencies>
- <build>
- <directory>target</directory>
- <outputDirectory>target/classes</outputDirectory>
- <finalName>${project.artifactId}-${project.version}</finalName>
- <testOutputDirectory>target/test-classes</testOutputDirectory>
- <plugins>
- <!-- hivemall-core-xx.jar -->
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-jar-plugin</artifactId>
- <version>2.5</version>
- <configuration>
- <finalName>${project.artifactId}-${project.version}</finalName>
- <outputDirectory>${project.parent.build.directory}</outputDirectory>
- </configuration>
- </plugin>
- <!-- hivemall-core-xx-with-dependencies.jar including minimum dependencies -->
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-shade-plugin</artifactId>
- <version>3.1.0</version>
- <executions>
- <execution>
- <id>jar-with-dependencies</id>
- <phase>package</phase>
- <goals>
- <goal>shade</goal>
- </goals>
- <configuration>
- <finalName>${project.artifactId}-${project.version}-with-dependencies</finalName>
- <outputDirectory>${project.parent.build.directory}</outputDirectory>
- <minimizeJar>true</minimizeJar>
- <createDependencyReducedPom>false</createDependencyReducedPom>
- <artifactSet>
- <includes>
- <include>io.netty:netty-all</include>
- <include>com.github.haifengl:smile-core</include>
- <include>com.github.haifengl:smile-math</include>
- <include>com.github.haifengl:smile-data</include>
- <include>org.tukaani:xz</include>
- <include>org.apache.commons:commons-math3</include>
- <include>org.roaringbitmap:RoaringBitmap</include>
- <include>it.unimi.dsi:fastutil</include>
- <include>com.clearspring.analytics:stream</include>
- </includes>
- </artifactSet>
- <transformers>
- <transformer
- implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
- <manifestEntries>
- <Implementation-Title>${project.name}</Implementation-Title>
- <Implementation-Version>${project.version}</Implementation-Version>
- <Implementation-Vendor>${project.organization.name}</Implementation-Vendor>
- </manifestEntries>
- </transformer>
- <!--
- <transformer implementation="org.apache.maven.plugins.shade.resource.ApacheNoticeResourceTransformer">
- <addHeader>false</addHeader>
- </transformer>
- -->
- </transformers>
- <filters>
- <filter>
- <artifact>*:*</artifact>
- <excludes>
- <exclude>META-INF/LICENSE.txt</exclude>
- <exclude>META-INF/NOTICE.txt</exclude>
- </excludes>
- </filter>
- </filters>
- </configuration>
- </execution>
- </executions>
- </plugin>
- </plugins>
- </build>
-
</project>
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/core/src/main/java/hivemall/HivemallConstants.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/HivemallConstants.java b/core/src/main/java/hivemall/HivemallConstants.java
index 5e6e407..955aeb1 100644
--- a/core/src/main/java/hivemall/HivemallConstants.java
+++ b/core/src/main/java/hivemall/HivemallConstants.java
@@ -20,7 +20,7 @@ package hivemall;
public final class HivemallConstants {
- public static final String VERSION = "0.5.0-incubating-SNAPSHOT";
+ public static final String VERSION = "0.5.1-incubating-SNAPSHOT";
public static final String BIAS_CLAUSE = "0";
public static final int BIAS_CLAUSE_HASHVAL = 0;
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/core/src/main/java/hivemall/ftvec/ranking/BprSamplingUDTF.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/ftvec/ranking/BprSamplingUDTF.java b/core/src/main/java/hivemall/ftvec/ranking/BprSamplingUDTF.java
index ab418ed..821c734 100644
--- a/core/src/main/java/hivemall/ftvec/ranking/BprSamplingUDTF.java
+++ b/core/src/main/java/hivemall/ftvec/ranking/BprSamplingUDTF.java
@@ -18,12 +18,6 @@
*/
package hivemall.ftvec.ranking;
-import hivemall.UDTFWithOptions;
-import hivemall.utils.collections.lists.IntArrayList;
-import hivemall.utils.hadoop.HiveUtils;
-import hivemall.utils.lang.BitUtils;
-import hivemall.utils.lang.Primitives;
-
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Random;
@@ -45,6 +39,12 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectIn
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
import org.apache.hadoop.io.IntWritable;
+import hivemall.UDTFWithOptions;
+import hivemall.utils.collections.lists.IntArrayList;
+import hivemall.utils.hadoop.HiveUtils;
+import hivemall.utils.lang.BitUtils;
+import hivemall.utils.lang.Primitives;
+
@Description(name = "bpr_sampling",
value = "_FUNC_(int userId, List<int> posItems [, const string options])"
+ "- Returns a relation consists of <int userId, int itemId>")
@@ -54,9 +54,13 @@ public final class BprSamplingUDTF extends UDTFWithOptions {
private ListObjectInspector itemListOI;
private PrimitiveObjectInspector itemElemOI;
- private PositiveOnlyFeedback feedback;
+ // Need to avoid
+ // org.apache.hive.com.esotericsoftware.kryo.KryoException: java.lang.ArrayIndexOutOfBoundsException: 1
+ @Nullable
+ private transient PositiveOnlyFeedback feedback;
// sampling options
+ private int maxItemId;
private float samplingRate;
private boolean withoutReplacement;
private boolean pairSampling;
@@ -106,8 +110,7 @@ public final class BprSamplingUDTF extends UDTFWithOptions {
}
}
- this.feedback = pairSampling ? new PerEventPositiveOnlyFeedback(maxItemId)
- : new PositiveOnlyFeedback(maxItemId);
+ this.maxItemId = maxItemId;
this.samplingRate = samplingRate;
this.withoutReplacement = withoutReplacement;
this.pairSampling = pairSampling;
@@ -147,6 +150,11 @@ public final class BprSamplingUDTF extends UDTFWithOptions {
@Override
public void process(@Nonnull Object[] args) throws HiveException {
+ if (feedback == null) {
+ this.feedback = pairSampling ? new PerEventPositiveOnlyFeedback(maxItemId)
+ : new PositiveOnlyFeedback(maxItemId);
+ }
+
int userId = PrimitiveObjectInspectorUtils.getInt(args[0], userOI);
validateIndex(userId);
@@ -202,7 +210,8 @@ public final class BprSamplingUDTF extends UDTFWithOptions {
}
}
- private void forward(final int user, final int posItem, final int negItem) throws HiveException {
+ private void forward(final int user, final int posItem, final int negItem)
+ throws HiveException {
assert (user >= 0) : user;
assert (posItem >= 0) : posItem;
assert (negItem >= 0) : negItem;
@@ -260,9 +269,8 @@ public final class BprSamplingUDTF extends UDTFWithOptions {
* Caution: This is not a perfect 'without sampling' but it does 'without sampling' for positive
* feedbacks.
*/
- private void uniformUserSamplingWithoutReplacement(
- @Nonnull final PositiveOnlyFeedback feedback, final int numSamples)
- throws HiveException {
+ private void uniformUserSamplingWithoutReplacement(@Nonnull final PositiveOnlyFeedback feedback,
+ final int numSamples) throws HiveException {
int numUsers = feedback.getNumUsers();
if (numUsers == 0) {
return;
@@ -280,8 +288,8 @@ public final class BprSamplingUDTF extends UDTFWithOptions {
int nthUser = rand.nextInt(numUsers);
int user = BitUtils.indexOfSetBit(userBits, nthUser);
if (user == -1) {
- throw new HiveException("Cannot find " + nthUser + "-th user among " + numUsers
- + " users");
+ throw new HiveException(
+ "Cannot find " + nthUser + "-th user among " + numUsers + " users");
}
IntArrayList posItems = feedback.getItems(user, true);
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/core/src/main/java/hivemall/smile/classification/DecisionTree.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/smile/classification/DecisionTree.java b/core/src/main/java/hivemall/smile/classification/DecisionTree.java
index f2ff560..e6160d2 100644
--- a/core/src/main/java/hivemall/smile/classification/DecisionTree.java
+++ b/core/src/main/java/hivemall/smile/classification/DecisionTree.java
@@ -1,22 +1,4 @@
/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-/*
* Copyright (c) 2010 Haifeng Li
*
* Licensed under the Apache License, Version 2.0 (the "License");
@@ -31,6 +13,8 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+// This file includes a modified version of Smile:
+// https://github.com/haifengl/smile/blob/master/core/src/main/java/smile/classification/DecisionTree.java
package hivemall.smile.classification;
import static hivemall.smile.utils.SmileExtUtils.resolveFeatureName;
@@ -369,8 +353,8 @@ public final class DecisionTree implements Classifier<Vector> {
public void exportGraphviz(@Nonnull final StringBuilder builder,
@Nullable final String[] featureNames, @Nullable final String[] classNames,
- @Nonnull final String outputName, @Nullable double[] colorBrew,
- final @Nonnull MutableInt nodeIdGenerator, final int parentNodeId) {
+ @Nonnull final String outputName, @Nullable final double[] colorBrew,
+ @Nonnull final MutableInt nodeIdGenerator, final int parentNodeId) {
final int myNodeId = nodeIdGenerator.getValue();
if (trueChild == null && falseChild == null) {
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/core/src/main/java/hivemall/smile/regression/RegressionTree.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/smile/regression/RegressionTree.java b/core/src/main/java/hivemall/smile/regression/RegressionTree.java
index 0670876..b085734 100755
--- a/core/src/main/java/hivemall/smile/regression/RegressionTree.java
+++ b/core/src/main/java/hivemall/smile/regression/RegressionTree.java
@@ -1,22 +1,4 @@
/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-/*
* Copyright (c) 2010 Haifeng Li
*
* Licensed under the Apache License, Version 2.0 (the "License");
@@ -31,6 +13,8 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+// This file includes a modified version of Smile:
+// https://github.com/haifengl/smile/blob/master/core/src/main/java/smile/regression/RegressionTree.java
package hivemall.smile.regression;
import static hivemall.smile.utils.SmileExtUtils.resolveFeatureName;
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/core/src/main/java/hivemall/smile/tools/TreePredictUDF.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/smile/tools/TreePredictUDF.java b/core/src/main/java/hivemall/smile/tools/TreePredictUDF.java
index 46b8758..ea3bc29 100644
--- a/core/src/main/java/hivemall/smile/tools/TreePredictUDF.java
+++ b/core/src/main/java/hivemall/smile/tools/TreePredictUDF.java
@@ -18,6 +18,7 @@
*/
package hivemall.smile.tools;
+import hivemall.UDFWithOptions;
import hivemall.math.vector.DenseVector;
import hivemall.math.vector.SparseVector;
import hivemall.math.vector.Vector;
@@ -37,11 +38,12 @@ import java.util.List;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.Options;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.UDFType;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -53,12 +55,12 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspe
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
-@Description(
- name = "tree_predict",
- value = "_FUNC_(string modelId, string model, array<double|string> features [, const boolean classification])"
- + " - Returns a prediction result of a random forest")
+@Description(name = "tree_predict",
+ value = "_FUNC_(string modelId, string model, array<double|string> features [, const string options | const boolean classification=false])"
+ + " - Returns a prediction result of a random forest"
+ + " in <int value, array<double> posteriori> for classification and <double> for regression")
@UDFType(deterministic = true, stateful = false)
-public final class TreePredictUDF extends GenericUDF {
+public final class TreePredictUDF extends UDFWithOptions {
private boolean classification;
private StringObjectInspector modelOI;
@@ -72,9 +74,25 @@ public final class TreePredictUDF extends GenericUDF {
private transient Evaluator evaluator;
@Override
+ protected Options getOptions() {
+ Options opts = new Options();
+ opts.addOption("c", "classification", false,
+ "Predict as classification [default: not enabled]");
+ return opts;
+ }
+
+ @Override
+ protected CommandLine processOptions(@Nonnull String optionValue) throws UDFArgumentException {
+ CommandLine cl = parseOptions(optionValue);
+
+ this.classification = cl.hasOption("classification");
+ return cl;
+ }
+
+ @Override
public ObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
if (argOIs.length != 3 && argOIs.length != 4) {
- throw new UDFArgumentException("_FUNC_ takes 3 or 4 arguments");
+ throw new UDFArgumentException("tree_predict takes 3 or 4 arguments");
}
this.modelOI = HiveUtils.asStringOI(argOIs[1]);
@@ -89,15 +107,25 @@ public final class TreePredictUDF extends GenericUDF {
this.denseInput = false;
} else {
throw new UDFArgumentException(
- "_FUNC_ takes array<double> or array<string> for the second argument: "
+ "tree_predict takes array<double> or array<string> for the second argument: "
+ listOI.getTypeName());
}
- boolean classification = false;
if (argOIs.length == 4) {
- classification = HiveUtils.getConstBoolean(argOIs[3]);
+ ObjectInspector argOI3 = argOIs[3];
+ if (HiveUtils.isConstBoolean(argOI3)) {
+ this.classification = HiveUtils.getConstBoolean(argOI3);
+ } else if (HiveUtils.isConstString(argOI3)) {
+ String opts = HiveUtils.getConstString(argOI3);
+ processOptions(opts);
+ } else {
+ throw new UDFArgumentException(
+ "tree_predict expects <const boolean> or <const string> for the fourth argument: "
+ + argOI3.getTypeName());
+ }
+ } else {
+ this.classification = false;
}
- this.classification = classification;
if (classification) {
List<String> fieldNames = new ArrayList<String>(2);
@@ -105,7 +133,8 @@ public final class TreePredictUDF extends GenericUDF {
fieldNames.add("value");
fieldOIs.add(PrimitiveObjectInspectorFactory.writableIntObjectInspector);
fieldNames.add("posteriori");
- fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector));
+ fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector(
+ PrimitiveObjectInspectorFactory.writableDoubleObjectInspector));
return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
} else {
return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
@@ -116,7 +145,7 @@ public final class TreePredictUDF extends GenericUDF {
public Object evaluate(@Nonnull DeferredObject[] arguments) throws HiveException {
Object arg0 = arguments[0].get();
if (arg0 == null) {
- throw new HiveException("ModelId was null");
+ throw new HiveException("modelId should not be null");
}
// Not using string OI for backward compatibilities
String modelId = arg0.toString();
@@ -134,8 +163,8 @@ public final class TreePredictUDF extends GenericUDF {
this.featuresProbe = parseFeatures(arg2, featuresProbe);
if (evaluator == null) {
- this.evaluator = classification ? new ClassificationEvaluator()
- : new RegressionEvaluator();
+ this.evaluator =
+ classification ? new ClassificationEvaluator() : new RegressionEvaluator();
}
return evaluator.evaluate(modelId, model, featuresProbe);
}
@@ -192,8 +221,8 @@ public final class TreePredictUDF extends GenericUDF {
}
if (feature.indexOf(':') != -1) {
- throw new UDFArgumentException("Invaliad feature format `<index>:<value>`: "
- + col);
+ throw new UDFArgumentException(
+ "Invaliad feature format `<index>:<value>`: " + col);
}
final int colIndex = Integer.parseInt(feature);
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/core/src/main/java/hivemall/utils/codec/Base91.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/utils/codec/Base91.java b/core/src/main/java/hivemall/utils/codec/Base91.java
index 27bdf62..3e996be 100644
--- a/core/src/main/java/hivemall/utils/codec/Base91.java
+++ b/core/src/main/java/hivemall/utils/codec/Base91.java
@@ -1,22 +1,4 @@
/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-/*
* basE91 encoding/decoding routines
*
* Copyright (c) 2000-2006 Joachim Henke
@@ -46,6 +28,8 @@
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
+// This file contains a modified version of Jochaim Henke's Base91:
+// https://github.com/bwaldvogel/base91/blob/master/src/main/java/de/bwaldvogel/base91/Base91.java
package hivemall.utils.codec;
import hivemall.utils.io.FastByteArrayOutputStream;
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/core/src/main/java/hivemall/utils/lang/ExceptionUtils.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/utils/lang/ExceptionUtils.java b/core/src/main/java/hivemall/utils/lang/ExceptionUtils.java
new file mode 100644
index 0000000..b69c5b0
--- /dev/null
+++ b/core/src/main/java/hivemall/utils/lang/ExceptionUtils.java
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall.utils.lang;
+
+import javax.annotation.Nonnull;
+
+public final class ExceptionUtils {
+
+ public static final int TRACE_CAUSE_DEPTH = 5;
+
+ private ExceptionUtils() {}
+
+ @Nonnull
+ public static String prettyPrintStackTrace(@Nonnull final Throwable throwable) {
+ return prettyPrintStackTrace(throwable, TRACE_CAUSE_DEPTH);
+ }
+
+ @Nonnull
+ public static String prettyPrintStackTrace(@Nonnull final Throwable throwable,
+ final int traceDepth) {
+ final StringBuilder out = new StringBuilder(512);
+ out.append(getMessage(throwable));
+ out.append("\n\n---- Debugging information ----");
+ final int tracedepth;
+ if (throwable instanceof RuntimeException || throwable instanceof Error) {
+ tracedepth = -1;
+ } else {
+ tracedepth = traceDepth;
+ }
+ String captured = captureThrownWithStrackTrace(throwable, "trace-exception", tracedepth);
+ out.append(captured);
+ final Throwable cause = throwable.getCause();
+ if (cause != null) {
+ final Throwable rootCause = getRootCause(cause);
+ captured = captureThrownWithStrackTrace(rootCause, "trace-cause", TRACE_CAUSE_DEPTH);
+ out.append(captured);
+ }
+ out.append("\n------------------------------- \n");
+ return out.toString();
+ }
+
+ @Nonnull
+ private static String captureThrownWithStrackTrace(@Nonnull final Throwable throwable,
+ final String label, final int traceDepth) {
+ assert (traceDepth >= 1 || traceDepth == -1);
+ final StringBuilder out = new StringBuilder(255);
+ final String clazz = throwable.getClass().getName();
+ out.append(String.format("\n%-20s: %s \n", ("* " + label), clazz));
+ final StackTraceElement[] st = throwable.getStackTrace();
+ int at;
+ final int limit = (traceDepth == -1) ? st.length - 1 : traceDepth;
+ for (at = 0; at < st.length; at++) {
+ if (at < limit) {
+ out.append("\tat " + st[at] + '\n');
+ } else {
+ out.append("\t...\n");
+ break;
+ }
+ }
+ if (st.length == 0) {
+ out.append("\t no stack traces...");
+ } else if (at != (st.length - 1)) {
+ out.append("\tat " + st[st.length - 1]);
+ }
+ String errmsg = throwable.getMessage();
+ if (errmsg != null) {
+ out.append(String.format("\n%-20s: \n", ("* " + label + "-error-msg")));
+ String[] line = errmsg.split("\n");
+ final int maxlines = Math.min(line.length, Math.max(1, TRACE_CAUSE_DEPTH - 2));
+ for (int i = 0; i < maxlines; i++) {
+ out.append('\t');
+ out.append(line[i]);
+ if (i != (maxlines - 1)) {
+ out.append('\n');
+ }
+ }
+ }
+ return out.toString();
+ }
+
+ @Nonnull
+ public static String getMessage(@Nonnull final Throwable throwable) {
+ String errMsg = throwable.getMessage();
+ String clazz = throwable.getClass().getName();
+ return (errMsg != null) ? clazz + ": " + errMsg : clazz;
+ }
+
+ @Nonnull
+ private static Throwable getRootCause(@Nonnull final Throwable throwable) {
+ Throwable top = throwable;
+ while (top != null) {
+ Throwable parent = top.getCause();
+ if (parent != null) {
+ top = parent;
+ } else {
+ break;
+ }
+ }
+ return top;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/core/src/main/java/hivemall/utils/math/FastMath.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/utils/math/FastMath.java b/core/src/main/java/hivemall/utils/math/FastMath.java
index d27d6f8..09f7a16 100644
--- a/core/src/main/java/hivemall/utils/math/FastMath.java
+++ b/core/src/main/java/hivemall/utils/math/FastMath.java
@@ -1,21 +1,32 @@
/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
+ * Copyright 2012-2015 Jeff Hain
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
*
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * =============================================================================
+ * Notice of fdlibm package this program is partially derived from:
+ *
+ * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunSoft, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * =============================================================================
*/
+// This file contains a modified version of Jafama's FastMath:
+// https://github.com/jeffhain/jafama/blob/master/src/main/java/net/jafama/FastMath.java
package hivemall.utils.math;
import hivemall.annotations.Experimental;
@@ -98,34 +109,6 @@ public final class FastMath {
return 1 / (1 + exp(-x));
}
- /*
- * Copyright 2012-2015 Jeff Hain
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- /*
- * =============================================================================
- * Notice of fdlibm package this program is partially derived from:
- *
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunSoft, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * =============================================================================
- */
-
/**
* Based on Jafama (https://github.com/jeffhain/jafama/) version 2.2.
*/
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/core/src/main/resources/META-INF/LICENSE-jafama.txt
----------------------------------------------------------------------
diff --git a/core/src/main/resources/META-INF/LICENSE-jafama.txt b/core/src/main/resources/META-INF/LICENSE-jafama.txt
new file mode 100644
index 0000000..151b7ea
--- /dev/null
+++ b/core/src/main/resources/META-INF/LICENSE-jafama.txt
@@ -0,0 +1,202 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright {yyyy} {name of copyright owner}
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/core/src/main/resources/META-INF/LICENSE-smile.txt
----------------------------------------------------------------------
diff --git a/core/src/main/resources/META-INF/LICENSE-smile.txt b/core/src/main/resources/META-INF/LICENSE-smile.txt
new file mode 100644
index 0000000..94ad231
--- /dev/null
+++ b/core/src/main/resources/META-INF/LICENSE-smile.txt
@@ -0,0 +1,203 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "{}"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright {yyyy} {name of copyright owner}
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/dist/pom.xml
----------------------------------------------------------------------
diff --git a/dist/pom.xml b/dist/pom.xml
new file mode 100644
index 0000000..bea6226
--- /dev/null
+++ b/dist/pom.xml
@@ -0,0 +1,163 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <groupId>org.apache.hivemall</groupId>
+ <artifactId>hivemall</artifactId>
+ <version>0.5.1-incubating-SNAPSHOT</version>
+ <relativePath>../pom.xml</relativePath>
+ </parent>
+
+ <artifactId>hivemall-all</artifactId>
+ <name>Hivemall Distribution</name>
+ <packaging>jar</packaging>
+
+ <properties>
+ <main.basedir>${project.parent.basedir}</main.basedir>
+ </properties>
+
+ <dependencies>
+ <!-- compile scope -->
+ <dependency>
+ <groupId>org.apache.hivemall</groupId>
+ <artifactId>hivemall-core</artifactId>
+ <version>${project.version}</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hivemall</groupId>
+ <artifactId>hivemall-nlp</artifactId>
+ <version>${project.version}</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hivemall</groupId>
+ <artifactId>hivemall-xgboost</artifactId>
+ <version>${project.version}</version>
+ <scope>compile</scope>
+ </dependency>
+ </dependencies>
+
+ <build>
+ <plugins>
+ <plugin>
+ <artifactId>maven-jar-plugin</artifactId>
+ <configuration>
+ <finalName>${project.artifactId}-${project.version}</finalName>
+ <archive>
+ <index>true</index>
+ <compress>true</compress>
+ <manifest>
+ <addClasspath>false</addClasspath>
+ <addDefaultImplementationEntries>true</addDefaultImplementationEntries>
+ <addDefaultSpecificationEntries>true</addDefaultSpecificationEntries>
+ </manifest>
+ </archive>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-shade-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>jar-with-dependencies</id>
+ <phase>package</phase>
+ <goals>
+ <goal>shade</goal>
+ </goals>
+ <configuration>
+ <finalName>${project.artifactId}-${project.version}</finalName>
+ <outputDirectory>${project.parent.build.directory}</outputDirectory>
+ <minimizeJar>false</minimizeJar>
+ <createDependencyReducedPom>false</createDependencyReducedPom>
+ <createSourcesJar>true</createSourcesJar>
+ <promoteTransitiveDependencies>true</promoteTransitiveDependencies>
+ <artifactSet>
+ <includes>
+ <!-- hivemall-core -->
+ <include>org.apache.hivemall:hivemall-core</include>
+ <include>io.netty:netty-all</include>
+ <include>com.github.haifengl:smile-core</include>
+ <include>com.github.haifengl:smile-math</include>
+ <include>com.github.haifengl:smile-data</include>
+ <include>org.tukaani:xz</include>
+ <include>org.apache.commons:commons-math3</include>
+ <include>org.roaringbitmap:RoaringBitmap</include>
+ <include>it.unimi.dsi:fastutil</include>
+ <include>com.clearspring.analytics:stream</include>
+ <!-- hivemall-nlp -->
+ <include>org.apache.hivemall:hivemall-nlp</include>
+ <include>org.apache.lucene:lucene-analyzers-kuromoji</include>
+ <include>org.apache.lucene:lucene-analyzers-smartcn</include>
+ <include>org.apache.lucene:lucene-analyzers-common</include>
+ <include>org.apache.lucene:lucene-core</include>
+ <!-- hivemall-xgboost -->
+ <include>org.apache.hivemall:hivemall-xgboost</include>
+ <include>io.github.myui:xgboost4j</include>
+ <include>com.esotericsoftware.kryo:kryo</include>
+ </includes>
+ <excludes>
+ <exclude>org.apache.hivemall:hivemall-all</exclude>
+ </excludes>
+ </artifactSet>
+ <transformers>
+ <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
+ <manifestEntries>
+ <Implementation-Title>${project.name}</Implementation-Title>
+ <Implementation-Version>${project.version}</Implementation-Version>
+ <Implementation-Vendor>${project.organization.name}</Implementation-Vendor>
+ </manifestEntries>
+ </transformer>
+ </transformers>
+ <filters>
+ <filter>
+ <artifact>org.apache.lucene:*</artifact>
+ <includes>
+ <include>**</include>
+ </includes>
+ </filter>
+ <filter>
+ <artifact>com.esotericsoftware.kryo:kryo</artifact>
+ <includes>
+ <include>**</include>
+ </includes>
+ </filter>
+ <filter>
+ <artifact>*:*</artifact>
+ <excludes>
+ <exclude>META-INF/LICENSE.txt</exclude>
+ <exclude>META-INF/NOTICE.txt</exclude>
+ <exclude>META-INF/*.SF</exclude>
+ <exclude>META-INF/*.DSA</exclude>
+ <exclude>META-INF/*.RSA</exclude>
+ <exclude>*.jar</exclude>
+ <exclude>tracker.py</exclude>
+ </excludes>
+ </filter>
+ </filters>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+
+</project>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/docs/gitbook/binaryclass/news20_rf.md
----------------------------------------------------------------------
diff --git a/docs/gitbook/binaryclass/news20_rf.md b/docs/gitbook/binaryclass/news20_rf.md
index fd0b475..327939b 100644
--- a/docs/gitbook/binaryclass/news20_rf.md
+++ b/docs/gitbook/binaryclass/news20_rf.md
@@ -47,7 +47,7 @@ from
## Prediction
```sql
-SET hivevar:classification=true;
+-- SET hivevar:classification=true;
drop table rf_predicted;
create table rf_predicted
@@ -60,7 +60,8 @@ FROM (
SELECT
rowid,
m.model_weight,
- tree_predict(m.model_id, m.model, t.features, ${classification}) as predicted
+ tree_predict(m.model_id, m.model, t.features, "-classification") as predicted
+ -- tree_predict(m.model_id, m.model, t.features, ${classification}) as predicted
FROM
rf_model m
LEFT OUTER JOIN -- CROSS JOIN
[2/4] incubator-hivemall git commit: Close #131: [v0.5.0-rc3] Merge
v0.5.0 branch
Posted by my...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/pom.xml
----------------------------------------------------------------------
diff --git a/spark/pom.xml b/spark/pom.xml
new file mode 100644
index 0000000..d018b8d
--- /dev/null
+++ b/spark/pom.xml
@@ -0,0 +1,295 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <groupId>org.apache.hivemall</groupId>
+ <artifactId>hivemall</artifactId>
+ <version>0.5.1-incubating-SNAPSHOT</version>
+ <relativePath>../pom.xml</relativePath>
+ </parent>
+
+ <artifactId>hivemall-spark</artifactId>
+ <packaging>pom</packaging>
+ <name>Hivemall on Apache Spark</name>
+
+ <modules>
+ <module>common</module>
+ <module>spark-2.0</module>
+ <module>spark-2.1</module>
+ <module>spark-2.2</module>
+ </modules>
+
+ <properties>
+ <main.basedir>${project.parent.basedir}</main.basedir>
+ <scala.version>2.11.8</scala.version>
+ <scala.binary.version>2.11</scala.binary.version>
+ <scalatest.jvm.opts>-ea -Xms768m -Xmx1024m -XX:PermSize=128m -XX:MaxPermSize=512m -XX:ReservedCodeCacheSize=512m</scalatest.jvm.opts>
+ </properties>
+
+ <dependencyManagement>
+ <dependencies>
+ <!-- compile scope -->
+ <dependency>
+ <groupId>org.apache.hivemall</groupId>
+ <artifactId>hivemall-core</artifactId>
+ <version>${project.version}</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hivemall</groupId>
+ <artifactId>hivemall-xgboost</artifactId>
+ <version>${project.version}</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-compress</artifactId>
+ <version>1.8</version>
+ <scope>compile</scope>
+ </dependency>
+
+ <!-- provided scope -->
+ <dependency>
+ <groupId>org.scala-lang</groupId>
+ <artifactId>scala-library</artifactId>
+ <version>${scala.version}</version>
+ <scope>provided</scope>
+ </dependency>
+
+ <!-- test dependencies -->
+ <dependency>
+ <groupId>org.apache.hivemall</groupId>
+ <artifactId>hivemall-mixserv</artifactId>
+ <version>${project.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.scalatest</groupId>
+ <artifactId>scalatest_${scala.binary.version}</artifactId>
+ <version>2.2.4</version>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+ </dependencyManagement>
+
+ <build>
+ <directory>target</directory>
+ <outputDirectory>target/classes</outputDirectory>
+ <finalName>${project.artifactId}-${project.version}</finalName>
+ <testOutputDirectory>target/test-classes</testOutputDirectory>
+
+ <pluginManagement>
+ <plugins>
+ <plugin>
+ <groupId>net.alchim31.maven</groupId>
+ <artifactId>scala-maven-plugin</artifactId>
+ <version>3.2.2</version>
+ </plugin>
+ <plugin>
+ <groupId>org.scalatest</groupId>
+ <artifactId>scalatest-maven-plugin</artifactId>
+ <version>1.0</version>
+ <configuration>
+ <reportsDirectory>${project.build.directory}/surefire-reports</reportsDirectory>
+ <junitxml>.</junitxml>
+ <filereports>SparkTestSuite.txt</filereports>
+ <argLine>${scalatest.jvm.opts}</argLine>
+ <stderr />
+ <environmentVariables>
+ <SPARK_PREPEND_CLASSES>1</SPARK_PREPEND_CLASSES>
+ <SPARK_SCALA_VERSION>${scala.binary.version}</SPARK_SCALA_VERSION>
+ <SPARK_TESTING>1</SPARK_TESTING>
+ <JAVA_HOME>${env.JAVA_HOME}</JAVA_HOME>
+ <PATH>${env.JAVA_HOME}/bin:${env.PATH}</PATH>
+ </environmentVariables>
+ <systemProperties>
+ <log4j.configuration>file:src/test/resources/log4j.properties</log4j.configuration>
+ <derby.system.durability>test</derby.system.durability>
+ <java.awt.headless>true</java.awt.headless>
+ <java.io.tmpdir>${project.build.directory}/tmp</java.io.tmpdir>
+ <spark.testing>1</spark.testing>
+ <spark.ui.enabled>false</spark.ui.enabled>
+ <spark.ui.showConsoleProgress>false</spark.ui.showConsoleProgress>
+ <spark.unsafe.exceptionOnMemoryLeak>true</spark.unsafe.exceptionOnMemoryLeak>
+ <!-- Needed by sql/hive tests. -->
+ <test.src.tables>__not_used__</test.src.tables>
+ </systemProperties>
+ <tagsToExclude>${test.exclude.tags}</tagsToExclude>
+ </configuration>
+ </plugin>
+ <!-- hivemall-spark_xx-xx-with-dependencies.jar including minimum dependencies -->
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-shade-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>jar-with-dependencies</id>
+ <phase>package</phase>
+ <goals>
+ <goal>shade</goal>
+ </goals>
+ <configuration>
+ <finalName>${project.artifactId}-${project.version}-with-dependencies</finalName>
+ <outputDirectory>${main.basedir}/target</outputDirectory>
+ <minimizeJar>false</minimizeJar>
+ <createDependencyReducedPom>false</createDependencyReducedPom>
+ <createSourcesJar>true</createSourcesJar>
+ <artifactSet>
+ <includes>
+ <include>org.apache.hivemall:hivemall-spark-common</include>
+ <!-- hivemall-core -->
+ <include>org.apache.hivemall:hivemall-core</include>
+ <include>io.netty:netty-all</include>
+ <include>com.github.haifengl:smile-core</include>
+ <include>com.github.haifengl:smile-math</include>
+ <include>com.github.haifengl:smile-data</include>
+ <include>org.tukaani:xz</include>
+ <include>org.apache.commons:commons-math3</include>
+ <include>org.roaringbitmap:RoaringBitmap</include>
+ <include>it.unimi.dsi:fastutil</include>
+ <include>com.clearspring.analytics:stream</include>
+ <!-- hivemall-nlp -->
+ <include>org.apache.hivemall:hivemall-nlp</include>
+ <include>org.apache.lucene:lucene-analyzers-kuromoji</include>
+ <include>org.apache.lucene:lucene-analyzers-smartcn</include>
+ <include>org.apache.lucene:lucene-analyzers-common</include>
+ <include>org.apache.lucene:lucene-core</include>
+ <!-- hivemall-xgboost -->
+ <include>org.apache.hivemall:hivemall-xgboost</include>
+ <include>io.github.myui:xgboost4j</include>
+ <include>com.esotericsoftware.kryo:kryo</include>
+ </includes>
+ </artifactSet>
+ <transformers>
+ <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
+ <manifestEntries>
+ <Implementation-Title>${project.name}</Implementation-Title>
+ <Implementation-Version>${project.version}</Implementation-Version>
+ <Implementation-Vendor>${project.organization.name}</Implementation-Vendor>
+ </manifestEntries>
+ </transformer>
+ </transformers>
+ <filters>
+ <filter>
+ <artifact>org.apache.lucene:*</artifact>
+ <includes>
+ <include>**</include>
+ </includes>
+ </filter>
+ <filter>
+ <artifact>com.esotericsoftware.kryo:kryo</artifact>
+ <includes>
+ <include>**</include>
+ </includes>
+ </filter>
+ <filter>
+ <artifact>*:*</artifact>
+ <excludes>
+ <exclude>META-INF/LICENSE.txt</exclude>
+ <exclude>META-INF/NOTICE.txt</exclude>
+ <exclude>META-INF/*.SF</exclude>
+ <exclude>META-INF/*.DSA</exclude>
+ <exclude>META-INF/*.RSA</exclude>
+ <exclude>*.jar</exclude>
+ <exclude>tracker.py</exclude>
+ </excludes>
+ </filter>
+ </filters>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.scalastyle</groupId>
+ <artifactId>scalastyle-maven-plugin</artifactId>
+ <version>0.8.0</version>
+ </plugin>
+ </plugins>
+ </pluginManagement>
+
+ <plugins>
+ <plugin>
+ <groupId>org.scalastyle</groupId>
+ <artifactId>scalastyle-maven-plugin</artifactId>
+ <configuration>
+ <verbose>false</verbose>
+ <failOnViolation>true</failOnViolation>
+ <includeTestSourceDirectory>true</includeTestSourceDirectory>
+ <failOnWarning>false</failOnWarning>
+ <sourceDirectory>${basedir}/src/main/scala</sourceDirectory>
+ <testSourceDirectory>${basedir}/src/test/scala</testSourceDirectory>
+ <configLocation>spark/scalastyle-config.xml</configLocation>
+ <outputFile>${basedir}/target/scalastyle-output.xml</outputFile>
+ <inputEncoding>${project.build.sourceEncoding}</inputEncoding>
+ <outputEncoding>${project.reporting.outputEncoding}</outputEncoding>
+ </configuration>
+ <executions>
+ <execution>
+ <goals>
+ <goal>check</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>net.alchim31.maven</groupId>
+ <artifactId>scala-maven-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>scala-compile-first</id>
+ <phase>process-resources</phase>
+ <goals>
+ <goal>add-source</goal>
+ <goal>compile</goal>
+ </goals>
+ </execution>
+ <execution>
+ <id>scala-test-compile</id>
+ <phase>process-test-resources</phase>
+ <goals>
+ <goal>testCompile</goal>
+ </goals>
+ </execution>
+ </executions>
+ <!-- For incremental compilation -->
+ <configuration>
+ <scalaVersion>${scala.version}</scalaVersion>
+ <recompileMode>incremental</recompileMode>
+ <useZincServer>true</useZincServer>
+ <args>
+ <arg>-unchecked</arg>
+ <arg>-deprecation</arg>
+ <!-- TODO: To enable this option, we need to fix many warnings -->
+ <!-- <arg>-feature</arg> -->
+ </args>
+ <jvmArgs>
+ <jvmArg>-Xms768m</jvmArg>
+ <jvmArg>-Xmx1024m</jvmArg>
+ <jvmArg>-XX:PermSize=128m</jvmArg>
+ <jvmArg>-XX:MaxPermSize=512m</jvmArg>
+ <jvmArg>-XX:ReservedCodeCacheSize=512m</jvmArg>
+ </jvmArgs>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+
+</project>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/scalastyle-config.xml
----------------------------------------------------------------------
diff --git a/spark/scalastyle-config.xml b/spark/scalastyle-config.xml
new file mode 100644
index 0000000..13d1c47
--- /dev/null
+++ b/spark/scalastyle-config.xml
@@ -0,0 +1,333 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+<!--
+If you wish to turn off checking for a section of code, you can put a comment in the source
+before and after the section, with the following syntax:
+
+ // scalastyle:off
+ ... // stuff that breaks the styles
+ // scalastyle:on
+
+You can also disable only one rule, by specifying its rule id, as specified in:
+ http://www.scalastyle.org/rules-0.7.0.html
+
+ // scalastyle:off no.finalize
+ override def finalize(): Unit = ...
+ // scalastyle:on no.finalize
+
+This file is divided into 3 sections:
+ (1) rules that we enforce.
+ (2) rules that we would like to enforce, but haven't cleaned up the codebase to turn on yet
+ (or we need to make the scalastyle rule more configurable).
+ (3) rules that we don't want to enforce.
+-->
+
+<scalastyle>
+ <name>Scalastyle standard configuration</name>
+
+ <!-- ================================================================================ -->
+ <!-- rules we enforce -->
+ <!-- ================================================================================ -->
+
+ <check level="error" class="org.scalastyle.file.FileTabChecker" enabled="true"></check>
+
+ <check level="error" class="org.scalastyle.file.HeaderMatchesChecker" enabled="true">
+ <parameters>
+ <parameter name="header"><![CDATA[/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */]]></parameter>
+ </parameters>
+ </check>
+
+ <check level="error" class="org.scalastyle.scalariform.SpacesAfterPlusChecker" enabled="true"></check>
+
+ <check level="error" class="org.scalastyle.scalariform.SpacesBeforePlusChecker" enabled="true"></check>
+
+ <check level="error" class="org.scalastyle.file.WhitespaceEndOfLineChecker" enabled="true"></check>
+
+ <check level="error" class="org.scalastyle.file.FileLineLengthChecker" enabled="true">
+ <parameters>
+ <parameter name="maxLineLength"><![CDATA[100]]></parameter>
+ <parameter name="tabSize"><![CDATA[2]]></parameter>
+ <parameter name="ignoreImports">true</parameter>
+ </parameters>
+ </check>
+
+ <check level="error" class="org.scalastyle.scalariform.ClassNamesChecker" enabled="true">
+ <parameters><parameter name="regex"><![CDATA[[A-Z][A-Za-z]*]]></parameter></parameters>
+ </check>
+
+ <check level="error" class="org.scalastyle.scalariform.ObjectNamesChecker" enabled="true">
+ <parameters><parameter name="regex"><![CDATA[[A-Z][A-Za-z]*]]></parameter></parameters>
+ </check>
+
+ <check level="error" class="org.scalastyle.scalariform.PackageObjectNamesChecker" enabled="true">
+ <parameters><parameter name="regex"><![CDATA[^[a-z][A-Za-z]*$]]></parameter></parameters>
+ </check>
+
+ <check level="error" class="org.scalastyle.scalariform.ParameterNumberChecker" enabled="true">
+ <parameters><parameter name="maxParameters"><![CDATA[10]]></parameter></parameters>
+ </check>
+
+ <check level="error" class="org.scalastyle.scalariform.NoFinalizeChecker" enabled="true"></check>
+
+ <check level="error" class="org.scalastyle.scalariform.CovariantEqualsChecker" enabled="true"></check>
+
+ <check level="error" class="org.scalastyle.scalariform.StructuralTypeChecker" enabled="true"></check>
+
+ <check level="error" class="org.scalastyle.scalariform.UppercaseLChecker" enabled="true"></check>
+
+ <check level="error" class="org.scalastyle.scalariform.IfBraceChecker" enabled="true">
+ <parameters>
+ <parameter name="singleLineAllowed"><![CDATA[true]]></parameter>
+ <parameter name="doubleLineAllowed"><![CDATA[true]]></parameter>
+ </parameters>
+ </check>
+
+ <check level="error" class="org.scalastyle.scalariform.PublicMethodsHaveTypeChecker" enabled="true"></check>
+
+ <check level="error" class="org.scalastyle.file.NewLineAtEofChecker" enabled="true"></check>
+
+ <check customId="nonascii" level="error" class="org.scalastyle.scalariform.NonASCIICharacterChecker" enabled="true"></check>
+
+ <check level="error" class="org.scalastyle.scalariform.SpaceAfterCommentStartChecker" enabled="true"></check>
+
+ <check level="error" class="org.scalastyle.scalariform.EnsureSingleSpaceBeforeTokenChecker" enabled="true">
+ <parameters>
+ <parameter name="tokens">ARROW, EQUALS, ELSE, TRY, CATCH, FINALLY, LARROW, RARROW</parameter>
+ </parameters>
+ </check>
+
+ <check level="error" class="org.scalastyle.scalariform.EnsureSingleSpaceAfterTokenChecker" enabled="true">
+ <parameters>
+ <parameter name="tokens">ARROW, EQUALS, COMMA, COLON, IF, ELSE, DO, WHILE, FOR, MATCH, TRY, CATCH, FINALLY, LARROW, RARROW</parameter>
+ </parameters>
+ </check>
+
+ <!-- ??? usually shouldn't be checked into the code base. -->
+ <check level="error" class="org.scalastyle.scalariform.NotImplementedErrorUsage" enabled="true"></check>
+
+ <!-- As of SPARK-7977 all printlns need to be wrapped in '// scalastyle:off/on println' -->
+ <check customId="println" level="error" class="org.scalastyle.scalariform.TokenChecker" enabled="true">
+ <parameters><parameter name="regex">^println$</parameter></parameters>
+ <customMessage><![CDATA[Are you sure you want to println? If yes, wrap the code block with
+ // scalastyle:off println
+ println(...)
+ // scalastyle:on println]]></customMessage>
+ </check>
+
+ <check customId="visiblefortesting" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+ <parameters><parameter name="regex">@VisibleForTesting</parameter></parameters>
+ <customMessage><![CDATA[
+ @VisibleForTesting causes classpath issues. Please note this in the java doc instead (SPARK-11615).
+ ]]></customMessage>
+ </check>
+
+ <check customId="runtimeaddshutdownhook" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+ <parameters><parameter name="regex">Runtime\.getRuntime\.addShutdownHook</parameter></parameters>
+ <customMessage><![CDATA[
+ Are you sure that you want to use Runtime.getRuntime.addShutdownHook? In most cases, you should use
+ ShutdownHookManager.addShutdownHook instead.
+ If you must use Runtime.getRuntime.addShutdownHook, wrap the code block with
+ // scalastyle:off runtimeaddshutdownhook
+ Runtime.getRuntime.addShutdownHook(...)
+ // scalastyle:on runtimeaddshutdownhook
+ ]]></customMessage>
+ </check>
+
+ <check customId="mutablesynchronizedbuffer" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+ <parameters><parameter name="regex">mutable\.SynchronizedBuffer</parameter></parameters>
+ <customMessage><![CDATA[
+ Are you sure that you want to use mutable.SynchronizedBuffer? In most cases, you should use
+ java.util.concurrent.ConcurrentLinkedQueue instead.
+ If you must use mutable.SynchronizedBuffer, wrap the code block with
+ // scalastyle:off mutablesynchronizedbuffer
+ mutable.SynchronizedBuffer[...]
+ // scalastyle:on mutablesynchronizedbuffer
+ ]]></customMessage>
+ </check>
+
+ <check customId="classforname" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+ <parameters><parameter name="regex">Class\.forName</parameter></parameters>
+ <customMessage><![CDATA[
+ Are you sure that you want to use Class.forName? In most cases, you should use Utils.classForName instead.
+ If you must use Class.forName, wrap the code block with
+ // scalastyle:off classforname
+ Class.forName(...)
+ // scalastyle:on classforname
+ ]]></customMessage>
+ </check>
+
+ <check customId="awaitresult" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+ <parameters><parameter name="regex">Await\.result</parameter></parameters>
+ <customMessage><![CDATA[
+ Are you sure that you want to use Await.result? In most cases, you should use ThreadUtils.awaitResult instead.
+ If you must use Await.result, wrap the code block with
+ // scalastyle:off awaitresult
+ Await.result(...)
+ // scalastyle:on awaitresult
+ ]]></customMessage>
+ </check>
+
+ <!-- As of SPARK-9613 JavaConversions should be replaced with JavaConverters -->
+ <check customId="javaconversions" level="error" class="org.scalastyle.scalariform.TokenChecker" enabled="true">
+ <parameters><parameter name="regex">JavaConversions</parameter></parameters>
+ <customMessage>Instead of importing implicits in scala.collection.JavaConversions._, import
+ scala.collection.JavaConverters._ and use .asScala / .asJava methods</customMessage>
+ </check>
+
+ <check customId="commonslang2" level="error" class="org.scalastyle.scalariform.TokenChecker" enabled="true">
+ <parameters><parameter name="regex">org\.apache\.commons\.lang\.</parameter></parameters>
+ <customMessage>Use Commons Lang 3 classes (package org.apache.commons.lang3.*) instead
+ of Commons Lang 2 (package org.apache.commons.lang.*)</customMessage>
+ </check>
+
+ <check level="error" class="org.scalastyle.scalariform.ImportOrderChecker" enabled="true">
+ <parameters>
+ <parameter name="groups">java,scala,3rdParty,spark</parameter>
+ <parameter name="group.java">javax?\..*</parameter>
+ <parameter name="group.scala">scala\..*</parameter>
+ <parameter name="group.3rdParty">(?!org\.apache\.spark\.).*</parameter>
+ <parameter name="group.spark">org\.apache\.spark\..*</parameter>
+ </parameters>
+ </check>
+
+ <check level="error" class="org.scalastyle.scalariform.DisallowSpaceBeforeTokenChecker" enabled="true">
+ <parameters>
+ <parameter name="tokens">COMMA</parameter>
+ </parameters>
+ </check>
+
+ <!-- SPARK-3854: Single Space between ')' and '{' -->
+ <check customId="SingleSpaceBetweenRParenAndLCurlyBrace" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+ <parameters><parameter name="regex">\)\{</parameter></parameters>
+ <customMessage><![CDATA[
+ Single Space between ')' and '{'.
+ ]]></customMessage>
+ </check>
+
+ <check customId="NoScalaDoc" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+ <parameters><parameter name="regex">(?m)^(\s*)/[*][*].*$(\r|)\n^\1 [*]</parameter></parameters>
+ <customMessage>Use Javadoc style indentation for multiline comments</customMessage>
+ </check>
+
+ <check customId="OmitBracesInCase" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+ <parameters><parameter name="regex">case[^\n>]*=>\s*\{</parameter></parameters>
+ <customMessage>Omit braces in case clauses.</customMessage>
+ </check>
+
+ <!-- SPARK-16877: Avoid Java annotations -->
+ <check customId="OverrideJavaCase" level="error" class="org.scalastyle.scalariform.TokenChecker" enabled="true">
+ <parameters><parameter name="regex">^Override$</parameter></parameters>
+ <customMessage>override modifier should be used instead of @java.lang.Override.</customMessage>
+ </check>
+
+ <check level="error" class="org.scalastyle.scalariform.DeprecatedJavaChecker" enabled="true"></check>
+
+ <!-- ================================================================================ -->
+ <!-- rules we'd like to enforce, but haven't cleaned up the codebase yet -->
+ <!-- ================================================================================ -->
+
+ <!-- We cannot turn the following two on, because it'd fail a lot of string interpolation use cases. -->
+ <!-- Ideally the following two rules should be configurable to rule out string interpolation. -->
+ <check level="error" class="org.scalastyle.scalariform.NoWhitespaceBeforeLeftBracketChecker" enabled="false"></check>
+ <check level="error" class="org.scalastyle.scalariform.NoWhitespaceAfterLeftBracketChecker" enabled="false"></check>
+
+ <!-- This breaks symbolic method names so we don't turn it on. -->
+ <!-- Maybe we should update it to allow basic symbolic names, and then we are good to go. -->
+ <check level="error" class="org.scalastyle.scalariform.MethodNamesChecker" enabled="false">
+ <parameters>
+ <parameter name="regex"><![CDATA[^[a-z][A-Za-z0-9]*$]]></parameter>
+ </parameters>
+ </check>
+
+ <!-- Should turn this on, but we have a few places that need to be fixed first -->
+ <check level="error" class="org.scalastyle.scalariform.EqualsHashCodeChecker" enabled="true"></check>
+
+ <!-- ================================================================================ -->
+ <!-- rules we don't want -->
+ <!-- ================================================================================ -->
+
+ <check level="error" class="org.scalastyle.scalariform.IllegalImportsChecker" enabled="false">
+ <parameters><parameter name="illegalImports"><![CDATA[sun._,java.awt._]]></parameter></parameters>
+ </check>
+
+ <!-- We want the opposite of this: NewLineAtEofChecker -->
+ <check level="error" class="org.scalastyle.file.NoNewLineAtEofChecker" enabled="false"></check>
+
+ <!-- This one complains about all kinds of random things. Disable. -->
+ <check level="error" class="org.scalastyle.scalariform.SimplifyBooleanExpressionChecker" enabled="false"></check>
+
+ <!-- We use return quite a bit for control flows and guards -->
+ <check level="error" class="org.scalastyle.scalariform.ReturnChecker" enabled="false"></check>
+
+ <!-- We use null a lot in low level code and to interface with 3rd party code -->
+ <check level="error" class="org.scalastyle.scalariform.NullChecker" enabled="false"></check>
+
+ <!-- Doesn't seem super big deal here ... -->
+ <check level="error" class="org.scalastyle.scalariform.NoCloneChecker" enabled="false"></check>
+
+ <!-- Doesn't seem super big deal here ... -->
+ <check level="error" class="org.scalastyle.file.FileLengthChecker" enabled="false">
+ <parameters><parameter name="maxFileLength">800</parameter></parameters>
+ </check>
+
+ <!-- Doesn't seem super big deal here ... -->
+ <check level="error" class="org.scalastyle.scalariform.NumberOfTypesChecker" enabled="false">
+ <parameters><parameter name="maxTypes">30</parameter></parameters>
+ </check>
+
+ <!-- Doesn't seem super big deal here ... -->
+ <check level="error" class="org.scalastyle.scalariform.CyclomaticComplexityChecker" enabled="false">
+ <parameters><parameter name="maximum">10</parameter></parameters>
+ </check>
+
+ <!-- Doesn't seem super big deal here ... -->
+ <check level="error" class="org.scalastyle.scalariform.MethodLengthChecker" enabled="false">
+ <parameters><parameter name="maxLength">50</parameter></parameters>
+ </check>
+
+ <!-- Not exactly feasible to enforce this right now. -->
+ <!-- It is also infrequent that somebody introduces a new class with a lot of methods. -->
+ <check level="error" class="org.scalastyle.scalariform.NumberOfMethodsInTypeChecker" enabled="false">
+ <parameters><parameter name="maxMethods"><![CDATA[30]]></parameter></parameters>
+ </check>
+
+ <!-- Doesn't seem super big deal here, and we have a lot of magic numbers ... -->
+ <check level="error" class="org.scalastyle.scalariform.MagicNumberChecker" enabled="false">
+ <parameters><parameter name="ignore">-1,0,1,2,3</parameter></parameters>
+ </check>
+
+</scalastyle>
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-2.0/pom.xml
----------------------------------------------------------------------
diff --git a/spark/spark-2.0/pom.xml b/spark/spark-2.0/pom.xml
index e197586..54c817d 100644
--- a/spark/spark-2.0/pom.xml
+++ b/spark/spark-2.0/pom.xml
@@ -16,37 +16,36 @@
specific language governing permissions and limitations
under the License.
-->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.hivemall</groupId>
- <artifactId>hivemall</artifactId>
- <version>0.5.0-incubating-SNAPSHOT</version>
- <relativePath>../../pom.xml</relativePath>
+ <artifactId>hivemall-spark</artifactId>
+ <version>0.5.1-incubating-SNAPSHOT</version>
+ <relativePath>../pom.xml</relativePath>
</parent>
- <artifactId>hivemall-spark</artifactId>
+ <artifactId>hivemall-spark2.0</artifactId>
<name>Hivemall on Spark 2.0</name>
<packaging>jar</packaging>
<properties>
- <main.basedir>${project.parent.basedir}</main.basedir>
+ <main.basedir>${project.parent.parent.basedir}</main.basedir>
+ <spark.version>2.0.2</spark.version>
+ <spark.binary.version>2.0</spark.binary.version>
</properties>
<dependencies>
- <!-- hivemall dependencies -->
+ <!-- compile scope -->
<dependency>
<groupId>org.apache.hivemall</groupId>
<artifactId>hivemall-core</artifactId>
- <version>${project.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.hivemall</groupId>
<artifactId>hivemall-xgboost</artifactId>
- <version>${project.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
@@ -56,21 +55,12 @@
<scope>compile</scope>
</dependency>
- <!-- third-party dependencies -->
+ <!-- provided scope -->
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
- <version>${scala.version}</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.commons</groupId>
- <artifactId>commons-compress</artifactId>
- <version>1.8</version>
- <scope>compile</scope>
+ <scope>provided</scope>
</dependency>
-
- <!-- other provided dependencies -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
@@ -106,114 +96,26 @@
<dependency>
<groupId>org.apache.hivemall</groupId>
<artifactId>hivemall-mixserv</artifactId>
- <version>${project.version}</version>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>org.xerial</groupId>
- <artifactId>xerial-core</artifactId>
- <version>3.2.3</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.scalatest</groupId>
<artifactId>scalatest_${scala.binary.version}</artifactId>
- <version>2.2.4</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
- <directory>target</directory>
- <outputDirectory>target/classes</outputDirectory>
- <finalName>${project.artifactId}-${spark.binary.version}_${scala.binary.version}-${project.version}</finalName>
- <testOutputDirectory>target/test-classes</testOutputDirectory>
<plugins>
- <!-- For incremental compilation -->
- <plugin>
- <groupId>net.alchim31.maven</groupId>
- <artifactId>scala-maven-plugin</artifactId>
- <version>3.2.2</version>
- <executions>
- <execution>
- <id>scala-compile-first</id>
- <phase>process-resources</phase>
- <goals>
- <goal>compile</goal>
- </goals>
- </execution>
- <execution>
- <id>scala-test-compile-first</id>
- <phase>process-test-resources</phase>
- <goals>
- <goal>testCompile</goal>
- </goals>
- </execution>
- </executions>
- <configuration>
- <scalaVersion>${scala.version}</scalaVersion>
- <recompileMode>incremental</recompileMode>
- <useZincServer>true</useZincServer>
- <args>
- <arg>-unchecked</arg>
- <arg>-deprecation</arg>
- <!-- TODO: To enable this option, we need to fix many wornings -->
- <!-- <arg>-feature</arg> -->
- </args>
- <jvmArgs>
- <jvmArg>-Xms512m</jvmArg>
- <jvmArg>-Xmx1024m</jvmArg>
- </jvmArgs>
- </configuration>
- </plugin>
- <!-- hivemall-spark_xx-xx.jar -->
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-jar-plugin</artifactId>
- <version>2.5</version>
- <configuration>
- <finalName>${project.artifactId}-${spark.binary.version}_${scala.binary.version}-${project.version}</finalName>
- <outputDirectory>${project.parent.build.directory}</outputDirectory>
- </configuration>
- </plugin>
<!-- hivemall-spark_xx-xx-with-dependencies.jar including minimum dependencies -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
- <version>3.1.0</version>
- <executions>
- <execution>
- <id>jar-with-dependencies</id>
- <phase>package</phase>
- <goals>
- <goal>shade</goal>
- </goals>
- <configuration>
- <finalName>${project.artifactId}-${spark.binary.version}_${scala.binary.version}-${project.version}-with-dependencies</finalName>
- <outputDirectory>${project.parent.build.directory}</outputDirectory>
- <minimizeJar>false</minimizeJar>
- <createDependencyReducedPom>false</createDependencyReducedPom>
- <artifactSet>
- <includes>
- <include>org.apache.hivemall:hivemall-core</include>
- <include>org.apache.hivemall:hivemall-xgboost</include>
- <include>org.apache.hivemall:hivemall-spark-common</include>
- <include>com.github.haifengl:smile-core</include>
- <include>com.github.haifengl:smile-math</include>
- <include>com.github.haifengl:smile-data</include>
- <include>ml.dmlc:xgboost4j</include>
- <include>com.esotericsoftware.kryo:kryo</include>
- </includes>
- </artifactSet>
- </configuration>
- </execution>
- </executions>
</plugin>
<!-- disable surefire because there is no java test -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
- <version>2.7</version>
<configuration>
<skipTests>true</skipTests>
</configuration>
@@ -222,33 +124,6 @@
<plugin>
<groupId>org.scalatest</groupId>
<artifactId>scalatest-maven-plugin</artifactId>
- <version>1.0</version>
- <configuration>
- <reportsDirectory>${project.build.directory}/surefire-reports</reportsDirectory>
- <junitxml>.</junitxml>
- <filereports>SparkTestSuite.txt</filereports>
- <argLine>${spark.test.jvm.opts}</argLine>
- <stderr />
- <environmentVariables>
- <SPARK_PREPEND_CLASSES>1</SPARK_PREPEND_CLASSES>
- <SPARK_SCALA_VERSION>${scala.binary.version}</SPARK_SCALA_VERSION>
- <SPARK_TESTING>1</SPARK_TESTING>
- <JAVA_HOME>${env.JAVA_HOME}</JAVA_HOME>
- </environmentVariables>
- <systemProperties>
- <log4j.configuration>file:src/test/resources/log4j.properties</log4j.configuration>
- <derby.system.durability>test</derby.system.durability>
- <java.awt.headless>true</java.awt.headless>
- <java.io.tmpdir>${project.build.directory}/tmp</java.io.tmpdir>
- <spark.testing>1</spark.testing>
- <spark.ui.enabled>false</spark.ui.enabled>
- <spark.ui.showConsoleProgress>false</spark.ui.showConsoleProgress>
- <spark.unsafe.exceptionOnMemoryLeak>true</spark.unsafe.exceptionOnMemoryLeak>
- <!-- Needed by sql/hive tests. -->
- <test.src.tables>__not_used__</test.src.tables>
- </systemProperties>
- <tagsToExclude>${test.exclude.tags}</tagsToExclude>
- </configuration>
<executions>
<execution>
<id>test</id>
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-2.0/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala
----------------------------------------------------------------------
diff --git a/spark/spark-2.0/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala b/spark/spark-2.0/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala
new file mode 100644
index 0000000..a6bbb4b
--- /dev/null
+++ b/spark/spark-2.0/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.spark.streaming
+
+import scala.reflect.ClassTag
+
+import org.apache.spark.ml.feature.HivemallLabeledPoint
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.{DataFrame, Row, SQLContext}
+import org.apache.spark.streaming.dstream.DStream
+
+final class HivemallStreamingOps(ds: DStream[HivemallLabeledPoint]) {
+
+ def predict[U: ClassTag](f: DataFrame => DataFrame)(implicit sqlContext: SQLContext)
+ : DStream[Row] = {
+ ds.transform[Row] { rdd: RDD[HivemallLabeledPoint] =>
+ f(sqlContext.createDataFrame(rdd)).rdd
+ }
+ }
+}
+
+object HivemallStreamingOps {
+
+ /**
+ * Implicitly inject the [[HivemallStreamingOps]] into [[DStream]].
+ */
+ implicit def dataFrameToHivemallStreamingOps(ds: DStream[HivemallLabeledPoint])
+ : HivemallStreamingOps = {
+ new HivemallStreamingOps(ds)
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/HiveUdfSuite.scala
----------------------------------------------------------------------
diff --git a/spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/HiveUdfSuite.scala b/spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/HiveUdfSuite.scala
index d3bf435..4a43afc 100644
--- a/spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/HiveUdfSuite.scala
+++ b/spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/HiveUdfSuite.scala
@@ -35,7 +35,7 @@ final class HiveUdfWithFeatureSuite extends HivemallFeatureQueryTest {
checkAnswer(
sql(s"SELECT DISTINCT hivemall_version()"),
- Row("0.5.0-incubating-SNAPSHOT")
+ Row("0.5.1-incubating-SNAPSHOT")
)
// sql("DROP TEMPORARY FUNCTION IF EXISTS hivemall_version")
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
----------------------------------------------------------------------
diff --git a/spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala b/spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
index 5e99fd8..399a557 100644
--- a/spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
+++ b/spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
@@ -293,7 +293,7 @@ final class HivemallOpsWithFeatureSuite extends HivemallFeatureQueryTest {
}
test("misc - hivemall_version") {
- checkAnswer(DummyInputData.select(hivemall_version()), Row("0.5.0-incubating-SNAPSHOT"))
+ checkAnswer(DummyInputData.select(hivemall_version()), Row("0.5.1-incubating-SNAPSHOT"))
}
test("misc - rowid") {
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-2.1/pom.xml
----------------------------------------------------------------------
diff --git a/spark/spark-2.1/pom.xml b/spark/spark-2.1/pom.xml
index 3d07184..e10b4ab 100644
--- a/spark/spark-2.1/pom.xml
+++ b/spark/spark-2.1/pom.xml
@@ -16,23 +16,24 @@
specific language governing permissions and limitations
under the License.
-->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.hivemall</groupId>
- <artifactId>hivemall</artifactId>
- <version>0.5.0-incubating-SNAPSHOT</version>
- <relativePath>../../pom.xml</relativePath>
+ <artifactId>hivemall-spark</artifactId>
+ <version>0.5.1-incubating-SNAPSHOT</version>
+ <relativePath>../pom.xml</relativePath>
</parent>
- <artifactId>hivemall-spark</artifactId>
+ <artifactId>hivemall-spark2.1</artifactId>
<name>Hivemall on Spark 2.1</name>
<packaging>jar</packaging>
<properties>
- <main.basedir>${project.parent.basedir}</main.basedir>
+ <main.basedir>${project.parent.parent.basedir}</main.basedir>
+ <spark.version>2.1.1</spark.version>
+ <spark.binary.version>2.1</spark.binary.version>
</properties>
<dependencies>
@@ -40,13 +41,11 @@
<dependency>
<groupId>org.apache.hivemall</groupId>
<artifactId>hivemall-core</artifactId>
- <version>${project.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.hivemall</groupId>
<artifactId>hivemall-xgboost</artifactId>
- <version>${project.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
@@ -56,21 +55,12 @@
<scope>compile</scope>
</dependency>
- <!-- third-party dependencies -->
+ <!-- provided scope -->
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
- <version>${scala.version}</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.commons</groupId>
- <artifactId>commons-compress</artifactId>
- <version>1.8</version>
- <scope>compile</scope>
+ <scope>provided</scope>
</dependency>
-
- <!-- other provided dependencies -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
@@ -106,114 +96,26 @@
<dependency>
<groupId>org.apache.hivemall</groupId>
<artifactId>hivemall-mixserv</artifactId>
- <version>${project.version}</version>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>org.xerial</groupId>
- <artifactId>xerial-core</artifactId>
- <version>3.2.3</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.scalatest</groupId>
<artifactId>scalatest_${scala.binary.version}</artifactId>
- <version>2.2.4</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
- <directory>target</directory>
- <outputDirectory>target/classes</outputDirectory>
- <finalName>${project.artifactId}-${spark.binary.version}_${scala.binary.version}-${project.version}</finalName>
- <testOutputDirectory>target/test-classes</testOutputDirectory>
<plugins>
- <!-- For incremental compilation -->
- <plugin>
- <groupId>net.alchim31.maven</groupId>
- <artifactId>scala-maven-plugin</artifactId>
- <version>3.2.2</version>
- <executions>
- <execution>
- <id>scala-compile-first</id>
- <phase>process-resources</phase>
- <goals>
- <goal>compile</goal>
- </goals>
- </execution>
- <execution>
- <id>scala-test-compile-first</id>
- <phase>process-test-resources</phase>
- <goals>
- <goal>testCompile</goal>
- </goals>
- </execution>
- </executions>
- <configuration>
- <scalaVersion>${scala.version}</scalaVersion>
- <recompileMode>incremental</recompileMode>
- <useZincServer>true</useZincServer>
- <args>
- <arg>-unchecked</arg>
- <arg>-deprecation</arg>
- <!-- TODO: To enable this option, we need to fix many wornings -->
- <!-- <arg>-feature</arg> -->
- </args>
- <jvmArgs>
- <jvmArg>-Xms512m</jvmArg>
- <jvmArg>-Xmx1024m</jvmArg>
- </jvmArgs>
- </configuration>
- </plugin>
- <!-- hivemall-spark_xx-xx.jar -->
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-jar-plugin</artifactId>
- <version>2.5</version>
- <configuration>
- <finalName>${project.artifactId}-${spark.binary.version}_${scala.binary.version}-${project.version}</finalName>
- <outputDirectory>${project.parent.build.directory}</outputDirectory>
- </configuration>
- </plugin>
<!-- hivemall-spark_xx-xx-with-dependencies.jar including minimum dependencies -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
- <version>3.1.0</version>
- <executions>
- <execution>
- <id>jar-with-dependencies</id>
- <phase>package</phase>
- <goals>
- <goal>shade</goal>
- </goals>
- <configuration>
- <finalName>${project.artifactId}-${spark.binary.version}_${scala.binary.version}-${project.version}-with-dependencies</finalName>
- <outputDirectory>${project.parent.build.directory}</outputDirectory>
- <minimizeJar>false</minimizeJar>
- <createDependencyReducedPom>false</createDependencyReducedPom>
- <artifactSet>
- <includes>
- <include>org.apache.hivemall:hivemall-core</include>
- <include>org.apache.hivemall:hivemall-xgboost</include>
- <include>org.apache.hivemall:hivemall-spark-common</include>
- <include>com.github.haifengl:smile-core</include>
- <include>com.github.haifengl:smile-math</include>
- <include>com.github.haifengl:smile-data</include>
- <include>ml.dmlc:xgboost4j</include>
- <include>com.esotericsoftware.kryo:kryo</include>
- </includes>
- </artifactSet>
- </configuration>
- </execution>
- </executions>
</plugin>
<!-- disable surefire because there is no java test -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
- <version>2.7</version>
<configuration>
<skipTests>true</skipTests>
</configuration>
@@ -222,33 +124,6 @@
<plugin>
<groupId>org.scalatest</groupId>
<artifactId>scalatest-maven-plugin</artifactId>
- <version>1.0</version>
- <configuration>
- <reportsDirectory>${project.build.directory}/surefire-reports</reportsDirectory>
- <junitxml>.</junitxml>
- <filereports>SparkTestSuite.txt</filereports>
- <argLine>${spark.test.jvm.opts}</argLine>
- <stderr />
- <environmentVariables>
- <SPARK_PREPEND_CLASSES>1</SPARK_PREPEND_CLASSES>
- <SPARK_SCALA_VERSION>${scala.binary.version}</SPARK_SCALA_VERSION>
- <SPARK_TESTING>1</SPARK_TESTING>
- <JAVA_HOME>${env.JAVA_HOME}</JAVA_HOME>
- </environmentVariables>
- <systemProperties>
- <log4j.configuration>file:src/test/resources/log4j.properties</log4j.configuration>
- <derby.system.durability>test</derby.system.durability>
- <java.awt.headless>true</java.awt.headless>
- <java.io.tmpdir>${project.build.directory}/tmp</java.io.tmpdir>
- <spark.testing>1</spark.testing>
- <spark.ui.enabled>false</spark.ui.enabled>
- <spark.ui.showConsoleProgress>false</spark.ui.showConsoleProgress>
- <spark.unsafe.exceptionOnMemoryLeak>true</spark.unsafe.exceptionOnMemoryLeak>
- <!-- Needed by sql/hive tests. -->
- <test.src.tables>__not_used__</test.src.tables>
- </systemProperties>
- <tagsToExclude>${test.exclude.tags}</tagsToExclude>
- </configuration>
<executions>
<execution>
<id>test</id>
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-2.1/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala
----------------------------------------------------------------------
diff --git a/spark/spark-2.1/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala b/spark/spark-2.1/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala
new file mode 100644
index 0000000..a6bbb4b
--- /dev/null
+++ b/spark/spark-2.1/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.spark.streaming
+
+import scala.reflect.ClassTag
+
+import org.apache.spark.ml.feature.HivemallLabeledPoint
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.{DataFrame, Row, SQLContext}
+import org.apache.spark.streaming.dstream.DStream
+
+final class HivemallStreamingOps(ds: DStream[HivemallLabeledPoint]) {
+
+ def predict[U: ClassTag](f: DataFrame => DataFrame)(implicit sqlContext: SQLContext)
+ : DStream[Row] = {
+ ds.transform[Row] { rdd: RDD[HivemallLabeledPoint] =>
+ f(sqlContext.createDataFrame(rdd)).rdd
+ }
+ }
+}
+
+object HivemallStreamingOps {
+
+ /**
+ * Implicitly inject the [[HivemallStreamingOps]] into [[DStream]].
+ */
+ implicit def dataFrameToHivemallStreamingOps(ds: DStream[HivemallLabeledPoint])
+ : HivemallStreamingOps = {
+ new HivemallStreamingOps(ds)
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-2.1/src/test/scala/org/apache/spark/sql/hive/HiveUdfSuite.scala
----------------------------------------------------------------------
diff --git a/spark/spark-2.1/src/test/scala/org/apache/spark/sql/hive/HiveUdfSuite.scala b/spark/spark-2.1/src/test/scala/org/apache/spark/sql/hive/HiveUdfSuite.scala
index eb4ec04..cecceca 100644
--- a/spark/spark-2.1/src/test/scala/org/apache/spark/sql/hive/HiveUdfSuite.scala
+++ b/spark/spark-2.1/src/test/scala/org/apache/spark/sql/hive/HiveUdfSuite.scala
@@ -35,7 +35,7 @@ final class HiveUdfWithFeatureSuite extends HivemallFeatureQueryTest {
checkAnswer(
sql(s"SELECT DISTINCT hivemall_version()"),
- Row("0.5.0-incubating-SNAPSHOT")
+ Row("0.5.1-incubating-SNAPSHOT")
)
// sql("DROP TEMPORARY FUNCTION IF EXISTS hivemall_version")
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-2.1/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
----------------------------------------------------------------------
diff --git a/spark/spark-2.1/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala b/spark/spark-2.1/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
index 84ab0cd..8dad4c3 100644
--- a/spark/spark-2.1/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
+++ b/spark/spark-2.1/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
@@ -295,7 +295,7 @@ final class HivemallOpsWithFeatureSuite extends HivemallFeatureQueryTest {
}
test("misc - hivemall_version") {
- checkAnswer(DummyInputData.select(hivemall_version()), Row("0.5.0-incubating-SNAPSHOT"))
+ checkAnswer(DummyInputData.select(hivemall_version()), Row("0.5.1-incubating-SNAPSHOT"))
}
test("misc - rowid") {
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-2.2/pom.xml
----------------------------------------------------------------------
diff --git a/spark/spark-2.2/pom.xml b/spark/spark-2.2/pom.xml
index 5366e1d..47aea92 100644
--- a/spark/spark-2.2/pom.xml
+++ b/spark/spark-2.2/pom.xml
@@ -16,40 +16,40 @@
specific language governing permissions and limitations
under the License.
-->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.hivemall</groupId>
- <artifactId>hivemall</artifactId>
- <version>0.5.0-incubating-SNAPSHOT</version>
- <relativePath>../../pom.xml</relativePath>
+ <artifactId>hivemall-spark</artifactId>
+ <version>0.5.1-incubating-SNAPSHOT</version>
+ <relativePath>../pom.xml</relativePath>
</parent>
- <artifactId>hivemall-spark</artifactId>
+ <artifactId>hivemall-spark2.2</artifactId>
<name>Hivemall on Spark 2.2</name>
<packaging>jar</packaging>
<properties>
- <PermGen>64m</PermGen>
- <MaxPermGen>512m</MaxPermGen>
- <CodeCacheSize>512m</CodeCacheSize>
- <main.basedir>${project.parent.basedir}</main.basedir>
+ <main.basedir>${project.parent.parent.basedir}</main.basedir>
+ <spark.version>2.2.0</spark.version>
+ <spark.binary.version>2.2</spark.binary.version>
+ <hadoop.version>2.6.5</hadoop.version>
+ <scalatest.jvm.opts>-ea -Xms768m -Xmx2g -XX:MetaspaceSize=128m -XX:MaxMetaspaceSize=512m -XX:ReservedCodeCacheSize=512m</scalatest.jvm.opts>
+ <maven.compiler.source>1.8</maven.compiler.source>
+ <maven.compiler.target>1.8</maven.compiler.target>
</properties>
<dependencies>
- <!-- hivemall dependencies -->
+ <!-- compile scope -->
<dependency>
<groupId>org.apache.hivemall</groupId>
<artifactId>hivemall-core</artifactId>
- <version>${project.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.hivemall</groupId>
<artifactId>hivemall-xgboost</artifactId>
- <version>${project.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
@@ -59,22 +59,13 @@
<scope>compile</scope>
</dependency>
- <!-- third-party dependencies -->
+ <!-- provided scope -->
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
- <version>${scala.version}</version>
- <scope>compile</scope>
+ <scope>provided</scope>
</dependency>
<dependency>
- <groupId>org.apache.commons</groupId>
- <artifactId>commons-compress</artifactId>
- <version>1.8</version>
- <scope>compile</scope>
- </dependency>
-
- <!-- other provided dependencies -->
- <dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
@@ -109,117 +100,26 @@
<dependency>
<groupId>org.apache.hivemall</groupId>
<artifactId>hivemall-mixserv</artifactId>
- <version>${project.version}</version>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>org.xerial</groupId>
- <artifactId>xerial-core</artifactId>
- <version>3.2.3</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.scalatest</groupId>
<artifactId>scalatest_${scala.binary.version}</artifactId>
- <version>2.2.4</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
- <directory>target</directory>
- <outputDirectory>target/classes</outputDirectory>
- <finalName>${project.artifactId}-${spark.binary.version}_${scala.binary.version}-${project.version}</finalName>
- <testOutputDirectory>target/test-classes</testOutputDirectory>
<plugins>
- <!-- For incremental compilation -->
- <plugin>
- <groupId>net.alchim31.maven</groupId>
- <artifactId>scala-maven-plugin</artifactId>
- <version>3.2.2</version>
- <executions>
- <execution>
- <id>scala-compile-first</id>
- <phase>process-resources</phase>
- <goals>
- <goal>compile</goal>
- </goals>
- </execution>
- <execution>
- <id>scala-test-compile-first</id>
- <phase>process-test-resources</phase>
- <goals>
- <goal>testCompile</goal>
- </goals>
- </execution>
- </executions>
- <configuration>
- <scalaVersion>${scala.version}</scalaVersion>
- <recompileMode>incremental</recompileMode>
- <useZincServer>true</useZincServer>
- <args>
- <arg>-unchecked</arg>
- <arg>-deprecation</arg>
- <!-- TODO: To enable this option, we need to fix many warnings -->
- <!-- <arg>-feature</arg> -->
- </args>
- <jvmArgs>
- <jvmArg>-Xms1024m</jvmArg>
- <jvmArg>-Xmx1024m</jvmArg>
- <jvmArg>-XX:PermSize=${PermGen}</jvmArg>
- <jvmArg>-XX:MaxPermSize=${MaxPermGen}</jvmArg>
- <jvmArg>-XX:ReservedCodeCacheSize=${CodeCacheSize}</jvmArg>
- </jvmArgs>
- </configuration>
- </plugin>
- <!-- hivemall-spark_xx-xx.jar -->
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-jar-plugin</artifactId>
- <version>2.5</version>
- <configuration>
- <finalName>${project.artifactId}-${spark.binary.version}_${scala.binary.version}-${project.version}</finalName>
- <outputDirectory>${project.parent.build.directory}</outputDirectory>
- </configuration>
- </plugin>
<!-- hivemall-spark_xx-xx-with-dependencies.jar including minimum dependencies -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
- <version>3.1.0</version>
- <executions>
- <execution>
- <id>jar-with-dependencies</id>
- <phase>package</phase>
- <goals>
- <goal>shade</goal>
- </goals>
- <configuration>
- <finalName>${project.artifactId}-${spark.binary.version}_${scala.binary.version}-${project.version}-with-dependencies</finalName>
- <outputDirectory>${project.parent.build.directory}</outputDirectory>
- <minimizeJar>false</minimizeJar>
- <createDependencyReducedPom>false</createDependencyReducedPom>
- <artifactSet>
- <includes>
- <include>org.apache.hivemall:hivemall-core</include>
- <include>org.apache.hivemall:hivemall-xgboost</include>
- <include>org.apache.hivemall:hivemall-spark-common</include>
- <include>com.github.haifengl:smile-core</include>
- <include>com.github.haifengl:smile-math</include>
- <include>com.github.haifengl:smile-data</include>
- <include>ml.dmlc:xgboost4j</include>
- <include>com.esotericsoftware.kryo:kryo</include>
- </includes>
- </artifactSet>
- </configuration>
- </execution>
- </executions>
</plugin>
<!-- disable surefire because there is no java test -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
- <version>2.7</version>
<configuration>
<skipTests>true</skipTests>
</configuration>
@@ -228,33 +128,6 @@
<plugin>
<groupId>org.scalatest</groupId>
<artifactId>scalatest-maven-plugin</artifactId>
- <version>1.0</version>
- <configuration>
- <reportsDirectory>${project.build.directory}/surefire-reports</reportsDirectory>
- <junitxml>.</junitxml>
- <filereports>SparkTestSuite.txt</filereports>
- <argLine>-ea -Xmx2g -XX:MaxPermSize=${MaxPermGen} -XX:ReservedCodeCacheSize=${CodeCacheSize}</argLine>
- <stderr />
- <environmentVariables>
- <SPARK_PREPEND_CLASSES>1</SPARK_PREPEND_CLASSES>
- <SPARK_SCALA_VERSION>${scala.binary.version}</SPARK_SCALA_VERSION>
- <SPARK_TESTING>1</SPARK_TESTING>
- <JAVA_HOME>${env.JAVA_HOME}</JAVA_HOME>
- </environmentVariables>
- <systemProperties>
- <log4j.configuration>file:src/test/resources/log4j.properties</log4j.configuration>
- <derby.system.durability>test</derby.system.durability>
- <java.awt.headless>true</java.awt.headless>
- <java.io.tmpdir>${project.build.directory}/tmp</java.io.tmpdir>
- <spark.testing>1</spark.testing>
- <spark.ui.enabled>false</spark.ui.enabled>
- <spark.ui.showConsoleProgress>false</spark.ui.showConsoleProgress>
- <spark.unsafe.exceptionOnMemoryLeak>true</spark.unsafe.exceptionOnMemoryLeak>
- <!-- Needed by sql/hive tests. -->
- <test.src.tables>__not_used__</test.src.tables>
- </systemProperties>
- <tagsToExclude>${test.exclude.tags}</tagsToExclude>
- </configuration>
<executions>
<execution>
<id>test</id>
@@ -264,6 +137,16 @@
</execution>
</executions>
</plugin>
+ <plugin>
+ <groupId>org.scalatest</groupId>
+ <artifactId>scalatest-maven-plugin</artifactId>
+ <configuration>
+ <environmentVariables>
+ <JAVA_HOME>${env.JAVA8_HOME}</JAVA_HOME>
+ <PATH>${env.JAVA8_HOME}/bin:${env.PATH}</PATH>
+ </environmentVariables>
+ </configuration>
+ </plugin>
</plugins>
</build>
</project>
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-2.2/src/main/scala/org/apache/spark/sql/hive/HivemallGroupedDataset.scala
----------------------------------------------------------------------
diff --git a/spark/spark-2.2/src/main/scala/org/apache/spark/sql/hive/HivemallGroupedDataset.scala b/spark/spark-2.2/src/main/scala/org/apache/spark/sql/hive/HivemallGroupedDataset.scala
index 00617b7..2982d9c 100644
--- a/spark/spark-2.2/src/main/scala/org/apache/spark/sql/hive/HivemallGroupedDataset.scala
+++ b/spark/spark-2.2/src/main/scala/org/apache/spark/sql/hive/HivemallGroupedDataset.scala
@@ -127,7 +127,7 @@ final class HivemallGroupedDataset(groupBy: RelationalGroupedDataset) {
* @group ensemble
*/
def max_label(score: String, label: String): DataFrame = {
- checkType(score, DoubleType)
+ // checkType(score, DoubleType)
checkType(label, StringType)
val udaf = HiveUDAFFunction(
"max_label",
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-2.2/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala
----------------------------------------------------------------------
diff --git a/spark/spark-2.2/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala b/spark/spark-2.2/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala
new file mode 100644
index 0000000..a6bbb4b
--- /dev/null
+++ b/spark/spark-2.2/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.spark.streaming
+
+import scala.reflect.ClassTag
+
+import org.apache.spark.ml.feature.HivemallLabeledPoint
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.{DataFrame, Row, SQLContext}
+import org.apache.spark.streaming.dstream.DStream
+
+final class HivemallStreamingOps(ds: DStream[HivemallLabeledPoint]) {
+
+ def predict[U: ClassTag](f: DataFrame => DataFrame)(implicit sqlContext: SQLContext)
+ : DStream[Row] = {
+ ds.transform[Row] { rdd: RDD[HivemallLabeledPoint] =>
+ f(sqlContext.createDataFrame(rdd)).rdd
+ }
+ }
+}
+
+object HivemallStreamingOps {
+
+ /**
+ * Implicitly inject the [[HivemallStreamingOps]] into [[DStream]].
+ */
+ implicit def dataFrameToHivemallStreamingOps(ds: DStream[HivemallLabeledPoint])
+ : HivemallStreamingOps = {
+ new HivemallStreamingOps(ds)
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-2.2/src/test/scala/org/apache/spark/sql/hive/HiveUdfSuite.scala
----------------------------------------------------------------------
diff --git a/spark/spark-2.2/src/test/scala/org/apache/spark/sql/hive/HiveUdfSuite.scala b/spark/spark-2.2/src/test/scala/org/apache/spark/sql/hive/HiveUdfSuite.scala
index 1e1c574..f16eae0 100644
--- a/spark/spark-2.2/src/test/scala/org/apache/spark/sql/hive/HiveUdfSuite.scala
+++ b/spark/spark-2.2/src/test/scala/org/apache/spark/sql/hive/HiveUdfSuite.scala
@@ -36,7 +36,7 @@ final class HiveUdfWithFeatureSuite extends HivemallFeatureQueryTest {
checkAnswer(
sql(s"SELECT DISTINCT hivemall_version()"),
- Row("0.5.0-incubating-SNAPSHOT")
+ Row("0.5.1-incubating-SNAPSHOT")
)
// sql("DROP TEMPORARY FUNCTION IF EXISTS hivemall_version")
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-2.2/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
----------------------------------------------------------------------
diff --git a/spark/spark-2.2/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala b/spark/spark-2.2/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
index f73cb75..f2b7b6e 100644
--- a/spark/spark-2.2/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
+++ b/spark/spark-2.2/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
@@ -562,7 +562,7 @@ class HivemallOpsWithFeatureSuite extends HivemallFeatureQueryTest {
}
test("misc - hivemall_version") {
- checkAnswer(DummyInputData.select(hivemall_version()), Row("0.5.0-incubating-SNAPSHOT"))
+ checkAnswer(DummyInputData.select(hivemall_version()), Row("0.5.1-incubating-SNAPSHOT"))
}
test("misc - rowid") {
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-common/pom.xml
----------------------------------------------------------------------
diff --git a/spark/spark-common/pom.xml b/spark/spark-common/pom.xml
deleted file mode 100644
index 50670d3..0000000
--- a/spark/spark-common/pom.xml
+++ /dev/null
@@ -1,146 +0,0 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License.
--->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
- <modelVersion>4.0.0</modelVersion>
-
- <parent>
- <groupId>org.apache.hivemall</groupId>
- <artifactId>hivemall</artifactId>
- <version>0.5.0-incubating-SNAPSHOT</version>
- <relativePath>../../pom.xml</relativePath>
- </parent>
-
- <artifactId>hivemall-spark-common</artifactId>
- <name>Hivemall on Spark Common</name>
- <packaging>jar</packaging>
-
- <properties>
- <main.basedir>${project.parent.basedir}</main.basedir>
- </properties>
-
- <dependencies>
- <!-- hivemall dependencies -->
- <dependency>
- <groupId>org.apache.hivemall</groupId>
- <artifactId>hivemall-core</artifactId>
- <version>${project.version}</version>
- <scope>compile</scope>
- </dependency>
-
- <!-- other provided dependencies -->
- <dependency>
- <groupId>org.apache.spark</groupId>
- <artifactId>spark-sql_${scala.binary.version}</artifactId>
- <version>${spark.version}</version>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.spark</groupId>
- <artifactId>spark-hive_${scala.binary.version}</artifactId>
- <version>${spark.version}</version>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.spark</groupId>
- <artifactId>spark-streaming_${scala.binary.version}</artifactId>
- <version>${spark.version}</version>
- <scope>provided</scope>
- </dependency>
-
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-common</artifactId>
- <version>${hadoop.version}</version>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-mapreduce-client-core</artifactId>
- <version>${hadoop.version}</version>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hive</groupId>
- <artifactId>hive-exec</artifactId>
- <version>${hive.version}</version>
- <scope>provided</scope>
- </dependency>
- </dependencies>
-
- <build>
- <directory>target</directory>
- <outputDirectory>target/classes</outputDirectory>
- <finalName>${project.artifactId}-${project.version}</finalName>
- <testOutputDirectory>target/test-classes</testOutputDirectory>
- <plugins>
- <!-- For resolving spark binary incompatibility -->
- <plugin>
- <artifactId>maven-clean-plugin</artifactId>
- <version>3.0.0</version>
- <executions>
- <execution>
- <phase>initialize</phase>
- <goals>
- <goal>clean</goal>
- </goals>
- </execution>
- </executions>
- </plugin>
- <!-- For incremental compilation -->
- <plugin>
- <groupId>net.alchim31.maven</groupId>
- <artifactId>scala-maven-plugin</artifactId>
- <version>3.2.2</version>
- <executions>
- <execution>
- <id>scala-compile-first</id>
- <phase>process-resources</phase>
- <goals>
- <goal>compile</goal>
- </goals>
- </execution>
- <execution>
- <id>scala-test-compile-first</id>
- <phase>process-test-resources</phase>
- <goals>
- <goal>testCompile</goal>
- </goals>
- </execution>
- </executions>
- <configuration>
- <scalaVersion>${scala.version}</scalaVersion>
- <recompileMode>incremental</recompileMode>
- <useZincServer>true</useZincServer>
- <args>
- <arg>-unchecked</arg>
- <arg>-deprecation</arg>
- <!-- TODO: To enable this option, we need to fix many warnings -->
- <!-- <arg>-feature</arg> -->
- </args>
- <jvmArgs>
- <jvmArg>-Xms512m</jvmArg>
- <jvmArg>-Xmx1024m</jvmArg>
- </jvmArgs>
- </configuration>
- </plugin>
- </plugins>
- </build>
-</project>
-
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-common/scalastyle-config.xml
----------------------------------------------------------------------
diff --git a/spark/spark-common/scalastyle-config.xml b/spark/spark-common/scalastyle-config.xml
deleted file mode 100644
index 13d1c47..0000000
--- a/spark/spark-common/scalastyle-config.xml
+++ /dev/null
@@ -1,333 +0,0 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License.
--->
-
-<!--
-If you wish to turn off checking for a section of code, you can put a comment in the source
-before and after the section, with the following syntax:
-
- // scalastyle:off
- ... // stuff that breaks the styles
- // scalastyle:on
-
-You can also disable only one rule, by specifying its rule id, as specified in:
- http://www.scalastyle.org/rules-0.7.0.html
-
- // scalastyle:off no.finalize
- override def finalize(): Unit = ...
- // scalastyle:on no.finalize
-
-This file is divided into 3 sections:
- (1) rules that we enforce.
- (2) rules that we would like to enforce, but haven't cleaned up the codebase to turn on yet
- (or we need to make the scalastyle rule more configurable).
- (3) rules that we don't want to enforce.
--->
-
-<scalastyle>
- <name>Scalastyle standard configuration</name>
-
- <!-- ================================================================================ -->
- <!-- rules we enforce -->
- <!-- ================================================================================ -->
-
- <check level="error" class="org.scalastyle.file.FileTabChecker" enabled="true"></check>
-
- <check level="error" class="org.scalastyle.file.HeaderMatchesChecker" enabled="true">
- <parameters>
- <parameter name="header"><![CDATA[/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */]]></parameter>
- </parameters>
- </check>
-
- <check level="error" class="org.scalastyle.scalariform.SpacesAfterPlusChecker" enabled="true"></check>
-
- <check level="error" class="org.scalastyle.scalariform.SpacesBeforePlusChecker" enabled="true"></check>
-
- <check level="error" class="org.scalastyle.file.WhitespaceEndOfLineChecker" enabled="true"></check>
-
- <check level="error" class="org.scalastyle.file.FileLineLengthChecker" enabled="true">
- <parameters>
- <parameter name="maxLineLength"><![CDATA[100]]></parameter>
- <parameter name="tabSize"><![CDATA[2]]></parameter>
- <parameter name="ignoreImports">true</parameter>
- </parameters>
- </check>
-
- <check level="error" class="org.scalastyle.scalariform.ClassNamesChecker" enabled="true">
- <parameters><parameter name="regex"><![CDATA[[A-Z][A-Za-z]*]]></parameter></parameters>
- </check>
-
- <check level="error" class="org.scalastyle.scalariform.ObjectNamesChecker" enabled="true">
- <parameters><parameter name="regex"><![CDATA[[A-Z][A-Za-z]*]]></parameter></parameters>
- </check>
-
- <check level="error" class="org.scalastyle.scalariform.PackageObjectNamesChecker" enabled="true">
- <parameters><parameter name="regex"><![CDATA[^[a-z][A-Za-z]*$]]></parameter></parameters>
- </check>
-
- <check level="error" class="org.scalastyle.scalariform.ParameterNumberChecker" enabled="true">
- <parameters><parameter name="maxParameters"><![CDATA[10]]></parameter></parameters>
- </check>
-
- <check level="error" class="org.scalastyle.scalariform.NoFinalizeChecker" enabled="true"></check>
-
- <check level="error" class="org.scalastyle.scalariform.CovariantEqualsChecker" enabled="true"></check>
-
- <check level="error" class="org.scalastyle.scalariform.StructuralTypeChecker" enabled="true"></check>
-
- <check level="error" class="org.scalastyle.scalariform.UppercaseLChecker" enabled="true"></check>
-
- <check level="error" class="org.scalastyle.scalariform.IfBraceChecker" enabled="true">
- <parameters>
- <parameter name="singleLineAllowed"><![CDATA[true]]></parameter>
- <parameter name="doubleLineAllowed"><![CDATA[true]]></parameter>
- </parameters>
- </check>
-
- <check level="error" class="org.scalastyle.scalariform.PublicMethodsHaveTypeChecker" enabled="true"></check>
-
- <check level="error" class="org.scalastyle.file.NewLineAtEofChecker" enabled="true"></check>
-
- <check customId="nonascii" level="error" class="org.scalastyle.scalariform.NonASCIICharacterChecker" enabled="true"></check>
-
- <check level="error" class="org.scalastyle.scalariform.SpaceAfterCommentStartChecker" enabled="true"></check>
-
- <check level="error" class="org.scalastyle.scalariform.EnsureSingleSpaceBeforeTokenChecker" enabled="true">
- <parameters>
- <parameter name="tokens">ARROW, EQUALS, ELSE, TRY, CATCH, FINALLY, LARROW, RARROW</parameter>
- </parameters>
- </check>
-
- <check level="error" class="org.scalastyle.scalariform.EnsureSingleSpaceAfterTokenChecker" enabled="true">
- <parameters>
- <parameter name="tokens">ARROW, EQUALS, COMMA, COLON, IF, ELSE, DO, WHILE, FOR, MATCH, TRY, CATCH, FINALLY, LARROW, RARROW</parameter>
- </parameters>
- </check>
-
- <!-- ??? usually shouldn't be checked into the code base. -->
- <check level="error" class="org.scalastyle.scalariform.NotImplementedErrorUsage" enabled="true"></check>
-
- <!-- As of SPARK-7977 all printlns need to be wrapped in '// scalastyle:off/on println' -->
- <check customId="println" level="error" class="org.scalastyle.scalariform.TokenChecker" enabled="true">
- <parameters><parameter name="regex">^println$</parameter></parameters>
- <customMessage><![CDATA[Are you sure you want to println? If yes, wrap the code block with
- // scalastyle:off println
- println(...)
- // scalastyle:on println]]></customMessage>
- </check>
-
- <check customId="visiblefortesting" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
- <parameters><parameter name="regex">@VisibleForTesting</parameter></parameters>
- <customMessage><![CDATA[
- @VisibleForTesting causes classpath issues. Please note this in the java doc instead (SPARK-11615).
- ]]></customMessage>
- </check>
-
- <check customId="runtimeaddshutdownhook" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
- <parameters><parameter name="regex">Runtime\.getRuntime\.addShutdownHook</parameter></parameters>
- <customMessage><![CDATA[
- Are you sure that you want to use Runtime.getRuntime.addShutdownHook? In most cases, you should use
- ShutdownHookManager.addShutdownHook instead.
- If you must use Runtime.getRuntime.addShutdownHook, wrap the code block with
- // scalastyle:off runtimeaddshutdownhook
- Runtime.getRuntime.addShutdownHook(...)
- // scalastyle:on runtimeaddshutdownhook
- ]]></customMessage>
- </check>
-
- <check customId="mutablesynchronizedbuffer" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
- <parameters><parameter name="regex">mutable\.SynchronizedBuffer</parameter></parameters>
- <customMessage><![CDATA[
- Are you sure that you want to use mutable.SynchronizedBuffer? In most cases, you should use
- java.util.concurrent.ConcurrentLinkedQueue instead.
- If you must use mutable.SynchronizedBuffer, wrap the code block with
- // scalastyle:off mutablesynchronizedbuffer
- mutable.SynchronizedBuffer[...]
- // scalastyle:on mutablesynchronizedbuffer
- ]]></customMessage>
- </check>
-
- <check customId="classforname" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
- <parameters><parameter name="regex">Class\.forName</parameter></parameters>
- <customMessage><![CDATA[
- Are you sure that you want to use Class.forName? In most cases, you should use Utils.classForName instead.
- If you must use Class.forName, wrap the code block with
- // scalastyle:off classforname
- Class.forName(...)
- // scalastyle:on classforname
- ]]></customMessage>
- </check>
-
- <check customId="awaitresult" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
- <parameters><parameter name="regex">Await\.result</parameter></parameters>
- <customMessage><![CDATA[
- Are you sure that you want to use Await.result? In most cases, you should use ThreadUtils.awaitResult instead.
- If you must use Await.result, wrap the code block with
- // scalastyle:off awaitresult
- Await.result(...)
- // scalastyle:on awaitresult
- ]]></customMessage>
- </check>
-
- <!-- As of SPARK-9613 JavaConversions should be replaced with JavaConverters -->
- <check customId="javaconversions" level="error" class="org.scalastyle.scalariform.TokenChecker" enabled="true">
- <parameters><parameter name="regex">JavaConversions</parameter></parameters>
- <customMessage>Instead of importing implicits in scala.collection.JavaConversions._, import
- scala.collection.JavaConverters._ and use .asScala / .asJava methods</customMessage>
- </check>
-
- <check customId="commonslang2" level="error" class="org.scalastyle.scalariform.TokenChecker" enabled="true">
- <parameters><parameter name="regex">org\.apache\.commons\.lang\.</parameter></parameters>
- <customMessage>Use Commons Lang 3 classes (package org.apache.commons.lang3.*) instead
- of Commons Lang 2 (package org.apache.commons.lang.*)</customMessage>
- </check>
-
- <check level="error" class="org.scalastyle.scalariform.ImportOrderChecker" enabled="true">
- <parameters>
- <parameter name="groups">java,scala,3rdParty,spark</parameter>
- <parameter name="group.java">javax?\..*</parameter>
- <parameter name="group.scala">scala\..*</parameter>
- <parameter name="group.3rdParty">(?!org\.apache\.spark\.).*</parameter>
- <parameter name="group.spark">org\.apache\.spark\..*</parameter>
- </parameters>
- </check>
-
- <check level="error" class="org.scalastyle.scalariform.DisallowSpaceBeforeTokenChecker" enabled="true">
- <parameters>
- <parameter name="tokens">COMMA</parameter>
- </parameters>
- </check>
-
- <!-- SPARK-3854: Single Space between ')' and '{' -->
- <check customId="SingleSpaceBetweenRParenAndLCurlyBrace" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
- <parameters><parameter name="regex">\)\{</parameter></parameters>
- <customMessage><![CDATA[
- Single Space between ')' and '{'.
- ]]></customMessage>
- </check>
-
- <check customId="NoScalaDoc" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
- <parameters><parameter name="regex">(?m)^(\s*)/[*][*].*$(\r|)\n^\1 [*]</parameter></parameters>
- <customMessage>Use Javadoc style indentation for multiline comments</customMessage>
- </check>
-
- <check customId="OmitBracesInCase" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
- <parameters><parameter name="regex">case[^\n>]*=>\s*\{</parameter></parameters>
- <customMessage>Omit braces in case clauses.</customMessage>
- </check>
-
- <!-- SPARK-16877: Avoid Java annotations -->
- <check customId="OverrideJavaCase" level="error" class="org.scalastyle.scalariform.TokenChecker" enabled="true">
- <parameters><parameter name="regex">^Override$</parameter></parameters>
- <customMessage>override modifier should be used instead of @java.lang.Override.</customMessage>
- </check>
-
- <check level="error" class="org.scalastyle.scalariform.DeprecatedJavaChecker" enabled="true"></check>
-
- <!-- ================================================================================ -->
- <!-- rules we'd like to enforce, but haven't cleaned up the codebase yet -->
- <!-- ================================================================================ -->
-
- <!-- We cannot turn the following two on, because it'd fail a lot of string interpolation use cases. -->
- <!-- Ideally the following two rules should be configurable to rule out string interpolation. -->
- <check level="error" class="org.scalastyle.scalariform.NoWhitespaceBeforeLeftBracketChecker" enabled="false"></check>
- <check level="error" class="org.scalastyle.scalariform.NoWhitespaceAfterLeftBracketChecker" enabled="false"></check>
-
- <!-- This breaks symbolic method names so we don't turn it on. -->
- <!-- Maybe we should update it to allow basic symbolic names, and then we are good to go. -->
- <check level="error" class="org.scalastyle.scalariform.MethodNamesChecker" enabled="false">
- <parameters>
- <parameter name="regex"><![CDATA[^[a-z][A-Za-z0-9]*$]]></parameter>
- </parameters>
- </check>
-
- <!-- Should turn this on, but we have a few places that need to be fixed first -->
- <check level="error" class="org.scalastyle.scalariform.EqualsHashCodeChecker" enabled="true"></check>
-
- <!-- ================================================================================ -->
- <!-- rules we don't want -->
- <!-- ================================================================================ -->
-
- <check level="error" class="org.scalastyle.scalariform.IllegalImportsChecker" enabled="false">
- <parameters><parameter name="illegalImports"><![CDATA[sun._,java.awt._]]></parameter></parameters>
- </check>
-
- <!-- We want the opposite of this: NewLineAtEofChecker -->
- <check level="error" class="org.scalastyle.file.NoNewLineAtEofChecker" enabled="false"></check>
-
- <!-- This one complains about all kinds of random things. Disable. -->
- <check level="error" class="org.scalastyle.scalariform.SimplifyBooleanExpressionChecker" enabled="false"></check>
-
- <!-- We use return quite a bit for control flows and guards -->
- <check level="error" class="org.scalastyle.scalariform.ReturnChecker" enabled="false"></check>
-
- <!-- We use null a lot in low level code and to interface with 3rd party code -->
- <check level="error" class="org.scalastyle.scalariform.NullChecker" enabled="false"></check>
-
- <!-- Doesn't seem super big deal here ... -->
- <check level="error" class="org.scalastyle.scalariform.NoCloneChecker" enabled="false"></check>
-
- <!-- Doesn't seem super big deal here ... -->
- <check level="error" class="org.scalastyle.file.FileLengthChecker" enabled="false">
- <parameters><parameter name="maxFileLength">800</parameter></parameters>
- </check>
-
- <!-- Doesn't seem super big deal here ... -->
- <check level="error" class="org.scalastyle.scalariform.NumberOfTypesChecker" enabled="false">
- <parameters><parameter name="maxTypes">30</parameter></parameters>
- </check>
-
- <!-- Doesn't seem super big deal here ... -->
- <check level="error" class="org.scalastyle.scalariform.CyclomaticComplexityChecker" enabled="false">
- <parameters><parameter name="maximum">10</parameter></parameters>
- </check>
-
- <!-- Doesn't seem super big deal here ... -->
- <check level="error" class="org.scalastyle.scalariform.MethodLengthChecker" enabled="false">
- <parameters><parameter name="maxLength">50</parameter></parameters>
- </check>
-
- <!-- Not exactly feasible to enforce this right now. -->
- <!-- It is also infrequent that somebody introduces a new class with a lot of methods. -->
- <check level="error" class="org.scalastyle.scalariform.NumberOfMethodsInTypeChecker" enabled="false">
- <parameters><parameter name="maxMethods"><![CDATA[30]]></parameter></parameters>
- </check>
-
- <!-- Doesn't seem super big deal here, and we have a lot of magic numbers ... -->
- <check level="error" class="org.scalastyle.scalariform.MagicNumberChecker" enabled="false">
- <parameters><parameter name="ignore">-1,0,1,2,3</parameter></parameters>
- </check>
-
-</scalastyle>