Posted to commits@hivemall.apache.org by my...@apache.org on 2018/02/20 07:18:18 UTC

[1/4] incubator-hivemall git commit: Close #131: [v0.5.0-rc3] Merge v0.5.0 branch

Repository: incubator-hivemall
Updated Branches:
  refs/heads/master 448847fa2 -> 3a718713a


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-common/src/main/java/hivemall/dataset/LogisticRegressionDataGeneratorUDTFWrapper.java
----------------------------------------------------------------------
diff --git a/spark/spark-common/src/main/java/hivemall/dataset/LogisticRegressionDataGeneratorUDTFWrapper.java b/spark/spark-common/src/main/java/hivemall/dataset/LogisticRegressionDataGeneratorUDTFWrapper.java
deleted file mode 100644
index cf10ed7..0000000
--- a/spark/spark-common/src/main/java/hivemall/dataset/LogisticRegressionDataGeneratorUDTFWrapper.java
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package hivemall.dataset;
-
-import hivemall.UDTFWithOptions;
-
-import java.lang.reflect.Field;
-import java.lang.reflect.Method;
-import java.util.Random;
-
-import org.apache.commons.cli.CommandLine;
-import org.apache.commons.cli.Options;
-import org.apache.hadoop.hive.ql.exec.Description;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.udf.generic.Collector;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-
-/**
- * A wrapper of [[hivemall.dataset.LogisticRegressionDataGeneratorUDTF]]. This wrapper is needed
- * because Spark cannot handle HadoopUtils#getTaskId() correctly.
- */
-@Description(name = "lr_datagen",
-        value = "_FUNC_(options string) - Generates a logistic regression dataset")
-public final class LogisticRegressionDataGeneratorUDTFWrapper extends UDTFWithOptions {
-    private transient LogisticRegressionDataGeneratorUDTF udtf =
-            new LogisticRegressionDataGeneratorUDTF();
-
-    @Override
-    protected Options getOptions() {
-        Options options = null;
-        try {
-            Method m = udtf.getClass().getDeclaredMethod("getOptions");
-            m.setAccessible(true);
-            options = (Options) m.invoke(udtf);
-        } catch (Exception e) {
-            e.printStackTrace();
-        }
-        return options;
-    }
-
-    @SuppressWarnings("all")
-    @Override
-    protected CommandLine processOptions(ObjectInspector[] objectInspectors)
-            throws UDFArgumentException {
-        CommandLine commands = null;
-        try {
-            Method m = udtf.getClass().getDeclaredMethod("processOptions");
-            m.setAccessible(true);
-            commands = (CommandLine) m.invoke(udtf, objectInspectors);
-        } catch (Exception e) {
-            e.printStackTrace();
-        }
-        return commands;
-    }
-
-    @Override
-    public StructObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
-        try {
-            // Extract a collector for LogisticRegressionDataGeneratorUDTF
-            Field collector = GenericUDTF.class.getDeclaredField("collector");
-            collector.setAccessible(true);
-            udtf.setCollector((Collector) collector.get(this));
-
-            // To avoid HadoopUtils#getTaskId()
-            Class<?> clazz = udtf.getClass();
-            Field rnd1 = clazz.getDeclaredField("rnd1");
-            Field rnd2 = clazz.getDeclaredField("rnd2");
-            Field r_seed = clazz.getDeclaredField("r_seed");
-            r_seed.setAccessible(true);
-            final long seed = r_seed.getLong(udtf) + (int) Thread.currentThread().getId();
-            rnd1.setAccessible(true);
-            rnd2.setAccessible(true);
-            rnd1.set(udtf, new Random(seed));
-            rnd2.set(udtf, new Random(seed + 1));
-        } catch (Exception e) {
-            e.printStackTrace();
-        }
-        return udtf.initialize(argOIs);
-    }
-
-    @Override
-    public void process(Object[] objects) throws HiveException {
-        udtf.process(objects);
-    }
-
-    @Override
-    public void close() throws HiveException {
-        udtf.close();
-    }
-}
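
The wrapper above forwards to the wrapped UDTF's protected getOptions()/processOptions() via reflection. Two pitfalls are worth calling out: getDeclaredMethod only sees methods declared on that exact class (not inherited ones), and it must be given the target's parameter types, or invoking the result with arguments fails. A minimal Scala sketch of the pattern, assuming a UDTF class that itself declares processOptions(ObjectInspector[]):

    import java.lang.reflect.Method

    import org.apache.commons.cli.CommandLine
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector

    // Reflectively call a protected processOptions(ObjectInspector[]).
    def callProcessOptions(udtf: AnyRef, argOIs: Array[ObjectInspector]): CommandLine = {
      // The parameter types must be supplied for the lookup to match.
      val m: Method = udtf.getClass.getDeclaredMethod(
        "processOptions", classOf[Array[ObjectInspector]])
      m.setAccessible(true)
      // Scala wraps argOIs as a single vararg element, so the whole
      // array is passed as one argument.
      m.invoke(udtf, argOIs).asInstanceOf[CommandLine]
    }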

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-common/src/main/java/hivemall/ftvec/AddBiasUDFWrapper.java
----------------------------------------------------------------------
diff --git a/spark/spark-common/src/main/java/hivemall/ftvec/AddBiasUDFWrapper.java b/spark/spark-common/src/main/java/hivemall/ftvec/AddBiasUDFWrapper.java
deleted file mode 100644
index b454fd9..0000000
--- a/spark/spark-common/src/main/java/hivemall/ftvec/AddBiasUDFWrapper.java
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package hivemall.ftvec;
-
-import java.util.Arrays;
-import java.util.List;
-
-import org.apache.hadoop.hive.ql.exec.Description;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.udf.UDFType;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
-import org.apache.hadoop.hive.serde2.objectinspector.*;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-
-/**
- * A wrapper of [[hivemall.ftvec.AddBiasUDF]].
- *
- * NOTE: This is needed to work around a Spark reflection issue: Spark cannot handle List<> as the
- * return type of a Hive UDF, so the return type must be declared via an ObjectInspector.
- */
-@Description(name = "add_bias",
-        value = "_FUNC_(features in array<string>) - Returns features with a bias as array<string>")
-@UDFType(deterministic = true, stateful = false)
-public class AddBiasUDFWrapper extends GenericUDF {
-    private AddBiasUDF udf = new AddBiasUDF();
-    private ListObjectInspector argumentOI = null;
-
-    @Override
-    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
-        if (arguments.length != 1) {
-            throw new UDFArgumentLengthException(
-                "add_bias() has an single arguments: array<string> features");
-        }
-
-        switch (arguments[0].getCategory()) {
-            case LIST:
-                argumentOI = (ListObjectInspector) arguments[0];
-                ObjectInspector elmOI = argumentOI.getListElementObjectInspector();
-                if (elmOI.getCategory().equals(Category.PRIMITIVE)) {
-                    if (((PrimitiveObjectInspector) elmOI).getPrimitiveCategory() == PrimitiveCategory.STRING) {
-                        break;
-                    }
-                }
-            default:
-                throw new UDFArgumentTypeException(0, "Type mismatch: features");
-        }
-
-        return ObjectInspectorFactory.getStandardListObjectInspector(argumentOI.getListElementObjectInspector());
-    }
-
-    @Override
-    public Object evaluate(DeferredObject[] arguments) throws HiveException {
-        assert (arguments.length == 1);
-        @SuppressWarnings("unchecked")
-        final List<String> input = (List<String>) argumentOI.getList(arguments[0].get());
-        return udf.evaluate(input);
-    }
-
-    @Override
-    public String getDisplayString(String[] children) {
-        return "add_bias(" + Arrays.toString(children) + ")";
-    }
-}
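
These wrappers are meant to be registered as temporary Hive functions so Spark SQL can call them. A hedged usage sketch, assuming a Hive-enabled SparkSession named `spark` with the Hivemall jar on its classpath:

    // Hypothetical registration and call from Spark SQL.
    spark.sql(
      "CREATE TEMPORARY FUNCTION add_bias AS 'hivemall.ftvec.AddBiasUDFWrapper'")
    // add_bias appends a bias feature to the input vector and returns
    // array<string>, matching the ObjectInspector declared above.
    spark.sql("SELECT add_bias(array('1:1.0', '2:0.5'))").show()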

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-common/src/main/java/hivemall/ftvec/AddFeatureIndexUDFWrapper.java
----------------------------------------------------------------------
diff --git a/spark/spark-common/src/main/java/hivemall/ftvec/AddFeatureIndexUDFWrapper.java b/spark/spark-common/src/main/java/hivemall/ftvec/AddFeatureIndexUDFWrapper.java
deleted file mode 100644
index 0b687db..0000000
--- a/spark/spark-common/src/main/java/hivemall/ftvec/AddFeatureIndexUDFWrapper.java
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package hivemall.ftvec;
-
-import java.util.Arrays;
-import java.util.List;
-
-import org.apache.hadoop.hive.ql.exec.Description;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.udf.UDFType;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
-import org.apache.hadoop.hive.serde2.objectinspector.*;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-
-/**
- * A wrapper of [[hivemall.ftvec.AddFeatureIndexUDF]].
- *
- * NOTE: This is needed to work around a Spark reflection issue: Spark cannot handle List<> as the
- * return type of a Hive UDF, so the return type must be declared via an ObjectInspector.
- */
-@Description(
-        name = "add_feature_index",
-        value = "_FUNC_(dense features in array<double>) - Returns a feature vector with feature indices")
-@UDFType(deterministic = true, stateful = false)
-public class AddFeatureIndexUDFWrapper extends GenericUDF {
-    private AddFeatureIndexUDF udf = new AddFeatureIndexUDF();
-    private ListObjectInspector argumentOI = null;
-
-    @Override
-    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
-        if (arguments.length != 1) {
-            throw new UDFArgumentLengthException(
-                "add_feature_index() has an single arguments: array<double> features");
-        }
-
-        switch (arguments[0].getCategory()) {
-            case LIST:
-                argumentOI = (ListObjectInspector) arguments[0];
-                ObjectInspector elmOI = argumentOI.getListElementObjectInspector();
-                if (elmOI.getCategory().equals(Category.PRIMITIVE)) {
-                    if (((PrimitiveObjectInspector) elmOI).getPrimitiveCategory() == PrimitiveCategory.DOUBLE) {
-                        break;
-                    }
-                }
-            default:
-                throw new UDFArgumentTypeException(0, "Type mismatch: features");
-        }
-
-        return ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
-    }
-
-    @Override
-    public Object evaluate(DeferredObject[] arguments) throws HiveException {
-        assert (arguments.length == 1);
-        @SuppressWarnings("unchecked")
-        final List<Double> input = (List<Double>) argumentOI.getList(arguments[0].get());
-        return udf.evaluate(input);
-    }
-
-    @Override
-    public String getDisplayString(String[] children) {
-        return "add_feature_index(" + Arrays.toString(children) + ")";
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-common/src/main/java/hivemall/ftvec/ExtractFeatureUDFWrapper.java
----------------------------------------------------------------------
diff --git a/spark/spark-common/src/main/java/hivemall/ftvec/ExtractFeatureUDFWrapper.java b/spark/spark-common/src/main/java/hivemall/ftvec/ExtractFeatureUDFWrapper.java
deleted file mode 100644
index 5924468..0000000
--- a/spark/spark-common/src/main/java/hivemall/ftvec/ExtractFeatureUDFWrapper.java
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package hivemall.ftvec;
-
-import java.util.Arrays;
-
-import org.apache.hadoop.hive.ql.exec.Description;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.udf.UDFType;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
-import org.apache.hadoop.hive.serde2.objectinspector.*;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-
-/**
- * A wrapper of [[hivemall.ftvec.ExtractFeatureUDF]].
- *
- * NOTE: This is needed to work around a Spark reflection issue: Spark cannot handle List<> as the
- * return type of a Hive UDF, so the return type must be declared via an ObjectInspector.
- */
-@Description(name = "extract_feature",
-        value = "_FUNC_(feature in string) - Returns a parsed feature as string")
-@UDFType(deterministic = true, stateful = false)
-public class ExtractFeatureUDFWrapper extends GenericUDF {
-    private ExtractFeatureUDF udf = new ExtractFeatureUDF();
-    private PrimitiveObjectInspector argumentOI = null;
-
-    @Override
-    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
-        if (arguments.length != 1) {
-            throw new UDFArgumentLengthException(
-                "extract_feature() has an single arguments: string feature");
-        }
-
-        argumentOI = (PrimitiveObjectInspector) arguments[0];
-        if (argumentOI.getPrimitiveCategory() != PrimitiveCategory.STRING) {
-            throw new UDFArgumentTypeException(0, "Type mismatch: feature");
-        }
-
-        return PrimitiveObjectInspectorFactory.javaStringObjectInspector;
-    }
-
-    @Override
-    public Object evaluate(DeferredObject[] arguments) throws HiveException {
-        assert (arguments.length == 1);
-        final String input = (String) argumentOI.getPrimitiveJavaObject(arguments[0].get());
-        return udf.evaluate(input);
-    }
-
-    @Override
-    public String getDisplayString(String[] children) {
-        return "extract_feature(" + Arrays.toString(children) + ")";
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-common/src/main/java/hivemall/ftvec/ExtractWeightUDFWrapper.java
----------------------------------------------------------------------
diff --git a/spark/spark-common/src/main/java/hivemall/ftvec/ExtractWeightUDFWrapper.java b/spark/spark-common/src/main/java/hivemall/ftvec/ExtractWeightUDFWrapper.java
deleted file mode 100644
index 8580247..0000000
--- a/spark/spark-common/src/main/java/hivemall/ftvec/ExtractWeightUDFWrapper.java
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package hivemall.ftvec;
-
-import java.util.Arrays;
-
-import org.apache.hadoop.hive.ql.exec.Description;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.udf.UDFType;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
-import org.apache.hadoop.hive.serde2.objectinspector.*;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-
-/**
- * A wrapper of [[hivemall.ftvec.ExtractWeightUDF]].
- *
- * NOTE: This is needed to work around a Spark reflection issue: Spark cannot handle List<> as the
- * return type of a Hive UDF, so the return type must be declared via an ObjectInspector.
- */
-@Description(name = "extract_weight",
-        value = "_FUNC_(feature in string) - Returns the weight of a feature as string")
-@UDFType(deterministic = true, stateful = false)
-public class ExtractWeightUDFWrapper extends GenericUDF {
-    private ExtractWeightUDF udf = new ExtractWeightUDF();
-    private PrimitiveObjectInspector argumentOI = null;
-
-    @Override
-    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
-        if (arguments.length != 1) {
-            throw new UDFArgumentLengthException(
-                "extract_weight() has an single arguments: string feature");
-        }
-
-        argumentOI = (PrimitiveObjectInspector) arguments[0];
-        if (argumentOI.getPrimitiveCategory() != PrimitiveCategory.STRING) {
-            throw new UDFArgumentTypeException(0, "Type mismatch: feature");
-        }
-
-        return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.DOUBLE);
-    }
-
-    @Override
-    public Object evaluate(DeferredObject[] arguments) throws HiveException {
-        assert (arguments.length == 1);
-        final String input = (String) argumentOI.getPrimitiveJavaObject(arguments[0].get());
-        return udf.evaluate(input);
-    }
-
-    @Override
-    public String getDisplayString(String[] children) {
-        return "extract_weight(" + Arrays.toString(children) + ")";
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-common/src/main/java/hivemall/ftvec/SortByFeatureUDFWrapper.java
----------------------------------------------------------------------
diff --git a/spark/spark-common/src/main/java/hivemall/ftvec/SortByFeatureUDFWrapper.java b/spark/spark-common/src/main/java/hivemall/ftvec/SortByFeatureUDFWrapper.java
deleted file mode 100644
index 584be6c..0000000
--- a/spark/spark-common/src/main/java/hivemall/ftvec/SortByFeatureUDFWrapper.java
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package hivemall.ftvec;
-
-import java.util.Arrays;
-import java.util.Map;
-
-import org.apache.hadoop.hive.ql.exec.Description;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.udf.UDFType;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
-import org.apache.hadoop.hive.serde2.objectinspector.*;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-import org.apache.hadoop.io.FloatWritable;
-import org.apache.hadoop.io.IntWritable;
-
-/**
- * A wrapper of [[hivemall.ftvec.SortByFeatureUDF]].
- *
- * NOTE: This is needed to work around a Spark reflection issue: Spark cannot handle Map<> as the
- * return type of a Hive UDF, so the return type must be declared via an ObjectInspector.
- */
-@Description(name = "sort_by_feature",
-        value = "_FUNC_(map in map<int,float>) - Returns a sorted map")
-@UDFType(deterministic = true, stateful = false)
-public class SortByFeatureUDFWrapper extends GenericUDF {
-    private SortByFeatureUDF udf = new SortByFeatureUDF();
-    private MapObjectInspector argumentOI = null;
-
-    @Override
-    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
-        if (arguments.length != 1) {
-            throw new UDFArgumentLengthException(
-                "sorted_by_feature() has an single arguments: map<int, float> map");
-        }
-
-        switch (arguments[0].getCategory()) {
-            case MAP:
-                argumentOI = (MapObjectInspector) arguments[0];
-                ObjectInspector keyOI = argumentOI.getMapKeyObjectInspector();
-                ObjectInspector valueOI = argumentOI.getMapValueObjectInspector();
-                if (keyOI.getCategory().equals(Category.PRIMITIVE)
-                        && valueOI.getCategory().equals(Category.PRIMITIVE)) {
-                    final PrimitiveCategory keyCategory = ((PrimitiveObjectInspector) keyOI).getPrimitiveCategory();
-                    final PrimitiveCategory valueCategory = ((PrimitiveObjectInspector) valueOI).getPrimitiveCategory();
-                    if (keyCategory == PrimitiveCategory.INT
-                            && valueCategory == PrimitiveCategory.FLOAT) {
-                        break;
-                    }
-                }
-            default:
-                throw new UDFArgumentTypeException(0, "Type mismatch: map");
-        }
-
-
-        return ObjectInspectorFactory.getStandardMapObjectInspector(
-            argumentOI.getMapKeyObjectInspector(), argumentOI.getMapValueObjectInspector());
-    }
-
-    @Override
-    public Object evaluate(DeferredObject[] arguments) throws HiveException {
-        assert (arguments.length == 1);
-        @SuppressWarnings("unchecked")
-        final Map<IntWritable, FloatWritable> input = (Map<IntWritable, FloatWritable>) argumentOI.getMap(arguments[0].get());
-        return udf.evaluate(input);
-    }
-
-    @Override
-    public String getDisplayString(String[] children) {
-        return "sort_by_feature(" + Arrays.toString(children) + ")";
-    }
-}
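
The argument check above relies on deliberate switch fall-through: `case MAP` breaks out only when the key and value inspectors are int/float primitives, and every other shape falls through into `default` and throws. The same validation written without the fall-through idiom, as a Scala sketch:

    import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException
    import org.apache.hadoop.hive.serde2.objectinspector._
    import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory

    // Accept only map<int, float>; anything else is a type error.
    def checkMapOI(oi: ObjectInspector): MapObjectInspector = oi match {
      case m: MapObjectInspector =>
        (m.getMapKeyObjectInspector, m.getMapValueObjectInspector) match {
          case (k: PrimitiveObjectInspector, v: PrimitiveObjectInspector)
              if k.getPrimitiveCategory == PrimitiveCategory.INT &&
                 v.getPrimitiveCategory == PrimitiveCategory.FLOAT => m
          case _ => throw new UDFArgumentTypeException(0, "Type mismatch: map")
        }
      case _ => throw new UDFArgumentTypeException(0, "Type mismatch: map")
    }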

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-common/src/main/java/hivemall/ftvec/scaling/L2NormalizationUDFWrapper.java
----------------------------------------------------------------------
diff --git a/spark/spark-common/src/main/java/hivemall/ftvec/scaling/L2NormalizationUDFWrapper.java b/spark/spark-common/src/main/java/hivemall/ftvec/scaling/L2NormalizationUDFWrapper.java
deleted file mode 100644
index db533be..0000000
--- a/spark/spark-common/src/main/java/hivemall/ftvec/scaling/L2NormalizationUDFWrapper.java
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package hivemall.ftvec.scaling;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-
-import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
-import org.apache.hadoop.hive.serde2.objectinspector.*;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-import org.apache.hadoop.io.Text;
-
-/**
- * A wrapper of [[hivemall.ftvec.scaling.L2NormalizationUDF]].
- *
- * NOTE: This is needed to work around a Spark reflection issue: Spark 1.3 cannot handle List<> as
- * the return type of a Hive UDF, so the return type must be declared via an ObjectInspector. This
- * issue is tracked as SPARK-6747, so a future Spark release may make this wrapper obsolete.
- */
-public class L2NormalizationUDFWrapper extends GenericUDF {
-    private L2NormalizationUDF udf = new L2NormalizationUDF();
-
-    private transient List<Text> retValue = new ArrayList<Text>();
-    private transient Converter toListText = null;
-
-    @Override
-    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
-        if (arguments.length != 1) {
-            throw new UDFArgumentLengthException("normalize() has an only single argument.");
-        }
-
-        switch (arguments[0].getCategory()) {
-            case LIST:
-                ObjectInspector elmOI = ((ListObjectInspector) arguments[0]).getListElementObjectInspector();
-                if (elmOI.getCategory().equals(Category.PRIMITIVE)) {
-                    if (((PrimitiveObjectInspector) elmOI).getPrimitiveCategory() == PrimitiveCategory.STRING) {
-                        break;
-                    }
-                }
-            default:
-                throw new UDFArgumentTypeException(0,
-                    "normalize() must have List[String] as an argument, but "
-                            + arguments[0].getTypeName() + " was found.");
-        }
-
-        // Create a ObjectInspector converter for arguments
-        ObjectInspector outputElemOI = ObjectInspectorFactory.getReflectionObjectInspector(
-            Text.class, ObjectInspectorOptions.JAVA);
-        ObjectInspector outputOI = ObjectInspectorFactory.getStandardListObjectInspector(outputElemOI);
-        toListText = ObjectInspectorConverters.getConverter(arguments[0], outputOI);
-
-        ObjectInspector listElemOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
-        ObjectInspector returnElemOI = ObjectInspectorUtils.getStandardObjectInspector(listElemOI);
-        return ObjectInspectorFactory.getStandardListObjectInspector(returnElemOI);
-    }
-
-    @Override
-    public Object evaluate(DeferredObject[] arguments) throws HiveException {
-        assert (arguments.length == 1);
-        @SuppressWarnings("unchecked")
-        final List<Text> input = (List<Text>) toListText.convert(arguments[0].get());
-        retValue = udf.evaluate(input);
-        return retValue;
-    }
-
-    @Override
-    public String getDisplayString(String[] children) {
-        return "normalize(" + Arrays.toString(children) + ")";
-    }
-}
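
Unlike the other wrappers, this one does not cast the incoming inspector: it builds a Converter so that whatever list inspector Spark supplies gets coerced into the List<Text> the wrapped UDF expects. The converter construction, isolated as a sketch:

    import org.apache.hadoop.hive.serde2.objectinspector._
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions
    import org.apache.hadoop.io.Text

    // Build a converter from the caller's list inspector to List[Text].
    def listTextConverter(inputOI: ObjectInspector): Converter = {
      val elemOI = ObjectInspectorFactory.getReflectionObjectInspector(
        classOf[Text], ObjectInspectorOptions.JAVA)
      val listOI = ObjectInspectorFactory.getStandardListObjectInspector(elemOI)
      ObjectInspectorConverters.getConverter(inputOI, listOI)
    }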

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-common/src/main/java/hivemall/knn/lsh/MinHashesUDFWrapper.java
----------------------------------------------------------------------
diff --git a/spark/spark-common/src/main/java/hivemall/knn/lsh/MinHashesUDFWrapper.java b/spark/spark-common/src/main/java/hivemall/knn/lsh/MinHashesUDFWrapper.java
deleted file mode 100644
index d3bcbe6..0000000
--- a/spark/spark-common/src/main/java/hivemall/knn/lsh/MinHashesUDFWrapper.java
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package hivemall.knn.lsh;
-
-import java.util.Arrays;
-import java.util.List;
-
-import org.apache.hadoop.hive.ql.exec.Description;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.udf.UDFType;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
-import org.apache.hadoop.hive.serde2.objectinspector.*;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
-
-/** A wrapper of [[hivemall.knn.lsh.MinHashesUDF]]. */
-@Description(
-        name = "minhashes",
-        value = "_FUNC_(features in array<string>, noWeight in boolean) - Returns hashed features as array<int>")
-@UDFType(deterministic = true, stateful = false)
-public class MinHashesUDFWrapper extends GenericUDF {
-    private MinHashesUDF udf = new MinHashesUDF();
-    private ListObjectInspector featuresOI = null;
-    private PrimitiveObjectInspector noWeightOI = null;
-
-    @Override
-    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
-        if (arguments.length != 2) {
-            throw new UDFArgumentLengthException(
-                "minhashes() has 2 arguments: array<string> features, boolean noWeight");
-        }
-
-        // Check argument types
-        switch (arguments[0].getCategory()) {
-            case LIST:
-                featuresOI = (ListObjectInspector) arguments[0];
-                ObjectInspector elmOI = featuresOI.getListElementObjectInspector();
-                if (elmOI.getCategory().equals(Category.PRIMITIVE)) {
-                    if (((PrimitiveObjectInspector) elmOI).getPrimitiveCategory() == PrimitiveCategory.STRING) {
-                        break;
-                    }
-                }
-            default:
-                throw new UDFArgumentTypeException(0, "Type mismatch: features");
-        }
-
-        noWeightOI = (PrimitiveObjectInspector) arguments[1];
-        if (noWeightOI.getPrimitiveCategory() != PrimitiveCategory.BOOLEAN) {
-            throw new UDFArgumentException("Type mismatch: noWeight");
-        }
-
-        return ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.INT));
-    }
-
-    @Override
-    public Object evaluate(DeferredObject[] arguments) throws HiveException {
-        assert (arguments.length == 2);
-        @SuppressWarnings("unchecked")
-        final List<String> features = (List<String>) featuresOI.getList(arguments[0].get());
-        final Boolean noWeight = PrimitiveObjectInspectorUtils.getBoolean(arguments[1].get(),
-            noWeightOI);
-        return udf.evaluate(features, noWeight);
-    }
-
-    @Override
-    public String getDisplayString(String[] children) {
-        /**
-         * TODO: Need to return hive-specific type names.
-         */
-        return "minhashes(" + Arrays.toString(children) + ")";
-    }
-}
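
Note how the boolean flag is read: rather than casting the deferred object, it goes through PrimitiveObjectInspectorUtils, which accepts both plain Java values and writable-backed ones. Isolated as a sketch:

    import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils

    // Extract a boolean scalar whether the backing object is a
    // java.lang.Boolean or a BooleanWritable.
    def readNoWeight(arg: AnyRef, oi: PrimitiveObjectInspector): Boolean =
      PrimitiveObjectInspectorUtils.getBoolean(arg, oi)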

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-common/src/main/java/hivemall/tools/mapred/RowIdUDFWrapper.java
----------------------------------------------------------------------
diff --git a/spark/spark-common/src/main/java/hivemall/tools/mapred/RowIdUDFWrapper.java b/spark/spark-common/src/main/java/hivemall/tools/mapred/RowIdUDFWrapper.java
deleted file mode 100644
index f386223..0000000
--- a/spark/spark-common/src/main/java/hivemall/tools/mapred/RowIdUDFWrapper.java
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package hivemall.tools.mapred;
-
-import java.util.UUID;
-
-import org.apache.hadoop.hive.ql.exec.Description;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.udf.UDFType;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-
-/** An alternative implementation of [[hivemall.tools.mapred.RowIdUDF]]. */
-@Description(
-        name = "rowid",
-        value = "_FUNC_() - Returns a generated row id of a form {TASK_ID}-{UUID}-{SEQUENCE_NUMBER}")
-@UDFType(deterministic = false, stateful = true)
-public class RowIdUDFWrapper extends GenericUDF {
-    // RowIdUDF is not used directly because Spark cannot
-    // handle HadoopUtils#getTaskId().
-
-    private long sequence;
-    private long taskId;
-
-    public RowIdUDFWrapper() {
-        this.sequence = 0L;
-        this.taskId = Thread.currentThread().getId();
-    }
-
-    @Override
-    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
-        if (arguments.length != 0) {
-            throw new UDFArgumentLengthException("row_number() has no argument.");
-        }
-
-        return PrimitiveObjectInspectorFactory.javaStringObjectInspector;
-    }
-
-    @Override
-    public Object evaluate(DeferredObject[] arguments) throws HiveException {
-        assert (arguments.length == 0);
-        sequence++;
-        /**
-         * TODO: Check if it is unique over all tasks in executors of Spark.
-         */
-        return taskId + "-" + UUID.randomUUID() + "-" + sequence;
-    }
-
-    @Override
-    public String getDisplayString(String[] children) {
-        return "row_number()";
-    }
-}
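
On the TODO above: thread ids are only unique within a single JVM, so across Spark executors it is the per-call UUID, not the task-id prefix, that actually guards against collisions. The id scheme restated as a compact sketch:

    import java.util.UUID

    // Row ids of the form {TASK_ID}-{UUID}-{SEQUENCE_NUMBER}.
    class RowIdGenerator {
      private val taskId: Long = Thread.currentThread().getId
      private var sequence: Long = 0L
      def next(): String = {
        sequence += 1
        s"$taskId-${UUID.randomUUID()}-$sequence"
      }
    }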

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-common/src/main/scala/hivemall/HivemallException.scala
----------------------------------------------------------------------
diff --git a/spark/spark-common/src/main/scala/hivemall/HivemallException.scala b/spark/spark-common/src/main/scala/hivemall/HivemallException.scala
deleted file mode 100644
index 53f6756..0000000
--- a/spark/spark-common/src/main/scala/hivemall/HivemallException.scala
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package hivemall
-
-class HivemallException(message: String, cause: Throwable)
-    extends Exception(message, cause) {
-
-  def this(message: String) = this(message, null)
-}

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-common/src/main/scala/org/apache/spark/ml/feature/HivemallLabeledPoint.scala
----------------------------------------------------------------------
diff --git a/spark/spark-common/src/main/scala/org/apache/spark/ml/feature/HivemallLabeledPoint.scala b/spark/spark-common/src/main/scala/org/apache/spark/ml/feature/HivemallLabeledPoint.scala
deleted file mode 100644
index 3fb2d18..0000000
--- a/spark/spark-common/src/main/scala/org/apache/spark/ml/feature/HivemallLabeledPoint.scala
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.ml.feature
-
-import java.util.StringTokenizer
-
-import scala.collection.mutable.ListBuffer
-
-import hivemall.HivemallException
-
-// Used for DataFrame#explode
-case class HivemallFeature(feature: String)
-
-/**
- * Class that represents the features and labels of a data point for Hivemall.
- *
- * @param label Label for this data point.
- * @param features List of features for this data point.
- */
-case class HivemallLabeledPoint(label: Float = 0.0f, features: Seq[String]) {
-  override def toString: String = {
-    "%s,%s".format(label, features.mkString("[", ",", "]"))
-  }
-}
-
-object HivemallLabeledPoint {
-
-  // Simple parser for HivemallLabeledPoint
-  def parse(s: String): HivemallLabeledPoint = {
-    val (label, features) = s.indexOf(',') match {
-      case d if d > 0 => (s.substring(0, d), s.substring(d + 1))
-      case _ => ("0.0", "[]") // Dummy
-    }
-    HivemallLabeledPoint(label.toFloat, parseTuple(new StringTokenizer(features, "[],", true)))
-  }
-
-  // TODO: Support to parse rows without labels
-  private[this] def parseTuple(tokenizer: StringTokenizer): Seq[String] = {
-    val items = ListBuffer.empty[String]
-    var parsing = true
-    var allowDelim = false
-    while (parsing && tokenizer.hasMoreTokens()) {
-      val token = tokenizer.nextToken()
-      if (token == "[") {
-        items ++= parseTuple(tokenizer)
-        parsing = false
-        allowDelim = true
-      } else if (token == ",") {
-        if (allowDelim) {
-          allowDelim = false
-        } else {
-          throw new HivemallException("Found ',' at a wrong position.")
-        }
-      } else if (token == "]") {
-        parsing = false
-      } else {
-        items.append(token)
-        allowDelim = true
-      }
-    }
-    if (parsing) {
-      throw new HivemallException(s"A tuple must end with ']'.")
-    }
-    items
-  }
-}
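
A usage sketch for the parser above: features stay raw strings (for example Hivemall's "index:value" form), and a line with no comma falls back to label 0.0 with an empty feature list:

    import org.apache.spark.ml.feature.HivemallLabeledPoint

    val p = HivemallLabeledPoint.parse("1.0,[1:0.5,2:1.0]")
    // p.label == 1.0f and p.features == Seq("1:0.5", "2:1.0")
    println(p)  // prints "1.0,[1:0.5,2:1.0]"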

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-common/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala
----------------------------------------------------------------------
diff --git a/spark/spark-common/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala b/spark/spark-common/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala
deleted file mode 100644
index a6bbb4b..0000000
--- a/spark/spark-common/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.streaming
-
-import scala.reflect.ClassTag
-
-import org.apache.spark.ml.feature.HivemallLabeledPoint
-import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.{DataFrame, Row, SQLContext}
-import org.apache.spark.streaming.dstream.DStream
-
-final class HivemallStreamingOps(ds: DStream[HivemallLabeledPoint]) {
-
-  def predict[U: ClassTag](f: DataFrame => DataFrame)(implicit sqlContext: SQLContext)
-      : DStream[Row] = {
-    ds.transform[Row] { rdd: RDD[HivemallLabeledPoint] =>
-      f(sqlContext.createDataFrame(rdd)).rdd
-    }
-  }
-}
-
-object HivemallStreamingOps {
-
-  /**
-   * Implicitly inject the [[HivemallStreamingOps]] into [[DStream]].
-   */
-  implicit def dataFrameToHivemallStreamingOps(ds: DStream[HivemallLabeledPoint])
-      : HivemallStreamingOps = {
-    new HivemallStreamingOps(ds)
-  }
-}
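
With the implicit in scope, a DStream of labeled points can be scored micro-batch by micro-batch with any DataFrame-to-DataFrame function. A hedged sketch, with a placeholder identity standing in for a real Hivemall prediction query:

    import org.apache.spark.ml.feature.HivemallLabeledPoint
    import org.apache.spark.sql.{DataFrame, Row, SQLContext}
    import org.apache.spark.streaming.dstream.DStream
    import org.apache.spark.streaming.HivemallStreamingOps._

    // Placeholder scoring function; a real one would join the streamed
    // features against a trained Hivemall model table.
    def score(df: DataFrame): DataFrame = df

    def predictStream(stream: DStream[HivemallLabeledPoint])
        (implicit sqlContext: SQLContext): DStream[Row] =
      stream.predict(score)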

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/src/site/resources/LICENSE-font_awesome.txt
----------------------------------------------------------------------
diff --git a/src/site/resources/LICENSE-font_awesome.txt b/src/site/resources/LICENSE-font_awesome.txt
new file mode 100644
index 0000000..ad1f9ac
--- /dev/null
+++ b/src/site/resources/LICENSE-font_awesome.txt
@@ -0,0 +1,86 @@
+-----------------------------------------------------------
+SIL OPEN FONT LICENSE Version 1.1 - 26 February 2007
+-----------------------------------------------------------
+
+PREAMBLE
+The goals of the Open Font License (OFL) are to stimulate worldwide
+development of collaborative font projects, to support the font creation
+efforts of academic and linguistic communities, and to provide a free and
+open framework in which fonts may be shared and improved in partnership
+with others.
+
+The OFL allows the licensed fonts to be used, studied, modified and
+redistributed freely as long as they are not sold by themselves. The
+fonts, including any derivative works, can be bundled, embedded, 
+redistributed and/or sold with any software provided that any reserved
+names are not used by derivative works. The fonts and derivatives,
+however, cannot be released under any other type of license. The
+requirement for fonts to remain under this license does not apply
+to any document created using the fonts or their derivatives.
+
+DEFINITIONS
+"Font Software" refers to the set of files released by the Copyright
+Holder(s) under this license and clearly marked as such. This may
+include source files, build scripts and documentation.
+
+"Reserved Font Name" refers to any names specified as such after the
+copyright statement(s).
+
+"Original Version" refers to the collection of Font Software components as
+distributed by the Copyright Holder(s).
+
+"Modified Version" refers to any derivative made by adding to, deleting,
+or substituting -- in part or in whole -- any of the components of the
+Original Version, by changing formats or by porting the Font Software to a
+new environment.
+
+"Author" refers to any designer, engineer, programmer, technical
+writer or other person who contributed to the Font Software.
+
+PERMISSION & CONDITIONS
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of the Font Software, to use, study, copy, merge, embed, modify,
+redistribute, and sell modified and unmodified copies of the Font
+Software, subject to the following conditions:
+
+1) Neither the Font Software nor any of its individual components,
+in Original or Modified Versions, may be sold by itself.
+
+2) Original or Modified Versions of the Font Software may be bundled,
+redistributed and/or sold with any software, provided that each copy
+contains the above copyright notice and this license. These can be
+included either as stand-alone text files, human-readable headers or
+in the appropriate machine-readable metadata fields within text or
+binary files as long as those fields can be easily viewed by the user.
+
+3) No Modified Version of the Font Software may use the Reserved Font
+Name(s) unless explicit written permission is granted by the corresponding
+Copyright Holder. This restriction only applies to the primary font name as
+presented to the users.
+
+4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font
+Software shall not be used to promote, endorse or advertise any
+Modified Version, except to acknowledge the contribution(s) of the
+Copyright Holder(s) and the Author(s) or with their explicit written
+permission.
+
+5) The Font Software, modified or unmodified, in part or in whole,
+must be distributed entirely under this license, and must not be
+distributed under any other license. The requirement for fonts to
+remain under this license does not apply to any document created
+using the Font Software.
+
+TERMINATION
+This license becomes null and void if any of the above conditions are
+not met.
+
+DISCLAIMER
+THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
+OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE
+COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL
+DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM
+OTHER DEALINGS IN THE FONT SOFTWARE.

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/xgboost/lib/xgboost4j-0.60-0.10.jar
----------------------------------------------------------------------
diff --git a/xgboost/lib/xgboost4j-0.60-0.10.jar b/xgboost/lib/xgboost4j-0.60-0.10.jar
deleted file mode 100644
index cf1599b..0000000
Binary files a/xgboost/lib/xgboost4j-0.60-0.10.jar and /dev/null differ

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/xgboost/pom.xml
----------------------------------------------------------------------
diff --git a/xgboost/pom.xml b/xgboost/pom.xml
index b9f11b8..8dcb45e 100644
--- a/xgboost/pom.xml
+++ b/xgboost/pom.xml
@@ -16,14 +16,13 @@
   specific language governing permissions and limitations
   under the License.
 -->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
 	<modelVersion>4.0.0</modelVersion>
 
 	<parent>
 		<groupId>org.apache.hivemall</groupId>
 		<artifactId>hivemall</artifactId>
-		<version>0.5.0-incubating-SNAPSHOT</version>
+		<version>0.5.1-incubating-SNAPSHOT</version>
 		<relativePath>../pom.xml</relativePath>
 	</parent>
 
@@ -32,8 +31,6 @@
 	<packaging>jar</packaging>
 
 	<properties>
-		<xgboost.version>0.60</xgboost.version>
-		<xgboost4j.version>0.10</xgboost4j.version>
 		<main.basedir>${project.parent.basedir}</main.basedir>
 	</properties>
 
@@ -42,69 +39,45 @@
 		<dependency>
 			<groupId>org.apache.hadoop</groupId>
 			<artifactId>hadoop-common</artifactId>
-			<version>${hadoop.version}</version>
 			<scope>provided</scope>
 		</dependency>
 		<dependency>
 			<groupId>org.apache.hadoop</groupId>
 			<artifactId>hadoop-mapreduce-client-core</artifactId>
-			<version>${hadoop.version}</version>
 			<scope>provided</scope>
 		</dependency>
 		<dependency>
 			<groupId>org.apache.hive</groupId>
 			<artifactId>hive-exec</artifactId>
-			<version>${hive.version}</version>
 			<scope>provided</scope>
-			<exclusions>
-				<exclusion>
-					<artifactId>jetty</artifactId>
-					<groupId>org.mortbay.jetty</groupId>
-				</exclusion>
-				<exclusion>
-					<groupId>javax.jdo</groupId>
-					<artifactId>jdo2-api</artifactId>
-				</exclusion>
-				<exclusion>
-					<groupId>asm-parent</groupId>
-					<artifactId>asm-parent</artifactId>
-				</exclusion>
-				<exclusion>
-					<groupId>asm</groupId>
-					<artifactId>asm</artifactId>
-				</exclusion>
-			</exclusions>
 		</dependency>
 		<dependency>
 			<groupId>commons-cli</groupId>
 			<artifactId>commons-cli</artifactId>
-			<version>1.2</version>
 			<scope>provided</scope>
 		</dependency>
 		<dependency>
 			<groupId>commons-logging</groupId>
 			<artifactId>commons-logging</artifactId>
-			<version>1.0.4</version>
 			<scope>provided</scope>
 		</dependency>
 		<dependency>
 			<groupId>log4j</groupId>
 			<artifactId>log4j</artifactId>
-			<version>1.2.17</version>
 			<scope>provided</scope>
 		</dependency>
+
+		<!-- compile scope -->
 		<dependency>
 			<groupId>org.apache.hivemall</groupId>
 			<artifactId>hivemall-core</artifactId>
 			<version>${project.version}</version>
-			<scope>provided</scope>
+			<scope>compile</scope>
 		</dependency>
-
-		<!-- compile scope -->
 		<dependency>
-			<groupId>ml.dmlc</groupId>
+			<groupId>io.github.myui</groupId>
 			<artifactId>xgboost4j</artifactId>
-			<version>${xgboost4j.version}</version>
+			<version>${xgboost.version}</version>
 			<scope>compile</scope>
 		</dependency>
 		<dependency>
@@ -116,106 +89,4 @@
 		</dependency>
 	</dependencies>
 
-	<build>
-		<directory>target</directory>
-		<outputDirectory>target/classes</outputDirectory>
-		<finalName>${project.artifactId}-${project.version}</finalName>
-		<testOutputDirectory>target/test-classes</testOutputDirectory>
-		<plugins>
-			<!-- TODO: This is hacky, so we'll replace it with a better approach in the future -->
-			<plugin>
-				<groupId>org.apache.maven.plugins</groupId>
-				<artifactId>maven-install-plugin</artifactId>
-				<version>2.4</version>
-				<executions>
-					<execution>
-						<id>install-xgboost</id>
-						<phase>validate</phase>
-						<configuration>
-							<file>${basedir}/lib/xgboost4j-${xgboost.version}-${xgboost4j.version}.jar</file>
-							<repositoryLayout>default</repositoryLayout>
-							<groupId>ml.dmlc</groupId>
-							<artifactId>xgboost4j</artifactId>
-							<version>${xgboost4j.version}</version>
-							<packaging>jar</packaging>
-							<generatePom>true</generatePom>
-						</configuration>
-						<goals>
-							<goal>install-file</goal>
-						</goals>
-					</execution>
-				</executions>
-			</plugin>
-			<plugin>
-				<groupId>org.apache.maven.plugins</groupId>
-				<artifactId>maven-shade-plugin</artifactId>
-				<version>3.1.0</version>
-				<executions>
-					<!-- hivemall-xgboost_xx-xx.jar -->
-					<execution>
-						<id>jar-with-portal-binaries</id>
-						<phase>package</phase>
-						<goals>
-							<goal>shade</goal>
-						</goals>
-						<configuration>
-							<finalName>${project.artifactId}-${xgboost.version}-${project.version}</finalName>
-							<outputDirectory>${project.parent.build.directory}</outputDirectory>
-							<minimizeJar>false</minimizeJar>
-							<createDependencyReducedPom>false</createDependencyReducedPom>
-							<artifactSet>
-								<includes>
-									<include>ml.dmlc:xgboost4j</include>
-								</includes>
-							</artifactSet>
-							<filters>
-								<filter>
-									<artifact>*:*</artifact>
-									<excludes>
-										<exclude>tracker.py</exclude>
-									</excludes>
-								</filter>
-							</filters>
-						</configuration>
-					</execution>
-					<!-- hivemall-xgboost_xx-xx-with-dependencies.jar including minimum dependencies -->
-					<execution>
-						<id>jar-with-dependencies</id>
-						<phase>package</phase>
-						<goals>
-							<goal>shade</goal>
-						</goals>
-						<configuration>
-							<finalName>${project.artifactId}-${xgboost.version}-${project.version}-with-dependencies</finalName>
-							<outputDirectory>${project.parent.build.directory}</outputDirectory>
-							<minimizeJar>true</minimizeJar>
-							<createDependencyReducedPom>false</createDependencyReducedPom>
-							<artifactSet>
-								<includes>
-									<include>org.apache.hivemall:hivemall-core</include>
-									<include>io.netty:netty-all</include>
-									<include>com.github.haifengl:smile-core</include>
-									<include>com.github.haifengl:smile-math</include>
-									<include>com.github.haifengl:smile-data</include>
-									<include>org.tukaani:xz</include>
-									<include>ml.dmlc:xgboost4j</include>
-									<include>com.esotericsoftware.kryo:kryo</include>
-								</includes>
-							</artifactSet>
-							<filters>
-								<filter>
-									<artifact>*:*</artifact>
-									<excludes>
-										<exclude>*.jar</exclude>
-										<exclude>tracker.py</exclude>
-									</excludes>
-								</filter>
-							</filters>
-						</configuration>
-					</execution>
-				</executions>
-			</plugin>
-		</plugins>
-	</build>
-
 </project>

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/xgboost/src/main/java/hivemall/xgboost/XGBoostUtils.java
----------------------------------------------------------------------
diff --git a/xgboost/src/main/java/hivemall/xgboost/XGBoostUtils.java b/xgboost/src/main/java/hivemall/xgboost/XGBoostUtils.java
index 2e2bf25..0472229 100644
--- a/xgboost/src/main/java/hivemall/xgboost/XGBoostUtils.java
+++ b/xgboost/src/main/java/hivemall/xgboost/XGBoostUtils.java
@@ -48,7 +48,7 @@ public final class XGBoostUtils {
                 values[i] = Float.parseFloat(str.substring(pos + 1));
             }
         }
-        return LabeledPoint.fromSparseVector((float) target, indices, values);
+        return new LabeledPoint((float) target, indices, values);
     }
 
 }
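
The hunk above tracks an API change in the bundled xgboost4j: the 0.7 line drops the
LabeledPoint.fromSparseVector() factory in favor of a plain constructor. A minimal,
self-contained sketch of the surrounding "index:value" parsing follows; the constructor
call is taken from the hunk above, but the import path (ml.dmlc.xgboost4j.LabeledPoint)
and all other names are assumptions, not part of this diff:

    import ml.dmlc.xgboost4j.LabeledPoint;

    public class LabeledPointSketch {
        public static void main(String[] args) {
            // "index:value" encoded features, as used by Hivemall feature vectors
            final String[] features = {"1:0.5", "3:1.0", "7:0.25"};
            final int[] indices = new int[features.length];
            final float[] values = new float[features.length];
            for (int i = 0; i < features.length; i++) {
                final String str = features[i];
                final int pos = str.indexOf(':');
                indices[i] = Integer.parseInt(str.substring(0, pos));
                values[i] = Float.parseFloat(str.substring(pos + 1));
            }
            // 0.7-style construction, as in the hunk above
            LabeledPoint point = new LabeledPoint(1.f, indices, values);
            System.out.println(point);
        }
    }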

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/xgboost/src/main/java/hivemall/xgboost/tools/XGBoostMulticlassPredictUDTF.java
----------------------------------------------------------------------
diff --git a/xgboost/src/main/java/hivemall/xgboost/tools/XGBoostMulticlassPredictUDTF.java b/xgboost/src/main/java/hivemall/xgboost/tools/XGBoostMulticlassPredictUDTF.java
index fd67c09..b80f95a 100644
--- a/xgboost/src/main/java/hivemall/xgboost/tools/XGBoostMulticlassPredictUDTF.java
+++ b/xgboost/src/main/java/hivemall/xgboost/tools/XGBoostMulticlassPredictUDTF.java
@@ -18,8 +18,6 @@
  */
 package hivemall.xgboost.tools;
 
-import hivemall.utils.lang.Preconditions;
-
 import java.util.ArrayList;
 import java.util.List;
 
@@ -32,10 +30,11 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 
-@Description(
-        name = "xgboost_multiclass_predict",
+import hivemall.utils.lang.Preconditions;
+
+@Description(name = "xgboost_multiclass_predict",
         value = "_FUNC_(string rowid, string[] features, string model_id, array<byte> pred_model [, string options]) "
-                + "- Returns a prediction result as (string rowid, int label, float probability)")
+                + "- Returns a prediction result as (string rowid, string label, float probability)")
 public final class XGBoostMulticlassPredictUDTF extends hivemall.xgboost.XGBoostPredictUDTF {
 
     public XGBoostMulticlassPredictUDTF() {
@@ -65,14 +64,14 @@ public final class XGBoostMulticlassPredictUDTF extends hivemall.xgboost.XGBoost
         final Object[] forwardObj = new Object[3];
         for (int i = 0, size = testData.size(); i < size; i++) {
             final float[] predicted_i = predicted[i];
-            final String rowId = testData.get(i).getRowId();
+            String rowId = testData.get(i).getRowId();
             forwardObj[0] = rowId;
 
             assert (predicted_i.length > 1);
             for (int j = 0; j < predicted_i.length; j++) {
-                forwardObj[1] = j;
+                forwardObj[1] = String.valueOf(j);
                 float prob = predicted_i[j];
-                forwardObj[2] = prob;
+                forwardObj[2] = Float.valueOf(prob);
                 forward(forwardObj);
             }
         }
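
Two things change in the hunk above: the label column becomes a string (matching the
revised @Description) and the forwarded values are boxed explicitly, since Hive's
standard object inspectors expect objects rather than primitives. A self-contained
sketch of the per-row fan-out, with System.out standing in for forward() and all names
illustrative:

    import java.util.Arrays;

    public class MulticlassFanOutSketch {
        public static void main(String[] args) {
            final String rowId = "row-1";
            final float[] predicted_i = {0.1f, 0.7f, 0.2f}; // one probability per class
            final Object[] forwardObj = new Object[3];
            forwardObj[0] = rowId;
            for (int j = 0; j < predicted_i.length; j++) {
                forwardObj[1] = String.valueOf(j);             // label forwarded as string
                forwardObj[2] = Float.valueOf(predicted_i[j]); // explicit boxing
                System.out.println(Arrays.toString(forwardObj));
            }
        }
    }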


[3/4] incubator-hivemall git commit: Close #131: [v0.5.0-rc3] Merge v0.5.0 branch

Posted by my...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/docs/gitbook/binaryclass/titanic_rf.md
----------------------------------------------------------------------
diff --git a/docs/gitbook/binaryclass/titanic_rf.md b/docs/gitbook/binaryclass/titanic_rf.md
index 29784e0..2b54074 100644
--- a/docs/gitbook/binaryclass/titanic_rf.md
+++ b/docs/gitbook/binaryclass/titanic_rf.md
@@ -175,7 +175,7 @@ from
 # Prediction
 
 ```sql
-SET hivevar:classification=true;
+-- SET hivevar:classification=true;
 set hive.auto.convert.join=true;
 SET hive.mapjoin.optimized.hashtable=false;
 SET mapred.reduce.tasks=16;
@@ -202,7 +202,8 @@ FROM (
       -- tree_predict(p.model_id, p.model_type, p.pred_model, t.features, ${classification}) as predicted
       -- hivemall v0.5-rc.1 or later
       p.model_weight,
-      tree_predict(p.model_id, p.model, t.features, ${classification}) as predicted
+      tree_predict(p.model_id, p.model, t.features, "-classification") as predicted
+      -- tree_predict(p.model_id, p.model, t.features, ${classification}) as predicted
       -- tree_predict_v1(p.model_id, p.model_type, p.pred_model, t.features, ${classification}) as predicted -- to use the old model in v0.5-rc.1 or later
     FROM (
       SELECT 
@@ -319,7 +320,7 @@ from
 > [116.12055542977338,960.8569891444097,291.08765260103837,469.74671636586226,163.721292772701,120.784769882858,847.9769298113661,554.4617571355476,346.3500941757221,97.42593940113392]    0.1838351822503962
 
 ```sql
-SET hivevar:classification=true;
+-- SET hivevar:classification=true;
 SET hive.mapjoin.optimized.hashtable=false;
 SET mapred.reduce.tasks=16;
 
@@ -345,7 +346,8 @@ FROM (
       -- tree_predict(p.model_id, p.model_type, p.pred_model, t.features, ${classification}) as predicted
       -- hivemall v0.5-rc.1 or later
       p.model_weight,
-      tree_predict(p.model_id, p.model, t.features, ${classification}) as predicted
+      tree_predict(p.model_id, p.model, t.features, "-classification") as predicted
+      -- tree_predict(p.model_id, p.model, t.features, ${classification}) as predicted
       -- tree_predict_v1(p.model_id, p.model_type, p.pred_model, t.features, ${classification}) as predicted -- to use the old model in v0.5-rc.1 or later
     FROM (
       SELECT 
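
A note on the two documentation hunks above: tree_predict now takes its classification
switch as an option string in the last argument, so the examples comment out the
SET hivevar:classification=true; line and call

    tree_predict(p.model_id, p.model, t.features, "-classification") as predicted

keeping the older ${classification} form as an inline comment for users on the previous
syntax.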

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/docs/gitbook/multiclass/iris_randomforest.md
----------------------------------------------------------------------
diff --git a/docs/gitbook/multiclass/iris_randomforest.md b/docs/gitbook/multiclass/iris_randomforest.md
index b421297..bfc197f 100644
--- a/docs/gitbook/multiclass/iris_randomforest.md
+++ b/docs/gitbook/multiclass/iris_randomforest.md
@@ -206,7 +206,7 @@ from
 # Prediction
 
 ```sql
-set hivevar:classification=true;
+-- set hivevar:classification=true;
 set hive.auto.convert.join=true;
 set hive.mapjoin.optimized.hashtable=false;
 
@@ -225,7 +225,8 @@ FROM (
     -- tree_predict(p.model_id, p.model_type, p.pred_model, t.features, ${classification}) as predicted
     -- hivemall v0.5-rc.1 or later
     p.model_weight,
-    tree_predict(p.model_id, p.model, t.features, ${classification}) as predicted
+    tree_predict(p.model_id, p.model, t.features, "-classification") as predicted
+    -- tree_predict(p.model_id, p.model, t.features, ${classification}) as predicted
     -- tree_predict_v1(p.model_id, p.model_type, p.pred_model, t.features, ${classification}) as predicted -- to use the old model in v0.5-rc.1 or later
   FROM
     model p
@@ -265,7 +266,8 @@ FROM (
     -- tree_predict(p.model_id, p.model_type, p.pred_model, t.features, ${classification}) as predicted
     -- hivemall v0.5-rc.1 or later
     p.model_weight,
-    tree_predict(p.model_id, p.model, t.features, ${classification}) as predicted
+    tree_predict(p.model_id, p.model, t.features, "-classification") as predicted
+    -- tree_predict(p.model_id, p.model, t.features, ${classification}) as predicted
     -- tree_predict_v1(p.model_id, p.model_type, p.pred_model, t.features, ${classification}) as predicted -- to use the old model in v0.5-rc.1 or later
   FROM (
     SELECT 

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/mixserv/pom.xml
----------------------------------------------------------------------
diff --git a/mixserv/pom.xml b/mixserv/pom.xml
index 0a1b387..ff27b09 100644
--- a/mixserv/pom.xml
+++ b/mixserv/pom.xml
@@ -16,14 +16,13 @@
   specific language governing permissions and limitations
   under the License.
 -->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
 	<modelVersion>4.0.0</modelVersion>
 
 	<parent>
 		<groupId>org.apache.hivemall</groupId>
 		<artifactId>hivemall</artifactId>
-		<version>0.5.0-incubating-SNAPSHOT</version>
+		<version>0.5.1-incubating-SNAPSHOT</version>
 		<relativePath>../pom.xml</relativePath>
 	</parent>
 
@@ -40,49 +39,26 @@
 		<dependency>
 			<groupId>org.apache.hadoop</groupId>
 			<artifactId>hadoop-common</artifactId>
-			<version>${hadoop.version}</version>
 			<scope>provided</scope>
 		</dependency>
 		<dependency>
 			<groupId>org.apache.hadoop</groupId>
 			<artifactId>hadoop-mapreduce-client-core</artifactId>
-			<version>${hadoop.version}</version>
 			<scope>provided</scope>
 		</dependency>
 		<dependency>
 			<groupId>org.apache.hive</groupId>
 			<artifactId>hive-exec</artifactId>
-			<version>${hive.version}</version>
 			<scope>provided</scope>
-			<exclusions>
-				<exclusion>
-					<artifactId>jetty</artifactId>
-					<groupId>org.mortbay.jetty</groupId>
-				</exclusion>
-				<exclusion>
-					<groupId>javax.jdo</groupId>
-					<artifactId>jdo2-api</artifactId>
-				</exclusion>
-				<exclusion>
-					<groupId>asm-parent</groupId>
-					<artifactId>asm-parent</artifactId>
-				</exclusion>
-				<exclusion>
-					<groupId>asm</groupId>
-					<artifactId>asm</artifactId>
-				</exclusion>
-			</exclusions>
 		</dependency>
 		<dependency>
 			<groupId>javax.jdo</groupId>
 			<artifactId>jdo2-api</artifactId>
-			<version>2.3-eb</version>
 			<scope>provided</scope>
 		</dependency>
 		<dependency>
 			<groupId>com.google.guava</groupId>
 			<artifactId>guava</artifactId>
-			<version>${guava.version}</version>
 			<scope>provided</scope>
 		</dependency>
 
@@ -103,19 +79,16 @@
 		<dependency>
 			<groupId>commons-cli</groupId>
 			<artifactId>commons-cli</artifactId>
-			<version>1.2</version>
 			<scope>compile</scope>
 		</dependency>
 		<dependency>
 			<groupId>commons-logging</groupId>
 			<artifactId>commons-logging</artifactId>
-			<version>1.0.4</version>
 			<scope>compile</scope>
 		</dependency>
 		<dependency>
 			<groupId>log4j</groupId>
 			<artifactId>log4j</artifactId>
-			<version>1.2.17</version>
 			<scope>compile</scope>
 		</dependency>
 		<dependency>
@@ -130,28 +103,21 @@
 		<dependency>
 			<groupId>junit</groupId>
 			<artifactId>junit</artifactId>
-			<version>${junit.version}</version>
 			<scope>test</scope>
 		</dependency>
 		<dependency>
 			<groupId>org.mockito</groupId>
 			<artifactId>mockito-all</artifactId>
-			<version>1.10.19</version>
 			<scope>test</scope>
 		</dependency>
 	</dependencies>
 
 	<build>
-		<directory>target</directory>
-		<outputDirectory>target/classes</outputDirectory>
-		<finalName>${project.artifactId}-${project.version}</finalName>
-		<testOutputDirectory>target/test-classes</testOutputDirectory>
 		<plugins>
 			<!-- hivemall-mixserv-xx-fat.jar including all dependencies -->
 			<plugin>
 				<groupId>org.apache.maven.plugins</groupId>
 				<artifactId>maven-shade-plugin</artifactId>
-				<version>3.1.0</version>
 				<executions>
 					<execution>
 						<id>jar-with-dependencies</id>
@@ -170,7 +136,7 @@
 									<include>commons-cli:commons-cli</include>
 									<include>commons-logging:commons-logging</include>
 									<include>log4j:log4j</include>
-									<include>io.netty:netty-all</include>
+									<include>io.netty:netty-all</include>
 								</includes>
 							</artifactSet>
 							<!-- maven-shade-plugin cannot handle the dependency of log4j because 
@@ -198,8 +164,7 @@
 								</filter>
 							</filters>
 							<transformers>
-								<transformer
-									implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
+								<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
 									<manifestEntries>
 										<Main-Class>hivemall.mix.server.MixServer</Main-Class>
 										<Implementation-Title>${project.name}</Implementation-Title>

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/nlp/pom.xml
----------------------------------------------------------------------
diff --git a/nlp/pom.xml b/nlp/pom.xml
index dc77c06..782e41d 100644
--- a/nlp/pom.xml
+++ b/nlp/pom.xml
@@ -16,14 +16,13 @@
   specific language governing permissions and limitations
   under the License.
 -->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
 	<modelVersion>4.0.0</modelVersion>
 
 	<parent>
 		<groupId>org.apache.hivemall</groupId>
 		<artifactId>hivemall</artifactId>
-		<version>0.5.0-incubating-SNAPSHOT</version>
+		<version>0.5.1-incubating-SNAPSHOT</version>
 		<relativePath>../pom.xml</relativePath>
 	</parent>
 
@@ -40,77 +39,51 @@
 		<dependency>
 			<groupId>org.apache.hadoop</groupId>
 			<artifactId>hadoop-common</artifactId>
-			<version>${hadoop.version}</version>
 			<scope>provided</scope>
 		</dependency>
 		<dependency>
 			<groupId>org.apache.hadoop</groupId>
 			<artifactId>hadoop-mapreduce-client-core</artifactId>
-			<version>${hadoop.version}</version>
 			<scope>provided</scope>
 		</dependency>
 		<dependency>
 			<groupId>org.apache.hive</groupId>
 			<artifactId>hive-exec</artifactId>
-			<version>${hive.version}</version>
 			<scope>provided</scope>
-			<exclusions>
-				<exclusion>
-					<artifactId>jetty</artifactId>
-					<groupId>org.mortbay.jetty</groupId>
-				</exclusion>
-				<exclusion>
-					<groupId>javax.jdo</groupId>
-					<artifactId>jdo2-api</artifactId>
-				</exclusion>
-				<exclusion>
-					<groupId>asm-parent</groupId>
-					<artifactId>asm-parent</artifactId>
-				</exclusion>
-				<exclusion>
-					<groupId>asm</groupId>
-					<artifactId>asm</artifactId>
-				</exclusion>
-			</exclusions>
 		</dependency>
 		<dependency>
 			<groupId>commons-cli</groupId>
 			<artifactId>commons-cli</artifactId>
-			<version>1.2</version>
 			<scope>provided</scope>
 		</dependency>
 		<dependency>
 			<groupId>commons-logging</groupId>
 			<artifactId>commons-logging</artifactId>
-			<version>1.0.4</version>
 			<scope>provided</scope>
 		</dependency>
 		<dependency>
 			<groupId>log4j</groupId>
 			<artifactId>log4j</artifactId>
-			<version>1.2.17</version>
 			<scope>provided</scope>
 		</dependency>
 		<dependency>
 			<groupId>javax.jdo</groupId>
 			<artifactId>jdo2-api</artifactId>
-			<version>2.3-eb</version>
 			<scope>provided</scope>
 		</dependency>
 		<dependency>
 			<groupId>com.google.guava</groupId>
 			<artifactId>guava</artifactId>
-			<version>${guava.version}</version>
 			<scope>provided</scope>
 		</dependency>
+
+		<!-- compile scope -->
 		<dependency>
 			<groupId>org.apache.hivemall</groupId>
 			<artifactId>hivemall-core</artifactId>
 			<version>${project.version}</version>
-			<scope>provided</scope>
+			<scope>compile</scope>
 		</dependency>
-
-		<!-- compile scope -->
 		<dependency>
 			<groupId>org.apache.lucene</groupId>
 			<artifactId>lucene-analyzers-kuromoji</artifactId>
@@ -128,7 +101,6 @@
 		<dependency>
 			<groupId>junit</groupId>
 			<artifactId>junit</artifactId>
-			<version>${junit.version}</version>
 			<scope>test</scope>
 		</dependency>
 		<dependency>
@@ -140,98 +112,4 @@
 
 	</dependencies>
 
-	<build>
-		<directory>target</directory>
-		<outputDirectory>target/classes</outputDirectory>
-		<finalName>${project.artifactId}-${project.version}</finalName>
-		<testOutputDirectory>target/test-classes</testOutputDirectory>
-		<plugins>
-			<!-- hivemall-nlp-xx.jar -->
-			<plugin>
-				<groupId>org.apache.maven.plugins</groupId>
-				<artifactId>maven-jar-plugin</artifactId>
-				<version>2.5</version>
-				<configuration>
-					<finalName>${project.artifactId}-${project.version}</finalName>
-					<outputDirectory>${project.parent.build.directory}</outputDirectory>
-				</configuration>
-			</plugin>
-			<!-- hivemall-nlp-xx-with-dependencies.jar including minimum dependencies -->
-			<plugin>
-				<groupId>org.apache.maven.plugins</groupId>
-				<artifactId>maven-shade-plugin</artifactId>
-				<version>3.1.0</version>
-				<executions>
-					<execution>
-						<id>jar-with-dependencies</id>
-						<phase>package</phase>
-						<goals>
-							<goal>shade</goal>
-						</goals>
-						<configuration>
-							<finalName>${project.artifactId}-${project.version}-with-dependencies</finalName>
-							<outputDirectory>${project.parent.build.directory}</outputDirectory>
-							<minimizeJar>true</minimizeJar>
-							<createDependencyReducedPom>false</createDependencyReducedPom>
-							<artifactSet>
-								<includes>
-									<include>org.apache.hivemall:hivemall-core</include>
-									<include>org.apache.lucene:lucene-analyzers-kuromoji</include>
-									<include>org.apache.lucene:lucene-analyzers-smartcn</include>
-									<include>org.apache.lucene:lucene-analyzers-common</include>
-									<include>org.apache.lucene:lucene-core</include>
-								</includes>
-							</artifactSet>
-							<filters>
-								<filter>
-									<artifact>*:*</artifact>
-									<excludes>
-										<exclude>META-INF/LICENSE.txt</exclude>
-									</excludes>
-								</filter>
-								<filter>
-									<artifact>org.apache.lucene:lucene-analyzers-kuromoji</artifact>
-									<includes>
-										<include>**</include>
-									</includes>
-								</filter>
-								<filter>
-									<artifact>org.apache.lucene:lucene-analyzers-smartcn</artifact>
-									<includes>
-										<include>**</include>
-									</includes>
-								</filter>
-								<filter>
-									<artifact>org.apache.lucene:lucene-analyzers-common</artifact>
-									<includes>
-										<include>**</include>
-									</includes>
-								</filter>
-								<filter>
-									<artifact>org.apache.lucene:lucene-core</artifact>
-									<includes>
-										<include>**</include>
-									</includes>
-								</filter>
-							</filters>
-							<transformers>
-								<transformer
-									implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
-									<manifestEntries>
-										<Implementation-Title>${project.name}</Implementation-Title>
-										<Implementation-Version>${project.version}</Implementation-Version>
-										<Implementation-Vendor>${project.organization.name}</Implementation-Vendor>
-									</manifestEntries>
-								</transformer>
-								<transformer implementation="org.apache.maven.plugins.shade.resource.ApacheNoticeResourceTransformer">
-									<addHeader>false</addHeader>
-								</transformer>
-							</transformers>
-						</configuration>
-					</execution>
-				</executions>
-			</plugin>
-		</plugins>
-	</build>
-
 </project>

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/nlp/src/main/java/hivemall/nlp/tokenizer/KuromojiUDF.java
----------------------------------------------------------------------
diff --git a/nlp/src/main/java/hivemall/nlp/tokenizer/KuromojiUDF.java b/nlp/src/main/java/hivemall/nlp/tokenizer/KuromojiUDF.java
index 93fd18c..411c89e 100644
--- a/nlp/src/main/java/hivemall/nlp/tokenizer/KuromojiUDF.java
+++ b/nlp/src/main/java/hivemall/nlp/tokenizer/KuromojiUDF.java
@@ -19,15 +19,19 @@
 package hivemall.nlp.tokenizer;
 
 import hivemall.utils.hadoop.HiveUtils;
-import hivemall.utils.io.IOUtils;
 import hivemall.utils.io.HttpUtils;
+import hivemall.utils.io.IOUtils;
+import hivemall.utils.lang.ExceptionUtils;
 
+import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
-import java.io.IOException;
 import java.io.Reader;
 import java.io.StringReader;
 import java.net.HttpURLConnection;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CodingErrorAction;
+import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -55,8 +59,7 @@ import org.apache.lucene.analysis.ja.dict.UserDictionary;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.util.CharArraySet;
 
-@Description(
-        name = "tokenize_ja",
+@Description(name = "tokenize_ja",
         value = "_FUNC_(String line [, const string mode = \"normal\", const array<string> stopWords, const array<string> stopTags, const array<string> userDict (or string userDictURL)])"
                 + " - returns tokenized strings in array<string>")
 @UDFType(deterministic = true, stateful = false)
@@ -77,20 +80,21 @@ public final class KuromojiUDF extends GenericUDF {
     public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
         final int arglen = arguments.length;
         if (arglen < 1 || arglen > 5) {
-            throw new UDFArgumentException("Invalid number of arguments for `tokenize_ja`: "
-                    + arglen);
+            throw new UDFArgumentException(
+                "Invalid number of arguments for `tokenize_ja`: " + arglen);
         }
 
         this._mode = (arglen >= 2) ? tokenizationMode(arguments[1]) : Mode.NORMAL;
-        this._stopWords = (arglen >= 3) ? stopWords(arguments[2])
-                : JapaneseAnalyzer.getDefaultStopSet();
-        this._stopTags = (arglen >= 4) ? stopTags(arguments[3])
-                : JapaneseAnalyzer.getDefaultStopTags();
+        this._stopWords =
+                (arglen >= 3) ? stopWords(arguments[2]) : JapaneseAnalyzer.getDefaultStopSet();
+        this._stopTags =
+                (arglen >= 4) ? stopTags(arguments[3]) : JapaneseAnalyzer.getDefaultStopTags();
         this._userDict = (arglen >= 5) ? userDictionary(arguments[4]) : null;
 
         this._analyzer = null;
 
-        return ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableStringObjectInspector);
+        return ObjectInspectorFactory.getStandardListObjectInspector(
+            PrimitiveObjectInspectorFactory.writableStringObjectInspector);
     }
 
     @Override
@@ -219,7 +223,8 @@ public final class KuromojiUDF extends GenericUDF {
             return UserDictionary.open(reader); // return null if empty
         } catch (Throwable e) {
             throw new UDFArgumentException(
-                "Failed to create user dictionary based on the given array<string>: " + e);
+                "Failed to create user dictionary based on the given array<string>: "
+                        + builder.toString() + '\n' + ExceptionUtils.prettyPrintStackTrace(e));
         }
     }
 
@@ -234,7 +239,8 @@ public final class KuromojiUDF extends GenericUDF {
         try {
             conn = HttpUtils.getHttpURLConnection(userDictURL);
         } catch (IllegalArgumentException | IOException e) {
-            throw new UDFArgumentException("Failed to create HTTP connection to the URL: " + e);
+            throw new UDFArgumentException("Failed to create HTTP connection to the URL: "
+                    + userDictURL + '\n' + ExceptionUtils.prettyPrintStackTrace(e));
         }
 
         // allow to read as a compressed GZIP file for efficiency
@@ -247,7 +253,8 @@ public final class KuromojiUDF extends GenericUDF {
         try {
             responseCode = conn.getResponseCode();
         } catch (IOException e) {
-            throw new UDFArgumentException("Failed to get response code: " + e);
+            throw new UDFArgumentException("Failed to get response code: " + userDictURL + '\n'
+                    + ExceptionUtils.prettyPrintStackTrace(e));
         }
         if (responseCode != 200) {
             throw new UDFArgumentException("Got invalid response code: " + responseCode);
@@ -255,17 +262,24 @@ public final class KuromojiUDF extends GenericUDF {
 
         final InputStream is;
         try {
-            is = IOUtils.decodeInputStream(HttpUtils.getLimitedInputStream(conn,
-                MAX_INPUT_STREAM_SIZE));
+            is = IOUtils.decodeInputStream(
+                HttpUtils.getLimitedInputStream(conn, MAX_INPUT_STREAM_SIZE));
         } catch (NullPointerException | IOException e) {
-            throw new UDFArgumentException("Failed to get input stream from the connection: " + e);
+            throw new UDFArgumentException("Failed to get input stream from the connection: "
+                    + userDictURL + '\n' + ExceptionUtils.prettyPrintStackTrace(e));
         }
 
-        final Reader reader = new InputStreamReader(is);
+        CharsetDecoder decoder =
+                StandardCharsets.UTF_8.newDecoder()
+                                      .onMalformedInput(CodingErrorAction.REPORT)
+                                      .onUnmappableCharacter(CodingErrorAction.REPORT);
+        final Reader reader = new InputStreamReader(is, decoder);
         try {
             return UserDictionary.open(reader); // return null if empty
         } catch (Throwable e) {
-            throw new UDFArgumentException("Failed to parse the file in CSV format: " + e);
+            throw new UDFArgumentException(
+                "Failed to parse the file in CSV format (UTF-8 encoding is expected): "
+                        + userDictURL + '\n' + ExceptionUtils.prettyPrintStackTrace(e));
         }
     }
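
The CharsetDecoder change above is the substantive part of this hunk: the old
single-argument InputStreamReader silently replaces malformed bytes with U+FFFD,
while a UTF-8 decoder configured with CodingErrorAction.REPORT makes a non-UTF-8
user dictionary fail fast. A self-contained sketch of the same strict-decoding setup,
using only standard java.nio.charset APIs (the sample bytes are illustrative):

    import java.io.ByteArrayInputStream;
    import java.io.InputStreamReader;
    import java.io.Reader;
    import java.nio.charset.CharsetDecoder;
    import java.nio.charset.CodingErrorAction;
    import java.nio.charset.StandardCharsets;

    public class StrictUtf8Sketch {
        public static void main(String[] args) throws Exception {
            final byte[] notUtf8 = {(byte) 0xfe, (byte) 0xff, 0x41}; // 0xFE is never valid UTF-8
            CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder()
                    .onMalformedInput(CodingErrorAction.REPORT)
                    .onUnmappableCharacter(CodingErrorAction.REPORT);
            try (Reader reader = new InputStreamReader(new ByteArrayInputStream(notUtf8), decoder)) {
                reader.read(); // throws MalformedInputException instead of yielding U+FFFD
            }
        }
    }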
 

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index e9c19dd..e594006 100644
--- a/pom.xml
+++ b/pom.xml
@@ -16,13 +16,12 @@
   specific language governing permissions and limitations
   under the License.
 -->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
 	<modelVersion>4.0.0</modelVersion>
 
 	<groupId>org.apache.hivemall</groupId>
 	<artifactId>hivemall</artifactId>
-	<version>0.5.0-incubating-SNAPSHOT</version>
+	<version>0.5.1-incubating-SNAPSHOT</version>
 
 	<parent>
 	  <groupId>org.apache</groupId>
@@ -51,7 +50,8 @@
 		<url>https://git-wip-us.apache.org/repos/asf/incubator-hivemall.git</url>
 		<connection>scm:git:https://git-wip-us.apache.org/repos/asf/incubator-hivemall.git</connection>
 		<developerConnection>scm:git:https://git-wip-us.apache.org/repos/asf/incubator-hivemall.git</developerConnection>
-	</scm>
+		<tag>v0.5.0-rc1</tag>
+	</scm>
 
 	<mailingLists>
 		<mailingList>
@@ -152,8 +152,8 @@
 			<name>Tsuyoshi Ozawa</name>
 			<email>ozawa[at]apache.org</email>
 			<url>https://people.apache.org/~ozawa/</url>
-			<organization></organization>
-			<organizationUrl></organizationUrl>
+			<organization />
+			<organizationUrl />
 			<roles>
 				<role>PPMC Member</role>
 			</roles>
@@ -249,15 +249,14 @@
 		<module>nlp</module>
 		<module>xgboost</module>
 		<module>mixserv</module>
+		<module>spark</module>
+		<module>dist</module>
 	</modules>
 
 	<properties>
-		<java.source.version>1.7</java.source.version>
-		<java.target.version>1.7</java.target.version>
+		<main.basedir>${project.basedir}</main.basedir>
 		<maven.compiler.source>1.7</maven.compiler.source>
 		<maven.compiler.target>1.7</maven.compiler.target>
-		<scala.version>2.11.8</scala.version>
-		<scala.binary.version>2.11</scala.binary.version>
 		<maven.build.timestamp.format>yyyy</maven.build.timestamp.format>
 		<build.year>${maven.build.timestamp}</build.year>
 		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
@@ -269,9 +268,9 @@
 		<guava.version>11.0.2</guava.version>
 		<junit.version>4.12</junit.version>
 		<dependency.locations.enabled>false</dependency.locations.enabled>
-		<main.basedir>${project.basedir}</main.basedir>
-		<maven-enforcer-plugin.version>3.0.0-M1</maven-enforcer-plugin.version>
+		<maven-enforcer.requireMavenVersion>[3.3.1,)</maven-enforcer.requireMavenVersion>
 		<surefire.version>2.19.1</surefire.version>
+		<xgboost.version>0.7-rc2</xgboost.version>
 	</properties>
 
 	<distributionManagement>
@@ -315,113 +314,6 @@
 
 	<profiles>
 		<profile>
-			<id>spark-2.2</id>
-			<modules>
-				<module>spark/spark-2.2</module>
-				<module>spark/spark-common</module>
-			</modules>
-			<properties>
-				<spark.version>2.2.0</spark.version>
-				<spark.binary.version>2.2</spark.binary.version>
-			</properties>
-			<build>
-				<plugins>
-					<!-- Spark-2.2 only supports Java 8 -->
-					<plugin>
-						<groupId>org.apache.maven.plugins</groupId>
-						<artifactId>maven-enforcer-plugin</artifactId>
-						<version>${maven-enforcer-plugin.version}</version>
-						<executions>
-							<execution>
-								<id>enforce-versions</id>
-								<phase>validate</phase>
-								<goals>
-									<goal>enforce</goal>
-								</goals>
-								<configuration>
-									<rules>
-										<requireProperty>
-											<property>java.source.version</property>
-											<regex>1.8</regex>
-											<regexMessage>When -Pspark-2.2 set, java.source.version must be 1.8</regexMessage>
-										</requireProperty>
-										<requireProperty>
-											<property>java.target.version</property>
-											<regex>1.8</regex>
-											<regexMessage>When -Pspark-2.2 set, java.target.version must be 1.8</regexMessage>
-										</requireProperty>
-									</rules>
-								</configuration>
-							</execution>
-						</executions>
-					</plugin>
-				</plugins>
-			</build>
-		</profile>
-		<profile>
-			<id>spark-2.1</id>
-			<modules>
-				<module>spark/spark-2.1</module>
-				<module>spark/spark-common</module>
-			</modules>
-			<properties>
-				<spark.version>2.1.1</spark.version>
-				<spark.binary.version>2.1</spark.binary.version>
-			</properties>
-		</profile>
-		<profile>
-			<id>spark-2.0</id>
-			<modules>
-				<module>spark/spark-2.0</module>
-				<module>spark/spark-common</module>
-			</modules>
-			<properties>
-				<spark.version>2.0.2</spark.version>
-				<spark.binary.version>2.0</spark.binary.version>
-			</properties>
-		</profile>
-		<profile>
-			<id>java7</id>
-			<properties>
-				<spark.test.jvm.opts>-ea -Xms768m -Xmx1024m -XX:PermSize=128m -XX:MaxPermSize=512m -XX:ReservedCodeCacheSize=512m</spark.test.jvm.opts>
-			</properties>
-			<activation>
-				<jdk>[,1.8)</jdk> <!-- version < 1.8 -->
-			</activation>
-		</profile>
-		<profile>
-			<id>java8</id>
-			<properties>
-				<spark.test.jvm.opts>-ea -Xms768m -Xmx1024m -XX:MetaspaceSize=128m -XX:MaxMetaspaceSize=512m -XX:ReservedCodeCacheSize=512m</spark.test.jvm.opts>
-			</properties>
-			<activation>
-				<jdk>[1.8,)</jdk> <!-- version >= 1.8 -->
-			</activation>
-		</profile>
-		<profile>
-			<id>compile-xgboost</id>
-			<build>
-				<plugins>
-					<plugin>
-						<artifactId>exec-maven-plugin</artifactId>
-						<groupId>org.codehaus.mojo</groupId>
-						<executions>
-							<execution>
-								<id>native</id>
-								<phase>generate-sources</phase>
-								<goals>
-									<goal>exec</goal>
-								</goals>
-								<configuration>
-									<executable>./bin/build_xgboost.sh</executable>
-								</configuration>
-							</execution>
-						</executions>
-					</plugin>
-				</plugins>
-			</build>
-		</profile>
-		<profile>
 			<id>doclint-java8-disable</id>
 			<activation>
 				<jdk>[1.8,)</jdk>
@@ -432,6 +324,110 @@
 		</profile>
 	</profiles>
 
+	<dependencyManagement>
+		<dependencies>
+			<!-- provided scope -->
+			<dependency>
+				<groupId>org.apache.hadoop</groupId>
+				<artifactId>hadoop-common</artifactId>
+				<version>${hadoop.version}</version>
+				<scope>provided</scope>
+			</dependency>
+			<dependency>
+				<groupId>org.apache.hadoop</groupId>
+				<artifactId>hadoop-mapreduce-client-core</artifactId>
+				<version>${hadoop.version}</version>
+				<scope>provided</scope>
+			</dependency>
+			<dependency>
+				<groupId>org.apache.hive</groupId>
+				<artifactId>hive-exec</artifactId>
+				<version>${hive.version}</version>
+				<scope>provided</scope>
+				<exclusions>
+					<exclusion>
+						<artifactId>jetty</artifactId>
+						<groupId>org.mortbay.jetty</groupId>
+					</exclusion>
+					<exclusion>
+						<groupId>javax.jdo</groupId>
+						<artifactId>jdo2-api</artifactId>
+					</exclusion>
+					<exclusion>
+						<groupId>asm-parent</groupId>
+						<artifactId>asm-parent</artifactId>
+					</exclusion>
+					<exclusion>
+						<groupId>asm</groupId>
+						<artifactId>asm</artifactId>
+					</exclusion>
+				</exclusions>
+			</dependency>
+			<dependency>
+				<groupId>commons-cli</groupId>
+				<artifactId>commons-cli</artifactId>
+				<version>1.2</version>
+				<scope>provided</scope>
+			</dependency>
+			<dependency>
+				<groupId>commons-logging</groupId>
+				<artifactId>commons-logging</artifactId>
+				<version>1.0.4</version>
+				<scope>provided</scope>
+			</dependency>
+			<dependency>
+				<groupId>log4j</groupId>
+				<artifactId>log4j</artifactId>
+				<version>1.2.17</version>
+				<scope>provided</scope>
+			</dependency>
+			<dependency>
+				<groupId>javax.jdo</groupId>
+				<artifactId>jdo2-api</artifactId>
+				<version>2.3-eb</version>
+				<scope>provided</scope>
+			</dependency>
+			<dependency>
+				<groupId>com.google.guava</groupId>
+				<artifactId>guava</artifactId>
+				<version>${guava.version}</version>
+				<scope>provided</scope>
+			</dependency>
+
+			<!-- test scope -->
+			<dependency>
+				<groupId>junit</groupId>
+				<artifactId>junit</artifactId>
+				<version>${junit.version}</version>
+				<scope>test</scope>
+			</dependency>
+			<dependency>
+				<groupId>org.mockito</groupId>
+				<artifactId>mockito-all</artifactId>
+				<version>1.10.19</version>
+				<scope>test</scope>
+			</dependency>
+			<dependency>
+				<groupId>org.mockito</groupId>
+				<artifactId>mockito-core</artifactId>
+				<version>1.10.19</version>
+				<scope>test</scope>
+			</dependency>
+			<dependency>
+				<groupId>org.powermock</groupId>
+				<artifactId>powermock-module-junit4</artifactId>
+				<version>1.6.3</version>
+				<scope>test</scope>
+			</dependency>
+			<dependency>
+				<groupId>org.powermock</groupId>
+				<artifactId>powermock-api-mockito</artifactId>
+				<version>1.6.3</version>
+				<scope>test</scope>
+			</dependency>
+		</dependencies>
+	</dependencyManagement>
+
 	<build>
 		<directory>target</directory>
 		<outputDirectory>target/classes</outputDirectory>
@@ -441,6 +437,25 @@
 		<pluginManagement>
 			<plugins>
 				<plugin>
+					<groupId>org.apache.maven.plugins</groupId>
+					<artifactId>maven-jar-plugin</artifactId>
+					<version>3.0.2</version>
+					<configuration>
+						<finalName>${project.artifactId}-${project.version}</finalName>
+						<outputDirectory>${main.basedir}/target</outputDirectory>
+					</configuration>
+				</plugin>
+				<plugin>
+					<groupId>org.apache.maven.plugins</groupId>
+					<artifactId>maven-shade-plugin</artifactId>
+					<version>3.1.0</version>
+				</plugin>
+				<plugin>
+					<groupId>org.apache.maven.plugins</groupId>
+					<artifactId>maven-enforcer-plugin</artifactId>
+					<version>3.0.0-M1</version>
+				</plugin>
+				<plugin>
 					<!-- mvn formatter:format -->
 					<groupId>net.revelc.code</groupId>
 					<artifactId>formatter-maven-plugin</artifactId>
@@ -475,6 +490,11 @@
 						<useDefaultExcludes>false</useDefaultExcludes>
 						<excludes>
 							<exclude>docs/gitbook/node_modules/**</exclude>
+							<exclude>target/</exclude>
+							<exclude>src/main/java/hivemall/utils/codec/Base91.java</exclude>
+							<exclude>src/main/java/hivemall/utils/math/FastMath.java</exclude>
+							<exclude>src/main/java/hivemall/smile/classification/DecisionTree.java</exclude>
+							<exclude>src/main/java/hivemall/smile/regression/RegressionTree.java</exclude>
 						</excludes>
 						<encoding>UTF-8</encoding>
 						<headerDefinitions>
@@ -575,14 +595,42 @@
 				<artifactId>maven-enforcer-plugin</artifactId>
 				<executions>
 					<execution>
-						<id>enforce-maven</id>
+						<id>enforce-JAVA_HOME-is-set</id>
+						<goals>
+							<goal>enforce</goal>
+						</goals>
+						<configuration>
+							<rules>
+								<requireEnvironmentVariable>
+									<variableName>JAVA_HOME</variableName>
+								</requireEnvironmentVariable>
+							</rules>
+							<fail>true</fail>
+						</configuration>
+					</execution>
+					<execution>
+						<id>enforce-JAVA8_HOME-is-set</id>
+						<goals>
+							<goal>enforce</goal>
+						</goals>
+						<configuration>
+							<rules>
+								<requireEnvironmentVariable>
+									<variableName>JAVA8_HOME</variableName>
+								</requireEnvironmentVariable>
+							</rules>
+							<fail>true</fail>
+						</configuration>
+					</execution>
+					<execution>
+						<id>required-maven-version</id>
 						<goals>
 							<goal>enforce</goal>
 						</goals>
 						<configuration>
 							<rules>
 								<requireMavenVersion>
-									<version>[3.3.1,)</version>
+									<version>${maven-enforcer.requireMavenVersion}</version>
 								</requireMavenVersion>
 							</rules>
 						</configuration>
@@ -610,8 +658,8 @@
 				<groupId>org.apache.maven.plugins</groupId>
 				<artifactId>maven-compiler-plugin</artifactId>
 				<configuration>
-					<source>${java.source.version}</source>
-					<target>${java.target.version}</target>
+					<source>${maven.compiler.source}</source>
+					<target>${maven.compiler.target}</target>
 					<debug>true</debug>
 					<debuglevel>lines,vars,source</debuglevel>
 					<encoding>UTF-8</encoding>
@@ -688,30 +736,6 @@
 				</dependencies>
 			</plugin>
 			<!-- end mvn site -->
-			<plugin>
-				<groupId>org.scalastyle</groupId>
-				<artifactId>scalastyle-maven-plugin</artifactId>
-				<version>0.8.0</version>
-				<configuration>
-					<verbose>false</verbose>
-					<failOnViolation>true</failOnViolation>
-					<includeTestSourceDirectory>true</includeTestSourceDirectory>
-					<failOnWarning>false</failOnWarning>
-					<sourceDirectory>${basedir}/src/main/scala</sourceDirectory>
-					<testSourceDirectory>${basedir}/src/test/scala</testSourceDirectory>
-					<configLocation>spark/spark-common/scalastyle-config.xml</configLocation>
-					<outputFile>${basedir}/target/scalastyle-output.xml</outputFile>
-					<inputEncoding>${project.build.sourceEncoding}</inputEncoding>
-					<outputEncoding>${project.reporting.outputEncoding}</outputEncoding>
-				</configuration>
-				<executions>
-					<execution>
-						<goals>
-							<goal>check</goal>
-						</goals>
-					</execution>
-				</executions>
-			</plugin>
 			<!-- mvn apache-rat:check -->
 			<plugin>
 				<groupId>org.apache.rat</groupId>
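
The <dependencyManagement> block added above is what lets the per-module poms in this
commit drop their <version> tags and hive-exec <exclusions>: child modules inherit both
from the parent and declare only coordinates and scope, e.g.

	<dependency>
		<groupId>org.apache.hadoop</groupId>
		<artifactId>hadoop-common</artifactId>
		<scope>provided</scope>
	</dependency>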

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/common/pom.xml
----------------------------------------------------------------------
diff --git a/spark/common/pom.xml b/spark/common/pom.xml
new file mode 100644
index 0000000..a6262e8
--- /dev/null
+++ b/spark/common/pom.xml
@@ -0,0 +1,64 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+	<modelVersion>4.0.0</modelVersion>
+
+	<parent>
+		<groupId>org.apache.hivemall</groupId>
+		<artifactId>hivemall-spark</artifactId>
+		<version>0.5.1-incubating-SNAPSHOT</version>
+		<relativePath>../pom.xml</relativePath>
+	</parent>
+
+	<artifactId>hivemall-spark-common</artifactId>
+	<name>Hivemall on Spark Common</name>
+	<packaging>jar</packaging>
+
+	<properties>
+		<main.basedir>${project.parent.parent.basedir}</main.basedir>
+	</properties>
+
+	<dependencies>
+		<!-- provided scope -->
+		<dependency>
+			<groupId>org.apache.hadoop</groupId>
+			<artifactId>hadoop-common</artifactId>
+			<scope>provided</scope>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.hadoop</groupId>
+			<artifactId>hadoop-mapreduce-client-core</artifactId>
+			<scope>provided</scope>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.hive</groupId>
+			<artifactId>hive-exec</artifactId>
+			<scope>provided</scope>
+		</dependency>
+
+		<!-- compile scope -->
+		<dependency>
+			<groupId>org.apache.hivemall</groupId>
+			<artifactId>hivemall-core</artifactId>
+			<scope>compile</scope>
+		</dependency>
+	</dependencies>
+
+</project>
+

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/common/src/main/java/hivemall/dataset/LogisticRegressionDataGeneratorUDTFWrapper.java
----------------------------------------------------------------------
diff --git a/spark/common/src/main/java/hivemall/dataset/LogisticRegressionDataGeneratorUDTFWrapper.java b/spark/common/src/main/java/hivemall/dataset/LogisticRegressionDataGeneratorUDTFWrapper.java
new file mode 100644
index 0000000..cf10ed7
--- /dev/null
+++ b/spark/common/src/main/java/hivemall/dataset/LogisticRegressionDataGeneratorUDTFWrapper.java
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall.dataset;
+
+import hivemall.UDTFWithOptions;
+
+import java.lang.reflect.Field;
+import java.lang.reflect.Method;
+import java.util.Random;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.Options;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.generic.Collector;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+
+/**
+ * A wrapper of [[hivemall.dataset.LogisticRegressionDataGeneratorUDTF]]. This wrapper is needed
+ * because Spark cannot handle HadoopUtils#getTaskId() correctly.
+ */
+@Description(name = "lr_datagen",
+        value = "_FUNC_(options string) - Generates a logistic regression dataset")
+public final class LogisticRegressionDataGeneratorUDTFWrapper extends UDTFWithOptions {
+    private transient LogisticRegressionDataGeneratorUDTF udtf =
+            new LogisticRegressionDataGeneratorUDTF();
+
+    @Override
+    protected Options getOptions() {
+        Options options = null;
+        try {
+            Method m = udtf.getClass().getDeclaredMethod("getOptions");
+            m.setAccessible(true);
+            options = (Options) m.invoke(udtf);
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+        return options;
+    }
+
+    @SuppressWarnings("all")
+    @Override
+    protected CommandLine processOptions(ObjectInspector[] objectInspectors)
+            throws UDFArgumentException {
+        CommandLine commands = null;
+        try {
+            Method m = udtf.getClass().getDeclaredMethod("processOptions", ObjectInspector[].class);
+            m.setAccessible(true);
+            commands = (CommandLine) m.invoke(udtf, (Object) objectInspectors);
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+        return commands;
+    }
+
+    @Override
+    public StructObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
+        try {
+            // Extract a collector for LogisticRegressionDataGeneratorUDTF
+            Field collector = GenericUDTF.class.getDeclaredField("collector");
+            collector.setAccessible(true);
+            udtf.setCollector((Collector) collector.get(this));
+
+            // To avoid HadoopUtils#getTaskId()
+            Class<?> clazz = udtf.getClass();
+            Field rnd1 = clazz.getDeclaredField("rnd1");
+            Field rnd2 = clazz.getDeclaredField("rnd2");
+            Field r_seed = clazz.getDeclaredField("r_seed");
+            r_seed.setAccessible(true);
+            final long seed = r_seed.getLong(udtf) + (int) Thread.currentThread().getId();
+            rnd1.setAccessible(true);
+            rnd2.setAccessible(true);
+            rnd1.set(udtf, new Random(seed));
+            rnd2.set(udtf, new Random(seed + 1));
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+        return udtf.initialize(argOIs);
+    }
+
+    @Override
+    public void process(Object[] objects) throws HiveException {
+        udtf.process(objects);
+    }
+
+    @Override
+    public void close() throws HiveException {
+        udtf.close();
+    }
+}
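
The wrapper added above drives the underlying UDTF entirely through reflection:
private methods are invoked after setAccessible(true), and the rnd1/rnd2 fields are
re-seeded so data generation does not go through HadoopUtils#getTaskId(). A
self-contained sketch of that field-injection pattern (the Target class is
illustrative, not Hivemall code):

    import java.lang.reflect.Field;
    import java.util.Random;

    public class FieldInjectionSketch {
        static class Target {
            private long seed = 42L;
            private Random rnd; // normally seeded from a task id
        }

        public static void main(String[] args) throws Exception {
            Target t = new Target();
            Field seedField = Target.class.getDeclaredField("seed");
            seedField.setAccessible(true);
            long seed = seedField.getLong(t) + Thread.currentThread().getId();

            Field rndField = Target.class.getDeclaredField("rnd");
            rndField.setAccessible(true);
            rndField.set(t, new Random(seed)); // inject a deterministic replacement
            System.out.println("injected seed: " + seed);
        }
    }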

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/common/src/main/java/hivemall/ftvec/AddBiasUDFWrapper.java
----------------------------------------------------------------------
diff --git a/spark/common/src/main/java/hivemall/ftvec/AddBiasUDFWrapper.java b/spark/common/src/main/java/hivemall/ftvec/AddBiasUDFWrapper.java
new file mode 100644
index 0000000..b454fd9
--- /dev/null
+++ b/spark/common/src/main/java/hivemall/ftvec/AddBiasUDFWrapper.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall.ftvec;
+
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.UDFType;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.serde2.objectinspector.*;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+
+/**
+ * A wrapper of [[hivemall.ftvec.AddBiasUDF]].
+ *
+ * NOTE: This wrapper works around a Spark reflection issue: Spark cannot handle List<> as
+ * the return type of a Hive UDF, so the return type must be declared via an ObjectInspector.
+ */
+@Description(name = "add_bias",
+        value = "_FUNC_(features in array<string>) - Returns features with a bias as array<string>")
+@UDFType(deterministic = true, stateful = false)
+public class AddBiasUDFWrapper extends GenericUDF {
+    private AddBiasUDF udf = new AddBiasUDF();
+    private ListObjectInspector argumentOI = null;
+
+    @Override
+    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
+        if (arguments.length != 1) {
+            throw new UDFArgumentLengthException(
+                "add_bias() has a single argument: array<string> features");
+        }
+
+        switch (arguments[0].getCategory()) {
+            case LIST:
+                argumentOI = (ListObjectInspector) arguments[0];
+                ObjectInspector elmOI = argumentOI.getListElementObjectInspector();
+                if (elmOI.getCategory().equals(Category.PRIMITIVE)) {
+                    if (((PrimitiveObjectInspector) elmOI).getPrimitiveCategory() == PrimitiveCategory.STRING) {
+                        break;
+                    }
+                }
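+                // falls through: anything but array<string> is rejected below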
+            default:
+                throw new UDFArgumentTypeException(0, "Type mismatch: features");
+        }
+
+        return ObjectInspectorFactory.getStandardListObjectInspector(argumentOI.getListElementObjectInspector());
+    }
+
+    @Override
+    public Object evaluate(DeferredObject[] arguments) throws HiveException {
+        assert (arguments.length == 1);
+        @SuppressWarnings("unchecked")
+        final List<String> input = (List<String>) argumentOI.getList(arguments[0].get());
+        return udf.evaluate(input);
+    }
+
+    @Override
+    public String getDisplayString(String[] children) {
+        return "add_bias(" + Arrays.toString(children) + ")";
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/common/src/main/java/hivemall/ftvec/AddFeatureIndexUDFWrapper.java
----------------------------------------------------------------------
diff --git a/spark/common/src/main/java/hivemall/ftvec/AddFeatureIndexUDFWrapper.java b/spark/common/src/main/java/hivemall/ftvec/AddFeatureIndexUDFWrapper.java
new file mode 100644
index 0000000..0b687db
--- /dev/null
+++ b/spark/common/src/main/java/hivemall/ftvec/AddFeatureIndexUDFWrapper.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall.ftvec;
+
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.UDFType;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.serde2.objectinspector.*;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+
+/**
+ * A wrapper of [[hivemall.ftvec.AddFeatureIndexUDF]].
+ *
+ * NOTE: This wrapper works around a Spark reflection issue: Spark cannot handle List<> as
+ * the return type of a Hive UDF, so the return type must be declared via an ObjectInspector.
+ */
+@Description(
+        name = "add_feature_index",
+        value = "_FUNC_(dense features in array<double>) - Returns a feature vector with feature indices")
+@UDFType(deterministic = true, stateful = false)
+public class AddFeatureIndexUDFWrapper extends GenericUDF {
+    private AddFeatureIndexUDF udf = new AddFeatureIndexUDF();
+    private ListObjectInspector argumentOI = null;
+
+    @Override
+    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
+        if (arguments.length != 1) {
+            throw new UDFArgumentLengthException(
+                "add_feature_index() has a single argument: array<double> features");
+        }
+
+        switch (arguments[0].getCategory()) {
+            case LIST:
+                argumentOI = (ListObjectInspector) arguments[0];
+                ObjectInspector elmOI = argumentOI.getListElementObjectInspector();
+                if (elmOI.getCategory().equals(Category.PRIMITIVE)) {
+                    if (((PrimitiveObjectInspector) elmOI).getPrimitiveCategory() == PrimitiveCategory.DOUBLE) {
+                        break;
+                    }
+                }
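+                // falls through: anything but array<double> is rejected below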
+            default:
+                throw new UDFArgumentTypeException(0, "Type mismatch: features");
+        }
+
+        return ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
+    }
+
+    @Override
+    public Object evaluate(DeferredObject[] arguments) throws HiveException {
+        assert (arguments.length == 1);
+        @SuppressWarnings("unchecked")
+        final List<Double> input = (List<Double>) argumentOI.getList(arguments[0].get());
+        return udf.evaluate(input);
+    }
+
+    @Override
+    public String getDisplayString(String[] children) {
+        return "add_feature_index(" + Arrays.toString(children) + ")";
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/common/src/main/java/hivemall/ftvec/ExtractFeatureUDFWrapper.java
----------------------------------------------------------------------
diff --git a/spark/common/src/main/java/hivemall/ftvec/ExtractFeatureUDFWrapper.java b/spark/common/src/main/java/hivemall/ftvec/ExtractFeatureUDFWrapper.java
new file mode 100644
index 0000000..5924468
--- /dev/null
+++ b/spark/common/src/main/java/hivemall/ftvec/ExtractFeatureUDFWrapper.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall.ftvec;
+
+import java.util.Arrays;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.UDFType;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.serde2.objectinspector.*;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+
+/**
+ * A wrapper of [[hivemall.ftvec.ExtractFeatureUDF]].
+ *
+ * NOTE: This wrapper is needed to work around a Spark reflection issue: Spark cannot reliably infer
+ * Hive UDF return types through reflection, so the return type must be declared via an ObjectInspector.
+ */
+@Description(name = "extract_feature",
+        value = "_FUNC_(feature in string) - Returns a parsed feature as string")
+@UDFType(deterministic = true, stateful = false)
+public class ExtractFeatureUDFWrapper extends GenericUDF {
+    private ExtractFeatureUDF udf = new ExtractFeatureUDF();
+    private PrimitiveObjectInspector argumentOI = null;
+
+    @Override
+    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
+        if (arguments.length != 1) {
+            throw new UDFArgumentLengthException(
+                "extract_feature() has an single arguments: string feature");
+        }
+
+        argumentOI = (PrimitiveObjectInspector) arguments[0];
+        if (argumentOI.getPrimitiveCategory() != PrimitiveCategory.STRING) {
+            throw new UDFArgumentTypeException(0, "Type mismatch: feature");
+        }
+
+        return PrimitiveObjectInspectorFactory.javaStringObjectInspector;
+    }
+
+    @Override
+    public Object evaluate(DeferredObject[] arguments) throws HiveException {
+        assert (arguments.length == 1);
+        final String input = (String) argumentOI.getPrimitiveJavaObject(arguments[0].get());
+        return udf.evaluate(input);
+    }
+
+    @Override
+    public String getDisplayString(String[] children) {
+        return "extract_feature(" + Arrays.toString(children) + ")";
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/common/src/main/java/hivemall/ftvec/ExtractWeightUDFWrapper.java
----------------------------------------------------------------------
diff --git a/spark/common/src/main/java/hivemall/ftvec/ExtractWeightUDFWrapper.java b/spark/common/src/main/java/hivemall/ftvec/ExtractWeightUDFWrapper.java
new file mode 100644
index 0000000..8580247
--- /dev/null
+++ b/spark/common/src/main/java/hivemall/ftvec/ExtractWeightUDFWrapper.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall.ftvec;
+
+import java.util.Arrays;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.UDFType;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.serde2.objectinspector.*;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+
+/**
+ * A wrapper of [[hivemall.ftvec.ExtractWeightUDF]].
+ *
+ * NOTE: This wrapper is needed to work around a Spark reflection issue: Spark cannot reliably infer
+ * Hive UDF return types through reflection, so the return type must be declared via an ObjectInspector.
+ */
+@Description(name = "extract_weight",
+        value = "_FUNC_(feature in string) - Returns the weight of a feature as string")
+@UDFType(deterministic = true, stateful = false)
+public class ExtractWeightUDFWrapper extends GenericUDF {
+    private ExtractWeightUDF udf = new ExtractWeightUDF();
+    private PrimitiveObjectInspector argumentOI = null;
+
+    @Override
+    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
+        if (arguments.length != 1) {
+            throw new UDFArgumentLengthException(
+                "extract_weight() has an single arguments: string feature");
+        }
+
+        argumentOI = (PrimitiveObjectInspector) arguments[0];
+        if (argumentOI.getPrimitiveCategory() != PrimitiveCategory.STRING) {
+            throw new UDFArgumentTypeException(0, "Type mismatch: feature");
+        }
+
+        return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.DOUBLE);
+    }
+
+    @Override
+    public Object evaluate(DeferredObject[] arguments) throws HiveException {
+        assert (arguments.length == 1);
+        final String input = (String) argumentOI.getPrimitiveJavaObject(arguments[0].get());
+        return udf.evaluate(input);
+    }
+
+    @Override
+    public String getDisplayString(String[] children) {
+        return "extract_weight(" + Arrays.toString(children) + ")";
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/common/src/main/java/hivemall/ftvec/SortByFeatureUDFWrapper.java
----------------------------------------------------------------------
diff --git a/spark/common/src/main/java/hivemall/ftvec/SortByFeatureUDFWrapper.java b/spark/common/src/main/java/hivemall/ftvec/SortByFeatureUDFWrapper.java
new file mode 100644
index 0000000..584be6c
--- /dev/null
+++ b/spark/common/src/main/java/hivemall/ftvec/SortByFeatureUDFWrapper.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall.ftvec;
+
+import java.util.Arrays;
+import java.util.Map;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.UDFType;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.serde2.objectinspector.*;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.IntWritable;
+
+/**
+ * A wrapper of [[hivemall.ftvec.SortByFeatureUDF]].
+ *
+ * NOTE: This wrapper is needed to work around a Spark reflection issue: Spark cannot handle Map<>
+ * as a return type in a Hive UDF, so the return type must be declared via an ObjectInspector.
+ */
+@Description(name = "sort_by_feature",
+        value = "_FUNC_(map in map<int,float>) - Returns a sorted map")
+@UDFType(deterministic = true, stateful = false)
+public class SortByFeatureUDFWrapper extends GenericUDF {
+    private SortByFeatureUDF udf = new SortByFeatureUDF();
+    private MapObjectInspector argumentOI = null;
+
+    @Override
+    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
+        if (arguments.length != 1) {
+            throw new UDFArgumentLengthException(
+                "sorted_by_feature() has an single arguments: map<int, float> map");
+        }
+
+        switch (arguments[0].getCategory()) {
+            case MAP:
+                argumentOI = (MapObjectInspector) arguments[0];
+                ObjectInspector keyOI = argumentOI.getMapKeyObjectInspector();
+                ObjectInspector valueOI = argumentOI.getMapValueObjectInspector();
+                if (keyOI.getCategory().equals(Category.PRIMITIVE)
+                        && valueOI.getCategory().equals(Category.PRIMITIVE)) {
+                    final PrimitiveCategory keyCategory = ((PrimitiveObjectInspector) keyOI).getPrimitiveCategory();
+                    final PrimitiveCategory valueCategory = ((PrimitiveObjectInspector) valueOI).getPrimitiveCategory();
+                    if (keyCategory == PrimitiveCategory.INT
+                            && valueCategory == PrimitiveCategory.FLOAT) {
+                        break;
+                    }
+                }
+            default:
+                throw new UDFArgumentTypeException(0, "Type mismatch: map");
+        }
+
+
+        return ObjectInspectorFactory.getStandardMapObjectInspector(
+            argumentOI.getMapKeyObjectInspector(), argumentOI.getMapValueObjectInspector());
+    }
+
+    @Override
+    public Object evaluate(DeferredObject[] arguments) throws HiveException {
+        assert (arguments.length == 1);
+        @SuppressWarnings("unchecked")
+        final Map<IntWritable, FloatWritable> input = (Map<IntWritable, FloatWritable>) argumentOI.getMap(arguments[0].get());
+        return udf.evaluate(input);
+    }
+
+    @Override
+    public String getDisplayString(String[] children) {
+        return "sort_by_feature(" + Arrays.toString(children) + ")";
+    }
+}

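For illustration, the same pattern with a map-typed argument — again a minimal, hypothetical driver (not part of this commit); the class name and sample values are invented:

    import java.util.HashMap;
    import java.util.Map;

    import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject;
    import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
    import org.apache.hadoop.io.FloatWritable;
    import org.apache.hadoop.io.IntWritable;

    public class SortByFeatureDriver {
        public static void main(String[] args) throws Exception {
            SortByFeatureUDFWrapper udf = new SortByFeatureUDFWrapper();
            // int keys and float values arrive as Hive writables; the wrapper
            // echoes the input key/value inspectors back as its result type.
            ObjectInspector mapOI = ObjectInspectorFactory.getStandardMapObjectInspector(
                PrimitiveObjectInspectorFactory.writableIntObjectInspector,
                PrimitiveObjectInspectorFactory.writableFloatObjectInspector);
            udf.initialize(new ObjectInspector[] {mapOI});

            Map<IntWritable, FloatWritable> input = new HashMap<>();
            input.put(new IntWritable(3), new FloatWritable(0.1f));
            input.put(new IntWritable(1), new FloatWritable(0.7f));
            Object sorted = udf.evaluate(new DeferredObject[] {new DeferredJavaObject(input)});
            System.out.println(sorted); // keys in ascending order, e.g. {1=0.7, 3=0.1}
        }
    }
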
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/common/src/main/java/hivemall/ftvec/scaling/L2NormalizationUDFWrapper.java
----------------------------------------------------------------------
diff --git a/spark/common/src/main/java/hivemall/ftvec/scaling/L2NormalizationUDFWrapper.java b/spark/common/src/main/java/hivemall/ftvec/scaling/L2NormalizationUDFWrapper.java
new file mode 100644
index 0000000..db533be
--- /dev/null
+++ b/spark/common/src/main/java/hivemall/ftvec/scaling/L2NormalizationUDFWrapper.java
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall.ftvec.scaling;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.serde2.objectinspector.*;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.io.Text;
+
+/**
+ * A wrapper of [[hivemall.ftvec.scaling.L2NormalizationUDF]].
+ *
+ * NOTE: This wrapper is needed to work around a Spark reflection issue: Spark 1.3 cannot handle
+ * List<> as a return type in a Hive UDF, so the type must be declared via an ObjectInspector. This
+ * issue has been reported in SPARK-6747, so a future release of Spark may make this wrapper obsolete.
+ */
+public class L2NormalizationUDFWrapper extends GenericUDF {
+    private L2NormalizationUDF udf = new L2NormalizationUDF();
+
+    private transient List<Text> retValue = new ArrayList<Text>();
+    private transient Converter toListText = null;
+
+    @Override
+    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
+        if (arguments.length != 1) {
+            throw new UDFArgumentLengthException("normalize() has an only single argument.");
+        }
+
+        switch (arguments[0].getCategory()) {
+            case LIST:
+                ObjectInspector elmOI = ((ListObjectInspector) arguments[0]).getListElementObjectInspector();
+                if (elmOI.getCategory().equals(Category.PRIMITIVE)) {
+                    if (((PrimitiveObjectInspector) elmOI).getPrimitiveCategory() == PrimitiveCategory.STRING) {
+                        break;
+                    }
+                }
+            default:
+                throw new UDFArgumentTypeException(0,
+                    "normalize() must have List[String] as an argument, but "
+                            + arguments[0].getTypeName() + " was found.");
+        }
+
+        // Create an ObjectInspector converter for the arguments
+        ObjectInspector outputElemOI = ObjectInspectorFactory.getReflectionObjectInspector(
+            Text.class, ObjectInspectorOptions.JAVA);
+        ObjectInspector outputOI = ObjectInspectorFactory.getStandardListObjectInspector(outputElemOI);
+        toListText = ObjectInspectorConverters.getConverter(arguments[0], outputOI);
+
+        ObjectInspector listElemOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
+        ObjectInspector returnElemOI = ObjectInspectorUtils.getStandardObjectInspector(listElemOI);
+        return ObjectInspectorFactory.getStandardListObjectInspector(returnElemOI);
+    }
+
+    @Override
+    public Object evaluate(DeferredObject[] arguments) throws HiveException {
+        assert (arguments.length == 1);
+        @SuppressWarnings("unchecked")
+        final List<Text> input = (List<Text>) toListText.convert(arguments[0].get());
+        retValue = udf.evaluate(input);
+        return retValue;
+    }
+
+    @Override
+    public String getDisplayString(String[] children) {
+        return "normalize(" + Arrays.toString(children) + ")";
+    }
+}

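The converter plumbing above is standard Hive serde2 machinery. A minimal sketch of the same pattern in isolation, mirroring the wrapper's own converter construction (the class name and sample values are invented for the example):

    import java.util.Arrays;
    import java.util.List;

    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
    import org.apache.hadoop.io.Text;

    public class ConverterSketch {
        public static void main(String[] args) {
            // Input side: a plain java List<String>.
            ObjectInspector inputOI = ObjectInspectorFactory.getStandardListObjectInspector(
                PrimitiveObjectInspectorFactory.javaStringObjectInspector);
            // Output side: a list of hadoop Text, as the wrapper builds it.
            ObjectInspector textOI = ObjectInspectorFactory.getReflectionObjectInspector(
                Text.class, ObjectInspectorOptions.JAVA);
            ObjectInspector outputOI = ObjectInspectorFactory.getStandardListObjectInspector(textOI);

            Converter toListText = ObjectInspectorConverters.getConverter(inputOI, outputOI);
            @SuppressWarnings("unchecked")
            List<Text> converted = (List<Text>) toListText.convert(Arrays.asList("1:0.5", "2:0.5"));
            System.out.println(converted); // [1:0.5, 2:0.5] as Text values
        }
    }
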
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/common/src/main/java/hivemall/knn/lsh/MinHashesUDFWrapper.java
----------------------------------------------------------------------
diff --git a/spark/common/src/main/java/hivemall/knn/lsh/MinHashesUDFWrapper.java b/spark/common/src/main/java/hivemall/knn/lsh/MinHashesUDFWrapper.java
new file mode 100644
index 0000000..d3bcbe6
--- /dev/null
+++ b/spark/common/src/main/java/hivemall/knn/lsh/MinHashesUDFWrapper.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall.knn.lsh;
+
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.UDFType;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.serde2.objectinspector.*;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+
+/** A wrapper of [[hivemall.knn.lsh.MinHashesUDF]]. */
+@Description(
+        name = "minhashes",
+        value = "_FUNC_(features in array<string>, noWeight in boolean) - Returns hashed features as array<int>")
+@UDFType(deterministic = true, stateful = false)
+public class MinHashesUDFWrapper extends GenericUDF {
+    private MinHashesUDF udf = new MinHashesUDF();
+    private ListObjectInspector featuresOI = null;
+    private PrimitiveObjectInspector noWeightOI = null;
+
+    @Override
+    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
+        if (arguments.length != 2) {
+            throw new UDFArgumentLengthException(
+                "minhashes() has 2 arguments: array<string> features, boolean noWeight");
+        }
+
+        // Check argument types
+        switch (arguments[0].getCategory()) {
+            case LIST:
+                featuresOI = (ListObjectInspector) arguments[0];
+                ObjectInspector elmOI = featuresOI.getListElementObjectInspector();
+                if (elmOI.getCategory().equals(Category.PRIMITIVE)) {
+                    if (((PrimitiveObjectInspector) elmOI).getPrimitiveCategory() == PrimitiveCategory.STRING) {
+                        break;
+                    }
+                }
+            default:
+                throw new UDFArgumentTypeException(0, "Type mismatch: features");
+        }
+
+        noWeightOI = (PrimitiveObjectInspector) arguments[1];
+        if (noWeightOI.getPrimitiveCategory() != PrimitiveCategory.BOOLEAN) {
+            throw new UDFArgumentException("Type mismatch: noWeight");
+        }
+
+        return ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.INT));
+    }
+
+    @Override
+    public Object evaluate(DeferredObject[] arguments) throws HiveException {
+        assert (arguments.length == 2);
+        @SuppressWarnings("unchecked")
+        final List<String> features = (List<String>) featuresOI.getList(arguments[0].get());
+        final Boolean noWeight = PrimitiveObjectInspectorUtils.getBoolean(arguments[1].get(),
+            noWeightOI);
+        return udf.evaluate(features, noWeight);
+    }
+
+    @Override
+    public String getDisplayString(String[] children) {
+        /**
+         * TODO: Need to return hive-specific type names.
+         */
+        return "minhashes(" + Arrays.toString(children) + ")";
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/common/src/main/java/hivemall/tools/mapred/RowIdUDFWrapper.java
----------------------------------------------------------------------
diff --git a/spark/common/src/main/java/hivemall/tools/mapred/RowIdUDFWrapper.java b/spark/common/src/main/java/hivemall/tools/mapred/RowIdUDFWrapper.java
new file mode 100644
index 0000000..f386223
--- /dev/null
+++ b/spark/common/src/main/java/hivemall/tools/mapred/RowIdUDFWrapper.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall.tools.mapred;
+
+import java.util.UUID;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.UDFType;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+
+/** An alternative implementation of [[hivemall.tools.mapred.RowIdUDF]]. */
+@Description(
+        name = "rowid",
+        value = "_FUNC_() - Returns a generated row id of a form {TASK_ID}-{UUID}-{SEQUENCE_NUMBER}")
+@UDFType(deterministic = false, stateful = true)
+public class RowIdUDFWrapper extends GenericUDF {
+    // RowIdUDF is not used directly because Spark cannot
+    // handle HadoopUtils#getTaskId() correctly.
+
+    private long sequence;
+    private long taskId;
+
+    public RowIdUDFWrapper() {
+        this.sequence = 0L;
+        this.taskId = Thread.currentThread().getId();
+    }
+
+    @Override
+    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
+        if (arguments.length != 0) {
+            throw new UDFArgumentLengthException("row_number() has no argument.");
+        }
+
+        return PrimitiveObjectInspectorFactory.javaStringObjectInspector;
+    }
+
+    @Override
+    public Object evaluate(DeferredObject[] arguments) throws HiveException {
+        assert (arguments.length == 0);
+        sequence++;
+        /**
+         * TODO: Check if it is unique over all tasks in executors of Spark.
+         */
+        return taskId + "-" + UUID.randomUUID() + "-" + sequence;
+    }
+
+    @Override
+    public String getDisplayString(String[] children) {
+        return "row_number()";
+    }
+}

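For reference, a minimal, hypothetical sketch (not part of this commit) of how the generated ids look when the wrapper is driven by hand; the printed ids are illustrative:

    import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;

    public class RowIdSketch {
        public static void main(String[] args) throws Exception {
            RowIdUDFWrapper rowid = new RowIdUDFWrapper();
            rowid.initialize(new ObjectInspector[0]);
            // Each call bumps the per-instance sequence, and a fresh random UUID
            // is drawn per call, so ids stay unique even if thread ids collide.
            System.out.println(rowid.evaluate(new DeferredObject[0])); // e.g. 42-1f0c...-1
            System.out.println(rowid.evaluate(new DeferredObject[0])); // e.g. 42-9ab3...-2
        }
    }
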
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/common/src/main/scala/hivemall/HivemallException.scala
----------------------------------------------------------------------
diff --git a/spark/common/src/main/scala/hivemall/HivemallException.scala b/spark/common/src/main/scala/hivemall/HivemallException.scala
new file mode 100644
index 0000000..53f6756
--- /dev/null
+++ b/spark/common/src/main/scala/hivemall/HivemallException.scala
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall
+
+class HivemallException(message: String, cause: Throwable)
+    extends Exception(message, cause) {
+
+  def this(message: String) = this(message, null)
+}

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/common/src/main/scala/org/apache/spark/ml/feature/HivemallLabeledPoint.scala
----------------------------------------------------------------------
diff --git a/spark/common/src/main/scala/org/apache/spark/ml/feature/HivemallLabeledPoint.scala b/spark/common/src/main/scala/org/apache/spark/ml/feature/HivemallLabeledPoint.scala
new file mode 100644
index 0000000..3fb2d18
--- /dev/null
+++ b/spark/common/src/main/scala/org/apache/spark/ml/feature/HivemallLabeledPoint.scala
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.spark.ml.feature
+
+import java.util.StringTokenizer
+
+import scala.collection.mutable.ListBuffer
+
+import hivemall.HivemallException
+
+// Used for DataFrame#explode
+case class HivemallFeature(feature: String)
+
+/**
+ * Class that represents the features and labels of a data point for Hivemall.
+ *
+ * @param label Label for this data point.
+ * @param features List of features for this data point.
+ */
+case class HivemallLabeledPoint(label: Float = 0.0f, features: Seq[String]) {
+  override def toString: String = {
+    "%s,%s".format(label, features.mkString("[", ",", "]"))
+  }
+}
+
+object HivemallLabeledPoint {
+
+  // Simple parser for HivemallLabeledPoint
+  def parse(s: String): HivemallLabeledPoint = {
+    val (label, features) = s.indexOf(',') match {
+      case d if d > 0 => (s.substring(0, d), s.substring(d + 1))
+      case _ => ("0.0", "[]") // Dummy
+    }
+    HivemallLabeledPoint(label.toFloat, parseTuple(new StringTokenizer(features, "[],", true)))
+  }
+
+  // TODO: Support parsing rows without labels
+  private[this] def parseTuple(tokenizer: StringTokenizer): Seq[String] = {
+    val items = ListBuffer.empty[String]
+    var parsing = true
+    var allowDelim = false
+    while (parsing && tokenizer.hasMoreTokens()) {
+      val token = tokenizer.nextToken()
+      if (token == "[") {
+        items ++= parseTuple(tokenizer)
+        parsing = false
+        allowDelim = true
+      } else if (token == ",") {
+        if (allowDelim) {
+          allowDelim = false
+        } else {
+          throw new HivemallException("Found ',' at a wrong position.")
+        }
+      } else if (token == "]") {
+        parsing = false
+      } else {
+        items.append(token)
+        allowDelim = true
+      }
+    }
+    if (parsing) {
+      throw new HivemallException(s"A tuple must end with ']'.")
+    }
+    items
+  }
+}

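A worked example of the parser above: HivemallLabeledPoint.parse("1.0,[1:0.5,3:0.2]") splits at the first comma, giving label 1.0f and features Seq("1:0.5", "3:0.2"). An input with no comma before the tuple falls back to the dummy pair (0.0, empty), while a misplaced ',' or a tuple that never closes with ']' raises HivemallException, as enforced in parseTuple.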

[4/4] incubator-hivemall git commit: Close #131: [v0.5.0-rc3] Merge v0.5.0 branch

Posted by my...@apache.org.
Close #131: [v0.5.0-rc3] Merge v0.5.0 branch


Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/3a718713
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/3a718713
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/3a718713

Branch: refs/heads/master
Commit: 3a718713afb612848312afae74301ec2cbf1d8a2
Parents: 448847f
Author: Makoto Yui <my...@apache.org>
Authored: Tue Feb 20 16:17:51 2018 +0900
Committer: Makoto Yui <my...@apache.org>
Committed: Tue Feb 20 16:17:51 2018 +0900

----------------------------------------------------------------------
 .gitignore                                      |   1 +
 .rat-excludes                                   |   6 +-
 LICENSE                                         | 120 +++++++
 NOTICE                                          |  82 +----
 VERSION                                         |   2 +-
 bin/build.sh                                    |   1 -
 bin/build_xgboost.sh                            |  87 -----
 bin/maven_central_release.sh                    |  33 --
 core/pom.xml                                    | 112 +------
 .../main/java/hivemall/HivemallConstants.java   |   2 +-
 .../hivemall/ftvec/ranking/BprSamplingUDTF.java |  38 ++-
 .../smile/classification/DecisionTree.java      |  24 +-
 .../smile/regression/RegressionTree.java        |  20 +-
 .../hivemall/smile/tools/TreePredictUDF.java    |  63 +++-
 .../main/java/hivemall/utils/codec/Base91.java  |  20 +-
 .../hivemall/utils/lang/ExceptionUtils.java     | 118 +++++++
 .../main/java/hivemall/utils/math/FastMath.java |  67 ++--
 .../main/resources/META-INF/LICENSE-jafama.txt  | 202 +++++++++++
 .../main/resources/META-INF/LICENSE-smile.txt   | 203 +++++++++++
 dist/pom.xml                                    | 163 +++++++++
 docs/gitbook/binaryclass/news20_rf.md           |   5 +-
 docs/gitbook/binaryclass/titanic_rf.md          |  10 +-
 docs/gitbook/multiclass/iris_randomforest.md    |   8 +-
 mixserv/pom.xml                                 |  43 +--
 nlp/pom.xml                                     | 132 +-------
 .../hivemall/nlp/tokenizer/KuromojiUDF.java     |  52 +--
 pom.xml                                         | 318 ++++++++++--------
 spark/common/pom.xml                            |  64 ++++
 ...isticRegressionDataGeneratorUDTFWrapper.java | 109 ++++++
 .../java/hivemall/ftvec/AddBiasUDFWrapper.java  |  83 +++++
 .../ftvec/AddFeatureIndexUDFWrapper.java        |  85 +++++
 .../ftvec/ExtractFeatureUDFWrapper.java         |  73 ++++
 .../hivemall/ftvec/ExtractWeightUDFWrapper.java |  73 ++++
 .../hivemall/ftvec/SortByFeatureUDFWrapper.java |  92 +++++
 .../scaling/L2NormalizationUDFWrapper.java      |  95 ++++++
 .../hivemall/knn/lsh/MinHashesUDFWrapper.java   |  93 ++++++
 .../hivemall/tools/mapred/RowIdUDFWrapper.java  |  72 ++++
 .../main/scala/hivemall/HivemallException.scala |  25 ++
 .../spark/ml/feature/HivemallLabeledPoint.scala |  82 +++++
 spark/pom.xml                                   | 295 ++++++++++++++++
 spark/scalastyle-config.xml                     | 333 +++++++++++++++++++
 spark/spark-2.0/pom.xml                         | 147 +-------
 .../spark/streaming/HivemallStreamingOps.scala  |  47 +++
 .../apache/spark/sql/hive/HiveUdfSuite.scala    |   2 +-
 .../spark/sql/hive/HivemallOpsSuite.scala       |   2 +-
 spark/spark-2.1/pom.xml                         | 145 +-------
 .../spark/streaming/HivemallStreamingOps.scala  |  47 +++
 .../apache/spark/sql/hive/HiveUdfSuite.scala    |   2 +-
 .../spark/sql/hive/HivemallOpsSuite.scala       |   2 +-
 spark/spark-2.2/pom.xml                         | 167 ++--------
 .../spark/sql/hive/HivemallGroupedDataset.scala |   2 +-
 .../spark/streaming/HivemallStreamingOps.scala  |  47 +++
 .../apache/spark/sql/hive/HiveUdfSuite.scala    |   2 +-
 .../spark/sql/hive/HivemallOpsSuite.scala       |   2 +-
 spark/spark-common/pom.xml                      | 146 --------
 spark/spark-common/scalastyle-config.xml        | 333 -------------------
 ...isticRegressionDataGeneratorUDTFWrapper.java | 109 ------
 .../java/hivemall/ftvec/AddBiasUDFWrapper.java  |  83 -----
 .../ftvec/AddFeatureIndexUDFWrapper.java        |  85 -----
 .../ftvec/ExtractFeatureUDFWrapper.java         |  73 ----
 .../hivemall/ftvec/ExtractWeightUDFWrapper.java |  73 ----
 .../hivemall/ftvec/SortByFeatureUDFWrapper.java |  92 -----
 .../scaling/L2NormalizationUDFWrapper.java      |  95 ------
 .../hivemall/knn/lsh/MinHashesUDFWrapper.java   |  93 ------
 .../hivemall/tools/mapred/RowIdUDFWrapper.java  |  72 ----
 .../main/scala/hivemall/HivemallException.scala |  25 --
 .../spark/ml/feature/HivemallLabeledPoint.scala |  82 -----
 .../spark/streaming/HivemallStreamingOps.scala  |  47 ---
 src/site/resources/LICENSE-font_awesome.txt     |  86 +++++
 xgboost/lib/xgboost4j-0.60-0.10.jar             | Bin 1424975 -> 0 bytes
 xgboost/pom.xml                                 | 143 +-------
 .../java/hivemall/xgboost/XGBoostUtils.java     |   4 +-
 .../tools/XGBoostMulticlassPredictUDTF.java     |  15 +-
 73 files changed, 3023 insertions(+), 2753 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
index 84b63c8..3ba5593 100644
--- a/.gitignore
+++ b/.gitignore
@@ -30,3 +30,4 @@ release.properties
 \#*#
 pom.xml.next
 pom.xml.tag
+.cache-main

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/.rat-excludes
----------------------------------------------------------------------
diff --git a/.rat-excludes b/.rat-excludes
index fb6ce1c..fcb4b31 100644
--- a/.rat-excludes
+++ b/.rat-excludes
@@ -26,4 +26,8 @@ resources/eclipse-style.xml
 **/*.spark
 **/*.hql
 docs/gitbook/_book/**
-docs/gitbook/node_modules/**
\ No newline at end of file
+docs/gitbook/node_modules/**
+**/release.properties
+**/derby.log
+**/LICENSE-*.txt
+**/Base91.java

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/LICENSE
----------------------------------------------------------------------
diff --git a/LICENSE b/LICENSE
index f433b1a..26b11dd 100644
--- a/LICENSE
+++ b/LICENSE
@@ -175,3 +175,123 @@
       of your accepting any such warranty or additional liability.
 
    END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+
+APACHE HIVEMALL SUBCOMPONENTS:
+
+The Apache Hivemall project contains subcomponents with separate copyright
+notices and license terms. Your use of the source code for these
+subcomponents is subject to the terms and conditions of the following
+licenses.
+
+---------------------------------------------------------------------------
+The Apache License, Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+---------------------------------------------------------------------------
+
+This product bundles a modified version of 'Smile' which is licensed 
+under the Apache License Version 2.0, specifically for the Random Forest module.
+For details, see https://github.com/haifengl/smile/
+   
+   You can find a copy of the License at
+
+       core/src/main/resources/META-INF/LICENSE-smile.txt
+   
+   which is placed under META-INF/ in a jar.
+
+This product bundles a modified version of `Jafama` which is licensed 
+under the Apache License Version 2.0, specifically for FastMath.java.
+For details, see https://github.com/jeffhain/jafama/
+
+   You can find a copy of the License at
+
+       core/src/main/resources/META-INF/LICENSE-jafama.txt
+   
+   which is placed under META-INF/ in a jar.
+
+---------------------------------------------------------------------------
+ The 3-Clause BSD License (https://opensource.org/licenses/BSD-3-Clause)
+---------------------------------------------------------------------------
+
+This product bundles a modified version of Joachim Henke's `Base91 
+Encoder/Decoder` which is licensed under the BSD 3-Clause License, 
+specifically for Base91.java.
+For details, see https://github.com/bwaldvogel/base91
+
+   Copyright (c) 2000-2006 Joachim Henke
+   All rights reserved.
+   
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+   
+    - Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+    - Redistributions in binary form must reproduce the above copyright notice,
+      this list of conditions and the following disclaimer in the documentation
+      and/or other materials provided with the distribution.
+    - Neither the name of Joachim Henke nor the names of his contributors may
+      be used to endorse or promote products derived from this software without
+      specific prior written permission.
+   
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+
+---------------------------------------------------------------------------
+ Public Domain License
+---------------------------------------------------------------------------
+
+This product bundles public domain software derived from `fdlibm`, 
+specifically for FastMath.java.
+For details, see http://www.netlib.org/fdlibm/fdlibm.h
+
+   Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+   
+   Developed at SunSoft, a Sun Microsystems, Inc. business.
+   Permission to use, copy, modify, and distribute this
+   software is freely granted, provided that this notice
+   is preserved.
+
+---------------------------------------------------------------------------
+ The SIL Open Font License (https://opensource.org/licenses/OFL-1.1)
+---------------------------------------------------------------------------
+
+This product bundles `Font-awesome` fonts which are licensed under the
+SIL Open Font License (OFL) 1.1, specifically for the project site.
+For details, see http://fontawesome.io/
+
+   You can find a copy of the License at
+
+       src/site/resources/LICENSE-font_awesome.txt

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/NOTICE
----------------------------------------------------------------------
diff --git a/NOTICE b/NOTICE
index bfc4af8..34b5f5d 100644
--- a/NOTICE
+++ b/NOTICE
@@ -1,81 +1,13 @@
 Apache Hivemall
-Copyright 2016 and onwards The Apache Software Foundation
+Copyright 2016-2018 The Apache Software Foundation
 
 This product includes software developed at
 The Apache Software Foundation (http://www.apache.org/).
 
-Copyright (C) 2013-2015 National Institute of Advanced Industrial Science and Technology (AIST)
-Copyright (C) 2015-2016 Makoto Yui
-Copyright (C) 2015-2016 Treasure Data, Inc.
+This product is based on source code originally developed by AIST and Treasure Data, Inc.
+They have been licensed to the Apache Software Foundation under Software Grant Agreements from 
+the following individuals and organizations:
 
-------------------------------------------------------------------------------------------------------
-Copyright notifications which have been relocated from source files
-
-o hivemall/core/src/main/java/hivemall/smile/classification/DecisionTree.java
-  hivemall/core/src/main/java/hivemall/smile/regression/RegressionTree.java
-
-    Copyright (c) 2010 Haifeng Li
-
-    https://github.com/haifengl/smile
-    Licensed under the Apache License, Version 2.0
-
-o hivemall/core/src/main/java/hivemall/utils/codec/Base91.java
-
-    Copyright (c) 2000-2006 Joachim Henke
-
-    https://github.com/bwaldvogel/base91
-    Licensed under the BSD 3-Clause License
-
-o hivemall/core/src/main/java/hivemall/utils/collections/OpenHashMap.java
-
-    Copyright (C) 2010 catchpole.net
-
-    https://github.com/slipperyseal/atomicobjects/
-    Licensed under the Apache License, Version 2.0
-
-o hivemall/core/src/main/java/hivemall/utils/math/FastMath.java
-
-     Copyright 2012-2015 Jeff Hain
-     
-     https://github.com/jeffhain/jafama/
-     Licensed under the Apache License, Version 2.0
-     
-     Copyright (C) 1993 by Sun Microsystems, Inc.
-     
-     Permission to use, copy, modify, and distribute this software is freely granted, provided that this notice is preserved.
-
-------------------------------------------------------------------------------------------------------
-Copyright notifications which have been relocated from ASF projects
-
-o hivemall/core/src/main/java/hivemall/utils/math/MathUtils.java#erfInv()
-
-    Copyright (C) 2003-2016 The Apache Software Foundation.
-
-    http://commons.apache.org/proper/commons-math/
-    Licensed under the Apache License, Version 2.0
-
-o hivemall/core/src/main/java/hivemall/utils/buffer/DynamicByteArray.java
-
-    Copyright 2013-2015 The Apache Software Foundation
-
-    https://orc.apache.org/
-    Licensed under the Apache License, Version 2.0
-
-o hivemall/spark/spark-2.0/extra-src/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala
-  hivemall/spark/spark-2.0/src/test/scala/org/apache/spark/sql/QueryTest.scala
-  hivemall/spark/spark-2.0/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala
-  hivemall/spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/test/TestHiveSingleton.scala
-  hivemall/spark/spark-2.1/extra-src/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala
-  hivemall/spark/spark-2.1/src/test/scala/org/apache/spark/sql/QueryTest.scala
-  hivemall/spark/spark-2.1/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala
-  hivemall/spark/spark-2.1/src/test/scala/org/apache/spark/sql/hive/test/TestHiveSingleton.scala
-  hivemall/spark/spark-2.1/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
-  hivemall/spark/spark-2.1/src/test/scala/org/apache/spark/sql/test/SQLTestData.scala
-  hivemall/spark/spark-2.1/src/test/scala/org/apache/spark/sql/execution/benchmark/BenchmarkBase.scala
-
-    Copyright (C) 2014-2017 The Apache Software Foundation.
-
-    http://spark.apache.org/
-    Licensed under the Apache License, Version 2.0
- 
- 
\ No newline at end of file
+ - Copyright 2013-2015 National Institute of Advanced Industrial Science and Technology (AIST)
+ - Copyright 2015-2016 Makoto Yui
+ - Copyright 2015-2016 Treasure Data, Inc.

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/VERSION
----------------------------------------------------------------------
diff --git a/VERSION b/VERSION
index 17de5ad..89b6d66 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.5.0-incubating-SNAPSHOT
+0.5.1-incubating-SNAPSHOT

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/bin/build.sh
----------------------------------------------------------------------
diff --git a/bin/build.sh b/bin/build.sh
index 8487d70..05d1f8f 100755
--- a/bin/build.sh
+++ b/bin/build.sh
@@ -30,5 +30,4 @@ if [ "$HIVEMALL_HOME" = "" ]; then
 fi
 
 cd $HIVEMALL_HOME
-mvn validate -Pxgboost
 mvn clean package -Dskiptests=true -Dmaven.test.skip=true

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/bin/build_xgboost.sh
----------------------------------------------------------------------
diff --git a/bin/build_xgboost.sh b/bin/build_xgboost.sh
deleted file mode 100755
index 0bebcf8..0000000
--- a/bin/build_xgboost.sh
+++ /dev/null
@@ -1,87 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-
-# xgboost requires g++-4.6 or higher (https://github.com/dmlc/xgboost/blob/master/doc/build.md),
-# so we need to first check if the requirement is satisfied.
-COMPILER_REQUIRED_VERSION="4.6"
-COMPILER_VERSION=`g++ --version 2> /dev/null`
-
-# Check if GNU g++ installed
-if [ $? = 127 ]; then
-  echo "First, you need to install g++"
-  exit 1
-elif [[ "$COMPILER_VERSION" = *LLVM* ]]; then
-  echo "You must use GNU g++, but the detected compiler was clang++"
-  exit 1
-fi
-
-COMPILER_VERSION_NUMBER=`echo $COMPILER_VERSION | grep ^g++ | \
-  awk 'match($0, /[0-9]+\.[0-9]+\.[0-9]+/) {print substr($0, RSTART, RLENGTH)}'`
-
-# See simple version normalization: http://stackoverflow.com/questions/16989598/bash-comparing-version-numbers
-function version { echo "$@" | awk -F. '{ printf("%03d%03d%03d\n", $1,$2,$3); }'; }
-if [ $(version $COMPILER_VERSION_NUMBER) -lt $(version $COMPILER_REQUIRED_VERSION) ]; then
-  echo "You must compile xgboost with GNU g++-$COMPILER_REQUIRED_VERSION or higher," \
-    "but the detected compiler was g++-$COMPILER_VERSION_NUMBER"
-  exit 1
-fi
-
-# Target commit hash value
-XGBOOST_HASHVAL='7ab15a0b31c870c7779691639f521df3ccd4a56e'
-
-# Move to a top directory
-if [ "$HIVEMALL_HOME" = "" ]; then
-  if [ -e ../bin/${0##*/} ]; then
-    HIVEMALL_HOME=`pwd`/..
-  elif [ -e ./bin/${0##*/} ]; then
-    HIVEMALL_HOME=`pwd`
-  else
-    echo "env HIVEMALL_HOME not defined"
-    exit 1
-  fi
-fi
-
-cd $HIVEMALL_HOME
-
-# Final output dir for a custom-compiled xgboost binary
-HIVEMALL_LIB_DIR="$HIVEMALL_HOME/xgboost/src/main/resources/lib/"
-rm -rf $HIVEMALL_LIB_DIR >> /dev/null
-mkdir -p $HIVEMALL_LIB_DIR
-
-# Move to an output directory
-XGBOOST_OUT="$HIVEMALL_HOME/target/xgboost-$XGBOOST_HASHVAL"
-rm -rf $XGBOOST_OUT >> /dev/null
-mkdir -p $XGBOOST_OUT
-cd $XGBOOST_OUT
-
-# Fetch xgboost sources
-git clone --progress https://github.com/maropu/xgboost.git
-cd xgboost
-git checkout $XGBOOST_HASHVAL
-
-# Resolve dependent sources
-git submodule init
-git submodule update
-
-# Copy a built binary to the output
-cd jvm-packages
-ENABLE_STATIC_LINKS=1 ./create_jni.sh
-cp ./lib/libxgboost4j.* "$HIVEMALL_LIB_DIR"
-

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/bin/maven_central_release.sh
----------------------------------------------------------------------
diff --git a/bin/maven_central_release.sh b/bin/maven_central_release.sh
deleted file mode 100755
index 8a7918f..0000000
--- a/bin/maven_central_release.sh
+++ /dev/null
@@ -1,33 +0,0 @@
-#!/bin/sh
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-
-if [ "$HIVEMALL_HOME" = "" ]; then
-  if [ -e ../bin/${0##*/} ]; then
-    HIVEMALL_HOME=".."
-  elif [ -e ./bin/${0##*/} ]; then
-    HIVEMALL_HOME="."
-  else
-    echo "env HIVEMALL_HOME not defined"
-    exit 1
-  fi
-fi
-
-cd $HIVEMALL_HOME
-mvn clean deploy -DperformRelease=true -Dskiptests=true -Dmaven.test.skip=true

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/core/pom.xml
----------------------------------------------------------------------
diff --git a/core/pom.xml b/core/pom.xml
index c79124a..82cb369 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -16,14 +16,13 @@
   specific language governing permissions and limitations
   under the License.
 -->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
 	<modelVersion>4.0.0</modelVersion>
 
 	<parent>
 		<groupId>org.apache.hivemall</groupId>
 		<artifactId>hivemall</artifactId>
-		<version>0.5.0-incubating-SNAPSHOT</version>
+		<version>0.5.1-incubating-SNAPSHOT</version>
 		<relativePath>../pom.xml</relativePath>
 	</parent>
 
@@ -40,67 +39,41 @@
 		<dependency>
 			<groupId>org.apache.hadoop</groupId>
 			<artifactId>hadoop-common</artifactId>
-			<version>${hadoop.version}</version>
 			<scope>provided</scope>
 		</dependency>
 		<dependency>
 			<groupId>org.apache.hadoop</groupId>
 			<artifactId>hadoop-mapreduce-client-core</artifactId>
-			<version>${hadoop.version}</version>
 			<scope>provided</scope>
 		</dependency>
 		<dependency>
 			<groupId>org.apache.hive</groupId>
 			<artifactId>hive-exec</artifactId>
-			<version>${hive.version}</version>
 			<scope>provided</scope>
-			<exclusions>
-				<exclusion>
-					<artifactId>jetty</artifactId>
-					<groupId>org.mortbay.jetty</groupId>
-				</exclusion>
-				<exclusion>
-					<groupId>javax.jdo</groupId>
-					<artifactId>jdo2-api</artifactId>
-				</exclusion>
-				<exclusion>
-					<groupId>asm-parent</groupId>
-					<artifactId>asm-parent</artifactId>
-				</exclusion>
-				<exclusion>
-					<groupId>asm</groupId>
-					<artifactId>asm</artifactId>
-				</exclusion>
-			</exclusions>
 		</dependency>
 		<dependency>
 			<groupId>commons-cli</groupId>
 			<artifactId>commons-cli</artifactId>
-			<version>1.2</version>
 			<scope>provided</scope>
 		</dependency>
 		<dependency>
 			<groupId>commons-logging</groupId>
 			<artifactId>commons-logging</artifactId>
-			<version>1.0.4</version>
 			<scope>provided</scope>
 		</dependency>
 		<dependency>
 			<groupId>log4j</groupId>
 			<artifactId>log4j</artifactId>
-			<version>1.2.17</version>
 			<scope>provided</scope>
 		</dependency>
 		<dependency>
 			<groupId>javax.jdo</groupId>
 			<artifactId>jdo2-api</artifactId>
-			<version>2.3-eb</version>
 			<scope>provided</scope>
 		</dependency>
 		<dependency>
 			<groupId>com.google.guava</groupId>
 			<artifactId>guava</artifactId>
-			<version>${guava.version}</version>
 			<scope>provided</scope>
 		</dependency>
 
@@ -160,104 +133,23 @@
 		<dependency>
 			<groupId>junit</groupId>
 			<artifactId>junit</artifactId>
-			<version>${junit.version}</version>
 			<scope>test</scope>
 		</dependency>
 		<dependency>
 			<groupId>org.mockito</groupId>
 			<artifactId>mockito-core</artifactId>
-			<version>1.10.19</version>
 			<scope>test</scope>
 		</dependency>
 		<dependency>
 			<groupId>org.powermock</groupId>
 			<artifactId>powermock-module-junit4</artifactId>
-			<version>1.6.3</version>
 			<scope>test</scope>
 		</dependency>
 		<dependency>
 			<groupId>org.powermock</groupId>
 			<artifactId>powermock-api-mockito</artifactId>
-			<version>1.6.3</version>
 			<scope>test</scope>
 		</dependency>
 	</dependencies>
 
-	<build>
-		<directory>target</directory>
-		<outputDirectory>target/classes</outputDirectory>
-		<finalName>${project.artifactId}-${project.version}</finalName>
-		<testOutputDirectory>target/test-classes</testOutputDirectory>
-		<plugins>
-			<!-- hivemall-core-xx.jar -->
-			<plugin>
-				<groupId>org.apache.maven.plugins</groupId>
-				<artifactId>maven-jar-plugin</artifactId>
-				<version>2.5</version>
-				<configuration>
-					<finalName>${project.artifactId}-${project.version}</finalName>
-					<outputDirectory>${project.parent.build.directory}</outputDirectory>
-				</configuration>
-			</plugin>
-			<!-- hivemall-core-xx-with-dependencies.jar including minimum dependencies -->
-			<plugin>
-				<groupId>org.apache.maven.plugins</groupId>
-				<artifactId>maven-shade-plugin</artifactId>
-				<version>3.1.0</version>
-				<executions>
-					<execution>
-						<id>jar-with-dependencies</id>
-						<phase>package</phase>
-						<goals>
-							<goal>shade</goal>
-						</goals>
-						<configuration>
-							<finalName>${project.artifactId}-${project.version}-with-dependencies</finalName>
-							<outputDirectory>${project.parent.build.directory}</outputDirectory>
-							<minimizeJar>true</minimizeJar>
-							<createDependencyReducedPom>false</createDependencyReducedPom>
-							<artifactSet>
-								<includes>
-									<include>io.netty:netty-all</include>
-									<include>com.github.haifengl:smile-core</include>
-									<include>com.github.haifengl:smile-math</include>
-									<include>com.github.haifengl:smile-data</include>
-									<include>org.tukaani:xz</include>
-									<include>org.apache.commons:commons-math3</include>
-									<include>org.roaringbitmap:RoaringBitmap</include>
-									<include>it.unimi.dsi:fastutil</include>
-									<include>com.clearspring.analytics:stream</include>
-								</includes>
-							</artifactSet>
-							<transformers>
-								<transformer
-									implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
-									<manifestEntries>
-										<Implementation-Title>${project.name}</Implementation-Title>
-										<Implementation-Version>${project.version}</Implementation-Version>
-										<Implementation-Vendor>${project.organization.name}</Implementation-Vendor>
-									</manifestEntries>
-								</transformer>
-								<!--
-								<transformer implementation="org.apache.maven.plugins.shade.resource.ApacheNoticeResourceTransformer">
-									<addHeader>false</addHeader>
-								</transformer>
-								-->
-							</transformers>
-							<filters>
-								<filter>
-									<artifact>*:*</artifact>
-									<excludes>
-										<exclude>META-INF/LICENSE.txt</exclude>
-										<exclude>META-INF/NOTICE.txt</exclude>
-									</excludes>
-								</filter>
-							</filters>
-						</configuration>
-					</execution>
-				</executions>
-			</plugin>
-		</plugins>
-	</build>
-
 </project>

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/core/src/main/java/hivemall/HivemallConstants.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/HivemallConstants.java b/core/src/main/java/hivemall/HivemallConstants.java
index 5e6e407..955aeb1 100644
--- a/core/src/main/java/hivemall/HivemallConstants.java
+++ b/core/src/main/java/hivemall/HivemallConstants.java
@@ -20,7 +20,7 @@ package hivemall;
 
 public final class HivemallConstants {
 
-    public static final String VERSION = "0.5.0-incubating-SNAPSHOT";
+    public static final String VERSION = "0.5.1-incubating-SNAPSHOT";
 
     public static final String BIAS_CLAUSE = "0";
     public static final int BIAS_CLAUSE_HASHVAL = 0;

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/core/src/main/java/hivemall/ftvec/ranking/BprSamplingUDTF.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/ftvec/ranking/BprSamplingUDTF.java b/core/src/main/java/hivemall/ftvec/ranking/BprSamplingUDTF.java
index ab418ed..821c734 100644
--- a/core/src/main/java/hivemall/ftvec/ranking/BprSamplingUDTF.java
+++ b/core/src/main/java/hivemall/ftvec/ranking/BprSamplingUDTF.java
@@ -18,12 +18,6 @@
  */
 package hivemall.ftvec.ranking;
 
-import hivemall.UDTFWithOptions;
-import hivemall.utils.collections.lists.IntArrayList;
-import hivemall.utils.hadoop.HiveUtils;
-import hivemall.utils.lang.BitUtils;
-import hivemall.utils.lang.Primitives;
-
 import java.util.ArrayList;
 import java.util.BitSet;
 import java.util.Random;
@@ -45,6 +39,12 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectIn
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
 import org.apache.hadoop.io.IntWritable;
 
+import hivemall.UDTFWithOptions;
+import hivemall.utils.collections.lists.IntArrayList;
+import hivemall.utils.hadoop.HiveUtils;
+import hivemall.utils.lang.BitUtils;
+import hivemall.utils.lang.Primitives;
+
 @Description(name = "bpr_sampling",
         value = "_FUNC_(int userId, List<int> posItems [, const string options])"
                 + "- Returns a relation consists of <int userId, int itemId>")
@@ -54,9 +54,13 @@ public final class BprSamplingUDTF extends UDTFWithOptions {
     private ListObjectInspector itemListOI;
     private PrimitiveObjectInspector itemElemOI;
 
-    private PositiveOnlyFeedback feedback;
+    // Need to avoid
+    // org.apache.hive.com.esotericsoftware.kryo.KryoException: java.lang.ArrayIndexOutOfBoundsException: 1
+    @Nullable
+    private transient PositiveOnlyFeedback feedback;
 
     // sampling options
+    private int maxItemId;
     private float samplingRate;
     private boolean withoutReplacement;
     private boolean pairSampling;
@@ -106,8 +110,7 @@ public final class BprSamplingUDTF extends UDTFWithOptions {
             }
         }
 
-        this.feedback = pairSampling ? new PerEventPositiveOnlyFeedback(maxItemId)
-                : new PositiveOnlyFeedback(maxItemId);
+        this.maxItemId = maxItemId;
         this.samplingRate = samplingRate;
         this.withoutReplacement = withoutReplacement;
         this.pairSampling = pairSampling;
@@ -147,6 +150,11 @@ public final class BprSamplingUDTF extends UDTFWithOptions {
 
     @Override
     public void process(@Nonnull Object[] args) throws HiveException {
+        if (feedback == null) {
+            this.feedback = pairSampling ? new PerEventPositiveOnlyFeedback(maxItemId)
+                    : new PositiveOnlyFeedback(maxItemId);
+        }
+
         int userId = PrimitiveObjectInspectorUtils.getInt(args[0], userOI);
         validateIndex(userId);
 
@@ -202,7 +210,8 @@ public final class BprSamplingUDTF extends UDTFWithOptions {
         }
     }
 
-    private void forward(final int user, final int posItem, final int negItem) throws HiveException {
+    private void forward(final int user, final int posItem, final int negItem)
+            throws HiveException {
         assert (user >= 0) : user;
         assert (posItem >= 0) : posItem;
         assert (negItem >= 0) : negItem;
@@ -260,9 +269,8 @@ public final class BprSamplingUDTF extends UDTFWithOptions {
      * Caution: This is not a perfect 'without replacement' sampling, but it does sample
      * 'without replacement' for positive feedback.
      */
-    private void uniformUserSamplingWithoutReplacement(
-            @Nonnull final PositiveOnlyFeedback feedback, final int numSamples)
-            throws HiveException {
+    private void uniformUserSamplingWithoutReplacement(@Nonnull final PositiveOnlyFeedback feedback,
+            final int numSamples) throws HiveException {
         int numUsers = feedback.getNumUsers();
         if (numUsers == 0) {
             return;
@@ -280,8 +288,8 @@ public final class BprSamplingUDTF extends UDTFWithOptions {
             int nthUser = rand.nextInt(numUsers);
             int user = BitUtils.indexOfSetBit(userBits, nthUser);
             if (user == -1) {
-                throw new HiveException("Cannot find " + nthUser + "-th user among " + numUsers
-                        + " users");
+                throw new HiveException(
+                    "Cannot find " + nthUser + "-th user among " + numUsers + " users");
             }
 
             IntArrayList posItems = feedback.getItems(user, true);
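
The hunk above is an instance of a general pattern: Hive serializes UDTF instances with
Kryo when shipping the operator plan, so heavyweight state is marked transient and rebuilt
lazily on first use, while only plain settings (here, maxItemId) remain serialized fields.
A minimal sketch of the idiom, with hypothetical names (LazyStateExample, feedbackBuffer),
not Hivemall API:

    public final class LazyStateExample implements java.io.Serializable {
        private int maxItemId;                  // plain settings serialize fine

        // transient: skipped by Kryo/Java serialization, avoiding the KryoException above
        private transient int[] feedbackBuffer;

        public void process(final int itemId) {
            if (feedbackBuffer == null) {       // rebuild lazily after deserialization
                this.feedbackBuffer = new int[maxItemId + 1];
            }
            feedbackBuffer[itemId]++;
        }
    }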

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/core/src/main/java/hivemall/smile/classification/DecisionTree.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/smile/classification/DecisionTree.java b/core/src/main/java/hivemall/smile/classification/DecisionTree.java
index f2ff560..e6160d2 100644
--- a/core/src/main/java/hivemall/smile/classification/DecisionTree.java
+++ b/core/src/main/java/hivemall/smile/classification/DecisionTree.java
@@ -1,22 +1,4 @@
 /*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-/*
  * Copyright (c) 2010 Haifeng Li
  *   
  * Licensed under the Apache License, Version 2.0 (the "License");
@@ -31,6 +13,8 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+// This file includes a modified version of Smile:
+// https://github.com/haifengl/smile/blob/master/core/src/main/java/smile/classification/DecisionTree.java
 package hivemall.smile.classification;
 
 import static hivemall.smile.utils.SmileExtUtils.resolveFeatureName;
@@ -369,8 +353,8 @@ public final class DecisionTree implements Classifier<Vector> {
 
         public void exportGraphviz(@Nonnull final StringBuilder builder,
                 @Nullable final String[] featureNames, @Nullable final String[] classNames,
-                @Nonnull final String outputName, @Nullable double[] colorBrew,
-                final @Nonnull MutableInt nodeIdGenerator, final int parentNodeId) {
+                @Nonnull final String outputName, @Nullable final double[] colorBrew,
+                @Nonnull final MutableInt nodeIdGenerator, final int parentNodeId) {
             final int myNodeId = nodeIdGenerator.getValue();
 
             if (trueChild == null && falseChild == null) {

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/core/src/main/java/hivemall/smile/regression/RegressionTree.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/smile/regression/RegressionTree.java b/core/src/main/java/hivemall/smile/regression/RegressionTree.java
index 0670876..b085734 100755
--- a/core/src/main/java/hivemall/smile/regression/RegressionTree.java
+++ b/core/src/main/java/hivemall/smile/regression/RegressionTree.java
@@ -1,22 +1,4 @@
 /*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-/*
  * Copyright (c) 2010 Haifeng Li
  *   
  * Licensed under the Apache License, Version 2.0 (the "License");
@@ -31,6 +13,8 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+// This file includes a modified version of Smile:
+// https://github.com/haifengl/smile/blob/master/core/src/main/java/smile/regression/RegressionTree.java
 package hivemall.smile.regression;
 
 import static hivemall.smile.utils.SmileExtUtils.resolveFeatureName;

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/core/src/main/java/hivemall/smile/tools/TreePredictUDF.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/smile/tools/TreePredictUDF.java b/core/src/main/java/hivemall/smile/tools/TreePredictUDF.java
index 46b8758..ea3bc29 100644
--- a/core/src/main/java/hivemall/smile/tools/TreePredictUDF.java
+++ b/core/src/main/java/hivemall/smile/tools/TreePredictUDF.java
@@ -18,6 +18,7 @@
  */
 package hivemall.smile.tools;
 
+import hivemall.UDFWithOptions;
 import hivemall.math.vector.DenseVector;
 import hivemall.math.vector.SparseVector;
 import hivemall.math.vector.Vector;
@@ -37,11 +38,12 @@ import java.util.List;
 import javax.annotation.Nonnull;
 import javax.annotation.Nullable;
 
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.Options;
 import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.udf.UDFType;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -53,12 +55,12 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspe
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.Text;
 
-@Description(
-        name = "tree_predict",
-        value = "_FUNC_(string modelId, string model, array<double|string> features [, const boolean classification])"
-                + " - Returns a prediction result of a random forest")
+@Description(name = "tree_predict",
+        value = "_FUNC_(string modelId, string model, array<double|string> features [, const string options | const boolean classification=false])"
+                + " - Returns a prediction result of a random forest"
+                + " in <int value, array<double> posteriori> for classification and <double> for regression")
 @UDFType(deterministic = true, stateful = false)
-public final class TreePredictUDF extends GenericUDF {
+public final class TreePredictUDF extends UDFWithOptions {
 
     private boolean classification;
     private StringObjectInspector modelOI;
@@ -72,9 +74,25 @@ public final class TreePredictUDF extends GenericUDF {
     private transient Evaluator evaluator;
 
     @Override
+    protected Options getOptions() {
+        Options opts = new Options();
+        opts.addOption("c", "classification", false,
+            "Predict as classification [default: not enabled]");
+        return opts;
+    }
+
+    @Override
+    protected CommandLine processOptions(@Nonnull String optionValue) throws UDFArgumentException {
+        CommandLine cl = parseOptions(optionValue);
+
+        this.classification = cl.hasOption("classification");
+        return cl;
+    }
+
+    @Override
     public ObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
         if (argOIs.length != 3 && argOIs.length != 4) {
-            throw new UDFArgumentException("_FUNC_ takes 3 or 4 arguments");
+            throw new UDFArgumentException("tree_predict takes 3 or 4 arguments");
         }
 
         this.modelOI = HiveUtils.asStringOI(argOIs[1]);
@@ -89,15 +107,25 @@ public final class TreePredictUDF extends GenericUDF {
             this.denseInput = false;
         } else {
             throw new UDFArgumentException(
-                "_FUNC_ takes array<double> or array<string> for the second argument: "
+                "tree_predict takes array<double> or array<string> for the second argument: "
                         + listOI.getTypeName());
         }
 
-        boolean classification = false;
         if (argOIs.length == 4) {
-            classification = HiveUtils.getConstBoolean(argOIs[3]);
+            ObjectInspector argOI3 = argOIs[3];
+            if (HiveUtils.isConstBoolean(argOI3)) {
+                this.classification = HiveUtils.getConstBoolean(argOI3);
+            } else if (HiveUtils.isConstString(argOI3)) {
+                String opts = HiveUtils.getConstString(argOI3);
+                processOptions(opts);
+            } else {
+                throw new UDFArgumentException(
+                    "tree_predict expects <const boolean> or <const string> for the fourth argument: "
+                            + argOI3.getTypeName());
+            }
+        } else {
+            this.classification = false;
         }
-        this.classification = classification;
 
         if (classification) {
             List<String> fieldNames = new ArrayList<String>(2);
@@ -105,7 +133,8 @@ public final class TreePredictUDF extends GenericUDF {
             fieldNames.add("value");
             fieldOIs.add(PrimitiveObjectInspectorFactory.writableIntObjectInspector);
             fieldNames.add("posteriori");
-            fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector));
+            fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector(
+                PrimitiveObjectInspectorFactory.writableDoubleObjectInspector));
             return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
         } else {
             return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
@@ -116,7 +145,7 @@ public final class TreePredictUDF extends GenericUDF {
     public Object evaluate(@Nonnull DeferredObject[] arguments) throws HiveException {
         Object arg0 = arguments[0].get();
         if (arg0 == null) {
-            throw new HiveException("ModelId was null");
+            throw new HiveException("modelId should not be null");
         }
         // Not using string OI for backward compatibilities
         String modelId = arg0.toString();
@@ -134,8 +163,8 @@ public final class TreePredictUDF extends GenericUDF {
         this.featuresProbe = parseFeatures(arg2, featuresProbe);
 
         if (evaluator == null) {
-            this.evaluator = classification ? new ClassificationEvaluator()
-                    : new RegressionEvaluator();
+            this.evaluator =
+                    classification ? new ClassificationEvaluator() : new RegressionEvaluator();
         }
         return evaluator.evaluate(modelId, model, featuresProbe);
     }
@@ -192,8 +221,8 @@ public final class TreePredictUDF extends GenericUDF {
                 }
 
                 if (feature.indexOf(':') != -1) {
-                    throw new UDFArgumentException("Invaliad feature format `<index>:<value>`: "
-                            + col);
+                    throw new UDFArgumentException(
+                        "Invaliad feature format `<index>:<value>`: " + col);
                 }
 
                 final int colIndex = Integer.parseInt(feature);
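
As the hunk above shows, tree_predict now accepts either a const boolean or a commons-cli
option string as its fourth argument. A minimal sketch of the option-string path, assuming
commons-cli 1.x (GnuParser is the 1.x-era parser) and a hypothetical OptionParseSketch class:

    import org.apache.commons.cli.CommandLine;
    import org.apache.commons.cli.GnuParser;
    import org.apache.commons.cli.Options;
    import org.apache.commons.cli.ParseException;

    public final class OptionParseSketch {
        public static void main(String[] args) throws ParseException {
            // mirrors getOptions() in the hunk above
            Options opts = new Options();
            opts.addOption("c", "classification", false, "Predict as classification");

            // tree_predict(..., '-c') and tree_predict(..., true) end up equivalent
            CommandLine cl = new GnuParser().parse(opts, new String[] {"-c"});
            System.out.println(cl.hasOption("classification")); // true
        }
    }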

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/core/src/main/java/hivemall/utils/codec/Base91.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/utils/codec/Base91.java b/core/src/main/java/hivemall/utils/codec/Base91.java
index 27bdf62..3e996be 100644
--- a/core/src/main/java/hivemall/utils/codec/Base91.java
+++ b/core/src/main/java/hivemall/utils/codec/Base91.java
@@ -1,22 +1,4 @@
 /*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-/*
  * basE91 encoding/decoding routines
  *
  * Copyright (c) 2000-2006 Joachim Henke
@@ -46,6 +28,8 @@
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
+// This file contains a modified version of Joachim Henke's Base91:
+// https://github.com/bwaldvogel/base91/blob/master/src/main/java/de/bwaldvogel/base91/Base91.java
 package hivemall.utils.codec;
 
 import hivemall.utils.io.FastByteArrayOutputStream;

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/core/src/main/java/hivemall/utils/lang/ExceptionUtils.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/utils/lang/ExceptionUtils.java b/core/src/main/java/hivemall/utils/lang/ExceptionUtils.java
new file mode 100644
index 0000000..b69c5b0
--- /dev/null
+++ b/core/src/main/java/hivemall/utils/lang/ExceptionUtils.java
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall.utils.lang;
+
+import javax.annotation.Nonnull;
+
+public final class ExceptionUtils {
+
+    public static final int TRACE_CAUSE_DEPTH = 5;
+
+    private ExceptionUtils() {}
+
+    @Nonnull
+    public static String prettyPrintStackTrace(@Nonnull final Throwable throwable) {
+        return prettyPrintStackTrace(throwable, TRACE_CAUSE_DEPTH);
+    }
+
+    @Nonnull
+    public static String prettyPrintStackTrace(@Nonnull final Throwable throwable,
+            final int traceDepth) {
+        final StringBuilder out = new StringBuilder(512);
+        out.append(getMessage(throwable));
+        out.append("\n\n---- Debugging information ----");
+        final int tracedepth;
+        if (throwable instanceof RuntimeException || throwable instanceof Error) {
+            tracedepth = -1;
+        } else {
+            tracedepth = traceDepth;
+        }
+        String captured = captureThrownWithStackTrace(throwable, "trace-exception", tracedepth);
+        out.append(captured);
+        final Throwable cause = throwable.getCause();
+        if (cause != null) {
+            final Throwable rootCause = getRootCause(cause);
+            captured = captureThrownWithStackTrace(rootCause, "trace-cause", TRACE_CAUSE_DEPTH);
+            out.append(captured);
+        }
+        out.append("\n------------------------------- \n");
+        return out.toString();
+    }
+
+    @Nonnull
+    private static String captureThrownWithStackTrace(@Nonnull final Throwable throwable,
+            final String label, final int traceDepth) {
+        assert (traceDepth >= 1 || traceDepth == -1);
+        final StringBuilder out = new StringBuilder(255);
+        final String clazz = throwable.getClass().getName();
+        out.append(String.format("\n%-20s: %s \n", ("* " + label), clazz));
+        final StackTraceElement[] st = throwable.getStackTrace();
+        int at;
+        final int limit = (traceDepth == -1) ? st.length - 1 : traceDepth;
+        for (at = 0; at < st.length; at++) {
+            if (at < limit) {
+                out.append("\tat " + st[at] + '\n');
+            } else {
+                out.append("\t...\n");
+                break;
+            }
+        }
+        if (st.length == 0) {
+            out.append("\t no stack traces...");
+        } else if (at != (st.length - 1)) {
+            out.append("\tat " + st[st.length - 1]);
+        }
+        String errmsg = throwable.getMessage();
+        if (errmsg != null) {
+            out.append(String.format("\n%-20s: \n", ("* " + label + "-error-msg")));
+            String[] line = errmsg.split("\n");
+            final int maxlines = Math.min(line.length, Math.max(1, TRACE_CAUSE_DEPTH - 2));
+            for (int i = 0; i < maxlines; i++) {
+                out.append('\t');
+                out.append(line[i]);
+                if (i != (maxlines - 1)) {
+                    out.append('\n');
+                }
+            }
+        }
+        return out.toString();
+    }
+
+    @Nonnull
+    public static String getMessage(@Nonnull final Throwable throwable) {
+        String errMsg = throwable.getMessage();
+        String clazz = throwable.getClass().getName();
+        return (errMsg != null) ? clazz + ": " + errMsg : clazz;
+    }
+
+    @Nonnull
+    private static Throwable getRootCause(@Nonnull final Throwable throwable) {
+        Throwable top = throwable;
+        while (top != null) {
+            Throwable parent = top.getCause();
+            if (parent != null) {
+                top = parent;
+            } else {
+                break;
+            }
+        }
+        return top;
+    }
+
+}
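
A short usage sketch for the new helper (PrettyTraceExample is a hypothetical name; a
checked exception is used because, per the code above, RuntimeException and Error always
get the full trace while checked exceptions honor the depth cap):

    import hivemall.utils.lang.ExceptionUtils;

    public final class PrettyTraceExample {
        public static void main(String[] args) {
            try {
                throw new java.io.IOException("boom");
            } catch (Exception e) {
                // checked exceptions are truncated to the given trace depth
                System.err.println(ExceptionUtils.prettyPrintStackTrace(e, 3));
            }
        }
    }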

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/core/src/main/java/hivemall/utils/math/FastMath.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/utils/math/FastMath.java b/core/src/main/java/hivemall/utils/math/FastMath.java
index d27d6f8..09f7a16 100644
--- a/core/src/main/java/hivemall/utils/math/FastMath.java
+++ b/core/src/main/java/hivemall/utils/math/FastMath.java
@@ -1,21 +1,32 @@
 /*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
+ * Copyright 2012-2015 Jeff Hain
  *
- *   http://www.apache.org/licenses/LICENSE-2.0
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
  *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * =============================================================================
+ * Notice of fdlibm package this program is partially derived from:
+ *
+ * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunSoft, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * =============================================================================
  */
+// This file contains a modified version of Jafama's FastMath:
+// https://github.com/jeffhain/jafama/blob/master/src/main/java/net/jafama/FastMath.java
 package hivemall.utils.math;
 
 import hivemall.annotations.Experimental;
@@ -98,34 +109,6 @@ public final class FastMath {
         return 1 / (1 + exp(-x));
     }
 
-    /*
-     * Copyright 2012-2015 Jeff Hain
-     *
-     * Licensed under the Apache License, Version 2.0 (the "License");
-     * you may not use this file except in compliance with the License.
-     * You may obtain a copy of the License at
-     *
-     *     http://www.apache.org/licenses/LICENSE-2.0
-     *
-     * Unless required by applicable law or agreed to in writing, software
-     * distributed under the License is distributed on an "AS IS" BASIS,
-     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-     * See the License for the specific language governing permissions and
-     * limitations under the License.
-     */
-    /*
-     * =============================================================================
-     * Notice of fdlibm package this program is partially derived from:
-     *
-     * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
-     *
-     * Developed at SunSoft, a Sun Microsystems, Inc. business.
-     * Permission to use, copy, modify, and distribute this
-     * software is freely granted, provided that this notice
-     * is preserved.
-     * =============================================================================
-     */
-
     /**
      * Based on Jafama (https://github.com/jeffhain/jafama/) version 2.2.
      */
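
For reference, the retained body above computes the logistic sigmoid, 1 / (1 + e^-x);
the surrounding FastMath class only swaps in the Jafama-derived exp(). A stand-alone
sketch using java.lang.Math in place of the fast exp (SigmoidSketch and the method name
sigmoid are assumptions, not necessarily the source's names):

    public final class SigmoidSketch {
        // logistic sigmoid: maps any real x into (0, 1)
        public static double sigmoid(final double x) {
            return 1.d / (1.d + Math.exp(-x));
        }

        public static void main(String[] args) {
            System.out.println(sigmoid(0.d)); // 0.5
        }
    }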

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/core/src/main/resources/META-INF/LICENSE-jafama.txt
----------------------------------------------------------------------
diff --git a/core/src/main/resources/META-INF/LICENSE-jafama.txt b/core/src/main/resources/META-INF/LICENSE-jafama.txt
new file mode 100644
index 0000000..151b7ea
--- /dev/null
+++ b/core/src/main/resources/META-INF/LICENSE-jafama.txt
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright {yyyy} {name of copyright owner}
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/core/src/main/resources/META-INF/LICENSE-smile.txt
----------------------------------------------------------------------
diff --git a/core/src/main/resources/META-INF/LICENSE-smile.txt b/core/src/main/resources/META-INF/LICENSE-smile.txt
new file mode 100644
index 0000000..94ad231
--- /dev/null
+++ b/core/src/main/resources/META-INF/LICENSE-smile.txt
@@ -0,0 +1,203 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "{}"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright {yyyy} {name of copyright owner}
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/dist/pom.xml
----------------------------------------------------------------------
diff --git a/dist/pom.xml b/dist/pom.xml
new file mode 100644
index 0000000..bea6226
--- /dev/null
+++ b/dist/pom.xml
@@ -0,0 +1,163 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+	<modelVersion>4.0.0</modelVersion>
+
+	<parent>
+		<groupId>org.apache.hivemall</groupId>
+		<artifactId>hivemall</artifactId>
+		<version>0.5.1-incubating-SNAPSHOT</version>
+		<relativePath>../pom.xml</relativePath>
+	</parent>
+
+	<artifactId>hivemall-all</artifactId>
+	<name>Hivemall Distribution</name>
+	<packaging>jar</packaging>
+
+	<properties>
+		<main.basedir>${project.parent.basedir}</main.basedir>
+	</properties>
+
+	<dependencies>
+		<!-- compile scope -->
+		<dependency>
+			<groupId>org.apache.hivemall</groupId>
+			<artifactId>hivemall-core</artifactId>
+			<version>${project.version}</version>
+			<scope>compile</scope>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.hivemall</groupId>
+			<artifactId>hivemall-nlp</artifactId>
+			<version>${project.version}</version>
+			<scope>compile</scope>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.hivemall</groupId>
+			<artifactId>hivemall-xgboost</artifactId>
+			<version>${project.version}</version>
+			<scope>compile</scope>
+		</dependency>
+	</dependencies>
+
+	<build>
+		<plugins>
+			<plugin>
+				<artifactId>maven-jar-plugin</artifactId>
+				<configuration>
+					<finalName>${project.artifactId}-${project.version}</finalName>
+					<archive>
+						<index>true</index>
+						<compress>true</compress>
+						<manifest>
+							<addClasspath>false</addClasspath>
+							<addDefaultImplementationEntries>true</addDefaultImplementationEntries>
+							<addDefaultSpecificationEntries>true</addDefaultSpecificationEntries>
+						</manifest>
+					</archive>
+				</configuration>
+			</plugin>
+			<plugin>
+				<groupId>org.apache.maven.plugins</groupId>
+				<artifactId>maven-shade-plugin</artifactId>
+				<executions>
+					<execution>
+						<id>jar-with-dependencies</id>
+						<phase>package</phase>
+						<goals>
+							<goal>shade</goal>
+						</goals>
+						<configuration>
+							<finalName>${project.artifactId}-${project.version}</finalName>
+							<outputDirectory>${project.parent.build.directory}</outputDirectory>
+							<minimizeJar>false</minimizeJar>
+							<createDependencyReducedPom>false</createDependencyReducedPom>
+							<createSourcesJar>true</createSourcesJar>
+							<promoteTransitiveDependencies>true</promoteTransitiveDependencies>
+							<artifactSet>
+								<includes>
+									<!-- hivemall-core -->
+									<include>org.apache.hivemall:hivemall-core</include>
+									<include>io.netty:netty-all</include>
+									<include>com.github.haifengl:smile-core</include>
+									<include>com.github.haifengl:smile-math</include>
+									<include>com.github.haifengl:smile-data</include>
+									<include>org.tukaani:xz</include>
+									<include>org.apache.commons:commons-math3</include>
+									<include>org.roaringbitmap:RoaringBitmap</include>
+									<include>it.unimi.dsi:fastutil</include>
+									<include>com.clearspring.analytics:stream</include>
+									<!-- hivemall-nlp -->
+									<include>org.apache.hivemall:hivemall-nlp</include>
+									<include>org.apache.lucene:lucene-analyzers-kuromoji</include>
+									<include>org.apache.lucene:lucene-analyzers-smartcn</include>
+									<include>org.apache.lucene:lucene-analyzers-common</include>
+									<include>org.apache.lucene:lucene-core</include>
+									<!-- hivemall-xgboost -->
+									<include>org.apache.hivemall:hivemall-xgboost</include>
+									<include>io.github.myui:xgboost4j</include>
+									<include>com.esotericsoftware.kryo:kryo</include>
+								</includes>
+								<excludes>
+									<exclude>org.apache.hivemall:hivemall-all</exclude>
+								</excludes>
+							</artifactSet>
+							<transformers>
+								<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
+									<manifestEntries>
+										<Implementation-Title>${project.name}</Implementation-Title>
+										<Implementation-Version>${project.version}</Implementation-Version>
+										<Implementation-Vendor>${project.organization.name}</Implementation-Vendor>
+									</manifestEntries>
+								</transformer>
+							</transformers>
+							<filters>
+								<filter>
+									<artifact>org.apache.lucene:*</artifact>
+									<includes>
+										<include>**</include>
+									</includes>
+								</filter>
+								<filter>
+									<artifact>com.esotericsoftware.kryo:kryo</artifact>
+									<includes>
+										<include>**</include>
+									</includes>
+								</filter>
+								<filter>
+									<artifact>*:*</artifact>
+									<excludes>
+										<exclude>META-INF/LICENSE.txt</exclude>
+										<exclude>META-INF/NOTICE.txt</exclude>
+										<exclude>META-INF/*.SF</exclude>
+										<exclude>META-INF/*.DSA</exclude>
+										<exclude>META-INF/*.RSA</exclude>
+										<exclude>*.jar</exclude>
+										<exclude>tracker.py</exclude>
+									</excludes>
+								</filter>
+							</filters>
+						</configuration>
+					</execution>
+				</executions>
+			</plugin>
+		</plugins>
+	</build>
+
+</project>
\ No newline at end of file

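The dist module above shades hivemall-core, hivemall-nlp, and hivemall-xgboost, together with their minimal third-party closure, into a single hivemall-all jar written to the parent target/ directory. As a quick smoke test, the shaded jar can be loaded into a Hive-enabled Spark shell. This is only a sketch: the jar path is illustrative, and the UDF class name follows the define-all.hive convention.

    // Scala (spark-shell); `spark` is the shell-provided SparkSession.
    // The path below is illustrative; adjust it to where the shaded jar was built.
    spark.sql("ADD JAR /path/to/target/hivemall-all-0.5.1-incubating-SNAPSHOT.jar")
    spark.sql("CREATE TEMPORARY FUNCTION hivemall_version AS 'hivemall.HivemallVersionUDF'")
    spark.sql("SELECT hivemall_version()").show()  // expected: 0.5.1-incubating-SNAPSHOT
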
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/docs/gitbook/binaryclass/news20_rf.md
----------------------------------------------------------------------
diff --git a/docs/gitbook/binaryclass/news20_rf.md b/docs/gitbook/binaryclass/news20_rf.md
index fd0b475..327939b 100644
--- a/docs/gitbook/binaryclass/news20_rf.md
+++ b/docs/gitbook/binaryclass/news20_rf.md
@@ -47,7 +47,7 @@ from
 ## Prediction
 
 ```sql
-SET hivevar:classification=true;
+-- SET hivevar:classification=true;
 
 drop table rf_predicted;
 create table rf_predicted
@@ -60,7 +60,8 @@ FROM (
   SELECT
     rowid, 
     m.model_weight,
-    tree_predict(m.model_id, m.model, t.features, ${classification}) as predicted
+    tree_predict(m.model_id, m.model, t.features, "-classification") as predicted
+    -- tree_predict(m.model_id, m.model, t.features, ${classification}) as predicted
   FROM
     rf_model m
     LEFT OUTER JOIN -- CROSS JOIN

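Note on the docs change above: tree_predict now takes an options string as its fourth argument ("-classification" for classification tasks) in place of the boolean ${classification} hivevar, which survives only as a commented-out legacy form. A sketch of the new call from a Spark shell, with illustrative table names (rf_model from the tutorial; rf_test standing in for the test split) and the UDF class name taken from define-all.hive:

    // Scala (spark-shell); assumes a Hive-enabled SparkSession `spark`.
    spark.sql(
      "CREATE TEMPORARY FUNCTION tree_predict AS 'hivemall.smile.tools.TreePredictUDF'")
    val predicted = spark.sql(
      """SELECT t.rowid, m.model_weight,
        |       tree_predict(m.model_id, m.model, t.features, "-classification") AS predicted
        |FROM rf_model m CROSS JOIN rf_test t
        |""".stripMargin)
    predicted.show()
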

[2/4] incubator-hivemall git commit: Close #131: [v0.5.0-rc3] Merge v0.5.0 branch

Posted by my...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/pom.xml
----------------------------------------------------------------------
diff --git a/spark/pom.xml b/spark/pom.xml
new file mode 100644
index 0000000..d018b8d
--- /dev/null
+++ b/spark/pom.xml
@@ -0,0 +1,295 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+	<modelVersion>4.0.0</modelVersion>
+
+	<parent>
+		<groupId>org.apache.hivemall</groupId>
+		<artifactId>hivemall</artifactId>
+		<version>0.5.1-incubating-SNAPSHOT</version>
+		<relativePath>../pom.xml</relativePath>
+	</parent>
+
+	<artifactId>hivemall-spark</artifactId>
+	<packaging>pom</packaging>
+	<name>Hivemall on Apache Spark</name>
+
+	<modules>
+		<module>common</module>
+		<module>spark-2.0</module>
+		<module>spark-2.1</module>
+		<module>spark-2.2</module>
+	</modules>
+
+	<properties>
+		<main.basedir>${project.parent.basedir}</main.basedir>
+		<scala.version>2.11.8</scala.version>
+		<scala.binary.version>2.11</scala.binary.version>
+		<scalatest.jvm.opts>-ea -Xms768m -Xmx1024m -XX:PermSize=128m -XX:MaxPermSize=512m -XX:ReservedCodeCacheSize=512m</scalatest.jvm.opts>
+	</properties>
+
+	<dependencyManagement>
+		<dependencies>
+			<!-- compile scope -->
+			<dependency>
+				<groupId>org.apache.hivemall</groupId>
+				<artifactId>hivemall-core</artifactId>
+				<version>${project.version}</version>
+				<scope>compile</scope>
+			</dependency>
+			<dependency>
+				<groupId>org.apache.hivemall</groupId>
+				<artifactId>hivemall-xgboost</artifactId>
+				<version>${project.version}</version>
+				<scope>compile</scope>
+			</dependency>
+			<dependency>
+				<groupId>org.apache.commons</groupId>
+				<artifactId>commons-compress</artifactId>
+				<version>1.8</version>
+				<scope>compile</scope>
+			</dependency>
+
+			<!-- provided scope -->
+			<dependency>
+				<groupId>org.scala-lang</groupId>
+				<artifactId>scala-library</artifactId>
+				<version>${scala.version}</version>
+				<scope>provided</scope>
+			</dependency>
+
+			<!-- test dependencies -->
+			<dependency>
+				<groupId>org.apache.hivemall</groupId>
+				<artifactId>hivemall-mixserv</artifactId>
+				<version>${project.version}</version>
+				<scope>test</scope>
+			</dependency>
+			<dependency>
+				<groupId>org.scalatest</groupId>
+				<artifactId>scalatest_${scala.binary.version}</artifactId>
+				<version>2.2.4</version>
+				<scope>test</scope>
+			</dependency>
+		</dependencies>
+	</dependencyManagement>
+
+	<build>
+		<directory>target</directory>
+		<outputDirectory>target/classes</outputDirectory>
+		<finalName>${project.artifactId}-${project.version}</finalName>
+		<testOutputDirectory>target/test-classes</testOutputDirectory>
+
+		<pluginManagement>
+			<plugins>
+				<plugin>
+					<groupId>net.alchim31.maven</groupId>
+					<artifactId>scala-maven-plugin</artifactId>
+					<version>3.2.2</version>
+				</plugin>
+				<plugin>
+					<groupId>org.scalatest</groupId>
+					<artifactId>scalatest-maven-plugin</artifactId>
+					<version>1.0</version>
+					<configuration>
+						<reportsDirectory>${project.build.directory}/surefire-reports</reportsDirectory>
+						<junitxml>.</junitxml>
+						<filereports>SparkTestSuite.txt</filereports>
+						<argLine>${scalatest.jvm.opts}</argLine>
+						<stderr />
+						<environmentVariables>
+							<SPARK_PREPEND_CLASSES>1</SPARK_PREPEND_CLASSES>
+							<SPARK_SCALA_VERSION>${scala.binary.version}</SPARK_SCALA_VERSION>
+							<SPARK_TESTING>1</SPARK_TESTING>
+							<JAVA_HOME>${env.JAVA_HOME}</JAVA_HOME>
+							<PATH>${env.JAVA_HOME}/bin:${env.PATH}</PATH>
+						</environmentVariables>
+						<systemProperties>
+							<log4j.configuration>file:src/test/resources/log4j.properties</log4j.configuration>
+							<derby.system.durability>test</derby.system.durability>
+							<java.awt.headless>true</java.awt.headless>
+							<java.io.tmpdir>${project.build.directory}/tmp</java.io.tmpdir>
+							<spark.testing>1</spark.testing>
+							<spark.ui.enabled>false</spark.ui.enabled>
+							<spark.ui.showConsoleProgress>false</spark.ui.showConsoleProgress>
+							<spark.unsafe.exceptionOnMemoryLeak>true</spark.unsafe.exceptionOnMemoryLeak>
+							<!-- Needed by sql/hive tests. -->
+							<test.src.tables>__not_used__</test.src.tables>
+						</systemProperties>
+						<tagsToExclude>${test.exclude.tags}</tagsToExclude>
+					</configuration>
+				</plugin>
+				<!-- hivemall-spark_xx-xx-with-dependencies.jar including minimum dependencies -->
+				<plugin>
+					<groupId>org.apache.maven.plugins</groupId>
+					<artifactId>maven-shade-plugin</artifactId>
+					<executions>
+						<execution>
+							<id>jar-with-dependencies</id>
+							<phase>package</phase>
+							<goals>
+								<goal>shade</goal>
+							</goals>
+							<configuration>
+								<finalName>${project.artifactId}-${project.version}-with-dependencies</finalName>
+								<outputDirectory>${main.basedir}/target</outputDirectory>
+								<minimizeJar>false</minimizeJar>
+								<createDependencyReducedPom>false</createDependencyReducedPom>
+								<createSourcesJar>true</createSourcesJar>
+								<artifactSet>
+									<includes>
+										<include>org.apache.hivemall:hivemall-spark-common</include>
+										<!-- hivemall-core -->
+										<include>org.apache.hivemall:hivemall-core</include>
+										<include>io.netty:netty-all</include>
+										<include>com.github.haifengl:smile-core</include>
+										<include>com.github.haifengl:smile-math</include>
+										<include>com.github.haifengl:smile-data</include>
+										<include>org.tukaani:xz</include>
+										<include>org.apache.commons:commons-math3</include>
+										<include>org.roaringbitmap:RoaringBitmap</include>
+										<include>it.unimi.dsi:fastutil</include>
+										<include>com.clearspring.analytics:stream</include>
+										<!-- hivemall-nlp -->
+										<include>org.apache.hivemall:hivemall-nlp</include>
+										<include>org.apache.lucene:lucene-analyzers-kuromoji</include>
+										<include>org.apache.lucene:lucene-analyzers-smartcn</include>
+										<include>org.apache.lucene:lucene-analyzers-common</include>
+										<include>org.apache.lucene:lucene-core</include>
+										<!-- hivemall-xgboost -->
+										<include>org.apache.hivemall:hivemall-xgboost</include>
+										<include>io.github.myui:xgboost4j</include>
+										<include>com.esotericsoftware.kryo:kryo</include>
+									</includes>
+								</artifactSet>
+								<transformers>
+									<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
+										<manifestEntries>
+											<Implementation-Title>${project.name}</Implementation-Title>
+											<Implementation-Version>${project.version}</Implementation-Version>
+											<Implementation-Vendor>${project.organization.name}</Implementation-Vendor>
+										</manifestEntries>
+									</transformer>
+								</transformers>
+								<filters>
+									<filter>
+										<artifact>org.apache.lucene:*</artifact>
+										<includes>
+											<include>**</include>
+										</includes>
+									</filter>
+									<filter>
+										<artifact>com.esotericsoftware.kryo:kryo</artifact>
+										<includes>
+											<include>**</include>
+										</includes>
+									</filter>
+									<filter>
+										<artifact>*:*</artifact>
+										<excludes>
+											<exclude>META-INF/LICENSE.txt</exclude>
+											<exclude>META-INF/NOTICE.txt</exclude>
+											<exclude>META-INF/*.SF</exclude>
+											<exclude>META-INF/*.DSA</exclude>
+											<exclude>META-INF/*.RSA</exclude>
+											<exclude>*.jar</exclude>
+											<exclude>tracker.py</exclude>
+										</excludes>
+									</filter>
+								</filters>
+							</configuration>
+						</execution>
+					</executions>
+				</plugin>
+				<plugin>
+					<groupId>org.scalastyle</groupId>
+					<artifactId>scalastyle-maven-plugin</artifactId>
+					<version>0.8.0</version>
+				</plugin>
+			</plugins>
+		</pluginManagement>
+
+		<plugins>
+			<plugin>
+				<groupId>org.scalastyle</groupId>
+				<artifactId>scalastyle-maven-plugin</artifactId>
+				<configuration>
+					<verbose>false</verbose>
+					<failOnViolation>true</failOnViolation>
+					<includeTestSourceDirectory>true</includeTestSourceDirectory>
+					<failOnWarning>false</failOnWarning>
+					<sourceDirectory>${basedir}/src/main/scala</sourceDirectory>
+					<testSourceDirectory>${basedir}/src/test/scala</testSourceDirectory>
+					<configLocation>spark/scalastyle-config.xml</configLocation>
+					<outputFile>${basedir}/target/scalastyle-output.xml</outputFile>
+					<inputEncoding>${project.build.sourceEncoding}</inputEncoding>
+					<outputEncoding>${project.reporting.outputEncoding}</outputEncoding>
+				</configuration>
+				<executions>
+					<execution>
+						<goals>
+							<goal>check</goal>
+						</goals>
+					</execution>
+				</executions>
+			</plugin>
+			<plugin>
+				<groupId>net.alchim31.maven</groupId>
+				<artifactId>scala-maven-plugin</artifactId>
+				<executions>
+					<execution>
+						<id>scala-compile-first</id>
+						<phase>process-resources</phase>
+						<goals>
+							<goal>add-source</goal>
+							<goal>compile</goal>
+						</goals>
+					</execution>
+					<execution>
+						<id>scala-test-compile</id>
+						<phase>process-test-resources</phase>
+						<goals>
+							<goal>testCompile</goal>
+						</goals>
+					</execution>
+				</executions>
+				<!-- For incremental compilation -->
+				<configuration>
+					<scalaVersion>${scala.version}</scalaVersion>
+					<recompileMode>incremental</recompileMode>
+					<useZincServer>true</useZincServer>
+					<args>
+						<arg>-unchecked</arg>
+						<arg>-deprecation</arg>
+						<!-- TODO: To enable this option, we need to fix many warnings -->
+						<!-- <arg>-feature</arg> -->
+					</args>
+					<jvmArgs>
+						<jvmArg>-Xms768m</jvmArg>
+						<jvmArg>-Xmx1024m</jvmArg>
+						<jvmArg>-XX:PermSize=128m</jvmArg>
+						<jvmArg>-XX:MaxPermSize=512m</jvmArg>
+						<jvmArg>-XX:ReservedCodeCacheSize=512m</jvmArg>
+					</jvmArgs>
+				</configuration>
+			</plugin>
+		</plugins>
+	</build>
+
+</project>
\ No newline at end of file

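The new parent pom above centralizes the Spark build: dependencyManagement pins the hivemall-core, hivemall-xgboost, commons-compress, and scalatest versions, while pluginManagement carries the shared scalatest-maven-plugin settings (JVM options, SPARK_TESTING and SPARK_PREPEND_CLASSES, surefire-reports output), so each spark-2.x child declares only the plugins it uses. A minimal sketch of a suite those shared settings would drive, with all names illustrative:

    package org.apache.spark.sql.hive  // illustrative package

    import org.scalatest.FunSuite

    // Run by the shared scalatest-maven-plugin execution; reports land under
    // target/surefire-reports per the parent configuration.
    class SharedConfigSuite extends FunSuite {
      test("spark.testing is set by the parent plugin config") {
        assert(sys.props.get("spark.testing").contains("1"))
      }
    }
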
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/scalastyle-config.xml
----------------------------------------------------------------------
diff --git a/spark/scalastyle-config.xml b/spark/scalastyle-config.xml
new file mode 100644
index 0000000..13d1c47
--- /dev/null
+++ b/spark/scalastyle-config.xml
@@ -0,0 +1,333 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+<!--
+If you wish to turn off checking for a section of code, you can put a comment in the source
+before and after the section, with the following syntax:
+
+  // scalastyle:off
+  ...  // stuff that breaks the styles
+  // scalastyle:on
+
+You can also disable only one rule, by specifying its rule id, as listed at:
+  http://www.scalastyle.org/rules-0.7.0.html
+
+  // scalastyle:off no.finalize
+  override def finalize(): Unit = ...
+  // scalastyle:on no.finalize
+
+This file is divided into 3 sections:
+ (1) rules that we enforce.
+ (2) rules that we would like to enforce, but haven't cleaned up the codebase to turn on yet
+     (or we need to make the scalastyle rule more configurable).
+ (3) rules that we don't want to enforce.
+-->
+
+<scalastyle>
+  <name>Scalastyle standard configuration</name>
+
+  <!-- ================================================================================ -->
+  <!--                               rules we enforce                                   -->
+  <!-- ================================================================================ -->
+
+  <check level="error" class="org.scalastyle.file.FileTabChecker" enabled="true"></check>
+
+  <check level="error" class="org.scalastyle.file.HeaderMatchesChecker" enabled="true">
+    <parameters>
+       <parameter name="header"><![CDATA[/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */]]></parameter>
+    </parameters>
+  </check>
+
+  <check level="error" class="org.scalastyle.scalariform.SpacesAfterPlusChecker" enabled="true"></check>
+
+  <check level="error" class="org.scalastyle.scalariform.SpacesBeforePlusChecker" enabled="true"></check>
+
+  <check level="error" class="org.scalastyle.file.WhitespaceEndOfLineChecker" enabled="true"></check>
+
+  <check level="error" class="org.scalastyle.file.FileLineLengthChecker" enabled="true">
+    <parameters>
+      <parameter name="maxLineLength"><![CDATA[100]]></parameter>
+      <parameter name="tabSize"><![CDATA[2]]></parameter>
+      <parameter name="ignoreImports">true</parameter>
+    </parameters>
+  </check>
+
+  <check level="error" class="org.scalastyle.scalariform.ClassNamesChecker" enabled="true">
+    <parameters><parameter name="regex"><![CDATA[[A-Z][A-Za-z]*]]></parameter></parameters>
+  </check>
+
+  <check level="error" class="org.scalastyle.scalariform.ObjectNamesChecker" enabled="true">
+    <parameters><parameter name="regex"><![CDATA[[A-Z][A-Za-z]*]]></parameter></parameters>
+  </check>
+
+  <check level="error" class="org.scalastyle.scalariform.PackageObjectNamesChecker" enabled="true">
+    <parameters><parameter name="regex"><![CDATA[^[a-z][A-Za-z]*$]]></parameter></parameters>
+  </check>
+
+  <check level="error" class="org.scalastyle.scalariform.ParameterNumberChecker" enabled="true">
+    <parameters><parameter name="maxParameters"><![CDATA[10]]></parameter></parameters>
+  </check>
+
+  <check level="error" class="org.scalastyle.scalariform.NoFinalizeChecker" enabled="true"></check>
+
+  <check level="error" class="org.scalastyle.scalariform.CovariantEqualsChecker" enabled="true"></check>
+
+  <check level="error" class="org.scalastyle.scalariform.StructuralTypeChecker" enabled="true"></check>
+
+  <check level="error" class="org.scalastyle.scalariform.UppercaseLChecker" enabled="true"></check>
+
+  <check level="error" class="org.scalastyle.scalariform.IfBraceChecker" enabled="true">
+    <parameters>
+      <parameter name="singleLineAllowed"><![CDATA[true]]></parameter>
+      <parameter name="doubleLineAllowed"><![CDATA[true]]></parameter>
+    </parameters>
+  </check>
+
+  <check level="error" class="org.scalastyle.scalariform.PublicMethodsHaveTypeChecker" enabled="true"></check>
+
+  <check level="error" class="org.scalastyle.file.NewLineAtEofChecker" enabled="true"></check>
+
+  <check customId="nonascii" level="error" class="org.scalastyle.scalariform.NonASCIICharacterChecker" enabled="true"></check>
+
+  <check level="error" class="org.scalastyle.scalariform.SpaceAfterCommentStartChecker" enabled="true"></check>
+
+  <check level="error" class="org.scalastyle.scalariform.EnsureSingleSpaceBeforeTokenChecker" enabled="true">
+   <parameters>
+     <parameter name="tokens">ARROW, EQUALS, ELSE, TRY, CATCH, FINALLY, LARROW, RARROW</parameter>
+   </parameters>
+  </check>
+
+  <check level="error" class="org.scalastyle.scalariform.EnsureSingleSpaceAfterTokenChecker" enabled="true">
+    <parameters>
+     <parameter name="tokens">ARROW, EQUALS, COMMA, COLON, IF, ELSE, DO, WHILE, FOR, MATCH, TRY, CATCH, FINALLY, LARROW, RARROW</parameter>
+    </parameters>
+  </check>
+
+  <!-- ??? usually shouldn't be checked into the code base. -->
+  <check level="error" class="org.scalastyle.scalariform.NotImplementedErrorUsage" enabled="true"></check>
+
+  <!-- As of SPARK-7977 all printlns need to be wrapped in '// scalastyle:off/on println' -->
+  <check customId="println" level="error" class="org.scalastyle.scalariform.TokenChecker" enabled="true">
+    <parameters><parameter name="regex">^println$</parameter></parameters>
+    <customMessage><![CDATA[Are you sure you want to println? If yes, wrap the code block with
+      // scalastyle:off println
+      println(...)
+      // scalastyle:on println]]></customMessage>
+  </check>
+
+  <check customId="visiblefortesting" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+    <parameters><parameter name="regex">@VisibleForTesting</parameter></parameters>
+    <customMessage><![CDATA[
+      @VisibleForTesting causes classpath issues. Please note this in the java doc instead (SPARK-11615).
+    ]]></customMessage>
+  </check>
+
+  <check customId="runtimeaddshutdownhook" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+    <parameters><parameter name="regex">Runtime\.getRuntime\.addShutdownHook</parameter></parameters>
+    <customMessage><![CDATA[
+      Are you sure that you want to use Runtime.getRuntime.addShutdownHook? In most cases, you should use
+      ShutdownHookManager.addShutdownHook instead.
+      If you must use Runtime.getRuntime.addShutdownHook, wrap the code block with
+      // scalastyle:off runtimeaddshutdownhook
+      Runtime.getRuntime.addShutdownHook(...)
+      // scalastyle:on runtimeaddshutdownhook
+    ]]></customMessage>
+  </check>
+
+  <check customId="mutablesynchronizedbuffer" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+    <parameters><parameter name="regex">mutable\.SynchronizedBuffer</parameter></parameters>
+    <customMessage><![CDATA[
+      Are you sure that you want to use mutable.SynchronizedBuffer? In most cases, you should use
+      java.util.concurrent.ConcurrentLinkedQueue instead.
+      If you must use mutable.SynchronizedBuffer, wrap the code block with
+      // scalastyle:off mutablesynchronizedbuffer
+      mutable.SynchronizedBuffer[...]
+      // scalastyle:on mutablesynchronizedbuffer
+    ]]></customMessage>
+  </check>
+
+  <check customId="classforname" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+    <parameters><parameter name="regex">Class\.forName</parameter></parameters>
+    <customMessage><![CDATA[
+      Are you sure that you want to use Class.forName? In most cases, you should use Utils.classForName instead.
+      If you must use Class.forName, wrap the code block with
+      // scalastyle:off classforname
+      Class.forName(...)
+      // scalastyle:on classforname
+    ]]></customMessage>
+  </check>
+
+  <check customId="awaitresult" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+    <parameters><parameter name="regex">Await\.result</parameter></parameters>
+    <customMessage><![CDATA[
+      Are you sure that you want to use Await.result? In most cases, you should use ThreadUtils.awaitResult instead.
+      If you must use Await.result, wrap the code block with
+      // scalastyle:off awaitresult
+      Await.result(...)
+      // scalastyle:on awaitresult
+    ]]></customMessage>
+  </check>
+
+  <!-- As of SPARK-9613 JavaConversions should be replaced with JavaConverters -->
+  <check customId="javaconversions" level="error" class="org.scalastyle.scalariform.TokenChecker" enabled="true">
+    <parameters><parameter name="regex">JavaConversions</parameter></parameters>
+    <customMessage>Instead of importing implicits in scala.collection.JavaConversions._, import
+    scala.collection.JavaConverters._ and use .asScala / .asJava methods</customMessage>
+  </check>
+
+  <check customId="commonslang2" level="error" class="org.scalastyle.scalariform.TokenChecker" enabled="true">
+    <parameters><parameter name="regex">org\.apache\.commons\.lang\.</parameter></parameters>
+    <customMessage>Use Commons Lang 3 classes (package org.apache.commons.lang3.*) instead
+    of Commons Lang 2 (package org.apache.commons.lang.*)</customMessage>
+  </check>
+
+  <check level="error" class="org.scalastyle.scalariform.ImportOrderChecker" enabled="true">
+    <parameters>
+      <parameter name="groups">java,scala,3rdParty,spark</parameter>
+      <parameter name="group.java">javax?\..*</parameter>
+      <parameter name="group.scala">scala\..*</parameter>
+      <parameter name="group.3rdParty">(?!org\.apache\.spark\.).*</parameter>
+      <parameter name="group.spark">org\.apache\.spark\..*</parameter>
+    </parameters>
+  </check>
+
+  <check level="error" class="org.scalastyle.scalariform.DisallowSpaceBeforeTokenChecker" enabled="true">
+    <parameters>
+      <parameter name="tokens">COMMA</parameter>
+    </parameters>
+  </check>
+
+  <!-- SPARK-3854: Single Space between ')' and '{' -->
+  <check customId="SingleSpaceBetweenRParenAndLCurlyBrace" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+    <parameters><parameter name="regex">\)\{</parameter></parameters>
+    <customMessage><![CDATA[
+      Single space between ')' and '{'.
+    ]]></customMessage>
+  </check>
+
+  <check customId="NoScalaDoc" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+    <parameters><parameter name="regex">(?m)^(\s*)/[*][*].*$(\r|)\n^\1  [*]</parameter></parameters>
+    <customMessage>Use Javadoc style indentation for multiline comments</customMessage>
+  </check>
+
+  <check customId="OmitBracesInCase" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+    <parameters><parameter name="regex">case[^\n>]*=>\s*\{</parameter></parameters>
+    <customMessage>Omit braces in case clauses.</customMessage>
+  </check>
+
+  <!-- SPARK-16877: Avoid Java annotations -->
+  <check customId="OverrideJavaCase" level="error" class="org.scalastyle.scalariform.TokenChecker" enabled="true">
+    <parameters><parameter name="regex">^Override$</parameter></parameters>
+    <customMessage>override modifier should be used instead of @java.lang.Override.</customMessage>
+  </check>
+
+  <check level="error" class="org.scalastyle.scalariform.DeprecatedJavaChecker" enabled="true"></check>
+
+  <!-- ================================================================================ -->
+  <!--       rules we'd like to enforce, but haven't cleaned up the codebase yet        -->
+  <!-- ================================================================================ -->
+
+  <!-- We cannot turn the following two on, because it'd fail a lot of string interpolation use cases. -->
+  <!-- Ideally the following two rules should be configurable to rule out string interpolation. -->
+  <check level="error" class="org.scalastyle.scalariform.NoWhitespaceBeforeLeftBracketChecker" enabled="false"></check>
+  <check level="error" class="org.scalastyle.scalariform.NoWhitespaceAfterLeftBracketChecker" enabled="false"></check>
+
+  <!-- This breaks symbolic method names so we don't turn it on. -->
+  <!-- Maybe we should update it to allow basic symbolic names, and then we are good to go. -->
+  <check level="error" class="org.scalastyle.scalariform.MethodNamesChecker" enabled="false">
+    <parameters>
+    <parameter name="regex"><![CDATA[^[a-z][A-Za-z0-9]*$]]></parameter>
+    </parameters>
+  </check>
+
+  <!-- Should turn this on, but we have a few places that need to be fixed first -->
+  <check level="error" class="org.scalastyle.scalariform.EqualsHashCodeChecker" enabled="true"></check>
+
+  <!-- ================================================================================ -->
+  <!--                               rules we don't want                                -->
+  <!-- ================================================================================ -->
+
+  <check level="error" class="org.scalastyle.scalariform.IllegalImportsChecker" enabled="false">
+    <parameters><parameter name="illegalImports"><![CDATA[sun._,java.awt._]]></parameter></parameters>
+  </check>
+
+  <!-- We want the opposite of this: NewLineAtEofChecker -->
+  <check level="error" class="org.scalastyle.file.NoNewLineAtEofChecker" enabled="false"></check>
+
+  <!-- This one complains about all kinds of random things. Disable. -->
+  <check level="error" class="org.scalastyle.scalariform.SimplifyBooleanExpressionChecker" enabled="false"></check>
+
+  <!-- We use return quite a bit for control flows and guards -->
+  <check level="error" class="org.scalastyle.scalariform.ReturnChecker" enabled="false"></check>
+
+  <!-- We use null a lot in low level code and to interface with 3rd party code -->
+  <check level="error" class="org.scalastyle.scalariform.NullChecker" enabled="false"></check>
+
+  <!-- Doesn't seem like a big deal here ... -->
+  <check level="error" class="org.scalastyle.scalariform.NoCloneChecker" enabled="false"></check>
+
+  <!-- Doesn't seem like a big deal here ... -->
+  <check level="error" class="org.scalastyle.file.FileLengthChecker" enabled="false">
+    <parameters><parameter name="maxFileLength">800</parameter></parameters>
+  </check>
+
+  <!-- Doesn't seem like a big deal here ... -->
+  <check level="error" class="org.scalastyle.scalariform.NumberOfTypesChecker" enabled="false">
+    <parameters><parameter name="maxTypes">30</parameter></parameters>
+  </check>
+
+  <!-- Doesn't seem like a big deal here ... -->
+  <check level="error" class="org.scalastyle.scalariform.CyclomaticComplexityChecker" enabled="false">
+    <parameters><parameter name="maximum">10</parameter></parameters>
+  </check>
+
+  <!-- Doesn't seem like a big deal here ... -->
+  <check level="error" class="org.scalastyle.scalariform.MethodLengthChecker" enabled="false">
+    <parameters><parameter name="maxLength">50</parameter></parameters>
+  </check>
+
+  <!-- Not exactly feasible to enforce this right now. -->
+  <!-- It is also infrequent that somebody introduces a new class with a lot of methods. -->
+  <check level="error" class="org.scalastyle.scalariform.NumberOfMethodsInTypeChecker" enabled="false">
+    <parameters><parameter name="maxMethods"><![CDATA[30]]></parameter></parameters>
+  </check>
+
+  <!-- Doesn't seem like a big deal here, and we have a lot of magic numbers ... -->
+  <check level="error" class="org.scalastyle.scalariform.MagicNumberChecker" enabled="false">
+    <parameters><parameter name="ignore">-1,0,1,2,3</parameter></parameters>
+  </check>
+
+</scalastyle>

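As the header comment in this config explains, any rule can be suppressed around a specific block with scalastyle:off/on markers. A small Scala sketch of the println rule (customId "println" above) being silenced locally:

    object Banner {
      def show(version: String): Unit = {
        // scalastyle:off println
        println(s"Hivemall on Spark $version")  // intentional console output
        // scalastyle:on println
      }
    }
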
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-2.0/pom.xml
----------------------------------------------------------------------
diff --git a/spark/spark-2.0/pom.xml b/spark/spark-2.0/pom.xml
index e197586..54c817d 100644
--- a/spark/spark-2.0/pom.xml
+++ b/spark/spark-2.0/pom.xml
@@ -16,37 +16,36 @@
   specific language governing permissions and limitations
   under the License.
 -->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
 	<modelVersion>4.0.0</modelVersion>
 
 	<parent>
 		<groupId>org.apache.hivemall</groupId>
-		<artifactId>hivemall</artifactId>
-		<version>0.5.0-incubating-SNAPSHOT</version>
-		<relativePath>../../pom.xml</relativePath>
+		<artifactId>hivemall-spark</artifactId>
+		<version>0.5.1-incubating-SNAPSHOT</version>
+		<relativePath>../pom.xml</relativePath>
 	</parent>
 
-	<artifactId>hivemall-spark</artifactId>
+	<artifactId>hivemall-spark2.0</artifactId>
 	<name>Hivemall on Spark 2.0</name>
 	<packaging>jar</packaging>
 
 	<properties>
-		<main.basedir>${project.parent.basedir}</main.basedir>
+		<main.basedir>${project.parent.parent.basedir}</main.basedir>
+		<spark.version>2.0.2</spark.version>
+		<spark.binary.version>2.0</spark.binary.version>
 	</properties>
 
 	<dependencies>
-		<!-- hivemall dependencies -->
+		<!-- compile scope -->
 		<dependency>
 			<groupId>org.apache.hivemall</groupId>
 			<artifactId>hivemall-core</artifactId>
-			<version>${project.version}</version>
 			<scope>compile</scope>
 		</dependency>
 		<dependency>
 			<groupId>org.apache.hivemall</groupId>
 			<artifactId>hivemall-xgboost</artifactId>
-			<version>${project.version}</version>
 			<scope>compile</scope>
 		</dependency>
 		<dependency>
@@ -56,21 +55,12 @@
 			<scope>compile</scope>
 		</dependency>
 
-		<!-- third-party dependencies -->
+		<!-- provided scope -->
 		<dependency>
 			<groupId>org.scala-lang</groupId>
 			<artifactId>scala-library</artifactId>
-			<version>${scala.version}</version>
-			<scope>compile</scope>
-		</dependency>
-		<dependency>
-			<groupId>org.apache.commons</groupId>
-			<artifactId>commons-compress</artifactId>
-			<version>1.8</version>
-			<scope>compile</scope>
+			<scope>provided</scope>
 		</dependency>
-
-		<!-- other provided dependencies -->
 		<dependency>
 			<groupId>org.apache.spark</groupId>
 			<artifactId>spark-core_${scala.binary.version}</artifactId>
@@ -106,114 +96,26 @@
 		<dependency>
 			<groupId>org.apache.hivemall</groupId>
 			<artifactId>hivemall-mixserv</artifactId>
-			<version>${project.version}</version>
-			<scope>test</scope>
-		</dependency>
-		<dependency>
-			<groupId>org.xerial</groupId>
-			<artifactId>xerial-core</artifactId>
-			<version>3.2.3</version>
 			<scope>test</scope>
 		</dependency>
 		<dependency>
 			<groupId>org.scalatest</groupId>
 			<artifactId>scalatest_${scala.binary.version}</artifactId>
-			<version>2.2.4</version>
 			<scope>test</scope>
 		</dependency>
 	</dependencies>
 
 	<build>
-		<directory>target</directory>
-		<outputDirectory>target/classes</outputDirectory>
-		<finalName>${project.artifactId}-${spark.binary.version}_${scala.binary.version}-${project.version}</finalName>
-		<testOutputDirectory>target/test-classes</testOutputDirectory>
 		<plugins>
-			<!-- For incremental compilation -->
-			<plugin>
-				<groupId>net.alchim31.maven</groupId>
-				<artifactId>scala-maven-plugin</artifactId>
-				<version>3.2.2</version>
-				<executions>
-					<execution>
-						<id>scala-compile-first</id>
-						<phase>process-resources</phase>
-						<goals>
-							<goal>compile</goal>
-						</goals>
-					</execution>
-					<execution>
-						<id>scala-test-compile-first</id>
-						<phase>process-test-resources</phase>
-						<goals>
-							<goal>testCompile</goal>
-						</goals>
-					</execution>
-				</executions>
-				<configuration>
-					<scalaVersion>${scala.version}</scalaVersion>
-					<recompileMode>incremental</recompileMode>
-					<useZincServer>true</useZincServer>
-					<args>
-						<arg>-unchecked</arg>
-						<arg>-deprecation</arg>
-						<!-- TODO: To enable this option, we need to fix many wornings -->
-						<!-- <arg>-feature</arg> -->
-					</args>
-					<jvmArgs>
-						<jvmArg>-Xms512m</jvmArg>
-						<jvmArg>-Xmx1024m</jvmArg>
-					</jvmArgs>
-				</configuration>
-			</plugin>
-			<!-- hivemall-spark_xx-xx.jar -->
-			<plugin>
-				<groupId>org.apache.maven.plugins</groupId>
-				<artifactId>maven-jar-plugin</artifactId>
-				<version>2.5</version>
-				<configuration>
-					<finalName>${project.artifactId}-${spark.binary.version}_${scala.binary.version}-${project.version}</finalName>
-					<outputDirectory>${project.parent.build.directory}</outputDirectory>
-				</configuration>
-			</plugin>
 			<!-- hivemall-spark_xx-xx-with-dependencies.jar including minimum dependencies -->
 			<plugin>
 				<groupId>org.apache.maven.plugins</groupId>
 				<artifactId>maven-shade-plugin</artifactId>
-				<version>3.1.0</version>
-				<executions>
-					<execution>
-						<id>jar-with-dependencies</id>
-						<phase>package</phase>
-						<goals>
-							<goal>shade</goal>
-						</goals>
-						<configuration>
-							<finalName>${project.artifactId}-${spark.binary.version}_${scala.binary.version}-${project.version}-with-dependencies</finalName>
-							<outputDirectory>${project.parent.build.directory}</outputDirectory>
-							<minimizeJar>false</minimizeJar>
-							<createDependencyReducedPom>false</createDependencyReducedPom>
-							<artifactSet>
-								<includes>
-									<include>org.apache.hivemall:hivemall-core</include>
-									<include>org.apache.hivemall:hivemall-xgboost</include>
-									<include>org.apache.hivemall:hivemall-spark-common</include>
-									<include>com.github.haifengl:smile-core</include>
-									<include>com.github.haifengl:smile-math</include>
-									<include>com.github.haifengl:smile-data</include>
-									<include>ml.dmlc:xgboost4j</include>
-									<include>com.esotericsoftware.kryo:kryo</include>
-								</includes>
-							</artifactSet>
-						</configuration>
-					</execution>
-				</executions>
 			</plugin>
 			<!-- disable surefire because there is no java test -->
 			<plugin>
 				<groupId>org.apache.maven.plugins</groupId>
 				<artifactId>maven-surefire-plugin</artifactId>
-				<version>2.7</version>
 				<configuration>
 					<skipTests>true</skipTests>
 				</configuration>
@@ -222,33 +124,6 @@
 			<plugin>
 				<groupId>org.scalatest</groupId>
 				<artifactId>scalatest-maven-plugin</artifactId>
-				<version>1.0</version>
-				<configuration>
-					<reportsDirectory>${project.build.directory}/surefire-reports</reportsDirectory>
-					<junitxml>.</junitxml>
-					<filereports>SparkTestSuite.txt</filereports>
-					<argLine>${spark.test.jvm.opts}</argLine>
-					<stderr />
-					<environmentVariables>
-						<SPARK_PREPEND_CLASSES>1</SPARK_PREPEND_CLASSES>
-						<SPARK_SCALA_VERSION>${scala.binary.version}</SPARK_SCALA_VERSION>
-						<SPARK_TESTING>1</SPARK_TESTING>
-						<JAVA_HOME>${env.JAVA_HOME}</JAVA_HOME>
-					</environmentVariables>
-					<systemProperties>
-						<log4j.configuration>file:src/test/resources/log4j.properties</log4j.configuration>
-						<derby.system.durability>test</derby.system.durability>
-						<java.awt.headless>true</java.awt.headless>
-						<java.io.tmpdir>${project.build.directory}/tmp</java.io.tmpdir>
-						<spark.testing>1</spark.testing>
-						<spark.ui.enabled>false</spark.ui.enabled>
-						<spark.ui.showConsoleProgress>false</spark.ui.showConsoleProgress>
-						<spark.unsafe.exceptionOnMemoryLeak>true</spark.unsafe.exceptionOnMemoryLeak>
-						<!-- Needed by sql/hive tests. -->
-						<test.src.tables>__not_used__</test.src.tables>
-					</systemProperties>
-					<tagsToExclude>${test.exclude.tags}</tagsToExclude>
-				</configuration>
 				<executions>
 					<execution>
 						<id>test</id>

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-2.0/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala
----------------------------------------------------------------------
diff --git a/spark/spark-2.0/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala b/spark/spark-2.0/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala
new file mode 100644
index 0000000..a6bbb4b
--- /dev/null
+++ b/spark/spark-2.0/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.spark.streaming
+
+import scala.reflect.ClassTag
+
+import org.apache.spark.ml.feature.HivemallLabeledPoint
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.{DataFrame, Row, SQLContext}
+import org.apache.spark.streaming.dstream.DStream
+
+final class HivemallStreamingOps(ds: DStream[HivemallLabeledPoint]) {
+
+  def predict[U: ClassTag](f: DataFrame => DataFrame)(implicit sqlContext: SQLContext)
+      : DStream[Row] = {
+    ds.transform[Row] { rdd: RDD[HivemallLabeledPoint] =>
+      f(sqlContext.createDataFrame(rdd)).rdd
+    }
+  }
+}
+
+object HivemallStreamingOps {
+
+  /**
+   * Implicitly inject the [[HivemallStreamingOps]] into [[DStream]].
+   */
+  implicit def dataFrameToHivemallStreamingOps(ds: DStream[HivemallLabeledPoint])
+      : HivemallStreamingOps = {
+    new HivemallStreamingOps(ds)
+  }
+}

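HivemallStreamingOps above lets a DataFrame-based scoring function run over every micro-batch of a DStream[HivemallLabeledPoint] through the implicit conversion. A usage sketch; the select() below is a placeholder for a real prediction query:

    import org.apache.spark.ml.feature.HivemallLabeledPoint
    import org.apache.spark.sql.{DataFrame, SQLContext}
    import org.apache.spark.streaming.dstream.DStream
    import org.apache.spark.streaming.HivemallStreamingOps._

    def score(stream: DStream[HivemallLabeledPoint])(implicit sqlContext: SQLContext): Unit = {
      // predict() turns each RDD in the stream into a DataFrame, applies the
      // given function, and hands back the resulting rows as a DStream[Row].
      val predicted = stream.predict { df: DataFrame =>
        df.select("label", "features")  // placeholder for an actual scoring query
      }
      predicted.print()
    }
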
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/HiveUdfSuite.scala
----------------------------------------------------------------------
diff --git a/spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/HiveUdfSuite.scala b/spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/HiveUdfSuite.scala
index d3bf435..4a43afc 100644
--- a/spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/HiveUdfSuite.scala
+++ b/spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/HiveUdfSuite.scala
@@ -35,7 +35,7 @@ final class HiveUdfWithFeatureSuite extends HivemallFeatureQueryTest {
 
     checkAnswer(
       sql(s"SELECT DISTINCT hivemall_version()"),
-      Row("0.5.0-incubating-SNAPSHOT")
+      Row("0.5.1-incubating-SNAPSHOT")
     )
 
     // sql("DROP TEMPORARY FUNCTION IF EXISTS hivemall_version")

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
----------------------------------------------------------------------
diff --git a/spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala b/spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
index 5e99fd8..399a557 100644
--- a/spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
+++ b/spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
@@ -293,7 +293,7 @@ final class HivemallOpsWithFeatureSuite extends HivemallFeatureQueryTest {
   }
 
   test("misc - hivemall_version") {
-    checkAnswer(DummyInputData.select(hivemall_version()), Row("0.5.0-incubating-SNAPSHOT"))
+    checkAnswer(DummyInputData.select(hivemall_version()), Row("0.5.1-incubating-SNAPSHOT"))
   }
 
   test("misc - rowid") {

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-2.1/pom.xml
----------------------------------------------------------------------
diff --git a/spark/spark-2.1/pom.xml b/spark/spark-2.1/pom.xml
index 3d07184..e10b4ab 100644
--- a/spark/spark-2.1/pom.xml
+++ b/spark/spark-2.1/pom.xml
@@ -16,23 +16,24 @@
   specific language governing permissions and limitations
   under the License.
 -->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
 	<modelVersion>4.0.0</modelVersion>
 
 	<parent>
 		<groupId>org.apache.hivemall</groupId>
-		<artifactId>hivemall</artifactId>
-		<version>0.5.0-incubating-SNAPSHOT</version>
-		<relativePath>../../pom.xml</relativePath>
+		<artifactId>hivemall-spark</artifactId>
+		<version>0.5.1-incubating-SNAPSHOT</version>
+		<relativePath>../pom.xml</relativePath>
 	</parent>
 
-	<artifactId>hivemall-spark</artifactId>
+	<artifactId>hivemall-spark2.1</artifactId>
 	<name>Hivemall on Spark 2.1</name>
 	<packaging>jar</packaging>
 
 	<properties>
-		<main.basedir>${project.parent.basedir}</main.basedir>
+		<main.basedir>${project.parent.parent.basedir}</main.basedir>
+		<spark.version>2.1.1</spark.version>
+		<spark.binary.version>2.1</spark.binary.version>
 	</properties>
 
 	<dependencies>
@@ -40,13 +41,11 @@
 		<dependency>
 			<groupId>org.apache.hivemall</groupId>
 			<artifactId>hivemall-core</artifactId>
-			<version>${project.version}</version>
 			<scope>compile</scope>
 		</dependency>
 		<dependency>
 			<groupId>org.apache.hivemall</groupId>
 			<artifactId>hivemall-xgboost</artifactId>
-			<version>${project.version}</version>
 			<scope>compile</scope>
 		</dependency>
 		<dependency>
@@ -56,21 +55,12 @@
 			<scope>compile</scope>
 		</dependency>
 
-		<!-- third-party dependencies -->
+		<!-- provided scope -->
 		<dependency>
 			<groupId>org.scala-lang</groupId>
 			<artifactId>scala-library</artifactId>
-			<version>${scala.version}</version>
-			<scope>compile</scope>
-		</dependency>
-		<dependency>
-			<groupId>org.apache.commons</groupId>
-			<artifactId>commons-compress</artifactId>
-			<version>1.8</version>
-			<scope>compile</scope>
+			<scope>provided</scope>
 		</dependency>
-
-		<!-- other provided dependencies -->
 		<dependency>
 			<groupId>org.apache.spark</groupId>
 			<artifactId>spark-core_${scala.binary.version}</artifactId>
@@ -106,114 +96,26 @@
 		<dependency>
 			<groupId>org.apache.hivemall</groupId>
 			<artifactId>hivemall-mixserv</artifactId>
-			<version>${project.version}</version>
-			<scope>test</scope>
-		</dependency>
-		<dependency>
-			<groupId>org.xerial</groupId>
-			<artifactId>xerial-core</artifactId>
-			<version>3.2.3</version>
 			<scope>test</scope>
 		</dependency>
 		<dependency>
 			<groupId>org.scalatest</groupId>
 			<artifactId>scalatest_${scala.binary.version}</artifactId>
-			<version>2.2.4</version>
 			<scope>test</scope>
 		</dependency>
 	</dependencies>
 
 	<build>
-		<directory>target</directory>
-		<outputDirectory>target/classes</outputDirectory>
-		<finalName>${project.artifactId}-${spark.binary.version}_${scala.binary.version}-${project.version}</finalName>
-		<testOutputDirectory>target/test-classes</testOutputDirectory>
 		<plugins>
-			<!-- For incremental compilation -->
-			<plugin>
-				<groupId>net.alchim31.maven</groupId>
-				<artifactId>scala-maven-plugin</artifactId>
-				<version>3.2.2</version>
-				<executions>
-					<execution>
-						<id>scala-compile-first</id>
-						<phase>process-resources</phase>
-						<goals>
-							<goal>compile</goal>
-						</goals>
-					</execution>
-					<execution>
-						<id>scala-test-compile-first</id>
-						<phase>process-test-resources</phase>
-						<goals>
-							<goal>testCompile</goal>
-						</goals>
-					</execution>
-				</executions>
-				<configuration>
-					<scalaVersion>${scala.version}</scalaVersion>
-					<recompileMode>incremental</recompileMode>
-					<useZincServer>true</useZincServer>
-					<args>
-						<arg>-unchecked</arg>
-						<arg>-deprecation</arg>
-						<!-- TODO: To enable this option, we need to fix many wornings -->
-						<!-- <arg>-feature</arg> -->
-					</args>
-					<jvmArgs>
-						<jvmArg>-Xms512m</jvmArg>
-						<jvmArg>-Xmx1024m</jvmArg>
-					</jvmArgs>
-				</configuration>
-			</plugin>
-			<!-- hivemall-spark_xx-xx.jar -->
-			<plugin>
-				<groupId>org.apache.maven.plugins</groupId>
-				<artifactId>maven-jar-plugin</artifactId>
-				<version>2.5</version>
-				<configuration>
-					<finalName>${project.artifactId}-${spark.binary.version}_${scala.binary.version}-${project.version}</finalName>
-					<outputDirectory>${project.parent.build.directory}</outputDirectory>
-				</configuration>
-			</plugin>
 			<!-- hivemall-spark_xx-xx-with-dependencies.jar including minimum dependencies -->
 			<plugin>
 				<groupId>org.apache.maven.plugins</groupId>
 				<artifactId>maven-shade-plugin</artifactId>
-				<version>3.1.0</version>
-				<executions>
-					<execution>
-						<id>jar-with-dependencies</id>
-						<phase>package</phase>
-						<goals>
-							<goal>shade</goal>
-						</goals>
-						<configuration>
-							<finalName>${project.artifactId}-${spark.binary.version}_${scala.binary.version}-${project.version}-with-dependencies</finalName>
-							<outputDirectory>${project.parent.build.directory}</outputDirectory>
-							<minimizeJar>false</minimizeJar>
-							<createDependencyReducedPom>false</createDependencyReducedPom>
-							<artifactSet>
-								<includes>
-									<include>org.apache.hivemall:hivemall-core</include>
-									<include>org.apache.hivemall:hivemall-xgboost</include>
-									<include>org.apache.hivemall:hivemall-spark-common</include>
-									<include>com.github.haifengl:smile-core</include>
-									<include>com.github.haifengl:smile-math</include>
-									<include>com.github.haifengl:smile-data</include>
-									<include>ml.dmlc:xgboost4j</include>
-									<include>com.esotericsoftware.kryo:kryo</include>
-								</includes>
-							</artifactSet>
-						</configuration>
-					</execution>
-				</executions>
 			</plugin>
 			<!-- disable surefire because there is no java test -->
 			<plugin>
 				<groupId>org.apache.maven.plugins</groupId>
 				<artifactId>maven-surefire-plugin</artifactId>
-				<version>2.7</version>
 				<configuration>
 					<skipTests>true</skipTests>
 				</configuration>
@@ -222,33 +124,6 @@
 			<plugin>
 				<groupId>org.scalatest</groupId>
 				<artifactId>scalatest-maven-plugin</artifactId>
-				<version>1.0</version>
-				<configuration>
-					<reportsDirectory>${project.build.directory}/surefire-reports</reportsDirectory>
-					<junitxml>.</junitxml>
-					<filereports>SparkTestSuite.txt</filereports>
-					<argLine>${spark.test.jvm.opts}</argLine>
-					<stderr />
-					<environmentVariables>
-						<SPARK_PREPEND_CLASSES>1</SPARK_PREPEND_CLASSES>
-						<SPARK_SCALA_VERSION>${scala.binary.version}</SPARK_SCALA_VERSION>
-						<SPARK_TESTING>1</SPARK_TESTING>
-						<JAVA_HOME>${env.JAVA_HOME}</JAVA_HOME>
-					</environmentVariables>
-					<systemProperties>
-						<log4j.configuration>file:src/test/resources/log4j.properties</log4j.configuration>
-						<derby.system.durability>test</derby.system.durability>
-						<java.awt.headless>true</java.awt.headless>
-						<java.io.tmpdir>${project.build.directory}/tmp</java.io.tmpdir>
-						<spark.testing>1</spark.testing>
-						<spark.ui.enabled>false</spark.ui.enabled>
-						<spark.ui.showConsoleProgress>false</spark.ui.showConsoleProgress>
-						<spark.unsafe.exceptionOnMemoryLeak>true</spark.unsafe.exceptionOnMemoryLeak>
-						<!-- Needed by sql/hive tests. -->
-						<test.src.tables>__not_used__</test.src.tables>
-					</systemProperties>
-					<tagsToExclude>${test.exclude.tags}</tagsToExclude>
-				</configuration>
 				<executions>
 					<execution>
 						<id>test</id>

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-2.1/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala
----------------------------------------------------------------------
diff --git a/spark/spark-2.1/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala b/spark/spark-2.1/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala
new file mode 100644
index 0000000..a6bbb4b
--- /dev/null
+++ b/spark/spark-2.1/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.spark.streaming
+
+import scala.reflect.ClassTag
+
+import org.apache.spark.ml.feature.HivemallLabeledPoint
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.{DataFrame, Row, SQLContext}
+import org.apache.spark.streaming.dstream.DStream
+
+final class HivemallStreamingOps(ds: DStream[HivemallLabeledPoint]) {
+
+  def predict[U: ClassTag](f: DataFrame => DataFrame)(implicit sqlContext: SQLContext)
+      : DStream[Row] = {
+    ds.transform[Row] { rdd: RDD[HivemallLabeledPoint] =>
+      f(sqlContext.createDataFrame(rdd)).rdd
+    }
+  }
+}
+
+object HivemallStreamingOps {
+
+  /**
+   * Implicitly inject the [[HivemallStreamingOps]] into [[DStream]].
+   */
+  implicit def dataFrameToHivemallStreamingOps(ds: DStream[HivemallLabeledPoint])
+      : HivemallStreamingOps = {
+    new HivemallStreamingOps(ds)
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-2.1/src/test/scala/org/apache/spark/sql/hive/HiveUdfSuite.scala
----------------------------------------------------------------------
diff --git a/spark/spark-2.1/src/test/scala/org/apache/spark/sql/hive/HiveUdfSuite.scala b/spark/spark-2.1/src/test/scala/org/apache/spark/sql/hive/HiveUdfSuite.scala
index eb4ec04..cecceca 100644
--- a/spark/spark-2.1/src/test/scala/org/apache/spark/sql/hive/HiveUdfSuite.scala
+++ b/spark/spark-2.1/src/test/scala/org/apache/spark/sql/hive/HiveUdfSuite.scala
@@ -35,7 +35,7 @@ final class HiveUdfWithFeatureSuite extends HivemallFeatureQueryTest {
 
     checkAnswer(
       sql(s"SELECT DISTINCT hivemall_version()"),
-      Row("0.5.0-incubating-SNAPSHOT")
+      Row("0.5.1-incubating-SNAPSHOT")
     )
 
     // sql("DROP TEMPORARY FUNCTION IF EXISTS hivemall_version")

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-2.1/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
----------------------------------------------------------------------
diff --git a/spark/spark-2.1/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala b/spark/spark-2.1/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
index 84ab0cd..8dad4c3 100644
--- a/spark/spark-2.1/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
+++ b/spark/spark-2.1/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
@@ -295,7 +295,7 @@ final class HivemallOpsWithFeatureSuite extends HivemallFeatureQueryTest {
   }
 
   test("misc - hivemall_version") {
-    checkAnswer(DummyInputData.select(hivemall_version()), Row("0.5.0-incubating-SNAPSHOT"))
+    checkAnswer(DummyInputData.select(hivemall_version()), Row("0.5.1-incubating-SNAPSHOT"))
   }
 
   test("misc - rowid") {

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-2.2/pom.xml
----------------------------------------------------------------------
diff --git a/spark/spark-2.2/pom.xml b/spark/spark-2.2/pom.xml
index 5366e1d..47aea92 100644
--- a/spark/spark-2.2/pom.xml
+++ b/spark/spark-2.2/pom.xml
@@ -16,40 +16,40 @@
   specific language governing permissions and limitations
   under the License.
 -->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
 	<modelVersion>4.0.0</modelVersion>
 
 	<parent>
 		<groupId>org.apache.hivemall</groupId>
-		<artifactId>hivemall</artifactId>
-		<version>0.5.0-incubating-SNAPSHOT</version>
-		<relativePath>../../pom.xml</relativePath>
+		<artifactId>hivemall-spark</artifactId>
+		<version>0.5.1-incubating-SNAPSHOT</version>
+		<relativePath>../pom.xml</relativePath>
 	</parent>
 
-	<artifactId>hivemall-spark</artifactId>
+	<artifactId>hivemall-spark2.2</artifactId>
 	<name>Hivemall on Spark 2.2</name>
 	<packaging>jar</packaging>
 
 	<properties>
-		<PermGen>64m</PermGen>
-		<MaxPermGen>512m</MaxPermGen>
-		<CodeCacheSize>512m</CodeCacheSize>
-		<main.basedir>${project.parent.basedir}</main.basedir>
+		<main.basedir>${project.parent.parent.basedir}</main.basedir>
+		<spark.version>2.2.0</spark.version>
+		<spark.binary.version>2.2</spark.binary.version>
+		<hadoop.version>2.6.5</hadoop.version>
+		<scalatest.jvm.opts>-ea -Xms768m -Xmx2g -XX:MetaspaceSize=128m -XX:MaxMetaspaceSize=512m -XX:ReservedCodeCacheSize=512m</scalatest.jvm.opts>
+		<maven.compiler.source>1.8</maven.compiler.source>
+		<maven.compiler.target>1.8</maven.compiler.target>
 	</properties>
 
 	<dependencies>
-		<!-- hivemall dependencies -->
+		<!-- compile scope -->
 		<dependency>
 			<groupId>org.apache.hivemall</groupId>
 			<artifactId>hivemall-core</artifactId>
-			<version>${project.version}</version>
 			<scope>compile</scope>
 		</dependency>
 		<dependency>
 			<groupId>org.apache.hivemall</groupId>
 			<artifactId>hivemall-xgboost</artifactId>
-			<version>${project.version}</version>
 			<scope>compile</scope>
 		</dependency>
 		<dependency>
@@ -59,22 +59,13 @@
 			<scope>compile</scope>
 		</dependency>
 
-		<!-- third-party dependencies -->
+		<!-- provided scope -->
 		<dependency>
 			<groupId>org.scala-lang</groupId>
 			<artifactId>scala-library</artifactId>
-			<version>${scala.version}</version>
-			<scope>compile</scope>
+			<scope>provided</scope>
 		</dependency>
 		<dependency>
-			<groupId>org.apache.commons</groupId>
-			<artifactId>commons-compress</artifactId>
-			<version>1.8</version>
-			<scope>compile</scope>
-		</dependency>
-
-		<!-- other provided dependencies -->
-		<dependency>
 			<groupId>org.apache.spark</groupId>
 			<artifactId>spark-core_${scala.binary.version}</artifactId>
 			<version>${spark.version}</version>
@@ -109,117 +100,26 @@
 		<dependency>
 			<groupId>org.apache.hivemall</groupId>
 			<artifactId>hivemall-mixserv</artifactId>
-			<version>${project.version}</version>
-			<scope>test</scope>
-		</dependency>
-		<dependency>
-			<groupId>org.xerial</groupId>
-			<artifactId>xerial-core</artifactId>
-			<version>3.2.3</version>
 			<scope>test</scope>
 		</dependency>
 		<dependency>
 			<groupId>org.scalatest</groupId>
 			<artifactId>scalatest_${scala.binary.version}</artifactId>
-			<version>2.2.4</version>
 			<scope>test</scope>
 		</dependency>
 	</dependencies>
 
 	<build>
-		<directory>target</directory>
-		<outputDirectory>target/classes</outputDirectory>
-		<finalName>${project.artifactId}-${spark.binary.version}_${scala.binary.version}-${project.version}</finalName>
-		<testOutputDirectory>target/test-classes</testOutputDirectory>
 		<plugins>
-			<!-- For incremental compilation -->
-			<plugin>
-				<groupId>net.alchim31.maven</groupId>
-				<artifactId>scala-maven-plugin</artifactId>
-				<version>3.2.2</version>
-				<executions>
-					<execution>
-						<id>scala-compile-first</id>
-						<phase>process-resources</phase>
-						<goals>
-							<goal>compile</goal>
-						</goals>
-					</execution>
-					<execution>
-						<id>scala-test-compile-first</id>
-						<phase>process-test-resources</phase>
-						<goals>
-							<goal>testCompile</goal>
-						</goals>
-					</execution>
-				</executions>
-				<configuration>
-					<scalaVersion>${scala.version}</scalaVersion>
-					<recompileMode>incremental</recompileMode>
-					<useZincServer>true</useZincServer>
-					<args>
-						<arg>-unchecked</arg>
-						<arg>-deprecation</arg>
-					<!-- TODO: To enable this option, we need to fix many warnings -->
-						<!-- <arg>-feature</arg> -->
-					</args>
-					<jvmArgs>
-						<jvmArg>-Xms1024m</jvmArg>
-						<jvmArg>-Xmx1024m</jvmArg>
-						<jvmArg>-XX:PermSize=${PermGen}</jvmArg>
-						<jvmArg>-XX:MaxPermSize=${MaxPermGen}</jvmArg>
-						<jvmArg>-XX:ReservedCodeCacheSize=${CodeCacheSize}</jvmArg>
-					</jvmArgs>
-				</configuration>
-			</plugin>
-			<!-- hivemall-spark_xx-xx.jar -->
-			<plugin>
-				<groupId>org.apache.maven.plugins</groupId>
-				<artifactId>maven-jar-plugin</artifactId>
-				<version>2.5</version>
-				<configuration>
-					<finalName>${project.artifactId}-${spark.binary.version}_${scala.binary.version}-${project.version}</finalName>
-					<outputDirectory>${project.parent.build.directory}</outputDirectory>
-				</configuration>
-			</plugin>
 			<!-- hivemall-spark_xx-xx-with-dependencies.jar including minimum dependencies -->
 			<plugin>
 				<groupId>org.apache.maven.plugins</groupId>
 				<artifactId>maven-shade-plugin</artifactId>
-				<version>3.1.0</version>
-				<executions>
-					<execution>
-						<id>jar-with-dependencies</id>
-						<phase>package</phase>
-						<goals>
-							<goal>shade</goal>
-						</goals>
-						<configuration>
-							<finalName>${project.artifactId}-${spark.binary.version}_${scala.binary.version}-${project.version}-with-dependencies</finalName>
-							<outputDirectory>${project.parent.build.directory}</outputDirectory>
-							<minimizeJar>false</minimizeJar>
-							<createDependencyReducedPom>false</createDependencyReducedPom>
-							<artifactSet>
-								<includes>
-									<include>org.apache.hivemall:hivemall-core</include>
-									<include>org.apache.hivemall:hivemall-xgboost</include>
-									<include>org.apache.hivemall:hivemall-spark-common</include>
-									<include>com.github.haifengl:smile-core</include>
-									<include>com.github.haifengl:smile-math</include>
-									<include>com.github.haifengl:smile-data</include>
-									<include>ml.dmlc:xgboost4j</include>
-									<include>com.esotericsoftware.kryo:kryo</include>
-								</includes>
-							</artifactSet>
-						</configuration>
-					</execution>
-				</executions>
 			</plugin>
 			<!-- disable surefire because there is no java test -->
 			<plugin>
 				<groupId>org.apache.maven.plugins</groupId>
 				<artifactId>maven-surefire-plugin</artifactId>
-				<version>2.7</version>
 				<configuration>
 					<skipTests>true</skipTests>
 				</configuration>
@@ -228,33 +128,6 @@
 			<plugin>
 				<groupId>org.scalatest</groupId>
 				<artifactId>scalatest-maven-plugin</artifactId>
-				<version>1.0</version>
-				<configuration>
-					<reportsDirectory>${project.build.directory}/surefire-reports</reportsDirectory>
-					<junitxml>.</junitxml>
-					<filereports>SparkTestSuite.txt</filereports>
-					<argLine>-ea -Xmx2g -XX:MaxPermSize=${MaxPermGen} -XX:ReservedCodeCacheSize=${CodeCacheSize}</argLine>
-					<stderr />
-					<environmentVariables>
-						<SPARK_PREPEND_CLASSES>1</SPARK_PREPEND_CLASSES>
-						<SPARK_SCALA_VERSION>${scala.binary.version}</SPARK_SCALA_VERSION>
-						<SPARK_TESTING>1</SPARK_TESTING>
-						<JAVA_HOME>${env.JAVA_HOME}</JAVA_HOME>
-					</environmentVariables>
-					<systemProperties>
-						<log4j.configuration>file:src/test/resources/log4j.properties</log4j.configuration>
-						<derby.system.durability>test</derby.system.durability>
-						<java.awt.headless>true</java.awt.headless>
-						<java.io.tmpdir>${project.build.directory}/tmp</java.io.tmpdir>
-						<spark.testing>1</spark.testing>
-						<spark.ui.enabled>false</spark.ui.enabled>
-						<spark.ui.showConsoleProgress>false</spark.ui.showConsoleProgress>
-						<spark.unsafe.exceptionOnMemoryLeak>true</spark.unsafe.exceptionOnMemoryLeak>
-						<!-- Needed by sql/hive tests. -->
-						<test.src.tables>__not_used__</test.src.tables>
-					</systemProperties>
-					<tagsToExclude>${test.exclude.tags}</tagsToExclude>
-				</configuration>
 				<executions>
 					<execution>
 						<id>test</id>
@@ -264,6 +137,16 @@
 					</execution>
 				</executions>
 			</plugin>
+			<plugin>
+				<groupId>org.scalatest</groupId>
+				<artifactId>scalatest-maven-plugin</artifactId>
+				<configuration>
+					<environmentVariables>
+						<JAVA_HOME>${env.JAVA8_HOME}</JAVA_HOME>
+						<PATH>${env.JAVA8_HOME}/bin:${env.PATH}</PATH>
+					</environmentVariables>
+				</configuration>
+			</plugin>
 		</plugins>
 	</build>
 </project>

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-2.2/src/main/scala/org/apache/spark/sql/hive/HivemallGroupedDataset.scala
----------------------------------------------------------------------
diff --git a/spark/spark-2.2/src/main/scala/org/apache/spark/sql/hive/HivemallGroupedDataset.scala b/spark/spark-2.2/src/main/scala/org/apache/spark/sql/hive/HivemallGroupedDataset.scala
index 00617b7..2982d9c 100644
--- a/spark/spark-2.2/src/main/scala/org/apache/spark/sql/hive/HivemallGroupedDataset.scala
+++ b/spark/spark-2.2/src/main/scala/org/apache/spark/sql/hive/HivemallGroupedDataset.scala
@@ -127,7 +127,7 @@ final class HivemallGroupedDataset(groupBy: RelationalGroupedDataset) {
    * @group ensemble
    */
   def max_label(score: String, label: String): DataFrame = {
-    checkType(score, DoubleType)
+    // checkType(score, DoubleType)
     checkType(label, StringType)
     val udaf = HiveUDAFFunction(
         "max_label",

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-2.2/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala
----------------------------------------------------------------------
diff --git a/spark/spark-2.2/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala b/spark/spark-2.2/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala
new file mode 100644
index 0000000..a6bbb4b
--- /dev/null
+++ b/spark/spark-2.2/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.spark.streaming
+
+import scala.reflect.ClassTag
+
+import org.apache.spark.ml.feature.HivemallLabeledPoint
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.{DataFrame, Row, SQLContext}
+import org.apache.spark.streaming.dstream.DStream
+
+final class HivemallStreamingOps(ds: DStream[HivemallLabeledPoint]) {
+
+  def predict[U: ClassTag](f: DataFrame => DataFrame)(implicit sqlContext: SQLContext)
+      : DStream[Row] = {
+    ds.transform[Row] { rdd: RDD[HivemallLabeledPoint] =>
+      f(sqlContext.createDataFrame(rdd)).rdd
+    }
+  }
+}
+
+object HivemallStreamingOps {
+
+  /**
+   * Implicitly inject the [[HivemallStreamingOps]] into [[DStream]].
+   */
+  implicit def dataFrameToHivemallStreamingOps(ds: DStream[HivemallLabeledPoint])
+      : HivemallStreamingOps = {
+    new HivemallStreamingOps(ds)
+  }
+}
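
This spark-2.2 copy is byte-identical to the spark-2.1 file above (same blob
a6bbb4b). As a small sketch of what the f argument to predict might look like
in practice, assuming the DataFrame derived from HivemallLabeledPoint exposes
a label column and an array-typed features column (hypothetical names):

    import org.apache.spark.sql.DataFrame

    // Any DataFrame => DataFrame works; a plain projection stands in here
    // for a real Hivemall scoring query. Column names are assumptions.
    val scoreFn: DataFrame => DataFrame =
      _.selectExpr("label", "size(features) AS num_features")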

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-2.2/src/test/scala/org/apache/spark/sql/hive/HiveUdfSuite.scala
----------------------------------------------------------------------
diff --git a/spark/spark-2.2/src/test/scala/org/apache/spark/sql/hive/HiveUdfSuite.scala b/spark/spark-2.2/src/test/scala/org/apache/spark/sql/hive/HiveUdfSuite.scala
index 1e1c574..f16eae0 100644
--- a/spark/spark-2.2/src/test/scala/org/apache/spark/sql/hive/HiveUdfSuite.scala
+++ b/spark/spark-2.2/src/test/scala/org/apache/spark/sql/hive/HiveUdfSuite.scala
@@ -36,7 +36,7 @@ final class HiveUdfWithFeatureSuite extends HivemallFeatureQueryTest {
 
     checkAnswer(
       sql(s"SELECT DISTINCT hivemall_version()"),
-      Row("0.5.0-incubating-SNAPSHOT")
+      Row("0.5.1-incubating-SNAPSHOT")
     )
 
     // sql("DROP TEMPORARY FUNCTION IF EXISTS hivemall_version")

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-2.2/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
----------------------------------------------------------------------
diff --git a/spark/spark-2.2/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala b/spark/spark-2.2/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
index f73cb75..f2b7b6e 100644
--- a/spark/spark-2.2/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
+++ b/spark/spark-2.2/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
@@ -562,7 +562,7 @@ class HivemallOpsWithFeatureSuite extends HivemallFeatureQueryTest {
   }
 
   test("misc - hivemall_version") {
-    checkAnswer(DummyInputData.select(hivemall_version()), Row("0.5.0-incubating-SNAPSHOT"))
+    checkAnswer(DummyInputData.select(hivemall_version()), Row("0.5.1-incubating-SNAPSHOT"))
   }
 
   test("misc - rowid") {

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-common/pom.xml
----------------------------------------------------------------------
diff --git a/spark/spark-common/pom.xml b/spark/spark-common/pom.xml
deleted file mode 100644
index 50670d3..0000000
--- a/spark/spark-common/pom.xml
+++ /dev/null
@@ -1,146 +0,0 @@
-<!--
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-	<modelVersion>4.0.0</modelVersion>
-
-	<parent>
-		<groupId>org.apache.hivemall</groupId>
-		<artifactId>hivemall</artifactId>
-		<version>0.5.0-incubating-SNAPSHOT</version>
-		<relativePath>../../pom.xml</relativePath>
-	</parent>
-
-	<artifactId>hivemall-spark-common</artifactId>
-	<name>Hivemall on Spark Common</name>
-	<packaging>jar</packaging>
-
-	<properties>
-		<main.basedir>${project.parent.basedir}</main.basedir>
-	</properties>
-
-	<dependencies>
-		<!-- hivemall dependencies -->
-		<dependency>
-			<groupId>org.apache.hivemall</groupId>
-			<artifactId>hivemall-core</artifactId>
-			<version>${project.version}</version>
-			<scope>compile</scope>
-		</dependency>
-
-		<!-- other provided dependencies -->
-		<dependency>
-			<groupId>org.apache.spark</groupId>
-			<artifactId>spark-sql_${scala.binary.version}</artifactId>
-			<version>${spark.version}</version>
-			<scope>provided</scope>
-		</dependency>
-		<dependency>
-			<groupId>org.apache.spark</groupId>
-			<artifactId>spark-hive_${scala.binary.version}</artifactId>
-			<version>${spark.version}</version>
-			<scope>provided</scope>
-		</dependency>
-		<dependency>
-			<groupId>org.apache.spark</groupId>
-			<artifactId>spark-streaming_${scala.binary.version}</artifactId>
-			<version>${spark.version}</version>
-			<scope>provided</scope>
-		</dependency>
-
-		<dependency>
-			<groupId>org.apache.hadoop</groupId>
-			<artifactId>hadoop-common</artifactId>
-			<version>${hadoop.version}</version>
-			<scope>provided</scope>
-		</dependency>
-		<dependency>
-			<groupId>org.apache.hadoop</groupId>
-			<artifactId>hadoop-mapreduce-client-core</artifactId>
-			<version>${hadoop.version}</version>
-			<scope>provided</scope>
-		</dependency>
-		<dependency>
-			<groupId>org.apache.hive</groupId>
-			<artifactId>hive-exec</artifactId>
-			<version>${hive.version}</version>
-			<scope>provided</scope>
-		</dependency>
-	</dependencies>
-
-	<build>
-		<directory>target</directory>
-		<outputDirectory>target/classes</outputDirectory>
-		<finalName>${project.artifactId}-${project.version}</finalName>
-		<testOutputDirectory>target/test-classes</testOutputDirectory>
-		<plugins>
-			<!-- For resolving spark binary incompatibility -->
-			<plugin>
-				<artifactId>maven-clean-plugin</artifactId>
-				<version>3.0.0</version>
-				<executions>
-					<execution>
-						<phase>initialize</phase>
-						<goals>
-							<goal>clean</goal>
-						</goals>
-					</execution>
-				</executions>
-			</plugin>
-			<!-- For incremental compilation -->
-			<plugin>
-				<groupId>net.alchim31.maven</groupId>
-				<artifactId>scala-maven-plugin</artifactId>
-				<version>3.2.2</version>
-				<executions>
-					<execution>
-						<id>scala-compile-first</id>
-						<phase>process-resources</phase>
-						<goals>
-							<goal>compile</goal>
-						</goals>
-					</execution>
-					<execution>
-						<id>scala-test-compile-first</id>
-						<phase>process-test-resources</phase>
-						<goals>
-							<goal>testCompile</goal>
-						</goals>
-					</execution>
-				</executions>
-				<configuration>
-					<scalaVersion>${scala.version}</scalaVersion>
-					<recompileMode>incremental</recompileMode>
-					<useZincServer>true</useZincServer>
-					<args>
-						<arg>-unchecked</arg>
-						<arg>-deprecation</arg>
-						<!-- TODO: To enable this option, we need to fix many warnings -->
-						<!-- <arg>-feature</arg> -->
-					</args>
-					<jvmArgs>
-						<jvmArg>-Xms512m</jvmArg>
-						<jvmArg>-Xmx1024m</jvmArg>
-					</jvmArgs>
-				</configuration>
-			</plugin>
-		</plugins>
-	</build>
-</project>
-

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-common/scalastyle-config.xml
----------------------------------------------------------------------
diff --git a/spark/spark-common/scalastyle-config.xml b/spark/spark-common/scalastyle-config.xml
deleted file mode 100644
index 13d1c47..0000000
--- a/spark/spark-common/scalastyle-config.xml
+++ /dev/null
@@ -1,333 +0,0 @@
-<!--
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-<!--
-If you wish to turn off checking for a section of code, you can put a comment in the source
-before and after the section, with the following syntax:
-
-  // scalastyle:off
-  ...  // stuff that breaks the styles
-  // scalastyle:on
-
-You can also disable only one rule, by specifying its rule id, as specified in:
-  http://www.scalastyle.org/rules-0.7.0.html
-
-  // scalastyle:off no.finalize
-  override def finalize(): Unit = ...
-  // scalastyle:on no.finalize
-
-This file is divided into 3 sections:
- (1) rules that we enforce.
- (2) rules that we would like to enforce, but haven't cleaned up the codebase to turn on yet
-     (or we need to make the scalastyle rule more configurable).
- (3) rules that we don't want to enforce.
--->
-
-<scalastyle>
-  <name>Scalastyle standard configuration</name>
-
-  <!-- ================================================================================ -->
-  <!--                               rules we enforce                                   -->
-  <!-- ================================================================================ -->
-
-  <check level="error" class="org.scalastyle.file.FileTabChecker" enabled="true"></check>
-
-  <check level="error" class="org.scalastyle.file.HeaderMatchesChecker" enabled="true">
-    <parameters>
-       <parameter name="header"><![CDATA[/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */]]></parameter>
-    </parameters>
-  </check>
-
-  <check level="error" class="org.scalastyle.scalariform.SpacesAfterPlusChecker" enabled="true"></check>
-
-  <check level="error" class="org.scalastyle.scalariform.SpacesBeforePlusChecker" enabled="true"></check>
-
-  <check level="error" class="org.scalastyle.file.WhitespaceEndOfLineChecker" enabled="true"></check>
-
-  <check level="error" class="org.scalastyle.file.FileLineLengthChecker" enabled="true">
-    <parameters>
-      <parameter name="maxLineLength"><![CDATA[100]]></parameter>
-      <parameter name="tabSize"><![CDATA[2]]></parameter>
-      <parameter name="ignoreImports">true</parameter>
-    </parameters>
-  </check>
-
-  <check level="error" class="org.scalastyle.scalariform.ClassNamesChecker" enabled="true">
-    <parameters><parameter name="regex"><![CDATA[[A-Z][A-Za-z]*]]></parameter></parameters>
-  </check>
-
-  <check level="error" class="org.scalastyle.scalariform.ObjectNamesChecker" enabled="true">
-    <parameters><parameter name="regex"><![CDATA[[A-Z][A-Za-z]*]]></parameter></parameters>
-  </check>
-
-  <check level="error" class="org.scalastyle.scalariform.PackageObjectNamesChecker" enabled="true">
-    <parameters><parameter name="regex"><![CDATA[^[a-z][A-Za-z]*$]]></parameter></parameters>
-  </check>
-
-  <check level="error" class="org.scalastyle.scalariform.ParameterNumberChecker" enabled="true">
-    <parameters><parameter name="maxParameters"><![CDATA[10]]></parameter></parameters>
-  </check>
-
-  <check level="error" class="org.scalastyle.scalariform.NoFinalizeChecker" enabled="true"></check>
-
-  <check level="error" class="org.scalastyle.scalariform.CovariantEqualsChecker" enabled="true"></check>
-
-  <check level="error" class="org.scalastyle.scalariform.StructuralTypeChecker" enabled="true"></check>
-
-  <check level="error" class="org.scalastyle.scalariform.UppercaseLChecker" enabled="true"></check>
-
-  <check level="error" class="org.scalastyle.scalariform.IfBraceChecker" enabled="true">
-    <parameters>
-      <parameter name="singleLineAllowed"><![CDATA[true]]></parameter>
-      <parameter name="doubleLineAllowed"><![CDATA[true]]></parameter>
-    </parameters>
-  </check>
-
-  <check level="error" class="org.scalastyle.scalariform.PublicMethodsHaveTypeChecker" enabled="true"></check>
-
-  <check level="error" class="org.scalastyle.file.NewLineAtEofChecker" enabled="true"></check>
-
-  <check customId="nonascii" level="error" class="org.scalastyle.scalariform.NonASCIICharacterChecker" enabled="true"></check>
-
-  <check level="error" class="org.scalastyle.scalariform.SpaceAfterCommentStartChecker" enabled="true"></check>
-
-  <check level="error" class="org.scalastyle.scalariform.EnsureSingleSpaceBeforeTokenChecker" enabled="true">
-   <parameters>
-     <parameter name="tokens">ARROW, EQUALS, ELSE, TRY, CATCH, FINALLY, LARROW, RARROW</parameter>
-   </parameters>
-  </check>
-
-  <check level="error" class="org.scalastyle.scalariform.EnsureSingleSpaceAfterTokenChecker" enabled="true">
-    <parameters>
-     <parameter name="tokens">ARROW, EQUALS, COMMA, COLON, IF, ELSE, DO, WHILE, FOR, MATCH, TRY, CATCH, FINALLY, LARROW, RARROW</parameter>
-    </parameters>
-  </check>
-
-  <!-- ??? usually shouldn't be checked into the code base. -->
-  <check level="error" class="org.scalastyle.scalariform.NotImplementedErrorUsage" enabled="true"></check>
-
-  <!-- As of SPARK-7977 all printlns need to be wrapped in '// scalastyle:off/on println' -->
-  <check customId="println" level="error" class="org.scalastyle.scalariform.TokenChecker" enabled="true">
-    <parameters><parameter name="regex">^println$</parameter></parameters>
-    <customMessage><![CDATA[Are you sure you want to println? If yes, wrap the code block with
-      // scalastyle:off println
-      println(...)
-      // scalastyle:on println]]></customMessage>
-  </check>
-
-  <check customId="visiblefortesting" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
-    <parameters><parameter name="regex">@VisibleForTesting</parameter></parameters>
-    <customMessage><![CDATA[
-      @VisibleForTesting causes classpath issues. Please note this in the java doc instead (SPARK-11615).
-    ]]></customMessage>
-  </check>
-
-  <check customId="runtimeaddshutdownhook" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
-    <parameters><parameter name="regex">Runtime\.getRuntime\.addShutdownHook</parameter></parameters>
-    <customMessage><![CDATA[
-      Are you sure that you want to use Runtime.getRuntime.addShutdownHook? In most cases, you should use
-      ShutdownHookManager.addShutdownHook instead.
-      If you must use Runtime.getRuntime.addShutdownHook, wrap the code block with
-      // scalastyle:off runtimeaddshutdownhook
-      Runtime.getRuntime.addShutdownHook(...)
-      // scalastyle:on runtimeaddshutdownhook
-    ]]></customMessage>
-  </check>
-
-  <check customId="mutablesynchronizedbuffer" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
-    <parameters><parameter name="regex">mutable\.SynchronizedBuffer</parameter></parameters>
-    <customMessage><![CDATA[
-      Are you sure that you want to use mutable.SynchronizedBuffer? In most cases, you should use
-      java.util.concurrent.ConcurrentLinkedQueue instead.
-      If you must use mutable.SynchronizedBuffer, wrap the code block with
-      // scalastyle:off mutablesynchronizedbuffer
-      mutable.SynchronizedBuffer[...]
-      // scalastyle:on mutablesynchronizedbuffer
-    ]]></customMessage>
-  </check>
-
-  <check customId="classforname" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
-    <parameters><parameter name="regex">Class\.forName</parameter></parameters>
-    <customMessage><![CDATA[
-      Are you sure that you want to use Class.forName? In most cases, you should use Utils.classForName instead.
-      If you must use Class.forName, wrap the code block with
-      // scalastyle:off classforname
-      Class.forName(...)
-      // scalastyle:on classforname
-    ]]></customMessage>
-  </check>
-
-  <check customId="awaitresult" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
-    <parameters><parameter name="regex">Await\.result</parameter></parameters>
-    <customMessage><![CDATA[
-      Are you sure that you want to use Await.result? In most cases, you should use ThreadUtils.awaitResult instead.
-      If you must use Await.result, wrap the code block with
-      // scalastyle:off awaitresult
-      Await.result(...)
-      // scalastyle:on awaitresult
-    ]]></customMessage>
-  </check>
-
-  <!-- As of SPARK-9613 JavaConversions should be replaced with JavaConverters -->
-  <check customId="javaconversions" level="error" class="org.scalastyle.scalariform.TokenChecker" enabled="true">
-    <parameters><parameter name="regex">JavaConversions</parameter></parameters>
-    <customMessage>Instead of importing implicits in scala.collection.JavaConversions._, import
-    scala.collection.JavaConverters._ and use .asScala / .asJava methods</customMessage>
-  </check>
-
-  <check customId="commonslang2" level="error" class="org.scalastyle.scalariform.TokenChecker" enabled="true">
-    <parameters><parameter name="regex">org\.apache\.commons\.lang\.</parameter></parameters>
-    <customMessage>Use Commons Lang 3 classes (package org.apache.commons.lang3.*) instead
-    of Commons Lang 2 (package org.apache.commons.lang.*)</customMessage>
-  </check>
-
-  <check level="error" class="org.scalastyle.scalariform.ImportOrderChecker" enabled="true">
-    <parameters>
-      <parameter name="groups">java,scala,3rdParty,spark</parameter>
-      <parameter name="group.java">javax?\..*</parameter>
-      <parameter name="group.scala">scala\..*</parameter>
-      <parameter name="group.3rdParty">(?!org\.apache\.spark\.).*</parameter>
-      <parameter name="group.spark">org\.apache\.spark\..*</parameter>
-    </parameters>
-  </check>
-
-  <check level="error" class="org.scalastyle.scalariform.DisallowSpaceBeforeTokenChecker" enabled="true">
-    <parameters>
-      <parameter name="tokens">COMMA</parameter>
-    </parameters>
-  </check>
-
-  <!-- SPARK-3854: Single Space between ')' and '{' -->
-  <check customId="SingleSpaceBetweenRParenAndLCurlyBrace" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
-    <parameters><parameter name="regex">\)\{</parameter></parameters>
-    <customMessage><![CDATA[
-      Single Space between ')' and `{`.
-    ]]></customMessage>
-  </check>
-
-  <check customId="NoScalaDoc" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
-    <parameters><parameter name="regex">(?m)^(\s*)/[*][*].*$(\r|)\n^\1  [*]</parameter></parameters>
-    <customMessage>Use Javadoc style indentation for multiline comments</customMessage>
-  </check>
-
-  <check customId="OmitBracesInCase" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
-    <parameters><parameter name="regex">case[^\n>]*=>\s*\{</parameter></parameters>
-    <customMessage>Omit braces in case clauses.</customMessage>
-  </check>
-
-  <!-- SPARK-16877: Avoid Java annotations -->
-  <check customId="OverrideJavaCase" level="error" class="org.scalastyle.scalariform.TokenChecker" enabled="true">
-    <parameters><parameter name="regex">^Override$</parameter></parameters>
-    <customMessage>override modifier should be used instead of @java.lang.Override.</customMessage>
-  </check>
-
-  <check level="error" class="org.scalastyle.scalariform.DeprecatedJavaChecker" enabled="true"></check>
-
-  <!-- ================================================================================ -->
-  <!--       rules we'd like to enforce, but haven't cleaned up the codebase yet        -->
-  <!-- ================================================================================ -->
-
-  <!-- We cannot turn the following two on, because it'd fail a lot of string interpolation use cases. -->
-  <!-- Ideally the following two rules should be configurable to rule out string interpolation. -->
-  <check level="error" class="org.scalastyle.scalariform.NoWhitespaceBeforeLeftBracketChecker" enabled="false"></check>
-  <check level="error" class="org.scalastyle.scalariform.NoWhitespaceAfterLeftBracketChecker" enabled="false"></check>
-
-  <!-- This breaks symbolic method names so we don't turn it on. -->
-  <!-- Maybe we should update it to allow basic symbolic names, and then we are good to go. -->
-  <check level="error" class="org.scalastyle.scalariform.MethodNamesChecker" enabled="false">
-    <parameters>
-    <parameter name="regex"><![CDATA[^[a-z][A-Za-z0-9]*$]]></parameter>
-    </parameters>
-  </check>
-
-  <!-- Should turn this on, but we have a few places that need to be fixed first -->
-  <check level="error" class="org.scalastyle.scalariform.EqualsHashCodeChecker" enabled="true"></check>
-
-  <!-- ================================================================================ -->
-  <!--                               rules we don't want                                -->
-  <!-- ================================================================================ -->
-
-  <check level="error" class="org.scalastyle.scalariform.IllegalImportsChecker" enabled="false">
-    <parameters><parameter name="illegalImports"><![CDATA[sun._,java.awt._]]></parameter></parameters>
-  </check>
-
-  <!-- We want the opposite of this: NewLineAtEofChecker -->
-  <check level="error" class="org.scalastyle.file.NoNewLineAtEofChecker" enabled="false"></check>
-
-  <!-- This one complains about all kinds of random things. Disable. -->
-  <check level="error" class="org.scalastyle.scalariform.SimplifyBooleanExpressionChecker" enabled="false"></check>
-
-  <!-- We use return quite a bit for control flows and guards -->
-  <check level="error" class="org.scalastyle.scalariform.ReturnChecker" enabled="false"></check>
-
-  <!-- We use null a lot in low level code and to interface with 3rd party code -->
-  <check level="error" class="org.scalastyle.scalariform.NullChecker" enabled="false"></check>
-
-  <!-- Doesn't seem super big deal here ... -->
-  <check level="error" class="org.scalastyle.scalariform.NoCloneChecker" enabled="false"></check>
-
-  <!-- Doesn't seem super big deal here ... -->
-  <check level="error" class="org.scalastyle.file.FileLengthChecker" enabled="false">
-    <parameters><parameter name="maxFileLength">800</parameter></parameters>
-  </check>
-
-  <!-- Doesn't seem super big deal here ... -->
-  <check level="error" class="org.scalastyle.scalariform.NumberOfTypesChecker" enabled="false">
-    <parameters><parameter name="maxTypes">30</parameter></parameters>
-  </check>
-
-  <!-- Doesn't seem super big deal here ... -->
-  <check level="error" class="org.scalastyle.scalariform.CyclomaticComplexityChecker" enabled="false">
-    <parameters><parameter name="maximum">10</parameter></parameters>
-  </check>
-
-  <!-- Doesn't seem super big deal here ... -->
-  <check level="error" class="org.scalastyle.scalariform.MethodLengthChecker" enabled="false">
-    <parameters><parameter name="maxLength">50</parameter></parameters>
-  </check>
-
-  <!-- Not exactly feasible to enforce this right now. -->
-  <!-- It is also infrequent that somebody introduces a new class with a lot of methods. -->
-  <check level="error" class="org.scalastyle.scalariform.NumberOfMethodsInTypeChecker" enabled="false">
-    <parameters><parameter name="maxMethods"><![CDATA[30]]></parameter></parameters>
-  </check>
-
-  <!-- Doesn't seem super big deal here, and we have a lot of magic numbers ... -->
-  <check level="error" class="org.scalastyle.scalariform.MagicNumberChecker" enabled="false">
-    <parameters><parameter name="ignore">-1,0,1,2,3</parameter></parameters>
-  </check>
-
-</scalastyle>