Posted to commits@hivemall.apache.org by my...@apache.org on 2017/10/16 12:06:27 UTC
[1/4] incubator-hivemall git commit: Close #122: [HIVEMALL-147][Spark] Support all Hivemall functions of v0.5-rc.1 in Spark Dataframe
Repository: incubator-hivemall
Updated Branches:
refs/heads/master fdf702143 -> e8abae257
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/1680c42c/spark/spark-2.2/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
----------------------------------------------------------------------
diff --git a/spark/spark-2.2/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala b/spark/spark-2.2/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
index 6b5d4cd..de2481c 100644
--- a/spark/spark-2.2/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
+++ b/spark/spark-2.2/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
@@ -31,7 +31,8 @@ import org.apache.spark.sql.types._
import org.apache.spark.test.TestFPWrapper._
import org.apache.spark.test.TestUtils
-final class HivemallOpsWithFeatureSuite extends HivemallFeatureQueryTest {
+
+class HivemallOpsWithFeatureSuite extends HivemallFeatureQueryTest {
test("anomaly") {
import hiveContext.implicits._
@@ -42,61 +43,113 @@ final class HivemallOpsWithFeatureSuite extends HivemallFeatureQueryTest {
}
test("knn.similarity") {
- val df1 = DummyInputData.select(cosine_sim(lit2(Seq(1, 2, 3, 4)), lit2(Seq(3, 4, 5, 6))))
- assert(df1.collect.apply(0).getFloat(0) ~== 0.500f)
+ import hiveContext.implicits._
+
+ val df1 = DummyInputData.select(
+ cosine_similarity(typedLit(Seq(1, 2, 3, 4)), typedLit(Seq(3, 4, 5, 6))))
+ val rows1 = df1.collect
+ assert(rows1.length == 1)
+ assert(rows1(0).getFloat(0) ~== 0.500f)
- val df2 = DummyInputData.select(jaccard(lit(5), lit(6)))
- assert(df2.collect.apply(0).getFloat(0) ~== 0.96875f)
+ val df2 = DummyInputData.select(jaccard_similarity(lit(5), lit(6)))
+ val rows2 = df2.collect
+ assert(rows2.length == 1)
+ assert(rows2(0).getFloat(0) ~== 0.96875f)
- val df3 = DummyInputData.select(angular_similarity(lit2(Seq(1, 2, 3)), lit2(Seq(4, 5, 6))))
- assert(df3.collect.apply(0).getFloat(0) ~== 0.500f)
+ val df3 = DummyInputData.select(
+ angular_similarity(typedLit(Seq(1, 2, 3)), typedLit(Seq(4, 5, 6))))
+ val rows3 = df3.collect
+ assert(rows3.length == 1)
+ assert(rows3(0).getFloat(0) ~== 0.500f)
- val df4 = DummyInputData.select(euclid_similarity(lit2(Seq(5, 3, 1)), lit2(Seq(2, 8, 3))))
- assert(df4.collect.apply(0).getFloat(0) ~== 0.33333334f)
+ val df4 = DummyInputData.select(
+ euclid_similarity(typedLit(Seq(5, 3, 1)), typedLit(Seq(2, 8, 3))))
+ val rows4 = df4.collect
+ assert(rows4.length == 1)
+ assert(rows4(0).getFloat(0) ~== 0.33333334f)
val df5 = DummyInputData.select(distance2similarity(lit(1.0)))
- assert(df5.collect.apply(0).getFloat(0) ~== 0.5f)
+ val rows5 = df5.collect
+ assert(rows5.length == 1)
+ assert(rows5(0).getFloat(0) ~== 0.5f)
+
+ val df6 = Seq((Seq("1:0.3", "4:0.1"), Map(0 -> 0.5))).toDF("a", "b")
+ // TODO: Currently, just check if no exception thrown
+ assert(df6.dimsum_mapper(df6("a"), df6("b")).collect.isEmpty)
}
test("knn.distance") {
val df1 = DummyInputData.select(hamming_distance(lit(1), lit(3)))
- checkAnswer(df1, Row(1) :: Nil)
+ checkAnswer(df1, Row(1))
val df2 = DummyInputData.select(popcnt(lit(1)))
- checkAnswer(df2, Row(1) :: Nil)
-
- val df3 = DummyInputData.select(kld(lit(0.1), lit(0.5), lit(0.2), lit(0.5)))
- assert(df3.collect.apply(0).getDouble(0) ~== 0.01)
-
- val df4 = DummyInputData.select(
- euclid_distance(lit2(Seq("0.1", "0.5")), lit2(Seq("0.2", "0.5"))))
- assert(df4.collect.apply(0).getFloat(0) ~== 1.4142135f)
-
- val df5 = DummyInputData.select(
- cosine_distance(lit2(Seq("0.8", "0.3")), lit2(Seq("0.4", "0.6"))))
- assert(df5.collect.apply(0).getFloat(0) ~== 1.0f)
-
- val df6 = DummyInputData.select(
- angular_distance(lit2(Seq("0.1", "0.1")), lit2(Seq("0.3", "0.8"))))
- assert(df6.collect.apply(0).getFloat(0) ~== 0.50f)
-
- val df7 = DummyInputData.select(
- manhattan_distance(lit2(Seq("0.7", "0.8")), lit2(Seq("0.5", "0.6"))))
- assert(df7.collect.apply(0).getFloat(0) ~== 4.0f)
-
- val df8 = DummyInputData.select(
- minkowski_distance(lit2(Seq("0.1", "0.2")), lit2(Seq("0.2", "0.2")), lit2(1.0)))
- assert(df8.collect.apply(0).getFloat(0) ~== 2.0f)
+ checkAnswer(df2, Row(1))
+
+ val rows3 = DummyInputData.select(kld(lit(0.1), lit(0.5), lit(0.2), lit(0.5))).collect
+ assert(rows3.length === 1)
+ assert(rows3(0).getDouble(0) ~== 0.01)
+
+ val rows4 = DummyInputData.select(
+ euclid_distance(typedLit(Seq("0.1", "0.5")), typedLit(Seq("0.2", "0.5")))).collect
+ assert(rows4.length === 1)
+ assert(rows4(0).getFloat(0) ~== 1.4142135f)
+
+ val rows5 = DummyInputData.select(
+ cosine_distance(typedLit(Seq("0.8", "0.3")), typedLit(Seq("0.4", "0.6")))).collect
+ assert(rows5.length === 1)
+ assert(rows5(0).getFloat(0) ~== 1.0f)
+
+ val rows6 = DummyInputData.select(
+ angular_distance(typedLit(Seq("0.1", "0.1")), typedLit(Seq("0.3", "0.8")))).collect
+ assert(rows6.length === 1)
+ assert(rows6(0).getFloat(0) ~== 0.50f)
+
+ val rows7 = DummyInputData.select(
+ manhattan_distance(typedLit(Seq("0.7", "0.8")), typedLit(Seq("0.5", "0.6")))).collect
+ assert(rows7.length === 1)
+ assert(rows7(0).getFloat(0) ~== 4.0f)
+
+ val rows8 = DummyInputData.select(
+ minkowski_distance(typedLit(Seq("0.1", "0.2")), typedLit(Seq("0.2", "0.2")), typedLit(1.0))
+ ).collect
+ assert(rows8.length === 1)
+ assert(rows8(0).getFloat(0) ~== 2.0f)
+
+ val rows9 = DummyInputData.select(
+ jaccard_distance(typedLit(Seq("0.3", "0.8")), typedLit(Seq("0.1", "0.2")))).collect
+ assert(rows9.length === 1)
+ assert(rows9(0).getFloat(0) ~== 1.0f)
}
test("knn.lsh") {
import hiveContext.implicits._
- assert(IntList2Data.minhash(lit(1), $"target").count() > 0)
-
- assert(DummyInputData.select(bbit_minhash(lit2(Seq("1:0.1", "2:0.5")), lit(false))).count
- == DummyInputData.count)
- assert(DummyInputData.select(minhashes(lit2(Seq("1:0.1", "2:0.5")), lit(false))).count
- == DummyInputData.count)
+ checkAnswer(
+ IntList2Data.minhash(lit(1), $"target"),
+ Row(1016022700, 1) ::
+ Row(1264890450, 1) ::
+ Row(1304330069, 1) ::
+ Row(1321870696, 1) ::
+ Row(1492709716, 1) ::
+ Row(1511363108, 1) ::
+ Row(1601347428, 1) ::
+ Row(1974434012, 1) ::
+ Row(2022223284, 1) ::
+ Row(326269457, 1) ::
+ Row(50559334, 1) ::
+ Row(716040854, 1) ::
+ Row(759249519, 1) ::
+ Row(809187771, 1) ::
+ Row(900899651, 1) ::
+ Nil
+ )
+ checkAnswer(
+ DummyInputData.select(bbit_minhash(typedLit(Seq("1:0.1", "2:0.5")), lit(false))),
+ Row("31175986876675838064867796245644543067")
+ )
+ checkAnswer(
+ DummyInputData.select(minhashes(typedLit(Seq("1:0.1", "2:0.5")), lit(false))),
+ Row(Seq(1571683640, 987207869, 370931990, 988455638, 846963275))
+ )
}
test("ftvec - add_bias") {
@@ -111,12 +164,13 @@ final class HivemallOpsWithFeatureSuite extends HivemallFeatureQueryTest {
test("ftvec - extract_feature") {
val df = DummyInputData.select(extract_feature(lit("1:0.8")))
- checkAnswer(df, Row("1") :: Nil)
+ checkAnswer(df, Row("1"))
}
test("ftvec - extract_weight") {
- val df = DummyInputData.select(extract_weight(lit("3:0.1")))
- assert(df.collect.apply(0).getDouble(0) ~== 0.1)
+ val rows = DummyInputData.select(extract_weight(lit("3:0.1"))).collect
+ assert(rows.length === 1)
+ assert(rows(0).getDouble(0) ~== 0.1)
}
test("ftvec - explode_array") {
@@ -162,26 +216,36 @@ final class HivemallOpsWithFeatureSuite extends HivemallFeatureQueryTest {
}
test("ftvec.hash") {
- assert(DummyInputData.select(mhash(lit("test"))).count == DummyInputData.count)
- assert(DummyInputData.select(org.apache.spark.sql.hive.HivemallOps.sha1(lit("test"))).count ==
- DummyInputData.count)
- // TODO: The tests below failed because:
- // org.apache.spark.sql.AnalysisException: List type in java is unsupported because JVM type
- // erasure makes spark fail to catch a component type in List<>;
- //
- // assert(DummyInputData.select(array_hash_values(lit2(Seq("aaa", "bbb")))).count
- // == DummyInputData.count)
- // assert(DummyInputData.select(
- // prefixed_hash_values(lit2(Seq("ccc", "ddd")), lit("prefix"))).count
- // == DummyInputData.count)
+ checkAnswer(DummyInputData.select(mhash(lit("test"))), Row(4948445))
+ checkAnswer(DummyInputData.select(HivemallOps.sha1(lit("test"))), Row(12184508))
+ checkAnswer(DummyInputData.select(feature_hashing(typedLit(Seq("1:0.1", "3:0.5")))),
+ Row(Seq("11293631:0.1", "4331412:0.5")))
+ checkAnswer(DummyInputData.select(array_hash_values(typedLit(Seq("aaa", "bbb")))),
+ Row(Seq(4063537, 8459207)))
+ checkAnswer(DummyInputData.select(
+ prefixed_hash_values(typedLit(Seq("ccc", "ddd")), lit("prefix"))),
+ Row(Seq("prefix7873825", "prefix8965544")))
+ }
+
+ test("ftvec.parting") {
+ checkAnswer(DummyInputData.select(polynomial_features(typedLit(Seq("2:0.4", "6:0.1")), lit(2))),
+ Row(Seq("2:0.4", "2^2:0.16000001", "2^6:0.040000003", "6:0.1", "6^6:0.010000001")))
+ checkAnswer(DummyInputData.select(powered_features(typedLit(Seq("4:0.8", "5:0.2")), lit(2))),
+ Row(Seq("4:0.8", "4^2:0.64000005", "5:0.2", "5^2:0.040000003")))
}
test("ftvec.scaling") {
- val df1 = TinyTrainData.select(rescale(lit(2.0f), lit(1.0), lit(5.0)))
- assert(df1.collect.apply(0).getFloat(0) === 0.25f)
- val df2 = TinyTrainData.select(zscore(lit(1.0f), lit(0.5), lit(0.5)))
- assert(df2.collect.apply(0).getFloat(0) === 1.0f)
- val df3 = TinyTrainData.select(normalize(TinyTrainData.col("features")))
+ val rows1 = TinyTrainData.select(rescale(lit(2.0f), lit(1.0), lit(5.0))).collect
+ assert(rows1.length === 3)
+ assert(rows1(0).getFloat(0) ~== 0.25f)
+ assert(rows1(1).getFloat(0) ~== 0.25f)
+ assert(rows1(2).getFloat(0) ~== 0.25f)
+ val rows2 = TinyTrainData.select(zscore(lit(1.0f), lit(0.5), lit(0.5))).collect
+ assert(rows2.length === 3)
+ assert(rows2(0).getFloat(0) ~== 1.0f)
+ assert(rows2(1).getFloat(0) ~== 1.0f)
+ assert(rows2(2).getFloat(0) ~== 1.0f)
+ val df3 = TinyTrainData.select(l2_normalize(TinyTrainData.col("features")))
checkAnswer(
df3,
Row(Seq("1:0.9701425", "2:0.24253562")) ::
@@ -205,10 +269,10 @@ final class HivemallOpsWithFeatureSuite extends HivemallFeatureQueryTest {
Seq(292.1666753739119, 152.70000455081467, 187.93333893418327, 59.93333511948589)))
.toDF("arg0", "arg1")
- val result = df.select(chi2(df("arg0"), df("arg1"))).collect
- assert(result.length == 1)
- val chi2Val = result.head.getAs[Row](0).getAs[Seq[Double]](0)
- val pVal = result.head.getAs[Row](0).getAs[Seq[Double]](1)
+ val rows = df.select(chi2(df("arg0"), df("arg1"))).collect
+ assert(rows.length == 1)
+ val chi2Val = rows.head.getAs[Row](0).getAs[Seq[Double]](0)
+ val pVal = rows.head.getAs[Row](0).getAs[Seq[Double]](1)
(chi2Val, Seq(10.81782088, 3.59449902, 116.16984746, 67.24482759))
.zipped
@@ -240,58 +304,260 @@ final class HivemallOpsWithFeatureSuite extends HivemallFeatureQueryTest {
// assert(TinyTrainData.rand_amplify(lit(3), lit("-buf 8", $"label", $"features")).count() == 9)
}
- ignore("ftvec.conv") {
+ test("ftvec.conv") {
import hiveContext.implicits._
- val df1 = Seq((0.0, "1:0.1" :: "3:0.3" :: Nil), (1, 0, "2:0.2" :: Nil)).toDF("a", "b")
checkAnswer(
- df1.select(to_dense_features(df1("b"), lit(3))),
- Row(Array(0.1f, 0.0f, 0.3f)) :: Row(Array(0.0f, 0.2f, 0.0f)) :: Nil
+ DummyInputData.select(to_dense_features(typedLit(Seq("0:0.1", "1:0.3")), lit(1))),
+ Row(Array(0.1f, 0.3f))
+ )
+ checkAnswer(
+ DummyInputData.select(to_sparse_features(typedLit(Seq(0.1f, 0.2f, 0.3f)))),
+ Row(Seq("0:0.1", "1:0.2", "2:0.3"))
)
- val df2 = Seq((0.1, 0.2, 0.3), (0.2, 0.5, 0.4)).toDF("a", "b", "c")
checkAnswer(
- df2.select(to_sparse_features(df2("a"), df2("b"), df2("c"))),
- Row(Seq("1:0.1", "2:0.2", "3:0.3")) :: Row(Seq("1:0.2", "2:0.5", "3:0.4")) :: Nil
+ DummyInputData.select(feature_binning(typedLit(Seq("1")), typedLit(Map("1" -> Seq(0, 3))))),
+ Row(Seq("1"))
)
}
test("ftvec.trans") {
import hiveContext.implicits._
+ checkAnswer(
+ DummyInputData.select(vectorize_features(typedLit(Seq("a", "b")), lit(0.1f), lit(0.2f))),
+ Row(Seq("a:0.1", "b:0.2"))
+ )
+ checkAnswer(
+ DummyInputData.select(categorical_features(typedLit(Seq("a", "b")), lit("c11"), lit("c12"))),
+ Row(Seq("a#c11", "b#c12"))
+ )
+ checkAnswer(
+ DummyInputData.select(indexed_features(lit(0.1), lit(0.2), lit(0.3))),
+ Row(Seq("1:0.1", "2:0.2", "3:0.3"))
+ )
+ checkAnswer(
+ DummyInputData.select(quantitative_features(typedLit(Seq("a", "b")), lit(0.1), lit(0.2))),
+ Row(Seq("a:0.1", "b:0.2"))
+ )
+ checkAnswer(
+ DummyInputData.select(ffm_features(typedLit(Seq("1", "2")), lit(0.5), lit(0.2))),
+ Row(Seq("190:140405:1", "111:1058718:1"))
+ )
+ checkAnswer(
+ DummyInputData.select(add_field_indicies(typedLit(Seq("0.5", "0.1")))),
+ Row(Seq("1:0.5", "2:0.1"))
+ )
+
val df1 = Seq((1, -3, 1), (2, -2, 1)).toDF("a", "b", "c")
checkAnswer(
df1.binarize_label($"a", $"b", $"c"),
Row(1, 1) :: Row(1, 1) :: Row(1, 1) :: Nil
)
+ val df2 = Seq(("xxx", "yyy", 0), ("zzz", "yyy", 1)).toDF("a", "b", "c").coalesce(1)
+ checkAnswer(
+ df2.quantified_features(lit(true), df2("a"), df2("b"), df2("c")),
+ Row(Seq(0.0, 0.0, 0.0)) :: Row(Seq(1.0, 0.0, 1.0)) :: Nil
+ )
+ }
+
+ test("ftvec.ranking") {
+ import hiveContext.implicits._
+
+ val df1 = Seq((1, 0 :: 3 :: 4 :: Nil), (2, 8 :: 9 :: Nil)).toDF("a", "b").coalesce(1)
+ checkAnswer(
+ df1.bpr_sampling($"a", $"b"),
+ Row(1, 0, 7) ::
+ Row(1, 3, 6) ::
+ Row(2, 8, 0) ::
+ Row(2, 8, 4) ::
+ Row(2, 9, 7) ::
+ Nil
+ )
+ val df2 = Seq(1 :: 8 :: 9 :: Nil, 0 :: 3 :: Nil).toDF("a").coalesce(1)
+ checkAnswer(
+ df2.item_pairs_sampling($"a", lit(3)),
+ Row(0, 1) ::
+ Row(1, 0) ::
+ Row(3, 2) ::
+ Nil
+ )
+ val df3 = Seq(3 :: 5 :: Nil, 0 :: Nil).toDF("a").coalesce(1)
+ checkAnswer(
+ df3.populate_not_in($"a", lit(1)),
+ Row(0) ::
+ Row(1) ::
+ Row(1) ::
+ Nil
+ )
+ }
- val df2 = Seq((0.1f, 0.2f), (0.5f, 0.3f)).toDF("a", "b")
+ test("tools") {
+ // checkAnswer(
+ // DummyInputData.select(convert_label(lit(5))),
+ // Nil
+ // )
checkAnswer(
- df2.select(vectorize_features(lit2(Seq("a", "b")), df2("a"), df2("b"))),
- Row(Seq("a:0.1", "b:0.2")) :: Row(Seq("a:0.5", "b:0.3")) :: Nil
+ DummyInputData.select(x_rank(lit("abc"))),
+ Row(1)
)
+ }
- val df3 = Seq(("c11", "c12"), ("c21", "c22")).toDF("a", "b")
+ test("tools.array") {
+ checkAnswer(
+ DummyInputData.select(float_array(lit(3))),
+ Row(Seq())
+ )
+ checkAnswer(
+ DummyInputData.select(array_remove(typedLit(Seq(1, 2, 3)), lit(2))),
+ Row(Seq(1, 3))
+ )
+ checkAnswer(
+ DummyInputData.select(sort_and_uniq_array(typedLit(Seq(2, 1, 3, 1)))),
+ Row(Seq(1, 2, 3))
+ )
+ checkAnswer(
+ DummyInputData.select(subarray_endwith(typedLit(Seq(1, 2, 3, 4, 5)), lit(4))),
+ Row(Seq(1, 2, 3, 4))
+ )
+ checkAnswer(
+ DummyInputData.select(
+ array_concat(typedLit(Seq(1, 2)), typedLit(Seq(3)), typedLit(Seq(4, 5)))),
+ Row(Seq(1, 2, 3, 4, 5))
+ )
checkAnswer(
- df3.select(categorical_features(lit2(Seq("a", "b")), df3("a"), df3("b"))),
- Row(Seq("a#c11", "b#c12")) :: Row(Seq("a#c21", "b#c22")) :: Nil
+ DummyInputData.select(subarray(typedLit(Seq(1, 2, 3, 4, 5)), lit(2), lit(4))),
+ Row(Seq(3, 4))
)
+ checkAnswer(
+ DummyInputData.select(to_string_array(typedLit(Seq(1, 2, 3, 4, 5)))),
+ Row(Seq("1", "2", "3", "4", "5"))
+ )
+ checkAnswer(
+ DummyInputData.select(array_intersect(typedLit(Seq(1, 2, 3)), typedLit(Seq(2, 3, 4)))),
+ Row(Seq(2, 3))
+ )
+ }
+
+ test("tools.array - select_k_best") {
+ import hiveContext.implicits._
+
+ val data = Seq(Seq(0, 1, 3), Seq(2, 4, 1), Seq(5, 4, 9))
+ val df = data.map(d => (d, Seq(3, 1, 2))).toDF("features", "importance_list")
+ val k = 2
- val df4 = Seq((0.1, 0.2, 0.3), (0.2, 0.5, 0.4)).toDF("a", "b", "c")
checkAnswer(
- df4.select(indexed_features(df4("a"), df4("b"), df4("c"))),
- Row(Seq("1:0.1", "2:0.2", "3:0.3")) :: Row(Seq("1:0.2", "2:0.5", "3:0.4")) :: Nil
+ df.select(select_k_best(df("features"), df("importance_list"), lit(k))),
+ Row(Seq(0.0, 3.0)) :: Row(Seq(2.0, 1.0)) :: Row(Seq(5.0, 9.0)) :: Nil
)
+ }
- val df5 = Seq(("xxx", "yyy", 0), ("zzz", "yyy", 1)).toDF("a", "b", "c").coalesce(1)
+ test("tools.bits") {
checkAnswer(
- df5.quantified_features(lit(true), df5("a"), df5("b"), df5("c")),
- Row(Seq(0.0, 0.0, 0.0)) :: Row(Seq(1.0, 0.0, 1.0)) :: Nil
+ DummyInputData.select(to_bits(typedLit(Seq(1, 3, 9)))),
+ Row(Seq(522L))
)
+ checkAnswer(
+ DummyInputData.select(unbits(typedLit(Seq(1L, 3L)))),
+ Row(Seq(0L, 64L, 65L))
+ )
+ checkAnswer(
+ DummyInputData.select(bits_or(typedLit(Seq(1L, 3L)), typedLit(Seq(8L, 23L)))),
+ Row(Seq(9L, 23L))
+ )
+ }
- val df6 = Seq((0.1, 0.2), (0.5, 0.3)).toDF("a", "b")
+ test("tools.compress") {
checkAnswer(
- df6.select(quantitative_features(lit2(Seq("a", "b")), df6("a"), df6("b"))),
- Row(Seq("a:0.1", "b:0.2")) :: Row(Seq("a:0.5", "b:0.3")) :: Nil
+ DummyInputData.select(inflate(deflate(lit("input text")))),
+ Row("input text")
+ )
+ }
+
+ test("tools.map") {
+ val rows = DummyInputData.select(
+ map_get_sum(typedLit(Map(1 -> 0.2f, 2 -> 0.5f, 4 -> 0.8f)), typedLit(Seq(1, 4)))
+ ).collect
+ assert(rows.length === 1)
+ assert(rows(0).getDouble(0) ~== 1.0f)
+
+ checkAnswer(
+ DummyInputData.select(map_tail_n(typedLit(Map(1 -> 2, 2 -> 5)), lit(1))),
+ Row(Map(2 -> 5))
+ )
+ }
+
+ test("tools.text") {
+ checkAnswer(
+ DummyInputData.select(tokenize(lit("This is a pen"))),
+ Row("This" :: "is" :: "a" :: "pen" :: Nil)
+ )
+ checkAnswer(
+ DummyInputData.select(is_stopword(lit("because"))),
+ Row(true)
+ )
+ checkAnswer(
+ DummyInputData.select(singularize(lit("between"))),
+ Row("between")
+ )
+ checkAnswer(
+ DummyInputData.select(split_words(lit("Hello, world"))),
+ Row("Hello," :: "world" :: Nil)
+ )
+ checkAnswer(
+ DummyInputData.select(normalize_unicode(lit("abcdefg"))),
+ Row("abcdefg")
+ )
+ checkAnswer(
+ DummyInputData.select(base91(typedLit("input text".getBytes))),
+ Row("xojg[@TX;R..B")
+ )
+ checkAnswer(
+ DummyInputData.select(unbase91(lit("XXXX"))),
+ Row(68 :: -120 :: 8 :: Nil)
+ )
+ checkAnswer(
+ DummyInputData.select(word_ngrams(typedLit("abcd" :: "efg" :: "hij" :: Nil), lit(2), lit(2))),
+ Row("abcd efg" :: "efg hij" :: Nil)
+ )
+ }
+
+ test("tools - generated_series") {
+ checkAnswer(
+ DummyInputData.generate_series(lit(0), lit(3)),
+ Row(0) :: Row(1) :: Row(2) :: Row(3) :: Nil
+ )
+ }
+
+ test("geospatial") {
+ val rows1 = DummyInputData.select(tilex2lon(lit(1), lit(6))).collect
+ assert(rows1.length === 1)
+ assert(rows1(0).getDouble(0) ~== -174.375)
+
+ val rows2 = DummyInputData.select(tiley2lat(lit(1), lit(3))).collect
+ assert(rows2.length === 1)
+ assert(rows2(0).getDouble(0) ~== 79.17133464081945)
+
+ val rows3 = DummyInputData.select(
+ haversine_distance(lit(0.3), lit(0.1), lit(0.4), lit(0.1))).collect
+ assert(rows3.length === 1)
+ assert(rows3(0).getDouble(0) ~== 11.119492664455878)
+
+ checkAnswer(
+ DummyInputData.select(tile(lit(0.1), lit(0.8), lit(3))),
+ Row(28)
+ )
+ checkAnswer(
+ DummyInputData.select(map_url(lit(0.1), lit(0.8), lit(3))),
+ Row("http://tile.openstreetmap.org/3/4/3.png")
+ )
+ checkAnswer(
+ DummyInputData.select(lat2tiley(lit(0.3), lit(3))),
+ Row(3)
+ )
+ checkAnswer(
+ DummyInputData.select(lon2tilex(lit(0.4), lit(2))),
+ Row(2)
)
}
@@ -494,7 +760,7 @@ final class HivemallOpsWithFeatureSuite extends HivemallFeatureQueryTest {
val schema = new StructType().add("a", IntegerType).add("b", StringType)
checkAnswer(
df.select(from_csv($"value", schema)),
- Row(Row(1, "abc")) :: Nil)
+ Row(Row(1, "abc")))
}
test("misc - to_csv") {
@@ -522,7 +788,7 @@ final class HivemallOpsWithFeatureSuite extends HivemallFeatureQueryTest {
val model = Seq((0.0, 0.1 :: 0.1 :: Nil), (1.0, 0.2 :: 0.3 :: 0.2 :: Nil))
.toDF("label", "features")
- .train_randomforest_regr($"features", $"label")
+ .train_randomforest_regressor($"features", $"label")
val testData = Seq((0.0, 0.1 :: 0.0 :: Nil), (1.0, 0.3 :: 0.5 :: 0.4 :: Nil))
.toDF("label", "features")
@@ -542,22 +808,11 @@ final class HivemallOpsWithFeatureSuite extends HivemallFeatureQueryTest {
checkAnswer(predicted, Seq(Row(0), Row(1)))
}
- test("tools.array - select_k_best") {
- import hiveContext.implicits._
-
- val data = Seq(Seq(0, 1, 3), Seq(2, 4, 1), Seq(5, 4, 9))
- val df = data.map(d => (d, Seq(3, 1, 2))).toDF("features", "importance_list")
- val k = 2
-
- checkAnswer(
- df.select(select_k_best(df("features"), df("importance_list"), lit(k))),
- Row(Seq(0.0, 3.0)) :: Row(Seq(2.0, 1.0)) :: Row(Seq(5.0, 9.0)) :: Nil
- )
- }
-
test("misc - sigmoid") {
import hiveContext.implicits._
- assert(DummyInputData.select(sigmoid($"c0")).collect.apply(0).getDouble(0) ~== 0.500)
+ val rows = DummyInputData.select(sigmoid($"c0")).collect
+ assert(rows.length === 1)
+ assert(rows(0).getDouble(0) ~== 0.500)
}
test("misc - lr_datagen") {
@@ -567,16 +822,18 @@ final class HivemallOpsWithFeatureSuite extends HivemallFeatureQueryTest {
test("invoke regression functions") {
import hiveContext.implicits._
Seq(
- "train_adadelta",
- "train_adagrad",
+ "train_regressor",
+ "train_adadelta_regr",
+ "train_adagrad_regr",
"train_arow_regr",
"train_arowe_regr",
"train_arowe2_regr",
- "train_logregr",
+ "train_logistic_regr",
"train_pa1_regr",
"train_pa1a_regr",
"train_pa2_regr",
"train_pa2a_regr"
+ // "train_randomforest_regressor"
).map { func =>
TestUtils.invokeFunc(new HivemallOps(TinyTrainData), func, Seq($"features", $"label"))
.foreach(_ => {}) // Just call it
@@ -586,6 +843,7 @@ final class HivemallOpsWithFeatureSuite extends HivemallFeatureQueryTest {
test("invoke classifier functions") {
import hiveContext.implicits._
Seq(
+ "train_classifier",
"train_perceptron",
"train_pa",
"train_pa1",
@@ -596,6 +854,7 @@ final class HivemallOpsWithFeatureSuite extends HivemallFeatureQueryTest {
"train_scw",
"train_scw2",
"train_adagrad_rda"
+ // "train_randomforest_classifier"
).map { func =>
TestUtils.invokeFunc(new HivemallOps(TinyTrainData), func, Seq($"features", $"label"))
.foreach(_ => {}) // Just call it
@@ -611,6 +870,7 @@ final class HivemallOpsWithFeatureSuite extends HivemallFeatureQueryTest {
"train_multiclass_pa2",
"train_multiclass_cw",
"train_multiclass_arow",
+ "train_multiclass_arowh",
"train_multiclass_scw",
"train_multiclass_scw2"
).map { func =>
@@ -628,7 +888,7 @@ final class HivemallOpsWithFeatureSuite extends HivemallFeatureQueryTest {
(Array(0.3, 0.1, 0.2), 0),
(Array(0.3, 0.1, 0.2), 0)).toDF("features", "label")
Seq(
- "train_randomforest_regr",
+ "train_randomforest_regressor",
"train_randomforest_classifier"
).map { func =>
TestUtils.invokeFunc(new HivemallOps(testDf.coalesce(1)), func, Seq($"features", $"label"))
@@ -636,6 +896,27 @@ final class HivemallOpsWithFeatureSuite extends HivemallFeatureQueryTest {
}
}
+ test("invoke recommend functions") {
+ import hiveContext.implicits._
+ val df = Seq((1, Map(1 -> 0.3), Map(2 -> Map(4 -> 0.1)), 0, Map(3 -> 0.5)))
+ .toDF("i", "r_i", "topKRatesOfI", "j", "r_j")
+ // Just call it
+ df.train_slim($"i", $"r_i", $"topKRatesOfI", $"j", $"r_j").collect
+
+ }
+
+ ignore("invoke topicmodel functions") {
+ import hiveContext.implicits._
+ val testDf = Seq(Seq("abcd", "'efghij", "klmn")).toDF("words")
+ Seq(
+ "train_lda",
+ "train_plsa"
+ ).map { func =>
+ TestUtils.invokeFunc(new HivemallOps(testDf.coalesce(1)), func, Seq($"words"))
+ .foreach(_ => {}) // Just call it
+ }
+ }
+
protected def checkRegrPrecision(func: String): Unit = {
import hiveContext.implicits._
@@ -730,12 +1011,12 @@ final class HivemallOpsWithFeatureSuite extends HivemallFeatureQueryTest {
ignore("check regression precision") {
Seq(
- "train_adadelta",
- "train_adagrad",
+ "train_adadelta_regr",
+ "train_adagrad_regr",
"train_arow_regr",
"train_arowe_regr",
"train_arowe2_regr",
- "train_logregr",
+ "train_logistic_regr",
"train_pa1_regr",
"train_pa1a_regr",
"train_pa2_regr",
@@ -762,61 +1043,212 @@ final class HivemallOpsWithFeatureSuite extends HivemallFeatureQueryTest {
}
}
- test("user-defined aggregators for ensembles") {
+ test("aggregations for classifiers") {
+ import hiveContext.implicits._
+ val df1 = Seq((1, 0.1, 0.1, 0.2f, 0.2f, 0.2f, 0.2f))
+ .toDF("key", "xh", "xk", "w0", "w1", "w2", "w3")
+ val row1 = df1.groupBy($"key").kpa_predict("xh", "xk", "w0", "w1", "w2", "w3").collect
+ assert(row1.length === 1)
+ assert(row1(0).getDouble(1) ~== 0.002000000029802)
+ }
+
+ test("aggregations for ensembles") {
import hiveContext.implicits._
- val df1 = Seq((1, 0.1f), (1, 0.2f), (2, 0.1f)).toDF("c0", "c1")
- val row1 = df1.groupBy($"c0").voted_avg("c1").collect
- assert(row1(0).getDouble(1) ~== 0.15)
- assert(row1(1).getDouble(1) ~== 0.10)
+ val df1 = Seq((1, 0.1), (1, 0.2), (2, 0.1)).toDF("c0", "c1")
+ val rows1 = df1.groupBy($"c0").voted_avg("c1").collect
+ assert(rows1.length === 2)
+ assert(rows1(0).getDouble(1) ~== 0.15)
+ assert(rows1(1).getDouble(1) ~== 0.10)
- val df3 = Seq((1, 0.2f), (1, 0.8f), (2, 0.3f)).toDF("c0", "c1")
- val row3 = df3.groupBy($"c0").weight_voted_avg("c1").collect
- assert(row3(0).getDouble(1) ~== 0.50)
- assert(row3(1).getDouble(1) ~== 0.30)
+ val df3 = Seq((1, 0.2), (1, 0.8), (2, 0.3)).toDF("c0", "c1")
+ val rows3 = df3.groupBy($"c0").weight_voted_avg("c1").collect
+ assert(rows3.length === 2)
+ assert(rows3(0).getDouble(1) ~== 0.50)
+ assert(rows3(1).getDouble(1) ~== 0.30)
val df5 = Seq((1, 0.2f, 0.1f), (1, 0.4f, 0.2f), (2, 0.8f, 0.9f)).toDF("c0", "c1", "c2")
- val row5 = df5.groupBy($"c0").argmin_kld("c1", "c2").collect
- assert(row5(0).getFloat(1) ~== 0.266666666)
- assert(row5(1).getFloat(1) ~== 0.80)
+ val rows5 = df5.groupBy($"c0").argmin_kld("c1", "c2").collect
+ assert(rows5.length === 2)
+ assert(rows5(0).getFloat(1) ~== 0.266666666)
+ assert(rows5(1).getFloat(1) ~== 0.80)
+
+ val df6 = Seq((1, "id-0", 0.2), (1, "id-1", 0.4), (1, "id-2", 0.1)).toDF("c0", "c1", "c2")
+ val rows6 = df6.groupBy($"c0").max_label("c2", "c1").collect
+ assert(rows6.length === 1)
+ assert(rows6(0).getString(1) == "id-1")
+
+ val df7 = Seq((1, "id-0", 0.5), (1, "id-1", 0.1), (1, "id-2", 0.2)).toDF("c0", "c1", "c2")
+ val rows7 = df7.groupBy($"c0").maxrow("c2", "c1").toDF("c0", "c1").select($"c1.col1").collect
+ assert(rows7.length === 1)
+ assert(rows7(0).getString(0) == "id-0")
+
+ val df8 = Seq((1, 1), (1, 2), (2, 1), (1, 5)).toDF("c0", "c1")
+ val rows8 = df8.groupBy($"c0").rf_ensemble("c1").toDF("c0", "c1")
+ .select("c1.probability").collect
+ assert(rows8.length === 2)
+ assert(rows8(0).getDouble(0) ~== 0.3333333333)
+ assert(rows8(1).getDouble(0) ~== 1.0)
+ }
+
+ test("aggregations for evaluation") {
+ import hiveContext.implicits._
- val df6 = Seq((1, "id-0", 0.2f), (1, "id-1", 0.4f), (1, "id-2", 0.1f)).toDF("c0", "c1", "c2")
- val row6 = df6.groupBy($"c0").max_label("c2", "c1").collect
- assert(row6(0).getString(1) == "id-1")
+ val testDf1 = Seq((1, 1.0, 0.5), (1, 0.3, 0.5), (1, 0.1, 0.2)).toDF("c0", "c1", "c2")
+ val rows1 = testDf1.groupBy($"c0").mae("c1", "c2").collect
+ assert(rows1.length === 1)
+ assert(rows1(0).getDouble(1) ~== 0.26666666)
+ val rows2 = testDf1.groupBy($"c0").mse("c1", "c2").collect
+ assert(rows2.length === 1)
+ assert(rows2(0).getDouble(1) ~== 0.1)
+ val rows3 = testDf1.groupBy($"c0").rmse("c1", "c2").collect
+ assert(rows3.length === 1)
+ assert(rows3(0).getDouble(1) ~== 0.31622776601683794)
+ val rows4 = testDf1.groupBy($"c0").r2("c1", "c2").collect
+ assert(rows4.length === 1)
+ assert(rows4(0).getDouble(1) ~== -4.0)
+ val rows5 = testDf1.groupBy($"c0").logloss("c1", "c2").collect
+ assert(rows5.length === 1)
+ assert(rows5(0).getDouble(1) ~== 6.198305767142615)
+
+ val testDf2 = Seq((1, Array(1, 2), Array(2, 3)), (1, Array(3, 8), Array(5, 4)))
+ .toDF("c0", "c1", "c2")
+ val rows6 = testDf2.groupBy($"c0").ndcg("c1", "c2").collect
+ assert(rows6.length === 1)
+ assert(rows6(0).getDouble(1) ~== 0.19342640361727081)
+ val rows7 = testDf2.groupBy($"c0").precision_at("c1", "c2").collect
+ assert(rows7.length === 1)
+ assert(rows7(0).getDouble(1) ~== 0.25)
+ val rows8 = testDf2.groupBy($"c0").recall_at("c1", "c2").collect
+ assert(rows8.length === 1)
+ assert(rows8(0).getDouble(1) ~== 0.25)
+ val rows9 = testDf2.groupBy($"c0").hitrate("c1", "c2").collect
+ assert(rows9.length === 1)
+ assert(rows9(0).getDouble(1) ~== 0.50)
+ val rows10 = testDf2.groupBy($"c0").mrr("c1", "c2").collect
+ assert(rows10.length === 1)
+ assert(rows10(0).getDouble(1) ~== 0.25)
+ val rows11 = testDf2.groupBy($"c0").average_precision("c1", "c2").collect
+ assert(rows11.length === 1)
+ assert(rows11(0).getDouble(1) ~== 0.25)
+ val rows12 = testDf2.groupBy($"c0").auc("c1", "c2").collect
+ assert(rows12.length === 1)
+ assert(rows12(0).getDouble(1) ~== 0.25)
+ }
- val df7 = Seq((1, "id-0", 0.5f), (1, "id-1", 0.1f), (1, "id-2", 0.2f)).toDF("c0", "c1", "c2")
- val row7 = df7.groupBy($"c0").maxrow("c2", "c1").toDF("c0", "c1").select($"c1.col1").collect
- assert(row7(0).getString(0) == "id-0")
+ test("aggregations for topicmodel") {
+ import hiveContext.implicits._
- // val df8 = Seq((1, 1), (1, 2), (2, 1), (1, 5)).toDF("c0", "c1")
- // val row8 = df8.groupBy($"c0").rf_ensemble("c1").toDF("c0", "c1")
- // .select("c1.probability").collect
- // assert(row8(0).getDouble(0) ~== 0.3333333333)
- // assert(row8(1).getDouble(0) ~== 1.0)
+ val testDf = Seq((1, "abcd", 0.1, 0, 0.1), (1, "efgh", 0.2, 0, 0.1))
+ .toDF("key", "word", "value", "label", "lambda")
+ val rows1 = testDf.groupBy($"key").lda_predict("word", "value", "label", "lambda").collect
+ assert(rows1.length === 1)
+ val result1 = rows1(0).getSeq[Row](1).map { case Row(label: Int, prob: Float) => label -> prob }
+ .toMap[Int, Float]
+ assert(result1.size === 10)
+ assert(result1(0) ~== 0.07692449)
+ assert(result1(1) ~== 0.07701121)
+ assert(result1(2) ~== 0.07701129)
+ assert(result1(3) ~== 0.07705542)
+ assert(result1(4) ~== 0.07701511)
+ assert(result1(5) ~== 0.07701234)
+ assert(result1(6) ~== 0.07701384)
+ assert(result1(7) ~== 0.30693996)
+ assert(result1(8) ~== 0.07700701)
+ assert(result1(9) ~== 0.07700934)
+
+ val rows2 = testDf.groupBy($"key").plsa_predict("word", "value", "label", "lambda").collect
+ assert(rows2.length === 1)
+ val result2 = rows2(0).getSeq[Row](1).map { case Row(label: Int, prob: Float) => label -> prob }
+ .toMap[Int, Float]
+ assert(result2.size === 10)
+ assert(result2(0) ~== 0.062156882)
+ assert(result2(1) ~== 0.05088547)
+ assert(result2(2) ~== 0.12434204)
+ assert(result2(3) ~== 0.31869823)
+ assert(result2(4) ~== 0.01584355)
+ assert(result2(5) ~== 0.0057667173)
+ assert(result2(6) ~== 0.10864779)
+ assert(result2(7) ~== 0.09346106)
+ assert(result2(8) ~== 0.13905199)
+ assert(result2(9) ~== 0.081146255)
}
- test("user-defined aggregators for evaluation") {
+ test("aggregations for ftvec.text") {
import hiveContext.implicits._
+ val testDf = Seq((1, "abc def hi jk l"), (1, "def jk")).toDF("key", "text")
+ val rows = testDf.groupBy($"key").tf("text").collect
+ assert(rows.length === 1)
+ val result = rows(0).getAs[Map[String, Float]](1)
+ assert(result.size === 2)
+ assert(result("def jk") ~== 0.5f)
+ assert(result("abc def hi jk l") ~== 0.5f)
+ }
- val df1 = Seq((1, 1.0f, 0.5f), (1, 0.3f, 0.5f), (1, 0.1f, 0.2f)).toDF("c0", "c1", "c2")
- val row1 = df1.groupBy($"c0").mae("c1", "c2").collect
- assert(row1(0).getDouble(1) ~== 0.26666666)
+ test("aggregations for tools.array") {
+ import hiveContext.implicits._
- val df2 = Seq((1, 0.3f, 0.8f), (1, 1.2f, 2.0f), (1, 0.2f, 0.3f)).toDF("c0", "c1", "c2")
- val row2 = df2.groupBy($"c0").mse("c1", "c2").collect
- assert(row2(0).getDouble(1) ~== 0.29999999)
+ val testDf = Seq((1, 1 :: 3 :: Nil), (1, 3 :: 5 :: Nil)).toDF("key", "ar")
+ val rows1 = testDf.groupBy($"key").array_avg("ar").collect
+ assert(rows1.length === 1)
+ val result1 = rows1(0).getSeq[Float](1)
+ assert(result1.length === 2)
+ assert(result1(0) ~== 2.0f)
+ assert(result1(1) ~== 4.0f)
+
+ val rows2 = testDf.groupBy($"key").array_sum("ar").collect
+ assert(rows2.length === 1)
+ val result2 = rows2(0).getSeq[Double](1)
+ assert(result2.length === 2)
+ assert(result2(0) ~== 4.0)
+ assert(result2(1) ~== 8.0)
+ }
- val df3 = Seq((1, 0.3f, 0.8f), (1, 1.2f, 2.0f), (1, 0.2f, 0.3f)).toDF("c0", "c1", "c2")
- val row3 = df3.groupBy($"c0").rmse("c1", "c2").collect
- assert(row3(0).getDouble(1) ~== 0.54772253)
+ test("aggregations for tools.bits") {
+ import hiveContext.implicits._
+ val testDf = Seq((1, 1), (1, 7)).toDF("key", "x")
+ val rows = testDf.groupBy($"key").bits_collect("x").collect
+ assert(rows.length === 1)
+ val result = rows(0).getSeq[Int](1)
+ assert(result === Seq(130))
+ }
- val df4 = Seq((1, Array(1, 2), Array(2, 3)), (1, Array(3, 8), Array(5, 4))).toDF
- .toDF("c0", "c1", "c2")
- val row4 = df4.groupBy($"c0").f1score("c1", "c2").collect
- assert(row4(0).getDouble(1) ~== 0.25)
+ test("aggregations for tools.list") {
+ import hiveContext.implicits._
+ val testDf = Seq((1, 3), (1, 1), (1, 2)).toDF("key", "x")
+ val rows = testDf.groupBy($"key").to_ordered_list("x").collect
+ assert(rows.length === 1)
+ val result = rows(0).getSeq[Int](1)
+ assert(result === Seq(1, 2, 3))
+ }
+
+ test("aggregations for tools.map") {
+ import hiveContext.implicits._
+ val testDf = Seq((1, 1, "a"), (1, 2, "b"), (1, 3, "c")).toDF("key", "k", "v")
+ val rows = testDf.groupBy($"key").to_map("k", "v").collect
+ assert(rows.length === 1)
+ val result = rows(0).getMap[Int, String](1)
+ assert(result === Map(1 -> "a", 2 -> "b", 3 -> "c"))
+ }
+
+ test("aggregations for tools.math") {
+ import hiveContext.implicits._
+ val testDf = Seq(
+ (1, Seq(1, 2, 3, 4), Seq(5, 6, 7, 8)),
+ (1, Seq(9, 10, 11, 12), Seq(13, 14, 15, 16))
+ ).toDF("key", "mtx1", "mtx2")
+ val rows = testDf.groupBy($"key").transpose_and_dot("mtx1", "mtx2").collect
+ assert(rows.length === 1)
+ val result = rows(0).getSeq[Int](1)
+ assert(result === Seq(
+ Seq(122.0, 132.0, 142.0, 152.0),
+ Seq(140.0, 152.0, 164.0, 176.0),
+ Seq(158.0, 172.0, 186.0, 200.0),
+ Seq(176.0, 192.0, 208.0, 224.0))
+ )
}
- test("user-defined aggregators for ftvec.trans") {
+ test("aggregations for ftvec.trans") {
import hiveContext.implicits._
val df0 = Seq((1, "cat", "mammal", 9), (1, "dog", "mammal", 10), (1, "human", "mammal", 10),
@@ -842,7 +1274,7 @@ final class HivemallOpsWithFeatureSuite extends HivemallFeatureQueryTest {
assert(result012.values.toSet === Set(9, 10, 11, 12, 13))
}
- test("user-defined aggregators for ftvec.selection") {
+ test("aggregations for ftvec.selection") {
import hiveContext.implicits._
// see also hivemall.ftvec.selection.SignalNoiseRatioUDAFTest
@@ -889,7 +1321,7 @@ final class HivemallOpsWithFeatureSuite extends HivemallFeatureQueryTest {
.foreach((actual, expected) => assert(actual ~== expected))
}
- test("user-defined aggregators for tools.matrix") {
+ test("aggregations for tools.matrix") {
import hiveContext.implicits._
// | 1 2 3 |T | 5 6 7 |
@@ -950,9 +1382,9 @@ final class HivemallOpsWithVectorSuite extends VectorQueryTest {
)
}
- test("train_logregr") {
+ test("train_logistic_regr") {
checkAnswer(
- mllibTrainDf.train_logregr($"features", $"label")
+ mllibTrainDf.train_logistic_regr($"features", $"label")
.groupBy("feature").agg("weight" -> "avg")
.select($"feature"),
Seq(0, 1, 2, 3, 4, 5, 6).map(v => Row(s"$v"))
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/1680c42c/spark/spark-2.2/src/test/scala/org/apache/spark/sql/hive/test/HivemallFeatureQueryTest.scala
----------------------------------------------------------------------
diff --git a/spark/spark-2.2/src/test/scala/org/apache/spark/sql/hive/test/HivemallFeatureQueryTest.scala b/spark/spark-2.2/src/test/scala/org/apache/spark/sql/hive/test/HivemallFeatureQueryTest.scala
index 3ca9bbf..bc656d1 100644
--- a/spark/spark-2.2/src/test/scala/org/apache/spark/sql/hive/test/HivemallFeatureQueryTest.scala
+++ b/spark/spark-2.2/src/test/scala/org/apache/spark/sql/hive/test/HivemallFeatureQueryTest.scala
@@ -36,17 +36,6 @@ abstract class HivemallFeatureQueryTest extends QueryTest with SQLTestUtils with
import hiveContext.implicits._
- /**
- * TODO: spark-2.0 does not support literals for some types (e.g., Seq[_] and Array[_]).
- * So, it provides that functionality here.
- * This helper function will be removed in future releases.
- */
- protected def lit2[T : TypeTag](v: T): Column = {
- val ScalaReflection.Schema(dataType, _) = ScalaReflection.schemaFor[T]
- val convert = CatalystTypeConverters.createToCatalystConverter(dataType)
- Column(Literal(convert(v), dataType))
- }
-
protected val DummyInputData = Seq((0, 0)).toDF("c0", "c1")
protected val IntList2Data =
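The `lit2` helper removed above worked around Spark 2.0's lack of literal support for types such as Seq[_] and Array[_]; Spark 2.2 ships `org.apache.spark.sql.functions.typedLit`, which the rewritten suite uses instead and which also sidesteps the type-erasure AnalysisException noted in the old ftvec.hash TODO. A minimal sketch of the replacement (the local SparkSession and column names here are illustrative, not from the patch):

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{lit, typedLit}

object TypedLitSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[1]").appName("typedLit").getOrCreate()
    import spark.implicits._
    val df = Seq((0, 0)).toDF("c0", "c1")
    // lit() covers scalar literals only; typedLit() also handles Seq, Map, and
    // case classes via a TypeTag, which is what lit2 emulated with ScalaReflection.
    df.select(lit(1), typedLit(Seq("1:0.1", "2:0.5")), typedLit(Map(1 -> 0.5))).show()
    spark.stop()
  }
}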
[3/4] incubator-hivemall git commit: Fixed typos
Posted by my...@apache.org.
Fixed typos
Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/c58eaeaf
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/c58eaeaf
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/c58eaeaf
Branch: refs/heads/master
Commit: c58eaeaf8d5be966967c1db169031d8fa4ee30c8
Parents: 1680c42
Author: Makoto Yui <my...@apache.org>
Authored: Mon Oct 16 20:53:53 2017 +0900
Committer: Makoto Yui <my...@apache.org>
Committed: Mon Oct 16 21:05:00 2017 +0900
----------------------------------------------------------------------
.../src/main/scala/org/apache/spark/sql/hive/HivemallOps.scala | 4 ++--
.../test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c58eaeaf/spark/spark-2.2/src/main/scala/org/apache/spark/sql/hive/HivemallOps.scala
----------------------------------------------------------------------
diff --git a/spark/spark-2.2/src/main/scala/org/apache/spark/sql/hive/HivemallOps.scala b/spark/spark-2.2/src/main/scala/org/apache/spark/sql/hive/HivemallOps.scala
index 45f7b4d..90a21d7 100644
--- a/spark/spark-2.2/src/main/scala/org/apache/spark/sql/hive/HivemallOps.scala
+++ b/spark/spark-2.2/src/main/scala/org/apache/spark/sql/hive/HivemallOps.scala
@@ -1843,10 +1843,10 @@ object HivemallOps {
* @see [[hivemall.ftvec.trans.AddFieldIndicesUDF]]
* @group ftvec.trans
*/
- def add_field_indicies(features: Column): Column = withExpr {
+ def add_field_indices(features: Column): Column = withExpr {
planHiveGenericUDF(
"hivemall.ftvec.trans.AddFieldIndicesUDF",
- "add_field_indicies",
+ "add_field_indices",
features :: Nil
)
}
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c58eaeaf/spark/spark-2.2/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
----------------------------------------------------------------------
diff --git a/spark/spark-2.2/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala b/spark/spark-2.2/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
index de2481c..5d0f075 100644
--- a/spark/spark-2.2/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
+++ b/spark/spark-2.2/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
@@ -345,7 +345,7 @@ class HivemallOpsWithFeatureSuite extends HivemallFeatureQueryTest {
Row(Seq("190:140405:1", "111:1058718:1"))
)
checkAnswer(
- DummyInputData.select(add_field_indicies(typedLit(Seq("0.5", "0.1")))),
+ DummyInputData.select(add_field_indices(typedLit(Seq("0.5", "0.1")))),
Row(Seq("1:0.5", "2:0.1"))
)
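For reference, the renamed function prefixes each feature value with its 1-based field index, exactly as the updated assertion shows. A sketch of the call (assumes a Hive-enabled SparkSession named `spark` with the Hivemall spark-2.2 module on the classpath):

import spark.implicits._
import org.apache.spark.sql.functions.typedLit
import org.apache.spark.sql.hive.HivemallOps.add_field_indices

val df = Seq((0, 0)).toDF("c0", "c1")
// "0.5" and "0.1" come back as "1:0.5" and "2:0.1".
df.select(add_field_indices(typedLit(Seq("0.5", "0.1")))).show()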
[2/4] incubator-hivemall git commit: Close #122: [HIVEMALL-147][Spark] Support all Hivemall functions of v0.5-rc.1 in Spark Dataframe
Posted by my...@apache.org.
Close #122: [HIVEMALL-147][Spark] Support all Hivemall functions of v0.5-rc.1 in Spark Dataframe
Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/1680c42c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/1680c42c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/1680c42c
Branch: refs/heads/master
Commit: 1680c42cf762a52183f76613fb02411f8f3a671a
Parents: fdf7021
Author: Takeshi Yamamuro <ya...@apache.org>
Authored: Mon Oct 16 20:52:56 2017 +0900
Committer: Makoto Yui <my...@apache.org>
Committed: Mon Oct 16 21:04:41 2017 +0900
----------------------------------------------------------------------
.../main/java/hivemall/evaluation/AUCUDAF.java | 4 +-
.../java/hivemall/evaluation/HitRateUDAF.java | 5 +-
.../main/java/hivemall/evaluation/MAPUDAF.java | 5 +-
.../main/java/hivemall/evaluation/MRRUDAF.java | 5 +-
.../main/java/hivemall/evaluation/NDCGUDAF.java | 5 +-
.../java/hivemall/evaluation/PrecisionUDAF.java | 5 +-
.../java/hivemall/evaluation/RecallUDAF.java | 5 +-
.../tools/array/ArrayAvgGenericUDAF.java | 2 -
.../hivemall/topicmodel/LDAPredictUDAF.java | 2 +-
.../hivemall/topicmodel/PLSAPredictUDAF.java | 2 +-
.../spark/sql/hive/HivemallGroupedDataset.scala | 474 +++++++++--
.../org/apache/spark/sql/hive/HivemallOps.scala | 824 +++++++++++++++++--
.../spark/sql/hive/HivemallOpsSuite.scala | 736 +++++++++++++----
.../hive/test/HivemallFeatureQueryTest.scala | 11 -
14 files changed, 1764 insertions(+), 321 deletions(-)
----------------------------------------------------------------------
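The net effect of this patch, as the suite above exercises, is that Hivemall UDFs and UDAFs become directly callable on DataFrames. A sketch of the two call styles (column functions via HivemallOps, grouped aggregates via HivemallGroupedDataset; assumes a Hive-enabled SparkSession named `spark` and the Hivemall jar on the classpath):

import org.apache.spark.sql.functions.lit
import org.apache.spark.sql.hive.HivemallOps._
import spark.implicits._

// Column-level UDF, planned as a Hive generic UDF under the hood:
val d1 = Seq((0, 0)).toDF("c0", "c1")
d1.select(hamming_distance(lit(1), lit(3))).show()   // -> 1

// Grouped UDAF; groupBy(...) is wrapped into HivemallGroupedDataset by an
// implicit conversion that ships with the module:
val d2 = Seq((1, 0.1), (1, 0.2), (2, 0.1)).toDF("c0", "c1")
d2.groupBy($"c0").voted_avg("c1").show()             // -> 0.15 for key 1, 0.10 for key 2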
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/1680c42c/core/src/main/java/hivemall/evaluation/AUCUDAF.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/evaluation/AUCUDAF.java b/core/src/main/java/hivemall/evaluation/AUCUDAF.java
index 508e36a..6dba174 100644
--- a/core/src/main/java/hivemall/evaluation/AUCUDAF.java
+++ b/core/src/main/java/hivemall/evaluation/AUCUDAF.java
@@ -110,7 +110,7 @@ public final class AUCUDAF extends AbstractGenericUDAFResolver {
@Override
public ObjectInspector init(Mode mode, ObjectInspector[] parameters) throws HiveException {
- assert (parameters.length == 2 || parameters.length == 3) : parameters.length;
+ assert (0 < parameters.length && parameters.length <= 3) : parameters.length;
super.init(mode, parameters);
// initialize input
@@ -439,7 +439,7 @@ public final class AUCUDAF extends AbstractGenericUDAFResolver {
@Override
public ObjectInspector init(Mode mode, ObjectInspector[] parameters) throws HiveException {
- assert (parameters.length == 2 || parameters.length == 3) : parameters.length;
+ assert (0 < parameters.length && parameters.length <= 3) : parameters.length;
super.init(mode, parameters);
// initialize input
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/1680c42c/core/src/main/java/hivemall/evaluation/HitRateUDAF.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/evaluation/HitRateUDAF.java b/core/src/main/java/hivemall/evaluation/HitRateUDAF.java
index 6df6087..6a2d0da 100644
--- a/core/src/main/java/hivemall/evaluation/HitRateUDAF.java
+++ b/core/src/main/java/hivemall/evaluation/HitRateUDAF.java
@@ -71,9 +71,6 @@ import org.apache.hadoop.io.LongWritable;
+ " - Returns HitRate")
public final class HitRateUDAF extends AbstractGenericUDAFResolver {
- // prevent instantiation
- private HitRateUDAF() {}
-
@Override
public GenericUDAFEvaluator getEvaluator(@Nonnull TypeInfo[] typeInfo) throws SemanticException {
if (typeInfo.length != 2 && typeInfo.length != 3) {
@@ -109,7 +106,7 @@ public final class HitRateUDAF extends AbstractGenericUDAFResolver {
@Override
public ObjectInspector init(Mode mode, ObjectInspector[] parameters) throws HiveException {
- assert (parameters.length == 2 || parameters.length == 3) : parameters.length;
+ assert (0 < parameters.length && parameters.length <= 3) : parameters.length;
super.init(mode, parameters);
// initialize input
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/1680c42c/core/src/main/java/hivemall/evaluation/MAPUDAF.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/evaluation/MAPUDAF.java b/core/src/main/java/hivemall/evaluation/MAPUDAF.java
index 45e64cb..fef1f43 100644
--- a/core/src/main/java/hivemall/evaluation/MAPUDAF.java
+++ b/core/src/main/java/hivemall/evaluation/MAPUDAF.java
@@ -53,9 +53,6 @@ import org.apache.hadoop.io.LongWritable;
+ " - Returns MAP")
public final class MAPUDAF extends AbstractGenericUDAFResolver {
- // prevent instantiation
- private MAPUDAF() {}
-
@Override
public GenericUDAFEvaluator getEvaluator(@Nonnull TypeInfo[] typeInfo) throws SemanticException {
if (typeInfo.length != 2 && typeInfo.length != 3) {
@@ -91,7 +88,7 @@ public final class MAPUDAF extends AbstractGenericUDAFResolver {
@Override
public ObjectInspector init(Mode mode, ObjectInspector[] parameters) throws HiveException {
- assert (parameters.length == 2 || parameters.length == 3) : parameters.length;
+ assert (0 < parameters.length && parameters.length <= 3) : parameters.length;
super.init(mode, parameters);
// initialize input
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/1680c42c/core/src/main/java/hivemall/evaluation/MRRUDAF.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/evaluation/MRRUDAF.java b/core/src/main/java/hivemall/evaluation/MRRUDAF.java
index 98b8c3d..fcd9d51 100644
--- a/core/src/main/java/hivemall/evaluation/MRRUDAF.java
+++ b/core/src/main/java/hivemall/evaluation/MRRUDAF.java
@@ -53,9 +53,6 @@ import org.apache.hadoop.io.LongWritable;
+ " - Returns MRR")
public final class MRRUDAF extends AbstractGenericUDAFResolver {
- // prevent instantiation
- private MRRUDAF() {}
-
@Override
public GenericUDAFEvaluator getEvaluator(@Nonnull TypeInfo[] typeInfo) throws SemanticException {
if (typeInfo.length != 2 && typeInfo.length != 3) {
@@ -91,7 +88,7 @@ public final class MRRUDAF extends AbstractGenericUDAFResolver {
@Override
public ObjectInspector init(Mode mode, ObjectInspector[] parameters) throws HiveException {
- assert (parameters.length == 2 || parameters.length == 3) : parameters.length;
+ assert (0 < parameters.length && parameters.length <= 3) : parameters.length;
super.init(mode, parameters);
// initialize input
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/1680c42c/core/src/main/java/hivemall/evaluation/NDCGUDAF.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/evaluation/NDCGUDAF.java b/core/src/main/java/hivemall/evaluation/NDCGUDAF.java
index 4e4fde6..00aa16a 100644
--- a/core/src/main/java/hivemall/evaluation/NDCGUDAF.java
+++ b/core/src/main/java/hivemall/evaluation/NDCGUDAF.java
@@ -55,9 +55,6 @@ import org.apache.hadoop.io.LongWritable;
+ " - Returns nDCG")
public final class NDCGUDAF extends AbstractGenericUDAFResolver {
- // prevent instantiation
- private NDCGUDAF() {}
-
@Override
public GenericUDAFEvaluator getEvaluator(@Nonnull TypeInfo[] typeInfo) throws SemanticException {
if (typeInfo.length != 2 && typeInfo.length != 3) {
@@ -94,7 +91,7 @@ public final class NDCGUDAF extends AbstractGenericUDAFResolver {
@Override
public ObjectInspector init(Mode mode, ObjectInspector[] parameters) throws HiveException {
- assert (parameters.length == 2 || parameters.length == 3) : parameters.length;
+ assert (0 < parameters.length && parameters.length <= 3) : parameters.length;
super.init(mode, parameters);
// initialize input
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/1680c42c/core/src/main/java/hivemall/evaluation/PrecisionUDAF.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/evaluation/PrecisionUDAF.java b/core/src/main/java/hivemall/evaluation/PrecisionUDAF.java
index de8a876..2e09a71 100644
--- a/core/src/main/java/hivemall/evaluation/PrecisionUDAF.java
+++ b/core/src/main/java/hivemall/evaluation/PrecisionUDAF.java
@@ -53,9 +53,6 @@ import org.apache.hadoop.io.LongWritable;
+ " - Returns Precision")
public final class PrecisionUDAF extends AbstractGenericUDAFResolver {
- // prevent instantiation
- private PrecisionUDAF() {}
-
@Override
public GenericUDAFEvaluator getEvaluator(@Nonnull TypeInfo[] typeInfo) throws SemanticException {
if (typeInfo.length != 2 && typeInfo.length != 3) {
@@ -91,7 +88,7 @@ public final class PrecisionUDAF extends AbstractGenericUDAFResolver {
@Override
public ObjectInspector init(Mode mode, ObjectInspector[] parameters) throws HiveException {
- assert (parameters.length == 2 || parameters.length == 3) : parameters.length;
+ assert (0 < parameters.length && parameters.length <= 3) : parameters.length;
super.init(mode, parameters);
// initialize input
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/1680c42c/core/src/main/java/hivemall/evaluation/RecallUDAF.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/evaluation/RecallUDAF.java b/core/src/main/java/hivemall/evaluation/RecallUDAF.java
index 30b1712..d084c94 100644
--- a/core/src/main/java/hivemall/evaluation/RecallUDAF.java
+++ b/core/src/main/java/hivemall/evaluation/RecallUDAF.java
@@ -53,9 +53,6 @@ import org.apache.hadoop.io.LongWritable;
+ " - Returns Recall")
public final class RecallUDAF extends AbstractGenericUDAFResolver {
- // prevent instantiation
- private RecallUDAF() {}
-
@Override
public GenericUDAFEvaluator getEvaluator(@Nonnull TypeInfo[] typeInfo) throws SemanticException {
if (typeInfo.length != 2 && typeInfo.length != 3) {
@@ -91,7 +88,7 @@ public final class RecallUDAF extends AbstractGenericUDAFResolver {
@Override
public ObjectInspector init(Mode mode, ObjectInspector[] parameters) throws HiveException {
- assert (parameters.length == 2 || parameters.length == 3) : parameters.length;
+ assert (0 < parameters.length && parameters.length <= 3) : parameters.length;
super.init(mode, parameters);
// initialize input
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/1680c42c/core/src/main/java/hivemall/tools/array/ArrayAvgGenericUDAF.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/tools/array/ArrayAvgGenericUDAF.java b/core/src/main/java/hivemall/tools/array/ArrayAvgGenericUDAF.java
index 6dbb7d5..090a50c 100644
--- a/core/src/main/java/hivemall/tools/array/ArrayAvgGenericUDAF.java
+++ b/core/src/main/java/hivemall/tools/array/ArrayAvgGenericUDAF.java
@@ -61,8 +61,6 @@ import org.apache.hadoop.io.IntWritable;
+ " in which each element is the mean of a set of numbers")
public final class ArrayAvgGenericUDAF extends AbstractGenericUDAFResolver {
- private ArrayAvgGenericUDAF() {}// prevent instantiation
-
@Override
public GenericUDAFEvaluator getEvaluator(TypeInfo[] typeInfo) throws SemanticException {
if (typeInfo.length != 1) {
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/1680c42c/core/src/main/java/hivemall/topicmodel/LDAPredictUDAF.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/topicmodel/LDAPredictUDAF.java b/core/src/main/java/hivemall/topicmodel/LDAPredictUDAF.java
index 94d510a..68c802f 100644
--- a/core/src/main/java/hivemall/topicmodel/LDAPredictUDAF.java
+++ b/core/src/main/java/hivemall/topicmodel/LDAPredictUDAF.java
@@ -200,7 +200,7 @@ public final class LDAPredictUDAF extends AbstractGenericUDAFResolver {
@Override
public ObjectInspector init(Mode mode, ObjectInspector[] parameters) throws HiveException {
- assert (parameters.length == 4 || parameters.length == 5);
+ assert (parameters.length == 1 || parameters.length == 4 || parameters.length == 5);
super.init(mode, parameters);
// initialize input
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/1680c42c/core/src/main/java/hivemall/topicmodel/PLSAPredictUDAF.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/topicmodel/PLSAPredictUDAF.java b/core/src/main/java/hivemall/topicmodel/PLSAPredictUDAF.java
index 7702945..6210359 100644
--- a/core/src/main/java/hivemall/topicmodel/PLSAPredictUDAF.java
+++ b/core/src/main/java/hivemall/topicmodel/PLSAPredictUDAF.java
@@ -202,7 +202,7 @@ public final class PLSAPredictUDAF extends AbstractGenericUDAFResolver {
@Override
public ObjectInspector init(Mode mode, ObjectInspector[] parameters) throws HiveException {
- assert (parameters.length == 4 || parameters.length == 5);
+ assert (parameters.length == 1 || parameters.length == 4 || parameters.length == 5);
super.init(mode, parameters);
// initialize input
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/1680c42c/spark/spark-2.2/src/main/scala/org/apache/spark/sql/hive/HivemallGroupedDataset.scala
----------------------------------------------------------------------
diff --git a/spark/spark-2.2/src/main/scala/org/apache/spark/sql/hive/HivemallGroupedDataset.scala b/spark/spark-2.2/src/main/scala/org/apache/spark/sql/hive/HivemallGroupedDataset.scala
index a012efd..00617b7 100644
--- a/spark/spark-2.2/src/main/scala/org/apache/spark/sql/hive/HivemallGroupedDataset.scala
+++ b/spark/spark-2.2/src/main/scala/org/apache/spark/sql/hive/HivemallGroupedDataset.scala
@@ -34,25 +34,61 @@ import org.apache.spark.sql.types._
/**
* Groups the [[DataFrame]] using the specified columns, so we can run aggregation on them.
*
+ * @groupname classifier
* @groupname ensemble
- * @groupname ftvec.trans
* @groupname evaluation
+ * @groupname topicmodel
+ * @groupname ftvec.selection
+ * @groupname ftvec.text
+ * @groupname ftvec.trans
+ * @groupname tools.array
+ * @groupname tools.bits
+ * @groupname tools.list
+ * @groupname tools.map
+ * @groupname tools.matrix
+ * @groupname tools.math
+ *
+ * A list of unsupported functions is as follows:
+ * * ftvec.conv
+ * - conv2dense
+ * - build_bins
*/
final class HivemallGroupedDataset(groupBy: RelationalGroupedDataset) {
/**
+ * @see hivemall.classifier.KPAPredictUDAF
+ * @group classifier
+ */
+ def kpa_predict(xh: String, xk: String, w0: String, w1: String, w2: String, w3: String)
+ : DataFrame = {
+ checkType(xh, DoubleType)
+ checkType(xk, DoubleType)
+ checkType(w0, FloatType)
+ checkType(w1, FloatType)
+ checkType(w2, FloatType)
+ checkType(w3, FloatType)
+ val udaf = HiveUDAFFunction(
+ "kpa_predict",
+ new HiveFunctionWrapper("hivemall.classifier.KPAPredictUDAF"),
+ Seq(xh, xk, w0, w1, w2, w3).map(df(_).expr),
+ isUDAFBridgeRequired = false)
+ .toAggregateExpression()
+ toDF(Alias(udaf, udaf.prettyName)() :: Nil)
+ }
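// Sketch (not part of the patch): a call mirroring the "aggregations for classifiers"
// test in HivemallOpsSuite above. The checkType calls require Double columns for
// xh/xk and Float columns for w0..w3; the module's implicit conversion from
// RelationalGroupedDataset to HivemallGroupedDataset is assumed to be in scope.
import spark.implicits._
val kpaDf = Seq((1, 0.1, 0.1, 0.2f, 0.2f, 0.2f, 0.2f))
  .toDF("key", "xh", "xk", "w0", "w1", "w2", "w3")
kpaDf.groupBy($"key").kpa_predict("xh", "xk", "w0", "w1", "w2", "w3").show()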
+
+ /**
* @see hivemall.ensemble.bagging.VotedAvgUDAF
* @group ensemble
*/
def voted_avg(weight: String): DataFrame = {
- // checkType(weight, NumericType)
+ checkType(weight, DoubleType)
val udaf = HiveUDAFFunction(
"voted_avg",
new HiveFunctionWrapper("hivemall.ensemble.bagging.WeightVotedAvgUDAF"),
- Seq(weight).map(df.col(_).expr),
+ Seq(weight).map(df(_).expr),
isUDAFBridgeRequired = true)
.toAggregateExpression()
- toDF((Alias(udaf, udaf.prettyName)() :: Nil).toSeq)
+ toDF(Alias(udaf, udaf.prettyName)() :: Nil)
}
/**
@@ -60,14 +96,14 @@ final class HivemallGroupedDataset(groupBy: RelationalGroupedDataset) {
* @group ensemble
*/
def weight_voted_avg(weight: String): DataFrame = {
- // checkType(weight, NumericType)
+ checkType(weight, DoubleType)
val udaf = HiveUDAFFunction(
"weight_voted_avg",
new HiveFunctionWrapper("hivemall.ensemble.bagging.WeightVotedAvgUDAF"),
- Seq(weight).map(df.col(_).expr),
+ Seq(weight).map(df(_).expr),
isUDAFBridgeRequired = true)
.toAggregateExpression()
- toDF((Alias(udaf, udaf.prettyName)() :: Nil).toSeq)
+ toDF(Alias(udaf, udaf.prettyName)() :: Nil)
}
/**
@@ -75,15 +111,15 @@ final class HivemallGroupedDataset(groupBy: RelationalGroupedDataset) {
* @group ensemble
*/
def argmin_kld(weight: String, conv: String): DataFrame = {
- // checkType(weight, NumericType)
- // checkType(conv, NumericType)
+ checkType(weight, FloatType)
+ checkType(conv, FloatType)
val udaf = HiveUDAFFunction(
"argmin_kld",
new HiveFunctionWrapper("hivemall.ensemble.ArgminKLDistanceUDAF"),
- Seq(weight, conv).map(df.col(_).expr),
+ Seq(weight, conv).map(df(_).expr),
isUDAFBridgeRequired = true)
.toAggregateExpression()
- toDF((Alias(udaf, udaf.prettyName)() :: Nil).toSeq)
+ toDF(Alias(udaf, udaf.prettyName)() :: Nil)
}
/**
@@ -91,15 +127,15 @@ final class HivemallGroupedDataset(groupBy: RelationalGroupedDataset) {
* @group ensemble
*/
def max_label(score: String, label: String): DataFrame = {
- // checkType(score, NumericType)
+ checkType(score, DoubleType)
checkType(label, StringType)
val udaf = HiveUDAFFunction(
"max_label",
new HiveFunctionWrapper("hivemall.ensemble.MaxValueLabelUDAF"),
- Seq(score, label).map(df.col(_).expr),
+ Seq(score, label).map(df(_).expr),
isUDAFBridgeRequired = true)
.toAggregateExpression()
- toDF((Alias(udaf, udaf.prettyName)() :: Nil).toSeq)
+ toDF(Alias(udaf, udaf.prettyName)() :: Nil)
}
/**
@@ -107,134 +143,430 @@ final class HivemallGroupedDataset(groupBy: RelationalGroupedDataset) {
* @group ensemble
*/
def maxrow(score: String, label: String): DataFrame = {
- // checkType(score, NumericType)
+ checkType(score, DoubleType)
checkType(label, StringType)
val udaf = HiveUDAFFunction(
"maxrow",
new HiveFunctionWrapper("hivemall.ensemble.MaxRowUDAF"),
- Seq(score, label).map(df.col(_).expr),
+ Seq(score, label).map(df(_).expr),
isUDAFBridgeRequired = false)
.toAggregateExpression()
- toDF((Alias(udaf, udaf.prettyName)() :: Nil).toSeq)
+ toDF(Alias(udaf, udaf.prettyName)() :: Nil)
}
/**
* @see hivemall.smile.tools.RandomForestEnsembleUDAF
* @group ensemble
*/
- def rf_ensemble(predict: String): DataFrame = {
- // checkType(predict, NumericType)
+ @scala.annotation.varargs
+ def rf_ensemble(yhat: String, others: String*): DataFrame = {
+ checkType(yhat, IntegerType)
val udaf = HiveUDAFFunction(
"rf_ensemble",
new HiveFunctionWrapper("hivemall.smile.tools.RandomForestEnsembleUDAF"),
- Seq(predict).map(df.col(_).expr),
+ (yhat +: others).map(df(_).expr),
isUDAFBridgeRequired = false)
.toAggregateExpression()
- toDF((Alias(udaf, udaf.prettyName)() :: Nil).toSeq)
+ toDF(Alias(udaf, udaf.prettyName)() :: Nil)
}
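// Sketch (assumed schema): rf_ensemble now takes the predicted label column
// (an Int column) plus optional extras; grouping by a hypothetical "rowid"
// combines the per-tree votes for each instance.
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.hive.HivemallGroupedDataset._
def ensembleVotes(perTree: DataFrame): DataFrame =
  perTree.groupBy("rowid").rf_ensemble("predicted")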
/**
- * @see hivemall.tools.matrix.TransposeAndDotUDAF
+ * @see hivemall.evaluation.MeanAbsoluteErrorUDAF
+ * @group evaluation
*/
- def transpose_and_dot(X: String, Y: String): DataFrame = {
+ def mae(predict: String, target: String): DataFrame = {
+ checkType(predict, DoubleType)
+ checkType(target, DoubleType)
val udaf = HiveUDAFFunction(
- "transpose_and_dot",
- new HiveFunctionWrapper("hivemall.tools.matrix.TransposeAndDotUDAF"),
- Seq(X, Y).map(df.col(_).expr),
+ "mae",
+ new HiveFunctionWrapper("hivemall.evaluation.MeanAbsoluteErrorUDAF"),
+ Seq(predict, target).map(df(_).expr),
+ isUDAFBridgeRequired = true)
+ .toAggregateExpression()
+ toDF(Alias(udaf, udaf.prettyName)() :: Nil)
+ }
+
+ /**
+ * @see hivemall.evaluation.MeanSquareErrorUDAF
+ * @group evaluation
+ */
+ def mse(predict: String, target: String): DataFrame = {
+ checkType(predict, DoubleType)
+ checkType(target, DoubleType)
+ val udaf = HiveUDAFFunction(
+ "mse",
+ new HiveFunctionWrapper("hivemall.evaluation.MeanSquaredErrorUDAF"),
+ Seq(predict, target).map(df(_).expr),
+ isUDAFBridgeRequired = true)
+ .toAggregateExpression()
+ toDF(Alias(udaf, udaf.prettyName)() :: Nil)
+ }
+
+ /**
+ * @see hivemall.evaluation.RootMeanSquareErrorUDAF
+ * @group evaluation
+ */
+ def rmse(predict: String, target: String): DataFrame = {
+ checkType(predict, DoubleType)
+ checkType(target, DoubleType)
+ val udaf = HiveUDAFFunction(
+ "rmse",
+ new HiveFunctionWrapper("hivemall.evaluation.RootMeanSquaredErrorUDAF"),
+ Seq(predict, target).map(df(_).expr),
+ isUDAFBridgeRequired = true)
+ .toAggregateExpression()
+ toDF(Alias(udaf, udaf.prettyName)() :: Nil)
+ }
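// Evaluation sketch (hypothetical Double columns "predict" and "target"; an
// empty groupBy yields a single global score, and mae/mse follow the same
// pattern as rmse):
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.hive.HivemallGroupedDataset._
def globalRmse(scored: DataFrame): DataFrame =
  scored.groupBy().rmse("predict", "target")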
+
+ /**
+ * @see hivemall.evaluation.R2UDAF
+ * @group evaluation
+ */
+ def r2(predict: String, target: String): DataFrame = {
+ checkType(predict, DoubleType)
+ checkType(target, DoubleType)
+ val udaf = HiveUDAFFunction(
+ "r2",
+ new HiveFunctionWrapper("hivemall.evaluation.R2UDAF"),
+ Seq(predict, target).map(df(_).expr),
+ isUDAFBridgeRequired = true)
+ .toAggregateExpression()
+ toDF(Alias(udaf, udaf.prettyName)() :: Nil)
+ }
+
+ /**
+ * @see hivemall.evaluation.LogarithmicLossUDAF
+ * @group evaluation
+ */
+ def logloss(predict: String, target: String): DataFrame = {
+ checkType(predict, DoubleType)
+ checkType(target, DoubleType)
+ val udaf = HiveUDAFFunction(
+ "logloss",
+ new HiveFunctionWrapper("hivemall.evaluation.LogarithmicLossUDAF"),
+ Seq(predict, target).map(df(_).expr),
+ isUDAFBridgeRequired = true)
+ .toAggregateExpression()
+ toDF(Alias(udaf, udaf.prettyName)() :: Nil)
+ }
+
+ /**
+ * @see hivemall.evaluation.F1ScoreUDAF
+ * @group evaluation
+ */
+ def f1score(predict: String, target: String): DataFrame = {
+ // checkType(target, ArrayType(IntegerType, false))
+ // checkType(predict, ArrayType(IntegerType, false))
+ val udaf = HiveUDAFFunction(
+ "f1score",
+ new HiveFunctionWrapper("hivemall.evaluation.F1ScoreUDAF"),
+ Seq(predict, target).map(df(_).expr),
+ isUDAFBridgeRequired = true)
+ .toAggregateExpression()
+ toDF(Alias(udaf, udaf.prettyName)() :: Nil)
+ }
+
+ /**
+ * @see hivemall.evaluation.NDCGUDAF
+ * @group evaluation
+ */
+ @scala.annotation.varargs
+ def ndcg(rankItems: String, correctItems: String, others: String*): DataFrame = {
+ val udaf = HiveUDAFFunction(
+ "ndcg",
+ new HiveFunctionWrapper("hivemall.evaluation.NDCGUDAF"),
+ (rankItems +: correctItems +: others).map(df(_).expr),
+ isUDAFBridgeRequired = false)
+ .toAggregateExpression()
+ toDF(Alias(udaf, udaf.prettyName)() :: Nil)
+ }
+
+ /**
+ * @see hivemall.evaluation.PrecisionUDAF
+ * @group evaluation
+ */
+ @scala.annotation.varargs
+ def precision_at(rankItems: String, correctItems: String, others: String*): DataFrame = {
+ val udaf = HiveUDAFFunction(
+ "precision_at",
+ new HiveFunctionWrapper("hivemall.evaluation.PrecisionUDAF"),
+ (rankItems +: correctItems +: others).map(df(_).expr),
+ isUDAFBridgeRequired = false)
+ .toAggregateExpression()
+ toDF(Alias(udaf, udaf.prettyName)() :: Nil)
+ }
+
+ /**
+ * @see hivemall.evaluation.RecallUDAF
+ * @group evaluation
+ */
+ @scala.annotation.varargs
+ def recall_at(rankItems: String, correctItems: String, others: String*): DataFrame = {
+ val udaf = HiveUDAFFunction(
+ "recall_at",
+ new HiveFunctionWrapper("hivemall.evaluation.RecallUDAF"),
+ (rankItems +: correctItems +: others).map(df(_).expr),
+ isUDAFBridgeRequired = false)
+ .toAggregateExpression()
+ toDF(Alias(udaf, udaf.prettyName)() :: Nil)
+ }
+
+ /**
+ * @see hivemall.evaluation.HitRateUDAF
+ * @group evaluation
+ */
+ @scala.annotation.varargs
+ def hitrate(rankItems: String, correctItems: String, others: String*): DataFrame = {
+ val udaf = HiveUDAFFunction(
+ "hitrate",
+ new HiveFunctionWrapper("hivemall.evaluation.HitRateUDAF"),
+ (rankItems +: correctItems +: others).map(df(_).expr),
+ isUDAFBridgeRequired = false)
+ .toAggregateExpression()
+ toDF(Alias(udaf, udaf.prettyName)() :: Nil)
+ }
+
+ /**
+ * @see hivemall.evaluation.MRRUDAF
+ * @group evaluation
+ */
+ @scala.annotation.varargs
+ def mrr(rankItems: String, correctItems: String, others: String*): DataFrame = {
+ val udaf = HiveUDAFFunction(
+ "mrr",
+ new HiveFunctionWrapper("hivemall.evaluation.MRRUDAF"),
+ (rankItems +: correctItems +: others).map(df(_).expr),
+ isUDAFBridgeRequired = false)
+ .toAggregateExpression()
+ toDF(Alias(udaf, udaf.prettyName)() :: Nil)
+ }
+
+ /**
+ * @see hivemall.evaluation.MAPUDAF
+ * @group evaluation
+ */
+ @scala.annotation.varargs
+ def average_precision(rankItems: String, correctItems: String, others: String*): DataFrame = {
+ val udaf = HiveUDAFFunction(
+ "average_precision",
+ new HiveFunctionWrapper("hivemall.evaluation.MAPUDAF"),
+ (rankItems +: correctItems +: others).map(df(_).expr),
+ isUDAFBridgeRequired = false)
+ .toAggregateExpression()
+ toDF(Alias(udaf, udaf.prettyName)() :: Nil)
+ }
+
+ /**
+ * @see hivemall.evaluation.AUCUDAF
+ * @group evaluation
+ */
+ @scala.annotation.varargs
+ def auc(args: String*): DataFrame = {
+ val udaf = HiveUDAFFunction(
+ "auc",
+ new HiveFunctionWrapper("hivemall.evaluation.AUCUDAF"),
+ args.map(df(_).expr),
isUDAFBridgeRequired = false)
.toAggregateExpression()
- toDF(Seq(Alias(udaf, udaf.prettyName)()))
+ toDF(Alias(udaf, udaf.prettyName)() :: Nil)
+ }
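// Ranking-metric sketch (hypothetical schema: "rec_items" and "truth_items"
// are per-user array columns; precision_at, recall_at, hitrate, mrr and
// average_precision take the same argument shape):
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.hive.HivemallGroupedDataset._
def ndcgPerUser(ranked: DataFrame): DataFrame =
  ranked.groupBy("userid").ndcg("rec_items", "truth_items")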
+
+ /**
+ * @see hivemall.topicmodel.LDAPredictUDAF
+ * @group topicmodel
+ */
+ @scala.annotation.varargs
+ def lda_predict(word: String, value: String, label: String, lambda: String, others: String*)
+ : DataFrame = {
+ checkType(word, StringType)
+ checkType(value, DoubleType)
+ checkType(label, IntegerType)
+ checkType(lambda, DoubleType)
+ val udaf = HiveUDAFFunction(
+ "lda_predict",
+ new HiveFunctionWrapper("hivemall.topicmodel.LDAPredictUDAF"),
+ (word +: value +: label +: lambda +: others).map(df(_).expr),
+ isUDAFBridgeRequired = false)
+ .toAggregateExpression()
+ toDF(Alias(udaf, udaf.prettyName)() :: Nil)
+ }
+
+ /**
+ * @see hivemall.topicmodel.PLSAPredictUDAF
+ * @group topicmodel
+ */
+ @scala.annotation.varargs
+ def plsa_predict(word: String, value: String, label: String, prob: String, others: String*)
+ : DataFrame = {
+ checkType(word, StringType)
+ checkType(value, DoubleType)
+ checkType(label, IntegerType)
+ checkType(prob, DoubleType)
+ val udaf = HiveUDAFFunction(
+ "plsa_predict",
+ new HiveFunctionWrapper("hivemall.topicmodel.PLSAPredictUDAF"),
+ (word +: value +: label +: prob +: others).map(df(_).expr),
+ isUDAFBridgeRequired = false)
+ .toAggregateExpression()
+ toDF(Alias(udaf, udaf.prettyName)() :: Nil)
+ }
+
+ /**
+ * @see hivemall.ftvec.text.TermFrequencyUDAF
+ * @group ftvec.text
+ */
+ def tf(text: String): DataFrame = {
+ checkType(text, StringType)
+ val udaf = HiveUDAFFunction(
+ "tf",
+ new HiveFunctionWrapper("hivemall.ftvec.text.TermFrequencyUDAF"),
+ Seq(text).map(df(_).expr),
+ isUDAFBridgeRequired = true)
+ .toAggregateExpression()
+ toDF(Alias(udaf, udaf.prettyName)() :: Nil)
}
/**
* @see hivemall.ftvec.trans.OnehotEncodingUDAF
* @group ftvec.trans
*/
- def onehot_encoding(cols: String*): DataFrame = {
+ @scala.annotation.varargs
+ def onehot_encoding(feature: String, others: String*): DataFrame = {
val udaf = HiveUDAFFunction(
"onehot_encoding",
new HiveFunctionWrapper("hivemall.ftvec.trans.OnehotEncodingUDAF"),
- cols.map(df.col(_).expr),
+ (feature +: others).map(df(_).expr),
isUDAFBridgeRequired = false)
.toAggregateExpression()
- toDF(Seq(Alias(udaf, udaf.prettyName)()))
+ toDF(Alias(udaf, udaf.prettyName)() :: Nil)
}
/**
* @see hivemall.ftvec.selection.SignalNoiseRatioUDAF
+ * @group ftvec.selection
*/
- def snr(X: String, Y: String): DataFrame = {
+ def snr(feature: String, label: String): DataFrame = {
val udaf = HiveUDAFFunction(
"snr",
new HiveFunctionWrapper("hivemall.ftvec.selection.SignalNoiseRatioUDAF"),
- Seq(X, Y).map(df.col(_).expr),
+ Seq(feature, label).map(df(_).expr),
isUDAFBridgeRequired = false)
.toAggregateExpression()
- toDF(Seq(Alias(udaf, udaf.prettyName)()))
+ toDF(Alias(udaf, udaf.prettyName)() :: Nil)
}
/**
- * @see hivemall.evaluation.MeanAbsoluteErrorUDAF
- * @group evaluation
+ * @see hivemall.tools.array.ArrayAvgGenericUDAF
+ * @group tools.array
*/
- def mae(predict: String, target: String): DataFrame = {
- checkType(predict, FloatType)
- checkType(target, FloatType)
+ def array_avg(ar: String): DataFrame = {
val udaf = HiveUDAFFunction(
- "mae",
- new HiveFunctionWrapper("hivemall.evaluation.MeanAbsoluteErrorUDAF"),
- Seq(predict, target).map(df.col(_).expr),
- isUDAFBridgeRequired = true)
+ "array_avg",
+ new HiveFunctionWrapper("hivemall.tools.array.ArrayAvgGenericUDAF"),
+ Seq(ar).map(df(_).expr),
+ isUDAFBridgeRequired = false)
.toAggregateExpression()
- toDF((Alias(udaf, udaf.prettyName)() :: Nil).toSeq)
+ toDF(Alias(udaf, udaf.prettyName)() :: Nil)
}
/**
- * @see hivemall.evaluation.MeanSquareErrorUDAF
- * @group evaluation
+ * @see hivemall.tools.array.ArraySumUDAF
+ * @group tools.array
*/
- def mse(predict: String, target: String): DataFrame = {
- checkType(predict, FloatType)
- checkType(target, FloatType)
+ def array_sum(ar: String): DataFrame = {
val udaf = HiveUDAFFunction(
- "mse",
- new HiveFunctionWrapper("hivemall.evaluation.MeanSquaredErrorUDAF"),
- Seq(predict, target).map(df.col(_).expr),
+ "array_sum",
+ new HiveFunctionWrapper("hivemall.tools.array.ArraySumUDAF"),
+ Seq(ar).map(df(_).expr),
isUDAFBridgeRequired = true)
.toAggregateExpression()
- toDF((Alias(udaf, udaf.prettyName)() :: Nil).toSeq)
+ toDF(Alias(udaf, udaf.prettyName)() :: Nil)
}
/**
- * @see hivemall.evaluation.RootMeanSquareErrorUDAF
- * @group evaluation
+ * @see hivemall.tools.bits.BitsCollectUDAF
+ * @group tools.bits
*/
- def rmse(predict: String, target: String): DataFrame = {
- checkType(predict, FloatType)
- checkType(target, FloatType)
+ def bits_collect(x: String): DataFrame = {
val udaf = HiveUDAFFunction(
- "rmse",
- new HiveFunctionWrapper("hivemall.evaluation.RootMeanSquaredErrorUDAF"),
- Seq(predict, target).map(df.col(_).expr),
- isUDAFBridgeRequired = true)
+ "bits_collect",
+ new HiveFunctionWrapper("hivemall.tools.bits.BitsCollectUDAF"),
+ Seq(x).map(df(_).expr),
+ isUDAFBridgeRequired = false)
.toAggregateExpression()
- toDF((Alias(udaf, udaf.prettyName)() :: Nil).toSeq)
+ toDF(Alias(udaf, udaf.prettyName)() :: Nil)
}
/**
- * @see hivemall.evaluation.F1ScoreUDAF
- * @group evaluation
+ * @see hivemall.tools.list.UDAFToOrderedList
+ * @group tools.list
*/
- def f1score(predict: String, target: String): DataFrame = {
- // checkType(target, ArrayType(IntegerType))
- // checkType(predict, ArrayType(IntegerType))
+ @scala.annotation.varargs
+ def to_ordered_list(value: String, others: String*): DataFrame = {
val udaf = HiveUDAFFunction(
- "f1score",
- new HiveFunctionWrapper("hivemall.evaluation.F1ScoreUDAF"),
- Seq(predict, target).map(df.col(_).expr),
+ "to_ordered_list",
+ new HiveFunctionWrapper("hivemall.tools.list.UDAFToOrderedList"),
+ (value +: others).map(df(_).expr),
+ isUDAFBridgeRequired = false)
+ .toAggregateExpression()
+ toDF(Alias(udaf, udaf.prettyName)() :: Nil)
+ }
+
+ /**
+ * @see hivemall.tools.map.UDAFToMap
+ * @group tools.map
+ */
+ def to_map(key: String, value: String): DataFrame = {
+ val udaf = HiveUDAFFunction(
+ "to_map",
+ new HiveFunctionWrapper("hivemall.tools.map.UDAFToMap"),
+ Seq(key, value).map(df(_).expr),
+ isUDAFBridgeRequired = false)
+ .toAggregateExpression()
+ toDF(Alias(udaf, udaf.prettyName)() :: Nil)
+ }
+
+ /**
+ * @see hivemall.tools.map.UDAFToOrderedMap
+ * @group tools.map
+ */
+ @scala.annotation.varargs
+ def to_ordered_map(key: String, value: String, others: String*): DataFrame = {
+ val udaf = HiveUDAFFunction(
+ "to_ordered_map",
+ new HiveFunctionWrapper("hivemall.tools.map.UDAFToOrderedMap"),
+ (key +: value +: others).map(df(_).expr),
+ isUDAFBridgeRequired = false)
+ .toAggregateExpression()
+ toDF(Alias(udaf, udaf.prettyName)() :: Nil)
+ }
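// Sketch (hypothetical columns): to_map collects key/value pairs per group
// into a map, and to_ordered_map is the key-sorted variant used below.
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.hive.HivemallGroupedDataset._
def collectAsMap(kv: DataFrame): DataFrame =
  kv.groupBy("gid").to_ordered_map("k", "v")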
+
+ /**
+ * @see hivemall.tools.matrix.TransposeAndDotUDAF
+ * @group tools.matrix
+ */
+ def transpose_and_dot(matrix0_row: String, matrix1_row: String): DataFrame = {
+ val udaf = HiveUDAFFunction(
+ "transpose_and_dot",
+ new HiveFunctionWrapper("hivemall.tools.matrix.TransposeAndDotUDAF"),
+ Seq(matrix0_row, matrix1_row).map(df(_).expr),
+ isUDAFBridgeRequired = false)
+ .toAggregateExpression()
+ toDF(Alias(udaf, udaf.prettyName)() :: Nil)
+ }
+
+ /**
+ * @see hivemall.tools.math.L2NormUDAF
+ * @group tools.math
+ */
+ def l2_norm(xi: String): DataFrame = {
+ val udaf = HiveUDAFFunction(
+ "l2_norm",
+ new HiveFunctionWrapper("hivemall.tools.math.L2NormUDAF"),
+ Seq(xi).map(df(_).expr),
isUDAFBridgeRequired = true)
.toAggregateExpression()
- toDF((Alias(udaf, udaf.prettyName)() :: Nil).toSeq)
+ toDF(Alias(udaf, udaf.prettyName)() :: Nil)
}
/**
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/1680c42c/spark/spark-2.2/src/main/scala/org/apache/spark/sql/hive/HivemallOps.scala
----------------------------------------------------------------------
diff --git a/spark/spark-2.2/src/main/scala/org/apache/spark/sql/hive/HivemallOps.scala b/spark/spark-2.2/src/main/scala/org/apache/spark/sql/hive/HivemallOps.scala
index 22d3153..45f7b4d 100644
--- a/spark/spark-2.2/src/main/scala/org/apache/spark/sql/hive/HivemallOps.scala
+++ b/spark/spark-2.2/src/main/scala/org/apache/spark/sql/hive/HivemallOps.scala
@@ -38,12 +38,18 @@ import org.apache.spark.sql.functions._
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String
+
/**
- * Hivemall wrapper and some utility functions for DataFrame.
+ * Hivemall wrapper and some utility functions for DataFrame. The functions below derive
+ * from `resources/ddl/define-all-as-permanent.hive`.
*
* @groupname regression
* @groupname classifier
* @groupname classifier.multiclass
+ * @groupname recommend
+ * @groupname topicmodel
+ * @groupname geospatial
+ * @groupname smile
* @groupname xgboost
* @groupname anomaly
* @groupname knn.similarity
@@ -52,28 +58,72 @@ import org.apache.spark.unsafe.types.UTF8String
* @groupname ftvec
* @groupname ftvec.amplify
* @groupname ftvec.hashing
+ * @groupname ftvec.pairing
* @groupname ftvec.scaling
+ * @groupname ftvec.selection
* @groupname ftvec.conv
* @groupname ftvec.trans
+ * @groupname ftvec.ranking
+ * @groupname tools
+ * @groupname tools.array
+ * @groupname tools.bits
+ * @groupname tools.compress
+ * @groupname tools.map
+ * @groupname tools.text
* @groupname misc
+ *
+ * The following functions are not supported:
+ * * smile
+ * - guess_attribute_types
+ * * mapred functions
+ * - taskid
+ * - jobid
+ * - rownum
+ * - distcache_gets
+ * - jobconf_gets
+ * * matrix factorization
+ * - mf_predict
+ * - train_mf_sgd
+ * - train_mf_adagrad
+ * - train_bprmf
+ * - bprmf_predict
+ * * Factorization Machine
+ * - fm_predict
+ * - train_fm
+ * - train_ffm
+ * - ffm_predict
*/
final class HivemallOps(df: DataFrame) extends Logging {
import internal.HivemallOpsImpl._
- private[this] lazy val _sparkSession = df.sparkSession
- private[this] lazy val _analyzer = _sparkSession.sessionState.analyzer
- private[this] lazy val _strategy = new UserProvidedPlanner(_sparkSession.sqlContext.conf)
+ private lazy val _sparkSession = df.sparkSession
+ private lazy val _strategy = new UserProvidedPlanner(_sparkSession.sqlContext.conf)
+
+ /**
+ * @see [[hivemall.regression.GeneralRegressorUDTF]]
+ * @group regression
+ */
+ @scala.annotation.varargs
+ def train_regressor(exprs: Column*): DataFrame = withTypedPlan {
+ planHiveGenericUDTF(
+ df,
+ "hivemall.regression.GeneralRegressorUDTF",
+ "train_regressor",
+ setMixServs(toHivemallFeatures(exprs)),
+ Seq("feature", "weight")
+ )
+ }
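// Usage sketch for the new generic trainer (column names are illustrative;
// the implicit DataFrame-to-HivemallOps conversion is assumed to be in
// scope). Averaging weights per feature is the usual way to fold the UDTF
// output into a model table.
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.hive.HivemallOps._
def fitRegressor(train: DataFrame): DataFrame =
  train.train_regressor(train("features"), train("label"))
    .groupBy("feature").agg("weight" -> "avg")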
/**
* @see [[hivemall.regression.AdaDeltaUDTF]]
* @group regression
*/
@scala.annotation.varargs
- def train_adadelta(exprs: Column*): DataFrame = withTypedPlan {
+ def train_adadelta_regr(exprs: Column*): DataFrame = withTypedPlan {
planHiveGenericUDTF(
df,
"hivemall.regression.AdaDeltaUDTF",
- "train_adadelta",
+ "train_adadelta_regr",
setMixServs(toHivemallFeatures(exprs)),
Seq("feature", "weight")
)
@@ -84,11 +134,11 @@ final class HivemallOps(df: DataFrame) extends Logging {
* @group regression
*/
@scala.annotation.varargs
- def train_adagrad(exprs: Column*): DataFrame = withTypedPlan {
+ def train_adagrad_regr(exprs: Column*): DataFrame = withTypedPlan {
planHiveGenericUDTF(
df,
"hivemall.regression.AdaGradUDTF",
- "train_adagrad",
+ "train_adagrad_regr",
setMixServs(toHivemallFeatures(exprs)),
Seq("feature", "weight")
)
@@ -144,11 +194,11 @@ final class HivemallOps(df: DataFrame) extends Logging {
* @group regression
*/
@scala.annotation.varargs
- def train_logregr(exprs: Column*): DataFrame = withTypedPlan {
+ def train_logistic_regr(exprs: Column*): DataFrame = withTypedPlan {
planHiveGenericUDTF(
df,
"hivemall.regression.LogressUDTF",
- "train_logregr",
+ "train_logistic_regr",
setMixServs(toHivemallFeatures(exprs)),
Seq("feature", "weight")
)
@@ -215,17 +265,17 @@ final class HivemallOps(df: DataFrame) extends Logging {
}
/**
- * @see [[hivemall.smile.regression.RandomForestRegressionUDTF]]
- * @group regression
+ * @see [[hivemall.classifier.GeneralClassifierUDTF]]
+ * @group classifier
*/
@scala.annotation.varargs
- def train_randomforest_regr(exprs: Column*): DataFrame = withTypedPlan {
+ def train_classifier(exprs: Column*): DataFrame = withTypedPlan {
planHiveGenericUDTF(
df,
- "hivemall.smile.regression.RandomForestRegressionUDTF",
- "train_randomforest_regr",
+ "hivemall.classifier.GeneralClassifierUDTF",
+ "train_classifier",
setMixServs(toHivemallFeatures(exprs)),
- Seq("model_id", "model_type", "pred_model", "var_importance", "oob_errors", "oob_tests")
+ Seq("feature", "weight")
)
}
@@ -380,17 +430,17 @@ final class HivemallOps(df: DataFrame) extends Logging {
}
/**
- * @see [[hivemall.smile.classification.RandomForestClassifierUDTF]]
+ * @see [[hivemall.classifier.KernelExpansionPassiveAggressiveUDTF]]
* @group classifier
*/
@scala.annotation.varargs
- def train_randomforest_classifier(exprs: Column*): DataFrame = withTypedPlan {
+ def train_kpa(exprs: Column*): DataFrame = withTypedPlan {
planHiveGenericUDTF(
df,
- "hivemall.smile.classification.RandomForestClassifierUDTF",
- "train_randomforest_classifier",
+ "hivemall.classifier.KernelExpansionPassiveAggressiveUDTF",
+ "train_kpa",
setMixServs(toHivemallFeatures(exprs)),
- Seq("model_id", "model_type", "pred_model", "var_importance", "oob_errors", "oob_tests")
+ Seq("h", "hk", "w0", "w1", "w2", "w3")
)
}
@@ -485,6 +535,21 @@ final class HivemallOps(df: DataFrame) extends Logging {
}
/**
+ * @see [[hivemall.classifier.multiclass.MulticlassAROWClassifierUDTF.AROWh]]
+ * @group classifier.multiclass
+ */
+ @scala.annotation.varargs
+ def train_multiclass_arowh(exprs: Column*): DataFrame = withTypedPlan {
+ planHiveGenericUDTF(
+ df,
+ "hivemall.classifier.multiclass.MulticlassAROWClassifierUDTF$AROWh",
+ "train_multiclass_arowh",
+ setMixServs(toHivemallFeatures(exprs)),
+ Seq("label", "feature", "weight", "conv")
+ )
+ }
+
+ /**
* @see [[hivemall.classifier.multiclass.MulticlassSoftConfidenceWeightedUDTF.SCW1]]
* @group classifier.multiclass
*/
@@ -515,6 +580,81 @@ final class HivemallOps(df: DataFrame) extends Logging {
}
/**
+ * @see [[hivemall.recommend.SlimUDTF]]
+ * @group recommend
+ */
+ @scala.annotation.varargs
+ def train_slim(exprs: Column*): DataFrame = withTypedPlan {
+ planHiveGenericUDTF(
+ df,
+ "hivemall.recommend.SlimUDTF",
+ "train_slim",
+ setMixServs(toHivemallFeatures(exprs)),
+ Seq("j", "nn", "w")
+ )
+ }
+
+ /**
+ * @see [[hivemall.topicmodel.LDAUDTF]]
+ * @group topicmodel
+ */
+ @scala.annotation.varargs
+ def train_lda(exprs: Column*): DataFrame = withTypedPlan {
+ planHiveGenericUDTF(
+ df,
+ "hivemall.topicmodel.LDAUDTF",
+ "train_lda",
+ setMixServs(toHivemallFeatures(exprs)),
+ Seq("topic", "word", "score")
+ )
+ }
+
+ /**
+ * @see [[hivemall.topicmodel.PLSAUDTF]]
+ * @group topicmodel
+ */
+ @scala.annotation.varargs
+ def train_plsa(exprs: Column*): DataFrame = withTypedPlan {
+ planHiveGenericUDTF(
+ df,
+ "hivemall.topicmodel.PLSAUDTF",
+ "train_plsa",
+ setMixServs(toHivemallFeatures(exprs)),
+ Seq("topic", "word", "score")
+ )
+ }
+
+ /**
+ * @see [[hivemall.smile.regression.RandomForestRegressionUDTF]]
+ * @group smile
+ */
+ @scala.annotation.varargs
+ def train_randomforest_regressor(exprs: Column*): DataFrame = withTypedPlan {
+ planHiveGenericUDTF(
+ df,
+ "hivemall.smile.regression.RandomForestRegressionUDTF",
+ "train_randomforest_regressor",
+ setMixServs(toHivemallFeatures(exprs)),
+ Seq("model_id", "model_type", "pred_model", "var_importance", "oob_errors", "oob_tests")
+ )
+ }
+
+ /**
+ * @see [[hivemall.smile.classification.RandomForestClassifierUDTF]]
+ * @group smile
+ */
+ @scala.annotation.varargs
+ def train_randomforest_classifier(exprs: Column*): DataFrame = withTypedPlan {
+ planHiveGenericUDTF(
+ df,
+ "hivemall.smile.classification.RandomForestClassifierUDTF",
+ "train_randomforest_classifier",
+ setMixServs(toHivemallFeatures(exprs)),
+ Seq("model_id", "model_type", "pred_model", "var_importance", "oob_errors", "oob_tests")
+ )
+ }
+
+ /**
* :: Experimental ::
* @see [[hivemall.xgboost.regression.XGBoostRegressionUDTF]]
* @group xgboost
@@ -600,6 +740,21 @@ final class HivemallOps(df: DataFrame) extends Logging {
}
/**
+ * @see [[hivemall.knn.similarity.DIMSUMMapperUDTF]]
+ * @group knn.similarity
+ */
+ @scala.annotation.varargs
+ def dimsum_mapper(exprs: Column*): DataFrame = withTypedPlan {
+ planHiveGenericUDTF(
+ df,
+ "hivemall.knn.similarity.DIMSUMMapperUDTF",
+ "dimsum_mapper",
+ exprs,
+ Seq("j", "k", "b_jk")
+ )
+ }
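// Sketch (assumed schema): dimsum_mapper expands each feature row and a map
// of pre-computed column norms into partial similarity terms (j, k, b_jk).
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.hive.HivemallOps._
def dimsumTerms(rows: DataFrame): DataFrame =
  rows.dimsum_mapper(rows("features"), rows("col_norms"))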
+
+ /**
* @see [[hivemall.knn.lsh.MinHashUDTF]]
* @group knn.lsh
*/
@@ -609,7 +764,7 @@ final class HivemallOps(df: DataFrame) extends Logging {
df,
"hivemall.knn.lsh.MinHashUDTF",
"minhash",
- setMixServs(toHivemallFeatures(exprs)),
+ exprs,
Seq("clusterid", "item")
)
}
@@ -624,7 +779,7 @@ final class HivemallOps(df: DataFrame) extends Logging {
df,
"hivemall.ftvec.amplify.AmplifierUDTF",
"amplify",
- setMixServs(toHivemallFeatures(exprs)),
+ exprs,
Seq("clusterid", "item")
)
}
@@ -668,7 +823,7 @@ final class HivemallOps(df: DataFrame) extends Logging {
df,
"hivemall.ftvec.conv.QuantifyColumnsUDTF",
"quantify",
- setMixServs(toHivemallFeatures(exprs)),
+ exprs,
(0 until exprs.size - 1).map(i => s"c$i")
)
}
@@ -683,7 +838,7 @@ final class HivemallOps(df: DataFrame) extends Logging {
df,
"hivemall.ftvec.trans.BinarizeLabelUDTF",
"binarize_label",
- setMixServs(toHivemallFeatures(exprs)),
+ exprs,
(0 until exprs.size - 1).map(i => s"c$i")
)
}
@@ -698,17 +853,62 @@ final class HivemallOps(df: DataFrame) extends Logging {
df,
"hivemall.ftvec.trans.QuantifiedFeaturesUDTF",
"quantified_features",
- setMixServs(toHivemallFeatures(exprs)),
+ exprs,
Seq("features")
)
}
/**
+ * @see [[hivemall.ftvec.ranking.BprSamplingUDTF]]
+ * @group ftvec.ranking
+ */
+ @scala.annotation.varargs
+ def bpr_sampling(exprs: Column*): DataFrame = withTypedPlan {
+ planHiveGenericUDTF(
+ df,
+ "hivemall.ftvec.ranking.BprSamplingUDTF",
+ "bpr_sampling",
+ exprs,
+ Seq("user", "pos_item", "neg_item")
+ )
+ }
+
+ /**
+ * @see [[hivemall.ftvec.ranking.ItemPairsSamplingUDTF]]
+ * @group ftvec.ranking
+ */
+ @scala.annotation.varargs
+ def item_pairs_sampling(exprs: Column*): DataFrame = withTypedPlan {
+ planHiveGenericUDTF(
+ df,
+ "hivemall.ftvec.ranking.ItemPairsSamplingUDTF",
+ "item_pairs_sampling",
+ exprs,
+ Seq("pos_item_id", "neg_item_id")
+ )
+ }
+
+ /**
+ * @see [[hivemall.ftvec.ranking.PopulateNotInUDTF]]
+ * @group ftvec.ranking
+ */
+ @scala.annotation.varargs
+ def populate_not_in(exprs: Column*): DataFrame = withTypedPlan {
+ planHiveGenericUDTF(
+ df,
+ "hivemall.ftvec.ranking.PopulateNotInUDTF",
+ "populate_not_in",
+ exprs,
+ Seq("item")
+ )
+ }
+
+ /**
* Splits Seq[String] into pieces.
* @group ftvec
*/
- def explode_array(expr: Column): DataFrame = {
- df.explode(expr) { case Row(v: Seq[_]) =>
+ def explode_array(features: Column): DataFrame = {
+ df.explode(features) { case Row(v: Seq[_]) =>
// Type erasure removes the component type in Seq
v.map(s => HivemallFeature(s.asInstanceOf[String]))
}
@@ -718,7 +918,7 @@ final class HivemallOps(df: DataFrame) extends Logging {
* Splits [[Vector]] into pieces.
* @group ftvec
*/
- def explode_vector(expr: Column): DataFrame = {
+ def explode_vector(features: Column): DataFrame = {
val elementSchema = StructType(
StructField("feature", StringType) :: StructField("weight", DoubleType) :: Nil)
val explodeFunc: Row => TraversableOnce[InternalRow] = (row: Row) => {
@@ -737,7 +937,7 @@ final class HivemallOps(df: DataFrame) extends Logging {
}
withTypedPlan {
Generate(
- UserDefinedGenerator(elementSchema, explodeFunc, expr.expr :: Nil),
+ UserDefinedGenerator(elementSchema, explodeFunc, features.expr :: Nil),
join = true, outer = false, None,
generatorOutput = Nil,
df.logicalPlan)
@@ -745,6 +945,20 @@ final class HivemallOps(df: DataFrame) extends Logging {
}
/**
+ * @see [[hivemall.tools.GenerateSeriesUDTF]]
+ * @group tools
+ */
+ def generate_series(start: Column, end: Column): DataFrame = withTypedPlan {
+ planHiveGenericUDTF(
+ df,
+ "hivemall.tools.GenerateSeriesUDTF",
+ "generate_series",
+ start :: end :: Nil,
+ Seq("generate_series")
+ )
+ }
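// Sketch: analogous to PostgreSQL's generate_series, one row is emitted per
// value between the two bounds (the literals below are illustrative).
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions.lit
import org.apache.spark.sql.hive.HivemallOps._
def tenRows(df: DataFrame): DataFrame = df.generate_series(lit(0), lit(9))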
+
+ /**
* Returns `top-k` records for each `group`.
* @group misc
*/
@@ -877,7 +1091,7 @@ final class HivemallOps(df: DataFrame) extends Logging {
df,
"hivemall.dataset.LogisticRegressionDataGeneratorUDTFWrapper",
"lr_datagen",
- setMixServs(toHivemallFeatures(exprs)),
+ exprs,
Seq("label", "features")
)
}
@@ -900,7 +1114,7 @@ final class HivemallOps(df: DataFrame) extends Logging {
* in all possible spark workers.
*/
@Experimental
- private[this] def setMixServs(exprs: Seq[Column]): Seq[Column] = {
+ private def setMixServs(exprs: Seq[Column]): Seq[Column] = {
val mixes = System.getenv("HIVEMALL_MIX_SERVERS")
if (mixes != null && !mixes.isEmpty()) {
val groupId = df.sqlContext.sparkContext.applicationId + "-" + UUID.randomUUID
@@ -919,7 +1133,7 @@ final class HivemallOps(df: DataFrame) extends Logging {
/**
* If the input is a [[Vector]], transform it into Hivemall features.
*/
- @inline private[this] def toHivemallFeatures(exprs: Seq[Column]): Seq[Column] = {
+ @inline private def toHivemallFeatures(exprs: Seq[Column]): Seq[Column] = {
df.select(exprs: _*).queryExecution.analyzed.schema.zip(exprs).map {
case (StructField(_, _: VectorUDT, _, _), c) => HivemallUtils.to_hivemall_features(c)
case (_, c) => c
@@ -929,13 +1143,13 @@ final class HivemallOps(df: DataFrame) extends Logging {
/**
* A convenient function to wrap a logical plan and produce a DataFrame.
*/
- @inline private[this] def withTypedPlan(logicalPlan: => LogicalPlan): DataFrame = {
+ @inline private def withTypedPlan(logicalPlan: => LogicalPlan): DataFrame = {
val queryExecution = _sparkSession.sessionState.executePlan(logicalPlan)
val outputSchema = queryExecution.sparkPlan.schema
new Dataset[Row](df.sparkSession, queryExecution, RowEncoder(outputSchema))
}
- @inline private[this] def withTypedPlanInCustomStrategy(logicalPlan: => LogicalPlan)
+ @inline private def withTypedPlanInCustomStrategy(logicalPlan: => LogicalPlan)
: DataFrame = {
// Inject custom strategies
if (!_sparkSession.experimental.extraStrategies.contains(_strategy)) {
@@ -967,6 +1181,118 @@ object HivemallOps {
}
/**
+ * @see [[hivemall.geospatial.TileUDF]]
+ * @group geospatial
+ */
+ def tile(lat: Column, lon: Column, zoom: Column): Column = withExpr {
+ planHiveGenericUDF(
+ "hivemall.geospatial.TileUDF",
+ "tile",
+ lat :: lon :: zoom :: Nil
+ )
+ }
+
+ /**
+ * @see [[hivemall.geospatial.MapURLUDF]]
+ * @group geospatial
+ */
+ @scala.annotation.varargs
+ def map_url(exprs: Column*): Column = withExpr {
+ planHiveGenericUDF(
+ "hivemall.geospatial.MapURLUDF",
+ "map_url",
+ exprs
+ )
+ }
+
+ /**
+ * @see [[hivemall.geospatial.Lat2TileYUDF]]
+ * @group geospatial
+ */
+ def lat2tiley(lat: Column, zoom: Column): Column = withExpr {
+ planHiveGenericUDF(
+ "hivemall.geospatial.Lat2TileYUDF",
+ "lat2tiley",
+ lat :: zoom :: Nil
+ )
+ }
+
+ /**
+ * @see [[hivemall.geospatial.Lon2TileXUDF]]
+ * @group geospatial
+ */
+ def lon2tilex(lon: Column, zoom: Column): Column = withExpr {
+ planHiveGenericUDF(
+ "hivemall.geospatial.Lon2TileXUDF",
+ "lon2tilex",
+ lon :: zoom :: Nil
+ )
+ }
+
+ /**
+ * @see [[hivemall.geospatial.TileX2LonUDF]]
+ * @group geospatial
+ */
+ def tilex2lon(x: Column, zoom: Column): Column = withExpr {
+ planHiveGenericUDF(
+ "hivemall.geospatial.TileX2LonUDF",
+ "tilex2lon",
+ x :: zoom :: Nil
+ )
+ }
+
+ /**
+ * @see [[hivemall.geospatial.TileY2LatUDF]]
+ * @group geospatial
+ */
+ def tiley2lat(y: Column, zoom: Column): Column = withExpr {
+ planHiveGenericUDF(
+ "hivemall.geospatial.TileY2LatUDF",
+ "tiley2lat",
+ y :: zoom :: Nil
+ )
+ }
+
+ /**
+ * @see [[hivemall.geospatial.HaversineDistanceUDF]]
+ * @group geospatial
+ */
+ @scala.annotation.varargs
+ def haversine_distance(exprs: Column*): Column = withExpr {
+ planHiveGenericUDF(
+ "hivemall.geospatial.HaversineDistanceUDF",
+ "haversine_distance",
+ exprs
+ )
+ }
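// Geospatial sketch (hypothetical "lat"/"lon" columns, fixed zoom level):
// tile() combines the lat2tiley and lon2tilex coordinates into a single
// tile number.
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions.lit
import org.apache.spark.sql.hive.HivemallOps._
def toTileIds(points: DataFrame): DataFrame =
  points.select(tile(points("lat"), points("lon"), lit(12)))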
+
+ /**
+ * @see [[hivemall.smile.tools.TreePredictUDF]]
+ * @group smile
+ */
+ @scala.annotation.varargs
+ def tree_predict(exprs: Column*): Column = withExpr {
+ planHiveGenericUDF(
+ "hivemall.smile.tools.TreePredictUDF",
+ "tree_predict",
+ exprs
+ )
+ }
+
+ /**
+ * @see [[hivemall.smile.tools.TreeExportUDF]]
+ * @group smile
+ */
+ @scala.annotation.varargs
+ def tree_export(exprs: Column*): Column = withExpr {
+ planHiveGenericUDF(
+ "hivemall.smile.tools.TreeExportUDF",
+ "tree_export",
+ exprs
+ )
+ }
+
+ /**
* @see [[hivemall.anomaly.ChangeFinderUDF]]
* @group anomaly
*/
@@ -997,10 +1323,10 @@ object HivemallOps {
* @group knn.similarity
*/
@scala.annotation.varargs
- def cosine_sim(exprs: Column*): Column = withExpr {
+ def cosine_similarity(exprs: Column*): Column = withExpr {
planHiveGenericUDF(
"hivemall.knn.similarity.CosineSimilarityUDF",
- "cosine_sim",
+ "cosine_similarity",
exprs
)
}
@@ -1010,10 +1336,10 @@ object HivemallOps {
* @group knn.similarity
*/
@scala.annotation.varargs
- def jaccard(exprs: Column*): Column = withExpr {
+ def jaccard_similarity(exprs: Column*): Column = withExpr {
planHiveUDF(
"hivemall.knn.similarity.JaccardIndexUDF",
- "jaccard",
+ "jaccard_similarity",
exprs
)
}
@@ -1137,6 +1463,19 @@ object HivemallOps {
}
/**
+ * @see [[hivemall.knn.distance.JaccardDistanceUDF]]
+ * @group knn.distance
+ */
+ @scala.annotation.varargs
+ def jaccard_distance(exprs: Column*): Column = withExpr {
+ planHiveUDF(
+ "hivemall.knn.distance.JaccardDistanceUDF",
+ "jaccard_distance",
+ exprs
+ )
+ }
+
+ /**
* @see [[hivemall.knn.distance.ManhattanDistanceUDF]]
* @group knn.distance
*/
@@ -1154,7 +1493,7 @@ object HivemallOps {
* @group knn.distance
*/
@scala.annotation.varargs
- def minkowski_distance (exprs: Column*): Column = withExpr {
+ def minkowski_distance(exprs: Column*): Column = withExpr {
planHiveGenericUDF(
"hivemall.knn.distance.MinkowskiDistanceUDF",
"minkowski_distance",
@@ -1237,11 +1576,11 @@ object HivemallOps {
* @see [[hivemall.ftvec.AddFeatureIndexUDFWrapper]]
* @group ftvec
*/
- def add_feature_index(expr: Column): Column = withExpr {
+ def add_feature_index(features: Column): Column = withExpr {
planHiveGenericUDF(
"hivemall.ftvec.AddFeatureIndexUDFWrapper",
"add_feature_index",
- expr :: Nil
+ features :: Nil
)
}
@@ -1273,11 +1612,12 @@ object HivemallOps {
* @see [[hivemall.ftvec.hashing.Sha1UDF]]
* @group ftvec.hashing
*/
- def sha1(expr: Column): Column = withExpr {
+ @scala.annotation.varargs
+ def sha1(exprs: Column*): Column = withExpr {
planHiveUDF(
"hivemall.ftvec.hashing.Sha1UDF",
"sha1",
- expr :: Nil
+ exprs
)
}
@@ -1287,7 +1627,6 @@ object HivemallOps {
*/
@scala.annotation.varargs
def array_hash_values(exprs: Column*): Column = withExpr {
- // TODO: Need a wrapper class because of using unsupported types
planHiveUDF(
"hivemall.ftvec.hashing.ArrayHashValuesUDF",
"array_hash_values",
@@ -1301,7 +1640,6 @@ object HivemallOps {
*/
@scala.annotation.varargs
def prefixed_hash_values(exprs: Column*): Column = withExpr {
- // TODO: Need a wrapper class because of using unsupported types
planHiveUDF(
"hivemall.ftvec.hashing.ArrayPrefixedHashValuesUDF",
"prefixed_hash_values",
@@ -1310,6 +1648,45 @@ object HivemallOps {
}
/**
+ * @see [[hivemall.ftvec.hashing.FeatureHashingUDF]]
+ * @group ftvec.hashing
+ */
+ @scala.annotation.varargs
+ def feature_hashing(exprs: Column*): Column = withExpr {
+ planHiveGenericUDF(
+ "hivemall.ftvec.hashing.FeatureHashingUDF",
+ "feature_hashing",
+ exprs
+ )
+ }
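// Sketch (column name hypothetical): feature_hashing rewrites string
// features into hashed integer indices to bound the feature space.
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.hive.HivemallOps._
def hashFeatures(df: DataFrame): DataFrame =
  df.select(feature_hashing(df("features")))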
+
+ /**
+ * @see [[hivemall.ftvec.pairing.PolynomialFeaturesUDF]]
+ * @group ftvec.pairing
+ */
+ @scala.annotation.varargs
+ def polynomial_features(exprs: Column*): Column = withExpr {
+ planHiveUDF(
+ "hivemall.ftvec.pairing.PolynomialFeaturesUDF",
+ "polynomial_features",
+ exprs
+ )
+ }
+
+ /**
+ * @see [[hivemall.ftvec.pairing.PoweredFeaturesUDF]]
+ * @group ftvec.pairing
+ */
+ @scala.annotation.varargs
+ def powered_features(exprs: Column*): Column = withExpr {
+ planHiveUDF(
+ "hivemall.ftvec.pairing.PoweredFeaturesUDF",
+ "powered_features",
+ exprs
+ )
+ }
+
+ /**
* @see [[hivemall.ftvec.scaling.RescaleUDF]]
* @group ftvec.scaling
*/
@@ -1338,7 +1715,7 @@ object HivemallOps {
* @see [[hivemall.ftvec.scaling.L2NormalizationUDFWrapper]]
* @group ftvec.scaling
*/
- def normalize(expr: Column): Column = withExpr {
+ def l2_normalize(expr: Column): Column = withExpr {
planHiveGenericUDF(
"hivemall.ftvec.scaling.L2NormalizationUDFWrapper",
"normalize",
@@ -1364,8 +1741,7 @@ object HivemallOps {
*/
@scala.annotation.varargs
def to_dense_features(exprs: Column*): Column = withExpr {
- // TODO: Need a wrapper class because of using unsupported types
- planHiveGenericUDF(
+ planHiveUDF(
"hivemall.ftvec.conv.ToDenseFeaturesUDF",
"to_dense_features",
exprs
@@ -1378,8 +1754,7 @@ object HivemallOps {
*/
@scala.annotation.varargs
def to_sparse_features(exprs: Column*): Column = withExpr {
- // TODO: Need a wrapper class because of using unsupported types
- planHiveGenericUDF(
+ planHiveUDF(
"hivemall.ftvec.conv.ToSparseFeaturesUDF",
"to_sparse_features",
exprs
@@ -1387,6 +1762,19 @@ object HivemallOps {
}
/**
+ * @see [[hivemall.ftvec.binning.FeatureBinningUDF]]
+ * @group ftvec.conv
+ */
+ @scala.annotation.varargs
+ def feature_binning(exprs: Column*): Column = withExpr {
+ planHiveGenericUDF(
+ "hivemall.ftvec.binning.FeatureBinningUDF",
+ "feature_binning",
+ exprs
+ )
+ }
+
+ /**
* @see [[hivemall.ftvec.trans.VectorizeFeaturesUDF]]
* @group ftvec.trans
*/
@@ -1413,6 +1801,19 @@ object HivemallOps {
}
/**
+ * @see [[hivemall.ftvec.trans.FFMFeaturesUDF]]
+ * @group ftvec.trans
+ */
+ @scala.annotation.varargs
+ def ffm_features(exprs: Column*): Column = withExpr {
+ planHiveGenericUDF(
+ "hivemall.ftvec.trans.FFMFeaturesUDF",
+ "ffm_features",
+ exprs
+ )
+ }
+
+ /**
* @see [[hivemall.ftvec.trans.IndexedFeatures]]
* @group ftvec.trans
*/
@@ -1439,15 +1840,136 @@ object HivemallOps {
}
/**
- * @see [[hivemall.smile.tools.TreePredictUDF]]
- * @group misc
+ * @see [[hivemall.ftvec.trans.AddFieldIndicesUDF]]
+ * @group ftvec.trans
+ */
+ def add_field_indicies(features: Column): Column = withExpr {
+ planHiveGenericUDF(
+ "hivemall.ftvec.trans.AddFieldIndicesUDF",
+ "add_field_indicies",
+ features :: Nil
+ )
+ }
+
+ /**
+ * @see [[hivemall.tools.ConvertLabelUDF]]
+ * @group tools
+ */
+ def convert_label(label: Column): Column = withExpr {
+ planHiveUDF(
+ "hivemall.tools.ConvertLabelUDF",
+ "convert_label",
+ label :: Nil
+ )
+ }
+
+ /**
+ * @see [[hivemall.tools.RankSequenceUDF]]
+ * @group tools
+ */
+ def x_rank(key: Column): Column = withExpr {
+ planHiveUDF(
+ "hivemall.tools.RankSequenceUDF",
+ "x_rank",
+ key :: Nil
+ )
+ }
+
+ /**
+ * @see [[hivemall.tools.array.AllocFloatArrayUDF]]
+ * @group tools.array
+ */
+ def float_array(nDims: Column): Column = withExpr {
+ planHiveUDF(
+ "hivemall.tools.array.AllocFloatArrayUDF",
+ "float_array",
+ nDims :: Nil
+ )
+ }
+
+ /**
+ * @see [[hivemall.tools.array.ArrayRemoveUDF]]
+ * @group tools.array
+ */
+ def array_remove(original: Column, target: Column): Column = withExpr {
+ planHiveUDF(
+ "hivemall.tools.array.ArrayRemoveUDF",
+ "array_remove",
+ original :: target :: Nil
+ )
+ }
+
+ /**
+ * @see [[hivemall.tools.array.SortAndUniqArrayUDF]]
+ * @group tools.array
+ */
+ def sort_and_uniq_array(ar: Column): Column = withExpr {
+ planHiveUDF(
+ "hivemall.tools.array.SortAndUniqArrayUDF",
+ "sort_and_uniq_array",
+ ar :: Nil
+ )
+ }
+
+ /**
+ * @see [[hivemall.tools.array.SubarrayEndWithUDF]]
+ * @group tools.array
+ */
+ def subarray_endwith(original: Column, key: Column): Column = withExpr {
+ planHiveUDF(
+ "hivemall.tools.array.SubarrayEndWithUDF",
+ "subarray_endwith",
+ original :: key :: Nil
+ )
+ }
+
+ /**
+ * @see [[hivemall.tools.array.ArrayConcatUDF]]
+ * @group tools.array
*/
@scala.annotation.varargs
- def tree_predict(exprs: Column*): Column = withExpr {
+ def array_concat(arrays: Column*): Column = withExpr {
planHiveGenericUDF(
- "hivemall.smile.tools.TreePredictUDF",
- "tree_predict",
- exprs
+ "hivemall.tools.array.ArrayConcatUDF",
+ "array_concat",
+ arrays
+ )
+ }
+
+ /**
+ * @see [[hivemall.tools.array.SubarrayUDF]]
+ * @group tools.array
+ */
+ def subarray(original: Column, fromIndex: Column, toIndex: Column): Column = withExpr {
+ planHiveUDF(
+ "hivemall.tools.array.SubarrayUDF",
+ "subarray",
+ original :: fromIndex :: toIndex :: Nil
+ )
+ }
+
+ /**
+ * @see [[hivemall.tools.array.ToStringArrayUDF]]
+ * @group tools.array
+ */
+ def to_string_array(ar: Column): Column = withExpr {
+ planHiveUDF(
+ "hivemall.tools.array.ToStringArrayUDF",
+ "to_string_array",
+ ar :: Nil
+ )
+ }
+
+ /**
+ * @see [[hivemall.tools.array.ArrayIntersectUDF]]
+ * @group tools.array
+ */
+ @scala.annotation.varargs
+ def array_intersect(arrays: Column*): Column = withExpr {
+ planHiveGenericUDF(
+ "hivemall.tools.array.ArrayIntersectUDF",
+ "array_intersect",
+ arrays
)
}
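// Array-tool sketch (all literals illustrative): subarray slices the given
// array between the two indices and array_concat concatenates its arguments.
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions.{array, lit}
import org.apache.spark.sql.hive.HivemallOps._
def arrayExamples(df: DataFrame): DataFrame =
  df.select(
    subarray(array(lit(1), lit(2), lit(3)), lit(0), lit(2)),
    array_concat(array(lit(1)), array(lit(2))))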
@@ -1464,6 +1986,194 @@ object HivemallOps {
}
/**
+ * @see [[hivemall.tools.bits.ToBitsUDF]]
+ * @group tools.bits
+ */
+ def to_bits(indexes: Column): Column = withExpr {
+ planHiveGenericUDF(
+ "hivemall.tools.bits.ToBitsUDF",
+ "to_bits",
+ indexes :: Nil
+ )
+ }
+
+ /**
+ * @see [[hivemall.tools.bits.UnBitsUDF]]
+ * @group tools.bits
+ */
+ def unbits(bitset: Column): Column = withExpr {
+ planHiveGenericUDF(
+ "hivemall.tools.bits.UnBitsUDF",
+ "unbits",
+ bitset :: Nil
+ )
+ }
+
+ /**
+ * @see [[hivemall.tools.bits.BitsORUDF]]
+ * @group tools.bits
+ */
+ @scala.annotation.varargs
+ def bits_or(bits: Column*): Column = withExpr {
+ planHiveGenericUDF(
+ "hivemall.tools.bits.BitsORUDF",
+ "bits_or",
+ bits
+ )
+ }
+
+ /**
+ * @see [[hivemall.tools.compress.InflateUDF]]
+ * @group tools.compress
+ */
+ @scala.annotation.varargs
+ def inflate(exprs: Column*): Column = withExpr {
+ planHiveGenericUDF(
+ "hivemall.tools.compress.InflateUDF",
+ "inflate",
+ exprs
+ )
+ }
+
+ /**
+ * @see [[hivemall.tools.compress.DeflateUDF]]
+ * @group tools.compress
+ */
+ @scala.annotation.varargs
+ def deflate(exprs: Column*): Column = withExpr {
+ planHiveGenericUDF(
+ "hivemall.tools.compress.DeflateUDF",
+ "deflate",
+ exprs
+ )
+ }
+
+ /**
+ * @see [[hivemall.tools.map.MapGetSumUDF]]
+ * @group tools.map
+ */
+ @scala.annotation.varargs
+ def map_get_sum(exprs: Column*): Column = withExpr {
+ planHiveUDF(
+ "hivemall.tools.map.MapGetSumUDF",
+ "map_get_sum",
+ exprs
+ )
+ }
+
+ /**
+ * @see [[hivemall.tools.map.MapTailNUDF]]
+ * @group tools.map
+ */
+ @scala.annotation.varargs
+ def map_tail_n(exprs: Column*): Column = withExpr {
+ planHiveGenericUDF(
+ "hivemall.tools.map.MapTailNUDF",
+ "map_tail_n",
+ exprs
+ )
+ }
+
+ /**
+ * @see [[hivemall.tools.text.TokenizeUDF]]
+ * @group tools.text
+ */
+ @scala.annotation.varargs
+ def tokenize(exprs: Column*): Column = withExpr {
+ planHiveUDF(
+ "hivemall.tools.text.TokenizeUDF",
+ "tokenize",
+ exprs
+ )
+ }
+
+ /**
+ * @see [[hivemall.tools.text.StopwordUDF]]
+ * @group tools.text
+ */
+ def is_stopword(word: Column): Column = withExpr {
+ planHiveUDF(
+ "hivemall.tools.text.StopwordUDF",
+ "is_stopword",
+ word :: Nil
+ )
+ }
+
+ /**
+ * @see [[hivemall.tools.text.SingularizeUDF]]
+ * @group tools.text
+ */
+ def singularize(word: Column): Column = withExpr {
+ planHiveUDF(
+ "hivemall.tools.text.SingularizeUDF",
+ "singularize",
+ word :: Nil
+ )
+ }
+
+ /**
+ * @see [[hivemall.tools.text.SplitWordsUDF]]
+ * @group tools.text
+ */
+ @scala.annotation.varargs
+ def split_words(exprs: Column*): Column = withExpr {
+ planHiveUDF(
+ "hivemall.tools.text.SplitWordsUDF",
+ "split_words",
+ exprs
+ )
+ }
+
+ /**
+ * @see [[hivemall.tools.text.NormalizeUnicodeUDF]]
+ * @group tools.text
+ */
+ @scala.annotation.varargs
+ def normalize_unicode(exprs: Column*): Column = withExpr {
+ planHiveUDF(
+ "hivemall.tools.text.NormalizeUnicodeUDF",
+ "normalize_unicode",
+ exprs
+ )
+ }
+
+ /**
+ * @see [[hivemall.tools.text.Base91UDF]]
+ * @group tools.text
+ */
+ def base91(bin: Column): Column = withExpr {
+ planHiveGenericUDF(
+ "hivemall.tools.text.Base91UDF",
+ "base91",
+ bin :: Nil
+ )
+ }
+
+ /**
+ * @see [[hivemall.tools.text.Unbase91UDF]]
+ * @group tools.text
+ */
+ def unbase91(base91String: Column): Column = withExpr {
+ planHiveGenericUDF(
+ "hivemall.tools.text.Unbase91UDF",
+ "unbase91",
+ base91String :: Nil
+ )
+ }
+
+ /**
+ * @see [[hivemall.tools.text.WordNgramsUDF]]
+ * @group tools.text
+ */
+ def word_ngrams(words: Column, minSize: Column, maxSize: Column): Column = withExpr {
+ planHiveUDF(
+ "hivemall.tools.text.WordNgramsUDF",
+ "word_ngrams",
+ words :: minSize :: maxSize :: Nil
+ )
+ }
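// Text-tool sketch (literals illustrative): tokenize splits raw text into a
// word array and word_ngrams expands it into n-grams of the given size range.
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions.lit
import org.apache.spark.sql.hive.HivemallOps._
def bigrams(docs: DataFrame): DataFrame =
  docs.select(word_ngrams(tokenize(docs("text")), lit(1), lit(2)))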
+
+ /**
* @see [[hivemall.tools.math.SigmoidGenericUDF]]
* @group misc
*/
[4/4] incubator-hivemall git commit: Fixed assertion conditions
Posted by my...@apache.org.
Fixed the assertion conditions in the evaluation UDAFs: init() now checks for one to three parameters (one or two for AUC).
Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/e8abae25
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/e8abae25
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/e8abae25
Branch: refs/heads/master
Commit: e8abae257f45bab419654ebe2d5387755dc7f105
Parents: c58eaea
Author: Makoto Yui <my...@apache.org>
Authored: Mon Oct 16 20:54:53 2017 +0900
Committer: Makoto Yui <my...@apache.org>
Committed: Mon Oct 16 21:05:00 2017 +0900
----------------------------------------------------------------------
core/src/main/java/hivemall/evaluation/AUCUDAF.java | 2 +-
core/src/main/java/hivemall/evaluation/HitRateUDAF.java | 2 +-
core/src/main/java/hivemall/evaluation/MAPUDAF.java | 2 +-
core/src/main/java/hivemall/evaluation/MRRUDAF.java | 2 +-
core/src/main/java/hivemall/evaluation/NDCGUDAF.java | 2 +-
core/src/main/java/hivemall/evaluation/PrecisionUDAF.java | 2 +-
core/src/main/java/hivemall/evaluation/RecallUDAF.java | 2 +-
7 files changed, 7 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e8abae25/core/src/main/java/hivemall/evaluation/AUCUDAF.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/evaluation/AUCUDAF.java b/core/src/main/java/hivemall/evaluation/AUCUDAF.java
index 6dba174..3c7faa7 100644
--- a/core/src/main/java/hivemall/evaluation/AUCUDAF.java
+++ b/core/src/main/java/hivemall/evaluation/AUCUDAF.java
@@ -110,7 +110,7 @@ public final class AUCUDAF extends AbstractGenericUDAFResolver {
@Override
public ObjectInspector init(Mode mode, ObjectInspector[] parameters) throws HiveException {
- assert (0 < parameters.length && parameters.length <= 3) : parameters.length;
+ assert (parameters.length == 1 || parameters.length == 2) : parameters.length;
super.init(mode, parameters);
// initialize input
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e8abae25/core/src/main/java/hivemall/evaluation/HitRateUDAF.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/evaluation/HitRateUDAF.java b/core/src/main/java/hivemall/evaluation/HitRateUDAF.java
index 6a2d0da..5464654 100644
--- a/core/src/main/java/hivemall/evaluation/HitRateUDAF.java
+++ b/core/src/main/java/hivemall/evaluation/HitRateUDAF.java
@@ -106,7 +106,7 @@ public final class HitRateUDAF extends AbstractGenericUDAFResolver {
@Override
public ObjectInspector init(Mode mode, ObjectInspector[] parameters) throws HiveException {
- assert (0 < parameters.length && parameters.length <= 3) : parameters.length;
+ assert (parameters.length >= 1 && parameters.length <= 3) : parameters.length;
super.init(mode, parameters);
// initialize input
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e8abae25/core/src/main/java/hivemall/evaluation/MAPUDAF.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/evaluation/MAPUDAF.java b/core/src/main/java/hivemall/evaluation/MAPUDAF.java
index fef1f43..61341a3 100644
--- a/core/src/main/java/hivemall/evaluation/MAPUDAF.java
+++ b/core/src/main/java/hivemall/evaluation/MAPUDAF.java
@@ -88,7 +88,7 @@ public final class MAPUDAF extends AbstractGenericUDAFResolver {
@Override
public ObjectInspector init(Mode mode, ObjectInspector[] parameters) throws HiveException {
- assert (0 < parameters.length && parameters.length <= 3) : parameters.length;
+ assert (parameters.length >= 1 && parameters.length <= 3) : parameters.length;
super.init(mode, parameters);
// initialize input
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e8abae25/core/src/main/java/hivemall/evaluation/MRRUDAF.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/evaluation/MRRUDAF.java b/core/src/main/java/hivemall/evaluation/MRRUDAF.java
index fcd9d51..e92df67 100644
--- a/core/src/main/java/hivemall/evaluation/MRRUDAF.java
+++ b/core/src/main/java/hivemall/evaluation/MRRUDAF.java
@@ -88,7 +88,7 @@ public final class MRRUDAF extends AbstractGenericUDAFResolver {
@Override
public ObjectInspector init(Mode mode, ObjectInspector[] parameters) throws HiveException {
- assert (0 < parameters.length && parameters.length <= 3) : parameters.length;
+ assert (parameters.length >= 1 && parameters.length <= 3) : parameters.length;
super.init(mode, parameters);
// initialize input
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e8abae25/core/src/main/java/hivemall/evaluation/NDCGUDAF.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/evaluation/NDCGUDAF.java b/core/src/main/java/hivemall/evaluation/NDCGUDAF.java
index 00aa16a..6b92afe 100644
--- a/core/src/main/java/hivemall/evaluation/NDCGUDAF.java
+++ b/core/src/main/java/hivemall/evaluation/NDCGUDAF.java
@@ -91,7 +91,7 @@ public final class NDCGUDAF extends AbstractGenericUDAFResolver {
@Override
public ObjectInspector init(Mode mode, ObjectInspector[] parameters) throws HiveException {
- assert (0 < parameters.length && parameters.length <= 3) : parameters.length;
+ assert (parameters.length >= 1 && parameters.length <= 3) : parameters.length;
super.init(mode, parameters);
// initialize input
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e8abae25/core/src/main/java/hivemall/evaluation/PrecisionUDAF.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/evaluation/PrecisionUDAF.java b/core/src/main/java/hivemall/evaluation/PrecisionUDAF.java
index 2e09a71..ca6c4f0 100644
--- a/core/src/main/java/hivemall/evaluation/PrecisionUDAF.java
+++ b/core/src/main/java/hivemall/evaluation/PrecisionUDAF.java
@@ -88,7 +88,7 @@ public final class PrecisionUDAF extends AbstractGenericUDAFResolver {
@Override
public ObjectInspector init(Mode mode, ObjectInspector[] parameters) throws HiveException {
- assert (0 < parameters.length && parameters.length <= 3) : parameters.length;
+ assert (parameters.length >= 1 && parameters.length <= 3) : parameters.length;
super.init(mode, parameters);
// initialize input
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e8abae25/core/src/main/java/hivemall/evaluation/RecallUDAF.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/evaluation/RecallUDAF.java b/core/src/main/java/hivemall/evaluation/RecallUDAF.java
index d084c94..d0722d0 100644
--- a/core/src/main/java/hivemall/evaluation/RecallUDAF.java
+++ b/core/src/main/java/hivemall/evaluation/RecallUDAF.java
@@ -88,7 +88,7 @@ public final class RecallUDAF extends AbstractGenericUDAFResolver {
@Override
public ObjectInspector init(Mode mode, ObjectInspector[] parameters) throws HiveException {
- assert (0 < parameters.length && parameters.length <= 3) : parameters.length;
+ assert (parameters.length >= 1 && parameters.length <= 3) : parameters.length;
super.init(mode, parameters);
// initialize input