Posted to commits@spark.apache.org by me...@apache.org on 2015/12/08 08:26:52 UTC

[1/3] spark git commit: [SPARK-11551][DOC][EXAMPLE] Replace example code in ml-features.md using include_example

Repository: spark
Updated Branches:
  refs/heads/master 3e7e05f5e -> 78209b0cc
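
Context for this change: docs/ml-features.md previously embedded these snippets inline; with this patch the page instead pulls them in through the include_example Jekyll tag, which copies only the region between the $example on$ and $example off$ markers in each example file. Roughly (the exact markup is an illustration, not part of this diff), each language tab in ml-features.md now holds a single directive such as

  {% include_example scala/org/apache/spark/examples/ml/StringIndexerExample.scala %}

so the documentation stays in sync with the runnable examples added below.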


http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/scala/org/apache/spark/examples/ml/StringIndexerExample.scala
----------------------------------------------------------------------
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/StringIndexerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/StringIndexerExample.scala
new file mode 100644
index 0000000..1be8a5f
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/StringIndexerExample.scala
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.feature.StringIndexer
+// $example off$
+import org.apache.spark.sql.SQLContext
+import org.apache.spark.{SparkConf, SparkContext}
+
+object StringIndexerExample {
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("StringIndexerExample")
+    val sc = new SparkContext(conf)
+    val sqlContext = new SQLContext(sc)
+
+    // $example on$
+    val df = sqlContext.createDataFrame(
+      Seq((0, "a"), (1, "b"), (2, "c"), (3, "a"), (4, "a"), (5, "c"))
+    ).toDF("id", "category")
+
+    val indexer = new StringIndexer()
+      .setInputCol("category")
+      .setOutputCol("categoryIndex")
+
+    val indexed = indexer.fit(df).transform(df)
+    indexed.show()
+    // $example off$
+    sc.stop()
+  }
+}
+// scalastyle:on println
+

http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/scala/org/apache/spark/examples/ml/TokenizerExample.scala
----------------------------------------------------------------------
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/TokenizerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/TokenizerExample.scala
new file mode 100644
index 0000000..01e0d13
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/TokenizerExample.scala
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.feature.{RegexTokenizer, Tokenizer}
+// $example off$
+import org.apache.spark.sql.SQLContext
+import org.apache.spark.{SparkConf, SparkContext}
+
+object TokenizerExample {
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("TokenizerExample")
+    val sc = new SparkContext(conf)
+    val sqlContext = new SQLContext(sc)
+
+    // $example on$
+    val sentenceDataFrame = sqlContext.createDataFrame(Seq(
+      (0, "Hi I heard about Spark"),
+      (1, "I wish Java could use case classes"),
+      (2, "Logistic,regression,models,are,neat")
+    )).toDF("label", "sentence")
+
+    val tokenizer = new Tokenizer().setInputCol("sentence").setOutputCol("words")
+    val regexTokenizer = new RegexTokenizer()
+      .setInputCol("sentence")
+      .setOutputCol("words")
+      .setPattern("\\W") // alternatively .setPattern("\\w+").setGaps(false)
+
+    val tokenized = tokenizer.transform(sentenceDataFrame)
+    tokenized.select("words", "label").take(3).foreach(println)
+    val regexTokenized = regexTokenizer.transform(sentenceDataFrame)
+    regexTokenized.select("words", "label").take(3).foreach(println)
+    // $example off$
+    sc.stop()
+  }
+}
+// scalastyle:on println

http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/scala/org/apache/spark/examples/ml/VectorAssemblerExample.scala
----------------------------------------------------------------------
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/VectorAssemblerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/VectorAssemblerExample.scala
new file mode 100644
index 0000000..d527924
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/VectorAssemblerExample.scala
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.feature.VectorAssembler
+import org.apache.spark.mllib.linalg.Vectors
+// $example off$
+import org.apache.spark.sql.SQLContext
+import org.apache.spark.{SparkConf, SparkContext}
+
+object VectorAssemblerExample {
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("VectorAssemblerExample")
+    val sc = new SparkContext(conf)
+    val sqlContext = new SQLContext(sc)
+
+    // $example on$
+    val dataset = sqlContext.createDataFrame(
+      Seq((0, 18, 1.0, Vectors.dense(0.0, 10.0, 0.5), 1.0))
+    ).toDF("id", "hour", "mobile", "userFeatures", "clicked")
+
+    val assembler = new VectorAssembler()
+      .setInputCols(Array("hour", "mobile", "userFeatures"))
+      .setOutputCol("features")
+
+    val output = assembler.transform(dataset)
+    println(output.select("features", "clicked").first())
+    // $example off$
+    sc.stop()
+  }
+}
+// scalastyle:on println

http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/scala/org/apache/spark/examples/ml/VectorIndexerExample.scala
----------------------------------------------------------------------
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/VectorIndexerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/VectorIndexerExample.scala
new file mode 100644
index 0000000..14279d6
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/VectorIndexerExample.scala
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.feature.VectorIndexer
+// $example off$
+import org.apache.spark.sql.SQLContext
+import org.apache.spark.{SparkConf, SparkContext}
+
+object VectorIndexerExample {
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("VectorIndexerExample")
+    val sc = new SparkContext(conf)
+    val sqlContext = new SQLContext(sc)
+
+    // $example on$
+    val data = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
+
+    val indexer = new VectorIndexer()
+      .setInputCol("features")
+      .setOutputCol("indexed")
+      .setMaxCategories(10)
+
+    val indexerModel = indexer.fit(data)
+
+    val categoricalFeatures: Set[Int] = indexerModel.categoryMaps.keys.toSet
+    println(s"Chose ${categoricalFeatures.size} categorical features: " +
+      categoricalFeatures.mkString(", "))
+
+    // Create new column "indexed" with categorical values transformed to indices
+    val indexedData = indexerModel.transform(data)
+    // $example off$
+    sc.stop()
+  }
+}
+// scalastyle:on println

http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/scala/org/apache/spark/examples/ml/VectorSlicerExample.scala
----------------------------------------------------------------------
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/VectorSlicerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/VectorSlicerExample.scala
new file mode 100644
index 0000000..04f1982
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/VectorSlicerExample.scala
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.attribute.{Attribute, AttributeGroup, NumericAttribute}
+import org.apache.spark.ml.feature.VectorSlicer
+import org.apache.spark.mllib.linalg.Vectors
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.types.StructType
+// $example off$
+import org.apache.spark.sql.SQLContext
+import org.apache.spark.{SparkConf, SparkContext}
+
+object VectorSlicerExample {
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("VectorSlicerExample")
+    val sc = new SparkContext(conf)
+    val sqlContext = new SQLContext(sc)
+
+    // $example on$
+    val data = Array(Row(Vectors.dense(-2.0, 2.3, 0.0)))
+
+    val defaultAttr = NumericAttribute.defaultAttr
+    val attrs = Array("f1", "f2", "f3").map(defaultAttr.withName)
+    val attrGroup = new AttributeGroup("userFeatures", attrs.asInstanceOf[Array[Attribute]])
+
+    val dataRDD = sc.parallelize(data)
+    val dataset = sqlContext.createDataFrame(dataRDD, StructType(Array(attrGroup.toStructField())))
+
+    val slicer = new VectorSlicer().setInputCol("userFeatures").setOutputCol("features")
+
+    slicer.setIndices(Array(1)).setNames(Array("f3"))
+    // or slicer.setIndices(Array(1, 2)), or slicer.setNames(Array("f2", "f3"))
+
+    val output = slicer.transform(dataset)
+    println(output.select("userFeatures", "features").first())
+    // $example off$
+    sc.stop()
+  }
+}
+// scalastyle:on println
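
A quick way to sanity-check any of the Scala examples above is Spark's bundled launcher script; for instance (the class argument here is inferred from the package declarations in this diff, not something this patch adds):

  ./bin/run-example ml.VectorSlicerExample

run-example prefixes its argument with org.apache.spark.examples, so the ml.* names used above resolve to the new example classes.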


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


[2/3] spark git commit: [SPARK-11551][DOC][EXAMPLE] Replace example code in ml-features.md using include_example

Posted by me...@apache.org.
http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/java/org/apache/spark/examples/ml/JavaPolynomialExpansionExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaPolynomialExpansionExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaPolynomialExpansionExample.java
new file mode 100644
index 0000000..668f71e
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaPolynomialExpansionExample.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.SQLContext;
+
+// $example on$
+import java.util.Arrays;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.ml.feature.PolynomialExpansion;
+import org.apache.spark.mllib.linalg.VectorUDT;
+import org.apache.spark.mllib.linalg.Vectors;
+import org.apache.spark.sql.DataFrame;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+// $example off$
+
+public class JavaPolynomialExpansionExample {
+  public static void main(String[] args) {
+    SparkConf conf = new SparkConf().setAppName("JavaPolynomialExpansionExample");
+    JavaSparkContext jsc = new JavaSparkContext(conf);
+    SQLContext jsql = new SQLContext(jsc);
+
+    // $example on$
+    PolynomialExpansion polyExpansion = new PolynomialExpansion()
+      .setInputCol("features")
+      .setOutputCol("polyFeatures")
+      .setDegree(3);
+
+    JavaRDD<Row> data = jsc.parallelize(Arrays.asList(
+      RowFactory.create(Vectors.dense(-2.0, 2.3)),
+      RowFactory.create(Vectors.dense(0.0, 0.0)),
+      RowFactory.create(Vectors.dense(0.6, -1.1))
+    ));
+
+    StructType schema = new StructType(new StructField[]{
+      new StructField("features", new VectorUDT(), false, Metadata.empty()),
+    });
+
+    DataFrame df = jsql.createDataFrame(data, schema);
+    DataFrame polyDF = polyExpansion.transform(df);
+
+    Row[] rows = polyDF.select("polyFeatures").take(3);
+    for (Row r : rows) {
+      System.out.println(r.get(0));
+    }
+    // $example off$
+    jsc.stop();
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/java/org/apache/spark/examples/ml/JavaRFormulaExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaRFormulaExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaRFormulaExample.java
new file mode 100644
index 0000000..1e1062b
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaRFormulaExample.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.SQLContext;
+
+// $example on$
+import java.util.Arrays;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.ml.feature.RFormula;
+import org.apache.spark.sql.DataFrame;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+
+import static org.apache.spark.sql.types.DataTypes.*;
+// $example off$
+
+public class JavaRFormulaExample {
+  public static void main(String[] args) {
+    SparkConf conf = new SparkConf().setAppName("JavaRFormulaExample");
+    JavaSparkContext jsc = new JavaSparkContext(conf);
+    SQLContext sqlContext = new SQLContext(jsc);
+
+    // $example on$
+    StructType schema = createStructType(new StructField[]{
+      createStructField("id", IntegerType, false),
+      createStructField("country", StringType, false),
+      createStructField("hour", IntegerType, false),
+      createStructField("clicked", DoubleType, false)
+    });
+
+    JavaRDD<Row> rdd = jsc.parallelize(Arrays.asList(
+      RowFactory.create(7, "US", 18, 1.0),
+      RowFactory.create(8, "CA", 12, 0.0),
+      RowFactory.create(9, "NZ", 15, 0.0)
+    ));
+
+    DataFrame dataset = sqlContext.createDataFrame(rdd, schema);
+    RFormula formula = new RFormula()
+      .setFormula("clicked ~ country + hour")
+      .setFeaturesCol("features")
+      .setLabelCol("label");
+    DataFrame output = formula.fit(dataset).transform(dataset);
+    output.select("features", "label").show();
+    // $example off$
+    jsc.stop();
+  }
+}
+

http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/java/org/apache/spark/examples/ml/JavaStandardScalerExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaStandardScalerExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaStandardScalerExample.java
new file mode 100644
index 0000000..0cbdc97
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaStandardScalerExample.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.SQLContext;
+
+// $example on$
+import org.apache.spark.ml.feature.StandardScaler;
+import org.apache.spark.ml.feature.StandardScalerModel;
+import org.apache.spark.sql.DataFrame;
+// $example off$
+
+public class JavaStandardScalerExample {
+  public static void main(String[] args) {
+    SparkConf conf = new SparkConf().setAppName("JavaStandardScalerExample");
+    JavaSparkContext jsc = new JavaSparkContext(conf);
+    SQLContext jsql = new SQLContext(jsc);
+
+    // $example on$
+    DataFrame dataFrame = jsql.read().format("libsvm").load("data/mllib/sample_libsvm_data.txt");
+
+    StandardScaler scaler = new StandardScaler()
+      .setInputCol("features")
+      .setOutputCol("scaledFeatures")
+      .setWithStd(true)
+      .setWithMean(false);
+
+    // Compute summary statistics by fitting the StandardScaler
+    StandardScalerModel scalerModel = scaler.fit(dataFrame);
+
+    // Normalize each feature to have unit standard deviation.
+    DataFrame scaledData = scalerModel.transform(dataFrame);
+    // $example off$
+    jsc.stop();
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/java/org/apache/spark/examples/ml/JavaStopWordsRemoverExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaStopWordsRemoverExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaStopWordsRemoverExample.java
new file mode 100644
index 0000000..b6b201c
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaStopWordsRemoverExample.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.SQLContext;
+
+// $example on$
+import java.util.Arrays;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.ml.feature.StopWordsRemover;
+import org.apache.spark.sql.DataFrame;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+// $example off$
+
+public class JavaStopWordsRemoverExample {
+
+  public static void main(String[] args) {
+    SparkConf conf = new SparkConf().setAppName("JavaStopWordsRemoverExample");
+    JavaSparkContext jsc = new JavaSparkContext(conf);
+    SQLContext jsql = new SQLContext(jsc);
+
+    // $example on$
+    StopWordsRemover remover = new StopWordsRemover()
+      .setInputCol("raw")
+      .setOutputCol("filtered");
+
+    JavaRDD<Row> rdd = jsc.parallelize(Arrays.asList(
+      RowFactory.create(Arrays.asList("I", "saw", "the", "red", "baloon")),
+      RowFactory.create(Arrays.asList("Mary", "had", "a", "little", "lamb"))
+    ));
+
+    StructType schema = new StructType(new StructField[]{
+      new StructField(
+        "raw", DataTypes.createArrayType(DataTypes.StringType), false, Metadata.empty())
+    });
+
+    DataFrame dataset = jsql.createDataFrame(rdd, schema);
+    remover.transform(dataset).show();
+    // $example off$
+    jsc.stop();
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/java/org/apache/spark/examples/ml/JavaStringIndexerExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaStringIndexerExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaStringIndexerExample.java
new file mode 100644
index 0000000..05d12c1
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaStringIndexerExample.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.SQLContext;
+
+// $example on$
+import java.util.Arrays;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.ml.feature.StringIndexer;
+import org.apache.spark.sql.DataFrame;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+
+import static org.apache.spark.sql.types.DataTypes.*;
+// $example off$
+
+public class JavaStringIndexerExample {
+  public static void main(String[] args) {
+    SparkConf conf = new SparkConf().setAppName("JavaStringIndexerExample");
+    JavaSparkContext jsc = new JavaSparkContext(conf);
+    SQLContext sqlContext = new SQLContext(jsc);
+
+    // $example on$
+    JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
+      RowFactory.create(0, "a"),
+      RowFactory.create(1, "b"),
+      RowFactory.create(2, "c"),
+      RowFactory.create(3, "a"),
+      RowFactory.create(4, "a"),
+      RowFactory.create(5, "c")
+    ));
+    StructType schema = new StructType(new StructField[]{
+      createStructField("id", IntegerType, false),
+      createStructField("category", StringType, false)
+    });
+    DataFrame df = sqlContext.createDataFrame(jrdd, schema);
+    StringIndexer indexer = new StringIndexer()
+      .setInputCol("category")
+      .setOutputCol("categoryIndex");
+    DataFrame indexed = indexer.fit(df).transform(df);
+    indexed.show();
+    // $example off$
+    jsc.stop();
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/java/org/apache/spark/examples/ml/JavaTokenizerExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaTokenizerExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaTokenizerExample.java
new file mode 100644
index 0000000..617dc3f
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaTokenizerExample.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.SQLContext;
+
+// $example on$
+import java.util.Arrays;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.ml.feature.RegexTokenizer;
+import org.apache.spark.ml.feature.Tokenizer;
+import org.apache.spark.sql.DataFrame;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+// $example off$
+
+public class JavaTokenizerExample {
+  public static void main(String[] args) {
+    SparkConf conf = new SparkConf().setAppName("JavaTokenizerExample");
+    JavaSparkContext jsc = new JavaSparkContext(conf);
+    SQLContext sqlContext = new SQLContext(jsc);
+
+    // $example on$
+    JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
+      RowFactory.create(0, "Hi I heard about Spark"),
+      RowFactory.create(1, "I wish Java could use case classes"),
+      RowFactory.create(2, "Logistic,regression,models,are,neat")
+    ));
+
+    StructType schema = new StructType(new StructField[]{
+      new StructField("label", DataTypes.IntegerType, false, Metadata.empty()),
+      new StructField("sentence", DataTypes.StringType, false, Metadata.empty())
+    });
+
+    DataFrame sentenceDataFrame = sqlContext.createDataFrame(jrdd, schema);
+
+    Tokenizer tokenizer = new Tokenizer().setInputCol("sentence").setOutputCol("words");
+
+    DataFrame wordsDataFrame = tokenizer.transform(sentenceDataFrame);
+    for (Row r : wordsDataFrame.select("words", "label").take(3)) {
+      java.util.List<String> words = r.getList(0);
+      for (String word : words) System.out.print(word + " ");
+      System.out.println();
+    }
+
+    RegexTokenizer regexTokenizer = new RegexTokenizer()
+      .setInputCol("sentence")
+      .setOutputCol("words")
+      .setPattern("\\W");  // alternatively .setPattern("\\w+").setGaps(false);
+    // $example off$
+    jsc.stop();
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorAssemblerExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorAssemblerExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorAssemblerExample.java
new file mode 100644
index 0000000..7e230b5
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorAssemblerExample.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.SQLContext;
+
+// $example on$
+import java.util.Arrays;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.ml.feature.VectorAssembler;
+import org.apache.spark.mllib.linalg.VectorUDT;
+import org.apache.spark.mllib.linalg.Vectors;
+import org.apache.spark.sql.DataFrame;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.*;
+
+import static org.apache.spark.sql.types.DataTypes.*;
+// $example off$
+
+public class JavaVectorAssemblerExample {
+  public static void main(String[] args) {
+    SparkConf conf = new SparkConf().setAppName("JavaVectorAssemblerExample");
+    JavaSparkContext jsc = new JavaSparkContext(conf);
+    SQLContext sqlContext = new SQLContext(jsc);
+
+    // $example on$
+    StructType schema = createStructType(new StructField[]{
+      createStructField("id", IntegerType, false),
+      createStructField("hour", IntegerType, false),
+      createStructField("mobile", DoubleType, false),
+      createStructField("userFeatures", new VectorUDT(), false),
+      createStructField("clicked", DoubleType, false)
+    });
+    Row row = RowFactory.create(0, 18, 1.0, Vectors.dense(0.0, 10.0, 0.5), 1.0);
+    JavaRDD<Row> rdd = jsc.parallelize(Arrays.asList(row));
+    DataFrame dataset = sqlContext.createDataFrame(rdd, schema);
+
+    VectorAssembler assembler = new VectorAssembler()
+      .setInputCols(new String[]{"hour", "mobile", "userFeatures"})
+      .setOutputCol("features");
+
+    DataFrame output = assembler.transform(dataset);
+    System.out.println(output.select("features", "clicked").first());
+    // $example off$
+    jsc.stop();
+  }
+}
+

http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorIndexerExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorIndexerExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorIndexerExample.java
new file mode 100644
index 0000000..06b4bf6
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorIndexerExample.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.SQLContext;
+
+// $example on$
+import java.util.Map;
+
+import org.apache.spark.ml.feature.VectorIndexer;
+import org.apache.spark.ml.feature.VectorIndexerModel;
+import org.apache.spark.sql.DataFrame;
+// $example off$
+
+public class JavaVectorIndexerExample {
+  public static void main(String[] args) {
+    SparkConf conf = new SparkConf().setAppName("JavaVectorIndexerExample");
+    JavaSparkContext jsc = new JavaSparkContext(conf);
+    SQLContext jsql = new SQLContext(jsc);
+
+    // $example on$
+    DataFrame data = jsql.read().format("libsvm").load("data/mllib/sample_libsvm_data.txt");
+
+    VectorIndexer indexer = new VectorIndexer()
+      .setInputCol("features")
+      .setOutputCol("indexed")
+      .setMaxCategories(10);
+    VectorIndexerModel indexerModel = indexer.fit(data);
+
+    Map<Integer, Map<Double, Integer>> categoryMaps = indexerModel.javaCategoryMaps();
+    System.out.print("Chose " + categoryMaps.size() + " categorical features:");
+
+    for (Integer feature : categoryMaps.keySet()) {
+      System.out.print(" " + feature);
+    }
+    System.out.println();
+
+    // Create new column "indexed" with categorical values transformed to indices
+    DataFrame indexedData = indexerModel.transform(data);
+    // $example off$
+    jsc.stop();
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorSlicerExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorSlicerExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorSlicerExample.java
new file mode 100644
index 0000000..4d5cb04
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorSlicerExample.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.SQLContext;
+
+// $example on$
+import com.google.common.collect.Lists;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.ml.attribute.Attribute;
+import org.apache.spark.ml.attribute.AttributeGroup;
+import org.apache.spark.ml.attribute.NumericAttribute;
+import org.apache.spark.ml.feature.VectorSlicer;
+import org.apache.spark.mllib.linalg.Vectors;
+import org.apache.spark.sql.DataFrame;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.*;
+// $example off$
+
+public class JavaVectorSlicerExample {
+  public static void main(String[] args) {
+    SparkConf conf = new SparkConf().setAppName("JavaVectorSlicerExample");
+    JavaSparkContext jsc = new JavaSparkContext(conf);
+    SQLContext jsql = new SQLContext(jsc);
+
+    // $example on$
+    Attribute[] attrs = new Attribute[]{
+      NumericAttribute.defaultAttr().withName("f1"),
+      NumericAttribute.defaultAttr().withName("f2"),
+      NumericAttribute.defaultAttr().withName("f3")
+    };
+    AttributeGroup group = new AttributeGroup("userFeatures", attrs);
+
+    JavaRDD<Row> jrdd = jsc.parallelize(Lists.newArrayList(
+      RowFactory.create(Vectors.sparse(3, new int[]{0, 1}, new double[]{-2.0, 2.3})),
+      RowFactory.create(Vectors.dense(-2.0, 2.3, 0.0))
+    ));
+
+    DataFrame dataset = jsql.createDataFrame(jrdd, (new StructType()).add(group.toStructField()));
+
+    VectorSlicer vectorSlicer = new VectorSlicer()
+      .setInputCol("userFeatures").setOutputCol("features");
+
+    vectorSlicer.setIndices(new int[]{1}).setNames(new String[]{"f3"});
+    // or vectorSlicer.setIndices(new int[]{1, 2}), or vectorSlicer.setNames(new String[]{"f2", "f3"})
+
+    DataFrame output = vectorSlicer.transform(dataset);
+
+    System.out.println(output.select("userFeatures", "features").first());
+    // $example off$
+    jsc.stop();
+  }
+}
+

http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/python/ml/binarizer_example.py
----------------------------------------------------------------------
diff --git a/examples/src/main/python/ml/binarizer_example.py b/examples/src/main/python/ml/binarizer_example.py
new file mode 100644
index 0000000..960ad20
--- /dev/null
+++ b/examples/src/main/python/ml/binarizer_example.py
@@ -0,0 +1,43 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+from pyspark import SparkContext
+from pyspark.sql import SQLContext
+# $example on$
+from pyspark.ml.feature import Binarizer
+# $example off$
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="BinarizerExample")
+    sqlContext = SQLContext(sc)
+
+    # $example on$
+    continuousDataFrame = sqlContext.createDataFrame([
+        (0, 0.1),
+        (1, 0.8),
+        (2, 0.2)
+    ], ["label", "feature"])
+    binarizer = Binarizer(threshold=0.5, inputCol="feature", outputCol="binarized_feature")
+    binarizedDataFrame = binarizer.transform(continuousDataFrame)
+    binarizedFeatures = binarizedDataFrame.select("binarized_feature")
+    for binarized_feature, in binarizedFeatures.collect():
+        print(binarized_feature)
+    # $example off$
+
+    sc.stop()

http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/python/ml/bucketizer_example.py
----------------------------------------------------------------------
diff --git a/examples/src/main/python/ml/bucketizer_example.py b/examples/src/main/python/ml/bucketizer_example.py
new file mode 100644
index 0000000..a12750a
--- /dev/null
+++ b/examples/src/main/python/ml/bucketizer_example.py
@@ -0,0 +1,42 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+from pyspark import SparkContext
+from pyspark.sql import SQLContext
+# $example on$
+from pyspark.ml.feature import Bucketizer
+# $example off$
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="BucketizerExample")
+    sqlContext = SQLContext(sc)
+
+    # $example on$
+    splits = [-float("inf"), -0.5, 0.0, 0.5, float("inf")]
+
+    data = [(-0.5,), (-0.3,), (0.0,), (0.2,)]
+    dataFrame = sqlContext.createDataFrame(data, ["features"])
+
+    bucketizer = Bucketizer(splits=splits, inputCol="features", outputCol="bucketedFeatures")
+
+    # Transform original data into its bucket index.
+    bucketedData = bucketizer.transform(dataFrame)
+    # $example off$
+
+    sc.stop()

http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/python/ml/elementwise_product_example.py
----------------------------------------------------------------------
diff --git a/examples/src/main/python/ml/elementwise_product_example.py b/examples/src/main/python/ml/elementwise_product_example.py
new file mode 100644
index 0000000..c85cb0d
--- /dev/null
+++ b/examples/src/main/python/ml/elementwise_product_example.py
@@ -0,0 +1,39 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+from pyspark import SparkContext
+from pyspark.sql import SQLContext
+# $example on$
+from pyspark.ml.feature import ElementwiseProduct
+from pyspark.mllib.linalg import Vectors
+# $example off$
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="ElementwiseProductExample")
+    sqlContext = SQLContext(sc)
+
+    # $example on$
+    data = [(Vectors.dense([1.0, 2.0, 3.0]),), (Vectors.dense([4.0, 5.0, 6.0]),)]
+    df = sqlContext.createDataFrame(data, ["vector"])
+    transformer = ElementwiseProduct(scalingVec=Vectors.dense([0.0, 1.0, 2.0]),
+                                     inputCol="vector", outputCol="transformedVector")
+    transformer.transform(df).show()
+    # $example off$
+
+    sc.stop()

http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/python/ml/n_gram_example.py
----------------------------------------------------------------------
diff --git a/examples/src/main/python/ml/n_gram_example.py b/examples/src/main/python/ml/n_gram_example.py
new file mode 100644
index 0000000..f2d85f5
--- /dev/null
+++ b/examples/src/main/python/ml/n_gram_example.py
@@ -0,0 +1,42 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+from pyspark import SparkContext
+from pyspark.sql import SQLContext
+# $example on$
+from pyspark.ml.feature import NGram
+# $example off$
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="NGramExample")
+    sqlContext = SQLContext(sc)
+
+    # $example on$
+    wordDataFrame = sqlContext.createDataFrame([
+        (0, ["Hi", "I", "heard", "about", "Spark"]),
+        (1, ["I", "wish", "Java", "could", "use", "case", "classes"]),
+        (2, ["Logistic", "regression", "models", "are", "neat"])
+    ], ["label", "words"])
+    ngram = NGram(inputCol="words", outputCol="ngrams")
+    ngramDataFrame = ngram.transform(wordDataFrame)
+    for ngrams_label in ngramDataFrame.select("ngrams", "label").take(3):
+        print(ngrams_label)
+    # $example off$
+
+    sc.stop()

http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/python/ml/normalizer_example.py
----------------------------------------------------------------------
diff --git a/examples/src/main/python/ml/normalizer_example.py b/examples/src/main/python/ml/normalizer_example.py
new file mode 100644
index 0000000..833d93e
--- /dev/null
+++ b/examples/src/main/python/ml/normalizer_example.py
@@ -0,0 +1,41 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+from pyspark import SparkContext
+from pyspark.sql import SQLContext
+# $example on$
+from pyspark.ml.feature import Normalizer
+# $example off$
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="NormalizerExample")
+    sqlContext = SQLContext(sc)
+
+    # $example on$
+    dataFrame = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
+
+    # Normalize each Vector using $L^1$ norm.
+    normalizer = Normalizer(inputCol="features", outputCol="normFeatures", p=1.0)
+    l1NormData = normalizer.transform(dataFrame)
+
+    # Normalize each Vector using $L^\infty$ norm.
+    lInfNormData = normalizer.transform(dataFrame, {normalizer.p: float("inf")})
+    # $example off$
+
+    sc.stop()

http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/python/ml/onehot_encoder_example.py
----------------------------------------------------------------------
diff --git a/examples/src/main/python/ml/onehot_encoder_example.py b/examples/src/main/python/ml/onehot_encoder_example.py
new file mode 100644
index 0000000..7529dfd
--- /dev/null
+++ b/examples/src/main/python/ml/onehot_encoder_example.py
@@ -0,0 +1,47 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+from pyspark import SparkContext
+from pyspark.sql import SQLContext
+# $example on$
+from pyspark.ml.feature import OneHotEncoder, StringIndexer
+# $example off$
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="OneHotEncoderExample")
+    sqlContext = SQLContext(sc)
+
+    # $example on$
+    df = sqlContext.createDataFrame([
+        (0, "a"),
+        (1, "b"),
+        (2, "c"),
+        (3, "a"),
+        (4, "a"),
+        (5, "c")
+    ], ["id", "category"])
+
+    stringIndexer = StringIndexer(inputCol="category", outputCol="categoryIndex")
+    model = stringIndexer.fit(df)
+    indexed = model.transform(df)
+    encoder = OneHotEncoder(dropLast=False, inputCol="categoryIndex", outputCol="categoryVec")
+    encoded = encoder.transform(indexed)
+    # $example off$
+
+    sc.stop()

http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/python/ml/pca_example.py
----------------------------------------------------------------------
diff --git a/examples/src/main/python/ml/pca_example.py b/examples/src/main/python/ml/pca_example.py
new file mode 100644
index 0000000..8b66140
--- /dev/null
+++ b/examples/src/main/python/ml/pca_example.py
@@ -0,0 +1,42 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+from pyspark import SparkContext
+from pyspark.sql import SQLContext
+# $example on$
+from pyspark.ml.feature import PCA
+from pyspark.mllib.linalg import Vectors
+# $example off$
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="PCAExample")
+    sqlContext = SQLContext(sc)
+
+    # $example on$
+    data = [(Vectors.sparse(5, [(1, 1.0), (3, 7.0)]),),
+            (Vectors.dense([2.0, 0.0, 3.0, 4.0, 5.0]),),
+            (Vectors.dense([4.0, 0.0, 0.0, 6.0, 7.0]),)]
+    df = sqlContext.createDataFrame(data, ["features"])
+    pca = PCA(k=3, inputCol="features", outputCol="pcaFeatures")
+    model = pca.fit(df)
+    result = model.transform(df).select("pcaFeatures")
+    result.show(truncate=False)
+    # $example off$
+
+    sc.stop()

http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/python/ml/polynomial_expansion_example.py
----------------------------------------------------------------------
diff --git a/examples/src/main/python/ml/polynomial_expansion_example.py b/examples/src/main/python/ml/polynomial_expansion_example.py
new file mode 100644
index 0000000..030a613
--- /dev/null
+++ b/examples/src/main/python/ml/polynomial_expansion_example.py
@@ -0,0 +1,43 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+from pyspark import SparkContext
+from pyspark.sql import SQLContext
+# $example on$
+from pyspark.ml.feature import PolynomialExpansion
+from pyspark.mllib.linalg import Vectors
+# $example off$
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="PolynomialExpansionExample")
+    sqlContext = SQLContext(sc)
+
+    # $example on$
+    df = sqlContext.createDataFrame(
+        [(Vectors.dense([-2.0, 2.3]), ),
+        (Vectors.dense([0.0, 0.0]), ),
+        (Vectors.dense([0.6, -1.1]), )],
+        ["features"])
+    px = PolynomialExpansion(degree=2, inputCol="features", outputCol="polyFeatures")
+    polyDF = px.transform(df)
+    for expanded in polyDF.select("polyFeatures").take(3):
+        print(expanded)
+    # $example off$
+
+    sc.stop()

http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/python/ml/rformula_example.py
----------------------------------------------------------------------
diff --git a/examples/src/main/python/ml/rformula_example.py b/examples/src/main/python/ml/rformula_example.py
new file mode 100644
index 0000000..b544a14
--- /dev/null
+++ b/examples/src/main/python/ml/rformula_example.py
@@ -0,0 +1,44 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+from pyspark import SparkContext
+from pyspark.sql import SQLContext
+# $example on$
+from pyspark.ml.feature import RFormula
+# $example off$
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="RFormulaExample")
+    sqlContext = SQLContext(sc)
+
+    # $example on$
+    dataset = sqlContext.createDataFrame(
+        [(7, "US", 18, 1.0),
+         (8, "CA", 12, 0.0),
+         (9, "NZ", 15, 0.0)],
+        ["id", "country", "hour", "clicked"])
+    formula = RFormula(
+        formula="clicked ~ country + hour",
+        featuresCol="features",
+        labelCol="label")
+    output = formula.fit(dataset).transform(dataset)
+    output.select("features", "label").show()
+    # $example off$
+
+    sc.stop()

http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/python/ml/standard_scaler_example.py
----------------------------------------------------------------------
diff --git a/examples/src/main/python/ml/standard_scaler_example.py b/examples/src/main/python/ml/standard_scaler_example.py
new file mode 100644
index 0000000..139acec
--- /dev/null
+++ b/examples/src/main/python/ml/standard_scaler_example.py
@@ -0,0 +1,42 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+from pyspark import SparkContext
+from pyspark.sql import SQLContext
+# $example on$
+from pyspark.ml.feature import StandardScaler
+# $example off$
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="StandardScalerExample")
+    sqlContext = SQLContext(sc)
+
+    # $example on$
+    dataFrame = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
+    scaler = StandardScaler(inputCol="features", outputCol="scaledFeatures",
+                            withStd=True, withMean=False)
+
+    # Compute summary statistics by fitting the StandardScaler
+    scalerModel = scaler.fit(dataFrame)
+
+    # Normalize each feature to have unit standard deviation.
+    scaledData = scalerModel.transform(dataFrame)
+    # $example off$
+
+    sc.stop()

http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/python/ml/stopwords_remover_example.py
----------------------------------------------------------------------
diff --git a/examples/src/main/python/ml/stopwords_remover_example.py b/examples/src/main/python/ml/stopwords_remover_example.py
new file mode 100644
index 0000000..01f94af
--- /dev/null
+++ b/examples/src/main/python/ml/stopwords_remover_example.py
@@ -0,0 +1,40 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+from pyspark import SparkContext
+from pyspark.sql import SQLContext
+# $example on$
+from pyspark.ml.feature import StopWordsRemover
+# $example off$
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="StopWordsRemoverExample")
+    sqlContext = SQLContext(sc)
+
+    # $example on$
+    sentenceData = sqlContext.createDataFrame([
+        (0, ["I", "saw", "the", "red", "baloon"]),
+        (1, ["Mary", "had", "a", "little", "lamb"])
+    ], ["label", "raw"])
+
+    remover = StopWordsRemover(inputCol="raw", outputCol="filtered")
+    remover.transform(sentenceData).show(truncate=False)
+    # $example off$
+
+    sc.stop()

http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/python/ml/string_indexer_example.py
----------------------------------------------------------------------
diff --git a/examples/src/main/python/ml/string_indexer_example.py b/examples/src/main/python/ml/string_indexer_example.py
new file mode 100644
index 0000000..58a8cb5
--- /dev/null
+++ b/examples/src/main/python/ml/string_indexer_example.py
@@ -0,0 +1,39 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+from pyspark import SparkContext
+from pyspark.sql import SQLContext
+# $example on$
+from pyspark.ml.feature import StringIndexer
+# $example off$
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="StringIndexerExample")
+    sqlContext = SQLContext(sc)
+
+    # $example on$
+    df = sqlContext.createDataFrame(
+        [(0, "a"), (1, "b"), (2, "c"), (3, "a"), (4, "a"), (5, "c")],
+        ["id", "category"])
+    indexer = StringIndexer(inputCol="category", outputCol="categoryIndex")
+    indexed = indexer.fit(df).transform(df)
+    indexed.show()
+    # $example off$
+
+    sc.stop()

http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/python/ml/tokenizer_example.py
----------------------------------------------------------------------
diff --git a/examples/src/main/python/ml/tokenizer_example.py b/examples/src/main/python/ml/tokenizer_example.py
new file mode 100644
index 0000000..ce9b225
--- /dev/null
+++ b/examples/src/main/python/ml/tokenizer_example.py
@@ -0,0 +1,44 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+from pyspark import SparkContext
+from pyspark.sql import SQLContext
+# $example on$
+from pyspark.ml.feature import Tokenizer, RegexTokenizer
+# $example off$
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="TokenizerExample")
+    sqlContext = SQLContext(sc)
+
+    # $example on$
+    sentenceDataFrame = sqlContext.createDataFrame([
+        (0, "Hi I heard about Spark"),
+        (1, "I wish Java could use case classes"),
+        (2, "Logistic,regression,models,are,neat")
+    ], ["label", "sentence"])
+    tokenizer = Tokenizer(inputCol="sentence", outputCol="words")
+    wordsDataFrame = tokenizer.transform(sentenceDataFrame)
+    for words_label in wordsDataFrame.select("words", "label").take(3):
+        print(words_label)
+    regexTokenizer = RegexTokenizer(inputCol="sentence", outputCol="words", pattern="\\W")
+    # alternatively, pattern="\\w+", gaps=False
+    # $example off$
+
+    sc.stop()

http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/python/ml/vector_assembler_example.py
----------------------------------------------------------------------
diff --git a/examples/src/main/python/ml/vector_assembler_example.py b/examples/src/main/python/ml/vector_assembler_example.py
new file mode 100644
index 0000000..04f6483
--- /dev/null
+++ b/examples/src/main/python/ml/vector_assembler_example.py
@@ -0,0 +1,42 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+from pyspark import SparkContext
+from pyspark.sql import SQLContext
+# $example on$
+from pyspark.mllib.linalg import Vectors
+from pyspark.ml.feature import VectorAssembler
+# $example off$
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="VectorAssemblerExample")
+    sqlContext = SQLContext(sc)
+
+    # $example on$
+    dataset = sqlContext.createDataFrame(
+        [(0, 18, 1.0, Vectors.dense([0.0, 10.0, 0.5]), 1.0)],
+        ["id", "hour", "mobile", "userFeatures", "clicked"])
+    assembler = VectorAssembler(
+        inputCols=["hour", "mobile", "userFeatures"],
+        outputCol="features")
+    output = assembler.transform(dataset)
+    print(output.select("features", "clicked").first())
+    # $example off$
+
+    sc.stop()

http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/python/ml/vector_indexer_example.py
----------------------------------------------------------------------
diff --git a/examples/src/main/python/ml/vector_indexer_example.py b/examples/src/main/python/ml/vector_indexer_example.py
new file mode 100644
index 0000000..cc00d14
--- /dev/null
+++ b/examples/src/main/python/ml/vector_indexer_example.py
@@ -0,0 +1,39 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+from pyspark import SparkContext
+from pyspark.sql import SQLContext
+# $example on$
+from pyspark.ml.feature import VectorIndexer
+# $example off$
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="VectorIndexerExample")
+    sqlContext = SQLContext(sc)
+
+    # $example on$
+    data = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
+    indexer = VectorIndexer(inputCol="features", outputCol="indexed", maxCategories=10)
+    indexerModel = indexer.fit(data)
+
+    # Create new column "indexed" with categorical values transformed to indices
+    indexedData = indexerModel.transform(data)
+    # $example off$
+
+    sc.stop()

http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/scala/org/apache/spark/examples/ml/BinarizerExample.scala
----------------------------------------------------------------------
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/BinarizerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/BinarizerExample.scala
new file mode 100644
index 0000000..e724aa5
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/BinarizerExample.scala
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.feature.Binarizer
+// $example off$
+import org.apache.spark.sql.{DataFrame, SQLContext}
+import org.apache.spark.{SparkConf, SparkContext}
+
+object BinarizerExample {
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("BinarizerExample")
+    val sc = new SparkContext(conf)
+    val sqlContext = new SQLContext(sc)
+    // $example on$
+    val data = Array((0, 0.1), (1, 0.8), (2, 0.2))
+    val dataFrame: DataFrame = sqlContext.createDataFrame(data).toDF("label", "feature")
+
+    val binarizer: Binarizer = new Binarizer()
+      .setInputCol("feature")
+      .setOutputCol("binarized_feature")
+      .setThreshold(0.5)
+
+    val binarizedDataFrame = binarizer.transform(dataFrame)
+    val binarizedFeatures = binarizedDataFrame.select("binarized_feature")
+    binarizedFeatures.collect().foreach(println)
+    // $example off$
+    sc.stop()
+  }
+}
+// scalastyle:on println

http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/scala/org/apache/spark/examples/ml/BucketizerExample.scala
----------------------------------------------------------------------
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/BucketizerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/BucketizerExample.scala
new file mode 100644
index 0000000..30c2776
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/BucketizerExample.scala
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.feature.Bucketizer
+// $example off$
+import org.apache.spark.sql.SQLContext
+import org.apache.spark.{SparkConf, SparkContext}
+
+object BucketizerExample {
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("BucketizerExample")
+    val sc = new SparkContext(conf)
+    val sqlContext = new SQLContext(sc)
+
+    // $example on$
+    val splits = Array(Double.NegativeInfinity, -0.5, 0.0, 0.5, Double.PositiveInfinity)
+
+    val data = Array(-0.5, -0.3, 0.0, 0.2)
+    val dataFrame = sqlContext.createDataFrame(data.map(Tuple1.apply)).toDF("features")
+
+    val bucketizer = new Bucketizer()
+      .setInputCol("features")
+      .setOutputCol("bucketedFeatures")
+      .setSplits(splits)
+
+    // Transform original data into its bucket index.
+    val bucketedData = bucketizer.transform(dataFrame)
+    // $example off$
+    sc.stop()
+  }
+}
+// scalastyle:on println
+

http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/scala/org/apache/spark/examples/ml/DCTExample.scala
----------------------------------------------------------------------
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/DCTExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/DCTExample.scala
new file mode 100644
index 0000000..314c2c2
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/DCTExample.scala
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.feature.DCT
+import org.apache.spark.mllib.linalg.Vectors
+// $example off$
+import org.apache.spark.sql.SQLContext
+import org.apache.spark.{SparkConf, SparkContext}
+
+object DCTExample {
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("DCTExample")
+    val sc = new SparkContext(conf)
+    val sqlContext = new SQLContext(sc)
+
+    // $example on$
+    val data = Seq(
+      Vectors.dense(0.0, 1.0, -2.0, 3.0),
+      Vectors.dense(-1.0, 2.0, 4.0, -7.0),
+      Vectors.dense(14.0, -2.0, -5.0, 1.0))
+
+    val df = sqlContext.createDataFrame(data.map(Tuple1.apply)).toDF("features")
+
+    val dct = new DCT()
+      .setInputCol("features")
+      .setOutputCol("featuresDCT")
+      .setInverse(false)
+
+    val dctDf = dct.transform(df)
+    dctDf.select("featuresDCT").show(3)
+    // $example off$
+    sc.stop()
+  }
+}
+// scalastyle:on println
+

http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/scala/org/apache/spark/examples/ml/ElementWiseProductExample.scala
----------------------------------------------------------------------
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/ElementWiseProductExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/ElementWiseProductExample.scala
new file mode 100644
index 0000000..ac50bb7
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/ElementWiseProductExample.scala
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.feature.ElementwiseProduct
+import org.apache.spark.mllib.linalg.Vectors
+// $example off$
+import org.apache.spark.sql.SQLContext
+import org.apache.spark.{SparkConf, SparkContext}
+
+object ElementwiseProductExample {
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("ElementwiseProductExample")
+    val sc = new SparkContext(conf)
+    val sqlContext = new SQLContext(sc)
+
+    // $example on$
+    // Create some vector data; also works for sparse vectors
+    val dataFrame = sqlContext.createDataFrame(Seq(
+      ("a", Vectors.dense(1.0, 2.0, 3.0)),
+      ("b", Vectors.dense(4.0, 5.0, 6.0)))).toDF("id", "vector")
+
+    val transformingVector = Vectors.dense(0.0, 1.0, 2.0)
+    val transformer = new ElementwiseProduct()
+      .setScalingVec(transformingVector)
+      .setInputCol("vector")
+      .setOutputCol("transformedVector")
+
+    // Batch transform the vectors to create a new column:
+    transformer.transform(dataFrame).show()
+    // $example off$
+    sc.stop()
+  }
+}
+// scalastyle:on println
+

http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/scala/org/apache/spark/examples/ml/MinMaxScalerExample.scala
----------------------------------------------------------------------
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/MinMaxScalerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/MinMaxScalerExample.scala
new file mode 100644
index 0000000..dac3679
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/MinMaxScalerExample.scala
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.feature.MinMaxScaler
+// $example off$
+import org.apache.spark.sql.SQLContext
+import org.apache.spark.{SparkConf, SparkContext}
+
+object MinMaxScalerExample {
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("MinMaxScalerExample")
+    val sc = new SparkContext(conf)
+    val sqlContext = new SQLContext(sc)
+
+    // $example on$
+    val dataFrame = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
+
+    val scaler = new MinMaxScaler()
+      .setInputCol("features")
+      .setOutputCol("scaledFeatures")
+
+    // Compute summary statistics and generate MinMaxScalerModel
+    val scalerModel = scaler.fit(dataFrame)
+
+    // rescale each feature to range [min, max].
+    val scaledData = scalerModel.transform(dataFrame)
+    // $example off$
+    sc.stop()
+  }
+}
+// scalastyle:on println

http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/scala/org/apache/spark/examples/ml/NGramExample.scala
----------------------------------------------------------------------
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/NGramExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/NGramExample.scala
new file mode 100644
index 0000000..8a85f71
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/NGramExample.scala
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.feature.NGram
+// $example off$
+import org.apache.spark.sql.SQLContext
+import org.apache.spark.{SparkConf, SparkContext}
+
+object NGramExample {
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("NGramExample")
+    val sc = new SparkContext(conf)
+    val sqlContext = new SQLContext(sc)
+
+    // $example on$
+    val wordDataFrame = sqlContext.createDataFrame(Seq(
+      (0, Array("Hi", "I", "heard", "about", "Spark")),
+      (1, Array("I", "wish", "Java", "could", "use", "case", "classes")),
+      (2, Array("Logistic", "regression", "models", "are", "neat"))
+    )).toDF("label", "words")
+
+    val ngram = new NGram().setInputCol("words").setOutputCol("ngrams")
+    val ngramDataFrame = ngram.transform(wordDataFrame)
+    ngramDataFrame.take(3).map(_.getAs[Stream[String]]("ngrams").toList).foreach(println)
+    // $example off$
+    sc.stop()
+  }
+}
+// scalastyle:on println

http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/scala/org/apache/spark/examples/ml/NormalizerExample.scala
----------------------------------------------------------------------
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/NormalizerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/NormalizerExample.scala
new file mode 100644
index 0000000..17571f0
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/NormalizerExample.scala
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.feature.Normalizer
+// $example off$
+import org.apache.spark.sql.SQLContext
+import org.apache.spark.{SparkConf, SparkContext}
+
+object NormalizerExample {
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("NormalizerExample")
+    val sc = new SparkContext(conf)
+    val sqlContext = new SQLContext(sc)
+
+    // $example on$
+    val dataFrame = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
+
+    // Normalize each Vector using $L^1$ norm.
+    val normalizer = new Normalizer()
+      .setInputCol("features")
+      .setOutputCol("normFeatures")
+      .setP(1.0)
+
+    val l1NormData = normalizer.transform(dataFrame)
+
+    // Normalize each Vector using $L^\infty$ norm.
+    val lInfNormData = normalizer.transform(dataFrame, normalizer.p -> Double.PositiveInfinity)
+    // $example off$
+    sc.stop()
+  }
+}
+// scalastyle:on println

http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/scala/org/apache/spark/examples/ml/OneHotEncoderExample.scala
----------------------------------------------------------------------
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/OneHotEncoderExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/OneHotEncoderExample.scala
new file mode 100644
index 0000000..4512736
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/OneHotEncoderExample.scala
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.feature.{OneHotEncoder, StringIndexer}
+// $example off$
+import org.apache.spark.sql.SQLContext
+import org.apache.spark.{SparkConf, SparkContext}
+
+object OneHotEncoderExample {
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("OneHotEncoderExample")
+    val sc = new SparkContext(conf)
+    val sqlContext = new SQLContext(sc)
+
+    // $example on$
+    val df = sqlContext.createDataFrame(Seq(
+      (0, "a"),
+      (1, "b"),
+      (2, "c"),
+      (3, "a"),
+      (4, "a"),
+      (5, "c")
+    )).toDF("id", "category")
+
+    val indexer = new StringIndexer()
+      .setInputCol("category")
+      .setOutputCol("categoryIndex")
+      .fit(df)
+    val indexed = indexer.transform(df)
+
+    val encoder = new OneHotEncoder().setInputCol("categoryIndex")
+      .setOutputCol("categoryVec")
+    val encoded = encoder.transform(indexed)
+    encoded.select("id", "categoryVec").foreach(println)
+    // $example off$
+    sc.stop()
+  }
+}
+// scalastyle:on println
+

http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/scala/org/apache/spark/examples/ml/PCAExample.scala
----------------------------------------------------------------------
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/PCAExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/PCAExample.scala
new file mode 100644
index 0000000..a18d4f3
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/PCAExample.scala
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.feature.PCA
+import org.apache.spark.mllib.linalg.Vectors
+// $example off$
+import org.apache.spark.sql.SQLContext
+import org.apache.spark.{SparkConf, SparkContext}
+
+object PCAExample {
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("PCAExample")
+    val sc = new SparkContext(conf)
+    val sqlContext = new SQLContext(sc)
+
+    // $example on$
+    val data = Array(
+      Vectors.sparse(5, Seq((1, 1.0), (3, 7.0))),
+      Vectors.dense(2.0, 0.0, 3.0, 4.0, 5.0),
+      Vectors.dense(4.0, 0.0, 0.0, 6.0, 7.0)
+    )
+    val df = sqlContext.createDataFrame(data.map(Tuple1.apply)).toDF("features")
+    val pca = new PCA()
+      .setInputCol("features")
+      .setOutputCol("pcaFeatures")
+      .setK(3)
+      .fit(df)
+    val pcaDF = pca.transform(df)
+    val result = pcaDF.select("pcaFeatures")
+    result.show()
+    // $example off$
+    sc.stop()
+  }
+}
+// scalastyle:on println
+

http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/scala/org/apache/spark/examples/ml/PolynomialExpansionExample.scala
----------------------------------------------------------------------
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/PolynomialExpansionExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/PolynomialExpansionExample.scala
new file mode 100644
index 0000000..b8e9e69
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/PolynomialExpansionExample.scala
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.feature.PolynomialExpansion
+import org.apache.spark.mllib.linalg.Vectors
+// $example off$
+import org.apache.spark.sql.SQLContext
+import org.apache.spark.{SparkConf, SparkContext}
+
+object PolynomialExpansionExample {
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("PolynomialExpansionExample")
+    val sc = new SparkContext(conf)
+    val sqlContext = new SQLContext(sc)
+
+    // $example on$
+    val data = Array(
+      Vectors.dense(-2.0, 2.3),
+      Vectors.dense(0.0, 0.0),
+      Vectors.dense(0.6, -1.1)
+    )
+    val df = sqlContext.createDataFrame(data.map(Tuple1.apply)).toDF("features")
+    val polynomialExpansion = new PolynomialExpansion()
+      .setInputCol("features")
+      .setOutputCol("polyFeatures")
+      .setDegree(3)
+    val polyDF = polynomialExpansion.transform(df)
+    polyDF.select("polyFeatures").take(3).foreach(println)
+    // $example off$
+    sc.stop()
+  }
+}
+// scalastyle:on println
+
+

http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/scala/org/apache/spark/examples/ml/RFormulaExample.scala
----------------------------------------------------------------------
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/RFormulaExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/RFormulaExample.scala
new file mode 100644
index 0000000..286866e
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/RFormulaExample.scala
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.feature.RFormula
+// $example off$
+import org.apache.spark.sql.SQLContext
+import org.apache.spark.{SparkConf, SparkContext}
+
+object RFormulaExample {
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("RFormulaExample")
+    val sc = new SparkContext(conf)
+    val sqlContext = new SQLContext(sc)
+
+    // $example on$
+    val dataset = sqlContext.createDataFrame(Seq(
+      (7, "US", 18, 1.0),
+      (8, "CA", 12, 0.0),
+      (9, "NZ", 15, 0.0)
+    )).toDF("id", "country", "hour", "clicked")
+    val formula = new RFormula()
+      .setFormula("clicked ~ country + hour")
+      .setFeaturesCol("features")
+      .setLabelCol("label")
+    val output = formula.fit(dataset).transform(dataset)
+    output.select("features", "label").show()
+    // $example off$
+    sc.stop()
+  }
+}
+// scalastyle:on println

http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/scala/org/apache/spark/examples/ml/StandardScalerExample.scala
----------------------------------------------------------------------
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/StandardScalerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/StandardScalerExample.scala
new file mode 100644
index 0000000..646ce0f
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/StandardScalerExample.scala
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.feature.StandardScaler
+// $example off$
+import org.apache.spark.sql.SQLContext
+import org.apache.spark.{SparkConf, SparkContext}
+
+object StandardScalerExample {
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("StandardScalerExample")
+    val sc = new SparkContext(conf)
+    val sqlContext = new SQLContext(sc)
+
+    // $example on$
+    val dataFrame = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
+
+    val scaler = new StandardScaler()
+      .setInputCol("features")
+      .setOutputCol("scaledFeatures")
+      .setWithStd(true)
+      .setWithMean(false)
+
+    // Compute summary statistics by fitting the StandardScaler.
+    val scalerModel = scaler.fit(dataFrame)
+
+    // Normalize each feature to have unit standard deviation.
+    val scaledData = scalerModel.transform(dataFrame)
+    // $example off$
+    sc.stop()
+  }
+}
+// scalastyle:on println

http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/scala/org/apache/spark/examples/ml/StopWordsRemoverExample.scala
----------------------------------------------------------------------
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/StopWordsRemoverExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/StopWordsRemoverExample.scala
new file mode 100644
index 0000000..655ffce
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/StopWordsRemoverExample.scala
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.feature.StopWordsRemover
+// $example off$
+import org.apache.spark.sql.SQLContext
+import org.apache.spark.{SparkConf, SparkContext}
+
+object StopWordsRemoverExample {
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("StopWordsRemoverExample")
+    val sc = new SparkContext(conf)
+    val sqlContext = new SQLContext(sc)
+
+    // $example on$
+    val remover = new StopWordsRemover()
+      .setInputCol("raw")
+      .setOutputCol("filtered")
+
+    val dataSet = sqlContext.createDataFrame(Seq(
+      (0, Seq("I", "saw", "the", "red", "baloon")),
+      (1, Seq("Mary", "had", "a", "little", "lamb"))
+    )).toDF("id", "raw")
+
+    remover.transform(dataSet).show()
+    // $example off$
+    sc.stop()
+  }
+}
+// scalastyle:on println


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


[3/3] spark git commit: [SPARK-11551][DOC][EXAMPLE] Replace example code in ml-features.md using include_example

Posted by me...@apache.org.
[SPARK-11551][DOC][EXAMPLE] Replace example code in ml-features.md using include_example

Made a new patch containing only the markdown examples moved to the examples/ folder.
Only three Java code examples were not shifted, since they contained compilation errors; these classes are
1) StandardScaler 2) NormalizerExample 3) VectorIndexer

Author: Xusen Yin <yi...@gmail.com>
Author: somideshmukh <so...@us.ibm.com>

Closes #10002 from somideshmukh/SomilBranch1.33.
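
For reference, the include_example mechanism works roughly as follows: each relocated example wraps the
snippet shown in the docs between "$example on$" / "$example off$" marker comments, and the markdown page
carries only an include_example tag pointing at the file. A minimal sketch, using the StopWordsRemover
example from the diff below (the exact rendering behaviour of the Jekyll include_example plugin, e.g. how
multiple marked regions are concatenated, is an assumption here):

    // In StopWordsRemoverExample.scala, only the marked region is rendered into the docs page
    // $example on$
    val remover = new StopWordsRemover()
      .setInputCol("raw")
      .setOutputCol("filtered")
    // $example off$

In docs/ml-features.md the previous highlighted block is then replaced by a single line:

    {% include_example scala/org/apache/spark/examples/ml/StopWordsRemoverExample.scala %}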


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/78209b0c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/78209b0c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/78209b0c

Branch: refs/heads/master
Commit: 78209b0ccaf3f22b5e2345dfb2b98edfdb746819
Parents: 3e7e05f
Author: somideshmukh <so...@us.ibm.com>
Authored: Mon Dec 7 23:26:34 2015 -0800
Committer: Xiangrui Meng <me...@databricks.com>
Committed: Mon Dec 7 23:26:34 2015 -0800

----------------------------------------------------------------------
 docs/ml-features.md                             | 1109 +-----------------
 .../spark/examples/ml/JavaBinarizerExample.java |   68 ++
 .../examples/ml/JavaBucketizerExample.java      |   70 ++
 .../spark/examples/ml/JavaDCTExample.java       |   65 +
 .../ml/JavaElementwiseProductExample.java       |   75 ++
 .../examples/ml/JavaMinMaxScalerExample.java    |   50 +
 .../spark/examples/ml/JavaNGramExample.java     |   71 ++
 .../examples/ml/JavaNormalizerExample.java      |   52 +
 .../examples/ml/JavaOneHotEncoderExample.java   |   77 ++
 .../spark/examples/ml/JavaPCAExample.java       |   71 ++
 .../ml/JavaPolynomialExpansionExample.java      |   71 ++
 .../spark/examples/ml/JavaRFormulaExample.java  |   69 ++
 .../examples/ml/JavaStandardScalerExample.java  |   53 +
 .../ml/JavaStopWordsRemoverExample.java         |   65 +
 .../examples/ml/JavaStringIndexerExample.java   |   66 ++
 .../spark/examples/ml/JavaTokenizerExample.java |   75 ++
 .../examples/ml/JavaVectorAssemblerExample.java |   67 ++
 .../examples/ml/JavaVectorIndexerExample.java   |   60 +
 .../examples/ml/JavaVectorSlicerExample.java    |   73 ++
 .../src/main/python/ml/binarizer_example.py     |   43 +
 .../src/main/python/ml/bucketizer_example.py    |   42 +
 .../python/ml/elementwise_product_example.py    |   39 +
 examples/src/main/python/ml/n_gram_example.py   |   42 +
 .../src/main/python/ml/normalizer_example.py    |   41 +
 .../main/python/ml/onehot_encoder_example.py    |   47 +
 examples/src/main/python/ml/pca_example.py      |   42 +
 .../python/ml/polynomial_expansion_example.py   |   43 +
 examples/src/main/python/ml/rformula_example.py |   44 +
 .../main/python/ml/standard_scaler_example.py   |   42 +
 .../main/python/ml/stopwords_remover_example.py |   40 +
 .../main/python/ml/string_indexer_example.py    |   39 +
 .../src/main/python/ml/tokenizer_example.py     |   44 +
 .../main/python/ml/vector_assembler_example.py  |   42 +
 .../main/python/ml/vector_indexer_example.py    |   39 +
 .../spark/examples/ml/BinarizerExample.scala    |   48 +
 .../spark/examples/ml/BucketizerExample.scala   |   51 +
 .../apache/spark/examples/ml/DCTExample.scala   |   54 +
 .../examples/ml/ElementWiseProductExample.scala |   53 +
 .../spark/examples/ml/MinMaxScalerExample.scala |   49 +
 .../apache/spark/examples/ml/NGramExample.scala |   47 +
 .../spark/examples/ml/NormalizerExample.scala   |   50 +
 .../examples/ml/OneHotEncoderExample.scala      |   58 +
 .../apache/spark/examples/ml/PCAExample.scala   |   54 +
 .../ml/PolynomialExpansionExample.scala         |   53 +
 .../spark/examples/ml/RFormulaExample.scala     |   49 +
 .../examples/ml/StandardScalerExample.scala     |   51 +
 .../examples/ml/StopWordsRemoverExample.scala   |   48 +
 .../examples/ml/StringIndexerExample.scala      |   49 +
 .../spark/examples/ml/TokenizerExample.scala    |   54 +
 .../examples/ml/VectorAssemblerExample.scala    |   49 +
 .../examples/ml/VectorIndexerExample.scala      |   53 +
 .../spark/examples/ml/VectorSlicerExample.scala |   58 +
 52 files changed, 2806 insertions(+), 1058 deletions(-)
----------------------------------------------------------------------
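
As a usage note, the relocated examples are standalone programs and can be run directly once Spark is
built; something along these lines should work, assuming the standard launcher scripts (the paths and
class names are taken from the diff below):

    # Scala/Java examples go through run-example, which prefixes org.apache.spark.examples
    ./bin/run-example ml.StopWordsRemoverExample

    # Python examples go through spark-submit
    ./bin/spark-submit examples/src/main/python/ml/stopwords_remover_example.py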


http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/docs/ml-features.md
----------------------------------------------------------------------
diff --git a/docs/ml-features.md b/docs/ml-features.md
index b499d6e..5105a94 100644
--- a/docs/ml-features.md
+++ b/docs/ml-features.md
@@ -170,25 +170,7 @@ Refer to the [Tokenizer Scala docs](api/scala/index.html#org.apache.spark.ml.fea
 and the [RegexTokenizer Scala docs](api/scala/index.html#org.apache.spark.ml.feature.RegexTokenizer)
 for more details on the API.
 
-{% highlight scala %}
-import org.apache.spark.ml.feature.{Tokenizer, RegexTokenizer}
-
-val sentenceDataFrame = sqlContext.createDataFrame(Seq(
-  (0, "Hi I heard about Spark"),
-  (1, "I wish Java could use case classes"),
-  (2, "Logistic,regression,models,are,neat")
-)).toDF("label", "sentence")
-val tokenizer = new Tokenizer().setInputCol("sentence").setOutputCol("words")
-val regexTokenizer = new RegexTokenizer()
-  .setInputCol("sentence")
-  .setOutputCol("words")
-  .setPattern("\\W")  // alternatively .setPattern("\\w+").setGaps(false)
-
-val tokenized = tokenizer.transform(sentenceDataFrame)
-tokenized.select("words", "label").take(3).foreach(println)
-val regexTokenized = regexTokenizer.transform(sentenceDataFrame)
-regexTokenized.select("words", "label").take(3).foreach(println)
-{% endhighlight %}
+{% include_example scala/org/apache/spark/examples/ml/TokenizerExample.scala %}
 </div>
 
 <div data-lang="java" markdown="1">
@@ -197,44 +179,7 @@ Refer to the [Tokenizer Java docs](api/java/org/apache/spark/ml/feature/Tokenize
 and the [RegexTokenizer Java docs](api/java/org/apache/spark/ml/feature/RegexTokenizer.html)
 for more details on the API.
 
-{% highlight java %}
-import java.util.Arrays;
-
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.ml.feature.RegexTokenizer;
-import org.apache.spark.ml.feature.Tokenizer;
-import org.apache.spark.mllib.linalg.Vector;
-import org.apache.spark.sql.DataFrame;
-import org.apache.spark.sql.Row;
-import org.apache.spark.sql.RowFactory;
-import org.apache.spark.sql.types.DataTypes;
-import org.apache.spark.sql.types.Metadata;
-import org.apache.spark.sql.types.StructField;
-import org.apache.spark.sql.types.StructType;
-
-JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
-  RowFactory.create(0, "Hi I heard about Spark"),
-  RowFactory.create(1, "I wish Java could use case classes"),
-  RowFactory.create(2, "Logistic,regression,models,are,neat")
-));
-StructType schema = new StructType(new StructField[]{
-  new StructField("label", DataTypes.DoubleType, false, Metadata.empty()),
-  new StructField("sentence", DataTypes.StringType, false, Metadata.empty())
-});
-DataFrame sentenceDataFrame = sqlContext.createDataFrame(jrdd, schema);
-Tokenizer tokenizer = new Tokenizer().setInputCol("sentence").setOutputCol("words");
-DataFrame wordsDataFrame = tokenizer.transform(sentenceDataFrame);
-for (Row r : wordsDataFrame.select("words", "label").take(3)) {
-  java.util.List<String> words = r.getList(0);
-  for (String word : words) System.out.print(word + " ");
-  System.out.println();
-}
-
-RegexTokenizer regexTokenizer = new RegexTokenizer()
-  .setInputCol("sentence")
-  .setOutputCol("words")
-  .setPattern("\\W");  // alternatively .setPattern("\\w+").setGaps(false);
-{% endhighlight %}
+{% include_example java/org/apache/spark/examples/ml/JavaTokenizerExample.java %}
 </div>
 
 <div data-lang="python" markdown="1">
@@ -243,21 +188,7 @@ Refer to the [Tokenizer Python docs](api/python/pyspark.ml.html#pyspark.ml.featu
 and the [RegexTokenizer Python docs](api/python/pyspark.ml.html#pyspark.ml.feature.RegexTokenizer)
 for more details on the API.
 
-{% highlight python %}
-from pyspark.ml.feature import Tokenizer, RegexTokenizer
-
-sentenceDataFrame = sqlContext.createDataFrame([
-  (0, "Hi I heard about Spark"),
-  (1, "I wish Java could use case classes"),
-  (2, "Logistic,regression,models,are,neat")
-], ["label", "sentence"])
-tokenizer = Tokenizer(inputCol="sentence", outputCol="words")
-wordsDataFrame = tokenizer.transform(sentenceDataFrame)
-for words_label in wordsDataFrame.select("words", "label").take(3):
-  print(words_label)
-regexTokenizer = RegexTokenizer(inputCol="sentence", outputCol="words", pattern="\\W")
-# alternatively, pattern="\\w+", gaps(False)
-{% endhighlight %}
+{% include_example python/ml/tokenizer_example.py %}
 </div>
 </div>
 
@@ -306,19 +237,7 @@ filtered out.
 Refer to the [StopWordsRemover Scala docs](api/scala/index.html#org.apache.spark.ml.feature.StopWordsRemover)
 for more details on the API.
 
-{% highlight scala %}
-import org.apache.spark.ml.feature.StopWordsRemover
-
-val remover = new StopWordsRemover()
-  .setInputCol("raw")
-  .setOutputCol("filtered")
-val dataSet = sqlContext.createDataFrame(Seq(
-  (0, Seq("I", "saw", "the", "red", "baloon")),
-  (1, Seq("Mary", "had", "a", "little", "lamb"))
-)).toDF("id", "raw")
-
-remover.transform(dataSet).show()
-{% endhighlight %}
+{% include_example scala/org/apache/spark/examples/ml/StopWordsRemoverExample.scala %}
 </div>
 
 <div data-lang="java" markdown="1">
@@ -326,34 +245,7 @@ remover.transform(dataSet).show()
 Refer to the [StopWordsRemover Java docs](api/java/org/apache/spark/ml/feature/StopWordsRemover.html)
 for more details on the API.
 
-{% highlight java %}
-import java.util.Arrays;
-
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.ml.feature.StopWordsRemover;
-import org.apache.spark.sql.DataFrame;
-import org.apache.spark.sql.Row;
-import org.apache.spark.sql.RowFactory;
-import org.apache.spark.sql.types.DataTypes;
-import org.apache.spark.sql.types.Metadata;
-import org.apache.spark.sql.types.StructField;
-import org.apache.spark.sql.types.StructType;
-
-StopWordsRemover remover = new StopWordsRemover()
-  .setInputCol("raw")
-  .setOutputCol("filtered");
-
-JavaRDD<Row> rdd = jsc.parallelize(Arrays.asList(
-  RowFactory.create(Arrays.asList("I", "saw", "the", "red", "baloon")),
-  RowFactory.create(Arrays.asList("Mary", "had", "a", "little", "lamb"))
-));
-StructType schema = new StructType(new StructField[] {
-  new StructField("raw", DataTypes.createArrayType(DataTypes.StringType), false, Metadata.empty())
-});
-DataFrame dataset = jsql.createDataFrame(rdd, schema);
-
-remover.transform(dataset).show();
-{% endhighlight %}
+{% include_example java/org/apache/spark/examples/ml/JavaStopWordsRemoverExample.java %}
 </div>
 
 <div data-lang="python" markdown="1">
@@ -361,17 +253,7 @@ remover.transform(dataset).show();
 Refer to the [StopWordsRemover Python docs](api/python/pyspark.ml.html#pyspark.ml.feature.StopWordsRemover)
 for more details on the API.
 
-{% highlight python %}
-from pyspark.ml.feature import StopWordsRemover
-
-sentenceData = sqlContext.createDataFrame([
-  (0, ["I", "saw", "the", "red", "baloon"]),
-  (1, ["Mary", "had", "a", "little", "lamb"])
-], ["label", "raw"])
-
-remover = StopWordsRemover(inputCol="raw", outputCol="filtered")
-remover.transform(sentenceData).show(truncate=False)
-{% endhighlight %}
+{% include_example python/ml/stopwords_remover_example.py %}
 </div>
 </div>
 
@@ -388,19 +270,7 @@ An [n-gram](https://en.wikipedia.org/wiki/N-gram) is a sequence of $n$ tokens (t
 Refer to the [NGram Scala docs](api/scala/index.html#org.apache.spark.ml.feature.NGram)
 for more details on the API.
 
-{% highlight scala %}
-import org.apache.spark.ml.feature.NGram
-
-val wordDataFrame = sqlContext.createDataFrame(Seq(
-  (0, Array("Hi", "I", "heard", "about", "Spark")),
-  (1, Array("I", "wish", "Java", "could", "use", "case", "classes")),
-  (2, Array("Logistic", "regression", "models", "are", "neat"))
-)).toDF("label", "words")
-
-val ngram = new NGram().setInputCol("words").setOutputCol("ngrams")
-val ngramDataFrame = ngram.transform(wordDataFrame)
-ngramDataFrame.take(3).map(_.getAs[Stream[String]]("ngrams").toList).foreach(println)
-{% endhighlight %}
+{% include_example scala/org/apache/spark/examples/ml/NGramExample.scala %}
 </div>
 
 <div data-lang="java" markdown="1">
@@ -408,38 +278,7 @@ ngramDataFrame.take(3).map(_.getAs[Stream[String]]("ngrams").toList).foreach(pri
 Refer to the [NGram Java docs](api/java/org/apache/spark/ml/feature/NGram.html)
 for more details on the API.
 
-{% highlight java %}
-import java.util.Arrays;
-
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.ml.feature.NGram;
-import org.apache.spark.mllib.linalg.Vector;
-import org.apache.spark.sql.DataFrame;
-import org.apache.spark.sql.Row;
-import org.apache.spark.sql.RowFactory;
-import org.apache.spark.sql.types.DataTypes;
-import org.apache.spark.sql.types.Metadata;
-import org.apache.spark.sql.types.StructField;
-import org.apache.spark.sql.types.StructType;
-
-JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
-  RowFactory.create(0.0, Arrays.asList("Hi", "I", "heard", "about", "Spark")),
-  RowFactory.create(1.0, Arrays.asList("I", "wish", "Java", "could", "use", "case", "classes")),
-  RowFactory.create(2.0, Arrays.asList("Logistic", "regression", "models", "are", "neat"))
-));
-StructType schema = new StructType(new StructField[]{
-  new StructField("label", DataTypes.DoubleType, false, Metadata.empty()),
-  new StructField("words", DataTypes.createArrayType(DataTypes.StringType), false, Metadata.empty())
-});
-DataFrame wordDataFrame = sqlContext.createDataFrame(jrdd, schema);
-NGram ngramTransformer = new NGram().setInputCol("words").setOutputCol("ngrams");
-DataFrame ngramDataFrame = ngramTransformer.transform(wordDataFrame);
-for (Row r : ngramDataFrame.select("ngrams", "label").take(3)) {
-  java.util.List<String> ngrams = r.getList(0);
-  for (String ngram : ngrams) System.out.print(ngram + " --- ");
-  System.out.println();
-}
-{% endhighlight %}
+{% include_example java/org/apache/spark/examples/ml/JavaNGramExample.java %}
 </div>
 
 <div data-lang="python" markdown="1">
@@ -447,19 +286,7 @@ for (Row r : ngramDataFrame.select("ngrams", "label").take(3)) {
 Refer to the [NGram Python docs](api/python/pyspark.ml.html#pyspark.ml.feature.NGram)
 for more details on the API.
 
-{% highlight python %}
-from pyspark.ml.feature import NGram
-
-wordDataFrame = sqlContext.createDataFrame([
-  (0, ["Hi", "I", "heard", "about", "Spark"]),
-  (1, ["I", "wish", "Java", "could", "use", "case", "classes"]),
-  (2, ["Logistic", "regression", "models", "are", "neat"])
-], ["label", "words"])
-ngram = NGram(inputCol="words", outputCol="ngrams")
-ngramDataFrame = ngram.transform(wordDataFrame)
-for ngrams_label in ngramDataFrame.select("ngrams", "label").take(3):
-  print(ngrams_label)
-{% endhighlight %}
+{% include_example python/ml/n_gram_example.py %}
 </div>
 </div>
 
@@ -476,26 +303,7 @@ Binarization is the process of thresholding numerical features to binary (0/1) f
 Refer to the [Binarizer Scala docs](api/scala/index.html#org.apache.spark.ml.feature.Binarizer)
 for more details on the API.
 
-{% highlight scala %}
-import org.apache.spark.ml.feature.Binarizer
-import org.apache.spark.sql.DataFrame
-
-val data = Array(
-  (0, 0.1),
-  (1, 0.8),
-  (2, 0.2)
-)
-val dataFrame: DataFrame = sqlContext.createDataFrame(data).toDF("label", "feature")
-
-val binarizer: Binarizer = new Binarizer()
-  .setInputCol("feature")
-  .setOutputCol("binarized_feature")
-  .setThreshold(0.5)
-
-val binarizedDataFrame = binarizer.transform(dataFrame)
-val binarizedFeatures = binarizedDataFrame.select("binarized_feature")
-binarizedFeatures.collect().foreach(println)
-{% endhighlight %}
+{% include_example scala/org/apache/spark/examples/ml/BinarizerExample.scala %}
 </div>
 
 <div data-lang="java" markdown="1">
@@ -503,40 +311,7 @@ binarizedFeatures.collect().foreach(println)
 Refer to the [Binarizer Java docs](api/java/org/apache/spark/ml/feature/Binarizer.html)
 for more details on the API.
 
-{% highlight java %}
-import java.util.Arrays;
-
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.ml.feature.Binarizer;
-import org.apache.spark.sql.DataFrame;
-import org.apache.spark.sql.Row;
-import org.apache.spark.sql.RowFactory;
-import org.apache.spark.sql.types.DataTypes;
-import org.apache.spark.sql.types.Metadata;
-import org.apache.spark.sql.types.StructField;
-import org.apache.spark.sql.types.StructType;
-
-JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
-  RowFactory.create(0, 0.1),
-  RowFactory.create(1, 0.8),
-  RowFactory.create(2, 0.2)
-));
-StructType schema = new StructType(new StructField[]{
-  new StructField("label", DataTypes.DoubleType, false, Metadata.empty()),
-  new StructField("feature", DataTypes.DoubleType, false, Metadata.empty())
-});
-DataFrame continuousDataFrame = jsql.createDataFrame(jrdd, schema);
-Binarizer binarizer = new Binarizer()
-  .setInputCol("feature")
-  .setOutputCol("binarized_feature")
-  .setThreshold(0.5);
-DataFrame binarizedDataFrame = binarizer.transform(continuousDataFrame);
-DataFrame binarizedFeatures = binarizedDataFrame.select("binarized_feature");
-for (Row r : binarizedFeatures.collect()) {
-  Double binarized_value = r.getDouble(0);
-  System.out.println(binarized_value);
-}
-{% endhighlight %}
+{% include_example java/org/apache/spark/examples/ml/JavaBinarizerExample.java %}
 </div>
 
 <div data-lang="python" markdown="1">
@@ -544,20 +319,7 @@ for (Row r : binarizedFeatures.collect()) {
 Refer to the [Binarizer Python docs](api/python/pyspark.ml.html#pyspark.ml.feature.Binarizer)
 for more details on the API.
 
-{% highlight python %}
-from pyspark.ml.feature import Binarizer
-
-continuousDataFrame = sqlContext.createDataFrame([
-  (0, 0.1),
-  (1, 0.8),
-  (2, 0.2)
-], ["label", "feature"])
-binarizer = Binarizer(threshold=0.5, inputCol="feature", outputCol="binarized_feature")
-binarizedDataFrame = binarizer.transform(continuousDataFrame)
-binarizedFeatures = binarizedDataFrame.select("binarized_feature")
-for binarized_feature, in binarizedFeatures.collect():
-  print(binarized_feature)
-{% endhighlight %}
+{% include_example python/ml/binarizer_example.py %}
 </div>
 </div>
 
@@ -571,25 +333,7 @@ for binarized_feature, in binarizedFeatures.collect():
 Refer to the [PCA Scala docs](api/scala/index.html#org.apache.spark.ml.feature.PCA)
 for more details on the API.
 
-{% highlight scala %}
-import org.apache.spark.ml.feature.PCA
-import org.apache.spark.mllib.linalg.Vectors
-
-val data = Array(
-  Vectors.sparse(5, Seq((1, 1.0), (3, 7.0))),
-  Vectors.dense(2.0, 0.0, 3.0, 4.0, 5.0),
-  Vectors.dense(4.0, 0.0, 0.0, 6.0, 7.0)
-)
-val df = sqlContext.createDataFrame(data.map(Tuple1.apply)).toDF("features")
-val pca = new PCA()
-  .setInputCol("features")
-  .setOutputCol("pcaFeatures")
-  .setK(3)
-  .fit(df)
-val pcaDF = pca.transform(df)
-val result = pcaDF.select("pcaFeatures")
-result.show()
-{% endhighlight %}
+{% include_example scala/org/apache/spark/examples/ml/PCAExample.scala %}
 </div>
 
 <div data-lang="java" markdown="1">
@@ -597,42 +341,7 @@ result.show()
 Refer to the [PCA Java docs](api/java/org/apache/spark/ml/feature/PCA.html)
 for more details on the API.
 
-{% highlight java %}
-import java.util.Arrays;
-
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.ml.feature.PCA
-import org.apache.spark.ml.feature.PCAModel
-import org.apache.spark.mllib.linalg.VectorUDT;
-import org.apache.spark.mllib.linalg.Vectors;
-import org.apache.spark.sql.DataFrame;
-import org.apache.spark.sql.Row;
-import org.apache.spark.sql.RowFactory;
-import org.apache.spark.sql.SQLContext;
-import org.apache.spark.sql.types.Metadata;
-import org.apache.spark.sql.types.StructField;
-import org.apache.spark.sql.types.StructType;
-
-JavaSparkContext jsc = ...
-SQLContext jsql = ...
-JavaRDD<Row> data = jsc.parallelize(Arrays.asList(
-  RowFactory.create(Vectors.sparse(5, new int[]{1, 3}, new double[]{1.0, 7.0})),
-  RowFactory.create(Vectors.dense(2.0, 0.0, 3.0, 4.0, 5.0)),
-  RowFactory.create(Vectors.dense(4.0, 0.0, 0.0, 6.0, 7.0))
-));
-StructType schema = new StructType(new StructField[] {
-  new StructField("features", new VectorUDT(), false, Metadata.empty()),
-});
-DataFrame df = jsql.createDataFrame(data, schema);
-PCAModel pca = new PCA()
-  .setInputCol("features")
-  .setOutputCol("pcaFeatures")
-  .setK(3)
-  .fit(df);
-DataFrame result = pca.transform(df).select("pcaFeatures");
-result.show();
-{% endhighlight %}
+{% include_example java/org/apache/spark/examples/ml/JavaPCAExample.java %}
 </div>
 
 <div data-lang="python" markdown="1">
@@ -640,19 +349,7 @@ result.show();
 Refer to the [PCA Python docs](api/python/pyspark.ml.html#pyspark.ml.feature.PCA)
 for more details on the API.
 
-{% highlight python %}
-from pyspark.ml.feature import PCA
-from pyspark.mllib.linalg import Vectors
-
-data = [(Vectors.sparse(5, [(1, 1.0), (3, 7.0)]),),
-  (Vectors.dense([2.0, 0.0, 3.0, 4.0, 5.0]),),
-  (Vectors.dense([4.0, 0.0, 0.0, 6.0, 7.0]),)]
-df = sqlContext.createDataFrame(data,["features"])
-pca = PCA(k=3, inputCol="features", outputCol="pcaFeatures")
-model = pca.fit(df)
-result = model.transform(df).select("pcaFeatures")
-result.show(truncate=False)
-{% endhighlight %}
+{% include_example python/ml/pca_example.py %}
 </div>
 </div>
 
@@ -666,23 +363,7 @@ result.show(truncate=False)
 Refer to the [PolynomialExpansion Scala docs](api/scala/index.html#org.apache.spark.ml.feature.PolynomialExpansion)
 for more details on the API.
 
-{% highlight scala %}
-import org.apache.spark.ml.feature.PolynomialExpansion
-import org.apache.spark.mllib.linalg.Vectors
-
-val data = Array(
-  Vectors.dense(-2.0, 2.3),
-  Vectors.dense(0.0, 0.0),
-  Vectors.dense(0.6, -1.1)
-)
-val df = sqlContext.createDataFrame(data.map(Tuple1.apply)).toDF("features")
-val polynomialExpansion = new PolynomialExpansion()
-  .setInputCol("features")
-  .setOutputCol("polyFeatures")
-  .setDegree(3)
-val polyDF = polynomialExpansion.transform(df)
-polyDF.select("polyFeatures").take(3).foreach(println)
-{% endhighlight %}
+{% include_example scala/org/apache/spark/examples/ml/PolynomialExpansionExample.scala %}
 </div>
 
 <div data-lang="java" markdown="1">
@@ -690,43 +371,7 @@ polyDF.select("polyFeatures").take(3).foreach(println)
 Refer to the [PolynomialExpansion Java docs](api/java/org/apache/spark/ml/feature/PolynomialExpansion.html)
 for more details on the API.
 
-{% highlight java %}
-import java.util.Arrays;
-
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.mllib.linalg.Vector;
-import org.apache.spark.mllib.linalg.VectorUDT;
-import org.apache.spark.mllib.linalg.Vectors;
-import org.apache.spark.sql.DataFrame;
-import org.apache.spark.sql.Row;
-import org.apache.spark.sql.RowFactory;
-import org.apache.spark.sql.SQLContext;
-import org.apache.spark.sql.types.Metadata;
-import org.apache.spark.sql.types.StructField;
-import org.apache.spark.sql.types.StructType;
-
-JavaSparkContext jsc = ...
-SQLContext jsql = ...
-PolynomialExpansion polyExpansion = new PolynomialExpansion()
-  .setInputCol("features")
-  .setOutputCol("polyFeatures")
-  .setDegree(3);
-JavaRDD<Row> data = jsc.parallelize(Arrays.asList(
-  RowFactory.create(Vectors.dense(-2.0, 2.3)),
-  RowFactory.create(Vectors.dense(0.0, 0.0)),
-  RowFactory.create(Vectors.dense(0.6, -1.1))
-));
-StructType schema = new StructType(new StructField[] {
-  new StructField("features", new VectorUDT(), false, Metadata.empty()),
-});
-DataFrame df = jsql.createDataFrame(data, schema);
-DataFrame polyDF = polyExpansion.transform(df);
-Row[] row = polyDF.select("polyFeatures").take(3);
-for (Row r : row) {
-  System.out.println(r.get(0));
-}
-{% endhighlight %}
+{% include_example java/org/apache/spark/examples/ml/JavaPolynomialExpansionExample.java %}
 </div>
 
 <div data-lang="python" markdown="1">
@@ -734,20 +379,7 @@ for (Row r : row) {
 Refer to the [PolynomialExpansion Python docs](api/python/pyspark.ml.html#pyspark.ml.feature.PolynomialExpansion)
 for more details on the API.
 
-{% highlight python %}
-from pyspark.ml.feature import PolynomialExpansion
-from pyspark.mllib.linalg import Vectors
-
-df = sqlContext.createDataFrame(
-  [(Vectors.dense([-2.0, 2.3]), ),
-  (Vectors.dense([0.0, 0.0]), ),
-  (Vectors.dense([0.6, -1.1]), )],
-  ["features"])
-px = PolynomialExpansion(degree=2, inputCol="features", outputCol="polyFeatures")
-polyDF = px.transform(df)
-for expanded in polyDF.select("polyFeatures").take(3):
-  print(expanded)
-{% endhighlight %}
+{% include_example python/ml/polynomial_expansion_example.py %}
 </div>
 </div>
 
@@ -771,22 +403,7 @@ $0$th DCT coefficient and _not_ the $N/2$th).
 Refer to the [DCT Scala docs](api/scala/index.html#org.apache.spark.ml.feature.DCT)
 for more details on the API.
 
-{% highlight scala %}
-import org.apache.spark.ml.feature.DCT
-import org.apache.spark.mllib.linalg.Vectors
-
-val data = Seq(
-  Vectors.dense(0.0, 1.0, -2.0, 3.0),
-  Vectors.dense(-1.0, 2.0, 4.0, -7.0),
-  Vectors.dense(14.0, -2.0, -5.0, 1.0))
-val df = sqlContext.createDataFrame(data.map(Tuple1.apply)).toDF("features")
-val dct = new DCT()
-  .setInputCol("features")
-  .setOutputCol("featuresDCT")
-  .setInverse(false)
-val dctDf = dct.transform(df)
-dctDf.select("featuresDCT").show(3)
-{% endhighlight %}
+{% include_example scala/org/apache/spark/examples/ml/DCTExample.scala %}
 </div>
 
 <div data-lang="java" markdown="1">
@@ -794,39 +411,7 @@ dctDf.select("featuresDCT").show(3)
 Refer to the [DCT Java docs](api/java/org/apache/spark/ml/feature/DCT.html)
 for more details on the API.
 
-{% highlight java %}
-import java.util.Arrays;
-
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.ml.feature.DCT;
-import org.apache.spark.mllib.linalg.Vector;
-import org.apache.spark.mllib.linalg.VectorUDT;
-import org.apache.spark.mllib.linalg.Vectors;
-import org.apache.spark.sql.DataFrame;
-import org.apache.spark.sql.Row;
-import org.apache.spark.sql.RowFactory;
-import org.apache.spark.sql.SQLContext;
-import org.apache.spark.sql.types.Metadata;
-import org.apache.spark.sql.types.StructField;
-import org.apache.spark.sql.types.StructType;
-
-JavaRDD<Row> data = jsc.parallelize(Arrays.asList(
-  RowFactory.create(Vectors.dense(0.0, 1.0, -2.0, 3.0)),
-  RowFactory.create(Vectors.dense(-1.0, 2.0, 4.0, -7.0)),
-  RowFactory.create(Vectors.dense(14.0, -2.0, -5.0, 1.0))
-));
-StructType schema = new StructType(new StructField[] {
-  new StructField("features", new VectorUDT(), false, Metadata.empty()),
-});
-DataFrame df = jsql.createDataFrame(data, schema);
-DCT dct = new DCT()
-  .setInputCol("features")
-  .setOutputCol("featuresDCT")
-  .setInverse(false);
-DataFrame dctDf = dct.transform(df);
-dctDf.select("featuresDCT").show(3);
-{% endhighlight %}
+{% include_example java/org/apache/spark/examples/ml/JavaDCTExample.java %}
 </div>
 </div>
 
@@ -881,18 +466,7 @@ index `2`.
 Refer to the [StringIndexer Scala docs](api/scala/index.html#org.apache.spark.ml.feature.StringIndexer)
 for more details on the API.
 
-{% highlight scala %}
-import org.apache.spark.ml.feature.StringIndexer
-
-val df = sqlContext.createDataFrame(
-  Seq((0, "a"), (1, "b"), (2, "c"), (3, "a"), (4, "a"), (5, "c"))
-).toDF("id", "category")
-val indexer = new StringIndexer()
-  .setInputCol("category")
-  .setOutputCol("categoryIndex")
-val indexed = indexer.fit(df).transform(df)
-indexed.show()
-{% endhighlight %}
+{% include_example scala/org/apache/spark/examples/ml/StringIndexerExample.scala %}
 </div>
 
 <div data-lang="java" markdown="1">
@@ -900,37 +474,7 @@ indexed.show()
 Refer to the [StringIndexer Java docs](api/java/org/apache/spark/ml/feature/StringIndexer.html)
 for more details on the API.
 
-{% highlight java %}
-import java.util.Arrays;
-
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.ml.feature.StringIndexer;
-import org.apache.spark.sql.DataFrame;
-import org.apache.spark.sql.Row;
-import org.apache.spark.sql.RowFactory;
-import org.apache.spark.sql.types.StructField;
-import org.apache.spark.sql.types.StructType;
-import static org.apache.spark.sql.types.DataTypes.*;
-
-JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
-  RowFactory.create(0, "a"),
-  RowFactory.create(1, "b"),
-  RowFactory.create(2, "c"),
-  RowFactory.create(3, "a"),
-  RowFactory.create(4, "a"),
-  RowFactory.create(5, "c")
-));
-StructType schema = new StructType(new StructField[] {
-  createStructField("id", DoubleType, false),
-  createStructField("category", StringType, false)
-});
-DataFrame df = sqlContext.createDataFrame(jrdd, schema);
-StringIndexer indexer = new StringIndexer()
-  .setInputCol("category")
-  .setOutputCol("categoryIndex");
-DataFrame indexed = indexer.fit(df).transform(df);
-indexed.show();
-{% endhighlight %}
+{% include_example java/org/apache/spark/examples/ml/JavaStringIndexerExample.java %}
 </div>
 
 <div data-lang="python" markdown="1">
@@ -938,16 +482,7 @@ indexed.show();
 Refer to the [StringIndexer Python docs](api/python/pyspark.ml.html#pyspark.ml.feature.StringIndexer)
 for more details on the API.
 
-{% highlight python %}
-from pyspark.ml.feature import StringIndexer
-
-df = sqlContext.createDataFrame(
-    [(0, "a"), (1, "b"), (2, "c"), (3, "a"), (4, "a"), (5, "c")],
-    ["id", "category"])
-indexer = StringIndexer(inputCol="category", outputCol="categoryIndex")
-indexed = indexer.fit(df).transform(df)
-indexed.show()
-{% endhighlight %}
+{% include_example python/ml/string_indexer_example.py %}
 </div>
 </div>
 
@@ -961,29 +496,7 @@ indexed.show()
 Refer to the [OneHotEncoder Scala docs](api/scala/index.html#org.apache.spark.ml.feature.OneHotEncoder)
 for more details on the API.
 
-{% highlight scala %}
-import org.apache.spark.ml.feature.{OneHotEncoder, StringIndexer}
-
-val df = sqlContext.createDataFrame(Seq(
-  (0, "a"),
-  (1, "b"),
-  (2, "c"),
-  (3, "a"),
-  (4, "a"),
-  (5, "c")
-)).toDF("id", "category")
-
-val indexer = new StringIndexer()
-  .setInputCol("category")
-  .setOutputCol("categoryIndex")
-  .fit(df)
-val indexed = indexer.transform(df)
-
-val encoder = new OneHotEncoder().setInputCol("categoryIndex").
-  setOutputCol("categoryVec")
-val encoded = encoder.transform(indexed)
-encoded.select("id", "categoryVec").foreach(println)
-{% endhighlight %}
+{% include_example scala/org/apache/spark/examples/ml/OneHotEncoderExample.scala %}
 </div>
 
 <div data-lang="java" markdown="1">
@@ -991,45 +504,7 @@ encoded.select("id", "categoryVec").foreach(println)
 Refer to the [OneHotEncoder Java docs](api/java/org/apache/spark/ml/feature/OneHotEncoder.html)
 for more details on the API.
 
-{% highlight java %}
-import java.util.Arrays;
-
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.ml.feature.OneHotEncoder;
-import org.apache.spark.ml.feature.StringIndexer;
-import org.apache.spark.ml.feature.StringIndexerModel;
-import org.apache.spark.sql.DataFrame;
-import org.apache.spark.sql.Row;
-import org.apache.spark.sql.RowFactory;
-import org.apache.spark.sql.types.DataTypes;
-import org.apache.spark.sql.types.Metadata;
-import org.apache.spark.sql.types.StructField;
-import org.apache.spark.sql.types.StructType;
-
-JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
-    RowFactory.create(0, "a"),
-    RowFactory.create(1, "b"),
-    RowFactory.create(2, "c"),
-    RowFactory.create(3, "a"),
-    RowFactory.create(4, "a"),
-    RowFactory.create(5, "c")
-));
-StructType schema = new StructType(new StructField[]{
-    new StructField("id", DataTypes.DoubleType, false, Metadata.empty()),
-    new StructField("category", DataTypes.StringType, false, Metadata.empty())
-});
-DataFrame df = sqlContext.createDataFrame(jrdd, schema);
-StringIndexerModel indexer = new StringIndexer()
-  .setInputCol("category")
-  .setOutputCol("categoryIndex")
-  .fit(df);
-DataFrame indexed = indexer.transform(df);
-
-OneHotEncoder encoder = new OneHotEncoder()
-  .setInputCol("categoryIndex")
-  .setOutputCol("categoryVec");
-DataFrame encoded = encoder.transform(indexed);
-{% endhighlight %}
+{% include_example java/org/apache/spark/examples/ml/JavaOneHotEncoderExample.java %}
 </div>
 
 <div data-lang="python" markdown="1">
@@ -1037,24 +512,7 @@ DataFrame encoded = encoder.transform(indexed);
 Refer to the [OneHotEncoder Python docs](api/python/pyspark.ml.html#pyspark.ml.feature.OneHotEncoder)
 for more details on the API.
 
-{% highlight python %}
-from pyspark.ml.feature import OneHotEncoder, StringIndexer
-
-df = sqlContext.createDataFrame([
-  (0, "a"),
-  (1, "b"),
-  (2, "c"),
-  (3, "a"),
-  (4, "a"),
-  (5, "c")
-], ["id", "category"])
-
-stringIndexer = StringIndexer(inputCol="category", outputCol="categoryIndex")
-model = stringIndexer.fit(df)
-indexed = model.transform(df)
-encoder = OneHotEncoder(includeFirst=False, inputCol="categoryIndex", outputCol="categoryVec")
-encoded = encoder.transform(indexed)
-{% endhighlight %}
+{% include_example python/ml/onehot_encoder_example.py %}
 </div>
 </div>
 
@@ -1078,23 +536,7 @@ In the example below, we read in a dataset of labeled points and then use `Vecto
 Refer to the [VectorIndexer Scala docs](api/scala/index.html#org.apache.spark.ml.feature.VectorIndexer)
 for more details on the API.
 
-{% highlight scala %}
-import org.apache.spark.ml.feature.VectorIndexer
-
-val data = sqlContext.read.format("libsvm")
-  .load("data/mllib/sample_libsvm_data.txt")
-val indexer = new VectorIndexer()
-  .setInputCol("features")
-  .setOutputCol("indexed")
-  .setMaxCategories(10)
-val indexerModel = indexer.fit(data)
-val categoricalFeatures: Set[Int] = indexerModel.categoryMaps.keys.toSet
-println(s"Chose ${categoricalFeatures.size} categorical features: " +
-  categoricalFeatures.mkString(", "))
-
-// Create new column "indexed" with categorical values transformed to indices
-val indexedData = indexerModel.transform(data)
-{% endhighlight %}
+{% include_example scala/org/apache/spark/examples/ml/VectorIndexerExample.scala %}
 </div>
 
 <div data-lang="java" markdown="1">
@@ -1102,30 +544,7 @@ val indexedData = indexerModel.transform(data)
 Refer to the [VectorIndexer Java docs](api/java/org/apache/spark/ml/feature/VectorIndexer.html)
 for more details on the API.
 
-{% highlight java %}
-import java.util.Map;
-
-import org.apache.spark.ml.feature.VectorIndexer;
-import org.apache.spark.ml.feature.VectorIndexerModel;
-import org.apache.spark.sql.DataFrame;
-
-DataFrame data = sqlContext.read().format("libsvm")
-  .load("data/mllib/sample_libsvm_data.txt");
-VectorIndexer indexer = new VectorIndexer()
-  .setInputCol("features")
-  .setOutputCol("indexed")
-  .setMaxCategories(10);
-VectorIndexerModel indexerModel = indexer.fit(data);
-Map<Integer, Map<Double, Integer>> categoryMaps = indexerModel.javaCategoryMaps();
-System.out.print("Chose " + categoryMaps.size() + "categorical features:");
-for (Integer feature : categoryMaps.keySet()) {
-  System.out.print(" " + feature);
-}
-System.out.println();
-
-// Create new column "indexed" with categorical values transformed to indices
-DataFrame indexedData = indexerModel.transform(data);
-{% endhighlight %}
+{% include_example java/org/apache/spark/examples/ml/JavaVectorIndexerExample.java %}
 </div>
 
 <div data-lang="python" markdown="1">
@@ -1133,17 +552,7 @@ DataFrame indexedData = indexerModel.transform(data);
 Refer to the [VectorIndexer Python docs](api/python/pyspark.ml.html#pyspark.ml.feature.VectorIndexer)
 for more details on the API.
 
-{% highlight python %}
-from pyspark.ml.feature import VectorIndexer
-
-data = sqlContext.read.format("libsvm")
-  .load("data/mllib/sample_libsvm_data.txt")
-indexer = VectorIndexer(inputCol="features", outputCol="indexed", maxCategories=10)
-indexerModel = indexer.fit(data)
-
-# Create new column "indexed" with categorical values transformed to indices
-indexedData = indexerModel.transform(data)
-{% endhighlight %}
+{% include_example python/ml/vector_indexer_example.py %}
 </div>
 </div>
 
@@ -1160,22 +569,7 @@ The following example demonstrates how to load a dataset in libsvm format and th
 Refer to the [Normalizer Scala docs](api/scala/index.html#org.apache.spark.ml.feature.Normalizer)
 for more details on the API.
 
-{% highlight scala %}
-import org.apache.spark.ml.feature.Normalizer
-
-val dataFrame = sqlContext.read.format("libsvm")
-  .load("data/mllib/sample_libsvm_data.txt")
-
-// Normalize each Vector using $L^1$ norm.
-val normalizer = new Normalizer()
-  .setInputCol("features")
-  .setOutputCol("normFeatures")
-  .setP(1.0)
-val l1NormData = normalizer.transform(dataFrame)
-
-// Normalize each Vector using $L^\infty$ norm.
-val lInfNormData = normalizer.transform(dataFrame, normalizer.p -> Double.PositiveInfinity)
-{% endhighlight %}
+{% include_example scala/org/apache/spark/examples/ml/NormalizerExample.scala %}
 </div>
 
 <div data-lang="java">
@@ -1183,24 +577,7 @@ val lInfNormData = normalizer.transform(dataFrame, normalizer.p -> Double.Positi
 Refer to the [Normalizer Java docs](api/java/org/apache/spark/ml/feature/Normalizer.html)
 for more details on the API.
 
-{% highlight java %}
-import org.apache.spark.ml.feature.Normalizer;
-import org.apache.spark.sql.DataFrame;
-
-DataFrame dataFrame = sqlContext.read().format("libsvm")
-  .load("data/mllib/sample_libsvm_data.txt");
-
-// Normalize each Vector using $L^1$ norm.
-Normalizer normalizer = new Normalizer()
-  .setInputCol("features")
-  .setOutputCol("normFeatures")
-  .setP(1.0);
-DataFrame l1NormData = normalizer.transform(dataFrame);
-
-// Normalize each Vector using $L^\infty$ norm.
-DataFrame lInfNormData =
-  normalizer.transform(dataFrame, normalizer.p().w(Double.POSITIVE_INFINITY));
-{% endhighlight %}
+{% include_example java/org/apache/spark/examples/ml/JavaNormalizerExample.java %}
 </div>
 
 <div data-lang="python">
@@ -1208,19 +585,7 @@ DataFrame lInfNormData =
 Refer to the [Normalizer Python docs](api/python/pyspark.ml.html#pyspark.ml.feature.Normalizer)
 for more details on the API.
 
-{% highlight python %}
-from pyspark.ml.feature import Normalizer
-
-dataFrame = sqlContext.read.format("libsvm")
-  .load("data/mllib/sample_libsvm_data.txt")
-
-# Normalize each Vector using $L^1$ norm.
-normalizer = Normalizer(inputCol="features", outputCol="normFeatures", p=1.0)
-l1NormData = normalizer.transform(dataFrame)
-
-# Normalize each Vector using $L^\infty$ norm.
-lInfNormData = normalizer.transform(dataFrame, {normalizer.p: float("inf")})
-{% endhighlight %}
+{% include_example python/ml/normalizer_example.py %}
 </div>
 </div>
 
@@ -1244,23 +609,7 @@ The following example demonstrates how to load a dataset in libsvm format and th
 Refer to the [StandardScaler Scala docs](api/scala/index.html#org.apache.spark.ml.feature.StandardScaler)
 for more details on the API.
 
-{% highlight scala %}
-import org.apache.spark.ml.feature.StandardScaler
-
-val dataFrame = sqlContext.read.format("libsvm")
-  .load("data/mllib/sample_libsvm_data.txt")
-val scaler = new StandardScaler()
-  .setInputCol("features")
-  .setOutputCol("scaledFeatures")
-  .setWithStd(true)
-  .setWithMean(false)
-
-// Compute summary statistics by fitting the StandardScaler
-val scalerModel = scaler.fit(dataFrame)
-
-// Normalize each feature to have unit standard deviation.
-val scaledData = scalerModel.transform(dataFrame)
-{% endhighlight %}
+{% include_example scala/org/apache/spark/examples/ml/StandardScalerExample.scala %}
 </div>
 
 <div data-lang="java">
@@ -1268,25 +617,7 @@ val scaledData = scalerModel.transform(dataFrame)
 Refer to the [StandardScaler Java docs](api/java/org/apache/spark/ml/feature/StandardScaler.html)
 for more details on the API.
 
-{% highlight java %}
-import org.apache.spark.ml.feature.StandardScaler;
-import org.apache.spark.ml.feature.StandardScalerModel;
-import org.apache.spark.sql.DataFrame;
-
-DataFrame dataFrame = sqlContext.read().format("libsvm")
-  .load("data/mllib/sample_libsvm_data.txt");
-StandardScaler scaler = new StandardScaler()
-  .setInputCol("features")
-  .setOutputCol("scaledFeatures")
-  .setWithStd(true)
-  .setWithMean(false);
-
-// Compute summary statistics by fitting the StandardScaler
-StandardScalerModel scalerModel = scaler.fit(dataFrame);
-
-// Normalize each feature to have unit standard deviation.
-DataFrame scaledData = scalerModel.transform(dataFrame);
-{% endhighlight %}
+{% include_example java/org/apache/spark/examples/ml/JavaStandardScalerExample.java %}
 </div>
 
 <div data-lang="python">
@@ -1294,20 +625,7 @@ DataFrame scaledData = scalerModel.transform(dataFrame);
 Refer to the [StandardScaler Python docs](api/python/pyspark.ml.html#pyspark.ml.feature.StandardScaler)
 for more details on the API.
 
-{% highlight python %}
-from pyspark.ml.feature import StandardScaler
-
-dataFrame = sqlContext.read.format("libsvm")
-  .load("data/mllib/sample_libsvm_data.txt")
-scaler = StandardScaler(inputCol="features", outputCol="scaledFeatures",
-                        withStd=True, withMean=False)
-
-# Compute summary statistics by fitting the StandardScaler
-scalerModel = scaler.fit(dataFrame)
-
-# Normalize each feature to have unit standard deviation.
-scaledData = scalerModel.transform(dataFrame)
-{% endhighlight %}
+{% include_example python/ml/standard_scaler_example.py %}
 </div>
 </div>
 
@@ -1337,21 +655,7 @@ Refer to the [MinMaxScaler Scala docs](api/scala/index.html#org.apache.spark.ml.
 and the [MinMaxScalerModel Scala docs](api/scala/index.html#org.apache.spark.ml.feature.MinMaxScalerModel)
 for more details on the API.
 
-{% highlight scala %}
-import org.apache.spark.ml.feature.MinMaxScaler
-
-val dataFrame = sqlContext.read.format("libsvm")
-  .load("data/mllib/sample_libsvm_data.txt")
-val scaler = new MinMaxScaler()
-  .setInputCol("features")
-  .setOutputCol("scaledFeatures")
-
-// Compute summary statistics and generate MinMaxScalerModel
-val scalerModel = scaler.fit(dataFrame)
-
-// rescale each feature to range [min, max].
-val scaledData = scalerModel.transform(dataFrame)
-{% endhighlight %}
+{% include_example scala/org/apache/spark/examples/ml/MinMaxScalerExample.scala %}
 </div>
 
 <div data-lang="java" markdown="1">
@@ -1360,24 +664,7 @@ Refer to the [MinMaxScaler Java docs](api/java/org/apache/spark/ml/feature/MinMa
 and the [MinMaxScalerModel Java docs](api/java/org/apache/spark/ml/feature/MinMaxScalerModel.html)
 for more details on the API.
 
-{% highlight java %}
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.ml.feature.MinMaxScaler;
-import org.apache.spark.ml.feature.MinMaxScalerModel;
-import org.apache.spark.sql.DataFrame;
-
-DataFrame dataFrame = sqlContext.read().format("libsvm")
-  .load("data/mllib/sample_libsvm_data.txt");
-MinMaxScaler scaler = new MinMaxScaler()
-  .setInputCol("features")
-  .setOutputCol("scaledFeatures");
-
-// Compute summary statistics and generate MinMaxScalerModel
-MinMaxScalerModel scalerModel = scaler.fit(dataFrame);
-
-// rescale each feature to range [min, max].
-DataFrame scaledData = scalerModel.transform(dataFrame);
-{% endhighlight %}
+{% include_example java/org/apache/spark/examples/ml/JavaMinMaxScalerExample.java %}
 </div>
 </div>
 
@@ -1401,23 +688,7 @@ The following example demonstrates how to bucketize a column of `Double`s into a
 Refer to the [Bucketizer Scala docs](api/scala/index.html#org.apache.spark.ml.feature.Bucketizer)
 for more details on the API.
 
-{% highlight scala %}
-import org.apache.spark.ml.feature.Bucketizer
-import org.apache.spark.sql.DataFrame
-
-val splits = Array(Double.NegativeInfinity, -0.5, 0.0, 0.5, Double.PositiveInfinity)
-
-val data = Array(-0.5, -0.3, 0.0, 0.2)
-val dataFrame = sqlContext.createDataFrame(data.map(Tuple1.apply)).toDF("features")
-
-val bucketizer = new Bucketizer()
-  .setInputCol("features")
-  .setOutputCol("bucketedFeatures")
-  .setSplits(splits)
-
-// Transform original data into its bucket index.
-val bucketedData = bucketizer.transform(dataFrame)
-{% endhighlight %}
+{% include_example scala/org/apache/spark/examples/ml/BucketizerExample.scala %}
 </div>
 
 <div data-lang="java">
@@ -1425,38 +696,7 @@ val bucketedData = bucketizer.transform(dataFrame)
 Refer to the [Bucketizer Java docs](api/java/org/apache/spark/ml/feature/Bucketizer.html)
 for more details on the API.
 
-{% highlight java %}
-import java.util.Arrays;
-
-import org.apache.spark.sql.DataFrame;
-import org.apache.spark.sql.Row;
-import org.apache.spark.sql.RowFactory;
-import org.apache.spark.sql.types.DataTypes;
-import org.apache.spark.sql.types.Metadata;
-import org.apache.spark.sql.types.StructField;
-import org.apache.spark.sql.types.StructType;
-
-double[] splits = {Double.NEGATIVE_INFINITY, -0.5, 0.0, 0.5, Double.POSITIVE_INFINITY};
-
-JavaRDD<Row> data = jsc.parallelize(Arrays.asList(
-  RowFactory.create(-0.5),
-  RowFactory.create(-0.3),
-  RowFactory.create(0.0),
-  RowFactory.create(0.2)
-));
-StructType schema = new StructType(new StructField[] {
-  new StructField("features", DataTypes.DoubleType, false, Metadata.empty())
-});
-DataFrame dataFrame = jsql.createDataFrame(data, schema);
-
-Bucketizer bucketizer = new Bucketizer()
-  .setInputCol("features")
-  .setOutputCol("bucketedFeatures")
-  .setSplits(splits);
-
-// Transform original data into its bucket index.
-DataFrame bucketedData = bucketizer.transform(dataFrame);
-{% endhighlight %}
+{% include_example java/org/apache/spark/examples/ml/JavaBucketizerExample.java %}
 </div>
 
 <div data-lang="python">
@@ -1464,19 +704,7 @@ DataFrame bucketedData = bucketizer.transform(dataFrame);
 Refer to the [Bucketizer Python docs](api/python/pyspark.ml.html#pyspark.ml.feature.Bucketizer)
 for more details on the API.
 
-{% highlight python %}
-from pyspark.ml.feature import Bucketizer
-
-splits = [-float("inf"), -0.5, 0.0, 0.5, float("inf")]
-
-data = [(-0.5,), (-0.3,), (0.0,), (0.2,)]
-dataFrame = sqlContext.createDataFrame(data, ["features"])
-
-bucketizer = Bucketizer(splits=splits, inputCol="features", outputCol="bucketedFeatures")
-
-# Transform original data into its bucket index.
-bucketedData = bucketizer.transform(dataFrame)
-{% endhighlight %}
+{% include_example python/ml/bucketizer_example.py %}
 </div>
 </div>
 
@@ -1508,25 +736,7 @@ This example below demonstrates how to transform vectors using a transforming ve
 Refer to the [ElementwiseProduct Scala docs](api/scala/index.html#org.apache.spark.ml.feature.ElementwiseProduct)
 for more details on the API.
 
-{% highlight scala %}
-import org.apache.spark.ml.feature.ElementwiseProduct
-import org.apache.spark.mllib.linalg.Vectors
-
-// Create some vector data; also works for sparse vectors
-val dataFrame = sqlContext.createDataFrame(Seq(
-  ("a", Vectors.dense(1.0, 2.0, 3.0)),
-  ("b", Vectors.dense(4.0, 5.0, 6.0)))).toDF("id", "vector")
-
-val transformingVector = Vectors.dense(0.0, 1.0, 2.0)
-val transformer = new ElementwiseProduct()
-  .setScalingVec(transformingVector)
-  .setInputCol("vector")
-  .setOutputCol("transformedVector")
-
-// Batch transform the vectors to create new column:
-transformer.transform(dataFrame).show()
-
-{% endhighlight %}
+{% include_example scala/org/apache/spark/examples/ml/ElementwiseProductExample.scala %}
 </div>
 
 <div data-lang="java" markdown="1">
@@ -1534,41 +744,7 @@ transformer.transform(dataFrame).show()
 Refer to the [ElementwiseProduct Java docs](api/java/org/apache/spark/ml/feature/ElementwiseProduct.html)
 for more details on the API.
 
-{% highlight java %}
-import java.util.Arrays;
-
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.ml.feature.ElementwiseProduct;
-import org.apache.spark.mllib.linalg.Vector;
-import org.apache.spark.mllib.linalg.Vectors;
-import org.apache.spark.sql.DataFrame;
-import org.apache.spark.sql.Row;
-import org.apache.spark.sql.RowFactory;
-import org.apache.spark.sql.SQLContext;
-import org.apache.spark.sql.types.DataTypes;
-import org.apache.spark.sql.types.Metadata;
-import org.apache.spark.sql.types.StructField;
-import org.apache.spark.sql.types.StructType;
-
-// Create some vector data; also works for sparse vectors
-JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
-  RowFactory.create("a", Vectors.dense(1.0, 2.0, 3.0)),
-  RowFactory.create("b", Vectors.dense(4.0, 5.0, 6.0))
-));
-List<StructField> fields = new ArrayList<StructField>(2);
-fields.add(DataTypes.createStructField("id", DataTypes.StringType, false));
-fields.add(DataTypes.createStructField("vector", DataTypes.StringType, false));
-StructType schema = DataTypes.createStructType(fields);
-DataFrame dataFrame = sqlContext.createDataFrame(jrdd, schema);
-Vector transformingVector = Vectors.dense(0.0, 1.0, 2.0);
-ElementwiseProduct transformer = new ElementwiseProduct()
-  .setScalingVec(transformingVector)
-  .setInputCol("vector")
-  .setOutputCol("transformedVector");
-// Batch transform the vectors to create new column:
-transformer.transform(dataFrame).show();
-
-{% endhighlight %}
+{% include_example java/org/apache/spark/examples/ml/JavaElementwiseProductExample.java %}
 </div>
 
 <div data-lang="python" markdown="1">
@@ -1576,19 +752,8 @@ transformer.transform(dataFrame).show();
 Refer to the [ElementwiseProduct Python docs](api/python/pyspark.ml.html#pyspark.ml.feature.ElementwiseProduct)
 for more details on the API.
 
-{% highlight python %}
-from pyspark.ml.feature import ElementwiseProduct
-from pyspark.mllib.linalg import Vectors
-
-data = [(Vectors.dense([1.0, 2.0, 3.0]),), (Vectors.dense([4.0, 5.0, 6.0]),)]
-df = sqlContext.createDataFrame(data, ["vector"])
-transformer = ElementwiseProduct(scalingVec=Vectors.dense([0.0, 1.0, 2.0]), 
-                                 inputCol="vector", outputCol="transformedVector")
-transformer.transform(df).show()
-
-{% endhighlight %}
+{% include_example python/ml/elementwise_product_example.py %}
 </div>
-
 </div>
 
 ## VectorAssembler
@@ -1632,19 +797,7 @@ output column to `features`, after transformation we should get the following Da
 Refer to the [VectorAssembler Scala docs](api/scala/index.html#org.apache.spark.ml.feature.VectorAssembler)
 for more details on the API.
 
-{% highlight scala %}
-import org.apache.spark.mllib.linalg.Vectors
-import org.apache.spark.ml.feature.VectorAssembler
-
-val dataset = sqlContext.createDataFrame(
-  Seq((0, 18, 1.0, Vectors.dense(0.0, 10.0, 0.5), 1.0))
-).toDF("id", "hour", "mobile", "userFeatures", "clicked")
-val assembler = new VectorAssembler()
-  .setInputCols(Array("hour", "mobile", "userFeatures"))
-  .setOutputCol("features")
-val output = assembler.transform(dataset)
-println(output.select("features", "clicked").first())
-{% endhighlight %}
+{% include_example scala/org/apache/spark/examples/ml/VectorAssemblerExample.scala %}
 </div>
 
 <div data-lang="java" markdown="1">
@@ -1652,36 +805,7 @@ println(output.select("features", "clicked").first())
 Refer to the [VectorAssembler Java docs](api/java/org/apache/spark/ml/feature/VectorAssembler.html)
 for more details on the API.
 
-{% highlight java %}
-import java.util.Arrays;
-
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.mllib.linalg.VectorUDT;
-import org.apache.spark.mllib.linalg.Vectors;
-import org.apache.spark.sql.DataFrame;
-import org.apache.spark.sql.Row;
-import org.apache.spark.sql.RowFactory;
-import org.apache.spark.sql.types.*;
-import static org.apache.spark.sql.types.DataTypes.*;
-
-StructType schema = createStructType(new StructField[] {
-  createStructField("id", IntegerType, false),
-  createStructField("hour", IntegerType, false),
-  createStructField("mobile", DoubleType, false),
-  createStructField("userFeatures", new VectorUDT(), false),
-  createStructField("clicked", DoubleType, false)
-});
-Row row = RowFactory.create(0, 18, 1.0, Vectors.dense(0.0, 10.0, 0.5), 1.0);
-JavaRDD<Row> rdd = jsc.parallelize(Arrays.asList(row));
-DataFrame dataset = sqlContext.createDataFrame(rdd, schema);
-
-VectorAssembler assembler = new VectorAssembler()
-  .setInputCols(new String[] {"hour", "mobile", "userFeatures"})
-  .setOutputCol("features");
-
-DataFrame output = assembler.transform(dataset);
-System.out.println(output.select("features", "clicked").first());
-{% endhighlight %}
+{% include_example java/org/apache/spark/examples/ml/JavaVectorAssemblerExample.java %}
 </div>
 
 <div data-lang="python" markdown="1">
@@ -1689,19 +813,7 @@ System.out.println(output.select("features", "clicked").first());
 Refer to the [VectorAssembler Python docs](api/python/pyspark.ml.html#pyspark.ml.feature.VectorAssembler)
 for more details on the API.
 
-{% highlight python %}
-from pyspark.mllib.linalg import Vectors
-from pyspark.ml.feature import VectorAssembler
-
-dataset = sqlContext.createDataFrame(
-    [(0, 18, 1.0, Vectors.dense([0.0, 10.0, 0.5]), 1.0)],
-    ["id", "hour", "mobile", "userFeatures", "clicked"])
-assembler = VectorAssembler(
-    inputCols=["hour", "mobile", "userFeatures"],
-    outputCol="features")
-output = assembler.transform(dataset)
-print(output.select("features", "clicked").first())
-{% endhighlight %}
+{% include_example python/ml/vector_assembler_example.py %}
 </div>
 </div>
 
@@ -1831,33 +943,7 @@ Suppose also that we have a potential input attributes for the `userFeatures`, i
 Refer to the [VectorSlicer Scala docs](api/scala/index.html#org.apache.spark.ml.feature.VectorSlicer)
 for more details on the API.
 
-{% highlight scala %}
-import org.apache.spark.mllib.linalg.Vectors
-import org.apache.spark.ml.attribute.{Attribute, AttributeGroup, NumericAttribute}
-import org.apache.spark.ml.feature.VectorSlicer
-import org.apache.spark.sql.types.StructType
-import org.apache.spark.sql.{DataFrame, Row, SQLContext}
-
-val data = Array(
-  Vectors.sparse(3, Seq((0, -2.0), (1, 2.3))),
-  Vectors.dense(-2.0, 2.3, 0.0)
-)
-
-val defaultAttr = NumericAttribute.defaultAttr
-val attrs = Array("f1", "f2", "f3").map(defaultAttr.withName)
-val attrGroup = new AttributeGroup("userFeatures", attrs.asInstanceOf[Array[Attribute]])
-
-val dataRDD = sc.parallelize(data).map(Row.apply)
-val dataset = sqlContext.createDataFrame(dataRDD, StructType(attrGroup.toStructField()))
-
-val slicer = new VectorSlicer().setInputCol("userFeatures").setOutputCol("features")
-
-slicer.setIndices(1).setNames("f3")
-// or slicer.setIndices(Array(1, 2)), or slicer.setNames(Array("f2", "f3"))
-
-val output = slicer.transform(dataset)
-println(output.select("userFeatures", "features").first())
-{% endhighlight %}
+{% include_example scala/org/apache/spark/examples/ml/VectorSlicerExample.scala %}
 </div>
 
 <div data-lang="java" markdown="1">
@@ -1865,41 +951,7 @@ println(output.select("userFeatures", "features").first())
 Refer to the [VectorSlicer Java docs](api/java/org/apache/spark/ml/feature/VectorSlicer.html)
 for more details on the API.
 
-{% highlight java %}
-import java.util.Arrays;
-
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.mllib.linalg.Vectors;
-import org.apache.spark.sql.DataFrame;
-import org.apache.spark.sql.Row;
-import org.apache.spark.sql.RowFactory;
-import org.apache.spark.sql.types.*;
-import static org.apache.spark.sql.types.DataTypes.*;
-
-Attribute[] attrs = new Attribute[]{
-  NumericAttribute.defaultAttr().withName("f1"),
-  NumericAttribute.defaultAttr().withName("f2"),
-  NumericAttribute.defaultAttr().withName("f3")
-};
-AttributeGroup group = new AttributeGroup("userFeatures", attrs);
-
-JavaRDD<Row> jrdd = jsc.parallelize(Lists.newArrayList(
-  RowFactory.create(Vectors.sparse(3, new int[]{0, 1}, new double[]{-2.0, 2.3})),
-  RowFactory.create(Vectors.dense(-2.0, 2.3, 0.0))
-));
-
-DataFrame dataset = jsql.createDataFrame(jrdd, (new StructType()).add(group.toStructField()));
-
-VectorSlicer vectorSlicer = new VectorSlicer()
-  .setInputCol("userFeatures").setOutputCol("features");
-
-vectorSlicer.setIndices(new int[]{1}).setNames(new String[]{"f3"});
-// or slicer.setIndices(new int[]{1, 2}), or slicer.setNames(new String[]{"f2", "f3"})
-
-DataFrame output = vectorSlicer.transform(dataset);
-
-System.out.println(output.select("userFeatures", "features").first());
-{% endhighlight %}
+{% include_example java/org/apache/spark/examples/ml/JavaVectorSlicerExample.java %}
 </div>
 </div>
 
@@ -1936,21 +988,7 @@ id | country | hour | clicked | features         | label
 Refer to the [RFormula Scala docs](api/scala/index.html#org.apache.spark.ml.feature.RFormula)
 for more details on the API.
 
-{% highlight scala %}
-import org.apache.spark.ml.feature.RFormula
-
-val dataset = sqlContext.createDataFrame(Seq(
-  (7, "US", 18, 1.0),
-  (8, "CA", 12, 0.0),
-  (9, "NZ", 15, 0.0)
-)).toDF("id", "country", "hour", "clicked")
-val formula = new RFormula()
-  .setFormula("clicked ~ country + hour")
-  .setFeaturesCol("features")
-  .setLabelCol("label")
-val output = formula.fit(dataset).transform(dataset)
-output.select("features", "label").show()
-{% endhighlight %}
+{% include_example scala/org/apache/spark/examples/ml/RFormulaExample.scala %}
 </div>
 
 <div data-lang="java" markdown="1">
@@ -1958,38 +996,7 @@ output.select("features", "label").show()
 Refer to the [RFormula Java docs](api/java/org/apache/spark/ml/feature/RFormula.html)
 for more details on the API.
 
-{% highlight java %}
-import java.util.Arrays;
-
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.ml.feature.RFormula;
-import org.apache.spark.sql.DataFrame;
-import org.apache.spark.sql.Row;
-import org.apache.spark.sql.RowFactory;
-import org.apache.spark.sql.types.*;
-import static org.apache.spark.sql.types.DataTypes.*;
-
-StructType schema = createStructType(new StructField[] {
-  createStructField("id", IntegerType, false),
-  createStructField("country", StringType, false),
-  createStructField("hour", IntegerType, false),
-  createStructField("clicked", DoubleType, false)
-});
-JavaRDD<Row> rdd = jsc.parallelize(Arrays.asList(
-  RowFactory.create(7, "US", 18, 1.0),
-  RowFactory.create(8, "CA", 12, 0.0),
-  RowFactory.create(9, "NZ", 15, 0.0)
-));
-DataFrame dataset = sqlContext.createDataFrame(rdd, schema);
-
-RFormula formula = new RFormula()
-  .setFormula("clicked ~ country + hour")
-  .setFeaturesCol("features")
-  .setLabelCol("label");
-
-DataFrame output = formula.fit(dataset).transform(dataset);
-output.select("features", "label").show();
-{% endhighlight %}
+{% include_example java/org/apache/spark/examples/ml/JavaRFormulaExample.java %}
 </div>
 
 <div data-lang="python" markdown="1">
@@ -1997,21 +1004,7 @@ output.select("features", "label").show();
 Refer to the [RFormula Python docs](api/python/pyspark.ml.html#pyspark.ml.feature.RFormula)
 for more details on the API.
 
-{% highlight python %}
-from pyspark.ml.feature import RFormula
-
-dataset = sqlContext.createDataFrame(
-    [(7, "US", 18, 1.0),
-     (8, "CA", 12, 0.0),
-     (9, "NZ", 15, 0.0)],
-    ["id", "country", "hour", "clicked"])
-formula = RFormula(
-    formula="clicked ~ country + hour",
-    featuresCol="features",
-    labelCol="label")
-output = formula.fit(dataset).transform(dataset)
-output.select("features", "label").show()
-{% endhighlight %}
+{% include_example python/ml/rformula_example.py %}
 </div>
 </div>
 

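For context, the {% include_example %} tags introduced above are resolved at docs build time (assuming the standard Spark docs build, which ships a Jekyll plugin for this): only the source between the "// $example on$" and "// $example off$" markers in each example file is rendered into ml-features.md, so those markers control exactly what readers of the guide see, while the surrounding boilerplate is still compiled and runnable. A minimal, hypothetical skeleton of that convention (the class name is illustrative only; the real example files follow below):

package org.apache.spark.examples.ml;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SQLContext;

public class JavaIncludeExampleSkeleton {
  public static void main(String[] args) {
    // Setup outside the markers runs as part of the example but is never shown in the guide.
    SparkConf conf = new SparkConf().setAppName("JavaIncludeExampleSkeleton");
    JavaSparkContext jsc = new JavaSparkContext(conf);
    // Created only to mirror the structure of the real examples below.
    SQLContext sqlContext = new SQLContext(jsc);

    // $example on$
    // Feature-transformer code placed here is what gets rendered into ml-features.md.
    // $example off$

    jsc.stop();
  }
}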
http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/java/org/apache/spark/examples/ml/JavaBinarizerExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaBinarizerExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaBinarizerExample.java
new file mode 100644
index 0000000..9698cac
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaBinarizerExample.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.SQLContext;
+
+// $example on$
+import java.util.Arrays;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.ml.feature.Binarizer;
+import org.apache.spark.sql.DataFrame;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+// $example off$
+
+public class JavaBinarizerExample {
+  public static void main(String[] args) {
+    SparkConf conf = new SparkConf().setAppName("JavaBinarizerExample");
+    JavaSparkContext jsc = new JavaSparkContext(conf);
+    SQLContext jsql = new SQLContext(jsc);
+
+    // $example on$
+    JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
+      RowFactory.create(0, 0.1),
+      RowFactory.create(1, 0.8),
+      RowFactory.create(2, 0.2)
+    ));
+    StructType schema = new StructType(new StructField[]{
+      new StructField("label", DataTypes.DoubleType, false, Metadata.empty()),
+      new StructField("feature", DataTypes.DoubleType, false, Metadata.empty())
+    });
+    DataFrame continuousDataFrame = jsql.createDataFrame(jrdd, schema);
+    Binarizer binarizer = new Binarizer()
+      .setInputCol("feature")
+      .setOutputCol("binarized_feature")
+      .setThreshold(0.5);
+    DataFrame binarizedDataFrame = binarizer.transform(continuousDataFrame);
+    DataFrame binarizedFeatures = binarizedDataFrame.select("binarized_feature");
+    for (Row r : binarizedFeatures.collect()) {
+      Double binarized_value = r.getDouble(0);
+      System.out.println(binarized_value);
+    }
+    // $example off$
+    jsc.stop();
+  }
+}
\ No newline at end of file

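Assuming the standard example launcher, the class above can be exercised after a build with `./bin/run-example ml.JavaBinarizerExample`; with the 0.5 threshold it should print 0.0, 1.0, and 0.0 for the three input rows.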
http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/java/org/apache/spark/examples/ml/JavaBucketizerExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaBucketizerExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaBucketizerExample.java
new file mode 100644
index 0000000..b06a23e
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaBucketizerExample.java
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.SQLContext;
+
+// $example on$
+import java.util.Arrays;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.ml.feature.Bucketizer;
+import org.apache.spark.sql.DataFrame;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+// $example off$
+
+public class JavaBucketizerExample {
+  public static void main(String[] args) {
+    SparkConf conf = new SparkConf().setAppName("JavaBucketizerExample");
+    JavaSparkContext jsc = new JavaSparkContext(conf);
+    SQLContext jsql = new SQLContext(jsc);
+
+    // $example on$
+    double[] splits = {Double.NEGATIVE_INFINITY, -0.5, 0.0, 0.5, Double.POSITIVE_INFINITY};
+
+    JavaRDD<Row> data = jsc.parallelize(Arrays.asList(
+      RowFactory.create(-0.5),
+      RowFactory.create(-0.3),
+      RowFactory.create(0.0),
+      RowFactory.create(0.2)
+    ));
+    StructType schema = new StructType(new StructField[]{
+      new StructField("features", DataTypes.DoubleType, false, Metadata.empty())
+    });
+    DataFrame dataFrame = jsql.createDataFrame(data, schema);
+
+    Bucketizer bucketizer = new Bucketizer()
+      .setInputCol("features")
+      .setOutputCol("bucketedFeatures")
+      .setSplits(splits);
+
+    // Transform original data into its bucket index.
+    DataFrame bucketedData = bucketizer.transform(dataFrame);
+    // $example off$
+    jsc.stop();
+  }
+}
+
+

http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/java/org/apache/spark/examples/ml/JavaDCTExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaDCTExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaDCTExample.java
new file mode 100644
index 0000000..35c0d53
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaDCTExample.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.SQLContext;
+
+// $example on$
+import java.util.Arrays;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.ml.feature.DCT;
+import org.apache.spark.mllib.linalg.VectorUDT;
+import org.apache.spark.mllib.linalg.Vectors;
+import org.apache.spark.sql.DataFrame;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+// $example off$
+
+public class JavaDCTExample {
+  public static void main(String[] args) {
+    SparkConf conf = new SparkConf().setAppName("JavaDCTExample");
+    JavaSparkContext jsc = new JavaSparkContext(conf);
+    SQLContext jsql = new SQLContext(jsc);
+
+    // $example on$
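+    // Each row holds a length-4 feature vector; the DCT transformer applies a 1-D
+    // discrete cosine transform (DCT-II) to every vector.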
+    JavaRDD<Row> data = jsc.parallelize(Arrays.asList(
+      RowFactory.create(Vectors.dense(0.0, 1.0, -2.0, 3.0)),
+      RowFactory.create(Vectors.dense(-1.0, 2.0, 4.0, -7.0)),
+      RowFactory.create(Vectors.dense(14.0, -2.0, -5.0, 1.0))
+    ));
+    StructType schema = new StructType(new StructField[]{
+      new StructField("features", new VectorUDT(), false, Metadata.empty()),
+    });
+    DataFrame df = jsql.createDataFrame(data, schema);
+    DCT dct = new DCT()
+      .setInputCol("features")
+      .setOutputCol("featuresDCT")
+      .setInverse(false);
+    DataFrame dctDf = dct.transform(df);
+    dctDf.select("featuresDCT").show(3);
+    // $example off$
+    jsc.stop();
+  }
+}
+

http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/java/org/apache/spark/examples/ml/JavaElementwiseProductExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaElementwiseProductExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaElementwiseProductExample.java
new file mode 100644
index 0000000..2898acc
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaElementwiseProductExample.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.SQLContext;
+
+// $example on$
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.ml.feature.ElementwiseProduct;
+import org.apache.spark.mllib.linalg.Vector;
+import org.apache.spark.mllib.linalg.VectorUDT;
+import org.apache.spark.mllib.linalg.Vectors;
+import org.apache.spark.sql.DataFrame;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+// $example off$
+
+public class JavaElementwiseProductExample {
+  public static void main(String[] args) {
+    SparkConf conf = new SparkConf().setAppName("JavaElementwiseProductExample");
+    JavaSparkContext jsc = new JavaSparkContext(conf);
+    SQLContext sqlContext = new SQLContext(jsc);
+
+    // $example on$
+    // Create some vector data; also works for sparse vectors
+    JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
+      RowFactory.create("a", Vectors.dense(1.0, 2.0, 3.0)),
+      RowFactory.create("b", Vectors.dense(4.0, 5.0, 6.0))
+    ));
+
+    List<StructField> fields = new ArrayList<StructField>(2);
+    fields.add(DataTypes.createStructField("id", DataTypes.StringType, false));
+    fields.add(DataTypes.createStructField("vector", new VectorUDT(), false));
+
+    StructType schema = DataTypes.createStructType(fields);
+
+    DataFrame dataFrame = sqlContext.createDataFrame(jrdd, schema);
+
+    Vector transformingVector = Vectors.dense(0.0, 1.0, 2.0);
+
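+    // ElementwiseProduct multiplies each input vector by the scaling vector,
+    // element-wise (Hadamard product).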
+    ElementwiseProduct transformer = new ElementwiseProduct()
+      .setScalingVec(transformingVector)
+      .setInputCol("vector")
+      .setOutputCol("transformedVector");
+
+    // Batch transform the vectors to create a new column:
+    transformer.transform(dataFrame).show();
+    // $example off$
+    jsc.stop();
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/java/org/apache/spark/examples/ml/JavaMinMaxScalerExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaMinMaxScalerExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaMinMaxScalerExample.java
new file mode 100644
index 0000000..138b3ab
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaMinMaxScalerExample.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.SQLContext;
+
+// $example on$
+import org.apache.spark.ml.feature.MinMaxScaler;
+import org.apache.spark.ml.feature.MinMaxScalerModel;
+import org.apache.spark.sql.DataFrame;
+// $example off$
+
+public class JavaMinMaxScalerExample {
+  public static void main(String[] args) {
+    SparkConf conf = new SparkConf().setAppName("JaveMinMaxScalerExample");
+    JavaSparkContext jsc = new JavaSparkContext(conf);
+    SQLContext jsql = new SQLContext(jsc);
+
+    // $example on$
+    DataFrame dataFrame = jsql.read().format("libsvm").load("data/mllib/sample_libsvm_data.txt");
+    MinMaxScaler scaler = new MinMaxScaler()
+      .setInputCol("features")
+      .setOutputCol("scaledFeatures");
+
+    // Compute summary statistics and generate MinMaxScalerModel
+    MinMaxScalerModel scalerModel = scaler.fit(dataFrame);
+
+    // Rescale each feature to the range [min, max].
+    DataFrame scaledData = scalerModel.transform(dataFrame);
+    // $example off$
+    jsc.stop();
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/java/org/apache/spark/examples/ml/JavaNGramExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaNGramExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaNGramExample.java
new file mode 100644
index 0000000..8fd75ed
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaNGramExample.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.SQLContext;
+
+// $example on$
+import java.util.Arrays;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.ml.feature.NGram;
+import org.apache.spark.sql.DataFrame;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+// $example off$
+
+public class JavaNGramExample {
+  public static void main(String[] args) {
+    SparkConf conf = new SparkConf().setAppName("JavaNGramExample");
+    JavaSparkContext jsc = new JavaSparkContext(conf);
+    SQLContext sqlContext = new SQLContext(jsc);
+
+    // $example on$
+    JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
+      RowFactory.create(0.0, Arrays.asList("Hi", "I", "heard", "about", "Spark")),
+      RowFactory.create(1.0, Arrays.asList("I", "wish", "Java", "could", "use", "case", "classes")),
+      RowFactory.create(2.0, Arrays.asList("Logistic", "regression", "models", "are", "neat"))
+    ));
+
+    StructType schema = new StructType(new StructField[]{
+      new StructField("label", DataTypes.DoubleType, false, Metadata.empty()),
+      new StructField(
+        "words", DataTypes.createArrayType(DataTypes.StringType), false, Metadata.empty())
+    });
+
+    DataFrame wordDataFrame = sqlContext.createDataFrame(jrdd, schema);
+
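+    // NGram turns each word array into an array of space-joined n-grams; n defaults to 2 (bigrams).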
+    NGram ngramTransformer = new NGram().setInputCol("words").setOutputCol("ngrams");
+
+    DataFrame ngramDataFrame = ngramTransformer.transform(wordDataFrame);
+
+    for (Row r : ngramDataFrame.select("ngrams", "label").take(3)) {
+      java.util.List<String> ngrams = r.getList(0);
+      for (String ngram : ngrams) System.out.print(ngram + " --- ");
+      System.out.println();
+    }
+    // $example off$
+    jsc.stop();
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/java/org/apache/spark/examples/ml/JavaNormalizerExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaNormalizerExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaNormalizerExample.java
new file mode 100644
index 0000000..6283a35
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaNormalizerExample.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.SQLContext;
+
+// $example on$
+import org.apache.spark.ml.feature.Normalizer;
+import org.apache.spark.sql.DataFrame;
+// $example off$
+
+public class JavaNormalizerExample {
+  public static void main(String[] args) {
+    SparkConf conf = new SparkConf().setAppName("JavaNormalizerExample");
+    JavaSparkContext jsc = new JavaSparkContext(conf);
+    SQLContext jsql = new SQLContext(jsc);
+
+    // $example on$
+    DataFrame dataFrame = jsql.read().format("libsvm").load("data/mllib/sample_libsvm_data.txt");
+
+    // Normalize each Vector using $L^1$ norm.
+    Normalizer normalizer = new Normalizer()
+      .setInputCol("features")
+      .setOutputCol("normFeatures")
+      .setP(1.0);
+
+    DataFrame l1NormData = normalizer.transform(dataFrame);
+
+    // Normalize each Vector using $L^\infty$ norm.
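+    // p().w(...) builds a ParamPair that overrides the p parameter for this single call.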
+    DataFrame lInfNormData =
+      normalizer.transform(dataFrame, normalizer.p().w(Double.POSITIVE_INFINITY));
+    // $example off$
+    jsc.stop();
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/java/org/apache/spark/examples/ml/JavaOneHotEncoderExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaOneHotEncoderExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaOneHotEncoderExample.java
new file mode 100644
index 0000000..172a9cc
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaOneHotEncoderExample.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.SQLContext;
+
+// $example on$
+import java.util.Arrays;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.ml.feature.OneHotEncoder;
+import org.apache.spark.ml.feature.StringIndexer;
+import org.apache.spark.ml.feature.StringIndexerModel;
+import org.apache.spark.sql.DataFrame;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+// $example off$
+
+public class JavaOneHotEncoderExample {
+  public static void main(String[] args) {
+    SparkConf conf = new SparkConf().setAppName("JavaOneHotEncoderExample");
+    JavaSparkContext jsc = new JavaSparkContext(conf);
+    SQLContext sqlContext = new SQLContext(jsc);
+
+    // $example on$
+    JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
+      RowFactory.create(0, "a"),
+      RowFactory.create(1, "b"),
+      RowFactory.create(2, "c"),
+      RowFactory.create(3, "a"),
+      RowFactory.create(4, "a"),
+      RowFactory.create(5, "c")
+    ));
+
+    StructType schema = new StructType(new StructField[]{
+      new StructField("id", DataTypes.DoubleType, false, Metadata.empty()),
+      new StructField("category", DataTypes.StringType, false, Metadata.empty())
+    });
+
+    DataFrame df = sqlContext.createDataFrame(jrdd, schema);
+
+    StringIndexerModel indexer = new StringIndexer()
+      .setInputCol("category")
+      .setOutputCol("categoryIndex")
+      .fit(df);
+    DataFrame indexed = indexer.transform(df);
+
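+    // OneHotEncoder maps the category index to a sparse binary vector; by default the
+    // last category is dropped (use setDropLast(false) to keep it).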
+    OneHotEncoder encoder = new OneHotEncoder()
+      .setInputCol("categoryIndex")
+      .setOutputCol("categoryVec");
+    DataFrame encoded = encoder.transform(indexed);
+    // $example off$
+    jsc.stop();
+  }
+}
+

http://git-wip-us.apache.org/repos/asf/spark/blob/78209b0c/examples/src/main/java/org/apache/spark/examples/ml/JavaPCAExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaPCAExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaPCAExample.java
new file mode 100644
index 0000000..8282fab
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaPCAExample.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.SQLContext;
+
+// $example on$
+import java.util.Arrays;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.ml.feature.PCA;
+import org.apache.spark.ml.feature.PCAModel;
+import org.apache.spark.mllib.linalg.VectorUDT;
+import org.apache.spark.mllib.linalg.Vectors;
+import org.apache.spark.sql.DataFrame;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+// $example off$
+
+public class JavaPCAExample {
+  public static void main(String[] args) {
+    SparkConf conf = new SparkConf().setAppName("JavaPCAExample");
+    JavaSparkContext jsc = new JavaSparkContext(conf);
+    SQLContext jsql = new SQLContext(jsc);
+
+    // $example on$
+    JavaRDD<Row> data = jsc.parallelize(Arrays.asList(
+      RowFactory.create(Vectors.sparse(5, new int[]{1, 3}, new double[]{1.0, 7.0})),
+      RowFactory.create(Vectors.dense(2.0, 0.0, 3.0, 4.0, 5.0)),
+      RowFactory.create(Vectors.dense(4.0, 0.0, 0.0, 6.0, 7.0))
+    ));
+
+    StructType schema = new StructType(new StructField[]{
+      new StructField("features", new VectorUDT(), false, Metadata.empty()),
+    });
+
+    DataFrame df = jsql.createDataFrame(data, schema);
+
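+    // Fit a PCA model that projects the 5-dimensional vectors onto their top 3 principal components.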
+    PCAModel pca = new PCA()
+      .setInputCol("features")
+      .setOutputCol("pcaFeatures")
+      .setK(3)
+      .fit(df);
+
+    DataFrame result = pca.transform(df).select("pcaFeatures");
+    result.show();
+    // $example off$
+    jsc.stop();
+  }
+}
+


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org