You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by fe...@apache.org on 2017/02/21 17:38:18 UTC

spark git commit: [SPARK-19337][ML][DOC] Documentation and examples for LinearSVC

Repository: spark
Updated Branches:
  refs/heads/master 17b93b5fe -> 280afe0ef


[SPARK-19337][ML][DOC] Documentation and examples for LinearSVC

## What changes were proposed in this pull request?

Documentation and examples (Java, Scala, Python, R) for LinearSVC

## How was this patch tested?
local doc generation

Author: Yuhao Yang <yu...@intel.com>

Closes #16968 from hhbyyh/mlsvmdoc.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/280afe0e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/280afe0e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/280afe0e

Branch: refs/heads/master
Commit: 280afe0ef384433eae2076cda569c5f1b3e49d42
Parents: 17b93b5
Author: Yuhao Yang <yu...@intel.com>
Authored: Tue Feb 21 09:38:14 2017 -0800
Committer: Felix Cheung <fe...@apache.org>
Committed: Tue Feb 21 09:38:14 2017 -0800

----------------------------------------------------------------------
 docs/ml-classification-regression.md            | 44 ++++++++++++++++
 .../spark/examples/ml/JavaLinearSVCExample.java | 54 ++++++++++++++++++++
 examples/src/main/python/ml/linearsvc.py        | 46 +++++++++++++++++
 .../spark/examples/ml/LinearSVCExample.scala    | 52 +++++++++++++++++++
 4 files changed, 196 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/280afe0e/docs/ml-classification-regression.md
----------------------------------------------------------------------
diff --git a/docs/ml-classification-regression.md b/docs/ml-classification-regression.md
index 782ee58..37862f8 100644
--- a/docs/ml-classification-regression.md
+++ b/docs/ml-classification-regression.md
@@ -363,6 +363,50 @@ Refer to the [R API docs](api/R/spark.mlp.html) for more details.
 
 </div>
 
+## Linear Support Vector Machine
+
+A [support vector machine](https://en.wikipedia.org/wiki/Support_vector_machine) constructs a hyperplane
+or set of hyperplanes in a high- or infinite-dimensional space, which can be used for classification,
+regression, or other tasks. Intuitively, a good separation is achieved by the hyperplane that has
+the largest distance to the nearest training-data points of any class (so-called functional margin),
+since in general the larger the margin the lower the generalization error of the classifier. LinearSVC
+in Spark ML supports binary classification with a linear SVM. Internally, it optimizes the
+[Hinge Loss](https://en.wikipedia.org/wiki/Hinge_loss) using the OWLQN optimizer.
+
+
+**Examples**
+
+<div class="codetabs">
+
+<div data-lang="scala" markdown="1">
+
+Refer to the [Scala API docs](api/scala/index.html#org.apache.spark.ml.classification.LinearSVC) for more details.
+
+{% include_example scala/org/apache/spark/examples/ml/LinearSVCExample.scala %}
+</div>
+
+<div data-lang="java" markdown="1">
+
+Refer to the [Java API docs](api/java/org/apache/spark/ml/classification/LinearSVC.html) for more details.
+
+{% include_example java/org/apache/spark/examples/ml/JavaLinearSVCExample.java %}
+</div>
+
+<div data-lang="python" markdown="1">
+
+Refer to the [Python API docs](api/python/pyspark.ml.html#pyspark.ml.classification.LinearSVC) for more details.
+
+{% include_example python/ml/linearsvc.py %}
+</div>
+
+<div data-lang="r" markdown="1">
+
+Refer to the [R API docs](api/R/spark.svmLinear.html) for more details.
+
+{% include_example r/ml/svmLinear.R %}
+</div>
+
+</div>
 
 ## One-vs-Rest classifier (a.k.a. One-vs-All)
 

http://git-wip-us.apache.org/repos/asf/spark/blob/280afe0e/examples/src/main/java/org/apache/spark/examples/ml/JavaLinearSVCExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaLinearSVCExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaLinearSVCExample.java
new file mode 100644
index 0000000..a18ed1d
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaLinearSVCExample.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+// $example on$
+import org.apache.spark.ml.classification.LinearSVC;
+import org.apache.spark.ml.classification.LinearSVCModel;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SparkSession;
+// $example off$
+
+public class JavaLinearSVCExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaLinearSVCExample")
+      .getOrCreate();
+
+    // $example on$
+    // Load training data
+    Dataset<Row> training = spark.read().format("libsvm")
+      .load("data/mllib/sample_libsvm_data.txt");
+
+    LinearSVC lsvc = new LinearSVC()
+      .setMaxIter(10)
+      .setRegParam(0.1);
+
+    // Fit the model
+    LinearSVCModel lsvcModel = lsvc.fit(training);
+
+    // Print the coefficients and intercept for LinearSVC
+    System.out.println("Coefficients: "
+      + lsvcModel.coefficients() + " Intercept: " + lsvcModel.intercept());
+    // $example off$
+
+    spark.stop();
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/280afe0e/examples/src/main/python/ml/linearsvc.py
----------------------------------------------------------------------
diff --git a/examples/src/main/python/ml/linearsvc.py b/examples/src/main/python/ml/linearsvc.py
new file mode 100644
index 0000000..18cbf87
--- /dev/null
+++ b/examples/src/main/python/ml/linearsvc.py
@@ -0,0 +1,46 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml.classification import LinearSVC
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("linearSVC Example")\
+        .getOrCreate()
+
+    # $example on$
+    # Load training data
+    training = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
+
+    lsvc = LinearSVC(maxIter=10, regParam=0.1)
+
+    # Fit the model
+    lsvcModel = lsvc.fit(training)
+
+    # Print the coefficients and intercept for LinearSVC
+    print("Coefficients: " + str(lsvcModel.coefficients))
+    print("Intercept: " + str(lsvcModel.intercept))
+
+    # $example off$
+
+    spark.stop()

http://git-wip-us.apache.org/repos/asf/spark/blob/280afe0e/examples/src/main/scala/org/apache/spark/examples/ml/LinearSVCExample.scala
----------------------------------------------------------------------
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/LinearSVCExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/LinearSVCExample.scala
new file mode 100644
index 0000000..5f43e65
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/LinearSVCExample.scala
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.classification.LinearSVC
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+object LinearSVCExample {
+
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("LinearSVCExample")
+      .getOrCreate()
+
+    // $example on$
+    // Load training data
+    val training = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
+
+    val lsvc = new LinearSVC()
+      .setMaxIter(10)
+      .setRegParam(0.1)
+
+    // Fit the model
+    val lsvcModel = lsvc.fit(training)
+
+    // Print the coefficients and intercept for LinearSVC
+    println(s"Coefficients: ${lsvcModel.coefficients} Intercept: ${lsvcModel.intercept}")
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org