You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by yh...@apache.org on 2016/12/28 22:35:33 UTC

[22/25] spark-website git commit: Update 2.1.0 docs to include https://github.com/apache/spark/pull/16294

http://git-wip-us.apache.org/repos/asf/spark-website/blob/d2bcf185/site/docs/2.1.0/ml-classification-regression.html
----------------------------------------------------------------------
diff --git a/site/docs/2.1.0/ml-classification-regression.html b/site/docs/2.1.0/ml-classification-regression.html
index 1e0665b..0b264bb 100644
--- a/site/docs/2.1.0/ml-classification-regression.html
+++ b/site/docs/2.1.0/ml-classification-regression.html
@@ -329,58 +329,58 @@ discussing specific classes of algorithms, such as linear methods, trees, and en
 <p><strong>Table of Contents</strong></p>
 
 <ul id="markdown-toc">
-  <li><a href="#classification" id="markdown-toc-classification">Classification</a>    <ul>
-      <li><a href="#logistic-regression" id="markdown-toc-logistic-regression">Logistic regression</a>        <ul>
-          <li><a href="#binomial-logistic-regression" id="markdown-toc-binomial-logistic-regression">Binomial logistic regression</a></li>
-          <li><a href="#multinomial-logistic-regression" id="markdown-toc-multinomial-logistic-regression">Multinomial logistic regression</a></li>
+  <li><a href="#classification">Classification</a>    <ul>
+      <li><a href="#logistic-regression">Logistic regression</a>        <ul>
+          <li><a href="#binomial-logistic-regression">Binomial logistic regression</a></li>
+          <li><a href="#multinomial-logistic-regression">Multinomial logistic regression</a></li>
         </ul>
       </li>
-      <li><a href="#decision-tree-classifier" id="markdown-toc-decision-tree-classifier">Decision tree classifier</a></li>
-      <li><a href="#random-forest-classifier" id="markdown-toc-random-forest-classifier">Random forest classifier</a></li>
-      <li><a href="#gradient-boosted-tree-classifier" id="markdown-toc-gradient-boosted-tree-classifier">Gradient-boosted tree classifier</a></li>
-      <li><a href="#multilayer-perceptron-classifier" id="markdown-toc-multilayer-perceptron-classifier">Multilayer perceptron classifier</a></li>
-      <li><a href="#one-vs-rest-classifier-aka-one-vs-all" id="markdown-toc-one-vs-rest-classifier-aka-one-vs-all">One-vs-Rest classifier (a.k.a. One-vs-All)</a></li>
-      <li><a href="#naive-bayes" id="markdown-toc-naive-bayes">Naive Bayes</a></li>
+      <li><a href="#decision-tree-classifier">Decision tree classifier</a></li>
+      <li><a href="#random-forest-classifier">Random forest classifier</a></li>
+      <li><a href="#gradient-boosted-tree-classifier">Gradient-boosted tree classifier</a></li>
+      <li><a href="#multilayer-perceptron-classifier">Multilayer perceptron classifier</a></li>
+      <li><a href="#one-vs-rest-classifier-aka-one-vs-all">One-vs-Rest classifier (a.k.a. One-vs-All)</a></li>
+      <li><a href="#naive-bayes">Naive Bayes</a></li>
     </ul>
   </li>
-  <li><a href="#regression" id="markdown-toc-regression">Regression</a>    <ul>
-      <li><a href="#linear-regression" id="markdown-toc-linear-regression">Linear regression</a></li>
-      <li><a href="#generalized-linear-regression" id="markdown-toc-generalized-linear-regression">Generalized linear regression</a>        <ul>
-          <li><a href="#available-families" id="markdown-toc-available-families">Available families</a></li>
+  <li><a href="#regression">Regression</a>    <ul>
+      <li><a href="#linear-regression">Linear regression</a></li>
+      <li><a href="#generalized-linear-regression">Generalized linear regression</a>        <ul>
+          <li><a href="#available-families">Available families</a></li>
         </ul>
       </li>
-      <li><a href="#decision-tree-regression" id="markdown-toc-decision-tree-regression">Decision tree regression</a></li>
-      <li><a href="#random-forest-regression" id="markdown-toc-random-forest-regression">Random forest regression</a></li>
-      <li><a href="#gradient-boosted-tree-regression" id="markdown-toc-gradient-boosted-tree-regression">Gradient-boosted tree regression</a></li>
-      <li><a href="#survival-regression" id="markdown-toc-survival-regression">Survival regression</a></li>
-      <li><a href="#isotonic-regression" id="markdown-toc-isotonic-regression">Isotonic regression</a>        <ul>
-          <li><a href="#examples" id="markdown-toc-examples">Examples</a></li>
+      <li><a href="#decision-tree-regression">Decision tree regression</a></li>
+      <li><a href="#random-forest-regression">Random forest regression</a></li>
+      <li><a href="#gradient-boosted-tree-regression">Gradient-boosted tree regression</a></li>
+      <li><a href="#survival-regression">Survival regression</a></li>
+      <li><a href="#isotonic-regression">Isotonic regression</a>        <ul>
+          <li><a href="#examples">Examples</a></li>
         </ul>
       </li>
     </ul>
   </li>
-  <li><a href="#linear-methods" id="markdown-toc-linear-methods">Linear methods</a></li>
-  <li><a href="#decision-trees" id="markdown-toc-decision-trees">Decision trees</a>    <ul>
-      <li><a href="#inputs-and-outputs" id="markdown-toc-inputs-and-outputs">Inputs and Outputs</a>        <ul>
-          <li><a href="#input-columns" id="markdown-toc-input-columns">Input Columns</a></li>
-          <li><a href="#output-columns" id="markdown-toc-output-columns">Output Columns</a></li>
+  <li><a href="#linear-methods">Linear methods</a></li>
+  <li><a href="#decision-trees">Decision trees</a>    <ul>
+      <li><a href="#inputs-and-outputs">Inputs and Outputs</a>        <ul>
+          <li><a href="#input-columns">Input Columns</a></li>
+          <li><a href="#output-columns">Output Columns</a></li>
         </ul>
       </li>
     </ul>
   </li>
-  <li><a href="#tree-ensembles" id="markdown-toc-tree-ensembles">Tree Ensembles</a>    <ul>
-      <li><a href="#random-forests" id="markdown-toc-random-forests">Random Forests</a>        <ul>
-          <li><a href="#inputs-and-outputs-1" id="markdown-toc-inputs-and-outputs-1">Inputs and Outputs</a>            <ul>
-              <li><a href="#input-columns-1" id="markdown-toc-input-columns-1">Input Columns</a></li>
-              <li><a href="#output-columns-predictions" id="markdown-toc-output-columns-predictions">Output Columns (Predictions)</a></li>
+  <li><a href="#tree-ensembles">Tree Ensembles</a>    <ul>
+      <li><a href="#random-forests">Random Forests</a>        <ul>
+          <li><a href="#inputs-and-outputs-1">Inputs and Outputs</a>            <ul>
+              <li><a href="#input-columns-1">Input Columns</a></li>
+              <li><a href="#output-columns-predictions">Output Columns (Predictions)</a></li>
             </ul>
           </li>
         </ul>
       </li>
-      <li><a href="#gradient-boosted-trees-gbts" id="markdown-toc-gradient-boosted-trees-gbts">Gradient-Boosted Trees (GBTs)</a>        <ul>
-          <li><a href="#inputs-and-outputs-2" id="markdown-toc-inputs-and-outputs-2">Inputs and Outputs</a>            <ul>
-              <li><a href="#input-columns-2" id="markdown-toc-input-columns-2">Input Columns</a></li>
-              <li><a href="#output-columns-predictions-1" id="markdown-toc-output-columns-predictions-1">Output Columns (Predictions)</a></li>
+      <li><a href="#gradient-boosted-trees-gbts">Gradient-Boosted Trees (GBTs)</a>        <ul>
+          <li><a href="#inputs-and-outputs-2">Inputs and Outputs</a>            <ul>
+              <li><a href="#input-columns-2">Input Columns</a></li>
+              <li><a href="#output-columns-predictions-1">Output Columns (Predictions)</a></li>
             </ul>
           </li>
         </ul>
@@ -407,7 +407,7 @@ parameter to select between these two algorithms, or leave it unset and Spark wi
 
 <h3 id="binomial-logistic-regression">Binomial logistic regression</h3>
 
-<p>For more background and more details about the implementation of binomial logistic regression, refer to the documentation of <a href="mllib-linear-methods.html#logistic-regression">logistic regression in <code>spark.mllib</code></a>.</p>
+<p>For more background and more details about the implementation of binomial logistic regression, refer to the documentation of <a href="mllib-linear-methods.html#logistic-regression">logistic regression in <code>spark.mllib</code></a>. </p>
 
 <p><strong>Example</strong></p>
 
@@ -421,7 +421,7 @@ $\alpha$ and <code>regParam</code> corresponds to $\lambda$.</p>
 
     <p>More details on parameters can be found in the <a href="api/scala/index.html#org.apache.spark.ml.classification.LogisticRegression">Scala API documentation</a>.</p>
 
-    <div class="highlight"><pre><span class="k">import</span> <span class="nn">org.apache.spark.ml.classification.LogisticRegression</span>
+    <div class="highlight"><pre><span></span><span class="k">import</span> <span class="nn">org.apache.spark.ml.classification.LogisticRegression</span>
 
 <span class="c1">// Load training data</span>
 <span class="k">val</span> <span class="n">training</span> <span class="k">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">read</span><span class="o">.</span><span class="n">format</span><span class="o">(</span><span class="s">&quot;libsvm&quot;</span><span class="o">).</span><span class="n">load</span><span class="o">(</span><span class="s">&quot;data/mllib/sample_libsvm_data.txt&quot;</span><span class="o">)</span>
@@ -435,7 +435,7 @@ $\alpha$ and <code>regParam</code> corresponds to $\lambda$.</p>
 <span class="k">val</span> <span class="n">lrModel</span> <span class="k">=</span> <span class="n">lr</span><span class="o">.</span><span class="n">fit</span><span class="o">(</span><span class="n">training</span><span class="o">)</span>
 
 <span class="c1">// Print the coefficients and intercept for logistic regression</span>
-<span class="n">println</span><span class="o">(</span><span class="n">s</span><span class="s">&quot;Coefficients: ${lrModel.coefficients} Intercept: ${lrModel.intercept}&quot;</span><span class="o">)</span>
+<span class="n">println</span><span class="o">(</span><span class="s">s&quot;Coefficients: </span><span class="si">${</span><span class="n">lrModel</span><span class="o">.</span><span class="n">coefficients</span><span class="si">}</span><span class="s"> Intercept: </span><span class="si">${</span><span class="n">lrModel</span><span class="o">.</span><span class="n">intercept</span><span class="si">}</span><span class="s">&quot;</span><span class="o">)</span>
 
 <span class="c1">// We can also use the multinomial family for binary classification</span>
 <span class="k">val</span> <span class="n">mlr</span> <span class="k">=</span> <span class="k">new</span> <span class="nc">LogisticRegression</span><span class="o">()</span>
@@ -447,8 +447,8 @@ $\alpha$ and <code>regParam</code> corresponds to $\lambda$.</p>
 <span class="k">val</span> <span class="n">mlrModel</span> <span class="k">=</span> <span class="n">mlr</span><span class="o">.</span><span class="n">fit</span><span class="o">(</span><span class="n">training</span><span class="o">)</span>
 
 <span class="c1">// Print the coefficients and intercepts for logistic regression with multinomial family</span>
-<span class="n">println</span><span class="o">(</span><span class="n">s</span><span class="s">&quot;Multinomial coefficients: ${mlrModel.coefficientMatrix}&quot;</span><span class="o">)</span>
-<span class="n">println</span><span class="o">(</span><span class="n">s</span><span class="s">&quot;Multinomial intercepts: ${mlrModel.interceptVector}&quot;</span><span class="o">)</span>
+<span class="n">println</span><span class="o">(</span><span class="s">s&quot;Multinomial coefficients: </span><span class="si">${</span><span class="n">mlrModel</span><span class="o">.</span><span class="n">coefficientMatrix</span><span class="si">}</span><span class="s">&quot;</span><span class="o">)</span>
+<span class="n">println</span><span class="o">(</span><span class="s">s&quot;Multinomial intercepts: </span><span class="si">${</span><span class="n">mlrModel</span><span class="o">.</span><span class="n">interceptVector</span><span class="si">}</span><span class="s">&quot;</span><span class="o">)</span>
 </pre></div>
     <div><small>Find full example code at "examples/src/main/scala/org/apache/spark/examples/ml/LogisticRegressionWithElasticNetExample.scala" in the Spark repo.</small></div>
   </div>
@@ -457,7 +457,7 @@ $\alpha$ and <code>regParam</code> corresponds to $\lambda$.</p>
 
     <p>More details on parameters can be found in the <a href="api/java/org/apache/spark/ml/classification/LogisticRegression.html">Java API documentation</a>.</p>
 
-    <div class="highlight"><pre><span class="kn">import</span> <span class="nn">org.apache.spark.ml.classification.LogisticRegression</span><span class="o">;</span>
+    <div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">org.apache.spark.ml.classification.LogisticRegression</span><span class="o">;</span>
 <span class="kn">import</span> <span class="nn">org.apache.spark.ml.classification.LogisticRegressionModel</span><span class="o">;</span>
 <span class="kn">import</span> <span class="nn">org.apache.spark.sql.Dataset</span><span class="o">;</span>
 <span class="kn">import</span> <span class="nn">org.apache.spark.sql.Row</span><span class="o">;</span>
@@ -467,7 +467,7 @@ $\alpha$ and <code>regParam</code> corresponds to $\lambda$.</p>
 <span class="n">Dataset</span><span class="o">&lt;</span><span class="n">Row</span><span class="o">&gt;</span> <span class="n">training</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="na">read</span><span class="o">().</span><span class="na">format</span><span class="o">(</span><span class="s">&quot;libsvm&quot;</span><span class="o">)</span>
   <span class="o">.</span><span class="na">load</span><span class="o">(</span><span class="s">&quot;data/mllib/sample_libsvm_data.txt&quot;</span><span class="o">);</span>
 
-<span class="n">LogisticRegression</span> <span class="n">lr</span> <span class="o">=</span> <span class="k">new</span> <span class="nf">LogisticRegression</span><span class="o">()</span>
+<span class="n">LogisticRegression</span> <span class="n">lr</span> <span class="o">=</span> <span class="k">new</span> <span class="n">LogisticRegression</span><span class="o">()</span>
   <span class="o">.</span><span class="na">setMaxIter</span><span class="o">(</span><span class="mi">10</span><span class="o">)</span>
   <span class="o">.</span><span class="na">setRegParam</span><span class="o">(</span><span class="mf">0.3</span><span class="o">)</span>
   <span class="o">.</span><span class="na">setElasticNetParam</span><span class="o">(</span><span class="mf">0.8</span><span class="o">);</span>
@@ -480,7 +480,7 @@ $\alpha$ and <code>regParam</code> corresponds to $\lambda$.</p>
   <span class="o">+</span> <span class="n">lrModel</span><span class="o">.</span><span class="na">coefficients</span><span class="o">()</span> <span class="o">+</span> <span class="s">&quot; Intercept: &quot;</span> <span class="o">+</span> <span class="n">lrModel</span><span class="o">.</span><span class="na">intercept</span><span class="o">());</span>
 
 <span class="c1">// We can also use the multinomial family for binary classification</span>
-<span class="n">LogisticRegression</span> <span class="n">mlr</span> <span class="o">=</span> <span class="k">new</span> <span class="nf">LogisticRegression</span><span class="o">()</span>
+<span class="n">LogisticRegression</span> <span class="n">mlr</span> <span class="o">=</span> <span class="k">new</span> <span class="n">LogisticRegression</span><span class="o">()</span>
         <span class="o">.</span><span class="na">setMaxIter</span><span class="o">(</span><span class="mi">10</span><span class="o">)</span>
         <span class="o">.</span><span class="na">setRegParam</span><span class="o">(</span><span class="mf">0.3</span><span class="o">)</span>
         <span class="o">.</span><span class="na">setElasticNetParam</span><span class="o">(</span><span class="mf">0.8</span><span class="o">)</span>
@@ -500,29 +500,29 @@ $\alpha$ and <code>regParam</code> corresponds to $\lambda$.</p>
 
     <p>More details on parameters can be found in the <a href="api/python/pyspark.ml.html#pyspark.ml.classification.LogisticRegression">Python API documentation</a>.</p>
 
-    <div class="highlight"><pre><span class="kn">from</span> <span class="nn">pyspark.ml.classification</span> <span class="kn">import</span> <span class="n">LogisticRegression</span>
+    <div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">pyspark.ml.classification</span> <span class="kn">import</span> <span class="n">LogisticRegression</span>
 
-<span class="c"># Load training data</span>
-<span class="n">training</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">read</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="s">&quot;libsvm&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="s">&quot;data/mllib/sample_libsvm_data.txt&quot;</span><span class="p">)</span>
+<span class="c1"># Load training data</span>
+<span class="n">training</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">read</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="s2">&quot;libsvm&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="s2">&quot;data/mllib/sample_libsvm_data.txt&quot;</span><span class="p">)</span>
 
 <span class="n">lr</span> <span class="o">=</span> <span class="n">LogisticRegression</span><span class="p">(</span><span class="n">maxIter</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">regParam</span><span class="o">=</span><span class="mf">0.3</span><span class="p">,</span> <span class="n">elasticNetParam</span><span class="o">=</span><span class="mf">0.8</span><span class="p">)</span>
 
-<span class="c"># Fit the model</span>
+<span class="c1"># Fit the model</span>
 <span class="n">lrModel</span> <span class="o">=</span> <span class="n">lr</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">training</span><span class="p">)</span>
 
-<span class="c"># Print the coefficients and intercept for logistic regression</span>
-<span class="k">print</span><span class="p">(</span><span class="s">&quot;Coefficients: &quot;</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">lrModel</span><span class="o">.</span><span class="n">coefficients</span><span class="p">))</span>
-<span class="k">print</span><span class="p">(</span><span class="s">&quot;Intercept: &quot;</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">lrModel</span><span class="o">.</span><span class="n">intercept</span><span class="p">))</span>
+<span class="c1"># Print the coefficients and intercept for logistic regression</span>
+<span class="k">print</span><span class="p">(</span><span class="s2">&quot;Coefficients: &quot;</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">lrModel</span><span class="o">.</span><span class="n">coefficients</span><span class="p">))</span>
+<span class="k">print</span><span class="p">(</span><span class="s2">&quot;Intercept: &quot;</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">lrModel</span><span class="o">.</span><span class="n">intercept</span><span class="p">))</span>
 
-<span class="c"># We can also use the multinomial family for binary classification</span>
-<span class="n">mlr</span> <span class="o">=</span> <span class="n">LogisticRegression</span><span class="p">(</span><span class="n">maxIter</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">regParam</span><span class="o">=</span><span class="mf">0.3</span><span class="p">,</span> <span class="n">elasticNetParam</span><span class="o">=</span><span class="mf">0.8</span><span class="p">,</span> <span class="n">family</span><span class="o">=</span><span class="s">&quot;multinomial&quot;</span><span class="p">)</span>
+<span class="c1"># We can also use the multinomial family for binary classification</span>
+<span class="n">mlr</span> <span class="o">=</span> <span class="n">LogisticRegression</span><span class="p">(</span><span class="n">maxIter</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">regParam</span><span class="o">=</span><span class="mf">0.3</span><span class="p">,</span> <span class="n">elasticNetParam</span><span class="o">=</span><span class="mf">0.8</span><span class="p">,</span> <span class="n">family</span><span class="o">=</span><span class="s2">&quot;multinomial&quot;</span><span class="p">)</span>
 
-<span class="c"># Fit the model</span>
+<span class="c1"># Fit the model</span>
 <span class="n">mlrModel</span> <span class="o">=</span> <span class="n">mlr</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">training</span><span class="p">)</span>
 
-<span class="c"># Print the coefficients and intercepts for logistic regression with multinomial family</span>
-<span class="k">print</span><span class="p">(</span><span class="s">&quot;Multinomial coefficients: &quot;</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">mlrModel</span><span class="o">.</span><span class="n">coefficientMatrix</span><span class="p">))</span>
-<span class="k">print</span><span class="p">(</span><span class="s">&quot;Multinomial intercepts: &quot;</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">mlrModel</span><span class="o">.</span><span class="n">interceptVector</span><span class="p">))</span>
+<span class="c1"># Print the coefficients and intercepts for logistic regression with multinomial family</span>
+<span class="k">print</span><span class="p">(</span><span class="s2">&quot;Multinomial coefficients: &quot;</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">mlrModel</span><span class="o">.</span><span class="n">coefficientMatrix</span><span class="p">))</span>
+<span class="k">print</span><span class="p">(</span><span class="s2">&quot;Multinomial intercepts: &quot;</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">mlrModel</span><span class="o">.</span><span class="n">interceptVector</span><span class="p">))</span>
 </pre></div>
     <div><small>Find full example code at "examples/src/main/python/ml/logistic_regression_with_elastic_net.py" in the Spark repo.</small></div>
   </div>
@@ -531,7 +531,7 @@ $\alpha$ and <code>regParam</code> corresponds to $\lambda$.</p>
 
     <p>More details on parameters can be found in the <a href="api/R/spark.logit.html">R API documentation</a>.</p>
 
-    <div class="highlight"><pre><span class="c1"># Load training data</span>
+    <div class="highlight"><pre><span></span><span class="c1"># Load training data</span>
 df <span class="o">&lt;-</span> read.df<span class="p">(</span><span class="s">&quot;data/mllib/sample_libsvm_data.txt&quot;</span><span class="p">,</span> <span class="kn">source</span> <span class="o">=</span> <span class="s">&quot;libsvm&quot;</span><span class="p">)</span>
 training <span class="o">&lt;-</span> df
 test <span class="o">&lt;-</span> df
@@ -571,7 +571,7 @@ This will likely change when multiclass classification is supported.</p>
 
     <p>Continuing the earlier example:</p>
 
-    <div class="highlight"><pre><span class="k">import</span> <span class="nn">org.apache.spark.ml.classification.</span><span class="o">{</span><span class="nc">BinaryLogisticRegressionSummary</span><span class="o">,</span> <span class="nc">LogisticRegression</span><span class="o">}</span>
+    <div class="highlight"><pre><span></span><span class="k">import</span> <span class="nn">org.apache.spark.ml.classification.</span><span class="o">{</span><span class="nc">BinaryLogisticRegressionSummary</span><span class="o">,</span> <span class="nc">LogisticRegression</span><span class="o">}</span>
 
 <span class="c1">// Extract the summary from the returned LogisticRegressionModel instance trained in the earlier</span>
 <span class="c1">// example</span>
@@ -590,7 +590,7 @@ This will likely change when multiclass classification is supported.</p>
 <span class="c1">// Obtain the receiver-operating characteristic as a dataframe and areaUnderROC.</span>
 <span class="k">val</span> <span class="n">roc</span> <span class="k">=</span> <span class="n">binarySummary</span><span class="o">.</span><span class="n">roc</span>
 <span class="n">roc</span><span class="o">.</span><span class="n">show</span><span class="o">()</span>
-<span class="n">println</span><span class="o">(</span><span class="n">s</span><span class="s">&quot;areaUnderROC: ${binarySummary.areaUnderROC}&quot;</span><span class="o">)</span>
+<span class="n">println</span><span class="o">(</span><span class="s">s&quot;areaUnderROC: </span><span class="si">${</span><span class="n">binarySummary</span><span class="o">.</span><span class="n">areaUnderROC</span><span class="si">}</span><span class="s">&quot;</span><span class="o">)</span>
 
 <span class="c1">// Set the model threshold to maximize F-Measure</span>
 <span class="k">val</span> <span class="n">fMeasure</span> <span class="k">=</span> <span class="n">binarySummary</span><span class="o">.</span><span class="n">fMeasureByThreshold</span>
@@ -613,7 +613,7 @@ Support for multiclass model summaries will be added in the future.</p>
 
     <p>Continuing the earlier example:</p>
 
-    <div class="highlight"><pre><span class="kn">import</span> <span class="nn">org.apache.spark.ml.classification.BinaryLogisticRegressionSummary</span><span class="o">;</span>
+    <div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">org.apache.spark.ml.classification.BinaryLogisticRegressionSummary</span><span class="o">;</span>
 <span class="kn">import</span> <span class="nn">org.apache.spark.ml.classification.LogisticRegression</span><span class="o">;</span>
 <span class="kn">import</span> <span class="nn">org.apache.spark.ml.classification.LogisticRegressionModel</span><span class="o">;</span>
 <span class="kn">import</span> <span class="nn">org.apache.spark.ml.classification.LogisticRegressionTrainingSummary</span><span class="o">;</span>
@@ -663,27 +663,27 @@ Currently, only binary classification is supported. Support for multiclass model
 
     <p>Continuing the earlier example:</p>
 
-    <div class="highlight"><pre><span class="kn">from</span> <span class="nn">pyspark.ml.classification</span> <span class="kn">import</span> <span class="n">LogisticRegression</span>
+    <div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">pyspark.ml.classification</span> <span class="kn">import</span> <span class="n">LogisticRegression</span>
 
-<span class="c"># Extract the summary from the returned LogisticRegressionModel instance trained</span>
-<span class="c"># in the earlier example</span>
+<span class="c1"># Extract the summary from the returned LogisticRegressionModel instance trained</span>
+<span class="c1"># in the earlier example</span>
 <span class="n">trainingSummary</span> <span class="o">=</span> <span class="n">lrModel</span><span class="o">.</span><span class="n">summary</span>
 
-<span class="c"># Obtain the objective per iteration</span>
+<span class="c1"># Obtain the objective per iteration</span>
 <span class="n">objectiveHistory</span> <span class="o">=</span> <span class="n">trainingSummary</span><span class="o">.</span><span class="n">objectiveHistory</span>
-<span class="k">print</span><span class="p">(</span><span class="s">&quot;objectiveHistory:&quot;</span><span class="p">)</span>
+<span class="k">print</span><span class="p">(</span><span class="s2">&quot;objectiveHistory:&quot;</span><span class="p">)</span>
 <span class="k">for</span> <span class="n">objective</span> <span class="ow">in</span> <span class="n">objectiveHistory</span><span class="p">:</span>
     <span class="k">print</span><span class="p">(</span><span class="n">objective</span><span class="p">)</span>
 
-<span class="c"># Obtain the receiver-operating characteristic as a dataframe and areaUnderROC.</span>
+<span class="c1"># Obtain the receiver-operating characteristic as a dataframe and areaUnderROC.</span>
 <span class="n">trainingSummary</span><span class="o">.</span><span class="n">roc</span><span class="o">.</span><span class="n">show</span><span class="p">()</span>
-<span class="k">print</span><span class="p">(</span><span class="s">&quot;areaUnderROC: &quot;</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">trainingSummary</span><span class="o">.</span><span class="n">areaUnderROC</span><span class="p">))</span>
+<span class="k">print</span><span class="p">(</span><span class="s2">&quot;areaUnderROC: &quot;</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">trainingSummary</span><span class="o">.</span><span class="n">areaUnderROC</span><span class="p">))</span>
 
-<span class="c"># Set the model threshold to maximize F-Measure</span>
+<span class="c1"># Set the model threshold to maximize F-Measure</span>
 <span class="n">fMeasure</span> <span class="o">=</span> <span class="n">trainingSummary</span><span class="o">.</span><span class="n">fMeasureByThreshold</span>
-<span class="n">maxFMeasure</span> <span class="o">=</span> <span class="n">fMeasure</span><span class="o">.</span><span class="n">groupBy</span><span class="p">()</span><span class="o">.</span><span class="n">max</span><span class="p">(</span><span class="s">&#39;F-Measure&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="s">&#39;max(F-Measure)&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">head</span><span class="p">()</span>
-<span class="n">bestThreshold</span> <span class="o">=</span> <span class="n">fMeasure</span><span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="n">fMeasure</span><span class="p">[</span><span class="s">&#39;F-Measure&#39;</span><span class="p">]</span> <span class="o">==</span> <span class="n">maxFMeasure</span><span class="p">[</span><span class="s">&#39;max(F-Measure)&#39;</span><span class="p">])</span> \
-    <span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="s">&#39;threshold&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">head</span><span class="p">()[</span><span class="s">&#39;threshold&#39;</span><span class="p">]</span>
+<span class="n">maxFMeasure</span> <span class="o">=</span> <span class="n">fMeasure</span><span class="o">.</span><span class="n">groupBy</span><span class="p">()</span><span class="o">.</span><span class="n">max</span><span class="p">(</span><span class="s1">&#39;F-Measure&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="s1">&#39;max(F-Measure)&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">head</span><span class="p">()</span>
+<span class="n">bestThreshold</span> <span class="o">=</span> <span class="n">fMeasure</span><span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="n">fMeasure</span><span class="p">[</span><span class="s1">&#39;F-Measure&#39;</span><span class="p">]</span> <span class="o">==</span> <span class="n">maxFMeasure</span><span class="p">[</span><span class="s1">&#39;max(F-Measure)&#39;</span><span class="p">])</span> \
+    <span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="s1">&#39;threshold&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">head</span><span class="p">()[</span><span class="s1">&#39;threshold&#39;</span><span class="p">]</span>
 <span class="n">lr</span><span class="o">.</span><span class="n">setThreshold</span><span class="p">(</span><span class="n">bestThreshold</span><span class="p">)</span>
 </pre></div>
     <div><small>Find full example code at "examples/src/main/python/ml/logistic_regression_summary_example.py" in the Spark repo.</small></div>
@@ -728,7 +728,7 @@ model with elastic net regularization.</p>
 <div class="codetabs">
 
 <div data-lang="scala">
-    <div class="highlight"><pre><span class="k">import</span> <span class="nn">org.apache.spark.ml.classification.LogisticRegression</span>
+    <div class="highlight"><pre><span></span><span class="k">import</span> <span class="nn">org.apache.spark.ml.classification.LogisticRegression</span>
 
 <span class="c1">// Load training data</span>
 <span class="k">val</span> <span class="n">training</span> <span class="k">=</span> <span class="n">spark</span>
@@ -745,14 +745,14 @@ model with elastic net regularization.</p>
 <span class="k">val</span> <span class="n">lrModel</span> <span class="k">=</span> <span class="n">lr</span><span class="o">.</span><span class="n">fit</span><span class="o">(</span><span class="n">training</span><span class="o">)</span>
 
 <span class="c1">// Print the coefficients and intercept for multinomial logistic regression</span>
-<span class="n">println</span><span class="o">(</span><span class="n">s</span><span class="s">&quot;Coefficients: \n${lrModel.coefficientMatrix}&quot;</span><span class="o">)</span>
-<span class="n">println</span><span class="o">(</span><span class="n">s</span><span class="s">&quot;Intercepts: ${lrModel.interceptVector}&quot;</span><span class="o">)</span>
+<span class="n">println</span><span class="o">(</span><span class="s">s&quot;Coefficients: \n</span><span class="si">${</span><span class="n">lrModel</span><span class="o">.</span><span class="n">coefficientMatrix</span><span class="si">}</span><span class="s">&quot;</span><span class="o">)</span>
+<span class="n">println</span><span class="o">(</span><span class="s">s&quot;Intercepts: </span><span class="si">${</span><span class="n">lrModel</span><span class="o">.</span><span class="n">interceptVector</span><span class="si">}</span><span class="s">&quot;</span><span class="o">)</span>
 </pre></div>
     <div><small>Find full example code at "examples/src/main/scala/org/apache/spark/examples/ml/MulticlassLogisticRegressionWithElasticNetExample.scala" in the Spark repo.</small></div>
   </div>
 
 <div data-lang="java">
-    <div class="highlight"><pre><span class="kn">import</span> <span class="nn">org.apache.spark.ml.classification.LogisticRegression</span><span class="o">;</span>
+    <div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">org.apache.spark.ml.classification.LogisticRegression</span><span class="o">;</span>
 <span class="kn">import</span> <span class="nn">org.apache.spark.ml.classification.LogisticRegressionModel</span><span class="o">;</span>
 <span class="kn">import</span> <span class="nn">org.apache.spark.sql.Dataset</span><span class="o">;</span>
 <span class="kn">import</span> <span class="nn">org.apache.spark.sql.Row</span><span class="o">;</span>
@@ -762,7 +762,7 @@ model with elastic net regularization.</p>
 <span class="n">Dataset</span><span class="o">&lt;</span><span class="n">Row</span><span class="o">&gt;</span> <span class="n">training</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="na">read</span><span class="o">().</span><span class="na">format</span><span class="o">(</span><span class="s">&quot;libsvm&quot;</span><span class="o">)</span>
         <span class="o">.</span><span class="na">load</span><span class="o">(</span><span class="s">&quot;data/mllib/sample_multiclass_classification_data.txt&quot;</span><span class="o">);</span>
 
-<span class="n">LogisticRegression</span> <span class="n">lr</span> <span class="o">=</span> <span class="k">new</span> <span class="nf">LogisticRegression</span><span class="o">()</span>
+<span class="n">LogisticRegression</span> <span class="n">lr</span> <span class="o">=</span> <span class="k">new</span> <span class="n">LogisticRegression</span><span class="o">()</span>
         <span class="o">.</span><span class="na">setMaxIter</span><span class="o">(</span><span class="mi">10</span><span class="o">)</span>
         <span class="o">.</span><span class="na">setRegParam</span><span class="o">(</span><span class="mf">0.3</span><span class="o">)</span>
         <span class="o">.</span><span class="na">setElasticNetParam</span><span class="o">(</span><span class="mf">0.8</span><span class="o">);</span>
@@ -778,22 +778,22 @@ model with elastic net regularization.</p>
   </div>
 
 <div data-lang="python">
-    <div class="highlight"><pre><span class="kn">from</span> <span class="nn">pyspark.ml.classification</span> <span class="kn">import</span> <span class="n">LogisticRegression</span>
+    <div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">pyspark.ml.classification</span> <span class="kn">import</span> <span class="n">LogisticRegression</span>
 
-<span class="c"># Load training data</span>
+<span class="c1"># Load training data</span>
 <span class="n">training</span> <span class="o">=</span> <span class="n">spark</span> \
     <span class="o">.</span><span class="n">read</span> \
-    <span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="s">&quot;libsvm&quot;</span><span class="p">)</span> \
-    <span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="s">&quot;data/mllib/sample_multiclass_classification_data.txt&quot;</span><span class="p">)</span>
+    <span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="s2">&quot;libsvm&quot;</span><span class="p">)</span> \
+    <span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="s2">&quot;data/mllib/sample_multiclass_classification_data.txt&quot;</span><span class="p">)</span>
 
 <span class="n">lr</span> <span class="o">=</span> <span class="n">LogisticRegression</span><span class="p">(</span><span class="n">maxIter</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">regParam</span><span class="o">=</span><span class="mf">0.3</span><span class="p">,</span> <span class="n">elasticNetParam</span><span class="o">=</span><span class="mf">0.8</span><span class="p">)</span>
 
-<span class="c"># Fit the model</span>
+<span class="c1"># Fit the model</span>
 <span class="n">lrModel</span> <span class="o">=</span> <span class="n">lr</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">training</span><span class="p">)</span>
 
-<span class="c"># Print the coefficients and intercept for multinomial logistic regression</span>
-<span class="k">print</span><span class="p">(</span><span class="s">&quot;Coefficients: </span><span class="se">\n</span><span class="s">&quot;</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">lrModel</span><span class="o">.</span><span class="n">coefficientMatrix</span><span class="p">))</span>
-<span class="k">print</span><span class="p">(</span><span class="s">&quot;Intercept: &quot;</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">lrModel</span><span class="o">.</span><span class="n">interceptVector</span><span class="p">))</span>
+<span class="c1"># Print the coefficients and intercept for multinomial logistic regression</span>
+<span class="k">print</span><span class="p">(</span><span class="s2">&quot;Coefficients: </span><span class="se">\n</span><span class="s2">&quot;</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">lrModel</span><span class="o">.</span><span class="n">coefficientMatrix</span><span class="p">))</span>
+<span class="k">print</span><span class="p">(</span><span class="s2">&quot;Intercept: &quot;</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">lrModel</span><span class="o">.</span><span class="n">interceptVector</span><span class="p">))</span>
 </pre></div>
     <div><small>Find full example code at "examples/src/main/python/ml/multiclass_logistic_regression_with_elastic_net.py" in the Spark repo.</small></div>
   </div>
@@ -802,7 +802,7 @@ model with elastic net regularization.</p>
 
     <p>More details on parameters can be found in the <a href="api/R/spark.logit.html">R API documentation</a>.</p>
 
-    <div class="highlight"><pre><span class="c1"># Load training data</span>
+    <div class="highlight"><pre><span></span><span class="c1"># Load training data</span>
 df <span class="o">&lt;-</span> read.df<span class="p">(</span><span class="s">&quot;data/mllib/sample_multiclass_classification_data.txt&quot;</span><span class="p">,</span> <span class="kn">source</span> <span class="o">=</span> <span class="s">&quot;libsvm&quot;</span><span class="p">)</span>
 training <span class="o">&lt;-</span> df
 test <span class="o">&lt;-</span> df
@@ -837,7 +837,7 @@ We use two feature transformers to prepare the data; these help index categories
 
     <p>More details on parameters can be found in the <a href="api/scala/index.html#org.apache.spark.ml.classification.DecisionTreeClassifier">Scala API documentation</a>.</p>
 
-    <div class="highlight"><pre><span class="k">import</span> <span class="nn">org.apache.spark.ml.Pipeline</span>
+    <div class="highlight"><pre><span></span><span class="k">import</span> <span class="nn">org.apache.spark.ml.Pipeline</span>
 <span class="k">import</span> <span class="nn">org.apache.spark.ml.classification.DecisionTreeClassificationModel</span>
 <span class="k">import</span> <span class="nn">org.apache.spark.ml.classification.DecisionTreeClassifier</span>
 <span class="k">import</span> <span class="nn">org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator</span>
@@ -905,7 +905,7 @@ We use two feature transformers to prepare the data; these help index categories
 
     <p>More details on parameters can be found in the <a href="api/java/org/apache/spark/ml/classification/DecisionTreeClassifier.html">Java API documentation</a>.</p>
 
-    <div class="highlight"><pre><span class="kn">import</span> <span class="nn">org.apache.spark.ml.Pipeline</span><span class="o">;</span>
+    <div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">org.apache.spark.ml.Pipeline</span><span class="o">;</span>
 <span class="kn">import</span> <span class="nn">org.apache.spark.ml.PipelineModel</span><span class="o">;</span>
 <span class="kn">import</span> <span class="nn">org.apache.spark.ml.PipelineStage</span><span class="o">;</span>
 <span class="kn">import</span> <span class="nn">org.apache.spark.ml.classification.DecisionTreeClassifier</span><span class="o">;</span>
@@ -924,13 +924,13 @@ We use two feature transformers to prepare the data; these help index categories
 
 <span class="c1">// Index labels, adding metadata to the label column.</span>
 <span class="c1">// Fit on whole dataset to include all labels in index.</span>
-<span class="n">StringIndexerModel</span> <span class="n">labelIndexer</span> <span class="o">=</span> <span class="k">new</span> <span class="nf">StringIndexer</span><span class="o">()</span>
+<span class="n">StringIndexerModel</span> <span class="n">labelIndexer</span> <span class="o">=</span> <span class="k">new</span> <span class="n">StringIndexer</span><span class="o">()</span>
   <span class="o">.</span><span class="na">setInputCol</span><span class="o">(</span><span class="s">&quot;label&quot;</span><span class="o">)</span>
   <span class="o">.</span><span class="na">setOutputCol</span><span class="o">(</span><span class="s">&quot;indexedLabel&quot;</span><span class="o">)</span>
   <span class="o">.</span><span class="na">fit</span><span class="o">(</span><span class="n">data</span><span class="o">);</span>
 
 <span class="c1">// Automatically identify categorical features, and index them.</span>
-<span class="n">VectorIndexerModel</span> <span class="n">featureIndexer</span> <span class="o">=</span> <span class="k">new</span> <span class="nf">VectorIndexer</span><span class="o">()</span>
+<span class="n">VectorIndexerModel</span> <span class="n">featureIndexer</span> <span class="o">=</span> <span class="k">new</span> <span class="n">VectorIndexer</span><span class="o">()</span>
   <span class="o">.</span><span class="na">setInputCol</span><span class="o">(</span><span class="s">&quot;features&quot;</span><span class="o">)</span>
   <span class="o">.</span><span class="na">setOutputCol</span><span class="o">(</span><span class="s">&quot;indexedFeatures&quot;</span><span class="o">)</span>
   <span class="o">.</span><span class="na">setMaxCategories</span><span class="o">(</span><span class="mi">4</span><span class="o">)</span> <span class="c1">// features with &gt; 4 distinct values are treated as continuous.</span>
@@ -942,18 +942,18 @@ We use two feature transformers to prepare the data; these help index categories
 <span class="n">Dataset</span><span class="o">&lt;</span><span class="n">Row</span><span class="o">&gt;</span> <span class="n">testData</span> <span class="o">=</span> <span class="n">splits</span><span class="o">[</span><span class="mi">1</span><span class="o">];</span>
 
 <span class="c1">// Train a DecisionTree model.</span>
-<span class="n">DecisionTreeClassifier</span> <span class="n">dt</span> <span class="o">=</span> <span class="k">new</span> <span class="nf">DecisionTreeClassifier</span><span class="o">()</span>
+<span class="n">DecisionTreeClassifier</span> <span class="n">dt</span> <span class="o">=</span> <span class="k">new</span> <span class="n">DecisionTreeClassifier</span><span class="o">()</span>
   <span class="o">.</span><span class="na">setLabelCol</span><span class="o">(</span><span class="s">&quot;indexedLabel&quot;</span><span class="o">)</span>
   <span class="o">.</span><span class="na">setFeaturesCol</span><span class="o">(</span><span class="s">&quot;indexedFeatures&quot;</span><span class="o">);</span>
 
 <span class="c1">// Convert indexed labels back to original labels.</span>
-<span class="n">IndexToString</span> <span class="n">labelConverter</span> <span class="o">=</span> <span class="k">new</span> <span class="nf">IndexToString</span><span class="o">()</span>
+<span class="n">IndexToString</span> <span class="n">labelConverter</span> <span class="o">=</span> <span class="k">new</span> <span class="n">IndexToString</span><span class="o">()</span>
   <span class="o">.</span><span class="na">setInputCol</span><span class="o">(</span><span class="s">&quot;prediction&quot;</span><span class="o">)</span>
   <span class="o">.</span><span class="na">setOutputCol</span><span class="o">(</span><span class="s">&quot;predictedLabel&quot;</span><span class="o">)</span>
   <span class="o">.</span><span class="na">setLabels</span><span class="o">(</span><span class="n">labelIndexer</span><span class="o">.</span><span class="na">labels</span><span class="o">());</span>
 
 <span class="c1">// Chain indexers and tree in a Pipeline.</span>
-<span class="n">Pipeline</span> <span class="n">pipeline</span> <span class="o">=</span> <span class="k">new</span> <span class="nf">Pipeline</span><span class="o">()</span>
+<span class="n">Pipeline</span> <span class="n">pipeline</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Pipeline</span><span class="o">()</span>
   <span class="o">.</span><span class="na">setStages</span><span class="o">(</span><span class="k">new</span> <span class="n">PipelineStage</span><span class="o">[]{</span><span class="n">labelIndexer</span><span class="o">,</span> <span class="n">featureIndexer</span><span class="o">,</span> <span class="n">dt</span><span class="o">,</span> <span class="n">labelConverter</span><span class="o">});</span>
 
 <span class="c1">// Train model. This also runs the indexers.</span>
@@ -966,7 +966,7 @@ We use two feature transformers to prepare the data; these help index categories
 <span class="n">predictions</span><span class="o">.</span><span class="na">select</span><span class="o">(</span><span class="s">&quot;predictedLabel&quot;</span><span class="o">,</span> <span class="s">&quot;label&quot;</span><span class="o">,</span> <span class="s">&quot;features&quot;</span><span class="o">).</span><span class="na">show</span><span class="o">(</span><span class="mi">5</span><span class="o">);</span>
 
 <span class="c1">// Select (prediction, true label) and compute test error.</span>
-<span class="n">MulticlassClassificationEvaluator</span> <span class="n">evaluator</span> <span class="o">=</span> <span class="k">new</span> <span class="nf">MulticlassClassificationEvaluator</span><span class="o">()</span>
+<span class="n">MulticlassClassificationEvaluator</span> <span class="n">evaluator</span> <span class="o">=</span> <span class="k">new</span> <span class="n">MulticlassClassificationEvaluator</span><span class="o">()</span>
   <span class="o">.</span><span class="na">setLabelCol</span><span class="o">(</span><span class="s">&quot;indexedLabel&quot;</span><span class="o">)</span>
   <span class="o">.</span><span class="na">setPredictionCol</span><span class="o">(</span><span class="s">&quot;prediction&quot;</span><span class="o">)</span>
   <span class="o">.</span><span class="na">setMetricName</span><span class="o">(</span><span class="s">&quot;accuracy&quot;</span><span class="o">);</span>
@@ -985,48 +985,48 @@ We use two feature transformers to prepare the data; these help index categories
 
     <p>More details on parameters can be found in the <a href="api/python/pyspark.ml.html#pyspark.ml.classification.DecisionTreeClassifier">Python API documentation</a>.</p>
 
-    <div class="highlight"><pre><span class="kn">from</span> <span class="nn">pyspark.ml</span> <span class="kn">import</span> <span class="n">Pipeline</span>
+    <div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">pyspark.ml</span> <span class="kn">import</span> <span class="n">Pipeline</span>
 <span class="kn">from</span> <span class="nn">pyspark.ml.classification</span> <span class="kn">import</span> <span class="n">DecisionTreeClassifier</span>
 <span class="kn">from</span> <span class="nn">pyspark.ml.feature</span> <span class="kn">import</span> <span class="n">StringIndexer</span><span class="p">,</span> <span class="n">VectorIndexer</span>
 <span class="kn">from</span> <span class="nn">pyspark.ml.evaluation</span> <span class="kn">import</span> <span class="n">MulticlassClassificationEvaluator</span>
 
-<span class="c"># Load the data stored in LIBSVM format as a DataFrame.</span>
-<span class="n">data</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">read</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="s">&quot;libsvm&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="s">&quot;data/mllib/sample_libsvm_data.txt&quot;</span><span class="p">)</span>
+<span class="c1"># Load the data stored in LIBSVM format as a DataFrame.</span>
+<span class="n">data</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">read</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="s2">&quot;libsvm&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="s2">&quot;data/mllib/sample_libsvm_data.txt&quot;</span><span class="p">)</span>
 
-<span class="c"># Index labels, adding metadata to the label column.</span>
-<span class="c"># Fit on whole dataset to include all labels in index.</span>
-<span class="n">labelIndexer</span> <span class="o">=</span> <span class="n">StringIndexer</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="s">&quot;label&quot;</span><span class="p">,</span> <span class="n">outputCol</span><span class="o">=</span><span class="s">&quot;indexedLabel&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
-<span class="c"># Automatically identify categorical features, and index them.</span>
-<span class="c"># We specify maxCategories so features with &gt; 4 distinct values are treated as continuous.</span>
+<span class="c1"># Index labels, adding metadata to the label column.</span>
+<span class="c1"># Fit on whole dataset to include all labels in index.</span>
+<span class="n">labelIndexer</span> <span class="o">=</span> <span class="n">StringIndexer</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="s2">&quot;label&quot;</span><span class="p">,</span> <span class="n">outputCol</span><span class="o">=</span><span class="s2">&quot;indexedLabel&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
+<span class="c1"># Automatically identify categorical features, and index them.</span>
+<span class="c1"># We specify maxCategories so features with &gt; 4 distinct values are treated as continuous.</span>
 <span class="n">featureIndexer</span> <span class="o">=</span>\
-    <span class="n">VectorIndexer</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="s">&quot;features&quot;</span><span class="p">,</span> <span class="n">outputCol</span><span class="o">=</span><span class="s">&quot;indexedFeatures&quot;</span><span class="p">,</span> <span class="n">maxCategories</span><span class="o">=</span><span class="mi">4</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
+    <span class="n">VectorIndexer</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="s2">&quot;features&quot;</span><span class="p">,</span> <span class="n">outputCol</span><span class="o">=</span><span class="s2">&quot;indexedFeatures&quot;</span><span class="p">,</span> <span class="n">maxCategories</span><span class="o">=</span><span class="mi">4</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
 
-<span class="c"># Split the data into training and test sets (30% held out for testing)</span>
+<span class="c1"># Split the data into training and test sets (30% held out for testing)</span>
 <span class="p">(</span><span class="n">trainingData</span><span class="p">,</span> <span class="n">testData</span><span class="p">)</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">randomSplit</span><span class="p">([</span><span class="mf">0.7</span><span class="p">,</span> <span class="mf">0.3</span><span class="p">])</span>
 
-<span class="c"># Train a DecisionTree model.</span>
-<span class="n">dt</span> <span class="o">=</span> <span class="n">DecisionTreeClassifier</span><span class="p">(</span><span class="n">labelCol</span><span class="o">=</span><span class="s">&quot;indexedLabel&quot;</span><span class="p">,</span> <span class="n">featuresCol</span><span class="o">=</span><span class="s">&quot;indexedFeatures&quot;</span><span class="p">)</span>
+<span class="c1"># Train a DecisionTree model.</span>
+<span class="n">dt</span> <span class="o">=</span> <span class="n">DecisionTreeClassifier</span><span class="p">(</span><span class="n">labelCol</span><span class="o">=</span><span class="s2">&quot;indexedLabel&quot;</span><span class="p">,</span> <span class="n">featuresCol</span><span class="o">=</span><span class="s2">&quot;indexedFeatures&quot;</span><span class="p">)</span>
 
-<span class="c"># Chain indexers and tree in a Pipeline</span>
+<span class="c1"># Chain indexers and tree in a Pipeline</span>
 <span class="n">pipeline</span> <span class="o">=</span> <span class="n">Pipeline</span><span class="p">(</span><span class="n">stages</span><span class="o">=</span><span class="p">[</span><span class="n">labelIndexer</span><span class="p">,</span> <span class="n">featureIndexer</span><span class="p">,</span> <span class="n">dt</span><span class="p">])</span>
 
-<span class="c"># Train model.  This also runs the indexers.</span>
+<span class="c1"># Train model.  This also runs the indexers.</span>
 <span class="n">model</span> <span class="o">=</span> <span class="n">pipeline</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">trainingData</span><span class="p">)</span>
 
-<span class="c"># Make predictions.</span>
+<span class="c1"># Make predictions.</span>
 <span class="n">predictions</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">testData</span><span class="p">)</span>
 
-<span class="c"># Select example rows to display.</span>
-<span class="n">predictions</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="s">&quot;prediction&quot;</span><span class="p">,</span> <span class="s">&quot;indexedLabel&quot;</span><span class="p">,</span> <span class="s">&quot;features&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">show</span><span class="p">(</span><span class="mi">5</span><span class="p">)</span>
+<span class="c1"># Select example rows to display.</span>
+<span class="n">predictions</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="s2">&quot;prediction&quot;</span><span class="p">,</span> <span class="s2">&quot;indexedLabel&quot;</span><span class="p">,</span> <span class="s2">&quot;features&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">show</span><span class="p">(</span><span class="mi">5</span><span class="p">)</span>
 
-<span class="c"># Select (prediction, true label) and compute test error</span>
+<span class="c1"># Select (prediction, true label) and compute test error</span>
 <span class="n">evaluator</span> <span class="o">=</span> <span class="n">MulticlassClassificationEvaluator</span><span class="p">(</span>
-    <span class="n">labelCol</span><span class="o">=</span><span class="s">&quot;indexedLabel&quot;</span><span class="p">,</span> <span class="n">predictionCol</span><span class="o">=</span><span class="s">&quot;prediction&quot;</span><span class="p">,</span> <span class="n">metricName</span><span class="o">=</span><span class="s">&quot;accuracy&quot;</span><span class="p">)</span>
+    <span class="n">labelCol</span><span class="o">=</span><span class="s2">&quot;indexedLabel&quot;</span><span class="p">,</span> <span class="n">predictionCol</span><span class="o">=</span><span class="s2">&quot;prediction&quot;</span><span class="p">,</span> <span class="n">metricName</span><span class="o">=</span><span class="s2">&quot;accuracy&quot;</span><span class="p">)</span>
 <span class="n">accuracy</span> <span class="o">=</span> <span class="n">evaluator</span><span class="o">.</span><span class="n">evaluate</span><span class="p">(</span><span class="n">predictions</span><span class="p">)</span>
-<span class="k">print</span><span class="p">(</span><span class="s">&quot;Test Error = </span><span class="si">%g</span><span class="s"> &quot;</span> <span class="o">%</span> <span class="p">(</span><span class="mf">1.0</span> <span class="o">-</span> <span class="n">accuracy</span><span class="p">))</span>
+<span class="k">print</span><span class="p">(</span><span class="s2">&quot;Test Error = </span><span class="si">%g</span><span class="s2"> &quot;</span> <span class="o">%</span> <span class="p">(</span><span class="mf">1.0</span> <span class="o">-</span> <span class="n">accuracy</span><span class="p">))</span>
 
 <span class="n">treeModel</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">stages</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span>
-<span class="c"># summary only</span>
+<span class="c1"># summary only</span>
 <span class="k">print</span><span class="p">(</span><span class="n">treeModel</span><span class="p">)</span>
 </pre></div>
     <div><small>Find full example code at "examples/src/main/python/ml/decision_tree_classification_example.py" in the Spark repo.</small></div>
@@ -1050,7 +1050,7 @@ We use two feature transformers to prepare the data; these help index categories
 
     <p>Refer to the <a href="api/scala/index.html#org.apache.spark.ml.classification.RandomForestClassifier">Scala API docs</a> for more details.</p>
 
-    <div class="highlight"><pre><span class="k">import</span> <span class="nn">org.apache.spark.ml.Pipeline</span>
+    <div class="highlight"><pre><span></span><span class="k">import</span> <span class="nn">org.apache.spark.ml.Pipeline</span>
 <span class="k">import</span> <span class="nn">org.apache.spark.ml.classification.</span><span class="o">{</span><span class="nc">RandomForestClassificationModel</span><span class="o">,</span> <span class="nc">RandomForestClassifier</span><span class="o">}</span>
 <span class="k">import</span> <span class="nn">org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator</span>
 <span class="k">import</span> <span class="nn">org.apache.spark.ml.feature.</span><span class="o">{</span><span class="nc">IndexToString</span><span class="o">,</span> <span class="nc">StringIndexer</span><span class="o">,</span> <span class="nc">VectorIndexer</span><span class="o">}</span>
@@ -1118,7 +1118,7 @@ We use two feature transformers to prepare the data; these help index categories
 
     <p>Refer to the <a href="api/java/org/apache/spark/ml/classification/RandomForestClassifier.html">Java API docs</a> for more details.</p>
 
-    <div class="highlight"><pre><span class="kn">import</span> <span class="nn">org.apache.spark.ml.Pipeline</span><span class="o">;</span>
+    <div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">org.apache.spark.ml.Pipeline</span><span class="o">;</span>
 <span class="kn">import</span> <span class="nn">org.apache.spark.ml.PipelineModel</span><span class="o">;</span>
 <span class="kn">import</span> <span class="nn">org.apache.spark.ml.PipelineStage</span><span class="o">;</span>
 <span class="kn">import</span> <span class="nn">org.apache.spark.ml.classification.RandomForestClassificationModel</span><span class="o">;</span>
@@ -1134,13 +1134,13 @@ We use two feature transformers to prepare the data; these help index categories
 
 <span class="c1">// Index labels, adding metadata to the label column.</span>
 <span class="c1">// Fit on whole dataset to include all labels in index.</span>
-<span class="n">StringIndexerModel</span> <span class="n">labelIndexer</span> <span class="o">=</span> <span class="k">new</span> <span class="nf">StringIndexer</span><span class="o">()</span>
+<span class="n">StringIndexerModel</span> <span class="n">labelIndexer</span> <span class="o">=</span> <span class="k">new</span> <span class="n">StringIndexer</span><span class="o">()</span>
   <span class="o">.</span><span class="na">setInputCol</span><span class="o">(</span><span class="s">&quot;label&quot;</span><span class="o">)</span>
   <span class="o">.</span><span class="na">setOutputCol</span><span class="o">(</span><span class="s">&quot;indexedLabel&quot;</span><span class="o">)</span>
   <span class="o">.</span><span class="na">fit</span><span class="o">(</span><span class="n">data</span><span class="o">);</span>
 <span class="c1">// Automatically identify categorical features, and index them.</span>
 <span class="c1">// Set maxCategories so features with &gt; 4 distinct values are treated as continuous.</span>
-<span class="n">VectorIndexerModel</span> <span class="n">featureIndexer</span> <span class="o">=</span> <span class="k">new</span> <span class="nf">VectorIndexer</span><span class="o">()</span>
+<span class="n">VectorIndexerModel</span> <span class="n">featureIndexer</span> <span class="o">=</span> <span class="k">new</span> <span class="n">VectorIndexer</span><span class="o">()</span>
   <span class="o">.</span><span class="na">setInputCol</span><span class="o">(</span><span class="s">&quot;features&quot;</span><span class="o">)</span>
   <span class="o">.</span><span class="na">setOutputCol</span><span class="o">(</span><span class="s">&quot;indexedFeatures&quot;</span><span class="o">)</span>
   <span class="o">.</span><span class="na">setMaxCategories</span><span class="o">(</span><span class="mi">4</span><span class="o">)</span>
@@ -1152,18 +1152,18 @@ We use two feature transformers to prepare the data; these help index categories
 <span class="n">Dataset</span><span class="o">&lt;</span><span class="n">Row</span><span class="o">&gt;</span> <span class="n">testData</span> <span class="o">=</span> <span class="n">splits</span><span class="o">[</span><span class="mi">1</span><span class="o">];</span>
 
 <span class="c1">// Train a RandomForest model.</span>
-<span class="n">RandomForestClassifier</span> <span class="n">rf</span> <span class="o">=</span> <span class="k">new</span> <span class="nf">RandomForestClassifier</span><span class="o">()</span>
+<span class="n">RandomForestClassifier</span> <span class="n">rf</span> <span class="o">=</span> <span class="k">new</span> <span class="n">RandomForestClassifier</span><span class="o">()</span>
   <span class="o">.</span><span class="na">setLabelCol</span><span class="o">(</span><span class="s">&quot;indexedLabel&quot;</span><span class="o">)</span>
   <span class="o">.</span><span class="na">setFeaturesCol</span><span class="o">(</span><span class="s">&quot;indexedFeatures&quot;</span><span class="o">);</span>
 
 <span class="c1">// Convert indexed labels back to original labels.</span>
-<span class="n">IndexToString</span> <span class="n">labelConverter</span> <span class="o">=</span> <span class="k">new</span> <span class="nf">IndexToString</span><span class="o">()</span>
+<span class="n">IndexToString</span> <span class="n">labelConverter</span> <span class="o">=</span> <span class="k">new</span> <span class="n">IndexToString</span><span class="o">()</span>
   <span class="o">.</span><span class="na">setInputCol</span><span class="o">(</span><span class="s">&quot;prediction&quot;</span><span class="o">)</span>
   <span class="o">.</span><span class="na">setOutputCol</span><span class="o">(</span><span class="s">&quot;predictedLabel&quot;</span><span class="o">)</span>
   <span class="o">.</span><span class="na">setLabels</span><span class="o">(</span><span class="n">labelIndexer</span><span class="o">.</span><span class="na">labels</span><span class="o">());</span>
 
 <span class="c1">// Chain indexers and forest in a Pipeline</span>
-<span class="n">Pipeline</span> <span class="n">pipeline</span> <span class="o">=</span> <span class="k">new</span> <span class="nf">Pipeline</span><span class="o">()</span>
+<span class="n">Pipeline</span> <span class="n">pipeline</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Pipeline</span><span class="o">()</span>
   <span class="o">.</span><span class="na">setStages</span><span class="o">(</span><span class="k">new</span> <span class="n">PipelineStage</span><span class="o">[]</span> <span class="o">{</span><span class="n">labelIndexer</span><span class="o">,</span> <span class="n">featureIndexer</span><span class="o">,</span> <span class="n">rf</span><span class="o">,</span> <span class="n">labelConverter</span><span class="o">});</span>
 
 <span class="c1">// Train model. This also runs the indexers.</span>
@@ -1176,7 +1176,7 @@ We use two feature transformers to prepare the data; these help index categories
 <span class="n">predictions</span><span class="o">.</span><span class="na">select</span><span class="o">(</span><span class="s">&quot;predictedLabel&quot;</span><span class="o">,</span> <span class="s">&quot;label&quot;</span><span class="o">,</span> <span class="s">&quot;features&quot;</span><span class="o">).</span><span class="na">show</span><span class="o">(</span><span class="mi">5</span><span class="o">);</span>
 
 <span class="c1">// Select (prediction, true label) and compute test error</span>
-<span class="n">MulticlassClassificationEvaluator</span> <span class="n">evaluator</span> <span class="o">=</span> <span class="k">new</span> <span class="nf">MulticlassClassificationEvaluator</span><span class="o">()</span>
+<span class="n">MulticlassClassificationEvaluator</span> <span class="n">evaluator</span> <span class="o">=</span> <span class="k">new</span> <span class="n">MulticlassClassificationEvaluator</span><span class="o">()</span>
   <span class="o">.</span><span class="na">setLabelCol</span><span class="o">(</span><span class="s">&quot;indexedLabel&quot;</span><span class="o">)</span>
   <span class="o">.</span><span class="na">setPredictionCol</span><span class="o">(</span><span class="s">&quot;prediction&quot;</span><span class="o">)</span>
   <span class="o">.</span><span class="na">setMetricName</span><span class="o">(</span><span class="s">&quot;accuracy&quot;</span><span class="o">);</span>
@@ -1193,53 +1193,53 @@ We use two feature transformers to prepare the data; these help index categories
 
     <p>Refer to the <a href="api/python/pyspark.ml.html#pyspark.ml.classification.RandomForestClassifier">Python API docs</a> for more details.</p>
 
-    <div class="highlight"><pre><span class="kn">from</span> <span class="nn">pyspark.ml</span> <span class="kn">import</span> <span class="n">Pipeline</span>
+    <div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">pyspark.ml</span> <span class="kn">import</span> <span class="n">Pipeline</span>
 <span class="kn">from</span> <span class="nn">pyspark.ml.classification</span> <span class="kn">import</span> <span class="n">RandomForestClassifier</span>
 <span class="kn">from</span> <span class="nn">pyspark.ml.feature</span> <span class="kn">import</span> <span class="n">IndexToString</span><span class="p">,</span> <span class="n">StringIndexer</span><span class="p">,</span> <span class="n">VectorIndexer</span>
 <span class="kn">from</span> <span class="nn">pyspark.ml.evaluation</span> <span class="kn">import</span> <span class="n">MulticlassClassificationEvaluator</span>
 
-<span class="c"># Load and parse the data file, converting it to a DataFrame.</span>
-<span class="n">data</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">read</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="s">&quot;libsvm&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="s">&quot;data/mllib/sample_libsvm_data.txt&quot;</span><span class="p">)</span>
+<span class="c1"># Load and parse the data file, converting it to a DataFrame.</span>
+<span class="n">data</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">read</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="s2">&quot;libsvm&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="s2">&quot;data/mllib/sample_libsvm_data.txt&quot;</span><span class="p">)</span>
 
-<span class="c"># Index labels, adding metadata to the label column.</span>
-<span class="c"># Fit on whole dataset to include all labels in index.</span>
-<span class="n">labelIndexer</span> <span class="o">=</span> <span class="n">StringIndexer</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="s">&quot;label&quot;</span><span class="p">,</span> <span class="n">outputCol</span><span class="o">=</span><span class="s">&quot;indexedLabel&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
+<span class="c1"># Index labels, adding metadata to the label column.</span>
+<span class="c1"># Fit on whole dataset to include all labels in index.</span>
+<span class="n">labelIndexer</span> <span class="o">=</span> <span class="n">StringIndexer</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="s2">&quot;label&quot;</span><span class="p">,</span> <span class="n">outputCol</span><span class="o">=</span><span class="s2">&quot;indexedLabel&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
 
-<span class="c"># Automatically identify categorical features, and index them.</span>
-<span class="c"># Set maxCategories so features with &gt; 4 distinct values are treated as continuous.</span>
+<span class="c1"># Automatically identify categorical features, and index them.</span>
+<span class="c1"># Set maxCategories so features with &gt; 4 distinct values are treated as continuous.</span>
 <span class="n">featureIndexer</span> <span class="o">=</span>\
-    <span class="n">VectorIndexer</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="s">&quot;features&quot;</span><span class="p">,</span> <span class="n">outputCol</span><span class="o">=</span><span class="s">&quot;indexedFeatures&quot;</span><span class="p">,</span> <span class="n">maxCategories</span><span class="o">=</span><span class="mi">4</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
+    <span class="n">VectorIndexer</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="s2">&quot;features&quot;</span><span class="p">,</span> <span class="n">outputCol</span><span class="o">=</span><span class="s2">&quot;indexedFeatures&quot;</span><span class="p">,</span> <span class="n">maxCategories</span><span class="o">=</span><span class="mi">4</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
 
-<span class="c"># Split the data into training and test sets (30% held out for testing)</span>
+<span class="c1"># Split the data into training and test sets (30% held out for testing)</span>
 <span class="p">(</span><span class="n">trainingData</span><span class="p">,</span> <span class="n">testData</span><span class="p">)</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">randomSplit</span><span class="p">([</span><span class="mf">0.7</span><span class="p">,</span> <span class="mf">0.3</span><span class="p">])</span>
 
-<span class="c"># Train a RandomForest model.</span>
-<span class="n">rf</span> <span class="o">=</span> <span class="n">RandomForestClassifier</span><span class="p">(</span><span class="n">labelCol</span><span class="o">=</span><span class="s">&quot;indexedLabel&quot;</span><span class="p">,</span> <span class="n">featuresCol</span><span class="o">=</span><span class="s">&quot;indexedFeatures&quot;</span><span class="p">,</span> <span class="n">numTrees</span><span class="o">=</span><span class="mi">10</span><span class="p">)</span>
+<span class="c1"># Train a RandomForest model.</span>
+<span class="n">rf</span> <span class="o">=</span> <span class="n">RandomForestClassifier</span><span class="p">(</span><span class="n">labelCol</span><span class="o">=</span><span class="s2">&quot;indexedLabel&quot;</span><span class="p">,</span> <span class="n">featuresCol</span><span class="o">=</span><span class="s2">&quot;indexedFeatures&quot;</span><span class="p">,</span> <span class="n">numTrees</span><span class="o">=</span><span class="mi">10</span><span class="p">)</span>
 
-<span class="c"># Convert indexed labels back to original labels.</span>
-<span class="n">labelConverter</span> <span class="o">=</span> <span class="n">IndexToString</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="s">&quot;prediction&quot;</span><span class="p">,</span> <span class="n">outputCol</span><span class="o">=</span><span class="s">&quot;predictedLabel&quot;</span><span class="p">,</span>
+<span class="c1"># Convert indexed labels back to original labels.</span>
+<span class="n">labelConverter</span> <span class="o">=</span> <span class="n">IndexToString</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="s2">&quot;prediction&quot;</span><span class="p">,</span> <span class="n">outputCol</span><span class="o">=</span><span class="s2">&quot;predictedLabel&quot;</span><span class="p">,</span>
                                <span class="n">labels</span><span class="o">=</span><span class="n">labelIndexer</span><span class="o">.</span><span class="n">labels</span><span class="p">)</span>
 
-<span class="c"># Chain indexers and forest in a Pipeline</span>
+<span class="c1"># Chain indexers and forest in a Pipeline</span>
 <span class="n">pipeline</span> <span class="o">=</span> <span class="n">Pipeline</span><span class="p">(</span><span class="n">stages</span><span class="o">=</span><span class="p">[</span><span class="n">labelIndexer</span><span class="p">,</span> <span class="n">featureIndexer</span><span class="p">,</span> <span class="n">rf</span><span class="p">,</span> <span class="n">labelConverter</span><span class="p">])</span>
 
-<span class="c"># Train model.  This also runs the indexers.</span>
+<span class="c1"># Train model.  This also runs the indexers.</span>
 <span class="n">model</span> <span class="o">=</span> <span class="n">pipeline</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">trainingData</span><span class="p">)</span>
 
-<span class="c"># Make predictions.</span>
+<span class="c1"># Make predictions.</span>
 <span class="n">predictions</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">testData</span><span class="p">)</span>
 
-<span class="c"># Select example rows to display.</span>
-<span class="n">predictions</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="s">&quot;predictedLabel&quot;</span><span class="p">,</span> <span class="s">&quot;label&quot;</span><span class="p">,</span> <span class="s">&quot;features&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">show</span><span class="p">(</span><span class="mi">5</span><span class="p">)</span>
+<span class="c1"># Select example rows to display.</span>
+<span class="n">predictions</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="s2">&quot;predictedLabel&quot;</span><span class="p">,</span> <span class="s2">&quot;label&quot;</span><span class="p">,</span> <span class="s2">&quot;features&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">show</span><span class="p">(</span><span class="mi">5</span><span class="p">)</span>
 
-<span class="c"># Select (prediction, true label) and compute test error</span>
+<span class="c1"># Select (prediction, true label) and compute test error</span>
 <span class="n">evaluator</span> <span class="o">=</span> <span class="n">MulticlassClassificationEvaluator</span><span class="p">(</span>
-    <span class="n">labelCol</span><span class="o">=</span><span class="s">&quot;indexedLabel&quot;</span><span class="p">,</span> <span class="n">predictionCol</span><span class="o">=</span><span class="s">&quot;prediction&quot;</span><span class="p">,</span> <span class="n">metricName</span><span class="o">=</span><span class="s">&quot;accuracy&quot;</span><span class="p">)</span>
+    <span class="n">labelCol</span><span class="o">=</span><span class="s2">&quot;indexedLabel&quot;</span><span class="p">,</span> <span class="n">predictionCol</span><span class="o">=</span><span class="s2">&quot;prediction&quot;</span><span class="p">,</span> <span class="n">metricName</span><span class="o">=</span><span class="s2">&quot;accuracy&quot;</span><span class="p">)</span>
 <span class="n">accuracy</span> <span class="o">=</span> <span class="n">evaluator</span><span class="o">.</span><span class="n">evaluate</span><span class="p">(</span><span class="n">predictions</span><span class="p">)</span>
-<span class="k">print</span><span class="p">(</span><span class="s">&quot;Test Error = </span><span class="si">%g</span><span class="s">&quot;</span> <span class="o">%</span> <span class="p">(</span><span class="mf">1.0</span> <span class="o">-</span> <span class="n">accuracy</span><span class="p">))</span>
+<span class="k">print</span><span class="p">(</span><span class="s2">&quot;Test Error = </span><span class="si">%g</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="p">(</span><span class="mf">1.0</span> <span class="o">-</span> <span class="n">accuracy</span><span class="p">))</span>
 
 <span class="n">rfModel</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">stages</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span>
-<span class="k">print</span><span class="p">(</span><span class="n">rfModel</span><span class="p">)</span>  <span class="c"># summary only</span>
+<span class="k">print</span><span class="p">(</span><span class="n">rfModel</span><span class="p">)</span>  <span class="c1"># summary only</span>
 </pre></div>
     <div><small>Find full example code at "examples/src/main/python/ml/random_forest_classifier_example.py" in the Spark repo.</small></div>
   </div>
@@ -1248,7 +1248,7 @@ We use two feature transformers to prepare the data; these help index categories
 
     <p>Refer to the <a href="api/R/spark.randomForest.html">R API docs</a> for more details.</p>
 
-    <div class="highlight"><pre><span class="c1"># Load training data</span>
+    <div class="highlight"><pre><span></span><span class="c1"># Load training data</span>
 df <span class="o">&lt;-</span> read.df<span class="p">(</span><span class="s">&quot;data/mllib/sample_libsvm_data.txt&quot;</span><span class="p">,</span> <span class="kn">source</span> <span class="o">=</span> <span class="s">&quot;libsvm&quot;</span><span class="p">)</span>
 training <span class="o">&lt;-</span> df
 test <span class="o">&lt;-</span> df
@@ -1283,7 +1283,7 @@ We use two feature transformers to prepare the data; these help index categories
 
     <p>Refer to the <a href="api/scala/index.html#org.apache.spark.ml.classification.GBTClassifier">Scala API docs</a> for more details.</p>
 
-    <div class="highlight"><pre><span class="k">import</span> <span class="nn">org.apache.spark.ml.Pipeline</span>
+    <div class="highlight"><pre><span></span><span class="k">import</span> <span class="nn">org.apache.spark.ml.Pipeline</span>
 <span class="k">import</span> <span class="nn">org.apache.spark.ml.classification.</span><span class="o">{</span><span class="nc">GBTClassificationModel</span><span class="o">,</span> <span class="nc">GBTClassifier</span><span class="o">}</span>
 <span class="k">import</span> <span class="nn">org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator</span>
 <span class="k">import</span> <span class="nn">org.apache.spark.ml.feature.</span><span class="o">{</span><span class="nc">IndexToString</span><span class="o">,</span> <span class="nc">StringIndexer</span><span class="o">,</span> <span class="nc">VectorIndexer</span><span class="o">}</span>
@@ -1351,7 +1351,7 @@ We use two feature transformers to prepare the data; these help index categories
 
     <p>Refer to the <a href="api/java/org/apache/spark/ml/classification/GBTClassifier.html">Java API docs</a> for more details.</p>
 
-    <div class="highlight"><pre><span class="kn">import</span> <span class="nn">org.apache.spark.ml.Pipeline</span><span class="o">;</span>
+    <div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">org.apache.spark.ml.Pipeline</span><span class="o">;</span>
 <span class="kn">import</span> <span class="nn">org.apache.spark.ml.PipelineModel</span><span class="o">;</span>
 <span class="kn">import</span> <span class="nn">org.apache.spark.ml.PipelineStage</span><span class="o">;</span>
 <span class="kn">import</span> <span class="nn">org.apache.spark.ml.classification.GBTClassificationModel</span><span class="o">;</span>
@@ -1370,13 +1370,13 @@ We use two feature transformers to prepare the data; these help index categories
 
 <span class="c1">// Index labels, adding metadata to the label column.</span>
 <span class="c1">// Fit on whole dataset to include all labels in index.</span>
-<span class="n">StringIndexerModel</span> <span class="n">labelIndexer</span> <span class="o">=</span> <span class="k">new</span> <span class="nf">StringIndexer</span><span class="o">()</span>
+<span class="n">StringIndexerModel</span> <span class="n">labelIndexer</span> <span class="o">=</span> <span class="k">new</span> <span class="n">StringIndexer</span><span class="o">()</span>
   <span class="o">.</span><span class="na">setInputCol</span><span class="o">(</span><span class="s">&quot;label&quot;</span><span class="o">)</span>
   <span class="o">.</span><span class="na">setOutputCol</span><span class="o">(</span><span class="s">&quot;indexedLabel&quot;</span><span class="o">)</span>
   <span class="o">.</span><span class="na">fit</span><span class="o">(</span><span class="n">data</span><span class="o">);</span>
 <span class="c1">// Automatically identify categorical features, and index them.</span>
 <span class="c1">// Set maxCategories so features with &gt; 4 distinct values are treated as continuous.</span>
-<span class="n">VectorIndexerModel</span> <span class="n">featureIndexer</span> <span class="o">=</span> <span class="k">new</span> <span class="nf">VectorIndexer</span><span class="o">()</span>
+<span class="n">VectorIndexerModel</span> <span class="n">featureIndexer</span> <span class="o">=</span> <span class="k">new</span> <span class="n">VectorIndexer</span><span class="o">()</span>
   <span class="o">.</span><span class="na">setInputCol</span><span class="o">(</span><span class="s">&quot;features&quot;</span><span class="o">)</span>
   <span class="o">.</span><span class="na">setOutputCol</span><span class="o">(</span><span class="s">&quot;indexedFeatures&quot;</span><span class="o">)</span>
   <span class="o">.</span><span class="na">setMaxCategories</span><span class="o">(</span><span class="mi">4</span><span class="o">)</span>
@@ -1388,19 +1388,19 @@ We use two feature transformers to prepare the data; these help index categories
 <span class="n">Dataset</span><span class="o">&lt;</span><span class="n">Row</span><span class="o">&gt;</span> <span class="n">testData</span> <span class="o">=</span> <span class="n">splits</span><span class="o">[</span><span class="mi">1</span><span class="o">];</span>
 
 <span class="c1">// Train a GBT model.</span>
-<span class="n">GBTClassifier</span> <span class="n">gbt</span> <span class="o">=</span> <span class="k">new</span> <span class="nf">GBTClassifier</span><span class="o">()</span>
+<span class="n">GBTClassifier</span> <span class="n">gbt</span> <span class="o">=</span> <span class="k">new</span> <span class="n">GBTClassifier</span><span class="o">()</span>
   <span class="o">.</span><span class="na">setLabelCol</span><span class="o">(</span><span class="s">&quot;indexedLabel&quot;</span><span class="o">)</span>
   <span class="o">.</span><span class="na">setFeaturesCol</span><span class="o">(</span><span class="s">&quot;indexedFeatures&quot;</span><span class="o">)</span>
   <span class="o">.</span><span class="na">setMaxIter</span><span class="o">(</span><span class="mi">10</span><span class="o">);</span>
 
 <span class="c1">// Convert indexed labels back to original labels.</span>
-<span class="n">IndexToString</span> <span class="n">labelConverter</span> <span class="o">=</span> <span class="k">new</span> <span class="nf">IndexToString</span><span class="o">()</span>
+<span clas

<TRUNCATED>

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org