You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2018/07/04 04:40:18 UTC

[3/7] spark-website git commit: Fix signature description broken in PySpark API documentation in 2.2.1

http://git-wip-us.apache.org/repos/asf/spark-website/blob/26b52712/site/docs/2.2.1/api/python/pyspark.sql.html
----------------------------------------------------------------------
diff --git a/site/docs/2.2.1/api/python/pyspark.sql.html b/site/docs/2.2.1/api/python/pyspark.sql.html
index 8b349cc..2174c25 100644
--- a/site/docs/2.2.1/api/python/pyspark.sql.html
+++ b/site/docs/2.2.1/api/python/pyspark.sql.html
@@ -5,14 +5,14 @@
 <html xmlns="http://www.w3.org/1999/xhtml">
   <head>
     <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
-    <title>pyspark.sql module &#8212; PySpark  documentation</title>
+    <title>pyspark.sql module &#8212; PySpark 2.2.1 documentation</title>
     <link rel="stylesheet" href="_static/nature.css" type="text/css" />
     <link rel="stylesheet" href="_static/pygments.css" type="text/css" />
     <link rel="stylesheet" href="_static/pyspark.css" type="text/css" />
     <script type="text/javascript">
       var DOCUMENTATION_OPTIONS = {
         URL_ROOT:    './',
-        VERSION:     '',
+        VERSION:     '2.2.1',
         COLLAPSE_INDEX: false,
         FILE_SUFFIX: '.html',
         HAS_SOURCE:  true,
@@ -39,7 +39,7 @@
           <a href="pyspark.html" title="pyspark package"
              accesskey="P">previous</a> |</li>
     
-        <li class="nav-item nav-item-0"><a href="index.html">PySpark  documentation</a> &#187;</li>
+        <li class="nav-item nav-item-0"><a href="index.html">PySpark 2.2.1 documentation</a> &#187;</li>
 
           <li class="nav-item nav-item-1"><a href="pyspark.html" accesskey="U">pyspark package</a> &#187;</li> 
       </ul>
@@ -280,22 +280,22 @@ omit the <code class="docutils literal"><span class="pre">struct&lt;&gt;</span><
 </div>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">l</span> <span class="o">=</span> <span class="p">[(</span><span class="s1">&#39;Alice&#39;</span><span class="p">,</span> <span class="mi">1</span><span class="p">)]</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">l</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(_1=u&#39;Alice&#39;, _2=1)]</span>
+<span class="go">[Row(_1=&#39;Alice&#39;, _2=1)]</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">l</span><span class="p">,</span> <span class="p">[</span><span class="s1">&#39;name&#39;</span><span class="p">,</span> <span class="s1">&#39;age&#39;</span><span class="p">])</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(name=u&#39;Alice&#39;, age=1)]</span>
+<span class="go">[Row(name=&#39;Alice&#39;, age=1)]</span>
 </pre></div>
 </div>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">d</span> <span class="o">=</span> <span class="p">[{</span><span class="s1">&#39;name&#39;</span><span class="p">:</span> <span class="s1">&#39;Alice&#39;</span><span class="p">,</span> <span class="s1">&#39;age&#39;</span><span class="p">:</span> <span class="mi">1</span><span class="p">}]</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">d</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=1, name=u&#39;Alice&#39;)]</span>
+<span class="go">[Row(age=1, name=&#39;Alice&#39;)]</span>
 </pre></div>
 </div>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">rdd</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">parallelize</span><span class="p">(</span><span class="n">l</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">rdd</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(_1=u&#39;Alice&#39;, _2=1)]</span>
+<span class="go">[Row(_1=&#39;Alice&#39;, _2=1)]</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">rdd</span><span class="p">,</span> <span class="p">[</span><span class="s1">&#39;name&#39;</span><span class="p">,</span> <span class="s1">&#39;age&#39;</span><span class="p">])</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(name=u&#39;Alice&#39;, age=1)]</span>
+<span class="go">[Row(name=&#39;Alice&#39;, age=1)]</span>
 </pre></div>
 </div>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="kn">from</span> <span class="nn">pyspark.sql</span> <span class="k">import</span> <span class="n">Row</span>
@@ -303,7 +303,7 @@ omit the <code class="docutils literal"><span class="pre">struct&lt;&gt;</span><
 <span class="gp">&gt;&gt;&gt; </span><span class="n">person</span> <span class="o">=</span> <span class="n">rdd</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">r</span><span class="p">:</span> <span class="n">Person</span><span class="p">(</span><span class="o">*</span><span class="n">r</span><span class="p">))</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">person</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(name=u&#39;Alice&#39;, age=1)]</span>
+<span class="go">[Row(name=&#39;Alice&#39;, age=1)]</span>
 </pre></div>
 </div>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="kn">from</span> <span class="nn">pyspark.sql.types</span> <span class="k">import</span> <span class="o">*</span>
@@ -312,17 +312,17 @@ omit the <code class="docutils literal"><span class="pre">struct&lt;&gt;</span><
 <span class="gp">... </span>   <span class="n">StructField</span><span class="p">(</span><span class="s2">&quot;age&quot;</span><span class="p">,</span> <span class="n">IntegerType</span><span class="p">(),</span> <span class="kc">True</span><span class="p">)])</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df3</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">rdd</span><span class="p">,</span> <span class="n">schema</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df3</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(name=u&#39;Alice&#39;, age=1)]</span>
+<span class="go">[Row(name=&#39;Alice&#39;, age=1)]</span>
 </pre></div>
 </div>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">toPandas</span><span class="p">())</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>  
-<span class="go">[Row(name=u&#39;Alice&#39;, age=1)]</span>
+<span class="go">[Row(name=&#39;Alice&#39;, age=1)]</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">pandas</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">]]))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>  
 <span class="go">[Row(0=1, 1=2)]</span>
 </pre></div>
 </div>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">rdd</span><span class="p">,</span> <span class="s2">&quot;a: string, b: int&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(a=u&#39;Alice&#39;, b=1)]</span>
+<span class="go">[Row(a=&#39;Alice&#39;, b=1)]</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">rdd</span> <span class="o">=</span> <span class="n">rdd</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">row</span><span class="p">:</span> <span class="n">row</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">rdd</span><span class="p">,</span> <span class="s2">&quot;int&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
 <span class="go">[Row(value=1)]</span>
@@ -449,7 +449,7 @@ as a streaming <a class="reference internal" href="#pyspark.sql.DataFrame" title
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">createOrReplaceTempView</span><span class="p">(</span><span class="s2">&quot;table1&quot;</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">sql</span><span class="p">(</span><span class="s2">&quot;SELECT field1 AS f1, field2 as f2 from table1&quot;</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(f1=1, f2=u&#39;row1&#39;), Row(f1=2, f2=u&#39;row2&#39;), Row(f1=3, f2=u&#39;row3&#39;)]</span>
+<span class="go">[Row(f1=1, f2=&#39;row1&#39;), Row(f1=2, f2=&#39;row2&#39;), Row(f1=3, f2=&#39;row3&#39;)]</span>
 </pre></div>
 </div>
 <div class="versionadded">
@@ -630,22 +630,22 @@ If it’s not a <a class="reference internal" href="#pyspark.sql.types.StructTyp
 </div>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">l</span> <span class="o">=</span> <span class="p">[(</span><span class="s1">&#39;Alice&#39;</span><span class="p">,</span> <span class="mi">1</span><span class="p">)]</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">l</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(_1=u&#39;Alice&#39;, _2=1)]</span>
+<span class="go">[Row(_1=&#39;Alice&#39;, _2=1)]</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">l</span><span class="p">,</span> <span class="p">[</span><span class="s1">&#39;name&#39;</span><span class="p">,</span> <span class="s1">&#39;age&#39;</span><span class="p">])</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(name=u&#39;Alice&#39;, age=1)]</span>
+<span class="go">[Row(name=&#39;Alice&#39;, age=1)]</span>
 </pre></div>
 </div>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">d</span> <span class="o">=</span> <span class="p">[{</span><span class="s1">&#39;name&#39;</span><span class="p">:</span> <span class="s1">&#39;Alice&#39;</span><span class="p">,</span> <span class="s1">&#39;age&#39;</span><span class="p">:</span> <span class="mi">1</span><span class="p">}]</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">d</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=1, name=u&#39;Alice&#39;)]</span>
+<span class="go">[Row(age=1, name=&#39;Alice&#39;)]</span>
 </pre></div>
 </div>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">rdd</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">parallelize</span><span class="p">(</span><span class="n">l</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">rdd</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(_1=u&#39;Alice&#39;, _2=1)]</span>
+<span class="go">[Row(_1=&#39;Alice&#39;, _2=1)]</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">sqlContext</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">rdd</span><span class="p">,</span> <span class="p">[</span><span class="s1">&#39;name&#39;</span><span class="p">,</span> <span class="s1">&#39;age&#39;</span><span class="p">])</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(name=u&#39;Alice&#39;, age=1)]</span>
+<span class="go">[Row(name=&#39;Alice&#39;, age=1)]</span>
 </pre></div>
 </div>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="kn">from</span> <span class="nn">pyspark.sql</span> <span class="k">import</span> <span class="n">Row</span>
@@ -653,7 +653,7 @@ If it’s not a <a class="reference internal" href="#pyspark.sql.types.StructTyp
 <span class="gp">&gt;&gt;&gt; </span><span class="n">person</span> <span class="o">=</span> <span class="n">rdd</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">r</span><span class="p">:</span> <span class="n">Person</span><span class="p">(</span><span class="o">*</span><span class="n">r</span><span class="p">))</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span> <span class="o">=</span> <span class="n">sqlContext</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">person</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(name=u&#39;Alice&#39;, age=1)]</span>
+<span class="go">[Row(name=&#39;Alice&#39;, age=1)]</span>
 </pre></div>
 </div>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="kn">from</span> <span class="nn">pyspark.sql.types</span> <span class="k">import</span> <span class="o">*</span>
@@ -662,17 +662,17 @@ If it’s not a <a class="reference internal" href="#pyspark.sql.types.StructTyp
 <span class="gp">... </span>   <span class="n">StructField</span><span class="p">(</span><span class="s2">&quot;age&quot;</span><span class="p">,</span> <span class="n">IntegerType</span><span class="p">(),</span> <span class="kc">True</span><span class="p">)])</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df3</span> <span class="o">=</span> <span class="n">sqlContext</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">rdd</span><span class="p">,</span> <span class="n">schema</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df3</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(name=u&#39;Alice&#39;, age=1)]</span>
+<span class="go">[Row(name=&#39;Alice&#39;, age=1)]</span>
 </pre></div>
 </div>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">toPandas</span><span class="p">())</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>  
-<span class="go">[Row(name=u&#39;Alice&#39;, age=1)]</span>
+<span class="go">[Row(name=&#39;Alice&#39;, age=1)]</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">pandas</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">]]))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>  
 <span class="go">[Row(0=1, 1=2)]</span>
 </pre></div>
 </div>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">rdd</span><span class="p">,</span> <span class="s2">&quot;a: string, b: int&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(a=u&#39;Alice&#39;, b=1)]</span>
+<span class="go">[Row(a=&#39;Alice&#39;, b=1)]</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">rdd</span> <span class="o">=</span> <span class="n">rdd</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">row</span><span class="p">:</span> <span class="n">row</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">rdd</span><span class="p">,</span> <span class="s2">&quot;int&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
 <span class="go">[Row(value=1)]</span>
@@ -731,12 +731,12 @@ created external table.</p>
 defaultValue. If the key is not set and defaultValue is None, return
 the system default value.</p>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">getConf</span><span class="p">(</span><span class="s2">&quot;spark.sql.shuffle.partitions&quot;</span><span class="p">)</span>
-<span class="go">u&#39;200&#39;</span>
+<span class="go">&#39;200&#39;</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">getConf</span><span class="p">(</span><span class="s2">&quot;spark.sql.shuffle.partitions&quot;</span><span class="p">,</span> <span class="sa">u</span><span class="s2">&quot;10&quot;</span><span class="p">)</span>
-<span class="go">u&#39;10&#39;</span>
+<span class="go">&#39;10&#39;</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">setConf</span><span class="p">(</span><span class="s2">&quot;spark.sql.shuffle.partitions&quot;</span><span class="p">,</span> <span class="sa">u</span><span class="s2">&quot;50&quot;</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">getConf</span><span class="p">(</span><span class="s2">&quot;spark.sql.shuffle.partitions&quot;</span><span class="p">,</span> <span class="sa">u</span><span class="s2">&quot;10&quot;</span><span class="p">)</span>
-<span class="go">u&#39;50&#39;</span>
+<span class="go">&#39;50&#39;</span>
 </pre></div>
 </div>
 <div class="versionadded">
@@ -890,7 +890,7 @@ be done.  For any other return type, the produced object must match the specifie
 </table>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">registerFunction</span><span class="p">(</span><span class="s2">&quot;stringLengthString&quot;</span><span class="p">,</span> <span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="nb">len</span><span class="p">(</span><span class="n">x</span><span class="p">))</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">sql</span><span class="p">(</span><span class="s2">&quot;SELECT stringLengthString(&#39;test&#39;)&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(stringLengthString(test)=u&#39;4&#39;)]</span>
+<span class="go">[Row(stringLengthString(test)=&#39;4&#39;)]</span>
 </pre></div>
 </div>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="kn">from</span> <span class="nn">pyspark.sql.types</span> <span class="k">import</span> <span class="n">IntegerType</span>
@@ -958,7 +958,7 @@ When the return type is not specified we would infer it via reflection.
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">registerDataFrameAsTable</span><span class="p">(</span><span class="n">df</span><span class="p">,</span> <span class="s2">&quot;table1&quot;</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span> <span class="o">=</span> <span class="n">sqlContext</span><span class="o">.</span><span class="n">sql</span><span class="p">(</span><span class="s2">&quot;SELECT field1 AS f1, field2 as f2 from table1&quot;</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(f1=1, f2=u&#39;row1&#39;), Row(f1=2, f2=u&#39;row2&#39;), Row(f1=3, f2=u&#39;row3&#39;)]</span>
+<span class="go">[Row(f1=1, f2=&#39;row1&#39;), Row(f1=2, f2=&#39;row2&#39;), Row(f1=3, f2=&#39;row3&#39;)]</span>
 </pre></div>
 </div>
 <div class="versionadded">
@@ -1049,7 +1049,7 @@ When the return type is not specified we would infer it via reflection.
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">registerDataFrameAsTable</span><span class="p">(</span><span class="n">df</span><span class="p">,</span> <span class="s2">&quot;table1&quot;</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span> <span class="o">=</span> <span class="n">sqlContext</span><span class="o">.</span><span class="n">tables</span><span class="p">()</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="s2">&quot;tableName = &#39;table1&#39;&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">first</span><span class="p">()</span>
-<span class="go">Row(database=u&#39;&#39;, tableName=u&#39;table1&#39;, isTemporary=True)</span>
+<span class="go">Row(database=&#39;&#39;, tableName=&#39;table1&#39;, isTemporary=True)</span>
 </pre></div>
 </div>
 <div class="versionadded">
@@ -1147,7 +1147,7 @@ be done.  For any other return type, the produced object must match the specifie
 </table>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">registerFunction</span><span class="p">(</span><span class="s2">&quot;stringLengthString&quot;</span><span class="p">,</span> <span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="nb">len</span><span class="p">(</span><span class="n">x</span><span class="p">))</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">sql</span><span class="p">(</span><span class="s2">&quot;SELECT stringLengthString(&#39;test&#39;)&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(stringLengthString(test)=u&#39;4&#39;)]</span>
+<span class="go">[Row(stringLengthString(test)=&#39;4&#39;)]</span>
 </pre></div>
 </div>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="kn">from</span> <span class="nn">pyspark.sql.types</span> <span class="k">import</span> <span class="n">IntegerType</span>
@@ -1222,7 +1222,7 @@ and can be created using various functions in <a class="reference internal" href
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df_as2</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">&quot;df_as2&quot;</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">joined_df</span> <span class="o">=</span> <span class="n">df_as1</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">df_as2</span><span class="p">,</span> <span class="n">col</span><span class="p">(</span><span class="s2">&quot;df_as1.name&quot;</span><span class="p">)</span> <span class="o">==</span> <span class="n">col</span><span class="p">(</span><span class="s2">&quot;df_as2.name&quot;</span><span class="p">),</span> <span class="s1">&#39;inner&#39;</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">joined_df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="s2">&quot;df_as1.name&quot;</span><span class="p">,</span> <span class="s2">&quot;df_as2.name&quot;</span><span class="p">,</span> <span class="s2">&quot;df_as2.age&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(name=u&#39;Bob&#39;, name=u&#39;Bob&#39;, age=5), Row(name=u&#39;Alice&#39;, name=u&#39;Alice&#39;, age=2)]</span>
+<span class="go">[Row(name=&#39;Bob&#39;, name=&#39;Bob&#39;, age=5), Row(name=&#39;Alice&#39;, name=&#39;Alice&#39;, age=2)]</span>
 </pre></div>
 </div>
 <div class="versionadded">
@@ -1349,7 +1349,7 @@ the current partitioning is).</p>
 <code class="descname">collect</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/pyspark/sql/dataframe.html#DataFrame.collect"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.sql.DataFrame.collect" title="Permalink to this definition">¶</a></dt>
 <dd><p>Returns all the records as a list of <a class="reference internal" href="#pyspark.sql.Row" title="pyspark.sql.Row"><code class="xref py py-class docutils literal"><span class="pre">Row</span></code></a>.</p>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=2, name=u&#39;Alice&#39;), Row(age=5, name=u&#39;Bob&#39;)]</span>
+<span class="go">[Row(age=2, name=&#39;Alice&#39;), Row(age=5, name=&#39;Bob&#39;)]</span>
 </pre></div>
 </div>
 <div class="versionadded">
@@ -1528,12 +1528,12 @@ catalog.</p>
 </tbody>
 </table>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="s2">&quot;age&quot;</span><span class="p">,</span> <span class="s2">&quot;name&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=2, name=u&#39;Alice&#39;), Row(age=5, name=u&#39;Bob&#39;)]</span>
+<span class="go">[Row(age=2, name=&#39;Alice&#39;), Row(age=5, name=&#39;Bob&#39;)]</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="s2">&quot;name&quot;</span><span class="p">,</span> <span class="s2">&quot;height&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(name=u&#39;Tom&#39;, height=80), Row(name=u&#39;Bob&#39;, height=85)]</span>
+<span class="go">[Row(name=&#39;Tom&#39;, height=80), Row(name=&#39;Bob&#39;, height=85)]</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">crossJoin</span><span class="p">(</span><span class="n">df2</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="s2">&quot;height&quot;</span><span class="p">))</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="s2">&quot;age&quot;</span><span class="p">,</span> <span class="s2">&quot;name&quot;</span><span class="p">,</span> <span class="s2">&quot;height&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=2, name=u&#39;Alice&#39;, height=80), Row(age=2, name=u&#39;Alice&#39;, height=85),</span>
-<span class="go"> Row(age=5, name=u&#39;Bob&#39;, height=80), Row(age=5, name=u&#39;Bob&#39;, height=85)]</span>
+<span class="go">[Row(age=2, name=&#39;Alice&#39;, height=80), Row(age=2, name=&#39;Alice&#39;, height=85),</span>
+<span class="go"> Row(age=5, name=&#39;Bob&#39;, height=80), Row(age=5, name=&#39;Bob&#39;, height=85)]</span>
 </pre></div>
 </div>
 <div class="versionadded">
@@ -1660,23 +1660,23 @@ This is a no-op if schema doesn’t contain the given column name(s).</p>
 </tbody>
 </table>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">drop</span><span class="p">(</span><span class="s1">&#39;age&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(name=u&#39;Alice&#39;), Row(name=u&#39;Bob&#39;)]</span>
+<span class="go">[Row(name=&#39;Alice&#39;), Row(name=&#39;Bob&#39;)]</span>
 </pre></div>
 </div>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">drop</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">age</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(name=u&#39;Alice&#39;), Row(name=u&#39;Bob&#39;)]</span>
+<span class="go">[Row(name=&#39;Alice&#39;), Row(name=&#39;Bob&#39;)]</span>
 </pre></div>
 </div>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">df2</span><span class="p">,</span> <span class="n">df</span><span class="o">.</span><span class="n">name</span> <span class="o">==</span> <span class="n">df2</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="s1">&#39;inner&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">drop</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">name</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=5, height=85, name=u&#39;Bob&#39;)]</span>
+<span class="go">[Row(age=5, height=85, name=&#39;Bob&#39;)]</span>
 </pre></div>
 </div>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">df2</span><span class="p">,</span> <span class="n">df</span><span class="o">.</span><span class="n">name</span> <span class="o">==</span> <span class="n">df2</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="s1">&#39;inner&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">drop</span><span class="p">(</span><span class="n">df2</span><span class="o">.</span><span class="n">name</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=5, name=u&#39;Bob&#39;, height=85)]</span>
+<span class="go">[Row(age=5, name=&#39;Bob&#39;, height=85)]</span>
 </pre></div>
 </div>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">df2</span><span class="p">,</span> <span class="s1">&#39;name&#39;</span><span class="p">,</span> <span class="s1">&#39;inner&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">drop</span><span class="p">(</span><span class="s1">&#39;age&#39;</span><span class="p">,</span> <span class="s1">&#39;height&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(name=u&#39;Bob&#39;)]</span>
+<span class="go">[Row(name=&#39;Bob&#39;)]</span>
 </pre></div>
 </div>
 <div class="versionadded">
@@ -1878,15 +1878,15 @@ or a string of SQL expression.</td>
 </tbody>
 </table>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">age</span> <span class="o">&gt;</span> <span class="mi">3</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=5, name=u&#39;Bob&#39;)]</span>
+<span class="go">[Row(age=5, name=&#39;Bob&#39;)]</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">age</span> <span class="o">==</span> <span class="mi">2</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=2, name=u&#39;Alice&#39;)]</span>
+<span class="go">[Row(age=2, name=&#39;Alice&#39;)]</span>
 </pre></div>
 </div>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="s2">&quot;age &gt; 3&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=5, name=u&#39;Bob&#39;)]</span>
+<span class="go">[Row(age=5, name=&#39;Bob&#39;)]</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="s2">&quot;age = 2&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=2, name=u&#39;Alice&#39;)]</span>
+<span class="go">[Row(age=2, name=&#39;Alice&#39;)]</span>
 </pre></div>
 </div>
 <div class="versionadded">
@@ -1899,7 +1899,7 @@ or a string of SQL expression.</td>
 <code class="descname">first</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/pyspark/sql/dataframe.html#DataFrame.first"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.sql.DataFrame.first" title="Permalink to this definition">¶</a></dt>
 <dd><p>Returns the first row as a <a class="reference internal" href="#pyspark.sql.Row" title="pyspark.sql.Row"><code class="xref py py-class docutils literal"><span class="pre">Row</span></code></a>.</p>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">first</span><span class="p">()</span>
-<span class="go">Row(age=2, name=u&#39;Alice&#39;)</span>
+<span class="go">Row(age=2, name=&#39;Alice&#39;)</span>
 </pre></div>
 </div>
 <div class="versionadded">
@@ -1988,11 +1988,11 @@ Each element should be a column name (string) or an expression (<a class="refere
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">groupBy</span><span class="p">()</span><span class="o">.</span><span class="n">avg</span><span class="p">()</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
 <span class="go">[Row(avg(age)=3.5)]</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="nb">sorted</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">groupBy</span><span class="p">(</span><span class="s1">&#39;name&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">agg</span><span class="p">({</span><span class="s1">&#39;age&#39;</span><span class="p">:</span> <span class="s1">&#39;mean&#39;</span><span class="p">})</span><span class="o">.</span><span class="n">collect</span><span class="p">())</span>
-<span class="go">[Row(name=u&#39;Alice&#39;, avg(age)=2.0), Row(name=u&#39;Bob&#39;, avg(age)=5.0)]</span>
+<span class="go">[Row(name=&#39;Alice&#39;, avg(age)=2.0), Row(name=&#39;Bob&#39;, avg(age)=5.0)]</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="nb">sorted</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">groupBy</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">name</span><span class="p">)</span><span class="o">.</span><span class="n">avg</span><span class="p">()</span><span class="o">.</span><span class="n">collect</span><span class="p">())</span>
-<span class="go">[Row(name=u&#39;Alice&#39;, avg(age)=2.0), Row(name=u&#39;Bob&#39;, avg(age)=5.0)]</span>
+<span class="go">[Row(name=&#39;Alice&#39;, avg(age)=2.0), Row(name=&#39;Bob&#39;, avg(age)=5.0)]</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="nb">sorted</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">groupBy</span><span class="p">([</span><span class="s1">&#39;name&#39;</span><span class="p">,</span> <span class="n">df</span><span class="o">.</span><span class="n">age</span><span class="p">])</span><span class="o">.</span><span class="n">count</span><span class="p">()</span><span class="o">.</span><span class="n">collect</span><span class="p">())</span>
-<span class="go">[Row(name=u&#39;Alice&#39;, age=2, count=1), Row(name=u&#39;Bob&#39;, age=5, count=1)]</span>
+<span class="go">[Row(name=&#39;Alice&#39;, age=2, count=1), Row(name=&#39;Bob&#39;, age=5, count=1)]</span>
 </pre></div>
 </div>
 <div class="versionadded">
@@ -2030,9 +2030,9 @@ If n is 1, return a single Row.</td>
 </tbody>
 </table>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">head</span><span class="p">()</span>
-<span class="go">Row(age=2, name=u&#39;Alice&#39;)</span>
+<span class="go">Row(age=2, name=&#39;Alice&#39;)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">head</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
-<span class="go">[Row(age=2, name=u&#39;Alice&#39;)]</span>
+<span class="go">[Row(age=2, name=&#39;Alice&#39;)]</span>
 </pre></div>
 </div>
 <div class="versionadded">
@@ -2135,24 +2135,24 @@ the column(s) must exist on both sides, and this performs an equi-join.</li>
 </table>
 <p>The following performs a full outer join between <code class="docutils literal"><span class="pre">df1</span></code> and <code class="docutils literal"><span class="pre">df2</span></code>.</p>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">df2</span><span class="p">,</span> <span class="n">df</span><span class="o">.</span><span class="n">name</span> <span class="o">==</span> <span class="n">df2</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="s1">&#39;outer&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="n">df2</span><span class="o">.</span><span class="n">height</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(name=None, height=80), Row(name=u&#39;Bob&#39;, height=85), Row(name=u&#39;Alice&#39;, height=None)]</span>
+<span class="go">[Row(name=None, height=80), Row(name=&#39;Bob&#39;, height=85), Row(name=&#39;Alice&#39;, height=None)]</span>
 </pre></div>
 </div>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">df2</span><span class="p">,</span> <span class="s1">&#39;name&#39;</span><span class="p">,</span> <span class="s1">&#39;outer&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="s1">&#39;name&#39;</span><span class="p">,</span> <span class="s1">&#39;height&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(name=u&#39;Tom&#39;, height=80), Row(name=u&#39;Bob&#39;, height=85), Row(name=u&#39;Alice&#39;, height=None)]</span>
+<span class="go">[Row(name=&#39;Tom&#39;, height=80), Row(name=&#39;Bob&#39;, height=85), Row(name=&#39;Alice&#39;, height=None)]</span>
 </pre></div>
 </div>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">cond</span> <span class="o">=</span> <span class="p">[</span><span class="n">df</span><span class="o">.</span><span class="n">name</span> <span class="o">==</span> <span class="n">df3</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="n">df</span><span class="o">.</span><span class="n">age</span> <span class="o">==</span> <span class="n">df3</span><span class="o">.</span><span class="n">age</span><span class="p">]</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">df3</span><span class="p">,</span> <span class="n">cond</span><span class="p">,</span> <span class="s1">&#39;outer&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="n">df3</span><span class="o">.</span><span class="n">age</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(name=u&#39;Alice&#39;, age=2), Row(name=u&#39;Bob&#39;, age=5)]</span>
+<span class="go">[Row(name=&#39;Alice&#39;, age=2), Row(name=&#39;Bob&#39;, age=5)]</span>
 </pre></div>
 </div>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">df2</span><span class="p">,</span> <span class="s1">&#39;name&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="n">df2</span><span class="o">.</span><span class="n">height</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(name=u&#39;Bob&#39;, height=85)]</span>
+<span class="go">[Row(name=&#39;Bob&#39;, height=85)]</span>
 </pre></div>
 </div>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">df4</span><span class="p">,</span> <span class="p">[</span><span class="s1">&#39;name&#39;</span><span class="p">,</span> <span class="s1">&#39;age&#39;</span><span class="p">])</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="n">df</span><span class="o">.</span><span class="n">age</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(name=u&#39;Bob&#39;, age=5)]</span>
+<span class="go">[Row(name=&#39;Bob&#39;, age=5)]</span>
 </pre></div>
 </div>
 <div class="versionadded">
@@ -2165,7 +2165,7 @@ the column(s) must exist on both sides, and this performs an equi-join.</li>
 <code class="descname">limit</code><span class="sig-paren">(</span><em>num</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/pyspark/sql/dataframe.html#DataFrame.limit"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.sql.DataFrame.limit" title="Permalink to this definition">¶</a></dt>
 <dd><p>Limits the result count to the number specified.</p>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">limit</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=2, name=u&#39;Alice&#39;)]</span>
+<span class="go">[Row(age=2, name=&#39;Alice&#39;)]</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">limit</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
 <span class="go">[]</span>
 </pre></div>
@@ -2203,18 +2203,18 @@ If a list is specified, length of the list must equal length of the <cite>cols</
 </tbody>
 </table>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sort</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">age</span><span class="o">.</span><span class="n">desc</span><span class="p">())</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=5, name=u&#39;Bob&#39;), Row(age=2, name=u&#39;Alice&#39;)]</span>
+<span class="go">[Row(age=5, name=&#39;Bob&#39;), Row(age=2, name=&#39;Alice&#39;)]</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sort</span><span class="p">(</span><span class="s2">&quot;age&quot;</span><span class="p">,</span> <span class="n">ascending</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=5, name=u&#39;Bob&#39;), Row(age=2, name=u&#39;Alice&#39;)]</span>
+<span class="go">[Row(age=5, name=&#39;Bob&#39;), Row(age=2, name=&#39;Alice&#39;)]</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">age</span><span class="o">.</span><span class="n">desc</span><span class="p">())</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=5, name=u&#39;Bob&#39;), Row(age=2, name=u&#39;Alice&#39;)]</span>
+<span class="go">[Row(age=5, name=&#39;Bob&#39;), Row(age=2, name=&#39;Alice&#39;)]</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="kn">from</span> <span class="nn">pyspark.sql.functions</span> <span class="k">import</span> <span class="o">*</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sort</span><span class="p">(</span><span class="n">asc</span><span class="p">(</span><span class="s2">&quot;age&quot;</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=2, name=u&#39;Alice&#39;), Row(age=5, name=u&#39;Bob&#39;)]</span>
+<span class="go">[Row(age=2, name=&#39;Alice&#39;), Row(age=5, name=&#39;Bob&#39;)]</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">desc</span><span class="p">(</span><span class="s2">&quot;age&quot;</span><span class="p">),</span> <span class="s2">&quot;name&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=5, name=u&#39;Bob&#39;), Row(age=2, name=u&#39;Alice&#39;)]</span>
+<span class="go">[Row(age=5, name=&#39;Bob&#39;), Row(age=2, name=&#39;Alice&#39;)]</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">orderBy</span><span class="p">([</span><span class="s2">&quot;age&quot;</span><span class="p">,</span> <span class="s2">&quot;name&quot;</span><span class="p">],</span> <span class="n">ascending</span><span class="o">=</span><span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">])</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=5, name=u&#39;Bob&#39;), Row(age=2, name=u&#39;Alice&#39;)]</span>
+<span class="go">[Row(age=5, name=&#39;Bob&#39;), Row(age=2, name=&#39;Alice&#39;)]</span>
 </pre></div>
 </div>
 <div class="versionadded">
@@ -2537,11 +2537,11 @@ in the current DataFrame.</td>
 </tbody>
 </table>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="s1">&#39;*&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=2, name=u&#39;Alice&#39;), Row(age=5, name=u&#39;Bob&#39;)]</span>
+<span class="go">[Row(age=2, name=&#39;Alice&#39;), Row(age=5, name=&#39;Bob&#39;)]</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="s1">&#39;name&#39;</span><span class="p">,</span> <span class="s1">&#39;age&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(name=u&#39;Alice&#39;, age=2), Row(name=u&#39;Bob&#39;, age=5)]</span>
+<span class="go">[Row(name=&#39;Alice&#39;, age=2), Row(name=&#39;Bob&#39;, age=5)]</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">age</span> <span class="o">+</span> <span class="mi">10</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s1">&#39;age&#39;</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(name=u&#39;Alice&#39;, age=12), Row(name=u&#39;Bob&#39;, age=15)]</span>
+<span class="go">[Row(name=&#39;Alice&#39;, age=12), Row(name=&#39;Bob&#39;, age=15)]</span>
 </pre></div>
 </div>
 <div class="versionadded">
@@ -2623,18 +2623,18 @@ If a list is specified, length of the list must equal length of the <cite>cols</
 </tbody>
 </table>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sort</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">age</span><span class="o">.</span><span class="n">desc</span><span class="p">())</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=5, name=u&#39;Bob&#39;), Row(age=2, name=u&#39;Alice&#39;)]</span>
+<span class="go">[Row(age=5, name=&#39;Bob&#39;), Row(age=2, name=&#39;Alice&#39;)]</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sort</span><span class="p">(</span><span class="s2">&quot;age&quot;</span><span class="p">,</span> <span class="n">ascending</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=5, name=u&#39;Bob&#39;), Row(age=2, name=u&#39;Alice&#39;)]</span>
+<span class="go">[Row(age=5, name=&#39;Bob&#39;), Row(age=2, name=&#39;Alice&#39;)]</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">age</span><span class="o">.</span><span class="n">desc</span><span class="p">())</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=5, name=u&#39;Bob&#39;), Row(age=2, name=u&#39;Alice&#39;)]</span>
+<span class="go">[Row(age=5, name=&#39;Bob&#39;), Row(age=2, name=&#39;Alice&#39;)]</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="kn">from</span> <span class="nn">pyspark.sql.functions</span> <span class="k">import</span> <span class="o">*</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sort</span><span class="p">(</span><span class="n">asc</span><span class="p">(</span><span class="s2">&quot;age&quot;</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=2, name=u&#39;Alice&#39;), Row(age=5, name=u&#39;Bob&#39;)]</span>
+<span class="go">[Row(age=2, name=&#39;Alice&#39;), Row(age=5, name=&#39;Bob&#39;)]</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">desc</span><span class="p">(</span><span class="s2">&quot;age&quot;</span><span class="p">),</span> <span class="s2">&quot;name&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=5, name=u&#39;Bob&#39;), Row(age=2, name=u&#39;Alice&#39;)]</span>
+<span class="go">[Row(age=5, name=&#39;Bob&#39;), Row(age=2, name=&#39;Alice&#39;)]</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">orderBy</span><span class="p">([</span><span class="s2">&quot;age&quot;</span><span class="p">,</span> <span class="s2">&quot;name&quot;</span><span class="p">],</span> <span class="n">ascending</span><span class="o">=</span><span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">])</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=5, name=u&#39;Bob&#39;), Row(age=2, name=u&#39;Alice&#39;)]</span>
+<span class="go">[Row(age=5, name=&#39;Bob&#39;), Row(age=2, name=&#39;Alice&#39;)]</span>
 </pre></div>
 </div>
 <div class="versionadded">
@@ -2716,7 +2716,7 @@ but not in another frame.</p>
 <code class="descname">take</code><span class="sig-paren">(</span><em>num</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/pyspark/sql/dataframe.html#DataFrame.take"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.sql.DataFrame.take" title="Permalink to this definition">¶</a></dt>
 <dd><p>Returns the first <code class="docutils literal"><span class="pre">num</span></code> rows as a <code class="xref py py-class docutils literal"><span class="pre">list</span></code> of <a class="reference internal" href="#pyspark.sql.Row" title="pyspark.sql.Row"><code class="xref py py-class docutils literal"><span class="pre">Row</span></code></a>.</p>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">take</span><span class="p">(</span><span class="mi">2</span><span class="p">)</span>
-<span class="go">[Row(age=2, name=u&#39;Alice&#39;), Row(age=5, name=u&#39;Bob&#39;)]</span>
+<span class="go">[Row(age=2, name=&#39;Alice&#39;), Row(age=5, name=&#39;Bob&#39;)]</span>
 </pre></div>
 </div>
 <div class="versionadded">
@@ -2737,7 +2737,7 @@ but not in another frame.</p>
 </tbody>
 </table>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">toDF</span><span class="p">(</span><span class="s1">&#39;f1&#39;</span><span class="p">,</span> <span class="s1">&#39;f2&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(f1=2, f2=u&#39;Alice&#39;), Row(f1=5, f2=u&#39;Bob&#39;)]</span>
+<span class="go">[Row(f1=2, f2=&#39;Alice&#39;), Row(f1=5, f2=&#39;Bob&#39;)]</span>
 </pre></div>
 </div>
 </dd></dl>
@@ -2748,7 +2748,7 @@ but not in another frame.</p>
 <dd><p>Converts a <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal"><span class="pre">DataFrame</span></code></a> into a <code class="xref py py-class docutils literal"><span class="pre">RDD</span></code> of string.</p>
 <p>Each row is turned into a JSON document as one element in the returned RDD.</p>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">toJSON</span><span class="p">()</span><span class="o">.</span><span class="n">first</span><span class="p">()</span>
-<span class="go">u&#39;{&quot;age&quot;:2,&quot;name&quot;:&quot;Alice&quot;}&#39;</span>
+<span class="go">&#39;{&quot;age&quot;:2,&quot;name&quot;:&quot;Alice&quot;}&#39;</span>
 </pre></div>
 </div>
 <div class="versionadded">
@@ -2762,7 +2762,7 @@ but not in another frame.</p>
 <dd><p>Returns an iterator that contains all of the rows in this <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal"><span class="pre">DataFrame</span></code></a>.
 The iterator will consume as much memory as the largest partition in this DataFrame.</p>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="nb">list</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">toLocalIterator</span><span class="p">())</span>
-<span class="go">[Row(age=2, name=u&#39;Alice&#39;), Row(age=5, name=u&#39;Bob&#39;)]</span>
+<span class="go">[Row(age=2, name=&#39;Alice&#39;), Row(age=5, name=&#39;Bob&#39;)]</span>
 </pre></div>
 </div>
 <div class="versionadded">
@@ -2860,7 +2860,7 @@ existing column that has the same name.</p>
 </tbody>
 </table>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">withColumn</span><span class="p">(</span><span class="s1">&#39;age2&#39;</span><span class="p">,</span> <span class="n">df</span><span class="o">.</span><span class="n">age</span> <span class="o">+</span> <span class="mi">2</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=2, name=u&#39;Alice&#39;, age2=4), Row(age=5, name=u&#39;Bob&#39;, age2=7)]</span>
+<span class="go">[Row(age=2, name=&#39;Alice&#39;, age2=4), Row(age=5, name=&#39;Bob&#39;, age2=7)]</span>
 </pre></div>
 </div>
 <div class="versionadded">
@@ -2886,7 +2886,7 @@ This is a no-op if schema doesn’t contain the given column name.</p>
 </tbody>
 </table>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">withColumnRenamed</span><span class="p">(</span><span class="s1">&#39;age&#39;</span><span class="p">,</span> <span class="s1">&#39;age2&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age2=2, name=u&#39;Alice&#39;), Row(age2=5, name=u&#39;Bob&#39;)]</span>
+<span class="go">[Row(age2=2, name=&#39;Alice&#39;), Row(age2=5, name=&#39;Bob&#39;)]</span>
 </pre></div>
 </div>
 <div class="versionadded">
@@ -3013,12 +3013,12 @@ or a list of <a class="reference internal" href="#pyspark.sql.Column" title="pys
 </table>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">gdf</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">groupBy</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">name</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="nb">sorted</span><span class="p">(</span><span class="n">gdf</span><span class="o">.</span><span class="n">agg</span><span class="p">({</span><span class="s2">&quot;*&quot;</span><span class="p">:</span> <span class="s2">&quot;count&quot;</span><span class="p">})</span><span class="o">.</span><span class="n">collect</span><span class="p">())</span>
-<span class="go">[Row(name=u&#39;Alice&#39;, count(1)=1), Row(name=u&#39;Bob&#39;, count(1)=1)]</span>
+<span class="go">[Row(name=&#39;Alice&#39;, count(1)=1), Row(name=&#39;Bob&#39;, count(1)=1)]</span>
 </pre></div>
 </div>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="kn">from</span> <span class="nn">pyspark.sql</span> <span class="k">import</span> <span class="n">functions</span> <span class="k">as</span> <span class="n">F</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="nb">sorted</span><span class="p">(</span><span class="n">gdf</span><span class="o">.</span><span class="n">agg</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">min</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">age</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">())</span>
-<span class="go">[Row(name=u&#39;Alice&#39;, min(age)=2), Row(name=u&#39;Bob&#39;, min(age)=5)]</span>
+<span class="go">[Row(name=&#39;Alice&#39;, min(age)=2), Row(name=&#39;Bob&#39;, min(age)=5)]</span>
 </pre></div>
 </div>
 <div class="versionadded">
@@ -3291,9 +3291,9 @@ expression is between the given columns.</p>
 <code class="descname">cast</code><span class="sig-paren">(</span><em>dataType</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/pyspark/sql/column.html#Column.cast"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.sql.Column.cast" title="Permalink to this definition">¶</a></dt>
 <dd><p>Convert the column into type <code class="docutils literal"><span class="pre">dataType</span></code>.</p>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">age</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="s2">&quot;string&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s1">&#39;ages&#39;</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(ages=u&#39;2&#39;), Row(ages=u&#39;5&#39;)]</span>
+<span class="go">[Row(ages=&#39;2&#39;), Row(ages=&#39;5&#39;)]</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">age</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="n">StringType</span><span class="p">())</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s1">&#39;ages&#39;</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(ages=u&#39;2&#39;), Row(ages=u&#39;5&#39;)]</span>
+<span class="go">[Row(ages=&#39;2&#39;), Row(ages=&#39;5&#39;)]</span>
 </pre></div>
 </div>
 <div class="versionadded">
@@ -3326,7 +3326,7 @@ expression is between the given columns.</p>
 </tbody>
 </table>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">name</span><span class="o">.</span><span class="n">endswith</span><span class="p">(</span><span class="s1">&#39;ice&#39;</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=2, name=u&#39;Alice&#39;)]</span>
+<span class="go">[Row(age=2, name=&#39;Alice&#39;)]</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">name</span><span class="o">.</span><span class="n">endswith</span><span class="p">(</span><span class="s1">&#39;ice$&#39;</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
 <span class="go">[]</span>
 </pre></div>
@@ -3389,9 +3389,9 @@ or gets an item by key out of a dict.</p>
 <dd><p>True if the current expression is null. Often combined with
 <a class="reference internal" href="#pyspark.sql.DataFrame.filter" title="pyspark.sql.DataFrame.filter"><code class="xref py py-func docutils literal"><span class="pre">DataFrame.filter()</span></code></a> to select rows with non-null values.</p>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="kn">from</span> <span class="nn">pyspark.sql</span> <span class="k">import</span> <span class="n">Row</span>
-<span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">parallelize</span><span class="p">([</span><span class="n">Row</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="sa">u</span><span class="s1">&#39;Tom&#39;</span><span class="p">,</span> <span class="n">height</span><span class="o">=</span><span class="mi">80</span><span class="p">),</span> <span class="n">Row</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="sa">u</span><span class="s1">&#39;Alice&#39;</span><span class="p">,</span> <span class="n">height</span><span class="o">=</span><span class="kc">None</span><span class="p">)])</span><span class="o">.</span><span class="n">toDF</span><span class="p">()</span>
+<span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">parallelize</span><span class="p">([</span><span class="n">Row</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="s1">&#39;Tom&#39;</span><span class="p">,</span> <span class="n">height</span><span class="o">=</span><span class="mi">80</span><span class="p">),</span> <span class="n">Row</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="s1">&#39;Alice&#39;</span><span class="p">,</span> <span class="n">height</span><span class="o">=</span><span class="kc">None</span><span class="p">)])</span><span class="o">.</span><span class="n">toDF</span><span class="p">()</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">df2</span><span class="o">.</span><span class="n">height</span><span class="o">.</span><span class="n">isNotNull</span><span class="p">())</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(height=80, name=u&#39;Tom&#39;)]</span>
+<span class="go">[Row(height=80, name=&#39;Tom&#39;)]</span>
 </pre></div>
 </div>
 </dd></dl>
@@ -3402,9 +3402,9 @@ or gets an item by key out of a dict.</p>
 <dd><p>True if the current expression is null. Often combined with
 <a class="reference internal" href="#pyspark.sql.DataFrame.filter" title="pyspark.sql.DataFrame.filter"><code class="xref py py-func docutils literal"><span class="pre">DataFrame.filter()</span></code></a> to select rows with null values.</p>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="kn">from</span> <span class="nn">pyspark.sql</span> <span class="k">import</span> <span class="n">Row</span>
-<span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">parallelize</span><span class="p">([</span><span class="n">Row</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="sa">u</span><span class="s1">&#39;Tom&#39;</span><span class="p">,</span> <span class="n">height</span><span class="o">=</span><span class="mi">80</span><span class="p">),</span> <span class="n">Row</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="sa">u</span><span class="s1">&#39;Alice&#39;</span><span class="p">,</span> <span class="n">height</span><span class="o">=</span><span class="kc">None</span><span class="p">)])</span><span class="o">.</span><span class="n">toDF</span><span class="p">()</span>
+<span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">parallelize</span><span class="p">([</span><span class="n">Row</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="s1">&#39;Tom&#39;</span><span class="p">,</span> <span class="n">height</span><span class="o">=</span><span class="mi">80</span><span class="p">),</span> <span class="n">Row</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="s1">&#39;Alice&#39;</span><span class="p">,</span> <span class="n">height</span><span class="o">=</span><span class="kc">None</span><span class="p">)])</span><span class="o">.</span><span class="n">toDF</span><span class="p">()</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">df2</span><span class="o">.</span><span class="n">height</span><span class="o">.</span><span class="n">isNull</span><span class="p">())</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(height=None, name=u&#39;Alice&#39;)]</span>
+<span class="go">[Row(height=None, name=&#39;Alice&#39;)]</span>
 </pre></div>
 </div>
 </dd></dl>
@@ -3415,9 +3415,9 @@ or gets an item by key out of a dict.</p>
 <dd><p>A boolean expression that is evaluated to true if the value of this
 expression is contained by the evaluated values of the arguments.</p>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="p">[</span><span class="n">df</span><span class="o">.</span><span class="n">name</span><span class="o">.</span><span class="n">isin</span><span class="p">(</span><span class="s2">&quot;Bob&quot;</span><span class="p">,</span> <span class="s2">&quot;Mike&quot;</span><span class="p">)]</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=5, name=u&#39;Bob&#39;)]</span>
+<span class="go">[Row(age=5, name=&#39;Bob&#39;)]</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="p">[</span><span class="n">df</span><span class="o">.</span><span class="n">age</span><span class="o">.</span><span class="n">isin</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">])]</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=2, name=u&#39;Alice&#39;)]</span>
+<span class="go">[Row(age=2, name=&#39;Alice&#39;)]</span>
 </pre></div>
 </div>
 <div class="versionadded">
@@ -3439,7 +3439,7 @@ expression is contained by the evaluated values of the arguments.</p>
 </table>
 <p>See <a class="reference internal" href="#pyspark.sql.Column.rlike" title="pyspark.sql.Column.rlike"><code class="xref py py-func docutils literal"><span class="pre">rlike()</span></code></a> for a regex version</p>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">name</span><span class="o">.</span><span class="n">like</span><span class="p">(</span><span class="s1">&#39;Al%&#39;</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=2, name=u&#39;Alice&#39;)]</span>
+<span class="go">[Row(age=2, name=&#39;Alice&#39;)]</span>
 </pre></div>
 </div>
 </dd></dl>
@@ -3520,7 +3520,7 @@ If <a class="reference internal" href="#pyspark.sql.Column.otherwise" title="pys
 </tbody>
 </table>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">name</span><span class="o">.</span><span class="n">rlike</span><span class="p">(</span><span class="s1">&#39;ice$&#39;</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=2, name=u&#39;Alice&#39;)]</span>
+<span class="go">[Row(age=2, name=&#39;Alice&#39;)]</span>
 </pre></div>
 </div>
 </dd></dl>
@@ -3538,7 +3538,7 @@ If <a class="reference internal" href="#pyspark.sql.Column.otherwise" title="pys
 </tbody>
 </table>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">name</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">&#39;Al&#39;</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=2, name=u&#39;Alice&#39;)]</span>
+<span class="go">[Row(age=2, name=&#39;Alice&#39;)]</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">name</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">&#39;^Al&#39;</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
 <span class="go">[]</span>
 </pre></div>
@@ -3562,7 +3562,7 @@ If <a class="reference internal" href="#pyspark.sql.Column.otherwise" title="pys
 </tbody>
 </table>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">name</span><span class="o">.</span><span class="n">substr</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">3</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">&quot;col&quot;</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(col=u&#39;Ali&#39;), Row(col=u&#39;Bob&#39;)]</span>
+<span class="go">[Row(col=&#39;Ali&#39;), Row(col=&#39;Bob&#39;)]</span>
 </pre></div>
 </div>
 <div class="versionadded">
@@ -4143,12 +4143,12 @@ any value greater than or equal to 9223372036854775807.</li>
 
 <dl class="attribute">
 <dt id="pyspark.sql.Window.unboundedFollowing">
-<code class="descname">unboundedFollowing</code><em class="property"> = 9223372036854775807L</em><a class="headerlink" href="#pyspark.sql.Window.unboundedFollowing" title="Permalink to this definition">¶</a></dt>
+<code class="descname">unboundedFollowing</code><em class="property"> = 9223372036854775807</em><a class="headerlink" href="#pyspark.sql.Window.unboundedFollowing" title="Permalink to this definition">¶</a></dt>
 <dd></dd></dl>
 
 <dl class="attribute">
 <dt id="pyspark.sql.Window.unboundedPreceding">
-<code class="descname">unboundedPreceding</code><em class="property"> = -9223372036854775808L</em><a class="headerlink" href="#pyspark.sql.Window.unboundedPreceding" title="Permalink to this definition">¶</a></dt>
+<code class="descname">unboundedPreceding</code><em class="property"> = -9223372036854775808</em><a class="headerlink" href="#pyspark.sql.Window.unboundedPreceding" title="Permalink to this definition">¶</a></dt>
 <dd></dd></dl>
 
 </dd></dl>
@@ -4685,7 +4685,7 @@ are any.</p>
 </table>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">read</span><span class="o">.</span><span class="n">text</span><span class="p">(</span><span class="s1">&#39;python/test_support/sql/text-test.txt&#39;</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(value=u&#39;hello&#39;), Row(value=u&#39;this&#39;)]</span>
+<span class="go">[Row(value=&#39;hello&#39;), Row(value=&#39;this&#39;)]</span>
 </pre></div>
 </div>
 <div class="versionadded">
@@ -5779,7 +5779,7 @@ given value, and false otherwise.</p>
 <code class="descclassname">pyspark.sql.functions.</code><code class="descname">bin</code><span class="sig-paren">(</span><em>col</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/pyspark/sql/functions.html#bin"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.sql.functions.bin" title="Permalink to this definition">¶</a></dt>
 <dd><p>Returns the string representation of the binary value of the given column.</p>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="nb">bin</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">age</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s1">&#39;c&#39;</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(c=u&#39;10&#39;), Row(c=u&#39;101&#39;)]</span>
+<span class="go">[Row(c=&#39;10&#39;), Row(c=&#39;101&#39;)]</span>
 </pre></div>
 </div>
 <div class="versionadded">
@@ -5919,7 +5919,7 @@ or at integral part when <cite>scale</cite> &lt; 0.</p>
 <dd><p>Concatenates multiple input string columns together into a single string column.</p>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">([(</span><span class="s1">&#39;abcd&#39;</span><span class="p">,</span><span class="s1">&#39;123&#39;</span><span class="p">)],</span> <span class="p">[</span><span class="s1">&#39;s&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">])</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">concat</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">s</span><span class="p">,</span> <span class="n">df</span><span class="o">.</span><span class="n">d</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s1">&#39;s&#39;</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(s=u&#39;abcd123&#39;)]</span>
+<span class="go">[Row(s=&#39;abcd123&#39;)]</span>
 </pre></div>
 </div>
 <div class="versionadded">
@@ -5934,7 +5934,7 @@ or at integral part when <cite>scale</cite> &lt; 0.</p>
 using the given separator.</p>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">([(</span><span class="s1">&#39;abcd&#39;</span><span class="p">,</span><span class="s1">&#39;123&#39;</span><span class="p">)],</span> <span class="p">[</span><span class="s1">&#39;s&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">])</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">concat_ws</span><span class="p">(</span><span class="s1">&#39;-&#39;</span><span class="p">,</span> <span class="n">df</span><span class="o">.</span><span class="n">s</span><span class="p">,</span> <span class="n">df</span><span class="o">.</span><span class="n">d</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s1">&#39;s&#39;</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(s=u&#39;abcd-123&#39;)]</span>
+<span class="go">[Row(s=&#39;abcd-123&#39;)]</span>
 </pre></div>
 </div>
 <div class="versionadded">
@@ -5948,7 +5948,7 @@ using the given separator.</p>
 <dd><p>Convert a number in a string column from one base to another.</p>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">([(</span><span class="s2">&quot;010101&quot;</span><span class="p">,)],</span> <span class="p">[</span><span class="s1">&#39;n&#39;</span><span class="p">])</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">conv</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">n</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">16</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s1">&#39;hex&#39;</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(hex=u&#39;15&#39;)]</span>
+<span class="go">[Row(hex=&#39;15&#39;)]</span>
 </pre></div>
 </div>
 <div class="versionadded">
@@ -6079,9 +6079,9 @@ as key-value pairs, e.g. (key1, value1, key2, value2, …).</td>
 </tbody>
 </table>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">create_map</span><span class="p">(</span><span class="s1">&#39;name&#39;</span><span class="p">,</span> <span class="s1">&#39;age&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">&quot;map&quot;</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(map={u&#39;Alice&#39;: 2}), Row(map={u&#39;Bob&#39;: 5})]</span>
+<span class="go">[Row(map={&#39;Alice&#39;: 2}), Row(map={&#39;Bob&#39;: 5})]</span>
 <span

<TRUNCATED>

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org