You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2018/07/04 04:41:50 UTC
[3/7] spark-website git commit: Fix signature description broken in PySpark API documentation in 2.3.1
http://git-wip-us.apache.org/repos/asf/spark-website/blob/5660fb9a/site/docs/2.3.1/api/python/pyspark.sql.html
----------------------------------------------------------------------
diff --git a/site/docs/2.3.1/api/python/pyspark.sql.html b/site/docs/2.3.1/api/python/pyspark.sql.html
index 43c51be..6716867 100644
--- a/site/docs/2.3.1/api/python/pyspark.sql.html
+++ b/site/docs/2.3.1/api/python/pyspark.sql.html
@@ -5,14 +5,14 @@
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
- <title>pyspark.sql module — PySpark master documentation</title>
+ <title>pyspark.sql module — PySpark 2.3.1 documentation</title>
<link rel="stylesheet" href="_static/nature.css" type="text/css" />
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="_static/pyspark.css" type="text/css" />
<script type="text/javascript">
var DOCUMENTATION_OPTIONS = {
URL_ROOT: './',
- VERSION: 'master',
+ VERSION: '2.3.1',
COLLAPSE_INDEX: false,
FILE_SUFFIX: '.html',
HAS_SOURCE: true,
@@ -39,7 +39,7 @@
<a href="pyspark.html" title="pyspark package"
accesskey="P">previous</a> |</li>
- <li class="nav-item nav-item-0"><a href="index.html">PySpark master documentation</a> »</li>
+ <li class="nav-item nav-item-0"><a href="index.html">PySpark 2.3.1 documentation</a> »</li>
<li class="nav-item nav-item-1"><a href="pyspark.html" accesskey="U">pyspark package</a> »</li>
</ul>
@@ -292,22 +292,22 @@ omit the <code class="docutils literal"><span class="pre">struct&lt;&gt;</span><
</div>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">l</span> <span class="o">=</span> <span class="p">[(</span><span class="s1">'Alice'</span><span class="p">,</span> <span class="mi">1</span><span class="p">)]</span>
<span class="gp">>>> </span><span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">l</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(_1=u'Alice', _2=1)]</span>
+<span class="go">[Row(_1='Alice', _2=1)]</span>
<span class="gp">>>> </span><span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">l</span><span class="p">,</span> <span class="p">[</span><span class="s1">'name'</span><span class="p">,</span> <span class="s1">'age'</span><span class="p">])</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(name=u'Alice', age=1)]</span>
+<span class="go">[Row(name='Alice', age=1)]</span>
</pre></div>
</div>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">d</span> <span class="o">=</span> <span class="p">[{</span><span class="s1">'name'</span><span class="p">:</span> <span class="s1">'Alice'</span><span class="p">,</span> <span class="s1">'age'</span><span class="p">:</span> <span class="mi">1</span><span class="p">}]</span>
<span class="gp">>>> </span><span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">d</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=1, name=u'Alice')]</span>
+<span class="go">[Row(age=1, name='Alice')]</span>
</pre></div>
</div>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">rdd</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">parallelize</span><span class="p">(</span><span class="n">l</span><span class="p">)</span>
<span class="gp">>>> </span><span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">rdd</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(_1=u'Alice', _2=1)]</span>
+<span class="go">[Row(_1='Alice', _2=1)]</span>
<span class="gp">>>> </span><span class="n">df</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">rdd</span><span class="p">,</span> <span class="p">[</span><span class="s1">'name'</span><span class="p">,</span> <span class="s1">'age'</span><span class="p">])</span>
<span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(name=u'Alice', age=1)]</span>
+<span class="go">[Row(name='Alice', age=1)]</span>
</pre></div>
</div>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="kn">from</span> <span class="nn">pyspark.sql</span> <span class="k">import</span> <span class="n">Row</span>
@@ -315,7 +315,7 @@ omit the <code class="docutils literal"><span class="pre">struct&lt;&gt;</span><
<span class="gp">>>> </span><span class="n">person</span> <span class="o">=</span> <span class="n">rdd</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">r</span><span class="p">:</span> <span class="n">Person</span><span class="p">(</span><span class="o">*</span><span class="n">r</span><span class="p">))</span>
<span class="gp">>>> </span><span class="n">df2</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">person</span><span class="p">)</span>
<span class="gp">>>> </span><span class="n">df2</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(name=u'Alice', age=1)]</span>
+<span class="go">[Row(name='Alice', age=1)]</span>
</pre></div>
</div>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="kn">from</span> <span class="nn">pyspark.sql.types</span> <span class="k">import</span> <span class="o">*</span>
@@ -324,17 +324,17 @@ omit the <code class="docutils literal"><span class="pre">struct&lt;&gt;</span><
<span class="gp">... </span> <span class="n">StructField</span><span class="p">(</span><span class="s2">"age"</span><span class="p">,</span> <span class="n">IntegerType</span><span class="p">(),</span> <span class="kc">True</span><span class="p">)])</span>
<span class="gp">>>> </span><span class="n">df3</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">rdd</span><span class="p">,</span> <span class="n">schema</span><span class="p">)</span>
<span class="gp">>>> </span><span class="n">df3</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(name=u'Alice', age=1)]</span>
+<span class="go">[Row(name='Alice', age=1)]</span>
</pre></div>
</div>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">toPandas</span><span class="p">())</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(name=u'Alice', age=1)]</span>
+<span class="go">[Row(name='Alice', age=1)]</span>
<span class="gp">>>> </span><span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">pandas</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">]]))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
<span class="go">[Row(0=1, 1=2)]</span>
</pre></div>
</div>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">rdd</span><span class="p">,</span> <span class="s2">"a: string, b: int"</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(a=u'Alice', b=1)]</span>
+<span class="go">[Row(a='Alice', b=1)]</span>
<span class="gp">>>> </span><span class="n">rdd</span> <span class="o">=</span> <span class="n">rdd</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">row</span><span class="p">:</span> <span class="n">row</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span>
<span class="gp">>>> </span><span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">rdd</span><span class="p">,</span> <span class="s2">"int"</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
<span class="go">[Row(value=1)]</span>
@@ -461,7 +461,7 @@ as a streaming <a class="reference internal" href="#pyspark.sql.DataFrame" title
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">createOrReplaceTempView</span><span class="p">(</span><span class="s2">"table1"</span><span class="p">)</span>
<span class="gp">>>> </span><span class="n">df2</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">sql</span><span class="p">(</span><span class="s2">"SELECT field1 AS f1, field2 as f2 from table1"</span><span class="p">)</span>
<span class="gp">>>> </span><span class="n">df2</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(f1=1, f2=u'row1'), Row(f1=2, f2=u'row2'), Row(f1=3, f2=u'row3')]</span>
+<span class="go">[Row(f1=1, f2='row1'), Row(f1=2, f2='row2'), Row(f1=3, f2='row3')]</span>
</pre></div>
</div>
<div class="versionadded">
@@ -642,22 +642,22 @@ If it’s not a <a class="reference internal" href="#pyspark.sql.types.StructTyp
</div>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">l</span> <span class="o">=</span> <span class="p">[(</span><span class="s1">'Alice'</span><span class="p">,</span> <span class="mi">1</span><span class="p">)]</span>
<span class="gp">>>> </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">l</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(_1=u'Alice', _2=1)]</span>
+<span class="go">[Row(_1='Alice', _2=1)]</span>
<span class="gp">>>> </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">l</span><span class="p">,</span> <span class="p">[</span><span class="s1">'name'</span><span class="p">,</span> <span class="s1">'age'</span><span class="p">])</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(name=u'Alice', age=1)]</span>
+<span class="go">[Row(name='Alice', age=1)]</span>
</pre></div>
</div>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">d</span> <span class="o">=</span> <span class="p">[{</span><span class="s1">'name'</span><span class="p">:</span> <span class="s1">'Alice'</span><span class="p">,</span> <span class="s1">'age'</span><span class="p">:</span> <span class="mi">1</span><span class="p">}]</span>
<span class="gp">>>> </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">d</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=1, name=u'Alice')]</span>
+<span class="go">[Row(age=1, name='Alice')]</span>
</pre></div>
</div>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">rdd</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">parallelize</span><span class="p">(</span><span class="n">l</span><span class="p">)</span>
<span class="gp">>>> </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">rdd</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(_1=u'Alice', _2=1)]</span>
+<span class="go">[Row(_1='Alice', _2=1)]</span>
<span class="gp">>>> </span><span class="n">df</span> <span class="o">=</span> <span class="n">sqlContext</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">rdd</span><span class="p">,</span> <span class="p">[</span><span class="s1">'name'</span><span class="p">,</span> <span class="s1">'age'</span><span class="p">])</span>
<span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(name=u'Alice', age=1)]</span>
+<span class="go">[Row(name='Alice', age=1)]</span>
</pre></div>
</div>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="kn">from</span> <span class="nn">pyspark.sql</span> <span class="k">import</span> <span class="n">Row</span>
@@ -665,7 +665,7 @@ If it’s not a <a class="reference internal" href="#pyspark.sql.types.StructTyp
<span class="gp">>>> </span><span class="n">person</span> <span class="o">=</span> <span class="n">rdd</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">r</span><span class="p">:</span> <span class="n">Person</span><span class="p">(</span><span class="o">*</span><span class="n">r</span><span class="p">))</span>
<span class="gp">>>> </span><span class="n">df2</span> <span class="o">=</span> <span class="n">sqlContext</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">person</span><span class="p">)</span>
<span class="gp">>>> </span><span class="n">df2</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(name=u'Alice', age=1)]</span>
+<span class="go">[Row(name='Alice', age=1)]</span>
</pre></div>
</div>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="kn">from</span> <span class="nn">pyspark.sql.types</span> <span class="k">import</span> <span class="o">*</span>
@@ -674,17 +674,17 @@ If it’s not a <a class="reference internal" href="#pyspark.sql.types.StructTyp
<span class="gp">... </span> <span class="n">StructField</span><span class="p">(</span><span class="s2">"age"</span><span class="p">,</span> <span class="n">IntegerType</span><span class="p">(),</span> <span class="kc">True</span><span class="p">)])</span>
<span class="gp">>>> </span><span class="n">df3</span> <span class="o">=</span> <span class="n">sqlContext</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">rdd</span><span class="p">,</span> <span class="n">schema</span><span class="p">)</span>
<span class="gp">>>> </span><span class="n">df3</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(name=u'Alice', age=1)]</span>
+<span class="go">[Row(name='Alice', age=1)]</span>
</pre></div>
</div>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">toPandas</span><span class="p">())</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(name=u'Alice', age=1)]</span>
+<span class="go">[Row(name='Alice', age=1)]</span>
<span class="gp">>>> </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">pandas</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">]]))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
<span class="go">[Row(0=1, 1=2)]</span>
</pre></div>
</div>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">rdd</span><span class="p">,</span> <span class="s2">"a: string, b: int"</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(a=u'Alice', b=1)]</span>
+<span class="go">[Row(a='Alice', b=1)]</span>
<span class="gp">>>> </span><span class="n">rdd</span> <span class="o">=</span> <span class="n">rdd</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">row</span><span class="p">:</span> <span class="n">row</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span>
<span class="gp">>>> </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">rdd</span><span class="p">,</span> <span class="s2">"int"</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
<span class="go">[Row(value=1)]</span>
@@ -743,12 +743,12 @@ created external table.</p>
defaultValue. If the key is not set and defaultValue is not set, return
the system default value.</p>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">getConf</span><span class="p">(</span><span class="s2">"spark.sql.shuffle.partitions"</span><span class="p">)</span>
-<span class="go">u'200'</span>
+<span class="go">'200'</span>
<span class="gp">>>> </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">getConf</span><span class="p">(</span><span class="s2">"spark.sql.shuffle.partitions"</span><span class="p">,</span> <span class="sa">u</span><span class="s2">"10"</span><span class="p">)</span>
-<span class="go">u'10'</span>
+<span class="go">'10'</span>
<span class="gp">>>> </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">setConf</span><span class="p">(</span><span class="s2">"spark.sql.shuffle.partitions"</span><span class="p">,</span> <span class="sa">u</span><span class="s2">"50"</span><span class="p">)</span>
<span class="gp">>>> </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">getConf</span><span class="p">(</span><span class="s2">"spark.sql.shuffle.partitions"</span><span class="p">,</span> <span class="sa">u</span><span class="s2">"10"</span><span class="p">)</span>
-<span class="go">u'50'</span>
+<span class="go">'50'</span>
</pre></div>
</div>
<div class="versionadded">
@@ -931,7 +931,7 @@ See <a class="reference internal" href="#pyspark.sql.UDFRegistration.registerJav
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">registerDataFrameAsTable</span><span class="p">(</span><span class="n">df</span><span class="p">,</span> <span class="s2">"table1"</span><span class="p">)</span>
<span class="gp">>>> </span><span class="n">df2</span> <span class="o">=</span> <span class="n">sqlContext</span><span class="o">.</span><span class="n">sql</span><span class="p">(</span><span class="s2">"SELECT field1 AS f1, field2 as f2 from table1"</span><span class="p">)</span>
<span class="gp">>>> </span><span class="n">df2</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(f1=1, f2=u'row1'), Row(f1=2, f2=u'row2'), Row(f1=3, f2=u'row3')]</span>
+<span class="go">[Row(f1=1, f2='row1'), Row(f1=2, f2='row2'), Row(f1=3, f2='row3')]</span>
</pre></div>
</div>
<div class="versionadded">
@@ -1022,7 +1022,7 @@ See <a class="reference internal" href="#pyspark.sql.UDFRegistration.registerJav
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">sqlContext</span><span class="o">.</span><span class="n">registerDataFrameAsTable</span><span class="p">(</span><span class="n">df</span><span class="p">,</span> <span class="s2">"table1"</span><span class="p">)</span>
<span class="gp">>>> </span><span class="n">df2</span> <span class="o">=</span> <span class="n">sqlContext</span><span class="o">.</span><span class="n">tables</span><span class="p">()</span>
<span class="gp">>>> </span><span class="n">df2</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="s2">"tableName = 'table1'"</span><span class="p">)</span><span class="o">.</span><span class="n">first</span><span class="p">()</span>
-<span class="go">Row(database=u'', tableName=u'table1', isTemporary=True)</span>
+<span class="go">Row(database='', tableName='table1', isTemporary=True)</span>
</pre></div>
</div>
<div class="versionadded">
@@ -1138,11 +1138,11 @@ object must match the specified type. In this case, this API works as if
<cite>register(name, f, returnType=StringType())</cite>.</p>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">strlen</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">udf</span><span class="o">.</span><span class="n">register</span><span class="p">(</span><span class="s2">"stringLengthString"</span><span class="p">,</span> <span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="nb">len</span><span class="p">(</span><span class="n">x</span><span class="p">))</span>
<span class="gp">>>> </span><span class="n">spark</span><span class="o">.</span><span class="n">sql</span><span class="p">(</span><span class="s2">"SELECT stringLengthString('test')"</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(stringLengthString(test)=u'4')]</span>
+<span class="go">[Row(stringLengthString(test)='4')]</span>
</pre></div>
</div>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">spark</span><span class="o">.</span><span class="n">sql</span><span class="p">(</span><span class="s2">"SELECT 'foo' AS text"</span><span class="p">)</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">strlen</span><span class="p">(</span><span class="s2">"text"</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(stringLengthString(text)=u'3')]</span>
+<span class="go">[Row(stringLengthString(text)='3')]</span>
</pre></div>
</div>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="kn">from</span> <span class="nn">pyspark.sql.types</span> <span class="k">import</span> <span class="n">IntegerType</span>
@@ -1268,7 +1268,7 @@ a <a class="reference internal" href="#pyspark.sql.types.DataType" title="pyspar
<span class="gp">>>> </span><span class="n">df</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">([(</span><span class="mi">1</span><span class="p">,</span> <span class="s2">"a"</span><span class="p">),(</span><span class="mi">2</span><span class="p">,</span> <span class="s2">"b"</span><span class="p">),</span> <span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="s2">"a"</span><span class="p">)],[</span><span class="s2">"id"</span><span class="p">,</span> <span class="s2">"name"</span><span class="p">])</span>
<span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">createOrReplaceTempView</span><span class="p">(</span><span class="s2">"df"</span><span class="p">)</span>
<span class="gp">>>> </span><span class="n">spark</span><span class="o">.</span><span class="n">sql</span><span class="p">(</span><span class="s2">"SELECT name, javaUDAF(id) as avg from df group by name"</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(name=u'b', avg=102.0), Row(name=u'a', avg=102.0)]</span>
+<span class="go">[Row(name='b', avg=102.0), Row(name='a', avg=102.0)]</span>
</pre></div>
</div>
<div class="versionadded">
@@ -1331,7 +1331,7 @@ and can be created using various functions in <a class="reference internal" href
<span class="gp">>>> </span><span class="n">df_as2</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">"df_as2"</span><span class="p">)</span>
<span class="gp">>>> </span><span class="n">joined_df</span> <span class="o">=</span> <span class="n">df_as1</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">df_as2</span><span class="p">,</span> <span class="n">col</span><span class="p">(</span><span class="s2">"df_as1.name"</span><span class="p">)</span> <span class="o">==</span> <span class="n">col</span><span class="p">(</span><span class="s2">"df_as2.name"</span><span class="p">),</span> <span class="s1">'inner'</span><span class="p">)</span>
<span class="gp">>>> </span><span class="n">joined_df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="s2">"df_as1.name"</span><span class="p">,</span> <span class="s2">"df_as2.name"</span><span class="p">,</span> <span class="s2">"df_as2.age"</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(name=u'Bob', name=u'Bob', age=5), Row(name=u'Alice', name=u'Alice', age=2)]</span>
+<span class="go">[Row(name='Bob', name='Bob', age=5), Row(name='Alice', name='Alice', age=2)]</span>
</pre></div>
</div>
<div class="versionadded">
@@ -1487,7 +1487,7 @@ as <a class="reference internal" href="#pyspark.sql.Column" title="pyspark.sql.C
<code class="descname">collect</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/pyspark/sql/dataframe.html#DataFrame.collect"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.sql.DataFrame.collect" title="Permalink to this definition">¶</a></dt>
<dd><p>Returns all the records as a list of <a class="reference internal" href="#pyspark.sql.Row" title="pyspark.sql.Row"><code class="xref py py-class docutils literal"><span class="pre">Row</span></code></a>.</p>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=2, name=u'Alice'), Row(age=5, name=u'Bob')]</span>
+<span class="go">[Row(age=2, name='Alice'), Row(age=5, name='Bob')]</span>
</pre></div>
</div>
<div class="versionadded">
@@ -1666,12 +1666,12 @@ catalog.</p>
</tbody>
</table>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="s2">"age"</span><span class="p">,</span> <span class="s2">"name"</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=2, name=u'Alice'), Row(age=5, name=u'Bob')]</span>
+<span class="go">[Row(age=2, name='Alice'), Row(age=5, name='Bob')]</span>
<span class="gp">>>> </span><span class="n">df2</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="s2">"name"</span><span class="p">,</span> <span class="s2">"height"</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(name=u'Tom', height=80), Row(name=u'Bob', height=85)]</span>
+<span class="go">[Row(name='Tom', height=80), Row(name='Bob', height=85)]</span>
<span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">crossJoin</span><span class="p">(</span><span class="n">df2</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="s2">"height"</span><span class="p">))</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="s2">"age"</span><span class="p">,</span> <span class="s2">"name"</span><span class="p">,</span> <span class="s2">"height"</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=2, name=u'Alice', height=80), Row(age=2, name=u'Alice', height=85),</span>
-<span class="go"> Row(age=5, name=u'Bob', height=80), Row(age=5, name=u'Bob', height=85)]</span>
+<span class="go">[Row(age=2, name='Alice', height=80), Row(age=2, name='Alice', height=85),</span>
+<span class="go"> Row(age=5, name='Bob', height=80), Row(age=5, name='Bob', height=85)]</span>
</pre></div>
</div>
<div class="versionadded">
@@ -1799,23 +1799,23 @@ This is a no-op if schema doesn’t contain the given column name(s).</p>
</tbody>
</table>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">drop</span><span class="p">(</span><span class="s1">'age'</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(name=u'Alice'), Row(name=u'Bob')]</span>
+<span class="go">[Row(name='Alice'), Row(name='Bob')]</span>
</pre></div>
</div>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">drop</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">age</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(name=u'Alice'), Row(name=u'Bob')]</span>
+<span class="go">[Row(name='Alice'), Row(name='Bob')]</span>
</pre></div>
</div>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">df2</span><span class="p">,</span> <span class="n">df</span><span class="o">.</span><span class="n">name</span> <span class="o">==</span> <span class="n">df2</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="s1">'inner'</span><span class="p">)</span><span class="o">.</span><span class="n">drop</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">name</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=5, height=85, name=u'Bob')]</span>
+<span class="go">[Row(age=5, height=85, name='Bob')]</span>
</pre></div>
</div>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">df2</span><span class="p">,</span> <span class="n">df</span><span class="o">.</span><span class="n">name</span> <span class="o">==</span> <span class="n">df2</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="s1">'inner'</span><span class="p">)</span><span class="o">.</span><span class="n">drop</span><span class="p">(</span><span class="n">df2</span><span class="o">.</span><span class="n">name</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=5, name=u'Bob', height=85)]</span>
+<span class="go">[Row(age=5, name='Bob', height=85)]</span>
</pre></div>
</div>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">df2</span><span class="p">,</span> <span class="s1">'name'</span><span class="p">,</span> <span class="s1">'inner'</span><span class="p">)</span><span class="o">.</span><span class="n">drop</span><span class="p">(</span><span class="s1">'age'</span><span class="p">,</span> <span class="s1">'height'</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(name=u'Bob')]</span>
+<span class="go">[Row(name='Bob')]</span>
</pre></div>
</div>
<div class="versionadded">
@@ -2027,15 +2027,15 @@ or a string of SQL expression.</td>
</tbody>
</table>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">age</span> <span class="o">></span> <span class="mi">3</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=5, name=u'Bob')]</span>
+<span class="go">[Row(age=5, name='Bob')]</span>
<span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">age</span> <span class="o">==</span> <span class="mi">2</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=2, name=u'Alice')]</span>
+<span class="go">[Row(age=2, name='Alice')]</span>
</pre></div>
</div>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="s2">"age > 3"</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=5, name=u'Bob')]</span>
+<span class="go">[Row(age=5, name='Bob')]</span>
<span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="s2">"age = 2"</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=2, name=u'Alice')]</span>
+<span class="go">[Row(age=2, name='Alice')]</span>
</pre></div>
</div>
<div class="versionadded">
@@ -2048,7 +2048,7 @@ or a string of SQL expression.</td>
<code class="descname">first</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/pyspark/sql/dataframe.html#DataFrame.first"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.sql.DataFrame.first" title="Permalink to this definition">¶</a></dt>
<dd><p>Returns the first row as a <a class="reference internal" href="#pyspark.sql.Row" title="pyspark.sql.Row"><code class="xref py py-class docutils literal"><span class="pre">Row</span></code></a>.</p>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">first</span><span class="p">()</span>
-<span class="go">Row(age=2, name=u'Alice')</span>
+<span class="go">Row(age=2, name='Alice')</span>
</pre></div>
</div>
<div class="versionadded">
@@ -2137,11 +2137,11 @@ Each element should be a column name (string) or an expression (<a class="refere
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">groupBy</span><span class="p">()</span><span class="o">.</span><span class="n">avg</span><span class="p">()</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
<span class="go">[Row(avg(age)=3.5)]</span>
<span class="gp">>>> </span><span class="nb">sorted</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">groupBy</span><span class="p">(</span><span class="s1">'name'</span><span class="p">)</span><span class="o">.</span><span class="n">agg</span><span class="p">({</span><span class="s1">'age'</span><span class="p">:</span> <span class="s1">'mean'</span><span class="p">})</span><span class="o">.</span><span class="n">collect</span><span class="p">())</span>
-<span class="go">[Row(name=u'Alice', avg(age)=2.0), Row(name=u'Bob', avg(age)=5.0)]</span>
+<span class="go">[Row(name='Alice', avg(age)=2.0), Row(name='Bob', avg(age)=5.0)]</span>
<span class="gp">>>> </span><span class="nb">sorted</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">groupBy</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">name</span><span class="p">)</span><span class="o">.</span><span class="n">avg</span><span class="p">()</span><span class="o">.</span><span class="n">collect</span><span class="p">())</span>
-<span class="go">[Row(name=u'Alice', avg(age)=2.0), Row(name=u'Bob', avg(age)=5.0)]</span>
+<span class="go">[Row(name='Alice', avg(age)=2.0), Row(name='Bob', avg(age)=5.0)]</span>
<span class="gp">>>> </span><span class="nb">sorted</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">groupBy</span><span class="p">([</span><span class="s1">'name'</span><span class="p">,</span> <span class="n">df</span><span class="o">.</span><span class="n">age</span><span class="p">])</span><span class="o">.</span><span class="n">count</span><span class="p">()</span><span class="o">.</span><span class="n">collect</span><span class="p">())</span>
-<span class="go">[Row(name=u'Alice', age=2, count=1), Row(name=u'Bob', age=5, count=1)]</span>
+<span class="go">[Row(name='Alice', age=2, count=1), Row(name='Bob', age=5, count=1)]</span>
</pre></div>
</div>
<div class="versionadded">
@@ -2179,9 +2179,9 @@ If n is 1, return a single Row.</td>
</tbody>
</table>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">head</span><span class="p">()</span>
-<span class="go">Row(age=2, name=u'Alice')</span>
+<span class="go">Row(age=2, name='Alice')</span>
<span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">head</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
-<span class="go">[Row(age=2, name=u'Alice')]</span>
+<span class="go">[Row(age=2, name='Alice')]</span>
</pre></div>
</div>
<div class="versionadded">
@@ -2284,24 +2284,24 @@ the column(s) must exist on both sides, and this performs an equi-join.</li>
</table>
<p>The following performs a full outer join between <code class="docutils literal"><span class="pre">df1</span></code> and <code class="docutils literal"><span class="pre">df2</span></code>.</p>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">df2</span><span class="p">,</span> <span class="n">df</span><span class="o">.</span><span class="n">name</span> <span class="o">==</span> <span class="n">df2</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="s1">'outer'</span><span class="p">)</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="n">df2</span><span class="o">.</span><span class="n">height</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(name=None, height=80), Row(name=u'Bob', height=85), Row(name=u'Alice', height=None)]</span>
+<span class="go">[Row(name=None, height=80), Row(name='Bob', height=85), Row(name='Alice', height=None)]</span>
</pre></div>
</div>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">df2</span><span class="p">,</span> <span class="s1">'name'</span><span class="p">,</span> <span class="s1">'outer'</span><span class="p">)</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="s1">'name'</span><span class="p">,</span> <span class="s1">'height'</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(name=u'Tom', height=80), Row(name=u'Bob', height=85), Row(name=u'Alice', height=None)]</span>
+<span class="go">[Row(name='Tom', height=80), Row(name='Bob', height=85), Row(name='Alice', height=None)]</span>
</pre></div>
</div>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">cond</span> <span class="o">=</span> <span class="p">[</span><span class="n">df</span><span class="o">.</span><span class="n">name</span> <span class="o">==</span> <span class="n">df3</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="n">df</span><span class="o">.</span><span class="n">age</span> <span class="o">==</span> <span class="n">df3</span><span class="o">.</span><span class="n">age</span><span class="p">]</span>
<span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">df3</span><span class="p">,</span> <span class="n">cond</span><span class="p">,</span> <span class="s1">'outer'</span><span class="p">)</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="n">df3</span><span class="o">.</span><span class="n">age</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(name=u'Alice', age=2), Row(name=u'Bob', age=5)]</span>
+<span class="go">[Row(name='Alice', age=2), Row(name='Bob', age=5)]</span>
</pre></div>
</div>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">df2</span><span class="p">,</span> <span class="s1">'name'</span><span class="p">)</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="n">df2</span><span class="o">.</span><span class="n">height</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(name=u'Bob', height=85)]</span>
+<span class="go">[Row(name='Bob', height=85)]</span>
</pre></div>
</div>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">df4</span><span class="p">,</span> <span class="p">[</span><span class="s1">'name'</span><span class="p">,</span> <span class="s1">'age'</span><span class="p">])</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="n">df</span><span class="o">.</span><span class="n">age</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(name=u'Bob', age=5)]</span>
+<span class="go">[Row(name='Bob', age=5)]</span>
</pre></div>
</div>
<div class="versionadded">
@@ -2314,7 +2314,7 @@ the column(s) must exist on both sides, and this performs an equi-join.</li>
<code class="descname">limit</code><span class="sig-paren">(</span><em>num</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/pyspark/sql/dataframe.html#DataFrame.limit"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.sql.DataFrame.limit" title="Permalink to this definition">¶</a></dt>
<dd><p>Limits the result count to the number specified.</p>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">limit</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=2, name=u'Alice')]</span>
+<span class="go">[Row(age=2, name='Alice')]</span>
<span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">limit</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
<span class="go">[]</span>
</pre></div>
@@ -2376,18 +2376,18 @@ If a list is specified, length of the list must equal length of the <cite>cols</
</tbody>
</table>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">sort</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">age</span><span class="o">.</span><span class="n">desc</span><span class="p">())</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=5, name=u'Bob'), Row(age=2, name=u'Alice')]</span>
+<span class="go">[Row(age=5, name='Bob'), Row(age=2, name='Alice')]</span>
<span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">sort</span><span class="p">(</span><span class="s2">"age"</span><span class="p">,</span> <span class="n">ascending</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=5, name=u'Bob'), Row(age=2, name=u'Alice')]</span>
+<span class="go">[Row(age=5, name='Bob'), Row(age=2, name='Alice')]</span>
<span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">age</span><span class="o">.</span><span class="n">desc</span><span class="p">())</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=5, name=u'Bob'), Row(age=2, name=u'Alice')]</span>
+<span class="go">[Row(age=5, name='Bob'), Row(age=2, name='Alice')]</span>
<span class="gp">>>> </span><span class="kn">from</span> <span class="nn">pyspark.sql.functions</span> <span class="k">import</span> <span class="o">*</span>
<span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">sort</span><span class="p">(</span><span class="n">asc</span><span class="p">(</span><span class="s2">"age"</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=2, name=u'Alice'), Row(age=5, name=u'Bob')]</span>
+<span class="go">[Row(age=2, name='Alice'), Row(age=5, name='Bob')]</span>
<span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">desc</span><span class="p">(</span><span class="s2">"age"</span><span class="p">),</span> <span class="s2">"name"</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=5, name=u'Bob'), Row(age=2, name=u'Alice')]</span>
+<span class="go">[Row(age=5, name='Bob'), Row(age=2, name='Alice')]</span>
<span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">orderBy</span><span class="p">([</span><span class="s2">"age"</span><span class="p">,</span> <span class="s2">"name"</span><span class="p">],</span> <span class="n">ascending</span><span class="o">=</span><span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">])</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=5, name=u'Bob'), Row(age=2, name=u'Alice')]</span>
+<span class="go">[Row(age=5, name='Bob'), Row(age=2, name='Alice')]</span>
</pre></div>
</div>
<div class="versionadded">
@@ -2760,11 +2760,11 @@ in the current DataFrame.</td>
</tbody>
</table>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="s1">'*'</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=2, name=u'Alice'), Row(age=5, name=u'Bob')]</span>
+<span class="go">[Row(age=2, name='Alice'), Row(age=5, name='Bob')]</span>
<span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="s1">'name'</span><span class="p">,</span> <span class="s1">'age'</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(name=u'Alice', age=2), Row(name=u'Bob', age=5)]</span>
+<span class="go">[Row(name='Alice', age=2), Row(name='Bob', age=5)]</span>
<span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">age</span> <span class="o">+</span> <span class="mi">10</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s1">'age'</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(name=u'Alice', age=12), Row(name=u'Bob', age=15)]</span>
+<span class="go">[Row(name='Alice', age=12), Row(name='Bob', age=15)]</span>
</pre></div>
</div>
<div class="versionadded">
@@ -2855,18 +2855,18 @@ If a list is specified, length of the list must equal length of the <cite>cols</
</tbody>
</table>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">sort</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">age</span><span class="o">.</span><span class="n">desc</span><span class="p">())</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=5, name=u'Bob'), Row(age=2, name=u'Alice')]</span>
+<span class="go">[Row(age=5, name='Bob'), Row(age=2, name='Alice')]</span>
<span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">sort</span><span class="p">(</span><span class="s2">"age"</span><span class="p">,</span> <span class="n">ascending</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=5, name=u'Bob'), Row(age=2, name=u'Alice')]</span>
+<span class="go">[Row(age=5, name='Bob'), Row(age=2, name='Alice')]</span>
<span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">age</span><span class="o">.</span><span class="n">desc</span><span class="p">())</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=5, name=u'Bob'), Row(age=2, name=u'Alice')]</span>
+<span class="go">[Row(age=5, name='Bob'), Row(age=2, name='Alice')]</span>
<span class="gp">>>> </span><span class="kn">from</span> <span class="nn">pyspark.sql.functions</span> <span class="k">import</span> <span class="o">*</span>
<span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">sort</span><span class="p">(</span><span class="n">asc</span><span class="p">(</span><span class="s2">"age"</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=2, name=u'Alice'), Row(age=5, name=u'Bob')]</span>
+<span class="go">[Row(age=2, name='Alice'), Row(age=5, name='Bob')]</span>
<span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">desc</span><span class="p">(</span><span class="s2">"age"</span><span class="p">),</span> <span class="s2">"name"</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=5, name=u'Bob'), Row(age=2, name=u'Alice')]</span>
+<span class="go">[Row(age=5, name='Bob'), Row(age=2, name='Alice')]</span>
<span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">orderBy</span><span class="p">([</span><span class="s2">"age"</span><span class="p">,</span> <span class="s2">"name"</span><span class="p">],</span> <span class="n">ascending</span><span class="o">=</span><span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">])</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=5, name=u'Bob'), Row(age=2, name=u'Alice')]</span>
+<span class="go">[Row(age=5, name='Bob'), Row(age=2, name='Alice')]</span>
</pre></div>
</div>
<div class="versionadded">
@@ -3007,7 +3007,7 @@ guarantee about the backward compatibility of the schema of the resulting DataFr
<code class="descname">take</code><span class="sig-paren">(</span><em>num</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/pyspark/sql/dataframe.html#DataFrame.take"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.sql.DataFrame.take" title="Permalink to this definition">¶</a></dt>
<dd><p>Returns the first <code class="docutils literal"><span class="pre">num</span></code> rows as a <code class="xref py py-class docutils literal"><span class="pre">list</span></code> of <a class="reference internal" href="#pyspark.sql.Row" title="pyspark.sql.Row"><code class="xref py py-class docutils literal"><span class="pre">Row</span></code></a>.</p>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">take</span><span class="p">(</span><span class="mi">2</span><span class="p">)</span>
-<span class="go">[Row(age=2, name=u'Alice'), Row(age=5, name=u'Bob')]</span>
+<span class="go">[Row(age=2, name='Alice'), Row(age=5, name='Bob')]</span>
</pre></div>
</div>
<div class="versionadded">
@@ -3028,7 +3028,7 @@ guarantee about the backward compatibility of the schema of the resulting DataFr
</tbody>
</table>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">toDF</span><span class="p">(</span><span class="s1">'f1'</span><span class="p">,</span> <span class="s1">'f2'</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(f1=2, f2=u'Alice'), Row(f1=5, f2=u'Bob')]</span>
+<span class="go">[Row(f1=2, f2='Alice'), Row(f1=5, f2='Bob')]</span>
</pre></div>
</div>
</dd></dl>
@@ -3039,7 +3039,7 @@ guarantee about the backward compatibility of the schema of the resulting DataFr
<dd><p>Converts a <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal"><span class="pre">DataFrame</span></code></a> into a <code class="xref py py-class docutils literal"><span class="pre">RDD</span></code> of string.</p>
<p>Each row is turned into a JSON document as one element in the returned RDD.</p>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">toJSON</span><span class="p">()</span><span class="o">.</span><span class="n">first</span><span class="p">()</span>
-<span class="go">u'{"age":2,"name":"Alice"}'</span>
+<span class="go">'{"age":2,"name":"Alice"}'</span>
</pre></div>
</div>
<div class="versionadded">
@@ -3053,7 +3053,7 @@ guarantee about the backward compatibility of the schema of the resulting DataFr
<dd><p>Returns an iterator that contains all of the rows in this <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal"><span class="pre">DataFrame</span></code></a>.
The iterator will consume as much memory as the largest partition in this DataFrame.</p>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="nb">list</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">toLocalIterator</span><span class="p">())</span>
-<span class="go">[Row(age=2, name=u'Alice'), Row(age=5, name=u'Bob')]</span>
+<span class="go">[Row(age=2, name='Alice'), Row(age=5, name='Bob')]</span>
</pre></div>
</div>
<div class="versionadded">
@@ -3181,7 +3181,7 @@ a column from some other dataframe will raise an error.</p>
</tbody>
</table>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">withColumn</span><span class="p">(</span><span class="s1">'age2'</span><span class="p">,</span> <span class="n">df</span><span class="o">.</span><span class="n">age</span> <span class="o">+</span> <span class="mi">2</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=2, name=u'Alice', age2=4), Row(age=5, name=u'Bob', age2=7)]</span>
+<span class="go">[Row(age=2, name='Alice', age2=4), Row(age=5, name='Bob', age2=7)]</span>
</pre></div>
</div>
<div class="versionadded">
@@ -3207,7 +3207,7 @@ This is a no-op if schema doesn’t contain the given column name.</p>
</tbody>
</table>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">withColumnRenamed</span><span class="p">(</span><span class="s1">'age'</span><span class="p">,</span> <span class="s1">'age2'</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age2=2, name=u'Alice'), Row(age2=5, name=u'Bob')]</span>
+<span class="go">[Row(age2=2, name='Alice'), Row(age2=5, name='Bob')]</span>
</pre></div>
</div>
<div class="versionadded">
@@ -3334,12 +3334,12 @@ or a list of <a class="reference internal" href="#pyspark.sql.Column" title="pys
</table>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">gdf</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">groupBy</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">name</span><span class="p">)</span>
<span class="gp">>>> </span><span class="nb">sorted</span><span class="p">(</span><span class="n">gdf</span><span class="o">.</span><span class="n">agg</span><span class="p">({</span><span class="s2">"*"</span><span class="p">:</span> <span class="s2">"count"</span><span class="p">})</span><span class="o">.</span><span class="n">collect</span><span class="p">())</span>
-<span class="go">[Row(name=u'Alice', count(1)=1), Row(name=u'Bob', count(1)=1)]</span>
+<span class="go">[Row(name='Alice', count(1)=1), Row(name='Bob', count(1)=1)]</span>
</pre></div>
</div>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="kn">from</span> <span class="nn">pyspark.sql</span> <span class="k">import</span> <span class="n">functions</span> <span class="k">as</span> <span class="n">F</span>
<span class="gp">>>> </span><span class="nb">sorted</span><span class="p">(</span><span class="n">gdf</span><span class="o">.</span><span class="n">agg</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">min</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">age</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">())</span>
-<span class="go">[Row(name=u'Alice', min(age)=2), Row(name=u'Bob', min(age)=5)]</span>
+<span class="go">[Row(name='Alice', min(age)=2), Row(name='Bob', min(age)=5)]</span>
</pre></div>
</div>
<div class="versionadded">
@@ -3615,9 +3615,9 @@ corresponding :class: <cite>StructField</cite> (optional, keyword only argument)
<code class="descname">asc</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#pyspark.sql.Column.asc" title="Permalink to this definition">¶</a></dt>
<dd><p>Returns a sort expression based on the ascending order of the given column name</p>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="kn">from</span> <span class="nn">pyspark.sql</span> <span class="k">import</span> <span class="n">Row</span>
-<span class="gp">>>> </span><span class="n">df</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">([</span><span class="n">Row</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="sa">u</span><span class="s1">'Tom'</span><span class="p">,</span> <span class="n">height</span><span class="o">=</span><span class="mi">80</span><span class="p">),</span> <span class="n">Row</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="sa">u</span><span class="s1">'Alice'</span><span class="p">,</span> <span class="n">height</span><span class="o">=</span><span class="kc">None</span><span class="p">)])</span>
+<span class="gp">>>> </span><span class="n">df</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">([</span><span class="n">Row</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="s1">'Tom'</span><span class="p">,</span> <span class="n">height</span><span class="o">=</span><span class="mi">80</span><span class="p">),</span> <span class="n">Row</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="s1">'Alice'</span><span class="p">,</span> <span class="n">height</span><span class="o">=</span><span class="kc">None</span><span class="p">)])</span>
<span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">name</span><span class="p">)</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">name</span><span class="o">.</span><span class="n">asc</span><span class="p">())</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(name=u'Alice'), Row(name=u'Tom')]</span>
+<span class="go">[Row(name='Alice'), Row(name='Tom')]</span>
</pre></div>
</div>
</dd></dl>
@@ -3718,9 +3718,9 @@ this <a class="reference internal" href="#pyspark.sql.Column" title="pyspark.sql
<code class="descname">cast</code><span class="sig-paren">(</span><em>dataType</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/pyspark/sql/column.html#Column.cast"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.sql.Column.cast" title="Permalink to this definition">¶</a></dt>
<dd><p>Convert the column into type <code class="docutils literal"><span class="pre">dataType</span></code>.</p>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">age</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="s2">"string"</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s1">'ages'</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(ages=u'2'), Row(ages=u'5')]</span>
+<span class="go">[Row(ages='2'), Row(ages='5')]</span>
<span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">age</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="n">StringType</span><span class="p">())</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s1">'ages'</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(ages=u'2'), Row(ages=u'5')]</span>
+<span class="go">[Row(ages='2'), Row(ages='5')]</span>
</pre></div>
</div>
<div class="versionadded">
@@ -3741,7 +3741,7 @@ this <a class="reference internal" href="#pyspark.sql.Column" title="pyspark.sql
</tbody>
</table>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">name</span><span class="o">.</span><span class="n">contains</span><span class="p">(</span><span class="s1">'o'</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=5, name=u'Bob')]</span>
+<span class="go">[Row(age=5, name='Bob')]</span>
</pre></div>
</div>
</dd></dl>
@@ -3751,9 +3751,9 @@ this <a class="reference internal" href="#pyspark.sql.Column" title="pyspark.sql
<code class="descname">desc</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#pyspark.sql.Column.desc" title="Permalink to this definition">¶</a></dt>
<dd><p>Returns a sort expression based on the descending order of the given column name.</p>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="kn">from</span> <span class="nn">pyspark.sql</span> <span class="k">import</span> <span class="n">Row</span>
-<span class="gp">>>> </span><span class="n">df</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">([</span><span class="n">Row</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="sa">u</span><span class="s1">'Tom'</span><span class="p">,</span> <span class="n">height</span><span class="o">=</span><span class="mi">80</span><span class="p">),</span> <span class="n">Row</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="sa">u</span><span class="s1">'Alice'</span><span class="p">,</span> <span class="n">height</span><span class="o">=</span><span class="kc">None</span><span class="p">)])</span>
+<span class="gp">>>> </span><span class="n">df</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">([</span><span class="n">Row</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="s1">'Tom'</span><span class="p">,</span> <span class="n">height</span><span class="o">=</span><span class="mi">80</span><span class="p">),</span> <span class="n">Row</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="s1">'Alice'</span><span class="p">,</span> <span class="n">height</span><span class="o">=</span><span class="kc">None</span><span class="p">)])</span>
<span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">name</span><span class="p">)</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">name</span><span class="o">.</span><span class="n">desc</span><span class="p">())</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(name=u'Tom'), Row(name=u'Alice')]</span>
+<span class="go">[Row(name='Tom'), Row(name='Alice')]</span>
</pre></div>
</div>
</dd></dl>
@@ -3771,7 +3771,7 @@ this <a class="reference internal" href="#pyspark.sql.Column" title="pyspark.sql
</tbody>
</table>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">name</span><span class="o">.</span><span class="n">endswith</span><span class="p">(</span><span class="s1">'ice'</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=2, name=u'Alice')]</span>
+<span class="go">[Row(age=2, name='Alice')]</span>
<span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">name</span><span class="o">.</span><span class="n">endswith</span><span class="p">(</span><span class="s1">'ice$'</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
<span class="go">[]</span>
</pre></div>
@@ -3898,9 +3898,9 @@ or gets an item by key out of a dict.</p>
<code class="descname">isNotNull</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#pyspark.sql.Column.isNotNull" title="Permalink to this definition">¶</a></dt>
<dd><p>True if the current expression is NOT null.</p>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="kn">from</span> <span class="nn">pyspark.sql</span> <span class="k">import</span> <span class="n">Row</span>
-<span class="gp">>>> </span><span class="n">df</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">([</span><span class="n">Row</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="sa">u</span><span class="s1">'Tom'</span><span class="p">,</span> <span class="n">height</span><span class="o">=</span><span class="mi">80</span><span class="p">),</span> <span class="n">Row</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="sa">u</span><span class="s1">'Alice'</span><span class="p">,</span> <span class="n">height</span><span class="o">=</span><span class="kc">None</span><span class="p">)])</span>
+<span class="gp">>>> </span><span class="n">df</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">([</span><span class="n">Row</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="s1">'Tom'</span><span class="p">,</span> <span class="n">height</span><span class="o">=</span><span class="mi">80</span><span class="p">),</span> <span class="n">Row</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="s1">'Alice'</span><span class="p">,</span> <span class="n">height</span><span class="o">=</span><span class="kc">None</span><span class="p">)])</span>
<span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">height</span><span class="o">.</span><span class="n">isNotNull</span><span class="p">())</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(height=80, name=u'Tom')]</span>
+<span class="go">[Row(height=80, name='Tom')]</span>
</pre></div>
</div>
</dd></dl>
@@ -3910,9 +3910,9 @@ or gets an item by key out of a dict.</p>
<code class="descname">isNull</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#pyspark.sql.Column.isNull" title="Permalink to this definition">¶</a></dt>
<dd><p>True if the current expression is null.</p>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="kn">from</span> <span class="nn">pyspark.sql</span> <span class="k">import</span> <span class="n">Row</span>
-<span class="gp">>>> </span><span class="n">df</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">([</span><span class="n">Row</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="sa">u</span><span class="s1">'Tom'</span><span class="p">,</span> <span class="n">height</span><span class="o">=</span><span class="mi">80</span><span class="p">),</span> <span class="n">Row</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="sa">u</span><span class="s1">'Alice'</span><span class="p">,</span> <span class="n">height</span><span class="o">=</span><span class="kc">None</span><span class="p">)])</span>
+<span class="gp">>>> </span><span class="n">df</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">([</span><span class="n">Row</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="s1">'Tom'</span><span class="p">,</span> <span class="n">height</span><span class="o">=</span><span class="mi">80</span><span class="p">),</span> <span class="n">Row</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="s1">'Alice'</span><span class="p">,</span> <span class="n">height</span><span class="o">=</span><span class="kc">None</span><span class="p">)])</span>
<span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">height</span><span class="o">.</span><span class="n">isNull</span><span class="p">())</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(height=None, name=u'Alice')]</span>
+<span class="go">[Row(height=None, name='Alice')]</span>
</pre></div>
</div>
</dd></dl>
@@ -3923,9 +3923,9 @@ or gets an item by key out of a dict.</p>
<dd><p>A boolean expression that is evaluated to true if the value of this
expression is contained by the evaluated values of the arguments.</p>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="p">[</span><span class="n">df</span><span class="o">.</span><span class="n">name</span><span class="o">.</span><span class="n">isin</span><span class="p">(</span><span class="s2">"Bob"</span><span class="p">,</span> <span class="s2">"Mike"</span><span class="p">)]</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=5, name=u'Bob')]</span>
+<span class="go">[Row(age=5, name='Bob')]</span>
<span class="gp">>>> </span><span class="n">df</span><span class="p">[</span><span class="n">df</span><span class="o">.</span><span class="n">age</span><span class="o">.</span><span class="n">isin</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">])]</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=2, name=u'Alice')]</span>
+<span class="go">[Row(age=2, name='Alice')]</span>
</pre></div>
</div>
<div class="versionadded">
@@ -3947,7 +3947,7 @@ expression is contained by the evaluated values of the arguments.</p>
</table>
<p>See <a class="reference internal" href="#pyspark.sql.Column.rlike" title="pyspark.sql.Column.rlike"><code class="xref py py-func docutils literal"><span class="pre">rlike()</span></code></a> for a regex version</p>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">name</span><span class="o">.</span><span class="n">like</span><span class="p">(</span><span class="s1">'Al%'</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=2, name=u'Alice')]</span>
+<span class="go">[Row(age=2, name='Alice')]</span>
</pre></div>
</div>
</dd></dl>
@@ -4029,7 +4029,7 @@ match.</p>
</tbody>
</table>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">name</span><span class="o">.</span><span class="n">rlike</span><span class="p">(</span><span class="s1">'ice$'</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=2, name=u'Alice')]</span>
+<span class="go">[Row(age=2, name='Alice')]</span>
</pre></div>
</div>
</dd></dl>
@@ -4047,7 +4047,7 @@ match.</p>
</tbody>
</table>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">name</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'Al'</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(age=2, name=u'Alice')]</span>
+<span class="go">[Row(age=2, name='Alice')]</span>
<span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">name</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'^Al'</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
<span class="go">[]</span>
</pre></div>
@@ -4071,7 +4071,7 @@ match.</p>
</tbody>
</table>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">name</span><span class="o">.</span><span class="n">substr</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">3</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">"col"</span><span class="p">))</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
-<span class="go">[Row(col=u'Ali'), Row(col=u'Bob')]</span>
+<span class="go">[Row(col='Ali'), Row(col='Bob')]</span>
</pre></div>
</div>
<div class="versionadded">
@@ -4922,12 +4922,12 @@ any value greater than or equal to 9223372036854775807.</li>
<dl class="attribute">
<dt id="pyspark.sql.Window.unboundedFollowing">
-<code class="descname">unboundedFollowing</code><em class="property"> = 9223372036854775807L</em><a class="headerlink" href="#pyspark.sql.Window.unboundedFollowing" title="Permalink to this definition">¶</a></dt>
+<code class="descname">unboundedFollowing</code><em class="property"> = 9223372036854775807</em><a class="headerlink" href="#pyspark.sql.Window.unboundedFollowing" title="Permalink to this definition">¶</a></dt>
<dd></dd></dl>
<dl class="attribute">
<dt id="pyspark.sql.Window.unboundedPreceding">
-<code class="descname">unboundedPreceding</code><em class="property"> = -9223372036854775808L</em><a class="headerlink" href="#pyspark.sql.Window.unboundedPreceding" title="Permalink to this definition">¶</a></dt>
+<code class="descname">unb
<TRUNCATED>
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org