You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by uw...@apache.org on 2018/12/23 16:31:23 UTC

[16/51] [partial] arrow-site git commit: Upload nightly docs

http://git-wip-us.apache.org/repos/asf/arrow-site/blob/62ef7145/docs/latest/python/data.html
----------------------------------------------------------------------
diff --git a/docs/latest/python/data.html b/docs/latest/python/data.html
new file mode 100644
index 0000000..392d1e3
--- /dev/null
+++ b/docs/latest/python/data.html
@@ -0,0 +1,982 @@
+
+
+
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+  <meta charset="utf-8">
+  
+  <meta name="viewport" content="width=device-width, initial-scale=1.0">
+  
+  <title>Data Types and In-Memory Data Model &mdash; Apache Arrow v0.11.1.dev473+g6ed02454</title>
+  
+
+  
+  
+  
+  
+
+  
+
+  
+  
+    
+
+  
+
+  <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
+  <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
+    <link rel="index" title="Index" href="../genindex.html" />
+    <link rel="search" title="Search" href="../search.html" />
+    <link rel="next" title="Streaming, Serialization, and IPC" href="ipc.html" />
+    <link rel="prev" title="Memory and IO Interfaces" href="memory.html" /> 
+
+  
+  <script src="../_static/js/modernizr.min.js"></script>
+
+</head>
+
+<body class="wy-body-for-nav">
+
+   
+  <div class="wy-grid-for-nav">
+
+    
+    <nav data-toggle="wy-nav-shift" class="wy-nav-side">
+      <div class="wy-side-scroll">
+        <div class="wy-side-nav-search">
+          
+
+          
+            <a href="../index.html" class="icon icon-home"> Apache Arrow
+          
+
+          
+          </a>
+
+          
+            
+            
+              <div class="version">
+                0.11.1.dev473+g6ed02454
+              </div>
+            
+          
+
+          
+<div role="search">
+  <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
+    <input type="text" name="q" placeholder="Search docs" />
+    <input type="hidden" name="check_keywords" value="yes" />
+    <input type="hidden" name="area" value="default" />
+  </form>
+</div>
+
+          
+        </div>
+
+        <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
+          
+            
+            
+              
+            
+            
+              <p class="caption"><span class="caption-text">Memory Format</span></p>
+<ul>
+<li class="toctree-l1"><a class="reference internal" href="../format/README.html">Arrow specification documents</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../format/Guidelines.html">Implementation guidelines</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../format/Layout.html">Physical memory layout</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../format/Metadata.html">Metadata: Logical types, schemas, data headers</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../format/IPC.html">Interprocess messaging / communication (IPC)</a></li>
+</ul>
+<p class="caption"><span class="caption-text">Languages</span></p>
+<ul class="current">
+<li class="toctree-l1"><a class="reference internal" href="../cpp/index.html">C++ Implementation</a></li>
+<li class="toctree-l1 current"><a class="reference internal" href="index.html">Python bindings</a><ul class="current">
+<li class="toctree-l2"><a class="reference internal" href="install.html">Installing PyArrow</a></li>
+<li class="toctree-l2"><a class="reference internal" href="memory.html">Memory and IO Interfaces</a></li>
+<li class="toctree-l2 current"><a class="current reference internal" href="#">Data Types and In-Memory Data Model</a><ul>
+<li class="toctree-l3"><a class="reference internal" href="#type-metadata">Type Metadata</a></li>
+<li class="toctree-l3"><a class="reference internal" href="#schemas">Schemas</a></li>
+<li class="toctree-l3"><a class="reference internal" href="#arrays">Arrays</a><ul>
+<li class="toctree-l4"><a class="reference internal" href="#none-values-and-nan-handling">None values and NAN handling</a></li>
+<li class="toctree-l4"><a class="reference internal" href="#list-arrays">List arrays</a></li>
+<li class="toctree-l4"><a class="reference internal" href="#struct-arrays">Struct arrays</a></li>
+<li class="toctree-l4"><a class="reference internal" href="#union-arrays">Union arrays</a></li>
+<li class="toctree-l4"><a class="reference internal" href="#dictionary-arrays">Dictionary Arrays</a></li>
+</ul>
+</li>
+<li class="toctree-l3"><a class="reference internal" href="#record-batches">Record Batches</a></li>
+<li class="toctree-l3"><a class="reference internal" href="#tables">Tables</a></li>
+<li class="toctree-l3"><a class="reference internal" href="#custom-schema-and-field-metadata">Custom Schema and Field Metadata</a></li>
+</ul>
+</li>
+<li class="toctree-l2"><a class="reference internal" href="ipc.html">Streaming, Serialization, and IPC</a></li>
+<li class="toctree-l2"><a class="reference internal" href="filesystems.html">File System Interfaces</a></li>
+<li class="toctree-l2"><a class="reference internal" href="plasma.html">The Plasma In-Memory Object Store</a></li>
+<li class="toctree-l2"><a class="reference internal" href="numpy.html">NumPy Integration</a></li>
+<li class="toctree-l2"><a class="reference internal" href="pandas.html">Pandas Integration</a></li>
+<li class="toctree-l2"><a class="reference internal" href="csv.html">Reading CSV files</a></li>
+<li class="toctree-l2"><a class="reference internal" href="parquet.html">Reading and Writing the Apache Parquet Format</a></li>
+<li class="toctree-l2"><a class="reference internal" href="extending.html">Using pyarrow from C++ and Cython Code</a></li>
+<li class="toctree-l2"><a class="reference internal" href="api.html">API Reference</a></li>
+<li class="toctree-l2"><a class="reference internal" href="development.html">Development</a></li>
+<li class="toctree-l2"><a class="reference internal" href="getting_involved.html">Getting Involved</a></li>
+</ul>
+</li>
+</ul>
+
+            
+          
+        </div>
+      </div>
+    </nav>
+
+    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+      
+      <nav class="wy-nav-top" aria-label="top navigation">
+        
+          <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+          <a href="../index.html">Apache Arrow</a>
+        
+      </nav>
+
+
+      <div class="wy-nav-content">
+        
+        <div class="rst-content">
+        
+          
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+<div role="navigation" aria-label="breadcrumbs navigation">
+
+  <ul class="wy-breadcrumbs">
+    
+      <li><a href="../index.html">Docs</a> &raquo;</li>
+        
+          <li><a href="index.html">Python bindings</a> &raquo;</li>
+        
+      <li>Data Types and In-Memory Data Model</li>
+    
+    
+      <li class="wy-breadcrumbs-aside">
+        
+            
+            <a href="../_sources/python/data.rst.txt" rel="nofollow"> View page source</a>
+          
+        
+      </li>
+    
+  </ul>
+
+  
+  <hr/>
+</div>
+          <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
+           <div itemprop="articleBody">
+            
+  <div class="section" id="data-types-and-in-memory-data-model">
+<span id="data"></span><h1>Data Types and In-Memory Data Model<a class="headerlink" href="#data-types-and-in-memory-data-model" title="Permalink to this headline">¶</a></h1>
+<p>Apache Arrow defines columnar array data structures by composing type metadata
+with memory buffers, like the ones explained in the documentation on
+<a class="reference internal" href="memory.html#io"><span class="std std-ref">Memory and IO</span></a>. These data structures are exposed in Python through
+a series of interrelated classes:</p>
+<ul class="simple">
+<li><strong>Type Metadata</strong>: Instances of <code class="docutils literal notranslate"><span class="pre">pyarrow.DataType</span></code>, which describe a logical
+array type</li>
+<li><strong>Schemas</strong>: Instances of <code class="docutils literal notranslate"><span class="pre">pyarrow.Schema</span></code>, which describe a named
+collection of types. These can be thought of as the column types in a
+table-like object.</li>
+<li><strong>Arrays</strong>: Instances of <code class="docutils literal notranslate"><span class="pre">pyarrow.Array</span></code>, which are atomic, contiguous
+columnar data structures composed from Arrow Buffer objects</li>
+<li><strong>Record Batches</strong>: Instances of <code class="docutils literal notranslate"><span class="pre">pyarrow.RecordBatch</span></code>, which are a
+collection of Array objects with a particular Schema</li>
+<li><strong>Tables</strong>: Instances of <code class="docutils literal notranslate"><span class="pre">pyarrow.Table</span></code>, a logical table data structure in
+which each column consists of one or more <code class="docutils literal notranslate"><span class="pre">pyarrow.Array</span></code> objects of the
+same type.</li>
+</ul>
+<p>We will examine these in the sections below in a series of examples.</p>
+<div class="section" id="type-metadata">
+<span id="data-types"></span><h2>Type Metadata<a class="headerlink" href="#type-metadata" title="Permalink to this headline">¶</a></h2>
+<p>Apache Arrow defines language agnostic column-oriented data structures for
+array data. These include:</p>
+<ul class="simple">
+<li><strong>Fixed-length primitive types</strong>: numbers, booleans, dates and times, fixed
+size binary, decimals, and other values that fit into a given number of bits</li>
+<li><strong>Variable-length primitive types</strong>: binary, string</li>
+<li><strong>Nested types</strong>: list, struct, and union</li>
+<li><strong>Dictionary type</strong>: An encoded categorical type (more on this later)</li>
+</ul>
+<p>Each logical data type in Arrow has a corresponding factory function for
+creating an instance of that type object in Python:</p>
+<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [1]: </span><span class="kn">import</span> <span class="nn">pyarrow</span> <span class="kn">as</span> <span class="nn">pa</span>
+
+<span class="gp">In [2]: </span><span class="n">t1</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">int32</span><span class="p">()</span>
+
+<span class="gp">In [3]: </span><span class="n">t2</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">string</span><span class="p">()</span>
+
+<span class="gp">In [4]: </span><span class="n">t3</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">binary</span><span class="p">()</span>
+
+<span class="gp">In [5]: </span><span class="n">t4</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">binary</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
+
+<span class="gp">In [6]: </span><span class="n">t5</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">timestamp</span><span class="p">(</span><span class="s1">&#39;ms&#39;</span><span class="p">)</span>
+
+<span class="gp">In [7]: </span><span class="n">t1</span>
+<span class="gh">Out[7]: </span><span class="go">DataType(int32)</span>
+
+<span class="gp">In [8]: </span><span class="k">print</span><span class="p">(</span><span class="n">t1</span><span class="p">)</span>
+<span class="go">int32</span>
+
+<span class="gp">In [9]: </span><span class="k">print</span><span class="p">(</span><span class="n">t4</span><span class="p">)</span>
+<span class="go">fixed_size_binary[10]</span>
+
+<span class="gp">In [10]: </span><span class="k">print</span><span class="p">(</span><span class="n">t5</span><span class="p">)</span>
+<span class="go">timestamp[ms]</span>
+</pre></div>
+</div>
+<p>We use the name <strong>logical type</strong> because the <strong>physical</strong> storage may be the
+same for several types. For example, <code class="docutils literal notranslate"><span class="pre">int64</span></code>, <code class="docutils literal notranslate"><span class="pre">float64</span></code>, and
+<code class="docutils literal notranslate"><span class="pre">timestamp[ms]</span></code> all occupy 64 bits per value.</p>
+<p>These objects are <cite>metadata</cite>; they are used for describing the data in arrays,
+schemas, and record batches. In Python, they can be used in functions where the
+input data (e.g. Python objects) may be coerced to more than one Arrow type.</p>
+<p>The <a class="reference internal" href="generated/pyarrow.Field.html#pyarrow.Field" title="pyarrow.Field"><code class="xref py py-class docutils literal notranslate"><span class="pre">Field</span></code></a> type is a type plus a name and optional
+user-defined metadata:</p>
+<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [11]: </span><span class="n">f0</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">field</span><span class="p">(</span><span class="s1">&#39;int32_field&#39;</span><span class="p">,</span> <span class="n">t1</span><span class="p">)</span>
+
+<span class="gp">In [12]: </span><span class="n">f0</span>
+<span class="gh">Out[12]: </span><span class="go">pyarrow.Field&lt;int32_field: int32&gt;</span>
+
+<span class="gp">In [13]: </span><span class="n">f0</span><span class="o">.</span><span class="n">name</span>
+<span class="go">Out[13]: &#39;int32_field&#39;</span>
+
+<span class="gp">In [14]: </span><span class="n">f0</span><span class="o">.</span><span class="n">type</span>
+<span class="go">Out[14]: DataType(int32)</span>
+</pre></div>
+</div>
+<p>Arrow supports <strong>nested value types</strong> like list, struct, and union. When
+creating these, you must pass types or fields to indicate the data types of the
+types’ children. For example, we can define a list of int32 values with:</p>
+<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [15]: </span><span class="n">t6</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">list_</span><span class="p">(</span><span class="n">t1</span><span class="p">)</span>
+
+<span class="gp">In [16]: </span><span class="n">t6</span>
+<span class="gh">Out[16]: </span><span class="go">ListType(list&lt;item: int32&gt;)</span>
+</pre></div>
+</div>
+<p>A <cite>struct</cite> is a collection of named fields:</p>
+<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [17]: </span><span class="n">fields</span> <span class="o">=</span> <span class="p">[</span>
+<span class="gp">   ....: </span>    <span class="n">pa</span><span class="o">.</span><span class="n">field</span><span class="p">(</span><span class="s1">&#39;s0&#39;</span><span class="p">,</span> <span class="n">t1</span><span class="p">),</span>
+<span class="gp">   ....: </span>    <span class="n">pa</span><span class="o">.</span><span class="n">field</span><span class="p">(</span><span class="s1">&#39;s1&#39;</span><span class="p">,</span> <span class="n">t2</span><span class="p">),</span>
+<span class="gp">   ....: </span>    <span class="n">pa</span><span class="o">.</span><span class="n">field</span><span class="p">(</span><span class="s1">&#39;s2&#39;</span><span class="p">,</span> <span class="n">t4</span><span class="p">),</span>
+<span class="gp">   ....: </span>    <span class="n">pa</span><span class="o">.</span><span class="n">field</span><span class="p">(</span><span class="s1">&#39;s3&#39;</span><span class="p">,</span> <span class="n">t6</span><span class="p">),</span>
+<span class="gp">   ....: </span><span class="p">]</span>
+<span class="gp">   ....: </span>
+
+<span class="gp">In [18]: </span><span class="n">t7</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">struct</span><span class="p">(</span><span class="n">fields</span><span class="p">)</span>
+
+<span class="gp">In [19]: </span><span class="k">print</span><span class="p">(</span><span class="n">t7</span><span class="p">)</span>
+<span class="go">struct&lt;s0: int32, s1: string, s2: fixed_size_binary[10], s3: list&lt;item: int32&gt;&gt;</span>
+</pre></div>
+</div>
+<p>For convenience, you can pass <code class="docutils literal notranslate"><span class="pre">(name,</span> <span class="pre">type)</span></code> tuples directly instead of
+<a class="reference internal" href="generated/pyarrow.Field.html#pyarrow.Field" title="pyarrow.Field"><code class="xref py py-class docutils literal notranslate"><span class="pre">Field</span></code></a> instances:</p>
+<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [20]: </span><span class="n">t8</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">struct</span><span class="p">([(</span><span class="s1">&#39;s0&#39;</span><span class="p">,</span> <span class="n">t1</span><span class="p">),</span> <span class="p">(</span><span class="s1">&#39;s1&#39;</span><span class="p">,</span> <span class="n">t2</span><span class="p">),</span> <span class="p">(</span><span class="s1">&#39;s2&#39;</span><span class="p">,</span> <span class="n">t4</span><span class="p">),</span> <span class="p">(</span><span class="s1">&#39;s3&#39;</span><span class="p">,</span> <span class="n">t6</span><span class="p">)])</span>
+
+<span class="gp">In [21]: </span><span class="k">print</span><span class="p">(</span><span class="n">t8</span><span class="p">)</span>
+<span class="go">struct&lt;s0: int32, s1: string, s2: fixed_size_binary[10], s3: list&lt;item: int32&gt;&gt;</span>
+
+<span class="gp">In [22]: </span><span class="n">t8</span> <span class="o">==</span> <span class="n">t7</span>
+<span class="go">Out[22]: True</span>
+</pre></div>
+</div>
+<p>See <a class="reference internal" href="api.html#api-types"><span class="std std-ref">Data Types API</span></a> for a full listing of data type
+functions.</p>
+</div>
+<div class="section" id="schemas">
+<span id="data-schema"></span><h2>Schemas<a class="headerlink" href="#schemas" title="Permalink to this headline">¶</a></h2>
+<p>The <a class="reference internal" href="generated/pyarrow.Schema.html#pyarrow.Schema" title="pyarrow.Schema"><code class="xref py py-class docutils literal notranslate"><span class="pre">Schema</span></code></a> type is similar to the <code class="docutils literal notranslate"><span class="pre">struct</span></code> array type; it
+defines the column names and types in a record batch or table data
+structure. The <code class="xref py py-func docutils literal notranslate"><span class="pre">pyarrow.schema()</span></code> factory function makes new Schema objects in
+Python:</p>
+<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [23]: </span><span class="n">my_schema</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">schema</span><span class="p">([(</span><span class="s1">&#39;field0&#39;</span><span class="p">,</span> <span class="n">t1</span><span class="p">),</span>
+<span class="gp">   ....: </span>                       <span class="p">(</span><span class="s1">&#39;field1&#39;</span><span class="p">,</span> <span class="n">t2</span><span class="p">),</span>
+<span class="gp">   ....: </span>                       <span class="p">(</span><span class="s1">&#39;field2&#39;</span><span class="p">,</span> <span class="n">t4</span><span class="p">),</span>
+<span class="gp">   ....: </span>                       <span class="p">(</span><span class="s1">&#39;field3&#39;</span><span class="p">,</span> <span class="n">t6</span><span class="p">)])</span>
+<span class="gp">   ....: </span>
+
+<span class="gp">In [24]: </span><span class="n">my_schema</span>
+<span class="gh">Out[24]: </span><span class="go"></span>
+<span class="go">field0: int32</span>
+<span class="go">field1: string</span>
+<span class="go">field2: fixed_size_binary[10]</span>
+<span class="go">field3: list&lt;item: int32&gt;</span>
+<span class="go">  child 0, item: int32</span>
+</pre></div>
+</div>
+<p>In some applications, you may not create schemas directly, only using the ones
+that are embedded in <a class="reference internal" href="ipc.html#ipc"><span class="std std-ref">IPC messages</span></a>.</p>
+</div>
+<div class="section" id="arrays">
+<span id="data-array"></span><h2>Arrays<a class="headerlink" href="#arrays" title="Permalink to this headline">¶</a></h2>
+<p>For each data type, there is an accompanying array data structure for holding
+memory buffers that define a single contiguous chunk of columnar array
+data. When you are using PyArrow, this data may come from IPC tools, though it
+can also be created from various types of Python sequences (lists, NumPy
+arrays, pandas data).</p>
+<p>A simple way to create arrays is with <code class="docutils literal notranslate"><span class="pre">pyarrow.array</span></code>, which is similar to
+the <code class="docutils literal notranslate"><span class="pre">numpy.array</span></code> function.  By default PyArrow will infer the data type
+for you:</p>
+<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [25]: </span><span class="n">arr</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="bp">None</span><span class="p">,</span> <span class="mi">3</span><span class="p">])</span>
+
+<span class="gp">In [26]: </span><span class="n">arr</span>
+<span class="gh">Out[26]: </span><span class="go"></span>
+<span class="go">&lt;pyarrow.lib.Int64Array object at 0x7f4939805728&gt;</span>
+<span class="go">[</span>
+<span class="go">  1,</span>
+<span class="go">  2,</span>
+<span class="go">  null,</span>
+<span class="go">  3</span>
+<span class="go">]</span>
+</pre></div>
+</div>
+<p>But you may also pass a specific data type to override type inference:</p>
+<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [27]: </span><span class="n">pa</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="nb">type</span><span class="o">=</span><span class="n">pa</span><span class="o">.</span><span class="n">uint16</span><span class="p">())</span>
+<span class="gh">Out[27]: </span><span class="go"></span>
+<span class="go">&lt;pyarrow.lib.UInt16Array object at 0x7f49398059f8&gt;</span>
+<span class="go">[</span>
+<span class="go">  1,</span>
+<span class="go">  2</span>
+<span class="go">]</span>
+</pre></div>
+</div>
+<p>The array’s <code class="docutils literal notranslate"><span class="pre">type</span></code> attribute is the corresponding piece of type metadata:</p>
+<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [28]: </span><span class="n">arr</span><span class="o">.</span><span class="n">type</span>
+<span class="gh">Out[28]: </span><span class="go">DataType(int64)</span>
+</pre></div>
+</div>
+<p>Each in-memory array has a known length and null count (which will be 0 if
+there are no null values):</p>
+<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [29]: </span><span class="nb">len</span><span class="p">(</span><span class="n">arr</span><span class="p">)</span>
+<span class="gh">Out[29]: </span><span class="go">4</span>
+
+<span class="gp">In [30]: </span><span class="n">arr</span><span class="o">.</span><span class="n">null_count</span>
+<span class="go">Out[30]: 1</span>
+</pre></div>
+</div>
+<p>Scalar values can be selected with normal indexing.  <code class="docutils literal notranslate"><span class="pre">pyarrow.array</span></code> converts
+<code class="docutils literal notranslate"><span class="pre">None</span></code> values to Arrow nulls; we return the special <code class="docutils literal notranslate"><span class="pre">pyarrow.NA</span></code> value for
+nulls:</p>
+<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [31]: </span><span class="n">arr</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
+<span class="gh">Out[31]: </span><span class="go">1</span>
+
+<span class="gp">In [32]: </span><span class="n">arr</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span>
+<span class="go">Out[32]: NULL</span>
+</pre></div>
+</div>
+<p>Arrow data is immutable, so values can be selected but not assigned.</p>
+<p>Arrays can be sliced without copying:</p>
+<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [33]: </span><span class="n">arr</span><span class="p">[</span><span class="mi">1</span><span class="p">:</span><span class="mi">3</span><span class="p">]</span>
+<span class="gh">Out[33]: </span><span class="go"></span>
+<span class="go">&lt;pyarrow.lib.Int64Array object at 0x7f49397f8a48&gt;</span>
+<span class="go">[</span>
+<span class="go">  2,</span>
+<span class="go">  null</span>
+<span class="go">]</span>
+</pre></div>
+</div>
+<div class="section" id="none-values-and-nan-handling">
+<h3>None values and NAN handling<a class="headerlink" href="#none-values-and-nan-handling" title="Permalink to this headline">¶</a></h3>
+<p>As mentioned in the above section, the Python object <code class="docutils literal notranslate"><span class="pre">None</span></code> is always
+converted to an Arrow null element on conversion to <code class="docutils literal notranslate"><span class="pre">pyarrow.Array</span></code>. The
+float NaN value, which is represented either by the Python object
+<code class="docutils literal notranslate"><span class="pre">float('nan')</span></code> or by <code class="docutils literal notranslate"><span class="pre">numpy.nan</span></code>, is normally converted to a <em>valid</em> float
+value during the conversion. If an integer input is supplied to
+<code class="docutils literal notranslate"><span class="pre">pyarrow.array</span></code> that contains <code class="docutils literal notranslate"><span class="pre">np.nan</span></code>, <code class="docutils literal notranslate"><span class="pre">ValueError</span></code> is raised.</p>
+<p>For better compatibility with Pandas, we support interpreting NaN values as
+null elements. This is enabled automatically on all <code class="docutils literal notranslate"><span class="pre">from_pandas</span></code> functions and
+can be enabled on the other conversion functions by passing <code class="docutils literal notranslate"><span class="pre">from_pandas=True</span></code>
+as a function parameter.</p>
+</div>
+<div class="section" id="list-arrays">
+<h3>List arrays<a class="headerlink" href="#list-arrays" title="Permalink to this headline">¶</a></h3>
+<p><code class="docutils literal notranslate"><span class="pre">pyarrow.array</span></code> is able to infer the type of simple nested data structures
+like lists:</p>
+<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [34]: </span><span class="n">nested_arr</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">array</span><span class="p">([[],</span> <span class="bp">None</span><span class="p">,</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="p">[</span><span class="bp">None</span><span class="p">,</span> <span class="mi">1</span><span class="p">]])</span>
+
+<span class="gp">In [35]: </span><span class="k">print</span><span class="p">(</span><span class="n">nested_arr</span><span class="o">.</span><span class="n">type</span><span class="p">)</span>
+<span class="go">list&lt;item: int64&gt;</span>
+</pre></div>
+</div>
+</div>
+<div class="section" id="struct-arrays">
+<h3>Struct arrays<a class="headerlink" href="#struct-arrays" title="Permalink to this headline">¶</a></h3>
+<p>For other kinds of nested arrays, such as struct arrays, you currently need
+to pass the type explicitly.  Struct arrays can be initialized from a
+sequence of Python dicts or tuples:</p>
+<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [36]: </span><span class="n">ty</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">struct</span><span class="p">([(</span><span class="s1">&#39;x&#39;</span><span class="p">,</span> <span class="n">pa</span><span class="o">.</span><span class="n">int8</span><span class="p">()),</span>
+<span class="gp">   ....: </span>                <span class="p">(</span><span class="s1">&#39;y&#39;</span><span class="p">,</span> <span class="n">pa</span><span class="o">.</span><span class="n">bool_</span><span class="p">())])</span>
+<span class="gp">   ....: </span>
+
+<span class="gp">In [37]: </span><span class="n">pa</span><span class="o">.</span><span class="n">array</span><span class="p">([{</span><span class="s1">&#39;x&#39;</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> <span class="s1">&#39;y&#39;</span><span class="p">:</span> <span class="bp">True</span><span class="p">},</span> <span class="p">{</span><span class="s1">&#39;x&#39;</span><span class="p">:</span> <span class="mi">2</span><span class="p">,</span> <span class="s1">&#39;y&#39;</span><span class="p">:</span> <span class="bp">False</span><span class="p">}],</span> <span class="nb">type</span><span class="o">=</span><span class="n">ty</span><span class="p">)</span>
+<span class="gh">Out[37]: </span><span class="go"></span>
+<span class="go">&lt;pyarrow.lib.StructArray object at 0x7f4938da44a8&gt;</span>
+<span class="go">-- is_valid: all not null</span>
+<span class="go">-- child 0 type: int8</span>
+<span class="go">  [</span>
+<span class="go">    1,</span>
+<span class="go">    2</span>
+<span class="go">  ]</span>
+<span class="go">-- child 1 type: bool</span>
+<span class="go">  [</span>
+<span class="go">    true,</span>
+<span class="go">    false</span>
+<span class="go">  ]</span>
+
+<span class="gp">In [38]: </span><span class="n">pa</span><span class="o">.</span><span class="n">array</span><span class="p">([(</span><span class="mi">3</span><span class="p">,</span> <span class="bp">True</span><span class="p">),</span> <span class="p">(</span><span class="mi">4</span><span class="p">,</span> <span class="bp">False</span><span class="p">)],</span> <span class="nb">type</span><span class="o">=</span><span class="n">ty</span><span class="p">)</span>
+<span class="go">Out[38]: </span>
+<span class="go">&lt;pyarrow.lib.StructArray object at 0x7f4938da4638&gt;</span>
+<span class="go">-- is_valid: all not null</span>
+<span class="go">-- child 0 type: int8</span>
+<span class="go">  [</span>
+<span class="go">    3,</span>
+<span class="go">    4</span>
+<span class="go">  ]</span>
+<span class="go">-- child 1 type: bool</span>
+<span class="go">  [</span>
+<span class="go">    true,</span>
+<span class="go">    false</span>
+<span class="go">  ]</span>
+</pre></div>
+</div>
+<p>When initializing a struct array, nulls are allowed both at the struct
+level and at the individual field level.  If initializing from a sequence
+of Python dicts, a missing dict key is handled as a null value:</p>
+<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [39]: </span><span class="n">pa</span><span class="o">.</span><span class="n">array</span><span class="p">([{</span><span class="s1">&#39;x&#39;</span><span class="p">:</span> <span class="mi">1</span><span class="p">},</span> <span class="bp">None</span><span class="p">,</span> <span class="p">{</span><span class="s1">&#39;y&#39;</span><span class="p">:</span> <span class="bp">None</span><span class="p">}],</span> <span class="nb">type</span><span class="o">=</span><span class="n">ty</span><span class="p">)</span>
+<span class="gh">Out[39]: </span><span class="go"></span>
+<span class="go">&lt;pyarrow.lib.StructArray object at 0x7f4938da4c78&gt;</span>
+<span class="go">-- is_valid:</span>
+<span class="go">  [</span>
+<span class="go">    true,</span>
+<span class="go">    false,</span>
+<span class="go">    true</span>
+<span class="go">  ]</span>
+<span class="go">-- child 0 type: int8</span>
+<span class="go">  [</span>
+<span class="go">    1,</span>
+<span class="go">    null,</span>
+<span class="go">    null</span>
+<span class="go">  ]</span>
+<span class="go">-- child 1 type: bool</span>
+<span class="go">  [</span>
+<span class="go">    null,</span>
+<span class="go">    null,</span>
+<span class="go">    null</span>
+<span class="go">  ]</span>
+</pre></div>
+</div>
+<p>You can also construct a struct array from existing arrays for each of the
+struct’s components.  In this case, data storage will be shared with the
+individual arrays, and no copy is involved:</p>
+<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [40]: </span><span class="n">xs</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">7</span><span class="p">],</span> <span class="nb">type</span><span class="o">=</span><span class="n">pa</span><span class="o">.</span><span class="n">int16</span><span class="p">())</span>
+
+<span class="gp">In [41]: </span><span class="n">ys</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="bp">False</span><span class="p">,</span> <span class="bp">True</span><span class="p">,</span> <span class="bp">True</span><span class="p">])</span>
+
+<span class="gp">In [42]: </span><span class="n">arr</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">StructArray</span><span class="o">.</span><span class="n">from_arrays</span><span class="p">((</span><span class="n">xs</span><span class="p">,</span> <span class="n">ys</span><span class="p">),</span> <span class="n">names</span><span class="o">=</span><span class="p">(</span><span class="s1">&#39;x&#39;</span><span class="p">,</span> <span class="s1">&#39;y&#39;</span><span class="p">))</span>
+
+<span class="gp">In [43]: </span><span class="n">arr</span><span class="o">.</span><span class="n">type</span>
+<span class="gh">Out[43]: </span><span class="go">StructType(struct&lt;x: int16, y: bool&gt;)</span>
+
+<span class="gp">In [44]: </span><span class="n">arr</span>
+<span class="go">Out[44]: </span>
+<span class="go">&lt;pyarrow.lib.StructArray object at 0x7f4938dbc048&gt;</span>
+<span class="go">-- is_valid: all not null</span>
+<span class="go">-- child 0 type: int16</span>
+<span class="go">  [</span>
+<span class="go">    5,</span>
+<span class="go">    6,</span>
+<span class="go">    7</span>
+<span class="go">  ]</span>
+<span class="go">-- child 1 type: bool</span>
+<span class="go">  [</span>
+<span class="go">    false,</span>
+<span class="go">    true,</span>
+<span class="go">    true</span>
+<span class="go">  ]</span>
+</pre></div>
+</div>
+</div>
+<div class="section" id="union-arrays">
+<h3>Union arrays<a class="headerlink" href="#union-arrays" title="Permalink to this headline">¶</a></h3>
+<p>The union type represents a nested array type where each value can be one
+(and only one) of a set of possible types.  There are two possible
+storage types for union arrays: sparse and dense.</p>
+<p>In a sparse union array, each of the child arrays has the same length
+as the resulting union array.  They are accompanied by an <code class="docutils literal notranslate"><span class="pre">int8</span></code> “types”
+array that tells, for each value, from which child array it must be
+selected:</p>
+<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [45]: </span><span class="n">xs</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">7</span><span class="p">])</span>
+
+<span class="gp">In [46]: </span><span class="n">ys</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="bp">False</span><span class="p">,</span> <span class="bp">False</span><span class="p">,</span> <span class="bp">True</span><span class="p">])</span>
+
+<span class="gp">In [47]: </span><span class="n">types</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="nb">type</span><span class="o">=</span><span class="n">pa</span><span class="o">.</span><span class="n">int8</span><span class="p">())</span>
+
+<span class="gp">In [48]: </span><span class="n">union_arr</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">UnionArray</span><span class="o">.</span><span class="n">from_sparse</span><span class="p">(</span><span class="n">types</span><span class="p">,</span> <span class="p">[</span><span class="n">xs</span><span class="p">,</span> <span class="n">ys</span><span class="p">])</span>
+
+<span class="gp">In [49]: </span><span class="n">union_arr</span><span class="o">.</span><span class="n">type</span>
+<span class="gh">Out[49]: </span><span class="go">UnionType(union[sparse]&lt;0: int64=0, 1: bool=1&gt;)</span>
+
+<span class="gp">In [50]: </span><span class="n">union_arr</span>
+<span class="go">Out[50]: </span>
+<span class="go">&lt;pyarrow.lib.UnionArray object at 0x7f4938dbca48&gt;</span>
+<span class="go">-- is_valid: all not null</span>
+<span class="go">-- type_ids:   [</span>
+<span class="go">    0,</span>
+<span class="go">    1,</span>
+<span class="go">    1</span>
+<span class="go">  ]</span>
+<span class="go">-- child 0 type: int64</span>
+<span class="go">  [</span>
+<span class="go">    5,</span>
+<span class="go">    6,</span>
+<span class="go">    7</span>
+<span class="go">  ]</span>
+<span class="go">-- child 1 type: bool</span>
+<span class="go">  [</span>
+<span class="go">    false,</span>
+<span class="go">    false,</span>
+<span class="go">    true</span>
+<span class="go">  ]</span>
+</pre></div>
+</div>
+<p>In a dense union array, you also pass, in addition to the <code class="docutils literal notranslate"><span class="pre">int8</span></code> “types”
+array, an <code class="docutils literal notranslate"><span class="pre">int32</span></code> “offsets” array that tells, for each value, at
+which offset in the selected child array it can be found:</p>
+<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [51]: </span><span class="n">xs</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">7</span><span class="p">])</span>
+
+<span class="gp">In [52]: </span><span class="n">ys</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="bp">False</span><span class="p">,</span> <span class="bp">True</span><span class="p">])</span>
+
+<span class="gp">In [53]: </span><span class="n">types</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">],</span> <span class="nb">type</span><span class="o">=</span><span class="n">pa</span><span class="o">.</span><span class="n">int8</span><span class="p">())</span>
+
+<span class="gp">In [54]: </span><span class="n">offsets</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="nb">type</span><span class="o">=</span><span class="n">pa</span><span class="o">.</span><span class="n">int32</span><span class="p">())</span>
+
+<span class="gp">In [55]: </span><span class="n">union_arr</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">UnionArray</span><span class="o">.</span><span class="n">from_dense</span><span class="p">(</span><span class="n">types</span><span class="p">,</span> <span class="n">offsets</span><span class="p">,</span> <span class="p">[</span><span class="n">xs</span><span class="p">,</span> <span class="n">ys</span><span class="p">])</span>
+
+<span class="gp">In [56]: </span><span class="n">union_arr</span><span class="o">.</span><span class="n">type</span>
+<span class="gh">Out[56]: </span><span class="go">UnionType(union[dense]&lt;0: int64=0, 1: bool=1&gt;)</span>
+
+<span class="gp">In [57]: </span><span class="n">union_arr</span>
+<span class="go">Out[57]: </span>
+<span class="go">&lt;pyarrow.lib.UnionArray object at 0x7f4938dc7278&gt;</span>
+<span class="go">-- is_valid: all not null</span>
+<span class="go">-- type_ids:   [</span>
+<span class="go">    0,</span>
+<span class="go">    1,</span>
+<span class="go">    1,</span>
+<span class="go">    0,</span>
+<span class="go">    0</span>
+<span class="go">  ]</span>
+<span class="go">-- value_offsets:   [</span>
+<span class="go">    0,</span>
+<span class="go">    0,</span>
+<span class="go">    1,</span>
+<span class="go">    1,</span>
+<span class="go">    2</span>
+<span class="go">  ]</span>
+<span class="go">-- child 0 type: int64</span>
+<span class="go">  [</span>
+<span class="go">    5,</span>
+<span class="go">    6,</span>
+<span class="go">    7</span>
+<span class="go">  ]</span>
+<span class="go">-- child 1 type: bool</span>
+<span class="go">  [</span>
+<span class="go">    false,</span>
+<span class="go">    true</span>
+<span class="go">  ]</span>
+</pre></div>
+</div>
+</div>
+<div class="section" id="dictionary-arrays">
+<h3>Dictionary Arrays<a class="headerlink" href="#dictionary-arrays" title="Permalink to this headline">¶</a></h3>
+<p>The <strong>Dictionary</strong> type in PyArrow is a special array type that is similar to a
+factor in R or a <code class="docutils literal notranslate"><span class="pre">pandas.Categorical</span></code>. It enables one or more record batches
+in a file or stream to transmit integer <em>indices</em> referencing a shared
+<strong>dictionary</strong> containing the distinct values in the logical array. This is
+most often used with strings to save memory and improve performance.</p>
+<p>The way that dictionaries are handled in the Apache Arrow format and the way
+they appear in C++ and Python are slightly different. We define a special
+<a class="reference internal" href="generated/pyarrow.DictionaryArray.html#pyarrow.DictionaryArray" title="pyarrow.DictionaryArray"><code class="xref py py-class docutils literal notranslate"><span class="pre">DictionaryArray</span></code></a> type with a corresponding dictionary type. Let’s
+consider an example:</p>
+<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [58]: </span><span class="n">indices</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="bp">None</span><span class="p">,</span> <span class="mi">2</span><span class="p">])</span>
+
+<span class="gp">In [59]: </span><span class="n">dictionary</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="s1">&#39;foo&#39;</span><span class="p">,</span> <span class="s1">&#39;bar&#39;</span><span class="p">,</span> <span class="s1">&#39;baz&#39;</span><span class="p">])</span>
+
+<span class="gp">In [60]: </span><span class="n">dict_array</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">DictionaryArray</span><span class="o">.</span><span class="n">from_arrays</span><span class="p">(</span><span class="n">indices</span><span class="p">,</span> <span class="n">dictionary</span><span class="p">)</span>
+
+<span class="gp">In [61]: </span><span class="n">dict_array</span>
+<span class="gh">Out[61]: </span><span class="go"></span>
+<span class="go">&lt;pyarrow.lib.DictionaryArray object at 0x7f4935c40468&gt;</span>
+
+<span class="go">-- dictionary:</span>
+<span class="go">  [</span>
+<span class="go">    &quot;foo&quot;,</span>
+<span class="go">    &quot;bar&quot;,</span>
+<span class="go">    &quot;baz&quot;</span>
+<span class="go">  ]</span>
+<span class="go">-- indices:</span>
+<span class="go">  [</span>
+<span class="go">    0,</span>
+<span class="go">    1,</span>
+<span class="go">    0,</span>
+<span class="go">    1,</span>
+<span class="go">    2,</span>
+<span class="go">    0,</span>
+<span class="go">    null,</span>
+<span class="go">    2</span>
+<span class="go">  ]</span>
+</pre></div>
+</div>
+<p>Here we have:</p>
+<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [62]: </span><span class="k">print</span><span class="p">(</span><span class="n">dict_array</span><span class="o">.</span><span class="n">type</span><span class="p">)</span>
+<span class="go">dictionary&lt;values=string, indices=int64, ordered=0&gt;</span>
+
+<span class="gp">In [63]: </span><span class="n">dict_array</span><span class="o">.</span><span class="n">indices</span>
+<span class="go">Out[63]: </span>
+<span class="go">&lt;pyarrow.lib.Int64Array object at 0x7f4938dcc728&gt;</span>
+<span class="go">[</span>
+<span class="go">  0,</span>
+<span class="go">  1,</span>
+<span class="go">  0,</span>
+<span class="go">  1,</span>
+<span class="go">  2,</span>
+<span class="go">  0,</span>
+<span class="go">  null,</span>
+<span class="go">  2</span>
+<span class="go">]</span>
+
+<span class="gp">In [64]: </span><span class="n">dict_array</span><span class="o">.</span><span class="n">dictionary</span>
+<span class="go">Out[64]: </span>
+<span class="go">&lt;pyarrow.lib.StringArray object at 0x7f4938dcc6d8&gt;</span>
+<span class="go">[</span>
+<span class="go">  &quot;foo&quot;,</span>
+<span class="go">  &quot;bar&quot;,</span>
+<span class="go">  &quot;baz&quot;</span>
+<span class="go">]</span>
+</pre></div>
+</div>
+<p>When using <a class="reference internal" href="generated/pyarrow.DictionaryArray.html#pyarrow.DictionaryArray" title="pyarrow.DictionaryArray"><code class="xref py py-class docutils literal notranslate"><span class="pre">DictionaryArray</span></code></a> with pandas, the analogue is
+<code class="docutils literal notranslate"><span class="pre">pandas.Categorical</span></code> (more on this later):</p>
+<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [65]: </span><span class="n">dict_array</span><span class="o">.</span><span class="n">to_pandas</span><span class="p">()</span>
+<span class="gh">Out[65]: </span><span class="go"></span>
+<span class="go">[foo, bar, foo, bar, baz, foo, NaN, baz]</span>
+<span class="go">Categories (3, object): [foo, bar, baz]</span>
+</pre></div>
+</div>
+</div>
+</div>
+<div class="section" id="record-batches">
+<span id="data-record-batch"></span><h2>Record Batches<a class="headerlink" href="#record-batches" title="Permalink to this headline">¶</a></h2>
+<p>A <strong>Record Batch</strong> in Apache Arrow is a collection of equal-length array
+instances. Let’s consider a collection of arrays:</p>
+<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [66]: </span><span class="n">data</span> <span class="o">=</span> <span class="p">[</span>
+<span class="gp">   ....: </span>    <span class="n">pa</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]),</span>
+<span class="gp">   ....: </span>    <span class="n">pa</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="s1">&#39;foo&#39;</span><span class="p">,</span> <span class="s1">&#39;bar&#39;</span><span class="p">,</span> <span class="s1">&#39;baz&#39;</span><span class="p">,</span> <span class="bp">None</span><span class="p">]),</span>
+<span class="gp">   ....: </span>    <span class="n">pa</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="bp">True</span><span class="p">,</span> <span class="bp">None</span><span class="p">,</span> <span class="bp">False</span><span class="p">,</span> <span class="bp">True</span><span class="p">])</span>
+<span class="gp">   ....: </span><span class="p">]</span>
+<span class="gp">   ....: </span>
+</pre></div>
+</div>
+<p>A record batch can be created from this list of arrays using
+<code class="docutils literal notranslate"><span class="pre">RecordBatch.from_arrays</span></code>:</p>
+<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [67]: </span><span class="n">batch</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">RecordBatch</span><span class="o">.</span><span class="n">from_arrays</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="p">[</span><span class="s1">&#39;f0&#39;</span><span class="p">,</span> <span class="s1">&#39;f1&#39;</span><span class="p">,</span> <span class="s1">&#39;f2&#39;</span><span class="p">])</span>
+
+<span class="gp">In [68]: </span><span class="n">batch</span><span class="o">.</span><span class="n">num_columns</span>
+<span class="gh">Out[68]: </span><span class="go">3</span>
+
+<span class="gp">In [69]: </span><span class="n">batch</span><span class="o">.</span><span class="n">num_rows</span>
+<span class="go">Out[69]: 4</span>
+
+<span class="gp">In [70]: </span><span class="n">batch</span><span class="o">.</span><span class="n">schema</span>
+<span class="go">Out[70]: </span>
+<span class="go">f0: int64</span>
+<span class="go">f1: string</span>
+<span class="go">f2: bool</span>
+
+<span class="gp">In [71]: </span><span class="n">batch</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
+<span class="go">Out[71]: </span>
+<span class="go">&lt;pyarrow.lib.StringArray object at 0x7f493a9f6818&gt;</span>
+<span class="go">[</span>
+<span class="go">  &quot;foo&quot;,</span>
+<span class="go">  &quot;bar&quot;,</span>
+<span class="go">  &quot;baz&quot;,</span>
+<span class="go">  null</span>
+<span class="go">]</span>
+</pre></div>
+</div>
+<p>Like an array, a record batch can be sliced without copying memory:</p>
+<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [72]: </span><span class="n">batch2</span> <span class="o">=</span> <span class="n">batch</span><span class="o">.</span><span class="n">slice</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">3</span><span class="p">)</span>
+
+<span class="gp">In [73]: </span><span class="n">batch2</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
+<span class="gh">Out[73]: </span><span class="go"></span>
+<span class="go">&lt;pyarrow.lib.StringArray object at 0x7f493a9f6db8&gt;</span>
+<span class="go">[</span>
+<span class="go">  &quot;bar&quot;,</span>
+<span class="go">  &quot;baz&quot;,</span>
+<span class="go">  null</span>
+<span class="go">]</span>
+</pre></div>
+</div>
+</div>
+<div class="section" id="tables">
+<span id="data-table"></span><h2>Tables<a class="headerlink" href="#tables" title="Permalink to this headline">¶</a></h2>
+<p>The PyArrow <a class="reference internal" href="generated/pyarrow.Table.html#pyarrow.Table" title="pyarrow.Table"><code class="xref py py-class docutils literal notranslate"><span class="pre">Table</span></code></a> type is not part of the Apache Arrow
+specification, but is rather a tool to help with wrangling multiple record
+batches and array pieces as a single logical dataset. As a relevant example, we
+may receive multiple small record batches in a socket stream, then need to
+concatenate them into contiguous memory for use in NumPy or pandas. The Table
+object makes this efficient without requiring additional memory copying.</p>
+<p>Considering the record batch we created above, we can create a Table containing
+one or more copies of the batch using <code class="docutils literal notranslate"><span class="pre">Table.from_batches</span></code>:</p>
+<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [74]: </span><span class="n">batches</span> <span class="o">=</span> <span class="p">[</span><span class="n">batch</span><span class="p">]</span> <span class="o">*</span> <span class="mi">5</span>
+
+<span class="gp">In [75]: </span><span class="n">table</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="o">.</span><span class="n">from_batches</span><span class="p">(</span><span class="n">batches</span><span class="p">)</span>
+
+<span class="gp">In [76]: </span><span class="n">table</span>
+<span class="gh">Out[76]: </span><span class="go"></span>
+<span class="go">pyarrow.Table</span>
+<span class="go">f0: int64</span>
+<span class="go">f1: string</span>
+<span class="go">f2: bool</span>
+
+<span class="gp">In [77]: </span><span class="n">table</span><span class="o">.</span><span class="n">num_rows</span>
+<span class="go">Out[77]: 20</span>
+</pre></div>
+</div>
+<p>The table’s columns are instances of <a class="reference internal" href="generated/pyarrow.Column.html#pyarrow.Column" title="pyarrow.Column"><code class="xref py py-class docutils literal notranslate"><span class="pre">Column</span></code></a>, which is a container
+for one or more arrays of the same type.</p>
+<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [78]: </span><span class="n">c</span> <span class="o">=</span> <span class="n">table</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
+
+<span class="gp">In [79]: </span><span class="n">c</span>
+<span class="gh">Out[79]: </span><span class="go"></span>
+<span class="go">&lt;Column name=&#39;f0&#39; type=DataType(int64)&gt;</span>
+<span class="go">[</span>
+<span class="go">  [</span>
+<span class="go">    1,</span>
+<span class="go">    2,</span>
+<span class="go">    3,</span>
+<span class="go">    4</span>
+<span class="go">  ],</span>
+<span class="go">  [</span>
+<span class="go">    1,</span>
+<span class="go">    2,</span>
+<span class="go">    3,</span>
+<span class="go">    4</span>
+<span class="go">  ],</span>
+<span class="go">  [</span>
+<span class="go">    1,</span>
+<span class="go">    2,</span>
+<span class="go">    3,</span>
+<span class="go">    4</span>
+<span class="go">  ],</span>
+<span class="go">  [</span>
+<span class="go">    1,</span>
+<span class="go">    2,</span>
+<span class="go">    3,</span>
+<span class="go">    4</span>
+<span class="go">  ],</span>
+<span class="go">  [</span>
+<span class="go">    1,</span>
+<span class="go">    2,</span>
+<span class="go">    3,</span>
+<span class="go">    4</span>
+<span class="go">  ]</span>
+<span class="go">]</span>
+
+<span class="gp">In [80]: </span><span class="n">c</span><span class="o">.</span><span class="n">data</span>
+<span class="go">Out[80]: </span>
+<span class="go">&lt;pyarrow.lib.ChunkedArray object at 0x7f49471393c0&gt;</span>
+<span class="go">[</span>
+<span class="go">  [</span>
+<span class="go">    1,</span>
+<span class="go">    2,</span>
+<span class="go">    3,</span>
+<span class="go">    4</span>
+<span class="go">  ],</span>
+<span class="go">  [</span>
+<span class="go">    1,</span>
+<span class="go">    2,</span>
+<span class="go">    3,</span>
+<span class="go">    4</span>
+<span class="go">  ],</span>
+<span class="go">  [</span>
+<span class="go">    1,</span>
+<span class="go">    2,</span>
+<span class="go">    3,</span>
+<span class="go">    4</span>
+<span class="go">  ],</span>
+<span class="go">  [</span>
+<span class="go">    1,</span>
+<span class="go">    2,</span>
+<span class="go">    3,</span>
+<span class="go">    4</span>
+<span class="go">  ],</span>
+<span class="go">  [</span>
+<span class="go">    1,</span>
+<span class="go">    2,</span>
+<span class="go">    3,</span>
+<span class="go">    4</span>
+<span class="go">  ]</span>
+<span class="go">]</span>
+
+<span class="gp">In [81]: </span><span class="n">c</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">num_chunks</span>
+<span class="go">Out[81]: 5</span>
+
+<span class="gp">In [82]: </span><span class="n">c</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">chunk</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
+<span class="go">Out[82]: </span>
+<span class="go">&lt;pyarrow.lib.Int64Array object at 0x7f493a9f0d18&gt;</span>
+<span class="go">[</span>
+<span class="go">  1,</span>
+<span class="go">  2,</span>
+<span class="go">  3,</span>
+<span class="go">  4</span>
+<span class="go">]</span>
+</pre></div>
+</div>
+<p>As you’ll see in the <a class="reference internal" href="pandas.html#pandas-interop"><span class="std std-ref">pandas section</span></a>, we can convert
+these objects to contiguous NumPy arrays for use in pandas:</p>
+<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [83]: </span><span class="n">c</span><span class="o">.</span><span class="n">to_pandas</span><span class="p">()</span>
+<span class="gh">Out[83]: </span><span class="go"></span>
+<span class="go">0     1</span>
+<span class="go">1     2</span>
+<span class="go">2     3</span>
+<span class="go">3     4</span>
+<span class="go">4     1</span>
+<span class="go">5     2</span>
+<span class="go">6     3</span>
+<span class="go">7     4</span>
+<span class="go">8     1</span>
+<span class="go">9     2</span>
+<span class="go">10    3</span>
+<span class="go">11    4</span>
+<span class="go">12    1</span>
+<span class="go">13    2</span>
+<span class="go">14    3</span>
+<span class="go">15    4</span>
+<span class="go">16    1</span>
+<span class="go">17    2</span>
+<span class="go">18    3</span>
+<span class="go">19    4</span>
+<span class="go">Name: f0, dtype: int64</span>
+</pre></div>
+</div>
+<p>Multiple tables can also be concatenated together to form a single table using
+<code class="docutils literal notranslate"><span class="pre">pyarrow.concat_tables</span></code>, if the schemas are equal:</p>
+<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [84]: </span><span class="n">tables</span> <span class="o">=</span> <span class="p">[</span><span class="n">table</span><span class="p">]</span> <span class="o">*</span> <span class="mi">2</span>
+
+<span class="gp">In [85]: </span><span class="n">table_all</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">concat_tables</span><span class="p">(</span><span class="n">tables</span><span class="p">)</span>
+
+<span class="gp">In [86]: </span><span class="n">table_all</span><span class="o">.</span><span class="n">num_rows</span>
+<span class="gh">Out[86]: </span><span class="go">40</span>
+
+<span class="gp">In [87]: </span><span class="n">c</span> <span class="o">=</span> <span class="n">table_all</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
+
+<span class="gp">In [88]: </span><span class="n">c</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">num_chunks</span>
+<span class="gh">Out[88]: </span><span class="go">10</span>
+</pre></div>
+</div>
+<p>This is similar to <code class="docutils literal notranslate"><span class="pre">Table.from_batches</span></code>, but uses tables as input instead of
+record batches. Record batches can be made into tables, but not the other way
+around, so if your data is already in table form, then use
+<code class="docutils literal notranslate"><span class="pre">pyarrow.concat_tables</span></code>.</p>
+</div>
+<div class="section" id="custom-schema-and-field-metadata">
+<h2>Custom Schema and Field Metadata<a class="headerlink" href="#custom-schema-and-field-metadata" title="Permalink to this headline">¶</a></h2>
+<p>TODO</p>
+</div>
+</div>
+
+
+           </div>
+           
+          </div>
+          <footer>
+  
+    <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
+      
+        <a href="ipc.html" class="btn btn-neutral float-right" title="Streaming, Serialization, and IPC" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right"></span></a>
+      
+      
+        <a href="memory.html" class="btn btn-neutral" title="Memory and IO Interfaces" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> Previous</a>
+      
+    </div>
+  
+
+  <hr/>
+
+  <div role="contentinfo">
+    <p>
+        &copy; Copyright 2016-2018 Apache Software Foundation
+
+    </p>
+  </div>
+  Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>. 
+
+</footer>
+
+        </div>
+      </div>
+
+    </section>
+
+  </div>
+  
+
+
+  
+
+    
+    
+      <script type="text/javascript" id="documentation_options" data-url_root="../" src="../_static/documentation_options.js"></script>
+        <script type="text/javascript" src="../_static/jquery.js"></script>
+        <script type="text/javascript" src="../_static/underscore.js"></script>
+        <script type="text/javascript" src="../_static/doctools.js"></script>
+    
+
+  
+
+  <script type="text/javascript" src="../_static/js/theme.js"></script>
+
+  <script type="text/javascript">
+      jQuery(function () {
+          SphinxRtdTheme.Navigation.enable(true);
+      });
+  </script>
+<script async src="https://www.googletagmanager.com/gtag/js?id=UA-107500873-1"></script>
+<script>
+  window.dataLayer = window.dataLayer || [];
+  function gtag(){dataLayer.push(arguments);}
+  gtag('js', new Date());
+
+  gtag('config', 'UA-107500873-1');
+</script>
+
+
+</body>
+</html>
\ No newline at end of file