You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by uw...@apache.org on 2017/11/19 14:56:32 UTC
[19/19] arrow-site git commit: API doc update
API doc update
Project: http://git-wip-us.apache.org/repos/asf/arrow-site/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow-site/commit/35611f84
Tree: http://git-wip-us.apache.org/repos/asf/arrow-site/tree/35611f84
Diff: http://git-wip-us.apache.org/repos/asf/arrow-site/diff/35611f84
Branch: refs/heads/asf-site
Commit: 35611f84fc0826dabb99ff16a21a0f5aaeecf73a
Parents: 35e0e75
Author: Korn, Uwe <Uw...@blue-yonder.com>
Authored: Sun Nov 19 15:56:03 2017 +0100
Committer: Korn, Uwe <Uw...@blue-yonder.com>
Committed: Sun Nov 19 15:56:03 2017 +0100
----------------------------------------------------------------------
docs/python/_modules/index.html | 4 +-
docs/python/_modules/pyarrow/feather.html | 37 +-
docs/python/_modules/pyarrow/filesystem.html | 26 +-
docs/python/_modules/pyarrow/hdfs.html | 3 +-
docs/python/_modules/pyarrow/ipc.html | 20 +-
docs/python/_modules/pyarrow/parquet.html | 159 +-
docs/python/_modules/pyarrow/types.html | 323 +++
docs/python/_sources/api.rst.txt | 5 +-
docs/python/_sources/development.rst.txt | 27 +-
.../_sources/generated/pyarrow.Array.rst.txt | 32 +-
.../_sources/generated/pyarrow.Column.rst.txt | 1 +
.../_sources/generated/pyarrow.Field.rst.txt | 31 +-
.../_sources/generated/pyarrow.Schema.rst.txt | 31 +-
.../generated/pyarrow.chunked_array.rst.txt | 6 +
.../generated/pyarrow.lib.Array.rst.txt | 2 +
.../generated/pyarrow.lib.BinaryArray.rst.txt | 2 +
.../generated/pyarrow.lib.BooleanArray.rst.txt | 2 +
.../generated/pyarrow.lib.DataType.rst.txt | 1 +
.../generated/pyarrow.lib.Date32Array.rst.txt | 2 +
.../generated/pyarrow.lib.Date64Array.rst.txt | 2 +
.../pyarrow.lib.Decimal128Array.rst.txt | 31 +
.../pyarrow.lib.DictionaryArray.rst.txt | 2 +
.../pyarrow.lib.FixedSizeBinaryArray.rst.txt | 2 +
.../pyarrow.lib.FloatingPointArray.rst.txt | 2 +
.../generated/pyarrow.lib.Int16Array.rst.txt | 2 +
.../generated/pyarrow.lib.Int32Array.rst.txt | 2 +
.../generated/pyarrow.lib.Int64Array.rst.txt | 2 +
.../generated/pyarrow.lib.Int8Array.rst.txt | 2 +
.../generated/pyarrow.lib.IntegerArray.rst.txt | 2 +
.../generated/pyarrow.lib.ListArray.rst.txt | 2 +
.../generated/pyarrow.lib.NullArray.rst.txt | 2 +
.../generated/pyarrow.lib.NumericArray.rst.txt | 2 +
.../generated/pyarrow.lib.StringArray.rst.txt | 2 +
.../generated/pyarrow.lib.Time32Array.rst.txt | 2 +
.../generated/pyarrow.lib.Time64Array.rst.txt | 2 +
.../pyarrow.lib.TimestampArray.rst.txt | 2 +
.../generated/pyarrow.lib.UInt16Array.rst.txt | 2 +
.../generated/pyarrow.lib.UInt32Array.rst.txt | 2 +
.../generated/pyarrow.lib.UInt64Array.rst.txt | 2 +
.../generated/pyarrow.lib.UInt8Array.rst.txt | 2 +
.../pyarrow.parquet.ParquetWriter.rst.txt | 24 +
.../generated/pyarrow.types.is_binary.rst.txt | 6 +
.../generated/pyarrow.types.is_boolean.rst.txt | 6 +
.../generated/pyarrow.types.is_date.rst.txt | 6 +
.../generated/pyarrow.types.is_decimal.rst.txt | 6 +
.../pyarrow.types.is_dictionary.rst.txt | 6 +
.../pyarrow.types.is_fixed_size_binary.rst.txt | 6 +
.../generated/pyarrow.types.is_floating.rst.txt | 6 +
.../generated/pyarrow.types.is_integer.rst.txt | 6 +
.../generated/pyarrow.types.is_list.rst.txt | 6 +
.../generated/pyarrow.types.is_map.rst.txt | 6 +
.../generated/pyarrow.types.is_nested.rst.txt | 6 +
.../generated/pyarrow.types.is_null.rst.txt | 6 +
.../pyarrow.types.is_signed_integer.rst.txt | 6 +
.../generated/pyarrow.types.is_string.rst.txt | 6 +
.../generated/pyarrow.types.is_struct.rst.txt | 6 +
.../generated/pyarrow.types.is_temporal.rst.txt | 6 +
.../generated/pyarrow.types.is_time.rst.txt | 6 +
.../pyarrow.types.is_timestamp.rst.txt | 6 +
.../generated/pyarrow.types.is_unicode.rst.txt | 6 +
.../generated/pyarrow.types.is_union.rst.txt | 6 +
.../pyarrow.types.is_unsigned_integer.rst.txt | 6 +
docs/python/api.html | 516 ++---
docs/python/data.html | 536 ++---
docs/python/development.html | 23 +-
docs/python/filesystems.html | 75 +-
docs/python/generated/pyarrow.Array.html | 184 +-
docs/python/generated/pyarrow.ArrayValue.html | 3 +-
docs/python/generated/pyarrow.BinaryValue.html | 3 +-
docs/python/generated/pyarrow.BooleanValue.html | 3 +-
docs/python/generated/pyarrow.Buffer.html | 3 +-
.../generated/pyarrow.BufferOutputStream.html | 3 +-
docs/python/generated/pyarrow.BufferReader.html | 3 +-
docs/python/generated/pyarrow.ChunkedArray.html | 12 +-
docs/python/generated/pyarrow.Column.html | 46 +-
docs/python/generated/pyarrow.Date32Value.html | 3 +-
docs/python/generated/pyarrow.Date64Value.html | 3 +-
docs/python/generated/pyarrow.DecimalValue.html | 8 +-
docs/python/generated/pyarrow.DoubleValue.html | 3 +-
docs/python/generated/pyarrow.Field.html | 146 +-
.../generated/pyarrow.FixedSizeBinaryValue.html | 3 +-
docs/python/generated/pyarrow.FloatValue.html | 3 +-
.../generated/pyarrow.HadoopFileSystem.cat.html | 3 +-
.../pyarrow.HadoopFileSystem.chmod.html | 3 +-
.../pyarrow.HadoopFileSystem.chown.html | 3 +-
.../pyarrow.HadoopFileSystem.delete.html | 3 +-
.../generated/pyarrow.HadoopFileSystem.df.html | 3 +-
.../pyarrow.HadoopFileSystem.disk_usage.html | 3 +-
.../pyarrow.HadoopFileSystem.download.html | 3 +-
.../pyarrow.HadoopFileSystem.exists.html | 3 +-
.../pyarrow.HadoopFileSystem.get_capacity.html | 3 +-
...pyarrow.HadoopFileSystem.get_space_used.html | 3 +-
.../pyarrow.HadoopFileSystem.info.html | 3 +-
.../generated/pyarrow.HadoopFileSystem.ls.html | 3 +-
.../pyarrow.HadoopFileSystem.mkdir.html | 3 +-
.../pyarrow.HadoopFileSystem.open.html | 3 +-
.../pyarrow.HadoopFileSystem.rename.html | 3 +-
.../generated/pyarrow.HadoopFileSystem.rm.html | 3 +-
.../pyarrow.HadoopFileSystem.upload.html | 3 +-
docs/python/generated/pyarrow.HdfsFile.html | 3 +-
docs/python/generated/pyarrow.Int16Value.html | 3 +-
docs/python/generated/pyarrow.Int32Value.html | 3 +-
docs/python/generated/pyarrow.Int64Value.html | 3 +-
docs/python/generated/pyarrow.Int8Value.html | 3 +-
docs/python/generated/pyarrow.ListValue.html | 3 +-
.../generated/pyarrow.LocalFileSystem.html | 3 +-
.../generated/pyarrow.MemoryMappedFile.html | 3 +-
docs/python/generated/pyarrow.MemoryPool.html | 3 +-
docs/python/generated/pyarrow.Message.html | 3 +-
.../python/generated/pyarrow.MessageReader.html | 3 +-
docs/python/generated/pyarrow.NA.html | 7 +-
docs/python/generated/pyarrow.NativeFile.html | 3 +-
docs/python/generated/pyarrow.PythonFile.html | 3 +-
docs/python/generated/pyarrow.RecordBatch.html | 9 +-
.../pyarrow.RecordBatchFileReader.html | 3 +-
.../pyarrow.RecordBatchFileWriter.html | 3 +-
.../pyarrow.RecordBatchStreamReader.html | 3 +-
.../pyarrow.RecordBatchStreamWriter.html | 3 +-
docs/python/generated/pyarrow.Scalar.html | 3 +-
docs/python/generated/pyarrow.Schema.html | 151 +-
.../generated/pyarrow.SerializationContext.html | 3 +-
.../generated/pyarrow.SerializedPyObject.html | 3 +-
docs/python/generated/pyarrow.StringValue.html | 3 +-
docs/python/generated/pyarrow.Table.html | 13 +-
docs/python/generated/pyarrow.Tensor.html | 3 +-
.../generated/pyarrow.TimestampValue.html | 3 +-
docs/python/generated/pyarrow.UInt16Value.html | 3 +-
docs/python/generated/pyarrow.UInt32Value.html | 3 +-
docs/python/generated/pyarrow.UInt64Value.html | 3 +-
docs/python/generated/pyarrow.UInt8Value.html | 3 +-
.../generated/pyarrow.allocate_buffer.html | 3 +-
docs/python/generated/pyarrow.binary.html | 3 +-
docs/python/generated/pyarrow.bool_.html | 3 +-
.../python/generated/pyarrow.chunked_array.html | 178 ++
.../generated/pyarrow.create_memory_map.html | 3 +-
docs/python/generated/pyarrow.date32.html | 3 +-
docs/python/generated/pyarrow.date64.html | 3 +-
docs/python/generated/pyarrow.decimal.html | 3 +-
.../generated/pyarrow.default_memory_pool.html | 3 +-
docs/python/generated/pyarrow.deserialize.html | 3 +-
.../generated/pyarrow.deserialize_from.html | 3 +-
docs/python/generated/pyarrow.dictionary.html | 3 +-
.../generated/pyarrow.feather.read_feather.html | 3 +-
.../pyarrow.feather.write_feather.html | 3 +-
docs/python/generated/pyarrow.field.html | 5 +-
docs/python/generated/pyarrow.float16.html | 3 +-
docs/python/generated/pyarrow.float32.html | 3 +-
docs/python/generated/pyarrow.float64.html | 3 +-
.../generated/pyarrow.from_numpy_dtype.html | 7 +-
.../pyarrow.get_record_batch_size.html | 3 +-
.../generated/pyarrow.get_tensor_size.html | 3 +-
docs/python/generated/pyarrow.hdfs.connect.html | 3 +-
docs/python/generated/pyarrow.int16.html | 3 +-
docs/python/generated/pyarrow.int32.html | 3 +-
docs/python/generated/pyarrow.int64.html | 3 +-
docs/python/generated/pyarrow.int8.html | 3 +-
docs/python/generated/pyarrow.lib.Array.html | 47 +-
.../generated/pyarrow.lib.BinaryArray.html | 42 +-
.../generated/pyarrow.lib.BooleanArray.html | 42 +-
docs/python/generated/pyarrow.lib.DataType.html | 24 +-
.../generated/pyarrow.lib.Date32Array.html | 42 +-
.../generated/pyarrow.lib.Date64Array.html | 42 +-
.../generated/pyarrow.lib.Decimal128Array.html | 373 ++++
.../generated/pyarrow.lib.DictionaryArray.html | 48 +-
docs/python/generated/pyarrow.lib.Field.html | 3 +-
.../pyarrow.lib.FixedSizeBinaryArray.html | 42 +-
.../pyarrow.lib.FloatingPointArray.html | 42 +-
.../generated/pyarrow.lib.Int16Array.html | 42 +-
.../generated/pyarrow.lib.Int32Array.html | 42 +-
.../generated/pyarrow.lib.Int64Array.html | 42 +-
.../python/generated/pyarrow.lib.Int8Array.html | 42 +-
.../generated/pyarrow.lib.IntegerArray.html | 42 +-
.../python/generated/pyarrow.lib.ListArray.html | 55 +-
.../python/generated/pyarrow.lib.NullArray.html | 42 +-
.../generated/pyarrow.lib.NumericArray.html | 42 +-
docs/python/generated/pyarrow.lib.Schema.html | 3 +-
.../generated/pyarrow.lib.StringArray.html | 42 +-
.../generated/pyarrow.lib.Time32Array.html | 42 +-
.../generated/pyarrow.lib.Time64Array.html | 42 +-
.../generated/pyarrow.lib.TimestampArray.html | 46 +-
.../generated/pyarrow.lib.UInt16Array.html | 42 +-
.../generated/pyarrow.lib.UInt32Array.html | 42 +-
.../generated/pyarrow.lib.UInt64Array.html | 42 +-
.../generated/pyarrow.lib.UInt8Array.html | 42 +-
docs/python/generated/pyarrow.list_.html | 3 +-
.../pyarrow.log_memory_allocations.html | 3 +-
docs/python/generated/pyarrow.memory_map.html | 3 +-
docs/python/generated/pyarrow.null.html | 3 +-
docs/python/generated/pyarrow.open_file.html | 3 +-
docs/python/generated/pyarrow.open_stream.html | 3 +-
.../pyarrow.parquet.ParquetDataset.html | 3 +-
.../generated/pyarrow.parquet.ParquetFile.html | 7 +-
.../pyarrow.parquet.ParquetWriter.html | 223 ++
.../pyarrow.parquet.read_metadata.html | 3 +-
.../generated/pyarrow.parquet.read_pandas.html | 3 +-
.../generated/pyarrow.parquet.read_schema.html | 3 +-
.../generated/pyarrow.parquet.read_table.html | 7 +-
.../pyarrow.parquet.write_metadata.html | 3 +-
.../generated/pyarrow.parquet.write_table.html | 5 +-
.../generated/pyarrow.plasma.ObjectID.html | 3 +-
.../generated/pyarrow.plasma.PlasmaBuffer.html | 3 +-
.../generated/pyarrow.plasma.PlasmaClient.html | 3 +-
docs/python/generated/pyarrow.read_message.html | 3 +-
.../generated/pyarrow.read_record_batch.html | 3 +-
.../generated/pyarrow.read_serialized.html | 3 +-
docs/python/generated/pyarrow.read_tensor.html | 3 +-
docs/python/generated/pyarrow.schema.html | 14 +-
docs/python/generated/pyarrow.serialize.html | 6 +-
docs/python/generated/pyarrow.serialize_to.html | 6 +-
.../generated/pyarrow.set_memory_pool.html | 3 +-
docs/python/generated/pyarrow.string.html | 3 +-
docs/python/generated/pyarrow.struct.html | 3 +-
docs/python/generated/pyarrow.time32.html | 3 +-
docs/python/generated/pyarrow.time64.html | 3 +-
docs/python/generated/pyarrow.timestamp.html | 3 +-
.../pyarrow.total_allocated_bytes.html | 3 +-
.../generated/pyarrow.types.is_binary.html | 168 ++
.../generated/pyarrow.types.is_boolean.html | 168 ++
.../python/generated/pyarrow.types.is_date.html | 168 ++
.../generated/pyarrow.types.is_decimal.html | 168 ++
.../generated/pyarrow.types.is_dictionary.html | 168 ++
.../pyarrow.types.is_fixed_size_binary.html | 168 ++
.../generated/pyarrow.types.is_floating.html | 168 ++
.../generated/pyarrow.types.is_integer.html | 168 ++
.../python/generated/pyarrow.types.is_list.html | 168 ++
docs/python/generated/pyarrow.types.is_map.html | 168 ++
.../generated/pyarrow.types.is_nested.html | 168 ++
.../python/generated/pyarrow.types.is_null.html | 168 ++
.../pyarrow.types.is_signed_integer.html | 168 ++
.../generated/pyarrow.types.is_string.html | 168 ++
.../generated/pyarrow.types.is_struct.html | 168 ++
.../generated/pyarrow.types.is_temporal.html | 169 ++
.../python/generated/pyarrow.types.is_time.html | 168 ++
.../generated/pyarrow.types.is_timestamp.html | 168 ++
.../generated/pyarrow.types.is_unicode.html | 168 ++
.../generated/pyarrow.types.is_union.html | 168 ++
.../pyarrow.types.is_unsigned_integer.html | 168 ++
docs/python/generated/pyarrow.uint16.html | 3 +-
docs/python/generated/pyarrow.uint32.html | 3 +-
docs/python/generated/pyarrow.uint64.html | 3 +-
docs/python/generated/pyarrow.uint8.html | 3 +-
docs/python/generated/pyarrow.write_tensor.html | 3 +-
docs/python/genindex.html | 1917 ++++++++++++++++++
docs/python/getting_involved.html | 5 +
docs/python/ipc.html | 304 +--
docs/python/memory.html | 222 +-
docs/python/objects.inv | Bin 924 -> 7916 bytes
docs/python/pandas.html | 2 +-
docs/python/parquet.html | 190 +-
docs/python/plasma.html | 5 +
docs/python/searchindex.js | 2 +-
251 files changed, 8795 insertions(+), 2263 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow-site/blob/35611f84/docs/python/_modules/index.html
----------------------------------------------------------------------
diff --git a/docs/python/_modules/index.html b/docs/python/_modules/index.html
index 03fa369..25e6d19 100644
--- a/docs/python/_modules/index.html
+++ b/docs/python/_modules/index.html
@@ -71,6 +71,7 @@
<li class="toctree-l1"><a class="reference internal" href="../plasma.html">The Plasma In-Memory Object Store</a></li>
<li class="toctree-l1"><a class="reference internal" href="../pandas.html">Using PyArrow with pandas</a></li>
<li class="toctree-l1"><a class="reference internal" href="../parquet.html">Reading and Writing the Apache Parquet Format</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../extending.html">Building C++ and Cython Extensions using pyarrow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../api.html">API Reference</a></li>
<li class="toctree-l1"><a class="reference internal" href="../getting_involved.html">Getting Involved</a></li>
</ul>
@@ -127,6 +128,7 @@
<li><a href="pyarrow/lib.html">pyarrow.lib</a></li>
<li><a href="pyarrow/parquet.html">pyarrow.parquet</a></li>
<li><a href="pyarrow/plasma.html">pyarrow.plasma</a></li>
+<li><a href="pyarrow/types.html">pyarrow.types</a></li>
</ul>
</div>
@@ -141,7 +143,7 @@
</p>
<p>
© Copyright 2016-2017 Apache Software Foundation.<br/>
- Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.6.4.<br/>
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.6.5.<br/>
</p>
</div>
</footer>
http://git-wip-us.apache.org/repos/asf/arrow-site/blob/35611f84/docs/python/_modules/pyarrow/feather.html
----------------------------------------------------------------------
diff --git a/docs/python/_modules/pyarrow/feather.html b/docs/python/_modules/pyarrow/feather.html
index 6e0b1a7..7372eba 100644
--- a/docs/python/_modules/pyarrow/feather.html
+++ b/docs/python/_modules/pyarrow/feather.html
@@ -71,6 +71,7 @@
<li class="toctree-l1"><a class="reference internal" href="../../plasma.html">The Plasma In-Memory Object Store</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../pandas.html">Using PyArrow with pandas</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../parquet.html">Reading and Writing the Apache Parquet Format</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../extending.html">Building C++ and Cython Extensions using pyarrow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../api.html">API Reference</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../getting_involved.html">Getting Involved</a></li>
</ul>
@@ -145,7 +146,7 @@
<span class="kn">from</span> <span class="nn">pyarrow.compat</span> <span class="k">import</span> <span class="n">pdapi</span>
<span class="kn">from</span> <span class="nn">pyarrow.lib</span> <span class="k">import</span> <span class="n">FeatherError</span> <span class="c1"># noqa</span>
-<span class="kn">from</span> <span class="nn">pyarrow.lib</span> <span class="k">import</span> <span class="n">Table</span>
+<span class="kn">from</span> <span class="nn">pyarrow.lib</span> <span class="k">import</span> <span class="n">RecordBatch</span><span class="p">,</span> <span class="n">Table</span>
<span class="kn">import</span> <span class="nn">pyarrow.lib</span> <span class="k">as</span> <span class="nn">ext</span>
<span class="k">try</span><span class="p">:</span>
@@ -197,30 +198,12 @@
<span class="k">if</span> <span class="ow">not</span> <span class="n">df</span><span class="o">.</span><span class="n">columns</span><span class="o">.</span><span class="n">is_unique</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">"cannot serialize duplicate column names"</span><span class="p">)</span>
- <span class="c1"># TODO(wesm): pipeline conversion to Arrow memory layout</span>
- <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">name</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">columns</span><span class="p">):</span>
- <span class="n">col</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="n">i</span><span class="p">]</span>
-
- <span class="k">if</span> <span class="n">pdapi</span><span class="o">.</span><span class="n">is_object_dtype</span><span class="p">(</span><span class="n">col</span><span class="p">):</span>
- <span class="n">inferred_type</span> <span class="o">=</span> <span class="n">infer_dtype</span><span class="p">(</span><span class="n">col</span><span class="p">)</span>
- <span class="n">msg</span> <span class="o">=</span> <span class="p">(</span><span class="s2">"cannot serialize column </span><span class="si">{n}</span><span class="s2"> "</span>
- <span class="s2">"named </span><span class="si">{name}</span><span class="s2"> with dtype </span><span class="si">{dtype}</span><span class="s2">"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
- <span class="n">n</span><span class="o">=</span><span class="n">i</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="n">name</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">inferred_type</span><span class="p">))</span>
-
- <span class="k">if</span> <span class="n">inferred_type</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">'mixed'</span><span class="p">]:</span>
-
- <span class="c1"># allow columns with nulls + an inferable type</span>
- <span class="n">inferred_type</span> <span class="o">=</span> <span class="n">infer_dtype</span><span class="p">(</span><span class="n">col</span><span class="p">[</span><span class="n">col</span><span class="o">.</span><span class="n">notnull</span><span class="p">()])</span>
- <span class="k">if</span> <span class="n">inferred_type</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">'mixed'</span><span class="p">]:</span>
- <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="n">msg</span><span class="p">)</span>
-
- <span class="k">elif</span> <span class="n">inferred_type</span> <span class="ow">not</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">'unicode'</span><span class="p">,</span> <span class="s1">'string'</span><span class="p">]:</span>
- <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="n">msg</span><span class="p">)</span>
-
- <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">six</span><span class="o">.</span><span class="n">string_types</span><span class="p">):</span>
- <span class="n">name</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">name</span><span class="p">)</span>
-
- <span class="bp">self</span><span class="o">.</span><span class="n">writer</span><span class="o">.</span><span class="n">write_array</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">col</span><span class="p">)</span>
+ <span class="c1"># TODO(wesm): Remove this length check, see ARROW-1732</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">columns</span><span class="p">)</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span>
+ <span class="n">batch</span> <span class="o">=</span> <span class="n">RecordBatch</span><span class="o">.</span><span class="n">from_pandas</span><span class="p">(</span><span class="n">df</span><span class="p">,</span> <span class="n">preserve_index</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
+ <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">name</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">batch</span><span class="o">.</span><span class="n">schema</span><span class="o">.</span><span class="n">names</span><span class="p">):</span>
+ <span class="n">col</span> <span class="o">=</span> <span class="n">batch</span><span class="p">[</span><span class="n">i</span><span class="p">]</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">writer</span><span class="o">.</span><span class="n">write_array</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">col</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">writer</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
@@ -238,7 +221,7 @@
<span class="n">writer</span> <span class="o">=</span> <span class="n">FeatherWriter</span><span class="p">(</span><span class="n">dest</span><span class="p">)</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">writer</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">df</span><span class="p">)</span>
- <span class="k">except</span><span class="p">:</span>
+ <span class="k">except</span> <span class="ne">Exception</span><span class="p">:</span>
<span class="c1"># Try to make sure the resource is closed</span>
<span class="kn">import</span> <span class="nn">gc</span>
<span class="n">writer</span> <span class="o">=</span> <span class="kc">None</span>
@@ -284,7 +267,7 @@
</p>
<p>
© Copyright 2016-2017 Apache Software Foundation.<br/>
- Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.6.4.<br/>
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.6.5.<br/>
</p>
</div>
</footer>
http://git-wip-us.apache.org/repos/asf/arrow-site/blob/35611f84/docs/python/_modules/pyarrow/filesystem.html
----------------------------------------------------------------------
diff --git a/docs/python/_modules/pyarrow/filesystem.html b/docs/python/_modules/pyarrow/filesystem.html
index 6ec48f3..8ab1249 100644
--- a/docs/python/_modules/pyarrow/filesystem.html
+++ b/docs/python/_modules/pyarrow/filesystem.html
@@ -71,6 +71,7 @@
<li class="toctree-l1"><a class="reference internal" href="../../plasma.html">The Plasma In-Memory Object Store</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../pandas.html">Using PyArrow with pandas</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../parquet.html">Reading and Writing the Apache Parquet Format</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../extending.html">Building C++ and Cython Extensions using pyarrow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../api.html">API Reference</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../getting_involved.html">Getting Involved</a></li>
</ul>
@@ -257,6 +258,13 @@
<span class="sd"> """</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span>
+ <span class="k">def</span> <span class="nf">_isfilestore</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Returns True if this FileSystem is a unix-style file store with</span>
+<span class="sd"> directories.</span>
+<span class="sd"> """</span>
+ <span class="k">raise</span> <span class="ne">NotImplementedError</span>
+
<span class="k">def</span> <span class="nf">read_parquet</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">,</span> <span class="n">columns</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">metadata</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">schema</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">nthreads</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">use_pandas_metadata</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="sd">"""</span>
@@ -331,6 +339,10 @@
<span class="k">def</span> <span class="nf">isfile</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">):</span>
<span class="k">return</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">isfile</span><span class="p">(</span><span class="n">path</span><span class="p">)</span></div>
+ <span class="nd">@implements</span><span class="p">(</span><span class="n">FileSystem</span><span class="o">.</span><span class="n">_isfilestore</span><span class="p">)</span>
+ <span class="k">def</span> <span class="nf">_isfilestore</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="kc">True</span>
+
<div class="viewcode-block" id="LocalFileSystem.exists"><a class="viewcode-back" href="../../generated/pyarrow.LocalFileSystem.html#pyarrow.LocalFileSystem.exists">[docs]</a> <span class="nd">@implements</span><span class="p">(</span><span class="n">FileSystem</span><span class="o">.</span><span class="n">exists</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">exists</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">):</span>
<span class="k">return</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">path</span><span class="p">)</span></div>
@@ -369,10 +381,22 @@
<span class="k">def</span> <span class="nf">isfile</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s2">"Unsupported file system API"</span><span class="p">)</span>
+ <span class="nd">@implements</span><span class="p">(</span><span class="n">FileSystem</span><span class="o">.</span><span class="n">_isfilestore</span><span class="p">)</span>
+ <span class="k">def</span> <span class="nf">_isfilestore</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Object Stores like S3 and GCSFS are based on key lookups, not true</span>
+<span class="sd"> file-paths</span>
+<span class="sd"> """</span>
+ <span class="k">return</span> <span class="kc">False</span>
+
<span class="nd">@implements</span><span class="p">(</span><span class="n">FileSystem</span><span class="o">.</span><span class="n">delete</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">delete</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">,</span> <span class="n">recursive</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">fs</span><span class="o">.</span><span class="n">rm</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="n">recursive</span><span class="o">=</span><span class="n">recursive</span><span class="p">)</span>
+ <span class="nd">@implements</span><span class="p">(</span><span class="n">FileSystem</span><span class="o">.</span><span class="n">exists</span><span class="p">)</span>
+ <span class="k">def</span> <span class="nf">exists</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">fs</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">path</span><span class="p">)</span>
+
<span class="nd">@implements</span><span class="p">(</span><span class="n">FileSystem</span><span class="o">.</span><span class="n">mkdir</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">mkdir</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">):</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">fs</span><span class="o">.</span><span class="n">mkdir</span><span class="p">(</span><span class="n">path</span><span class="p">)</span>
@@ -460,7 +484,7 @@
</p>
<p>
© Copyright 2016-2017 Apache Software Foundation.<br/>
- Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.6.4.<br/>
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.6.5.<br/>
</p>
</div>
</footer>
http://git-wip-us.apache.org/repos/asf/arrow-site/blob/35611f84/docs/python/_modules/pyarrow/hdfs.html
----------------------------------------------------------------------
diff --git a/docs/python/_modules/pyarrow/hdfs.html b/docs/python/_modules/pyarrow/hdfs.html
index eabce0a..48d7004 100644
--- a/docs/python/_modules/pyarrow/hdfs.html
+++ b/docs/python/_modules/pyarrow/hdfs.html
@@ -71,6 +71,7 @@
<li class="toctree-l1"><a class="reference internal" href="../../plasma.html">The Plasma In-Memory Object Store</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../pandas.html">Using PyArrow with pandas</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../parquet.html">Reading and Writing the Apache Parquet Format</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../extending.html">Building C++ and Cython Extensions using pyarrow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../api.html">API Reference</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../getting_involved.html">Getting Involved</a></li>
</ul>
@@ -305,7 +306,7 @@
</p>
<p>
© Copyright 2016-2017 Apache Software Foundation.<br/>
- Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.6.4.<br/>
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.6.5.<br/>
</p>
</div>
</footer>
http://git-wip-us.apache.org/repos/asf/arrow-site/blob/35611f84/docs/python/_modules/pyarrow/ipc.html
----------------------------------------------------------------------
diff --git a/docs/python/_modules/pyarrow/ipc.html b/docs/python/_modules/pyarrow/ipc.html
index acb06d9..dd1e512 100644
--- a/docs/python/_modules/pyarrow/ipc.html
+++ b/docs/python/_modules/pyarrow/ipc.html
@@ -71,6 +71,7 @@
<li class="toctree-l1"><a class="reference internal" href="../../plasma.html">The Plasma In-Memory Object Store</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../pandas.html">Using PyArrow with pandas</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../parquet.html">Reading and Writing the Apache Parquet Format</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../extending.html">Building C++ and Cython Extensions using pyarrow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../api.html">API Reference</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../getting_involved.html">Getting Involved</a></li>
</ul>
@@ -264,19 +265,25 @@
<span class="k">return</span> <span class="n">RecordBatchFileReader</span><span class="p">(</span><span class="n">source</span><span class="p">,</span> <span class="n">footer_offset</span><span class="o">=</span><span class="n">footer_offset</span><span class="p">)</span></div>
-<span class="k">def</span> <span class="nf">serialize_pandas</span><span class="p">(</span><span class="n">df</span><span class="p">):</span>
+<span class="k">def</span> <span class="nf">serialize_pandas</span><span class="p">(</span><span class="n">df</span><span class="p">,</span> <span class="n">nthreads</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">preserve_index</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="sd">"""Serialize a pandas DataFrame into a buffer protocol compatible object.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> df : pandas.DataFrame</span>
+<span class="sd"> nthreads : int, default None</span>
+<span class="sd"> Number of threads to use for conversion to Arrow, default all CPUs</span>
+<span class="sd"> preserve_index : boolean, default True</span>
+<span class="sd"> If True, preserve the pandas index data, otherwise the result will have</span>
+<span class="sd"> a default RangeIndex</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> buf : buffer</span>
<span class="sd"> An object compatible with the buffer protocol</span>
<span class="sd"> """</span>
- <span class="n">batch</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">RecordBatch</span><span class="o">.</span><span class="n">from_pandas</span><span class="p">(</span><span class="n">df</span><span class="p">)</span>
+ <span class="n">batch</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">RecordBatch</span><span class="o">.</span><span class="n">from_pandas</span><span class="p">(</span><span class="n">df</span><span class="p">,</span> <span class="n">nthreads</span><span class="o">=</span><span class="n">nthreads</span><span class="p">,</span>
+ <span class="n">preserve_index</span><span class="o">=</span><span class="n">preserve_index</span><span class="p">)</span>
<span class="n">sink</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">BufferOutputStream</span><span class="p">()</span>
<span class="n">writer</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">RecordBatchStreamWriter</span><span class="p">(</span><span class="n">sink</span><span class="p">,</span> <span class="n">batch</span><span class="o">.</span><span class="n">schema</span><span class="p">)</span>
<span class="n">writer</span><span class="o">.</span><span class="n">write_batch</span><span class="p">(</span><span class="n">batch</span><span class="p">)</span>
@@ -284,15 +291,16 @@
<span class="k">return</span> <span class="n">sink</span><span class="o">.</span><span class="n">get_result</span><span class="p">()</span>
-<span class="k">def</span> <span class="nf">deserialize_pandas</span><span class="p">(</span><span class="n">buf</span><span class="p">,</span> <span class="n">nthreads</span><span class="o">=</span><span class="mi">1</span><span class="p">):</span>
+<span class="k">def</span> <span class="nf">deserialize_pandas</span><span class="p">(</span><span class="n">buf</span><span class="p">,</span> <span class="n">nthreads</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="sd">"""Deserialize a buffer protocol compatible object into a pandas DataFrame.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> buf : buffer</span>
<span class="sd"> An object compatible with the buffer protocol</span>
-<span class="sd"> nthreads : int, optional</span>
-<span class="sd"> The number of threads to use to convert the buffer to a DataFrame.</span>
+<span class="sd"> nthreads : int, default None</span>
+<span class="sd"> The number of threads to use to convert the buffer to a DataFrame,</span>
+<span class="sd"> default all CPUs</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
@@ -316,7 +324,7 @@
</p>
<p>
© Copyright 2016-2017 Apache Software Foundation.<br/>
- Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.6.4.<br/>
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.6.5.<br/>
</p>
</div>
</footer>
http://git-wip-us.apache.org/repos/asf/arrow-site/blob/35611f84/docs/python/_modules/pyarrow/parquet.html
----------------------------------------------------------------------
diff --git a/docs/python/_modules/pyarrow/parquet.html b/docs/python/_modules/pyarrow/parquet.html
index 565baa2..ed5f218 100644
--- a/docs/python/_modules/pyarrow/parquet.html
+++ b/docs/python/_modules/pyarrow/parquet.html
@@ -71,6 +71,7 @@
<li class="toctree-l1"><a class="reference internal" href="../../plasma.html">The Plasma In-Memory Object Store</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../pandas.html">Using PyArrow with pandas</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../parquet.html">Reading and Writing the Apache Parquet Format</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../extending.html">Building C++ and Cython Extensions using pyarrow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../api.html">API Reference</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../getting_involved.html">Getting Involved</a></li>
</ul>
@@ -324,17 +325,47 @@
<span class="k">return</span> <span class="n">table</span>
-<span class="k">class</span> <span class="nc">ParquetWriter</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
- <span class="sd">"""</span>
+<span class="n">_parquet_writer_arg_docs</span> <span class="o">=</span> <span class="s2">"""version : {"1.0", "2.0"}, default "1.0"</span>
+<span class="s2"> The Parquet format version, defaults to 1.0</span>
+<span class="s2">use_dictionary : bool or list</span>
+<span class="s2"> Specify if we should use dictionary encoding in general or only for</span>
+<span class="s2"> some columns.</span>
+<span class="s2">use_deprecated_int96_timestamps : boolean, default None</span>
+<span class="s2"> Write nanosecond resolution timestamps to INT96 Parquet</span>
+<span class="s2"> format. Defaults to False unless enabled by flavor argument</span>
+<span class="s2">coerce_timestamps : string, default None</span>
+<span class="s2"> Cast timestamps to a particular resolution.</span>
+<span class="s2"> Valid values: {None, 'ms', 'us'}</span>
+<span class="s2">compression : str or dict</span>
+<span class="s2"> Specify the compression codec, either on a general basis or per-column.</span>
+<span class="s2">flavor : {'spark'}, default None</span>
+<span class="s2"> Sanitize schema or set other compatibility options for compatibility"""</span>
+
+
+<div class="viewcode-block" id="ParquetWriter"><a class="viewcode-back" href="../../generated/pyarrow.parquet.ParquetWriter.html#pyarrow.parquet.ParquetWriter">[docs]</a><span class="k">class</span> <span class="nc">ParquetWriter</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
+
+ <span class="vm">__doc__</span> <span class="o">=</span> <span class="s2">"""</span>
+<span class="s2">Class for incrementally building a Parquet file for Arrow tables</span>
+
+<span class="s2">Parameters</span>
+<span class="s2">----------</span>
+<span class="s2">where : path or file-like object</span>
+<span class="s2">schema : arrow Schema</span>
+<span class="si">{0}</span><span class="s2"></span>
+<span class="s2">"""</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">_parquet_writer_arg_docs</span><span class="p">)</span>
+
+<div class="viewcode-block" id="ParquetWriter.__init__"><a class="viewcode-back" href="../../generated/pyarrow.parquet.ParquetWriter.html#pyarrow.parquet.ParquetWriter.__init__">[docs]</a> <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">where</span><span class="p">,</span> <span class="n">schema</span><span class="p">,</span> <span class="n">flavor</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
+ <span class="n">version</span><span class="o">=</span><span class="s1">'1.0'</span><span class="p">,</span>
+ <span class="n">use_dictionary</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
+ <span class="n">compression</span><span class="o">=</span><span class="s1">'snappy'</span><span class="p">,</span>
+ <span class="n">use_deprecated_int96_timestamps</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="o">**</span><span class="n">options</span><span class="p">):</span>
+ <span class="k">if</span> <span class="n">use_deprecated_int96_timestamps</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
+ <span class="c1"># Use int96 timestamps for Spark</span>
+ <span class="k">if</span> <span class="n">flavor</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="s1">'spark'</span> <span class="ow">in</span> <span class="n">flavor</span><span class="p">:</span>
+ <span class="n">use_deprecated_int96_timestamps</span> <span class="o">=</span> <span class="kc">True</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">use_deprecated_int96_timestamps</span> <span class="o">=</span> <span class="kc">False</span>
-<span class="sd"> Parameters</span>
-<span class="sd"> ----------</span>
-<span class="sd"> where</span>
-<span class="sd"> schema</span>
-<span class="sd"> flavor : {'spark', ...}</span>
-<span class="sd"> Set options for compatibility with a particular reader</span>
-<span class="sd"> """</span>
- <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">where</span><span class="p">,</span> <span class="n">schema</span><span class="p">,</span> <span class="n">flavor</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="o">**</span><span class="n">options</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">flavor</span> <span class="o">=</span> <span class="n">flavor</span>
<span class="k">if</span> <span class="n">flavor</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">schema</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">schema_changed</span> <span class="o">=</span> <span class="n">_sanitize_schema</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span> <span class="n">flavor</span><span class="p">)</span>
@@ -342,15 +373,29 @@
<span class="bp">self</span><span class="o">.</span><span class="n">schema_changed</span> <span class="o">=</span> <span class="kc">False</span>
<span class="bp">self</span><span class="o">.</span><span class="n">schema</span> <span class="o">=</span> <span class="n">schema</span>
- <span class="bp">self</span><span class="o">.</span><span class="n">writer</span> <span class="o">=</span> <span class="n">_parquet</span><span class="o">.</span><span class="n">ParquetWriter</span><span class="p">(</span><span class="n">where</span><span class="p">,</span> <span class="n">schema</span><span class="p">,</span> <span class="o">**</span><span class="n">options</span><span class="p">)</span>
-
- <span class="k">def</span> <span class="nf">write_table</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">table</span><span class="p">,</span> <span class="n">row_group_size</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">writer</span> <span class="o">=</span> <span class="n">_parquet</span><span class="o">.</span><span class="n">ParquetWriter</span><span class="p">(</span>
+ <span class="n">where</span><span class="p">,</span> <span class="n">schema</span><span class="p">,</span>
+ <span class="n">version</span><span class="o">=</span><span class="n">version</span><span class="p">,</span>
+ <span class="n">compression</span><span class="o">=</span><span class="n">compression</span><span class="p">,</span>
+ <span class="n">use_dictionary</span><span class="o">=</span><span class="n">use_dictionary</span><span class="p">,</span>
+ <span class="n">use_deprecated_int96_timestamps</span><span class="o">=</span><span class="n">use_deprecated_int96_timestamps</span><span class="p">,</span>
+ <span class="o">**</span><span class="n">options</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">is_open</span> <span class="o">=</span> <span class="kc">True</span></div>
+
+ <span class="k">def</span> <span class="nf">__del__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">if</span> <span class="nb">getattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s1">'is_open'</span><span class="p">,</span> <span class="kc">False</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
+
+<div class="viewcode-block" id="ParquetWriter.write_table"><a class="viewcode-back" href="../../generated/pyarrow.parquet.ParquetWriter.html#pyarrow.parquet.ParquetWriter.write_table">[docs]</a> <span class="k">def</span> <span class="nf">write_table</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">table</span><span class="p">,</span> <span class="n">row_group_size</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">schema_changed</span><span class="p">:</span>
<span class="n">table</span> <span class="o">=</span> <span class="n">_sanitize_table</span><span class="p">(</span><span class="n">table</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">schema</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">flavor</span><span class="p">)</span>
- <span class="bp">self</span><span class="o">.</span><span class="n">writer</span><span class="o">.</span><span class="n">write_table</span><span class="p">(</span><span class="n">table</span><span class="p">,</span> <span class="n">row_group_size</span><span class="o">=</span><span class="n">row_group_size</span><span class="p">)</span>
+ <span class="k">assert</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_open</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">writer</span><span class="o">.</span><span class="n">write_table</span><span class="p">(</span><span class="n">table</span><span class="p">,</span> <span class="n">row_group_size</span><span class="o">=</span><span class="n">row_group_size</span><span class="p">)</span></div>
- <span class="k">def</span> <span class="nf">close</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
- <span class="bp">self</span><span class="o">.</span><span class="n">writer</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
+<div class="viewcode-block" id="ParquetWriter.close"><a class="viewcode-back" href="../../generated/pyarrow.parquet.ParquetWriter.html#pyarrow.parquet.ParquetWriter.close">[docs]</a> <span class="k">def</span> <span class="nf">close</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_open</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">writer</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">is_open</span> <span class="o">=</span> <span class="kc">False</span></div></div>
<span class="k">def</span> <span class="nf">_get_pandas_index_columns</span><span class="p">(</span><span class="n">keyvalues</span><span class="p">):</span>
@@ -651,7 +696,7 @@
<span class="n">filtered_files</span><span class="o">.</span><span class="n">sort</span><span class="p">()</span>
<span class="n">filtered_directories</span><span class="o">.</span><span class="n">sort</span><span class="p">()</span>
- <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">files</span><span class="p">)</span> <span class="o">></span> <span class="mi">0</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">filtered_directories</span><span class="p">)</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">filtered_files</span><span class="p">)</span> <span class="o">></span> <span class="mi">0</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">filtered_directories</span><span class="p">)</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'Found files in an intermediate '</span>
<span class="s1">'directory: </span><span class="si">{0}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">base_path</span><span class="p">))</span>
<span class="k">elif</span> <span class="nb">len</span><span class="p">(</span><span class="n">filtered_directories</span><span class="p">)</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span>
@@ -979,54 +1024,21 @@
<span class="n">use_deprecated_int96_timestamps</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">coerce_timestamps</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">flavor</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
- <span class="sd">"""</span>
-<span class="sd"> Write a Table to Parquet format</span>
-
-<span class="sd"> Parameters</span>
-<span class="sd"> ----------</span>
-<span class="sd"> table : pyarrow.Table</span>
-<span class="sd"> where: string or pyarrow.io.NativeFile</span>
-<span class="sd"> row_group_size : int, default None</span>
-<span class="sd"> The maximum number of rows in each Parquet RowGroup. As a default,</span>
-<span class="sd"> we will write a single RowGroup per file.</span>
-<span class="sd"> version : {"1.0", "2.0"}, default "1.0"</span>
-<span class="sd"> The Parquet format version, defaults to 1.0</span>
-<span class="sd"> use_dictionary : bool or list</span>
-<span class="sd"> Specify if we should use dictionary encoding in general or only for</span>
-<span class="sd"> some columns.</span>
-<span class="sd"> use_deprecated_int96_timestamps : boolean, default None</span>
-<span class="sd"> Write nanosecond resolution timestamps to INT96 Parquet</span>
-<span class="sd"> format. Defaults to False unless enabled by flavor argument</span>
-<span class="sd"> coerce_timestamps : string, default None</span>
-<span class="sd"> Cast timestamps a particular resolution.</span>
-<span class="sd"> Valid values: {None, 'ms', 'us'}</span>
-<span class="sd"> compression : str or dict</span>
-<span class="sd"> Specify the compression codec, either on a general basis or per-column.</span>
-<span class="sd"> flavor : {'spark'}, default None</span>
-<span class="sd"> Sanitize schema or set other compatibility options for compatibility</span>
-<span class="sd"> """</span>
- <span class="n">row_group_size</span> <span class="o">=</span> <span class="n">kwargs</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'chunk_size'</span><span class="p">,</span> <span class="n">row_group_size</span><span class="p">)</span>
-
- <span class="k">if</span> <span class="n">use_deprecated_int96_timestamps</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
- <span class="c1"># Use int96 timestamps for Spark</span>
- <span class="k">if</span> <span class="n">flavor</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="s1">'spark'</span> <span class="ow">in</span> <span class="n">flavor</span><span class="p">:</span>
- <span class="n">use_deprecated_int96_timestamps</span> <span class="o">=</span> <span class="kc">True</span>
- <span class="k">else</span><span class="p">:</span>
- <span class="n">use_deprecated_int96_timestamps</span> <span class="o">=</span> <span class="kc">False</span>
-
- <span class="n">options</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">(</span>
- <span class="n">use_dictionary</span><span class="o">=</span><span class="n">use_dictionary</span><span class="p">,</span>
- <span class="n">compression</span><span class="o">=</span><span class="n">compression</span><span class="p">,</span>
- <span class="n">version</span><span class="o">=</span><span class="n">version</span><span class="p">,</span>
- <span class="n">use_deprecated_int96_timestamps</span><span class="o">=</span><span class="n">use_deprecated_int96_timestamps</span><span class="p">,</span>
- <span class="n">coerce_timestamps</span><span class="o">=</span><span class="n">coerce_timestamps</span><span class="p">)</span>
+ <span class="n">row_group_size</span> <span class="o">=</span> <span class="n">kwargs</span><span class="o">.</span><span class="n">pop</span><span class="p">(</span><span class="s1">'chunk_size'</span><span class="p">,</span> <span class="n">row_group_size</span><span class="p">)</span>
<span class="n">writer</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">try</span><span class="p">:</span>
- <span class="n">writer</span> <span class="o">=</span> <span class="n">ParquetWriter</span><span class="p">(</span><span class="n">where</span><span class="p">,</span> <span class="n">table</span><span class="o">.</span><span class="n">schema</span><span class="p">,</span> <span class="n">flavor</span><span class="o">=</span><span class="n">flavor</span><span class="p">,</span>
- <span class="o">**</span><span class="n">options</span><span class="p">)</span>
+ <span class="n">writer</span> <span class="o">=</span> <span class="n">ParquetWriter</span><span class="p">(</span>
+ <span class="n">where</span><span class="p">,</span> <span class="n">table</span><span class="o">.</span><span class="n">schema</span><span class="p">,</span>
+ <span class="n">version</span><span class="o">=</span><span class="n">version</span><span class="p">,</span>
+ <span class="n">flavor</span><span class="o">=</span><span class="n">flavor</span><span class="p">,</span>
+ <span class="n">use_dictionary</span><span class="o">=</span><span class="n">use_dictionary</span><span class="p">,</span>
+ <span class="n">coerce_timestamps</span><span class="o">=</span><span class="n">coerce_timestamps</span><span class="p">,</span>
+ <span class="n">compression</span><span class="o">=</span><span class="n">compression</span><span class="p">,</span>
+ <span class="n">use_deprecated_int96_timestamps</span><span class="o">=</span><span class="n">use_deprecated_int96_timestamps</span><span class="p">,</span>
+ <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
<span class="n">writer</span><span class="o">.</span><span class="n">write_table</span><span class="p">(</span><span class="n">table</span><span class="p">,</span> <span class="n">row_group_size</span><span class="o">=</span><span class="n">row_group_size</span><span class="p">)</span>
- <span class="k">except</span><span class="p">:</span>
+ <span class="k">except</span> <span class="ne">Exception</span><span class="p">:</span>
<span class="k">if</span> <span class="n">writer</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">writer</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">where</span><span class="p">,</span> <span class="n">six</span><span class="o">.</span><span class="n">string_types</span><span class="p">):</span>
@@ -1039,6 +1051,17 @@
<span class="n">writer</span><span class="o">.</span><span class="n">close</span><span class="p">()</span></div>
+<span class="n">write_table</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="s2">"""</span>
+<span class="s2">Write a Table to Parquet format</span>
+
+<span class="s2">Parameters</span>
+<span class="s2">----------</span>
+<span class="s2">table : pyarrow.Table</span>
+<span class="s2">where: string or pyarrow.io.NativeFile</span>
+<span class="si">{0}</span><span class="s2"></span>
+<span class="s2">"""</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">_parquet_writer_arg_docs</span><span class="p">)</span>
+
+
<span class="k">def</span> <span class="nf">write_to_dataset</span><span class="p">(</span><span class="n">table</span><span class="p">,</span> <span class="n">root_path</span><span class="p">,</span> <span class="n">partition_cols</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">filesystem</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">preserve_index</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
<span class="sd">"""</span>
@@ -1085,7 +1108,7 @@
<span class="k">else</span><span class="p">:</span>
<span class="n">fs</span> <span class="o">=</span> <span class="n">_ensure_filesystem</span><span class="p">(</span><span class="n">filesystem</span><span class="p">)</span>
- <span class="k">if</span> <span class="ow">not</span> <span class="n">fs</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">root_path</span><span class="p">):</span>
+ <span class="k">if</span> <span class="n">fs</span><span class="o">.</span><span class="n">_isfilestore</span><span class="p">()</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">fs</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">root_path</span><span class="p">):</span>
<span class="n">fs</span><span class="o">.</span><span class="n">mkdir</span><span class="p">(</span><span class="n">root_path</span><span class="p">)</span>
<span class="k">if</span> <span class="n">partition_cols</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">partition_cols</span><span class="p">)</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span>
@@ -1104,7 +1127,7 @@
<span class="n">subtable</span> <span class="o">=</span> <span class="n">Table</span><span class="o">.</span><span class="n">from_pandas</span><span class="p">(</span><span class="n">subgroup</span><span class="p">,</span>
<span class="n">preserve_index</span><span class="o">=</span><span class="n">preserve_index</span><span class="p">)</span>
<span class="n">prefix</span> <span class="o">=</span> <span class="s2">"/"</span><span class="o">.</span><span class="n">join</span><span class="p">([</span><span class="n">root_path</span><span class="p">,</span> <span class="n">subdir</span><span class="p">])</span>
- <span class="k">if</span> <span class="ow">not</span> <span class="n">fs</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">prefix</span><span class="p">):</span>
+ <span class="k">if</span> <span class="n">fs</span><span class="o">.</span><span class="n">_isfilestore</span><span class="p">()</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">fs</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">prefix</span><span class="p">):</span>
<span class="n">fs</span><span class="o">.</span><span class="n">mkdir</span><span class="p">(</span><span class="n">prefix</span><span class="p">)</span>
<span class="n">outfile</span> <span class="o">=</span> <span class="n">compat</span><span class="o">.</span><span class="n">guid</span><span class="p">()</span> <span class="o">+</span> <span class="s2">".parquet"</span>
<span class="n">full_path</span> <span class="o">=</span> <span class="s2">"/"</span><span class="o">.</span><span class="n">join</span><span class="p">([</span><span class="n">prefix</span><span class="p">,</span> <span class="n">outfile</span><span class="p">])</span>
@@ -1135,12 +1158,10 @@
<span class="sd"> Cast timestamps a particular resolution.</span>
<span class="sd"> Valid values: {None, 'ms', 'us'}</span>
<span class="sd"> """</span>
- <span class="n">options</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">(</span>
- <span class="n">version</span><span class="o">=</span><span class="n">version</span><span class="p">,</span>
+ <span class="n">writer</span> <span class="o">=</span> <span class="n">ParquetWriter</span><span class="p">(</span>
+ <span class="n">where</span><span class="p">,</span> <span class="n">schema</span><span class="p">,</span> <span class="n">version</span><span class="o">=</span><span class="n">version</span><span class="p">,</span>
<span class="n">use_deprecated_int96_timestamps</span><span class="o">=</span><span class="n">use_deprecated_int96_timestamps</span><span class="p">,</span>
- <span class="n">coerce_timestamps</span><span class="o">=</span><span class="n">coerce_timestamps</span>
- <span class="p">)</span>
- <span class="n">writer</span> <span class="o">=</span> <span class="n">ParquetWriter</span><span class="p">(</span><span class="n">where</span><span class="p">,</span> <span class="n">schema</span><span class="p">,</span> <span class="o">**</span><span class="n">options</span><span class="p">)</span>
+ <span class="n">coerce_timestamps</span><span class="o">=</span><span class="n">coerce_timestamps</span><span class="p">)</span>
<span class="n">writer</span><span class="o">.</span><span class="n">close</span><span class="p">()</span></div>
@@ -1186,7 +1207,7 @@
</p>
<p>
© Copyright 2016-2017 Apache Software Foundation.<br/>
- Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.6.4.<br/>
+ Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.6.5.<br/>
</p>
</div>
</footer>